Merge branch 'master' of https://github.com/xenia-project/xenia into canary_experimental
This commit is contained in:
commit
6e753c6399
|
@ -7,9 +7,6 @@
|
|||
[submodule "third_party/binutils-ppc-cygwin"]
|
||||
path = third_party/binutils-ppc-cygwin
|
||||
url = https://github.com/benvanik/binutils-ppc-cygwin.git
|
||||
[submodule "third_party/spirv-tools"]
|
||||
path = third_party/spirv-tools
|
||||
url = https://github.com/xenia-project/SPIRV-Tools.git
|
||||
[submodule "third_party/catch"]
|
||||
path = third_party/catch
|
||||
url = https://github.com/catchorg/Catch2.git
|
||||
|
@ -22,12 +19,6 @@
|
|||
[submodule "third_party/premake-export-compile-commands"]
|
||||
path = third_party/premake-export-compile-commands
|
||||
url = https://github.com/xenia-project/premake-export-compile-commands.git
|
||||
[submodule "third_party/spirv-headers"]
|
||||
path = third_party/spirv-headers
|
||||
url = https://github.com/KhronosGroup/SPIRV-Headers.git
|
||||
[submodule "third_party/volk"]
|
||||
path = third_party/volk
|
||||
url = https://github.com/zeux/volk.git
|
||||
[submodule "third_party/discord-rpc"]
|
||||
path = third_party/discord-rpc
|
||||
url = https://github.com/discordapp/discord-rpc.git
|
||||
|
@ -85,6 +76,12 @@
|
|||
[submodule "third_party/Vulkan-Headers"]
|
||||
path = third_party/Vulkan-Headers
|
||||
url = https://github.com/KhronosGroup/Vulkan-Headers.git
|
||||
[submodule "third_party/glslang"]
|
||||
path = third_party/glslang
|
||||
url = https://github.com/KhronosGroup/glslang.git
|
||||
[submodule "third_party/SPIRV-Tools"]
|
||||
path = third_party/SPIRV-Tools
|
||||
url = https://github.com/KhronosGroup/SPIRV-Tools.git
|
||||
[submodule "third_party/VulkanMemoryAllocator"]
|
||||
path = third_party/VulkanMemoryAllocator
|
||||
url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git
|
||||
|
|
|
@ -247,7 +247,6 @@ workspace("xenia")
|
|||
include("third_party/imgui.lua")
|
||||
include("third_party/mspack.lua")
|
||||
include("third_party/snappy.lua")
|
||||
include("third_party/spirv-tools.lua")
|
||||
include("third_party/xxhash.lua")
|
||||
|
||||
if not os.istarget("android") then
|
||||
|
@ -288,7 +287,6 @@ workspace("xenia")
|
|||
include("src/xenia/kernel")
|
||||
include("src/xenia/patcher")
|
||||
include("src/xenia/ui")
|
||||
include("src/xenia/ui/spirv")
|
||||
include("src/xenia/ui/vulkan")
|
||||
include("src/xenia/vfs")
|
||||
|
||||
|
|
|
@ -27,7 +27,6 @@ project("xenia-app")
|
|||
"xenia-kernel",
|
||||
"xenia-patcher",
|
||||
"xenia-ui",
|
||||
"xenia-ui-spirv",
|
||||
"xenia-ui-vulkan",
|
||||
"xenia-patcher",
|
||||
"xenia-vfs",
|
||||
|
@ -44,7 +43,6 @@ project("xenia-app")
|
|||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"spirv-tools",
|
||||
"xxhash",
|
||||
})
|
||||
defines({
|
||||
|
|
|
@ -59,7 +59,7 @@
|
|||
#include "third_party/fmt/include/fmt/format.h"
|
||||
|
||||
DEFINE_string(apu, "any", "Audio system. Use: [any, nop, sdl, xaudio2]", "APU");
|
||||
DEFINE_string(gpu, "any", "Graphics system. Use: [any, d3d12, null]",
|
||||
DEFINE_string(gpu, "any", "Graphics system. Use: [any, d3d12, vulkan, null]",
|
||||
"GPU");
|
||||
DEFINE_string(hid, "any", "Input system. Use: [any, nop, sdl, winkey, xinput]",
|
||||
"HID");
|
||||
|
@ -259,11 +259,82 @@ std::unique_ptr<apu::AudioSystem> EmulatorApp::CreateAudioSystem(
|
|||
}
|
||||
|
||||
std::unique_ptr<gpu::GraphicsSystem> EmulatorApp::CreateGraphicsSystem() {
|
||||
// While Vulkan is supported by a large variety of operating systems (Windows,
|
||||
// GNU/Linux, Android, also via the MoltenVK translation layer on top of Metal
|
||||
// on macOS and iOS), please don't remove platform-specific GPU backends from
|
||||
// Xenia.
|
||||
//
|
||||
// Regardless of the operating system, having multiple options provides more
|
||||
// stability to users. In case of driver issues, users may try switching
|
||||
// between the available backends. For example, in June 2022, on Nvidia Ampere
|
||||
// (RTX 30xx), Xenia had synchronization issues that resulted in flickering,
|
||||
// most prominently in 4D5307E6, on Direct3D 12 - but the same issue was not
|
||||
// reproducible in the Vulkan backend, however, it used ImageSampleExplicitLod
|
||||
// with explicit gradients for cubemaps, which triggered a different driver
|
||||
// bug on Nvidia (every 1 out of 2x2 pixels receiving junk).
|
||||
//
|
||||
// Specifically on Microsoft platforms, there are a few reasons why supporting
|
||||
// Direct3D 12 is desirable rather than limiting Xenia to Vulkan only:
|
||||
// - Wider hardware support for Direct3D 12 on x86 Windows desktops.
|
||||
// Direct3D 12 requires the minimum of Nvidia Fermi, or, with a pre-2021
|
||||
// driver version, Intel HD Graphics 4200. Vulkan, however, is supported
|
||||
// only starting with Nvidia Kepler and a much more recent Intel UHD
|
||||
// Graphics generation.
|
||||
// - Wider hardware support on other kinds of Microsoft devices. The Xbox One
|
||||
// and the Xbox Series X|S only support Direct3D as the GPU API in their UWP
|
||||
// runtime, and only version 12 can be granted expanded resource access.
|
||||
// Qualcomm, as of June 2022, also doesn't provide a Vulkan implementation
|
||||
// for their Arm-based Windows devices, while Direct3D 12 is available.
|
||||
// - Both older Intel GPUs and the Xbox One apparently, as well as earlier
|
||||
// Windows 10 versions, also require Shader Model 5.1 DXBC shaders rather
|
||||
// than Shader Model 6 DXIL ones, so a DXBC shader translator should be
|
||||
// available in Xenia too, a DXIL one doesn't fully replace it.
|
||||
// - As of June 2022, AMD also refuses to implement the
|
||||
// VK_EXT_fragment_shader_interlock Vulkan extension in their drivers, as
|
||||
// well as its OpenGL counterpart, which is heavily utilized for accurate
|
||||
// support of Xenos render target formats that don't have PC equivalents
|
||||
// (8_8_8_8_GAMMA, 2_10_10_10_FLOAT, 16_16 and 16_16_16_16 with -32 to 32
|
||||
// range, D24FS8) with correct blending. Direct3D 12, however, requires
|
||||
// support for similar functionality (rasterizer-ordered views) on the
|
||||
// feature level 12_1, and the AMD driver implements it on Direct3D, as well
|
||||
// as raster order groups in their Metal driver.
|
||||
//
|
||||
// Additionally, different host GPU APIs receive feature support at different
|
||||
// paces. VK_EXT_fragment_shader_interlock first appeared in 2019, for
|
||||
// instance, while Xenia had been taking advantage of rasterizer-ordered views
|
||||
// on Direct3D 12 for over half a year at that point (they have existed in
|
||||
// Direct3D 12 since the first version).
|
||||
//
|
||||
// MoltenVK on top Metal also has its flaws and limitations. Metal, for
|
||||
// instance, as of June 2022, doesn't provide a switch for primitive restart,
|
||||
// while Vulkan does - so MoltenVK is not completely transparent to Xenia,
|
||||
// many of its issues that may be not very obvious (unlike when the Metal API
|
||||
// is used directly) should be taken into account in Xenia. Also, as of June
|
||||
// 2022, MoltenVK translates SPIR-V shaders into the C++-based Metal Shading
|
||||
// Language rather than AIR directly, which likely massively increases
|
||||
// pipeline object creation time - and Xenia translates shaders and creates
|
||||
// pipelines when they're first actually used for a draw command by the game,
|
||||
// thus it can't precompile anything that hasn't ever been encountered before
|
||||
// there's already no time to waste.
|
||||
//
|
||||
// Very old hardware (Direct3D 10 level) is also not supported by most Vulkan
|
||||
// drivers. However, in the future, Xenia may be ported to it using the
|
||||
// Direct3D 11 API with the feature level 10_1 or 10_0. OpenGL, however, had
|
||||
// been lagging behind Direct3D prior to versions 4.x, and didn't receive
|
||||
// compute shaders until a 4.2 extension (while 4.2 already corresponds
|
||||
// roughly to Direct3D 11 features) - and replacing Xenia compute shaders with
|
||||
// transform feedback / stream output is not always trivial (in particular,
|
||||
// will need to rely on GL_ARB_transform_feedback3 for skipping over memory
|
||||
// locations that shouldn't be overwritten).
|
||||
//
|
||||
// For maintainability, as much implementation code as possible should be
|
||||
// placed in `xe::gpu` and shared between the backends rather than duplicated
|
||||
// between them.
|
||||
Factory<gpu::GraphicsSystem> factory;
|
||||
#if XE_PLATFORM_WIN32
|
||||
factory.Add<gpu::d3d12::D3D12GraphicsSystem>("d3d12");
|
||||
#endif // XE_PLATFORM_WIN32
|
||||
//factory.Add<gpu::vulkan::VulkanGraphicsSystem>("vulkan");
|
||||
factory.Add<gpu::vulkan::VulkanGraphicsSystem>("vulkan");
|
||||
factory.Add<gpu::null::NullGraphicsSystem>("null");
|
||||
return factory.Create(cvars::gpu);
|
||||
}
|
||||
|
|
|
@ -11,20 +11,14 @@ project("xenia-gpu")
|
|||
"fmt",
|
||||
"glslang-spirv",
|
||||
"snappy",
|
||||
"spirv-tools",
|
||||
"xenia-base",
|
||||
"xenia-ui",
|
||||
"xenia-ui-spirv",
|
||||
"xxhash",
|
||||
})
|
||||
defines({
|
||||
})
|
||||
includedirs({
|
||||
project_root.."/third_party/spirv-tools/external/include",
|
||||
project_root.."/third_party/Vulkan-Headers/include",
|
||||
})
|
||||
local_platform_files()
|
||||
-- local_platform_files("spirv")
|
||||
-- local_platform_files("spirv/passes")
|
||||
|
||||
group("src")
|
||||
project("xenia-gpu-shader-compiler")
|
||||
|
@ -36,13 +30,13 @@ project("xenia-gpu-shader-compiler")
|
|||
"fmt",
|
||||
"glslang-spirv",
|
||||
"snappy",
|
||||
"spirv-tools",
|
||||
"xenia-base",
|
||||
"xenia-gpu",
|
||||
"xenia-ui",
|
||||
"xenia-ui-spirv",
|
||||
"xenia-ui-vulkan",
|
||||
})
|
||||
defines({
|
||||
includedirs({
|
||||
project_root.."/third_party/Vulkan-Headers/include",
|
||||
})
|
||||
files({
|
||||
"shader_compiler_main.cc",
|
||||
|
|
|
@ -807,6 +807,9 @@ class Shader {
|
|||
Translation(Shader& shader, uint64_t modification)
|
||||
: shader_(shader), modification_(modification) {}
|
||||
|
||||
// If there was some failure during preparation on the implementation side.
|
||||
void MakeInvalid() { is_valid_ = false; }
|
||||
|
||||
private:
|
||||
friend class Shader;
|
||||
friend class ShaderTranslator;
|
||||
|
|
|
@ -9,9 +9,12 @@
|
|||
|
||||
#include <cinttypes>
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "third_party/glslang/SPIRV/disassemble.h"
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/console_app_main.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
|
@ -23,7 +26,7 @@
|
|||
#include "xenia/gpu/shader_translator.h"
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/spirv/spirv_disassembler.h"
|
||||
#include "xenia/ui/vulkan/spirv_tools_context.h"
|
||||
|
||||
// For D3DDisassemble:
|
||||
#if XE_PLATFORM_WIN32
|
||||
|
@ -118,9 +121,10 @@ int shader_compiler_main(const std::vector<std::string>& args) {
|
|||
shader->AnalyzeUcode(ucode_disasm_buffer);
|
||||
|
||||
std::unique_ptr<ShaderTranslator> translator;
|
||||
SpirvShaderTranslator::Features spirv_features(true);
|
||||
if (cvars::shader_output_type == "spirv" ||
|
||||
cvars::shader_output_type == "spirvtext") {
|
||||
translator = std::make_unique<SpirvShaderTranslator>();
|
||||
translator = std::make_unique<SpirvShaderTranslator>(spirv_features);
|
||||
} else if (cvars::shader_output_type == "dxbc" ||
|
||||
cvars::shader_output_type == "dxbctext") {
|
||||
translator = std::make_unique<DxbcShaderTranslator>(
|
||||
|
@ -183,13 +187,30 @@ int shader_compiler_main(const std::vector<std::string>& args) {
|
|||
const void* source_data = translation->translated_binary().data();
|
||||
size_t source_data_size = translation->translated_binary().size();
|
||||
|
||||
std::unique_ptr<xe::ui::spirv::SpirvDisassembler::Result> spirv_disasm_result;
|
||||
std::string spirv_disasm;
|
||||
if (cvars::shader_output_type == "spirvtext") {
|
||||
// Disassemble SPIRV.
|
||||
spirv_disasm_result = xe::ui::spirv::SpirvDisassembler().Disassemble(
|
||||
reinterpret_cast<const uint32_t*>(source_data), source_data_size / 4);
|
||||
source_data = spirv_disasm_result->text();
|
||||
source_data_size = std::strlen(spirv_disasm_result->text()) + 1;
|
||||
std::ostringstream spirv_disasm_stream;
|
||||
std::vector<unsigned int> spirv_source;
|
||||
spirv_source.reserve(source_data_size / sizeof(unsigned int));
|
||||
spirv_source.insert(spirv_source.cend(),
|
||||
reinterpret_cast<const unsigned int*>(source_data),
|
||||
reinterpret_cast<const unsigned int*>(source_data) +
|
||||
source_data_size / sizeof(unsigned int));
|
||||
spv::Disassemble(spirv_disasm_stream, spirv_source);
|
||||
spirv_disasm = std::move(spirv_disasm_stream.str());
|
||||
ui::vulkan::SpirvToolsContext spirv_tools_context;
|
||||
if (spirv_tools_context.Initialize(spirv_features.spirv_version)) {
|
||||
std::string spirv_validation_error;
|
||||
spirv_tools_context.Validate(
|
||||
reinterpret_cast<const uint32_t*>(spirv_source.data()),
|
||||
spirv_source.size(), &spirv_validation_error);
|
||||
if (!spirv_validation_error.empty()) {
|
||||
spirv_disasm.append(1, '\n');
|
||||
spirv_disasm.append(spirv_validation_error);
|
||||
}
|
||||
}
|
||||
source_data = spirv_disasm.c_str();
|
||||
source_data_size = spirv_disasm.size();
|
||||
}
|
||||
#if XE_PLATFORM_WIN32
|
||||
ID3DBlob* dxbc_disasm_blob = nullptr;
|
||||
|
|
|
@ -1,36 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/spirv/compiler.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace spirv {
|
||||
|
||||
Compiler::Compiler() {}
|
||||
|
||||
void Compiler::AddPass(std::unique_ptr<CompilerPass> pass) {
|
||||
compiler_passes_.push_back(std::move(pass));
|
||||
}
|
||||
|
||||
bool Compiler::Compile(spv::Module* module) {
|
||||
for (auto& pass : compiler_passes_) {
|
||||
if (!pass->Run(module)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Compiler::Reset() { compiler_passes_.clear(); }
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -1,41 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_SPIRV_COMPILER_H_
|
||||
#define XENIA_GPU_SPIRV_COMPILER_H_
|
||||
|
||||
#include "xenia/base/arena.h"
|
||||
#include "xenia/gpu/spirv/compiler_pass.h"
|
||||
|
||||
#include "third_party/glslang-spirv/SpvBuilder.h"
|
||||
#include "third_party/spirv/GLSL.std.450.hpp11"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace spirv {
|
||||
|
||||
// SPIR-V Compiler. Designed to optimize SPIR-V code before feeding it into the
|
||||
// drivers.
|
||||
class Compiler {
|
||||
public:
|
||||
Compiler();
|
||||
|
||||
void AddPass(std::unique_ptr<CompilerPass> pass);
|
||||
void Reset();
|
||||
bool Compile(spv::Module* module);
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<CompilerPass>> compiler_passes_;
|
||||
};
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_SPIRV_COMPILER_H_
|
|
@ -1,37 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_SPIRV_COMPILER_PASS_H_
|
||||
#define XENIA_GPU_SPIRV_COMPILER_PASS_H_
|
||||
|
||||
#include "xenia/base/arena.h"
|
||||
|
||||
#include "third_party/glslang-spirv/SpvBuilder.h"
|
||||
#include "third_party/spirv/GLSL.std.450.hpp11"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace spirv {
|
||||
|
||||
class CompilerPass {
|
||||
public:
|
||||
CompilerPass() = default;
|
||||
virtual ~CompilerPass() {}
|
||||
|
||||
virtual bool Run(spv::Module* module) = 0;
|
||||
|
||||
private:
|
||||
xe::Arena ir_arena_;
|
||||
};
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif
|
|
@ -1,30 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace spirv {
|
||||
|
||||
ControlFlowAnalysisPass::ControlFlowAnalysisPass() {}
|
||||
|
||||
bool ControlFlowAnalysisPass::Run(spv::Module* module) {
|
||||
for (auto function : module->getFunctions()) {
|
||||
// For each OpBranchConditional, see if we can find a point where control
|
||||
// flow converges and then append an OpSelectionMerge.
|
||||
// Potential problems: while loops constructed from branch instructions
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -1,34 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
|
||||
#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
|
||||
|
||||
#include "xenia/gpu/spirv/compiler_pass.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace spirv {
|
||||
|
||||
// Control-flow analysis pass. Runs through control-flow and adds merge opcodes
|
||||
// where necessary.
|
||||
class ControlFlowAnalysisPass : public CompilerPass {
|
||||
public:
|
||||
ControlFlowAnalysisPass();
|
||||
|
||||
bool Run(spv::Module* module) override;
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
|
|
@ -1,48 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace spirv {
|
||||
|
||||
ControlFlowSimplificationPass::ControlFlowSimplificationPass() {}
|
||||
|
||||
bool ControlFlowSimplificationPass::Run(spv::Module* module) {
|
||||
for (auto function : module->getFunctions()) {
|
||||
// Walk through the blocks in the function and merge any blocks which are
|
||||
// unconditionally dominated.
|
||||
for (auto it = function->getBlocks().end() - 1;
|
||||
it != function->getBlocks().begin();) {
|
||||
auto block = *it;
|
||||
if (!block->isUnreachable() && block->getPredecessors().size() == 1) {
|
||||
auto prev_block = block->getPredecessors()[0];
|
||||
auto last_instr =
|
||||
prev_block->getInstruction(prev_block->getInstructionCount() - 1);
|
||||
if (last_instr->getOpCode() == spv::Op::OpBranch) {
|
||||
if (prev_block->getSuccessors().size() == 1 &&
|
||||
prev_block->getSuccessors()[0] == block) {
|
||||
// We're dominated by this block. Merge into it.
|
||||
prev_block->merge(block);
|
||||
block->setUnreachable();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
--it;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -1,34 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_
|
||||
#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_
|
||||
|
||||
#include "xenia/gpu/spirv/compiler_pass.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace spirv {
|
||||
|
||||
// Control-flow simplification pass. Combines adjacent blocks and marks
|
||||
// any unreachable blocks.
|
||||
class ControlFlowSimplificationPass : public CompilerPass {
|
||||
public:
|
||||
ControlFlowSimplificationPass();
|
||||
|
||||
bool Run(spv::Module* module) override;
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_
|
|
@ -0,0 +1,30 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/spirv_shader.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
SpirvShader::SpirvShader(xenos::ShaderType shader_type,
|
||||
uint64_t ucode_data_hash, const uint32_t* ucode_dwords,
|
||||
size_t ucode_dword_count,
|
||||
std::endian ucode_source_endian)
|
||||
: Shader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count,
|
||||
ucode_source_endian) {}
|
||||
|
||||
Shader::Translation* SpirvShader::CreateTranslationInstance(
|
||||
uint64_t modification) {
|
||||
return new SpirvTranslation(*this, modification);
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -0,0 +1,81 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_SPIRV_SHADER_H_
|
||||
#define XENIA_GPU_SPIRV_SHADER_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/gpu/shader.h"
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
class SpirvShader : public Shader {
|
||||
public:
|
||||
class SpirvTranslation : public Translation {
|
||||
public:
|
||||
explicit SpirvTranslation(SpirvShader& shader, uint64_t modification)
|
||||
: Translation(shader, modification) {}
|
||||
};
|
||||
|
||||
explicit SpirvShader(xenos::ShaderType shader_type, uint64_t ucode_data_hash,
|
||||
const uint32_t* ucode_dwords, size_t ucode_dword_count,
|
||||
std::endian ucode_source_endian = std::endian::big);
|
||||
|
||||
// Resource bindings are gathered after the successful translation of any
|
||||
// modification for simplicity of translation (and they don't depend on
|
||||
// modification bits).
|
||||
|
||||
struct TextureBinding {
|
||||
uint32_t fetch_constant : 5;
|
||||
// Stacked and 3D are separate TextureBindings.
|
||||
xenos::FetchOpDimension dimension : 2;
|
||||
uint32_t is_signed : 1;
|
||||
};
|
||||
// Safe to hash and compare with memcmp for layout hashing.
|
||||
const std::vector<TextureBinding>& GetTextureBindingsAfterTranslation()
|
||||
const {
|
||||
return texture_bindings_;
|
||||
}
|
||||
const uint32_t GetUsedTextureMaskAfterTranslation() const {
|
||||
return used_texture_mask_;
|
||||
}
|
||||
|
||||
struct SamplerBinding {
|
||||
uint32_t fetch_constant : 5;
|
||||
xenos::TextureFilter mag_filter : 2;
|
||||
xenos::TextureFilter min_filter : 2;
|
||||
xenos::TextureFilter mip_filter : 2;
|
||||
xenos::AnisoFilter aniso_filter : 3;
|
||||
};
|
||||
const std::vector<SamplerBinding>& GetSamplerBindingsAfterTranslation()
|
||||
const {
|
||||
return sampler_bindings_;
|
||||
}
|
||||
|
||||
protected:
|
||||
Translation* CreateTranslationInstance(uint64_t modification) override;
|
||||
|
||||
private:
|
||||
friend class SpirvShaderTranslator;
|
||||
|
||||
std::atomic_flag bindings_setup_entered_ = ATOMIC_FLAG_INIT;
|
||||
std::vector<TextureBinding> texture_bindings_;
|
||||
std::vector<SamplerBinding> sampler_bindings_;
|
||||
uint32_t used_texture_mask_ = 0;
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_SPIRV_SHADER_H_
|
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -10,91 +10,274 @@
|
|||
#ifndef XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_
|
||||
#define XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "third_party/glslang-spirv/SpvBuilder.h"
|
||||
#include "third_party/spirv/GLSL.std.450.hpp11"
|
||||
#include "third_party/glslang/SPIRV/SpvBuilder.h"
|
||||
#include "xenia/gpu/shader_translator.h"
|
||||
#include "xenia/ui/spirv/spirv_disassembler.h"
|
||||
#include "xenia/ui/spirv/spirv_validator.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
// Push constants embedded within the command buffer.
|
||||
// The total size of this struct must be <= 128b (as that's the commonly
|
||||
// supported size).
|
||||
struct SpirvPushConstants {
|
||||
// Accessible to vertex shader only:
|
||||
float window_scale[4]; // scale x/y, offset x/y (pixels)
|
||||
float vtx_fmt[4];
|
||||
|
||||
// Accessible to geometry shader only:
|
||||
float point_size[4]; // psx, psy, unused, unused
|
||||
|
||||
// Accessible to fragment shader only:
|
||||
float alpha_test[4]; // alpha test enable, func, ref
|
||||
float color_exp_bias[4];
|
||||
uint32_t ps_param_gen;
|
||||
};
|
||||
static_assert(sizeof(SpirvPushConstants) <= 128,
|
||||
"Push constants must fit <= 128b");
|
||||
constexpr uint32_t kSpirvPushConstantVertexRangeOffset = 0;
|
||||
constexpr uint32_t kSpirvPushConstantVertexRangeSize = (sizeof(float) * 4) * 2;
|
||||
constexpr uint32_t kSpirvPushConstantGeometryRangeOffset =
|
||||
kSpirvPushConstantVertexRangeOffset + kSpirvPushConstantVertexRangeSize;
|
||||
constexpr uint32_t kSpirvPushConstantGeometryRangeSize = (sizeof(float) * 4);
|
||||
constexpr uint32_t kSpirvPushConstantFragmentRangeOffset =
|
||||
kSpirvPushConstantGeometryRangeOffset + kSpirvPushConstantGeometryRangeSize;
|
||||
constexpr uint32_t kSpirvPushConstantFragmentRangeSize =
|
||||
(sizeof(float) * 4) + sizeof(uint32_t);
|
||||
constexpr uint32_t kSpirvPushConstantsSize = sizeof(SpirvPushConstants);
|
||||
|
||||
class SpirvShaderTranslator : public ShaderTranslator {
|
||||
public:
|
||||
SpirvShaderTranslator();
|
||||
~SpirvShaderTranslator() override;
|
||||
union Modification {
|
||||
// If anything in this is structure is changed in a way not compatible with
|
||||
// the previous layout, invalidate the pipeline storages by increasing this
|
||||
// version number (0xYYYYMMDD)!
|
||||
// TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the rapid
|
||||
// prototyping stage (easier to do small granular updates with an
|
||||
// incremental counter).
|
||||
static constexpr uint32_t kVersion = 4;
|
||||
|
||||
enum class DepthStencilMode : uint32_t {
|
||||
kNoModifiers,
|
||||
// Early fragment tests - enable if alpha test and alpha to coverage are
|
||||
// disabled; ignored if anything in the shader blocks early Z writing.
|
||||
kEarlyHint,
|
||||
// TODO(Triang3l): Unorm24 (rounding) and float24 (truncating and
|
||||
// rounding) output modes.
|
||||
};
|
||||
|
||||
struct {
|
||||
// Dynamically indexable register count from SQ_PROGRAM_CNTL.
|
||||
uint32_t dynamic_addressable_register_count : 8;
|
||||
// Pipeline stage and input configuration.
|
||||
Shader::HostVertexShaderType host_vertex_shader_type
|
||||
: Shader::kHostVertexShaderTypeBitCount;
|
||||
} vertex;
|
||||
struct PixelShaderModification {
|
||||
// Dynamically indexable register count from SQ_PROGRAM_CNTL.
|
||||
uint32_t dynamic_addressable_register_count : 8;
|
||||
uint32_t param_gen_enable : 1;
|
||||
uint32_t param_gen_interpolator : 4;
|
||||
// If param_gen_enable is set, this must be set for point primitives, and
|
||||
// must not be set for other primitive types - enables the point sprite
|
||||
// coordinates input, and also effects the flag bits in PsParamGen.
|
||||
uint32_t param_gen_point : 1;
|
||||
// For host render targets - depth / stencil output mode.
|
||||
DepthStencilMode depth_stencil_mode : 3;
|
||||
} pixel;
|
||||
uint64_t value = 0;
|
||||
|
||||
Modification(uint64_t modification_value = 0) : value(modification_value) {}
|
||||
};
|
||||
|
||||
enum : uint32_t {
|
||||
kSysFlag_XYDividedByW_Shift,
|
||||
kSysFlag_ZDividedByW_Shift,
|
||||
kSysFlag_WNotReciprocal_Shift,
|
||||
kSysFlag_PrimitivePolygonal_Shift,
|
||||
kSysFlag_PrimitiveLine_Shift,
|
||||
kSysFlag_AlphaPassIfLess_Shift,
|
||||
kSysFlag_AlphaPassIfEqual_Shift,
|
||||
kSysFlag_AlphaPassIfGreater_Shift,
|
||||
kSysFlag_ConvertColor0ToGamma_Shift,
|
||||
kSysFlag_ConvertColor1ToGamma_Shift,
|
||||
kSysFlag_ConvertColor2ToGamma_Shift,
|
||||
kSysFlag_ConvertColor3ToGamma_Shift,
|
||||
|
||||
kSysFlag_Count,
|
||||
|
||||
kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift,
|
||||
kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift,
|
||||
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
|
||||
kSysFlag_PrimitivePolygonal = 1u << kSysFlag_PrimitivePolygonal_Shift,
|
||||
kSysFlag_PrimitiveLine = 1u << kSysFlag_PrimitiveLine_Shift,
|
||||
kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift,
|
||||
kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift,
|
||||
kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift,
|
||||
kSysFlag_ConvertColor0ToGamma = 1u << kSysFlag_ConvertColor0ToGamma_Shift,
|
||||
kSysFlag_ConvertColor1ToGamma = 1u << kSysFlag_ConvertColor1ToGamma_Shift,
|
||||
kSysFlag_ConvertColor2ToGamma = 1u << kSysFlag_ConvertColor2ToGamma_Shift,
|
||||
kSysFlag_ConvertColor3ToGamma = 1u << kSysFlag_ConvertColor3ToGamma_Shift,
|
||||
};
|
||||
static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants");
|
||||
|
||||
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
|
||||
// - SystemConstantIndex enum.
|
||||
// - Structure members in BeginTranslation.
|
||||
struct SystemConstants {
|
||||
uint32_t flags;
|
||||
xenos::Endian vertex_index_endian;
|
||||
int32_t vertex_base_index;
|
||||
uint32_t padding_vertex_base_index;
|
||||
|
||||
float ndc_scale[3];
|
||||
uint32_t padding_ndc_scale;
|
||||
|
||||
float ndc_offset[3];
|
||||
uint32_t padding_ndc_offset;
|
||||
|
||||
// Each byte contains post-swizzle TextureSign values for each of the needed
|
||||
// components of each of the 32 used texture fetch constants.
|
||||
uint32_t texture_swizzled_signs[8];
|
||||
|
||||
// If the imageViewFormatSwizzle portability subset is not supported, the
|
||||
// component swizzle (taking both guest and host swizzles into account) to
|
||||
// apply to the result directly in the shader code. In each uint32_t,
|
||||
// swizzles for 2 texture fetch constants (in bits 0:11 and 12:23).
|
||||
uint32_t texture_swizzles[16];
|
||||
|
||||
float alpha_test_reference;
|
||||
float padding_alpha_test_reference[3];
|
||||
|
||||
float color_exp_bias[4];
|
||||
};
|
||||
|
||||
// The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for
|
||||
// maxStorageBufferRange it's 128 MB. These are the values of those limits on
|
||||
// Arm Mali as of November 2020. Xenia needs 512 MB shared memory to be bound,
|
||||
// therefore SSBOs must only be used for shared memory - all other storage
|
||||
// resources must be images or texel buffers.
|
||||
enum DescriptorSet : uint32_t {
|
||||
// According to the "Pipeline Layout Compatibility" section of the Vulkan
|
||||
// specification:
|
||||
// "Two pipeline layouts are defined to be "compatible for set N" if they
|
||||
// were created with identically defined descriptor set layouts for sets
|
||||
// zero through N, and if they were created with identical push constant
|
||||
// ranges."
|
||||
// "Place the least frequently changing descriptor sets near the start of
|
||||
// the pipeline layout, and place the descriptor sets representing the most
|
||||
// frequently changing resources near the end. When pipelines are switched,
|
||||
// only the descriptor set bindings that have been invalidated will need to
|
||||
// be updated and the remainder of the descriptor set bindings will remain
|
||||
// in place."
|
||||
// This is partially the reverse of the Direct3D 12's rule of placing the
|
||||
// most frequently changed descriptor sets in the beginning. Here all
|
||||
// descriptor sets with an immutable layout are placed first, in reverse
|
||||
// frequency of changing, and sets that may be different for different
|
||||
// pipeline states last.
|
||||
|
||||
// Always the same descriptor set layouts for all pipeline layouts:
|
||||
|
||||
// Never changed.
|
||||
kDescriptorSetSharedMemoryAndEdram,
|
||||
// Pretty rarely used and rarely changed - flow control constants.
|
||||
kDescriptorSetBoolLoopConstants,
|
||||
// May stay the same across many draws.
|
||||
kDescriptorSetSystemConstants,
|
||||
// Less frequently changed (per-material).
|
||||
kDescriptorSetFloatConstantsPixel,
|
||||
// Quite frequently changed (for one object drawn multiple times, for
|
||||
// instance - may contain projection matrices).
|
||||
kDescriptorSetFloatConstantsVertex,
|
||||
// Very frequently changed, especially for UI draws, and for models drawn in
|
||||
// multiple parts - contains vertex and texture fetch constants.
|
||||
kDescriptorSetFetchConstants,
|
||||
|
||||
// Mutable part of the pipeline layout:
|
||||
kDescriptorSetMutableLayoutsStart,
|
||||
|
||||
// Rarely used at all, but may be changed at an unpredictable rate when
|
||||
// vertex textures are used.
|
||||
kDescriptorSetSamplersVertex = kDescriptorSetMutableLayoutsStart,
|
||||
kDescriptorSetTexturesVertex,
|
||||
// Per-material textures.
|
||||
kDescriptorSetSamplersPixel,
|
||||
kDescriptorSetTexturesPixel,
|
||||
kDescriptorSetCount,
|
||||
};
|
||||
|
||||
// "Xenia Emulator Microcode Translator".
|
||||
// https://github.com/KhronosGroup/SPIRV-Headers/blob/c43a43c7cc3af55910b9bec2a71e3e8a622443cf/include/spirv/spir-v.xml#L79
|
||||
static constexpr uint32_t kSpirvMagicToolId = 26;
|
||||
|
||||
struct Features {
|
||||
explicit Features(const ui::vulkan::VulkanProvider& provider);
|
||||
explicit Features(bool all = false);
|
||||
unsigned int spirv_version;
|
||||
uint32_t max_storage_buffer_range;
|
||||
bool clip_distance;
|
||||
bool cull_distance;
|
||||
bool image_view_format_swizzle;
|
||||
bool signed_zero_inf_nan_preserve_float32;
|
||||
bool denorm_flush_to_zero_float32;
|
||||
};
|
||||
SpirvShaderTranslator(const Features& features);
|
||||
|
||||
// Not storing anything else in modifications (as this shader translator is
|
||||
// being replaced anyway).
|
||||
uint64_t GetDefaultVertexShaderModification(
|
||||
uint32_t dynamic_addressable_register_count,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||
Shader::HostVertexShaderType::kVertex) const override {
|
||||
return dynamic_addressable_register_count;
|
||||
}
|
||||
Shader::HostVertexShaderType::kVertex) const override;
|
||||
uint64_t GetDefaultPixelShaderModification(
|
||||
uint32_t dynamic_addressable_register_count) const override {
|
||||
return dynamic_addressable_register_count;
|
||||
uint32_t dynamic_addressable_register_count) const override;
|
||||
|
||||
static constexpr uint32_t GetSharedMemoryStorageBufferCountLog2(
|
||||
uint32_t max_storage_buffer_range) {
|
||||
if (max_storage_buffer_range >= 512 * 1024 * 1024) {
|
||||
return 0;
|
||||
}
|
||||
if (max_storage_buffer_range >= 256 * 1024 * 1024) {
|
||||
return 1;
|
||||
}
|
||||
return 2;
|
||||
}
|
||||
uint32_t GetSharedMemoryStorageBufferCountLog2() const {
|
||||
return GetSharedMemoryStorageBufferCountLog2(
|
||||
features_.max_storage_buffer_range);
|
||||
}
|
||||
|
||||
// Common functions useful not only for the translator, but also for EDRAM
|
||||
// emulation via conventional render targets.
|
||||
|
||||
// Converts the color value externally clamped to [0, 31.875] to 7e3 floating
|
||||
// point, with zeros in bits 10:31, rounding to the nearest even.
|
||||
static spv::Id PreClampedFloat32To7e3(spv::Builder& builder,
|
||||
spv::Id f32_scalar,
|
||||
spv::Id ext_inst_glsl_std_450);
|
||||
// Same as PreClampedFloat32To7e3, but clamps the input to [0, 31.875].
|
||||
static spv::Id UnclampedFloat32To7e3(spv::Builder& builder,
|
||||
spv::Id f32_scalar,
|
||||
spv::Id ext_inst_glsl_std_450);
|
||||
// Converts the 7e3 number in bits [f10_shift, f10_shift + 10) to a 32-bit
|
||||
// float.
|
||||
static spv::Id Float7e3To32(spv::Builder& builder, spv::Id f10_uint_scalar,
|
||||
uint32_t f10_shift, bool result_as_uint,
|
||||
spv::Id ext_inst_glsl_std_450);
|
||||
// Converts the depth value externally clamped to the representable [0, 2)
|
||||
// range to 20e4 floating point, with zeros in bits 24:31, rounding to the
|
||||
// nearest even or towards zero. If remap_from_0_to_0_5 is true, it's assumed
|
||||
// that 0...1 is pre-remapped to 0...0.5 in the input.
|
||||
static spv::Id PreClampedDepthTo20e4(spv::Builder& builder,
|
||||
spv::Id f32_scalar,
|
||||
bool round_to_nearest_even,
|
||||
bool remap_from_0_to_0_5,
|
||||
spv::Id ext_inst_glsl_std_450);
|
||||
// Converts the 20e4 number in bits [f24_shift, f24_shift + 10) to a 32-bit
|
||||
// float.
|
||||
static spv::Id Depth20e4To32(spv::Builder& builder, spv::Id f24_uint_scalar,
|
||||
uint32_t f24_shift, bool remap_to_0_to_0_5,
|
||||
bool result_as_uint,
|
||||
spv::Id ext_inst_glsl_std_450);
|
||||
|
||||
protected:
|
||||
uint32_t GetModificationRegisterCount() const override {
|
||||
return uint32_t(current_translation().modification());
|
||||
}
|
||||
void Reset() override;
|
||||
|
||||
uint32_t GetModificationRegisterCount() const override;
|
||||
|
||||
void StartTranslation() override;
|
||||
|
||||
std::vector<uint8_t> CompleteTranslation() override;
|
||||
|
||||
void PostTranslation() override;
|
||||
|
||||
void PreProcessControlFlowInstructions(
|
||||
std::vector<ucode::ControlFlowInstruction> instrs) override;
|
||||
void ProcessLabel(uint32_t cf_index) override;
|
||||
void ProcessControlFlowInstructionBegin(uint32_t cf_index) override;
|
||||
void ProcessControlFlowInstructionEnd(uint32_t cf_index) override;
|
||||
void ProcessControlFlowNopInstruction(uint32_t cf_index) override;
|
||||
|
||||
void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override;
|
||||
void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override;
|
||||
void ProcessLoopStartInstruction(
|
||||
const ParsedLoopStartInstruction& instr) override;
|
||||
void ProcessLoopEndInstruction(
|
||||
const ParsedLoopEndInstruction& instr) override;
|
||||
void ProcessCallInstruction(const ParsedCallInstruction& instr) override;
|
||||
void ProcessReturnInstruction(const ParsedReturnInstruction& instr) override;
|
||||
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
|
||||
void ProcessAllocInstruction(const ParsedAllocInstruction& instr) override;
|
||||
|
||||
void ProcessVertexFetchInstruction(
|
||||
const ParsedVertexFetchInstruction& instr) override;
|
||||
void ProcessTextureFetchInstruction(
|
||||
|
@ -102,99 +285,374 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
|
||||
|
||||
private:
|
||||
spv::Function* CreateCubeFunction();
|
||||
struct TextureBinding {
|
||||
uint32_t fetch_constant;
|
||||
// Stacked and 3D are separate TextureBindings.
|
||||
xenos::FetchOpDimension dimension;
|
||||
bool is_signed;
|
||||
|
||||
bool ProcessVectorAluOperation(const ParsedAluInstruction& instr,
|
||||
bool& close_predicate_block);
|
||||
bool ProcessScalarAluOperation(const ParsedAluInstruction& instr,
|
||||
bool& close_predicate_block);
|
||||
|
||||
spv::Id BitfieldExtract(spv::Id result_type, spv::Id base, bool is_signed,
|
||||
uint32_t offset, uint32_t count);
|
||||
spv::Id ConvertNormVar(spv::Id var, spv::Id result_type, uint32_t bits,
|
||||
bool is_signed);
|
||||
|
||||
// Creates a call to the given GLSL intrinsic.
|
||||
spv::Id CreateGlslStd450InstructionCall(spv::Decoration precision,
|
||||
spv::Id result_type,
|
||||
spv::GLSLstd450 instruction_ordinal,
|
||||
std::vector<spv::Id> args);
|
||||
|
||||
// Loads an operand into a value.
|
||||
// The value returned will be in the form described in the operand (number of
|
||||
// components, etc).
|
||||
spv::Id LoadFromOperand(const InstructionOperand& op);
|
||||
// Stores a value based on the specified result information.
|
||||
// The value will be transformed into the appropriate form for the result and
|
||||
// the proper components will be selected.
|
||||
void StoreToResult(spv::Id source_value_id, const InstructionResult& result);
|
||||
|
||||
xe::ui::spirv::SpirvDisassembler disassembler_;
|
||||
xe::ui::spirv::SpirvValidator validator_;
|
||||
|
||||
// True if there's an open predicated block
|
||||
bool open_predicated_block_ = false;
|
||||
bool predicated_block_cond_ = false;
|
||||
spv::Block* predicated_block_end_ = nullptr;
|
||||
|
||||
// Exec block conditional?
|
||||
bool exec_cond_ = false;
|
||||
spv::Block* exec_skip_block_ = nullptr;
|
||||
|
||||
// TODO(benvanik): replace with something better, make reusable, etc.
|
||||
std::unique_ptr<spv::Builder> builder_;
|
||||
spv::Id glsl_std_450_instruction_set_ = 0;
|
||||
|
||||
// Generated function
|
||||
spv::Function* translated_main_ = nullptr;
|
||||
spv::Function* cube_function_ = nullptr;
|
||||
|
||||
// Types.
|
||||
spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0, uint_type_ = 0;
|
||||
spv::Id vec2_int_type_ = 0, vec2_uint_type_ = 0, vec3_int_type_ = 0;
|
||||
spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0;
|
||||
spv::Id vec4_int_type_ = 0, vec4_uint_type_ = 0;
|
||||
spv::Id vec2_bool_type_ = 0, vec3_bool_type_ = 0, vec4_bool_type_ = 0;
|
||||
spv::Id image_2d_type_ = 0, image_3d_type_ = 0, image_cube_type_ = 0;
|
||||
|
||||
// Constants.
|
||||
spv::Id vec4_float_zero_ = 0, vec4_float_one_ = 0;
|
||||
|
||||
// Array of AMD registers.
|
||||
// These values are all pointers.
|
||||
spv::Id registers_ptr_ = 0, registers_type_ = 0;
|
||||
spv::Id consts_ = 0, a0_ = 0, p0_ = 0;
|
||||
spv::Id aL_ = 0; // Loop index stack - .x is active loop
|
||||
spv::Id loop_count_ = 0; // Loop counter stack
|
||||
spv::Id ps_ = 0, pv_ = 0; // IDs of previous results
|
||||
spv::Id pc_ = 0; // Program counter
|
||||
spv::Id lod_ = 0; // LOD register
|
||||
spv::Id pos_ = 0;
|
||||
spv::Id push_consts_ = 0;
|
||||
spv::Id interpolators_ = 0;
|
||||
spv::Id point_size_ = 0;
|
||||
spv::Id point_coord_ = 0;
|
||||
spv::Id vertex_idx_ = 0;
|
||||
spv::Id frag_outputs_ = 0, frag_depth_ = 0;
|
||||
spv::Id samplers_ = 0;
|
||||
spv::Id tex_[3] = {0}; // Images {2D, 3D, Cube}
|
||||
std::unordered_map<uint32_t, uint32_t> tex_binding_map_;
|
||||
spv::Id vtx_ = 0; // Vertex buffer array (32 runtime arrays)
|
||||
std::unordered_map<uint32_t, uint32_t> vtx_binding_map_;
|
||||
|
||||
// SPIR-V IDs that are part of the in/out interface.
|
||||
std::vector<spv::Id> interface_ids_;
|
||||
|
||||
struct CFBlock {
|
||||
spv::Block* block = nullptr;
|
||||
bool labelled = false;
|
||||
spv::Id variable;
|
||||
};
|
||||
std::vector<CFBlock> cf_blocks_;
|
||||
spv::Block* switch_break_block_ = nullptr;
|
||||
spv::Block* loop_head_block_ = nullptr;
|
||||
spv::Block* loop_body_block_ = nullptr;
|
||||
spv::Block* loop_cont_block_ = nullptr;
|
||||
spv::Block* loop_exit_block_ = nullptr;
|
||||
|
||||
struct SamplerBinding {
|
||||
uint32_t fetch_constant;
|
||||
xenos::TextureFilter mag_filter;
|
||||
xenos::TextureFilter min_filter;
|
||||
xenos::TextureFilter mip_filter;
|
||||
xenos::AnisoFilter aniso_filter;
|
||||
|
||||
spv::Id variable;
|
||||
};
|
||||
|
||||
// Builder helpers.
|
||||
spv::Id SpirvSmearScalarResultOrConstant(spv::Id scalar, spv::Id vector_type);
|
||||
void SpirvCreateSelectionMerge(
|
||||
spv::Id merge_block_id, spv::SelectionControlMask selection_control_mask =
|
||||
spv::SelectionControlMaskNone) {
|
||||
std::unique_ptr<spv::Instruction> selection_merge_op =
|
||||
std::make_unique<spv::Instruction>(spv::OpSelectionMerge);
|
||||
selection_merge_op->addIdOperand(merge_block_id);
|
||||
selection_merge_op->addImmediateOperand(selection_control_mask);
|
||||
builder_->getBuildPoint()->addInstruction(std::move(selection_merge_op));
|
||||
}
|
||||
|
||||
Modification GetSpirvShaderModification() const {
|
||||
return Modification(current_translation().modification());
|
||||
}
|
||||
|
||||
bool IsSpirvVertexShader() const {
|
||||
return is_vertex_shader() &&
|
||||
!Shader::IsHostVertexShaderTypeDomain(
|
||||
GetSpirvShaderModification().vertex.host_vertex_shader_type);
|
||||
}
|
||||
bool IsSpirvTessEvalShader() const {
|
||||
return is_vertex_shader() &&
|
||||
Shader::IsHostVertexShaderTypeDomain(
|
||||
GetSpirvShaderModification().vertex.host_vertex_shader_type);
|
||||
}
|
||||
|
||||
bool IsExecutionModeEarlyFragmentTests() const {
|
||||
// TODO(Triang3l): Not applicable to fragment shader interlock.
|
||||
return is_pixel_shader() &&
|
||||
GetSpirvShaderModification().pixel.depth_stencil_mode ==
|
||||
Modification::DepthStencilMode::kEarlyHint &&
|
||||
current_shader().implicit_early_z_write_allowed();
|
||||
}
|
||||
|
||||
// Returns UINT32_MAX if PsParamGen doesn't need to be written.
|
||||
uint32_t GetPsParamGenInterpolator() const;
|
||||
|
||||
// Must be called before emitting any SPIR-V operations that must be in a
|
||||
// block in translator callbacks to ensure that if the last instruction added
|
||||
// was something like OpBranch - in this case, an unreachable block is
|
||||
// created.
|
||||
void EnsureBuildPointAvailable();
|
||||
|
||||
void StartVertexOrTessEvalShaderBeforeMain();
|
||||
void StartVertexOrTessEvalShaderInMain();
|
||||
void CompleteVertexOrTessEvalShaderInMain();
|
||||
|
||||
void StartFragmentShaderBeforeMain();
|
||||
void StartFragmentShaderInMain();
|
||||
void CompleteFragmentShaderInMain();
|
||||
|
||||
// Updates the current flow control condition (to be called in the beginning
|
||||
// of exec and in jumps), closing the previous conditionals if needed.
|
||||
// However, if the condition is not different, the instruction-level predicate
|
||||
// conditional also won't be closed - this must be checked separately if
|
||||
// needed (for example, in jumps).
|
||||
void UpdateExecConditionals(ParsedExecInstruction::Type type,
|
||||
uint32_t bool_constant_index, bool condition);
|
||||
// Opens or reopens the predicate check conditional for the instruction.
|
||||
// Should be called before processing a non-control-flow instruction.
|
||||
void UpdateInstructionPredication(bool predicated, bool condition);
|
||||
// Closes the instruction-level predicate conditional if it's open, useful if
|
||||
// a control flow instruction needs to do some code which needs to respect the
|
||||
// current exec conditional, but can't itself be predicated.
|
||||
void CloseInstructionPredication();
|
||||
// Closes conditionals opened by exec and instructions within them (but not by
|
||||
// labels) and updates the state accordingly.
|
||||
void CloseExecConditionals();
|
||||
|
||||
spv::Id GetStorageAddressingIndex(
|
||||
InstructionStorageAddressingMode addressing_mode, uint32_t storage_index,
|
||||
bool is_float_constant = false);
|
||||
// Loads unswizzled operand without sign modifiers as float4.
|
||||
spv::Id LoadOperandStorage(const InstructionOperand& operand);
|
||||
spv::Id ApplyOperandModifiers(spv::Id operand_value,
|
||||
const InstructionOperand& original_operand,
|
||||
bool invert_negate = false,
|
||||
bool force_absolute = false);
|
||||
// Returns the requested components, with the operand's swizzle applied, in a
|
||||
// condensed form, but without negation / absolute value modifiers. The
|
||||
// storage is float4, no matter what the component count of original_operand
|
||||
// is (the storage will be either r# or c#, but the instruction may be
|
||||
// scalar).
|
||||
spv::Id GetUnmodifiedOperandComponents(
|
||||
spv::Id operand_storage, const InstructionOperand& original_operand,
|
||||
uint32_t components);
|
||||
spv::Id GetOperandComponents(spv::Id operand_storage,
|
||||
const InstructionOperand& original_operand,
|
||||
uint32_t components, bool invert_negate = false,
|
||||
bool force_absolute = false) {
|
||||
return ApplyOperandModifiers(
|
||||
GetUnmodifiedOperandComponents(operand_storage, original_operand,
|
||||
components),
|
||||
original_operand, invert_negate, force_absolute);
|
||||
}
|
||||
// If components are identical, the same Id will be written to both outputs.
|
||||
void GetOperandScalarXY(spv::Id operand_storage,
|
||||
const InstructionOperand& original_operand,
|
||||
spv::Id& a_out, spv::Id& b_out,
|
||||
bool invert_negate = false,
|
||||
bool force_absolute = false);
|
||||
// Gets the absolute value of the loaded operand if it's not absolute already.
|
||||
spv::Id GetAbsoluteOperand(spv::Id operand_storage,
|
||||
const InstructionOperand& original_operand);
|
||||
// The type of the value must be a float vector consisting of
|
||||
// xe::bit_count(result.GetUsedResultComponents()) elements, or (to replicate
|
||||
// a scalar into all used components) float, or the value can be spv::NoResult
|
||||
// if there's no result to store (like constants only).
|
||||
void StoreResult(const InstructionResult& result, spv::Id value);
|
||||
|
||||
// For Shader Model 3 multiplication (+-0 or denormal * anything = +0),
|
||||
// replaces the value with +0 if the minimum of the two operands is 0. This
|
||||
// must be called with absolute values of operands - use GetAbsoluteOperand!
|
||||
spv::Id ZeroIfAnyOperandIsZero(spv::Id value, spv::Id operand_0_abs,
|
||||
spv::Id operand_1_abs);
|
||||
// Return type is a xe::bit_count(result.GetUsedResultComponents())-component
|
||||
// float vector or a single float, depending on whether it's a reduction
|
||||
// instruction (check getTypeId of the result), or returns spv::NoResult if
|
||||
// nothing to store.
|
||||
spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr,
|
||||
bool& predicate_written);
|
||||
// Returns a float value to write to the previous scalar register and to the
|
||||
// destination. If the return value is ps itself (in the retain_prev case),
|
||||
// returns spv::NoResult (handled as a special case, so if it's retain_prev,
|
||||
// but don't need to write to anywhere, no OpLoad(ps) will be done).
|
||||
spv::Id ProcessScalarAluOperation(const ParsedAluInstruction& instr,
|
||||
bool& predicate_written);
|
||||
|
||||
// Perform endian swap of a uint scalar or vector.
|
||||
spv::Id EndianSwap32Uint(spv::Id value, spv::Id endian);
|
||||
|
||||
spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int);
|
||||
|
||||
// The source may be a floating-point scalar or a vector.
|
||||
spv::Id PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated);
|
||||
spv::Id LinearToPWLGamma(spv::Id linear, bool linear_pre_saturated);
|
||||
|
||||
size_t FindOrAddTextureBinding(uint32_t fetch_constant,
|
||||
xenos::FetchOpDimension dimension,
|
||||
bool is_signed);
|
||||
size_t FindOrAddSamplerBinding(uint32_t fetch_constant,
|
||||
xenos::TextureFilter mag_filter,
|
||||
xenos::TextureFilter min_filter,
|
||||
xenos::TextureFilter mip_filter,
|
||||
xenos::AnisoFilter aniso_filter);
|
||||
// `texture_parameters` need to be set up except for `sampler`, which will be
|
||||
// set internally, optionally doing linear interpolation between the an
|
||||
// existing value and the new one (the result location may be the same as for
|
||||
// the first lerp endpoint, but not across signedness).
|
||||
void SampleTexture(spv::Builder::TextureParameters& texture_parameters,
|
||||
spv::ImageOperandsMask image_operands_mask,
|
||||
spv::Id image_unsigned, spv::Id image_signed,
|
||||
spv::Id sampler, spv::Id is_all_signed,
|
||||
spv::Id is_any_signed, spv::Id& result_unsigned_out,
|
||||
spv::Id& result_signed_out,
|
||||
spv::Id lerp_factor = spv::NoResult,
|
||||
spv::Id lerp_first_unsigned = spv::NoResult,
|
||||
spv::Id lerp_first_signed = spv::NoResult);
|
||||
// `texture_parameters` need to be set up except for `sampler`, which will be
|
||||
// set internally.
|
||||
spv::Id QueryTextureLod(spv::Builder::TextureParameters& texture_parameters,
|
||||
spv::Id image_unsigned, spv::Id image_signed,
|
||||
spv::Id sampler, spv::Id is_all_signed);
|
||||
|
||||
Features features_;
|
||||
|
||||
std::unique_ptr<spv::Builder> builder_;
|
||||
|
||||
std::vector<spv::Id> id_vector_temp_;
|
||||
// For helper functions like operand loading, so they don't conflict with
|
||||
// id_vector_temp_ usage in bigger callbacks.
|
||||
std::vector<spv::Id> id_vector_temp_util_;
|
||||
std::vector<unsigned int> uint_vector_temp_;
|
||||
std::vector<unsigned int> uint_vector_temp_util_;
|
||||
|
||||
spv::Id ext_inst_glsl_std_450_;
|
||||
|
||||
spv::Id type_void_;
|
||||
|
||||
union {
|
||||
struct {
|
||||
spv::Id type_bool_;
|
||||
spv::Id type_bool2_;
|
||||
spv::Id type_bool3_;
|
||||
spv::Id type_bool4_;
|
||||
};
|
||||
// Index = component count - 1.
|
||||
spv::Id type_bool_vectors_[4];
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
spv::Id type_int_;
|
||||
spv::Id type_int2_;
|
||||
spv::Id type_int3_;
|
||||
spv::Id type_int4_;
|
||||
};
|
||||
spv::Id type_int_vectors_[4];
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
spv::Id type_uint_;
|
||||
spv::Id type_uint2_;
|
||||
spv::Id type_uint3_;
|
||||
spv::Id type_uint4_;
|
||||
};
|
||||
spv::Id type_uint_vectors_[4];
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
spv::Id type_float_;
|
||||
spv::Id type_float2_;
|
||||
spv::Id type_float3_;
|
||||
spv::Id type_float4_;
|
||||
};
|
||||
spv::Id type_float_vectors_[4];
|
||||
};
|
||||
|
||||
spv::Id const_int_0_;
|
||||
spv::Id const_int4_0_;
|
||||
spv::Id const_uint_0_;
|
||||
spv::Id const_uint4_0_;
|
||||
union {
|
||||
struct {
|
||||
spv::Id const_float_0_;
|
||||
spv::Id const_float2_0_;
|
||||
spv::Id const_float3_0_;
|
||||
spv::Id const_float4_0_;
|
||||
};
|
||||
spv::Id const_float_vectors_0_[4];
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
spv::Id const_float_1_;
|
||||
spv::Id const_float2_1_;
|
||||
spv::Id const_float3_1_;
|
||||
spv::Id const_float4_1_;
|
||||
};
|
||||
spv::Id const_float_vectors_1_[4];
|
||||
};
|
||||
// vec2(0.0, 1.0), to arbitrarily VectorShuffle non-constant and constant
|
||||
// components.
|
||||
spv::Id const_float2_0_1_;
|
||||
|
||||
enum SystemConstantIndex : unsigned int {
|
||||
kSystemConstantFlags,
|
||||
kSystemConstantIndexVertexIndexEndian,
|
||||
kSystemConstantIndexVertexBaseIndex,
|
||||
kSystemConstantNdcScale,
|
||||
kSystemConstantNdcOffset,
|
||||
kSystemConstantTextureSwizzledSigns,
|
||||
kSystemConstantTextureSwizzles,
|
||||
kSystemConstantAlphaTestReference,
|
||||
kSystemConstantColorExpBias,
|
||||
};
|
||||
spv::Id uniform_system_constants_;
|
||||
spv::Id uniform_float_constants_;
|
||||
spv::Id uniform_bool_loop_constants_;
|
||||
spv::Id uniform_fetch_constants_;
|
||||
|
||||
spv::Id buffers_shared_memory_;
|
||||
|
||||
// Not using combined images and samplers because
|
||||
// maxPerStageDescriptorSamplers is often lower than
|
||||
// maxPerStageDescriptorSampledImages, and for every fetch constant, there
|
||||
// are, for regular fetches, two bindings (unsigned and signed).
|
||||
std::vector<TextureBinding> texture_bindings_;
|
||||
std::vector<SamplerBinding> sampler_bindings_;
|
||||
|
||||
// VS as VS only - int.
|
||||
spv::Id input_vertex_index_;
|
||||
// VS as TES only - int.
|
||||
spv::Id input_primitive_id_;
|
||||
// PS, only when needed - float4.
|
||||
spv::Id input_fragment_coord_;
|
||||
// PS, only when needed - bool.
|
||||
spv::Id input_front_facing_;
|
||||
|
||||
// In vertex or tessellation evaluation shaders - outputs, always
|
||||
// xenos::kMaxInterpolators.
|
||||
// In pixel shaders - inputs, min(xenos::kMaxInterpolators, register_count()).
|
||||
spv::Id input_output_interpolators_[xenos::kMaxInterpolators];
|
||||
static const std::string kInterpolatorNamePrefix;
|
||||
|
||||
enum OutputPerVertexMember : unsigned int {
|
||||
kOutputPerVertexMemberPosition,
|
||||
kOutputPerVertexMemberCount,
|
||||
};
|
||||
spv::Id output_per_vertex_;
|
||||
|
||||
std::array<spv::Id, xenos::kMaxColorRenderTargets> output_fragment_data_;
|
||||
|
||||
std::vector<spv::Id> main_interface_;
|
||||
spv::Function* function_main_;
|
||||
spv::Id main_system_constant_flags_;
|
||||
// bool.
|
||||
spv::Id var_main_predicate_;
|
||||
// uint4.
|
||||
spv::Id var_main_loop_count_;
|
||||
// int4.
|
||||
spv::Id var_main_loop_address_;
|
||||
// int.
|
||||
spv::Id var_main_address_register_;
|
||||
// float.
|
||||
spv::Id var_main_previous_scalar_;
|
||||
// `base + index * stride` in dwords from the last vfetch_full as it may be
|
||||
// needed by vfetch_mini - int.
|
||||
spv::Id var_main_vfetch_address_;
|
||||
// float.
|
||||
spv::Id var_main_tfetch_lod_;
|
||||
// float3.
|
||||
spv::Id var_main_tfetch_gradients_h_;
|
||||
spv::Id var_main_tfetch_gradients_v_;
|
||||
// float4[register_count()].
|
||||
spv::Id var_main_registers_;
|
||||
// VS only - float3 (special exports).
|
||||
spv::Id var_main_point_size_edge_flag_kill_vertex_;
|
||||
spv::Block* main_loop_header_;
|
||||
spv::Block* main_loop_continue_;
|
||||
spv::Block* main_loop_merge_;
|
||||
spv::Id main_loop_pc_next_;
|
||||
spv::Block* main_switch_header_;
|
||||
std::unique_ptr<spv::Instruction> main_switch_op_;
|
||||
spv::Block* main_switch_merge_;
|
||||
std::vector<spv::Id> main_switch_next_pc_phi_operands_;
|
||||
|
||||
// If the exec bool constant / predicate conditional is open, block after it
|
||||
// (not added to the function yet).
|
||||
spv::Block* cf_exec_conditional_merge_;
|
||||
// If the instruction-level predicate conditional is open, block after it (not
|
||||
// added to the function yet).
|
||||
spv::Block* cf_instruction_predicate_merge_;
|
||||
// When cf_exec_conditional_merge_ is not null:
|
||||
// If the current exec conditional is based on a bool constant: the number of
|
||||
// the bool constant.
|
||||
// If it's based on the predicate value: kCfExecBoolConstantPredicate.
|
||||
uint32_t cf_exec_bool_constant_or_predicate_;
|
||||
static constexpr uint32_t kCfExecBoolConstantPredicate = UINT32_MAX;
|
||||
// When cf_exec_conditional_merge_ is not null, the expected bool constant or
|
||||
// predicate value for the current exec conditional.
|
||||
bool cf_exec_condition_;
|
||||
// When cf_instruction_predicate_merge_ is not null, the expected predicate
|
||||
// value for the current or the last instruction.
|
||||
bool cf_instruction_predicate_condition_;
|
||||
// Whether there was a `setp` in the current exec before the current
|
||||
// instruction, thus instruction-level predicate value can be different than
|
||||
// the exec-level predicate value, and can't merge two execs with the same
|
||||
// predicate condition anymore.
|
||||
bool cf_exec_predicate_written_;
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,648 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "third_party/glslang/SPIRV/GLSL.std.450.h"
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/math.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
// Emits SPIR-V (at the builder's current build point) that encodes a float32
// scalar, which must already be clamped to [0, 31.875], as an unsigned 7e3
// float: 10 bits total, 7-bit mantissa, 3-bit exponent, rounded to nearest
// even.
// - f32_scalar: id of the source; accepted as float32 or as its uint32 bit
//   pattern (bitcast internally when needed).
// - ext_inst_glsl_std_450: id of the imported GLSL.std.450 extended
//   instruction set (used for UMin here).
// Returns the id of a uint32 holding the 7e3 value in its low 10 bits.
spv::Id SpirvShaderTranslator::PreClampedFloat32To7e3(
    spv::Builder& builder, spv::Id f32_scalar, spv::Id ext_inst_glsl_std_450) {
  // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
  // Assuming the value is already clamped to [0, 31.875].

  spv::Id type_uint = builder.makeUintType(32);

  // Need the source as uint for bit operations.
  {
    spv::Id source_type = builder.getTypeId(f32_scalar);
    assert_true(builder.isScalarType(source_type));
    if (!builder.isUintType(source_type)) {
      f32_scalar = builder.createUnaryOp(spv::OpBitcast, type_uint, f32_scalar);
    }
  }

  // The denormal 7e3 case.
  // denormal_biased_f32 = (f32 & 0x7FFFFF) | 0x800000
  // OpBitFieldInsert writes the constant 1 into bits 23..31 (9 bits), which
  // clears the sign/exponent and sets the implicit leading mantissa bit.
  spv::Id denormal_biased_f32;
  {
    spv::Instruction* denormal_insert_instruction = new spv::Instruction(
        builder.getUniqueId(), type_uint, spv::OpBitFieldInsert);
    denormal_insert_instruction->addIdOperand(f32_scalar);
    denormal_insert_instruction->addIdOperand(builder.makeUintConstant(1));
    denormal_insert_instruction->addIdOperand(builder.makeUintConstant(23));
    denormal_insert_instruction->addIdOperand(builder.makeUintConstant(9));
    builder.getBuildPoint()->addInstruction(
        std::unique_ptr<spv::Instruction>(denormal_insert_instruction));
    denormal_biased_f32 = denormal_insert_instruction->getResultId();
  }
  // denormal_biased_f32_shift_amount = min(125 - (f32 >> 23), 24)
  // Not allowing the shift to overflow as that's undefined in SPIR-V.
  spv::Id denormal_biased_f32_shift_amount;
  {
    spv::Instruction* denormal_shift_amount_instruction =
        new spv::Instruction(builder.getUniqueId(), type_uint, spv::OpExtInst);
    denormal_shift_amount_instruction->addIdOperand(ext_inst_glsl_std_450);
    denormal_shift_amount_instruction->addImmediateOperand(GLSLstd450UMin);
    denormal_shift_amount_instruction->addIdOperand(builder.createBinOp(
        spv::OpISub, type_uint, builder.makeUintConstant(125),
        builder.createBinOp(spv::OpShiftRightLogical, type_uint, f32_scalar,
                            builder.makeUintConstant(23))));
    denormal_shift_amount_instruction->addIdOperand(
        builder.makeUintConstant(24));
    builder.getBuildPoint()->addInstruction(
        std::unique_ptr<spv::Instruction>(denormal_shift_amount_instruction));
    denormal_biased_f32_shift_amount =
        denormal_shift_amount_instruction->getResultId();
  }
  // denormal_biased_f32 =
  //     ((f32 & 0x7FFFFF) | 0x800000) >> min(125 - (f32 >> 23), 24)
  denormal_biased_f32 = builder.createBinOp(spv::OpShiftRightLogical, type_uint,
                                            denormal_biased_f32,
                                            denormal_biased_f32_shift_amount);

  // The normal 7e3 case.
  // Bias the exponent.
  // normal_biased_f32 = f32 - (124 << 23)
  spv::Id normal_biased_f32 =
      builder.createBinOp(spv::OpISub, type_uint, f32_scalar,
                          builder.makeUintConstant(UINT32_C(124) << 23));

  // Select the needed conversion depending on whether the number is too small
  // to be represented as normalized 7e3.
  // 0x3E800000 is the float32 bit pattern of the smallest normalized 7e3
  // value's threshold; below it, the denormal path is taken.
  spv::Id biased_f32 = builder.createTriOp(
      spv::OpSelect, type_uint,
      builder.createBinOp(spv::OpULessThan, builder.makeBoolType(), f32_scalar,
                          builder.makeUintConstant(0x3E800000)),
      denormal_biased_f32, normal_biased_f32);

  // Build the 7e3 number rounding to the nearest even.
  // ((biased_f32 + 0x7FFF + ((biased_f32 >> 16) & 1)) >> 16) & 0x3FF
  // The final OpBitFieldUExtract takes 10 bits starting at bit 16 - the
  // resulting 7e3 value.
  return builder.createTriOp(
      spv::OpBitFieldUExtract, type_uint,
      builder.createBinOp(
          spv::OpIAdd, type_uint,
          builder.createBinOp(spv::OpIAdd, type_uint, biased_f32,
                              builder.makeUintConstant(0x7FFF)),
          builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
                              builder.makeUintConstant(16),
                              builder.makeUintConstant(1))),
      builder.makeUintConstant(16), builder.makeUintConstant(10));
}
|
||||
|
||||
// Emits SPIR-V that clamps an arbitrary float32 scalar to the representable
// 7e3 range [0, 31.875] (using GLSL.std.450 NClamp, which also flushes NaN)
// and then encodes it via PreClampedFloat32To7e3.
// `f32_scalar` may be supplied either as a float32 or as its uint32 bit
// pattern. Returns the id of a uint32 with the 7e3 value in its low 10 bits.
spv::Id SpirvShaderTranslator::UnclampedFloat32To7e3(
    spv::Builder& builder, spv::Id f32_scalar, spv::Id ext_inst_glsl_std_450) {
  spv::Id float_type = builder.makeFloatType(32);

  // Clamping is a floating-point operation - reinterpret the bits as float if
  // the caller passed the value as a uint.
  spv::Id input_type = builder.getTypeId(f32_scalar);
  assert_true(builder.isScalarType(input_type));
  if (!builder.isFloatType(input_type)) {
    f32_scalar = builder.createUnaryOp(spv::OpBitcast, float_type, f32_scalar);
  }

  // Emit NClamp(value, 0.0, 31.875) manually as an OpExtInst so the operands
  // are appended in the exact order required.
  spv::Instruction* nclamp_op =
      new spv::Instruction(builder.getUniqueId(), float_type, spv::OpExtInst);
  nclamp_op->addIdOperand(ext_inst_glsl_std_450);
  nclamp_op->addImmediateOperand(GLSLstd450NClamp);
  nclamp_op->addIdOperand(f32_scalar);
  nclamp_op->addIdOperand(builder.makeFloatConstant(0.0f));
  nclamp_op->addIdOperand(builder.makeFloatConstant(31.875f));
  builder.getBuildPoint()->addInstruction(
      std::unique_ptr<spv::Instruction>(nclamp_op));

  // Hand the clamped value off to the pre-clamped conversion.
  return PreClampedFloat32To7e3(builder, nclamp_op->getResultId(),
                                ext_inst_glsl_std_450);
}
|
||||
|
||||
// Emits SPIR-V decoding a 10-bit 7e3 float (7-bit mantissa, 3-bit exponent)
// into a float32.
// - f10_uint_scalar: uint32 id containing the 7e3 value.
// - f10_shift: bit offset of the 7e3 value within the uint (at most 32 - 10).
// - result_as_uint: when true, the float32 bit pattern is returned as a uint
//   instead of being bitcast to float.
// - ext_inst_glsl_std_450: id of the imported GLSL.std.450 set (FindUMsb).
// Returns the id of the decoded float32 (or its uint bit pattern).
spv::Id SpirvShaderTranslator::Float7e3To32(spv::Builder& builder,
                                            spv::Id f10_uint_scalar,
                                            uint32_t f10_shift,
                                            bool result_as_uint,
                                            spv::Id ext_inst_glsl_std_450) {
  // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp

  assert_true(builder.isUintType(builder.getTypeId(f10_uint_scalar)));
  assert_true(f10_shift <= (32 - 10));

  spv::Id type_bool = builder.makeBoolType();
  spv::Id type_int = builder.makeIntType(32);
  spv::Id type_uint = builder.makeUintType(32);

  // Split the 7e3 value into its 3-bit exponent and 7-bit mantissa fields.
  spv::Id f10_unbiased_exponent = builder.createTriOp(
      spv::OpBitFieldUExtract, type_uint, f10_uint_scalar,
      builder.makeUintConstant(f10_shift + 7), builder.makeUintConstant(3));
  spv::Id f10_mantissa = builder.createTriOp(
      spv::OpBitFieldUExtract, type_uint, f10_uint_scalar,
      builder.makeUintConstant(f10_shift), builder.makeUintConstant(7));

  // The denormal nonzero 7e3 case.
  // denormal_mantissa_msb = findMSB(f10_mantissa)
  // Emitted manually as OpExtInst; FindUMsb returns an int, bitcast to uint
  // below.
  spv::Id denormal_mantissa_msb;
  {
    spv::Instruction* denormal_mantissa_msb_instruction =
        new spv::Instruction(builder.getUniqueId(), type_int, spv::OpExtInst);
    denormal_mantissa_msb_instruction->addIdOperand(ext_inst_glsl_std_450);
    denormal_mantissa_msb_instruction->addImmediateOperand(GLSLstd450FindUMsb);
    denormal_mantissa_msb_instruction->addIdOperand(f10_mantissa);
    builder.getBuildPoint()->addInstruction(
        std::unique_ptr<spv::Instruction>(denormal_mantissa_msb_instruction));
    denormal_mantissa_msb = denormal_mantissa_msb_instruction->getResultId();
  }
  denormal_mantissa_msb =
      builder.createUnaryOp(spv::OpBitcast, type_uint, denormal_mantissa_msb);
  // denormal_f32_unbiased_exponent = 1 - (7 - findMSB(f10_mantissa))
  // Or:
  // denormal_f32_unbiased_exponent = findMSB(f10_mantissa) - 6
  spv::Id denormal_f32_unbiased_exponent =
      builder.createBinOp(spv::OpISub, type_uint, denormal_mantissa_msb,
                          builder.makeUintConstant(6));
  // Normalize the mantissa.
  // denormal_f32_mantissa = f10_mantissa << (7 - findMSB(f10_mantissa))
  spv::Id denormal_f32_mantissa = builder.createBinOp(
      spv::OpShiftLeftLogical, type_uint, f10_mantissa,
      builder.createBinOp(spv::OpISub, type_uint, builder.makeUintConstant(7),
                          denormal_mantissa_msb));
  // If the 7e3 number is zero, make sure the float32 number is zero too.
  spv::Id f10_mantissa_is_nonzero = builder.createBinOp(
      spv::OpINotEqual, type_bool, f10_mantissa, builder.makeUintConstant(0));
  // Set the unbiased exponent to -124 for zero - 124 will be added later,
  // resulting in zero float32.
  denormal_f32_unbiased_exponent = builder.createTriOp(
      spv::OpSelect, type_uint, f10_mantissa_is_nonzero,
      denormal_f32_unbiased_exponent, builder.makeUintConstant(uint32_t(-124)));
  denormal_f32_mantissa =
      builder.createTriOp(spv::OpSelect, type_uint, f10_mantissa_is_nonzero,
                          denormal_f32_mantissa, builder.makeUintConstant(0));

  // Select the needed conversion depending on whether the number is normal
  // (nonzero exponent field means a normal 7e3 value).
  spv::Id f10_is_normal =
      builder.createBinOp(spv::OpINotEqual, type_bool, f10_unbiased_exponent,
                          builder.makeUintConstant(0));
  spv::Id f32_unbiased_exponent = builder.createTriOp(
      spv::OpSelect, type_uint, f10_is_normal, f10_unbiased_exponent,
      denormal_f32_unbiased_exponent);
  spv::Id f32_mantissa =
      builder.createTriOp(spv::OpSelect, type_uint, f10_is_normal, f10_mantissa,
                          denormal_f32_mantissa);

  // Bias the exponent and construct the build the float32 number.
  // OpBitFieldInsert places the rebiased exponent (+124) into 8 bits starting
  // at bit 7, just above the 7-bit mantissa; the whole value is then shifted
  // left so the mantissa occupies the float32 mantissa's top bits.
  spv::Id f32_shifted;
  {
    spv::Instruction* f32_insert_instruction = new spv::Instruction(
        builder.getUniqueId(), type_uint, spv::OpBitFieldInsert);
    f32_insert_instruction->addIdOperand(f32_mantissa);
    f32_insert_instruction->addIdOperand(
        builder.createBinOp(spv::OpIAdd, type_uint, f32_unbiased_exponent,
                            builder.makeUintConstant(124)));
    f32_insert_instruction->addIdOperand(builder.makeUintConstant(7));
    f32_insert_instruction->addIdOperand(builder.makeUintConstant(8));
    builder.getBuildPoint()->addInstruction(
        std::unique_ptr<spv::Instruction>(f32_insert_instruction));
    f32_shifted = f32_insert_instruction->getResultId();
  }
  spv::Id f32 =
      builder.createBinOp(spv::OpShiftLeftLogical, type_uint, f32_shifted,
                          builder.makeUintConstant(23 - 7));

  if (!result_as_uint) {
    f32 = builder.createUnaryOp(spv::OpBitcast, builder.makeFloatType(32), f32);
  }

  return f32;
}
|
||||
|
||||
// Emits SPIR-V encoding a float32 depth value, already clamped/saturated to
// [0, 2), as a 24-bit 20e4 float (20-bit mantissa, 4-bit exponent).
// - round_to_nearest_even: when true, rounds to nearest even; otherwise the
//   final extraction truncates towards zero.
// - remap_from_0_to_0_5: when true, the exponent bias constants are reduced
//   by 1 (the remap_bias adjustments below), doubling the represented value.
// - ext_inst_glsl_std_450: id of the imported GLSL.std.450 set (UMin).
// Returns the id of a uint32 with the 20e4 value in its low 24 bits.
spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4(
    spv::Builder& builder, spv::Id f32_scalar, bool round_to_nearest_even,
    bool remap_from_0_to_0_5, spv::Id ext_inst_glsl_std_450) {
  // CFloat24 from d3dref9.dll +
  // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
  // Assuming the value is already clamped to [0, 2) (in all places, the depth
  // is written with saturation).

  uint32_t remap_bias = uint32_t(remap_from_0_to_0_5);

  spv::Id type_uint = builder.makeUintType(32);

  // Need the source as uint for bit operations.
  {
    spv::Id source_type = builder.getTypeId(f32_scalar);
    assert_true(builder.isScalarType(source_type));
    if (!builder.isUintType(source_type)) {
      f32_scalar = builder.createUnaryOp(spv::OpBitcast, type_uint, f32_scalar);
    }
  }

  // The denormal 20e4 case.
  // denormal_biased_f32 = (f32 & 0x7FFFFF) | 0x800000
  // OpBitFieldInsert writes the constant 1 into bits 23..31 (9 bits), which
  // clears the sign/exponent and sets the implicit leading mantissa bit.
  spv::Id denormal_biased_f32;
  {
    spv::Instruction* denormal_insert_instruction = new spv::Instruction(
        builder.getUniqueId(), type_uint, spv::OpBitFieldInsert);
    denormal_insert_instruction->addIdOperand(f32_scalar);
    denormal_insert_instruction->addIdOperand(builder.makeUintConstant(1));
    denormal_insert_instruction->addIdOperand(builder.makeUintConstant(23));
    denormal_insert_instruction->addIdOperand(builder.makeUintConstant(9));
    builder.getBuildPoint()->addInstruction(
        std::unique_ptr<spv::Instruction>(denormal_insert_instruction));
    denormal_biased_f32 = denormal_insert_instruction->getResultId();
  }
  // denormal_biased_f32_shift_amount = min(113 - (f32 >> 23), 24)
  // Not allowing the shift to overflow as that's undefined in SPIR-V.
  spv::Id denormal_biased_f32_shift_amount;
  {
    spv::Instruction* denormal_shift_amount_instruction =
        new spv::Instruction(builder.getUniqueId(), type_uint, spv::OpExtInst);
    denormal_shift_amount_instruction->addIdOperand(ext_inst_glsl_std_450);
    denormal_shift_amount_instruction->addImmediateOperand(GLSLstd450UMin);
    denormal_shift_amount_instruction->addIdOperand(builder.createBinOp(
        spv::OpISub, type_uint, builder.makeUintConstant(113 - remap_bias),
        builder.createBinOp(spv::OpShiftRightLogical, type_uint, f32_scalar,
                            builder.makeUintConstant(23))));
    denormal_shift_amount_instruction->addIdOperand(
        builder.makeUintConstant(24));
    builder.getBuildPoint()->addInstruction(
        std::unique_ptr<spv::Instruction>(denormal_shift_amount_instruction));
    denormal_biased_f32_shift_amount =
        denormal_shift_amount_instruction->getResultId();
  }
  // denormal_biased_f32 =
  //     ((f32 & 0x7FFFFF) | 0x800000) >> min(113 - (f32 >> 23), 24)
  denormal_biased_f32 = builder.createBinOp(spv::OpShiftRightLogical, type_uint,
                                            denormal_biased_f32,
                                            denormal_biased_f32_shift_amount);

  // The normal 20e4 case.
  // Bias the exponent.
  // normal_biased_f32 = f32 - (112 << 23)
  spv::Id normal_biased_f32 = builder.createBinOp(
      spv::OpISub, type_uint, f32_scalar,
      builder.makeUintConstant((UINT32_C(112) - remap_bias) << 23));

  // Select the needed conversion depending on whether the number is too small
  // to be represented as normalized 20e4.
  spv::Id biased_f32 = builder.createTriOp(
      spv::OpSelect, type_uint,
      builder.createBinOp(
          spv::OpULessThan, builder.makeBoolType(), f32_scalar,
          builder.makeUintConstant(0x38800000 - (remap_bias << 23))),
      denormal_biased_f32, normal_biased_f32);

  // Build the 20e4 number rounding to the nearest even or towards zero.
  if (round_to_nearest_even) {
    // biased_f32 += 3 + ((biased_f32 >> 3) & 1)
    biased_f32 = builder.createBinOp(
        spv::OpIAdd, type_uint,
        builder.createBinOp(spv::OpIAdd, type_uint, biased_f32,
                            builder.makeUintConstant(3)),
        builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
                            builder.makeUintConstant(3),
                            builder.makeUintConstant(1)));
  }
  // Extract 24 bits starting at bit 3 - dropping the 3 float32 mantissa bits
  // below the 20e4 mantissa's precision.
  return builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
                             builder.makeUintConstant(3),
                             builder.makeUintConstant(24));
}
|
||||
|
||||
// Emits SPIR-V decoding a 24-bit 20e4 depth float (20-bit mantissa, 4-bit
// exponent) into a float32.
// - f24_uint_scalar: uint32 id containing the 20e4 value.
// - f24_shift: bit offset of the 20e4 value within the uint (at most 32 - 24).
// - remap_to_0_to_0_5: when true, the exponent rebias is reduced by 1 (the
//   remap_bias adjustments below), halving the resulting value.
// - result_as_uint: when true, the float32 bit pattern is returned as a uint
//   instead of being bitcast to float.
// - ext_inst_glsl_std_450: id of the imported GLSL.std.450 set (FindUMsb).
// Returns the id of the decoded float32 (or its uint bit pattern).
spv::Id SpirvShaderTranslator::Depth20e4To32(spv::Builder& builder,
                                             spv::Id f24_uint_scalar,
                                             uint32_t f24_shift,
                                             bool remap_to_0_to_0_5,
                                             bool result_as_uint,
                                             spv::Id ext_inst_glsl_std_450) {
  // CFloat24 from d3dref9.dll +
  // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp

  assert_true(builder.isUintType(builder.getTypeId(f24_uint_scalar)));
  assert_true(f24_shift <= (32 - 24));

  uint32_t remap_bias = uint32_t(remap_to_0_to_0_5);

  spv::Id type_bool = builder.makeBoolType();
  spv::Id type_int = builder.makeIntType(32);
  spv::Id type_uint = builder.makeUintType(32);

  // Split the 20e4 value into its 4-bit exponent and 20-bit mantissa fields.
  spv::Id f24_unbiased_exponent = builder.createTriOp(
      spv::OpBitFieldUExtract, type_uint, f24_uint_scalar,
      builder.makeUintConstant(f24_shift + 20), builder.makeUintConstant(4));
  spv::Id f24_mantissa = builder.createTriOp(
      spv::OpBitFieldUExtract, type_uint, f24_uint_scalar,
      builder.makeUintConstant(f24_shift), builder.makeUintConstant(20));

  // The denormal nonzero 20e4 case.
  // denormal_mantissa_msb = findMSB(f24_mantissa)
  // Emitted manually as OpExtInst; FindUMsb returns an int, bitcast to uint
  // below.
  spv::Id denormal_mantissa_msb;
  {
    spv::Instruction* denormal_mantissa_msb_instruction =
        new spv::Instruction(builder.getUniqueId(), type_int, spv::OpExtInst);
    denormal_mantissa_msb_instruction->addIdOperand(ext_inst_glsl_std_450);
    denormal_mantissa_msb_instruction->addImmediateOperand(GLSLstd450FindUMsb);
    denormal_mantissa_msb_instruction->addIdOperand(f24_mantissa);
    builder.getBuildPoint()->addInstruction(
        std::unique_ptr<spv::Instruction>(denormal_mantissa_msb_instruction));
    denormal_mantissa_msb = denormal_mantissa_msb_instruction->getResultId();
  }
  denormal_mantissa_msb =
      builder.createUnaryOp(spv::OpBitcast, type_uint, denormal_mantissa_msb);
  // denormal_f32_unbiased_exponent = 1 - (20 - findMSB(f24_mantissa))
  // Or:
  // denormal_f32_unbiased_exponent = findMSB(f24_mantissa) - 19
  spv::Id denormal_f32_unbiased_exponent =
      builder.createBinOp(spv::OpISub, type_uint, denormal_mantissa_msb,
                          builder.makeUintConstant(19));
  // Normalize the mantissa.
  // denormal_f32_mantissa = f24_mantissa << (20 - findMSB(f24_mantissa))
  spv::Id denormal_f32_mantissa = builder.createBinOp(
      spv::OpShiftLeftLogical, type_uint, f24_mantissa,
      builder.createBinOp(spv::OpISub, type_uint, builder.makeUintConstant(20),
                          denormal_mantissa_msb));
  // If the 20e4 number is zero, make sure the float32 number is zero too.
  spv::Id f24_mantissa_is_nonzero = builder.createBinOp(
      spv::OpINotEqual, type_bool, f24_mantissa, builder.makeUintConstant(0));
  // Set the unbiased exponent to -112 for zero - 112 will be added later,
  // resulting in zero float32.
  denormal_f32_unbiased_exponent = builder.createTriOp(
      spv::OpSelect, type_uint, f24_mantissa_is_nonzero,
      denormal_f32_unbiased_exponent,
      builder.makeUintConstant(uint32_t(-int32_t(112 - remap_bias))));
  denormal_f32_mantissa =
      builder.createTriOp(spv::OpSelect, type_uint, f24_mantissa_is_nonzero,
                          denormal_f32_mantissa, builder.makeUintConstant(0));

  // Select the needed conversion depending on whether the number is normal
  // (nonzero exponent field means a normal 20e4 value).
  spv::Id f24_is_normal =
      builder.createBinOp(spv::OpINotEqual, type_bool, f24_unbiased_exponent,
                          builder.makeUintConstant(0));
  spv::Id f32_unbiased_exponent = builder.createTriOp(
      spv::OpSelect, type_uint, f24_is_normal, f24_unbiased_exponent,
      denormal_f32_unbiased_exponent);
  spv::Id f32_mantissa =
      builder.createTriOp(spv::OpSelect, type_uint, f24_is_normal, f24_mantissa,
                          denormal_f32_mantissa);

  // Bias the exponent and construct the build the float32 number.
  // OpBitFieldInsert places the rebiased exponent (+112, minus the remap
  // bias) into 8 bits starting at bit 20, just above the 20-bit mantissa; the
  // whole value is then shifted left into float32 position.
  spv::Id f32_shifted;
  {
    spv::Instruction* f32_insert_instruction = new spv::Instruction(
        builder.getUniqueId(), type_uint, spv::OpBitFieldInsert);
    f32_insert_instruction->addIdOperand(f32_mantissa);
    f32_insert_instruction->addIdOperand(
        builder.createBinOp(spv::OpIAdd, type_uint, f32_unbiased_exponent,
                            builder.makeUintConstant(112 - remap_bias)));
    f32_insert_instruction->addIdOperand(builder.makeUintConstant(20));
    f32_insert_instruction->addIdOperand(builder.makeUintConstant(8));
    builder.getBuildPoint()->addInstruction(
        std::unique_ptr<spv::Instruction>(f32_insert_instruction));
    f32_shifted = f32_insert_instruction->getResultId();
  }
  spv::Id f32 =
      builder.createBinOp(spv::OpShiftLeftLogical, type_uint, f32_shifted,
                          builder.makeUintConstant(23 - 20));

  if (!result_as_uint) {
    f32 = builder.createUnaryOp(spv::OpBitcast, builder.makeFloatType(32), f32);
  }

  return f32;
}
|
||||
|
||||
// Appends the end-of-main fragment shader logic: the alpha test (when color
// target 0 is written and early fragment tests are not forced), then, for
// each written color target, the exponent bias multiply and the optional
// piecewise-linear gamma conversion, storing the final color back to the
// output variable.
void SpirvShaderTranslator::CompleteFragmentShaderInMain() {
  // Load the system constant flags.
  // NOTE(review): `system_constant_flags` is never referenced below - the
  // alpha test and gamma paths read `main_system_constant_flags_` instead, so
  // this load appears to be dead. Confirm whether it can be removed or
  // whether `main_system_constant_flags_` was meant to be assigned from it.
  id_vector_temp_.clear();
  id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags));
  spv::Id system_constant_flags = builder_->createLoad(
      builder_->createAccessChain(spv::StorageClassUniform,
                                  uniform_system_constants_, id_vector_temp_),
      spv::NoPrecision);

  if (current_shader().writes_color_target(0) &&
      !IsExecutionModeEarlyFragmentTests()) {
    // Alpha test.
    // TODO(Triang3l): Check how alpha test works with NaN on Direct3D 9.
    // Extract the comparison function (less, equal, greater bits).
    spv::Id alpha_test_function = builder_->createTriOp(
        spv::OpBitFieldUExtract, type_uint_, main_system_constant_flags_,
        builder_->makeUintConstant(kSysFlag_AlphaPassIfLess_Shift),
        builder_->makeUintConstant(3));
    // Check if the comparison function is not "always" - that should pass even
    // for NaN likely, unlike "less, equal or greater".
    spv::Id alpha_test_function_is_non_always = builder_->createBinOp(
        spv::OpINotEqual, type_bool_, alpha_test_function,
        builder_->makeUintConstant(uint32_t(xenos::CompareFunction::kAlways)));
    spv::Block& block_alpha_test = builder_->makeNewBlock();
    spv::Block& block_alpha_test_merge = builder_->makeNewBlock();
    SpirvCreateSelectionMerge(block_alpha_test_merge.getId(),
                              spv::SelectionControlDontFlattenMask);
    builder_->createConditionalBranch(alpha_test_function_is_non_always,
                                      &block_alpha_test,
                                      &block_alpha_test_merge);
    builder_->setBuildPoint(&block_alpha_test);
    {
      // Load the shader-written alpha (component 3 of color target 0).
      id_vector_temp_.clear();
      id_vector_temp_.push_back(builder_->makeIntConstant(3));
      spv::Id alpha_test_alpha =
          builder_->createLoad(builder_->createAccessChain(
                                   spv::StorageClassOutput,
                                   output_fragment_data_[0], id_vector_temp_),
                               spv::NoPrecision);
      // Load the alpha test reference value from the system constants.
      id_vector_temp_.clear();
      id_vector_temp_.push_back(
          builder_->makeIntConstant(kSystemConstantAlphaTestReference));
      spv::Id alpha_test_reference =
          builder_->createLoad(builder_->createAccessChain(
                                   spv::StorageClassUniform,
                                   uniform_system_constants_, id_vector_temp_),
                               spv::NoPrecision);
      // The comparison function is not "always" - perform the alpha test.
      // Handle "not equal" specially (specifically as "not equal" so it's true
      // for NaN, not "less or greater" which is false for NaN).
      spv::Id alpha_test_function_is_not_equal = builder_->createBinOp(
          spv::OpIEqual, type_bool_, alpha_test_function,
          builder_->makeUintConstant(
              uint32_t(xenos::CompareFunction::kNotEqual)));
      spv::Block& block_alpha_test_not_equal = builder_->makeNewBlock();
      spv::Block& block_alpha_test_non_not_equal = builder_->makeNewBlock();
      spv::Block& block_alpha_test_not_equal_merge = builder_->makeNewBlock();
      SpirvCreateSelectionMerge(block_alpha_test_not_equal_merge.getId(),
                                spv::SelectionControlDontFlattenMask);
      builder_->createConditionalBranch(alpha_test_function_is_not_equal,
                                        &block_alpha_test_not_equal,
                                        &block_alpha_test_non_not_equal);
      spv::Id alpha_test_result_not_equal, alpha_test_result_non_not_equal;
      builder_->setBuildPoint(&block_alpha_test_not_equal);
      {
        // "Not equal" function.
        alpha_test_result_not_equal =
            builder_->createBinOp(spv::OpFUnordNotEqual, type_bool_,
                                  alpha_test_alpha, alpha_test_reference);
        builder_->createBranch(&block_alpha_test_not_equal_merge);
      }
      builder_->setBuildPoint(&block_alpha_test_non_not_equal);
      {
        // Function other than "not equal".
        // OR together the (ordered) comparisons whose corresponding bit
        // (less / equal / greater) is set in the comparison function.
        static const spv::Op kAlphaTestOps[] = {
            spv::OpFOrdLessThan, spv::OpFOrdEqual, spv::OpFOrdGreaterThan};
        for (uint32_t i = 0; i < 3; ++i) {
          spv::Id alpha_test_comparison_result = builder_->createBinOp(
              spv::OpLogicalAnd, type_bool_,
              builder_->createBinOp(kAlphaTestOps[i], type_bool_,
                                    alpha_test_alpha, alpha_test_reference),
              builder_->createBinOp(
                  spv::OpINotEqual, type_bool_,
                  builder_->createBinOp(
                      spv::OpBitwiseAnd, type_uint_, alpha_test_function,
                      builder_->makeUintConstant(UINT32_C(1) << i)),
                  const_uint_0_));
          if (i) {
            alpha_test_result_non_not_equal = builder_->createBinOp(
                spv::OpLogicalOr, type_bool_, alpha_test_result_non_not_equal,
                alpha_test_comparison_result);
          } else {
            alpha_test_result_non_not_equal = alpha_test_comparison_result;
          }
        }
        builder_->createBranch(&block_alpha_test_not_equal_merge);
      }
      builder_->setBuildPoint(&block_alpha_test_not_equal_merge);
      // Merge the two paths' results with a manually-constructed OpPhi.
      spv::Id alpha_test_result;
      {
        std::unique_ptr<spv::Instruction> alpha_test_result_phi_op =
            std::make_unique<spv::Instruction>(builder_->getUniqueId(),
                                               type_bool_, spv::OpPhi);
        alpha_test_result_phi_op->addIdOperand(alpha_test_result_not_equal);
        alpha_test_result_phi_op->addIdOperand(
            block_alpha_test_not_equal.getId());
        alpha_test_result_phi_op->addIdOperand(alpha_test_result_non_not_equal);
        alpha_test_result_phi_op->addIdOperand(
            block_alpha_test_non_not_equal.getId());
        alpha_test_result = alpha_test_result_phi_op->getResultId();
        builder_->getBuildPoint()->addInstruction(
            std::move(alpha_test_result_phi_op));
      }
      // Discard the pixel if the alpha test has failed. Creating a merge block
      // even though it will contain just one OpBranch since SPIR-V requires
      // structured control flow in shaders.
      spv::Block& block_alpha_test_kill = builder_->makeNewBlock();
      spv::Block& block_alpha_test_kill_merge = builder_->makeNewBlock();
      SpirvCreateSelectionMerge(block_alpha_test_kill_merge.getId(),
                                spv::SelectionControlDontFlattenMask);
      builder_->createConditionalBranch(alpha_test_result,
                                        &block_alpha_test_kill_merge,
                                        &block_alpha_test_kill);
      builder_->setBuildPoint(&block_alpha_test_kill);
      builder_->createNoResultOp(spv::OpKill);
      // OpKill terminates the block.
      builder_->setBuildPoint(&block_alpha_test_kill_merge);
      builder_->createBranch(&block_alpha_test_merge);
    }
    builder_->setBuildPoint(&block_alpha_test_merge);
  }

  // Post-process every color target the shader writes (bit mask iterated via
  // bit_scan_forward).
  uint32_t color_targets_remaining = current_shader().writes_color_targets();
  uint32_t color_target_index;
  while (xe::bit_scan_forward(color_targets_remaining, &color_target_index)) {
    color_targets_remaining &= ~(UINT32_C(1) << color_target_index);
    spv::Id color_variable = output_fragment_data_[color_target_index];
    spv::Id color = builder_->createLoad(color_variable, spv::NoPrecision);

    // Apply the exponent bias after the alpha test and alpha to coverage
    // because they need the unbiased alpha from the shader.
    id_vector_temp_.clear();
    id_vector_temp_.reserve(2);
    id_vector_temp_.push_back(
        builder_->makeIntConstant(kSystemConstantColorExpBias));
    id_vector_temp_.push_back(
        builder_->makeIntConstant(int32_t(color_target_index)));
    color = builder_->createBinOp(
        spv::OpVectorTimesScalar, type_float4_, color,
        builder_->createLoad(builder_->createAccessChain(
                                 spv::StorageClassUniform,
                                 uniform_system_constants_, id_vector_temp_),
                             spv::NoPrecision));
    builder_->addDecoration(color, spv::DecorationNoContraction);

    // Convert to gamma space - this is incorrect, since it must be done after
    // blending on the Xbox 360, but this is just one of many blending issues in
    // the host render target path.
    // TODO(Triang3l): Gamma as sRGB check.
    // Extract .rgb (a shuffle emitted manually so immediate component indices
    // can be appended).
    spv::Id color_rgb;
    {
      std::unique_ptr<spv::Instruction> color_rgb_shuffle_op =
          std::make_unique<spv::Instruction>(
              builder_->getUniqueId(), type_float3_, spv::OpVectorShuffle);
      color_rgb_shuffle_op->addIdOperand(color);
      color_rgb_shuffle_op->addIdOperand(color);
      color_rgb_shuffle_op->addImmediateOperand(0);
      color_rgb_shuffle_op->addImmediateOperand(1);
      color_rgb_shuffle_op->addImmediateOperand(2);
      color_rgb = color_rgb_shuffle_op->getResultId();
      builder_->getBuildPoint()->addInstruction(
          std::move(color_rgb_shuffle_op));
    }
    spv::Id is_gamma = builder_->createBinOp(
        spv::OpINotEqual, type_bool_,
        builder_->createBinOp(
            spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
            builder_->makeUintConstant(kSysFlag_ConvertColor0ToGamma
                                       << color_target_index)),
        const_uint_0_);
    spv::Block& block_gamma_head = *builder_->getBuildPoint();
    spv::Block& block_gamma = builder_->makeNewBlock();
    spv::Block& block_gamma_merge = builder_->makeNewBlock();
    SpirvCreateSelectionMerge(block_gamma_merge.getId());
    builder_->createConditionalBranch(is_gamma, &block_gamma,
                                      &block_gamma_merge);
    builder_->setBuildPoint(&block_gamma);
    spv::Id color_rgb_gamma = LinearToPWLGamma(color_rgb, false);
    builder_->createBranch(&block_gamma_merge);
    builder_->setBuildPoint(&block_gamma_merge);
    // Phi between the gamma-converted and the linear RGB.
    {
      std::unique_ptr<spv::Instruction> gamma_phi_op =
          std::make_unique<spv::Instruction>(builder_->getUniqueId(),
                                             type_float3_, spv::OpPhi);
      gamma_phi_op->addIdOperand(color_rgb_gamma);
      gamma_phi_op->addIdOperand(block_gamma.getId());
      gamma_phi_op->addIdOperand(color_rgb);
      gamma_phi_op->addIdOperand(block_gamma_head.getId());
      color_rgb = gamma_phi_op->getResultId();
      builder_->getBuildPoint()->addInstruction(std::move(gamma_phi_op));
    }
    // Recombine the (possibly gamma-converted) RGB with the original alpha
    // (3 + 3 indexes component 3 of the second shuffle operand, `color`).
    {
      std::unique_ptr<spv::Instruction> color_rgba_shuffle_op =
          std::make_unique<spv::Instruction>(
              builder_->getUniqueId(), type_float4_, spv::OpVectorShuffle);
      color_rgba_shuffle_op->addIdOperand(color_rgb);
      color_rgba_shuffle_op->addIdOperand(color);
      color_rgba_shuffle_op->addImmediateOperand(0);
      color_rgba_shuffle_op->addImmediateOperand(1);
      color_rgba_shuffle_op->addImmediateOperand(2);
      color_rgba_shuffle_op->addImmediateOperand(3 + 3);
      color = color_rgba_shuffle_op->getResultId();
      builder_->getBuildPoint()->addInstruction(
          std::move(color_rgba_shuffle_op));
    }

    builder_->createStore(color, color_variable);
  }
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -1,850 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/vulkan/buffer_cache.h"
|
||||
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/memory.h"
|
||||
#include "xenia/base/profiling.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
|
||||
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
|
||||
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
#if XE_ARCH_AMD64
|
||||
void copy_cmp_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
|
||||
uint16_t cmp_value, size_t count) {
|
||||
auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
|
||||
auto src = reinterpret_cast<const uint16_t*>(src_ptr);
|
||||
__m128i shufmask =
|
||||
_mm_set_epi8(0x0E, 0x0F, 0x0C, 0x0D, 0x0A, 0x0B, 0x08, 0x09, 0x06, 0x07,
|
||||
0x04, 0x05, 0x02, 0x03, 0x00, 0x01);
|
||||
__m128i cmpval = _mm_set1_epi16(cmp_value);
|
||||
|
||||
size_t i;
|
||||
for (i = 0; i + 8 <= count; i += 8) {
|
||||
__m128i input = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
|
||||
__m128i output = _mm_shuffle_epi8(input, shufmask);
|
||||
|
||||
__m128i mask = _mm_cmpeq_epi16(output, cmpval);
|
||||
output = _mm_or_si128(output, mask);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
|
||||
}
|
||||
for (; i < count; ++i) { // handle residual elements
|
||||
dest[i] = byte_swap(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void copy_cmp_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
|
||||
uint32_t cmp_value, size_t count) {
|
||||
auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
|
||||
auto src = reinterpret_cast<const uint32_t*>(src_ptr);
|
||||
__m128i shufmask =
|
||||
_mm_set_epi8(0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, 0x04, 0x05,
|
||||
0x06, 0x07, 0x00, 0x01, 0x02, 0x03);
|
||||
__m128i cmpval = _mm_set1_epi32(cmp_value);
|
||||
|
||||
size_t i;
|
||||
for (i = 0; i + 4 <= count; i += 4) {
|
||||
__m128i input = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
|
||||
__m128i output = _mm_shuffle_epi8(input, shufmask);
|
||||
|
||||
__m128i mask = _mm_cmpeq_epi32(output, cmpval);
|
||||
output = _mm_or_si128(output, mask);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
|
||||
}
|
||||
for (; i < count; ++i) { // handle residual elements
|
||||
dest[i] = byte_swap(src[i]);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void copy_cmp_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
|
||||
uint16_t cmp_value, size_t count) {
|
||||
auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
|
||||
auto src = reinterpret_cast<const uint16_t*>(src_ptr);
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
uint16_t value = byte_swap(src[i]);
|
||||
dest[i] = value == cmp_value ? 0xFFFF : value;
|
||||
}
|
||||
}
|
||||
|
||||
void copy_cmp_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
|
||||
uint32_t cmp_value, size_t count) {
|
||||
auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
|
||||
auto src = reinterpret_cast<const uint32_t*>(src_ptr);
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
uint32_t value = byte_swap(src[i]);
|
||||
dest[i] = value == cmp_value ? 0xFFFFFFFF : value;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
using xe::ui::vulkan::util::CheckResult;

// Size of the "fat" constant upload block written by UploadConstantRegisters:
// 512 float4 registers (512 * 4 * 4 bytes) + 8 bool dwords + 32 loop dwords.
constexpr VkDeviceSize kConstantRegisterUniformRange =
    512 * 4 * 4 + 8 * 4 + 32 * 4;
|
||||
|
||||
// Constructs the cache and the backing transient circular buffer, which is
// used as a uniform buffer (constants), index buffer, and storage buffer
// (vertex data). The buffer is created here but its memory is not allocated
// or bound until Initialize().
BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
                         const ui::vulkan::VulkanProvider& provider,
                         size_t capacity)
    : register_file_(register_file), memory_(memory), provider_(provider) {
  // 256-byte alignment for suballocations (covers typical
  // minUniformBufferOffsetAlignment requirements).
  transient_buffer_ = std::make_unique<ui::vulkan::CircularBuffer>(
      provider_,
      VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
          VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
      capacity, 256);
}
|
||||
|
||||
// Shutdown() is idempotent with respect to already-released resources, so the
// destructor simply delegates to it.
BufferCache::~BufferCache() { Shutdown(); }
|
||||
|
||||
// One-time setup: backs the transient buffer with host-visible device memory,
// creates the VMA allocator, and builds the constant/vertex descriptor
// resources. Returns the first failing VkResult, VK_SUCCESS otherwise.
// NOTE(review): partially-created resources are not torn down on failure here;
// presumably the caller invokes Shutdown() on a non-success result - confirm
// at the call site.
VkResult BufferCache::Initialize() {
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
  VkDevice device = provider_.device();
  VkResult status = VK_SUCCESS;

  // Allocate a host-visible memory pool sized to the transient buffer's
  // requirements.
  VkMemoryRequirements pool_reqs;
  transient_buffer_->GetBufferMemoryRequirements(&pool_reqs);
  VkMemoryAllocateInfo pool_allocate_info;
  pool_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
  pool_allocate_info.pNext = nullptr;
  pool_allocate_info.allocationSize = pool_reqs.size;
  // false = device-local host-visible memory not required.
  pool_allocate_info.memoryTypeIndex = ui::vulkan::util::ChooseHostMemoryType(
      provider_, pool_reqs.memoryTypeBits, false);
  if (pool_allocate_info.memoryTypeIndex == UINT32_MAX) {
    // No compatible host-visible memory type exists.
    return VK_ERROR_INITIALIZATION_FAILED;
  }
  status = dfn.vkAllocateMemory(device, &pool_allocate_info, nullptr,
                                &gpu_memory_pool_);
  if (status != VK_SUCCESS) {
    return status;
  }

  // Bind the circular buffer to the pool at offset 0 and map it.
  status = transient_buffer_->Initialize(gpu_memory_pool_, 0);
  if (status != VK_SUCCESS) {
    return status;
  }

  // Create a memory allocator for textures.
  VmaVulkanFunctions vulkan_funcs = {};
  ui::vulkan::FillVMAVulkanFunctions(&vulkan_funcs, provider_);

  VmaAllocatorCreateInfo alloc_info = {};
  alloc_info.physicalDevice = provider_.physical_device();
  alloc_info.device = device;
  alloc_info.pVulkanFunctions = &vulkan_funcs;
  alloc_info.instance = provider_.instance();
  status = vmaCreateAllocator(&alloc_info, &mem_allocator_);
  if (status != VK_SUCCESS) {
    return status;
  }

  // Constant-uniform descriptor set (one persistent set, dynamic offsets).
  status = CreateConstantDescriptorSet();
  if (status != VK_SUCCESS) {
    return status;
  }

  // Per-draw vertex storage-buffer descriptor pool.
  status = CreateVertexDescriptorPool();
  if (status != VK_SUCCESS) {
    return status;
  }

  return VK_SUCCESS;
}
|
||||
|
||||
// Creates the fenced descriptor pool and set layout used for vertex-buffer
// bindings: a single binding holding an array of 32 storage buffers visible
// to the vertex shader stage.
VkResult BufferCache::CreateVertexDescriptorPool() {
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
  VkDevice device = provider_.device();
  VkResult status;

  std::vector<VkDescriptorPoolSize> pool_sizes;
  // 32 storage-buffer descriptors per set, 16384 sets.
  pool_sizes.push_back({
      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
      32 * 16384,
  });
  vertex_descriptor_pool_ = std::make_unique<ui::vulkan::DescriptorPool>(
      provider_, 32 * 16384, pool_sizes);

  // 32 storage buffers available to vertex shader.
  // TODO(DrChat): In the future, this could hold memexport staging data.
  VkDescriptorSetLayoutBinding binding = {
      0,       VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
      32,      VK_SHADER_STAGE_VERTEX_BIT,
      nullptr,
  };

  VkDescriptorSetLayoutCreateInfo layout_info = {
      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      nullptr,
      0,
      1,
      &binding,
  };
  status = dfn.vkCreateDescriptorSetLayout(device, &layout_info, nullptr,
                                           &vertex_descriptor_set_layout_);
  if (status != VK_SUCCESS) {
    return status;
  }

  return VK_SUCCESS;
}
|
||||
|
||||
// Releases the vertex descriptor pool and its set layout. Safe to call when
// the handles were never created (DestroyAndNullHandle tolerates null).
void BufferCache::FreeVertexDescriptorPool() {
  vertex_descriptor_pool_.reset();

  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
  VkDevice device = provider_.device();
  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
                                         device, vertex_descriptor_set_layout_);
}
|
||||
|
||||
// Creates the persistent constant-uniform descriptor resources: a pool, a
// two-binding set layout (binding 0 = vertex constants, binding 1 = fragment
// constants, both dynamic uniform buffers), the single descriptor set handed
// out to all pipelines, and the initial writes pointing both bindings at the
// transient buffer.
// Fix: descriptor_writes was previously left default-uninitialized, so the
// unset pImageInfo/pTexelBufferView members contained garbage; they are
// ignored for uniform-buffer descriptors per the Vulkan spec, but zero-
// initializing keeps validation layers and tooling happy.
VkResult BufferCache::CreateConstantDescriptorSet() {
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
  VkDevice device = provider_.device();
  VkResult status = VK_SUCCESS;

  // Descriptor pool used for all of our cached descriptors.
  // In the steady state we don't allocate anything, so these are all manually
  // managed.
  VkDescriptorPoolCreateInfo transient_descriptor_pool_info;
  transient_descriptor_pool_info.sType =
      VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
  transient_descriptor_pool_info.pNext = nullptr;
  transient_descriptor_pool_info.flags =
      VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
  transient_descriptor_pool_info.maxSets = 1;
  VkDescriptorPoolSize pool_sizes[1];
  pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  pool_sizes[0].descriptorCount = 2;
  transient_descriptor_pool_info.poolSizeCount = 1;
  transient_descriptor_pool_info.pPoolSizes = pool_sizes;
  status = dfn.vkCreateDescriptorPool(device, &transient_descriptor_pool_info,
                                      nullptr, &constant_descriptor_pool_);
  if (status != VK_SUCCESS) {
    return status;
  }

  // Create the descriptor set layout used for our uniform buffer.
  // As it is a static binding that uses dynamic offsets during draws we can
  // create this once and reuse it forever.
  VkDescriptorSetLayoutBinding bindings[2] = {};

  // Vertex constants
  bindings[0].binding = 0;
  bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  bindings[0].descriptorCount = 1;
  bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
  bindings[0].pImmutableSamplers = nullptr;

  // Fragment constants
  bindings[1].binding = 1;
  bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  bindings[1].descriptorCount = 1;
  bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
  bindings[1].pImmutableSamplers = nullptr;

  VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {};
  descriptor_set_layout_info.sType =
      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
  descriptor_set_layout_info.pNext = nullptr;
  descriptor_set_layout_info.flags = 0;
  descriptor_set_layout_info.bindingCount =
      static_cast<uint32_t>(xe::countof(bindings));
  descriptor_set_layout_info.pBindings = bindings;
  status = dfn.vkCreateDescriptorSetLayout(device, &descriptor_set_layout_info,
                                           nullptr,
                                           &constant_descriptor_set_layout_);
  if (status != VK_SUCCESS) {
    return status;
  }

  // Create the descriptor we'll use for the uniform buffer.
  // This is what we hand out to everyone (who then also needs to use our
  // offsets).
  VkDescriptorSetAllocateInfo set_alloc_info;
  set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
  set_alloc_info.pNext = nullptr;
  set_alloc_info.descriptorPool = constant_descriptor_pool_;
  set_alloc_info.descriptorSetCount = 1;
  set_alloc_info.pSetLayouts = &constant_descriptor_set_layout_;
  status = dfn.vkAllocateDescriptorSets(device, &set_alloc_info,
                                        &constant_descriptor_set_);
  if (status != VK_SUCCESS) {
    return status;
  }

  // Initialize descriptor set with our buffers. Both bindings view the start
  // of the transient buffer; per-draw dynamic offsets select the actual data.
  VkDescriptorBufferInfo buffer_info;
  buffer_info.buffer = transient_buffer_->gpu_buffer();
  buffer_info.offset = 0;
  buffer_info.range = kConstantRegisterUniformRange;

  // Zero-initialized so the members unused by uniform-buffer writes
  // (pImageInfo, pTexelBufferView) are null rather than garbage.
  VkWriteDescriptorSet descriptor_writes[2] = {};
  auto& vertex_uniform_binding_write = descriptor_writes[0];
  vertex_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
  vertex_uniform_binding_write.pNext = nullptr;
  vertex_uniform_binding_write.dstSet = constant_descriptor_set_;
  vertex_uniform_binding_write.dstBinding = 0;
  vertex_uniform_binding_write.dstArrayElement = 0;
  vertex_uniform_binding_write.descriptorCount = 1;
  vertex_uniform_binding_write.descriptorType =
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  vertex_uniform_binding_write.pBufferInfo = &buffer_info;
  auto& fragment_uniform_binding_write = descriptor_writes[1];
  fragment_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
  fragment_uniform_binding_write.pNext = nullptr;
  fragment_uniform_binding_write.dstSet = constant_descriptor_set_;
  fragment_uniform_binding_write.dstBinding = 1;
  fragment_uniform_binding_write.dstArrayElement = 0;
  fragment_uniform_binding_write.descriptorCount = 1;
  fragment_uniform_binding_write.descriptorType =
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  fragment_uniform_binding_write.pBufferInfo = &buffer_info;
  dfn.vkUpdateDescriptorSets(device, 2, descriptor_writes, 0, nullptr);

  return VK_SUCCESS;
}
|
||||
|
||||
// Releases the constant descriptor set, its layout, and the pool. The set is
// freed explicitly because the pool was created with
// VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT.
void BufferCache::FreeConstantDescriptorSet() {
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
  VkDevice device = provider_.device();

  if (constant_descriptor_set_) {
    dfn.vkFreeDescriptorSets(device, constant_descriptor_pool_, 1,
                             &constant_descriptor_set_);
    constant_descriptor_set_ = nullptr;
  }

  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
                                         device,
                                         constant_descriptor_set_layout_);
  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device,
                                         constant_descriptor_pool_);
}
|
||||
|
||||
// Tears down everything Initialize() created, in reverse-ish order: the VMA
// allocator, descriptor resources, the transient buffer, and finally the
// backing memory pool. Called from the destructor; must tolerate handles that
// were never created.
void BufferCache::Shutdown() {
  if (mem_allocator_) {
    vmaDestroyAllocator(mem_allocator_);
    mem_allocator_ = nullptr;
  }

  FreeConstantDescriptorSet();
  FreeVertexDescriptorPool();

  // Unbind/unmap the circular buffer before freeing the memory that backs it.
  transient_buffer_->Shutdown();
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
  VkDevice device = provider_.device();
  ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
                                         gpu_memory_pool_);
}
|
||||
|
||||
// Uploads the full shader constant register file (floats, bools, loops) into
// the transient buffer and emits a host->shader barrier on command_buffer.
// Returns the {vertex, pixel} dynamic offsets for the constant descriptor set
// (currently identical, since one fat block is shared by both stages), or
// {VK_WHOLE_SIZE, VK_WHOLE_SIZE} on allocation failure.
// NOTE(review): vertex_constant_register_map/pixel_constant_register_map are
// accepted but unused by the live path - they are only referenced by the
// disabled packed-upload code below.
std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
    VkCommandBuffer command_buffer,
    const Shader::ConstantRegisterMap& vertex_constant_register_map,
    const Shader::ConstantRegisterMap& pixel_constant_register_map,
    VkFence fence) {
  // Fat struct, including all registers:
  // struct {
  //   vec4 float[512];
  //   uint bool[8];
  //   uint loop[32];
  // };
  auto offset = AllocateTransientData(kConstantRegisterUniformRange, fence);
  if (offset == VK_WHOLE_SIZE) {
    // OOM.
    return {VK_WHOLE_SIZE, VK_WHOLE_SIZE};
  }

  // Copy over all the registers.
  const auto& values = register_file_->values;
  uint8_t* dest_ptr = transient_buffer_->host_base() + offset;
  std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32,
              (512 * 4 * 4));
  dest_ptr += 512 * 4 * 4;
  std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
              8 * 4);
  dest_ptr += 8 * 4;
  std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32,
              32 * 4);
  dest_ptr += 32 * 4;

  transient_buffer_->Flush(offset, kConstantRegisterUniformRange);

  // Append a barrier to the command buffer so shader reads wait on the host
  // writes above.
  VkBufferMemoryBarrier barrier = {
      VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      nullptr,
      VK_ACCESS_HOST_WRITE_BIT,
      VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT,
      VK_QUEUE_FAMILY_IGNORED,
      VK_QUEUE_FAMILY_IGNORED,
      transient_buffer_->gpu_buffer(),
      offset,
      kConstantRegisterUniformRange,
  };
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
  dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
                           VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1,
                           &barrier, 0, nullptr);

  return {offset, offset};

  // Packed upload code.
  // This is not currently supported by the shaders, but would be awesome.
  // We should be able to use this for any shader that does not do dynamic
  // constant indexing.
  // NOTE(review): this disabled code is buggy as written - `sh << 1` in the
  // inner loop increment discards its result (should be `sh <<= 1`, seeded
  // with 1), so `piece & sh` would never match. Fix before ever enabling.
#if 0
  // Allocate space in the buffer for our data.
  auto offset =
      AllocateTransientData(constant_register_map.packed_byte_length, fence);
  if (offset == VK_WHOLE_SIZE) {
    // OOM.
    return VK_WHOLE_SIZE;
  }

  // Run through registers and copy them into the buffer.
  // TODO(benvanik): optimize this - it's hit twice every call.
  const auto& values = register_file_->values;
  uint8_t* dest_ptr =
      reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset;
  for (int i = 0; i < 4; ++i) {
    auto piece = constant_register_map.float_bitmap[i];
    if (!piece) {
      continue;
    }
    for (int j = 0, sh = 0; j < 64; ++j, sh << 1) {
      if (piece & sh) {
        xe::copy_128_aligned(
            dest_ptr,
            &values[XE_GPU_REG_SHADER_CONSTANT_000_X + i * 64 + j].f32, 1);
        dest_ptr += 16;
      }
    }
  }
  for (int i = 0; i < 32; ++i) {
    if (constant_register_map.loop_bitmap & (1 << i)) {
      xe::store<uint32_t>(dest_ptr,
                          values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32);
      dest_ptr += 4;
    }
  }
  for (int i = 0; i < 8; ++i) {
    if (constant_register_map.bool_bitmap[i]) {
      xe::store<uint32_t>(
          dest_ptr, values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32);
      dest_ptr += 4;
    }
  }

  return offset;
#endif  // 0
}
|
||||
|
||||
// Copies a guest index buffer into the transient buffer, endian-swapping and
// (when primitive reset is enabled) translating guest reset indices to the
// Vulkan all-ones reset index. Emits a host->index-read barrier. Returns
// {buffer, offset} on success or {nullptr, VK_WHOLE_SIZE} on OOM.
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
    VkCommandBuffer command_buffer, uint32_t source_addr,
    uint32_t source_length, xenos::IndexFormat format, VkFence fence) {
  // Allocate space in the buffer for our data.
  auto offset = AllocateTransientData(source_length, fence);
  if (offset == VK_WHOLE_SIZE) {
    // OOM.
    return {nullptr, VK_WHOLE_SIZE};
  }

  const void* source_ptr = memory_->TranslatePhysical(source_addr);

  uint32_t prim_reset_index =
      register_file_->values[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32;
  // Bit 21 of PA_SU_SC_MODE_CNTL = multi-primitive index-buffer reset enable.
  bool prim_reset_enabled =
      !!(register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21));

  // Copy data into the buffer. If primitive reset is enabled, translate any
  // primitive reset indices to something Vulkan understands.
  // TODO(benvanik): memcpy then use compute shaders to swap?
  if (prim_reset_enabled) {
    if (format == xenos::IndexFormat::kInt16) {
      // Endian::k8in16, swap half-words.
      copy_cmp_swap_16_unaligned(
          transient_buffer_->host_base() + offset, source_ptr,
          static_cast<uint16_t>(prim_reset_index), source_length / 2);
    } else if (format == xenos::IndexFormat::kInt32) {
      // Endian::k8in32, swap words.
      copy_cmp_swap_32_unaligned(transient_buffer_->host_base() + offset,
                                 source_ptr, prim_reset_index,
                                 source_length / 4);
    }
  } else {
    if (format == xenos::IndexFormat::kInt16) {
      // Endian::k8in16, swap half-words.
      xe::copy_and_swap_16_unaligned(transient_buffer_->host_base() + offset,
                                     source_ptr, source_length / 2);
    } else if (format == xenos::IndexFormat::kInt32) {
      // Endian::k8in32, swap words.
      xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset,
                                     source_ptr, source_length / 4);
    }
  }

  transient_buffer_->Flush(offset, source_length);

  // Append a barrier to the command buffer so the vertex-input stage waits on
  // the host writes above.
  VkBufferMemoryBarrier barrier = {
      VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      nullptr,
      VK_ACCESS_HOST_WRITE_BIT,
      VK_ACCESS_INDEX_READ_BIT,
      VK_QUEUE_FAMILY_IGNORED,
      VK_QUEUE_FAMILY_IGNORED,
      transient_buffer_->gpu_buffer(),
      offset,
      source_length,
  };
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
  dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
                           VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1,
                           &barrier, 0, nullptr);

  return {transient_buffer_->gpu_buffer(), offset};
}
|
||||
|
||||
// Returns a transient-buffer {buffer, offset} for a guest vertex buffer,
// serving from the transient cache when the range is already resident,
// otherwise endian-swapping it in and emitting a host->vertex-shader barrier.
// Returns {nullptr, VK_WHOLE_SIZE} on OOM.
// NOTE(review): only Endian::k8in32 and k16in32 are handled; other endian
// modes hit assert_always - confirm callers never pass them.
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
    VkCommandBuffer command_buffer, uint32_t source_addr,
    uint32_t source_length, xenos::Endian endian, VkFence fence) {
  auto offset = FindCachedTransientData(source_addr, source_length);
  if (offset != VK_WHOLE_SIZE) {
    // Cache hit: data already uploaded this frame.
    return {transient_buffer_->gpu_buffer(), offset};
  }

  // Slow path :)
  // Expand the region up to the allocation boundary
  auto physical_heap = memory_->GetPhysicalHeap();
  uint32_t upload_base = source_addr;
  uint32_t upload_size = source_length;

  // Ping the memory subsystem for allocation size.
  // TODO(DrChat): Artifacting occurring in 5841089E with this enabled.
  // physical_heap->QueryBaseAndSize(&upload_base, &upload_size);
  assert(upload_base <= source_addr);
  uint32_t source_offset = source_addr - upload_base;

  // Allocate space in the buffer for our data.
  offset = AllocateTransientData(upload_size, fence);
  if (offset == VK_WHOLE_SIZE) {
    // OOM.
    XELOGW(
        "Failed to allocate transient data for vertex buffer! Wanted to "
        "allocate {} bytes.",
        upload_size);
    return {nullptr, VK_WHOLE_SIZE};
  }

  const void* upload_ptr = memory_->TranslatePhysical(upload_base);

  // Copy data into the buffer.
  // TODO(benvanik): memcpy then use compute shaders to swap?
  if (endian == xenos::Endian::k8in32) {
    // Endian::k8in32, swap words.
    xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset,
                                   upload_ptr, source_length / 4);
  } else if (endian == xenos::Endian::k16in32) {
    xe::copy_and_swap_16_in_32_unaligned(
        transient_buffer_->host_base() + offset, upload_ptr, source_length / 4);
  } else {
    assert_always();
  }

  transient_buffer_->Flush(offset, upload_size);

  // Append a barrier to the command buffer so vertex-shader reads wait on the
  // host writes above.
  VkBufferMemoryBarrier barrier = {
      VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      nullptr,
      VK_ACCESS_HOST_WRITE_BIT,
      VK_ACCESS_SHADER_READ_BIT,
      VK_QUEUE_FAMILY_IGNORED,
      VK_QUEUE_FAMILY_IGNORED,
      transient_buffer_->gpu_buffer(),
      offset,
      upload_size,
  };
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
  dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
                           VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, 0, 0, nullptr,
                           1, &barrier, 0, nullptr);

  // Cache the whole uploaded region; return the offset adjusted back to the
  // requested address within it.
  CacheTransientData(upload_base, upload_size, offset);
  return {transient_buffer_->gpu_buffer(), offset + source_offset};
}
|
||||
|
||||
// Folds the current register-file fetch-constant contents for each vertex
// binding into hash_state, so PrepareVertexSet can reuse a descriptor set
// when the fetch constants are unchanged. Only the fetch constant data is
// hashed (the binding metadata hashing below is disabled).
void BufferCache::HashVertexBindings(
    XXH3_state_t* hash_state,
    const std::vector<Shader::VertexBinding>& vertex_bindings) {
  auto& regs = *register_file_;
  for (const auto& vertex_binding : vertex_bindings) {
#if 0
    XXH3_64bits_update(hash_state, &vertex_binding.binding_index, sizeof(vertex_binding.binding_index));
    XXH3_64bits_update(hash_state, &vertex_binding.fetch_constant, sizeof(vertex_binding.fetch_constant));
    XXH3_64bits_update(hash_state, &vertex_binding.stride_words, sizeof(vertex_binding.stride_words));
#endif
    // Fetch constants are packed three to a group of six registers; locate
    // the group, then pick the slot within it.
    int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
            (vertex_binding.fetch_constant / 3) * 6;
    const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
    switch (vertex_binding.fetch_constant % 3) {
      case 0: {
        auto& fetch = group->vertex_fetch_0;
        XXH3_64bits_update(hash_state, &fetch, sizeof(fetch));
      } break;
      case 1: {
        auto& fetch = group->vertex_fetch_1;
        XXH3_64bits_update(hash_state, &fetch, sizeof(fetch));
      } break;
      case 2: {
        auto& fetch = group->vertex_fetch_2;
        XXH3_64bits_update(hash_state, &fetch, sizeof(fetch));
      } break;
    }
  }
}
|
||||
|
||||
// Builds (or reuses) a descriptor set binding every vertex buffer referenced
// by vertex_bindings as storage buffers. Uploads each buffer through
// UploadVertexBuffer, then performs one bulk vkUpdateDescriptorSets. Returns
// nullptr on invalid fetch constants, pool exhaustion, or upload failure.
// Fix: the cache lookup was written as a `for` loop over find(hash) that
// returned on its first iteration - if it had ever iterated it would have
// walked unrelated entries. Replaced with a plain find/if; also removed a
// duplicated comment.
VkDescriptorSet BufferCache::PrepareVertexSet(
    VkCommandBuffer command_buffer, VkFence fence,
    const std::vector<Shader::VertexBinding>& vertex_bindings) {
  // (quickly) Generate a hash of the fetch constants backing these bindings.
  XXH3_state_t hash_state;
  XXH3_64bits_reset(&hash_state);
  HashVertexBindings(&hash_state, vertex_bindings);
  uint64_t hash = XXH3_64bits_digest(&hash_state);

  auto it = vertex_sets_.find(hash);
  if (it != vertex_sets_.end()) {
    // TODO(DrChat): We need to compare the bindings and ensure they're equal.
    return it->second;
  }

  if (!vertex_descriptor_pool_->has_open_batch()) {
    vertex_descriptor_pool_->BeginBatch(fence);
  }

  VkDescriptorSet set =
      vertex_descriptor_pool_->AcquireEntry(vertex_descriptor_set_layout_);
  if (!set) {
    return nullptr;
  }

  // TODO(DrChat): Define magic number 32 as a constant somewhere.
  VkDescriptorBufferInfo buffer_infos[32] = {};
  // descriptorCount starts at 0 and grows as buffer_infos is filled below.
  VkWriteDescriptorSet descriptor_write = {
      VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
      nullptr,
      set,
      0,
      0,
      0,
      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
      nullptr,
      buffer_infos,
      nullptr,
  };

  auto& regs = *register_file_;
  for (const auto& vertex_binding : vertex_bindings) {
    // Fetch constants are packed three to a group of six registers.
    int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
            (vertex_binding.fetch_constant / 3) * 6;
    const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
    const xe_gpu_vertex_fetch_t* fetch = nullptr;
    switch (vertex_binding.fetch_constant % 3) {
      case 0:
        fetch = &group->vertex_fetch_0;
        break;
      case 1:
        fetch = &group->vertex_fetch_1;
        break;
      case 2:
        fetch = &group->vertex_fetch_2;
        break;
    }

    // TODO(DrChat): Some games use type kInvalidTexture (with no data).
    switch (fetch->type) {
      case xenos::FetchConstantType::kVertex:
        break;
      case xenos::FetchConstantType::kInvalidVertex:
        if (cvars::gpu_allow_invalid_fetch_constants) {
          break;
        }
        XELOGW(
            "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! "
            "This "
            "is incorrect behavior, but you can try bypassing this by "
            "launching Xenia with --gpu_allow_invalid_fetch_constants=true.",
            vertex_binding.fetch_constant, fetch->dword_0, fetch->dword_1);
        return nullptr;
      default:
        XELOGW(
            "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!",
            vertex_binding.fetch_constant, fetch->dword_0, fetch->dword_1);
        return nullptr;
    }

    // TODO(benvanik): compute based on indices or vertex count.
    // THIS CAN BE MASSIVELY INCORRECT (too large).
    // This may not be possible (with indexed vfetch).
    uint32_t source_length = fetch->size * 4;
    uint32_t physical_address = fetch->address << 2;

    // TODO(DrChat): This needs to be put in gpu::CommandProcessor
    // trace_writer_.WriteMemoryRead(physical_address, source_length);

    // Upload (or get a cached copy of) the buffer.
    auto buffer_ref = UploadVertexBuffer(command_buffer, physical_address,
                                         source_length, fetch->endian, fence);
    if (buffer_ref.second == VK_WHOLE_SIZE) {
      // Failed to upload buffer.
      XELOGW("Failed to upload vertex buffer!");
      return nullptr;
    }

    // Stash the buffer reference for our bulk bind at the end.
    buffer_infos[descriptor_write.descriptorCount++] = {
        buffer_ref.first,
        buffer_ref.second,
        source_length,
    };
  }

  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
  VkDevice device = provider_.device();
  dfn.vkUpdateDescriptorSets(device, 1, &descriptor_write, 0, nullptr);
  vertex_sets_[hash] = set;
  return set;
}
|
||||
|
||||
// Suballocates `length` bytes from the transient buffer, tagged with `fence`
// for later reclamation. On exhaustion, reclaims fenced space once and
// retries. Returns VK_WHOLE_SIZE if space is still unavailable.
VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize length,
                                                VkFence fence) {
  // Happy path: space is already available.
  VkDeviceSize transient_offset = TryAllocateTransientData(length, fence);
  if (transient_offset == VK_WHOLE_SIZE) {
    // Ran out of easy allocations - consume signaled fences to reclaim space,
    // then retry once. May still fail if not enough was returned.
    transient_buffer_->Scavenge();
    transient_offset = TryAllocateTransientData(length, fence);
  }
  return transient_offset;
}
|
||||
|
||||
// Single non-reclaiming allocation attempt against the circular buffer.
// Returns the suballocation offset, or VK_WHOLE_SIZE when no space remains.
VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize length,
                                                   VkFence fence) {
  auto allocation = transient_buffer_->Acquire(length, fence);
  return allocation ? allocation->offset : VK_WHOLE_SIZE;
}
|
||||
|
||||
// Looks up [guest_address, guest_address + guest_length) in the transient
// cache. Returns the transient-buffer offset of that range when it is fully
// contained in a cached upload, otherwise VK_WHOLE_SIZE.
VkDeviceSize BufferCache::FindCachedTransientData(uint32_t guest_address,
                                                  uint32_t guest_length) {
  if (transient_cache_.empty()) {
    // Short-circuit exit.
    return VK_WHOLE_SIZE;
  }

  // upper_bound yields the first entry starting strictly after guest_address;
  // the entry just before it is the only candidate that can contain the range.
  auto candidate = transient_cache_.upper_bound(guest_address);
  if (candidate == transient_cache_.begin()) {
    // Every cached entry starts after the requested address.
    return VK_WHOLE_SIZE;
  }
  --candidate;

  const uint32_t entry_address = candidate->first;
  const uint32_t entry_length = candidate->second.first;
  const VkDeviceSize entry_offset = candidate->second.second;
  if ((entry_address + entry_length) < (guest_address + guest_length)) {
    // Candidate ends before the requested range does - not contained.
    return VK_WHOLE_SIZE;
  }
  // Fully contained within cached transient data; adjust into the entry.
  return entry_offset + static_cast<VkDeviceSize>(guest_address - entry_address);
}
|
||||
|
||||
// Records that guest range [guest_address, guest_address + guest_length) now
// lives at `offset` in the transient buffer, and drops cached entries that
// the new range fully covers.
// NOTE(review): only entries starting at or after guest_address that are
// fully contained are erased; an entry overlapping the tail of the new range
// (or one starting earlier that overlaps it) is left in place - presumably
// acceptable because lookups require full containment, but verify.
void BufferCache::CacheTransientData(uint32_t guest_address,
                                     uint32_t guest_length,
                                     VkDeviceSize offset) {
  transient_cache_[guest_address] = {guest_length, offset};

  // Erase any entries contained within
  auto it = transient_cache_.upper_bound(guest_address);
  while (it != transient_cache_.end()) {
    if ((guest_address + guest_length) >= (it->first + it->second.first)) {
      it = transient_cache_.erase(it);
    } else {
      // Map is address-ordered; once one entry extends past the new range,
      // all later ones do too.
      break;
    }
  }
}
|
||||
|
||||
// Flushes the entire mapped transient-buffer memory range so device reads see
// host writes. command_buffer is currently unused (see TODO below).
// Fix: removed a dead `if (true) {}` placeholder that contained only
// commented-out code; its intent is preserved as the TODO.
void BufferCache::Flush(VkCommandBuffer command_buffer) {
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
  VkDevice device = provider_.device();

  // TODO(benvanik): when flushing a big enough chunk, queue an event on
  // command_buffer (vkCmdSetEvent with
  // VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT) so completed space can be
  // reclaimed - often enough that we won't run out of space.

  // Flush memory.
  // TODO(benvanik): subrange.
  VkMappedMemoryRange dirty_range;
  dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  dirty_range.pNext = nullptr;
  dirty_range.memory = transient_buffer_->gpu_memory();
  dirty_range.offset = 0;
  dirty_range.size = transient_buffer_->capacity();
  dfn.vkFlushMappedMemoryRanges(device, 1, &dirty_range);
}
|
||||
|
||||
// Drops all cached guest-address -> transient-offset mappings so subsequent
// lookups re-upload from guest memory. The uploaded bytes themselves remain
// in the circular buffer until scavenged.
void BufferCache::InvalidateCache() {
  // Called by VulkanCommandProcessor::MakeCoherent()
  // Discard everything?
  transient_cache_.clear();
}
|
||||
|
||||
// Full cache clear; currently identical to InvalidateCache().
void BufferCache::ClearCache() { transient_cache_.clear(); }
|
||||
|
||||
// End-of-frame housekeeping: drops the transient lookup cache, reclaims
// fence-completed circular-buffer space, and recycles the per-frame vertex
// descriptor sets.
void BufferCache::Scavenge() {
  SCOPE_profile_cpu_f("gpu");

  // Cached offsets become invalid once the underlying space is reclaimed.
  transient_cache_.clear();
  transient_buffer_->Scavenge();

  // TODO(DrChat): These could persist across frames, we just need a smart way
  // to delete unused ones.
  vertex_sets_.clear();
  if (vertex_descriptor_pool_->has_open_batch()) {
    vertex_descriptor_pool_->EndBatch();
  }

  vertex_descriptor_pool_->Scavenge();
}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -1,175 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_VULKAN_BUFFER_CACHE_H_
|
||||
#define XENIA_GPU_VULKAN_BUFFER_CACHE_H_
|
||||
|
||||
#include "xenia/base/xxhash.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/shader.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/memory.h"
|
||||
#include "xenia/ui/vulkan/circular_buffer.h"
|
||||
#include "xenia/ui/vulkan/fenced_pools.h"
|
||||
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
// Efficiently manages buffers of various kinds.
|
||||
// Used primarily for uploading index and vertex data from guest memory and
|
||||
// transient data like shader constants.
|
||||
class BufferCache {
|
||||
public:
|
||||
BufferCache(RegisterFile* register_file, Memory* memory,
|
||||
const ui::vulkan::VulkanProvider& provider, size_t capacity);
|
||||
~BufferCache();
|
||||
|
||||
VkResult Initialize();
|
||||
void Shutdown();
|
||||
|
||||
// Descriptor set containing the dynamic uniform buffer used for constant
|
||||
// uploads. Used in conjunction with a dynamic offset returned by
|
||||
// UploadConstantRegisters.
|
||||
// The set contains two bindings:
|
||||
// binding = 0: for use in vertex shaders
|
||||
// binding = 1: for use in fragment shaders
|
||||
VkDescriptorSet constant_descriptor_set() const {
|
||||
return constant_descriptor_set_;
|
||||
}
|
||||
VkDescriptorSetLayout constant_descriptor_set_layout() const {
|
||||
return constant_descriptor_set_layout_;
|
||||
}
|
||||
|
||||
// Descriptor set containing vertex buffers stored in storage buffers.
|
||||
// This set contains one binding with an array of 32 storage buffers.
|
||||
VkDescriptorSetLayout vertex_descriptor_set_layout() const {
|
||||
return vertex_descriptor_set_layout_;
|
||||
}
|
||||
|
||||
// Uploads the constants specified in the register maps to the transient
|
||||
// uniform storage buffer.
|
||||
// The registers are tightly packed in order as [floats, ints, bools].
|
||||
// Returns an offset that can be used with the transient_descriptor_set or
|
||||
// VK_WHOLE_SIZE if the constants could not be uploaded (OOM).
|
||||
// The returned offsets may alias.
|
||||
std::pair<VkDeviceSize, VkDeviceSize> UploadConstantRegisters(
|
||||
VkCommandBuffer command_buffer,
|
||||
const Shader::ConstantRegisterMap& vertex_constant_register_map,
|
||||
const Shader::ConstantRegisterMap& pixel_constant_register_map,
|
||||
VkFence fence);
|
||||
|
||||
// Uploads index buffer data from guest memory, possibly eliding with
|
||||
// recently uploaded data or cached copies.
|
||||
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
|
||||
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
||||
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(
|
||||
VkCommandBuffer command_buffer, uint32_t source_addr,
|
||||
uint32_t source_length, xenos::IndexFormat format, VkFence fence);
|
||||
|
||||
// Uploads vertex buffer data from guest memory, possibly eliding with
|
||||
// recently uploaded data or cached copies.
|
||||
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
|
||||
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
||||
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(
|
||||
VkCommandBuffer command_buffer, uint32_t source_addr,
|
||||
uint32_t source_length, xenos::Endian endian, VkFence fence);
|
||||
|
||||
// Prepares and returns a vertex descriptor set.
|
||||
VkDescriptorSet PrepareVertexSet(
|
||||
VkCommandBuffer setup_buffer, VkFence fence,
|
||||
const std::vector<Shader::VertexBinding>& vertex_bindings);
|
||||
|
||||
// Flushes all pending data to the GPU.
|
||||
// Until this is called the GPU is not guaranteed to see any data.
|
||||
// The given command buffer will be used to queue up events so that the
|
||||
// cache can determine when data has been consumed.
|
||||
void Flush(VkCommandBuffer command_buffer);
|
||||
|
||||
// Marks the cache as potentially invalid.
|
||||
// This is not as strong as ClearCache and is a hint that any and all data
|
||||
// should be verified before being reused.
|
||||
void InvalidateCache();
|
||||
|
||||
// Clears all cached content and prevents future elision with pending data.
|
||||
void ClearCache();
|
||||
|
||||
// Wipes all data no longer needed.
|
||||
void Scavenge();
|
||||
|
||||
private:
|
||||
// This represents an uploaded vertex buffer.
|
||||
struct VertexBuffer {
|
||||
uint32_t guest_address;
|
||||
uint32_t size;
|
||||
|
||||
VmaAllocation alloc;
|
||||
VmaAllocationInfo alloc_info;
|
||||
};
|
||||
|
||||
VkResult CreateVertexDescriptorPool();
|
||||
void FreeVertexDescriptorPool();
|
||||
|
||||
VkResult CreateConstantDescriptorSet();
|
||||
void FreeConstantDescriptorSet();
|
||||
|
||||
void HashVertexBindings(
|
||||
XXH3_state_t* hash_state,
|
||||
const std::vector<Shader::VertexBinding>& vertex_bindings);
|
||||
|
||||
// Allocates a block of memory in the transient buffer.
|
||||
// When memory is not available fences are checked and space is reclaimed.
|
||||
// Returns VK_WHOLE_SIZE if requested amount of memory is not available.
|
||||
VkDeviceSize AllocateTransientData(VkDeviceSize length, VkFence fence);
|
||||
// Tries to allocate a block of memory in the transient buffer.
|
||||
// Returns VK_WHOLE_SIZE if requested amount of memory is not available.
|
||||
VkDeviceSize TryAllocateTransientData(VkDeviceSize length, VkFence fence);
|
||||
// Finds a block of data in the transient buffer sourced from the specified
|
||||
// guest address and length.
|
||||
VkDeviceSize FindCachedTransientData(uint32_t guest_address,
|
||||
uint32_t guest_length);
|
||||
// Adds a block of data to the frame cache.
|
||||
void CacheTransientData(uint32_t guest_address, uint32_t guest_length,
|
||||
VkDeviceSize offset);
|
||||
|
||||
RegisterFile* register_file_ = nullptr;
|
||||
Memory* memory_ = nullptr;
|
||||
const ui::vulkan::VulkanProvider& provider_;
|
||||
|
||||
VkDeviceMemory gpu_memory_pool_ = nullptr;
|
||||
VmaAllocator mem_allocator_ = nullptr;
|
||||
|
||||
// Staging ringbuffer we cycle through fast. Used for data we don't
|
||||
// plan on keeping past the current frame.
|
||||
std::unique_ptr<ui::vulkan::CircularBuffer> transient_buffer_ = nullptr;
|
||||
std::map<uint32_t, std::pair<uint32_t, VkDeviceSize>> transient_cache_;
|
||||
|
||||
// Vertex buffer descriptors
|
||||
std::unique_ptr<ui::vulkan::DescriptorPool> vertex_descriptor_pool_ = nullptr;
|
||||
VkDescriptorSetLayout vertex_descriptor_set_layout_ = nullptr;
|
||||
|
||||
// Current frame vertex sets.
|
||||
std::unordered_map<uint64_t, VkDescriptorSet> vertex_sets_;
|
||||
|
||||
// Descriptor set used to hold vertex/pixel shader float constants
|
||||
VkDescriptorPool constant_descriptor_pool_ = nullptr;
|
||||
VkDescriptorSetLayout constant_descriptor_set_layout_ = nullptr;
|
||||
VkDescriptorSet constant_descriptor_set_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_VULKAN_BUFFER_CACHE_H_
|
|
@ -0,0 +1,367 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/profiling.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
DeferredCommandBuffer::DeferredCommandBuffer(
|
||||
const VulkanCommandProcessor& command_processor, size_t initial_size)
|
||||
: command_processor_(command_processor) {
|
||||
command_stream_.reserve(initial_size / sizeof(uintmax_t));
|
||||
}
|
||||
|
||||
void DeferredCommandBuffer::Reset() { command_stream_.clear(); }
|
||||
|
||||
void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) {
|
||||
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn =
|
||||
command_processor_.GetVulkanProvider().dfn();
|
||||
const uintmax_t* stream = command_stream_.data();
|
||||
size_t stream_remaining = command_stream_.size();
|
||||
while (stream_remaining) {
|
||||
const CommandHeader& header =
|
||||
*reinterpret_cast<const CommandHeader*>(stream);
|
||||
stream += kCommandHeaderSizeElements;
|
||||
stream_remaining -= kCommandHeaderSizeElements;
|
||||
|
||||
switch (header.command) {
|
||||
case Command::kVkBeginRenderPass: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkBeginRenderPass*>(stream);
|
||||
size_t offset_bytes = sizeof(ArgsVkBeginRenderPass);
|
||||
VkRenderPassBeginInfo render_pass_begin_info;
|
||||
render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
|
||||
render_pass_begin_info.pNext = nullptr;
|
||||
render_pass_begin_info.renderPass = args.render_pass;
|
||||
render_pass_begin_info.framebuffer = args.framebuffer;
|
||||
render_pass_begin_info.renderArea = args.render_area;
|
||||
render_pass_begin_info.clearValueCount = args.clear_value_count;
|
||||
if (render_pass_begin_info.clearValueCount) {
|
||||
offset_bytes = xe::align(offset_bytes, alignof(VkClearValue));
|
||||
render_pass_begin_info.pClearValues =
|
||||
reinterpret_cast<const VkClearValue*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
|
||||
offset_bytes +=
|
||||
sizeof(VkClearValue) * render_pass_begin_info.clearValueCount;
|
||||
} else {
|
||||
render_pass_begin_info.pClearValues = nullptr;
|
||||
}
|
||||
dfn.vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info,
|
||||
args.contents);
|
||||
} break;
|
||||
|
||||
case Command::kVkBindDescriptorSets: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkBindDescriptorSets*>(stream);
|
||||
size_t offset_bytes = xe::align(sizeof(ArgsVkBindDescriptorSets),
|
||||
alignof(VkDescriptorSet));
|
||||
const VkDescriptorSet* descriptor_sets =
|
||||
reinterpret_cast<const VkDescriptorSet*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
|
||||
offset_bytes += sizeof(VkDescriptorSet) * args.descriptor_set_count;
|
||||
const uint32_t* dynamic_offsets = nullptr;
|
||||
if (args.dynamic_offset_count) {
|
||||
offset_bytes = xe::align(offset_bytes, alignof(uint32_t));
|
||||
dynamic_offsets = reinterpret_cast<const uint32_t*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
|
||||
offset_bytes += sizeof(uint32_t) * args.dynamic_offset_count;
|
||||
}
|
||||
dfn.vkCmdBindDescriptorSets(command_buffer, args.pipeline_bind_point,
|
||||
args.layout, args.first_set,
|
||||
args.descriptor_set_count, descriptor_sets,
|
||||
args.dynamic_offset_count, dynamic_offsets);
|
||||
} break;
|
||||
|
||||
case Command::kVkBindIndexBuffer: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkBindIndexBuffer*>(stream);
|
||||
dfn.vkCmdBindIndexBuffer(command_buffer, args.buffer, args.offset,
|
||||
args.index_type);
|
||||
} break;
|
||||
|
||||
case Command::kVkBindPipeline: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkBindPipeline*>(stream);
|
||||
dfn.vkCmdBindPipeline(command_buffer, args.pipeline_bind_point,
|
||||
args.pipeline);
|
||||
} break;
|
||||
|
||||
case Command::kVkBindVertexBuffers: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkBindVertexBuffers*>(stream);
|
||||
size_t offset_bytes =
|
||||
xe::align(sizeof(ArgsVkBindVertexBuffers), alignof(VkBuffer));
|
||||
const VkBuffer* buffers = reinterpret_cast<const VkBuffer*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
|
||||
offset_bytes =
|
||||
xe::align(offset_bytes + sizeof(VkBuffer) * args.binding_count,
|
||||
alignof(VkDeviceSize));
|
||||
const VkDeviceSize* offsets = reinterpret_cast<const VkDeviceSize*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
|
||||
dfn.vkCmdBindVertexBuffers(command_buffer, args.first_binding,
|
||||
args.binding_count, buffers, offsets);
|
||||
} break;
|
||||
|
||||
case Command::kVkClearAttachments: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkClearAttachments*>(stream);
|
||||
size_t offset_bytes = xe::align(sizeof(ArgsVkClearAttachments),
|
||||
alignof(VkClearAttachment));
|
||||
const VkClearAttachment* attachments =
|
||||
reinterpret_cast<const VkClearAttachment*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
|
||||
offset_bytes = xe::align(
|
||||
offset_bytes + sizeof(VkClearAttachment) * args.attachment_count,
|
||||
alignof(VkClearRect));
|
||||
const VkClearRect* rects = reinterpret_cast<const VkClearRect*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
|
||||
dfn.vkCmdClearAttachments(command_buffer, args.attachment_count,
|
||||
attachments, args.rect_count, rects);
|
||||
} break;
|
||||
|
||||
case Command::kVkClearColorImage: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkClearColorImage*>(stream);
|
||||
dfn.vkCmdClearColorImage(
|
||||
command_buffer, args.image, args.image_layout, &args.color,
|
||||
args.range_count,
|
||||
reinterpret_cast<const VkImageSubresourceRange*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) +
|
||||
xe::align(sizeof(ArgsVkClearColorImage),
|
||||
alignof(VkImageSubresourceRange))));
|
||||
} break;
|
||||
|
||||
case Command::kVkCopyBuffer: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkCopyBuffer*>(stream);
|
||||
dfn.vkCmdCopyBuffer(
|
||||
command_buffer, args.src_buffer, args.dst_buffer, args.region_count,
|
||||
reinterpret_cast<const VkBufferCopy*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) +
|
||||
xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy))));
|
||||
} break;
|
||||
|
||||
case Command::kVkCopyBufferToImage: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkCopyBufferToImage*>(stream);
|
||||
dfn.vkCmdCopyBufferToImage(
|
||||
command_buffer, args.src_buffer, args.dst_image,
|
||||
args.dst_image_layout, args.region_count,
|
||||
reinterpret_cast<const VkBufferImageCopy*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) +
|
||||
xe::align(sizeof(ArgsVkCopyBufferToImage),
|
||||
alignof(VkBufferImageCopy))));
|
||||
} break;
|
||||
|
||||
case Command::kVkDispatch: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkDispatch*>(stream);
|
||||
dfn.vkCmdDispatch(command_buffer, args.group_count_x,
|
||||
args.group_count_y, args.group_count_z);
|
||||
} break;
|
||||
|
||||
case Command::kVkDraw: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkDraw*>(stream);
|
||||
dfn.vkCmdDraw(command_buffer, args.vertex_count, args.instance_count,
|
||||
args.first_vertex, args.first_instance);
|
||||
} break;
|
||||
|
||||
case Command::kVkDrawIndexed: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkDrawIndexed*>(stream);
|
||||
dfn.vkCmdDrawIndexed(command_buffer, args.index_count,
|
||||
args.instance_count, args.first_index,
|
||||
args.vertex_offset, args.first_instance);
|
||||
} break;
|
||||
|
||||
case Command::kVkEndRenderPass:
|
||||
dfn.vkCmdEndRenderPass(command_buffer);
|
||||
break;
|
||||
|
||||
case Command::kVkPipelineBarrier: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkPipelineBarrier*>(stream);
|
||||
size_t barrier_offset_bytes = sizeof(ArgsVkPipelineBarrier);
|
||||
const VkMemoryBarrier* memory_barriers = nullptr;
|
||||
if (args.memory_barrier_count) {
|
||||
barrier_offset_bytes =
|
||||
xe::align(barrier_offset_bytes, alignof(VkMemoryBarrier));
|
||||
memory_barriers = reinterpret_cast<const VkMemoryBarrier*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) + barrier_offset_bytes);
|
||||
barrier_offset_bytes +=
|
||||
sizeof(VkMemoryBarrier) * args.memory_barrier_count;
|
||||
}
|
||||
const VkBufferMemoryBarrier* buffer_memory_barriers = nullptr;
|
||||
if (args.buffer_memory_barrier_count) {
|
||||
barrier_offset_bytes =
|
||||
xe::align(barrier_offset_bytes, alignof(VkBufferMemoryBarrier));
|
||||
buffer_memory_barriers =
|
||||
reinterpret_cast<const VkBufferMemoryBarrier*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) +
|
||||
barrier_offset_bytes);
|
||||
barrier_offset_bytes +=
|
||||
sizeof(VkBufferMemoryBarrier) * args.buffer_memory_barrier_count;
|
||||
}
|
||||
const VkImageMemoryBarrier* image_memory_barriers = nullptr;
|
||||
if (args.image_memory_barrier_count) {
|
||||
barrier_offset_bytes =
|
||||
xe::align(barrier_offset_bytes, alignof(VkImageMemoryBarrier));
|
||||
image_memory_barriers = reinterpret_cast<const VkImageMemoryBarrier*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) + barrier_offset_bytes);
|
||||
barrier_offset_bytes +=
|
||||
sizeof(VkImageMemoryBarrier) * args.image_memory_barrier_count;
|
||||
}
|
||||
dfn.vkCmdPipelineBarrier(
|
||||
command_buffer, args.src_stage_mask, args.dst_stage_mask,
|
||||
args.dependency_flags, args.memory_barrier_count, memory_barriers,
|
||||
args.buffer_memory_barrier_count, buffer_memory_barriers,
|
||||
args.image_memory_barrier_count, image_memory_barriers);
|
||||
} break;
|
||||
|
||||
case Command::kVkPushConstants: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkPushConstants*>(stream);
|
||||
dfn.vkCmdPushConstants(command_buffer, args.layout, args.stage_flags,
|
||||
args.offset, args.size,
|
||||
reinterpret_cast<const uint8_t*>(stream) +
|
||||
sizeof(ArgsVkPushConstants));
|
||||
} break;
|
||||
|
||||
case Command::kVkSetBlendConstants: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkSetBlendConstants*>(stream);
|
||||
dfn.vkCmdSetBlendConstants(command_buffer, args.blend_constants);
|
||||
} break;
|
||||
|
||||
case Command::kVkSetDepthBias: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkSetDepthBias*>(stream);
|
||||
dfn.vkCmdSetDepthBias(command_buffer, args.depth_bias_constant_factor,
|
||||
args.depth_bias_clamp,
|
||||
args.depth_bias_slope_factor);
|
||||
} break;
|
||||
|
||||
case Command::kVkSetScissor: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkSetScissor*>(stream);
|
||||
dfn.vkCmdSetScissor(
|
||||
command_buffer, args.first_scissor, args.scissor_count,
|
||||
reinterpret_cast<const VkRect2D*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) +
|
||||
xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D))));
|
||||
} break;
|
||||
|
||||
case Command::kVkSetStencilCompareMask: {
|
||||
auto& args =
|
||||
*reinterpret_cast<const ArgsSetStencilMaskReference*>(stream);
|
||||
dfn.vkCmdSetStencilCompareMask(command_buffer, args.face_mask,
|
||||
args.mask_reference);
|
||||
} break;
|
||||
|
||||
case Command::kVkSetStencilReference: {
|
||||
auto& args =
|
||||
*reinterpret_cast<const ArgsSetStencilMaskReference*>(stream);
|
||||
dfn.vkCmdSetStencilReference(command_buffer, args.face_mask,
|
||||
args.mask_reference);
|
||||
} break;
|
||||
|
||||
case Command::kVkSetStencilWriteMask: {
|
||||
auto& args =
|
||||
*reinterpret_cast<const ArgsSetStencilMaskReference*>(stream);
|
||||
dfn.vkCmdSetStencilWriteMask(command_buffer, args.face_mask,
|
||||
args.mask_reference);
|
||||
} break;
|
||||
|
||||
case Command::kVkSetViewport: {
|
||||
auto& args = *reinterpret_cast<const ArgsVkSetViewport*>(stream);
|
||||
dfn.vkCmdSetViewport(
|
||||
command_buffer, args.first_viewport, args.viewport_count,
|
||||
reinterpret_cast<const VkViewport*>(
|
||||
reinterpret_cast<const uint8_t*>(stream) +
|
||||
xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport))));
|
||||
} break;
|
||||
|
||||
default:
|
||||
assert_unhandled_case(header.command);
|
||||
break;
|
||||
}
|
||||
|
||||
stream += header.arguments_size_elements;
|
||||
stream_remaining -= header.arguments_size_elements;
|
||||
}
|
||||
}
|
||||
|
||||
void DeferredCommandBuffer::CmdVkPipelineBarrier(
|
||||
VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
|
||||
VkDependencyFlags dependency_flags, uint32_t memory_barrier_count,
|
||||
const VkMemoryBarrier* memory_barriers,
|
||||
uint32_t buffer_memory_barrier_count,
|
||||
const VkBufferMemoryBarrier* buffer_memory_barriers,
|
||||
uint32_t image_memory_barrier_count,
|
||||
const VkImageMemoryBarrier* image_memory_barriers) {
|
||||
size_t arguments_size = sizeof(ArgsVkPipelineBarrier);
|
||||
size_t memory_barriers_offset = 0;
|
||||
if (memory_barrier_count) {
|
||||
arguments_size = xe::align(arguments_size, alignof(VkMemoryBarrier));
|
||||
memory_barriers_offset = arguments_size;
|
||||
arguments_size += sizeof(VkMemoryBarrier) * memory_barrier_count;
|
||||
}
|
||||
size_t buffer_memory_barriers_offset = 0;
|
||||
if (buffer_memory_barrier_count) {
|
||||
arguments_size = xe::align(arguments_size, alignof(VkBufferMemoryBarrier));
|
||||
buffer_memory_barriers_offset = arguments_size;
|
||||
arguments_size +=
|
||||
sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count;
|
||||
}
|
||||
size_t image_memory_barriers_offset = 0;
|
||||
if (image_memory_barrier_count) {
|
||||
arguments_size = xe::align(arguments_size, alignof(VkImageMemoryBarrier));
|
||||
image_memory_barriers_offset = arguments_size;
|
||||
arguments_size += sizeof(VkImageMemoryBarrier) * image_memory_barrier_count;
|
||||
}
|
||||
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
|
||||
WriteCommand(Command::kVkPipelineBarrier, arguments_size));
|
||||
auto& args = *reinterpret_cast<ArgsVkPipelineBarrier*>(args_ptr);
|
||||
args.src_stage_mask = src_stage_mask;
|
||||
args.dst_stage_mask = dst_stage_mask;
|
||||
args.dependency_flags = dependency_flags;
|
||||
args.memory_barrier_count = memory_barrier_count;
|
||||
args.buffer_memory_barrier_count = buffer_memory_barrier_count;
|
||||
args.image_memory_barrier_count = image_memory_barrier_count;
|
||||
if (memory_barrier_count) {
|
||||
std::memcpy(args_ptr + memory_barriers_offset, memory_barriers,
|
||||
sizeof(VkMemoryBarrier) * memory_barrier_count);
|
||||
}
|
||||
if (buffer_memory_barrier_count) {
|
||||
std::memcpy(args_ptr + buffer_memory_barriers_offset,
|
||||
buffer_memory_barriers,
|
||||
sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count);
|
||||
}
|
||||
if (image_memory_barrier_count) {
|
||||
std::memcpy(args_ptr + image_memory_barriers_offset, image_memory_barriers,
|
||||
sizeof(VkImageMemoryBarrier) * image_memory_barrier_count);
|
||||
}
|
||||
}
|
||||
|
||||
void* DeferredCommandBuffer::WriteCommand(Command command,
|
||||
size_t arguments_size_bytes) {
|
||||
size_t arguments_size_elements =
|
||||
(arguments_size_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
|
||||
size_t offset = command_stream_.size();
|
||||
command_stream_.resize(offset + kCommandHeaderSizeElements +
|
||||
arguments_size_elements);
|
||||
CommandHeader& header =
|
||||
*reinterpret_cast<CommandHeader*>(command_stream_.data() + offset);
|
||||
header.command = command;
|
||||
header.arguments_size_elements = uint32_t(arguments_size_elements);
|
||||
return command_stream_.data() + (offset + kCommandHeaderSizeElements);
|
||||
}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -0,0 +1,550 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_
|
||||
#define XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
class VulkanCommandProcessor;
|
||||
|
||||
class DeferredCommandBuffer {
|
||||
public:
|
||||
DeferredCommandBuffer(const VulkanCommandProcessor& command_processor,
|
||||
size_t initial_size_bytes = 1024 * 1024);
|
||||
|
||||
void Reset();
|
||||
void Execute(VkCommandBuffer command_buffer);
|
||||
|
||||
// render_pass_begin->pNext of all barriers must be null.
|
||||
void CmdVkBeginRenderPass(const VkRenderPassBeginInfo* render_pass_begin,
|
||||
VkSubpassContents contents) {
|
||||
assert_null(render_pass_begin->pNext);
|
||||
size_t arguments_size = sizeof(ArgsVkBeginRenderPass);
|
||||
uint32_t clear_value_count = render_pass_begin->clearValueCount;
|
||||
size_t clear_values_offset = 0;
|
||||
if (clear_value_count) {
|
||||
arguments_size = xe::align(arguments_size, alignof(VkClearValue));
|
||||
clear_values_offset = arguments_size;
|
||||
arguments_size += sizeof(VkClearValue) * clear_value_count;
|
||||
}
|
||||
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
|
||||
WriteCommand(Command::kVkBeginRenderPass, arguments_size));
|
||||
auto& args = *reinterpret_cast<ArgsVkBeginRenderPass*>(args_ptr);
|
||||
args.render_pass = render_pass_begin->renderPass;
|
||||
args.framebuffer = render_pass_begin->framebuffer;
|
||||
args.render_area = render_pass_begin->renderArea;
|
||||
args.clear_value_count = clear_value_count;
|
||||
args.contents = contents;
|
||||
if (clear_value_count) {
|
||||
std::memcpy(args_ptr + clear_values_offset,
|
||||
render_pass_begin->pClearValues,
|
||||
sizeof(VkClearValue) * clear_value_count);
|
||||
}
|
||||
}
|
||||
|
||||
void CmdVkBindDescriptorSets(VkPipelineBindPoint pipeline_bind_point,
|
||||
VkPipelineLayout layout, uint32_t first_set,
|
||||
uint32_t descriptor_set_count,
|
||||
const VkDescriptorSet* descriptor_sets,
|
||||
uint32_t dynamic_offset_count,
|
||||
const uint32_t* dynamic_offsets) {
|
||||
size_t arguments_size =
|
||||
xe::align(sizeof(ArgsVkBindDescriptorSets), alignof(VkDescriptorSet));
|
||||
size_t descriptor_sets_offset = arguments_size;
|
||||
arguments_size += sizeof(VkDescriptorSet) * descriptor_set_count;
|
||||
size_t dynamic_offsets_offset = 0;
|
||||
if (dynamic_offset_count) {
|
||||
arguments_size = xe::align(arguments_size, alignof(uint32_t));
|
||||
dynamic_offsets_offset = arguments_size;
|
||||
arguments_size += sizeof(uint32_t) * dynamic_offset_count;
|
||||
}
|
||||
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
|
||||
WriteCommand(Command::kVkBindDescriptorSets, arguments_size));
|
||||
auto& args = *reinterpret_cast<ArgsVkBindDescriptorSets*>(args_ptr);
|
||||
args.pipeline_bind_point = pipeline_bind_point;
|
||||
args.layout = layout;
|
||||
args.first_set = first_set;
|
||||
args.descriptor_set_count = descriptor_set_count;
|
||||
args.dynamic_offset_count = dynamic_offset_count;
|
||||
std::memcpy(args_ptr + descriptor_sets_offset, descriptor_sets,
|
||||
sizeof(VkDescriptorSet) * descriptor_set_count);
|
||||
if (dynamic_offset_count) {
|
||||
std::memcpy(args_ptr + dynamic_offsets_offset, dynamic_offsets,
|
||||
sizeof(uint32_t) * dynamic_offset_count);
|
||||
}
|
||||
}
|
||||
|
||||
void CmdVkBindIndexBuffer(VkBuffer buffer, VkDeviceSize offset,
|
||||
VkIndexType index_type) {
|
||||
auto& args = *reinterpret_cast<ArgsVkBindIndexBuffer*>(WriteCommand(
|
||||
Command::kVkBindIndexBuffer, sizeof(ArgsVkBindIndexBuffer)));
|
||||
args.buffer = buffer;
|
||||
args.offset = offset;
|
||||
args.index_type = index_type;
|
||||
}
|
||||
|
||||
void CmdVkBindPipeline(VkPipelineBindPoint pipeline_bind_point,
|
||||
VkPipeline pipeline) {
|
||||
auto& args = *reinterpret_cast<ArgsVkBindPipeline*>(
|
||||
WriteCommand(Command::kVkBindPipeline, sizeof(ArgsVkBindPipeline)));
|
||||
args.pipeline_bind_point = pipeline_bind_point;
|
||||
args.pipeline = pipeline;
|
||||
}
|
||||
|
||||
void CmdVkBindVertexBuffers(uint32_t first_binding, uint32_t binding_count,
|
||||
const VkBuffer* buffers,
|
||||
const VkDeviceSize* offsets) {
|
||||
size_t arguments_size =
|
||||
xe::align(sizeof(ArgsVkBindVertexBuffers), alignof(VkBuffer));
|
||||
size_t buffers_offset = arguments_size;
|
||||
arguments_size =
|
||||
xe::align(arguments_size + sizeof(VkBuffer) * binding_count,
|
||||
alignof(VkDeviceSize));
|
||||
size_t offsets_offset = arguments_size;
|
||||
arguments_size += sizeof(VkDeviceSize) * binding_count;
|
||||
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
|
||||
WriteCommand(Command::kVkBindVertexBuffers, arguments_size));
|
||||
auto& args = *reinterpret_cast<ArgsVkBindVertexBuffers*>(args_ptr);
|
||||
args.first_binding = first_binding;
|
||||
args.binding_count = binding_count;
|
||||
std::memcpy(args_ptr + buffers_offset, buffers,
|
||||
sizeof(VkBuffer) * binding_count);
|
||||
std::memcpy(args_ptr + offsets_offset, offsets,
|
||||
sizeof(VkDeviceSize) * binding_count);
|
||||
}
|
||||
|
||||
void CmdClearAttachmentsEmplace(uint32_t attachment_count,
|
||||
VkClearAttachment*& attachments_out,
|
||||
uint32_t rect_count,
|
||||
VkClearRect*& rects_out) {
|
||||
size_t arguments_size =
|
||||
xe::align(sizeof(ArgsVkClearAttachments), alignof(VkClearAttachment));
|
||||
size_t attachments_offset = arguments_size;
|
||||
arguments_size =
|
||||
xe::align(arguments_size + sizeof(VkClearAttachment) * attachment_count,
|
||||
alignof(VkClearRect));
|
||||
size_t rects_offset = arguments_size;
|
||||
arguments_size += sizeof(VkClearRect) * rect_count;
|
||||
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
|
||||
WriteCommand(Command::kVkClearAttachments, arguments_size));
|
||||
auto& args = *reinterpret_cast<ArgsVkClearAttachments*>(args_ptr);
|
||||
args.attachment_count = attachment_count;
|
||||
args.rect_count = rect_count;
|
||||
attachments_out =
|
||||
reinterpret_cast<VkClearAttachment*>(args_ptr + attachments_offset);
|
||||
rects_out = reinterpret_cast<VkClearRect*>(args_ptr + rects_offset);
|
||||
}
|
||||
void CmdVkClearAttachments(uint32_t attachment_count,
|
||||
const VkClearAttachment* attachments,
|
||||
uint32_t rect_count, const VkClearRect* rects) {
|
||||
VkClearAttachment* attachments_arg;
|
||||
VkClearRect* rects_arg;
|
||||
CmdClearAttachmentsEmplace(attachment_count, attachments_arg, rect_count,
|
||||
rects_arg);
|
||||
std::memcpy(attachments_arg, attachments,
|
||||
sizeof(VkClearAttachment) * attachment_count);
|
||||
std::memcpy(rects_arg, rects, sizeof(VkClearRect) * rect_count);
|
||||
}
|
||||
|
||||
VkImageSubresourceRange* CmdClearColorImageEmplace(
|
||||
VkImage image, VkImageLayout image_layout, const VkClearColorValue* color,
|
||||
uint32_t range_count) {
|
||||
const size_t header_size = xe::align(sizeof(ArgsVkClearColorImage),
|
||||
alignof(VkImageSubresourceRange));
|
||||
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(WriteCommand(
|
||||
Command::kVkClearColorImage,
|
||||
header_size + sizeof(VkImageSubresourceRange) * range_count));
|
||||
auto& args = *reinterpret_cast<ArgsVkClearColorImage*>(args_ptr);
|
||||
args.image = image;
|
||||
args.image_layout = image_layout;
|
||||
args.color = *color;
|
||||
args.range_count = range_count;
|
||||
return reinterpret_cast<VkImageSubresourceRange*>(args_ptr + header_size);
|
||||
}
|
||||
void CmdVkClearColorImage(VkImage image, VkImageLayout image_layout,
|
||||
const VkClearColorValue* color,
|
||||
uint32_t range_count,
|
||||
const VkImageSubresourceRange* ranges) {
|
||||
std::memcpy(
|
||||
CmdClearColorImageEmplace(image, image_layout, color, range_count),
|
||||
ranges, sizeof(VkImageSubresourceRange) * range_count);
|
||||
}
|
||||
|
||||
VkBufferCopy* CmdCopyBufferEmplace(VkBuffer src_buffer, VkBuffer dst_buffer,
|
||||
uint32_t region_count) {
|
||||
const size_t header_size =
|
||||
xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy));
|
||||
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
|
||||
WriteCommand(Command::kVkCopyBuffer,
|
||||
header_size + sizeof(VkBufferCopy) * region_count));
|
||||
auto& args = *reinterpret_cast<ArgsVkCopyBuffer*>(args_ptr);
|
||||
args.src_buffer = src_buffer;
|
||||
args.dst_buffer = dst_buffer;
|
||||
args.region_count = region_count;
|
||||
return reinterpret_cast<VkBufferCopy*>(args_ptr + header_size);
|
||||
}
|
||||
void CmdVkCopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
|
||||
uint32_t region_count, const VkBufferCopy* regions) {
|
||||
std::memcpy(CmdCopyBufferEmplace(src_buffer, dst_buffer, region_count),
|
||||
regions, sizeof(VkBufferCopy) * region_count);
|
||||
}
|
||||
|
||||
VkBufferImageCopy* CmdCopyBufferToImageEmplace(VkBuffer src_buffer,
|
||||
VkImage dst_image,
|
||||
VkImageLayout dst_image_layout,
|
||||
uint32_t region_count) {
|
||||
const size_t header_size =
|
||||
xe::align(sizeof(ArgsVkCopyBufferToImage), alignof(VkBufferImageCopy));
|
||||
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
|
||||
WriteCommand(Command::kVkCopyBufferToImage,
|
||||
header_size + sizeof(VkBufferImageCopy) * region_count));
|
||||
auto& args = *reinterpret_cast<ArgsVkCopyBufferToImage*>(args_ptr);
|
||||
args.src_buffer = src_buffer;
|
||||
args.dst_image = dst_image;
|
||||
args.dst_image_layout = dst_image_layout;
|
||||
args.region_count = region_count;
|
||||
return reinterpret_cast<VkBufferImageCopy*>(args_ptr + header_size);
|
||||
}
|
||||
void CmdVkCopyBufferToImage(VkBuffer src_buffer, VkImage dst_image,
|
||||
VkImageLayout dst_image_layout,
|
||||
uint32_t region_count,
|
||||
const VkBufferImageCopy* regions) {
|
||||
std::memcpy(CmdCopyBufferToImageEmplace(src_buffer, dst_image,
|
||||
dst_image_layout, region_count),
|
||||
regions, sizeof(VkBufferImageCopy) * region_count);
|
||||
}
|
||||
|
||||
void CmdVkDispatch(uint32_t group_count_x, uint32_t group_count_y,
|
||||
uint32_t group_count_z) {
|
||||
auto& args = *reinterpret_cast<ArgsVkDispatch*>(
|
||||
WriteCommand(Command::kVkDispatch, sizeof(ArgsVkDispatch)));
|
||||
args.group_count_x = group_count_x;
|
||||
args.group_count_y = group_count_y;
|
||||
args.group_count_z = group_count_z;
|
||||
}
|
||||
|
||||
void CmdVkDraw(uint32_t vertex_count, uint32_t instance_count,
|
||||
uint32_t first_vertex, uint32_t first_instance) {
|
||||
auto& args = *reinterpret_cast<ArgsVkDraw*>(
|
||||
WriteCommand(Command::kVkDraw, sizeof(ArgsVkDraw)));
|
||||
args.vertex_count = vertex_count;
|
||||
args.instance_count = instance_count;
|
||||
args.first_vertex = first_vertex;
|
||||
args.first_instance = first_instance;
|
||||
}
|
||||
|
||||
void CmdVkDrawIndexed(uint32_t index_count, uint32_t instance_count,
|
||||
uint32_t first_index, int32_t vertex_offset,
|
||||
uint32_t first_instance) {
|
||||
auto& args = *reinterpret_cast<ArgsVkDrawIndexed*>(
|
||||
WriteCommand(Command::kVkDrawIndexed, sizeof(ArgsVkDrawIndexed)));
|
||||
args.index_count = index_count;
|
||||
args.instance_count = instance_count;
|
||||
args.first_index = first_index;
|
||||
args.vertex_offset = vertex_offset;
|
||||
args.first_instance = first_instance;
|
||||
}
|
||||
|
||||
void CmdVkEndRenderPass() { WriteCommand(Command::kVkEndRenderPass, 0); }
|
||||
|
||||
// pNext of all barriers must be null.
|
||||
void CmdVkPipelineBarrier(VkPipelineStageFlags src_stage_mask,
|
||||
VkPipelineStageFlags dst_stage_mask,
|
||||
VkDependencyFlags dependency_flags,
|
||||
uint32_t memory_barrier_count,
|
||||
const VkMemoryBarrier* memory_barriers,
|
||||
uint32_t buffer_memory_barrier_count,
|
||||
const VkBufferMemoryBarrier* buffer_memory_barriers,
|
||||
uint32_t image_memory_barrier_count,
|
||||
const VkImageMemoryBarrier* image_memory_barriers);
|
||||
|
||||
void CmdVkPushConstants(VkPipelineLayout layout,
|
||||
VkShaderStageFlags stage_flags, uint32_t offset,
|
||||
uint32_t size, const void* values) {
|
||||
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(WriteCommand(
|
||||
Command::kVkPushConstants, sizeof(ArgsVkPushConstants) + size));
|
||||
auto& args = *reinterpret_cast<ArgsVkPushConstants*>(args_ptr);
|
||||
args.layout = layout;
|
||||
args.stage_flags = stage_flags;
|
||||
args.offset = offset;
|
||||
args.size = size;
|
||||
std::memcpy(args_ptr + sizeof(ArgsVkPushConstants), values, size);
|
||||
}
|
||||
|
||||
void CmdVkSetBlendConstants(const float* blend_constants) {
|
||||
auto& args = *reinterpret_cast<ArgsVkSetBlendConstants*>(WriteCommand(
|
||||
Command::kVkSetBlendConstants, sizeof(ArgsVkSetBlendConstants)));
|
||||
std::memcpy(args.blend_constants, blend_constants, sizeof(float) * 4);
|
||||
}
|
||||
|
||||
void CmdVkSetDepthBias(float depth_bias_constant_factor,
|
||||
float depth_bias_clamp,
|
||||
float depth_bias_slope_factor) {
|
||||
auto& args = *reinterpret_cast<ArgsVkSetDepthBias*>(
|
||||
WriteCommand(Command::kVkSetDepthBias, sizeof(ArgsVkSetDepthBias)));
|
||||
args.depth_bias_constant_factor = depth_bias_constant_factor;
|
||||
args.depth_bias_clamp = depth_bias_clamp;
|
||||
args.depth_bias_slope_factor = depth_bias_slope_factor;
|
||||
}
|
||||
|
||||
void CmdVkSetScissor(uint32_t first_scissor, uint32_t scissor_count,
|
||||
const VkRect2D* scissors) {
|
||||
const size_t header_size =
|
||||
xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D));
|
||||
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
|
||||
WriteCommand(Command::kVkSetScissor,
|
||||
header_size + sizeof(VkRect2D) * scissor_count));
|
||||
auto& args = *reinterpret_cast<ArgsVkSetScissor*>(args_ptr);
|
||||
args.first_scissor = first_scissor;
|
||||
args.scissor_count = scissor_count;
|
||||
std::memcpy(args_ptr + header_size, scissors,
|
||||
sizeof(VkRect2D) * scissor_count);
|
||||
}
|
||||
|
||||
void CmdVkSetStencilCompareMask(VkStencilFaceFlags face_mask,
|
||||
uint32_t compare_mask) {
|
||||
auto& args = *reinterpret_cast<ArgsSetStencilMaskReference*>(
|
||||
WriteCommand(Command::kVkSetStencilCompareMask,
|
||||
sizeof(ArgsSetStencilMaskReference)));
|
||||
args.face_mask = face_mask;
|
||||
args.mask_reference = compare_mask;
|
||||
}
|
||||
|
||||
void CmdVkSetStencilReference(VkStencilFaceFlags face_mask,
|
||||
uint32_t reference) {
|
||||
auto& args = *reinterpret_cast<ArgsSetStencilMaskReference*>(WriteCommand(
|
||||
Command::kVkSetStencilReference, sizeof(ArgsSetStencilMaskReference)));
|
||||
args.face_mask = face_mask;
|
||||
args.mask_reference = reference;
|
||||
}
|
||||
|
||||
void CmdVkSetStencilWriteMask(VkStencilFaceFlags face_mask,
|
||||
uint32_t write_mask) {
|
||||
auto& args = *reinterpret_cast<ArgsSetStencilMaskReference*>(WriteCommand(
|
||||
Command::kVkSetStencilWriteMask, sizeof(ArgsSetStencilMaskReference)));
|
||||
args.face_mask = face_mask;
|
||||
args.mask_reference = write_mask;
|
||||
}
|
||||
|
||||
void CmdVkSetViewport(uint32_t first_viewport, uint32_t viewport_count,
|
||||
const VkViewport* viewports) {
|
||||
const size_t header_size =
|
||||
xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport));
|
||||
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
|
||||
WriteCommand(Command::kVkSetViewport,
|
||||
header_size + sizeof(VkViewport) * viewport_count));
|
||||
auto& args = *reinterpret_cast<ArgsVkSetViewport*>(args_ptr);
|
||||
args.first_viewport = first_viewport;
|
||||
args.viewport_count = viewport_count;
|
||||
std::memcpy(args_ptr + header_size, viewports,
|
||||
sizeof(VkViewport) * viewport_count);
|
||||
}
|
||||
|
||||
private:
|
||||
enum class Command {
|
||||
kVkBeginRenderPass,
|
||||
kVkBindDescriptorSets,
|
||||
kVkBindIndexBuffer,
|
||||
kVkBindPipeline,
|
||||
kVkBindVertexBuffers,
|
||||
kVkClearAttachments,
|
||||
kVkClearColorImage,
|
||||
kVkCopyBuffer,
|
||||
kVkCopyBufferToImage,
|
||||
kVkDispatch,
|
||||
kVkDraw,
|
||||
kVkDrawIndexed,
|
||||
kVkEndRenderPass,
|
||||
kVkPipelineBarrier,
|
||||
kVkPushConstants,
|
||||
kVkSetBlendConstants,
|
||||
kVkSetDepthBias,
|
||||
kVkSetScissor,
|
||||
kVkSetStencilCompareMask,
|
||||
kVkSetStencilReference,
|
||||
kVkSetStencilWriteMask,
|
||||
kVkSetViewport,
|
||||
};
|
||||
|
||||
struct CommandHeader {
|
||||
Command command;
|
||||
uint32_t arguments_size_elements;
|
||||
};
|
||||
static constexpr size_t kCommandHeaderSizeElements =
|
||||
(sizeof(CommandHeader) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
|
||||
|
||||
struct ArgsVkBeginRenderPass {
|
||||
VkRenderPass render_pass;
|
||||
VkFramebuffer framebuffer;
|
||||
VkRect2D render_area;
|
||||
uint32_t clear_value_count;
|
||||
VkSubpassContents contents;
|
||||
// Followed by aligned optional VkClearValue[].
|
||||
static_assert(alignof(VkClearValue) <= alignof(uintmax_t));
|
||||
};
|
||||
|
||||
struct ArgsVkBindDescriptorSets {
|
||||
VkPipelineBindPoint pipeline_bind_point;
|
||||
VkPipelineLayout layout;
|
||||
uint32_t first_set;
|
||||
uint32_t descriptor_set_count;
|
||||
uint32_t dynamic_offset_count;
|
||||
// Followed by aligned VkDescriptorSet[], optional uint32_t[].
|
||||
static_assert(alignof(VkDescriptorSet) <= alignof(uintmax_t));
|
||||
};
|
||||
|
||||
struct ArgsVkBindIndexBuffer {
|
||||
VkBuffer buffer;
|
||||
VkDeviceSize offset;
|
||||
VkIndexType index_type;
|
||||
};
|
||||
|
||||
struct ArgsVkBindPipeline {
|
||||
VkPipelineBindPoint pipeline_bind_point;
|
||||
VkPipeline pipeline;
|
||||
};
|
||||
|
||||
struct ArgsVkBindVertexBuffers {
|
||||
uint32_t first_binding;
|
||||
uint32_t binding_count;
|
||||
// Followed by aligned VkBuffer[], VkDeviceSize[].
|
||||
static_assert(alignof(VkBuffer) <= alignof(uintmax_t));
|
||||
static_assert(alignof(VkDeviceSize) <= alignof(uintmax_t));
|
||||
};
|
||||
|
||||
struct ArgsVkClearAttachments {
|
||||
uint32_t attachment_count;
|
||||
uint32_t rect_count;
|
||||
// Followed by aligned VkClearAttachment[], VkClearRect[].
|
||||
static_assert(alignof(VkClearAttachment) <= alignof(uintmax_t));
|
||||
static_assert(alignof(VkClearRect) <= alignof(uintmax_t));
|
||||
};
|
||||
|
||||
struct ArgsVkClearColorImage {
|
||||
VkImage image;
|
||||
VkImageLayout image_layout;
|
||||
VkClearColorValue color;
|
||||
uint32_t range_count;
|
||||
// Followed by aligned VkImageSubresourceRange[].
|
||||
static_assert(alignof(VkImageSubresourceRange) <= alignof(uintmax_t));
|
||||
};
|
||||
|
||||
struct ArgsVkCopyBuffer {
|
||||
VkBuffer src_buffer;
|
||||
VkBuffer dst_buffer;
|
||||
uint32_t region_count;
|
||||
// Followed by aligned VkBufferCopy[].
|
||||
static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t));
|
||||
};
|
||||
|
||||
struct ArgsVkCopyBufferToImage {
|
||||
VkBuffer src_buffer;
|
||||
VkImage dst_image;
|
||||
VkImageLayout dst_image_layout;
|
||||
uint32_t region_count;
|
||||
// Followed by aligned VkBufferImageCopy[].
|
||||
static_assert(alignof(VkBufferImageCopy) <= alignof(uintmax_t));
|
||||
};
|
||||
|
||||
struct ArgsVkDispatch {
|
||||
uint32_t group_count_x;
|
||||
uint32_t group_count_y;
|
||||
uint32_t group_count_z;
|
||||
};
|
||||
|
||||
struct ArgsVkDraw {
|
||||
uint32_t vertex_count;
|
||||
uint32_t instance_count;
|
||||
uint32_t first_vertex;
|
||||
uint32_t first_instance;
|
||||
};
|
||||
|
||||
struct ArgsVkDrawIndexed {
|
||||
uint32_t index_count;
|
||||
uint32_t instance_count;
|
||||
uint32_t first_index;
|
||||
int32_t vertex_offset;
|
||||
uint32_t first_instance;
|
||||
};
|
||||
|
||||
struct ArgsVkPipelineBarrier {
|
||||
VkPipelineStageFlags src_stage_mask;
|
||||
VkPipelineStageFlags dst_stage_mask;
|
||||
VkDependencyFlags dependency_flags;
|
||||
uint32_t memory_barrier_count;
|
||||
uint32_t buffer_memory_barrier_count;
|
||||
uint32_t image_memory_barrier_count;
|
||||
// Followed by aligned optional VkMemoryBarrier[],
|
||||
// optional VkBufferMemoryBarrier[], optional VkImageMemoryBarrier[].
|
||||
static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t));
|
||||
static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t));
|
||||
static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t));
|
||||
};
|
||||
|
||||
struct ArgsVkPushConstants {
|
||||
VkPipelineLayout layout;
|
||||
VkShaderStageFlags stage_flags;
|
||||
uint32_t offset;
|
||||
uint32_t size;
|
||||
// Followed by `size` bytes of values.
|
||||
};
|
||||
|
||||
struct ArgsVkSetBlendConstants {
|
||||
float blend_constants[4];
|
||||
};
|
||||
|
||||
struct ArgsVkSetDepthBias {
|
||||
float depth_bias_constant_factor;
|
||||
float depth_bias_clamp;
|
||||
float depth_bias_slope_factor;
|
||||
};
|
||||
|
||||
struct ArgsVkSetScissor {
|
||||
uint32_t first_scissor;
|
||||
uint32_t scissor_count;
|
||||
// Followed by aligned VkRect2D[].
|
||||
static_assert(alignof(VkRect2D) <= alignof(uintmax_t));
|
||||
};
|
||||
|
||||
struct ArgsSetStencilMaskReference {
|
||||
VkStencilFaceFlags face_mask;
|
||||
uint32_t mask_reference;
|
||||
};
|
||||
|
||||
struct ArgsVkSetViewport {
|
||||
uint32_t first_viewport;
|
||||
uint32_t viewport_count;
|
||||
// Followed by aligned VkViewport[].
|
||||
static_assert(alignof(VkViewport) <= alignof(uintmax_t));
|
||||
};
|
||||
|
||||
void* WriteCommand(Command command, size_t arguments_size_bytes);
|
||||
|
||||
const VulkanCommandProcessor& command_processor_;
|
||||
|
||||
// uintmax_t to ensure uint64_t and pointer alignment of all structures.
|
||||
std::vector<uintmax_t> command_stream_;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_
|
|
@ -8,10 +8,10 @@ project("xenia-gpu-vulkan")
|
|||
language("C++")
|
||||
links({
|
||||
"fmt",
|
||||
"glslang-spirv",
|
||||
"xenia-base",
|
||||
"xenia-gpu",
|
||||
"xenia-ui",
|
||||
"xenia-ui-spirv",
|
||||
"xenia-ui-vulkan",
|
||||
"xxhash",
|
||||
})
|
||||
|
@ -20,10 +20,9 @@ project("xenia-gpu-vulkan")
|
|||
})
|
||||
local_platform_files()
|
||||
files({
|
||||
"shaders/bytecode/vulkan_spirv/*.h",
|
||||
"../shaders/bytecode/vulkan_spirv/*.h",
|
||||
})
|
||||
|
||||
-- TODO(benvanik): kill this and move to the debugger UI.
|
||||
group("src")
|
||||
project("xenia-gpu-vulkan-trace-viewer")
|
||||
uuid("86a1dddc-a26a-4885-8c55-cf745225d93e")
|
||||
|
@ -43,7 +42,6 @@ project("xenia-gpu-vulkan-trace-viewer")
|
|||
"xenia-kernel",
|
||||
"xenia-patcher",
|
||||
"xenia-ui",
|
||||
"xenia-ui-spirv",
|
||||
"xenia-ui-vulkan",
|
||||
"xenia-vfs",
|
||||
"xenia-patcher",
|
||||
|
@ -58,7 +56,6 @@ project("xenia-gpu-vulkan-trace-viewer")
|
|||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"spirv-tools",
|
||||
"xxhash",
|
||||
})
|
||||
includedirs({
|
||||
|
@ -77,12 +74,6 @@ project("xenia-gpu-vulkan-trace-viewer")
|
|||
})
|
||||
|
||||
filter("platforms:Windows")
|
||||
links({
|
||||
"xenia-apu-xaudio2",
|
||||
"xenia-hid-winkey",
|
||||
"xenia-hid-xinput",
|
||||
})
|
||||
|
||||
-- Only create the .user file if it doesn't already exist.
|
||||
local user_file = project_root.."/build/xenia-gpu-vulkan-trace-viewer.vcxproj.user"
|
||||
if not os.isfile(user_file) then
|
||||
|
@ -111,7 +102,6 @@ project("xenia-gpu-vulkan-trace-dump")
|
|||
"xenia-hid-nop",
|
||||
"xenia-kernel",
|
||||
"xenia-ui",
|
||||
"xenia-ui-spirv",
|
||||
"xenia-ui-vulkan",
|
||||
"xenia-vfs",
|
||||
"xenia-patcher",
|
||||
|
@ -126,7 +116,6 @@ project("xenia-gpu-vulkan-trace-dump")
|
|||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"spirv-tools",
|
||||
"xxhash",
|
||||
})
|
||||
includedirs({
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,406 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_VULKAN_RENDER_CACHE_H_
|
||||
#define XENIA_GPU_VULKAN_RENDER_CACHE_H_
|
||||
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/shader.h"
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_shader.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
// TODO(benvanik): make public API?
|
||||
class CachedTileView;
|
||||
class CachedFramebuffer;
|
||||
class CachedRenderPass;
|
||||
|
||||
// Uniquely identifies EDRAM tiles.
|
||||
struct TileViewKey {
|
||||
// Offset into EDRAM in 5120b tiles.
|
||||
uint16_t tile_offset;
|
||||
// Tile width of the view in base 80x16 tiles.
|
||||
uint16_t tile_width;
|
||||
// Tile height of the view in base 80x16 tiles.
|
||||
uint16_t tile_height;
|
||||
// 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat.
|
||||
uint16_t color_or_depth : 1;
|
||||
// Surface MSAA samples
|
||||
uint16_t msaa_samples : 2;
|
||||
// Either ColorRenderTargetFormat or DepthRenderTargetFormat.
|
||||
uint16_t edram_format : 13;
|
||||
};
|
||||
static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed");
|
||||
|
||||
// Cached view representing EDRAM memory.
|
||||
// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible
|
||||
// formats?
|
||||
class CachedTileView {
|
||||
public:
|
||||
// Key identifying the view in the cache.
|
||||
TileViewKey key;
|
||||
// Image
|
||||
VkImage image = nullptr;
|
||||
// Simple view on the image matching the format.
|
||||
VkImageView image_view = nullptr;
|
||||
// Image layout
|
||||
VkImageLayout image_layout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
// Memory buffer
|
||||
VkDeviceMemory memory = nullptr;
|
||||
// Image sample count
|
||||
VkSampleCountFlagBits sample_count = VK_SAMPLE_COUNT_1_BIT;
|
||||
|
||||
// (if a depth view) Image view of depth aspect
|
||||
VkImageView image_view_depth = nullptr;
|
||||
// (if a depth view) Image view of stencil aspect
|
||||
VkImageView image_view_stencil = nullptr;
|
||||
|
||||
CachedTileView(const ui::vulkan::VulkanProvider& provider,
|
||||
VkDeviceMemory edram_memory, TileViewKey view_key);
|
||||
~CachedTileView();
|
||||
|
||||
VkResult Initialize(VkCommandBuffer command_buffer);
|
||||
|
||||
bool IsEqual(const TileViewKey& other_key) const {
|
||||
auto a = reinterpret_cast<const uint64_t*>(&key);
|
||||
auto b = reinterpret_cast<const uint64_t*>(&other_key);
|
||||
return *a == *b;
|
||||
}
|
||||
|
||||
bool operator<(const CachedTileView& other) const {
|
||||
return key.tile_offset < other.key.tile_offset;
|
||||
}
|
||||
|
||||
VkExtent2D GetSize() const {
|
||||
return {key.tile_width * 80u, key.tile_height * 16u};
|
||||
}
|
||||
|
||||
private:
|
||||
const ui::vulkan::VulkanProvider& provider_;
|
||||
};
|
||||
|
||||
// Parsed render configuration from the current render state.
|
||||
struct RenderConfiguration {
|
||||
// Render mode (color+depth, depth-only, etc).
|
||||
xenos::ModeControl mode_control;
|
||||
// Target surface pitch multiplied by MSAA, in pixels.
|
||||
uint32_t surface_pitch_px;
|
||||
// ESTIMATED target surface height multiplied by MSAA, in pixels.
|
||||
uint32_t surface_height_px;
|
||||
// Surface MSAA setting.
|
||||
xenos::MsaaSamples surface_msaa;
|
||||
// Color attachments for the 4 render targets.
|
||||
struct {
|
||||
bool used;
|
||||
uint32_t edram_base;
|
||||
xenos::ColorRenderTargetFormat format;
|
||||
} color[4];
|
||||
// Depth/stencil attachment.
|
||||
struct {
|
||||
bool used;
|
||||
uint32_t edram_base;
|
||||
xenos::DepthRenderTargetFormat format;
|
||||
} depth_stencil;
|
||||
};
|
||||
|
||||
// Current render state based on the register-specified configuration.
|
||||
struct RenderState {
|
||||
// Parsed configuration.
|
||||
RenderConfiguration config;
|
||||
// Render pass (to be used with pipelines/etc).
|
||||
CachedRenderPass* render_pass = nullptr;
|
||||
VkRenderPass render_pass_handle = nullptr;
|
||||
// Target framebuffer bound to the render pass.
|
||||
CachedFramebuffer* framebuffer = nullptr;
|
||||
VkFramebuffer framebuffer_handle = nullptr;
|
||||
|
||||
bool color_attachment_written[4] = {false};
|
||||
bool depth_attachment_written = false;
|
||||
};
|
||||
|
||||
// Manages the virtualized EDRAM and the render target cache.
|
||||
//
|
||||
// On the 360 the render target is an opaque block of memory in EDRAM that's
|
||||
// only accessible via resolves. We use this to our advantage to simulate
|
||||
// something like it as best we can by having a shared backing memory with
|
||||
// a multitude of views for each tile location in EDRAM.
|
||||
//
|
||||
// This allows us to have the same base address write to the same memory
|
||||
// regardless of framebuffer format. Resolving then uses whatever format the
|
||||
// resolve requests straight from the backing memory.
|
||||
//
|
||||
// EDRAM is a beast and we only approximate it as best we can. Basically,
|
||||
// the 10MiB of EDRAM is composed of 2048 5120b tiles. Each tile is 80x16px.
|
||||
// +-----+-----+-----+---
|
||||
// |tile0|tile1|tile2|... 2048 times
|
||||
// +-----+-----+-----+---
|
||||
// Operations dealing with EDRAM deal in tile offsets, so base 0x100 is tile
|
||||
// offset 256, 256*5120=1310720b into the buffer. All rendering operations are
|
||||
// aligned to tiles so trying to draw at 256px wide will have a real width of
|
||||
// 320px by rounding up to the next tile.
|
||||
//
|
||||
// MSAA and other settings will modify the exact pixel sizes, like 4X makes
|
||||
// each tile effectively 40x8px / 2X makes each tile 80x8px, but they are still
|
||||
// all 5120b. As we try to emulate this we adjust our viewport when rendering to
|
||||
// stretch pixels as needed.
|
||||
//
|
||||
// It appears that games also take advantage of MSAA stretching tiles when doing
|
||||
// clears. Games will clear a view with 1/2X pitch/height and 4X MSAA and then
|
||||
// later draw to that view with 1X pitch/height and 1X MSAA.
|
||||
//
|
||||
// The good news is that games cannot read EDRAM directly but must use a copy
|
||||
// operation to get the data out. That gives us a chance to do whatever we
|
||||
// need to (re-tile, etc) only when requested.
|
||||
//
|
||||
// To approximate the tiled EDRAM layout we use a single large chunk of memory.
|
||||
// From this memory we create many VkImages (and VkImageViews) of various
|
||||
// formats and dimensions as requested by the game. These are used as
|
||||
// attachments during rendering and as sources during copies. They are also
|
||||
// heavily aliased - lots of images will reference the same locations in the
|
||||
// underlying EDRAM buffer. The only requirement is that there are no hazards
|
||||
// with specific tiles (reading/writing the same tile through different images)
|
||||
// and otherwise it should be ok *fingers crossed*.
|
||||
//
|
||||
// One complication is the copy/resolve process itself: we need to give back
|
||||
// the data asked for in the format desired and where it goes is arbitrary
|
||||
// (any address in physical memory). If the game is good we get resolves of
|
||||
// EDRAM into fixed base addresses with scissored regions. If the game is bad
|
||||
// we are broken.
|
||||
//
|
||||
// Resolves from EDRAM result in tiled textures - that's texture tiles, not
|
||||
// EDRAM tiles. If we wanted to ensure byte-for-byte correctness we'd need to
|
||||
// then tile the images as we wrote them out. For now, we just attempt to
|
||||
// get the (X, Y) in linear space and do that. This really comes into play
|
||||
// when multiple resolves write to the same texture or memory aliased by
|
||||
// multiple textures - which is common due to predicated tiling. The examples
|
||||
// below demonstrate what this looks like, but the important thing is that
|
||||
// we are aware of partial textures and overlapping regions.
|
||||
//
|
||||
// TODO(benvanik): what, if any, barriers do we need? any transitions?
|
||||
//
|
||||
// Example with multiple render targets:
|
||||
// Two color targets of 256x256px tightly packed in EDRAM:
|
||||
// color target 0: base 0x0, pitch 320, scissor 0,0, 256x256
|
||||
// starts at tile 0, buffer offset 0
|
||||
// contains 64 tiles (320/80)*(256/16)
|
||||
// color target 1: base 0x40, pitch 320, scissor 256,0, 256x256
|
||||
// starts at tile 64 (after color target 0), buffer offset 327680b
|
||||
// contains 64 tiles
|
||||
// In EDRAM each set of 64 tiles is contiguous:
|
||||
// +------+------+ +------+------+------+
|
||||
// |ct0.0 |ct0.1 |...|ct0.63|ct1.0 |ct1.1 |...
|
||||
// +------+------+ +------+------+------+
|
||||
// To render into these, we setup two VkImages:
|
||||
// image 0: bound to buffer offset 0, 320x256x4=327680b
|
||||
// image 1: bound to buffer offset 327680b, 320x256x4=327680b
|
||||
// So when we render to them:
|
||||
// +------+-+ scissored to 256x256, actually 320x256
|
||||
// | . | | <- . appears at some untiled offset in the buffer, but
|
||||
// | | | consistent if aliased with the same format
|
||||
// +------+-+
|
||||
// In theory, this gives us proper aliasing in most cases.
|
||||
//
|
||||
// Example with horizontal predicated tiling:
|
||||
// Trying to render 1024x576 @4X MSAA, splitting into two regions
|
||||
// horizontally:
|
||||
// +----------+
|
||||
// | 1024x288 |
|
||||
// +----------+
|
||||
// | 1024x288 |
|
||||
// +----------+
|
||||
// EDRAM configured for 1056x288px with tile size 2112x567px (4X MSAA):
|
||||
// color target 0: base 0x0, pitch 1080, 26x36 tiles
|
||||
// First render (top):
|
||||
// window offset 0,0
|
||||
// scissor 0,0, 1024x288
|
||||
// First resolve (top):
|
||||
// RB_COPY_DEST_BASE 0x1F45D000
|
||||
// RB_COPY_DEST_PITCH pitch=1024, height=576
|
||||
// vertices: 0,0, 1024,0, 1024,288
|
||||
// Second render (bottom):
|
||||
// window offset 0,-288
|
||||
// scissor 0,288, 1024x288
|
||||
// Second resolve (bottom):
|
||||
// RB_COPY_DEST_BASE 0x1F57D000 (+1179648b)
|
||||
// RB_COPY_DEST_PITCH pitch=1024, height=576
|
||||
// (exactly 1024x288*4b after first resolve)
|
||||
// vertices: 0,288, 1024,288, 1024,576
|
||||
// Resolving here is easy as the textures are contiguous in memory. We can
|
||||
// snoop in the first resolve with the dest height to know the total size,
|
||||
// and in the second resolve see that it overlaps and place it in the
|
||||
// existing target.
|
||||
//
|
||||
// Example with vertical predicated tiling:
|
||||
// Trying to render 1280x720 @2X MSAA, splitting into two regions
|
||||
// vertically:
|
||||
// +-----+-----+
|
||||
// | 640 | 640 |
|
||||
// | x | x |
|
||||
// | 720 | 720 |
|
||||
// +-----+-----+
|
||||
// EDRAM configured for 640x736px with tile size 640x1472px (2X MSAA):
|
||||
// color target 0: base 0x0, pitch 640, 8x92 tiles
|
||||
// First render (left):
|
||||
// window offset 0,0
|
||||
// scissor 0,0, 640x720
|
||||
// First resolve (left):
|
||||
// RB_COPY_DEST_BASE 0x1BC6D000
|
||||
// RB_COPY_DEST_PITCH pitch=1280, height=720
|
||||
// vertices: 0,0, 640,0, 640,720
|
||||
// Second render (right):
|
||||
// window offset -640,0
|
||||
// scissor 640,0, 640x720
|
||||
// Second resolve (right):
|
||||
// RB_COPY_DEST_BASE 0x1BC81000 (+81920b)
|
||||
// RB_COPY_DEST_PITCH pitch=1280, height=720
|
||||
// vertices: 640,0, 1280,0, 1280,720
|
||||
// Resolving here is much more difficult as resolves are tiled and the right
|
||||
// half of the texture is 81920b away:
|
||||
// 81920/4bpp=20480px, /32 (texture tile size)=640px
|
||||
// We know the texture size with the first resolve and with the second we
|
||||
// must check for overlap then compute the offset (in both X and Y).
|
||||
class RenderCache {
|
||||
public:
|
||||
RenderCache(RegisterFile* register_file,
|
||||
const ui::vulkan::VulkanProvider& provider);
|
||||
~RenderCache();
|
||||
|
||||
VkResult Initialize();
|
||||
void Shutdown();
|
||||
|
||||
// Call this to determine if you should start a new render pass or continue
|
||||
// with an already open pass.
|
||||
bool dirty() const;
|
||||
|
||||
CachedTileView* FindTileView(uint32_t base, uint32_t pitch,
|
||||
xenos::MsaaSamples samples, bool color_or_depth,
|
||||
uint32_t format);
|
||||
|
||||
// Begins a render pass targeting the state-specified framebuffer formats.
|
||||
// The command buffer will be transitioned into the render pass phase.
|
||||
const RenderState* BeginRenderPass(VkCommandBuffer command_buffer,
|
||||
VulkanShader* vertex_shader,
|
||||
VulkanShader* pixel_shader);
|
||||
|
||||
// Ends the current render pass.
|
||||
// The command buffer will be transitioned out of the render pass phase.
|
||||
void EndRenderPass();
|
||||
|
||||
// Clears all cached content.
|
||||
void ClearCache();
|
||||
|
||||
// Queues commands to copy EDRAM contents into an image.
|
||||
// The command buffer must not be inside of a render pass when calling this.
|
||||
void RawCopyToImage(VkCommandBuffer command_buffer, uint32_t edram_base,
|
||||
VkImage image, VkImageLayout image_layout,
|
||||
bool color_or_depth, VkOffset3D offset,
|
||||
VkExtent3D extents);
|
||||
|
||||
// Queues commands to blit EDRAM contents into an image.
|
||||
// The command buffer must not be inside of a render pass when calling this.
|
||||
void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base,
|
||||
uint32_t pitch, uint32_t height,
|
||||
xenos::MsaaSamples num_samples, VkImage image,
|
||||
VkImageLayout image_layout, bool color_or_depth,
|
||||
uint32_t format, VkFilter filter, VkOffset3D offset,
|
||||
VkExtent3D extents);
|
||||
|
||||
// Queues commands to clear EDRAM contents with a solid color.
|
||||
// The command buffer must not be inside of a render pass when calling this.
|
||||
void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base,
|
||||
xenos::ColorRenderTargetFormat format, uint32_t pitch,
|
||||
uint32_t height, xenos::MsaaSamples num_samples,
|
||||
float* color);
|
||||
// Queues commands to clear EDRAM contents with depth/stencil values.
|
||||
// The command buffer must not be inside of a render pass when calling this.
|
||||
void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
|
||||
uint32_t edram_base,
|
||||
xenos::DepthRenderTargetFormat format,
|
||||
uint32_t pitch, uint32_t height,
|
||||
xenos::MsaaSamples num_samples, float depth,
|
||||
uint32_t stencil);
|
||||
// Queues commands to fill EDRAM contents with a constant value.
|
||||
// The command buffer must not be inside of a render pass when calling this.
|
||||
void FillEDRAM(VkCommandBuffer command_buffer, uint32_t value);
|
||||
|
||||
private:
|
||||
// Parses the current state into a configuration object.
|
||||
bool ParseConfiguration(RenderConfiguration* config);
|
||||
|
||||
// Finds a tile view. Returns nullptr if none found matching the key.
|
||||
CachedTileView* FindTileView(const TileViewKey& view_key) const;
|
||||
|
||||
// Gets or creates a tile view with the given parameters.
|
||||
CachedTileView* FindOrCreateTileView(VkCommandBuffer command_buffer,
|
||||
const TileViewKey& view_key);
|
||||
|
||||
void UpdateTileView(VkCommandBuffer command_buffer, CachedTileView* view,
|
||||
bool load, bool insert_barrier = true);
|
||||
|
||||
// Gets or creates a render pass and frame buffer for the given configuration.
|
||||
// This attempts to reuse as much as possible across render passes and
|
||||
// framebuffers.
|
||||
bool ConfigureRenderPass(VkCommandBuffer command_buffer,
|
||||
RenderConfiguration* config,
|
||||
CachedRenderPass** out_render_pass,
|
||||
CachedFramebuffer** out_framebuffer);
|
||||
|
||||
RegisterFile* register_file_ = nullptr;
|
||||
const ui::vulkan::VulkanProvider& provider_;
|
||||
|
||||
// Entire 10MiB of EDRAM.
|
||||
VkDeviceMemory edram_memory_ = nullptr;
|
||||
// Buffer overlayed 1:1 with edram_memory_ to allow raw access.
|
||||
VkBuffer edram_buffer_ = nullptr;
|
||||
|
||||
// Cache of VkImage and VkImageView's for all of our EDRAM tilings.
|
||||
// TODO(benvanik): non-linear lookup? Should only be a small number of these.
|
||||
std::vector<CachedTileView*> cached_tile_views_;
|
||||
|
||||
// Cache of render passes based on formats.
|
||||
std::vector<CachedRenderPass*> cached_render_passes_;
|
||||
|
||||
// Shadows of the registers that impact the render pass we choose.
|
||||
// If the registers don't change between passes we can quickly reuse the
|
||||
// previous one.
|
||||
struct ShadowRegisters {
|
||||
reg::RB_MODECONTROL rb_modecontrol;
|
||||
reg::RB_SURFACE_INFO rb_surface_info;
|
||||
reg::RB_COLOR_INFO rb_color_info;
|
||||
reg::RB_COLOR_INFO rb_color1_info;
|
||||
reg::RB_COLOR_INFO rb_color2_info;
|
||||
reg::RB_COLOR_INFO rb_color3_info;
|
||||
reg::RB_DEPTH_INFO rb_depth_info;
|
||||
uint32_t pa_sc_window_scissor_tl;
|
||||
uint32_t pa_sc_window_scissor_br;
|
||||
|
||||
ShadowRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} shadow_registers_;
|
||||
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
|
||||
|
||||
// Configuration used for the current/previous Begin/End, representing the
|
||||
// current shadow register state.
|
||||
RenderState current_state_;
|
||||
|
||||
// Only valid during a BeginRenderPass/EndRenderPass block.
|
||||
VkCommandBuffer current_command_buffer_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_VULKAN_RENDER_CACHE_H_
|
|
@ -1,2 +0,0 @@
|
|||
DisableFormat: true
|
||||
SortIncludes: false
|
|
@ -1,52 +0,0 @@
|
|||
// Generated with `xb buildshaders`.
|
||||
#if 0
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 10
|
||||
; Bound: 16104
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpCapability Sampled1D
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %5663 "main" %3302 %4841
|
||||
OpExecutionMode %5663 OriginUpperLeft
|
||||
OpDecorate %3302 Location 0
|
||||
OpDecorate %4841 Location 0
|
||||
%void = OpTypeVoid
|
||||
%1282 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%v4float = OpTypeVector %float 4
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_16 = OpConstant %uint 16
|
||||
%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16
|
||||
%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16
|
||||
%3302 = OpVariable %_ptr_Input__arr_v4float_uint_16 Input
|
||||
%uint_4 = OpConstant %uint 4
|
||||
%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4
|
||||
%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4
|
||||
%4841 = OpVariable %_ptr_Output__arr_v4float_uint_4 Output
|
||||
%5663 = OpFunction %void None %1282
|
||||
%16103 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
#endif
|
||||
|
||||
const uint32_t dummy_ps[] = {
|
||||
0x07230203, 0x00010000, 0x0008000A, 0x00003EE8, 0x00000000, 0x00020011,
|
||||
0x00000001, 0x00020011, 0x0000002B, 0x0006000B, 0x00000001, 0x4C534C47,
|
||||
0x6474732E, 0x3035342E, 0x00000000, 0x0003000E, 0x00000000, 0x00000001,
|
||||
0x0007000F, 0x00000004, 0x0000161F, 0x6E69616D, 0x00000000, 0x00000CE6,
|
||||
0x000012E9, 0x00030010, 0x0000161F, 0x00000007, 0x00040047, 0x00000CE6,
|
||||
0x0000001E, 0x00000000, 0x00040047, 0x000012E9, 0x0000001E, 0x00000000,
|
||||
0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00030016,
|
||||
0x0000000D, 0x00000020, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004,
|
||||
0x00040015, 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B,
|
||||
0x00000A3A, 0x00000010, 0x0004001C, 0x0000056F, 0x0000001D, 0x00000A3A,
|
||||
0x00040020, 0x000007EC, 0x00000001, 0x0000056F, 0x0004003B, 0x000007EC,
|
||||
0x00000CE6, 0x00000001, 0x0004002B, 0x0000000B, 0x00000A16, 0x00000004,
|
||||
0x0004001C, 0x000005C3, 0x0000001D, 0x00000A16, 0x00040020, 0x00000840,
|
||||
0x00000003, 0x000005C3, 0x0004003B, 0x00000840, 0x000012E9, 0x00000003,
|
||||
0x00050036, 0x00000008, 0x0000161F, 0x00000000, 0x00000502, 0x000200F8,
|
||||
0x00003EE7, 0x000100FD, 0x00010038,
|
||||
};
|
|
@ -1,193 +0,0 @@
|
|||
// Generated with `xb buildshaders`.
|
||||
#if 0
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 10
|
||||
; Bound: 23916
|
||||
; Schema: 0
|
||||
OpCapability Geometry
|
||||
OpCapability GeometryPointSize
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Geometry %5663 "main" %4930 %5305 %5430 %3302 %4044 %4656 %3736
|
||||
OpExecutionMode %5663 InputLinesAdjacency
|
||||
OpExecutionMode %5663 Invocations 1
|
||||
OpExecutionMode %5663 OutputLineStrip
|
||||
OpExecutionMode %5663 OutputVertices 5
|
||||
OpMemberDecorate %_struct_1032 0 BuiltIn Position
|
||||
OpMemberDecorate %_struct_1032 1 BuiltIn PointSize
|
||||
OpDecorate %_struct_1032 Block
|
||||
OpMemberDecorate %_struct_1033 0 BuiltIn Position
|
||||
OpMemberDecorate %_struct_1033 1 BuiltIn PointSize
|
||||
OpDecorate %_struct_1033 Block
|
||||
OpDecorate %5430 Location 0
|
||||
OpDecorate %3302 Location 0
|
||||
OpDecorate %4044 Location 16
|
||||
OpDecorate %4656 Location 17
|
||||
OpDecorate %3736 Location 16
|
||||
%void = OpTypeVoid
|
||||
%1282 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%v4float = OpTypeVector %float 4
|
||||
%_struct_1032 = OpTypeStruct %v4float %float
|
||||
%_ptr_Output__struct_1032 = OpTypePointer Output %_struct_1032
|
||||
%4930 = OpVariable %_ptr_Output__struct_1032 Output
|
||||
%int = OpTypeInt 32 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%_struct_1033 = OpTypeStruct %v4float %float
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_4 = OpConstant %uint 4
|
||||
%_arr__struct_1033_uint_4 = OpTypeArray %_struct_1033 %uint_4
|
||||
%_ptr_Input__arr__struct_1033_uint_4 = OpTypePointer Input %_arr__struct_1033_uint_4
|
||||
%5305 = OpVariable %_ptr_Input__arr__struct_1033_uint_4 Input
|
||||
%_ptr_Input_v4float = OpTypePointer Input %v4float
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_Input_float = OpTypePointer Input %float
|
||||
%_ptr_Output_float = OpTypePointer Output %float
|
||||
%uint_16 = OpConstant %uint 16
|
||||
%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16
|
||||
%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16
|
||||
%5430 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output
|
||||
%_arr__arr_v4float_uint_16_uint_4 = OpTypeArray %_arr_v4float_uint_16 %uint_4
|
||||
%_ptr_Input__arr__arr_v4float_uint_16_uint_4 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_4
|
||||
%3302 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_4 Input
|
||||
%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16
|
||||
%int_2 = OpConstant %int 2
|
||||
%int_3 = OpConstant %int 3
|
||||
%v2float = OpTypeVector %float 2
|
||||
%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4
|
||||
%_ptr_Input__arr_v2float_uint_4 = OpTypePointer Input %_arr_v2float_uint_4
|
||||
%4044 = OpVariable %_ptr_Input__arr_v2float_uint_4 Input
|
||||
%_arr_float_uint_4 = OpTypeArray %float %uint_4
|
||||
%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4
|
||||
%4656 = OpVariable %_ptr_Input__arr_float_uint_4 Input
|
||||
%_ptr_Output_v2float = OpTypePointer Output %v2float
|
||||
%3736 = OpVariable %_ptr_Output_v2float Output
|
||||
%5663 = OpFunction %void None %1282
|
||||
%23915 = OpLabel
|
||||
%7129 = OpAccessChain %_ptr_Input_v4float %5305 %int_0 %int_0
|
||||
%15646 = OpLoad %v4float %7129
|
||||
%19981 = OpAccessChain %_ptr_Output_v4float %4930 %int_0
|
||||
OpStore %19981 %15646
|
||||
%19905 = OpAccessChain %_ptr_Input_float %5305 %int_0 %int_1
|
||||
%7391 = OpLoad %float %19905
|
||||
%19982 = OpAccessChain %_ptr_Output_float %4930 %int_1
|
||||
OpStore %19982 %7391
|
||||
%19848 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_0
|
||||
%10874 = OpLoad %_arr_v4float_uint_16 %19848
|
||||
OpStore %5430 %10874
|
||||
OpEmitVertex
|
||||
%22812 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0
|
||||
%11398 = OpLoad %v4float %22812
|
||||
OpStore %19981 %11398
|
||||
%16622 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_1
|
||||
%7967 = OpLoad %float %16622
|
||||
OpStore %19982 %7967
|
||||
%16623 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_1
|
||||
%10875 = OpLoad %_arr_v4float_uint_16 %16623
|
||||
OpStore %5430 %10875
|
||||
OpEmitVertex
|
||||
%22813 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0
|
||||
%11399 = OpLoad %v4float %22813
|
||||
OpStore %19981 %11399
|
||||
%16624 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_1
|
||||
%7968 = OpLoad %float %16624
|
||||
OpStore %19982 %7968
|
||||
%16625 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_2
|
||||
%10876 = OpLoad %_arr_v4float_uint_16 %16625
|
||||
OpStore %5430 %10876
|
||||
OpEmitVertex
|
||||
%22814 = OpAccessChain %_ptr_Input_v4float %5305 %int_3 %int_0
|
||||
%11400 = OpLoad %v4float %22814
|
||||
OpStore %19981 %11400
|
||||
%16626 = OpAccessChain %_ptr_Input_float %5305 %int_3 %int_1
|
||||
%7969 = OpLoad %float %16626
|
||||
OpStore %19982 %7969
|
||||
%16627 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_3
|
||||
%10877 = OpLoad %_arr_v4float_uint_16 %16627
|
||||
OpStore %5430 %10877
|
||||
OpEmitVertex
|
||||
OpStore %19981 %15646
|
||||
OpStore %19982 %7391
|
||||
OpStore %5430 %10874
|
||||
OpEmitVertex
|
||||
OpEndPrimitive
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
#endif
|
||||
|
||||
const uint32_t line_quad_list_gs[] = {
|
||||
0x07230203, 0x00010000, 0x0008000A, 0x00005D6C, 0x00000000, 0x00020011,
|
||||
0x00000002, 0x00020011, 0x00000018, 0x0006000B, 0x00000001, 0x4C534C47,
|
||||
0x6474732E, 0x3035342E, 0x00000000, 0x0003000E, 0x00000000, 0x00000001,
|
||||
0x000C000F, 0x00000003, 0x0000161F, 0x6E69616D, 0x00000000, 0x00001342,
|
||||
0x000014B9, 0x00001536, 0x00000CE6, 0x00000FCC, 0x00001230, 0x00000E98,
|
||||
0x00030010, 0x0000161F, 0x00000015, 0x00040010, 0x0000161F, 0x00000000,
|
||||
0x00000001, 0x00030010, 0x0000161F, 0x0000001C, 0x00040010, 0x0000161F,
|
||||
0x0000001A, 0x00000005, 0x00050048, 0x00000408, 0x00000000, 0x0000000B,
|
||||
0x00000000, 0x00050048, 0x00000408, 0x00000001, 0x0000000B, 0x00000001,
|
||||
0x00030047, 0x00000408, 0x00000002, 0x00050048, 0x00000409, 0x00000000,
|
||||
0x0000000B, 0x00000000, 0x00050048, 0x00000409, 0x00000001, 0x0000000B,
|
||||
0x00000001, 0x00030047, 0x00000409, 0x00000002, 0x00040047, 0x00001536,
|
||||
0x0000001E, 0x00000000, 0x00040047, 0x00000CE6, 0x0000001E, 0x00000000,
|
||||
0x00040047, 0x00000FCC, 0x0000001E, 0x00000010, 0x00040047, 0x00001230,
|
||||
0x0000001E, 0x00000011, 0x00040047, 0x00000E98, 0x0000001E, 0x00000010,
|
||||
0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00030016,
|
||||
0x0000000D, 0x00000020, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004,
|
||||
0x0004001E, 0x00000408, 0x0000001D, 0x0000000D, 0x00040020, 0x00000685,
|
||||
0x00000003, 0x00000408, 0x0004003B, 0x00000685, 0x00001342, 0x00000003,
|
||||
0x00040015, 0x0000000C, 0x00000020, 0x00000001, 0x0004002B, 0x0000000C,
|
||||
0x00000A0B, 0x00000000, 0x0004001E, 0x00000409, 0x0000001D, 0x0000000D,
|
||||
0x00040015, 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B,
|
||||
0x00000A16, 0x00000004, 0x0004001C, 0x0000032E, 0x00000409, 0x00000A16,
|
||||
0x00040020, 0x000005AB, 0x00000001, 0x0000032E, 0x0004003B, 0x000005AB,
|
||||
0x000014B9, 0x00000001, 0x00040020, 0x0000029A, 0x00000001, 0x0000001D,
|
||||
0x00040020, 0x0000029B, 0x00000003, 0x0000001D, 0x0004002B, 0x0000000C,
|
||||
0x00000A0E, 0x00000001, 0x00040020, 0x0000028A, 0x00000001, 0x0000000D,
|
||||
0x00040020, 0x0000028B, 0x00000003, 0x0000000D, 0x0004002B, 0x0000000B,
|
||||
0x00000A3A, 0x00000010, 0x0004001C, 0x00000473, 0x0000001D, 0x00000A3A,
|
||||
0x00040020, 0x000006F0, 0x00000003, 0x00000473, 0x0004003B, 0x000006F0,
|
||||
0x00001536, 0x00000003, 0x0004001C, 0x00000973, 0x00000473, 0x00000A16,
|
||||
0x00040020, 0x0000002D, 0x00000001, 0x00000973, 0x0004003B, 0x0000002D,
|
||||
0x00000CE6, 0x00000001, 0x00040020, 0x000006F1, 0x00000001, 0x00000473,
|
||||
0x0004002B, 0x0000000C, 0x00000A11, 0x00000002, 0x0004002B, 0x0000000C,
|
||||
0x00000A14, 0x00000003, 0x00040017, 0x00000013, 0x0000000D, 0x00000002,
|
||||
0x0004001C, 0x000002A2, 0x00000013, 0x00000A16, 0x00040020, 0x0000051F,
|
||||
0x00000001, 0x000002A2, 0x0004003B, 0x0000051F, 0x00000FCC, 0x00000001,
|
||||
0x0004001C, 0x00000248, 0x0000000D, 0x00000A16, 0x00040020, 0x000004C5,
|
||||
0x00000001, 0x00000248, 0x0004003B, 0x000004C5, 0x00001230, 0x00000001,
|
||||
0x00040020, 0x00000290, 0x00000003, 0x00000013, 0x0004003B, 0x00000290,
|
||||
0x00000E98, 0x00000003, 0x00050036, 0x00000008, 0x0000161F, 0x00000000,
|
||||
0x00000502, 0x000200F8, 0x00005D6B, 0x00060041, 0x0000029A, 0x00001BD9,
|
||||
0x000014B9, 0x00000A0B, 0x00000A0B, 0x0004003D, 0x0000001D, 0x00003D1E,
|
||||
0x00001BD9, 0x00050041, 0x0000029B, 0x00004E0D, 0x00001342, 0x00000A0B,
|
||||
0x0003003E, 0x00004E0D, 0x00003D1E, 0x00060041, 0x0000028A, 0x00004DC1,
|
||||
0x000014B9, 0x00000A0B, 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001CDF,
|
||||
0x00004DC1, 0x00050041, 0x0000028B, 0x00004E0E, 0x00001342, 0x00000A0E,
|
||||
0x0003003E, 0x00004E0E, 0x00001CDF, 0x00050041, 0x000006F1, 0x00004D88,
|
||||
0x00000CE6, 0x00000A0B, 0x0004003D, 0x00000473, 0x00002A7A, 0x00004D88,
|
||||
0x0003003E, 0x00001536, 0x00002A7A, 0x000100DA, 0x00060041, 0x0000029A,
|
||||
0x0000591C, 0x000014B9, 0x00000A0E, 0x00000A0B, 0x0004003D, 0x0000001D,
|
||||
0x00002C86, 0x0000591C, 0x0003003E, 0x00004E0D, 0x00002C86, 0x00060041,
|
||||
0x0000028A, 0x000040EE, 0x000014B9, 0x00000A0E, 0x00000A0E, 0x0004003D,
|
||||
0x0000000D, 0x00001F1F, 0x000040EE, 0x0003003E, 0x00004E0E, 0x00001F1F,
|
||||
0x00050041, 0x000006F1, 0x000040EF, 0x00000CE6, 0x00000A0E, 0x0004003D,
|
||||
0x00000473, 0x00002A7B, 0x000040EF, 0x0003003E, 0x00001536, 0x00002A7B,
|
||||
0x000100DA, 0x00060041, 0x0000029A, 0x0000591D, 0x000014B9, 0x00000A11,
|
||||
0x00000A0B, 0x0004003D, 0x0000001D, 0x00002C87, 0x0000591D, 0x0003003E,
|
||||
0x00004E0D, 0x00002C87, 0x00060041, 0x0000028A, 0x000040F0, 0x000014B9,
|
||||
0x00000A11, 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001F20, 0x000040F0,
|
||||
0x0003003E, 0x00004E0E, 0x00001F20, 0x00050041, 0x000006F1, 0x000040F1,
|
||||
0x00000CE6, 0x00000A11, 0x0004003D, 0x00000473, 0x00002A7C, 0x000040F1,
|
||||
0x0003003E, 0x00001536, 0x00002A7C, 0x000100DA, 0x00060041, 0x0000029A,
|
||||
0x0000591E, 0x000014B9, 0x00000A14, 0x00000A0B, 0x0004003D, 0x0000001D,
|
||||
0x00002C88, 0x0000591E, 0x0003003E, 0x00004E0D, 0x00002C88, 0x00060041,
|
||||
0x0000028A, 0x000040F2, 0x000014B9, 0x00000A14, 0x00000A0E, 0x0004003D,
|
||||
0x0000000D, 0x00001F21, 0x000040F2, 0x0003003E, 0x00004E0E, 0x00001F21,
|
||||
0x00050041, 0x000006F1, 0x000040F3, 0x00000CE6, 0x00000A14, 0x0004003D,
|
||||
0x00000473, 0x00002A7D, 0x000040F3, 0x0003003E, 0x00001536, 0x00002A7D,
|
||||
0x000100DA, 0x0003003E, 0x00004E0D, 0x00003D1E, 0x0003003E, 0x00004E0E,
|
||||
0x00001CDF, 0x0003003E, 0x00001536, 0x00002A7A, 0x000100DA, 0x000100DB,
|
||||
0x000100FD, 0x00010038,
|
||||
};
|
|
@ -1,244 +0,0 @@
|
|||
// Generated with `xb buildshaders`.
|
||||
#if 0
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 10
|
||||
; Bound: 24916
|
||||
; Schema: 0
|
||||
OpCapability Geometry
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Geometry %5663 "main" %5305 %4065 %4930 %5430 %3302 %5753 %5479
|
||||
OpExecutionMode %5663 InputPoints
|
||||
OpExecutionMode %5663 Invocations 1
|
||||
OpExecutionMode %5663 OutputTriangleStrip
|
||||
OpExecutionMode %5663 OutputVertices 4
|
||||
OpMemberDecorate %_struct_1017 0 BuiltIn Position
|
||||
OpDecorate %_struct_1017 Block
|
||||
OpMemberDecorate %_struct_1287 0 Offset 0
|
||||
OpMemberDecorate %_struct_1287 1 Offset 16
|
||||
OpMemberDecorate %_struct_1287 2 Offset 32
|
||||
OpMemberDecorate %_struct_1287 3 Offset 48
|
||||
OpMemberDecorate %_struct_1287 4 Offset 64
|
||||
OpDecorate %_struct_1287 Block
|
||||
OpDecorate %4065 Location 17
|
||||
OpMemberDecorate %_struct_1018 0 BuiltIn Position
|
||||
OpDecorate %_struct_1018 Block
|
||||
OpDecorate %5430 Location 0
|
||||
OpDecorate %3302 Location 0
|
||||
OpDecorate %5753 Location 16
|
||||
OpDecorate %5479 Location 16
|
||||
%void = OpTypeVoid
|
||||
%1282 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%v4float = OpTypeVector %float 4
|
||||
%_struct_1017 = OpTypeStruct %v4float
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%_arr__struct_1017_uint_1 = OpTypeArray %_struct_1017 %uint_1
|
||||
%_ptr_Input__arr__struct_1017_uint_1 = OpTypePointer Input %_arr__struct_1017_uint_1
|
||||
%5305 = OpVariable %_ptr_Input__arr__struct_1017_uint_1 Input
|
||||
%int = OpTypeInt 32 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%_ptr_Input_v4float = OpTypePointer Input %v4float
|
||||
%v2float = OpTypeVector %float 2
|
||||
%_ptr_Function_v2float = OpTypePointer Function %v2float
|
||||
%_struct_1287 = OpTypeStruct %v4float %v4float %v4float %v4float %uint
|
||||
%_ptr_PushConstant__struct_1287 = OpTypePointer PushConstant %_struct_1287
|
||||
%3463 = OpVariable %_ptr_PushConstant__struct_1287 PushConstant
|
||||
%int_2 = OpConstant %int 2
|
||||
%_ptr_PushConstant_v4float = OpTypePointer PushConstant %v4float
|
||||
%_arr_float_uint_1 = OpTypeArray %float %uint_1
|
||||
%_ptr_Input__arr_float_uint_1 = OpTypePointer Input %_arr_float_uint_1
|
||||
%4065 = OpVariable %_ptr_Input__arr_float_uint_1 Input
|
||||
%_ptr_Input_float = OpTypePointer Input %float
|
||||
%float_0 = OpConstant %float 0
|
||||
%bool = OpTypeBool
|
||||
%int_4 = OpConstant %int 4
|
||||
%_struct_1018 = OpTypeStruct %v4float
|
||||
%_ptr_Output__struct_1018 = OpTypePointer Output %_struct_1018
|
||||
%4930 = OpVariable %_ptr_Output__struct_1018 Output
|
||||
%uint_4 = OpConstant %uint 4
|
||||
%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4
|
||||
%float_n1 = OpConstant %float -1
|
||||
%float_1 = OpConstant %float 1
|
||||
%73 = OpConstantComposite %v2float %float_n1 %float_1
|
||||
%768 = OpConstantComposite %v2float %float_1 %float_1
|
||||
%74 = OpConstantComposite %v2float %float_n1 %float_n1
|
||||
%769 = OpConstantComposite %v2float %float_1 %float_n1
|
||||
%2941 = OpConstantComposite %_arr_v2float_uint_4 %73 %768 %74 %769
|
||||
%_ptr_Function__arr_v2float_uint_4 = OpTypePointer Function %_arr_v2float_uint_4
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%uint_16 = OpConstant %uint 16
|
||||
%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16
|
||||
%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16
|
||||
%5430 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output
|
||||
%_arr__arr_v4float_uint_16_uint_1 = OpTypeArray %_arr_v4float_uint_16 %uint_1
|
||||
%_ptr_Input__arr__arr_v4float_uint_16_uint_1 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_1
|
||||
%3302 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_1 Input
|
||||
%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16
|
||||
%_ptr_Output_v2float = OpTypePointer Output %v2float
|
||||
%5753 = OpVariable %_ptr_Output_v2float Output
|
||||
%1823 = OpConstantComposite %v2float %float_0 %float_0
|
||||
%int_1 = OpConstant %int 1
|
||||
%_arr_v2float_uint_1 = OpTypeArray %v2float %uint_1
|
||||
%_ptr_Input__arr_v2float_uint_1 = OpTypePointer Input %_arr_v2float_uint_1
|
||||
%5479 = OpVariable %_ptr_Input__arr_v2float_uint_1 Input
|
||||
%5663 = OpFunction %void None %1282
|
||||
%24915 = OpLabel
|
||||
%18491 = OpVariable %_ptr_Function__arr_v2float_uint_4 Function
|
||||
%5238 = OpVariable %_ptr_Function__arr_v2float_uint_4 Function
|
||||
%22270 = OpAccessChain %_ptr_Input_v4float %5305 %int_0 %int_0
|
||||
%8181 = OpLoad %v4float %22270
|
||||
%20420 = OpAccessChain %_ptr_PushConstant_v4float %3463 %int_2
|
||||
%20062 = OpLoad %v4float %20420
|
||||
%19110 = OpVectorShuffle %v2float %20062 %20062 0 1
|
||||
%7988 = OpAccessChain %_ptr_Input_float %4065 %int_0
|
||||
%13069 = OpLoad %float %7988
|
||||
%23515 = OpFOrdGreaterThan %bool %13069 %float_0
|
||||
OpSelectionMerge %16839 None
|
||||
OpBranchConditional %23515 %13106 %16839
|
||||
%13106 = OpLabel
|
||||
%18836 = OpCompositeConstruct %v2float %13069 %13069
|
||||
OpBranch %16839
|
||||
%16839 = OpLabel
|
||||
%19748 = OpPhi %v2float %19110 %24915 %18836 %13106
|
||||
%24067 = OpAccessChain %_ptr_PushConstant_v4float %3463 %int_0
|
||||
%15439 = OpLoad %v4float %24067
|
||||
%10399 = OpVectorShuffle %v2float %15439 %15439 2 3
|
||||
%24282 = OpFDiv %v2float %19748 %10399
|
||||
OpBranch %6318
|
||||
%6318 = OpLabel
|
||||
%22958 = OpPhi %int %int_0 %16839 %11651 %12148
|
||||
%24788 = OpSLessThan %bool %22958 %int_4
|
||||
OpLoopMerge %12265 %12148 None
|
||||
OpBranchConditional %24788 %12148 %12265
|
||||
%12148 = OpLabel
|
||||
%17761 = OpVectorShuffle %v2float %8181 %8181 0 1
|
||||
OpStore %18491 %2941
|
||||
%19574 = OpAccessChain %_ptr_Function_v2float %18491 %22958
|
||||
%15971 = OpLoad %v2float %19574
|
||||
%17243 = OpFMul %v2float %15971 %24282
|
||||
%16594 = OpFAdd %v2float %17761 %17243
|
||||
%10618 = OpCompositeExtract %float %16594 0
|
||||
%14087 = OpCompositeExtract %float %16594 1
|
||||
%7641 = OpCompositeExtract %float %8181 2
|
||||
%7529 = OpCompositeExtract %float %8181 3
|
||||
%18260 = OpCompositeConstruct %v4float %10618 %14087 %7641 %7529
|
||||
%8483 = OpAccessChain %_ptr_Output_v4float %4930 %int_0
|
||||
OpStore %8483 %18260
|
||||
%19848 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_0
|
||||
%7910 = OpLoad %_arr_v4float_uint_16 %19848
|
||||
OpStore %5430 %7910
|
||||
OpStore %5238 %2941
|
||||
%13290 = OpAccessChain %_ptr_Function_v2float %5238 %22958
|
||||
%19207 = OpLoad %v2float %13290
|
||||
%8973 = OpExtInst %v2float %1 FMax %19207 %1823
|
||||
OpStore %5753 %8973
|
||||
OpEmitVertex
|
||||
%11651 = OpIAdd %int %22958 %int_1
|
||||
OpBranch %6318
|
||||
%12265 = OpLabel
|
||||
OpEndPrimitive
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
#endif
|
||||
|
||||
const uint32_t point_list_gs[] = {
|
||||
0x07230203, 0x00010000, 0x0008000A, 0x00006154, 0x00000000, 0x00020011,
|
||||
0x00000002, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E,
|
||||
0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x000C000F, 0x00000003,
|
||||
0x0000161F, 0x6E69616D, 0x00000000, 0x000014B9, 0x00000FE1, 0x00001342,
|
||||
0x00001536, 0x00000CE6, 0x00001679, 0x00001567, 0x00030010, 0x0000161F,
|
||||
0x00000013, 0x00040010, 0x0000161F, 0x00000000, 0x00000001, 0x00030010,
|
||||
0x0000161F, 0x0000001D, 0x00040010, 0x0000161F, 0x0000001A, 0x00000004,
|
||||
0x00050048, 0x000003F9, 0x00000000, 0x0000000B, 0x00000000, 0x00030047,
|
||||
0x000003F9, 0x00000002, 0x00050048, 0x00000507, 0x00000000, 0x00000023,
|
||||
0x00000000, 0x00050048, 0x00000507, 0x00000001, 0x00000023, 0x00000010,
|
||||
0x00050048, 0x00000507, 0x00000002, 0x00000023, 0x00000020, 0x00050048,
|
||||
0x00000507, 0x00000003, 0x00000023, 0x00000030, 0x00050048, 0x00000507,
|
||||
0x00000004, 0x00000023, 0x00000040, 0x00030047, 0x00000507, 0x00000002,
|
||||
0x00040047, 0x00000FE1, 0x0000001E, 0x00000011, 0x00050048, 0x000003FA,
|
||||
0x00000000, 0x0000000B, 0x00000000, 0x00030047, 0x000003FA, 0x00000002,
|
||||
0x00040047, 0x00001536, 0x0000001E, 0x00000000, 0x00040047, 0x00000CE6,
|
||||
0x0000001E, 0x00000000, 0x00040047, 0x00001679, 0x0000001E, 0x00000010,
|
||||
0x00040047, 0x00001567, 0x0000001E, 0x00000010, 0x00020013, 0x00000008,
|
||||
0x00030021, 0x00000502, 0x00000008, 0x00030016, 0x0000000D, 0x00000020,
|
||||
0x00040017, 0x0000001D, 0x0000000D, 0x00000004, 0x0003001E, 0x000003F9,
|
||||
0x0000001D, 0x00040015, 0x0000000B, 0x00000020, 0x00000000, 0x0004002B,
|
||||
0x0000000B, 0x00000A0D, 0x00000001, 0x0004001C, 0x0000023D, 0x000003F9,
|
||||
0x00000A0D, 0x00040020, 0x000004BA, 0x00000001, 0x0000023D, 0x0004003B,
|
||||
0x000004BA, 0x000014B9, 0x00000001, 0x00040015, 0x0000000C, 0x00000020,
|
||||
0x00000001, 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x00040020,
|
||||
0x0000029A, 0x00000001, 0x0000001D, 0x00040017, 0x00000013, 0x0000000D,
|
||||
0x00000002, 0x00040020, 0x00000290, 0x00000007, 0x00000013, 0x0007001E,
|
||||
0x00000507, 0x0000001D, 0x0000001D, 0x0000001D, 0x0000001D, 0x0000000B,
|
||||
0x00040020, 0x00000784, 0x00000009, 0x00000507, 0x0004003B, 0x00000784,
|
||||
0x00000D87, 0x00000009, 0x0004002B, 0x0000000C, 0x00000A11, 0x00000002,
|
||||
0x00040020, 0x0000029B, 0x00000009, 0x0000001D, 0x0004001C, 0x00000239,
|
||||
0x0000000D, 0x00000A0D, 0x00040020, 0x000004B6, 0x00000001, 0x00000239,
|
||||
0x0004003B, 0x000004B6, 0x00000FE1, 0x00000001, 0x00040020, 0x0000028A,
|
||||
0x00000001, 0x0000000D, 0x0004002B, 0x0000000D, 0x00000A0C, 0x00000000,
|
||||
0x00020014, 0x00000009, 0x0004002B, 0x0000000C, 0x00000A17, 0x00000004,
|
||||
0x0003001E, 0x000003FA, 0x0000001D, 0x00040020, 0x00000676, 0x00000003,
|
||||
0x000003FA, 0x0004003B, 0x00000676, 0x00001342, 0x00000003, 0x0004002B,
|
||||
0x0000000B, 0x00000A16, 0x00000004, 0x0004001C, 0x000004D3, 0x00000013,
|
||||
0x00000A16, 0x0004002B, 0x0000000D, 0x00000341, 0xBF800000, 0x0004002B,
|
||||
0x0000000D, 0x0000008A, 0x3F800000, 0x0005002C, 0x00000013, 0x00000049,
|
||||
0x00000341, 0x0000008A, 0x0005002C, 0x00000013, 0x00000300, 0x0000008A,
|
||||
0x0000008A, 0x0005002C, 0x00000013, 0x0000004A, 0x00000341, 0x00000341,
|
||||
0x0005002C, 0x00000013, 0x00000301, 0x0000008A, 0x00000341, 0x0007002C,
|
||||
0x000004D3, 0x00000B7D, 0x00000049, 0x00000300, 0x0000004A, 0x00000301,
|
||||
0x00040020, 0x00000750, 0x00000007, 0x000004D3, 0x00040020, 0x0000029C,
|
||||
0x00000003, 0x0000001D, 0x0004002B, 0x0000000B, 0x00000A3A, 0x00000010,
|
||||
0x0004001C, 0x00000989, 0x0000001D, 0x00000A3A, 0x00040020, 0x00000043,
|
||||
0x00000003, 0x00000989, 0x0004003B, 0x00000043, 0x00001536, 0x00000003,
|
||||
0x0004001C, 0x00000A2E, 0x00000989, 0x00000A0D, 0x00040020, 0x000000E8,
|
||||
0x00000001, 0x00000A2E, 0x0004003B, 0x000000E8, 0x00000CE6, 0x00000001,
|
||||
0x00040020, 0x00000044, 0x00000001, 0x00000989, 0x00040020, 0x00000291,
|
||||
0x00000003, 0x00000013, 0x0004003B, 0x00000291, 0x00001679, 0x00000003,
|
||||
0x0005002C, 0x00000013, 0x0000071F, 0x00000A0C, 0x00000A0C, 0x0004002B,
|
||||
0x0000000C, 0x00000A0E, 0x00000001, 0x0004001C, 0x00000281, 0x00000013,
|
||||
0x00000A0D, 0x00040020, 0x000004FE, 0x00000001, 0x00000281, 0x0004003B,
|
||||
0x000004FE, 0x00001567, 0x00000001, 0x00050036, 0x00000008, 0x0000161F,
|
||||
0x00000000, 0x00000502, 0x000200F8, 0x00006153, 0x0004003B, 0x00000750,
|
||||
0x0000483B, 0x00000007, 0x0004003B, 0x00000750, 0x00001476, 0x00000007,
|
||||
0x00060041, 0x0000029A, 0x000056FE, 0x000014B9, 0x00000A0B, 0x00000A0B,
|
||||
0x0004003D, 0x0000001D, 0x00001FF5, 0x000056FE, 0x00050041, 0x0000029B,
|
||||
0x00004FC4, 0x00000D87, 0x00000A11, 0x0004003D, 0x0000001D, 0x00004E5E,
|
||||
0x00004FC4, 0x0007004F, 0x00000013, 0x00004AA6, 0x00004E5E, 0x00004E5E,
|
||||
0x00000000, 0x00000001, 0x00050041, 0x0000028A, 0x00001F34, 0x00000FE1,
|
||||
0x00000A0B, 0x0004003D, 0x0000000D, 0x0000330D, 0x00001F34, 0x000500BA,
|
||||
0x00000009, 0x00005BDB, 0x0000330D, 0x00000A0C, 0x000300F7, 0x000041C7,
|
||||
0x00000000, 0x000400FA, 0x00005BDB, 0x00003332, 0x000041C7, 0x000200F8,
|
||||
0x00003332, 0x00050050, 0x00000013, 0x00004994, 0x0000330D, 0x0000330D,
|
||||
0x000200F9, 0x000041C7, 0x000200F8, 0x000041C7, 0x000700F5, 0x00000013,
|
||||
0x00004D24, 0x00004AA6, 0x00006153, 0x00004994, 0x00003332, 0x00050041,
|
||||
0x0000029B, 0x00005E03, 0x00000D87, 0x00000A0B, 0x0004003D, 0x0000001D,
|
||||
0x00003C4F, 0x00005E03, 0x0007004F, 0x00000013, 0x0000289F, 0x00003C4F,
|
||||
0x00003C4F, 0x00000002, 0x00000003, 0x00050088, 0x00000013, 0x00005EDA,
|
||||
0x00004D24, 0x0000289F, 0x000200F9, 0x000018AE, 0x000200F8, 0x000018AE,
|
||||
0x000700F5, 0x0000000C, 0x000059AE, 0x00000A0B, 0x000041C7, 0x00002D83,
|
||||
0x00002F74, 0x000500B1, 0x00000009, 0x000060D4, 0x000059AE, 0x00000A17,
|
||||
0x000400F6, 0x00002FE9, 0x00002F74, 0x00000000, 0x000400FA, 0x000060D4,
|
||||
0x00002F74, 0x00002FE9, 0x000200F8, 0x00002F74, 0x0007004F, 0x00000013,
|
||||
0x00004561, 0x00001FF5, 0x00001FF5, 0x00000000, 0x00000001, 0x0003003E,
|
||||
0x0000483B, 0x00000B7D, 0x00050041, 0x00000290, 0x00004C76, 0x0000483B,
|
||||
0x000059AE, 0x0004003D, 0x00000013, 0x00003E63, 0x00004C76, 0x00050085,
|
||||
0x00000013, 0x0000435B, 0x00003E63, 0x00005EDA, 0x00050081, 0x00000013,
|
||||
0x000040D2, 0x00004561, 0x0000435B, 0x00050051, 0x0000000D, 0x0000297A,
|
||||
0x000040D2, 0x00000000, 0x00050051, 0x0000000D, 0x00003707, 0x000040D2,
|
||||
0x00000001, 0x00050051, 0x0000000D, 0x00001DD9, 0x00001FF5, 0x00000002,
|
||||
0x00050051, 0x0000000D, 0x00001D69, 0x00001FF5, 0x00000003, 0x00070050,
|
||||
0x0000001D, 0x00004754, 0x0000297A, 0x00003707, 0x00001DD9, 0x00001D69,
|
||||
0x00050041, 0x0000029C, 0x00002123, 0x00001342, 0x00000A0B, 0x0003003E,
|
||||
0x00002123, 0x00004754, 0x00050041, 0x00000044, 0x00004D88, 0x00000CE6,
|
||||
0x00000A0B, 0x0004003D, 0x00000989, 0x00001EE6, 0x00004D88, 0x0003003E,
|
||||
0x00001536, 0x00001EE6, 0x0003003E, 0x00001476, 0x00000B7D, 0x00050041,
|
||||
0x00000290, 0x000033EA, 0x00001476, 0x000059AE, 0x0004003D, 0x00000013,
|
||||
0x00004B07, 0x000033EA, 0x0007000C, 0x00000013, 0x0000230D, 0x00000001,
|
||||
0x00000028, 0x00004B07, 0x0000071F, 0x0003003E, 0x00001679, 0x0000230D,
|
||||
0x000100DA, 0x00050080, 0x0000000C, 0x00002D83, 0x000059AE, 0x00000A0E,
|
||||
0x000200F9, 0x000018AE, 0x000200F8, 0x00002FE9, 0x000100DB, 0x000100FD,
|
||||
0x00010038,
|
||||
};
|
|
@ -1,170 +0,0 @@
|
|||
// Generated with `xb buildshaders`.
|
||||
#if 0
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 10
|
||||
; Bound: 24789
|
||||
; Schema: 0
|
||||
OpCapability Geometry
|
||||
OpCapability GeometryPointSize
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Geometry %5663 "main" %4930 %5305 %5430 %3302 %4044 %4656 %3736
|
||||
OpExecutionMode %5663 InputLinesAdjacency
|
||||
OpExecutionMode %5663 Invocations 1
|
||||
OpExecutionMode %5663 OutputTriangleStrip
|
||||
OpExecutionMode %5663 OutputVertices 4
|
||||
OpMemberDecorate %_struct_1032 0 BuiltIn Position
|
||||
OpMemberDecorate %_struct_1032 1 BuiltIn PointSize
|
||||
OpDecorate %_struct_1032 Block
|
||||
OpMemberDecorate %_struct_1033 0 BuiltIn Position
|
||||
OpMemberDecorate %_struct_1033 1 BuiltIn PointSize
|
||||
OpDecorate %_struct_1033 Block
|
||||
OpDecorate %5430 Location 0
|
||||
OpDecorate %3302 Location 0
|
||||
OpDecorate %4044 Location 16
|
||||
OpDecorate %4656 Location 17
|
||||
OpDecorate %3736 Location 16
|
||||
%void = OpTypeVoid
|
||||
%1282 = OpTypeFunction %void
|
||||
%int = OpTypeInt 32 1
|
||||
%_ptr_Function_int = OpTypePointer Function %int
|
||||
%int_0 = OpConstant %int 0
|
||||
%int_4 = OpConstant %int 4
|
||||
%bool = OpTypeBool
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_4 = OpConstant %uint 4
|
||||
%_arr_int_uint_4 = OpTypeArray %int %uint_4
|
||||
%int_1 = OpConstant %int 1
|
||||
%int_3 = OpConstant %int 3
|
||||
%int_2 = OpConstant %int 2
|
||||
%566 = OpConstantComposite %_arr_int_uint_4 %int_0 %int_1 %int_3 %int_2
|
||||
%_ptr_Function__arr_int_uint_4 = OpTypePointer Function %_arr_int_uint_4
|
||||
%float = OpTypeFloat 32
|
||||
%v4float = OpTypeVector %float 4
|
||||
%_struct_1032 = OpTypeStruct %v4float %float
|
||||
%_ptr_Output__struct_1032 = OpTypePointer Output %_struct_1032
|
||||
%4930 = OpVariable %_ptr_Output__struct_1032 Output
|
||||
%_struct_1033 = OpTypeStruct %v4float %float
|
||||
%_arr__struct_1033_uint_4 = OpTypeArray %_struct_1033 %uint_4
|
||||
%_ptr_Input__arr__struct_1033_uint_4 = OpTypePointer Input %_arr__struct_1033_uint_4
|
||||
%5305 = OpVariable %_ptr_Input__arr__struct_1033_uint_4 Input
|
||||
%_ptr_Input_v4float = OpTypePointer Input %v4float
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%_ptr_Input_float = OpTypePointer Input %float
|
||||
%_ptr_Output_float = OpTypePointer Output %float
|
||||
%uint_16 = OpConstant %uint 16
|
||||
%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16
|
||||
%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16
|
||||
%5430 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output
|
||||
%_arr__arr_v4float_uint_16_uint_4 = OpTypeArray %_arr_v4float_uint_16 %uint_4
|
||||
%_ptr_Input__arr__arr_v4float_uint_16_uint_4 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_4
|
||||
%3302 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_4 Input
|
||||
%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16
|
||||
%v2float = OpTypeVector %float 2
|
||||
%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4
|
||||
%_ptr_Input__arr_v2float_uint_4 = OpTypePointer Input %_arr_v2float_uint_4
|
||||
%4044 = OpVariable %_ptr_Input__arr_v2float_uint_4 Input
|
||||
%_arr_float_uint_4 = OpTypeArray %float %uint_4
|
||||
%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4
|
||||
%4656 = OpVariable %_ptr_Input__arr_float_uint_4 Input
|
||||
%_ptr_Output_v2float = OpTypePointer Output %v2float
|
||||
%3736 = OpVariable %_ptr_Output_v2float Output
|
||||
%5663 = OpFunction %void None %1282
|
||||
%9454 = OpLabel
|
||||
%5238 = OpVariable %_ptr_Function__arr_int_uint_4 Function
|
||||
OpBranch %18173
|
||||
%18173 = OpLabel
|
||||
%22958 = OpPhi %int %int_0 %9454 %11651 %15146
|
||||
%24788 = OpSLessThan %bool %22958 %int_4
|
||||
OpLoopMerge %12265 %15146 None
|
||||
OpBranchConditional %24788 %15146 %12265
|
||||
%15146 = OpLabel
|
||||
OpStore %5238 %566
|
||||
%22512 = OpAccessChain %_ptr_Function_int %5238 %22958
|
||||
%7372 = OpLoad %int %22512
|
||||
%20154 = OpAccessChain %_ptr_Input_v4float %5305 %7372 %int_0
|
||||
%22427 = OpLoad %v4float %20154
|
||||
%19981 = OpAccessChain %_ptr_Output_v4float %4930 %int_0
|
||||
OpStore %19981 %22427
|
||||
%19905 = OpAccessChain %_ptr_Input_float %5305 %7372 %int_1
|
||||
%7391 = OpLoad %float %19905
|
||||
%19982 = OpAccessChain %_ptr_Output_float %4930 %int_1
|
||||
OpStore %19982 %7391
|
||||
%19848 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %7372
|
||||
%10874 = OpLoad %_arr_v4float_uint_16 %19848
|
||||
OpStore %5430 %10874
|
||||
OpEmitVertex
|
||||
%11651 = OpIAdd %int %22958 %int_1
|
||||
OpBranch %18173
|
||||
%12265 = OpLabel
|
||||
OpEndPrimitive
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
#endif
|
||||
|
||||
const uint32_t quad_list_gs[] = {
|
||||
0x07230203, 0x00010000, 0x0008000A, 0x000060D5, 0x00000000, 0x00020011,
|
||||
0x00000002, 0x00020011, 0x00000018, 0x0006000B, 0x00000001, 0x4C534C47,
|
||||
0x6474732E, 0x3035342E, 0x00000000, 0x0003000E, 0x00000000, 0x00000001,
|
||||
0x000C000F, 0x00000003, 0x0000161F, 0x6E69616D, 0x00000000, 0x00001342,
|
||||
0x000014B9, 0x00001536, 0x00000CE6, 0x00000FCC, 0x00001230, 0x00000E98,
|
||||
0x00030010, 0x0000161F, 0x00000015, 0x00040010, 0x0000161F, 0x00000000,
|
||||
0x00000001, 0x00030010, 0x0000161F, 0x0000001D, 0x00040010, 0x0000161F,
|
||||
0x0000001A, 0x00000004, 0x00050048, 0x00000408, 0x00000000, 0x0000000B,
|
||||
0x00000000, 0x00050048, 0x00000408, 0x00000001, 0x0000000B, 0x00000001,
|
||||
0x00030047, 0x00000408, 0x00000002, 0x00050048, 0x00000409, 0x00000000,
|
||||
0x0000000B, 0x00000000, 0x00050048, 0x00000409, 0x00000001, 0x0000000B,
|
||||
0x00000001, 0x00030047, 0x00000409, 0x00000002, 0x00040047, 0x00001536,
|
||||
0x0000001E, 0x00000000, 0x00040047, 0x00000CE6, 0x0000001E, 0x00000000,
|
||||
0x00040047, 0x00000FCC, 0x0000001E, 0x00000010, 0x00040047, 0x00001230,
|
||||
0x0000001E, 0x00000011, 0x00040047, 0x00000E98, 0x0000001E, 0x00000010,
|
||||
0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00040015,
|
||||
0x0000000C, 0x00000020, 0x00000001, 0x00040020, 0x00000289, 0x00000007,
|
||||
0x0000000C, 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x0004002B,
|
||||
0x0000000C, 0x00000A17, 0x00000004, 0x00020014, 0x00000009, 0x00040015,
|
||||
0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B, 0x00000A16,
|
||||
0x00000004, 0x0004001C, 0x00000251, 0x0000000C, 0x00000A16, 0x0004002B,
|
||||
0x0000000C, 0x00000A0E, 0x00000001, 0x0004002B, 0x0000000C, 0x00000A14,
|
||||
0x00000003, 0x0004002B, 0x0000000C, 0x00000A11, 0x00000002, 0x0007002C,
|
||||
0x00000251, 0x00000236, 0x00000A0B, 0x00000A0E, 0x00000A14, 0x00000A11,
|
||||
0x00040020, 0x000004CE, 0x00000007, 0x00000251, 0x00030016, 0x0000000D,
|
||||
0x00000020, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004, 0x0004001E,
|
||||
0x00000408, 0x0000001D, 0x0000000D, 0x00040020, 0x00000685, 0x00000003,
|
||||
0x00000408, 0x0004003B, 0x00000685, 0x00001342, 0x00000003, 0x0004001E,
|
||||
0x00000409, 0x0000001D, 0x0000000D, 0x0004001C, 0x000003A8, 0x00000409,
|
||||
0x00000A16, 0x00040020, 0x00000625, 0x00000001, 0x000003A8, 0x0004003B,
|
||||
0x00000625, 0x000014B9, 0x00000001, 0x00040020, 0x0000029A, 0x00000001,
|
||||
0x0000001D, 0x00040020, 0x0000029B, 0x00000003, 0x0000001D, 0x00040020,
|
||||
0x0000028A, 0x00000001, 0x0000000D, 0x00040020, 0x0000028B, 0x00000003,
|
||||
0x0000000D, 0x0004002B, 0x0000000B, 0x00000A3A, 0x00000010, 0x0004001C,
|
||||
0x00000656, 0x0000001D, 0x00000A3A, 0x00040020, 0x000008D3, 0x00000003,
|
||||
0x00000656, 0x0004003B, 0x000008D3, 0x00001536, 0x00000003, 0x0004001C,
|
||||
0x00000503, 0x00000656, 0x00000A16, 0x00040020, 0x0000077F, 0x00000001,
|
||||
0x00000503, 0x0004003B, 0x0000077F, 0x00000CE6, 0x00000001, 0x00040020,
|
||||
0x000008D4, 0x00000001, 0x00000656, 0x00040017, 0x00000013, 0x0000000D,
|
||||
0x00000002, 0x0004001C, 0x000002E4, 0x00000013, 0x00000A16, 0x00040020,
|
||||
0x00000561, 0x00000001, 0x000002E4, 0x0004003B, 0x00000561, 0x00000FCC,
|
||||
0x00000001, 0x0004001C, 0x00000266, 0x0000000D, 0x00000A16, 0x00040020,
|
||||
0x000004E3, 0x00000001, 0x00000266, 0x0004003B, 0x000004E3, 0x00001230,
|
||||
0x00000001, 0x00040020, 0x00000290, 0x00000003, 0x00000013, 0x0004003B,
|
||||
0x00000290, 0x00000E98, 0x00000003, 0x00050036, 0x00000008, 0x0000161F,
|
||||
0x00000000, 0x00000502, 0x000200F8, 0x000024EE, 0x0004003B, 0x000004CE,
|
||||
0x00001476, 0x00000007, 0x000200F9, 0x000046FD, 0x000200F8, 0x000046FD,
|
||||
0x000700F5, 0x0000000C, 0x000059AE, 0x00000A0B, 0x000024EE, 0x00002D83,
|
||||
0x00003B2A, 0x000500B1, 0x00000009, 0x000060D4, 0x000059AE, 0x00000A17,
|
||||
0x000400F6, 0x00002FE9, 0x00003B2A, 0x00000000, 0x000400FA, 0x000060D4,
|
||||
0x00003B2A, 0x00002FE9, 0x000200F8, 0x00003B2A, 0x0003003E, 0x00001476,
|
||||
0x00000236, 0x00050041, 0x00000289, 0x000057F0, 0x00001476, 0x000059AE,
|
||||
0x0004003D, 0x0000000C, 0x00001CCC, 0x000057F0, 0x00060041, 0x0000029A,
|
||||
0x00004EBA, 0x000014B9, 0x00001CCC, 0x00000A0B, 0x0004003D, 0x0000001D,
|
||||
0x0000579B, 0x00004EBA, 0x00050041, 0x0000029B, 0x00004E0D, 0x00001342,
|
||||
0x00000A0B, 0x0003003E, 0x00004E0D, 0x0000579B, 0x00060041, 0x0000028A,
|
||||
0x00004DC1, 0x000014B9, 0x00001CCC, 0x00000A0E, 0x0004003D, 0x0000000D,
|
||||
0x00001CDF, 0x00004DC1, 0x00050041, 0x0000028B, 0x00004E0E, 0x00001342,
|
||||
0x00000A0E, 0x0003003E, 0x00004E0E, 0x00001CDF, 0x00050041, 0x000008D4,
|
||||
0x00004D88, 0x00000CE6, 0x00001CCC, 0x0004003D, 0x00000656, 0x00002A7A,
|
||||
0x00004D88, 0x0003003E, 0x00001536, 0x00002A7A, 0x000100DA, 0x00050080,
|
||||
0x0000000C, 0x00002D83, 0x000059AE, 0x00000A0E, 0x000200F9, 0x000046FD,
|
||||
0x000200F8, 0x00002FE9, 0x000100DB, 0x000100FD, 0x00010038,
|
||||
};
|
|
@ -1,430 +0,0 @@
|
|||
// Generated with `xb buildshaders`.
|
||||
#if 0
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 10
|
||||
; Bound: 24790
|
||||
; Schema: 0
|
||||
OpCapability Geometry
|
||||
OpCapability GeometryPointSize
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Geometry %5663 "main" %5305 %4930 %5430 %3302 %4044 %4656 %3736
|
||||
OpExecutionMode %5663 Triangles
|
||||
OpExecutionMode %5663 Invocations 1
|
||||
OpExecutionMode %5663 OutputTriangleStrip
|
||||
OpExecutionMode %5663 OutputVertices 6
|
||||
OpMemberDecorate %_struct_1032 0 BuiltIn Position
|
||||
OpMemberDecorate %_struct_1032 1 BuiltIn PointSize
|
||||
OpDecorate %_struct_1032 Block
|
||||
OpMemberDecorate %_struct_1033 0 BuiltIn Position
|
||||
OpMemberDecorate %_struct_1033 1 BuiltIn PointSize
|
||||
OpDecorate %_struct_1033 Block
|
||||
OpDecorate %5430 Location 0
|
||||
OpDecorate %3302 Location 0
|
||||
OpDecorate %4044 Location 16
|
||||
OpDecorate %4656 Location 17
|
||||
OpDecorate %3736 Location 16
|
||||
%void = OpTypeVoid
|
||||
%1282 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%v2float = OpTypeVector %float 2
|
||||
%bool = OpTypeBool
|
||||
%v2bool = OpTypeVector %bool 2
|
||||
%v4float = OpTypeVector %float 4
|
||||
%_struct_1032 = OpTypeStruct %v4float %float
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_3 = OpConstant %uint 3
|
||||
%_arr__struct_1032_uint_3 = OpTypeArray %_struct_1032 %uint_3
|
||||
%_ptr_Input__arr__struct_1032_uint_3 = OpTypePointer Input %_arr__struct_1032_uint_3
|
||||
%5305 = OpVariable %_ptr_Input__arr__struct_1032_uint_3 Input
|
||||
%int = OpTypeInt 32 1
|
||||
%int_0 = OpConstant %int 0
|
||||
%int_2 = OpConstant %int 2
|
||||
%uint_0 = OpConstant %uint 0
|
||||
%_ptr_Input_float = OpTypePointer Input %float
|
||||
%int_1 = OpConstant %int 1
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%float_0_00100000005 = OpConstant %float 0.00100000005
|
||||
%_ptr_Input_v4float = OpTypePointer Input %v4float
|
||||
%_struct_1033 = OpTypeStruct %v4float %float
|
||||
%_ptr_Output__struct_1033 = OpTypePointer Output %_struct_1033
|
||||
%4930 = OpVariable %_ptr_Output__struct_1033 Output
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%_ptr_Output_float = OpTypePointer Output %float
|
||||
%uint_16 = OpConstant %uint 16
|
||||
%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16
|
||||
%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16
|
||||
%5430 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output
|
||||
%_arr__arr_v4float_uint_16_uint_3 = OpTypeArray %_arr_v4float_uint_16 %uint_3
|
||||
%_ptr_Input__arr__arr_v4float_uint_16_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_3
|
||||
%3302 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_3 Input
|
||||
%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16
|
||||
%int_16 = OpConstant %int 16
|
||||
%_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3
|
||||
%_ptr_Input__arr_v2float_uint_3 = OpTypePointer Input %_arr_v2float_uint_3
|
||||
%4044 = OpVariable %_ptr_Input__arr_v2float_uint_3 Input
|
||||
%_arr_float_uint_3 = OpTypeArray %float %uint_3
|
||||
%_ptr_Input__arr_float_uint_3 = OpTypePointer Input %_arr_float_uint_3
|
||||
%4656 = OpVariable %_ptr_Input__arr_float_uint_3 Input
|
||||
%_ptr_Output_v2float = OpTypePointer Output %v2float
|
||||
%3736 = OpVariable %_ptr_Output_v2float Output
|
||||
%1759 = OpConstantComposite %v2float %float_0_00100000005 %float_0_00100000005
|
||||
%5663 = OpFunction %void None %1282
|
||||
%23915 = OpLabel
|
||||
%7129 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_0 %uint_0
|
||||
%15627 = OpLoad %float %7129
|
||||
%20439 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_0 %uint_1
|
||||
%19889 = OpLoad %float %20439
|
||||
%10917 = OpCompositeConstruct %v2float %15627 %19889
|
||||
%24777 = OpAccessChain %_ptr_Input_v4float %5305 %int_0 %int_0
|
||||
%7883 = OpLoad %v4float %24777
|
||||
%6765 = OpVectorShuffle %v2float %7883 %7883 0 1
|
||||
%15739 = OpFSub %v2float %6765 %10917
|
||||
%7757 = OpExtInst %v2float %1 FAbs %15739
|
||||
%19021 = OpFOrdLessThanEqual %v2bool %7757 %1759
|
||||
%15711 = OpAll %bool %19021
|
||||
%11402 = OpLogicalNot %bool %15711
|
||||
OpSelectionMerge %13286 None
|
||||
OpBranchConditional %11402 %12129 %13286
|
||||
%12129 = OpLabel
|
||||
%18210 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_0 %uint_0
|
||||
%15628 = OpLoad %float %18210
|
||||
%20440 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_0 %uint_1
|
||||
%21143 = OpLoad %float %20440
|
||||
%17643 = OpCompositeConstruct %v2float %15628 %21143
|
||||
%15490 = OpFSub %v2float %6765 %17643
|
||||
%24406 = OpExtInst %v2float %1 FAbs %15490
|
||||
%20560 = OpFOrdLessThanEqual %v2bool %24406 %1759
|
||||
%20788 = OpAll %bool %20560
|
||||
OpBranch %13286
|
||||
%13286 = OpLabel
|
||||
%10924 = OpPhi %bool %15711 %23915 %20788 %12129
|
||||
OpSelectionMerge %23648 None
|
||||
OpBranchConditional %10924 %12148 %9186
|
||||
%12148 = OpLabel
|
||||
%18037 = OpAccessChain %_ptr_Output_v4float %4930 %int_0
|
||||
OpStore %18037 %7883
|
||||
%19905 = OpAccessChain %_ptr_Input_float %5305 %int_0 %int_1
|
||||
%7391 = OpLoad %float %19905
|
||||
%19981 = OpAccessChain %_ptr_Output_float %4930 %int_1
|
||||
OpStore %19981 %7391
|
||||
%19848 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_0
|
||||
%10874 = OpLoad %_arr_v4float_uint_16 %19848
|
||||
OpStore %5430 %10874
|
||||
OpEmitVertex
|
||||
%22812 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0
|
||||
%11398 = OpLoad %v4float %22812
|
||||
OpStore %18037 %11398
|
||||
%16622 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_1
|
||||
%7967 = OpLoad %float %16622
|
||||
OpStore %19981 %7967
|
||||
%16623 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_1
|
||||
%10875 = OpLoad %_arr_v4float_uint_16 %16623
|
||||
OpStore %5430 %10875
|
||||
OpEmitVertex
|
||||
%22813 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0
|
||||
%11399 = OpLoad %v4float %22813
|
||||
OpStore %18037 %11399
|
||||
%16624 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_1
|
||||
%7968 = OpLoad %float %16624
|
||||
OpStore %19981 %7968
|
||||
%16625 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_2
|
||||
%10876 = OpLoad %_arr_v4float_uint_16 %16625
|
||||
OpStore %5430 %10876
|
||||
OpEmitVertex
|
||||
OpEndPrimitive
|
||||
OpStore %18037 %11399
|
||||
OpStore %19981 %7968
|
||||
OpStore %5430 %10876
|
||||
OpEmitVertex
|
||||
OpStore %18037 %11398
|
||||
OpStore %19981 %7967
|
||||
OpStore %5430 %10875
|
||||
OpEmitVertex
|
||||
%8851 = OpFNegate %v2float %6765
|
||||
%13757 = OpVectorShuffle %v2float %11398 %11398 0 1
|
||||
%21457 = OpFAdd %v2float %8851 %13757
|
||||
%7434 = OpVectorShuffle %v2float %11399 %11399 0 1
|
||||
%21812 = OpFAdd %v2float %21457 %7434
|
||||
%18423 = OpCompositeExtract %float %21812 0
|
||||
%14087 = OpCompositeExtract %float %21812 1
|
||||
%7641 = OpCompositeExtract %float %11399 2
|
||||
%7472 = OpCompositeExtract %float %11399 3
|
||||
%18779 = OpCompositeConstruct %v4float %18423 %14087 %7641 %7472
|
||||
OpStore %18037 %18779
|
||||
OpStore %19981 %7968
|
||||
OpBranch %17364
|
||||
%17364 = OpLabel
|
||||
%22958 = OpPhi %int %int_0 %12148 %21301 %14551
|
||||
%24788 = OpSLessThan %bool %22958 %int_16
|
||||
OpLoopMerge %11792 %14551 None
|
||||
OpBranchConditional %24788 %14551 %11792
|
||||
%14551 = OpLabel
|
||||
%19388 = OpAccessChain %_ptr_Input_v4float %3302 %int_0 %22958
|
||||
%24048 = OpLoad %v4float %19388
|
||||
%19880 = OpFNegate %v4float %24048
|
||||
%6667 = OpAccessChain %_ptr_Input_v4float %3302 %int_1 %22958
|
||||
%6828 = OpLoad %v4float %6667
|
||||
%22565 = OpFAdd %v4float %19880 %6828
|
||||
%18783 = OpAccessChain %_ptr_Input_v4float %3302 %int_2 %22958
|
||||
%21055 = OpLoad %v4float %18783
|
||||
%22584 = OpFAdd %v4float %22565 %21055
|
||||
%18591 = OpAccessChain %_ptr_Output_v4float %5430 %22958
|
||||
OpStore %18591 %22584
|
||||
%21301 = OpIAdd %int %22958 %int_1
|
||||
OpBranch %17364
|
||||
%11792 = OpLabel
|
||||
OpEmitVertex
|
||||
OpEndPrimitive
|
||||
OpBranch %23648
|
||||
%9186 = OpLabel
|
||||
%20459 = OpAccessChain %_ptr_Output_v4float %4930 %int_0
|
||||
OpStore %20459 %7883
|
||||
%19906 = OpAccessChain %_ptr_Input_float %5305 %int_0 %int_1
|
||||
%7392 = OpLoad %float %19906
|
||||
%19982 = OpAccessChain %_ptr_Output_float %4930 %int_1
|
||||
OpStore %19982 %7392
|
||||
%19849 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_0
|
||||
%10877 = OpLoad %_arr_v4float_uint_16 %19849
|
||||
OpStore %5430 %10877
|
||||
OpEmitVertex
|
||||
%22814 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0
|
||||
%11400 = OpLoad %v4float %22814
|
||||
OpStore %20459 %11400
|
||||
%16626 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_1
|
||||
%7969 = OpLoad %float %16626
|
||||
OpStore %19982 %7969
|
||||
%16627 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_1
|
||||
%10878 = OpLoad %_arr_v4float_uint_16 %16627
|
||||
OpStore %5430 %10878
|
||||
OpEmitVertex
|
||||
%22815 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0
|
||||
%11401 = OpLoad %v4float %22815
|
||||
OpStore %20459 %11401
|
||||
%16628 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_1
|
||||
%7970 = OpLoad %float %16628
|
||||
OpStore %19982 %7970
|
||||
%16629 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_2
|
||||
%10879 = OpLoad %_arr_v4float_uint_16 %16629
|
||||
OpStore %5430 %10879
|
||||
OpEmitVertex
|
||||
OpEndPrimitive
|
||||
OpStore %20459 %7883
|
||||
OpStore %19982 %7392
|
||||
OpStore %5430 %10877
|
||||
OpEmitVertex
|
||||
OpStore %20459 %11401
|
||||
OpStore %19982 %7970
|
||||
OpStore %5430 %10879
|
||||
OpEmitVertex
|
||||
%12391 = OpVectorShuffle %v2float %11400 %11400 0 1
|
||||
%21222 = OpFNegate %v2float %12391
|
||||
%8335 = OpFAdd %v2float %6765 %21222
|
||||
%13861 = OpVectorShuffle %v2float %11401 %11401 0 1
|
||||
%21813 = OpFAdd %v2float %8335 %13861
|
||||
%18424 = OpCompositeExtract %float %21813 0
|
||||
%14088 = OpCompositeExtract %float %21813 1
|
||||
%7642 = OpCompositeExtract %float %11401 2
|
||||
%7473 = OpCompositeExtract %float %11401 3
|
||||
%18780 = OpCompositeConstruct %v4float %18424 %14088 %7642 %7473
|
||||
OpStore %20459 %18780
|
||||
OpStore %19982 %7970
|
||||
OpBranch %17365
|
||||
%17365 = OpLabel
|
||||
%22959 = OpPhi %int %int_0 %9186 %21302 %14552
|
||||
%24789 = OpSLessThan %bool %22959 %int_16
|
||||
OpLoopMerge %11793 %14552 None
|
||||
OpBranchConditional %24789 %14552 %11793
|
||||
%14552 = OpLabel
|
||||
%18211 = OpAccessChain %_ptr_Input_v4float %3302 %int_0 %22959
|
||||
%15629 = OpLoad %v4float %18211
|
||||
%21332 = OpAccessChain %_ptr_Input_v4float %3302 %int_1 %22959
|
||||
%12974 = OpLoad %v4float %21332
|
||||
%8884 = OpFNegate %v4float %12974
|
||||
%7862 = OpFAdd %v4float %15629 %8884
|
||||
%14199 = OpAccessChain %_ptr_Input_v4float %3302 %int_2 %22959
|
||||
%21056 = OpLoad %v4float %14199
|
||||
%22585 = OpFAdd %v4float %7862 %21056
|
||||
%18592 = OpAccessChain %_ptr_Output_v4float %5430 %22959
|
||||
OpStore %18592 %22585
|
||||
%21302 = OpIAdd %int %22959 %int_1
|
||||
OpBranch %17365
|
||||
%11793 = OpLabel
|
||||
OpEmitVertex
|
||||
OpEndPrimitive
|
||||
OpBranch %23648
|
||||
%23648 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
#endif
|
||||
|
||||
const uint32_t rect_list_gs[] = {
|
||||
0x07230203, 0x00010000, 0x0008000A, 0x000060D6, 0x00000000, 0x00020011,
|
||||
0x00000002, 0x00020011, 0x00000018, 0x0006000B, 0x00000001, 0x4C534C47,
|
||||
0x6474732E, 0x3035342E, 0x00000000, 0x0003000E, 0x00000000, 0x00000001,
|
||||
0x000C000F, 0x00000003, 0x0000161F, 0x6E69616D, 0x00000000, 0x000014B9,
|
||||
0x00001342, 0x00001536, 0x00000CE6, 0x00000FCC, 0x00001230, 0x00000E98,
|
||||
0x00030010, 0x0000161F, 0x00000016, 0x00040010, 0x0000161F, 0x00000000,
|
||||
0x00000001, 0x00030010, 0x0000161F, 0x0000001D, 0x00040010, 0x0000161F,
|
||||
0x0000001A, 0x00000006, 0x00050048, 0x00000408, 0x00000000, 0x0000000B,
|
||||
0x00000000, 0x00050048, 0x00000408, 0x00000001, 0x0000000B, 0x00000001,
|
||||
0x00030047, 0x00000408, 0x00000002, 0x00050048, 0x00000409, 0x00000000,
|
||||
0x0000000B, 0x00000000, 0x00050048, 0x00000409, 0x00000001, 0x0000000B,
|
||||
0x00000001, 0x00030047, 0x00000409, 0x00000002, 0x00040047, 0x00001536,
|
||||
0x0000001E, 0x00000000, 0x00040047, 0x00000CE6, 0x0000001E, 0x00000000,
|
||||
0x00040047, 0x00000FCC, 0x0000001E, 0x00000010, 0x00040047, 0x00001230,
|
||||
0x0000001E, 0x00000011, 0x00040047, 0x00000E98, 0x0000001E, 0x00000010,
|
||||
0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00030016,
|
||||
0x0000000D, 0x00000020, 0x00040017, 0x00000013, 0x0000000D, 0x00000002,
|
||||
0x00020014, 0x00000009, 0x00040017, 0x0000000F, 0x00000009, 0x00000002,
|
||||
0x00040017, 0x0000001D, 0x0000000D, 0x00000004, 0x0004001E, 0x00000408,
|
||||
0x0000001D, 0x0000000D, 0x00040015, 0x0000000B, 0x00000020, 0x00000000,
|
||||
0x0004002B, 0x0000000B, 0x00000A13, 0x00000003, 0x0004001C, 0x0000085F,
|
||||
0x00000408, 0x00000A13, 0x00040020, 0x00000ADC, 0x00000001, 0x0000085F,
|
||||
0x0004003B, 0x00000ADC, 0x000014B9, 0x00000001, 0x00040015, 0x0000000C,
|
||||
0x00000020, 0x00000001, 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000,
|
||||
0x0004002B, 0x0000000C, 0x00000A11, 0x00000002, 0x0004002B, 0x0000000B,
|
||||
0x00000A0A, 0x00000000, 0x00040020, 0x0000028A, 0x00000001, 0x0000000D,
|
||||
0x0004002B, 0x0000000C, 0x00000A0E, 0x00000001, 0x0004002B, 0x0000000B,
|
||||
0x00000A0D, 0x00000001, 0x0004002B, 0x0000000D, 0x00000030, 0x3A83126F,
|
||||
0x00040020, 0x0000029A, 0x00000001, 0x0000001D, 0x0004001E, 0x00000409,
|
||||
0x0000001D, 0x0000000D, 0x00040020, 0x00000685, 0x00000003, 0x00000409,
|
||||
0x0004003B, 0x00000685, 0x00001342, 0x00000003, 0x00040020, 0x0000029B,
|
||||
0x00000003, 0x0000001D, 0x00040020, 0x0000028B, 0x00000003, 0x0000000D,
|
||||
0x0004002B, 0x0000000B, 0x00000A3A, 0x00000010, 0x0004001C, 0x000008F6,
|
||||
0x0000001D, 0x00000A3A, 0x00040020, 0x00000B73, 0x00000003, 0x000008F6,
|
||||
0x0004003B, 0x00000B73, 0x00001536, 0x00000003, 0x0004001C, 0x0000084A,
|
||||
0x000008F6, 0x00000A13, 0x00040020, 0x00000AC7, 0x00000001, 0x0000084A,
|
||||
0x0004003B, 0x00000AC7, 0x00000CE6, 0x00000001, 0x00040020, 0x00000B74,
|
||||
0x00000001, 0x000008F6, 0x0004002B, 0x0000000C, 0x00000A3B, 0x00000010,
|
||||
0x0004001C, 0x00000352, 0x00000013, 0x00000A13, 0x00040020, 0x000005CF,
|
||||
0x00000001, 0x00000352, 0x0004003B, 0x000005CF, 0x00000FCC, 0x00000001,
|
||||
0x0004001C, 0x00000298, 0x0000000D, 0x00000A13, 0x00040020, 0x00000515,
|
||||
0x00000001, 0x00000298, 0x0004003B, 0x00000515, 0x00001230, 0x00000001,
|
||||
0x00040020, 0x00000290, 0x00000003, 0x00000013, 0x0004003B, 0x00000290,
|
||||
0x00000E98, 0x00000003, 0x0005002C, 0x00000013, 0x000006DF, 0x00000030,
|
||||
0x00000030, 0x00050036, 0x00000008, 0x0000161F, 0x00000000, 0x00000502,
|
||||
0x000200F8, 0x00005D6B, 0x00070041, 0x0000028A, 0x00001BD9, 0x000014B9,
|
||||
0x00000A11, 0x00000A0B, 0x00000A0A, 0x0004003D, 0x0000000D, 0x00003D0B,
|
||||
0x00001BD9, 0x00070041, 0x0000028A, 0x00004FD7, 0x000014B9, 0x00000A0E,
|
||||
0x00000A0B, 0x00000A0D, 0x0004003D, 0x0000000D, 0x00004DB1, 0x00004FD7,
|
||||
0x00050050, 0x00000013, 0x00002AA5, 0x00003D0B, 0x00004DB1, 0x00060041,
|
||||
0x0000029A, 0x000060C9, 0x000014B9, 0x00000A0B, 0x00000A0B, 0x0004003D,
|
||||
0x0000001D, 0x00001ECB, 0x000060C9, 0x0007004F, 0x00000013, 0x00001A6D,
|
||||
0x00001ECB, 0x00001ECB, 0x00000000, 0x00000001, 0x00050083, 0x00000013,
|
||||
0x00003D7B, 0x00001A6D, 0x00002AA5, 0x0006000C, 0x00000013, 0x00001E4D,
|
||||
0x00000001, 0x00000004, 0x00003D7B, 0x000500BC, 0x0000000F, 0x00004A4D,
|
||||
0x00001E4D, 0x000006DF, 0x0004009B, 0x00000009, 0x00003D5F, 0x00004A4D,
|
||||
0x000400A8, 0x00000009, 0x00002C8A, 0x00003D5F, 0x000300F7, 0x000033E6,
|
||||
0x00000000, 0x000400FA, 0x00002C8A, 0x00002F61, 0x000033E6, 0x000200F8,
|
||||
0x00002F61, 0x00070041, 0x0000028A, 0x00004722, 0x000014B9, 0x00000A0E,
|
||||
0x00000A0B, 0x00000A0A, 0x0004003D, 0x0000000D, 0x00003D0C, 0x00004722,
|
||||
0x00070041, 0x0000028A, 0x00004FD8, 0x000014B9, 0x00000A11, 0x00000A0B,
|
||||
0x00000A0D, 0x0004003D, 0x0000000D, 0x00005297, 0x00004FD8, 0x00050050,
|
||||
0x00000013, 0x000044EB, 0x00003D0C, 0x00005297, 0x00050083, 0x00000013,
|
||||
0x00003C82, 0x00001A6D, 0x000044EB, 0x0006000C, 0x00000013, 0x00005F56,
|
||||
0x00000001, 0x00000004, 0x00003C82, 0x000500BC, 0x0000000F, 0x00005050,
|
||||
0x00005F56, 0x000006DF, 0x0004009B, 0x00000009, 0x00005134, 0x00005050,
|
||||
0x000200F9, 0x000033E6, 0x000200F8, 0x000033E6, 0x000700F5, 0x00000009,
|
||||
0x00002AAC, 0x00003D5F, 0x00005D6B, 0x00005134, 0x00002F61, 0x000300F7,
|
||||
0x00005C60, 0x00000000, 0x000400FA, 0x00002AAC, 0x00002F74, 0x000023E2,
|
||||
0x000200F8, 0x00002F74, 0x00050041, 0x0000029B, 0x00004675, 0x00001342,
|
||||
0x00000A0B, 0x0003003E, 0x00004675, 0x00001ECB, 0x00060041, 0x0000028A,
|
||||
0x00004DC1, 0x000014B9, 0x00000A0B, 0x00000A0E, 0x0004003D, 0x0000000D,
|
||||
0x00001CDF, 0x00004DC1, 0x00050041, 0x0000028B, 0x00004E0D, 0x00001342,
|
||||
0x00000A0E, 0x0003003E, 0x00004E0D, 0x00001CDF, 0x00050041, 0x00000B74,
|
||||
0x00004D88, 0x00000CE6, 0x00000A0B, 0x0004003D, 0x000008F6, 0x00002A7A,
|
||||
0x00004D88, 0x0003003E, 0x00001536, 0x00002A7A, 0x000100DA, 0x00060041,
|
||||
0x0000029A, 0x0000591C, 0x000014B9, 0x00000A0E, 0x00000A0B, 0x0004003D,
|
||||
0x0000001D, 0x00002C86, 0x0000591C, 0x0003003E, 0x00004675, 0x00002C86,
|
||||
0x00060041, 0x0000028A, 0x000040EE, 0x000014B9, 0x00000A0E, 0x00000A0E,
|
||||
0x0004003D, 0x0000000D, 0x00001F1F, 0x000040EE, 0x0003003E, 0x00004E0D,
|
||||
0x00001F1F, 0x00050041, 0x00000B74, 0x000040EF, 0x00000CE6, 0x00000A0E,
|
||||
0x0004003D, 0x000008F6, 0x00002A7B, 0x000040EF, 0x0003003E, 0x00001536,
|
||||
0x00002A7B, 0x000100DA, 0x00060041, 0x0000029A, 0x0000591D, 0x000014B9,
|
||||
0x00000A11, 0x00000A0B, 0x0004003D, 0x0000001D, 0x00002C87, 0x0000591D,
|
||||
0x0003003E, 0x00004675, 0x00002C87, 0x00060041, 0x0000028A, 0x000040F0,
|
||||
0x000014B9, 0x00000A11, 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001F20,
|
||||
0x000040F0, 0x0003003E, 0x00004E0D, 0x00001F20, 0x00050041, 0x00000B74,
|
||||
0x000040F1, 0x00000CE6, 0x00000A11, 0x0004003D, 0x000008F6, 0x00002A7C,
|
||||
0x000040F1, 0x0003003E, 0x00001536, 0x00002A7C, 0x000100DA, 0x000100DB,
|
||||
0x0003003E, 0x00004675, 0x00002C87, 0x0003003E, 0x00004E0D, 0x00001F20,
|
||||
0x0003003E, 0x00001536, 0x00002A7C, 0x000100DA, 0x0003003E, 0x00004675,
|
||||
0x00002C86, 0x0003003E, 0x00004E0D, 0x00001F1F, 0x0003003E, 0x00001536,
|
||||
0x00002A7B, 0x000100DA, 0x0004007F, 0x00000013, 0x00002293, 0x00001A6D,
|
||||
0x0007004F, 0x00000013, 0x000035BD, 0x00002C86, 0x00002C86, 0x00000000,
|
||||
0x00000001, 0x00050081, 0x00000013, 0x000053D1, 0x00002293, 0x000035BD,
|
||||
0x0007004F, 0x00000013, 0x00001D0A, 0x00002C87, 0x00002C87, 0x00000000,
|
||||
0x00000001, 0x00050081, 0x00000013, 0x00005534, 0x000053D1, 0x00001D0A,
|
||||
0x00050051, 0x0000000D, 0x000047F7, 0x00005534, 0x00000000, 0x00050051,
|
||||
0x0000000D, 0x00003707, 0x00005534, 0x00000001, 0x00050051, 0x0000000D,
|
||||
0x00001DD9, 0x00002C87, 0x00000002, 0x00050051, 0x0000000D, 0x00001D30,
|
||||
0x00002C87, 0x00000003, 0x00070050, 0x0000001D, 0x0000495B, 0x000047F7,
|
||||
0x00003707, 0x00001DD9, 0x00001D30, 0x0003003E, 0x00004675, 0x0000495B,
|
||||
0x0003003E, 0x00004E0D, 0x00001F20, 0x000200F9, 0x000043D4, 0x000200F8,
|
||||
0x000043D4, 0x000700F5, 0x0000000C, 0x000059AE, 0x00000A0B, 0x00002F74,
|
||||
0x00005335, 0x000038D7, 0x000500B1, 0x00000009, 0x000060D4, 0x000059AE,
|
||||
0x00000A3B, 0x000400F6, 0x00002E10, 0x000038D7, 0x00000000, 0x000400FA,
|
||||
0x000060D4, 0x000038D7, 0x00002E10, 0x000200F8, 0x000038D7, 0x00060041,
|
||||
0x0000029A, 0x00004BBC, 0x00000CE6, 0x00000A0B, 0x000059AE, 0x0004003D,
|
||||
0x0000001D, 0x00005DF0, 0x00004BBC, 0x0004007F, 0x0000001D, 0x00004DA8,
|
||||
0x00005DF0, 0x00060041, 0x0000029A, 0x00001A0B, 0x00000CE6, 0x00000A0E,
|
||||
0x000059AE, 0x0004003D, 0x0000001D, 0x00001AAC, 0x00001A0B, 0x00050081,
|
||||
0x0000001D, 0x00005825, 0x00004DA8, 0x00001AAC, 0x00060041, 0x0000029A,
|
||||
0x0000495F, 0x00000CE6, 0x00000A11, 0x000059AE, 0x0004003D, 0x0000001D,
|
||||
0x0000523F, 0x0000495F, 0x00050081, 0x0000001D, 0x00005838, 0x00005825,
|
||||
0x0000523F, 0x00050041, 0x0000029B, 0x0000489F, 0x00001536, 0x000059AE,
|
||||
0x0003003E, 0x0000489F, 0x00005838, 0x00050080, 0x0000000C, 0x00005335,
|
||||
0x000059AE, 0x00000A0E, 0x000200F9, 0x000043D4, 0x000200F8, 0x00002E10,
|
||||
0x000100DA, 0x000100DB, 0x000200F9, 0x00005C60, 0x000200F8, 0x000023E2,
|
||||
0x00050041, 0x0000029B, 0x00004FEB, 0x00001342, 0x00000A0B, 0x0003003E,
|
||||
0x00004FEB, 0x00001ECB, 0x00060041, 0x0000028A, 0x00004DC2, 0x000014B9,
|
||||
0x00000A0B, 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001CE0, 0x00004DC2,
|
||||
0x00050041, 0x0000028B, 0x00004E0E, 0x00001342, 0x00000A0E, 0x0003003E,
|
||||
0x00004E0E, 0x00001CE0, 0x00050041, 0x00000B74, 0x00004D89, 0x00000CE6,
|
||||
0x00000A0B, 0x0004003D, 0x000008F6, 0x00002A7D, 0x00004D89, 0x0003003E,
|
||||
0x00001536, 0x00002A7D, 0x000100DA, 0x00060041, 0x0000029A, 0x0000591E,
|
||||
0x000014B9, 0x00000A0E, 0x00000A0B, 0x0004003D, 0x0000001D, 0x00002C88,
|
||||
0x0000591E, 0x0003003E, 0x00004FEB, 0x00002C88, 0x00060041, 0x0000028A,
|
||||
0x000040F2, 0x000014B9, 0x00000A0E, 0x00000A0E, 0x0004003D, 0x0000000D,
|
||||
0x00001F21, 0x000040F2, 0x0003003E, 0x00004E0E, 0x00001F21, 0x00050041,
|
||||
0x00000B74, 0x000040F3, 0x00000CE6, 0x00000A0E, 0x0004003D, 0x000008F6,
|
||||
0x00002A7E, 0x000040F3, 0x0003003E, 0x00001536, 0x00002A7E, 0x000100DA,
|
||||
0x00060041, 0x0000029A, 0x0000591F, 0x000014B9, 0x00000A11, 0x00000A0B,
|
||||
0x0004003D, 0x0000001D, 0x00002C89, 0x0000591F, 0x0003003E, 0x00004FEB,
|
||||
0x00002C89, 0x00060041, 0x0000028A, 0x000040F4, 0x000014B9, 0x00000A11,
|
||||
0x00000A0E, 0x0004003D, 0x0000000D, 0x00001F22, 0x000040F4, 0x0003003E,
|
||||
0x00004E0E, 0x00001F22, 0x00050041, 0x00000B74, 0x000040F5, 0x00000CE6,
|
||||
0x00000A11, 0x0004003D, 0x000008F6, 0x00002A7F, 0x000040F5, 0x0003003E,
|
||||
0x00001536, 0x00002A7F, 0x000100DA, 0x000100DB, 0x0003003E, 0x00004FEB,
|
||||
0x00001ECB, 0x0003003E, 0x00004E0E, 0x00001CE0, 0x0003003E, 0x00001536,
|
||||
0x00002A7D, 0x000100DA, 0x0003003E, 0x00004FEB, 0x00002C89, 0x0003003E,
|
||||
0x00004E0E, 0x00001F22, 0x0003003E, 0x00001536, 0x00002A7F, 0x000100DA,
|
||||
0x0007004F, 0x00000013, 0x00003067, 0x00002C88, 0x00002C88, 0x00000000,
|
||||
0x00000001, 0x0004007F, 0x00000013, 0x000052E6, 0x00003067, 0x00050081,
|
||||
0x00000013, 0x0000208F, 0x00001A6D, 0x000052E6, 0x0007004F, 0x00000013,
|
||||
0x00003625, 0x00002C89, 0x00002C89, 0x00000000, 0x00000001, 0x00050081,
|
||||
0x00000013, 0x00005535, 0x0000208F, 0x00003625, 0x00050051, 0x0000000D,
|
||||
0x000047F8, 0x00005535, 0x00000000, 0x00050051, 0x0000000D, 0x00003708,
|
||||
0x00005535, 0x00000001, 0x00050051, 0x0000000D, 0x00001DDA, 0x00002C89,
|
||||
0x00000002, 0x00050051, 0x0000000D, 0x00001D31, 0x00002C89, 0x00000003,
|
||||
0x00070050, 0x0000001D, 0x0000495C, 0x000047F8, 0x00003708, 0x00001DDA,
|
||||
0x00001D31, 0x0003003E, 0x00004FEB, 0x0000495C, 0x0003003E, 0x00004E0E,
|
||||
0x00001F22, 0x000200F9, 0x000043D5, 0x000200F8, 0x000043D5, 0x000700F5,
|
||||
0x0000000C, 0x000059AF, 0x00000A0B, 0x000023E2, 0x00005336, 0x000038D8,
|
||||
0x000500B1, 0x00000009, 0x000060D5, 0x000059AF, 0x00000A3B, 0x000400F6,
|
||||
0x00002E11, 0x000038D8, 0x00000000, 0x000400FA, 0x000060D5, 0x000038D8,
|
||||
0x00002E11, 0x000200F8, 0x000038D8, 0x00060041, 0x0000029A, 0x00004723,
|
||||
0x00000CE6, 0x00000A0B, 0x000059AF, 0x0004003D, 0x0000001D, 0x00003D0D,
|
||||
0x00004723, 0x00060041, 0x0000029A, 0x00005354, 0x00000CE6, 0x00000A0E,
|
||||
0x000059AF, 0x0004003D, 0x0000001D, 0x000032AE, 0x00005354, 0x0004007F,
|
||||
0x0000001D, 0x000022B4, 0x000032AE, 0x00050081, 0x0000001D, 0x00001EB6,
|
||||
0x00003D0D, 0x000022B4, 0x00060041, 0x0000029A, 0x00003777, 0x00000CE6,
|
||||
0x00000A11, 0x000059AF, 0x0004003D, 0x0000001D, 0x00005240, 0x00003777,
|
||||
0x00050081, 0x0000001D, 0x00005839, 0x00001EB6, 0x00005240, 0x00050041,
|
||||
0x0000029B, 0x000048A0, 0x00001536, 0x000059AF, 0x0003003E, 0x000048A0,
|
||||
0x00005839, 0x00050080, 0x0000000C, 0x00005336, 0x000059AF, 0x00000A0E,
|
||||
0x000200F9, 0x000043D5, 0x000200F8, 0x00002E11, 0x000100DA, 0x000100DB,
|
||||
0x000200F9, 0x00005C60, 0x000200F8, 0x00005C60, 0x000100FD, 0x00010038,
|
||||
};
|
|
@ -1,35 +0,0 @@
|
|||
// NOTE: This file is compiled and embedded into the exe.
|
||||
// Use `xenia-build genspirv` and check in any changes under bin/.
|
||||
|
||||
#version 450 core
|
||||
#extension all : warn
|
||||
#extension GL_ARB_shading_language_420pack : require
|
||||
#extension GL_ARB_separate_shader_objects : require
|
||||
#extension GL_ARB_explicit_attrib_location : require
|
||||
|
||||
layout(set = 0, binding = 1) uniform consts_type {
|
||||
vec4 float_consts[512];
|
||||
uint loop_consts[32];
|
||||
uint bool_consts[8];
|
||||
} consts;
|
||||
|
||||
layout(push_constant) uniform push_consts_type {
|
||||
vec4 window_scale;
|
||||
vec4 vtx_fmt;
|
||||
vec4 point_size;
|
||||
vec4 alpha_test;
|
||||
uint ps_param_gen;
|
||||
} push_constants;
|
||||
|
||||
layout(set = 1, binding = 0) uniform sampler1D textures1D[32];
|
||||
layout(set = 1, binding = 1) uniform sampler2D textures2D[32];
|
||||
layout(set = 1, binding = 2) uniform sampler3D textures3D[32];
|
||||
layout(set = 1, binding = 3) uniform samplerCube textures4D[32];
|
||||
|
||||
layout(location = 0) in vec4 in_interpolators[16];
|
||||
layout(location = 0) out vec4 oC[4];
|
||||
|
||||
void main() {
|
||||
// This shader does absolutely nothing!
|
||||
return;
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
// NOTE: This file is compiled and embedded into the exe.
|
||||
// Use `xenia-build genspirv` and check in any changes under bin/.
|
||||
|
||||
#version 450 core
|
||||
#extension all : warn
|
||||
#extension GL_ARB_separate_shader_objects : require
|
||||
#extension GL_ARB_explicit_attrib_location : require
|
||||
|
||||
in gl_PerVertex {
|
||||
vec4 gl_Position;
|
||||
float gl_PointSize;
|
||||
// float gl_ClipDistance[];
|
||||
} gl_in[];
|
||||
|
||||
out gl_PerVertex {
|
||||
vec4 gl_Position;
|
||||
float gl_PointSize;
|
||||
// float gl_ClipDistance[];
|
||||
};
|
||||
|
||||
layout(location = 0) in vec4 in_interpolators[][16];
|
||||
layout(location = 0) out vec4 out_interpolators[16];
|
||||
|
||||
layout(location = 16) in vec2 _in_point_coord_unused[];
|
||||
layout(location = 17) in float _in_point_size_unused[];
|
||||
|
||||
layout(location = 16) out vec2 _out_point_coord_unused;
|
||||
|
||||
layout(lines_adjacency) in;
|
||||
layout(line_strip, max_vertices = 5) out;
|
||||
void main() {
|
||||
gl_Position = gl_in[0].gl_Position;
|
||||
gl_PointSize = gl_in[0].gl_PointSize;
|
||||
out_interpolators = in_interpolators[0];
|
||||
EmitVertex();
|
||||
gl_Position = gl_in[1].gl_Position;
|
||||
gl_PointSize = gl_in[1].gl_PointSize;
|
||||
out_interpolators = in_interpolators[1];
|
||||
EmitVertex();
|
||||
gl_Position = gl_in[2].gl_Position;
|
||||
gl_PointSize = gl_in[2].gl_PointSize;
|
||||
out_interpolators = in_interpolators[2];
|
||||
EmitVertex();
|
||||
gl_Position = gl_in[3].gl_Position;
|
||||
gl_PointSize = gl_in[3].gl_PointSize;
|
||||
out_interpolators = in_interpolators[3];
|
||||
EmitVertex();
|
||||
gl_Position = gl_in[0].gl_Position;
|
||||
gl_PointSize = gl_in[0].gl_PointSize;
|
||||
out_interpolators = in_interpolators[0];
|
||||
EmitVertex();
|
||||
EndPrimitive();
|
||||
}
|
|
@ -1,63 +0,0 @@
|
|||
// NOTE: This file is compiled and embedded into the exe.
|
||||
// Use `xenia-build genspirv` and check in any changes under bin/.
|
||||
|
||||
#version 450 core
|
||||
#extension all : warn
|
||||
#extension GL_ARB_shading_language_420pack : require
|
||||
#extension GL_ARB_separate_shader_objects : require
|
||||
#extension GL_ARB_explicit_attrib_location : require
|
||||
|
||||
layout(push_constant) uniform push_consts_type {
|
||||
vec4 window_scale;
|
||||
vec4 vtx_fmt;
|
||||
vec4 point_size;
|
||||
vec4 alpha_test;
|
||||
uint ps_param_gen;
|
||||
} push_constants;
|
||||
|
||||
in gl_PerVertex {
|
||||
vec4 gl_Position;
|
||||
// float gl_ClipDistance[];
|
||||
} gl_in[];
|
||||
|
||||
out gl_PerVertex {
|
||||
vec4 gl_Position;
|
||||
// float gl_ClipDistance[];
|
||||
};
|
||||
|
||||
layout(location = 0) in vec4 in_interpolators[][16];
|
||||
layout(location = 16) in vec2 in_point_coord_unused[];
|
||||
layout(location = 17) in float point_size[];
|
||||
|
||||
layout(location = 0) out vec4 out_interpolators[16];
|
||||
layout(location = 16) out vec2 point_coord;
|
||||
|
||||
// TODO(benvanik): clamp to min/max.
|
||||
// TODO(benvanik): figure out how to see which interpolator gets adjusted.
|
||||
|
||||
layout(points) in;
|
||||
layout(triangle_strip, max_vertices = 4) out;
|
||||
|
||||
void main() {
|
||||
const vec2 offsets[4] = {
|
||||
vec2(-1.0, 1.0),
|
||||
vec2( 1.0, 1.0),
|
||||
vec2(-1.0, -1.0),
|
||||
vec2( 1.0, -1.0),
|
||||
};
|
||||
vec4 pos = gl_in[0].gl_Position;
|
||||
vec2 window_scaled_psize = push_constants.point_size.xy;
|
||||
// Shader header writes -1.0f to pointSize by default, so any positive value
|
||||
// means that it was overwritten by the translated vertex shader.
|
||||
if (point_size[0] > 0.0f) {
|
||||
window_scaled_psize = vec2(point_size[0]);
|
||||
}
|
||||
window_scaled_psize /= push_constants.window_scale.zw;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
gl_Position = vec4(pos.xy + (offsets[i] * window_scaled_psize), pos.zw);
|
||||
out_interpolators = in_interpolators[0];
|
||||
point_coord = max(offsets[i], vec2(0.0f));
|
||||
EmitVertex();
|
||||
}
|
||||
EndPrimitive();
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
// NOTE: This file is compiled and embedded into the exe.
|
||||
// Use `xenia-build genspirv` and check in any changes under bin/.
|
||||
|
||||
#version 450 core
|
||||
#extension all : warn
|
||||
#extension GL_ARB_shading_language_420pack : require
|
||||
#extension GL_ARB_separate_shader_objects : require
|
||||
#extension GL_ARB_explicit_attrib_location : require
|
||||
|
||||
in gl_PerVertex {
|
||||
vec4 gl_Position;
|
||||
float gl_PointSize;
|
||||
// float gl_ClipDistance[];
|
||||
} gl_in[];
|
||||
|
||||
out gl_PerVertex {
|
||||
vec4 gl_Position;
|
||||
float gl_PointSize;
|
||||
// float gl_ClipDistance[];
|
||||
};
|
||||
|
||||
layout(location = 0) in vec4 in_interpolators[][16];
|
||||
layout(location = 0) out vec4 out_interpolators[16];
|
||||
|
||||
layout(location = 16) in vec2 _in_point_coord_unused[];
|
||||
layout(location = 17) in float _in_point_size_unused[];
|
||||
|
||||
layout(location = 16) out vec2 _out_point_coord_unused;
|
||||
|
||||
layout(lines_adjacency) in;
|
||||
layout(triangle_strip, max_vertices = 4) out;
|
||||
void main() {
|
||||
const int order[4] = { 0, 1, 3, 2 };
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
int input_index = order[i];
|
||||
gl_Position = gl_in[input_index].gl_Position;
|
||||
gl_PointSize = gl_in[input_index].gl_PointSize;
|
||||
out_interpolators = in_interpolators[input_index];
|
||||
EmitVertex();
|
||||
}
|
||||
EndPrimitive();
|
||||
}
|
|
@ -1,124 +0,0 @@
|
|||
// NOTE: This file is compiled and embedded into the exe.
|
||||
// Use `xenia-build genspirv` and check in any changes under bin/.
|
||||
|
||||
#version 450 core
|
||||
#extension all : warn
|
||||
#extension GL_ARB_separate_shader_objects : require
|
||||
#extension GL_ARB_explicit_attrib_location : require
|
||||
|
||||
in gl_PerVertex {
|
||||
vec4 gl_Position;
|
||||
float gl_PointSize;
|
||||
// float gl_ClipDistance[];
|
||||
} gl_in[];
|
||||
|
||||
out gl_PerVertex {
|
||||
vec4 gl_Position;
|
||||
float gl_PointSize;
|
||||
// float gl_ClipDistance[];
|
||||
};
|
||||
|
||||
layout(location = 0) in vec4 in_interpolators[][16];
|
||||
layout(location = 0) out vec4 out_interpolators[16];
|
||||
|
||||
layout(location = 16) in vec2 _in_point_coord_unused[];
|
||||
layout(location = 17) in float _in_point_size_unused[];
|
||||
|
||||
layout(location = 16) out vec2 _out_point_coord_unused;
|
||||
|
||||
layout(triangles) in;
|
||||
layout(triangle_strip, max_vertices = 6) out;
|
||||
|
||||
bool equalsEpsilon(vec2 left, vec2 right, float epsilon) {
|
||||
return all(lessThanEqual(abs(left - right), vec2(epsilon)));
|
||||
}
|
||||
|
||||
void main() {
|
||||
// Most games use a left-aligned form.
|
||||
if (equalsEpsilon(gl_in[0].gl_Position.xy, vec2(gl_in[2].gl_Position.x, gl_in[1].gl_Position.y), 0.001) ||
|
||||
equalsEpsilon(gl_in[0].gl_Position.xy, vec2(gl_in[1].gl_Position.x, gl_in[2].gl_Position.y), 0.001)) {
|
||||
// 0 ------ 1 0: -1,-1
|
||||
// | - | 1: 1,-1
|
||||
// | // | 2: -1, 1
|
||||
// | - | 3: [ 1, 1 ]
|
||||
// 2 ----- [3]
|
||||
//
|
||||
// 0 ------ 2 0: -1,-1
|
||||
// | - | 1: -1, 1
|
||||
// | // | 2: 1,-1
|
||||
// | - | 3: [ 1, 1 ]
|
||||
// 1 ------[3]
|
||||
gl_Position = gl_in[0].gl_Position;
|
||||
gl_PointSize = gl_in[0].gl_PointSize;
|
||||
out_interpolators = in_interpolators[0];
|
||||
EmitVertex();
|
||||
gl_Position = gl_in[1].gl_Position;
|
||||
gl_PointSize = gl_in[1].gl_PointSize;
|
||||
out_interpolators = in_interpolators[1];
|
||||
EmitVertex();
|
||||
gl_Position = gl_in[2].gl_Position;
|
||||
gl_PointSize = gl_in[2].gl_PointSize;
|
||||
out_interpolators = in_interpolators[2];
|
||||
EmitVertex();
|
||||
EndPrimitive();
|
||||
gl_Position = gl_in[2].gl_Position;
|
||||
gl_PointSize = gl_in[2].gl_PointSize;
|
||||
out_interpolators = in_interpolators[2];
|
||||
EmitVertex();
|
||||
gl_Position = gl_in[1].gl_Position;
|
||||
gl_PointSize = gl_in[1].gl_PointSize;
|
||||
out_interpolators = in_interpolators[1];
|
||||
EmitVertex();
|
||||
gl_Position = vec4((-gl_in[0].gl_Position.xy) +
|
||||
gl_in[1].gl_Position.xy +
|
||||
gl_in[2].gl_Position.xy,
|
||||
gl_in[2].gl_Position.zw);
|
||||
gl_PointSize = gl_in[2].gl_PointSize;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
out_interpolators[i] = (-in_interpolators[0][i]) +
|
||||
in_interpolators[1][i] +
|
||||
in_interpolators[2][i];
|
||||
}
|
||||
EmitVertex();
|
||||
EndPrimitive();
|
||||
} else {
|
||||
// 0 ------ 1 0: -1,-1
|
||||
// | - | 1: 1,-1
|
||||
// | \\ | 2: 1, 1
|
||||
// | - | 3: [-1, 1 ]
|
||||
// [3] ----- 2
|
||||
gl_Position = gl_in[0].gl_Position;
|
||||
gl_PointSize = gl_in[0].gl_PointSize;
|
||||
out_interpolators = in_interpolators[0];
|
||||
EmitVertex();
|
||||
gl_Position = gl_in[1].gl_Position;
|
||||
gl_PointSize = gl_in[1].gl_PointSize;
|
||||
out_interpolators = in_interpolators[1];
|
||||
EmitVertex();
|
||||
gl_Position = gl_in[2].gl_Position;
|
||||
gl_PointSize = gl_in[2].gl_PointSize;
|
||||
out_interpolators = in_interpolators[2];
|
||||
EmitVertex();
|
||||
EndPrimitive();
|
||||
gl_Position = gl_in[0].gl_Position;
|
||||
gl_PointSize = gl_in[0].gl_PointSize;
|
||||
out_interpolators = in_interpolators[0];
|
||||
EmitVertex();
|
||||
gl_Position = gl_in[2].gl_Position;
|
||||
gl_PointSize = gl_in[2].gl_PointSize;
|
||||
out_interpolators = in_interpolators[2];
|
||||
EmitVertex();
|
||||
gl_Position = vec4( gl_in[0].gl_Position.xy +
|
||||
(-gl_in[1].gl_Position.xy) +
|
||||
gl_in[2].gl_Position.xy,
|
||||
gl_in[2].gl_Position.zw);
|
||||
gl_PointSize = gl_in[2].gl_PointSize;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
out_interpolators[i] = in_interpolators[0][i] +
|
||||
(-in_interpolators[1][i]) +
|
||||
in_interpolators[2][i];
|
||||
}
|
||||
EmitVertex();
|
||||
EndPrimitive();
|
||||
}
|
||||
}
|
|
@ -1,146 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/vulkan/texture_config.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
#define COMP_SWIZ(r, g, b, a) \
|
||||
{ \
|
||||
VK_COMPONENT_SWIZZLE_##r, VK_COMPONENT_SWIZZLE_##g, \
|
||||
VK_COMPONENT_SWIZZLE_##b, VK_COMPONENT_SWIZZLE_##a \
|
||||
}
|
||||
#define VEC_SWIZ(x, y, z, w) \
|
||||
{ \
|
||||
VECTOR_SWIZZLE_##x, VECTOR_SWIZZLE_##y, VECTOR_SWIZZLE_##z, \
|
||||
VECTOR_SWIZZLE_##w \
|
||||
}
|
||||
|
||||
#define RGBA COMP_SWIZ(R, G, B, A)
|
||||
#define ___R COMP_SWIZ(IDENTITY, IDENTITY, IDENTITY, R)
|
||||
#define RRRR COMP_SWIZ(R, R, R, R)
|
||||
|
||||
#define XYZW VEC_SWIZ(X, Y, Z, W)
|
||||
#define YXWZ VEC_SWIZ(Y, X, W, Z)
|
||||
#define ZYXW VEC_SWIZ(Z, Y, X, W)
|
||||
|
||||
#define ___(format) \
|
||||
{ VK_FORMAT_##format }
|
||||
#define _c_(format, component_swizzle) \
|
||||
{ VK_FORMAT_##format, component_swizzle, XYZW }
|
||||
#define __v(format, vector_swizzle) \
|
||||
{ VK_FORMAT_##format, RGBA, vector_swizzle }
|
||||
#define _cv(format, component_swizzle, vector_swizzle) \
|
||||
{ VK_FORMAT_##format, component_swizzle, vector_swizzle }
|
||||
|
||||
// https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkFormat.html
|
||||
const TextureConfig texture_configs[64] = {
|
||||
/* k_1_REVERSE */ ___(UNDEFINED),
|
||||
/* k_1 */ ___(UNDEFINED),
|
||||
/* k_8 */ ___(R8_UNORM),
|
||||
/* k_1_5_5_5 */ __v(A1R5G5B5_UNORM_PACK16, ZYXW),
|
||||
/* k_5_6_5 */ __v(R5G6B5_UNORM_PACK16, ZYXW),
|
||||
/* k_6_5_5 */ ___(UNDEFINED),
|
||||
/* k_8_8_8_8 */ ___(R8G8B8A8_UNORM),
|
||||
/* k_2_10_10_10 */ ___(A2R10G10B10_UNORM_PACK32),
|
||||
/* k_8_A */ ___(R8_UNORM),
|
||||
/* k_8_B */ ___(UNDEFINED),
|
||||
/* k_8_8 */ ___(R8G8_UNORM),
|
||||
/* k_Cr_Y1_Cb_Y0_REP */ ___(UNDEFINED),
|
||||
/* k_Y1_Cr_Y0_Cb_REP */ ___(UNDEFINED),
|
||||
/* k_16_16_EDRAM */ ___(UNDEFINED),
|
||||
/* k_8_8_8_8_A */ ___(UNDEFINED),
|
||||
/* k_4_4_4_4 */ __v(R4G4B4A4_UNORM_PACK16, YXWZ),
|
||||
// TODO: Verify if these two are correct (I think not).
|
||||
/* k_10_11_11 */ ___(B10G11R11_UFLOAT_PACK32),
|
||||
/* k_11_11_10 */ ___(B10G11R11_UFLOAT_PACK32),
|
||||
|
||||
/* k_DXT1 */ ___(BC1_RGBA_UNORM_BLOCK),
|
||||
/* k_DXT2_3 */ ___(BC2_UNORM_BLOCK),
|
||||
/* k_DXT4_5 */ ___(BC3_UNORM_BLOCK),
|
||||
/* k_16_16_16_16_EDRAM */ ___(UNDEFINED),
|
||||
|
||||
// TODO: D24 unsupported on AMD.
|
||||
/* k_24_8 */ ___(D24_UNORM_S8_UINT),
|
||||
/* k_24_8_FLOAT */ ___(D32_SFLOAT_S8_UINT),
|
||||
/* k_16 */ ___(R16_UNORM),
|
||||
/* k_16_16 */ ___(R16G16_UNORM),
|
||||
/* k_16_16_16_16 */ ___(R16G16B16A16_UNORM),
|
||||
/* k_16_EXPAND */ ___(R16_SFLOAT),
|
||||
/* k_16_16_EXPAND */ ___(R16G16_SFLOAT),
|
||||
/* k_16_16_16_16_EXPAND */ ___(R16G16B16A16_SFLOAT),
|
||||
/* k_16_FLOAT */ ___(R16_SFLOAT),
|
||||
/* k_16_16_FLOAT */ ___(R16G16_SFLOAT),
|
||||
/* k_16_16_16_16_FLOAT */ ___(R16G16B16A16_SFLOAT),
|
||||
|
||||
// ! These are UNORM formats, not SINT.
|
||||
/* k_32 */ ___(R32_SINT),
|
||||
/* k_32_32 */ ___(R32G32_SINT),
|
||||
/* k_32_32_32_32 */ ___(R32G32B32A32_SINT),
|
||||
/* k_32_FLOAT */ ___(R32_SFLOAT),
|
||||
/* k_32_32_FLOAT */ ___(R32G32_SFLOAT),
|
||||
/* k_32_32_32_32_FLOAT */ ___(R32G32B32A32_SFLOAT),
|
||||
/* k_32_AS_8 */ ___(UNDEFINED),
|
||||
/* k_32_AS_8_8 */ ___(UNDEFINED),
|
||||
/* k_16_MPEG */ ___(UNDEFINED),
|
||||
/* k_16_16_MPEG */ ___(UNDEFINED),
|
||||
/* k_8_INTERLACED */ ___(UNDEFINED),
|
||||
/* k_32_AS_8_INTERLACED */ ___(UNDEFINED),
|
||||
/* k_32_AS_8_8_INTERLACED */ ___(UNDEFINED),
|
||||
/* k_16_INTERLACED */ ___(UNDEFINED),
|
||||
/* k_16_MPEG_INTERLACED */ ___(UNDEFINED),
|
||||
/* k_16_16_MPEG_INTERLACED */ ___(UNDEFINED),
|
||||
|
||||
// https://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
||||
/* k_DXN */ ___(BC5_UNORM_BLOCK), // ?
|
||||
|
||||
/* k_8_8_8_8_AS_16_16_16_16 */ ___(R8G8B8A8_UNORM),
|
||||
/* k_DXT1_AS_16_16_16_16 */ ___(BC1_RGBA_UNORM_BLOCK),
|
||||
/* k_DXT2_3_AS_16_16_16_16 */ ___(BC2_UNORM_BLOCK),
|
||||
/* k_DXT4_5_AS_16_16_16_16 */ ___(BC3_UNORM_BLOCK),
|
||||
|
||||
/* k_2_10_10_10_AS_16_16_16_16 */ ___(A2R10G10B10_UNORM_PACK32),
|
||||
|
||||
// TODO: Verify if these two are correct (I think not).
|
||||
/* k_10_11_11_AS_16_16_16_16 */ ___(B10G11R11_UFLOAT_PACK32), // ?
|
||||
/* k_11_11_10_AS_16_16_16_16 */ ___(B10G11R11_UFLOAT_PACK32), // ?
|
||||
/* k_32_32_32_FLOAT */ ___(R32G32B32_SFLOAT),
|
||||
/* k_DXT3A */ _c_(BC2_UNORM_BLOCK, ___R),
|
||||
/* k_DXT5A */ _c_(BC4_UNORM_BLOCK, RRRR), // ATI1N
|
||||
|
||||
// https://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
||||
/* k_CTX1 */ ___(R8G8_UINT),
|
||||
|
||||
/* k_DXT3A_AS_1_1_1_1 */ ___(UNDEFINED),
|
||||
|
||||
/* k_8_8_8_8_GAMMA_EDRAM */ ___(UNDEFINED),
|
||||
/* k_2_10_10_10_FLOAT_EDRAM */ ___(UNDEFINED),
|
||||
};
|
||||
|
||||
#undef _cv
|
||||
#undef __v
|
||||
#undef _c_
|
||||
#undef ___
|
||||
|
||||
#undef ZYXW
|
||||
#undef YXWZ
|
||||
#undef XYZW
|
||||
|
||||
#undef RRRR
|
||||
#undef ___R
|
||||
#undef RGBA
|
||||
|
||||
#undef VEC_SWIZ
|
||||
#undef COMP_SWIZ
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -1,50 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_VULKAN_TEXTURE_CONFIG_H_
|
||||
#define XENIA_GPU_VULKAN_TEXTURE_CONFIG_H_
|
||||
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
typedef enum VectorSwizzle {
|
||||
VECTOR_SWIZZLE_X = 0,
|
||||
VECTOR_SWIZZLE_Y = 1,
|
||||
VECTOR_SWIZZLE_Z = 2,
|
||||
VECTOR_SWIZZLE_W = 3,
|
||||
} VectorSwizzle;
|
||||
|
||||
struct TextureConfig {
|
||||
VkFormat host_format;
|
||||
struct {
|
||||
VkComponentSwizzle r = VK_COMPONENT_SWIZZLE_R;
|
||||
VkComponentSwizzle g = VK_COMPONENT_SWIZZLE_G;
|
||||
VkComponentSwizzle b = VK_COMPONENT_SWIZZLE_B;
|
||||
VkComponentSwizzle a = VK_COMPONENT_SWIZZLE_A;
|
||||
} component_swizzle;
|
||||
struct {
|
||||
VectorSwizzle x = VECTOR_SWIZZLE_X;
|
||||
VectorSwizzle y = VECTOR_SWIZZLE_Y;
|
||||
VectorSwizzle z = VECTOR_SWIZZLE_Z;
|
||||
VectorSwizzle w = VECTOR_SWIZZLE_W;
|
||||
} vector_swizzle;
|
||||
};
|
||||
|
||||
extern const TextureConfig texture_configs[64];
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_VULKAN_TEXTURE_CONFIG_H_
|
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -10,69 +10,264 @@
|
|||
#ifndef XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_
|
||||
#define XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <array>
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <deque>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/threading.h"
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/hash.h"
|
||||
#include "xenia/gpu/command_processor.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/vulkan/buffer_cache.h"
|
||||
#include "xenia/gpu/vulkan/render_cache.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_primitive_processor.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_shader.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_texture_cache.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/kernel/xthread.h"
|
||||
#include "xenia/memory.h"
|
||||
#include "xenia/ui/vulkan/blitter.h"
|
||||
#include "xenia/ui/vulkan/fenced_pools.h"
|
||||
#include "xenia/kernel/kernel_state.h"
|
||||
#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h"
|
||||
#include "xenia/ui/vulkan/vulkan_presenter.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
#include "xenia/ui/vulkan/vulkan_submission_tracker.h"
|
||||
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
class VulkanTextureCache;
|
||||
|
||||
class VulkanCommandProcessor : public CommandProcessor {
|
||||
public:
|
||||
// Single-descriptor layouts for use within a single frame.
|
||||
enum class SingleTransientDescriptorLayout {
|
||||
kUniformBufferGuestVertex,
|
||||
kUniformBufferFragment,
|
||||
kUniformBufferGuestShader,
|
||||
kUniformBufferSystemConstants,
|
||||
kUniformBufferCompute,
|
||||
kStorageBufferCompute,
|
||||
kCount,
|
||||
};
|
||||
|
||||
class ScratchBufferAcquisition {
|
||||
public:
|
||||
explicit ScratchBufferAcquisition() = default;
|
||||
explicit ScratchBufferAcquisition(VulkanCommandProcessor& command_processor,
|
||||
VkBuffer buffer,
|
||||
VkPipelineStageFlags stage_mask,
|
||||
VkAccessFlags access_mask)
|
||||
: command_processor_(&command_processor),
|
||||
buffer_(buffer),
|
||||
stage_mask_(stage_mask),
|
||||
access_mask_(access_mask) {}
|
||||
|
||||
ScratchBufferAcquisition(const ScratchBufferAcquisition& acquisition) =
|
||||
delete;
|
||||
ScratchBufferAcquisition& operator=(
|
||||
const ScratchBufferAcquisition& acquisition) = delete;
|
||||
|
||||
ScratchBufferAcquisition(ScratchBufferAcquisition&& acquisition) {
|
||||
command_processor_ = acquisition.command_processor_;
|
||||
buffer_ = acquisition.buffer_;
|
||||
stage_mask_ = acquisition.stage_mask_;
|
||||
access_mask_ = acquisition.access_mask_;
|
||||
acquisition.command_processor_ = nullptr;
|
||||
acquisition.buffer_ = VK_NULL_HANDLE;
|
||||
acquisition.stage_mask_ = 0;
|
||||
acquisition.access_mask_ = 0;
|
||||
}
|
||||
ScratchBufferAcquisition& operator=(
|
||||
ScratchBufferAcquisition&& acquisition) {
|
||||
if (this == &acquisition) {
|
||||
return *this;
|
||||
}
|
||||
command_processor_ = acquisition.command_processor_;
|
||||
buffer_ = acquisition.buffer_;
|
||||
stage_mask_ = acquisition.stage_mask_;
|
||||
access_mask_ = acquisition.access_mask_;
|
||||
acquisition.command_processor_ = nullptr;
|
||||
acquisition.buffer_ = VK_NULL_HANDLE;
|
||||
acquisition.stage_mask_ = 0;
|
||||
acquisition.access_mask_ = 0;
|
||||
return *this;
|
||||
}
|
||||
|
||||
~ScratchBufferAcquisition() {
|
||||
if (buffer_ != VK_NULL_HANDLE) {
|
||||
assert_true(command_processor_->scratch_buffer_used_);
|
||||
assert_true(command_processor_->scratch_buffer_ == buffer_);
|
||||
command_processor_->scratch_buffer_last_stage_mask_ = stage_mask_;
|
||||
command_processor_->scratch_buffer_last_access_mask_ = access_mask_;
|
||||
command_processor_->scratch_buffer_last_usage_submission_ =
|
||||
command_processor_->GetCurrentSubmission();
|
||||
command_processor_->scratch_buffer_used_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
// VK_NULL_HANDLE if failed to acquire or if moved.
|
||||
VkBuffer buffer() const { return buffer_; }
|
||||
|
||||
VkPipelineStageFlags GetStageMask() const { return stage_mask_; }
|
||||
VkPipelineStageFlags SetStageMask(VkPipelineStageFlags new_stage_mask) {
|
||||
VkPipelineStageFlags old_stage_mask = stage_mask_;
|
||||
stage_mask_ = new_stage_mask;
|
||||
return old_stage_mask;
|
||||
}
|
||||
VkAccessFlags GetAccessMask() const { return access_mask_; }
|
||||
VkAccessFlags SetAccessMask(VkAccessFlags new_access_mask) {
|
||||
VkAccessFlags old_access_mask = access_mask_;
|
||||
access_mask_ = new_access_mask;
|
||||
return old_access_mask;
|
||||
}
|
||||
|
||||
private:
|
||||
VulkanCommandProcessor* command_processor_ = nullptr;
|
||||
VkBuffer buffer_ = VK_NULL_HANDLE;
|
||||
VkPipelineStageFlags stage_mask_ = 0;
|
||||
VkAccessFlags access_mask_ = 0;
|
||||
};
|
||||
|
||||
VulkanCommandProcessor(VulkanGraphicsSystem* graphics_system,
|
||||
kernel::KernelState* kernel_state);
|
||||
~VulkanCommandProcessor() override;
|
||||
~VulkanCommandProcessor();
|
||||
|
||||
void RequestFrameTrace(const std::filesystem::path& root_path) override;
|
||||
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
|
||||
void RestoreEdramSnapshot(const void* snapshot) override;
|
||||
void ClearCaches() override;
|
||||
|
||||
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
|
||||
|
||||
void RestoreEdramSnapshot(const void* snapshot) override;
|
||||
|
||||
ui::vulkan::VulkanProvider& GetVulkanProvider() const {
|
||||
return *static_cast<ui::vulkan::VulkanProvider*>(
|
||||
graphics_system_->provider());
|
||||
}
|
||||
|
||||
RenderCache* render_cache() { return render_cache_.get(); }
|
||||
// Returns the deferred drawing command list for the currently open
|
||||
// submission.
|
||||
DeferredCommandBuffer& deferred_command_buffer() {
|
||||
assert_true(submission_open_);
|
||||
return deferred_command_buffer_;
|
||||
}
|
||||
|
||||
private:
|
||||
bool submission_open() const { return submission_open_; }
|
||||
uint64_t GetCurrentSubmission() const {
|
||||
return submission_completed_ +
|
||||
uint64_t(submissions_in_flight_fences_.size()) + 1;
|
||||
}
|
||||
uint64_t GetCompletedSubmission() const { return submission_completed_; }
|
||||
|
||||
// Sparse binds are:
|
||||
// - In a single submission, all submitted in one vkQueueBindSparse.
|
||||
// - Sent to the queue without waiting for a semaphore.
|
||||
// Thus, multiple sparse binds between the completed and the current
|
||||
// submission, and within one submission, must not touch any overlapping
|
||||
// memory regions.
|
||||
void SparseBindBuffer(VkBuffer buffer, uint32_t bind_count,
|
||||
const VkSparseMemoryBind* binds,
|
||||
VkPipelineStageFlags wait_stage_mask);
|
||||
|
||||
uint64_t GetCurrentFrame() const { return frame_current_; }
|
||||
uint64_t GetCompletedFrame() const { return frame_completed_; }
|
||||
|
||||
// Submission must be open to insert barriers. If no pipeline stages access
|
||||
// the resource in a synchronization scope, the stage masks should be 0 (top /
|
||||
// bottom of pipe should be specified only if explicitly needed). Returning
|
||||
// true if the barrier has actually been inserted and not dropped.
|
||||
bool PushBufferMemoryBarrier(
|
||||
VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size,
|
||||
VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
|
||||
VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask,
|
||||
uint32_t src_queue_family_index = VK_QUEUE_FAMILY_IGNORED,
|
||||
uint32_t dst_queue_family_index = VK_QUEUE_FAMILY_IGNORED,
|
||||
bool skip_if_equal = true);
|
||||
bool PushImageMemoryBarrier(
|
||||
VkImage image, const VkImageSubresourceRange& subresource_range,
|
||||
VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
|
||||
VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask,
|
||||
VkImageLayout old_layout, VkImageLayout new_layout,
|
||||
uint32_t src_queue_family_index = VK_QUEUE_FAMILY_IGNORED,
|
||||
uint32_t dst_queue_family_index = VK_QUEUE_FAMILY_IGNORED,
|
||||
bool skip_if_equal = true);
|
||||
// Returns whether any barriers have been submitted - if true is returned, the
|
||||
// render pass will also be closed.
|
||||
bool SubmitBarriers(bool force_end_render_pass);
|
||||
|
||||
// If not started yet, begins a render pass from the render target cache.
|
||||
// Submission must be open.
|
||||
void SubmitBarriersAndEnterRenderTargetCacheRenderPass(
|
||||
VkRenderPass render_pass,
|
||||
const VulkanRenderTargetCache::Framebuffer* framebuffer);
|
||||
// Must be called before doing anything outside the render pass scope,
|
||||
// including adding pipeline barriers that are not a part of the render pass
|
||||
// scope. Submission must be open.
|
||||
void EndRenderPass();
|
||||
|
||||
VkDescriptorSetLayout GetSingleTransientDescriptorLayout(
|
||||
SingleTransientDescriptorLayout transient_descriptor_layout) const {
|
||||
return descriptor_set_layouts_single_transient_[size_t(
|
||||
transient_descriptor_layout)];
|
||||
}
|
||||
// A frame must be open.
|
||||
VkDescriptorSet AllocateSingleTransientDescriptor(
|
||||
SingleTransientDescriptorLayout transient_descriptor_layout);
|
||||
// Allocates a descriptor, space in the uniform buffer pool, and fills the
|
||||
// VkWriteDescriptorSet structure and VkDescriptorBufferInfo referenced by it.
|
||||
// Returns null in case of failure.
|
||||
uint8_t* WriteTransientUniformBufferBinding(
|
||||
size_t size, SingleTransientDescriptorLayout transient_descriptor_layout,
|
||||
VkDescriptorBufferInfo& descriptor_buffer_info_out,
|
||||
VkWriteDescriptorSet& write_descriptor_set_out);
|
||||
uint8_t* WriteTransientUniformBufferBinding(
|
||||
size_t size, SingleTransientDescriptorLayout transient_descriptor_layout,
|
||||
VkDescriptorSet& descriptor_set_out);
|
||||
|
||||
// The returned reference is valid until a cache clear.
|
||||
VkDescriptorSetLayout GetTextureDescriptorSetLayout(bool is_samplers,
|
||||
bool is_vertex,
|
||||
size_t binding_count);
|
||||
// The returned reference is valid until a cache clear.
|
||||
const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout(
|
||||
size_t texture_count_pixel, size_t sampler_count_pixel,
|
||||
size_t texture_count_vertex, size_t sampler_count_vertex);
|
||||
|
||||
// Returns a single temporary GPU-side buffer within a submission for tasks
|
||||
// like texture untiling and resolving. May push a buffer memory barrier into
|
||||
// the initial usage. Submission must be open.
|
||||
ScratchBufferAcquisition AcquireScratchGpuBuffer(
|
||||
VkDeviceSize size, VkPipelineStageFlags initial_stage_mask,
|
||||
VkAccessFlags initial_access_mask);
|
||||
|
||||
// Binds a graphics pipeline for host-specific purposes, invalidating the
|
||||
// affected state. keep_dynamic_* must be false (to invalidate the dynamic
|
||||
// state after binding the pipeline with the same state being static, or if
|
||||
// the caller changes the dynamic state bypassing the VulkanCommandProcessor)
|
||||
// unless the caller has these state variables as dynamic and uses the
|
||||
// tracking in VulkanCommandProcessor to modify them.
|
||||
void BindExternalGraphicsPipeline(VkPipeline pipeline,
|
||||
bool keep_dynamic_depth_bias = false,
|
||||
bool keep_dynamic_blend_constants = false,
|
||||
bool keep_dynamic_stencil_mask_ref = false);
|
||||
void BindExternalComputePipeline(VkPipeline pipeline);
|
||||
void SetViewport(const VkViewport& viewport);
|
||||
void SetScissor(const VkRect2D& scissor);
|
||||
|
||||
protected:
|
||||
bool SetupContext() override;
|
||||
void ShutdownContext() override;
|
||||
|
||||
void MakeCoherent() override;
|
||||
|
||||
void WriteRegister(uint32_t index, uint32_t value) override;
|
||||
|
||||
void BeginFrame();
|
||||
void EndFrame();
|
||||
void OnGammaRamp256EntryTableValueWritten() override;
|
||||
void OnGammaRampPWLValueWritten() override;
|
||||
|
||||
void IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width,
|
||||
uint32_t frontbuffer_height) override;
|
||||
|
@ -81,52 +276,459 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
const uint32_t* host_address,
|
||||
uint32_t dword_count) override;
|
||||
|
||||
bool IssueDraw(xenos::PrimitiveType primitive_type, uint32_t index_count,
|
||||
bool IssueDraw(xenos::PrimitiveType prim_type, uint32_t index_count,
|
||||
IndexBufferInfo* index_buffer_info,
|
||||
bool major_mode_explicit) override;
|
||||
bool PopulateConstants(VkCommandBuffer command_buffer,
|
||||
VulkanShader* vertex_shader,
|
||||
VulkanShader* pixel_shader);
|
||||
bool PopulateIndexBuffer(VkCommandBuffer command_buffer,
|
||||
IndexBufferInfo* index_buffer_info);
|
||||
bool PopulateVertexBuffers(VkCommandBuffer command_buffer,
|
||||
VkCommandBuffer setup_buffer,
|
||||
VulkanShader* vertex_shader);
|
||||
bool PopulateSamplers(VkCommandBuffer command_buffer,
|
||||
VkCommandBuffer setup_buffer,
|
||||
VulkanShader* vertex_shader,
|
||||
VulkanShader* pixel_shader);
|
||||
bool IssueCopy() override;
|
||||
|
||||
uint64_t dirty_float_constants_ = 0; // Dirty float constants in blocks of 4
|
||||
uint8_t dirty_bool_constants_ = 0;
|
||||
uint32_t dirty_loop_constants_ = 0;
|
||||
uint8_t dirty_gamma_constants_ = 0;
|
||||
void InitializeTrace() override;
|
||||
|
||||
uint32_t coher_base_vc_ = 0;
|
||||
uint32_t coher_size_vc_ = 0;
|
||||
private:
|
||||
struct CommandBuffer {
|
||||
VkCommandPool pool;
|
||||
VkCommandBuffer buffer;
|
||||
};
|
||||
|
||||
struct SparseBufferBind {
|
||||
VkBuffer buffer;
|
||||
size_t bind_offset;
|
||||
uint32_t bind_count;
|
||||
};
|
||||
|
||||
union TextureDescriptorSetLayoutKey {
|
||||
uint32_t key;
|
||||
struct {
|
||||
// 0 - sampled image descriptors, 1 - sampler descriptors.
|
||||
uint32_t is_samplers : 1;
|
||||
uint32_t is_vertex : 1;
|
||||
// For 0, use descriptor_set_layout_empty_ instead as these are owning
|
||||
// references.
|
||||
uint32_t binding_count : 30;
|
||||
};
|
||||
|
||||
TextureDescriptorSetLayoutKey() : key(0) {
|
||||
static_assert_size(*this, sizeof(key));
|
||||
}
|
||||
|
||||
struct Hasher {
|
||||
size_t operator()(const TextureDescriptorSetLayoutKey& key) const {
|
||||
return std::hash<decltype(key.key)>{}(key.key);
|
||||
}
|
||||
};
|
||||
bool operator==(const TextureDescriptorSetLayoutKey& other_key) const {
|
||||
return key == other_key.key;
|
||||
}
|
||||
bool operator!=(const TextureDescriptorSetLayoutKey& other_key) const {
|
||||
return !(*this == other_key);
|
||||
}
|
||||
};
|
||||
|
||||
union PipelineLayoutKey {
|
||||
uint64_t key;
|
||||
struct {
|
||||
// Pixel textures in the low bits since those are varied much more
|
||||
// commonly.
|
||||
uint16_t texture_count_pixel;
|
||||
uint16_t sampler_count_pixel;
|
||||
uint16_t texture_count_vertex;
|
||||
uint16_t sampler_count_vertex;
|
||||
};
|
||||
|
||||
PipelineLayoutKey() : key(0) { static_assert_size(*this, sizeof(key)); }
|
||||
|
||||
struct Hasher {
|
||||
size_t operator()(const PipelineLayoutKey& key) const {
|
||||
return std::hash<decltype(key.key)>{}(key.key);
|
||||
}
|
||||
};
|
||||
bool operator==(const PipelineLayoutKey& other_key) const {
|
||||
return key == other_key.key;
|
||||
}
|
||||
bool operator!=(const PipelineLayoutKey& other_key) const {
|
||||
return !(*this == other_key);
|
||||
}
|
||||
};
|
||||
|
||||
class PipelineLayout : public VulkanPipelineCache::PipelineLayoutProvider {
|
||||
public:
|
||||
explicit PipelineLayout(
|
||||
VkPipelineLayout pipeline_layout,
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref,
|
||||
VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref,
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref,
|
||||
VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref)
|
||||
: pipeline_layout_(pipeline_layout),
|
||||
descriptor_set_layout_textures_vertex_ref_(
|
||||
descriptor_set_layout_textures_vertex_ref),
|
||||
descriptor_set_layout_samplers_vertex_ref_(
|
||||
descriptor_set_layout_samplers_vertex_ref),
|
||||
descriptor_set_layout_textures_pixel_ref_(
|
||||
descriptor_set_layout_textures_pixel_ref),
|
||||
descriptor_set_layout_samplers_pixel_ref_(
|
||||
descriptor_set_layout_samplers_pixel_ref) {}
|
||||
VkPipelineLayout GetPipelineLayout() const override {
|
||||
return pipeline_layout_;
|
||||
}
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref() const {
|
||||
return descriptor_set_layout_textures_vertex_ref_;
|
||||
}
|
||||
VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref() const {
|
||||
return descriptor_set_layout_samplers_vertex_ref_;
|
||||
}
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref() const {
|
||||
return descriptor_set_layout_textures_pixel_ref_;
|
||||
}
|
||||
VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref() const {
|
||||
return descriptor_set_layout_samplers_pixel_ref_;
|
||||
}
|
||||
|
||||
private:
|
||||
VkPipelineLayout pipeline_layout_;
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref_;
|
||||
VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref_;
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref_;
|
||||
VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref_;
|
||||
};
|
||||
|
||||
struct UsedSingleTransientDescriptor {
|
||||
uint64_t frame;
|
||||
SingleTransientDescriptorLayout layout;
|
||||
VkDescriptorSet set;
|
||||
};
|
||||
|
||||
struct UsedTextureTransientDescriptorSet {
|
||||
uint64_t frame;
|
||||
TextureDescriptorSetLayoutKey layout;
|
||||
VkDescriptorSet set;
|
||||
};
|
||||
|
||||
enum SwapApplyGammaDescriptorSet : uint32_t {
|
||||
kSwapApplyGammaDescriptorSetRamp,
|
||||
kSwapApplyGammaDescriptorSetSource,
|
||||
|
||||
kSwapApplyGammaDescriptorSetCount,
|
||||
};
|
||||
|
||||
// Framebuffer for the current presenter's guest output image revision, and
|
||||
// its usage tracking.
|
||||
struct SwapFramebuffer {
|
||||
VkFramebuffer framebuffer = VK_NULL_HANDLE;
|
||||
uint64_t version = UINT64_MAX;
|
||||
uint64_t last_submission = 0;
|
||||
};
|
||||
|
||||
// BeginSubmission and EndSubmission may be called at any time. If there's an
|
||||
// open non-frame submission, BeginSubmission(true) will promote it to a
|
||||
// frame. EndSubmission(true) will close the frame no matter whether the
|
||||
// submission has already been closed.
|
||||
// Unlike on Direct3D 12, submission boundaries do not imply any memory
|
||||
// barriers aside from an incoming host write (but not outgoing host read)
|
||||
// dependency.
|
||||
|
||||
// Rechecks submission number and reclaims per-submission resources. Pass 0 as
|
||||
// the submission to await to simply check status, or pass
|
||||
// GetCurrentSubmission() to wait for all queue operations to be completed.
|
||||
void CheckSubmissionFenceAndDeviceLoss(uint64_t await_submission);
|
||||
// If is_guest_command is true, a new full frame - with full cleanup of
|
||||
// resources and, if needed, starting capturing - is opened if pending (as
|
||||
// opposed to simply resuming after mid-frame synchronization). Returns
|
||||
// whether a submission is open currently and the device is not lost.
|
||||
bool BeginSubmission(bool is_guest_command);
|
||||
// If is_swap is true, a full frame is closed - with, if needed, cache
|
||||
// clearing and stopping capturing. Returns whether the submission was done
|
||||
// successfully, if it has failed, leaves it open.
|
||||
bool EndSubmission(bool is_swap);
|
||||
bool AwaitAllQueueOperationsCompletion() {
|
||||
CheckSubmissionFenceAndDeviceLoss(GetCurrentSubmission());
|
||||
return !submission_open_ && submissions_in_flight_fences_.empty();
|
||||
}
|
||||
|
||||
void ClearTransientDescriptorPools();
|
||||
|
||||
void SplitPendingBarrier();
|
||||
|
||||
void DestroyScratchBuffer();
|
||||
|
||||
void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info,
|
||||
bool primitive_polygonal,
|
||||
reg::RB_DEPTHCONTROL normalized_depth_control);
|
||||
void UpdateSystemConstantValues(bool primitive_polygonal,
|
||||
xenos::Endian index_endian,
|
||||
const draw_util::ViewportInfo& viewport_info,
|
||||
uint32_t used_texture_mask);
|
||||
bool UpdateBindings(const VulkanShader* vertex_shader,
|
||||
const VulkanShader* pixel_shader);
|
||||
// Allocates a descriptor set and fills the VkWriteDescriptorSet structure.
|
||||
// The descriptor set layout must be the one for the given is_samplers,
|
||||
// is_vertex, binding_count (from GetTextureDescriptorSetLayout - may be
|
||||
// already available at the moment of the call, no need to locate it again).
|
||||
// Returns whether the allocation was successful.
|
||||
bool WriteTransientTextureBindings(
|
||||
bool is_samplers, bool is_vertex, uint32_t binding_count,
|
||||
VkDescriptorSetLayout descriptor_set_layout,
|
||||
const VkDescriptorImageInfo* image_info,
|
||||
VkWriteDescriptorSet& write_descriptor_set_out);
|
||||
|
||||
bool device_lost_ = false;
|
||||
|
||||
bool capturing_ = false;
|
||||
bool trace_requested_ = false;
|
||||
bool cache_clear_requested_ = false;
|
||||
|
||||
std::unique_ptr<BufferCache> buffer_cache_;
|
||||
// Host shader types that guest shaders can be translated into - they can
|
||||
// access the shared memory (via vertex fetch, memory export, or manual index
|
||||
// buffer reading) and textures.
|
||||
VkPipelineStageFlags guest_shader_pipeline_stages_ = 0;
|
||||
VkShaderStageFlags guest_shader_vertex_stages_ = 0;
|
||||
|
||||
std::vector<VkFence> fences_free_;
|
||||
std::vector<VkSemaphore> semaphores_free_;
|
||||
|
||||
bool submission_open_ = false;
|
||||
uint64_t submission_completed_ = 0;
|
||||
// In case vkQueueSubmit fails after something like a successful
|
||||
// vkQueueBindSparse, to wait correctly on the next attempt.
|
||||
std::vector<VkSemaphore> current_submission_wait_semaphores_;
|
||||
std::vector<VkPipelineStageFlags> current_submission_wait_stage_masks_;
|
||||
std::vector<VkFence> submissions_in_flight_fences_;
|
||||
std::deque<std::pair<uint64_t, VkSemaphore>>
|
||||
submissions_in_flight_semaphores_;
|
||||
|
||||
static constexpr uint32_t kMaxFramesInFlight = 3;
|
||||
bool frame_open_ = false;
|
||||
// Guest frame index, since some transient resources can be reused across
|
||||
// submissions. Values updated in the beginning of a frame.
|
||||
uint64_t frame_current_ = 1;
|
||||
uint64_t frame_completed_ = 0;
|
||||
// Submission indices of frames that have already been submitted.
|
||||
uint64_t closed_frame_submissions_[kMaxFramesInFlight] = {};
|
||||
|
||||
// <Submission where last used, resource>, sorted by the submission number.
|
||||
std::deque<std::pair<uint64_t, VkDeviceMemory>> destroy_memory_;
|
||||
std::deque<std::pair<uint64_t, VkBuffer>> destroy_buffers_;
|
||||
std::deque<std::pair<uint64_t, VkFramebuffer>> destroy_framebuffers_;
|
||||
|
||||
std::vector<CommandBuffer> command_buffers_writable_;
|
||||
std::deque<std::pair<uint64_t, CommandBuffer>> command_buffers_submitted_;
|
||||
DeferredCommandBuffer deferred_command_buffer_;
|
||||
|
||||
std::vector<VkSparseMemoryBind> sparse_memory_binds_;
|
||||
std::vector<SparseBufferBind> sparse_buffer_binds_;
|
||||
// SparseBufferBind converted to VkSparseBufferMemoryBindInfo to this buffer
|
||||
// on submission (because pBinds should point to a place in std::vector, but
|
||||
// it may be reallocated).
|
||||
std::vector<VkSparseBufferMemoryBindInfo> sparse_buffer_bind_infos_temp_;
|
||||
VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0;
|
||||
|
||||
// Temporary storage with reusable memory for creating descriptor set layouts.
|
||||
std::vector<VkDescriptorSetLayoutBinding> descriptor_set_layout_bindings_;
|
||||
// Temporary storage with reusable memory for writing image and sampler
|
||||
// descriptors.
|
||||
std::vector<VkDescriptorImageInfo> descriptor_write_image_info_;
|
||||
|
||||
std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> uniform_buffer_pool_;
|
||||
|
||||
// Descriptor set layouts used by different shaders.
|
||||
VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ =
|
||||
VK_NULL_HANDLE;
|
||||
std::array<VkDescriptorSetLayout,
|
||||
size_t(SingleTransientDescriptorLayout::kCount)>
|
||||
descriptor_set_layouts_single_transient_{};
|
||||
|
||||
// Descriptor set layouts are referenced by pipeline_layouts_.
|
||||
std::unordered_map<TextureDescriptorSetLayoutKey, VkDescriptorSetLayout,
|
||||
TextureDescriptorSetLayoutKey::Hasher>
|
||||
descriptor_set_layouts_textures_;
|
||||
// Pipeline layouts are referenced by VulkanPipelineCache.
|
||||
std::unordered_map<PipelineLayoutKey, PipelineLayout,
|
||||
PipelineLayoutKey::Hasher>
|
||||
pipeline_layouts_;
|
||||
|
||||
ui::vulkan::SingleTypeDescriptorSetAllocator
|
||||
transient_descriptor_allocator_uniform_buffer_;
|
||||
ui::vulkan::SingleTypeDescriptorSetAllocator
|
||||
transient_descriptor_allocator_storage_buffer_;
|
||||
std::deque<UsedSingleTransientDescriptor> single_transient_descriptors_used_;
|
||||
std::array<std::vector<VkDescriptorSet>,
|
||||
size_t(SingleTransientDescriptorLayout::kCount)>
|
||||
single_transient_descriptors_free_;
|
||||
|
||||
ui::vulkan::SingleTypeDescriptorSetAllocator
|
||||
transient_descriptor_allocator_sampled_image_;
|
||||
ui::vulkan::SingleTypeDescriptorSetAllocator
|
||||
transient_descriptor_allocator_sampler_;
|
||||
std::deque<UsedTextureTransientDescriptorSet>
|
||||
texture_transient_descriptor_sets_used_;
|
||||
std::unordered_map<TextureDescriptorSetLayoutKey,
|
||||
std::vector<VkDescriptorSet>,
|
||||
TextureDescriptorSetLayoutKey::Hasher>
|
||||
texture_transient_descriptor_sets_free_;
|
||||
|
||||
std::unique_ptr<VulkanSharedMemory> shared_memory_;
|
||||
|
||||
std::unique_ptr<VulkanPrimitiveProcessor> primitive_processor_;
|
||||
|
||||
std::unique_ptr<VulkanRenderTargetCache> render_target_cache_;
|
||||
|
||||
std::unique_ptr<VulkanPipelineCache> pipeline_cache_;
|
||||
std::unique_ptr<RenderCache> render_cache_;
|
||||
|
||||
std::unique_ptr<VulkanTextureCache> texture_cache_;
|
||||
|
||||
std::unique_ptr<ui::vulkan::Blitter> blitter_;
|
||||
std::unique_ptr<ui::vulkan::CommandBufferPool> command_buffer_pool_;
|
||||
VkDescriptorPool shared_memory_and_edram_descriptor_pool_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSet shared_memory_and_edram_descriptor_set_;
|
||||
|
||||
bool frame_open_ = false;
|
||||
const RenderState* current_render_state_ = nullptr;
|
||||
VkCommandBuffer current_command_buffer_ = nullptr;
|
||||
VkCommandBuffer current_setup_buffer_ = nullptr;
|
||||
VkFence current_batch_fence_;
|
||||
// Bytes 0x0...0x3FF - 256-entry gamma ramp table with B10G10R10X2 data (read
|
||||
// as R10G10B10X2 with swizzle).
|
||||
// Bytes 0x400...0x9FF - 128-entry PWL R16G16 gamma ramp (R - base, G - delta,
|
||||
// low 6 bits of each are zero, 3 elements per entry).
|
||||
// kMaxFramesInFlight pairs of gamma ramps if in host-visible memory and
|
||||
// uploaded directly, one otherwise.
|
||||
VkDeviceMemory gamma_ramp_buffer_memory_ = VK_NULL_HANDLE;
|
||||
VkBuffer gamma_ramp_buffer_ = VK_NULL_HANDLE;
|
||||
// kMaxFramesInFlight pairs, only when the gamma ramp buffer is not
|
||||
// host-visible.
|
||||
VkDeviceMemory gamma_ramp_upload_buffer_memory_ = VK_NULL_HANDLE;
|
||||
VkBuffer gamma_ramp_upload_buffer_ = VK_NULL_HANDLE;
|
||||
VkDeviceSize gamma_ramp_upload_memory_size_;
|
||||
uint32_t gamma_ramp_upload_memory_type_;
|
||||
// Mapping of either gamma_ramp_buffer_memory_ (if it's host-visible) or
|
||||
// gamma_ramp_upload_buffer_memory_ (otherwise).
|
||||
void* gamma_ramp_upload_mapping_;
|
||||
std::array<VkBufferView, 2 * kMaxFramesInFlight> gamma_ramp_buffer_views_{};
|
||||
// UINT32_MAX if outdated.
|
||||
uint32_t gamma_ramp_256_entry_table_current_frame_ = UINT32_MAX;
|
||||
uint32_t gamma_ramp_pwl_current_frame_ = UINT32_MAX;
|
||||
|
||||
ui::vulkan::VulkanSubmissionTracker swap_submission_tracker_;
|
||||
VkFramebuffer swap_framebuffer_ = VK_NULL_HANDLE;
|
||||
uint64_t swap_framebuffer_version_ = UINT64_MAX;
|
||||
VkDescriptorSetLayout swap_descriptor_set_layout_sampled_image_ =
|
||||
VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout swap_descriptor_set_layout_uniform_texel_buffer_ =
|
||||
VK_NULL_HANDLE;
|
||||
|
||||
// Descriptor pool for allocating descriptors needed for presentation, such as
|
||||
// the destination images and the gamma ramps.
|
||||
VkDescriptorPool swap_descriptor_pool_ = VK_NULL_HANDLE;
|
||||
// Interleaved 256-entry table and PWL texel buffer descriptors.
|
||||
// kMaxFramesInFlight pairs of gamma ramps if in host-visible memory and
|
||||
// uploaded directly, one otherwise.
|
||||
std::array<VkDescriptorSet, 2 * kMaxFramesInFlight>
|
||||
swap_descriptors_gamma_ramp_;
|
||||
// Sampled images.
|
||||
std::array<VkDescriptorSet, kMaxFramesInFlight> swap_descriptors_source_;
|
||||
|
||||
VkPipelineLayout swap_apply_gamma_pipeline_layout_ = VK_NULL_HANDLE;
|
||||
// Has no dependencies on specific pipeline stages on both ends to simplify
|
||||
// use in different scenarios with different pipelines - use explicit barriers
|
||||
// for synchronization.
|
||||
VkRenderPass swap_apply_gamma_render_pass_ = VK_NULL_HANDLE;
|
||||
VkPipeline swap_apply_gamma_256_entry_table_pipeline_ = VK_NULL_HANDLE;
|
||||
VkPipeline swap_apply_gamma_pwl_pipeline_ = VK_NULL_HANDLE;
|
||||
|
||||
std::array<SwapFramebuffer,
|
||||
ui::vulkan::VulkanPresenter::kMaxActiveGuestOutputImageVersions>
|
||||
swap_framebuffers_;
|
||||
|
||||
// Pending pipeline barriers.
|
||||
std::vector<VkBufferMemoryBarrier> pending_barriers_buffer_memory_barriers_;
|
||||
std::vector<VkImageMemoryBarrier> pending_barriers_image_memory_barriers_;
|
||||
struct PendingBarrier {
|
||||
VkPipelineStageFlags src_stage_mask = 0;
|
||||
VkPipelineStageFlags dst_stage_mask = 0;
|
||||
size_t buffer_memory_barriers_offset = 0;
|
||||
size_t image_memory_barriers_offset = 0;
|
||||
};
|
||||
std::vector<PendingBarrier> pending_barriers_;
|
||||
PendingBarrier current_pending_barrier_;
|
||||
|
||||
// GPU-local scratch buffer.
|
||||
static constexpr VkDeviceSize kScratchBufferSizeIncrement = 16 * 1024 * 1024;
|
||||
VkDeviceMemory scratch_buffer_memory_ = VK_NULL_HANDLE;
|
||||
VkBuffer scratch_buffer_ = VK_NULL_HANDLE;
|
||||
VkDeviceSize scratch_buffer_size_ = 0;
|
||||
VkPipelineStageFlags scratch_buffer_last_stage_mask_ = 0;
|
||||
VkAccessFlags scratch_buffer_last_access_mask_ = 0;
|
||||
uint64_t scratch_buffer_last_usage_submission_ = 0;
|
||||
bool scratch_buffer_used_ = false;
|
||||
|
||||
// The current dynamic state of the graphics pipeline bind point. Note that
|
||||
// binding any pipeline to the bind point with static state (even if it's
|
||||
// unused, like depth bias being disabled, but the values themselves still not
|
||||
// declared as dynamic in the pipeline) invalidates such dynamic state.
|
||||
VkViewport dynamic_viewport_;
|
||||
VkRect2D dynamic_scissor_;
|
||||
float dynamic_depth_bias_constant_factor_;
|
||||
float dynamic_depth_bias_slope_factor_;
|
||||
float dynamic_blend_constants_[4];
|
||||
// The stencil values are pre-initialized (to D3D11_DEFAULT_STENCIL_*, and the
|
||||
// initial values for front and back are the same for portability subset
|
||||
// safety) because they're updated conditionally to avoid changing the back
|
||||
// face values when stencil is disabled and the primitive type is changed
|
||||
// between polygonal and non-polygonal.
|
||||
uint32_t dynamic_stencil_compare_mask_front_ = UINT8_MAX;
|
||||
uint32_t dynamic_stencil_compare_mask_back_ = UINT8_MAX;
|
||||
uint32_t dynamic_stencil_write_mask_front_ = UINT8_MAX;
|
||||
uint32_t dynamic_stencil_write_mask_back_ = UINT8_MAX;
|
||||
uint32_t dynamic_stencil_reference_front_ = 0;
|
||||
uint32_t dynamic_stencil_reference_back_ = 0;
|
||||
bool dynamic_viewport_update_needed_;
|
||||
bool dynamic_scissor_update_needed_;
|
||||
bool dynamic_depth_bias_update_needed_;
|
||||
bool dynamic_blend_constants_update_needed_;
|
||||
bool dynamic_stencil_compare_mask_front_update_needed_;
|
||||
bool dynamic_stencil_compare_mask_back_update_needed_;
|
||||
bool dynamic_stencil_write_mask_front_update_needed_;
|
||||
bool dynamic_stencil_write_mask_back_update_needed_;
|
||||
bool dynamic_stencil_reference_front_update_needed_;
|
||||
bool dynamic_stencil_reference_back_update_needed_;
|
||||
|
||||
// Currently used samplers.
|
||||
std::vector<std::pair<VulkanTextureCache::SamplerParameters, VkSampler>>
|
||||
current_samplers_vertex_;
|
||||
std::vector<std::pair<VulkanTextureCache::SamplerParameters, VkSampler>>
|
||||
current_samplers_pixel_;
|
||||
|
||||
// Cache render pass currently started in the command buffer with the
|
||||
// framebuffer.
|
||||
VkRenderPass current_render_pass_;
|
||||
const VulkanRenderTargetCache::Framebuffer* current_framebuffer_;
|
||||
|
||||
// Currently bound graphics pipeline, either from the pipeline cache (with
|
||||
// potentially deferred creation - current_external_graphics_pipeline_ is
|
||||
// VK_NULL_HANDLE in this case) or a non-Xenos one
|
||||
// (current_guest_graphics_pipeline_ is VK_NULL_HANDLE in this case).
|
||||
// TODO(Triang3l): Change to a deferred compilation handle.
|
||||
VkPipeline current_guest_graphics_pipeline_;
|
||||
VkPipeline current_external_graphics_pipeline_;
|
||||
VkPipeline current_external_compute_pipeline_;
|
||||
|
||||
// Pipeline layout of the current guest graphics pipeline.
|
||||
const PipelineLayout* current_guest_graphics_pipeline_layout_;
|
||||
VkDescriptorSet current_graphics_descriptor_sets_
|
||||
[SpirvShaderTranslator::kDescriptorSetCount];
|
||||
// Whether descriptor sets in current_graphics_descriptor_sets_ point to
|
||||
// up-to-date data.
|
||||
uint32_t current_graphics_descriptor_set_values_up_to_date_;
|
||||
// Whether the descriptor sets currently bound to the command buffer - only
|
||||
// low bits for the descriptor set layouts that remained the same are kept
|
||||
// when changing the pipeline layout. May be out of sync with
|
||||
// current_graphics_descriptor_set_values_up_to_date_, but should be ensured
|
||||
// to be a subset of it at some point when it becomes important; bits for
|
||||
// non-existent descriptor set layouts may also be set, but need to be ignored
|
||||
// when they start to matter.
|
||||
uint32_t current_graphics_descriptor_sets_bound_up_to_date_;
|
||||
static_assert(
|
||||
SpirvShaderTranslator::kDescriptorSetCount <=
|
||||
sizeof(current_graphics_descriptor_set_values_up_to_date_) * CHAR_BIT,
|
||||
"Bit fields storing descriptor set validity must be large enough");
|
||||
static_assert(
|
||||
SpirvShaderTranslator::kDescriptorSetCount <=
|
||||
sizeof(current_graphics_descriptor_sets_bound_up_to_date_) * CHAR_BIT,
|
||||
"Bit fields storing descriptor set validity must be large enough");
|
||||
|
||||
// Float constant usage masks of the last draw call.
|
||||
uint64_t current_float_constant_map_vertex_[4];
|
||||
uint64_t current_float_constant_map_pixel_[4];
|
||||
|
||||
// System shader constants.
|
||||
SpirvShaderTranslator::SystemConstants system_constants_;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
|
|
|
@ -1,16 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
|
||||
|
||||
DEFINE_bool(vulkan_renderdoc_capture_all, false,
|
||||
"Capture everything with RenderDoc.", "Vulkan");
|
||||
DEFINE_bool(vulkan_native_msaa, false, "Use native MSAA", "Vulkan");
|
||||
DEFINE_bool(vulkan_dump_disasm, false,
|
||||
"Dump shader disassembly. NVIDIA only supported.", "Vulkan");
|
|
@ -1,20 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_
|
||||
#define XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_
|
||||
|
||||
#define FINE_GRAINED_DRAW_SCOPES 1
|
||||
#include "xenia/base/cvar.h"
|
||||
|
||||
DECLARE_bool(vulkan_renderdoc_capture_all);
|
||||
DECLARE_bool(vulkan_native_msaa);
|
||||
DECLARE_bool(vulkan_dump_disasm);
|
||||
|
||||
#endif // XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_
|
|
@ -26,7 +26,9 @@ class VulkanGraphicsSystem : public GraphicsSystem {
|
|||
|
||||
static bool IsAvailable() { return true; }
|
||||
|
||||
std::string name() const override { return "Vulkan - obsolete"; }
|
||||
std::string name() const override {
|
||||
return "Vulkan - HEAVILY INCOMPLETE, early development";
|
||||
}
|
||||
|
||||
X_STATUS Setup(cpu::Processor* processor, kernel::KernelState* kernel_state,
|
||||
ui::WindowedAppContext* app_context,
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2,312 +2,322 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_VULKAN_VULKAN_PIPELINE_CACHE_H_
|
||||
#define XENIA_GPU_VULKAN_VULKAN_PIPELINE_CACHE_H_
|
||||
#ifndef XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
|
||||
#define XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#include "xenia/base/string_buffer.h"
|
||||
#include "xenia/base/hash.h"
|
||||
#include "xenia/base/platform.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
#include "xenia/gpu/primitive_processor.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/registers.h"
|
||||
#include "xenia/gpu/spirv_shader_translator.h"
|
||||
#include "xenia/gpu/vulkan/render_cache.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_shader.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/spirv/spirv_disassembler.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
// Configures and caches pipelines based on render state.
|
||||
// This is responsible for properly setting all state required for a draw
|
||||
// including shaders, various blend/etc options, and input configuration.
|
||||
class VulkanCommandProcessor;
|
||||
|
||||
// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D
|
||||
// implementations.
|
||||
class VulkanPipelineCache {
|
||||
public:
|
||||
enum class UpdateStatus {
|
||||
kCompatible,
|
||||
kMismatch,
|
||||
kError,
|
||||
static constexpr size_t kLayoutUIDEmpty = 0;
|
||||
|
||||
class PipelineLayoutProvider {
|
||||
public:
|
||||
virtual ~PipelineLayoutProvider() {}
|
||||
virtual VkPipelineLayout GetPipelineLayout() const = 0;
|
||||
|
||||
protected:
|
||||
PipelineLayoutProvider() = default;
|
||||
};
|
||||
|
||||
VulkanPipelineCache(RegisterFile* register_file,
|
||||
const ui::vulkan::VulkanProvider& provider);
|
||||
VulkanPipelineCache(VulkanCommandProcessor& command_processor,
|
||||
const RegisterFile& register_file,
|
||||
VulkanRenderTargetCache& render_target_cache,
|
||||
VkShaderStageFlags guest_shader_vertex_stages);
|
||||
~VulkanPipelineCache();
|
||||
|
||||
VkResult Initialize(VkDescriptorSetLayout uniform_descriptor_set_layout,
|
||||
VkDescriptorSetLayout texture_descriptor_set_layout,
|
||||
VkDescriptorSetLayout vertex_descriptor_set_layout);
|
||||
bool Initialize();
|
||||
void Shutdown();
|
||||
|
||||
// Loads a shader from the cache, possibly translating it.
|
||||
VulkanShader* LoadShader(xenos::ShaderType shader_type,
|
||||
uint32_t guest_address, const uint32_t* host_address,
|
||||
uint32_t dword_count);
|
||||
const uint32_t* host_address, uint32_t dword_count);
|
||||
// Analyze shader microcode on the translator thread.
|
||||
void AnalyzeShaderUcode(Shader& shader) {
|
||||
shader.AnalyzeUcode(ucode_disasm_buffer_);
|
||||
}
|
||||
|
||||
// Configures a pipeline using the current render state and the given render
|
||||
// pass. If a previously available pipeline is available it will be used,
|
||||
// otherwise a new one may be created. Any state that can be set dynamically
|
||||
// in the command buffer is issued at this time.
|
||||
// Returns whether the pipeline could be successfully created.
|
||||
UpdateStatus ConfigurePipeline(VkCommandBuffer command_buffer,
|
||||
const RenderState* render_state,
|
||||
VulkanShader* vertex_shader,
|
||||
VulkanShader* pixel_shader,
|
||||
xenos::PrimitiveType primitive_type,
|
||||
VkPipeline* pipeline_out);
|
||||
// Retrieves the shader modification for the current state. The shader must
|
||||
// have microcode analyzed.
|
||||
SpirvShaderTranslator::Modification GetCurrentVertexShaderModification(
|
||||
const Shader& shader,
|
||||
Shader::HostVertexShaderType host_vertex_shader_type) const;
|
||||
SpirvShaderTranslator::Modification GetCurrentPixelShaderModification(
|
||||
const Shader& shader, uint32_t normalized_color_mask) const;
|
||||
|
||||
// Sets required dynamic state on the command buffer.
|
||||
// Only state that has changed since the last call will be set unless
|
||||
// full_update is true.
|
||||
bool SetDynamicState(VkCommandBuffer command_buffer, bool full_update);
|
||||
|
||||
// Pipeline layout shared by all pipelines.
|
||||
VkPipelineLayout pipeline_layout() const { return pipeline_layout_; }
|
||||
|
||||
// Clears all cached content.
|
||||
void ClearCache();
|
||||
bool EnsureShadersTranslated(VulkanShader::VulkanTranslation* vertex_shader,
|
||||
VulkanShader::VulkanTranslation* pixel_shader);
|
||||
// TODO(Triang3l): Return a deferred creation handle.
|
||||
bool ConfigurePipeline(
|
||||
VulkanShader::VulkanTranslation* vertex_shader,
|
||||
VulkanShader::VulkanTranslation* pixel_shader,
|
||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||
reg::RB_DEPTHCONTROL normalized_depth_control,
|
||||
uint32_t normalized_color_mask,
|
||||
VulkanRenderTargetCache::RenderPassKey render_pass_key,
|
||||
VkPipeline& pipeline_out,
|
||||
const PipelineLayoutProvider*& pipeline_layout_out);
|
||||
|
||||
private:
|
||||
// Creates or retrieves an existing pipeline for the currently configured
|
||||
// state.
|
||||
VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key);
|
||||
enum class PipelineGeometryShader : uint32_t {
|
||||
kNone,
|
||||
kRectangleList,
|
||||
kQuadList,
|
||||
};
|
||||
|
||||
bool TranslateShader(VulkanShader::VulkanTranslation& translation);
|
||||
enum class PipelinePrimitiveTopology : uint32_t {
|
||||
kPointList,
|
||||
kLineList,
|
||||
kLineStrip,
|
||||
kTriangleList,
|
||||
kTriangleStrip,
|
||||
kTriangleFan,
|
||||
kLineListWithAdjacency,
|
||||
kPatchList,
|
||||
};
|
||||
|
||||
void DumpShaderDisasmAMD(VkPipeline pipeline);
|
||||
void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info);
|
||||
enum class PipelinePolygonMode : uint32_t {
|
||||
kFill,
|
||||
kLine,
|
||||
kPoint,
|
||||
};
|
||||
|
||||
// Gets a geometry shader used to emulate the given primitive type.
|
||||
// Returns nullptr if the primitive doesn't need to be emulated.
|
||||
VkShaderModule GetGeometryShader(xenos::PrimitiveType primitive_type,
|
||||
bool is_line_mode);
|
||||
enum class PipelineBlendFactor : uint32_t {
|
||||
kZero,
|
||||
kOne,
|
||||
kSrcColor,
|
||||
kOneMinusSrcColor,
|
||||
kDstColor,
|
||||
kOneMinusDstColor,
|
||||
kSrcAlpha,
|
||||
kOneMinusSrcAlpha,
|
||||
kDstAlpha,
|
||||
kOneMinusDstAlpha,
|
||||
kConstantColor,
|
||||
kOneMinusConstantColor,
|
||||
kConstantAlpha,
|
||||
kOneMinusConstantAlpha,
|
||||
kSrcAlphaSaturate,
|
||||
};
|
||||
|
||||
RegisterFile* register_file_ = nullptr;
|
||||
const ui::vulkan::VulkanProvider& provider_;
|
||||
// Update PipelineDescription::kVersion if anything is changed!
|
||||
XEPACKEDSTRUCT(PipelineRenderTarget, {
|
||||
PipelineBlendFactor src_color_blend_factor : 4; // 4
|
||||
PipelineBlendFactor dst_color_blend_factor : 4; // 8
|
||||
xenos::BlendOp color_blend_op : 3; // 11
|
||||
PipelineBlendFactor src_alpha_blend_factor : 4; // 15
|
||||
PipelineBlendFactor dst_alpha_blend_factor : 4; // 19
|
||||
xenos::BlendOp alpha_blend_op : 3; // 22
|
||||
uint32_t color_write_mask : 4; // 26
|
||||
});
|
||||
|
||||
// Temporary storage for AnalyzeUcode calls.
|
||||
XEPACKEDSTRUCT(PipelineDescription, {
|
||||
uint64_t vertex_shader_hash;
|
||||
uint64_t vertex_shader_modification;
|
||||
// 0 if no pixel shader.
|
||||
uint64_t pixel_shader_hash;
|
||||
uint64_t pixel_shader_modification;
|
||||
VulkanRenderTargetCache::RenderPassKey render_pass_key;
|
||||
|
||||
// Shader stages.
|
||||
PipelineGeometryShader geometry_shader : 2; // 2
|
||||
// Input assembly.
|
||||
PipelinePrimitiveTopology primitive_topology : 3; // 5
|
||||
uint32_t primitive_restart : 1; // 6
|
||||
// Rasterization.
|
||||
uint32_t depth_clamp_enable : 1; // 7
|
||||
PipelinePolygonMode polygon_mode : 2; // 9
|
||||
uint32_t cull_front : 1; // 10
|
||||
uint32_t cull_back : 1; // 11
|
||||
uint32_t front_face_clockwise : 1; // 12
|
||||
// Depth / stencil.
|
||||
uint32_t depth_write_enable : 1; // 13
|
||||
xenos::CompareFunction depth_compare_op : 3; // 15
|
||||
uint32_t stencil_test_enable : 1; // 17
|
||||
xenos::StencilOp stencil_front_fail_op : 3; // 20
|
||||
xenos::StencilOp stencil_front_pass_op : 3; // 23
|
||||
xenos::StencilOp stencil_front_depth_fail_op : 3; // 26
|
||||
xenos::CompareFunction stencil_front_compare_op : 3; // 29
|
||||
xenos::StencilOp stencil_back_fail_op : 3; // 32
|
||||
|
||||
xenos::StencilOp stencil_back_pass_op : 3; // 3
|
||||
xenos::StencilOp stencil_back_depth_fail_op : 3; // 6
|
||||
xenos::CompareFunction stencil_back_compare_op : 3; // 9
|
||||
|
||||
// Filled only for the attachments present in the render pass object.
|
||||
PipelineRenderTarget render_targets[xenos::kMaxColorRenderTargets];
|
||||
|
||||
// Including all the padding, for a stable hash.
|
||||
PipelineDescription() { Reset(); }
|
||||
PipelineDescription(const PipelineDescription& description) {
|
||||
std::memcpy(this, &description, sizeof(*this));
|
||||
}
|
||||
PipelineDescription& operator=(const PipelineDescription& description) {
|
||||
std::memcpy(this, &description, sizeof(*this));
|
||||
return *this;
|
||||
}
|
||||
bool operator==(const PipelineDescription& description) const {
|
||||
return std::memcmp(this, &description, sizeof(*this)) == 0;
|
||||
}
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
uint64_t GetHash() const { return XXH3_64bits(this, sizeof(*this)); }
|
||||
struct Hasher {
|
||||
size_t operator()(const PipelineDescription& description) const {
|
||||
return size_t(description.GetHash());
|
||||
}
|
||||
};
|
||||
});
|
||||
|
||||
struct Pipeline {
|
||||
VkPipeline pipeline = VK_NULL_HANDLE;
|
||||
// The layouts are owned by the VulkanCommandProcessor, and must not be
|
||||
// destroyed by it while the pipeline cache is active.
|
||||
const PipelineLayoutProvider* pipeline_layout;
|
||||
Pipeline(const PipelineLayoutProvider* pipeline_layout_provider)
|
||||
: pipeline_layout(pipeline_layout_provider) {}
|
||||
};
|
||||
|
||||
// Description that can be passed from the command processor thread to the
|
||||
// creation threads, with everything needed from caches pre-looked-up.
|
||||
struct PipelineCreationArguments {
|
||||
std::pair<const PipelineDescription, Pipeline>* pipeline;
|
||||
const VulkanShader::VulkanTranslation* vertex_shader;
|
||||
const VulkanShader::VulkanTranslation* pixel_shader;
|
||||
VkShaderModule geometry_shader;
|
||||
VkRenderPass render_pass;
|
||||
};
|
||||
|
||||
union GeometryShaderKey {
|
||||
uint32_t key;
|
||||
struct {
|
||||
PipelineGeometryShader type : 2;
|
||||
uint32_t interpolator_count : 5;
|
||||
uint32_t user_clip_plane_count : 3;
|
||||
uint32_t user_clip_plane_cull : 1;
|
||||
uint32_t has_vertex_kill_and : 1;
|
||||
uint32_t has_point_size : 1;
|
||||
uint32_t has_point_coordinates : 1;
|
||||
};
|
||||
|
||||
GeometryShaderKey() : key(0) { static_assert_size(*this, sizeof(key)); }
|
||||
|
||||
struct Hasher {
|
||||
size_t operator()(const GeometryShaderKey& key) const {
|
||||
return std::hash<uint32_t>{}(key.key);
|
||||
}
|
||||
};
|
||||
bool operator==(const GeometryShaderKey& other_key) const {
|
||||
return key == other_key.key;
|
||||
}
|
||||
bool operator!=(const GeometryShaderKey& other_key) const {
|
||||
return !(*this == other_key);
|
||||
}
|
||||
};
|
||||
|
||||
// Can be called from multiple threads.
|
||||
bool TranslateAnalyzedShader(SpirvShaderTranslator& translator,
|
||||
VulkanShader::VulkanTranslation& translation);
|
||||
|
||||
void WritePipelineRenderTargetDescription(
|
||||
reg::RB_BLENDCONTROL blend_control, uint32_t write_mask,
|
||||
PipelineRenderTarget& render_target_out) const;
|
||||
bool GetCurrentStateDescription(
|
||||
const VulkanShader::VulkanTranslation* vertex_shader,
|
||||
const VulkanShader::VulkanTranslation* pixel_shader,
|
||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||
reg::RB_DEPTHCONTROL normalized_depth_control,
|
||||
uint32_t normalized_color_mask,
|
||||
VulkanRenderTargetCache::RenderPassKey render_pass_key,
|
||||
PipelineDescription& description_out) const;
|
||||
|
||||
// Whether the pipeline for the given description is supported by the device.
|
||||
bool ArePipelineRequirementsMet(const PipelineDescription& description) const;
|
||||
|
||||
static bool GetGeometryShaderKey(PipelineGeometryShader geometry_shader_type,
|
||||
GeometryShaderKey& key_out);
|
||||
VkShaderModule GetGeometryShader(GeometryShaderKey key);
|
||||
|
||||
// Can be called from creation threads - all needed data must be fully set up
|
||||
// at the point of the call: shaders must be translated, pipeline layout and
|
||||
// render pass objects must be available.
|
||||
bool EnsurePipelineCreated(
|
||||
const PipelineCreationArguments& creation_arguments);
|
||||
|
||||
VulkanCommandProcessor& command_processor_;
|
||||
const RegisterFile& register_file_;
|
||||
VulkanRenderTargetCache& render_target_cache_;
|
||||
VkShaderStageFlags guest_shader_vertex_stages_;
|
||||
|
||||
// Temporary storage for AnalyzeUcode calls on the processor thread.
|
||||
StringBuffer ucode_disasm_buffer_;
|
||||
// Reusable shader translator.
|
||||
std::unique_ptr<ShaderTranslator> shader_translator_ = nullptr;
|
||||
// Disassembler used to get the SPIRV disasm. Only used in debug.
|
||||
xe::ui::spirv::SpirvDisassembler disassembler_;
|
||||
// All loaded shaders mapped by their guest hash key.
|
||||
std::unordered_map<uint64_t, VulkanShader*> shader_map_;
|
||||
// Reusable shader translator on the command processor thread.
|
||||
std::unique_ptr<SpirvShaderTranslator> shader_translator_;
|
||||
|
||||
// Vulkan pipeline cache, which in theory helps us out.
|
||||
// This can be serialized to disk and reused, if we want.
|
||||
VkPipelineCache pipeline_cache_ = nullptr;
|
||||
// Layout used for all pipelines describing our uniforms, textures, and push
|
||||
// constants.
|
||||
VkPipelineLayout pipeline_layout_ = nullptr;
|
||||
struct LayoutUID {
|
||||
size_t uid;
|
||||
size_t vector_span_offset;
|
||||
size_t vector_span_length;
|
||||
};
|
||||
std::mutex layouts_mutex_;
|
||||
// Texture binding layouts of different shaders, for obtaining layout UIDs.
|
||||
std::vector<VulkanShader::TextureBinding> texture_binding_layouts_;
|
||||
// Map of texture binding layouts used by shaders, for obtaining UIDs. Keys
|
||||
// are XXH3 hashes of layouts, values need manual collision resolution using
|
||||
// layout_vector_offset:layout_length of texture_binding_layouts_.
|
||||
std::unordered_multimap<uint64_t, LayoutUID,
|
||||
xe::hash::IdentityHasher<uint64_t>>
|
||||
texture_binding_layout_map_;
|
||||
|
||||
// Shared geometry shaders.
|
||||
struct {
|
||||
VkShaderModule line_quad_list;
|
||||
VkShaderModule point_list;
|
||||
VkShaderModule quad_list;
|
||||
VkShaderModule rect_list;
|
||||
} geometry_shaders_;
|
||||
// Ucode hash -> shader.
|
||||
std::unordered_map<uint64_t, VulkanShader*,
|
||||
xe::hash::IdentityHasher<uint64_t>>
|
||||
shaders_;
|
||||
|
||||
// Shared dummy pixel shader.
|
||||
VkShaderModule dummy_pixel_shader_;
|
||||
// Geometry shaders for Xenos primitive types not supported by Vulkan.
|
||||
// Stores VK_NULL_HANDLE if failed to create.
|
||||
std::unordered_map<GeometryShaderKey, VkShaderModule,
|
||||
GeometryShaderKey::Hasher>
|
||||
geometry_shaders_;
|
||||
|
||||
// Hash state used to incrementally produce pipeline hashes during update.
|
||||
// By the time the full update pass has run the hash will represent the
|
||||
// current state in a way that can uniquely identify the produced VkPipeline.
|
||||
XXH3_state_t hash_state_;
|
||||
// All previously generated pipelines mapped by hash.
|
||||
std::unordered_map<uint64_t, VkPipeline> cached_pipelines_;
|
||||
std::unordered_map<PipelineDescription, Pipeline, PipelineDescription::Hasher>
|
||||
pipelines_;
|
||||
|
||||
// Previously used pipeline. This matches our current state settings
|
||||
// and allows us to quickly(ish) reuse the pipeline if no registers have
|
||||
// changed.
|
||||
VkPipeline current_pipeline_ = nullptr;
|
||||
|
||||
private:
|
||||
UpdateStatus UpdateState(VulkanShader* vertex_shader,
|
||||
VulkanShader* pixel_shader,
|
||||
xenos::PrimitiveType primitive_type);
|
||||
|
||||
UpdateStatus UpdateRenderTargetState();
|
||||
UpdateStatus UpdateShaderStages(VulkanShader* vertex_shader,
|
||||
VulkanShader* pixel_shader,
|
||||
xenos::PrimitiveType primitive_type);
|
||||
UpdateStatus UpdateVertexInputState(VulkanShader* vertex_shader);
|
||||
UpdateStatus UpdateInputAssemblyState(xenos::PrimitiveType primitive_type);
|
||||
UpdateStatus UpdateViewportState();
|
||||
UpdateStatus UpdateRasterizationState(xenos::PrimitiveType primitive_type);
|
||||
UpdateStatus UpdateMultisampleState();
|
||||
UpdateStatus UpdateDepthStencilState();
|
||||
UpdateStatus UpdateColorBlendState();
|
||||
|
||||
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
|
||||
bool SetShadowRegister(float* dest, uint32_t register_name);
|
||||
bool SetShadowRegisterArray(uint32_t* dest, uint32_t num,
|
||||
uint32_t register_name);
|
||||
|
||||
struct UpdateRenderTargetsRegisters {
|
||||
uint32_t rb_modecontrol;
|
||||
reg::RB_SURFACE_INFO rb_surface_info;
|
||||
reg::RB_COLOR_INFO rb_color_info;
|
||||
reg::RB_DEPTH_INFO rb_depth_info;
|
||||
reg::RB_COLOR_INFO rb_color1_info;
|
||||
reg::RB_COLOR_INFO rb_color2_info;
|
||||
reg::RB_COLOR_INFO rb_color3_info;
|
||||
uint32_t rb_color_mask;
|
||||
uint32_t rb_depthcontrol;
|
||||
uint32_t rb_stencilrefmask;
|
||||
|
||||
UpdateRenderTargetsRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_render_targets_regs_;
|
||||
|
||||
struct UpdateShaderStagesRegisters {
|
||||
xenos::PrimitiveType primitive_type;
|
||||
uint32_t pa_su_sc_mode_cntl;
|
||||
reg::SQ_PROGRAM_CNTL sq_program_cntl;
|
||||
VulkanShader* vertex_shader;
|
||||
VulkanShader* pixel_shader;
|
||||
|
||||
UpdateShaderStagesRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_shader_stages_regs_;
|
||||
VkPipelineShaderStageCreateInfo update_shader_stages_info_[3];
|
||||
uint32_t update_shader_stages_stage_count_ = 0;
|
||||
|
||||
struct UpdateVertexInputStateRegisters {
|
||||
VulkanShader* vertex_shader;
|
||||
|
||||
UpdateVertexInputStateRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_vertex_input_state_regs_;
|
||||
VkPipelineVertexInputStateCreateInfo update_vertex_input_state_info_;
|
||||
VkVertexInputBindingDescription update_vertex_input_state_binding_descrs_[32];
|
||||
VkVertexInputAttributeDescription
|
||||
update_vertex_input_state_attrib_descrs_[96];
|
||||
|
||||
struct UpdateInputAssemblyStateRegisters {
|
||||
xenos::PrimitiveType primitive_type;
|
||||
uint32_t pa_su_sc_mode_cntl;
|
||||
uint32_t multi_prim_ib_reset_index;
|
||||
|
||||
UpdateInputAssemblyStateRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_input_assembly_state_regs_;
|
||||
VkPipelineInputAssemblyStateCreateInfo update_input_assembly_state_info_;
|
||||
|
||||
struct UpdateViewportStateRegisters {
|
||||
// uint32_t pa_cl_clip_cntl;
|
||||
uint32_t rb_surface_info;
|
||||
uint32_t pa_cl_vte_cntl;
|
||||
uint32_t pa_su_sc_mode_cntl;
|
||||
uint32_t pa_sc_window_offset;
|
||||
uint32_t pa_sc_window_scissor_tl;
|
||||
uint32_t pa_sc_window_scissor_br;
|
||||
float pa_cl_vport_xoffset;
|
||||
float pa_cl_vport_yoffset;
|
||||
float pa_cl_vport_zoffset;
|
||||
float pa_cl_vport_xscale;
|
||||
float pa_cl_vport_yscale;
|
||||
float pa_cl_vport_zscale;
|
||||
|
||||
UpdateViewportStateRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_viewport_state_regs_;
|
||||
VkPipelineViewportStateCreateInfo update_viewport_state_info_;
|
||||
|
||||
struct UpdateRasterizationStateRegisters {
|
||||
xenos::PrimitiveType primitive_type;
|
||||
uint32_t pa_cl_clip_cntl;
|
||||
uint32_t pa_su_sc_mode_cntl;
|
||||
uint32_t pa_sc_screen_scissor_tl;
|
||||
uint32_t pa_sc_screen_scissor_br;
|
||||
uint32_t pa_sc_viz_query;
|
||||
uint32_t pa_su_poly_offset_enable;
|
||||
uint32_t multi_prim_ib_reset_index;
|
||||
|
||||
UpdateRasterizationStateRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_rasterization_state_regs_;
|
||||
VkPipelineRasterizationStateCreateInfo update_rasterization_state_info_;
|
||||
|
||||
struct UpdateMultisampleStateeRegisters {
|
||||
uint32_t pa_sc_aa_config;
|
||||
uint32_t pa_su_sc_mode_cntl;
|
||||
uint32_t rb_surface_info;
|
||||
|
||||
UpdateMultisampleStateeRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_multisample_state_regs_;
|
||||
VkPipelineMultisampleStateCreateInfo update_multisample_state_info_;
|
||||
|
||||
struct UpdateDepthStencilStateRegisters {
|
||||
uint32_t rb_depthcontrol;
|
||||
uint32_t rb_stencilrefmask;
|
||||
|
||||
UpdateDepthStencilStateRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_depth_stencil_state_regs_;
|
||||
VkPipelineDepthStencilStateCreateInfo update_depth_stencil_state_info_;
|
||||
|
||||
struct UpdateColorBlendStateRegisters {
|
||||
uint32_t rb_color_mask;
|
||||
uint32_t rb_blendcontrol[4];
|
||||
uint32_t rb_modecontrol;
|
||||
|
||||
UpdateColorBlendStateRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_color_blend_state_regs_;
|
||||
VkPipelineColorBlendStateCreateInfo update_color_blend_state_info_;
|
||||
VkPipelineColorBlendAttachmentState update_color_blend_attachment_states_[4];
|
||||
|
||||
struct SetDynamicStateRegisters {
|
||||
uint32_t pa_sc_window_offset;
|
||||
|
||||
uint32_t pa_su_sc_mode_cntl;
|
||||
uint32_t pa_sc_window_scissor_tl;
|
||||
uint32_t pa_sc_window_scissor_br;
|
||||
|
||||
uint32_t rb_surface_info;
|
||||
uint32_t pa_su_sc_vtx_cntl;
|
||||
// Bias is in Vulkan units because depth format may potentially effect it.
|
||||
float pa_su_poly_offset_scale;
|
||||
float pa_su_poly_offset_offset;
|
||||
uint32_t pa_cl_vte_cntl;
|
||||
float pa_cl_vport_xoffset;
|
||||
float pa_cl_vport_yoffset;
|
||||
float pa_cl_vport_zoffset;
|
||||
float pa_cl_vport_xscale;
|
||||
float pa_cl_vport_yscale;
|
||||
float pa_cl_vport_zscale;
|
||||
|
||||
float rb_blend_rgba[4];
|
||||
uint32_t rb_stencilrefmask;
|
||||
|
||||
reg::SQ_PROGRAM_CNTL sq_program_cntl;
|
||||
uint32_t sq_context_misc;
|
||||
uint32_t rb_colorcontrol;
|
||||
reg::RB_COLOR_INFO rb_color_info;
|
||||
reg::RB_COLOR_INFO rb_color1_info;
|
||||
reg::RB_COLOR_INFO rb_color2_info;
|
||||
reg::RB_COLOR_INFO rb_color3_info;
|
||||
float rb_alpha_ref;
|
||||
uint32_t pa_su_point_size;
|
||||
|
||||
SetDynamicStateRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} set_dynamic_state_registers_;
|
||||
// Previously used pipeline, to avoid lookups if the state wasn't changed.
|
||||
const std::pair<const PipelineDescription, Pipeline>* last_pipeline_ =
|
||||
nullptr;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_VULKAN_VULKAN_PIPELINE_CACHE_H_
|
||||
#endif // XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
|
||||
|
|
|
@ -0,0 +1,229 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2021 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/vulkan/vulkan_primitive_processor.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
VulkanPrimitiveProcessor::~VulkanPrimitiveProcessor() {
  // from_destructor == true makes Shutdown() skip ShutdownCommon().
  Shutdown(true);
}
|
||||
|
||||
// Performs backend-specific initialization: runs the common setup of the base
// PrimitiveProcessor and creates the upload buffer pool used for per-frame
// host-converted index data. Returns false (after cleaning up partially
// initialized state) on failure.
bool VulkanPrimitiveProcessor::Initialize() {
  // TODO(Triang3l): fullDrawIndexUint32 feature check and indirect index fetch.
  const ui::vulkan::VulkanProvider& provider =
      command_processor_.GetVulkanProvider();
  const VkPhysicalDeviceFeatures& device_features = provider.device_features();
  // May be null when VK_KHR_portability_subset is not involved - in that case
  // triangle fans are treated as available in the check below.
  const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
      device_portability_subset_features =
          provider.device_portability_subset_features();
  // NOTE(review): the meaning of each flag follows the parameter order of
  // PrimitiveProcessor::InitializeCommon - confirm against its declaration.
  if (!InitializeCommon(true,
                        !device_portability_subset_features ||
                            device_portability_subset_features->triangleFans,
                        false, device_features.geometryShader)) {
    Shutdown();
    return false;
  }
  // Converted index data is suballocated from upload pool pages; pages are at
  // least kMinRequiredConvertedIndexBufferSize bytes.
  frame_index_buffer_pool_ =
      std::make_unique<ui::vulkan::VulkanUploadBufferPool>(
          command_processor_.GetVulkanProvider(),
          VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
          std::max(size_t(kMinRequiredConvertedIndexBufferSize),
                   ui::GraphicsUploadBufferPool::kDefaultPageSize));
  return true;
}
|
||||
|
||||
// Releases all Vulkan objects owned by the primitive processor. Buffer and
// memory handles are nulled via DestroyAndNullHandle, so repeated calls are
// harmless. When invoked from the destructor, the base-class ShutdownCommon()
// is skipped.
void VulkanPrimitiveProcessor::Shutdown(bool from_destructor) {
  const ui::vulkan::VulkanProvider& provider =
      command_processor_.GetVulkanProvider();
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  VkDevice device = provider.device();

  // Per-frame converted index buffer references and their backing pool.
  frame_index_buffers_.clear();
  frame_index_buffer_pool_.reset();
  // The built-in index buffer's staging (upload) copy and the device-local
  // buffer itself, each with its own memory allocation.
  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
                                         builtin_index_buffer_upload_);
  ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
                                         builtin_index_buffer_upload_memory_);
  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
                                         builtin_index_buffer_);
  ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
                                         builtin_index_buffer_memory_);

  if (!from_destructor) {
    ShutdownCommon();
  }
}
|
||||
|
||||
// Called when the GPU has completed more submissions. Once the submission that
// copied the built-in index data into the device-local buffer has completed,
// the staging upload buffer and its memory are no longer needed and are
// destroyed here.
void VulkanPrimitiveProcessor::CompletedSubmissionUpdated() {
  // builtin_index_buffer_upload_submission_ is UINT64_MAX until the copy has
  // actually been recorded in BeginSubmission, so this stays false until then.
  if (builtin_index_buffer_upload_ != VK_NULL_HANDLE &&
      command_processor_.GetCompletedSubmission() >=
          builtin_index_buffer_upload_submission_) {
    const ui::vulkan::VulkanProvider& provider =
        command_processor_.GetVulkanProvider();
    const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
    VkDevice device = provider.device();
    ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
                                           builtin_index_buffer_upload_);
    ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
                                           builtin_index_buffer_upload_memory_);
  }
}
|
||||
|
||||
// In the first submission after the built-in index buffer was initialized,
// records the copy from the staging upload buffer to the device-local buffer,
// followed by a barrier making the data visible to index fetch.
void VulkanPrimitiveProcessor::BeginSubmission() {
  if (builtin_index_buffer_upload_ != VK_NULL_HANDLE &&
      builtin_index_buffer_upload_submission_ == UINT64_MAX) {
    // No need to submit deferred barriers - builtin_index_buffer_ has never
    // been used yet, and builtin_index_buffer_upload_ is written before
    // submitting commands reading it.

    // vkCmdCopyBuffer must be recorded outside a render pass.
    command_processor_.EndRenderPass();

    DeferredCommandBuffer& command_buffer =
        command_processor_.deferred_command_buffer();

    VkBufferCopy* copy_region = command_buffer.CmdCopyBufferEmplace(
        builtin_index_buffer_upload_, builtin_index_buffer_, 1);
    copy_region->srcOffset = 0;
    copy_region->dstOffset = 0;
    copy_region->size = builtin_index_buffer_size_;

    // Transfer write -> index read, for draws fetching from the buffer.
    command_processor_.PushBufferMemoryBarrier(
        builtin_index_buffer_, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT,
        VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
        VK_ACCESS_INDEX_READ_BIT);

    // Remember which submission performs the upload so the staging buffer can
    // be destroyed in CompletedSubmissionUpdated once it has completed.
    builtin_index_buffer_upload_submission_ =
        command_processor_.GetCurrentSubmission();
  }
}
|
||||
|
||||
void VulkanPrimitiveProcessor::BeginFrame() {
  // Recycle upload pool pages belonging to frames the GPU has finished with.
  const auto completed_frame = command_processor_.GetCompletedFrame();
  frame_index_buffer_pool_->Reclaim(completed_frame);
}
|
||||
|
||||
// Flushes CPU writes to the frame index buffer pool so they are visible to the
// GPU in the submission being ended.
void VulkanPrimitiveProcessor::EndSubmission() {
  frame_index_buffer_pool_->FlushWrites();
}
|
||||
|
||||
// Drops per-frame state: the common conversion cache of the base class and the
// index buffer references handed out during this frame.
void VulkanPrimitiveProcessor::EndFrame() {
  ClearPerFrameCache();
  // Invalidates all backend handles returned by
  // RequestHostConvertedIndexBufferForCurrentFrame during this frame.
  frame_index_buffers_.clear();
}
|
||||
|
||||
bool VulkanPrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
|
||||
uint32_t index_count, std::function<void(uint16_t*)> fill_callback) {
|
||||
assert_not_zero(index_count);
|
||||
assert_true(builtin_index_buffer_ == VK_NULL_HANDLE);
|
||||
assert_true(builtin_index_buffer_memory_ == VK_NULL_HANDLE);
|
||||
assert_true(builtin_index_buffer_upload_ == VK_NULL_HANDLE);
|
||||
assert_true(builtin_index_buffer_upload_memory_ == VK_NULL_HANDLE);
|
||||
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
command_processor_.GetVulkanProvider();
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
||||
VkDevice device = provider.device();
|
||||
|
||||
builtin_index_buffer_size_ = VkDeviceSize(sizeof(uint16_t) * index_count);
|
||||
if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
|
||||
provider, builtin_index_buffer_size_,
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
|
||||
ui::vulkan::util::MemoryPurpose::kDeviceLocal, builtin_index_buffer_,
|
||||
builtin_index_buffer_memory_)) {
|
||||
XELOGE(
|
||||
"Vulkan primitive processor: Failed to create the built-in index "
|
||||
"buffer GPU resource with {} 16-bit indices",
|
||||
index_count);
|
||||
return false;
|
||||
}
|
||||
uint32_t upload_memory_type;
|
||||
if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
|
||||
provider, builtin_index_buffer_size_,
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
||||
ui::vulkan::util::MemoryPurpose::kUpload,
|
||||
builtin_index_buffer_upload_, builtin_index_buffer_upload_memory_,
|
||||
&upload_memory_type)) {
|
||||
XELOGE(
|
||||
"Vulkan primitive processor: Failed to create the built-in index "
|
||||
"buffer upload resource with {} 16-bit indices",
|
||||
index_count);
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
|
||||
builtin_index_buffer_);
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
|
||||
builtin_index_buffer_memory_);
|
||||
return false;
|
||||
}
|
||||
|
||||
void* mapping;
|
||||
if (dfn.vkMapMemory(device, builtin_index_buffer_upload_memory_, 0,
|
||||
VK_WHOLE_SIZE, 0, &mapping) != VK_SUCCESS) {
|
||||
XELOGE(
|
||||
"Vulkan primitive processor: Failed to map the built-in index buffer "
|
||||
"upload resource with {} 16-bit indices",
|
||||
index_count);
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
|
||||
builtin_index_buffer_upload_);
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
|
||||
builtin_index_buffer_upload_memory_);
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
|
||||
builtin_index_buffer_);
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
|
||||
builtin_index_buffer_memory_);
|
||||
return false;
|
||||
}
|
||||
fill_callback(reinterpret_cast<uint16_t*>(mapping));
|
||||
ui::vulkan::util::FlushMappedMemoryRange(
|
||||
provider, builtin_index_buffer_memory_, upload_memory_type);
|
||||
dfn.vkUnmapMemory(device, builtin_index_buffer_upload_memory_);
|
||||
|
||||
// Schedule uploading in the first submission.
|
||||
builtin_index_buffer_upload_submission_ = UINT64_MAX;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Suballocates CPU-writable space for index_count host-converted indices of
// the given format from the current frame's upload pool, optionally shifting
// the returned pointer so that it is SIMD-coaligned with the original guest
// address. Stores a handle for GetConvertedIndexBuffer in backend_handle_out
// and returns the write pointer, or nullptr if the pool request failed.
void* VulkanPrimitiveProcessor::RequestHostConvertedIndexBufferForCurrentFrame(
    xenos::IndexFormat format, uint32_t index_count, bool coalign_for_simd,
    uint32_t coalignment_original_address, size_t& backend_handle_out) {
  // 16-bit vs. 32-bit index element size.
  size_t element_size;
  if (format == xenos::IndexFormat::kInt16) {
    element_size = sizeof(uint16_t);
  } else {
    element_size = sizeof(uint32_t);
  }
  // Extra SIMD-sized slack so the pointer can be shifted for coalignment.
  size_t request_size = element_size * index_count;
  if (coalign_for_simd) {
    request_size += XE_GPU_PRIMITIVE_PROCESSOR_SIMD_SIZE;
  }
  VkBuffer pool_buffer;
  VkDeviceSize pool_offset;
  uint8_t* write_ptr = frame_index_buffer_pool_->Request(
      command_processor_.GetCurrentFrame(), request_size, element_size,
      pool_buffer, pool_offset);
  if (!write_ptr) {
    return nullptr;
  }
  if (coalign_for_simd) {
    ptrdiff_t simd_offset =
        GetSimdCoalignmentOffset(write_ptr, coalignment_original_address);
    write_ptr += simd_offset;
    pool_offset = VkDeviceSize(pool_offset + simd_offset);
  }
  // The handle is simply the index into this frame's buffer reference list.
  backend_handle_out = frame_index_buffers_.size();
  frame_index_buffers_.emplace_back(pool_buffer, pool_offset);
  return write_ptr;
}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -0,0 +1,92 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2021 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_
|
||||
#define XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_
|
||||
|
||||
#include <cstdint>
#include <deque>
#include <functional>
#include <memory>
#include <utility>

#include "xenia/base/assert.h"
#include "xenia/gpu/primitive_processor.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
class VulkanCommandProcessor;
|
||||
|
||||
// Vulkan backend of the guest primitive processor: converts guest index data
// into Vulkan index buffers and provides the built-in index buffer used for
// primitive type emulation.
class VulkanPrimitiveProcessor final : public PrimitiveProcessor {
 public:
  VulkanPrimitiveProcessor(const RegisterFile& register_file, Memory& memory,
                           TraceWriter& trace_writer,
                           SharedMemory& shared_memory,
                           VulkanCommandProcessor& command_processor)
      : PrimitiveProcessor(register_file, memory, trace_writer, shared_memory),
        command_processor_(command_processor) {}
  ~VulkanPrimitiveProcessor();

  bool Initialize();
  void Shutdown(bool from_destructor = false);
  // Drops the upload buffer pool's cached pages.
  void ClearCache() { frame_index_buffer_pool_->ClearCache(); }

  // Submission / frame lifecycle hooks, called by the command processor.
  void CompletedSubmissionUpdated();
  void BeginSubmission();
  void BeginFrame();
  void EndSubmission();
  void EndFrame();

  // Returns the device-local built-in index buffer and the byte offset of the
  // sub-range identified by the handle.
  std::pair<VkBuffer, VkDeviceSize> GetBuiltinIndexBuffer(size_t handle) const {
    assert_not_null(builtin_index_buffer_);
    return std::make_pair(
        builtin_index_buffer_,
        VkDeviceSize(GetBuiltinIndexBufferOffsetBytes(handle)));
  }
  // Returns the buffer and offset for a handle produced by
  // RequestHostConvertedIndexBufferForCurrentFrame in the current frame.
  std::pair<VkBuffer, VkDeviceSize> GetConvertedIndexBuffer(
      size_t handle) const {
    return frame_index_buffers_[handle];
  }

 protected:
  // PrimitiveProcessor overrides backed by Vulkan buffers.
  bool InitializeBuiltin16BitIndexBuffer(
      uint32_t index_count,
      std::function<void(uint16_t*)> fill_callback) override;

  void* RequestHostConvertedIndexBufferForCurrentFrame(
      xenos::IndexFormat format, uint32_t index_count, bool coalign_for_simd,
      uint32_t coalignment_original_address,
      size_t& backend_handle_out) override;

 private:
  VulkanCommandProcessor& command_processor_;

  // Device-local built-in index buffer with its dedicated allocation.
  VkDeviceSize builtin_index_buffer_size_ = 0;
  VkBuffer builtin_index_buffer_ = VK_NULL_HANDLE;
  VkDeviceMemory builtin_index_buffer_memory_ = VK_NULL_HANDLE;
  // Temporary buffer copied in the beginning of the first submission for
  // uploading to builtin_index_buffer_, destroyed when the submission in which
  // it was uploaded has completed.
  VkBuffer builtin_index_buffer_upload_ = VK_NULL_HANDLE;
  VkDeviceMemory builtin_index_buffer_upload_memory_ = VK_NULL_HANDLE;
  // UINT64_MAX means not uploaded yet and needs uploading in the first
  // submission (if the upload buffer exists at all).
  uint64_t builtin_index_buffer_upload_submission_ = UINT64_MAX;

  // Pool the per-frame converted index data is suballocated from.
  std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> frame_index_buffer_pool_;
  // Indexed by the backend handles.
  std::deque<std::pair<VkBuffer, VkDeviceSize>> frame_index_buffers_;
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,905 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_
|
||||
#define XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "xenia/base/hash.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
#include "xenia/gpu/render_target_cache.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_texture_cache.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/vulkan/single_layout_descriptor_set_pool.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
class VulkanCommandProcessor;
|
||||
|
||||
class VulkanRenderTargetCache final : public RenderTargetCache {
|
||||
public:
|
||||
// Description of a guest render pass, packed into a single uint32_t (the
// bitfield struct and `key` alias each other) so it can be hashed, compared
// and sorted cheaply. The trailing `// N` comments track the running bit
// count within the 32-bit word.
union RenderPassKey {
  struct {
    // If emulating 2x as 4x, this is still 2x for simplicity of using this
    // field to make guest-related decisions. Render pass objects are not very
    // expensive, and their dependencies can't be shared between 2x-as-4x and
    // true 4x MSAA passes (framebuffers because render target cache render
    // targets are different for 2x and 4x guest MSAA, pipelines because the
    // sample mask will have 2 samples excluded for 2x-as-4x).
    xenos::MsaaSamples msaa_samples : xenos::kMsaaSamplesBits;  // 2
    // << 0 is depth, << 1...4 is color.
    uint32_t depth_and_color_used : 1 + xenos::kMaxColorRenderTargets;  // 7
    // 0 for unused attachments.
    // If VK_FORMAT_D24_UNORM_S8_UINT is not supported, this must be kD24FS8
    // even for kD24S8.
    xenos::DepthRenderTargetFormat depth_format
        : xenos::kDepthRenderTargetFormatBits;  // 8
    // Linear or sRGB included if host sRGB is used.
    xenos::ColorRenderTargetFormat color_0_view_format
        : xenos::kColorRenderTargetFormatBits;  // 12
    xenos::ColorRenderTargetFormat color_1_view_format
        : xenos::kColorRenderTargetFormatBits;  // 16
    xenos::ColorRenderTargetFormat color_2_view_format
        : xenos::kColorRenderTargetFormatBits;  // 20
    xenos::ColorRenderTargetFormat color_3_view_format
        : xenos::kColorRenderTargetFormatBits;  // 24
    uint32_t color_rts_use_transfer_formats : 1;  // 25
  };
  // Aliases the bitfields above; default-initialized to all-zero.
  uint32_t key = 0;
  // Hashes the packed 32-bit representation.
  struct Hasher {
    size_t operator()(const RenderPassKey& key) const {
      return std::hash<uint32_t>{}(key.key);
    }
  };
  // All comparisons operate on the packed representation.
  bool operator==(const RenderPassKey& other_key) const {
    return key == other_key.key;
  }
  bool operator!=(const RenderPassKey& other_key) const {
    return !(*this == other_key);
  }
  bool operator<(const RenderPassKey& other_key) const {
    return key < other_key.key;
  }
};
|
||||
static_assert_size(RenderPassKey, sizeof(uint32_t));
|
||||
|
||||
// A framebuffer handle paired with its host extent, which is stored alongside
// because a VkFramebuffer does not expose the dimensions it was created with.
struct Framebuffer {
  VkFramebuffer framebuffer;
  VkExtent2D host_extent;
  Framebuffer(VkFramebuffer framebuffer, const VkExtent2D& host_extent)
      : framebuffer(framebuffer), host_extent(host_extent) {}
};
|
||||
|
||||
VulkanRenderTargetCache(const RegisterFile& register_file,
|
||||
const Memory& memory, TraceWriter& trace_writer,
|
||||
uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y,
|
||||
VulkanCommandProcessor& command_processor);
|
||||
~VulkanRenderTargetCache();
|
||||
|
||||
// Transient descriptor set layouts must be initialized in the command
|
||||
// processor.
|
||||
bool Initialize();
|
||||
void Shutdown(bool from_destructor = false);
|
||||
void ClearCache() override;
|
||||
|
||||
void CompletedSubmissionUpdated();
|
||||
void EndSubmission();
|
||||
|
||||
// TODO(Triang3l): Fragment shader interlock.
|
||||
// Only the host render targets path is implemented by this cache currently
// (fragment shader interlock is a TODO, see above).
Path GetPath() const override { return Path::kHostRenderTargets; }
|
||||
|
||||
// Performs the resolve to a shared memory area according to the current
|
||||
// register values, and also clears the render targets if needed. Must be in a
|
||||
// frame for calling.
|
||||
bool Resolve(const Memory& memory, VulkanSharedMemory& shared_memory,
|
||||
VulkanTextureCache& texture_cache, uint32_t& written_address_out,
|
||||
uint32_t& written_length_out);
|
||||
|
||||
bool Update(bool is_rasterization_done,
|
||||
reg::RB_DEPTHCONTROL normalized_depth_control,
|
||||
uint32_t normalized_color_mask,
|
||||
const Shader& vertex_shader) override;
|
||||
// Binding information for the last successful update.
|
||||
// Render pass key captured by the last successful Update() call.
RenderPassKey last_update_render_pass_key() const {
  return last_update_render_pass_key_;
}
// Render pass object captured by the last successful Update() call.
VkRenderPass last_update_render_pass() const {
  return last_update_render_pass_;
}
// Framebuffer captured by the last successful Update() call.
const Framebuffer* last_update_framebuffer() const {
  return last_update_framebuffer_;
}
|
||||
|
||||
// Using R16G16[B16A16]_SNORM, which are -1...1, not the needed -32...32.
|
||||
// Persistent data doesn't depend on this, so can be overriden by per-game
|
||||
// configuration.
|
||||
bool IsFixedRG16TruncatedToMinus1To1() const {
|
||||
// TODO(Triang3l): Not float16 condition.
|
||||
return GetPath() == Path::kHostRenderTargets &&
|
||||
!cvars::snorm16_render_target_full_range;
|
||||
}
|
||||
bool IsFixedRGBA16TruncatedToMinus1To1() const {
|
||||
// TODO(Triang3l): Not float16 condition.
|
||||
return GetPath() == Path::kHostRenderTargets &&
|
||||
!cvars::snorm16_render_target_full_range;
|
||||
}
|
||||
|
||||
// Whether a Vulkan format for unorm24 guest depth is available on the device
// (when not, kD24FS8 must be used even for kD24S8 - see
// RenderPassKey::depth_format).
bool depth_unorm24_vulkan_format_supported() const {
  return depth_unorm24_vulkan_format_supported_;
}
// NOTE(review): presumably whether float24 depth conversion rounds rather
// than truncates on this host - confirm against where the flag is set.
bool depth_float24_round() const { return depth_float24_round_; }

// Whether 2x MSAA is available for passes with attachments.
bool msaa_2x_attachments_supported() const {
  return msaa_2x_attachments_supported_;
}
// Whether 2x MSAA is available for passes without attachments.
bool msaa_2x_no_attachments_supported() const {
  return msaa_2x_no_attachments_supported_;
}
|
||||
bool IsMsaa2xSupported(bool subpass_has_attachments) const {
|
||||
return subpass_has_attachments ? msaa_2x_attachments_supported_
|
||||
: msaa_2x_no_attachments_supported_;
|
||||
}
|
||||
|
||||
// Returns the render pass object, or VK_NULL_HANDLE if failed to create.
|
||||
// A render pass managed by the render target cache may be ended and resumed
|
||||
// at any time (to allow for things like copying and texture loading).
|
||||
VkRenderPass GetRenderPass(RenderPassKey key);
|
||||
|
||||
VkFormat GetDepthVulkanFormat(xenos::DepthRenderTargetFormat format) const;
|
||||
VkFormat GetColorVulkanFormat(xenos::ColorRenderTargetFormat format) const;
|
||||
VkFormat GetColorOwnershipTransferVulkanFormat(
|
||||
xenos::ColorRenderTargetFormat format,
|
||||
bool* is_integer_out = nullptr) const;
|
||||
|
||||
protected:
|
||||
uint32_t GetMaxRenderTargetWidth() const override;
|
||||
uint32_t GetMaxRenderTargetHeight() const override;
|
||||
|
||||
RenderTarget* CreateRenderTarget(RenderTargetKey key) override;
|
||||
|
||||
bool IsHostDepthEncodingDifferent(
|
||||
xenos::DepthRenderTargetFormat format) const override;
|
||||
|
||||
private:
|
||||
// Current consumer of the EDRAM scratch buffer, used to derive the pipeline
// stage and access masks for barriers (see GetEdramBufferUsageMasks and
// UseEdramBuffer).
enum class EdramBufferUsage {
  // There's no need for combined fragment and compute usages.
  // With host render targets, the usual usage sequence is as follows:
  // - Optionally compute writes - host depth copy storing for EDRAM range
  // ownership transfers.
  // - Optionally fragment reads - host depth copy storing for EDRAM range
  // ownership transfers.
  // - Compute writes - copying from host render targets during resolving.
  // - Compute reads - writing to the shared memory during resolving.
  // With the render backend implementation based on fragment shader
  // interlocks, it's:
  // - Fragment reads and writes - depth / stencil and color operations.
  // - Compute reads - writing to the shared memory during resolving.
  // So, fragment reads and compute reads normally don't follow each other,
  // and there's no need to amortize the cost of a read > read barrier in an
  // exceptional situation by using a wider barrier in the normal scenario.

  // Host depth copy storing.
  kFragmentRead,
  // Fragment shader interlock depth / stencil and color operations.
  kFragmentReadWrite,
  // Resolve - copying to the shared memory.
  kComputeRead,
  // Resolve - copying from host render targets.
  kComputeWrite,
  // Trace recording.
  kTransferRead,
  // Trace playback.
  kTransferWrite,
};

// Whether the EDRAM buffer holds uncommitted shader writes, and how strong a
// barrier is required before the next use (see MarkEdramBufferModified and
// CommitEdramBufferShaderWrites).
enum class EdramBufferModificationStatus {
  // The values are ordered by how strong the barrier conditions are.
  // No uncommitted shader writes.
  kUnmodified,
  // Need to commit before the next fragment shader interlock usage with
  // overlap.
  kViaFragmentShaderInterlock,
  // Need to commit before any next fragment shader interlock usage.
  kViaUnordered,
};
|
||||
|
||||
enum ResolveCopyDescriptorSet : uint32_t {
|
||||
// Never changes.
|
||||
kResolveCopyDescriptorSetEdram,
|
||||
// Shared memory or a region in it.
|
||||
kResolveCopyDescriptorSetDest,
|
||||
|
||||
kResolveCopyDescriptorSetCount,
|
||||
};
|
||||
|
||||
struct ResolveCopyShaderCode {
|
||||
const uint32_t* unscaled;
|
||||
size_t unscaled_size_bytes;
|
||||
const uint32_t* scaled;
|
||||
size_t scaled_size_bytes;
|
||||
};
|
||||
|
||||
static void GetEdramBufferUsageMasks(EdramBufferUsage usage,
|
||||
VkPipelineStageFlags& stage_mask_out,
|
||||
VkAccessFlags& access_mask_out);
|
||||
void UseEdramBuffer(EdramBufferUsage new_usage);
|
||||
void MarkEdramBufferModified(
|
||||
EdramBufferModificationStatus modification_status =
|
||||
EdramBufferModificationStatus::kViaUnordered);
|
||||
void CommitEdramBufferShaderWrites(
|
||||
EdramBufferModificationStatus commit_status =
|
||||
EdramBufferModificationStatus::kViaFragmentShaderInterlock);
|
||||
|
||||
VulkanCommandProcessor& command_processor_;
|
||||
TraceWriter& trace_writer_;
|
||||
|
||||
// Accessible in fragment and compute shaders.
|
||||
VkDescriptorSetLayout descriptor_set_layout_storage_buffer_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout descriptor_set_layout_sampled_image_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout descriptor_set_layout_sampled_image_x2_ =
|
||||
VK_NULL_HANDLE;
|
||||
|
||||
std::unique_ptr<ui::vulkan::SingleLayoutDescriptorSetPool>
|
||||
descriptor_set_pool_sampled_image_;
|
||||
std::unique_ptr<ui::vulkan::SingleLayoutDescriptorSetPool>
|
||||
descriptor_set_pool_sampled_image_x2_;
|
||||
|
||||
VkDeviceMemory edram_buffer_memory_ = VK_NULL_HANDLE;
|
||||
VkBuffer edram_buffer_ = VK_NULL_HANDLE;
|
||||
EdramBufferUsage edram_buffer_usage_;
|
||||
EdramBufferModificationStatus edram_buffer_modification_status_ =
|
||||
EdramBufferModificationStatus::kUnmodified;
|
||||
VkDescriptorPool edram_storage_buffer_descriptor_pool_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSet edram_storage_buffer_descriptor_set_;
|
||||
|
||||
VkPipelineLayout resolve_copy_pipeline_layout_ = VK_NULL_HANDLE;
|
||||
static const ResolveCopyShaderCode
|
||||
kResolveCopyShaders[size_t(draw_util::ResolveCopyShaderIndex::kCount)];
|
||||
std::array<VkPipeline, size_t(draw_util::ResolveCopyShaderIndex::kCount)>
|
||||
resolve_copy_pipelines_{};
|
||||
|
||||
// RenderPassKey::key -> VkRenderPass.
|
||||
// VK_NULL_HANDLE if failed to create.
|
||||
std::unordered_map<uint32_t, VkRenderPass> render_passes_;
|
||||
|
||||
// For host render targets.
|
||||
|
||||
// Can only be destroyed when framebuffers referencing it are destroyed!
|
||||
// A host Vulkan render target: the image, its memory, and the image views
// used for drawing and for sampling during EDRAM range ownership transfers.
// Also tracks the image's current stage / access mask / layout so barriers
// can be generated between usages.
class VulkanRenderTarget final : public RenderTarget {
 public:
  // Stage, access and layout used when a color render target is bound for
  // drawing.
  static constexpr VkPipelineStageFlags kColorDrawStageMask =
      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
  static constexpr VkAccessFlags kColorDrawAccessMask =
      VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
      VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
  static constexpr VkImageLayout kColorDrawLayout =
      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
  // Stage, access and layout used when a depth / stencil render target is
  // bound for drawing.
  static constexpr VkPipelineStageFlags kDepthDrawStageMask =
      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
  static constexpr VkAccessFlags kDepthDrawAccessMask =
      VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
      VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
  static constexpr VkImageLayout kDepthDrawLayout =
      VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

  // Takes ownership of the Vulkan objects passed to the constructor.
  VulkanRenderTarget(RenderTargetKey key,
                     VulkanRenderTargetCache& render_target_cache,
                     VkImage image, VkDeviceMemory memory,
                     VkImageView view_depth_color,
                     VkImageView view_depth_stencil, VkImageView view_stencil,
                     VkImageView view_srgb,
                     VkImageView view_color_transfer_separate,
                     size_t descriptor_set_index_transfer_source)
      : RenderTarget(key),
        render_target_cache_(render_target_cache),
        image_(image),
        memory_(memory),
        view_depth_color_(view_depth_color),
        view_depth_stencil_(view_depth_stencil),
        view_stencil_(view_stencil),
        view_srgb_(view_srgb),
        view_color_transfer_separate_(view_color_transfer_separate),
        descriptor_set_index_transfer_source_(
            descriptor_set_index_transfer_source) {}
  ~VulkanRenderTarget();

  VkImage image() const { return image_; }

  // Main view - depth aspect for depth render targets, color otherwise.
  VkImageView view_depth_color() const { return view_depth_color_; }
  VkImageView view_depth_stencil() const { return view_depth_stencil_; }
  VkImageView view_color_transfer_separate() const {
    return view_color_transfer_separate_;
  }
  // View to read color data from in transfers: the separate transfer-format
  // view if one was created, the main view otherwise.
  VkImageView view_color_transfer() const {
    return view_color_transfer_separate_ != VK_NULL_HANDLE
               ? view_color_transfer_separate_
               : view_depth_color_;
  }
  // Sampled-image descriptor set for using this render target as a transfer
  // source (from the x2 pool for depth / stencil, the single-image pool for
  // color).
  VkDescriptorSet GetDescriptorSetTransferSource() const {
    ui::vulkan::SingleLayoutDescriptorSetPool& descriptor_set_pool =
        key().is_depth
            ? *render_target_cache_.descriptor_set_pool_sampled_image_x2_
            : *render_target_cache_.descriptor_set_pool_sampled_image_;
    return descriptor_set_pool.Get(descriptor_set_index_transfer_source_);
  }

  // Writes the stage mask, access mask and layout for drawing to a depth or
  // color render target. Null out-pointers are skipped.
  static void GetDrawUsage(bool is_depth,
                           VkPipelineStageFlags* stage_mask_out,
                           VkAccessFlags* access_mask_out,
                           VkImageLayout* layout_out) {
    if (stage_mask_out) {
      *stage_mask_out = is_depth ? kDepthDrawStageMask : kColorDrawStageMask;
    }
    if (access_mask_out) {
      *access_mask_out =
          is_depth ? kDepthDrawAccessMask : kColorDrawAccessMask;
    }
    if (layout_out) {
      *layout_out = is_depth ? kDepthDrawLayout : kColorDrawLayout;
    }
  }
  // Same, for this render target's own depth / color kind.
  void GetDrawUsage(VkPipelineStageFlags* stage_mask_out,
                    VkAccessFlags* access_mask_out,
                    VkImageLayout* layout_out) const {
    GetDrawUsage(key().is_depth, stage_mask_out, access_mask_out, layout_out);
  }
  // Current usage state for barrier generation; updated externally via
  // SetUsage after a barrier is recorded.
  VkPipelineStageFlags current_stage_mask() const {
    return current_stage_mask_;
  }
  VkAccessFlags current_access_mask() const { return current_access_mask_; }
  VkImageLayout current_layout() const { return current_layout_; }
  void SetUsage(VkPipelineStageFlags stage_mask, VkAccessFlags access_mask,
                VkImageLayout layout) {
    current_stage_mask_ = stage_mask;
    current_access_mask_ = access_mask;
    current_layout_ = layout;
  }

  uint32_t temporary_sort_index() const { return temporary_sort_index_; }
  void SetTemporarySortIndex(uint32_t index) {
    temporary_sort_index_ = index;
  }

 private:
  VulkanRenderTargetCache& render_target_cache_;

  VkImage image_;
  VkDeviceMemory memory_;

  // TODO(Triang3l): Per-format drawing views for mutable formats with EDRAM
  // aliasing without transfers.
  VkImageView view_depth_color_;
  // Optional views.
  VkImageView view_depth_stencil_;
  VkImageView view_stencil_;
  VkImageView view_srgb_;
  VkImageView view_color_transfer_separate_;

  // 2 sampled images for depth / stencil, 1 sampled image for color.
  size_t descriptor_set_index_transfer_source_;

  VkPipelineStageFlags current_stage_mask_ = 0;
  VkAccessFlags current_access_mask_ = 0;
  VkImageLayout current_layout_ = VK_IMAGE_LAYOUT_UNDEFINED;

  // Temporary storage for indices in operations like transfers and dumps.
  uint32_t temporary_sort_index_ = 0;
};
|
||||
|
||||
// Identifies a framebuffer: the render pass key plus the EDRAM placement of
// every attachment. The trailing `// N` comments are running bit counts;
// they jump at 43 (32 + 11) and 75 (64 + 11) because on the targeted
// compilers bitfields don't straddle their 32-bit allocation units.
struct FramebufferKey {
  RenderPassKey render_pass_key;

  // Same as RenderTargetKey::pitch_tiles_at_32bpp.
  uint32_t pitch_tiles_at_32bpp : 8;  // 8
  // [0, 2047].
  uint32_t depth_base_tiles : xenos::kEdramBaseTilesBits - 1;    // 19
  uint32_t color_0_base_tiles : xenos::kEdramBaseTilesBits - 1;  // 30

  uint32_t color_1_base_tiles : xenos::kEdramBaseTilesBits - 1;  // 43
  uint32_t color_2_base_tiles : xenos::kEdramBaseTilesBits - 1;  // 54

  uint32_t color_3_base_tiles : xenos::kEdramBaseTilesBits - 1;  // 75

  // Including all the padding, for a stable hash.
  // memcpy / memset copying keeps the padding bits deterministic so the whole
  // object can be hashed byte-wise with XXHasher.
  FramebufferKey() { Reset(); }
  FramebufferKey(const FramebufferKey& key) {
    std::memcpy(this, &key, sizeof(*this));
  }
  FramebufferKey& operator=(const FramebufferKey& key) {
    std::memcpy(this, &key, sizeof(*this));
    return *this;
  }
  bool operator==(const FramebufferKey& key) const {
    return std::memcmp(this, &key, sizeof(*this)) == 0;
  }
  using Hasher = xe::hash::XXHasher<FramebufferKey>;
  void Reset() { std::memset(this, 0, sizeof(*this)); }
};
|
||||
|
||||
enum TransferUsedDescriptorSet : uint32_t {
|
||||
// Ordered from the least to the most frequently changed.
|
||||
kTransferUsedDescriptorSetHostDepthBuffer,
|
||||
kTransferUsedDescriptorSetHostDepthStencilTextures,
|
||||
kTransferUsedDescriptorSetDepthStencilTextures,
|
||||
// Mutually exclusive with kTransferUsedDescriptorSetDepthStencilTextures.
|
||||
kTransferUsedDescriptorSetColorTexture,
|
||||
|
||||
kTransferUsedDescriptorSetCount,
|
||||
|
||||
kTransferUsedDescriptorSetHostDepthBufferBit =
|
||||
uint32_t(1) << kTransferUsedDescriptorSetHostDepthBuffer,
|
||||
kTransferUsedDescriptorSetHostDepthStencilTexturesBit =
|
||||
uint32_t(1) << kTransferUsedDescriptorSetHostDepthStencilTextures,
|
||||
kTransferUsedDescriptorSetDepthStencilTexturesBit =
|
||||
uint32_t(1) << kTransferUsedDescriptorSetDepthStencilTextures,
|
||||
kTransferUsedDescriptorSetColorTextureBit =
|
||||
uint32_t(1) << kTransferUsedDescriptorSetColorTexture,
|
||||
};
|
||||
|
||||
// 32-bit push constants (for simplicity of size calculation and to avoid
|
||||
// std140 packing issues).
|
||||
enum TransferUsedPushConstantDword : uint32_t {
|
||||
kTransferUsedPushConstantDwordHostDepthAddress,
|
||||
kTransferUsedPushConstantDwordAddress,
|
||||
// Changed 8 times per transfer.
|
||||
kTransferUsedPushConstantDwordStencilMask,
|
||||
|
||||
kTransferUsedPushConstantDwordCount,
|
||||
|
||||
kTransferUsedPushConstantDwordHostDepthAddressBit =
|
||||
uint32_t(1) << kTransferUsedPushConstantDwordHostDepthAddress,
|
||||
kTransferUsedPushConstantDwordAddressBit =
|
||||
uint32_t(1) << kTransferUsedPushConstantDwordAddress,
|
||||
kTransferUsedPushConstantDwordStencilMaskBit =
|
||||
uint32_t(1) << kTransferUsedPushConstantDwordStencilMask,
|
||||
};
|
||||
|
||||
enum class TransferPipelineLayoutIndex {
|
||||
kColor,
|
||||
kDepth,
|
||||
kColorToStencilBit,
|
||||
kDepthToStencilBit,
|
||||
kColorAndHostDepthTexture,
|
||||
kColorAndHostDepthBuffer,
|
||||
kDepthAndHostDepthTexture,
|
||||
kDepthAndHostDepthBuffer,
|
||||
|
||||
kCount,
|
||||
};
|
||||
|
||||
struct TransferPipelineLayoutInfo {
|
||||
uint32_t used_descriptor_sets;
|
||||
uint32_t used_push_constant_dwords;
|
||||
};
|
||||
|
||||
static const TransferPipelineLayoutInfo
|
||||
kTransferPipelineLayoutInfos[size_t(TransferPipelineLayoutIndex::kCount)];
|
||||
|
||||
// How an EDRAM range ownership transfer is performed: the kinds of data held
// by the source and the destination, and whether host depth participates.
enum class TransferMode : uint32_t {
  kColorToDepth,
  kColorToColor,

  kDepthToDepth,
  kDepthToColor,

  kColorToStencilBit,
  kDepthToStencilBit,

  // Two-source modes, using the host depth if it, when converted to the guest
  // format, matches what's in the owner source (not modified, keep host
  // precision), or the guest data otherwise (significantly modified, possibly
  // cleared). Stencil for FragStencilRef is always taken from the guest
  // source.

  kColorAndHostDepthToDepth,
  // When using different source and destination depth formats.
  kDepthAndHostDepthToDepth,

  // If host depth is fetched, but it's the same image as the destination,
  // it's copied to the EDRAM buffer (but since it's just a scratch buffer,
  // with tiles laid out linearly with the same pitch as in the original
  // render target; also no swapping of 40-sample columns as opposed to the
  // host render target - this is done only for the color source) and fetched
  // from there instead of the host depth texture.
  kColorAndHostDepthCopyToDepth,
  kDepthAndHostDepthCopyToDepth,

  kCount,
};

// Which destination aspect a transfer writes.
enum class TransferOutput {
  kColor,
  kDepth,
  kStencilBit,
};

// Static properties of a TransferMode (kTransferModes is indexed by it).
struct TransferModeInfo {
  TransferOutput output;
  TransferPipelineLayoutIndex pipeline_layout;
};
|
||||
|
||||
static const TransferModeInfo kTransferModes[size_t(TransferMode::kCount)];
|
||||
|
||||
// Identifies a transfer shader variant, packed into a single uint32_t so it
// can be hashed, compared and sorted cheaply (sorting groups transfer draws
// by pipeline - see TransferInvocation).
union TransferShaderKey {
  uint32_t key;
  struct {
    xenos::MsaaSamples dest_msaa_samples : xenos::kMsaaSamplesBits;
    uint32_t dest_color_rt_index : xenos::kColorRenderTargetIndexBits;
    uint32_t dest_resource_format : xenos::kRenderTargetFormatBits;
    xenos::MsaaSamples source_msaa_samples : xenos::kMsaaSamplesBits;
    // Always 1x when the host depth is a copy from a buffer rather than an
    // image, not to create the same pipeline for different MSAA sample counts
    // as it doesn't matter in this case.
    xenos::MsaaSamples host_depth_source_msaa_samples
        : xenos::kMsaaSamplesBits;
    uint32_t source_resource_format : xenos::kRenderTargetFormatBits;

    // Last bits because this affects the pipeline layout - after sorting,
    // only change it as fewer times as possible. Depth buffers have an
    // additional stencil texture.
    static_assert(size_t(TransferMode::kCount) <= (size_t(1) << 4));
    TransferMode mode : 4;
  };

  TransferShaderKey() : key(0) { static_assert_size(*this, sizeof(key)); }

  // Hash and comparisons operate on the packed representation.
  struct Hasher {
    size_t operator()(const TransferShaderKey& key) const {
      return std::hash<uint32_t>{}(key.key);
    }
  };
  bool operator==(const TransferShaderKey& other_key) const {
    return key == other_key.key;
  }
  bool operator!=(const TransferShaderKey& other_key) const {
    return !(*this == other_key);
  }
  bool operator<(const TransferShaderKey& other_key) const {
    return key < other_key.key;
  }
};
|
||||
|
||||
// Identifies a full transfer pipeline: the render pass it is used within plus
// the shader variant.
struct TransferPipelineKey {
  RenderPassKey render_pass_key;
  TransferShaderKey shader_key;

  TransferPipelineKey(RenderPassKey render_pass_key,
                      TransferShaderKey shader_key)
      : render_pass_key(render_pass_key), shader_key(shader_key) {}

  // Streams both packed keys through one XXH3 state for the hash.
  struct Hasher {
    size_t operator()(const TransferPipelineKey& key) const {
      XXH3_state_t hash_state;
      XXH3_64bits_reset(&hash_state);
      XXH3_64bits_update(&hash_state, &key.render_pass_key,
                         sizeof(key.render_pass_key));
      XXH3_64bits_update(&hash_state, &key.shader_key,
                         sizeof(key.shader_key));
      return static_cast<size_t>(XXH3_64bits_digest(&hash_state));
    }
  };
  bool operator==(const TransferPipelineKey& other_key) const {
    return render_pass_key == other_key.render_pass_key &&
           shader_key == other_key.shader_key;
  }
  bool operator!=(const TransferPipelineKey& other_key) const {
    return !(*this == other_key);
  }
  // Lexicographic ordering: render pass first, then shader.
  bool operator<(const TransferPipelineKey& other_key) const {
    if (render_pass_key != other_key.render_pass_key) {
      return render_pass_key < other_key.render_pass_key;
    }
    return shader_key < other_key.shader_key;
  }
};
|
||||
|
||||
// 32-bit push constant packing the destination / source pitches and the base
// tile delta for a transfer (see kTransferUsedPushConstantDwordAddress).
union TransferAddressConstant {
  uint32_t constant;
  struct {
    // All in tiles.
    uint32_t dest_pitch : xenos::kEdramPitchTilesBits;
    uint32_t source_pitch : xenos::kEdramPitchTilesBits;
    // Safe to use 12 bits for signed difference - no ownership transfer can
    // ever occur between render targets with EDRAM base >= 2048 as this would
    // result in 0-length spans. 10 + 10 + 12 is exactly 32, any more bits,
    // and more root 32-bit constants will be used.
    // Destination base in tiles minus source base in tiles (not vice versa
    // because this is a transform of the coordinate system, not addresses
    // themselves).
    // 0 for host_depth_source_is_copy (ignored in this case anyway as
    // destination == source anyway).
    int32_t source_to_dest : xenos::kEdramBaseTilesBits;
  };
  TransferAddressConstant() : constant(0) {
    static_assert_size(*this, sizeof(constant));
  }
  bool operator==(const TransferAddressConstant& other_constant) const {
    return constant == other_constant.constant;
  }
  bool operator!=(const TransferAddressConstant& other_constant) const {
    return !(*this == other_constant);
  }
};
|
||||
|
||||
struct TransferInvocation {
|
||||
Transfer transfer;
|
||||
TransferShaderKey shader_key;
|
||||
TransferInvocation(const Transfer& transfer,
|
||||
const TransferShaderKey& shader_key)
|
||||
: transfer(transfer), shader_key(shader_key) {}
|
||||
bool operator<(const TransferInvocation& other_invocation) {
|
||||
// TODO(Triang3l): See if it may be better to sort by the source in the
|
||||
// first place, especially when reading the same data multiple times (like
|
||||
// to write the stencil bits after depth) for better read locality.
|
||||
// Sort by the shader key primarily to reduce pipeline state (context)
|
||||
// switches.
|
||||
if (shader_key != other_invocation.shader_key) {
|
||||
return shader_key < other_invocation.shader_key;
|
||||
}
|
||||
// Host depth render targets are changed rarely if they exist, won't save
|
||||
// many binding changes, ignore them for simplicity (their existence is
|
||||
// caught by the shader key change).
|
||||
assert_not_null(transfer.source);
|
||||
assert_not_null(other_invocation.transfer.source);
|
||||
uint32_t source_index =
|
||||
static_cast<const VulkanRenderTarget*>(transfer.source)
|
||||
->temporary_sort_index();
|
||||
uint32_t other_source_index = static_cast<const VulkanRenderTarget*>(
|
||||
other_invocation.transfer.source)
|
||||
->temporary_sort_index();
|
||||
if (source_index != other_source_index) {
|
||||
return source_index < other_source_index;
|
||||
}
|
||||
return transfer.start_tiles < other_invocation.transfer.start_tiles;
|
||||
}
|
||||
bool CanBeMergedIntoOneDraw(
|
||||
const TransferInvocation& other_invocation) const {
|
||||
return shader_key == other_invocation.shader_key &&
|
||||
transfer.AreSourcesSame(other_invocation.transfer);
|
||||
}
|
||||
};
|
||||
|
||||
// Identifies a render target dumping compute pipeline, packed into a single
// uint32_t for cheap hashing, comparison and sorting.
union DumpPipelineKey {
  uint32_t key;
  struct {
    xenos::MsaaSamples msaa_samples : 2;
    uint32_t resource_format : 4;
    // Last bit because this affects the pipeline - after sorting, only change
    // it at most once. Depth buffers have an additional stencil SRV.
    uint32_t is_depth : 1;
  };

  DumpPipelineKey() : key(0) { static_assert_size(*this, sizeof(key)); }

  // Hash and comparisons operate on the packed representation.
  struct Hasher {
    size_t operator()(const DumpPipelineKey& key) const {
      return std::hash<uint32_t>{}(key.key);
    }
  };
  bool operator==(const DumpPipelineKey& other_key) const {
    return key == other_key.key;
  }
  bool operator!=(const DumpPipelineKey& other_key) const {
    return !(*this == other_key);
  }
  bool operator<(const DumpPipelineKey& other_key) const {
    return key < other_key.key;
  }

  // resource_format is interpreted according to is_depth - the asserts guard
  // against reading it as the wrong enum.
  xenos::ColorRenderTargetFormat GetColorFormat() const {
    assert_false(is_depth);
    return xenos::ColorRenderTargetFormat(resource_format);
  }
  xenos::DepthRenderTargetFormat GetDepthFormat() const {
    assert_true(is_depth);
    return xenos::DepthRenderTargetFormat(resource_format);
  }
};
|
||||
|
||||
// There's no strict dependency on the group size in dumping, for simplicity
|
||||
// calculations especially with resolution scaling, dividing manually (as the
|
||||
// group size is not unlimited). The only restriction is that an integer
|
||||
// multiple of it must be 80x16 samples (and no larger than that) for 32bpp,
|
||||
// or 40x16 samples for 64bpp (because only a half of the pair of tiles may
|
||||
// need to be dumped). Using 8x16 since that's 128 - the minimum required
|
||||
// group size on Vulkan, and the maximum number of lanes in a subgroup on
|
||||
// Vulkan.
|
||||
static constexpr uint32_t kDumpSamplesPerGroupX = 8;
|
||||
static constexpr uint32_t kDumpSamplesPerGroupY = 16;
|
||||
|
||||
// 32-bit push constant with the destination and source pitches for dumping
// (see kDumpPushConstantPitches).
union DumpPitches {
  uint32_t pitches;
  struct {
    // Both in tiles.
    uint32_t dest_pitch : xenos::kEdramPitchTilesBits;
    uint32_t source_pitch : xenos::kEdramPitchTilesBits;
  };
  DumpPitches() : pitches(0) { static_assert_size(*this, sizeof(pitches)); }
  bool operator==(const DumpPitches& other_pitches) const {
    return pitches == other_pitches.pitches;
  }
  bool operator!=(const DumpPitches& other_pitches) const {
    return !(*this == other_pitches);
  }
};

// 32-bit push constant with the first tile of the current dispatch and the
// base tile of the source render target (see kDumpPushConstantOffsets).
union DumpOffsets {
  uint32_t offsets;
  struct {
    uint32_t dispatch_first_tile : xenos::kEdramBaseTilesBits;
    uint32_t source_base_tiles : xenos::kEdramBaseTilesBits;
  };
  DumpOffsets() : offsets(0) { static_assert_size(*this, sizeof(offsets)); }
  bool operator==(const DumpOffsets& other_offsets) const {
    return offsets == other_offsets.offsets;
  }
  bool operator!=(const DumpOffsets& other_offsets) const {
    return !(*this == other_offsets);
  }
};
|
||||
|
||||
enum DumpDescriptorSet : uint32_t {
|
||||
// Never changes. Same in both color and depth pipeline layouts, keep the
|
||||
// first for pipeline layout compatibility, to only have to set it once.
|
||||
kDumpDescriptorSetEdram,
|
||||
// One resolve may need multiple sources. Different descriptor set layouts
|
||||
// for color and depth.
|
||||
kDumpDescriptorSetSource,
|
||||
|
||||
kDumpDescriptorSetCount,
|
||||
};
|
||||
|
||||
enum DumpPushConstant : uint32_t {
|
||||
// May be different for different sources.
|
||||
kDumpPushConstantPitches,
|
||||
// May be changed multiple times for the same source.
|
||||
kDumpPushConstantOffsets,
|
||||
|
||||
kDumpPushConstantCount,
|
||||
};
|
||||
|
||||
struct DumpInvocation {
|
||||
ResolveCopyDumpRectangle rectangle;
|
||||
DumpPipelineKey pipeline_key;
|
||||
DumpInvocation(const ResolveCopyDumpRectangle& rectangle,
|
||||
const DumpPipelineKey& pipeline_key)
|
||||
: rectangle(rectangle), pipeline_key(pipeline_key) {}
|
||||
bool operator<(const DumpInvocation& other_invocation) {
|
||||
// Sort by the pipeline key primarily to reduce pipeline state (context)
|
||||
// switches.
|
||||
if (pipeline_key != other_invocation.pipeline_key) {
|
||||
return pipeline_key < other_invocation.pipeline_key;
|
||||
}
|
||||
assert_not_null(rectangle.render_target);
|
||||
uint32_t render_target_index =
|
||||
static_cast<const VulkanRenderTarget*>(rectangle.render_target)
|
||||
->temporary_sort_index();
|
||||
const ResolveCopyDumpRectangle& other_rectangle =
|
||||
other_invocation.rectangle;
|
||||
uint32_t other_render_target_index =
|
||||
static_cast<const VulkanRenderTarget*>(other_rectangle.render_target)
|
||||
->temporary_sort_index();
|
||||
if (render_target_index != other_render_target_index) {
|
||||
return render_target_index < other_render_target_index;
|
||||
}
|
||||
if (rectangle.row_first != other_rectangle.row_first) {
|
||||
return rectangle.row_first < other_rectangle.row_first;
|
||||
}
|
||||
return rectangle.row_first_start < other_rectangle.row_first_start;
|
||||
}
|
||||
};
|
||||
|
||||
// Returns the framebuffer object, or VK_NULL_HANDLE if failed to create.
|
||||
const Framebuffer* GetFramebuffer(
|
||||
RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp,
|
||||
const RenderTarget* const* depth_and_color_render_targets);
|
||||
|
||||
VkShaderModule GetTransferShader(TransferShaderKey key);
|
||||
// With sample-rate shading, returns a pointer to one pipeline. Without
|
||||
// sample-rate shading, returns a pointer to as many pipelines as there are
|
||||
// samples. If there was a failure to create a pipeline, returns nullptr.
|
||||
VkPipeline const* GetTransferPipelines(TransferPipelineKey key);
|
||||
|
||||
// Do ownership transfers for render targets - each render target / vector may
|
||||
// be null / empty in case there's nothing to do for them.
|
||||
// resolve_clear_rectangle is expected to be provided by
|
||||
// PrepareHostRenderTargetsResolveClear which should do all the needed size
|
||||
// bound checks.
|
||||
void PerformTransfersAndResolveClears(
|
||||
uint32_t render_target_count, RenderTarget* const* render_targets,
|
||||
const std::vector<Transfer>* render_target_transfers,
|
||||
const uint64_t* render_target_resolve_clear_values = nullptr,
|
||||
const Transfer::Rectangle* resolve_clear_rectangle = nullptr);
|
||||
|
||||
VkPipeline GetDumpPipeline(DumpPipelineKey key);
|
||||
|
||||
// Writes contents of host render targets within rectangles from
|
||||
// ResolveInfo::GetCopyEdramTileSpan to edram_buffer_.
|
||||
void DumpRenderTargets(uint32_t dump_base, uint32_t dump_row_length_used,
|
||||
uint32_t dump_rows, uint32_t dump_pitch);
|
||||
|
||||
bool gamma_render_target_as_srgb_ = false;
|
||||
|
||||
bool depth_unorm24_vulkan_format_supported_ = false;
|
||||
bool depth_float24_round_ = false;
|
||||
|
||||
bool msaa_2x_attachments_supported_ = false;
|
||||
bool msaa_2x_no_attachments_supported_ = false;
|
||||
|
||||
std::unordered_map<FramebufferKey, Framebuffer, FramebufferKey::Hasher>
|
||||
framebuffers_;
|
||||
|
||||
RenderPassKey last_update_render_pass_key_;
|
||||
VkRenderPass last_update_render_pass_ = VK_NULL_HANDLE;
|
||||
uint32_t last_update_framebuffer_pitch_tiles_at_32bpp_ = 0;
|
||||
const RenderTarget* const*
|
||||
last_update_framebuffer_attachments_[1 + xenos::kMaxColorRenderTargets] =
|
||||
{};
|
||||
const Framebuffer* last_update_framebuffer_ = VK_NULL_HANDLE;
|
||||
|
||||
// Set 0 - EDRAM storage buffer, set 1 - source depth sampled image (and
|
||||
// unused stencil from the transfer descriptor set), HostDepthStoreConstants
|
||||
// passed via push constants.
|
||||
VkPipelineLayout host_depth_store_pipeline_layout_ = VK_NULL_HANDLE;
|
||||
VkPipeline host_depth_store_pipelines_[size_t(xenos::MsaaSamples::k4X) + 1] =
|
||||
{};
|
||||
|
||||
std::unique_ptr<ui::vulkan::VulkanUploadBufferPool>
|
||||
transfer_vertex_buffer_pool_;
|
||||
VkShaderModule transfer_passthrough_vertex_shader_ = VK_NULL_HANDLE;
|
||||
VkPipelineLayout transfer_pipeline_layouts_[size_t(
|
||||
TransferPipelineLayoutIndex::kCount)] = {};
|
||||
// VK_NULL_HANDLE if failed to create.
|
||||
std::unordered_map<TransferShaderKey, VkShaderModule,
|
||||
TransferShaderKey::Hasher>
|
||||
transfer_shaders_;
|
||||
// With sample-rate shading, one pipeline per entry. Without sample-rate
|
||||
// shading, one pipeline per sample per entry. VK_NULL_HANDLE if failed to
|
||||
// create.
|
||||
std::unordered_map<TransferPipelineKey, std::array<VkPipeline, 4>,
|
||||
TransferPipelineKey::Hasher>
|
||||
transfer_pipelines_;
|
||||
|
||||
VkPipelineLayout dump_pipeline_layout_color_ = VK_NULL_HANDLE;
|
||||
VkPipelineLayout dump_pipeline_layout_depth_ = VK_NULL_HANDLE;
|
||||
// Compute pipelines for copying host render target contents to the EDRAM
|
||||
// buffer. VK_NULL_HANDLE if failed to create.
|
||||
std::unordered_map<DumpPipelineKey, VkPipeline, DumpPipelineKey::Hasher>
|
||||
dump_pipelines_;
|
||||
|
||||
// Temporary storage for Resolve.
|
||||
std::vector<Transfer> clear_transfers_[2];
|
||||
|
||||
// Temporary storage for PerformTransfersAndResolveClears.
|
||||
std::vector<TransferInvocation> current_transfer_invocations_;
|
||||
|
||||
// Temporary storage for DumpRenderTargets.
|
||||
std::vector<ResolveCopyDumpRectangle> dump_rectangles_;
|
||||
std::vector<DumpInvocation> dump_invocations_;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_
|
|
@ -2,24 +2,59 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/vulkan/vulkan_shader.h"
|
||||
|
||||
#include "third_party/fmt/include/fmt/format.h"
|
||||
#include "xenia/base/assert.h"
|
||||
#include <cstdint>
|
||||
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
using xe::ui::vulkan::util::CheckResult;
|
||||
VulkanShader::VulkanTranslation::~VulkanTranslation() {
|
||||
if (shader_module_) {
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
static_cast<const VulkanShader&>(shader()).provider_;
|
||||
provider.dfn().vkDestroyShaderModule(provider.device(), shader_module_,
|
||||
nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
VkShaderModule VulkanShader::VulkanTranslation::GetOrCreateShaderModule() {
|
||||
if (!is_valid()) {
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
if (shader_module_ != VK_NULL_HANDLE) {
|
||||
return shader_module_;
|
||||
}
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
static_cast<const VulkanShader&>(shader()).provider_;
|
||||
VkShaderModuleCreateInfo shader_module_create_info;
|
||||
shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
|
||||
shader_module_create_info.pNext = nullptr;
|
||||
shader_module_create_info.flags = 0;
|
||||
shader_module_create_info.codeSize = translated_binary().size();
|
||||
shader_module_create_info.pCode =
|
||||
reinterpret_cast<const uint32_t*>(translated_binary().data());
|
||||
if (provider.dfn().vkCreateShaderModule(provider.device(),
|
||||
&shader_module_create_info, nullptr,
|
||||
&shader_module_) != VK_SUCCESS) {
|
||||
XELOGE(
|
||||
"VulkanShader::VulkanTranslation: Failed to create a Vulkan shader "
|
||||
"module for shader {:016X} modification {:016X}",
|
||||
shader().ucode_data_hash(), modification());
|
||||
MakeInvalid();
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
return shader_module_;
|
||||
}
|
||||
|
||||
VulkanShader::VulkanShader(const ui::vulkan::VulkanProvider& provider,
|
||||
xenos::ShaderType shader_type,
|
||||
|
@ -27,60 +62,10 @@ VulkanShader::VulkanShader(const ui::vulkan::VulkanProvider& provider,
|
|||
const uint32_t* ucode_dwords,
|
||||
size_t ucode_dword_count,
|
||||
std::endian ucode_source_endian)
|
||||
: Shader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count,
|
||||
ucode_source_endian),
|
||||
: SpirvShader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count,
|
||||
ucode_source_endian),
|
||||
provider_(provider) {}
|
||||
|
||||
VulkanShader::VulkanTranslation::~VulkanTranslation() {
|
||||
if (shader_module_) {
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
static_cast<VulkanShader&>(shader()).provider_;
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
||||
VkDevice device = provider.device();
|
||||
dfn.vkDestroyShaderModule(device, shader_module_, nullptr);
|
||||
shader_module_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
bool VulkanShader::VulkanTranslation::Prepare() {
|
||||
assert_null(shader_module_);
|
||||
assert_true(is_valid());
|
||||
|
||||
const VulkanShader& vulkan_shader = static_cast<VulkanShader&>(shader());
|
||||
const ui::vulkan::VulkanProvider& provider = vulkan_shader.provider_;
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
||||
VkDevice device = provider.device();
|
||||
|
||||
// Create the shader module.
|
||||
VkShaderModuleCreateInfo shader_info;
|
||||
shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
|
||||
shader_info.pNext = nullptr;
|
||||
shader_info.flags = 0;
|
||||
shader_info.codeSize = translated_binary().size();
|
||||
shader_info.pCode =
|
||||
reinterpret_cast<const uint32_t*>(translated_binary().data());
|
||||
auto status =
|
||||
dfn.vkCreateShaderModule(device, &shader_info, nullptr, &shader_module_);
|
||||
CheckResult(status, "vkCreateShaderModule");
|
||||
|
||||
char type_char;
|
||||
switch (vulkan_shader.type()) {
|
||||
case xenos::ShaderType::kVertex:
|
||||
type_char = 'v';
|
||||
break;
|
||||
case xenos::ShaderType::kPixel:
|
||||
type_char = 'p';
|
||||
break;
|
||||
default:
|
||||
type_char = 'u';
|
||||
}
|
||||
provider.SetDeviceObjectName(
|
||||
VK_OBJECT_TYPE_SHADER_MODULE, uint64_t(shader_module_),
|
||||
fmt::format("S({}): {:016X}", type_char, vulkan_shader.ucode_data_hash())
|
||||
.c_str());
|
||||
return status == VK_SUCCESS;
|
||||
}
|
||||
|
||||
Shader::Translation* VulkanShader::CreateTranslationInstance(
|
||||
uint64_t modification) {
|
||||
return new VulkanTranslation(*this, modification);
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -10,42 +10,68 @@
|
|||
#ifndef XENIA_GPU_VULKAN_VULKAN_SHADER_H_
|
||||
#define XENIA_GPU_VULKAN_VULKAN_SHADER_H_
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
|
||||
#include "xenia/gpu/shader.h"
|
||||
#include "xenia/gpu/spirv_shader.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
class VulkanShader : public Shader {
|
||||
class VulkanShader : public SpirvShader {
|
||||
public:
|
||||
class VulkanTranslation : public Translation {
|
||||
class VulkanTranslation : public SpirvTranslation {
|
||||
public:
|
||||
VulkanTranslation(VulkanShader& shader, uint64_t modification)
|
||||
: Translation(shader, modification) {}
|
||||
explicit VulkanTranslation(VulkanShader& shader, uint64_t modification)
|
||||
: SpirvTranslation(shader, modification) {}
|
||||
~VulkanTranslation() override;
|
||||
|
||||
bool Prepare();
|
||||
|
||||
// Available only if the translation is_valid and has been prepared.
|
||||
VkShaderModule GetOrCreateShaderModule();
|
||||
VkShaderModule shader_module() const { return shader_module_; }
|
||||
|
||||
private:
|
||||
VkShaderModule shader_module_ = nullptr;
|
||||
VkShaderModule shader_module_ = VK_NULL_HANDLE;
|
||||
};
|
||||
|
||||
VulkanShader(const ui::vulkan::VulkanProvider& provider,
|
||||
xenos::ShaderType shader_type, uint64_t ucode_data_hash,
|
||||
const uint32_t* ucode_dwords, size_t ucode_dword_count,
|
||||
std::endian ucode_source_endian = std::endian::big);
|
||||
explicit VulkanShader(const ui::vulkan::VulkanProvider& provider,
|
||||
xenos::ShaderType shader_type, uint64_t ucode_data_hash,
|
||||
const uint32_t* ucode_dwords, size_t ucode_dword_count,
|
||||
std::endian ucode_source_endian = std::endian::big);
|
||||
|
||||
// For owning subsystem like the pipeline cache, accessors for unique
|
||||
// identifiers (used instead of hashes to make sure collisions can't happen)
|
||||
// of binding layouts used by the shader, for invalidation if a shader with an
|
||||
// incompatible layout has been bound.
|
||||
size_t GetTextureBindingLayoutUserUID() const {
|
||||
return texture_binding_layout_user_uid_;
|
||||
}
|
||||
size_t GetSamplerBindingLayoutUserUID() const {
|
||||
return sampler_binding_layout_user_uid_;
|
||||
}
|
||||
// Modifications of the same shader can be translated on different threads.
|
||||
// The "set" function must only be called if "enter" returned true - these are
|
||||
// set up only once.
|
||||
bool EnterBindingLayoutUserUIDSetup() {
|
||||
return !binding_layout_user_uids_set_up_.test_and_set();
|
||||
}
|
||||
void SetTextureBindingLayoutUserUID(size_t uid) {
|
||||
texture_binding_layout_user_uid_ = uid;
|
||||
}
|
||||
void SetSamplerBindingLayoutUserUID(size_t uid) {
|
||||
sampler_binding_layout_user_uid_ = uid;
|
||||
}
|
||||
|
||||
protected:
|
||||
Translation* CreateTranslationInstance(uint64_t modification) override;
|
||||
|
||||
private:
|
||||
const ui::vulkan::VulkanProvider& provider_;
|
||||
|
||||
std::atomic_flag binding_layout_user_uids_set_up_ = ATOMIC_FLAG_INIT;
|
||||
size_t texture_binding_layout_user_uid_ = 0;
|
||||
size_t sampler_binding_layout_user_uid_ = 0;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
|
|
|
@ -0,0 +1,499 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
|
||||
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||
|
||||
DEFINE_bool(vulkan_sparse_shared_memory, true,
|
||||
"Enable sparse binding for shared memory emulation. Disabling it "
|
||||
"increases video memory usage - a 512 MB buffer is created - but "
|
||||
"allows graphics debuggers that don't support sparse binding to "
|
||||
"work.",
|
||||
"Vulkan");
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
VulkanSharedMemory::VulkanSharedMemory(
|
||||
VulkanCommandProcessor& command_processor, Memory& memory,
|
||||
TraceWriter& trace_writer,
|
||||
VkPipelineStageFlags guest_shader_pipeline_stages)
|
||||
: SharedMemory(memory),
|
||||
command_processor_(command_processor),
|
||||
trace_writer_(trace_writer),
|
||||
guest_shader_pipeline_stages_(guest_shader_pipeline_stages) {}
|
||||
|
||||
VulkanSharedMemory::~VulkanSharedMemory() { Shutdown(true); }
|
||||
|
||||
bool VulkanSharedMemory::Initialize() {
|
||||
InitializeCommon();
|
||||
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
command_processor_.GetVulkanProvider();
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
||||
VkDevice device = provider.device();
|
||||
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
|
||||
|
||||
const VkBufferCreateFlags sparse_flags =
|
||||
VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
|
||||
VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
|
||||
|
||||
// Try to create a sparse buffer.
|
||||
VkBufferCreateInfo buffer_create_info;
|
||||
buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
buffer_create_info.pNext = nullptr;
|
||||
buffer_create_info.flags = sparse_flags;
|
||||
buffer_create_info.size = kBufferSize;
|
||||
buffer_create_info.usage =
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
|
||||
buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
buffer_create_info.queueFamilyIndexCount = 0;
|
||||
buffer_create_info.pQueueFamilyIndices = nullptr;
|
||||
if (cvars::vulkan_sparse_shared_memory &&
|
||||
provider.IsSparseBindingSupported() &&
|
||||
device_features.sparseResidencyBuffer) {
|
||||
if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) ==
|
||||
VK_SUCCESS) {
|
||||
VkMemoryRequirements buffer_memory_requirements;
|
||||
dfn.vkGetBufferMemoryRequirements(device, buffer_,
|
||||
&buffer_memory_requirements);
|
||||
if (xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
|
||||
provider.memory_types_device_local(),
|
||||
&buffer_memory_type_)) {
|
||||
uint32_t allocation_size_log2;
|
||||
xe::bit_scan_forward(
|
||||
std::max(uint64_t(buffer_memory_requirements.alignment),
|
||||
uint64_t(1)),
|
||||
&allocation_size_log2);
|
||||
if (allocation_size_log2 < kBufferSizeLog2) {
|
||||
// Maximum of 1024 allocations in the worst case for all of the
|
||||
// buffer because of the overall 4096 allocation count limit on
|
||||
// Windows drivers.
|
||||
InitializeSparseHostGpuMemory(
|
||||
std::max(allocation_size_log2,
|
||||
std::max(kHostGpuMemoryOptimalSparseAllocationLog2,
|
||||
kBufferSizeLog2 - uint32_t(10))));
|
||||
} else {
|
||||
// Shouldn't happen on any real platform, but no point allocating the
|
||||
// buffer sparsely.
|
||||
dfn.vkDestroyBuffer(device, buffer_, nullptr);
|
||||
buffer_ = VK_NULL_HANDLE;
|
||||
}
|
||||
} else {
|
||||
XELOGE(
|
||||
"Shared memory: Failed to get a device-local Vulkan memory type "
|
||||
"for the sparse buffer");
|
||||
dfn.vkDestroyBuffer(device, buffer_, nullptr);
|
||||
buffer_ = VK_NULL_HANDLE;
|
||||
}
|
||||
} else {
|
||||
XELOGE("Shared memory: Failed to create the {} MB Vulkan sparse buffer",
|
||||
kBufferSize >> 20);
|
||||
}
|
||||
}
|
||||
|
||||
// Create a non-sparse buffer if there were issues with the sparse buffer.
|
||||
if (buffer_ == VK_NULL_HANDLE) {
|
||||
XELOGGPU(
|
||||
"Vulkan sparse binding is not used for shared memory emulation - video "
|
||||
"memory usage may increase significantly because a full {} MB buffer "
|
||||
"will be created",
|
||||
kBufferSize >> 20);
|
||||
buffer_create_info.flags &= ~sparse_flags;
|
||||
if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) !=
|
||||
VK_SUCCESS) {
|
||||
XELOGE("Shared memory: Failed to create the {} MB Vulkan buffer",
|
||||
kBufferSize >> 20);
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
VkMemoryRequirements buffer_memory_requirements;
|
||||
dfn.vkGetBufferMemoryRequirements(device, buffer_,
|
||||
&buffer_memory_requirements);
|
||||
if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
|
||||
provider.memory_types_device_local(),
|
||||
&buffer_memory_type_)) {
|
||||
XELOGE(
|
||||
"Shared memory: Failed to get a device-local Vulkan memory type for "
|
||||
"the buffer");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
VkMemoryAllocateInfo buffer_memory_allocate_info;
|
||||
VkMemoryAllocateInfo* buffer_memory_allocate_info_last =
|
||||
&buffer_memory_allocate_info;
|
||||
buffer_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
|
||||
buffer_memory_allocate_info.pNext = nullptr;
|
||||
buffer_memory_allocate_info.allocationSize =
|
||||
buffer_memory_requirements.size;
|
||||
buffer_memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
|
||||
VkMemoryDedicatedAllocateInfoKHR buffer_memory_dedicated_allocate_info;
|
||||
if (provider.device_extensions().khr_dedicated_allocation) {
|
||||
buffer_memory_allocate_info_last->pNext =
|
||||
&buffer_memory_dedicated_allocate_info;
|
||||
buffer_memory_allocate_info_last =
|
||||
reinterpret_cast<VkMemoryAllocateInfo*>(
|
||||
&buffer_memory_dedicated_allocate_info);
|
||||
buffer_memory_dedicated_allocate_info.sType =
|
||||
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
|
||||
buffer_memory_dedicated_allocate_info.pNext = nullptr;
|
||||
buffer_memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
|
||||
buffer_memory_dedicated_allocate_info.buffer = buffer_;
|
||||
}
|
||||
VkDeviceMemory buffer_memory;
|
||||
if (dfn.vkAllocateMemory(device, &buffer_memory_allocate_info, nullptr,
|
||||
&buffer_memory) != VK_SUCCESS) {
|
||||
XELOGE(
|
||||
"Shared memory: Failed to allocate {} MB of memory for the Vulkan "
|
||||
"buffer",
|
||||
kBufferSize >> 20);
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
buffer_memory_.push_back(buffer_memory);
|
||||
if (dfn.vkBindBufferMemory(device, buffer_, buffer_memory, 0) !=
|
||||
VK_SUCCESS) {
|
||||
XELOGE("Shared memory: Failed to bind memory to the Vulkan buffer");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// The first usage will likely be uploading.
|
||||
last_usage_ = Usage::kTransferDestination;
|
||||
last_written_range_ = std::make_pair<uint32_t, uint32_t>(0, 0);
|
||||
|
||||
upload_buffer_pool_ = std::make_unique<ui::vulkan::VulkanUploadBufferPool>(
|
||||
provider, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
||||
xe::align(ui::vulkan::VulkanUploadBufferPool::kDefaultPageSize,
|
||||
size_t(1) << page_size_log2()));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void VulkanSharedMemory::Shutdown(bool from_destructor) {
|
||||
ResetTraceDownload();
|
||||
|
||||
upload_buffer_pool_.reset();
|
||||
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
command_processor_.GetVulkanProvider();
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
||||
VkDevice device = provider.device();
|
||||
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, buffer_);
|
||||
for (VkDeviceMemory memory : buffer_memory_) {
|
||||
dfn.vkFreeMemory(device, memory, nullptr);
|
||||
}
|
||||
buffer_memory_.clear();
|
||||
|
||||
// If calling from the destructor, the SharedMemory destructor will call
|
||||
// ShutdownCommon.
|
||||
if (!from_destructor) {
|
||||
ShutdownCommon();
|
||||
}
|
||||
}
|
||||
|
||||
void VulkanSharedMemory::CompletedSubmissionUpdated() {
|
||||
upload_buffer_pool_->Reclaim(command_processor_.GetCompletedSubmission());
|
||||
}
|
||||
|
||||
void VulkanSharedMemory::EndSubmission() { upload_buffer_pool_->FlushWrites(); }
|
||||
|
||||
void VulkanSharedMemory::Use(Usage usage,
|
||||
std::pair<uint32_t, uint32_t> written_range) {
|
||||
written_range.first = std::min(written_range.first, kBufferSize);
|
||||
written_range.second =
|
||||
std::min(written_range.second, kBufferSize - written_range.first);
|
||||
assert_true(usage != Usage::kRead || !written_range.second);
|
||||
if (last_usage_ != usage || last_written_range_.second) {
|
||||
VkPipelineStageFlags src_stage_mask, dst_stage_mask;
|
||||
VkAccessFlags src_access_mask, dst_access_mask;
|
||||
GetUsageMasks(last_usage_, src_stage_mask, src_access_mask);
|
||||
GetUsageMasks(usage, dst_stage_mask, dst_access_mask);
|
||||
VkDeviceSize offset, size;
|
||||
if (last_usage_ == usage) {
|
||||
// Committing the previous write, while not changing the access mask
|
||||
// (passing false as whether to skip the barrier if no masks are changed
|
||||
// for this reason).
|
||||
offset = VkDeviceSize(last_written_range_.first);
|
||||
size = VkDeviceSize(last_written_range_.second);
|
||||
} else {
|
||||
// Changing the stage and access mask - all preceding writes must be
|
||||
// available not only to the source stage, but to the destination as well.
|
||||
offset = 0;
|
||||
size = VK_WHOLE_SIZE;
|
||||
last_usage_ = usage;
|
||||
}
|
||||
command_processor_.PushBufferMemoryBarrier(
|
||||
buffer_, offset, size, src_stage_mask, dst_stage_mask, src_access_mask,
|
||||
dst_access_mask, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
|
||||
false);
|
||||
}
|
||||
last_written_range_ = written_range;
|
||||
}
|
||||
|
||||
bool VulkanSharedMemory::InitializeTraceSubmitDownloads() {
|
||||
ResetTraceDownload();
|
||||
PrepareForTraceDownload();
|
||||
uint32_t download_page_count = trace_download_page_count();
|
||||
if (!download_page_count) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
command_processor_.GetVulkanProvider();
|
||||
if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
|
||||
provider, download_page_count << page_size_log2(),
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
ui::vulkan::util::MemoryPurpose::kReadback, trace_download_buffer_,
|
||||
trace_download_buffer_memory_)) {
|
||||
XELOGE(
|
||||
"Shared memory: Failed to create a {} KB GPU-written memory download "
|
||||
"buffer for frame tracing",
|
||||
download_page_count << page_size_log2() >> 10);
|
||||
ResetTraceDownload();
|
||||
return false;
|
||||
}
|
||||
|
||||
Use(Usage::kRead);
|
||||
command_processor_.SubmitBarriers(true);
|
||||
DeferredCommandBuffer& command_buffer =
|
||||
command_processor_.deferred_command_buffer();
|
||||
|
||||
size_t download_range_count = trace_download_ranges().size();
|
||||
VkBufferCopy* download_regions = command_buffer.CmdCopyBufferEmplace(
|
||||
buffer_, trace_download_buffer_, uint32_t(download_range_count));
|
||||
VkDeviceSize download_buffer_offset = 0;
|
||||
for (size_t i = 0; i < download_range_count; ++i) {
|
||||
VkBufferCopy& download_region = download_regions[i];
|
||||
const std::pair<uint32_t, uint32_t>& download_range =
|
||||
trace_download_ranges()[i];
|
||||
download_region.srcOffset = download_range.first;
|
||||
download_region.dstOffset = download_buffer_offset;
|
||||
download_region.size = download_range.second;
|
||||
download_buffer_offset += download_range.second;
|
||||
}
|
||||
|
||||
command_processor_.PushBufferMemoryBarrier(
|
||||
trace_download_buffer_, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
VK_ACCESS_HOST_READ_BIT);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void VulkanSharedMemory::InitializeTraceCompleteDownloads() {
|
||||
if (!trace_download_buffer_memory_) {
|
||||
return;
|
||||
}
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
command_processor_.GetVulkanProvider();
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
||||
VkDevice device = provider.device();
|
||||
void* download_mapping;
|
||||
if (dfn.vkMapMemory(device, trace_download_buffer_memory_, 0, VK_WHOLE_SIZE,
|
||||
0, &download_mapping) == VK_SUCCESS) {
|
||||
uint32_t download_buffer_offset = 0;
|
||||
for (const auto& download_range : trace_download_ranges()) {
|
||||
trace_writer_.WriteMemoryRead(
|
||||
download_range.first, download_range.second,
|
||||
reinterpret_cast<const uint8_t*>(download_mapping) +
|
||||
download_buffer_offset);
|
||||
}
|
||||
dfn.vkUnmapMemory(device, trace_download_buffer_memory_);
|
||||
} else {
|
||||
XELOGE(
|
||||
"Shared memory: Failed to map the GPU-written memory download buffer "
|
||||
"for frame tracing");
|
||||
}
|
||||
ResetTraceDownload();
|
||||
}
|
||||
|
||||
bool VulkanSharedMemory::AllocateSparseHostGpuMemoryRange(
|
||||
uint32_t offset_allocations, uint32_t length_allocations) {
|
||||
if (!length_allocations) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
command_processor_.GetVulkanProvider();
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
||||
VkDevice device = provider.device();
|
||||
|
||||
VkMemoryAllocateInfo memory_allocate_info;
|
||||
memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
|
||||
memory_allocate_info.pNext = nullptr;
|
||||
memory_allocate_info.allocationSize =
|
||||
length_allocations << host_gpu_memory_sparse_granularity_log2();
|
||||
memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
|
||||
VkDeviceMemory memory;
|
||||
if (dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory) !=
|
||||
VK_SUCCESS) {
|
||||
XELOGE("Shared memory: Failed to allocate sparse buffer memory");
|
||||
return false;
|
||||
}
|
||||
buffer_memory_.push_back(memory);
|
||||
|
||||
VkSparseMemoryBind bind;
|
||||
bind.resourceOffset = offset_allocations
|
||||
<< host_gpu_memory_sparse_granularity_log2();
|
||||
bind.size = memory_allocate_info.allocationSize;
|
||||
bind.memory = memory;
|
||||
bind.memoryOffset = 0;
|
||||
bind.flags = 0;
|
||||
VkPipelineStageFlags bind_wait_stage_mask =
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
|
||||
if (provider.device_features().tessellationShader) {
|
||||
bind_wait_stage_mask |=
|
||||
VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
|
||||
}
|
||||
command_processor_.SparseBindBuffer(buffer_, 1, &bind, bind_wait_stage_mask);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool VulkanSharedMemory::UploadRanges(
|
||||
const std::vector<std::pair<uint32_t, uint32_t>>& upload_page_ranges) {
|
||||
if (upload_page_ranges.empty()) {
|
||||
return true;
|
||||
}
|
||||
// upload_page_ranges are sorted, use them to determine the range for the
|
||||
// ordering barrier.
|
||||
Use(Usage::kTransferDestination,
|
||||
std::make_pair(
|
||||
upload_page_ranges.front().first << page_size_log2(),
|
||||
(upload_page_ranges.back().first + upload_page_ranges.back().second -
|
||||
upload_page_ranges.front().first)
|
||||
<< page_size_log2()));
|
||||
command_processor_.SubmitBarriers(true);
|
||||
DeferredCommandBuffer& command_buffer =
|
||||
command_processor_.deferred_command_buffer();
|
||||
uint64_t submission_current = command_processor_.GetCurrentSubmission();
|
||||
bool successful = true;
|
||||
upload_regions_.clear();
|
||||
VkBuffer upload_buffer_previous = VK_NULL_HANDLE;
|
||||
for (auto upload_range : upload_page_ranges) {
|
||||
uint32_t upload_range_start = upload_range.first;
|
||||
uint32_t upload_range_length = upload_range.second;
|
||||
trace_writer_.WriteMemoryRead(upload_range_start << page_size_log2(),
|
||||
upload_range_length << page_size_log2());
|
||||
while (upload_range_length) {
|
||||
VkBuffer upload_buffer;
|
||||
VkDeviceSize upload_buffer_offset, upload_buffer_size;
|
||||
uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial(
|
||||
submission_current, upload_range_length << page_size_log2(),
|
||||
size_t(1) << page_size_log2(), upload_buffer, upload_buffer_offset,
|
||||
upload_buffer_size);
|
||||
if (upload_buffer_mapping == nullptr) {
|
||||
XELOGE("Shared memory: Failed to get a Vulkan upload buffer");
|
||||
successful = false;
|
||||
break;
|
||||
}
|
||||
MakeRangeValid(upload_range_start << page_size_log2(),
|
||||
uint32_t(upload_buffer_size), false, false);
|
||||
std::memcpy(
|
||||
upload_buffer_mapping,
|
||||
memory().TranslatePhysical(upload_range_start << page_size_log2()),
|
||||
upload_buffer_size);
|
||||
if (upload_buffer_previous != upload_buffer && !upload_regions_.empty()) {
|
||||
assert_true(upload_buffer_previous != VK_NULL_HANDLE);
|
||||
command_buffer.CmdVkCopyBuffer(upload_buffer_previous, buffer_,
|
||||
uint32_t(upload_regions_.size()),
|
||||
upload_regions_.data());
|
||||
upload_regions_.clear();
|
||||
}
|
||||
upload_buffer_previous = upload_buffer;
|
||||
VkBufferCopy& upload_region = upload_regions_.emplace_back();
|
||||
upload_region.srcOffset = upload_buffer_offset;
|
||||
upload_region.dstOffset =
|
||||
VkDeviceSize(upload_range_start << page_size_log2());
|
||||
upload_region.size = upload_buffer_size;
|
||||
uint32_t upload_buffer_pages =
|
||||
uint32_t(upload_buffer_size >> page_size_log2());
|
||||
upload_range_start += upload_buffer_pages;
|
||||
upload_range_length -= upload_buffer_pages;
|
||||
}
|
||||
if (!successful) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!upload_regions_.empty()) {
|
||||
assert_true(upload_buffer_previous != VK_NULL_HANDLE);
|
||||
command_buffer.CmdVkCopyBuffer(upload_buffer_previous, buffer_,
|
||||
uint32_t(upload_regions_.size()),
|
||||
upload_regions_.data());
|
||||
upload_regions_.clear();
|
||||
}
|
||||
return successful;
|
||||
}
|
||||
|
||||
void VulkanSharedMemory::GetUsageMasks(Usage usage,
|
||||
VkPipelineStageFlags& stage_mask,
|
||||
VkAccessFlags& access_mask) const {
|
||||
switch (usage) {
|
||||
case Usage::kComputeWrite:
|
||||
stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
|
||||
access_mask = VK_ACCESS_SHADER_READ_BIT;
|
||||
return;
|
||||
case Usage::kTransferDestination:
|
||||
stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
|
||||
access_mask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
stage_mask =
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | guest_shader_pipeline_stages_;
|
||||
access_mask = VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_SHADER_READ_BIT;
|
||||
switch (usage) {
|
||||
case Usage::kRead:
|
||||
stage_mask |=
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
|
||||
access_mask |= VK_ACCESS_TRANSFER_READ_BIT;
|
||||
break;
|
||||
case Usage::kGuestDrawReadWrite:
|
||||
access_mask |= VK_ACCESS_SHADER_WRITE_BIT;
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(usage);
|
||||
}
|
||||
}
|
||||
|
||||
void VulkanSharedMemory::ResetTraceDownload() {
|
||||
const ui::vulkan::VulkanProvider& provider =
|
||||
command_processor_.GetVulkanProvider();
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
||||
VkDevice device = provider.device();
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
|
||||
trace_download_buffer_);
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
|
||||
trace_download_buffer_memory_);
|
||||
ReleaseTraceDownloadRanges();
|
||||
}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -0,0 +1,97 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_
|
||||
#define XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/gpu/shared_memory.h"
|
||||
#include "xenia/gpu/trace_writer.h"
|
||||
#include "xenia/memory.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
class VulkanCommandProcessor;
|
||||
|
||||
class VulkanSharedMemory : public SharedMemory {
|
||||
public:
|
||||
VulkanSharedMemory(VulkanCommandProcessor& command_processor, Memory& memory,
|
||||
TraceWriter& trace_writer,
|
||||
VkPipelineStageFlags guest_shader_pipeline_stages);
|
||||
~VulkanSharedMemory() override;
|
||||
|
||||
bool Initialize();
|
||||
void Shutdown(bool from_destructor = false);
|
||||
|
||||
void CompletedSubmissionUpdated();
|
||||
void EndSubmission();
|
||||
|
||||
enum class Usage {
|
||||
// Index buffer, vfetch, compute read, transfer source.
|
||||
kRead,
|
||||
// Index buffer, vfetch, memexport.
|
||||
kGuestDrawReadWrite,
|
||||
kComputeWrite,
|
||||
kTransferDestination,
|
||||
};
|
||||
// Inserts a pipeline barrier for the target usage, also ensuring consecutive
|
||||
// read-write accesses are ordered with each other.
|
||||
void Use(Usage usage, std::pair<uint32_t, uint32_t> written_range = {});
|
||||
|
||||
VkBuffer buffer() const { return buffer_; }
|
||||
|
||||
// Returns true if any downloads were submitted to the command processor.
|
||||
bool InitializeTraceSubmitDownloads();
|
||||
void InitializeTraceCompleteDownloads();
|
||||
|
||||
protected:
|
||||
bool AllocateSparseHostGpuMemoryRange(uint32_t offset_allocations,
|
||||
uint32_t length_allocations) override;
|
||||
|
||||
bool UploadRanges(const std::vector<std::pair<uint32_t, uint32_t>>&
|
||||
upload_page_ranges) override;
|
||||
|
||||
private:
|
||||
void GetUsageMasks(Usage usage, VkPipelineStageFlags& stage_mask,
|
||||
VkAccessFlags& access_mask) const;
|
||||
|
||||
VulkanCommandProcessor& command_processor_;
|
||||
TraceWriter& trace_writer_;
|
||||
VkPipelineStageFlags guest_shader_pipeline_stages_;
|
||||
|
||||
VkBuffer buffer_ = VK_NULL_HANDLE;
|
||||
uint32_t buffer_memory_type_;
|
||||
// Single for non-sparse, every allocation so far for sparse.
|
||||
std::vector<VkDeviceMemory> buffer_memory_;
|
||||
|
||||
Usage last_usage_;
|
||||
std::pair<uint32_t, uint32_t> last_written_range_;
|
||||
|
||||
std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> upload_buffer_pool_;
|
||||
std::vector<VkBufferCopy> upload_regions_;
|
||||
|
||||
// Created temporarily, only for downloading.
|
||||
VkBuffer trace_download_buffer_ = VK_NULL_HANDLE;
|
||||
VkDeviceMemory trace_download_buffer_memory_ = VK_NULL_HANDLE;
|
||||
void ResetTraceDownload();
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_
|
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -10,22 +10,15 @@
|
|||
#ifndef XENIA_GPU_VULKAN_VULKAN_TEXTURE_CACHE_H_
|
||||
#define XENIA_GPU_VULKAN_VULKAN_TEXTURE_CACHE_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <list>
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
#include "xenia/base/mutex.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/sampler_info.h"
|
||||
#include "xenia/gpu/shader.h"
|
||||
#include "xenia/gpu/texture_conversion.h"
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
#include "xenia/gpu/trace_writer.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/vulkan/circular_buffer.h"
|
||||
#include "xenia/ui/vulkan/fenced_pools.h"
|
||||
#include "xenia/base/hash.h"
|
||||
#include "xenia/gpu/texture_cache.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_shader.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
|
||||
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
|
@ -33,205 +26,334 @@ namespace xe {
|
|||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
//
|
||||
class VulkanTextureCache {
|
||||
class VulkanCommandProcessor;
|
||||
|
||||
class VulkanTextureCache final : public TextureCache {
|
||||
public:
|
||||
struct TextureView;
|
||||
|
||||
// This represents an uploaded Vulkan texture.
|
||||
struct Texture {
|
||||
TextureInfo texture_info;
|
||||
std::vector<std::unique_ptr<TextureView>> views;
|
||||
|
||||
VkFormat format;
|
||||
VkImage image;
|
||||
VkImageLayout image_layout;
|
||||
VmaAllocation alloc;
|
||||
VmaAllocationInfo alloc_info;
|
||||
VkFramebuffer framebuffer; // Blit target frame buffer.
|
||||
VkImageUsageFlags usage_flags;
|
||||
|
||||
bool is_watched;
|
||||
bool pending_invalidation;
|
||||
|
||||
// Pointer to the latest usage fence.
|
||||
VkFence in_flight_fence;
|
||||
};
|
||||
|
||||
struct TextureView {
|
||||
Texture* texture;
|
||||
VkImageView view;
|
||||
|
||||
union {
|
||||
uint16_t swizzle;
|
||||
struct {
|
||||
// FIXME: This only applies on little-endian platforms!
|
||||
uint16_t swiz_x : 3;
|
||||
uint16_t swiz_y : 3;
|
||||
uint16_t swiz_z : 3;
|
||||
uint16_t swiz_w : 3;
|
||||
uint16_t : 4;
|
||||
};
|
||||
// Sampler parameters that can be directly converted to a host sampler or used
|
||||
// for checking whether samplers bindings are up to date.
|
||||
union SamplerParameters {
|
||||
uint32_t value;
|
||||
struct {
|
||||
xenos::ClampMode clamp_x : 3; // 3
|
||||
xenos::ClampMode clamp_y : 3; // 6
|
||||
xenos::ClampMode clamp_z : 3; // 9
|
||||
xenos::BorderColor border_color : 2; // 11
|
||||
uint32_t mag_linear : 1; // 12
|
||||
uint32_t min_linear : 1; // 13
|
||||
uint32_t mip_linear : 1; // 14
|
||||
xenos::AnisoFilter aniso_filter : 3; // 17
|
||||
uint32_t mip_min_level : 4; // 21
|
||||
uint32_t mip_base_map : 1; // 22
|
||||
// Maximum mip level is in the texture resource itself, but mip_base_map
|
||||
// can be used to limit fetching to mip_min_level.
|
||||
};
|
||||
|
||||
SamplerParameters() : value(0) { static_assert_size(*this, sizeof(value)); }
|
||||
struct Hasher {
|
||||
size_t operator()(const SamplerParameters& parameters) const {
|
||||
return std::hash<uint32_t>{}(parameters.value);
|
||||
}
|
||||
};
|
||||
bool operator==(const SamplerParameters& parameters) const {
|
||||
return value == parameters.value;
|
||||
}
|
||||
bool operator!=(const SamplerParameters& parameters) const {
|
||||
return value != parameters.value;
|
||||
}
|
||||
};
|
||||
|
||||
VulkanTextureCache(Memory* memory, RegisterFile* register_file,
|
||||
TraceWriter* trace_writer,
|
||||
ui::vulkan::VulkanProvider& provider);
|
||||
~VulkanTextureCache();
|
||||
|
||||
VkResult Initialize();
|
||||
void Shutdown();
|
||||
|
||||
// Descriptor set layout containing all possible texture bindings.
|
||||
// The set contains one descriptor for each texture sampler [0-31].
|
||||
VkDescriptorSetLayout texture_descriptor_set_layout() const {
|
||||
return texture_descriptor_set_layout_;
|
||||
// Transient descriptor set layouts must be initialized in the command
|
||||
// processor.
|
||||
static std::unique_ptr<VulkanTextureCache> Create(
|
||||
const RegisterFile& register_file, VulkanSharedMemory& shared_memory,
|
||||
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y,
|
||||
VulkanCommandProcessor& command_processor,
|
||||
VkPipelineStageFlags guest_shader_pipeline_stages) {
|
||||
std::unique_ptr<VulkanTextureCache> texture_cache(new VulkanTextureCache(
|
||||
register_file, shared_memory, draw_resolution_scale_x,
|
||||
draw_resolution_scale_y, command_processor,
|
||||
guest_shader_pipeline_stages));
|
||||
if (!texture_cache->Initialize()) {
|
||||
return nullptr;
|
||||
}
|
||||
return std::move(texture_cache);
|
||||
}
|
||||
|
||||
// Prepares a descriptor set containing the samplers and images for all
|
||||
// bindings. The textures will be uploaded/converted/etc as needed.
|
||||
// Requires a fence to be provided that will be signaled when finished
|
||||
// using the returned descriptor set.
|
||||
VkDescriptorSet PrepareTextureSet(
|
||||
VkCommandBuffer setup_command_buffer, VkFence completion_fence,
|
||||
const std::vector<Shader::TextureBinding>& vertex_bindings,
|
||||
const std::vector<Shader::TextureBinding>& pixel_bindings);
|
||||
~VulkanTextureCache();
|
||||
|
||||
// TODO(benvanik): ReadTexture.
|
||||
void BeginSubmission(uint64_t new_submission_index) override;
|
||||
|
||||
Texture* Lookup(const TextureInfo& texture_info);
|
||||
// Must be called within a frame - creates and untiles textures needed by
|
||||
// shaders, and enqueues transitioning them into the sampled usage. This may
|
||||
// bind compute pipelines (notifying the command processor about that), and
|
||||
// also since it may insert deferred barriers, before flushing the barriers
|
||||
// preceding host GPU work.
|
||||
void RequestTextures(uint32_t used_texture_mask) override;
|
||||
|
||||
// Looks for a texture either containing or matching these parameters.
|
||||
// Caller is responsible for checking if the texture returned is an exact
|
||||
// match or just contains the texture given by the parameters.
|
||||
// If offset_x and offset_y are not null, this may return a texture that
|
||||
// contains this address at an offset.
|
||||
Texture* LookupAddress(uint32_t guest_address, uint32_t width,
|
||||
uint32_t height, xenos::TextureFormat format,
|
||||
VkOffset2D* out_offset = nullptr);
|
||||
VkImageView GetActiveBindingOrNullImageView(uint32_t fetch_constant_index,
|
||||
xenos::FetchOpDimension dimension,
|
||||
bool is_signed) const;
|
||||
|
||||
TextureView* DemandView(Texture* texture, uint16_t swizzle);
|
||||
SamplerParameters GetSamplerParameters(
|
||||
const VulkanShader::SamplerBinding& binding) const;
|
||||
|
||||
// Demands a texture for the purpose of resolving from EDRAM. This either
|
||||
// creates a new texture or returns a previously created texture.
|
||||
Texture* DemandResolveTexture(const TextureInfo& texture_info);
|
||||
// Must be called for every used sampler at least once in a single submission,
|
||||
// and a submission must be open for this to be callable.
|
||||
// Returns:
|
||||
// - The sampler, if obtained successfully - and increases its last usage
|
||||
// submission index - and has_overflown_out = false.
|
||||
// - VK_NULL_HANDLE and has_overflown_out = true if there's a total sampler
|
||||
// count overflow in a submission that potentially hasn't completed yet.
|
||||
// - VK_NULL_HANDLE and has_overflown_out = false in case of a general failure
|
||||
// to create a sampler.
|
||||
VkSampler UseSampler(SamplerParameters parameters, bool& has_overflown_out);
|
||||
// Returns the submission index to await (may be the current submission in
|
||||
// case of an overflow within a single submission - in this case, it must be
|
||||
// ended, and a new one must be started) in case of sampler count overflow, so
|
||||
// samplers may be freed, and UseSamplers may take their slots.
|
||||
uint64_t GetSubmissionToAwaitOnSamplerOverflow(
|
||||
uint32_t overflowed_sampler_count) const;
|
||||
|
||||
// Clears all cached content.
|
||||
void ClearCache();
|
||||
// Returns the 2D view of the front buffer texture (for fragment shader
|
||||
// reading - the barrier will be pushed in the command processor if needed),
|
||||
// or VK_NULL_HANDLE in case of failure. May call LoadTextureData.
|
||||
VkImageView RequestSwapTexture(uint32_t& width_scaled_out,
|
||||
uint32_t& height_scaled_out,
|
||||
xenos::TextureFormat& format_out);
|
||||
|
||||
// Frees any unused resources
|
||||
void Scavenge();
|
||||
protected:
|
||||
bool IsSignedVersionSeparateForFormat(TextureKey key) const override;
|
||||
uint32_t GetHostFormatSwizzle(TextureKey key) const override;
|
||||
|
||||
uint32_t GetMaxHostTextureWidthHeight(
|
||||
xenos::DataDimension dimension) const override;
|
||||
uint32_t GetMaxHostTextureDepthOrArraySize(
|
||||
xenos::DataDimension dimension) const override;
|
||||
|
||||
std::unique_ptr<Texture> CreateTexture(TextureKey key) override;
|
||||
|
||||
bool LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base,
|
||||
bool load_mips) override;
|
||||
|
||||
void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override;
|
||||
|
||||
private:
|
||||
struct UpdateSetInfo;
|
||||
enum LoadDescriptorSetIndex {
|
||||
kLoadDescriptorSetIndexDestination,
|
||||
kLoadDescriptorSetIndexSource,
|
||||
kLoadDescriptorSetIndexConstants,
|
||||
kLoadDescriptorSetCount,
|
||||
};
|
||||
|
||||
struct HostFormat {
|
||||
LoadShaderIndex load_shader;
|
||||
// Do NOT add integer formats to this - they are not filterable, can only be
|
||||
// read with ImageFetch, not ImageSample! If any game is seen using
|
||||
// num_format 1 for fixed-point formats (for floating-point, it's normally
|
||||
// set to 1 though), add a constant buffer containing multipliers for the
|
||||
// textures and multiplication to the tfetch implementation.
|
||||
VkFormat format;
|
||||
// Whether the format is block-compressed on the host (the host block size
|
||||
// matches the guest format block size in this case), and isn't decompressed
|
||||
// on load.
|
||||
bool block_compressed;
|
||||
|
||||
// Set up dynamically based on what's supported by the device.
|
||||
bool linear_filterable;
|
||||
};
|
||||
|
||||
struct HostFormatPair {
|
||||
HostFormat format_unsigned;
|
||||
HostFormat format_signed;
|
||||
// Mapping of Xenos swizzle components to Vulkan format components.
|
||||
uint32_t swizzle;
|
||||
// Whether the unsigned and the signed formats are compatible for one image
|
||||
// and the same image data (on a portability subset device, this should also
|
||||
// take imageViewFormatReinterpretation into account).
|
||||
bool unsigned_signed_compatible;
|
||||
};
|
||||
|
||||
class VulkanTexture final : public Texture {
|
||||
public:
|
||||
enum class Usage {
|
||||
kUndefined,
|
||||
kTransferDestination,
|
||||
kGuestShaderSampled,
|
||||
kSwapSampled,
|
||||
};
|
||||
|
||||
// Takes ownership of the image and its memory.
|
||||
explicit VulkanTexture(VulkanTextureCache& texture_cache,
|
||||
const TextureKey& key, VkImage image,
|
||||
VmaAllocation allocation);
|
||||
~VulkanTexture();
|
||||
|
||||
VkImage image() const { return image_; }
|
||||
|
||||
// Doesn't transition (the caller must insert the barrier).
|
||||
Usage SetUsage(Usage new_usage) {
|
||||
Usage old_usage = usage_;
|
||||
usage_ = new_usage;
|
||||
return old_usage;
|
||||
}
|
||||
|
||||
VkImageView GetView(bool is_signed, uint32_t host_swizzle,
|
||||
bool is_array = true);
|
||||
|
||||
private:
|
||||
union ViewKey {
|
||||
uint32_t key;
|
||||
struct {
|
||||
uint32_t is_signed_separate_view : 1;
|
||||
uint32_t host_swizzle : 12;
|
||||
uint32_t is_array : 1;
|
||||
};
|
||||
|
||||
ViewKey() : key(0) { static_assert_size(*this, sizeof(key)); }
|
||||
|
||||
struct Hasher {
|
||||
size_t operator()(const ViewKey& key) const {
|
||||
return std::hash<decltype(key.key)>{}(key.key);
|
||||
}
|
||||
};
|
||||
bool operator==(const ViewKey& other_key) const {
|
||||
return key == other_key.key;
|
||||
}
|
||||
bool operator!=(const ViewKey& other_key) const {
|
||||
return !(*this == other_key);
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr VkComponentSwizzle GetComponentSwizzle(
|
||||
uint32_t texture_swizzle, uint32_t component_index) {
|
||||
xenos::XE_GPU_TEXTURE_SWIZZLE texture_component_swizzle =
|
||||
xenos::XE_GPU_TEXTURE_SWIZZLE(
|
||||
(texture_swizzle >> (3 * component_index)) & 0b111);
|
||||
if (texture_component_swizzle ==
|
||||
xenos::XE_GPU_TEXTURE_SWIZZLE(component_index)) {
|
||||
// The portability subset requires all swizzles to be IDENTITY, return
|
||||
// IDENTITY specifically, not R, G, B, A.
|
||||
return VK_COMPONENT_SWIZZLE_IDENTITY;
|
||||
}
|
||||
switch (texture_component_swizzle) {
|
||||
case xenos::XE_GPU_TEXTURE_SWIZZLE_R:
|
||||
return VK_COMPONENT_SWIZZLE_R;
|
||||
case xenos::XE_GPU_TEXTURE_SWIZZLE_G:
|
||||
return VK_COMPONENT_SWIZZLE_G;
|
||||
case xenos::XE_GPU_TEXTURE_SWIZZLE_B:
|
||||
return VK_COMPONENT_SWIZZLE_B;
|
||||
case xenos::XE_GPU_TEXTURE_SWIZZLE_A:
|
||||
return VK_COMPONENT_SWIZZLE_A;
|
||||
case xenos::XE_GPU_TEXTURE_SWIZZLE_0:
|
||||
return VK_COMPONENT_SWIZZLE_ZERO;
|
||||
case xenos::XE_GPU_TEXTURE_SWIZZLE_1:
|
||||
return VK_COMPONENT_SWIZZLE_ONE;
|
||||
default:
|
||||
// An invalid value.
|
||||
return VK_COMPONENT_SWIZZLE_IDENTITY;
|
||||
}
|
||||
}
|
||||
|
||||
VkImage image_;
|
||||
VmaAllocation allocation_;
|
||||
|
||||
Usage usage_ = Usage::kUndefined;
|
||||
|
||||
std::unordered_map<ViewKey, VkImageView, ViewKey::Hasher> views_;
|
||||
};
|
||||
|
||||
struct VulkanTextureBinding {
|
||||
VkImageView image_view_unsigned;
|
||||
VkImageView image_view_signed;
|
||||
|
||||
VulkanTextureBinding() { Reset(); }
|
||||
|
||||
void Reset() {
|
||||
image_view_unsigned = VK_NULL_HANDLE;
|
||||
image_view_signed = VK_NULL_HANDLE;
|
||||
}
|
||||
};
|
||||
|
||||
// Cached Vulkan sampler.
|
||||
struct Sampler {
|
||||
SamplerInfo sampler_info;
|
||||
VkSampler sampler;
|
||||
uint64_t last_usage_submission;
|
||||
std::pair<const SamplerParameters, Sampler>* used_previous;
|
||||
std::pair<const SamplerParameters, Sampler>* used_next;
|
||||
};
|
||||
|
||||
struct WatchedTexture {
|
||||
Texture* texture;
|
||||
bool is_mip;
|
||||
};
|
||||
static constexpr bool AreDimensionsCompatible(
|
||||
xenos::FetchOpDimension binding_dimension,
|
||||
xenos::DataDimension resource_dimension) {
|
||||
switch (binding_dimension) {
|
||||
case xenos::FetchOpDimension::k1D:
|
||||
case xenos::FetchOpDimension::k2D:
|
||||
return resource_dimension == xenos::DataDimension::k1D ||
|
||||
resource_dimension == xenos::DataDimension::k2DOrStacked;
|
||||
case xenos::FetchOpDimension::k3DOrStacked:
|
||||
return resource_dimension == xenos::DataDimension::k3D;
|
||||
case xenos::FetchOpDimension::kCube:
|
||||
return resource_dimension == xenos::DataDimension::kCube;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates a new texture and memory to back it on the GPU.
|
||||
Texture* AllocateTexture(const TextureInfo& texture_info,
|
||||
VkFormatFeatureFlags required_flags =
|
||||
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT);
|
||||
bool FreeTexture(Texture* texture);
|
||||
explicit VulkanTextureCache(
|
||||
const RegisterFile& register_file, VulkanSharedMemory& shared_memory,
|
||||
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y,
|
||||
VulkanCommandProcessor& command_processor,
|
||||
VkPipelineStageFlags guest_shader_pipeline_stages);
|
||||
|
||||
void WatchTexture(Texture* texture);
|
||||
void TextureTouched(Texture* texture);
|
||||
std::pair<uint32_t, uint32_t> MemoryInvalidationCallback(
|
||||
uint32_t physical_address_start, uint32_t length, bool exact_range);
|
||||
static std::pair<uint32_t, uint32_t> MemoryInvalidationCallbackThunk(
|
||||
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
||||
bool exact_range);
|
||||
bool Initialize();
|
||||
|
||||
// Demands a texture. If command_buffer is null and the texture hasn't been
|
||||
// uploaded to graphics memory already, we will return null and bail.
|
||||
Texture* Demand(const TextureInfo& texture_info,
|
||||
VkCommandBuffer command_buffer = nullptr,
|
||||
VkFence completion_fence = nullptr);
|
||||
Sampler* Demand(const SamplerInfo& sampler_info);
|
||||
const HostFormatPair& GetHostFormatPair(TextureKey key) const;
|
||||
|
||||
void FlushPendingCommands(VkCommandBuffer command_buffer,
|
||||
VkFence completion_fence);
|
||||
void GetTextureUsageMasks(VulkanTexture::Usage usage,
|
||||
VkPipelineStageFlags& stage_mask,
|
||||
VkAccessFlags& access_mask, VkImageLayout& layout);
|
||||
|
||||
bool ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region,
|
||||
uint32_t mip, const TextureInfo& src);
|
||||
xenos::ClampMode NormalizeClampMode(xenos::ClampMode clamp_mode) const;
|
||||
|
||||
static const FormatInfo* GetFormatInfo(xenos::TextureFormat format);
|
||||
static texture_conversion::CopyBlockCallback GetFormatCopyBlock(
|
||||
xenos::TextureFormat format);
|
||||
static TextureExtent GetMipExtent(const TextureInfo& src, uint32_t mip);
|
||||
static uint32_t ComputeMipStorage(const FormatInfo* format_info,
|
||||
uint32_t width, uint32_t height,
|
||||
uint32_t depth, uint32_t mip);
|
||||
static uint32_t ComputeMipStorage(const TextureInfo& src, uint32_t mip);
|
||||
static uint32_t ComputeTextureStorage(const TextureInfo& src);
|
||||
VulkanCommandProcessor& command_processor_;
|
||||
VkPipelineStageFlags guest_shader_pipeline_stages_;
|
||||
|
||||
// Writes a texture back into guest memory. This call is (mostly) asynchronous
|
||||
// but the texture must not be flagged for destruction.
|
||||
void WritebackTexture(Texture* texture);
|
||||
// Using the Vulkan Memory Allocator because texture count in games is
|
||||
// naturally pretty much unbounded, while Vulkan implementations, especially
|
||||
// on Windows versions before 10, may have an allocation count limit as low as
|
||||
// 4096.
|
||||
VmaAllocator vma_allocator_ = VK_NULL_HANDLE;
|
||||
|
||||
// Queues commands to upload a texture from system memory, applying any
|
||||
// conversions necessary. This may flush the command buffer to the GPU if we
|
||||
// run out of staging memory.
|
||||
bool UploadTexture(VkCommandBuffer command_buffer, VkFence completion_fence,
|
||||
Texture* dest, const TextureInfo& src);
|
||||
static const HostFormatPair kBestHostFormats[64];
|
||||
static const HostFormatPair kHostFormatGBGRUnaligned;
|
||||
static const HostFormatPair kHostFormatBGRGUnaligned;
|
||||
HostFormatPair host_formats_[64];
|
||||
|
||||
void HashTextureBindings(XXH3_state_t* hash_state, uint32_t& fetch_mask,
|
||||
const std::vector<Shader::TextureBinding>& bindings);
|
||||
bool SetupTextureBindings(
|
||||
VkCommandBuffer command_buffer, VkFence completion_fence,
|
||||
UpdateSetInfo* update_set_info,
|
||||
const std::vector<Shader::TextureBinding>& bindings);
|
||||
bool SetupTextureBinding(VkCommandBuffer command_buffer,
|
||||
VkFence completion_fence,
|
||||
UpdateSetInfo* update_set_info,
|
||||
const Shader::TextureBinding& binding);
|
||||
VkPipelineLayout load_pipeline_layout_ = VK_NULL_HANDLE;
|
||||
std::array<VkPipeline, kLoadShaderCount> load_pipelines_{};
|
||||
std::array<VkPipeline, kLoadShaderCount> load_pipelines_scaled_{};
|
||||
|
||||
// Removes invalidated textures from the cache, queues them for delete.
|
||||
void RemoveInvalidatedTextures();
|
||||
// If both images can be placed in the same allocation, it's one allocation,
|
||||
// otherwise it's two separate.
|
||||
std::array<VkDeviceMemory, 2> null_images_memory_{};
|
||||
VkImage null_image_2d_array_cube_ = VK_NULL_HANDLE;
|
||||
VkImage null_image_3d_ = VK_NULL_HANDLE;
|
||||
VkImageView null_image_view_2d_array_ = VK_NULL_HANDLE;
|
||||
VkImageView null_image_view_cube_ = VK_NULL_HANDLE;
|
||||
VkImageView null_image_view_3d_ = VK_NULL_HANDLE;
|
||||
bool null_images_cleared_ = false;
|
||||
|
||||
Memory* memory_ = nullptr;
|
||||
std::array<VulkanTextureBinding, xenos::kTextureFetchConstantCount>
|
||||
vulkan_texture_bindings_;
|
||||
|
||||
RegisterFile* register_file_ = nullptr;
|
||||
TraceWriter* trace_writer_ = nullptr;
|
||||
ui::vulkan::VulkanProvider& provider_;
|
||||
uint32_t sampler_max_count_;
|
||||
|
||||
std::unique_ptr<xe::ui::vulkan::CommandBufferPool> wb_command_pool_ = nullptr;
|
||||
std::unique_ptr<xe::ui::vulkan::DescriptorPool> descriptor_pool_ = nullptr;
|
||||
std::unordered_map<uint64_t, VkDescriptorSet> texture_sets_;
|
||||
VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr;
|
||||
xenos::AnisoFilter max_anisotropy_;
|
||||
|
||||
VmaAllocator mem_allocator_ = nullptr;
|
||||
|
||||
ui::vulkan::CircularBuffer staging_buffer_;
|
||||
ui::vulkan::CircularBuffer wb_staging_buffer_;
|
||||
std::unordered_map<uint64_t, Texture*> textures_;
|
||||
std::unordered_map<uint64_t, Sampler*> samplers_;
|
||||
std::list<Texture*> pending_delete_textures_;
|
||||
|
||||
void* memory_invalidation_callback_handle_ = nullptr;
|
||||
|
||||
xe::global_critical_region global_critical_region_;
|
||||
std::list<WatchedTexture> watched_textures_;
|
||||
std::unordered_set<Texture*>* invalidated_textures_;
|
||||
std::unordered_set<Texture*> invalidated_textures_sets_[2];
|
||||
|
||||
struct UpdateSetInfo {
|
||||
// Bitmap of all 32 fetch constants and whether they have been setup yet.
|
||||
// This prevents duplication across the vertex and pixel shader.
|
||||
uint32_t has_setup_fetch_mask;
|
||||
uint32_t image_write_count = 0;
|
||||
VkWriteDescriptorSet image_writes[32];
|
||||
VkDescriptorImageInfo image_infos[32];
|
||||
} update_set_info_;
|
||||
std::unordered_map<SamplerParameters, Sampler, SamplerParameters::Hasher>
|
||||
samplers_;
|
||||
std::pair<const SamplerParameters, Sampler>* sampler_used_first_ = nullptr;
|
||||
std::pair<const SamplerParameters, Sampler>* sampler_used_last_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2021 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -19,8 +19,6 @@ namespace xe {
|
|||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
class VulkanTraceViewer final : public TraceViewer {
|
||||
public:
|
||||
static std::unique_ptr<WindowedApp> Create(
|
||||
|
@ -35,36 +33,21 @@ class VulkanTraceViewer final : public TraceViewer {
|
|||
uintptr_t GetColorRenderTarget(
|
||||
uint32_t pitch, xenos::MsaaSamples samples, uint32_t base,
|
||||
xenos::ColorRenderTargetFormat format) override {
|
||||
auto command_processor = static_cast<VulkanCommandProcessor*>(
|
||||
graphics_system()->command_processor());
|
||||
// return command_processor->GetColorRenderTarget(pitch, samples, base,
|
||||
// format);
|
||||
// TODO(Triang3l): EDRAM viewer.
|
||||
return 0;
|
||||
}
|
||||
|
||||
uintptr_t GetDepthRenderTarget(
|
||||
uint32_t pitch, xenos::MsaaSamples samples, uint32_t base,
|
||||
xenos::DepthRenderTargetFormat format) override {
|
||||
auto command_processor = static_cast<VulkanCommandProcessor*>(
|
||||
graphics_system()->command_processor());
|
||||
// return command_processor->GetDepthRenderTarget(pitch, samples, base,
|
||||
// format);
|
||||
// TODO(Triang3l): EDRAM viewer.
|
||||
return 0;
|
||||
}
|
||||
|
||||
uintptr_t GetTextureEntry(const TextureInfo& texture_info,
|
||||
const SamplerInfo& sampler_info) override {
|
||||
auto command_processor = static_cast<VulkanCommandProcessor*>(
|
||||
graphics_system()->command_processor());
|
||||
|
||||
// auto entry_view =
|
||||
// command_processor->texture_cache()->Demand(texture_info,
|
||||
// sampler_info);
|
||||
// if (!entry_view) {
|
||||
// return 0;
|
||||
//}
|
||||
// auto texture = entry_view->texture;
|
||||
// return static_cast<uintptr_t>(texture->handle);
|
||||
// TODO(Triang3l): Textures, but from a fetch constant rather than
|
||||
// TextureInfo/SamplerInfo which are going away.
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,19 +0,0 @@
|
|||
project_root = "../../../.."
|
||||
include(project_root.."/tools/build")
|
||||
|
||||
group("src")
|
||||
project("xenia-ui-spirv")
|
||||
uuid("2323a069-5b29-44a3-b524-f35451a81978")
|
||||
kind("StaticLib")
|
||||
language("C++")
|
||||
links({
|
||||
"glslang-spirv",
|
||||
"spirv-tools",
|
||||
"xenia-base",
|
||||
})
|
||||
defines({
|
||||
})
|
||||
includedirs({
|
||||
project_root.."/third_party/spirv-tools/external/include",
|
||||
})
|
||||
local_platform_files()
|
|
@ -1,78 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/ui/spirv/spirv_assembler.h"
|
||||
|
||||
#include "third_party/spirv-tools/include/spirv-tools/libspirv.h"
|
||||
#include "xenia/base/logging.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace spirv {
|
||||
|
||||
SpirvAssembler::Result::Result(spv_binary binary, spv_diagnostic diagnostic)
|
||||
: binary_(binary), diagnostic_(diagnostic) {}
|
||||
|
||||
SpirvAssembler::Result::~Result() {
|
||||
if (binary_) {
|
||||
spvBinaryDestroy(binary_);
|
||||
}
|
||||
if (diagnostic_) {
|
||||
spvDiagnosticDestroy(diagnostic_);
|
||||
}
|
||||
}
|
||||
|
||||
bool SpirvAssembler::Result::has_error() const { return !!diagnostic_; }
|
||||
|
||||
size_t SpirvAssembler::Result::error_source_line() const {
|
||||
return diagnostic_ ? diagnostic_->position.line : 0;
|
||||
}
|
||||
|
||||
size_t SpirvAssembler::Result::error_source_column() const {
|
||||
return diagnostic_ ? diagnostic_->position.column : 0;
|
||||
}
|
||||
|
||||
const char* SpirvAssembler::Result::error_string() const {
|
||||
return diagnostic_ ? diagnostic_->error : "";
|
||||
}
|
||||
|
||||
const uint32_t* SpirvAssembler::Result::words() const {
|
||||
return binary_ ? binary_->code : nullptr;
|
||||
}
|
||||
|
||||
size_t SpirvAssembler::Result::word_count() const {
|
||||
return binary_ ? binary_->wordCount : 0;
|
||||
}
|
||||
|
||||
SpirvAssembler::SpirvAssembler()
|
||||
: spv_context_(spvContextCreate(SPV_ENV_VULKAN_1_0)) {}
|
||||
|
||||
SpirvAssembler::~SpirvAssembler() { spvContextDestroy(spv_context_); }
|
||||
|
||||
std::unique_ptr<SpirvAssembler::Result> SpirvAssembler::Assemble(
|
||||
const char* source_text, size_t source_text_length) {
|
||||
spv_binary binary = nullptr;
|
||||
spv_diagnostic diagnostic = nullptr;
|
||||
auto result_code = spvTextToBinary(spv_context_, source_text,
|
||||
source_text_length, &binary, &diagnostic);
|
||||
std::unique_ptr<Result> result(new Result(binary, diagnostic));
|
||||
if (result_code) {
|
||||
XELOGE("Failed to assemble spv: {}", result_code);
|
||||
if (result->has_error()) {
|
||||
return result;
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace ui
|
||||
} // namespace xe
|
|
@ -1,69 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_UI_SPIRV_SPIRV_ASSEMBLER_H_
|
||||
#define XENIA_UI_SPIRV_SPIRV_ASSEMBLER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "xenia/ui/spirv/spirv_util.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace spirv {
|
||||
|
||||
class SpirvAssembler {
|
||||
public:
|
||||
class Result {
|
||||
public:
|
||||
Result(spv_binary binary, spv_diagnostic diagnostic);
|
||||
~Result();
|
||||
|
||||
// True if the result has an error associated with it.
|
||||
bool has_error() const;
|
||||
// Line of the error in the provided source text.
|
||||
size_t error_source_line() const;
|
||||
// Column of the error in the provided source text.
|
||||
size_t error_source_column() const;
|
||||
// Human-readable description of the error.
|
||||
const char* error_string() const;
|
||||
|
||||
// Assembled SPIRV binary.
|
||||
// Returned pointer lifetime is tied to this Result instance.
|
||||
const uint32_t* words() const;
|
||||
// Size of the SPIRV binary, in words.
|
||||
size_t word_count() const;
|
||||
|
||||
private:
|
||||
spv_binary binary_ = nullptr;
|
||||
spv_diagnostic diagnostic_ = nullptr;
|
||||
};
|
||||
|
||||
SpirvAssembler();
|
||||
~SpirvAssembler();
|
||||
|
||||
// Assembles the given source text into a SPIRV binary.
|
||||
// The return will be nullptr if assembly fails due to a library error.
|
||||
// The return may have an error set on it if the source text is malformed.
|
||||
std::unique_ptr<Result> Assemble(const char* source_text,
|
||||
size_t source_text_length);
|
||||
std::unique_ptr<Result> Assemble(const std::string_view source_text) {
|
||||
return Assemble(source_text.data(), source_text.size());
|
||||
}
|
||||
|
||||
private:
|
||||
spv_context spv_context_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace ui
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_UI_SPIRV_SPIRV_ASSEMBLER_H_
|
|
@ -1,82 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/ui/spirv/spirv_disassembler.h"
|
||||
|
||||
#include "third_party/spirv-tools/include/spirv-tools/libspirv.h"
|
||||
#include "xenia/base/logging.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace spirv {
|
||||
|
||||
SpirvDisassembler::Result::Result(spv_text text, spv_diagnostic diagnostic)
|
||||
: text_(text), diagnostic_(diagnostic) {}
|
||||
|
||||
SpirvDisassembler::Result::~Result() {
|
||||
if (text_) {
|
||||
spvTextDestroy(text_);
|
||||
}
|
||||
if (diagnostic_) {
|
||||
spvDiagnosticDestroy(diagnostic_);
|
||||
}
|
||||
}
|
||||
|
||||
bool SpirvDisassembler::Result::has_error() const { return !!diagnostic_; }
|
||||
|
||||
size_t SpirvDisassembler::Result::error_word_index() const {
|
||||
return diagnostic_ ? diagnostic_->position.index : 0;
|
||||
}
|
||||
|
||||
const char* SpirvDisassembler::Result::error_string() const {
|
||||
return diagnostic_ ? diagnostic_->error : "";
|
||||
}
|
||||
|
||||
const char* SpirvDisassembler::Result::text() const {
|
||||
return text_ ? text_->str : "";
|
||||
}
|
||||
|
||||
std::string SpirvDisassembler::Result::to_string() const {
|
||||
return text_ ? std::string(text_->str, text_->length) : "";
|
||||
}
|
||||
|
||||
void SpirvDisassembler::Result::AppendText(StringBuffer* target_buffer) const {
|
||||
if (text_) {
|
||||
target_buffer->AppendBytes(reinterpret_cast<const uint8_t*>(text_->str),
|
||||
text_->length);
|
||||
}
|
||||
}
|
||||
|
||||
SpirvDisassembler::SpirvDisassembler()
|
||||
: spv_context_(spvContextCreate(SPV_ENV_VULKAN_1_0)) {}
|
||||
|
||||
SpirvDisassembler::~SpirvDisassembler() { spvContextDestroy(spv_context_); }
|
||||
|
||||
std::unique_ptr<SpirvDisassembler::Result> SpirvDisassembler::Disassemble(
|
||||
const uint32_t* words, size_t word_count) {
|
||||
spv_text text = nullptr;
|
||||
spv_diagnostic diagnostic = nullptr;
|
||||
auto result_code =
|
||||
spvBinaryToText(spv_context_, words, word_count,
|
||||
SPV_BINARY_TO_TEXT_OPTION_INDENT, &text, &diagnostic);
|
||||
std::unique_ptr<Result> result(new Result(text, diagnostic));
|
||||
if (result_code) {
|
||||
XELOGE("Failed to disassemble spv: {}", result_code);
|
||||
if (result->has_error()) {
|
||||
return result;
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace ui
|
||||
} // namespace xe
|
|
@ -1,66 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_UI_SPIRV_SPIRV_DISASSEMBLER_H_
|
||||
#define XENIA_UI_SPIRV_SPIRV_DISASSEMBLER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "xenia/base/string_buffer.h"
|
||||
#include "xenia/ui/spirv/spirv_util.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace spirv {
|
||||
|
||||
class SpirvDisassembler {
|
||||
public:
|
||||
class Result {
|
||||
public:
|
||||
Result(spv_text text, spv_diagnostic diagnostic);
|
||||
~Result();
|
||||
|
||||
// True if the result has an error associated with it.
|
||||
bool has_error() const;
|
||||
// Index of the error in the provided binary word data.
|
||||
size_t error_word_index() const;
|
||||
// Human-readable description of the error.
|
||||
const char* error_string() const;
|
||||
|
||||
// Disassembled source text.
|
||||
// Returned pointer lifetime is tied to this Result instance.
|
||||
const char* text() const;
|
||||
// Converts the disassembled source text to a string.
|
||||
std::string to_string() const;
|
||||
// Appends the disassembled source text to the given buffer.
|
||||
void AppendText(StringBuffer* target_buffer) const;
|
||||
|
||||
private:
|
||||
spv_text text_ = nullptr;
|
||||
spv_diagnostic diagnostic_ = nullptr;
|
||||
};
|
||||
|
||||
SpirvDisassembler();
|
||||
~SpirvDisassembler();
|
||||
|
||||
// Disassembles the given SPIRV binary.
|
||||
// The return will be nullptr if disassembly fails due to a library error.
|
||||
// The return may have an error set on it if the SPIRV binary is malformed.
|
||||
std::unique_ptr<Result> Disassemble(const uint32_t* words, size_t word_count);
|
||||
|
||||
private:
|
||||
spv_context spv_context_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace ui
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_UI_SPIRV_SPIRV_DISASSEMBLER_H_
|
|
@ -1,20 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/ui/spirv/spirv_util.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace spirv {
|
||||
|
||||
//
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace ui
|
||||
} // namespace xe
|
|
@ -1,36 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_UI_SPIRV_SPIRV_UTIL_H_
|
||||
#define XENIA_UI_SPIRV_SPIRV_UTIL_H_
|
||||
|
||||
#include "third_party/spirv-headers/include/spirv/1.1/spirv.hpp11"
|
||||
#include "third_party/spirv/GLSL.std.450.hpp11"
|
||||
|
||||
// Forward declarations from SPIRV-Tools so we don't pollute /so/ much.
|
||||
struct spv_binary_t;
|
||||
typedef spv_binary_t* spv_binary;
|
||||
struct spv_context_t;
|
||||
typedef spv_context_t* spv_context;
|
||||
struct spv_diagnostic_t;
|
||||
typedef spv_diagnostic_t* spv_diagnostic;
|
||||
struct spv_text_t;
|
||||
typedef spv_text_t* spv_text;
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace spirv {
|
||||
|
||||
//
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace ui
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_UI_SPIRV_SPIRV_UTIL_H_
|
|
@ -1,80 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/ui/spirv/spirv_validator.h"
|
||||
|
||||
#include "third_party/spirv-tools/include/spirv-tools/libspirv.h"
|
||||
#include "xenia/base/logging.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace spirv {
|
||||
|
||||
SpirvValidator::Result::Result(spv_text text, spv_diagnostic diagnostic)
|
||||
: text_(text), diagnostic_(diagnostic) {}
|
||||
|
||||
SpirvValidator::Result::~Result() {
|
||||
if (text_) {
|
||||
spvTextDestroy(text_);
|
||||
}
|
||||
if (diagnostic_) {
|
||||
spvDiagnosticDestroy(diagnostic_);
|
||||
}
|
||||
}
|
||||
|
||||
bool SpirvValidator::Result::has_error() const { return !!diagnostic_; }
|
||||
|
||||
size_t SpirvValidator::Result::error_word_index() const {
|
||||
return diagnostic_ ? diagnostic_->position.index : 0;
|
||||
}
|
||||
|
||||
const char* SpirvValidator::Result::error_string() const {
|
||||
return diagnostic_ ? diagnostic_->error : "";
|
||||
}
|
||||
|
||||
const char* SpirvValidator::Result::text() const {
|
||||
return text_ ? text_->str : "";
|
||||
}
|
||||
|
||||
std::string SpirvValidator::Result::to_string() const {
|
||||
return text_ ? std::string(text_->str, text_->length) : "";
|
||||
}
|
||||
|
||||
void SpirvValidator::Result::AppendText(StringBuffer* target_buffer) const {
|
||||
if (text_) {
|
||||
target_buffer->AppendBytes(reinterpret_cast<const uint8_t*>(text_->str),
|
||||
text_->length);
|
||||
}
|
||||
}
|
||||
|
||||
SpirvValidator::SpirvValidator()
|
||||
: spv_context_(spvContextCreate(SPV_ENV_UNIVERSAL_1_1)) {}
|
||||
SpirvValidator::~SpirvValidator() { spvContextDestroy(spv_context_); }
|
||||
|
||||
std::unique_ptr<SpirvValidator::Result> SpirvValidator::Validate(
|
||||
const uint32_t* words, size_t word_count) {
|
||||
spv_text text = nullptr;
|
||||
spv_diagnostic diagnostic = nullptr;
|
||||
spv_const_binary_t binary = {words, word_count};
|
||||
auto result_code = spvValidate(spv_context_, &binary, &diagnostic);
|
||||
std::unique_ptr<Result> result(new Result(text, diagnostic));
|
||||
if (result_code) {
|
||||
XELOGE("Failed to validate spv: {}", result_code);
|
||||
if (result->has_error()) {
|
||||
return result;
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace ui
|
||||
} // namespace xe
|
|
@ -1,66 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_
|
||||
#define XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "xenia/base/string_buffer.h"
|
||||
#include "xenia/ui/spirv/spirv_util.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace spirv {
|
||||
|
||||
class SpirvValidator {
|
||||
public:
|
||||
class Result {
|
||||
public:
|
||||
Result(spv_text text, spv_diagnostic diagnostic);
|
||||
~Result();
|
||||
|
||||
// True if the result has an error associated with it.
|
||||
bool has_error() const;
|
||||
// Index of the error in the provided binary word data.
|
||||
size_t error_word_index() const;
|
||||
// Human-readable description of the error.
|
||||
const char* error_string() const;
|
||||
|
||||
// Disassembled source text.
|
||||
// Returned pointer lifetime is tied to this Result instance.
|
||||
const char* text() const;
|
||||
// Converts the disassembled source text to a string.
|
||||
std::string to_string() const;
|
||||
// Appends the disassembled source text to the given buffer.
|
||||
void AppendText(StringBuffer* target_buffer) const;
|
||||
|
||||
private:
|
||||
spv_text text_ = nullptr;
|
||||
spv_diagnostic diagnostic_ = nullptr;
|
||||
};
|
||||
|
||||
SpirvValidator();
|
||||
~SpirvValidator();
|
||||
|
||||
// Validates the given SPIRV binary.
|
||||
// The return will be nullptr if validation fails due to a library error.
|
||||
// The return may have an error set on it if the SPIRV binary is malformed.
|
||||
std::unique_ptr<Result> Validate(const uint32_t* words, size_t word_count);
|
||||
|
||||
private:
|
||||
spv_context spv_context_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace spirv
|
||||
} // namespace ui
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_
|
|
@ -1,574 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/ui/vulkan/blitter.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/ui/vulkan/fenced_pools.h"
|
||||
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
using util::CheckResult;
|
||||
|
||||
// Generated with `xb buildshaders`.
|
||||
namespace shaders {
|
||||
#include "xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_color_ps.h"
|
||||
#include "xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_depth_ps.h"
|
||||
#include "xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_vs.h"
|
||||
} // namespace shaders
|
||||
|
||||
Blitter::Blitter(const VulkanProvider& provider) : provider_(provider) {}
|
||||
Blitter::~Blitter() { Shutdown(); }
|
||||
|
||||
VkResult Blitter::Initialize() {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
VkResult status = VK_SUCCESS;
|
||||
|
||||
// Shaders
|
||||
VkShaderModuleCreateInfo shader_create_info;
|
||||
std::memset(&shader_create_info, 0, sizeof(shader_create_info));
|
||||
shader_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
|
||||
shader_create_info.codeSize = sizeof(shaders::blit_vs);
|
||||
shader_create_info.pCode = shaders::blit_vs;
|
||||
status = dfn.vkCreateShaderModule(device, &shader_create_info, nullptr,
|
||||
&blit_vertex_);
|
||||
CheckResult(status, "vkCreateShaderModule");
|
||||
if (status != VK_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE,
|
||||
uint64_t(blit_vertex_), "S(B): Vertex");
|
||||
|
||||
shader_create_info.codeSize = sizeof(shaders::blit_color_ps);
|
||||
shader_create_info.pCode = shaders::blit_color_ps;
|
||||
status = dfn.vkCreateShaderModule(device, &shader_create_info, nullptr,
|
||||
&blit_color_);
|
||||
CheckResult(status, "vkCreateShaderModule");
|
||||
if (status != VK_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE,
|
||||
uint64_t(blit_color_), "S(B): Color");
|
||||
|
||||
shader_create_info.codeSize = sizeof(shaders::blit_depth_ps);
|
||||
shader_create_info.pCode = shaders::blit_depth_ps;
|
||||
status = dfn.vkCreateShaderModule(device, &shader_create_info, nullptr,
|
||||
&blit_depth_);
|
||||
CheckResult(status, "vkCreateShaderModule");
|
||||
if (status != VK_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE,
|
||||
uint64_t(blit_depth_), "S(B): Depth");
|
||||
|
||||
// Create the descriptor set layout used for our texture sampler.
|
||||
// As it changes almost every draw we cache it per texture.
|
||||
VkDescriptorSetLayoutCreateInfo texture_set_layout_info;
|
||||
texture_set_layout_info.sType =
|
||||
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
|
||||
texture_set_layout_info.pNext = nullptr;
|
||||
texture_set_layout_info.flags = 0;
|
||||
texture_set_layout_info.bindingCount = 1;
|
||||
VkDescriptorSetLayoutBinding texture_binding;
|
||||
texture_binding.binding = 0;
|
||||
texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
texture_binding.descriptorCount = 1;
|
||||
texture_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
texture_binding.pImmutableSamplers = nullptr;
|
||||
texture_set_layout_info.pBindings = &texture_binding;
|
||||
status = dfn.vkCreateDescriptorSetLayout(device, &texture_set_layout_info,
|
||||
nullptr, &descriptor_set_layout_);
|
||||
CheckResult(status, "vkCreateDescriptorSetLayout");
|
||||
if (status != VK_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
// Create a descriptor pool
|
||||
VkDescriptorPoolSize pool_sizes[1];
|
||||
pool_sizes[0].descriptorCount = 4096;
|
||||
pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
descriptor_pool_ = std::make_unique<DescriptorPool>(
|
||||
provider_, 4096,
|
||||
std::vector<VkDescriptorPoolSize>(pool_sizes, std::end(pool_sizes)));
|
||||
|
||||
// Create the pipeline layout used for our pipeline.
|
||||
VkPipelineLayoutCreateInfo pipeline_layout_info;
|
||||
pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
|
||||
pipeline_layout_info.pNext = nullptr;
|
||||
pipeline_layout_info.flags = 0;
|
||||
VkDescriptorSetLayout set_layouts[] = {descriptor_set_layout_};
|
||||
pipeline_layout_info.setLayoutCount =
|
||||
static_cast<uint32_t>(xe::countof(set_layouts));
|
||||
pipeline_layout_info.pSetLayouts = set_layouts;
|
||||
VkPushConstantRange push_constant_ranges[2];
|
||||
|
||||
push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
push_constant_ranges[0].offset = 0;
|
||||
push_constant_ranges[0].size = sizeof(VtxPushConstants);
|
||||
push_constant_ranges[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
push_constant_ranges[1].offset = sizeof(VtxPushConstants);
|
||||
push_constant_ranges[1].size = sizeof(PixPushConstants);
|
||||
|
||||
pipeline_layout_info.pushConstantRangeCount =
|
||||
static_cast<uint32_t>(xe::countof(push_constant_ranges));
|
||||
pipeline_layout_info.pPushConstantRanges = push_constant_ranges;
|
||||
status = dfn.vkCreatePipelineLayout(device, &pipeline_layout_info, nullptr,
|
||||
&pipeline_layout_);
|
||||
CheckResult(status, "vkCreatePipelineLayout");
|
||||
if (status != VK_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
// Create two samplers.
|
||||
VkSamplerCreateInfo sampler_create_info = {
|
||||
VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
|
||||
nullptr,
|
||||
0,
|
||||
VK_FILTER_NEAREST,
|
||||
VK_FILTER_NEAREST,
|
||||
VK_SAMPLER_MIPMAP_MODE_NEAREST,
|
||||
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||
0.f,
|
||||
VK_FALSE,
|
||||
1.f,
|
||||
VK_FALSE,
|
||||
VK_COMPARE_OP_NEVER,
|
||||
0.f,
|
||||
0.f,
|
||||
VK_BORDER_COLOR_INT_TRANSPARENT_BLACK,
|
||||
VK_FALSE,
|
||||
};
|
||||
status = dfn.vkCreateSampler(device, &sampler_create_info, nullptr,
|
||||
&samp_nearest_);
|
||||
CheckResult(status, "vkCreateSampler");
|
||||
if (status != VK_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
sampler_create_info.minFilter = VK_FILTER_LINEAR;
|
||||
sampler_create_info.magFilter = VK_FILTER_LINEAR;
|
||||
sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
|
||||
status =
|
||||
dfn.vkCreateSampler(device, &sampler_create_info, nullptr, &samp_linear_);
|
||||
CheckResult(status, "vkCreateSampler");
|
||||
if (status != VK_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void Blitter::Shutdown() {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
util::DestroyAndNullHandle(dfn.vkDestroySampler, device, samp_nearest_);
|
||||
util::DestroyAndNullHandle(dfn.vkDestroySampler, device, samp_linear_);
|
||||
util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, blit_vertex_);
|
||||
util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, blit_color_);
|
||||
util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, blit_depth_);
|
||||
util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, pipeline_color_);
|
||||
util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, pipeline_depth_);
|
||||
util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device,
|
||||
pipeline_layout_);
|
||||
util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, device,
|
||||
descriptor_set_layout_);
|
||||
for (auto& pipeline : pipelines_) {
|
||||
dfn.vkDestroyPipeline(device, pipeline.second, nullptr);
|
||||
}
|
||||
pipelines_.clear();
|
||||
|
||||
for (auto& pass : render_passes_) {
|
||||
dfn.vkDestroyRenderPass(device, pass.second, nullptr);
|
||||
}
|
||||
render_passes_.clear();
|
||||
}
|
||||
|
||||
void Blitter::Scavenge() {
|
||||
if (descriptor_pool_->has_open_batch()) {
|
||||
descriptor_pool_->EndBatch();
|
||||
}
|
||||
|
||||
descriptor_pool_->Scavenge();
|
||||
}
|
||||
|
||||
void Blitter::BlitTexture2D(VkCommandBuffer command_buffer, VkFence fence,
|
||||
VkImageView src_image_view, VkRect2D src_rect,
|
||||
VkExtent2D src_extents, VkFormat dst_image_format,
|
||||
VkRect2D dst_rect, VkExtent2D dst_extents,
|
||||
VkFramebuffer dst_framebuffer, VkViewport viewport,
|
||||
VkRect2D scissor, VkFilter filter,
|
||||
bool color_or_depth, bool swap_channels) {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
|
||||
// Do we need a full draw, or can we cheap out with a blit command?
|
||||
bool full_draw = swap_channels || true;
|
||||
if (full_draw) {
|
||||
if (!descriptor_pool_->has_open_batch()) {
|
||||
descriptor_pool_->BeginBatch(fence);
|
||||
}
|
||||
|
||||
// Acquire a render pass.
|
||||
auto render_pass = GetRenderPass(dst_image_format, color_or_depth);
|
||||
VkRenderPassBeginInfo render_pass_info = {
|
||||
VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
nullptr,
|
||||
render_pass,
|
||||
dst_framebuffer,
|
||||
{{0, 0}, dst_extents},
|
||||
0,
|
||||
nullptr,
|
||||
};
|
||||
|
||||
dfn.vkCmdBeginRenderPass(command_buffer, &render_pass_info,
|
||||
VK_SUBPASS_CONTENTS_INLINE);
|
||||
|
||||
dfn.vkCmdSetViewport(command_buffer, 0, 1, &viewport);
|
||||
dfn.vkCmdSetScissor(command_buffer, 0, 1, &scissor);
|
||||
|
||||
// Acquire a pipeline.
|
||||
auto pipeline =
|
||||
GetPipeline(render_pass, color_or_depth ? blit_color_ : blit_depth_,
|
||||
color_or_depth);
|
||||
dfn.vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
pipeline);
|
||||
|
||||
// Acquire and update a descriptor set for this image.
|
||||
auto set = descriptor_pool_->AcquireEntry(descriptor_set_layout_);
|
||||
if (!set) {
|
||||
assert_always();
|
||||
descriptor_pool_->CancelBatch();
|
||||
return;
|
||||
}
|
||||
|
||||
VkWriteDescriptorSet write;
|
||||
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
write.pNext = nullptr;
|
||||
write.dstSet = set;
|
||||
write.dstBinding = 0;
|
||||
write.dstArrayElement = 0;
|
||||
write.descriptorCount = 1;
|
||||
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
|
||||
VkDescriptorImageInfo image;
|
||||
image.sampler = filter == VK_FILTER_NEAREST ? samp_nearest_ : samp_linear_;
|
||||
image.imageView = src_image_view;
|
||||
image.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
write.pImageInfo = ℑ
|
||||
write.pBufferInfo = nullptr;
|
||||
write.pTexelBufferView = nullptr;
|
||||
dfn.vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
|
||||
|
||||
dfn.vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
pipeline_layout_, 0, 1, &set, 0, nullptr);
|
||||
|
||||
VtxPushConstants vtx_constants = {
|
||||
{
|
||||
float(src_rect.offset.x) / src_extents.width,
|
||||
float(src_rect.offset.y) / src_extents.height,
|
||||
float(src_rect.extent.width) / src_extents.width,
|
||||
float(src_rect.extent.height) / src_extents.height,
|
||||
},
|
||||
{
|
||||
float(dst_rect.offset.x) / dst_extents.width,
|
||||
float(dst_rect.offset.y) / dst_extents.height,
|
||||
float(dst_rect.extent.width) / dst_extents.width,
|
||||
float(dst_rect.extent.height) / dst_extents.height,
|
||||
},
|
||||
};
|
||||
dfn.vkCmdPushConstants(command_buffer, pipeline_layout_,
|
||||
VK_SHADER_STAGE_VERTEX_BIT, 0,
|
||||
sizeof(VtxPushConstants), &vtx_constants);
|
||||
|
||||
PixPushConstants pix_constants = {
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
swap_channels ? 1 : 0,
|
||||
};
|
||||
dfn.vkCmdPushConstants(
|
||||
command_buffer, pipeline_layout_, VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
sizeof(VtxPushConstants), sizeof(PixPushConstants), &pix_constants);
|
||||
|
||||
dfn.vkCmdDraw(command_buffer, 4, 1, 0, 0);
|
||||
dfn.vkCmdEndRenderPass(command_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
void Blitter::CopyColorTexture2D(VkCommandBuffer command_buffer, VkFence fence,
|
||||
VkImage src_image, VkImageView src_image_view,
|
||||
VkOffset2D src_offset, VkImage dst_image,
|
||||
VkImageView dst_image_view, VkExtent2D extents,
|
||||
VkFilter filter, bool swap_channels) {}
|
||||
|
||||
void Blitter::CopyDepthTexture(VkCommandBuffer command_buffer, VkFence fence,
|
||||
VkImage src_image, VkImageView src_image_view,
|
||||
VkOffset2D src_offset, VkImage dst_image,
|
||||
VkImageView dst_image_view, VkExtent2D extents) {
|
||||
}
|
||||
|
||||
VkRenderPass Blitter::GetRenderPass(VkFormat format, bool color_or_depth) {
|
||||
auto pass = render_passes_.find(format);
|
||||
if (pass != render_passes_.end()) {
|
||||
return pass->second;
|
||||
}
|
||||
|
||||
// Create and cache the render pass.
|
||||
VkRenderPass render_pass = CreateRenderPass(format, color_or_depth);
|
||||
if (render_pass) {
|
||||
render_passes_[format] = render_pass;
|
||||
}
|
||||
|
||||
return render_pass;
|
||||
}
|
||||
|
||||
VkPipeline Blitter::GetPipeline(VkRenderPass render_pass,
|
||||
VkShaderModule frag_shader,
|
||||
bool color_or_depth) {
|
||||
auto it = pipelines_.find(std::make_pair(render_pass, frag_shader));
|
||||
if (it != pipelines_.end()) {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
// Create and cache the pipeline.
|
||||
VkPipeline pipeline =
|
||||
CreatePipeline(render_pass, frag_shader, color_or_depth);
|
||||
if (pipeline) {
|
||||
pipelines_[std::make_pair(render_pass, frag_shader)] = pipeline;
|
||||
}
|
||||
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
VkRenderPass Blitter::CreateRenderPass(VkFormat output_format,
|
||||
bool color_or_depth) {
|
||||
VkAttachmentDescription attachments[1];
|
||||
std::memset(attachments, 0, sizeof(attachments));
|
||||
|
||||
// Output attachment
|
||||
attachments[0].flags = 0;
|
||||
attachments[0].format = output_format;
|
||||
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
|
||||
attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
|
||||
attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
|
||||
attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
|
||||
attachments[0].initialLayout =
|
||||
color_or_depth ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
|
||||
: VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
|
||||
attachments[0].finalLayout = attachments[0].initialLayout;
|
||||
|
||||
VkAttachmentReference attach_refs[1];
|
||||
attach_refs[0].attachment = 0;
|
||||
attach_refs[0].layout =
|
||||
color_or_depth ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
|
||||
: VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
|
||||
|
||||
VkSubpassDescription subpass = {
|
||||
0, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
0, nullptr,
|
||||
0, nullptr,
|
||||
nullptr, nullptr,
|
||||
0, nullptr,
|
||||
};
|
||||
|
||||
if (color_or_depth) {
|
||||
subpass.colorAttachmentCount = 1;
|
||||
subpass.pColorAttachments = attach_refs;
|
||||
} else {
|
||||
subpass.pDepthStencilAttachment = attach_refs;
|
||||
}
|
||||
|
||||
VkRenderPassCreateInfo renderpass_info = {
|
||||
VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
|
||||
nullptr,
|
||||
0,
|
||||
1,
|
||||
attachments,
|
||||
1,
|
||||
&subpass,
|
||||
0,
|
||||
nullptr,
|
||||
};
|
||||
VkRenderPass renderpass = nullptr;
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
VkResult result =
|
||||
dfn.vkCreateRenderPass(device, &renderpass_info, nullptr, &renderpass);
|
||||
CheckResult(result, "vkCreateRenderPass");
|
||||
|
||||
return renderpass;
|
||||
}
|
||||
|
||||
VkPipeline Blitter::CreatePipeline(VkRenderPass render_pass,
|
||||
VkShaderModule frag_shader,
|
||||
bool color_or_depth) {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
// Pipeline
|
||||
VkGraphicsPipelineCreateInfo pipeline_info;
|
||||
std::memset(&pipeline_info, 0, sizeof(VkGraphicsPipelineCreateInfo));
|
||||
pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
|
||||
|
||||
// Shaders
|
||||
pipeline_info.stageCount = 2;
|
||||
VkPipelineShaderStageCreateInfo stages[2];
|
||||
std::memset(stages, 0, sizeof(stages));
|
||||
stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
|
||||
stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
stages[0].module = blit_vertex_;
|
||||
stages[0].pName = "main";
|
||||
stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
|
||||
stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
stages[1].module = frag_shader;
|
||||
stages[1].pName = "main";
|
||||
|
||||
pipeline_info.pStages = stages;
|
||||
|
||||
// Vertex input
|
||||
VkPipelineVertexInputStateCreateInfo vtx_state;
|
||||
std::memset(&vtx_state, 0, sizeof(vtx_state));
|
||||
vtx_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
|
||||
vtx_state.flags = 0;
|
||||
vtx_state.vertexAttributeDescriptionCount = 0;
|
||||
vtx_state.pVertexAttributeDescriptions = nullptr;
|
||||
vtx_state.vertexBindingDescriptionCount = 0;
|
||||
vtx_state.pVertexBindingDescriptions = nullptr;
|
||||
|
||||
pipeline_info.pVertexInputState = &vtx_state;
|
||||
|
||||
// Input Assembly
|
||||
VkPipelineInputAssemblyStateCreateInfo input_info;
|
||||
input_info.sType =
|
||||
VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
|
||||
input_info.pNext = nullptr;
|
||||
input_info.flags = 0;
|
||||
input_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
|
||||
input_info.primitiveRestartEnable = VK_FALSE;
|
||||
pipeline_info.pInputAssemblyState = &input_info;
|
||||
pipeline_info.pTessellationState = nullptr;
|
||||
VkPipelineViewportStateCreateInfo viewport_state_info;
|
||||
viewport_state_info.sType =
|
||||
VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
|
||||
viewport_state_info.pNext = nullptr;
|
||||
viewport_state_info.flags = 0;
|
||||
viewport_state_info.viewportCount = 1;
|
||||
viewport_state_info.pViewports = nullptr;
|
||||
viewport_state_info.scissorCount = 1;
|
||||
viewport_state_info.pScissors = nullptr;
|
||||
pipeline_info.pViewportState = &viewport_state_info;
|
||||
VkPipelineRasterizationStateCreateInfo rasterization_info;
|
||||
rasterization_info.sType =
|
||||
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
|
||||
rasterization_info.pNext = nullptr;
|
||||
rasterization_info.flags = 0;
|
||||
rasterization_info.depthClampEnable = VK_FALSE;
|
||||
rasterization_info.rasterizerDiscardEnable = VK_FALSE;
|
||||
rasterization_info.polygonMode = VK_POLYGON_MODE_FILL;
|
||||
rasterization_info.cullMode = VK_CULL_MODE_NONE;
|
||||
rasterization_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
|
||||
rasterization_info.depthBiasEnable = VK_FALSE;
|
||||
rasterization_info.depthBiasConstantFactor = 0;
|
||||
rasterization_info.depthBiasClamp = 0;
|
||||
rasterization_info.depthBiasSlopeFactor = 0;
|
||||
rasterization_info.lineWidth = 1.0f;
|
||||
pipeline_info.pRasterizationState = &rasterization_info;
|
||||
VkPipelineMultisampleStateCreateInfo multisample_info;
|
||||
multisample_info.sType =
|
||||
VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
|
||||
multisample_info.pNext = nullptr;
|
||||
multisample_info.flags = 0;
|
||||
multisample_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
|
||||
multisample_info.sampleShadingEnable = VK_FALSE;
|
||||
multisample_info.minSampleShading = 0;
|
||||
multisample_info.pSampleMask = nullptr;
|
||||
multisample_info.alphaToCoverageEnable = VK_FALSE;
|
||||
multisample_info.alphaToOneEnable = VK_FALSE;
|
||||
pipeline_info.pMultisampleState = &multisample_info;
|
||||
VkPipelineDepthStencilStateCreateInfo depth_info = {
|
||||
VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
nullptr,
|
||||
0,
|
||||
VK_TRUE,
|
||||
VK_TRUE,
|
||||
VK_COMPARE_OP_ALWAYS,
|
||||
VK_FALSE,
|
||||
VK_FALSE,
|
||||
{},
|
||||
{},
|
||||
0.f,
|
||||
1.f,
|
||||
};
|
||||
pipeline_info.pDepthStencilState = &depth_info;
|
||||
VkPipelineColorBlendStateCreateInfo blend_info;
|
||||
blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
|
||||
blend_info.pNext = nullptr;
|
||||
blend_info.flags = 0;
|
||||
blend_info.logicOpEnable = VK_FALSE;
|
||||
blend_info.logicOp = VK_LOGIC_OP_NO_OP;
|
||||
|
||||
VkPipelineColorBlendAttachmentState blend_attachments[1];
|
||||
if (color_or_depth) {
|
||||
blend_attachments[0].blendEnable = VK_FALSE;
|
||||
blend_attachments[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA;
|
||||
blend_attachments[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO;
|
||||
blend_attachments[0].colorBlendOp = VK_BLEND_OP_ADD;
|
||||
blend_attachments[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA;
|
||||
blend_attachments[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
|
||||
blend_attachments[0].alphaBlendOp = VK_BLEND_OP_ADD;
|
||||
blend_attachments[0].colorWriteMask = 0xF;
|
||||
|
||||
blend_info.attachmentCount =
|
||||
static_cast<uint32_t>(xe::countof(blend_attachments));
|
||||
blend_info.pAttachments = blend_attachments;
|
||||
} else {
|
||||
blend_info.attachmentCount = 0;
|
||||
blend_info.pAttachments = nullptr;
|
||||
}
|
||||
|
||||
std::memset(blend_info.blendConstants, 0, sizeof(blend_info.blendConstants));
|
||||
pipeline_info.pColorBlendState = &blend_info;
|
||||
VkPipelineDynamicStateCreateInfo dynamic_state_info;
|
||||
dynamic_state_info.sType =
|
||||
VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
|
||||
dynamic_state_info.pNext = nullptr;
|
||||
dynamic_state_info.flags = 0;
|
||||
VkDynamicState dynamic_states[] = {
|
||||
VK_DYNAMIC_STATE_VIEWPORT,
|
||||
VK_DYNAMIC_STATE_SCISSOR,
|
||||
};
|
||||
dynamic_state_info.dynamicStateCount =
|
||||
static_cast<uint32_t>(xe::countof(dynamic_states));
|
||||
dynamic_state_info.pDynamicStates = dynamic_states;
|
||||
pipeline_info.pDynamicState = &dynamic_state_info;
|
||||
pipeline_info.layout = pipeline_layout_;
|
||||
pipeline_info.renderPass = render_pass;
|
||||
pipeline_info.subpass = 0;
|
||||
pipeline_info.basePipelineHandle = nullptr;
|
||||
pipeline_info.basePipelineIndex = -1;
|
||||
|
||||
VkPipeline pipeline = nullptr;
|
||||
result = dfn.vkCreateGraphicsPipelines(device, nullptr, 1, &pipeline_info,
|
||||
nullptr, &pipeline);
|
||||
CheckResult(result, "vkCreateGraphicsPipelines");
|
||||
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
|
@ -1,100 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_UI_VULKAN_BLITTER_H_
|
||||
#define XENIA_UI_VULKAN_BLITTER_H_
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
class DescriptorPool;
|
||||
|
||||
class Blitter {
|
||||
public:
|
||||
Blitter(const VulkanProvider& provider);
|
||||
~Blitter();
|
||||
|
||||
VkResult Initialize();
|
||||
void Scavenge();
|
||||
void Shutdown();
|
||||
|
||||
// Queues commands to blit a texture to another texture.
|
||||
//
|
||||
// src_rect is the rectangle of pixels to copy from the source
|
||||
// src_extents is the actual size of the source image
|
||||
// dst_rect is the rectangle of pixels that are replaced with the source
|
||||
// dst_extents is the actual size of the destination image
|
||||
// dst_framebuffer must only have one attachment, the target texture.
|
||||
// viewport is the viewport rect (set to {0, 0, dst_w, dst_h} if unsure)
|
||||
// scissor is the scissor rect for the dest (set to dst size if unsure)
|
||||
void BlitTexture2D(VkCommandBuffer command_buffer, VkFence fence,
|
||||
VkImageView src_image_view, VkRect2D src_rect,
|
||||
VkExtent2D src_extents, VkFormat dst_image_format,
|
||||
VkRect2D dst_rect, VkExtent2D dst_extents,
|
||||
VkFramebuffer dst_framebuffer, VkViewport viewport,
|
||||
VkRect2D scissor, VkFilter filter, bool color_or_depth,
|
||||
bool swap_channels);
|
||||
|
||||
void CopyColorTexture2D(VkCommandBuffer command_buffer, VkFence fence,
|
||||
VkImage src_image, VkImageView src_image_view,
|
||||
VkOffset2D src_offset, VkImage dst_image,
|
||||
VkImageView dst_image_view, VkExtent2D extents,
|
||||
VkFilter filter, bool swap_channels);
|
||||
void CopyDepthTexture(VkCommandBuffer command_buffer, VkFence fence,
|
||||
VkImage src_image, VkImageView src_image_view,
|
||||
VkOffset2D src_offset, VkImage dst_image,
|
||||
VkImageView dst_image_view, VkExtent2D extents);
|
||||
|
||||
// For framebuffer creation.
|
||||
VkRenderPass GetRenderPass(VkFormat format, bool color_or_depth);
|
||||
|
||||
private:
|
||||
struct VtxPushConstants {
|
||||
float src_uv[4]; // 0x00
|
||||
float dst_uv[4]; // 0x10
|
||||
};
|
||||
|
||||
struct PixPushConstants {
|
||||
int _pad[3]; // 0x20
|
||||
int swap; // 0x2C
|
||||
};
|
||||
|
||||
VkPipeline GetPipeline(VkRenderPass render_pass, VkShaderModule frag_shader,
|
||||
bool color_or_depth);
|
||||
VkRenderPass CreateRenderPass(VkFormat output_format, bool color_or_depth);
|
||||
VkPipeline CreatePipeline(VkRenderPass render_pass,
|
||||
VkShaderModule frag_shader, bool color_or_depth);
|
||||
|
||||
std::unique_ptr<DescriptorPool> descriptor_pool_ = nullptr;
|
||||
const VulkanProvider& provider_;
|
||||
VkPipeline pipeline_color_ = nullptr;
|
||||
VkPipeline pipeline_depth_ = nullptr;
|
||||
VkPipelineLayout pipeline_layout_ = nullptr;
|
||||
VkShaderModule blit_vertex_ = nullptr;
|
||||
VkShaderModule blit_color_ = nullptr;
|
||||
VkShaderModule blit_depth_ = nullptr;
|
||||
VkSampler samp_linear_ = nullptr;
|
||||
VkSampler samp_nearest_ = nullptr;
|
||||
VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
|
||||
|
||||
std::map<VkFormat, VkRenderPass> render_passes_;
|
||||
std::map<std::pair<VkRenderPass, VkShaderModule>, VkPipeline> pipelines_;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_UI_VULKAN_BLITTER_H_
|
|
@ -1,314 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/ui/vulkan/circular_buffer.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
using util::CheckResult;
|
||||
|
||||
CircularBuffer::CircularBuffer(const VulkanProvider& provider,
|
||||
VkBufferUsageFlags usage, VkDeviceSize capacity,
|
||||
VkDeviceSize alignment)
|
||||
: provider_(provider), capacity_(capacity) {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
VkResult status = VK_SUCCESS;
|
||||
|
||||
// Create our internal buffer.
|
||||
VkBufferCreateInfo buffer_info;
|
||||
buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
buffer_info.pNext = nullptr;
|
||||
buffer_info.flags = 0;
|
||||
buffer_info.size = capacity;
|
||||
buffer_info.usage = usage;
|
||||
buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
buffer_info.queueFamilyIndexCount = 0;
|
||||
buffer_info.pQueueFamilyIndices = nullptr;
|
||||
status = dfn.vkCreateBuffer(device, &buffer_info, nullptr, &gpu_buffer_);
|
||||
CheckResult(status, "vkCreateBuffer");
|
||||
if (status != VK_SUCCESS) {
|
||||
assert_always();
|
||||
}
|
||||
|
||||
VkMemoryRequirements reqs;
|
||||
dfn.vkGetBufferMemoryRequirements(device, gpu_buffer_, &reqs);
|
||||
alignment_ = xe::round_up(alignment, reqs.alignment);
|
||||
}
|
||||
CircularBuffer::~CircularBuffer() { Shutdown(); }
|
||||
|
||||
VkResult CircularBuffer::Initialize(VkDeviceMemory memory,
|
||||
VkDeviceSize offset) {
|
||||
assert_true(offset % alignment_ == 0);
|
||||
gpu_memory_ = memory;
|
||||
gpu_base_ = offset;
|
||||
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
VkResult status = VK_SUCCESS;
|
||||
|
||||
// Bind the buffer to its backing memory.
|
||||
status = dfn.vkBindBufferMemory(device, gpu_buffer_, gpu_memory_, gpu_base_);
|
||||
CheckResult(status, "vkBindBufferMemory");
|
||||
if (status != VK_SUCCESS) {
|
||||
XELOGE("CircularBuffer::Initialize - Failed to bind memory!");
|
||||
Shutdown();
|
||||
return status;
|
||||
}
|
||||
|
||||
// Map the memory so we can access it.
|
||||
status = dfn.vkMapMemory(device, gpu_memory_, gpu_base_, capacity_, 0,
|
||||
reinterpret_cast<void**>(&host_base_));
|
||||
CheckResult(status, "vkMapMemory");
|
||||
if (status != VK_SUCCESS) {
|
||||
XELOGE("CircularBuffer::Initialize - Failed to map memory!");
|
||||
Shutdown();
|
||||
return status;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult CircularBuffer::Initialize() {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
VkResult status = VK_SUCCESS;
|
||||
|
||||
VkMemoryRequirements reqs;
|
||||
dfn.vkGetBufferMemoryRequirements(device, gpu_buffer_, &reqs);
|
||||
|
||||
// Allocate memory from the device to back the buffer.
|
||||
owns_gpu_memory_ = true;
|
||||
VkMemoryAllocateInfo memory_allocate_info;
|
||||
memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
|
||||
memory_allocate_info.pNext = nullptr;
|
||||
memory_allocate_info.allocationSize = reqs.size;
|
||||
memory_allocate_info.memoryTypeIndex = ui::vulkan::util::ChooseHostMemoryType(
|
||||
provider_, reqs.memoryTypeBits, false);
|
||||
if (memory_allocate_info.memoryTypeIndex == UINT32_MAX) {
|
||||
XELOGE("CircularBuffer::Initialize - Failed to get memory type!");
|
||||
Shutdown();
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
status = dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr,
|
||||
&gpu_memory_);
|
||||
if (status != VK_SUCCESS) {
|
||||
XELOGE("CircularBuffer::Initialize - Failed to allocate memory!");
|
||||
Shutdown();
|
||||
return status;
|
||||
}
|
||||
|
||||
capacity_ = reqs.size;
|
||||
gpu_base_ = 0;
|
||||
|
||||
// Bind the buffer to its backing memory.
|
||||
status = dfn.vkBindBufferMemory(device, gpu_buffer_, gpu_memory_, gpu_base_);
|
||||
CheckResult(status, "vkBindBufferMemory");
|
||||
if (status != VK_SUCCESS) {
|
||||
XELOGE("CircularBuffer::Initialize - Failed to bind memory!");
|
||||
Shutdown();
|
||||
return status;
|
||||
}
|
||||
|
||||
// Map the memory so we can access it.
|
||||
status = dfn.vkMapMemory(device, gpu_memory_, gpu_base_, capacity_, 0,
|
||||
reinterpret_cast<void**>(&host_base_));
|
||||
CheckResult(status, "vkMapMemory");
|
||||
if (status != VK_SUCCESS) {
|
||||
XELOGE("CircularBuffer::Initialize - Failed to map memory!");
|
||||
Shutdown();
|
||||
return status;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void CircularBuffer::Shutdown() {
|
||||
Clear();
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
if (host_base_) {
|
||||
dfn.vkUnmapMemory(device, gpu_memory_);
|
||||
host_base_ = nullptr;
|
||||
}
|
||||
if (gpu_buffer_) {
|
||||
dfn.vkDestroyBuffer(device, gpu_buffer_, nullptr);
|
||||
gpu_buffer_ = nullptr;
|
||||
}
|
||||
if (gpu_memory_ && owns_gpu_memory_) {
|
||||
dfn.vkFreeMemory(device, gpu_memory_, nullptr);
|
||||
gpu_memory_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void CircularBuffer::GetBufferMemoryRequirements(VkMemoryRequirements* reqs) {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
dfn.vkGetBufferMemoryRequirements(device, gpu_buffer_, reqs);
|
||||
}
|
||||
|
||||
bool CircularBuffer::CanAcquire(VkDeviceSize length) {
|
||||
// Make sure the length is aligned.
|
||||
length = xe::round_up(length, alignment_);
|
||||
if (allocations_.empty()) {
|
||||
// Read head has caught up to write head (entire buffer available for write)
|
||||
assert_true(read_head_ == write_head_);
|
||||
return capacity_ >= length;
|
||||
} else if (write_head_ < read_head_) {
|
||||
// Write head wrapped around and is behind read head.
|
||||
// | write |---- read ----|
|
||||
return (read_head_ - write_head_) >= length;
|
||||
} else if (write_head_ > read_head_) {
|
||||
// Read head behind write head.
|
||||
// 1. Check if there's enough room from write -> capacity
|
||||
// | |---- read ----| write |
|
||||
if ((capacity_ - write_head_) >= length) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// 2. Check if there's enough room from 0 -> read
|
||||
// | write |---- read ----| |
|
||||
if ((read_head_ - 0) >= length) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
CircularBuffer::Allocation* CircularBuffer::Acquire(VkDeviceSize length,
|
||||
VkFence fence) {
|
||||
VkDeviceSize aligned_length = xe::round_up(length, alignment_);
|
||||
if (!CanAcquire(aligned_length)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
assert_true(write_head_ % alignment_ == 0);
|
||||
if (write_head_ < read_head_) {
|
||||
// Write head behind read head.
|
||||
assert_true(read_head_ - write_head_ >= aligned_length);
|
||||
|
||||
Allocation alloc;
|
||||
alloc.host_ptr = host_base_ + write_head_;
|
||||
alloc.gpu_memory = gpu_memory_;
|
||||
alloc.offset = gpu_base_ + write_head_;
|
||||
alloc.length = length;
|
||||
alloc.aligned_length = aligned_length;
|
||||
alloc.fence = fence;
|
||||
write_head_ += aligned_length;
|
||||
allocations_.push(alloc);
|
||||
|
||||
return &allocations_.back();
|
||||
} else {
|
||||
// Write head equal to/after read head
|
||||
if (capacity_ - write_head_ >= aligned_length) {
|
||||
// Free space from write -> capacity
|
||||
Allocation alloc;
|
||||
alloc.host_ptr = host_base_ + write_head_;
|
||||
alloc.gpu_memory = gpu_memory_;
|
||||
alloc.offset = gpu_base_ + write_head_;
|
||||
alloc.length = length;
|
||||
alloc.aligned_length = aligned_length;
|
||||
alloc.fence = fence;
|
||||
write_head_ += aligned_length;
|
||||
allocations_.push(alloc);
|
||||
|
||||
return &allocations_.back();
|
||||
} else if ((read_head_ - 0) >= aligned_length) {
|
||||
// Not enough space from write -> capacity, but there is enough free space
|
||||
// from begin -> read
|
||||
Allocation alloc;
|
||||
alloc.host_ptr = host_base_ + 0;
|
||||
alloc.gpu_memory = gpu_memory_;
|
||||
alloc.offset = gpu_base_ + 0;
|
||||
alloc.length = length;
|
||||
alloc.aligned_length = aligned_length;
|
||||
alloc.fence = fence;
|
||||
write_head_ = aligned_length;
|
||||
allocations_.push(alloc);
|
||||
|
||||
return &allocations_.back();
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void CircularBuffer::Flush(Allocation* allocation) {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
VkMappedMemoryRange range;
|
||||
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
|
||||
range.pNext = nullptr;
|
||||
range.memory = gpu_memory_;
|
||||
range.offset = gpu_base_ + allocation->offset;
|
||||
range.size = allocation->length;
|
||||
dfn.vkFlushMappedMemoryRanges(device, 1, &range);
|
||||
}
|
||||
|
||||
void CircularBuffer::Flush(VkDeviceSize offset, VkDeviceSize length) {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
VkMappedMemoryRange range;
|
||||
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
|
||||
range.pNext = nullptr;
|
||||
range.memory = gpu_memory_;
|
||||
range.offset = gpu_base_ + offset;
|
||||
range.size = length;
|
||||
dfn.vkFlushMappedMemoryRanges(device, 1, &range);
|
||||
}
|
||||
|
||||
void CircularBuffer::Clear() {
|
||||
allocations_ = std::queue<Allocation>{};
|
||||
write_head_ = read_head_ = 0;
|
||||
}
|
||||
|
||||
void CircularBuffer::Scavenge() {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
|
||||
// Stash the last signalled fence
|
||||
VkFence fence = nullptr;
|
||||
while (!allocations_.empty()) {
|
||||
Allocation& alloc = allocations_.front();
|
||||
if (fence != alloc.fence &&
|
||||
dfn.vkGetFenceStatus(device, alloc.fence) != VK_SUCCESS) {
|
||||
// Don't bother freeing following allocations to ensure proper ordering.
|
||||
break;
|
||||
}
|
||||
|
||||
fence = alloc.fence;
|
||||
if (capacity_ - read_head_ < alloc.aligned_length) {
|
||||
// This allocation is stored at the beginning of the buffer.
|
||||
read_head_ = alloc.aligned_length;
|
||||
} else {
|
||||
read_head_ += alloc.aligned_length;
|
||||
}
|
||||
|
||||
allocations_.pop();
|
||||
}
|
||||
|
||||
if (allocations_.empty()) {
|
||||
// Reset R/W heads to work around fragmentation issues.
|
||||
read_head_ = write_head_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
|
@ -1,92 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_
|
||||
#define XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_
|
||||
|
||||
#include <queue>
|
||||
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
// A circular buffer, intended to hold (fairly) temporary memory that will be
|
||||
// released when a fence is signaled. Best used when allocations are taken
|
||||
// in-order with command buffer submission.
|
||||
//
|
||||
// Allocations loop around the buffer in circles (but are not fragmented at the
|
||||
// ends of the buffer), where trailing older allocations are freed after use.
|
||||
class CircularBuffer {
|
||||
public:
|
||||
CircularBuffer(const VulkanProvider& provider, VkBufferUsageFlags usage,
|
||||
VkDeviceSize capacity, VkDeviceSize alignment = 256);
|
||||
~CircularBuffer();
|
||||
|
||||
struct Allocation {
|
||||
void* host_ptr;
|
||||
VkDeviceMemory gpu_memory;
|
||||
VkDeviceSize offset;
|
||||
VkDeviceSize length;
|
||||
VkDeviceSize aligned_length;
|
||||
|
||||
// Allocation usage fence. This allocation will be deleted when the fence
|
||||
// becomes signaled.
|
||||
VkFence fence;
|
||||
};
|
||||
|
||||
VkResult Initialize(VkDeviceMemory memory, VkDeviceSize offset);
|
||||
VkResult Initialize();
|
||||
void Shutdown();
|
||||
|
||||
void GetBufferMemoryRequirements(VkMemoryRequirements* reqs);
|
||||
|
||||
VkDeviceSize alignment() const { return alignment_; }
|
||||
VkDeviceSize capacity() const { return capacity_; }
|
||||
VkBuffer gpu_buffer() const { return gpu_buffer_; }
|
||||
VkDeviceMemory gpu_memory() const { return gpu_memory_; }
|
||||
uint8_t* host_base() const { return host_base_; }
|
||||
|
||||
bool CanAcquire(VkDeviceSize length);
|
||||
|
||||
// Acquires space to hold memory. This allocation is only freed when the fence
|
||||
// reaches the signaled state.
|
||||
Allocation* Acquire(VkDeviceSize length, VkFence fence);
|
||||
void Flush(Allocation* allocation);
|
||||
void Flush(VkDeviceSize offset, VkDeviceSize length);
|
||||
|
||||
// Clears all allocations, regardless of whether they've been consumed or not.
|
||||
void Clear();
|
||||
|
||||
// Frees any allocations whose fences have been signaled.
|
||||
void Scavenge();
|
||||
|
||||
private:
|
||||
// All of these variables are relative to gpu_base
|
||||
VkDeviceSize capacity_ = 0;
|
||||
VkDeviceSize alignment_ = 0;
|
||||
VkDeviceSize write_head_ = 0;
|
||||
VkDeviceSize read_head_ = 0;
|
||||
|
||||
const VulkanProvider& provider_;
|
||||
bool owns_gpu_memory_ = false;
|
||||
VkBuffer gpu_buffer_ = nullptr;
|
||||
VkDeviceMemory gpu_memory_ = nullptr;
|
||||
VkDeviceSize gpu_base_ = 0;
|
||||
uint8_t* host_base_ = nullptr;
|
||||
|
||||
std::queue<Allocation> allocations_;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_UI_GL_CIRCULAR_BUFFER_H_
|
|
@ -1,142 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/ui/vulkan/fenced_pools.h"
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
using util::CheckResult;
|
||||
|
||||
CommandBufferPool::CommandBufferPool(const VulkanProvider& provider,
|
||||
uint32_t queue_family_index)
|
||||
: BaseFencedPool(provider) {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
|
||||
// Create the pool used for allocating buffers.
|
||||
// They are marked as transient (short-lived) and cycled frequently.
|
||||
VkCommandPoolCreateInfo cmd_pool_info;
|
||||
cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
|
||||
cmd_pool_info.pNext = nullptr;
|
||||
cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
|
||||
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
|
||||
cmd_pool_info.queueFamilyIndex = queue_family_index;
|
||||
auto err =
|
||||
dfn.vkCreateCommandPool(device, &cmd_pool_info, nullptr, &command_pool_);
|
||||
CheckResult(err, "vkCreateCommandPool");
|
||||
|
||||
// Allocate a bunch of command buffers to start.
|
||||
constexpr uint32_t kDefaultCount = 32;
|
||||
VkCommandBufferAllocateInfo command_buffer_info;
|
||||
command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
||||
command_buffer_info.pNext = nullptr;
|
||||
command_buffer_info.commandPool = command_pool_;
|
||||
command_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
||||
command_buffer_info.commandBufferCount = kDefaultCount;
|
||||
VkCommandBuffer command_buffers[kDefaultCount];
|
||||
err = dfn.vkAllocateCommandBuffers(device, &command_buffer_info,
|
||||
command_buffers);
|
||||
CheckResult(err, "vkCreateCommandBuffer");
|
||||
for (size_t i = 0; i < xe::countof(command_buffers); ++i) {
|
||||
PushEntry(command_buffers[i], nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
CommandBufferPool::~CommandBufferPool() {
  // Hand every pooled command buffer back to the driver first, then drop the
  // VkCommandPool they were all allocated from.
  FreeAllEntries();
  const VulkanProvider::DeviceFunctions& device_fns = provider_.dfn();
  device_fns.vkDestroyCommandPool(provider_.device(), command_pool_, nullptr);
  command_pool_ = nullptr;
}
|
||||
|
||||
VkCommandBuffer CommandBufferPool::AllocateEntry(void* data) {
  // TODO(benvanik): allocate a bunch at once?
  // The requested VkCommandBufferLevel is smuggled through the opaque data
  // pointer by CommandBufferPool::AcquireEntry.
  VkCommandBufferAllocateInfo command_buffer_info;
  command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
  command_buffer_info.pNext = nullptr;
  command_buffer_info.commandPool = command_pool_;
  command_buffer_info.level =
      VkCommandBufferLevel(reinterpret_cast<uintptr_t>(data));
  command_buffer_info.commandBufferCount = 1;
  VkCommandBuffer command_buffer;
  const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
  VkDevice device = provider_.device();
  auto err = dfn.vkAllocateCommandBuffers(device, &command_buffer_info,
                                          &command_buffer);
  // Report the function that actually failed ("vkCreateCommandBuffer" is not
  // a real Vulkan entry point; the previous message was misleading).
  CheckResult(err, "vkAllocateCommandBuffers");
  return command_buffer;
}
|
||||
|
||||
// Returns a single command buffer to the VkCommandPool it came from.
void CommandBufferPool::FreeEntry(VkCommandBuffer handle) {
  const VulkanProvider::DeviceFunctions& device_fns = provider_.dfn();
  VkDevice vk_device = provider_.device();
  device_fns.vkFreeCommandBuffers(vk_device, command_pool_, 1, &handle);
}
|
||||
|
||||
DescriptorPool::DescriptorPool(const VulkanProvider& provider,
                               uint32_t max_count,
                               std::vector<VkDescriptorPoolSize> pool_sizes)
    : BaseFencedPool(provider) {
  // Sets must be individually returnable, since the fenced pool recycles
  // entries one at a time as batches retire.
  VkDescriptorPoolCreateInfo pool_info;
  pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
  pool_info.pNext = nullptr;
  pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
  pool_info.maxSets = max_count;
  pool_info.poolSizeCount = static_cast<uint32_t>(pool_sizes.size());
  pool_info.pPoolSizes = pool_sizes.data();
  const VulkanProvider::DeviceFunctions& device_fns = provider_.dfn();
  VkResult status = device_fns.vkCreateDescriptorPool(
      provider_.device(), &pool_info, nullptr, &descriptor_pool_);
  CheckResult(status, "vkCreateDescriptorPool");
}
|
||||
DescriptorPool::~DescriptorPool() {
  // Release every pooled descriptor set before destroying the pool that
  // backs them.
  FreeAllEntries();
  const VulkanProvider::DeviceFunctions& device_fns = provider_.dfn();
  device_fns.vkDestroyDescriptorPool(provider_.device(), descriptor_pool_,
                                     nullptr);
  descriptor_pool_ = nullptr;
}
|
||||
|
||||
VkDescriptorSet DescriptorPool::AllocateEntry(void* data) {
  // The base pool passes the VkDescriptorSetLayout through its opaque data
  // pointer (see DescriptorPool::AcquireEntry).
  VkDescriptorSetLayout layout = reinterpret_cast<VkDescriptorSetLayout>(data);

  VkDescriptorSetAllocateInfo alloc_info;
  alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
  alloc_info.pNext = nullptr;
  alloc_info.descriptorPool = descriptor_pool_;
  alloc_info.descriptorSetCount = 1;
  alloc_info.pSetLayouts = &layout;
  VkDescriptorSet descriptor_set = nullptr;
  const VulkanProvider::DeviceFunctions& device_fns = provider_.dfn();
  VkResult status = device_fns.vkAllocateDescriptorSets(
      provider_.device(), &alloc_info, &descriptor_set);
  CheckResult(status, "vkAllocateDescriptorSets");

  return descriptor_set;
}
|
||||
|
||||
// Individually frees one set; legal because the pool was created with
// VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT.
void DescriptorPool::FreeEntry(VkDescriptorSet handle) {
  const VulkanProvider::DeviceFunctions& device_fns = provider_.dfn();
  VkDevice vk_device = provider_.device();
  device_fns.vkFreeDescriptorSets(vk_device, descriptor_pool_, 1, &handle);
}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
|
@ -1,341 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_UI_VULKAN_FENCED_POOLS_H_
|
||||
#define XENIA_UI_VULKAN_FENCED_POOLS_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
// Simple pool for Vulkan homogenous objects that cannot be reused while
|
||||
// in-flight.
|
||||
// It batches pooled objects into groups and uses a vkQueueSubmit fence to
|
||||
// indicate their availability. If no objects are free when one is requested
|
||||
// the caller is expected to create them.
|
||||
template <typename T, typename HANDLE>
|
||||
class BaseFencedPool {
|
||||
public:
|
||||
BaseFencedPool(const VulkanProvider& provider) : provider_(provider) {}
|
||||
|
||||
virtual ~BaseFencedPool() {
|
||||
// TODO(benvanik): wait on fence until done.
|
||||
assert_null(pending_batch_list_head_);
|
||||
|
||||
// Subclasses must call FreeAllEntries() to properly clean up things.
|
||||
assert_null(free_batch_list_head_);
|
||||
assert_null(free_entry_list_head_);
|
||||
}
|
||||
|
||||
// True if one or more batches are still pending on the GPU.
|
||||
bool has_pending() const { return pending_batch_list_head_ != nullptr; }
|
||||
// True if a batch is open.
|
||||
bool has_open_batch() const { return open_batch_ != nullptr; }
|
||||
|
||||
// Checks all pending batches for completion and scavenges their entries.
|
||||
// This should be called as frequently as reasonable.
|
||||
void Scavenge() {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
while (pending_batch_list_head_) {
|
||||
auto batch = pending_batch_list_head_;
|
||||
assert_not_null(batch->fence);
|
||||
|
||||
VkResult status = dfn.vkGetFenceStatus(device, batch->fence);
|
||||
if (status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST) {
|
||||
// Batch has completed. Reclaim.
|
||||
pending_batch_list_head_ = batch->next;
|
||||
if (batch == pending_batch_list_tail_) {
|
||||
pending_batch_list_tail_ = nullptr;
|
||||
}
|
||||
batch->next = free_batch_list_head_;
|
||||
free_batch_list_head_ = batch;
|
||||
batch->entry_list_tail->next = free_entry_list_head_;
|
||||
free_entry_list_head_ = batch->entry_list_head;
|
||||
batch->entry_list_head = nullptr;
|
||||
batch->entry_list_tail = nullptr;
|
||||
} else {
|
||||
// Batch is still in-flight. Since batches are executed in order we know
|
||||
// no others after it could have completed, so early-exit.
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Begins a new batch.
|
||||
// All entries acquired within this batch will be marked as in-use until
|
||||
// the fence returned is signalled.
|
||||
// Pass in a fence to use an external fence. This assumes the fence has been
|
||||
// reset.
|
||||
VkFence BeginBatch(VkFence fence = nullptr) {
|
||||
assert_null(open_batch_);
|
||||
Batch* batch = nullptr;
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
if (free_batch_list_head_) {
|
||||
// Reuse a batch.
|
||||
batch = free_batch_list_head_;
|
||||
free_batch_list_head_ = batch->next;
|
||||
batch->next = nullptr;
|
||||
|
||||
if (batch->flags & kBatchOwnsFence && !fence) {
|
||||
// Reset owned fence.
|
||||
dfn.vkResetFences(device, 1, &batch->fence);
|
||||
} else if ((batch->flags & kBatchOwnsFence) && fence) {
|
||||
// Transfer owned -> external
|
||||
dfn.vkDestroyFence(device, batch->fence, nullptr);
|
||||
batch->fence = fence;
|
||||
batch->flags &= ~kBatchOwnsFence;
|
||||
} else if (!(batch->flags & kBatchOwnsFence) && !fence) {
|
||||
// external -> owned
|
||||
VkFenceCreateInfo info;
|
||||
info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
info.pNext = nullptr;
|
||||
info.flags = 0;
|
||||
VkResult res = dfn.vkCreateFence(device, &info, nullptr, &batch->fence);
|
||||
if (res != VK_SUCCESS) {
|
||||
assert_always();
|
||||
}
|
||||
|
||||
batch->flags |= kBatchOwnsFence;
|
||||
} else {
|
||||
// external -> external
|
||||
batch->fence = fence;
|
||||
}
|
||||
} else {
|
||||
// Allocate new batch.
|
||||
batch = new Batch();
|
||||
batch->next = nullptr;
|
||||
batch->flags = 0;
|
||||
|
||||
if (!fence) {
|
||||
VkFenceCreateInfo info;
|
||||
info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
info.pNext = nullptr;
|
||||
info.flags = 0;
|
||||
VkResult res = dfn.vkCreateFence(device, &info, nullptr, &batch->fence);
|
||||
if (res != VK_SUCCESS) {
|
||||
assert_always();
|
||||
}
|
||||
|
||||
batch->flags |= kBatchOwnsFence;
|
||||
} else {
|
||||
batch->fence = fence;
|
||||
}
|
||||
}
|
||||
batch->entry_list_head = nullptr;
|
||||
batch->entry_list_tail = nullptr;
|
||||
open_batch_ = batch;
|
||||
|
||||
return batch->fence;
|
||||
}
|
||||
|
||||
// Cancels an open batch, and releases all entries acquired within.
|
||||
void CancelBatch() {
|
||||
assert_not_null(open_batch_);
|
||||
|
||||
auto batch = open_batch_;
|
||||
open_batch_ = nullptr;
|
||||
|
||||
// Relink the batch back into the free batch list.
|
||||
batch->next = free_batch_list_head_;
|
||||
free_batch_list_head_ = batch;
|
||||
|
||||
// Relink entries back into free entries list.
|
||||
batch->entry_list_tail->next = free_entry_list_head_;
|
||||
free_entry_list_head_ = batch->entry_list_head;
|
||||
batch->entry_list_head = nullptr;
|
||||
batch->entry_list_tail = nullptr;
|
||||
}
|
||||
|
||||
// Ends the current batch.
|
||||
void EndBatch() {
|
||||
assert_not_null(open_batch_);
|
||||
|
||||
// Close and see if we have anything.
|
||||
auto batch = open_batch_;
|
||||
open_batch_ = nullptr;
|
||||
if (!batch->entry_list_head) {
|
||||
// Nothing to do.
|
||||
batch->next = free_batch_list_head_;
|
||||
free_batch_list_head_ = batch;
|
||||
return;
|
||||
}
|
||||
|
||||
// Append to the end of the batch list.
|
||||
batch->next = nullptr;
|
||||
if (!pending_batch_list_head_) {
|
||||
pending_batch_list_head_ = batch;
|
||||
}
|
||||
if (pending_batch_list_tail_) {
|
||||
pending_batch_list_tail_->next = batch;
|
||||
pending_batch_list_tail_ = batch;
|
||||
} else {
|
||||
pending_batch_list_tail_ = batch;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
// Attempts to acquire an entry from the pool in the current batch.
|
||||
// If none are available a new one will be allocated.
|
||||
HANDLE AcquireEntry(void* data) {
|
||||
Entry* entry = nullptr;
|
||||
if (free_entry_list_head_) {
|
||||
// Slice off an entry from the free list.
|
||||
Entry* prev = nullptr;
|
||||
Entry* cur = free_entry_list_head_;
|
||||
while (cur != nullptr) {
|
||||
if (cur->data == data) {
|
||||
if (prev) {
|
||||
prev->next = cur->next;
|
||||
} else {
|
||||
free_entry_list_head_ = cur->next;
|
||||
}
|
||||
|
||||
entry = cur;
|
||||
break;
|
||||
}
|
||||
|
||||
prev = cur;
|
||||
cur = cur->next;
|
||||
}
|
||||
}
|
||||
|
||||
if (!entry) {
|
||||
// No entry available; allocate new.
|
||||
entry = new Entry();
|
||||
entry->data = data;
|
||||
entry->handle = static_cast<T*>(this)->AllocateEntry(data);
|
||||
if (!entry->handle) {
|
||||
delete entry;
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
entry->next = nullptr;
|
||||
if (!open_batch_->entry_list_head) {
|
||||
open_batch_->entry_list_head = entry;
|
||||
}
|
||||
if (open_batch_->entry_list_tail) {
|
||||
open_batch_->entry_list_tail->next = entry;
|
||||
}
|
||||
open_batch_->entry_list_tail = entry;
|
||||
return entry->handle;
|
||||
}
|
||||
|
||||
void PushEntry(HANDLE handle, void* data) {
|
||||
auto entry = new Entry();
|
||||
entry->next = free_entry_list_head_;
|
||||
entry->data = data;
|
||||
entry->handle = handle;
|
||||
free_entry_list_head_ = entry;
|
||||
}
|
||||
|
||||
void FreeAllEntries() {
|
||||
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
// Run down free lists.
|
||||
while (free_batch_list_head_) {
|
||||
auto batch = free_batch_list_head_;
|
||||
free_batch_list_head_ = batch->next;
|
||||
|
||||
if (batch->flags & kBatchOwnsFence) {
|
||||
dfn.vkDestroyFence(device, batch->fence, nullptr);
|
||||
batch->fence = nullptr;
|
||||
}
|
||||
delete batch;
|
||||
}
|
||||
while (free_entry_list_head_) {
|
||||
auto entry = free_entry_list_head_;
|
||||
free_entry_list_head_ = entry->next;
|
||||
static_cast<T*>(this)->FreeEntry(entry->handle);
|
||||
delete entry;
|
||||
}
|
||||
}
|
||||
|
||||
const VulkanProvider& provider_;
|
||||
|
||||
private:
|
||||
struct Entry {
|
||||
Entry* next;
|
||||
void* data;
|
||||
HANDLE handle;
|
||||
};
|
||||
struct Batch {
|
||||
Batch* next;
|
||||
Entry* entry_list_head;
|
||||
Entry* entry_list_tail;
|
||||
uint32_t flags;
|
||||
VkFence fence;
|
||||
};
|
||||
|
||||
static const uint32_t kBatchOwnsFence = 1;
|
||||
|
||||
Batch* free_batch_list_head_ = nullptr;
|
||||
Entry* free_entry_list_head_ = nullptr;
|
||||
Batch* pending_batch_list_head_ = nullptr;
|
||||
Batch* pending_batch_list_tail_ = nullptr;
|
||||
Batch* open_batch_ = nullptr;
|
||||
};
|
||||
|
||||
// Fenced pool of command buffers allocated from a single transient
// VkCommandPool on one queue family.
class CommandBufferPool
    : public BaseFencedPool<CommandBufferPool, VkCommandBuffer> {
 public:
  typedef BaseFencedPool<CommandBufferPool, VkCommandBuffer> Base;

  CommandBufferPool(const VulkanProvider& provider,
                    uint32_t queue_family_index);
  ~CommandBufferPool() override;

  // Acquires a command buffer of the given level within the open batch.
  // The level is smuggled through the base pool's opaque data pointer so that
  // only buffers of the same level are reused.
  VkCommandBuffer AcquireEntry(
      VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
    return Base::AcquireEntry(reinterpret_cast<void*>(level));
  }

 protected:
  friend class BaseFencedPool<CommandBufferPool, VkCommandBuffer>;
  // CRTP hooks called by the base pool to create/destroy handles.
  VkCommandBuffer AllocateEntry(void* data);
  void FreeEntry(VkCommandBuffer handle);

  VkCommandPool command_pool_ = nullptr;
};
|
||||
|
||||
// Fenced pool of descriptor sets allocated from a single VkDescriptorPool.
class DescriptorPool : public BaseFencedPool<DescriptorPool, VkDescriptorSet> {
 public:
  typedef BaseFencedPool<DescriptorPool, VkDescriptorSet> Base;

  DescriptorPool(const VulkanProvider& provider, uint32_t max_count,
                 std::vector<VkDescriptorPoolSize> pool_sizes);
  ~DescriptorPool() override;

  // Acquires a descriptor set with the given layout within the open batch.
  // The layout doubles as the base pool's opaque reuse key.
  VkDescriptorSet AcquireEntry(VkDescriptorSetLayout layout) {
    return Base::AcquireEntry(layout);
  }

  // WARNING: Allocating sets from the vulkan pool will not be tracked!
  VkDescriptorPool descriptor_pool() { return descriptor_pool_; }

 protected:
  friend class BaseFencedPool<DescriptorPool, VkDescriptorSet>;
  // CRTP hooks called by the base pool to create/destroy handles.
  VkDescriptorSet AllocateEntry(void* data);
  void FreeEntry(VkDescriptorSet handle);

  VkDescriptorPool descriptor_pool_ = nullptr;
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_UI_VULKAN_FENCED_POOLS_H_
|
|
@ -10,32 +10,28 @@ XE_UI_VULKAN_FUNCTION(vkCmdBindDescriptorSets)
|
|||
XE_UI_VULKAN_FUNCTION(vkCmdBindIndexBuffer)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdBindPipeline)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdBindVertexBuffers)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdBlitImage)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdClearAttachments)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdClearColorImage)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdClearDepthStencilImage)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdCopyBuffer)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdCopyBufferToImage)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdCopyImageToBuffer)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdDispatch)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdDraw)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdDrawIndexed)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdEndRenderPass)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdExecuteCommands)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdFillBuffer)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdPipelineBarrier)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdPushConstants)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdResolveImage)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdSetBlendConstants)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdSetDepthBias)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdSetDepthBounds)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdSetLineWidth)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdSetScissor)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdSetStencilCompareMask)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdSetStencilReference)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdSetStencilWriteMask)
|
||||
XE_UI_VULKAN_FUNCTION(vkCmdSetViewport)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateBuffer)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateBufferView)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateCommandPool)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateComputePipelines)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateDescriptorPool)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateDescriptorSetLayout)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateFence)
|
||||
|
@ -43,13 +39,13 @@ XE_UI_VULKAN_FUNCTION(vkCreateFramebuffer)
|
|||
XE_UI_VULKAN_FUNCTION(vkCreateGraphicsPipelines)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateImage)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateImageView)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreatePipelineCache)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreatePipelineLayout)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateRenderPass)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateSampler)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateSemaphore)
|
||||
XE_UI_VULKAN_FUNCTION(vkCreateShaderModule)
|
||||
XE_UI_VULKAN_FUNCTION(vkDestroyBuffer)
|
||||
XE_UI_VULKAN_FUNCTION(vkDestroyBufferView)
|
||||
XE_UI_VULKAN_FUNCTION(vkDestroyCommandPool)
|
||||
XE_UI_VULKAN_FUNCTION(vkDestroyDescriptorPool)
|
||||
XE_UI_VULKAN_FUNCTION(vkDestroyDescriptorSetLayout)
|
||||
|
@ -58,7 +54,6 @@ XE_UI_VULKAN_FUNCTION(vkDestroyFramebuffer)
|
|||
XE_UI_VULKAN_FUNCTION(vkDestroyImage)
|
||||
XE_UI_VULKAN_FUNCTION(vkDestroyImageView)
|
||||
XE_UI_VULKAN_FUNCTION(vkDestroyPipeline)
|
||||
XE_UI_VULKAN_FUNCTION(vkDestroyPipelineCache)
|
||||
XE_UI_VULKAN_FUNCTION(vkDestroyPipelineLayout)
|
||||
XE_UI_VULKAN_FUNCTION(vkDestroyRenderPass)
|
||||
XE_UI_VULKAN_FUNCTION(vkDestroySampler)
|
||||
|
@ -66,23 +61,18 @@ XE_UI_VULKAN_FUNCTION(vkDestroySemaphore)
|
|||
XE_UI_VULKAN_FUNCTION(vkDestroyShaderModule)
|
||||
XE_UI_VULKAN_FUNCTION(vkEndCommandBuffer)
|
||||
XE_UI_VULKAN_FUNCTION(vkFlushMappedMemoryRanges)
|
||||
XE_UI_VULKAN_FUNCTION(vkFreeCommandBuffers)
|
||||
XE_UI_VULKAN_FUNCTION(vkFreeDescriptorSets)
|
||||
XE_UI_VULKAN_FUNCTION(vkFreeMemory)
|
||||
XE_UI_VULKAN_FUNCTION(vkGetBufferMemoryRequirements)
|
||||
XE_UI_VULKAN_FUNCTION(vkGetDeviceQueue)
|
||||
XE_UI_VULKAN_FUNCTION(vkGetFenceStatus)
|
||||
XE_UI_VULKAN_FUNCTION(vkGetImageMemoryRequirements)
|
||||
XE_UI_VULKAN_FUNCTION(vkGetImageSubresourceLayout)
|
||||
XE_UI_VULKAN_FUNCTION(vkGetPipelineCacheData)
|
||||
XE_UI_VULKAN_FUNCTION(vkInvalidateMappedMemoryRanges)
|
||||
XE_UI_VULKAN_FUNCTION(vkMapMemory)
|
||||
XE_UI_VULKAN_FUNCTION(vkResetCommandBuffer)
|
||||
XE_UI_VULKAN_FUNCTION(vkResetCommandPool)
|
||||
XE_UI_VULKAN_FUNCTION(vkResetDescriptorPool)
|
||||
XE_UI_VULKAN_FUNCTION(vkResetFences)
|
||||
XE_UI_VULKAN_FUNCTION(vkQueueBindSparse)
|
||||
XE_UI_VULKAN_FUNCTION(vkQueueSubmit)
|
||||
XE_UI_VULKAN_FUNCTION(vkQueueWaitIdle)
|
||||
XE_UI_VULKAN_FUNCTION(vkUnmapMemory)
|
||||
XE_UI_VULKAN_FUNCTION(vkUpdateDescriptorSets)
|
||||
XE_UI_VULKAN_FUNCTION(vkWaitForFences)
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
// VK_AMD_shader_info functions used in Xenia.
|
||||
XE_UI_VULKAN_FUNCTION(vkGetShaderInfoAMD)
|
|
@ -0,0 +1,4 @@
|
|||
// VK_KHR_bind_memory2 functions used in Xenia.
|
||||
// Promoted to Vulkan 1.1 core.
|
||||
XE_UI_VULKAN_FUNCTION_PROMOTED(vkBindBufferMemory2KHR, vkBindBufferMemory2)
|
||||
XE_UI_VULKAN_FUNCTION_PROMOTED(vkBindImageMemory2KHR, vkBindImageMemory2)
|
|
@ -0,0 +1,6 @@
|
|||
// VK_KHR_get_memory_requirements2 functions used in Xenia.
|
||||
// Promoted to Vulkan 1.1 core.
|
||||
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetBufferMemoryRequirements2KHR,
|
||||
vkGetBufferMemoryRequirements2)
|
||||
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetImageMemoryRequirements2KHR,
|
||||
vkGetImageMemoryRequirements2)
|
|
@ -0,0 +1,6 @@
|
|||
// VK_KHR_maintenance4 functions used in Xenia.
|
||||
// Promoted to Vulkan 1.3 core.
|
||||
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetDeviceBufferMemoryRequirementsKHR,
|
||||
vkGetDeviceBufferMemoryRequirements)
|
||||
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetDeviceImageMemoryRequirementsKHR,
|
||||
vkGetDeviceImageMemoryRequirements)
|
|
@ -6,7 +6,6 @@ XE_UI_VULKAN_FUNCTION(vkEnumeratePhysicalDevices)
|
|||
XE_UI_VULKAN_FUNCTION(vkGetDeviceProcAddr)
|
||||
XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceFeatures)
|
||||
XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceFormatProperties)
|
||||
XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceImageFormatProperties)
|
||||
XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceMemoryProperties)
|
||||
XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceProperties)
|
||||
XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceQueueFamilyProperties)
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
// VK_KHR_get_physical_device_properties2 functions used in Xenia.
|
||||
// Promoted to Vulkan 1.1 core.
|
||||
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceMemoryProperties2KHR,
|
||||
vkGetPhysicalDeviceMemoryProperties2)
|
||||
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceProperties2KHR,
|
||||
vkGetPhysicalDeviceProperties2)
|
||||
|
|
|
@ -7,10 +7,8 @@ project("xenia-ui-vulkan")
|
|||
kind("StaticLib")
|
||||
language("C++")
|
||||
links({
|
||||
"fmt",
|
||||
"xenia-base",
|
||||
"xenia-ui",
|
||||
"xenia-ui-spirv",
|
||||
})
|
||||
includedirs({
|
||||
project_root.."/third_party/Vulkan-Headers/include",
|
||||
|
@ -19,9 +17,7 @@ project("xenia-ui-vulkan")
|
|||
local_platform_files("functions")
|
||||
files({
|
||||
"../shaders/bytecode/vulkan_spirv/*.h",
|
||||
"shaders/bytecode/vulkan_spirv/*.h",
|
||||
})
|
||||
removefiles({"*_demo.cc"})
|
||||
|
||||
group("demos")
|
||||
project("xenia-ui-window-vulkan-demo")
|
||||
|
@ -33,7 +29,6 @@ project("xenia-ui-window-vulkan-demo")
|
|||
"imgui",
|
||||
"xenia-base",
|
||||
"xenia-ui",
|
||||
"xenia-ui-spirv",
|
||||
"xenia-ui-vulkan",
|
||||
})
|
||||
includedirs({
|
||||
|
|
|
@ -1,31 +0,0 @@
|
|||
// NOTE: This file is compiled and embedded into the exe.
|
||||
// Use `xenia-build genspirv` and check in any changes under bin/.
|
||||
|
||||
#version 450 core
|
||||
precision highp float;
|
||||
|
||||
layout(push_constant) uniform PushConstants {
|
||||
// normalized [x, y, w, h]
|
||||
layout(offset = 0x00) vec4 src_uv;
|
||||
layout(offset = 0x10) vec4 dst_uv;
|
||||
} push_constants;
|
||||
|
||||
layout(location = 0) out vec2 vtx_uv;
|
||||
|
||||
void main() {
  // Corners of a unit quad, indexed by gl_VertexIndex for a 4-vertex
  // triangle-strip draw.
  const vec2 vtx_arr[4] = vec2[4](
      vec2(0, 0),
      vec2(1, 0),
      vec2(0, 1),
      vec2(1, 1)
  );

  vec2 vfetch_pos = vtx_arr[gl_VertexIndex];
  // Map the destination rectangle (normalized [x, y, w, h]) from [0, 1]
  // texture space into [-1, 1] clip space. The previous `scaled_pos`
  // computation was dead code and has been removed.
  vec4 scaled_dst_uv = push_constants.dst_uv * vec4(2.0);
  gl_Position =
      vec4(scaled_dst_uv.xy - vec2(1.0) + vfetch_pos.xy * scaled_dst_uv.zw, 0.0,
           1.0);

  // Source UV interpolated to the fragment shader.
  vtx_uv = vfetch_pos.xy * push_constants.src_uv.zw + push_constants.src_uv.xy;
}
|
|
@ -1,20 +0,0 @@
|
|||
// NOTE: This file is compiled and embedded into the exe.
|
||||
// Use `xenia-build genspirv` and check in any changes under bin/.
|
||||
|
||||
#version 450 core
|
||||
precision highp float;
|
||||
|
||||
layout(push_constant) uniform PushConstants {
|
||||
layout(offset = 0x20) vec3 _pad;
|
||||
layout(offset = 0x2C) int swap;
|
||||
} push_constants;
|
||||
|
||||
layout(set = 0, binding = 0) uniform sampler2D src_texture;
|
||||
|
||||
layout(location = 0) in vec2 vtx_uv;
|
||||
layout(location = 0) out vec4 oC;
|
||||
|
||||
void main() {
  oC = texture(src_texture, vtx_uv);
  // Swizzle the red/blue channels when the caller requests a swap.
  if (push_constants.swap != 0) oC = oC.bgra;
}
|
|
@ -1,19 +0,0 @@
|
|||
// NOTE: This file is compiled and embedded into the exe.
|
||||
// Use `xenia-build genspirv` and check in any changes under bin/.
|
||||
|
||||
#version 450 core
|
||||
precision highp float;
|
||||
|
||||
layout(push_constant) uniform PushConstants {
|
||||
layout(offset = 0x20) vec3 _pad;
|
||||
layout(offset = 0x2C) int swap;
|
||||
} push_constants;
|
||||
|
||||
layout(set = 0, binding = 0) uniform sampler2D src_texture;
|
||||
|
||||
layout(location = 0) in vec2 vtx_uv;
|
||||
layout(location = 0) out vec4 oC;
|
||||
|
||||
// Copies the red channel of the source texture into the fragment depth; the
// color output declared above is intentionally left unwritten.
void main() {
  gl_FragDepth = texture(src_texture, vtx_uv).r;
}
|
|
@ -1,2 +0,0 @@
|
|||
DisableFormat: true
|
||||
SortIncludes: false
|
|
@ -1,99 +0,0 @@
|
|||
// Generated with `xb buildshaders`.
|
||||
#if 0
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 10
|
||||
; Bound: 24608
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %5663 "main" %4841 %5592
|
||||
OpExecutionMode %5663 OriginUpperLeft
|
||||
OpDecorate %4841 Location 0
|
||||
OpDecorate %5164 DescriptorSet 0
|
||||
OpDecorate %5164 Binding 0
|
||||
OpDecorate %5592 Location 0
|
||||
OpMemberDecorate %_struct_1019 0 Offset 32
|
||||
OpMemberDecorate %_struct_1019 1 Offset 44
|
||||
OpDecorate %_struct_1019 Block
|
||||
%void = OpTypeVoid
|
||||
%1282 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%v4float = OpTypeVector %float 4
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%4841 = OpVariable %_ptr_Output_v4float Output
|
||||
%150 = OpTypeImage %float 2D 0 0 0 1 Unknown
|
||||
%510 = OpTypeSampledImage %150
|
||||
%_ptr_UniformConstant_510 = OpTypePointer UniformConstant %510
|
||||
%5164 = OpVariable %_ptr_UniformConstant_510 UniformConstant
|
||||
%v2float = OpTypeVector %float 2
|
||||
%_ptr_Input_v2float = OpTypePointer Input %v2float
|
||||
%5592 = OpVariable %_ptr_Input_v2float Input
|
||||
%v3float = OpTypeVector %float 3
|
||||
%int = OpTypeInt 32 1
|
||||
%_struct_1019 = OpTypeStruct %v3float %int
|
||||
%_ptr_PushConstant__struct_1019 = OpTypePointer PushConstant %_struct_1019
|
||||
%3463 = OpVariable %_ptr_PushConstant__struct_1019 PushConstant
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_PushConstant_int = OpTypePointer PushConstant %int
|
||||
%int_0 = OpConstant %int 0
|
||||
%bool = OpTypeBool
|
||||
%5663 = OpFunction %void None %1282
|
||||
%24607 = OpLabel
|
||||
%21248 = OpLoad %510 %5164
|
||||
%19293 = OpLoad %v2float %5592
|
||||
%8148 = OpImageSampleImplicitLod %v4float %21248 %19293
|
||||
OpStore %4841 %8148
|
||||
%20291 = OpAccessChain %_ptr_PushConstant_int %3463 %int_1
|
||||
%11639 = OpLoad %int %20291
|
||||
%12913 = OpINotEqual %bool %11639 %int_0
|
||||
OpSelectionMerge %19578 None
|
||||
OpBranchConditional %12913 %13163 %19578
|
||||
%13163 = OpLabel
|
||||
%9669 = OpLoad %v4float %4841
|
||||
%6737 = OpVectorShuffle %v4float %9669 %9669 2 1 0 3
|
||||
OpStore %4841 %6737
|
||||
OpBranch %19578
|
||||
%19578 = OpLabel
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
#endif
|
||||
|
||||
// Machine-generated SPIR-V words for the color blit fragment shader (see the
// disassembly above). Regenerate with `xb buildshaders`; do not hand-edit.
const uint32_t blit_color_ps[] = {
    0x07230203, 0x00010000, 0x0008000A, 0x00006020, 0x00000000, 0x00020011,
    0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E,
    0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0007000F, 0x00000004,
    0x0000161F, 0x6E69616D, 0x00000000, 0x000012E9, 0x000015D8, 0x00030010,
    0x0000161F, 0x00000007, 0x00040047, 0x000012E9, 0x0000001E, 0x00000000,
    0x00040047, 0x0000142C, 0x00000022, 0x00000000, 0x00040047, 0x0000142C,
    0x00000021, 0x00000000, 0x00040047, 0x000015D8, 0x0000001E, 0x00000000,
    0x00050048, 0x000003FB, 0x00000000, 0x00000023, 0x00000020, 0x00050048,
    0x000003FB, 0x00000001, 0x00000023, 0x0000002C, 0x00030047, 0x000003FB,
    0x00000002, 0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008,
    0x00030016, 0x0000000D, 0x00000020, 0x00040017, 0x0000001D, 0x0000000D,
    0x00000004, 0x00040020, 0x0000029A, 0x00000003, 0x0000001D, 0x0004003B,
    0x0000029A, 0x000012E9, 0x00000003, 0x00090019, 0x00000096, 0x0000000D,
    0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000,
    0x0003001B, 0x000001FE, 0x00000096, 0x00040020, 0x0000047B, 0x00000000,
    0x000001FE, 0x0004003B, 0x0000047B, 0x0000142C, 0x00000000, 0x00040017,
    0x00000013, 0x0000000D, 0x00000002, 0x00040020, 0x00000290, 0x00000001,
    0x00000013, 0x0004003B, 0x00000290, 0x000015D8, 0x00000001, 0x00040017,
    0x00000018, 0x0000000D, 0x00000003, 0x00040015, 0x0000000C, 0x00000020,
    0x00000001, 0x0004001E, 0x000003FB, 0x00000018, 0x0000000C, 0x00040020,
    0x00000678, 0x00000009, 0x000003FB, 0x0004003B, 0x00000678, 0x00000D87,
    0x00000009, 0x0004002B, 0x0000000C, 0x00000A0E, 0x00000001, 0x00040020,
    0x00000289, 0x00000009, 0x0000000C, 0x0004002B, 0x0000000C, 0x00000A0B,
    0x00000000, 0x00020014, 0x00000009, 0x00050036, 0x00000008, 0x0000161F,
    0x00000000, 0x00000502, 0x000200F8, 0x0000601F, 0x0004003D, 0x000001FE,
    0x00005300, 0x0000142C, 0x0004003D, 0x00000013, 0x00004B5D, 0x000015D8,
    0x00050057, 0x0000001D, 0x00001FD4, 0x00005300, 0x00004B5D, 0x0003003E,
    0x000012E9, 0x00001FD4, 0x00050041, 0x00000289, 0x00004F43, 0x00000D87,
    0x00000A0E, 0x0004003D, 0x0000000C, 0x00002D77, 0x00004F43, 0x000500AB,
    0x00000009, 0x00003271, 0x00002D77, 0x00000A0B, 0x000300F7, 0x00004C7A,
    0x00000000, 0x000400FA, 0x00003271, 0x0000336B, 0x00004C7A, 0x000200F8,
    0x0000336B, 0x0004003D, 0x0000001D, 0x000025C5, 0x000012E9, 0x0009004F,
    0x0000001D, 0x00001A51, 0x000025C5, 0x000025C5, 0x00000002, 0x00000001,
    0x00000000, 0x00000003, 0x0003003E, 0x000012E9, 0x00001A51, 0x000200F9,
    0x00004C7A, 0x000200F8, 0x00004C7A, 0x000100FD, 0x00010038,
};
|
|
@ -1,70 +0,0 @@
|
|||
// Generated with `xb buildshaders`.
|
||||
#if 0
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 10
|
||||
; Bound: 24608
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %5663 "main" %gl_FragDepth %5592 %4841
|
||||
OpExecutionMode %5663 OriginUpperLeft
|
||||
OpExecutionMode %5663 DepthReplacing
|
||||
OpDecorate %gl_FragDepth BuiltIn FragDepth
|
||||
OpDecorate %5164 DescriptorSet 0
|
||||
OpDecorate %5164 Binding 0
|
||||
OpDecorate %5592 Location 0
|
||||
OpDecorate %4841 Location 0
|
||||
%void = OpTypeVoid
|
||||
%1282 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%_ptr_Output_float = OpTypePointer Output %float
|
||||
%gl_FragDepth = OpVariable %_ptr_Output_float Output
|
||||
%150 = OpTypeImage %float 2D 0 0 0 1 Unknown
|
||||
%510 = OpTypeSampledImage %150
|
||||
%_ptr_UniformConstant_510 = OpTypePointer UniformConstant %510
|
||||
%5164 = OpVariable %_ptr_UniformConstant_510 UniformConstant
|
||||
%v2float = OpTypeVector %float 2
|
||||
%_ptr_Input_v2float = OpTypePointer Input %v2float
|
||||
%5592 = OpVariable %_ptr_Input_v2float Input
|
||||
%v4float = OpTypeVector %float 4
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%4841 = OpVariable %_ptr_Output_v4float Output
|
||||
%5663 = OpFunction %void None %1282
|
||||
%24607 = OpLabel
|
||||
%21248 = OpLoad %510 %5164
|
||||
%19654 = OpLoad %v2float %5592
|
||||
%23875 = OpImageSampleImplicitLod %v4float %21248 %19654
|
||||
%15662 = OpCompositeExtract %float %23875 0
|
||||
OpStore %gl_FragDepth %15662
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
#endif
|
||||
|
||||
const uint32_t blit_depth_ps[] = {
|
||||
0x07230203, 0x00010000, 0x0008000A, 0x00006020, 0x00000000, 0x00020011,
|
||||
0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E,
|
||||
0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0008000F, 0x00000004,
|
||||
0x0000161F, 0x6E69616D, 0x00000000, 0x000011F3, 0x000015D8, 0x000012E9,
|
||||
0x00030010, 0x0000161F, 0x00000007, 0x00030010, 0x0000161F, 0x0000000C,
|
||||
0x00040047, 0x000011F3, 0x0000000B, 0x00000016, 0x00040047, 0x0000142C,
|
||||
0x00000022, 0x00000000, 0x00040047, 0x0000142C, 0x00000021, 0x00000000,
|
||||
0x00040047, 0x000015D8, 0x0000001E, 0x00000000, 0x00040047, 0x000012E9,
|
||||
0x0000001E, 0x00000000, 0x00020013, 0x00000008, 0x00030021, 0x00000502,
|
||||
0x00000008, 0x00030016, 0x0000000D, 0x00000020, 0x00040020, 0x0000028A,
|
||||
0x00000003, 0x0000000D, 0x0004003B, 0x0000028A, 0x000011F3, 0x00000003,
|
||||
0x00090019, 0x00000096, 0x0000000D, 0x00000001, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000001, 0x00000000, 0x0003001B, 0x000001FE, 0x00000096,
|
||||
0x00040020, 0x0000047B, 0x00000000, 0x000001FE, 0x0004003B, 0x0000047B,
|
||||
0x0000142C, 0x00000000, 0x00040017, 0x00000013, 0x0000000D, 0x00000002,
|
||||
0x00040020, 0x00000290, 0x00000001, 0x00000013, 0x0004003B, 0x00000290,
|
||||
0x000015D8, 0x00000001, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004,
|
||||
0x00040020, 0x0000029A, 0x00000003, 0x0000001D, 0x0004003B, 0x0000029A,
|
||||
0x000012E9, 0x00000003, 0x00050036, 0x00000008, 0x0000161F, 0x00000000,
|
||||
0x00000502, 0x000200F8, 0x0000601F, 0x0004003D, 0x000001FE, 0x00005300,
|
||||
0x0000142C, 0x0004003D, 0x00000013, 0x00004CC6, 0x000015D8, 0x00050057,
|
||||
0x0000001D, 0x00005D43, 0x00005300, 0x00004CC6, 0x00050051, 0x0000000D,
|
||||
0x00003D2E, 0x00005D43, 0x00000000, 0x0003003E, 0x000011F3, 0x00003D2E,
|
||||
0x000100FD, 0x00010038,
|
||||
};
|
|
@ -1,149 +0,0 @@
|
|||
// Generated with `xb buildshaders`.
|
||||
#if 0
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos Glslang Reference Front End; 10
|
||||
; Bound: 25137
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Vertex %5663 "main" %gl_VertexIndex %4930 %5592
|
||||
OpDecorate %gl_VertexIndex BuiltIn VertexIndex
|
||||
OpMemberDecorate %_struct_1080 0 Offset 0
|
||||
OpMemberDecorate %_struct_1080 1 Offset 16
|
||||
OpDecorate %_struct_1080 Block
|
||||
OpMemberDecorate %_struct_1589 0 BuiltIn Position
|
||||
OpMemberDecorate %_struct_1589 1 BuiltIn PointSize
|
||||
OpMemberDecorate %_struct_1589 2 BuiltIn ClipDistance
|
||||
OpMemberDecorate %_struct_1589 3 BuiltIn CullDistance
|
||||
OpDecorate %_struct_1589 Block
|
||||
OpDecorate %5592 Location 0
|
||||
%void = OpTypeVoid
|
||||
%1282 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%v2float = OpTypeVector %float 2
|
||||
%_ptr_Function_v2float = OpTypePointer Function %v2float
|
||||
%uint = OpTypeInt 32 0
|
||||
%uint_4 = OpConstant %uint 4
|
||||
%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4
|
||||
%float_0 = OpConstant %float 0
|
||||
%1823 = OpConstantComposite %v2float %float_0 %float_0
|
||||
%float_1 = OpConstant %float 1
|
||||
%312 = OpConstantComposite %v2float %float_1 %float_0
|
||||
%889 = OpConstantComposite %v2float %float_0 %float_1
|
||||
%768 = OpConstantComposite %v2float %float_1 %float_1
|
||||
%809 = OpConstantComposite %_arr_v2float_uint_4 %1823 %312 %889 %768
|
||||
%int = OpTypeInt 32 1
|
||||
%_ptr_Input_int = OpTypePointer Input %int
|
||||
%gl_VertexIndex = OpVariable %_ptr_Input_int Input
|
||||
%_ptr_Function__arr_v2float_uint_4 = OpTypePointer Function %_arr_v2float_uint_4
|
||||
%float_2 = OpConstant %float 2
|
||||
%v4float = OpTypeVector %float 4
|
||||
%_struct_1080 = OpTypeStruct %v4float %v4float
|
||||
%_ptr_PushConstant__struct_1080 = OpTypePointer PushConstant %_struct_1080
|
||||
%3463 = OpVariable %_ptr_PushConstant__struct_1080 PushConstant
|
||||
%int_1 = OpConstant %int 1
|
||||
%_ptr_PushConstant_v4float = OpTypePointer PushConstant %v4float
|
||||
%2243 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2
|
||||
%uint_1 = OpConstant %uint 1
|
||||
%_arr_float_uint_1 = OpTypeArray %float %uint_1
|
||||
%_struct_1589 = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
|
||||
%_ptr_Output__struct_1589 = OpTypePointer Output %_struct_1589
|
||||
%4930 = OpVariable %_ptr_Output__struct_1589 Output
|
||||
%int_0 = OpConstant %int 0
|
||||
%_ptr_Output_v4float = OpTypePointer Output %v4float
|
||||
%_ptr_Output_v2float = OpTypePointer Output %v2float
|
||||
%5592 = OpVariable %_ptr_Output_v2float Output
|
||||
%5663 = OpFunction %void None %1282
|
||||
%24953 = OpLabel
|
||||
%5238 = OpVariable %_ptr_Function__arr_v2float_uint_4 Function
|
||||
%24173 = OpLoad %int %gl_VertexIndex
|
||||
OpStore %5238 %809
|
||||
%16679 = OpAccessChain %_ptr_Function_v2float %5238 %24173
|
||||
%7372 = OpLoad %v2float %16679
|
||||
%21446 = OpAccessChain %_ptr_PushConstant_v4float %3463 %int_1
|
||||
%10986 = OpLoad %v4float %21446
|
||||
%7772 = OpFMul %v4float %10986 %2243
|
||||
%17065 = OpVectorShuffle %v2float %7772 %7772 0 1
|
||||
%22600 = OpFSub %v2float %17065 %768
|
||||
%7156 = OpVectorShuffle %v2float %7772 %7772 2 3
|
||||
%20491 = OpFMul %v2float %7372 %7156
|
||||
%18197 = OpFAdd %v2float %22600 %20491
|
||||
%10599 = OpCompositeExtract %float %18197 0
|
||||
%13956 = OpCompositeExtract %float %18197 1
|
||||
%18260 = OpCompositeConstruct %v4float %10599 %13956 %float_0 %float_1
|
||||
%8483 = OpAccessChain %_ptr_Output_v4float %4930 %int_0
|
||||
OpStore %8483 %18260
|
||||
%20171 = OpAccessChain %_ptr_PushConstant_v4float %3463 %int_0
|
||||
%6318 = OpLoad %v4float %20171
|
||||
%7688 = OpVectorShuffle %v2float %6318 %6318 2 3
|
||||
%18797 = OpFMul %v2float %7372 %7688
|
||||
%18691 = OpVectorShuffle %v2float %6318 %6318 0 1
|
||||
%25136 = OpFAdd %v2float %18797 %18691
|
||||
OpStore %5592 %25136
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
#endif
|
||||
|
||||
const uint32_t blit_vs[] = {
|
||||
0x07230203, 0x00010000, 0x0008000A, 0x00006231, 0x00000000, 0x00020011,
|
||||
0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E,
|
||||
0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0008000F, 0x00000000,
|
||||
0x0000161F, 0x6E69616D, 0x00000000, 0x00001029, 0x00001342, 0x000015D8,
|
||||
0x00040047, 0x00001029, 0x0000000B, 0x0000002A, 0x00050048, 0x00000438,
|
||||
0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x00000438, 0x00000001,
|
||||
0x00000023, 0x00000010, 0x00030047, 0x00000438, 0x00000002, 0x00050048,
|
||||
0x00000635, 0x00000000, 0x0000000B, 0x00000000, 0x00050048, 0x00000635,
|
||||
0x00000001, 0x0000000B, 0x00000001, 0x00050048, 0x00000635, 0x00000002,
|
||||
0x0000000B, 0x00000003, 0x00050048, 0x00000635, 0x00000003, 0x0000000B,
|
||||
0x00000004, 0x00030047, 0x00000635, 0x00000002, 0x00040047, 0x000015D8,
|
||||
0x0000001E, 0x00000000, 0x00020013, 0x00000008, 0x00030021, 0x00000502,
|
||||
0x00000008, 0x00030016, 0x0000000D, 0x00000020, 0x00040017, 0x00000013,
|
||||
0x0000000D, 0x00000002, 0x00040020, 0x00000290, 0x00000007, 0x00000013,
|
||||
0x00040015, 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B,
|
||||
0x00000A16, 0x00000004, 0x0004001C, 0x00000276, 0x00000013, 0x00000A16,
|
||||
0x0004002B, 0x0000000D, 0x00000A0C, 0x00000000, 0x0005002C, 0x00000013,
|
||||
0x0000071F, 0x00000A0C, 0x00000A0C, 0x0004002B, 0x0000000D, 0x0000008A,
|
||||
0x3F800000, 0x0005002C, 0x00000013, 0x00000138, 0x0000008A, 0x00000A0C,
|
||||
0x0005002C, 0x00000013, 0x00000379, 0x00000A0C, 0x0000008A, 0x0005002C,
|
||||
0x00000013, 0x00000300, 0x0000008A, 0x0000008A, 0x0007002C, 0x00000276,
|
||||
0x00000329, 0x0000071F, 0x00000138, 0x00000379, 0x00000300, 0x00040015,
|
||||
0x0000000C, 0x00000020, 0x00000001, 0x00040020, 0x00000289, 0x00000001,
|
||||
0x0000000C, 0x0004003B, 0x00000289, 0x00001029, 0x00000001, 0x00040020,
|
||||
0x000004F3, 0x00000007, 0x00000276, 0x0004002B, 0x0000000D, 0x00000018,
|
||||
0x40000000, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004, 0x0004001E,
|
||||
0x00000438, 0x0000001D, 0x0000001D, 0x00040020, 0x000006B5, 0x00000009,
|
||||
0x00000438, 0x0004003B, 0x000006B5, 0x00000D87, 0x00000009, 0x0004002B,
|
||||
0x0000000C, 0x00000A0E, 0x00000001, 0x00040020, 0x0000029A, 0x00000009,
|
||||
0x0000001D, 0x0007002C, 0x0000001D, 0x000008C3, 0x00000018, 0x00000018,
|
||||
0x00000018, 0x00000018, 0x0004002B, 0x0000000B, 0x00000A0D, 0x00000001,
|
||||
0x0004001C, 0x000002E3, 0x0000000D, 0x00000A0D, 0x0006001E, 0x00000635,
|
||||
0x0000001D, 0x0000000D, 0x000002E3, 0x000002E3, 0x00040020, 0x000008B2,
|
||||
0x00000003, 0x00000635, 0x0004003B, 0x000008B2, 0x00001342, 0x00000003,
|
||||
0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x00040020, 0x0000029B,
|
||||
0x00000003, 0x0000001D, 0x00040020, 0x00000291, 0x00000003, 0x00000013,
|
||||
0x0004003B, 0x00000291, 0x000015D8, 0x00000003, 0x00050036, 0x00000008,
|
||||
0x0000161F, 0x00000000, 0x00000502, 0x000200F8, 0x00006179, 0x0004003B,
|
||||
0x000004F3, 0x00001476, 0x00000007, 0x0004003D, 0x0000000C, 0x00005E6D,
|
||||
0x00001029, 0x0003003E, 0x00001476, 0x00000329, 0x00050041, 0x00000290,
|
||||
0x00004127, 0x00001476, 0x00005E6D, 0x0004003D, 0x00000013, 0x00001CCC,
|
||||
0x00004127, 0x00050041, 0x0000029A, 0x000053C6, 0x00000D87, 0x00000A0E,
|
||||
0x0004003D, 0x0000001D, 0x00002AEA, 0x000053C6, 0x00050085, 0x0000001D,
|
||||
0x00001E5C, 0x00002AEA, 0x000008C3, 0x0007004F, 0x00000013, 0x000042A9,
|
||||
0x00001E5C, 0x00001E5C, 0x00000000, 0x00000001, 0x00050083, 0x00000013,
|
||||
0x00005848, 0x000042A9, 0x00000300, 0x0007004F, 0x00000013, 0x00001BF4,
|
||||
0x00001E5C, 0x00001E5C, 0x00000002, 0x00000003, 0x00050085, 0x00000013,
|
||||
0x0000500B, 0x00001CCC, 0x00001BF4, 0x00050081, 0x00000013, 0x00004715,
|
||||
0x00005848, 0x0000500B, 0x00050051, 0x0000000D, 0x00002967, 0x00004715,
|
||||
0x00000000, 0x00050051, 0x0000000D, 0x00003684, 0x00004715, 0x00000001,
|
||||
0x00070050, 0x0000001D, 0x00004754, 0x00002967, 0x00003684, 0x00000A0C,
|
||||
0x0000008A, 0x00050041, 0x0000029B, 0x00002123, 0x00001342, 0x00000A0B,
|
||||
0x0003003E, 0x00002123, 0x00004754, 0x00050041, 0x0000029A, 0x00004ECB,
|
||||
0x00000D87, 0x00000A0B, 0x0004003D, 0x0000001D, 0x000018AE, 0x00004ECB,
|
||||
0x0007004F, 0x00000013, 0x00001E08, 0x000018AE, 0x000018AE, 0x00000002,
|
||||
0x00000003, 0x00050085, 0x00000013, 0x0000496D, 0x00001CCC, 0x00001E08,
|
||||
0x0007004F, 0x00000013, 0x00004903, 0x000018AE, 0x000018AE, 0x00000000,
|
||||
0x00000001, 0x00050081, 0x00000013, 0x00006230, 0x0000496D, 0x00004903,
|
||||
0x0003003E, 0x000015D8, 0x00006230, 0x000100FD, 0x00010038,
|
||||
};
|
|
@ -0,0 +1,119 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/ui/vulkan/single_layout_descriptor_set_pool.h"
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/logging.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
SingleLayoutDescriptorSetPool::SingleLayoutDescriptorSetPool(
|
||||
const VulkanProvider& provider, uint32_t pool_set_count,
|
||||
uint32_t set_layout_descriptor_counts_count,
|
||||
const VkDescriptorPoolSize* set_layout_descriptor_counts,
|
||||
VkDescriptorSetLayout set_layout)
|
||||
: provider_(provider),
|
||||
pool_set_count_(pool_set_count),
|
||||
set_layout_(set_layout) {
|
||||
assert_not_zero(pool_set_count);
|
||||
pool_descriptor_counts_.resize(set_layout_descriptor_counts_count);
|
||||
for (uint32_t i = 0; i < set_layout_descriptor_counts_count; ++i) {
|
||||
VkDescriptorPoolSize& pool_descriptor_type_count =
|
||||
pool_descriptor_counts_[i];
|
||||
const VkDescriptorPoolSize& set_layout_descriptor_type_count =
|
||||
set_layout_descriptor_counts[i];
|
||||
pool_descriptor_type_count.type = set_layout_descriptor_type_count.type;
|
||||
pool_descriptor_type_count.descriptorCount =
|
||||
set_layout_descriptor_type_count.descriptorCount * pool_set_count;
|
||||
}
|
||||
}
|
||||
|
||||
SingleLayoutDescriptorSetPool::~SingleLayoutDescriptorSetPool() {
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
if (current_pool_ != VK_NULL_HANDLE) {
|
||||
dfn.vkDestroyDescriptorPool(device, current_pool_, nullptr);
|
||||
}
|
||||
for (VkDescriptorPool pool : full_pools_) {
|
||||
dfn.vkDestroyDescriptorPool(device, pool, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
size_t SingleLayoutDescriptorSetPool::Allocate() {
|
||||
if (!descriptor_sets_free_.empty()) {
|
||||
size_t free_index = descriptor_sets_free_.back();
|
||||
descriptor_sets_free_.pop_back();
|
||||
return free_index;
|
||||
}
|
||||
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
|
||||
// Two iterations so if vkAllocateDescriptorSets fails even with a non-zero
|
||||
// current_pool_sets_remaining_, another attempt will be made in a new pool.
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
if (current_pool_ != VK_NULL_HANDLE && !current_pool_sets_remaining_) {
|
||||
full_pools_.push_back(current_pool_);
|
||||
current_pool_ = VK_NULL_HANDLE;
|
||||
}
|
||||
if (current_pool_ == VK_NULL_HANDLE) {
|
||||
VkDescriptorPoolCreateInfo pool_create_info;
|
||||
pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
|
||||
pool_create_info.pNext = nullptr;
|
||||
pool_create_info.flags = 0;
|
||||
pool_create_info.maxSets = pool_set_count_;
|
||||
pool_create_info.poolSizeCount = uint32_t(pool_descriptor_counts_.size());
|
||||
pool_create_info.pPoolSizes = pool_descriptor_counts_.data();
|
||||
if (dfn.vkCreateDescriptorPool(device, &pool_create_info, nullptr,
|
||||
¤t_pool_) != VK_SUCCESS) {
|
||||
XELOGE(
|
||||
"SingleLayoutDescriptorSetPool: Failed to create a descriptor "
|
||||
"pool");
|
||||
return SIZE_MAX;
|
||||
}
|
||||
current_pool_sets_remaining_ = pool_set_count_;
|
||||
}
|
||||
|
||||
VkDescriptorSetAllocateInfo descriptor_set_allocate_info;
|
||||
descriptor_set_allocate_info.sType =
|
||||
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
|
||||
descriptor_set_allocate_info.pNext = nullptr;
|
||||
descriptor_set_allocate_info.descriptorPool = current_pool_;
|
||||
descriptor_set_allocate_info.descriptorSetCount = 1;
|
||||
descriptor_set_allocate_info.pSetLayouts = &set_layout_;
|
||||
VkDescriptorSet descriptor_set;
|
||||
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
|
||||
&descriptor_set) != VK_SUCCESS) {
|
||||
XELOGE(
|
||||
"SingleLayoutDescriptorSetPool: Failed to allocate a descriptor set");
|
||||
if (current_pool_sets_remaining_ >= pool_set_count_) {
|
||||
// Failed to allocate in a new pool - something completely wrong, don't
|
||||
// store empty pools as full.
|
||||
dfn.vkDestroyDescriptorPool(device, current_pool_, nullptr);
|
||||
current_pool_ = VK_NULL_HANDLE;
|
||||
return SIZE_MAX;
|
||||
}
|
||||
full_pools_.push_back(current_pool_);
|
||||
current_pool_ = VK_NULL_HANDLE;
|
||||
}
|
||||
--current_pool_sets_remaining_;
|
||||
descriptor_sets_.push_back(descriptor_set);
|
||||
return descriptor_sets_.size() - 1;
|
||||
}
|
||||
|
||||
// Both attempts have failed.
|
||||
return SIZE_MAX;
|
||||
}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
|
@ -0,0 +1,63 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_UI_VULKAN_SINGLE_DESCRIPTOR_SET_POOL_H_
|
||||
#define XENIA_UI_VULKAN_SINGLE_DESCRIPTOR_SET_POOL_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
class SingleLayoutDescriptorSetPool {
|
||||
public:
|
||||
// set_layout_descriptor_counts must contain the numbers of descriptors of
|
||||
// each type in a single set with the layout (the multiplication by the pool
|
||||
// set count will be done internally). The descriptor set layout must not be
|
||||
// destroyed until this object is also destroyed.
|
||||
SingleLayoutDescriptorSetPool(
|
||||
const VulkanProvider& provider, uint32_t pool_set_count,
|
||||
uint32_t set_layout_descriptor_counts_count,
|
||||
const VkDescriptorPoolSize* set_layout_descriptor_counts,
|
||||
VkDescriptorSetLayout set_layout);
|
||||
~SingleLayoutDescriptorSetPool();
|
||||
|
||||
// Returns SIZE_MAX in case of a failure.
|
||||
size_t Allocate();
|
||||
void Free(size_t index) {
|
||||
assert_true(index < descriptor_sets_.size());
|
||||
descriptor_sets_free_.push_back(index);
|
||||
}
|
||||
VkDescriptorSet Get(size_t index) const { return descriptor_sets_[index]; }
|
||||
|
||||
private:
|
||||
const VulkanProvider& provider_;
|
||||
uint32_t pool_set_count_;
|
||||
std::vector<VkDescriptorPoolSize> pool_descriptor_counts_;
|
||||
VkDescriptorSetLayout set_layout_;
|
||||
|
||||
std::vector<VkDescriptorPool> full_pools_;
|
||||
VkDescriptorPool current_pool_ = VK_NULL_HANDLE;
|
||||
uint32_t current_pool_sets_remaining_ = 0;
|
||||
|
||||
std::vector<VkDescriptorSet> descriptor_sets_;
|
||||
std::vector<size_t> descriptor_sets_free_;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_UI_VULKAN_SINGLE_DESCRIPTOR_SET_POOL_H_
|
|
@ -0,0 +1,216 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h"
|
||||
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
void SingleTypeDescriptorSetAllocator::Reset() {
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device,
|
||||
page_usable_latest_.pool);
|
||||
for (const std::pair<uint32_t, Page>& page_pair : pages_usable_) {
|
||||
dfn.vkDestroyDescriptorPool(device, page_pair.second.pool, nullptr);
|
||||
}
|
||||
pages_usable_.clear();
|
||||
for (VkDescriptorPool pool : pages_full_) {
|
||||
dfn.vkDestroyDescriptorPool(device, pool, nullptr);
|
||||
}
|
||||
pages_full_.clear();
|
||||
}
|
||||
|
||||
VkDescriptorSet SingleTypeDescriptorSetAllocator::Allocate(
|
||||
VkDescriptorSetLayout descriptor_set_layout, uint32_t descriptor_count) {
|
||||
assert_not_zero(descriptor_count);
|
||||
if (descriptor_count == 0) {
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
|
||||
VkDevice device = provider_.device();
|
||||
|
||||
VkDescriptorSetAllocateInfo descriptor_set_allocate_info;
|
||||
descriptor_set_allocate_info.sType =
|
||||
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
|
||||
descriptor_set_allocate_info.pNext = nullptr;
|
||||
descriptor_set_allocate_info.descriptorSetCount = 1;
|
||||
descriptor_set_allocate_info.pSetLayouts = &descriptor_set_layout;
|
||||
VkDescriptorSet descriptor_set;
|
||||
|
||||
if (descriptor_count > descriptor_pool_size_.descriptorCount) {
|
||||
// Can't allocate in the pool, need a dedicated allocation.
|
||||
VkDescriptorPoolSize dedicated_descriptor_pool_size;
|
||||
dedicated_descriptor_pool_size.type = descriptor_pool_size_.type;
|
||||
dedicated_descriptor_pool_size.descriptorCount = descriptor_count;
|
||||
VkDescriptorPoolCreateInfo dedicated_descriptor_pool_create_info;
|
||||
dedicated_descriptor_pool_create_info.sType =
|
||||
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
|
||||
dedicated_descriptor_pool_create_info.pNext = nullptr;
|
||||
dedicated_descriptor_pool_create_info.flags = 0;
|
||||
dedicated_descriptor_pool_create_info.maxSets = 1;
|
||||
dedicated_descriptor_pool_create_info.poolSizeCount = 1;
|
||||
dedicated_descriptor_pool_create_info.pPoolSizes =
|
||||
&dedicated_descriptor_pool_size;
|
||||
VkDescriptorPool dedicated_descriptor_pool;
|
||||
if (dfn.vkCreateDescriptorPool(
|
||||
device, &dedicated_descriptor_pool_create_info, nullptr,
|
||||
&dedicated_descriptor_pool) != VK_SUCCESS) {
|
||||
XELOGE(
|
||||
"SingleTypeDescriptorSetAllocator: Failed to create a dedicated pool "
|
||||
"for {} descriptors",
|
||||
dedicated_descriptor_pool_size.descriptorCount);
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
descriptor_set_allocate_info.descriptorPool = dedicated_descriptor_pool;
|
||||
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
|
||||
&descriptor_set) != VK_SUCCESS) {
|
||||
XELOGE(
|
||||
"SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors "
|
||||
"in a dedicated pool",
|
||||
descriptor_count);
|
||||
dfn.vkDestroyDescriptorPool(device, dedicated_descriptor_pool, nullptr);
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
pages_full_.push_back(dedicated_descriptor_pool);
|
||||
return descriptor_set;
|
||||
}
|
||||
|
||||
// Try allocating from the latest page an allocation has happened from, to
|
||||
// avoid detaching from the map and re-attaching for every allocation.
|
||||
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
|
||||
assert_not_zero(page_usable_latest_.descriptors_remaining);
|
||||
assert_not_zero(page_usable_latest_.descriptor_sets_remaining);
|
||||
if (page_usable_latest_.descriptors_remaining >= descriptor_count) {
|
||||
descriptor_set_allocate_info.descriptorPool = page_usable_latest_.pool;
|
||||
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
|
||||
&descriptor_set) == VK_SUCCESS) {
|
||||
page_usable_latest_.descriptors_remaining -= descriptor_count;
|
||||
--page_usable_latest_.descriptor_sets_remaining;
|
||||
if (!page_usable_latest_.descriptors_remaining ||
|
||||
!page_usable_latest_.descriptor_sets_remaining) {
|
||||
pages_full_.push_back(page_usable_latest_.pool);
|
||||
page_usable_latest_.pool = VK_NULL_HANDLE;
|
||||
}
|
||||
return descriptor_set;
|
||||
}
|
||||
// Failed to allocate internally even though there should be enough space,
|
||||
// don't try to allocate from this pool again at all.
|
||||
pages_full_.push_back(page_usable_latest_.pool);
|
||||
page_usable_latest_.pool = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
// If allocating from the latest pool wasn't possible, pick any that has free
|
||||
// space. Prefer filling pages that have the most free space as they can more
|
||||
// likely be used for more allocations later.
|
||||
while (!pages_usable_.empty()) {
|
||||
auto page_usable_last_it = std::prev(pages_usable_.cend());
|
||||
if (page_usable_last_it->second.descriptors_remaining < descriptor_count) {
|
||||
// All other pages_usable_ entries have fewer free descriptors too (the
|
||||
// remaining count is the map key).
|
||||
break;
|
||||
}
|
||||
// Remove the page from the map unconditionally - in case of a successful
|
||||
// allocation, it will have a different number of free descriptors, thus a
|
||||
// new map key (but it will also become page_usable_latest_ instead even),
|
||||
// or will become full, and in case of a failure to allocate internally even
|
||||
// though there still should be enough space, it should never be allocated
|
||||
// from again.
|
||||
Page map_page = page_usable_last_it->second;
|
||||
pages_usable_.erase(page_usable_last_it);
|
||||
descriptor_set_allocate_info.descriptorPool = map_page.pool;
|
||||
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
|
||||
&descriptor_set) != VK_SUCCESS) {
|
||||
pages_full_.push_back(map_page.pool);
|
||||
continue;
|
||||
}
|
||||
map_page.descriptors_remaining -= descriptor_count;
|
||||
--map_page.descriptor_sets_remaining;
|
||||
if (!map_page.descriptors_remaining ||
|
||||
!map_page.descriptor_sets_remaining) {
|
||||
pages_full_.push_back(map_page.pool);
|
||||
} else {
|
||||
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
|
||||
// Make the page with more free descriptors the next to allocate from.
|
||||
if (map_page.descriptors_remaining >
|
||||
page_usable_latest_.descriptors_remaining) {
|
||||
pages_usable_.emplace(page_usable_latest_.descriptors_remaining,
|
||||
page_usable_latest_);
|
||||
page_usable_latest_ = map_page;
|
||||
} else {
|
||||
pages_usable_.emplace(map_page.descriptors_remaining, map_page);
|
||||
}
|
||||
} else {
|
||||
page_usable_latest_ = map_page;
|
||||
}
|
||||
}
|
||||
return descriptor_set;
|
||||
}
|
||||
|
||||
// Try allocating from a new page.
|
||||
VkDescriptorPoolCreateInfo new_descriptor_pool_create_info;
|
||||
new_descriptor_pool_create_info.sType =
|
||||
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
|
||||
new_descriptor_pool_create_info.pNext = nullptr;
|
||||
new_descriptor_pool_create_info.flags = 0;
|
||||
new_descriptor_pool_create_info.maxSets = descriptor_sets_per_page_;
|
||||
new_descriptor_pool_create_info.poolSizeCount = 1;
|
||||
new_descriptor_pool_create_info.pPoolSizes = &descriptor_pool_size_;
|
||||
VkDescriptorPool new_descriptor_pool;
|
||||
if (dfn.vkCreateDescriptorPool(device, &new_descriptor_pool_create_info,
|
||||
nullptr, &new_descriptor_pool) != VK_SUCCESS) {
|
||||
XELOGE(
|
||||
"SingleTypeDescriptorSetAllocator: Failed to create a pool for {} sets "
|
||||
"with {} descriptors",
|
||||
descriptor_sets_per_page_, descriptor_pool_size_.descriptorCount);
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
descriptor_set_allocate_info.descriptorPool = new_descriptor_pool;
|
||||
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
|
||||
&descriptor_set) != VK_SUCCESS) {
|
||||
XELOGE(
|
||||
"SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors",
|
||||
descriptor_count);
|
||||
dfn.vkDestroyDescriptorPool(device, new_descriptor_pool, nullptr);
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
Page new_page;
|
||||
new_page.pool = new_descriptor_pool;
|
||||
new_page.descriptors_remaining =
|
||||
descriptor_pool_size_.descriptorCount - descriptor_count;
|
||||
new_page.descriptor_sets_remaining = descriptor_sets_per_page_ - 1;
|
||||
if (!new_page.descriptors_remaining || !new_page.descriptor_sets_remaining) {
|
||||
pages_full_.push_back(new_page.pool);
|
||||
} else {
|
||||
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
|
||||
// Make the page with more free descriptors the next to allocate from.
|
||||
if (new_page.descriptors_remaining >
|
||||
page_usable_latest_.descriptors_remaining) {
|
||||
pages_usable_.emplace(page_usable_latest_.descriptors_remaining,
|
||||
page_usable_latest_);
|
||||
page_usable_latest_ = new_page;
|
||||
} else {
|
||||
pages_usable_.emplace(new_page.descriptors_remaining, new_page);
|
||||
}
|
||||
} else {
|
||||
page_usable_latest_ = new_page;
|
||||
}
|
||||
}
|
||||
return descriptor_set;
|
||||
}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
|
@ -0,0 +1,84 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
|
||||
#define XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
// Allocates multiple descriptors of a single type in descriptor set layouts
|
||||
// consisting of descriptors of only that type. There's no way to free these
|
||||
// descriptors within the SingleTypeDescriptorSetAllocator, per-layout free
|
||||
// lists should be used externally.
|
||||
class SingleTypeDescriptorSetAllocator {
|
||||
public:
|
||||
explicit SingleTypeDescriptorSetAllocator(
|
||||
const ui::vulkan::VulkanProvider& provider,
|
||||
VkDescriptorType descriptor_type, uint32_t descriptors_per_page,
|
||||
uint32_t descriptor_sets_per_page)
|
||||
: provider_(provider),
|
||||
descriptor_sets_per_page_(descriptor_sets_per_page) {
|
||||
assert_not_zero(descriptor_sets_per_page_);
|
||||
descriptor_pool_size_.type = descriptor_type;
|
||||
// Not allocating sets with 0 descriptors using the allocator - pointless to
|
||||
// have the descriptor count below the set count.
|
||||
descriptor_pool_size_.descriptorCount =
|
||||
std::max(descriptors_per_page, descriptor_sets_per_page);
|
||||
}
|
||||
SingleTypeDescriptorSetAllocator(
|
||||
const SingleTypeDescriptorSetAllocator& allocator) = delete;
|
||||
SingleTypeDescriptorSetAllocator& operator=(
|
||||
const SingleTypeDescriptorSetAllocator& allocator) = delete;
|
||||
~SingleTypeDescriptorSetAllocator() { Reset(); }
|
||||
|
||||
void Reset();
|
||||
|
||||
VkDescriptorSet Allocate(VkDescriptorSetLayout descriptor_set_layout,
|
||||
uint32_t descriptor_count);
|
||||
|
||||
private:
|
||||
struct Page {
|
||||
VkDescriptorPool pool;
|
||||
uint32_t descriptors_remaining;
|
||||
uint32_t descriptor_sets_remaining;
|
||||
};
|
||||
|
||||
const ui::vulkan::VulkanProvider& provider_;
|
||||
|
||||
VkDescriptorPoolSize descriptor_pool_size_;
|
||||
uint32_t descriptor_sets_per_page_;
|
||||
|
||||
std::vector<VkDescriptorPool> pages_full_;
|
||||
// Because allocations must be contiguous, overflow may happen even if a page
|
||||
// still has free descriptors, so multiple pages may have free space.
|
||||
// To avoid removing and re-adding the page to the map that keeps them sorted
|
||||
// (the key is the number of free descriptors remaining, and it changes at
|
||||
// every allocation from a page), instead of always looking for a free space
|
||||
// in the map, maintaining one page outside the map, and allocation attempts
|
||||
// will be made from that page first.
|
||||
std::multimap<uint32_t, Page> pages_usable_;
|
||||
// Doesn't exist if page_usable_latest_.pool == VK_NULL_HANDLE.
|
||||
Page page_usable_latest_ = {};
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
|
|
@ -0,0 +1,123 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/ui/vulkan/spirv_tools_context.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/platform.h"
|
||||
|
||||
#if XE_PLATFORM_LINUX
|
||||
#include <dlfcn.h>
|
||||
#elif XE_PLATFORM_WIN32
|
||||
#include "xenia/base/platform_win.h"
|
||||
#endif
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
bool SpirvToolsContext::Initialize(unsigned int spirv_version) {
|
||||
const char* vulkan_sdk_env = std::getenv("VULKAN_SDK");
|
||||
if (!vulkan_sdk_env) {
|
||||
XELOGE("SPIRV-Tools: Failed to get the VULKAN_SDK environment variable");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
std::filesystem::path vulkan_sdk_path(vulkan_sdk_env);
|
||||
#if XE_PLATFORM_LINUX
|
||||
library_ = dlopen((vulkan_sdk_path / "bin/libSPIRV-Tools-shared.so").c_str(),
|
||||
RTLD_NOW | RTLD_LOCAL);
|
||||
if (!library_) {
|
||||
XELOGE(
|
||||
"SPIRV-Tools: Failed to load $VULKAN_SDK/bin/libSPIRV-Tools-shared.so");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
#elif XE_PLATFORM_WIN32
|
||||
library_ = LoadLibraryW(
|
||||
(vulkan_sdk_path / "Bin/SPIRV-Tools-shared.dll").wstring().c_str());
|
||||
if (!library_) {
|
||||
XELOGE(
|
||||
"SPIRV-Tools: Failed to load %VULKAN_SDK%/Bin/SPIRV-Tools-shared.dll");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
#error No SPIRV-Tools library loading provided for the target platform.
|
||||
#endif
|
||||
if (!LoadLibraryFunction(fn_spvContextCreate_, "spvContextCreate") ||
|
||||
!LoadLibraryFunction(fn_spvContextDestroy_, "spvContextDestroy") ||
|
||||
!LoadLibraryFunction(fn_spvValidateBinary_, "spvValidateBinary") ||
|
||||
!LoadLibraryFunction(fn_spvDiagnosticDestroy_, "spvDiagnosticDestroy")) {
|
||||
XELOGE("SPIRV-Tools: Failed to get library function pointers");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
spv_target_env target_env;
|
||||
if (spirv_version >= 0x10500) {
|
||||
target_env = SPV_ENV_VULKAN_1_2;
|
||||
} else if (spirv_version >= 0x10400) {
|
||||
target_env = SPV_ENV_VULKAN_1_1_SPIRV_1_4;
|
||||
} else if (spirv_version >= 0x10300) {
|
||||
target_env = SPV_ENV_VULKAN_1_1;
|
||||
} else {
|
||||
target_env = SPV_ENV_VULKAN_1_0;
|
||||
}
|
||||
context_ = fn_spvContextCreate_(target_env);
|
||||
if (!context_) {
|
||||
XELOGE("SPIRV-Tools: Failed to create a Vulkan 1.0 context");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Destroys the validation context and unloads the SPIRV-Tools shared
// library. Safe to call on a partially initialized instance (Initialize()
// invokes this on every failure path) and when nothing was loaded.
void SpirvToolsContext::Shutdown() {
  // Destroy the context first: fn_spvContextDestroy_ points into the library
  // that is unloaded below, so the order must not be swapped.
  if (context_) {
    fn_spvContextDestroy_(context_);
    context_ = nullptr;
  }
  if (library_) {
#if XE_PLATFORM_LINUX
    dlclose(library_);
#elif XE_PLATFORM_WIN32
    FreeLibrary(library_);
#endif
    library_ = nullptr;
  }
  // NOTE(review): the fn_spv* pointers are left pointing into the unloaded
  // library; presumably they are only ever called while context_/library_
  // are non-null - confirm against the callers.
}
|
||||
|
||||
// Validates a SPIR-V binary of num_words 32-bit words. If `error` is
// provided it is cleared and, on a diagnostic being produced, filled with
// the diagnostic text. Returns SPV_UNSUPPORTED when the context was never
// successfully initialized; otherwise returns the validator's result code.
spv_result_t SpirvToolsContext::Validate(const uint32_t* words,
                                         size_t num_words,
                                         std::string* error) const {
  if (error) {
    error->clear();
  }
  if (!context_) {
    return SPV_UNSUPPORTED;
  }
  spv_diagnostic diagnostic = nullptr;
  spv_result_t result =
      fn_spvValidateBinary_(context_, words, num_words, &diagnostic);
  if (diagnostic) {
    // `diagnostic` is known non-null here - the original redundantly
    // re-checked it in the inner condition.
    if (error && diagnostic->error) {
      *error = diagnostic->error;
    }
    // The diagnostic is owned by the caller and must be destroyed through
    // the library that allocated it.
    fn_spvDiagnosticDestroy_(diagnostic);
  }
  return result;
}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
|
@ -0,0 +1,72 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2020 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_UI_VULKAN_SPIRV_TOOLS_CONTEXT_H_
|
||||
#define XENIA_UI_VULKAN_SPIRV_TOOLS_CONTEXT_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include "third_party/SPIRV-Tools/include/spirv-tools/libspirv.h"
|
||||
#include "xenia/base/platform.h"
|
||||
|
||||
#if XE_PLATFORM_LINUX
|
||||
#include <dlfcn.h>
|
||||
#elif XE_PLATFORM_WIN32
|
||||
#include "xenia/base/platform_win.h"
|
||||
#endif
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
// Loads the SPIRV-Tools shared library from the Vulkan SDK at runtime and
// exposes SPIR-V binary validation through it, avoiding a build-time link
// dependency on SPIRV-Tools. Non-copyable; owns the library handle and the
// validation context, both released in Shutdown()/the destructor.
class SpirvToolsContext {
 public:
  SpirvToolsContext() {}
  SpirvToolsContext(const SpirvToolsContext& context) = delete;
  SpirvToolsContext& operator=(const SpirvToolsContext& context) = delete;
  ~SpirvToolsContext() { Shutdown(); }
  // spirv_version is encoded as 0xMMmmpp (e.g. 0x10400 for SPIR-V 1.4) and
  // selects the validation target environment. Returns false on failure,
  // leaving the object in a safely shut-down state.
  bool Initialize(unsigned int spirv_version);
  void Shutdown();

  // Validates a SPIR-V binary; writes the diagnostic text to `error` when
  // provided. Returns SPV_UNSUPPORTED if not initialized.
  spv_result_t Validate(const uint32_t* words, size_t num_words,
                        std::string* error) const;

 private:
  // Handle of the dynamically loaded SPIRV-Tools shared library.
#if XE_PLATFORM_LINUX
  void* library_ = nullptr;
#elif XE_PLATFORM_WIN32
  HMODULE library_ = nullptr;
#endif

  // Resolves `name` from library_ into `function`; returns false if the
  // symbol is missing.
  template <typename FunctionPointer>
  bool LoadLibraryFunction(FunctionPointer& function, const char* name) {
#if XE_PLATFORM_LINUX
    function = reinterpret_cast<FunctionPointer>(dlsym(library_, name));
#elif XE_PLATFORM_WIN32
    function =
        reinterpret_cast<FunctionPointer>(GetProcAddress(library_, name));
#else
#error No SPIRV-Tools LoadLibraryFunction provided for the target platform.
#endif
    return function != nullptr;
  }
  // Entry points resolved from the loaded library in Initialize().
  decltype(&spvContextCreate) fn_spvContextCreate_ = nullptr;
  decltype(&spvContextDestroy) fn_spvContextDestroy_ = nullptr;
  decltype(&spvValidateBinary) fn_spvValidateBinary_ = nullptr;
  decltype(&spvDiagnosticDestroy) fn_spvDiagnosticDestroy_ = nullptr;

  // Validation context; nullptr until Initialize() succeeds.
  spv_context context_ = nullptr;
};
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_UI_VULKAN_SPIRV_TOOLS_CONTEXT_H_
|
|
@ -0,0 +1,108 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2022 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
// Implementing VMA in this translation unit.
|
||||
#define VMA_IMPLEMENTATION
|
||||
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
||||
namespace xe {
|
||||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
// Creates a Vulkan Memory Allocator instance wired to the provider's
// dynamically loaded Vulkan entry points (VMA must not resolve anything on
// its own), enabling optional VMA feature bits only for the instance/device
// extensions the provider reports as available. Returns VK_NULL_HANDLE on
// failure.
VmaAllocator CreateVmaAllocator(const VulkanProvider& provider,
                                bool externally_synchronized) {
  const VulkanProvider::LibraryFunctions& lfn = provider.lfn();
  const VulkanProvider::InstanceFunctions& ifn = provider.ifn();
  const VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  const VulkanProvider::InstanceExtensions& instance_extensions =
      provider.instance_extensions();
  const VulkanProvider::DeviceExtensions& device_extensions =
      provider.device_extensions();

  VmaVulkanFunctions vma_vulkan_functions = {};
  VmaAllocatorCreateInfo allocator_create_info = {};

  // Mandatory loader-, instance- and device-level core 1.0 entry points.
  vma_vulkan_functions.vkGetInstanceProcAddr = lfn.vkGetInstanceProcAddr;
  vma_vulkan_functions.vkGetDeviceProcAddr = ifn.vkGetDeviceProcAddr;
  vma_vulkan_functions.vkGetPhysicalDeviceProperties =
      ifn.vkGetPhysicalDeviceProperties;
  vma_vulkan_functions.vkGetPhysicalDeviceMemoryProperties =
      ifn.vkGetPhysicalDeviceMemoryProperties;
  vma_vulkan_functions.vkAllocateMemory = dfn.vkAllocateMemory;
  vma_vulkan_functions.vkFreeMemory = dfn.vkFreeMemory;
  vma_vulkan_functions.vkMapMemory = dfn.vkMapMemory;
  vma_vulkan_functions.vkUnmapMemory = dfn.vkUnmapMemory;
  vma_vulkan_functions.vkFlushMappedMemoryRanges =
      dfn.vkFlushMappedMemoryRanges;
  vma_vulkan_functions.vkInvalidateMappedMemoryRanges =
      dfn.vkInvalidateMappedMemoryRanges;
  vma_vulkan_functions.vkBindBufferMemory = dfn.vkBindBufferMemory;
  vma_vulkan_functions.vkBindImageMemory = dfn.vkBindImageMemory;
  vma_vulkan_functions.vkGetBufferMemoryRequirements =
      dfn.vkGetBufferMemoryRequirements;
  vma_vulkan_functions.vkGetImageMemoryRequirements =
      dfn.vkGetImageMemoryRequirements;
  vma_vulkan_functions.vkCreateBuffer = dfn.vkCreateBuffer;
  vma_vulkan_functions.vkDestroyBuffer = dfn.vkDestroyBuffer;
  vma_vulkan_functions.vkCreateImage = dfn.vkCreateImage;
  vma_vulkan_functions.vkDestroyImage = dfn.vkDestroyImage;
  vma_vulkan_functions.vkCmdCopyBuffer = dfn.vkCmdCopyBuffer;
  // Optional entry points, with the matching VMA feature flag raised only
  // when every function the feature needs is present.
  if (device_extensions.khr_get_memory_requirements2) {
    vma_vulkan_functions.vkGetBufferMemoryRequirements2KHR =
        dfn.vkGetBufferMemoryRequirements2KHR;
    vma_vulkan_functions.vkGetImageMemoryRequirements2KHR =
        dfn.vkGetImageMemoryRequirements2KHR;
    // Dedicated allocation requires get_memory_requirements2 to query the
    // preference, so it is only enabled inside this branch.
    if (device_extensions.khr_dedicated_allocation) {
      allocator_create_info.flags |=
          VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
    }
  }
  if (device_extensions.khr_bind_memory2) {
    vma_vulkan_functions.vkBindBufferMemory2KHR = dfn.vkBindBufferMemory2KHR;
    vma_vulkan_functions.vkBindImageMemory2KHR = dfn.vkBindImageMemory2KHR;
    allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT;
  }
  if (instance_extensions.khr_get_physical_device_properties2) {
    vma_vulkan_functions.vkGetPhysicalDeviceMemoryProperties2KHR =
        ifn.vkGetPhysicalDeviceMemoryProperties2KHR;
    // Memory budget queries need the instance-level properties2 getter.
    if (device_extensions.ext_memory_budget) {
      allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
    }
  }
  if (device_extensions.khr_maintenance4) {
    // NOTE(review): assigns the KHR-suffixed provider pointer to the
    // unsuffixed VMA member - assumes the VMA headers are configured for
    // Vulkan 1.3 so this member exists; confirm against vulkan_mem_alloc.h.
    vma_vulkan_functions.vkGetDeviceImageMemoryRequirements =
        dfn.vkGetDeviceImageMemoryRequirementsKHR;
  }

  // The caller promises single-threaded use, letting VMA skip internal
  // locking.
  if (externally_synchronized) {
    allocator_create_info.flags |=
        VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
  }
  allocator_create_info.physicalDevice = provider.physical_device();
  allocator_create_info.device = provider.device();
  allocator_create_info.pVulkanFunctions = &vma_vulkan_functions;
  allocator_create_info.instance = provider.instance();
  // NOTE(review): uses the physical device's apiVersion; presumably the
  // instance was created with at least this version - confirm, as VMA
  // expects the version the application actually targets.
  allocator_create_info.vulkanApiVersion =
      provider.device_properties().apiVersion;
  VmaAllocator allocator;
  if (vmaCreateAllocator(&allocator_create_info, &allocator) != VK_SUCCESS) {
    XELOGE("Failed to create a Vulkan Memory Allocator instance");
    return VK_NULL_HANDLE;
  }
  return allocator;
}
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
} // namespace xe
|
|
@ -29,33 +29,8 @@ namespace xe {
|
|||
namespace ui {
|
||||
namespace vulkan {
|
||||
|
||||
// Populates the VMA function-pointer table from the provider's already
// loaded Vulkan entry points, so that VMA never resolves functions itself.
inline void FillVMAVulkanFunctions(VmaVulkanFunctions* vma_funcs,
                                   const VulkanProvider& provider) {
  const VulkanProvider::LibraryFunctions& library_functions = provider.lfn();
  const VulkanProvider::InstanceFunctions& instance_functions = provider.ifn();
  const VulkanProvider::DeviceFunctions& device_functions = provider.dfn();
  // Loader- and instance-level entry points.
  vma_funcs->vkGetInstanceProcAddr = library_functions.vkGetInstanceProcAddr;
  vma_funcs->vkGetDeviceProcAddr = instance_functions.vkGetDeviceProcAddr;
  vma_funcs->vkGetPhysicalDeviceProperties =
      instance_functions.vkGetPhysicalDeviceProperties;
  vma_funcs->vkGetPhysicalDeviceMemoryProperties =
      instance_functions.vkGetPhysicalDeviceMemoryProperties;
  // Device-level memory management entry points.
  vma_funcs->vkAllocateMemory = device_functions.vkAllocateMemory;
  vma_funcs->vkFreeMemory = device_functions.vkFreeMemory;
  vma_funcs->vkMapMemory = device_functions.vkMapMemory;
  vma_funcs->vkUnmapMemory = device_functions.vkUnmapMemory;
  vma_funcs->vkFlushMappedMemoryRanges =
      device_functions.vkFlushMappedMemoryRanges;
  vma_funcs->vkInvalidateMappedMemoryRanges =
      device_functions.vkInvalidateMappedMemoryRanges;
  vma_funcs->vkBindBufferMemory = device_functions.vkBindBufferMemory;
  vma_funcs->vkBindImageMemory = device_functions.vkBindImageMemory;
  vma_funcs->vkGetBufferMemoryRequirements =
      device_functions.vkGetBufferMemoryRequirements;
  vma_funcs->vkGetImageMemoryRequirements =
      device_functions.vkGetImageMemoryRequirements;
  // Resource creation/destruction and transfer entry points.
  vma_funcs->vkCreateBuffer = device_functions.vkCreateBuffer;
  vma_funcs->vkDestroyBuffer = device_functions.vkDestroyBuffer;
  vma_funcs->vkCreateImage = device_functions.vkCreateImage;
  vma_funcs->vkDestroyImage = device_functions.vkDestroyImage;
  vma_funcs->vkCmdCopyBuffer = device_functions.vkCmdCopyBuffer;
}
|
||||
VmaAllocator CreateVmaAllocator(const VulkanProvider& provider,
|
||||
bool externally_synchronized);
|
||||
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
|
|
|
@ -29,13 +29,8 @@
|
|||
#include "xenia/base/platform_win.h"
|
||||
#endif
|
||||
|
||||
// Implement AMD's VMA here.
|
||||
#define VMA_IMPLEMENTATION
|
||||
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
|
||||
|
||||
// TODO(Triang3l): Disable Vulkan validation before releasing a stable version.
|
||||
DEFINE_bool(
|
||||
vulkan_validation, true,
|
||||
vulkan_validation, false,
|
||||
"Enable Vulkan validation (VK_LAYER_KHRONOS_validation). Messages will be "
|
||||
"written to the OS debug log without vulkan_debug_messenger or to the "
|
||||
"Xenia log with it.",
|
||||
|
@ -548,22 +543,10 @@ bool VulkanProvider::Initialize() {
|
|||
++i) {
|
||||
VkPhysicalDevice physical_device_current = physical_devices[i];
|
||||
|
||||
// Get physical device features and check if the needed ones are supported.
|
||||
// Need this before obtaining the queues as sparse binding is an optional
|
||||
// feature.
|
||||
// Get physical device features. Need this before obtaining the queues as
|
||||
// sparse binding is an optional feature.
|
||||
ifn_.vkGetPhysicalDeviceFeatures(physical_device_current,
|
||||
&device_features_);
|
||||
// Passing indices directly from guest memory, where they are big-endian; a
|
||||
// workaround using fetch from shared memory for 32-bit indices that need
|
||||
// swapping isn't implemented yet. Not supported only Qualcomm Adreno 4xx.
|
||||
if (!device_features_.fullDrawIndexUint32) {
|
||||
continue;
|
||||
}
|
||||
// TODO(Triang3l): Make geometry shaders optional by providing compute
|
||||
// shader fallback (though that would require vertex shader stores).
|
||||
if (!device_features_.geometryShader) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the needed queues:
|
||||
// - Graphics and compute.
|
||||
|
@ -704,11 +687,17 @@ bool VulkanProvider::Initialize() {
|
|||
}
|
||||
std::memset(&device_extensions_, 0, sizeof(device_extensions_));
|
||||
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) {
|
||||
device_extensions_.khr_bind_memory2 = true;
|
||||
device_extensions_.khr_dedicated_allocation = true;
|
||||
device_extensions_.khr_get_memory_requirements2 = true;
|
||||
device_extensions_.khr_sampler_ycbcr_conversion = true;
|
||||
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) {
|
||||
device_extensions_.khr_image_format_list = true;
|
||||
device_extensions_.khr_shader_float_controls = true;
|
||||
device_extensions_.khr_spirv_1_4 = true;
|
||||
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) {
|
||||
device_extensions_.khr_maintenance4 = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
device_extensions_enabled.clear();
|
||||
|
@ -717,15 +706,28 @@ bool VulkanProvider::Initialize() {
|
|||
// core to device_extensions_enabled. Adding literals to
|
||||
// device_extensions_enabled for the most C string lifetime safety.
|
||||
static const std::pair<const char*, size_t> kUsedDeviceExtensions[] = {
|
||||
{"VK_AMD_shader_info", offsetof(DeviceExtensions, amd_shader_info)},
|
||||
{"VK_EXT_fragment_shader_interlock",
|
||||
offsetof(DeviceExtensions, ext_fragment_shader_interlock)},
|
||||
{"VK_EXT_memory_budget", offsetof(DeviceExtensions, ext_memory_budget)},
|
||||
{"VK_EXT_shader_stencil_export",
|
||||
offsetof(DeviceExtensions, ext_shader_stencil_export)},
|
||||
{"VK_KHR_bind_memory2", offsetof(DeviceExtensions, khr_bind_memory2)},
|
||||
{"VK_KHR_dedicated_allocation",
|
||||
offsetof(DeviceExtensions, khr_dedicated_allocation)},
|
||||
{"VK_KHR_get_memory_requirements2",
|
||||
offsetof(DeviceExtensions, khr_get_memory_requirements2)},
|
||||
{"VK_KHR_image_format_list",
|
||||
offsetof(DeviceExtensions, khr_image_format_list)},
|
||||
{"VK_KHR_maintenance4", offsetof(DeviceExtensions, khr_maintenance4)},
|
||||
{"VK_KHR_portability_subset",
|
||||
offsetof(DeviceExtensions, khr_portability_subset)},
|
||||
// While vkGetPhysicalDeviceFormatProperties should be used to check the
|
||||
// format support (device support for Y'CbCr formats is not required by
|
||||
// this extension or by Vulkan 1.1), still adding
|
||||
// VK_KHR_sampler_ycbcr_conversion to this list to enable this extension
|
||||
// on the device on Vulkan 1.0.
|
||||
{"VK_KHR_sampler_ycbcr_conversion",
|
||||
offsetof(DeviceExtensions, khr_sampler_ycbcr_conversion)},
|
||||
{"VK_KHR_shader_float_controls",
|
||||
offsetof(DeviceExtensions, khr_shader_float_controls)},
|
||||
{"VK_KHR_spirv_1_4", offsetof(DeviceExtensions, khr_spirv_1_4)},
|
||||
|
@ -917,10 +919,47 @@ bool VulkanProvider::Initialize() {
|
|||
}
|
||||
}
|
||||
// Extensions - disable the specific extension if failed to get its functions.
|
||||
if (device_extensions_.amd_shader_info) {
|
||||
if (device_extensions_.khr_bind_memory2) {
|
||||
bool functions_loaded = true;
|
||||
#include "xenia/ui/vulkan/functions/device_amd_shader_info.inc"
|
||||
device_extensions_.amd_shader_info = functions_loaded;
|
||||
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) {
|
||||
#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE
|
||||
#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc"
|
||||
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
|
||||
} else {
|
||||
#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE
|
||||
#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc"
|
||||
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
|
||||
}
|
||||
device_extensions_.khr_bind_memory2 = functions_loaded;
|
||||
}
|
||||
if (device_extensions_.khr_get_memory_requirements2) {
|
||||
bool functions_loaded = true;
|
||||
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) {
|
||||
#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE
|
||||
#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc"
|
||||
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
|
||||
} else {
|
||||
#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE
|
||||
#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc"
|
||||
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
|
||||
}
|
||||
device_extensions_.khr_get_memory_requirements2 = functions_loaded;
|
||||
// VK_KHR_dedicated_allocation can still work without the dedicated
|
||||
// allocation preference getter even though it requires
|
||||
// VK_KHR_get_memory_requirements2 to be supported and enabled.
|
||||
}
|
||||
if (device_extensions_.khr_maintenance4) {
|
||||
bool functions_loaded = true;
|
||||
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) {
|
||||
#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE
|
||||
#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc"
|
||||
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
|
||||
} else {
|
||||
#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE
|
||||
#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc"
|
||||
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
|
||||
}
|
||||
device_extensions_.khr_maintenance4 = functions_loaded;
|
||||
}
|
||||
if (device_extensions_.khr_swapchain) {
|
||||
bool functions_loaded = true;
|
||||
|
@ -954,14 +993,22 @@ bool VulkanProvider::Initialize() {
|
|||
VK_VERSION_MINOR(device_properties_.apiVersion),
|
||||
VK_VERSION_PATCH(device_properties_.apiVersion));
|
||||
XELOGVK("Vulkan device extensions:");
|
||||
XELOGVK("* VK_AMD_shader_info: {}",
|
||||
device_extensions_.amd_shader_info ? "yes" : "no");
|
||||
XELOGVK("* VK_EXT_fragment_shader_interlock: {}",
|
||||
device_extensions_.ext_fragment_shader_interlock ? "yes" : "no");
|
||||
XELOGVK("* VK_EXT_memory_budget: {}",
|
||||
device_extensions_.ext_memory_budget ? "yes" : "no");
|
||||
XELOGVK("* VK_EXT_shader_stencil_export: {}",
|
||||
device_extensions_.ext_shader_stencil_export ? "yes" : "no");
|
||||
XELOGVK("* VK_KHR_bind_memory2: {}",
|
||||
device_extensions_.khr_bind_memory2 ? "yes" : "no");
|
||||
XELOGVK("* VK_KHR_dedicated_allocation: {}",
|
||||
device_extensions_.khr_dedicated_allocation ? "yes" : "no");
|
||||
XELOGVK("* VK_KHR_get_memory_requirements2: {}",
|
||||
device_extensions_.khr_get_memory_requirements2 ? "yes" : "no");
|
||||
XELOGVK("* VK_KHR_image_format_list: {}",
|
||||
device_extensions_.khr_image_format_list ? "yes" : "no");
|
||||
XELOGVK("* VK_KHR_maintenance4: {}",
|
||||
device_extensions_.khr_maintenance4 ? "yes" : "no");
|
||||
XELOGVK("* VK_KHR_portability_subset: {}",
|
||||
device_extensions_.khr_portability_subset ? "yes" : "no");
|
||||
if (device_extensions_.khr_portability_subset) {
|
||||
|
@ -990,6 +1037,8 @@ bool VulkanProvider::Initialize() {
|
|||
XELOGVK(" * Triangle fans: {}",
|
||||
device_portability_subset_features_.triangleFans ? "yes" : "no");
|
||||
}
|
||||
XELOGVK("* VK_KHR_sampler_ycbcr_conversion: {}",
|
||||
device_extensions_.khr_sampler_ycbcr_conversion ? "yes" : "no");
|
||||
XELOGVK("* VK_KHR_shader_float_controls: {}",
|
||||
device_extensions_.khr_shader_float_controls ? "yes" : "no");
|
||||
if (device_extensions_.khr_shader_float_controls) {
|
||||
|
|
|
@ -131,14 +131,23 @@ class VulkanProvider : public GraphicsProvider {
|
|||
return device_features_;
|
||||
}
|
||||
struct DeviceExtensions {
|
||||
bool amd_shader_info;
|
||||
bool ext_fragment_shader_interlock;
|
||||
bool ext_memory_budget;
|
||||
bool ext_shader_stencil_export;
|
||||
// Core since 1.1.0.
|
||||
bool khr_bind_memory2;
|
||||
// Core since 1.1.0.
|
||||
bool khr_dedicated_allocation;
|
||||
// Core since 1.1.0.
|
||||
bool khr_get_memory_requirements2;
|
||||
// Core since 1.2.0.
|
||||
bool khr_image_format_list;
|
||||
// Core since 1.3.0.
|
||||
bool khr_maintenance4;
|
||||
// Requires the VK_KHR_get_physical_device_properties2 instance extension.
|
||||
bool khr_portability_subset;
|
||||
// Core since 1.1.0.
|
||||
bool khr_sampler_ycbcr_conversion;
|
||||
// Core since 1.2.0.
|
||||
bool khr_shader_float_controls;
|
||||
// Core since 1.2.0.
|
||||
|
@ -215,9 +224,14 @@ class VulkanProvider : public GraphicsProvider {
|
|||
VkDevice device() const { return device_; }
|
||||
struct DeviceFunctions {
|
||||
#define XE_UI_VULKAN_FUNCTION(name) PFN_##name name;
|
||||
#define XE_UI_VULKAN_FUNCTION_PROMOTED(extension_name, core_name) \
|
||||
PFN_##extension_name extension_name;
|
||||
#include "xenia/ui/vulkan/functions/device_1_0.inc"
|
||||
#include "xenia/ui/vulkan/functions/device_amd_shader_info.inc"
|
||||
#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc"
|
||||
#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc"
|
||||
#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc"
|
||||
#include "xenia/ui/vulkan/functions/device_khr_swapchain.inc"
|
||||
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
|
||||
#undef XE_UI_VULKAN_FUNCTION
|
||||
};
|
||||
const DeviceFunctions& dfn() const { return dfn_; }
|
||||
|
|
|
@ -189,6 +189,53 @@ bool CreateDedicatedAllocationImage(const VulkanProvider& provider,
|
|||
return true;
|
||||
}
|
||||
|
||||
// Creates a compute pipeline from an existing shader module.
// `specialization_info` may be nullptr; `entry_point` names the shader's
// entry function. Returns VK_NULL_HANDLE on failure; the shader module is
// not consumed and stays owned by the caller.
VkPipeline CreateComputePipeline(
    const VulkanProvider& provider, VkPipelineLayout layout,
    VkShaderModule shader, const VkSpecializationInfo* specialization_info,
    const char* entry_point) {
  // Zero-initialization supplies the null pNext chains and zero flags; only
  // the non-default fields are written explicitly below.
  VkComputePipelineCreateInfo pipeline_create_info = {};
  pipeline_create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
  pipeline_create_info.stage.sType =
      VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  pipeline_create_info.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
  pipeline_create_info.stage.module = shader;
  pipeline_create_info.stage.pName = entry_point;
  pipeline_create_info.stage.pSpecializationInfo = specialization_info;
  pipeline_create_info.layout = layout;
  pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
  pipeline_create_info.basePipelineIndex = -1;
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  VkPipeline pipeline;
  if (dfn.vkCreateComputePipelines(provider.device(), VK_NULL_HANDLE, 1,
                                   &pipeline_create_info, nullptr,
                                   &pipeline) != VK_SUCCESS) {
    return VK_NULL_HANDLE;
  }
  return pipeline;
}
|
||||
|
||||
// Convenience overload: builds a temporary shader module from raw SPIR-V
// code, creates the compute pipeline through the module-based overload, and
// destroys the module regardless of the outcome.
// Returns VK_NULL_HANDLE if either step fails.
VkPipeline CreateComputePipeline(
    const VulkanProvider& provider, VkPipelineLayout layout,
    const uint32_t* shader_code, size_t shader_code_size_bytes,
    const VkSpecializationInfo* specialization_info, const char* entry_point) {
  VkShaderModule shader_module =
      CreateShaderModule(provider, shader_code, shader_code_size_bytes);
  if (shader_module == VK_NULL_HANDLE) {
    return VK_NULL_HANDLE;
  }
  VkPipeline pipeline = CreateComputePipeline(
      provider, layout, shader_module, specialization_info, entry_point);
  // The module is only needed for pipeline creation; release it even when
  // the pipeline could not be created.
  provider.dfn().vkDestroyShaderModule(provider.device(), shader_module,
                                       nullptr);
  return pipeline;
}
|
||||
|
||||
} // namespace util
|
||||
} // namespace vulkan
|
||||
} // namespace ui
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue