From 9f52f2e8198de8673e7514597ebb10986063d470 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Wed, 17 Feb 2016 17:42:25 -0800 Subject: [PATCH 001/145] Initial vulkan context and immediate drawer. Extremely rough, just checking in so DrChat can snoop. --- premake5.lua | 2 + src/xenia/gpu/spirv_shader_translator.cc | 4 +- src/xenia/ui/spirv/spirv_assembler.cc | 2 +- src/xenia/ui/spirv/spirv_disassembler.cc | 2 +- src/xenia/ui/spirv/spirv_util.h | 4 +- src/xenia/ui/vulkan/premake5.lua | 53 + src/xenia/ui/vulkan/shaders/build.bat | 2 + src/xenia/ui/vulkan/shaders/immediate.frag | 23 + src/xenia/ui/vulkan/shaders/immediate.frag.h | 124 + .../ui/vulkan/shaders/immediate.frag.spv | Bin 0 -> 1464 bytes src/xenia/ui/vulkan/shaders/immediate.vert | 21 + src/xenia/ui/vulkan/shaders/immediate.vert.h | 136 + .../ui/vulkan/shaders/immediate.vert.spv | Bin 0 -> 1608 bytes src/xenia/ui/vulkan/vulkan.cc | 12 + src/xenia/ui/vulkan/vulkan.h | 34 + src/xenia/ui/vulkan/vulkan_context.cc | 148 + src/xenia/ui/vulkan/vulkan_context.h | 63 + src/xenia/ui/vulkan/vulkan_device.cc | 222 + src/xenia/ui/vulkan/vulkan_device.h | 83 + .../ui/vulkan/vulkan_immediate_drawer.cc | 734 +++ src/xenia/ui/vulkan/vulkan_immediate_drawer.h | 69 + src/xenia/ui/vulkan/vulkan_instance.cc | 486 ++ src/xenia/ui/vulkan/vulkan_instance.h | 95 + src/xenia/ui/vulkan/vulkan_provider.cc | 107 + src/xenia/ui/vulkan/vulkan_provider.h | 50 + src/xenia/ui/vulkan/vulkan_swap_chain.cc | 510 ++ src/xenia/ui/vulkan/vulkan_swap_chain.h | 80 + src/xenia/ui/vulkan/vulkan_util.cc | 464 ++ src/xenia/ui/vulkan/vulkan_util.h | 101 + src/xenia/ui/vulkan/vulkan_window_demo.cc | 30 + src/xenia/ui/window_demo.cc | 9 +- third_party/spirv-tools | 2 +- third_party/spirv-tools.lua | 17 +- third_party/spirv/GLSL.std.450.h | 176 +- third_party/spirv/GLSL.std.450.hpp11 | 135 + third_party/spirv/OpenCL.std.h | 272 + third_party/spirv/spirv.h | 1652 +++--- third_party/spirv/spirv.hpp11 | 880 ++++ third_party/vulkan/icd-spv.h | 42 + 
third_party/vulkan/loader/cJSON.c | 1358 +++++ third_party/vulkan/loader/cJSON.h | 189 + third_party/vulkan/loader/debug_report.c | 319 ++ third_party/vulkan/loader/debug_report.h | 150 + .../vulkan/loader/dev_ext_trampoline.c | 2038 ++++++++ third_party/vulkan/loader/dirent_on_windows.c | 130 + third_party/vulkan/loader/dirent_on_windows.h | 51 + third_party/vulkan/loader/gpa_helper.h | 379 ++ third_party/vulkan/loader/loader.c | 4504 +++++++++++++++++ third_party/vulkan/loader/loader.h | 551 ++ third_party/vulkan/loader/murmurhash.c | 97 + third_party/vulkan/loader/murmurhash.h | 52 + third_party/vulkan/loader/premake5.lua | 24 + third_party/vulkan/loader/table_ops.h | 710 +++ third_party/vulkan/loader/trampoline.c | 1731 +++++++ .../vulkan/loader/vk_loader_platform.h | 449 ++ third_party/vulkan/loader/wsi.c | 1092 ++++ third_party/vulkan/loader/wsi.h | 120 + third_party/vulkan/vk_debug_marker_layer.h | 44 + third_party/vulkan/vk_icd.h | 114 + third_party/vulkan/vk_layer.h | 313 ++ third_party/vulkan/vk_lunarg_debug_marker.h | 98 + third_party/vulkan/vk_platform.h | 127 + third_party/vulkan/vk_sdk_platform.h | 53 + third_party/vulkan/vulkan.h | 3775 ++++++++++++++ 64 files changed, 24379 insertions(+), 935 deletions(-) create mode 100644 src/xenia/ui/vulkan/premake5.lua create mode 100644 src/xenia/ui/vulkan/shaders/build.bat create mode 100644 src/xenia/ui/vulkan/shaders/immediate.frag create mode 100644 src/xenia/ui/vulkan/shaders/immediate.frag.h create mode 100644 src/xenia/ui/vulkan/shaders/immediate.frag.spv create mode 100644 src/xenia/ui/vulkan/shaders/immediate.vert create mode 100644 src/xenia/ui/vulkan/shaders/immediate.vert.h create mode 100644 src/xenia/ui/vulkan/shaders/immediate.vert.spv create mode 100644 src/xenia/ui/vulkan/vulkan.cc create mode 100644 src/xenia/ui/vulkan/vulkan.h create mode 100644 src/xenia/ui/vulkan/vulkan_context.cc create mode 100644 src/xenia/ui/vulkan/vulkan_context.h create mode 100644 src/xenia/ui/vulkan/vulkan_device.cc 
create mode 100644 src/xenia/ui/vulkan/vulkan_device.h create mode 100644 src/xenia/ui/vulkan/vulkan_immediate_drawer.cc create mode 100644 src/xenia/ui/vulkan/vulkan_immediate_drawer.h create mode 100644 src/xenia/ui/vulkan/vulkan_instance.cc create mode 100644 src/xenia/ui/vulkan/vulkan_instance.h create mode 100644 src/xenia/ui/vulkan/vulkan_provider.cc create mode 100644 src/xenia/ui/vulkan/vulkan_provider.h create mode 100644 src/xenia/ui/vulkan/vulkan_swap_chain.cc create mode 100644 src/xenia/ui/vulkan/vulkan_swap_chain.h create mode 100644 src/xenia/ui/vulkan/vulkan_util.cc create mode 100644 src/xenia/ui/vulkan/vulkan_util.h create mode 100644 src/xenia/ui/vulkan/vulkan_window_demo.cc create mode 100644 third_party/spirv/GLSL.std.450.hpp11 create mode 100644 third_party/spirv/OpenCL.std.h create mode 100644 third_party/spirv/spirv.hpp11 create mode 100644 third_party/vulkan/icd-spv.h create mode 100644 third_party/vulkan/loader/cJSON.c create mode 100644 third_party/vulkan/loader/cJSON.h create mode 100644 third_party/vulkan/loader/debug_report.c create mode 100644 third_party/vulkan/loader/debug_report.h create mode 100644 third_party/vulkan/loader/dev_ext_trampoline.c create mode 100644 third_party/vulkan/loader/dirent_on_windows.c create mode 100644 third_party/vulkan/loader/dirent_on_windows.h create mode 100644 third_party/vulkan/loader/gpa_helper.h create mode 100644 third_party/vulkan/loader/loader.c create mode 100644 third_party/vulkan/loader/loader.h create mode 100644 third_party/vulkan/loader/murmurhash.c create mode 100644 third_party/vulkan/loader/murmurhash.h create mode 100644 third_party/vulkan/loader/premake5.lua create mode 100644 third_party/vulkan/loader/table_ops.h create mode 100644 third_party/vulkan/loader/trampoline.c create mode 100644 third_party/vulkan/loader/vk_loader_platform.h create mode 100644 third_party/vulkan/loader/wsi.c create mode 100644 third_party/vulkan/loader/wsi.h create mode 100644 
third_party/vulkan/vk_debug_marker_layer.h create mode 100644 third_party/vulkan/vk_icd.h create mode 100644 third_party/vulkan/vk_layer.h create mode 100644 third_party/vulkan/vk_lunarg_debug_marker.h create mode 100644 third_party/vulkan/vk_platform.h create mode 100644 third_party/vulkan/vk_sdk_platform.h create mode 100644 third_party/vulkan/vulkan.h diff --git a/premake5.lua b/premake5.lua index 54277ad27..94df2fde5 100644 --- a/premake5.lua +++ b/premake5.lua @@ -169,6 +169,7 @@ solution("xenia") include("third_party/libav.lua") include("third_party/snappy.lua") include("third_party/spirv-tools.lua") + include("third_party/vulkan/loader") include("third_party/xxhash.lua") include("third_party/yaml-cpp.lua") @@ -188,6 +189,7 @@ solution("xenia") include("src/xenia/ui") include("src/xenia/ui/gl") include("src/xenia/ui/spirv") + include("src/xenia/ui/vulkan") include("src/xenia/vfs") if os.is("windows") then diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 7919a3fe3..b9af44c22 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -24,7 +24,7 @@ void SpirvShaderTranslator::StartTranslation() { auto fn = e.MakeMainEntry(); auto float_1_0 = e.MakeFloatConstant(1.0f); auto acos = e.CreateGlslStd450InstructionCall( - spv::Decoration::Invariant, e.MakeFloatType(32), spv::GLSLstd450::Acos, + spv::Decoration::Invariant, e.MakeFloatType(32), spv::GLSLstd450::kAcos, {float_1_0}); e.MakeReturn(true); } @@ -188,7 +188,7 @@ spv::Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { if (op.is_absolute_value) { current_value_id = e.CreateGlslStd450InstructionCall( spv::Decoration::RelaxedPrecision, current_type_id, - spv::GLSLstd450::FAbs, {current_value_id}); + spv::GLSLstd450::kFAbs, {current_value_id}); } if (op.is_negated) { current_value_id = diff --git a/src/xenia/ui/spirv/spirv_assembler.cc b/src/xenia/ui/spirv/spirv_assembler.cc index 
823779e5f..4ade9204b 100644 --- a/src/xenia/ui/spirv/spirv_assembler.cc +++ b/src/xenia/ui/spirv/spirv_assembler.cc @@ -9,7 +9,7 @@ #include "xenia/ui/spirv/spirv_assembler.h" -#include "third_party/spirv-tools/include/libspirv/libspirv.h" +#include "third_party/spirv-tools/include/spirv-tools/libspirv.h" #include "xenia/base/logging.h" namespace xe { diff --git a/src/xenia/ui/spirv/spirv_disassembler.cc b/src/xenia/ui/spirv/spirv_disassembler.cc index b119ac734..d213582ae 100644 --- a/src/xenia/ui/spirv/spirv_disassembler.cc +++ b/src/xenia/ui/spirv/spirv_disassembler.cc @@ -9,7 +9,7 @@ #include "xenia/ui/spirv/spirv_disassembler.h" -#include "third_party/spirv-tools/include/libspirv/libspirv.h" +#include "third_party/spirv-tools/include/spirv-tools/libspirv.h" #include "xenia/base/logging.h" namespace xe { diff --git a/src/xenia/ui/spirv/spirv_util.h b/src/xenia/ui/spirv/spirv_util.h index 5a3bb52f8..ac7a9a05e 100644 --- a/src/xenia/ui/spirv/spirv_util.h +++ b/src/xenia/ui/spirv/spirv_util.h @@ -10,8 +10,8 @@ #ifndef XENIA_UI_SPIRV_SPIRV_UTIL_H_ #define XENIA_UI_SPIRV_SPIRV_UTIL_H_ -#include "third_party/spirv/GLSL.std.450.h" -#include "third_party/spirv/spirv.h" +#include "third_party/spirv/GLSL.std.450.hpp11" +#include "third_party/spirv/spirv.hpp11" // Forward declarations from SPIRV-Tools so we don't pollute /so/ much. struct spv_binary_t; diff --git a/src/xenia/ui/vulkan/premake5.lua b/src/xenia/ui/vulkan/premake5.lua new file mode 100644 index 000000000..2144ca30a --- /dev/null +++ b/src/xenia/ui/vulkan/premake5.lua @@ -0,0 +1,53 @@ +project_root = "../../../.." 
+include(project_root.."/tools/build") + +group("src") +project("xenia-ui-vulkan") + uuid("4933d81e-1c2c-4d5d-b104-3c0eb9dc2f00") + kind("StaticLib") + language("C++") + links({ + "xenia-base", + "xenia-ui", + "xenia-ui-spirv", + }) + defines({ + }) + includedirs({ + project_root.."/third_party/gflags/src", + project_root.."/third_party/vulkan/", + }) + local_platform_files() + removefiles({"*_demo.cc"}) + +group("demos") +project("xenia-ui-window-vulkan-demo") + uuid("97598f13-3177-454c-8e58-c59e2b6ede27") + kind("WindowedApp") + language("C++") + links({ + "gflags", + "imgui", + "vulkan-loader", + "xenia-base", + "xenia-ui", + "xenia-ui-spirv", + "xenia-ui-vulkan", + }) + flags({ + "WinMain", -- Use WinMain instead of main. + }) + defines({ + }) + includedirs({ + project_root.."/third_party/gflags/src", + project_root.."/third_party/vulkan/", + }) + files({ + "../window_demo.cc", + "vulkan_window_demo.cc", + project_root.."/src/xenia/base/main_"..platform_suffix..".cc", + }) + resincludedirs({ + project_root, + }) diff --git a/src/xenia/ui/vulkan/shaders/build.bat b/src/xenia/ui/vulkan/shaders/build.bat new file mode 100644 index 000000000..c3e0322b0 --- /dev/null +++ b/src/xenia/ui/vulkan/shaders/build.bat @@ -0,0 +1,2 @@ +glslangValidator -V immediate.vert -o immediate.vert.spv +glslangValidator -V immediate.frag -o immediate.frag.spv diff --git a/src/xenia/ui/vulkan/shaders/immediate.frag b/src/xenia/ui/vulkan/shaders/immediate.frag new file mode 100644 index 000000000..b5fcdda35 --- /dev/null +++ b/src/xenia/ui/vulkan/shaders/immediate.frag @@ -0,0 +1,23 @@ +#version 450 core +precision highp float; + +layout(push_constant) uniform PushConstants { + mat4 projection_matrix; + int restrict_texture_samples; +} push_constants; + +layout(set = 0, binding = 0) uniform sampler2D texture_sampler; + +layout(location = 0) in vec2 vtx_uv; +layout(location = 1) in vec4 vtx_color; + +layout(location = 0) out vec4 out_color; + +void main() { + out_color = vtx_color; + if 
(push_constants.restrict_texture_samples == 0 || vtx_uv.x <= 1.0) { + vec4 tex_color = texture(texture_sampler, vtx_uv); + out_color *= tex_color; + // TODO(benvanik): microprofiler shadows. + } +} diff --git a/src/xenia/ui/vulkan/shaders/immediate.frag.h b/src/xenia/ui/vulkan/shaders/immediate.frag.h new file mode 100644 index 000000000..e1efd613a --- /dev/null +++ b/src/xenia/ui/vulkan/shaders/immediate.frag.h @@ -0,0 +1,124 @@ +const uint8_t immediate_frag_spv[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x08, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x6F, 0x75, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x76, 0x74, 0x78, 0x5F, + 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x50, 0x75, 0x73, 0x68, 0x43, 0x6F, 0x6E, 0x73, + 0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x72, 0x6F, 0x6A, + 0x65, 0x63, 0x74, 0x69, 0x6F, 0x6E, 0x5F, 0x6D, 0x61, 0x74, 0x72, 0x69, + 0x78, 0x00, 0x00, 0x00, 0x06, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x72, 0x65, 0x73, 
0x74, 0x72, 0x69, 0x63, 0x74, + 0x5F, 0x74, 0x65, 0x78, 0x74, 0x75, 0x72, 0x65, 0x5F, 0x73, 0x61, 0x6D, + 0x70, 0x6C, 0x65, 0x73, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x70, 0x75, 0x73, 0x68, 0x5F, 0x63, 0x6F, 0x6E, + 0x73, 0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x76, 0x74, 0x78, 0x5F, 0x75, 0x76, 0x00, 0x00, + 0x05, 0x00, 0x05, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x5F, + 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x74, 0x75, 0x72, 0x65, 0x5F, + 0x73, 0x61, 0x6D, 0x70, 0x6C, 0x65, 0x72, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 
0x00, 0x17, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x04, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, + 0x1C, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x80, 0x3F, + 0x20, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x03, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x2D, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0xAA, 0x00, 0x05, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x04, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0xF7, 0x00, 0x03, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFA, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0xBC, 0x00, 0x05, 0x00, 0x0D, 0x00, 0x00, 
0x00, 0x25, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x1B, 0x00, 0x00, 0x00, + 0xF5, 0x00, 0x07, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x2F, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x1C, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x2F, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x85, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0x28, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x28, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, +}; diff --git a/src/xenia/ui/vulkan/shaders/immediate.frag.spv b/src/xenia/ui/vulkan/shaders/immediate.frag.spv new file mode 100644 index 0000000000000000000000000000000000000000..9e0e6bec70231a6e265cb77110969afa0bd4cfd1 GIT binary patch literal 1464 zcmY+C%T7~K6oxk~tq2MtcTs4?J6ch}m>6$yPzDT495`&6+Qb@qNqb5^E;$uL92!SExs+~u!!_Oo2G4Bfh z3&;x4ZwuWS^-1{Yu*lsy^5#`{cDda&eu=yB_OYDWjL+I^O$ePIozekxA+C)x2CVP{g$o$3|)B+_%6@!c8BV(h;VN!z}>n}{oUazkpGHL 
z@`efLPQTzIw*l6{PjYS|J%6ytUjlOW<=t9Uq~E4{t^jMf6Tfx&Z)vUTDd#?a6QAW# z{$Ltw-vHKjX8o;UAGeSm`_Shmkh2f()^w-$f%{bV8`uN(^^T10>jBV5-krHmIsLUi g1ag`Fh`W3JoW}2Sm(wrXxt#N4{V4zPMLYrj0IM5j>i_@% literal 0 HcmV?d00001 diff --git a/src/xenia/ui/vulkan/shaders/immediate.vert b/src/xenia/ui/vulkan/shaders/immediate.vert new file mode 100644 index 000000000..732553dcf --- /dev/null +++ b/src/xenia/ui/vulkan/shaders/immediate.vert @@ -0,0 +1,21 @@ +#version 450 core +precision highp float; + +layout(push_constant) uniform PushConstants { + mat4 projection_matrix; + int restrict_texture_samples; +} push_constants; + +layout(location = 0) in vec2 in_pos; +layout(location = 1) in vec2 in_uv; +layout(location = 2) in vec4 in_color; + +layout(location = 0) out vec2 vtx_uv; +layout(location = 1) out vec4 vtx_color; + +void main() { + gl_Position = push_constants.projection_matrix * vec4(in_pos.xy, 0.0, 1.0); + gl_Position.y = -gl_Position.y; + vtx_uv = in_uv; + vtx_color = in_color; +} diff --git a/src/xenia/ui/vulkan/shaders/immediate.vert.h b/src/xenia/ui/vulkan/shaders/immediate.vert.h new file mode 100644 index 000000000..b454eb260 --- /dev/null +++ b/src/xenia/ui/vulkan/shaders/immediate.vert.h @@ -0,0 +1,136 @@ +const uint8_t immediate_vert_spv[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, + 0x2E, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0B, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x2E, 
0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, + 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, + 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x07, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, + 0x6E, 0x63, 0x65, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, 0x75, 0x6C, 0x6C, 0x44, + 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x50, 0x75, 0x73, 0x68, 0x43, 0x6F, 0x6E, 0x73, + 0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x72, 0x6F, 0x6A, + 0x65, 0x63, 0x74, 0x69, 0x6F, 0x6E, 0x5F, 0x6D, 0x61, 0x74, 0x72, 0x69, + 0x78, 0x00, 0x00, 0x00, 0x06, 0x00, 0x0A, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x72, 0x65, 0x73, 0x74, 0x72, 0x69, 0x63, 0x74, + 0x5F, 0x74, 0x65, 0x78, 0x74, 0x75, 0x72, 0x65, 0x5F, 0x73, 0x61, 0x6D, + 0x70, 0x6C, 0x65, 0x73, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x70, 0x75, 0x73, 0x68, 0x5F, 0x63, 0x6F, 0x6E, + 0x73, 0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x70, 0x6F, 0x73, 0x00, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x76, 0x74, 0x78, 0x5F, + 0x75, 
0x76, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x69, 0x6E, 0x5F, 0x75, 0x76, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, + 0x2C, 0x00, 0x00, 0x00, 0x76, 0x74, 0x78, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, + 0x72, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x69, 0x6E, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x00, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x2E, 
0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x06, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x04, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x2B, 
0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x20, 0x00, 0x04, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x2C, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x2D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x1F, 
0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x2F, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, +}; diff --git a/src/xenia/ui/vulkan/shaders/immediate.vert.spv b/src/xenia/ui/vulkan/shaders/immediate.vert.spv new file mode 100644 index 0000000000000000000000000000000000000000..581d87bc672ab5bfbf858cce7ecea9047249de28 GIT binary patch literal 1608 zcmYk4OK;Oa6orSRPD_E7Qu+W&NE%+H5Lf_0LOiM>rHd|tDzUX9*D6Mi9obG*v4Ow9 zU*cD>LE?PlnUINY?woTUbLWhkjozX$m-JgQFHF7GOheche8>5&{1wx({NVWHxL;PM z{gLmFdljmccrPyJbOg<%LHOiuR7UKrQrfE8h z1)7i9S9H@9;^(7wVX0pa^^Nj>LFP0*iwg?g_K#;dAr>n(K2M=aTnP zMql#iJ5XOV+Lf`uXw$Z+#%ME-(FROU^7;1WF}i^1zpj6LS7&lJ_M6(Z^qa_^Y-*kIwxaXEUbE5(AZF%klPdymz;h2ZqJ2IBX zI~M)?Kz&zx?!t`Vp2O6EA3N;(gZCWg&*LpQ4@^JalAhE*lP5-h;$Yqu?zO%(?twNe zYW|4f+dR-eci)hq2b#W9COT}&;K9swTgII~tA@GnI!`=?#K>wD!Sd5`8e;#e9ZH}#eO*a&~d)*k??#Sz2V67 ccih(=or$48m>9D@m!T;!v + +#include "xenia/base/platform.h" + +#if XE_PLATFORM_WIN32 +#define 
VK_USE_PLATFORM_WIN32_KHR 1 +#else +#error Platform not yet supported. +#endif // XE_PLATFORM_WIN32 + +// We are statically linked with the loader, so use function prototypes. +#define VK_PROTOTYPES +#include "third_party/vulkan/vulkan.h" + +// NOTE: header order matters here, unfortunately: +#include "third_party/vulkan/vk_lunarg_debug_marker.h" + +#define XELOGVK XELOGI + +DECLARE_bool(vulkan_validation); + +#endif // XENIA_UI_VULKAN_VULKAN_H_ diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc new file mode 100644 index 000000000..5d82f4f46 --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_context.cc @@ -0,0 +1,148 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/vulkan_context.h" + +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/profiling.h" +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_device.h" +#include "xenia/ui/vulkan/vulkan_immediate_drawer.h" +#include "xenia/ui/vulkan/vulkan_instance.h" +#include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/vulkan/vulkan_swap_chain.h" +#include "xenia/ui/vulkan/vulkan_util.h" +#include "xenia/ui/window.h" + +namespace xe { +namespace ui { +namespace vulkan { + +VulkanContext::VulkanContext(VulkanProvider* provider, Window* target_window) + : GraphicsContext(provider, target_window) {} + +VulkanContext::~VulkanContext() { + auto provider = static_cast(provider_); + auto device = provider->device(); + vkQueueWaitIdle(device->primary_queue()); + immediate_drawer_.reset(); + swap_chain_.reset(); + if (cmd_pool_) { + vkDestroyCommandPool(*device, cmd_pool_, nullptr); + } +} + +bool VulkanContext::Initialize() { + auto provider = static_cast(provider_); + auto device = provider->device(); + + // All context-specific commands will be allocated from this. + // We may want to have additional pools for different rendering subsystems. + VkCommandPoolCreateInfo cmd_pool_info; + cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + cmd_pool_info.pNext = nullptr; + cmd_pool_info.queueFamilyIndex = device->queue_family_index(); + cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; + auto err = vkCreateCommandPool(*device, &cmd_pool_info, nullptr, &cmd_pool_); + CheckResult(err, "vkCreateCommandPool"); + + if (target_window_) { + // Create swap chain used to present to the window. 
+ VkSurfaceKHR surface = nullptr; +#if XE_PLATFORM_WIN32 + VkWin32SurfaceCreateInfoKHR create_info; + create_info.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR; + create_info.pNext = nullptr; + create_info.flags = 0; + create_info.hinstance = + static_cast(target_window_->native_platform_handle()); + create_info.hwnd = static_cast(target_window_->native_handle()); + err = vkCreateWin32SurfaceKHR(*provider->instance(), &create_info, nullptr, + &surface); + CheckResult(err, "vkCreateWin32SurfaceKHR"); +#else +#error Platform not yet implemented. +#endif // XE_PLATFORM_WIN32 + swap_chain_ = std::make_unique(provider->instance(), + provider->device()); + if (!swap_chain_->Initialize(surface)) { + XELOGE("Unable to initialize swap chain"); + vkDestroySurfaceKHR(*provider->instance(), surface, nullptr); + return false; + } + + // Only initialize immediate mode drawer if we are not an offscreen context. + immediate_drawer_ = std::make_unique(this); + } + + return true; +} + +ImmediateDrawer* VulkanContext::immediate_drawer() { + return immediate_drawer_.get(); +} + +VulkanInstance* VulkanContext::instance() const { + return static_cast(provider_)->instance(); +} + +VulkanDevice* VulkanContext::device() const { + return static_cast(provider_)->device(); +} + +bool VulkanContext::is_current() { return false; } + +bool VulkanContext::MakeCurrent() { + SCOPE_profile_cpu_f("gpu"); + return true; +} + +void VulkanContext::ClearCurrent() {} + +void VulkanContext::BeginSwap() { + SCOPE_profile_cpu_f("gpu"); + auto provider = static_cast(provider_); + auto device = provider->device(); + + // Acquire the next image and set it up for use. + swap_chain_->Begin(); + + // TODO(benvanik): use a fence instead? May not be possible with target image. 
+ auto err = vkQueueWaitIdle(device->primary_queue()); + CheckResult(err, "vkQueueWaitIdle"); +} + +void VulkanContext::EndSwap() { + SCOPE_profile_cpu_f("gpu"); + auto provider = static_cast(provider_); + auto device = provider->device(); + + // Notify the presentation engine the image is ready. + // The contents must be in a coherent state. + swap_chain_->End(); + + // Wait until the queue is idle. + // TODO(benvanik): is this required? + auto err = vkQueueWaitIdle(device->primary_queue()); + CheckResult(err, "vkQueueWaitIdle"); +} + +std::unique_ptr VulkanContext::Capture() { + assert_always(); + return nullptr; +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_context.h b/src/xenia/ui/vulkan/vulkan_context.h new file mode 100644 index 000000000..1893ca287 --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_context.h @@ -0,0 +1,63 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_VULKAN_CONTEXT_H_ +#define XENIA_UI_VULKAN_VULKAN_CONTEXT_H_ + +#include + +#include "xenia/ui/graphics_context.h" +#include "xenia/ui/vulkan/vulkan.h" + +namespace xe { +namespace ui { +namespace vulkan { + +class VulkanDevice; +class VulkanImmediateDrawer; +class VulkanInstance; +class VulkanProvider; +class VulkanSwapChain; + +class VulkanContext : public GraphicsContext { + public: + ~VulkanContext() override; + + ImmediateDrawer* immediate_drawer() override; + VulkanSwapChain* swap_chain() const { return swap_chain_.get(); } + VulkanInstance* instance() const; + VulkanDevice* device() const; + + bool is_current() override; + bool MakeCurrent() override; + void ClearCurrent() override; + + void BeginSwap() override; + void EndSwap() override; + + std::unique_ptr Capture() override; + + private: + friend class VulkanProvider; + + explicit VulkanContext(VulkanProvider* provider, Window* target_window); + + private: + bool Initialize(); + + std::unique_ptr swap_chain_; + std::unique_ptr immediate_drawer_; + VkCommandPool cmd_pool_ = nullptr; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_VULKAN_CONTEXT_H_ diff --git a/src/xenia/ui/vulkan/vulkan_device.cc b/src/xenia/ui/vulkan/vulkan_device.cc new file mode 100644 index 000000000..8f862f444 --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_device.cc @@ -0,0 +1,222 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/vulkan_device.h" + +#include + +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/profiling.h" +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_immediate_drawer.h" +#include "xenia/ui/vulkan/vulkan_util.h" +#include "xenia/ui/window.h" + +namespace xe { +namespace ui { +namespace vulkan { + +VulkanDevice::VulkanDevice(VulkanInstance* instance) : instance_(instance) { + if (FLAGS_vulkan_validation) { + /*DeclareRequiredLayer("VK_LAYER_GOOGLE_unique_objects", + Version::Make(0, 0, 0), true);*/ + DeclareRequiredLayer("VK_LAYER_LUNARG_threading", Version::Make(0, 0, 0), + true); + /*DeclareRequiredLayer("VK_LAYER_LUNARG_mem_tracker", Version::Make(0, 0, + 0), + true);*/ + DeclareRequiredLayer("VK_LAYER_LUNARG_object_tracker", + Version::Make(0, 0, 0), true); + DeclareRequiredLayer("VK_LAYER_LUNARG_draw_state", Version::Make(0, 0, 0), + true); + DeclareRequiredLayer("VK_LAYER_LUNARG_param_checker", + Version::Make(0, 0, 0), true); + DeclareRequiredLayer("VK_LAYER_LUNARG_swapchain", Version::Make(0, 0, 0), + true); + DeclareRequiredLayer("VK_LAYER_LUNARG_device_limits", + Version::Make(0, 0, 0), true); + DeclareRequiredLayer("VK_LAYER_LUNARG_image", Version::Make(0, 0, 0), true); + } +} + +VulkanDevice::~VulkanDevice() { + if (handle) { + vkDestroyDevice(handle, nullptr); + handle = nullptr; + } +} + +bool VulkanDevice::Initialize(DeviceInfo device_info) { + // Gather list of enabled layer names. + auto layers_result = CheckRequirements(required_layers_, device_info.layers); + auto& enabled_layers = layers_result.second; + + // Gather list of enabled extension names. 
+ auto extensions_result = + CheckRequirements(required_extensions_, device_info.extensions); + auto& enabled_extensions = extensions_result.second; + + // We wait until both extensions and layers are checked before failing out so + // that the user gets a complete list of what they have/don't. + if (!extensions_result.first || !layers_result.first) { + FatalVulkanError( + "Layer and extension verification failed; aborting initialization"); + return false; + } + + // Query supported features so we can make sure we have what we need. + VkPhysicalDeviceFeatures supported_features; + vkGetPhysicalDeviceFeatures(device_info.handle, &supported_features); + VkPhysicalDeviceFeatures enabled_features = {0}; + bool any_features_missing = false; +#define ENABLE_AND_EXPECT(name) \ + if (!supported_features.name) { \ + any_features_missing = true; \ + FatalVulkanError("Vulkan device is missing feature " #name); \ + } else { \ + enabled_features.name = VK_TRUE; \ + } + ENABLE_AND_EXPECT(geometryShader); + ENABLE_AND_EXPECT(depthClamp); + ENABLE_AND_EXPECT(alphaToOne); + ENABLE_AND_EXPECT(multiViewport); + // TODO(benvanik): add other features. + if (any_features_missing) { + XELOGE( + "One or more required device features are missing; aborting " + "initialization"); + return false; + } + + // Pick a queue. + // Any queue we use must support both graphics and presentation. + // TODO(benvanik): use multiple queues (DMA-only, compute-only, etc). + if (device_info.queue_family_properties.empty()) { + FatalVulkanError("No queue families available"); + return false; + } + uint32_t ideal_queue_family_index = UINT_MAX; + uint32_t queue_count = 1; + for (size_t i = 0; i < device_info.queue_family_properties.size(); ++i) { + auto queue_flags = device_info.queue_family_properties[i].queueFlags; + if (!device_info.queue_family_supports_present[i]) { + // Can't present from this queue, so ignore it. 
+ continue; + } + if (queue_flags & VK_QUEUE_GRAPHICS_BIT) { + // Can do graphics and present - good! + ideal_queue_family_index = static_cast(i); + // TODO(benvanik): pick a higher queue count? + queue_count = 1; + break; + } + } + if (ideal_queue_family_index == UINT_MAX) { + FatalVulkanError( + "No queue families available that can both do graphics and present"); + return false; + } + + VkDeviceQueueCreateInfo queue_info; + queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queue_info.pNext = nullptr; + queue_info.flags = 0; + queue_info.queueFamilyIndex = ideal_queue_family_index; + queue_info.queueCount = queue_count; + std::vector queue_priorities(queue_count); + queue_info.pQueuePriorities = queue_priorities.data(); + + VkDeviceCreateInfo create_info; + create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + create_info.pNext = nullptr; + create_info.flags = 0; + create_info.queueCreateInfoCount = 1; + create_info.pQueueCreateInfos = &queue_info; + create_info.enabledLayerCount = static_cast(enabled_layers.size()); + create_info.ppEnabledLayerNames = enabled_layers.data(); + create_info.enabledExtensionCount = + static_cast(enabled_extensions.size()); + create_info.ppEnabledExtensionNames = enabled_extensions.data(); + create_info.pEnabledFeatures = &enabled_features; + + auto err = vkCreateDevice(device_info.handle, &create_info, nullptr, &handle); + switch (err) { + case VK_SUCCESS: + // Ok! 
+ break; + case VK_ERROR_INITIALIZATION_FAILED: + FatalVulkanError("Device initialization failed; generic"); + return false; + case VK_ERROR_EXTENSION_NOT_PRESENT: + FatalVulkanError( + "Device initialization failed; requested extension not present"); + return false; + case VK_ERROR_LAYER_NOT_PRESENT: + FatalVulkanError( + "Device initialization failed; requested layer not present"); + return false; + default: + FatalVulkanError(std::string("Device initialization failed; unknown: ") + + to_string(err)); + return false; + } + + device_info_ = std::move(device_info); + queue_family_index_ = ideal_queue_family_index; + + // Get the primary queue used for most submissions/etc. + vkGetDeviceQueue(handle, queue_family_index_, 0, &primary_queue_); + + XELOGVK("Device initialized successfully!"); + return true; +} + +VkDeviceMemory VulkanDevice::AllocateMemory( + const VkMemoryRequirements& requirements, VkFlags required_properties) { + // Search memory types to find one matching our requirements and our + // properties. + uint32_t type_index = UINT_MAX; + for (uint32_t i = 0; i < device_info_.memory_properties.memoryTypeCount; + ++i) { + const auto& memory_type = device_info_.memory_properties.memoryTypes[i]; + if (((requirements.memoryTypeBits >> i) & 1) == 1) { + // Type is available for use; check for a match on properties. + if ((memory_type.propertyFlags & required_properties) == + required_properties) { + type_index = i; + break; + } + } + } + if (type_index == UINT_MAX) { + XELOGE("Unable to find a matching memory type"); + return nullptr; + } + + // Allocate the memory. 
+ VkMemoryAllocateInfo memory_info; + memory_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + memory_info.pNext = nullptr; + memory_info.allocationSize = requirements.size; + memory_info.memoryTypeIndex = type_index; + VkDeviceMemory memory = nullptr; + auto err = vkAllocateMemory(handle, &memory_info, nullptr, &memory); + CheckResult(err, "vkAllocateMemory"); + return memory; +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_device.h b/src/xenia/ui/vulkan/vulkan_device.h new file mode 100644 index 000000000..f1194d662 --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_device.h @@ -0,0 +1,83 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_VULKAN_DEVICE_H_ +#define XENIA_UI_VULKAN_VULKAN_DEVICE_H_ + +#include +#include +#include + +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +namespace xe { +namespace ui { +namespace vulkan { + +class VulkanInstance; + +// Wrapper and utilities for VkDevice. +// Prefer passing this around over a VkDevice and casting as needed to call +// APIs. +class VulkanDevice { + public: + VulkanDevice(VulkanInstance* instance); + ~VulkanDevice(); + + VkDevice handle = nullptr; + + operator VkDevice() const { return handle; } + operator VkPhysicalDevice() const { return device_info_.handle; } + + // Declares a layer to verify and enable upon initialization. + // Must be called before Initialize. 
+ void DeclareRequiredLayer(std::string name, uint32_t min_version, + bool is_optional) { + required_layers_.push_back({name, min_version, is_optional}); + } + + // Declares an extension to verify and enable upon initialization. + // Must be called before Initialize. + void DeclareRequiredExtension(std::string name, uint32_t min_version, + bool is_optional) { + required_extensions_.push_back({name, min_version, is_optional}); + } + + // Initializes the device, querying and enabling extensions and layers and + // preparing the device for general use. + // If initialization succeeds it's likely that no more failures beyond runtime + // issues will occur. + bool Initialize(DeviceInfo device_info); + + uint32_t queue_family_index() const { return queue_family_index_; } + VkQueue primary_queue() const { return primary_queue_; } + const DeviceInfo& device_info() const { return device_info_; } + + // Allocates memory of the given size matching the required properties. + VkDeviceMemory AllocateMemory( + const VkMemoryRequirements& requirements, + VkFlags required_properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + + private: + VulkanInstance* instance_ = nullptr; + + std::vector required_layers_; + std::vector required_extensions_; + + DeviceInfo device_info_; + uint32_t queue_family_index_ = 0; + VkQueue primary_queue_ = nullptr; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_VULKAN_DEVICE_H_ diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc new file mode 100644 index 000000000..97b31de98 --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -0,0 +1,734 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. 
* + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/vulkan_immediate_drawer.h" + +#include "xenia/base/assert.h" +#include "xenia/base/math.h" +#include "xenia/ui/graphics_context.h" +#include "xenia/ui/vulkan/vulkan_context.h" +#include "xenia/ui/vulkan/vulkan_device.h" +#include "xenia/ui/vulkan/vulkan_swap_chain.h" + +namespace xe { +namespace ui { +namespace vulkan { + +#include "xenia/ui/vulkan/shaders/immediate.frag.h" +#include "xenia/ui/vulkan/shaders/immediate.vert.h" + +constexpr uint32_t kCircularBufferCapacity = 2 * 1024 * 1024; + +class LightweightCircularBuffer { + public: + LightweightCircularBuffer(VulkanDevice* device) : device_(*device) { + buffer_capacity_ = xe::round_up(kCircularBufferCapacity, 4096); + + // Index buffer. + VkBufferCreateInfo index_buffer_info; + index_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + index_buffer_info.pNext = nullptr; + index_buffer_info.flags = 0; + index_buffer_info.size = buffer_capacity_; + index_buffer_info.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + index_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + index_buffer_info.queueFamilyIndexCount = 0; + index_buffer_info.pQueueFamilyIndices = nullptr; + auto err = + vkCreateBuffer(device_, &index_buffer_info, nullptr, &index_buffer_); + CheckResult(err, "vkCreateBuffer"); + + // Vertex buffer. 
+ VkBufferCreateInfo vertex_buffer_info; + vertex_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + vertex_buffer_info.pNext = nullptr; + vertex_buffer_info.flags = 0; + vertex_buffer_info.size = buffer_capacity_; + vertex_buffer_info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + vertex_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + vertex_buffer_info.queueFamilyIndexCount = 0; + vertex_buffer_info.pQueueFamilyIndices = nullptr; + err = + vkCreateBuffer(*device, &vertex_buffer_info, nullptr, &vertex_buffer_); + CheckResult(err, "vkCreateBuffer"); + + // Allocate underlying buffer. + // We alias it for both vertices and indices. + VkMemoryRequirements buffer_requirements; + vkGetBufferMemoryRequirements(device_, index_buffer_, &buffer_requirements); + buffer_memory_ = device->AllocateMemory( + buffer_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + vkBindBufferMemory(*device, index_buffer_, buffer_memory_, 0); + vkBindBufferMemory(*device, vertex_buffer_, buffer_memory_, 0); + + // Persistent mapping. + err = vkMapMemory(device_, buffer_memory_, 0, VK_WHOLE_SIZE, 0, + &buffer_data_); + CheckResult(err, "vkMapMemory"); + } + + ~LightweightCircularBuffer() { + vkUnmapMemory(device_, buffer_memory_); + vkDestroyBuffer(device_, index_buffer_, nullptr); + vkDestroyBuffer(device_, vertex_buffer_, nullptr); + vkFreeMemory(device_, buffer_memory_, nullptr); + } + + VkBuffer vertex_buffer() const { return vertex_buffer_; } + VkBuffer index_buffer() const { return index_buffer_; } + + // Allocates space for data and copies it into the buffer. + // Returns the offset in the buffer of the data or VK_WHOLE_SIZE if the buffer + // is full. + VkDeviceSize Emplace(const void* source_data, size_t source_length) { + // TODO(benvanik): query actual alignment. + source_length = xe::round_up(source_length, 256); + + // Run down old fences to free up space. + + // Check to see if we have space. + // return VK_WHOLE_SIZE; + + // Compute new range and mark as in use. 
+ if (current_offset_ + source_length > buffer_capacity_) { + // Wraps around. + current_offset_ = 0; + } + VkDeviceSize offset = current_offset_; + current_offset_ += source_length; + + // Copy data. + auto dest_ptr = reinterpret_cast(buffer_data_) + offset; + std::memcpy(dest_ptr, source_data, source_length); + + // Insert fence. + // TODO(benvanik): coarse-grained fences, these may be too fine. + + // Flush memory. + // TODO(benvanik): do only in large batches? can barrier it. + VkMappedMemoryRange dirty_range; + dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + dirty_range.pNext = nullptr; + dirty_range.memory = buffer_memory_; + dirty_range.offset = offset; + dirty_range.size = source_length; + vkFlushMappedMemoryRanges(device_, 1, &dirty_range); + return offset; + } + + private: + VkDevice device_ = nullptr; + + VkBuffer index_buffer_ = nullptr; + VkBuffer vertex_buffer_ = nullptr; + VkDeviceMemory buffer_memory_ = nullptr; + void* buffer_data_ = nullptr; + size_t buffer_capacity_ = 0; + size_t current_offset_ = 0; +}; + +class VulkanImmediateTexture : public ImmediateTexture { + public: + VulkanImmediateTexture(VulkanDevice* device, VkDescriptorPool descriptor_pool, + VkDescriptorSetLayout descriptor_set_layout, + VkSampler sampler, uint32_t width, uint32_t height) + : ImmediateTexture(width, height), + device_(*device), + descriptor_pool_(descriptor_pool), + sampler_(sampler) { + handle = reinterpret_cast(this); + + // Create image object. 
+ VkImageCreateInfo image_info; + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_info.pNext = nullptr; + image_info.flags = 0; + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.format = VK_FORMAT_R8G8B8A8_UNORM; + image_info.extent = {width, height, 1}; + image_info.mipLevels = 1; + image_info.arrayLayers = 1; + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = VK_IMAGE_TILING_LINEAR; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_info.queueFamilyIndexCount = 0; + image_info.pQueueFamilyIndices = nullptr; + image_info.initialLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + auto err = vkCreateImage(device_, &image_info, nullptr, &image_); + CheckResult(err, "vkCreateImage"); + + // Allocate memory for the image. + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(device_, image_, &memory_requirements); + device_memory_ = device->AllocateMemory( + memory_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + + // Bind memory and the image together. + err = vkBindImageMemory(device_, image_, device_memory_, 0); + CheckResult(err, "vkBindImageMemory"); + + // Create image view used by the shader. + VkImageViewCreateInfo view_info; + view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_info.pNext = nullptr; + view_info.flags = 0; + view_info.image = image_; + view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_info.format = VK_FORMAT_R8G8B8A8_UNORM; + view_info.components = { + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, + VK_COMPONENT_SWIZZLE_A, + }; + view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + err = vkCreateImageView(device_, &view_info, nullptr, &image_view_); + CheckResult(err, "vkCreateImageView"); + + // Create descriptor set used just for this texture. + // It never changes, so we can reuse it and not worry with updates. 
+ VkDescriptorSetAllocateInfo set_alloc_info; + set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + set_alloc_info.pNext = nullptr; + set_alloc_info.descriptorPool = descriptor_pool_; + set_alloc_info.descriptorSetCount = 1; + set_alloc_info.pSetLayouts = &descriptor_set_layout; + err = vkAllocateDescriptorSets(device_, &set_alloc_info, &descriptor_set_); + CheckResult(err, "vkAllocateDescriptorSets"); + + // Initialize descriptor with our texture. + VkDescriptorImageInfo texture_info; + texture_info.sampler = sampler_; + texture_info.imageView = image_view_; + texture_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + VkWriteDescriptorSet descriptor_write; + descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_write.pNext = nullptr; + descriptor_write.dstSet = descriptor_set_; + descriptor_write.dstBinding = 0; + descriptor_write.dstArrayElement = 0; + descriptor_write.descriptorCount = 1; + descriptor_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + descriptor_write.pImageInfo = &texture_info; + vkUpdateDescriptorSets(device_, 1, &descriptor_write, 0, nullptr); + } + + ~VulkanImmediateTexture() override { + vkFreeDescriptorSets(device_, descriptor_pool_, 1, &descriptor_set_); + vkDestroyImageView(device_, image_view_, nullptr); + vkDestroyImage(device_, image_, nullptr); + vkFreeMemory(device_, device_memory_, nullptr); + } + + void Upload(const uint8_t* src_data) { + // TODO(benvanik): assert not in use? textures aren't dynamic right now. + + // Get device image layout. + VkImageSubresource subresource; + subresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subresource.mipLevel = 0; + subresource.arrayLayer = 0; + VkSubresourceLayout layout; + vkGetImageSubresourceLayout(device_, image_, &subresource, &layout); + + // Map memory for upload. 
+ void* gpu_data = nullptr; + auto err = + vkMapMemory(device_, device_memory_, 0, layout.size, 0, &gpu_data); + CheckResult(err, "vkMapMemory"); + + // Copy the entire texture, hoping its layout matches what we expect. + std::memcpy(gpu_data, src_data, layout.size); + + vkUnmapMemory(device_, device_memory_); + } + + VkDescriptorSet descriptor_set() const { return descriptor_set_; } + + private: + VkDevice device_ = nullptr; + VkDescriptorPool descriptor_pool_ = nullptr; + VkSampler sampler_ = nullptr; // Not owned. + VkImage image_ = nullptr; + VkImageLayout image_layout_ = VK_IMAGE_LAYOUT_UNDEFINED; + VkDeviceMemory device_memory_ = nullptr; + VkImageView image_view_ = nullptr; + VkDescriptorSet descriptor_set_ = nullptr; +}; + +VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext* graphics_context) + : ImmediateDrawer(graphics_context), context_(graphics_context) { + auto device = context_->device(); + + // NEAREST + CLAMP + VkSamplerCreateInfo sampler_info; + sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_info.pNext = nullptr; + sampler_info.magFilter = VK_FILTER_NEAREST; + sampler_info.minFilter = VK_FILTER_NEAREST; + sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampler_info.mipLodBias = 0.0f; + sampler_info.anisotropyEnable = VK_FALSE; + sampler_info.maxAnisotropy = 1; + sampler_info.compareOp = VK_COMPARE_OP_NEVER; + sampler_info.minLod = 0.0f; + sampler_info.maxLod = 0.0f; + sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + sampler_info.unnormalizedCoordinates = VK_FALSE; + auto err = vkCreateSampler(*device, &sampler_info, nullptr, + &samplers_.nearest_clamp); + CheckResult(err, "vkCreateSampler"); + + // NEAREST + REPEAT + sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + 
sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + err = vkCreateSampler(*device, &sampler_info, nullptr, + &samplers_.nearest_repeat); + CheckResult(err, "vkCreateSampler"); + + // LINEAR + CLAMP + sampler_info.magFilter = VK_FILTER_LINEAR; + sampler_info.minFilter = VK_FILTER_LINEAR; + sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + err = + vkCreateSampler(*device, &sampler_info, nullptr, &samplers_.linear_clamp); + CheckResult(err, "vkCreateSampler"); + + // LINEAR + REPEAT + sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + err = vkCreateSampler(*device, &sampler_info, nullptr, + &samplers_.linear_repeat); + CheckResult(err, "vkCreateSampler"); + + // Create the descriptor set layout used for our texture sampler. + // As it changes almost every draw we keep it separate from the uniform buffer + // and cache it on the textures. 
+ VkDescriptorSetLayoutCreateInfo texture_set_layout_info; + texture_set_layout_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + texture_set_layout_info.pNext = nullptr; + texture_set_layout_info.flags = 0; + texture_set_layout_info.bindingCount = 1; + VkDescriptorSetLayoutBinding texture_binding; + texture_binding.binding = 0; + texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + texture_binding.descriptorCount = 1; + texture_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + texture_binding.pImmutableSamplers = nullptr; + texture_set_layout_info.pBindings = &texture_binding; + err = vkCreateDescriptorSetLayout(*device, &texture_set_layout_info, nullptr, + &texture_set_layout_); + CheckResult(err, "vkCreateDescriptorSetLayout"); + + // Descriptor pool used for all of our cached descriptors. + // In the steady state we don't allocate anything, so these are all manually + // managed. + VkDescriptorPoolCreateInfo descriptor_pool_info; + descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + descriptor_pool_info.pNext = nullptr; + descriptor_pool_info.flags = + VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; + descriptor_pool_info.maxSets = 128; + VkDescriptorPoolSize pool_sizes[1]; + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + pool_sizes[0].descriptorCount = 128; + descriptor_pool_info.poolSizeCount = 1; + descriptor_pool_info.pPoolSizes = pool_sizes; + err = vkCreateDescriptorPool(*device, &descriptor_pool_info, nullptr, + &descriptor_pool_); + CheckResult(err, "vkCreateDescriptorPool"); + + // Create the pipeline layout used for our pipeline. + // If we had multiple pipelines they would share this. 
+ VkPipelineLayoutCreateInfo pipeline_layout_info; + pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipeline_layout_info.pNext = nullptr; + pipeline_layout_info.flags = 0; + VkDescriptorSetLayout set_layouts[] = {texture_set_layout_}; + pipeline_layout_info.setLayoutCount = + static_cast(xe::countof(set_layouts)); + pipeline_layout_info.pSetLayouts = set_layouts; + VkPushConstantRange push_constant_ranges[2]; + push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + push_constant_ranges[0].offset = 0; + push_constant_ranges[0].size = sizeof(float) * 16; + push_constant_ranges[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + push_constant_ranges[1].offset = sizeof(float) * 16; + push_constant_ranges[1].size = sizeof(int); + pipeline_layout_info.pushConstantRangeCount = + static_cast(xe::countof(push_constant_ranges)); + pipeline_layout_info.pPushConstantRanges = push_constant_ranges; + err = vkCreatePipelineLayout(*device, &pipeline_layout_info, nullptr, + &pipeline_layout_); + CheckResult(err, "vkCreatePipelineLayout"); + + // Vertex and fragment shaders. 
+ VkShaderModuleCreateInfo vertex_shader_info; + vertex_shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + vertex_shader_info.pNext = nullptr; + vertex_shader_info.flags = 0; + vertex_shader_info.codeSize = sizeof(immediate_vert_spv); + vertex_shader_info.pCode = + reinterpret_cast(immediate_vert_spv); + VkShaderModule vertex_shader; + err = vkCreateShaderModule(*device, &vertex_shader_info, nullptr, + &vertex_shader); + CheckResult(err, "vkCreateShaderModule"); + VkShaderModuleCreateInfo fragment_shader_info; + fragment_shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + fragment_shader_info.pNext = nullptr; + fragment_shader_info.flags = 0; + fragment_shader_info.codeSize = sizeof(immediate_frag_spv); + fragment_shader_info.pCode = + reinterpret_cast(immediate_frag_spv); + VkShaderModule fragment_shader; + err = vkCreateShaderModule(*device, &fragment_shader_info, nullptr, + &fragment_shader); + CheckResult(err, "vkCreateShaderModule"); + + // Pipeline used when rendering triangles. 
+ VkGraphicsPipelineCreateInfo pipeline_info; + pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_info.pNext = nullptr; + pipeline_info.flags = VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT; + VkPipelineShaderStageCreateInfo pipeline_stages[2]; + pipeline_stages[0].sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + pipeline_stages[0].pNext = nullptr; + pipeline_stages[0].flags = 0; + pipeline_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + pipeline_stages[0].module = vertex_shader; + pipeline_stages[0].pName = "main"; + pipeline_stages[0].pSpecializationInfo = nullptr; + pipeline_stages[1].sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + pipeline_stages[1].pNext = nullptr; + pipeline_stages[1].flags = 0; + pipeline_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + pipeline_stages[1].module = fragment_shader; + pipeline_stages[1].pName = "main"; + pipeline_stages[1].pSpecializationInfo = nullptr; + pipeline_info.stageCount = 2; + pipeline_info.pStages = pipeline_stages; + VkPipelineVertexInputStateCreateInfo vertex_state_info; + vertex_state_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_state_info.pNext = nullptr; + VkVertexInputBindingDescription vertex_binding_descrs[1]; + vertex_binding_descrs[0].binding = 0; + vertex_binding_descrs[0].stride = sizeof(ImmediateVertex); + vertex_binding_descrs[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + vertex_state_info.vertexBindingDescriptionCount = + static_cast(xe::countof(vertex_binding_descrs)); + vertex_state_info.pVertexBindingDescriptions = vertex_binding_descrs; + VkVertexInputAttributeDescription vertex_attrib_descrs[3]; + vertex_attrib_descrs[0].location = 0; + vertex_attrib_descrs[0].binding = 0; + vertex_attrib_descrs[0].format = VK_FORMAT_R32G32_SFLOAT; + vertex_attrib_descrs[0].offset = offsetof(ImmediateVertex, x); + vertex_attrib_descrs[1].location = 1; + vertex_attrib_descrs[1].binding = 0; + 
vertex_attrib_descrs[1].format = VK_FORMAT_R32G32_SFLOAT; + vertex_attrib_descrs[1].offset = offsetof(ImmediateVertex, u); + vertex_attrib_descrs[2].location = 2; + vertex_attrib_descrs[2].binding = 0; + vertex_attrib_descrs[2].format = VK_FORMAT_R8G8B8A8_UNORM; + vertex_attrib_descrs[2].offset = offsetof(ImmediateVertex, color); + vertex_state_info.vertexAttributeDescriptionCount = + static_cast(xe::countof(vertex_attrib_descrs)); + vertex_state_info.pVertexAttributeDescriptions = vertex_attrib_descrs; + pipeline_info.pVertexInputState = &vertex_state_info; + VkPipelineInputAssemblyStateCreateInfo input_info; + input_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_info.pNext = nullptr; + input_info.flags = 0; + input_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + input_info.primitiveRestartEnable = VK_FALSE; + pipeline_info.pInputAssemblyState = &input_info; + pipeline_info.pTessellationState = nullptr; + VkPipelineViewportStateCreateInfo viewport_state_info; + viewport_state_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_state_info.pNext = nullptr; + viewport_state_info.flags = 0; + viewport_state_info.viewportCount = 1; + viewport_state_info.pViewports = nullptr; + viewport_state_info.scissorCount = 1; + viewport_state_info.pScissors = nullptr; + pipeline_info.pViewportState = &viewport_state_info; + VkPipelineRasterizationStateCreateInfo rasterization_info; + rasterization_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_info.pNext = nullptr; + rasterization_info.flags = 0; + rasterization_info.depthClampEnable = VK_FALSE; + rasterization_info.rasterizerDiscardEnable = VK_FALSE; + rasterization_info.polygonMode = VK_POLYGON_MODE_FILL; + rasterization_info.cullMode = VK_CULL_MODE_BACK_BIT; + rasterization_info.frontFace = VK_FRONT_FACE_CLOCKWISE; + rasterization_info.depthBiasEnable = VK_FALSE; + rasterization_info.depthBiasConstantFactor = 0; 
+ rasterization_info.depthBiasClamp = 0; + rasterization_info.depthBiasSlopeFactor = 0; + rasterization_info.lineWidth = 1.0f; + pipeline_info.pRasterizationState = &rasterization_info; + VkPipelineMultisampleStateCreateInfo multisample_info; + multisample_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisample_info.pNext = nullptr; + multisample_info.flags = 0; + multisample_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + multisample_info.sampleShadingEnable = VK_FALSE; + multisample_info.minSampleShading = 0; + multisample_info.pSampleMask = nullptr; + multisample_info.alphaToCoverageEnable = VK_FALSE; + multisample_info.alphaToOneEnable = VK_FALSE; + pipeline_info.pMultisampleState = &multisample_info; + pipeline_info.pDepthStencilState = nullptr; + VkPipelineColorBlendStateCreateInfo blend_info; + blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + blend_info.pNext = nullptr; + blend_info.flags = 0; + blend_info.logicOpEnable = VK_FALSE; + blend_info.logicOp = VK_LOGIC_OP_NO_OP; + VkPipelineColorBlendAttachmentState blend_attachments[1]; + blend_attachments[0].blendEnable = VK_TRUE; + blend_attachments[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; + blend_attachments[0].dstColorBlendFactor = + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + blend_attachments[0].colorBlendOp = VK_BLEND_OP_ADD; + blend_attachments[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; + blend_attachments[0].dstAlphaBlendFactor = + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + blend_attachments[0].alphaBlendOp = VK_BLEND_OP_ADD; + blend_attachments[0].colorWriteMask = 0xF; + blend_info.attachmentCount = + static_cast(xe::countof(blend_attachments)); + blend_info.pAttachments = blend_attachments; + std::memset(blend_info.blendConstants, 0, sizeof(blend_info.blendConstants)); + pipeline_info.pColorBlendState = &blend_info; + VkPipelineDynamicStateCreateInfo dynamic_state_info; + dynamic_state_info.sType = + 
VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state_info.pNext = nullptr; + dynamic_state_info.flags = 0; + VkDynamicState dynamic_states[] = { + VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + }; + dynamic_state_info.dynamicStateCount = + static_cast(xe::countof(dynamic_states)); + dynamic_state_info.pDynamicStates = dynamic_states; + pipeline_info.pDynamicState = &dynamic_state_info; + pipeline_info.layout = pipeline_layout_; + pipeline_info.renderPass = context_->swap_chain()->render_pass(); + pipeline_info.subpass = 0; + pipeline_info.basePipelineHandle = nullptr; + pipeline_info.basePipelineIndex = 0; + err = vkCreateGraphicsPipelines(*device, nullptr, 1, &pipeline_info, nullptr, + &triangle_pipeline_); + CheckResult(err, "vkCreateGraphicsPipelines"); + + // Silly, but let's make a pipeline just for drawing lines. + pipeline_info.flags = VK_PIPELINE_CREATE_DERIVATIVE_BIT; + input_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + pipeline_info.basePipelineHandle = triangle_pipeline_; + pipeline_info.basePipelineIndex = 0; + err = vkCreateGraphicsPipelines(*device, nullptr, 1, &pipeline_info, nullptr, + &line_pipeline_); + CheckResult(err, "vkCreateGraphicsPipelines"); + + vkDestroyShaderModule(*device, vertex_shader, nullptr); + vkDestroyShaderModule(*device, fragment_shader, nullptr); + + // Allocate the buffer we'll use for our vertex and index data. 
+ circular_buffer_ = std::make_unique(device); +} + +VulkanImmediateDrawer::~VulkanImmediateDrawer() { + auto device = context_->device(); + + circular_buffer_.reset(); + + vkDestroyPipeline(*device, line_pipeline_, nullptr); + vkDestroyPipeline(*device, triangle_pipeline_, nullptr); + vkDestroyPipelineLayout(*device, pipeline_layout_, nullptr); + + vkDestroyDescriptorPool(*device, descriptor_pool_, nullptr); + vkDestroyDescriptorSetLayout(*device, texture_set_layout_, nullptr); + + vkDestroySampler(*device, samplers_.nearest_clamp, nullptr); + vkDestroySampler(*device, samplers_.nearest_repeat, nullptr); + vkDestroySampler(*device, samplers_.linear_clamp, nullptr); + vkDestroySampler(*device, samplers_.linear_repeat, nullptr); +} + +std::unique_ptr VulkanImmediateDrawer::CreateTexture( + uint32_t width, uint32_t height, ImmediateTextureFilter filter, bool repeat, + const uint8_t* data) { + auto device = context_->device(); + + VkSampler sampler = nullptr; + switch (filter) { + case ImmediateTextureFilter::kNearest: + sampler = repeat ? samplers_.nearest_repeat : samplers_.nearest_clamp; + break; + case ImmediateTextureFilter::kLinear: + sampler = repeat ? samplers_.linear_repeat : samplers_.linear_clamp; + break; + default: + assert_unhandled_case(filter); + sampler = samplers_.nearest_clamp; + break; + } + + auto texture = std::make_unique( + device, descriptor_pool_, texture_set_layout_, sampler, width, height); + if (data) { + UpdateTexture(texture.get(), data); + } + return std::unique_ptr(texture.release()); +} + +void VulkanImmediateDrawer::UpdateTexture(ImmediateTexture* texture, + const uint8_t* data) { + static_cast(texture)->Upload(data); +} + +void VulkanImmediateDrawer::Begin(int render_target_width, + int render_target_height) { + auto device = context_->device(); + auto swap_chain = context_->swap_chain(); + assert_null(current_cmd_buffer_); + current_cmd_buffer_ = swap_chain->render_cmd_buffer(); + + // Viewport changes only once per batch. 
+ VkViewport viewport; + viewport.x = 0.0f; + viewport.y = 0.0f; + viewport.width = static_cast(render_target_width); + viewport.height = static_cast(render_target_height); + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + vkCmdSetViewport(current_cmd_buffer_, 0, 1, &viewport); + + // Update projection matrix. + const float ortho_projection[4][4] = { + {2.0f / render_target_width, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.0f / -render_target_height, 0.0f, 0.0f}, + {0.0f, 0.0f, -1.0f, 0.0f}, + {-1.0f, 1.0f, 0.0f, 1.0f}, + }; + vkCmdPushConstants(current_cmd_buffer_, pipeline_layout_, + VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(float) * 16, + ortho_projection); +} + +void VulkanImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { + auto device = context_->device(); + + // Upload vertices. + VkDeviceSize vertices_offset = circular_buffer_->Emplace( + batch.vertices, batch.vertex_count * sizeof(ImmediateVertex)); + if (vertices_offset == VK_WHOLE_SIZE) { + // TODO(benvanik): die? + return; + } + auto vertex_buffer = circular_buffer_->vertex_buffer(); + vkCmdBindVertexBuffers(current_cmd_buffer_, 0, 1, &vertex_buffer, + &vertices_offset); + + // Upload indices. + if (batch.indices) { + VkDeviceSize indices_offset = circular_buffer_->Emplace( + batch.indices, batch.index_count * sizeof(uint16_t)); + if (indices_offset == VK_WHOLE_SIZE) { + // TODO(benvanik): die? 
+ return; + } + vkCmdBindIndexBuffer(current_cmd_buffer_, circular_buffer_->index_buffer(), + indices_offset, VK_INDEX_TYPE_UINT16); + } + + batch_has_index_buffer_ = !!batch.indices; +} + +void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { + auto swap_chain = context_->swap_chain(); + + if (draw.primitive_type != ImmediatePrimitiveType::kTriangles) { + return; + } + switch (draw.primitive_type) { + case ImmediatePrimitiveType::kLines: + vkCmdBindPipeline(current_cmd_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS, + line_pipeline_); + break; + case ImmediatePrimitiveType::kTriangles: + vkCmdBindPipeline(current_cmd_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS, + triangle_pipeline_); + break; + } + + // Setup texture binding. + VkDescriptorSet texture_set = nullptr; + auto texture = reinterpret_cast(draw.texture_handle); + if (texture) { + texture_set = texture->descriptor_set(); + } + vkCmdBindDescriptorSets(current_cmd_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_layout_, 0, 1, &texture_set, 0, nullptr); + + // Use push constants for our per-draw changes. + // Here, the restrict_texture_samples uniform. + int restrict_texture_samples = draw.restrict_texture_samples ? 1 : 0; + vkCmdPushConstants(current_cmd_buffer_, pipeline_layout_, + VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(float) * 16, + sizeof(int), &restrict_texture_samples); + + // Scissor, if enabled. + // Scissor can be disabled by making it the full screen. + VkRect2D scissor; + if (draw.scissor) { + scissor.offset.x = draw.scissor_rect[0]; + scissor.offset.y = swap_chain->surface_height() - + (draw.scissor_rect[1] + draw.scissor_rect[3]); + scissor.extent.width = draw.scissor_rect[2]; + scissor.extent.height = draw.scissor_rect[3]; + } else { + scissor.offset.x = 0; + scissor.offset.y = 0; + scissor.extent.width = swap_chain->surface_width(); + scissor.extent.height = swap_chain->surface_height(); + } + vkCmdSetScissor(current_cmd_buffer_, 0, 1, &scissor); + + // Issue draw. 
+ if (batch_has_index_buffer_) { + vkCmdDrawIndexed(current_cmd_buffer_, draw.count, 1, draw.index_offset, + draw.base_vertex, 0); + } else { + vkCmdDraw(current_cmd_buffer_, draw.count, 1, draw.base_vertex, 0); + } +} + +void VulkanImmediateDrawer::EndDrawBatch() {} + +void VulkanImmediateDrawer::End() { current_cmd_buffer_ = nullptr; } + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h new file mode 100644 index 000000000..004804e66 --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h @@ -0,0 +1,69 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_VULKAN_IMMEDIATE_DRAWER_H_ +#define XENIA_UI_VULKAN_VULKAN_IMMEDIATE_DRAWER_H_ + +#include + +#include "xenia/ui/immediate_drawer.h" +#include "xenia/ui/vulkan/vulkan.h" + +namespace xe { +namespace ui { +namespace vulkan { + +class LightweightCircularBuffer; +class VulkanContext; + +class VulkanImmediateDrawer : public ImmediateDrawer { + public: + VulkanImmediateDrawer(VulkanContext* graphics_context); + ~VulkanImmediateDrawer() override; + + std::unique_ptr CreateTexture(uint32_t width, + uint32_t height, + ImmediateTextureFilter filter, + bool repeat, + const uint8_t* data) override; + void UpdateTexture(ImmediateTexture* texture, const uint8_t* data) override; + + void Begin(int render_target_width, int render_target_height) override; + void BeginDrawBatch(const ImmediateDrawBatch& batch) override; + void Draw(const ImmediateDraw& draw) override; + void EndDrawBatch() override; 
+ void End() override; + + private: + VulkanContext* context_ = nullptr; + + struct { + VkSampler nearest_clamp = nullptr; + VkSampler nearest_repeat = nullptr; + VkSampler linear_clamp = nullptr; + VkSampler linear_repeat = nullptr; + } samplers_; + + VkDescriptorSetLayout texture_set_layout_ = nullptr; + VkDescriptorPool descriptor_pool_ = nullptr; + VkPipelineLayout pipeline_layout_ = nullptr; + VkPipeline triangle_pipeline_ = nullptr; + VkPipeline line_pipeline_ = nullptr; + + std::unique_ptr circular_buffer_; + + bool batch_has_index_buffer_ = false; + VkCommandBuffer current_cmd_buffer_ = nullptr; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_VULKAN_IMMEDIATE_DRAWER_H_ diff --git a/src/xenia/ui/vulkan/vulkan_instance.cc b/src/xenia/ui/vulkan/vulkan_instance.cc new file mode 100644 index 000000000..900bf66e1 --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_instance.cc @@ -0,0 +1,486 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/vulkan_instance.h" + +#include + +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/profiling.h" +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_immediate_drawer.h" +#include "xenia/ui/vulkan/vulkan_util.h" +#include "xenia/ui/window.h" + +namespace xe { +namespace ui { +namespace vulkan { + +VulkanInstance::VulkanInstance() { + if (FLAGS_vulkan_validation) { + // DeclareRequiredLayer("VK_LAYER_GOOGLE_unique_objects", Version::Make(0, + // 0, 0), true); + DeclareRequiredLayer("VK_LAYER_LUNARG_threading", Version::Make(0, 0, 0), + true); + // DeclareRequiredLayer("VK_LAYER_LUNARG_mem_tracker", Version::Make(0, 0, + // 0), true); + DeclareRequiredLayer("VK_LAYER_LUNARG_object_tracker", + Version::Make(0, 0, 0), true); + DeclareRequiredLayer("VK_LAYER_LUNARG_draw_state", Version::Make(0, 0, 0), + true); + DeclareRequiredLayer("VK_LAYER_LUNARG_param_checker", + Version::Make(0, 0, 0), true); + DeclareRequiredLayer("VK_LAYER_LUNARG_swapchain", Version::Make(0, 0, 0), + true); + DeclareRequiredLayer("VK_LAYER_LUNARG_device_limits", + Version::Make(0, 0, 0), true); + DeclareRequiredLayer("VK_LAYER_LUNARG_image", Version::Make(0, 0, 0), true); + DeclareRequiredExtension(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, + Version::Make(0, 0, 0), true); + } +} + +VulkanInstance::~VulkanInstance() { DestroyInstance(); } + +bool VulkanInstance::Initialize(Window* any_target_window) { + auto version = Version::Parse(VK_API_VERSION); + XELOGVK("Initializing Vulkan %s...", version.pretty_string.c_str()); + + // Get all of the global layers and extensions provided by the system. 
+ if (!QueryGlobals()) { + XELOGE("Failed to query instance globals"); + return false; + } + + // Create the vulkan instance used by the application with our required + // extensions and layers. + if (!CreateInstance()) { + XELOGE("Failed to create instance"); + return false; + } + + // Query available devices so that we can pick one. + if (!QueryDevices(any_target_window)) { + XELOGE("Failed to query devices"); + return false; + } + + XELOGVK("Instance initialized successfully!"); + return true; +} + +bool VulkanInstance::QueryGlobals() { + // Scan global layers and accumulate properties. + // We do this in a loop so that we can allocate the required amount of + // memory and handle race conditions while querying. + uint32_t count = 0; + std::vector global_layer_properties; + VkResult err; + do { + err = vkEnumerateInstanceLayerProperties(&count, nullptr); + CheckResult(err, "vkEnumerateInstanceLayerProperties"); + global_layer_properties.resize(count); + err = vkEnumerateInstanceLayerProperties(&count, + global_layer_properties.data()); + } while (err == VK_INCOMPLETE); + CheckResult(err, "vkEnumerateInstanceLayerProperties"); + global_layers_.resize(count); + for (size_t i = 0; i < global_layers_.size(); ++i) { + auto& global_layer = global_layers_[i]; + global_layer.properties = global_layer_properties[i]; + + // Get all extensions available for the layer. 
+ do { + err = vkEnumerateInstanceExtensionProperties( + global_layer.properties.layerName, &count, nullptr); + CheckResult(err, "vkEnumerateInstanceExtensionProperties"); + global_layer.extensions.resize(count); + err = vkEnumerateInstanceExtensionProperties( + global_layer.properties.layerName, &count, + global_layer.extensions.data()); + } while (err == VK_INCOMPLETE); + CheckResult(err, "vkEnumerateInstanceExtensionProperties"); + } + XELOGVK("Found %d global layers:", global_layers_.size()); + for (size_t i = 0; i < global_layers_.size(); ++i) { + auto& global_layer = global_layers_[i]; + auto spec_version = Version::Parse(global_layer.properties.specVersion); + auto impl_version = + Version::Parse(global_layer.properties.implementationVersion); + XELOGVK("- %s (spec: %s, impl: %s)", global_layer.properties.layerName, + spec_version.pretty_string.c_str(), + impl_version.pretty_string.c_str()); + XELOGVK(" %s", global_layer.properties.description); + if (!global_layer.extensions.empty()) { + XELOGVK(" %d extensions:", global_layer.extensions.size()); + DumpExtensions(global_layer.extensions, " "); + } + } + + // Scan global extensions. + do { + err = vkEnumerateInstanceExtensionProperties(nullptr, &count, nullptr); + CheckResult(err, "vkEnumerateInstanceExtensionProperties"); + global_extensions_.resize(count); + err = vkEnumerateInstanceExtensionProperties(nullptr, &count, + global_extensions_.data()); + } while (err == VK_INCOMPLETE); + CheckResult(err, "vkEnumerateInstanceExtensionProperties"); + XELOGVK("Found %d global extensions:", global_extensions_.size()); + DumpExtensions(global_extensions_, ""); + + return true; +} + +bool VulkanInstance::CreateInstance() { + XELOGVK("Verifying layers and extensions..."); + + // Gather list of enabled layer names. + auto layers_result = CheckRequirements(required_layers_, global_layers_); + auto& enabled_layers = layers_result.second; + + // Gather list of enabled extension names. 
+ auto extensions_result = + CheckRequirements(required_extensions_, global_extensions_); + auto& enabled_extensions = extensions_result.second; + + // We wait until both extensions and layers are checked before failing out so + // that the user gets a complete list of what they have/don't. + if (!extensions_result.first || !layers_result.first) { + XELOGE("Layer and extension verification failed; aborting initialization"); + return false; + } + + XELOGVK("Initializing application instance..."); + + // TODO(benvanik): use GetEntryInfo? + VkApplicationInfo application_info; + application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + application_info.pNext = nullptr; + application_info.pApplicationName = "xenia"; + application_info.applicationVersion = 1; + application_info.pEngineName = "xenia"; + application_info.engineVersion = 1; + application_info.apiVersion = VK_API_VERSION; + + VkInstanceCreateInfo instance_info; + instance_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + instance_info.pNext = nullptr; + instance_info.flags = 0; + instance_info.pApplicationInfo = &application_info; + instance_info.enabledLayerCount = + static_cast(enabled_layers.size()); + instance_info.ppEnabledLayerNames = enabled_layers.data(); + instance_info.enabledExtensionCount = + static_cast(enabled_extensions.size()); + instance_info.ppEnabledExtensionNames = enabled_extensions.data(); + + auto err = vkCreateInstance(&instance_info, nullptr, &handle); + switch (err) { + case VK_SUCCESS: + // Ok! 
+ break; + case VK_ERROR_INITIALIZATION_FAILED: + XELOGE("Instance initialization failed; generic"); + return false; + case VK_ERROR_INCOMPATIBLE_DRIVER: + XELOGE( + "Instance initialization failed; cannot find a compatible Vulkan " + "installable client driver (ICD)"); + return false; + case VK_ERROR_EXTENSION_NOT_PRESENT: + XELOGE("Instance initialization failed; requested extension not present"); + return false; + case VK_ERROR_LAYER_NOT_PRESENT: + XELOGE("Instance initialization failed; requested layer not present"); + return false; + default: + XELOGE("Instance initialization failed; unknown: %s", to_string(err)); + return false; + } + + // Enable debug validation, if needed. + EnableDebugValidation(); + + return true; +} + +void VulkanInstance::DestroyInstance() { + if (!handle) { + return; + } + DisableDebugValidation(); + vkDestroyInstance(handle, nullptr); + handle = nullptr; +} + +VkBool32 VKAPI_PTR DebugMessageCallback(VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objectType, + uint64_t object, size_t location, + int32_t messageCode, + const char* pLayerPrefix, + const char* pMessage, void* pUserData) { + auto instance = reinterpret_cast(pUserData); + const char* message_type = "UNKNOWN"; + if (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) { + message_type = "ERROR"; + } else if (flags & VK_DEBUG_REPORT_WARNING_BIT_EXT) { + message_type = "WARN"; + } else if (flags & VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT) { + message_type = "PERF WARN"; + } else if (flags & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) { + message_type = "INFO"; + } else if (flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT) { + message_type = "DEBUG"; + } + XELOGVK("[%s/%s:%d] %s", pLayerPrefix, message_type, messageCode, pMessage); + return false; +} + +void VulkanInstance::EnableDebugValidation() { + if (dbg_report_callback_) { + DisableDebugValidation(); + } + auto vk_create_debug_report_callback_ext = + reinterpret_cast( + vkGetInstanceProcAddr(handle, "vkCreateDebugReportCallbackEXT")); + if 
(!vk_create_debug_report_callback_ext) { + XELOGVK("Debug validation layer not installed; ignoring"); + return; + } + VkDebugReportCallbackCreateInfoEXT create_info; + create_info.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT; + create_info.pNext = nullptr; + // TODO(benvanik): flags to set these. + create_info.flags = + VK_DEBUG_REPORT_INFORMATION_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | + VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT | + VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_DEBUG_BIT_EXT; + create_info.pfnCallback = &DebugMessageCallback; + create_info.pUserData = this; + auto err = vk_create_debug_report_callback_ext(handle, &create_info, nullptr, + &dbg_report_callback_); + CheckResult(err, "vkCreateDebugReportCallbackEXT"); + XELOGVK("Debug validation layer enabled"); +} + +void VulkanInstance::DisableDebugValidation() { + if (!dbg_report_callback_) { + return; + } + auto vk_destroy_debug_report_callback_ext = + reinterpret_cast( + vkGetInstanceProcAddr(handle, "vkDestroyDebugReportCallbackEXT")); + if (!vk_destroy_debug_report_callback_ext) { + return; + } + vk_destroy_debug_report_callback_ext(handle, dbg_report_callback_, nullptr); + dbg_report_callback_ = nullptr; +} + +bool VulkanInstance::QueryDevices(Window* any_target_window) { + // Get handles to all devices. + uint32_t count = 0; + std::vector device_handles; + auto err = vkEnumeratePhysicalDevices(handle, &count, nullptr); + CheckResult(err, "vkEnumeratePhysicalDevices"); + device_handles.resize(count); + err = vkEnumeratePhysicalDevices(handle, &count, device_handles.data()); + CheckResult(err, "vkEnumeratePhysicalDevices"); + + // Query device info. + for (size_t i = 0; i < device_handles.size(); ++i) { + auto device_handle = device_handles[i]; + DeviceInfo device_info; + device_info.handle = device_handle; + + // Query general attributes. 
+ vkGetPhysicalDeviceProperties(device_handle, &device_info.properties); + vkGetPhysicalDeviceFeatures(device_handle, &device_info.features); + vkGetPhysicalDeviceMemoryProperties(device_handle, + &device_info.memory_properties); + + // Gather queue family properties. + vkGetPhysicalDeviceQueueFamilyProperties(device_handle, &count, nullptr); + device_info.queue_family_properties.resize(count); + vkGetPhysicalDeviceQueueFamilyProperties( + device_handle, &count, device_info.queue_family_properties.data()); + + // Gather queue family presentation support. + // TODO(benvanik): move to swap chain? + VkSurfaceKHR any_surface = nullptr; +#if XE_PLATFORM_WIN32 + VkWin32SurfaceCreateInfoKHR create_info; + create_info.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR; + create_info.pNext = nullptr; + create_info.flags = 0; + create_info.hinstance = + static_cast(any_target_window->native_platform_handle()); + create_info.hwnd = static_cast(any_target_window->native_handle()); + err = vkCreateWin32SurfaceKHR(handle, &create_info, nullptr, &any_surface); + CheckResult(err, "vkCreateWin32SurfaceKHR"); +#else +#error Platform not yet implemented. +#endif // XE_PLATFORM_WIN32 + device_info.queue_family_supports_present.resize( + device_info.queue_family_properties.size()); + for (size_t j = 0; j < device_info.queue_family_supports_present.size(); + ++j) { + err = vkGetPhysicalDeviceSurfaceSupportKHR( + device_handle, static_cast(j), any_surface, + &device_info.queue_family_supports_present[j]); + CheckResult(err, "vkGetPhysicalDeviceSurfaceSupportKHR"); + } + vkDestroySurfaceKHR(handle, any_surface, nullptr); + + // Gather layers. 
+ std::vector layer_properties; + err = vkEnumerateDeviceLayerProperties(device_handle, &count, nullptr); + CheckResult(err, "vkEnumerateDeviceLayerProperties"); + layer_properties.resize(count); + err = vkEnumerateDeviceLayerProperties(device_handle, &count, + layer_properties.data()); + CheckResult(err, "vkEnumerateDeviceLayerProperties"); + for (size_t j = 0; j < layer_properties.size(); ++j) { + LayerInfo layer_info; + layer_info.properties = layer_properties[j]; + err = vkEnumerateDeviceExtensionProperties( + device_handle, layer_info.properties.layerName, &count, nullptr); + CheckResult(err, "vkEnumerateDeviceExtensionProperties"); + layer_info.extensions.resize(count); + err = vkEnumerateDeviceExtensionProperties( + device_handle, layer_info.properties.layerName, &count, + layer_info.extensions.data()); + CheckResult(err, "vkEnumerateDeviceExtensionProperties"); + device_info.layers.push_back(std::move(layer_info)); + } + + // Gather extensions. + err = vkEnumerateDeviceExtensionProperties(device_handle, nullptr, &count, + nullptr); + CheckResult(err, "vkEnumerateDeviceExtensionProperties"); + device_info.extensions.resize(count); + err = vkEnumerateDeviceExtensionProperties(device_handle, nullptr, &count, + device_info.extensions.data()); + CheckResult(err, "vkEnumerateDeviceExtensionProperties"); + + available_devices_.push_back(std::move(device_info)); + } + + XELOGVK("Found %d physical devices:", available_devices_.size()); + for (size_t i = 0; i < available_devices_.size(); ++i) { + auto& device_info = available_devices_[i]; + XELOGVK("- Device %d:", i); + DumpDeviceInfo(device_info); + } + + return true; +} + +void VulkanInstance::DumpLayers(const std::vector& layers, + const char* indent) { + for (size_t i = 0; i < layers.size(); ++i) { + auto& layer = layers[i]; + auto spec_version = Version::Parse(layer.properties.specVersion); + auto impl_version = Version::Parse(layer.properties.implementationVersion); + XELOGVK("%s- %s (spec: %s, impl: %s)", 
indent, layer.properties.layerName, + spec_version.pretty_string.c_str(), + impl_version.pretty_string.c_str()); + XELOGVK("%s %s", indent, layer.properties.description); + if (!layer.extensions.empty()) { + XELOGVK("%s %d extensions:", indent, layer.extensions.size()); + DumpExtensions(layer.extensions, std::strlen(indent) ? " " : " "); + } + } +} + +void VulkanInstance::DumpExtensions( + const std::vector& extensions, const char* indent) { + for (size_t i = 0; i < extensions.size(); ++i) { + auto& extension = extensions[i]; + auto version = Version::Parse(extension.specVersion); + XELOGVK("%s- %s (%s)", indent, extension.extensionName, + version.pretty_string.c_str()); + } +} + +void VulkanInstance::DumpDeviceInfo(const DeviceInfo& device_info) { + auto& properties = device_info.properties; + auto api_version = Version::Parse(properties.apiVersion); + auto driver_version = Version::Parse(properties.driverVersion); + XELOGVK(" apiVersion = %s", api_version.pretty_string.c_str()); + XELOGVK(" driverVersion = %s", driver_version.pretty_string.c_str()); + XELOGVK(" vendorId = 0x%04x", properties.vendorID); + XELOGVK(" deviceId = 0x%04x", properties.deviceID); + XELOGVK(" deviceType = %s", to_string(properties.deviceType)); + XELOGVK(" deviceName = %s", properties.deviceName); + + auto& memory_props = device_info.memory_properties; + XELOGVK(" Memory Heaps:"); + for (size_t j = 0; j < memory_props.memoryHeapCount; ++j) { + XELOGVK(" - Heap %u: %" PRIu64 " bytes", j, + memory_props.memoryHeaps[j].size); + for (size_t k = 0; k < memory_props.memoryTypeCount; ++k) { + if (memory_props.memoryTypes[k].heapIndex == j) { + XELOGVK(" - Type %u:", k); + auto type_flags = memory_props.memoryTypes[k].propertyFlags; + if (!type_flags) { + XELOGVK(" VK_MEMORY_PROPERTY_DEVICE_ONLY"); + } + if (type_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { + XELOGVK(" VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT"); + } + if (type_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + XELOGVK(" 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT"); + } + if (type_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) { + XELOGVK(" VK_MEMORY_PROPERTY_HOST_COHERENT_BIT"); + } + if (type_flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) { + XELOGVK(" VK_MEMORY_PROPERTY_HOST_CACHED_BIT"); + } + if (type_flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) { + XELOGVK(" VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT"); + } + } + } + } + + XELOGVK(" Queue Families:"); + for (size_t j = 0; j < device_info.queue_family_properties.size(); ++j) { + auto& queue_props = device_info.queue_family_properties[j]; + XELOGVK(" - Queue %d:", j); + XELOGVK( + " queueFlags = %s, %s, %s, %s", + (queue_props.queueFlags & VK_QUEUE_GRAPHICS_BIT) ? "graphics" : "", + (queue_props.queueFlags & VK_QUEUE_COMPUTE_BIT) ? "compute" : "", + (queue_props.queueFlags & VK_QUEUE_TRANSFER_BIT) ? "transfer" : "", + (queue_props.queueFlags & VK_QUEUE_SPARSE_BINDING_BIT) ? "sparse" : ""); + XELOGVK(" queueCount = %u", queue_props.queueCount); + XELOGVK(" timestampValidBits = %u", queue_props.timestampValidBits); + XELOGVK(" supportsPresent = %s", + device_info.queue_family_supports_present[j] ? "true" : "false"); + } + + XELOGVK(" Layers:"); + DumpLayers(device_info.layers, " "); + + XELOGVK(" Extensions:"); + DumpExtensions(device_info.extensions, " "); +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_instance.h b/src/xenia/ui/vulkan/vulkan_instance.h new file mode 100644 index 000000000..c292f3020 --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_instance.h @@ -0,0 +1,95 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_VULKAN_INSTANCE_H_ +#define XENIA_UI_VULKAN_VULKAN_INSTANCE_H_ + +#include +#include +#include + +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_util.h" +#include "xenia/ui/window.h" + +namespace xe { +namespace ui { +namespace vulkan { + +// Wrappers and utilities for VkInstance. +class VulkanInstance { + public: + VulkanInstance(); + ~VulkanInstance(); + + VkInstance handle = nullptr; + + operator VkInstance() const { return handle; } + + // Declares a layer to verify and enable upon initialization. + // Must be called before Initialize. + void DeclareRequiredLayer(std::string name, uint32_t min_version, + bool is_optional) { + required_layers_.push_back({name, min_version, is_optional}); + } + + // Declares an extension to verify and enable upon initialization. + // Must be called before Initialize. + void DeclareRequiredExtension(std::string name, uint32_t min_version, + bool is_optional) { + required_extensions_.push_back({name, min_version, is_optional}); + } + + // Initializes the instance, querying and enabling extensions and layers and + // preparing the instance for general use. + // If initialization succeeds it's likely that no more failures beyond runtime + // issues will occur. + // TODO(benvanik): remove need for any_target_window - it's just for queries. + bool Initialize(Window* any_target_window); + + // Returns a list of all available devices as detected during initialization. + const std::vector& available_devices() const { + return available_devices_; + } + + private: + // Queries the system to find global extensions and layers. + bool QueryGlobals(); + + // Creates the instance, enabling required extensions and layers. + bool CreateInstance(); + void DestroyInstance(); + + // Enables debugging info and callbacks for supported layers. 
+ void EnableDebugValidation(); + void DisableDebugValidation(); + + // Queries all available physical devices. + bool QueryDevices(Window* any_target_window); + + void DumpLayers(const std::vector& layers, const char* indent); + void DumpExtensions(const std::vector& extensions, + const char* indent); + void DumpDeviceInfo(const DeviceInfo& device_info); + + std::vector required_layers_; + std::vector required_extensions_; + + std::vector global_layers_; + std::vector global_extensions_; + std::vector available_devices_; + + VkDebugReportCallbackEXT dbg_report_callback_ = nullptr; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_VULKAN_INSTANCE_H_ diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc new file mode 100644 index 000000000..300604bfb --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -0,0 +1,107 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/vulkan_provider.h" + +#include + +#include + +#include "xenia/base/logging.h" +#include "xenia/ui/vulkan/vulkan_context.h" +#include "xenia/ui/vulkan/vulkan_device.h" +#include "xenia/ui/vulkan/vulkan_instance.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +DEFINE_uint64(vulkan_device_index, 0, "Index of the physical device to use."); + +namespace xe { +namespace ui { +namespace vulkan { + +std::unique_ptr VulkanProvider::Create(Window* main_window) { + std::unique_ptr provider(new VulkanProvider(main_window)); + if (!provider->Initialize()) { + xe::FatalError( + "Unable to initialize Vulkan graphics subsystem.\n" + "Ensure you have the latest drivers for your GPU and that it " + "supports Vulkan. See http://xenia.jp/faq/ for more information and a " + "list of supported GPUs."); + return nullptr; + } + return std::unique_ptr(provider.release()); +} + +VulkanProvider::VulkanProvider(Window* main_window) + : GraphicsProvider(main_window) {} + +VulkanProvider::~VulkanProvider() { + device_.reset(); + instance_.reset(); +} + +bool VulkanProvider::Initialize() { + instance_ = std::make_unique(); + + // Always enable the swapchain. + instance_->DeclareRequiredExtension("VK_KHR_surface", Version::Make(0, 0, 0), + false); + instance_->DeclareRequiredExtension("VK_KHR_win32_surface", + Version::Make(0, 0, 0), false); + + // Attempt initialization and device query. + if (!instance_->Initialize(main_window_)) { + XELOGE("Failed to initialize vulkan instance"); + return false; + } + + // Pick the device to use. + auto available_devices = instance_->available_devices(); + if (available_devices.empty()) { + XELOGE("No devices available for use"); + return false; + } + size_t device_index = + std::min(available_devices.size(), FLAGS_vulkan_device_index); + auto& device_info = available_devices[device_index]; + + // Create the device. 
+ device_ = std::make_unique(instance_.get()); + device_->DeclareRequiredExtension("VK_KHR_swapchain", Version::Make(0, 0, 0), + false); + if (!device_->Initialize(device_info)) { + XELOGE("Unable to initialize device"); + return false; + } + + return true; +} + +std::unique_ptr VulkanProvider::CreateContext( + Window* target_window) { + auto new_context = + std::unique_ptr(new VulkanContext(this, target_window)); + if (!new_context->Initialize()) { + return nullptr; + } + return std::unique_ptr(new_context.release()); +} + +std::unique_ptr VulkanProvider::CreateOffscreenContext() { + auto new_context = + std::unique_ptr(new VulkanContext(this, nullptr)); + if (!new_context->Initialize()) { + return nullptr; + } + return std::unique_ptr(new_context.release()); +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h new file mode 100644 index 000000000..efc174614 --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -0,0 +1,50 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_VULKAN_PROVIDER_H_ +#define XENIA_UI_VULKAN_VULKAN_PROVIDER_H_ + +#include + +#include "xenia/ui/graphics_provider.h" + +namespace xe { +namespace ui { +namespace vulkan { + +class VulkanDevice; +class VulkanInstance; + +class VulkanProvider : public GraphicsProvider { + public: + ~VulkanProvider() override; + + static std::unique_ptr Create(Window* main_window); + + VulkanInstance* instance() const { return instance_.get(); } + VulkanDevice* device() const { return device_.get(); } + + std::unique_ptr CreateContext( + Window* target_window) override; + std::unique_ptr CreateOffscreenContext() override; + + protected: + explicit VulkanProvider(Window* main_window); + + bool Initialize(); + + std::unique_ptr instance_; + std::unique_ptr device_; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_VULKAN_PROVIDER_H_ diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.cc b/src/xenia/ui/vulkan/vulkan_swap_chain.cc new file mode 100644 index 000000000..ec640d92f --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_swap_chain.cc @@ -0,0 +1,510 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/vulkan_swap_chain.h" + +#include + +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_device.h" +#include "xenia/ui/vulkan/vulkan_instance.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +DEFINE_bool(vulkan_random_clear_color, false, + "Randomizes framebuffer clear color."); + +namespace xe { +namespace ui { +namespace vulkan { + +VulkanSwapChain::VulkanSwapChain(VulkanInstance* instance, VulkanDevice* device) + : instance_(instance), device_(device) {} + +VulkanSwapChain::~VulkanSwapChain() { + for (auto& buffer : buffers_) { + DestroyBuffer(&buffer); + } + if (image_available_semaphore_) { + vkDestroySemaphore(*device_, image_available_semaphore_, nullptr); + } + if (render_pass_) { + vkDestroyRenderPass(*device_, render_pass_, nullptr); + } + if (render_cmd_buffer_) { + vkFreeCommandBuffers(*device_, cmd_pool_, 1, &render_cmd_buffer_); + } + if (cmd_pool_) { + vkDestroyCommandPool(*device_, cmd_pool_, nullptr); + } + // images_ doesn't need to be cleaned up as the swapchain does it implicitly. + if (handle) { + vkDestroySwapchainKHR(*device_, handle, nullptr); + handle = nullptr; + } + if (surface_) { + vkDestroySurfaceKHR(*instance_, surface_, nullptr); + } +} + +bool VulkanSwapChain::Initialize(VkSurfaceKHR surface) { + surface_ = surface; + + // Query supported target formats. 
+ uint32_t count = 0; + auto err = + vkGetPhysicalDeviceSurfaceFormatsKHR(*device_, surface_, &count, nullptr); + CheckResult(err, "vkGetPhysicalDeviceSurfaceFormatsKHR"); + std::vector surface_formats; + surface_formats.resize(count); + err = vkGetPhysicalDeviceSurfaceFormatsKHR(*device_, surface_, &count, + surface_formats.data()); + CheckResult(err, "vkGetPhysicalDeviceSurfaceFormatsKHR"); + + // If the format list includes just one entry of VK_FORMAT_UNDEFINED the + // surface has no preferred format. + // Otherwise, at least one supported format will be returned. + assert_true(surface_formats.size() >= 1); + if (surface_formats.size() == 1 && + surface_formats[0].format == VK_FORMAT_UNDEFINED) { + // Fallback to common RGBA. + surface_format_ = VK_FORMAT_R8G8B8A8_UNORM; + } else { + // Use first defined format. + surface_format_ = surface_formats[0].format; + } + + // Query surface min/max/caps. + VkSurfaceCapabilitiesKHR surface_caps; + err = vkGetPhysicalDeviceSurfaceCapabilitiesKHR(*device_, surface_, + &surface_caps); + CheckResult(err, "vkGetPhysicalDeviceSurfaceCapabilitiesKHR"); + + // Query surface properties so we can configure ourselves within bounds. + std::vector present_modes; + err = vkGetPhysicalDeviceSurfacePresentModesKHR(*device_, surface_, &count, + nullptr); + CheckResult(err, "vkGetPhysicalDeviceSurfacePresentModesKHR"); + present_modes.resize(count); + err = vkGetPhysicalDeviceSurfacePresentModesKHR(*device_, surface_, &count, + present_modes.data()); + CheckResult(err, "vkGetPhysicalDeviceSurfacePresentModesKHR"); + + // Calculate swapchain target dimensions. + VkExtent2D extent = surface_caps.currentExtent; + if (surface_caps.currentExtent.width == -1) { + assert_true(surface_caps.currentExtent.height == -1); + // Undefined extents, so we need to pick something. 
+ XELOGI("Swap chain target surface extents undefined; guessing value"); + extent.width = 1280; + extent.height = 720; + } + surface_width_ = extent.width; + surface_height_ = extent.height; + + // Always prefer mailbox mode (non-tearing, low-latency). + // If it's not available we'll use immediate (tearing, low-latency). + // If not even that we fall back to FIFO, which sucks. + VkPresentModeKHR present_mode = VK_PRESENT_MODE_FIFO_KHR; + for (size_t i = 0; i < present_modes.size(); ++i) { + if (present_modes[i] == VK_PRESENT_MODE_MAILBOX_KHR) { + // This is the best, so early-out. + present_mode = VK_PRESENT_MODE_MAILBOX_KHR; + break; + } else if (present_modes[i] == VK_PRESENT_MODE_IMMEDIATE_KHR) { + present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; + } + } + + // Determine the number of images (1 + number queued). + uint32_t image_count = surface_caps.minImageCount + 1; + if (surface_caps.maxImageCount > 0 && + image_count > surface_caps.maxImageCount) { + // Too many requested - use whatever we can. + XELOGI("Requested number of swapchain images (%d) exceeds maximum (%d)", + image_count, surface_caps.maxImageCount); + image_count = surface_caps.maxImageCount; + } + + // Always pass through whatever transform the surface started with (so long + // as it's supported). 
+ VkSurfaceTransformFlagBitsKHR pre_transform = surface_caps.currentTransform; + + VkSwapchainCreateInfoKHR create_info; + create_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; + create_info.pNext = nullptr; + create_info.flags = 0; + create_info.surface = surface_; + create_info.minImageCount = image_count; + create_info.imageFormat = surface_format_; + create_info.imageColorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR; + create_info.imageExtent.width = extent.width; + create_info.imageExtent.height = extent.height; + create_info.imageArrayLayers = 1; + create_info.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; + create_info.queueFamilyIndexCount = 0; + create_info.pQueueFamilyIndices = nullptr; + create_info.preTransform = pre_transform; + create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + create_info.presentMode = present_mode; + create_info.clipped = VK_TRUE; + create_info.oldSwapchain = nullptr; + + XELOGVK("Creating swap chain:"); + XELOGVK(" minImageCount = %u", create_info.minImageCount); + XELOGVK(" imageFormat = %s", to_string(create_info.imageFormat)); + XELOGVK(" imageExtent = %d x %d", create_info.imageExtent.width, + create_info.imageExtent.height); + auto pre_transform_str = to_flags_string(create_info.preTransform); + XELOGVK(" preTransform = %s", pre_transform_str.c_str()); + XELOGVK(" imageArrayLayers = %u", create_info.imageArrayLayers); + XELOGVK(" presentMode = %s", to_string(create_info.presentMode)); + XELOGVK(" clipped = %s", create_info.clipped ? 
"true" : "false"); + XELOGVK(" imageColorSpace = %s", to_string(create_info.imageColorSpace)); + auto image_usage_flags_str = to_flags_string(create_info.imageUsage); + XELOGVK(" imageUsageFlags = %s", image_usage_flags_str.c_str()); + XELOGVK(" imageSharingMode = %s", to_string(create_info.imageSharingMode)); + XELOGVK(" queueFamilyCount = %u", create_info.queueFamilyIndexCount); + + err = vkCreateSwapchainKHR(*device_, &create_info, nullptr, &handle); + if (err) { + XELOGE("Failed to create swapchain: %s", to_string(err)); + return false; + } + + // Create the pool used for transient buffers, so we can reset them all at + // once. + VkCommandPoolCreateInfo cmd_pool_info; + cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + cmd_pool_info.pNext = nullptr; + cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + cmd_pool_info.queueFamilyIndex = device_->queue_family_index(); + err = vkCreateCommandPool(*device_, &cmd_pool_info, nullptr, &cmd_pool_); + CheckResult(err, "vkCreateCommandPool"); + + // Make a command buffer we'll do all our primary rendering from. + VkCommandBufferAllocateInfo cmd_buffer_info; + cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + cmd_buffer_info.pNext = nullptr; + cmd_buffer_info.commandPool = cmd_pool_; + cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cmd_buffer_info.commandBufferCount = 1; + err = + vkAllocateCommandBuffers(*device_, &cmd_buffer_info, &render_cmd_buffer_); + CheckResult(err, "vkCreateCommandBuffer"); + + // Create the render pass used to draw to the swap chain. + // The actual framebuffer attached will depend on which image we are drawing + // into. 
+ VkAttachmentDescription color_attachment; + color_attachment.flags = 0; + color_attachment.format = surface_format_; + color_attachment.samples = VK_SAMPLE_COUNT_1_BIT; + color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + color_attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + color_attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkAttachmentReference color_reference; + color_reference.attachment = 0; + color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkAttachmentReference depth_reference; + depth_reference.attachment = VK_ATTACHMENT_UNUSED; + depth_reference.layout = VK_IMAGE_LAYOUT_UNDEFINED; + VkSubpassDescription render_subpass; + render_subpass.flags = 0; + render_subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + render_subpass.inputAttachmentCount = 0; + render_subpass.pInputAttachments = nullptr; + render_subpass.colorAttachmentCount = 1; + render_subpass.pColorAttachments = &color_reference; + render_subpass.pResolveAttachments = nullptr; + render_subpass.pDepthStencilAttachment = &depth_reference; + render_subpass.preserveAttachmentCount = 0, + render_subpass.pPreserveAttachments = nullptr; + VkRenderPassCreateInfo render_pass_info; + render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + render_pass_info.pNext = nullptr; + render_pass_info.attachmentCount = 1; + render_pass_info.pAttachments = &color_attachment; + render_pass_info.subpassCount = 1; + render_pass_info.pSubpasses = &render_subpass; + render_pass_info.dependencyCount = 0; + render_pass_info.pDependencies = nullptr; + err = vkCreateRenderPass(*device_, &render_pass_info, nullptr, &render_pass_); + CheckResult(err, "vkCreateRenderPass"); + + // Create a semaphore we'll use to synchronize with the 
swapchain. + VkSemaphoreCreateInfo semaphore_info; + semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semaphore_info.pNext = nullptr; + semaphore_info.flags = 0; + err = vkCreateSemaphore(*device_, &semaphore_info, nullptr, + &image_available_semaphore_); + CheckResult(err, "vkCreateSemaphore"); + + // Get images we will be presenting to. + // Note that this may differ from our requested amount. + uint32_t actual_image_count = 0; + std::vector images; + err = vkGetSwapchainImagesKHR(*device_, handle, &actual_image_count, nullptr); + CheckResult(err, "vkGetSwapchainImagesKHR"); + images.resize(actual_image_count); + err = vkGetSwapchainImagesKHR(*device_, handle, &actual_image_count, + images.data()); + CheckResult(err, "vkGetSwapchainImagesKHR"); + + // Create all buffers. + buffers_.resize(images.size()); + for (size_t i = 0; i < buffers_.size(); ++i) { + if (!InitializeBuffer(&buffers_[i], images[i])) { + XELOGE("Failed to initialize a swapchain buffer"); + return false; + } + } + + XELOGVK("Swap chain initialized successfully!"); + return true; +} + +bool VulkanSwapChain::InitializeBuffer(Buffer* buffer, VkImage target_image) { + DestroyBuffer(buffer); + buffer->image = target_image; + + // Create an image view for the presentation image. + // This will be used as a framebuffer attachment. 
+ VkImageViewCreateInfo image_view_info; + image_view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + image_view_info.pNext = nullptr; + image_view_info.flags = 0; + image_view_info.image = buffer->image; + image_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + image_view_info.format = surface_format_; + image_view_info.components.r = VK_COMPONENT_SWIZZLE_R; + image_view_info.components.g = VK_COMPONENT_SWIZZLE_G; + image_view_info.components.b = VK_COMPONENT_SWIZZLE_B; + image_view_info.components.a = VK_COMPONENT_SWIZZLE_A; + image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + image_view_info.subresourceRange.baseMipLevel = 0; + image_view_info.subresourceRange.levelCount = 1; + image_view_info.subresourceRange.baseArrayLayer = 0; + image_view_info.subresourceRange.layerCount = 1; + auto err = vkCreateImageView(*device_, &image_view_info, nullptr, + &buffer->image_view); + CheckResult(err, "vkCreateImageView"); + + // Create the framebuffer used to render into this image. 
+ VkImageView attachments[] = {buffer->image_view}; + VkFramebufferCreateInfo framebuffer_info; + framebuffer_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + framebuffer_info.pNext = nullptr; + framebuffer_info.renderPass = render_pass_; + framebuffer_info.attachmentCount = + static_cast(xe::countof(attachments)); + framebuffer_info.pAttachments = attachments; + framebuffer_info.width = surface_width_; + framebuffer_info.height = surface_height_; + framebuffer_info.layers = 1; + err = vkCreateFramebuffer(*device_, &framebuffer_info, nullptr, + &buffer->framebuffer); + CheckResult(err, "vkCreateFramebuffer"); + + return true; +} + +void VulkanSwapChain::DestroyBuffer(Buffer* buffer) { + if (buffer->framebuffer) { + vkDestroyFramebuffer(*device_, buffer->framebuffer, nullptr); + buffer->framebuffer = nullptr; + } + if (buffer->image_view) { + vkDestroyImageView(*device_, buffer->image_view, nullptr); + buffer->image_view = nullptr; + } + // Image is taken care of by the presentation engine. + buffer->image = nullptr; +} + +bool VulkanSwapChain::Begin() { + // Get the index of the next available swapchain image. + auto err = + vkAcquireNextImageKHR(*device_, handle, 0, image_available_semaphore_, + nullptr, ¤t_buffer_index_); + CheckResult(err, "vkAcquireNextImageKHR"); + + // Wait for the acquire semaphore to be signaled so that the following + // operations know they can start modifying the image. + VkSubmitInfo wait_submit_info; + wait_submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + wait_submit_info.pNext = nullptr; + wait_submit_info.waitSemaphoreCount = 1; + wait_submit_info.pWaitSemaphores = &image_available_semaphore_; + wait_submit_info.commandBufferCount = 0; + wait_submit_info.pCommandBuffers = nullptr; + wait_submit_info.signalSemaphoreCount = 0; + wait_submit_info.pSignalSemaphores = nullptr; + err = vkQueueSubmit(device_->primary_queue(), 1, &wait_submit_info, nullptr); + CheckResult(err, "vkQueueSubmit"); + + // Reset all command buffers. 
+ vkResetCommandBuffer(render_cmd_buffer_, + VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + auto& current_buffer = buffers_[current_buffer_index_]; + + // Build the command buffer that will execute all queued rendering buffers. + VkCommandBufferBeginInfo begin_info; + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.pNext = nullptr; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + begin_info.pInheritanceInfo = nullptr; + err = vkBeginCommandBuffer(render_cmd_buffer_, &begin_info); + CheckResult(err, "vkBeginCommandBuffer"); + + // Transition the image to a format we can render to. + VkImageMemoryBarrier pre_image_memory_barrier; + pre_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + pre_image_memory_barrier.pNext = nullptr; + pre_image_memory_barrier.srcAccessMask = 0; + pre_image_memory_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + pre_image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + pre_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + pre_image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + pre_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + pre_image_memory_barrier.image = current_buffer.image; + pre_image_memory_barrier.subresourceRange.aspectMask = + VK_IMAGE_ASPECT_COLOR_BIT; + pre_image_memory_barrier.subresourceRange.baseMipLevel = 0; + pre_image_memory_barrier.subresourceRange.levelCount = 1; + pre_image_memory_barrier.subresourceRange.baseArrayLayer = 0; + pre_image_memory_barrier.subresourceRange.layerCount = 1; + vkCmdPipelineBarrier(render_cmd_buffer_, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &pre_image_memory_barrier); + + // Begin render pass. 
+ VkClearValue color_clear_value; + color_clear_value.color.float32[0] = 238 / 255.0f; + color_clear_value.color.float32[1] = 238 / 255.0f; + color_clear_value.color.float32[2] = 238 / 255.0f; + color_clear_value.color.float32[3] = 1.0f; + if (FLAGS_vulkan_random_clear_color) { + color_clear_value.color.float32[0] = + rand() / static_cast(RAND_MAX); // NOLINT(runtime/threadsafe_fn) + color_clear_value.color.float32[1] = 1.0f; + color_clear_value.color.float32[2] = 0.0f; + } + VkClearValue clear_values[] = {color_clear_value}; + VkRenderPassBeginInfo render_pass_begin_info; + render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + render_pass_begin_info.pNext = nullptr; + render_pass_begin_info.renderPass = render_pass_; + render_pass_begin_info.framebuffer = current_buffer.framebuffer; + render_pass_begin_info.renderArea.offset.x = 0; + render_pass_begin_info.renderArea.offset.y = 0; + render_pass_begin_info.renderArea.extent.width = surface_width_; + render_pass_begin_info.renderArea.extent.height = surface_height_; + render_pass_begin_info.clearValueCount = + static_cast(xe::countof(clear_values)); + render_pass_begin_info.pClearValues = clear_values; + vkCmdBeginRenderPass(render_cmd_buffer_, &render_pass_begin_info, + VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS); + + return true; +} + +bool VulkanSwapChain::End() { + auto& current_buffer = buffers_[current_buffer_index_]; + + // End render pass. + vkCmdEndRenderPass(render_cmd_buffer_); + + // Transition the image to a format the presentation engine can source from. 
+ VkImageMemoryBarrier post_image_memory_barrier; + post_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + post_image_memory_barrier.pNext = nullptr; + post_image_memory_barrier.srcAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + post_image_memory_barrier.dstAccessMask = 0; + post_image_memory_barrier.oldLayout = + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + post_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + post_image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + post_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + post_image_memory_barrier.image = current_buffer.image; + post_image_memory_barrier.subresourceRange.aspectMask = + VK_IMAGE_ASPECT_COLOR_BIT; + post_image_memory_barrier.subresourceRange.baseMipLevel = 0; + post_image_memory_barrier.subresourceRange.levelCount = 1; + post_image_memory_barrier.subresourceRange.baseArrayLayer = 0; + post_image_memory_barrier.subresourceRange.layerCount = 1; + vkCmdPipelineBarrier(render_cmd_buffer_, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &post_image_memory_barrier); + + auto err = vkEndCommandBuffer(render_cmd_buffer_); + CheckResult(err, "vkEndCommandBuffer"); + + // Submit rendering. + VkSubmitInfo render_submit_info; + render_submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + render_submit_info.pNext = nullptr; + render_submit_info.waitSemaphoreCount = 0; + render_submit_info.pWaitSemaphores = nullptr; + render_submit_info.commandBufferCount = 1; + render_submit_info.pCommandBuffers = &render_cmd_buffer_; + render_submit_info.signalSemaphoreCount = 0; + render_submit_info.pSignalSemaphores = nullptr; + err = + vkQueueSubmit(device_->primary_queue(), 1, &render_submit_info, nullptr); + CheckResult(err, "vkQueueSubmit"); + + // Queue the present of our current image. 
+ const VkSwapchainKHR swap_chains[] = {handle}; + const uint32_t swap_chain_image_indices[] = {current_buffer_index_}; + VkPresentInfoKHR present_info; + present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + present_info.pNext = nullptr; + present_info.waitSemaphoreCount = 0; + present_info.pWaitSemaphores = nullptr; + present_info.swapchainCount = static_cast(xe::countof(swap_chains)); + present_info.pSwapchains = swap_chains; + present_info.pImageIndices = swap_chain_image_indices; + present_info.pResults = nullptr; + err = vkQueuePresentKHR(device_->primary_queue(), &present_info); + switch (err) { + case VK_SUCCESS: + break; + case VK_SUBOPTIMAL_KHR: + // We are not rendering at the right size - but the presentation engine + // will scale the output for us. + break; + case VK_ERROR_OUT_OF_DATE_KHR: + // Lost presentation ability; need to recreate the swapchain. + // TODO(benvanik): recreate swapchain. + assert_always("Swapchain recreation not implemented"); + break; + default: + XELOGE("Failed to queue present: %s", to_string(err)); + assert_always("Unexpected queue present failure"); + return false; + } + + return true; +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.h b/src/xenia/ui/vulkan/vulkan_swap_chain.h new file mode 100644 index 000000000..18bb26cee --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_swap_chain.h @@ -0,0 +1,80 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_VULKAN_SWAP_CHAIN_H_ +#define XENIA_UI_VULKAN_VULKAN_SWAP_CHAIN_H_ + +#include +#include +#include + +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +namespace xe { +namespace ui { +namespace vulkan { + +class VulkanDevice; +class VulkanInstance; + +class VulkanSwapChain { + public: + VulkanSwapChain(VulkanInstance* instance, VulkanDevice* device); + ~VulkanSwapChain(); + + VkSwapchainKHR handle = nullptr; + + operator VkSwapchainKHR() const { return handle; } + + uint32_t surface_width() const { return surface_width_; } + uint32_t surface_height() const { return surface_height_; } + + // Render pass used for compositing. + VkRenderPass render_pass() const { return render_pass_; } + // Render command buffer, active inside the render pass from Begin to End. + VkCommandBuffer render_cmd_buffer() const { return render_cmd_buffer_; } + + bool Initialize(VkSurfaceKHR surface); + + // Begins the swap operation, preparing state for rendering. + bool Begin(); + // Ends the swap operation, finalizing rendering and presenting the results. 
+  bool End();
+
+ private:
+  struct Buffer {
+    VkImage image = nullptr;
+    VkImageView image_view = nullptr;
+    VkFramebuffer framebuffer = nullptr;
+  };
+
+  bool InitializeBuffer(Buffer* buffer, VkImage target_image);
+  void DestroyBuffer(Buffer* buffer);
+
+  VulkanInstance* instance_ = nullptr;
+  VulkanDevice* device_ = nullptr;
+
+  VkSurfaceKHR surface_ = nullptr;
+  uint32_t surface_width_ = 0;
+  uint32_t surface_height_ = 0;
+  VkFormat surface_format_ = VK_FORMAT_UNDEFINED;
+  VkCommandPool cmd_pool_ = nullptr;
+  VkCommandBuffer render_cmd_buffer_ = nullptr;
+  VkRenderPass render_pass_ = nullptr;
+  VkSemaphore image_available_semaphore_ = nullptr;
+  uint32_t current_buffer_index_ = 0;
+  std::vector<Buffer> buffers_;
+};
+
+}  // namespace vulkan
+}  // namespace ui
+}  // namespace xe
+
+#endif  // XENIA_UI_VULKAN_VULKAN_SWAP_CHAIN_H_
diff --git a/src/xenia/ui/vulkan/vulkan_util.cc b/src/xenia/ui/vulkan/vulkan_util.cc
new file mode 100644
index 000000000..54402286f
--- /dev/null
+++ b/src/xenia/ui/vulkan/vulkan_util.cc
@@ -0,0 +1,464 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved. *
+ * Released under the BSD license - see LICENSE in the root for more details.
* + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/vulkan_util.h" + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" + +namespace xe { +namespace ui { +namespace vulkan { + +uint32_t Version::Make(uint32_t major, uint32_t minor, uint32_t patch) { + return VK_MAKE_VERSION(major, minor, patch); +} + +Version Version::Parse(uint32_t value) { + Version version; + version.major = VK_VERSION_MAJOR(value); + version.minor = VK_VERSION_MINOR(value); + version.patch = VK_VERSION_PATCH(value); + version.pretty_string = xe::format_string("%u.%u.%u", version.major, + version.minor, version.patch); + return version; +} + +const char* to_string(VkFormat format) { + switch (format) { +#define STR(r) \ + case r: \ + return #r + STR(VK_FORMAT_UNDEFINED); + STR(VK_FORMAT_R4G4_UNORM_PACK8); + STR(VK_FORMAT_R4G4B4A4_UNORM_PACK16); + STR(VK_FORMAT_B4G4R4A4_UNORM_PACK16); + STR(VK_FORMAT_R5G6B5_UNORM_PACK16); + STR(VK_FORMAT_B5G6R5_UNORM_PACK16); + STR(VK_FORMAT_R5G5B5A1_UNORM_PACK16); + STR(VK_FORMAT_B5G5R5A1_UNORM_PACK16); + STR(VK_FORMAT_A1R5G5B5_UNORM_PACK16); + STR(VK_FORMAT_R8_UNORM); + STR(VK_FORMAT_R8_SNORM); + STR(VK_FORMAT_R8_USCALED); + STR(VK_FORMAT_R8_SSCALED); + STR(VK_FORMAT_R8_UINT); + STR(VK_FORMAT_R8_SINT); + STR(VK_FORMAT_R8_SRGB); + STR(VK_FORMAT_R8G8_UNORM); + STR(VK_FORMAT_R8G8_SNORM); + STR(VK_FORMAT_R8G8_USCALED); + STR(VK_FORMAT_R8G8_SSCALED); + STR(VK_FORMAT_R8G8_UINT); + STR(VK_FORMAT_R8G8_SINT); + STR(VK_FORMAT_R8G8_SRGB); + STR(VK_FORMAT_R8G8B8_UNORM); + STR(VK_FORMAT_R8G8B8_SNORM); + STR(VK_FORMAT_R8G8B8_USCALED); + STR(VK_FORMAT_R8G8B8_SSCALED); + STR(VK_FORMAT_R8G8B8_UINT); + STR(VK_FORMAT_R8G8B8_SINT); + STR(VK_FORMAT_R8G8B8_SRGB); + STR(VK_FORMAT_B8G8R8_UNORM); + STR(VK_FORMAT_B8G8R8_SNORM); + STR(VK_FORMAT_B8G8R8_USCALED); + STR(VK_FORMAT_B8G8R8_SSCALED); + STR(VK_FORMAT_B8G8R8_UINT); + STR(VK_FORMAT_B8G8R8_SINT); + STR(VK_FORMAT_B8G8R8_SRGB); + 
STR(VK_FORMAT_R8G8B8A8_UNORM); + STR(VK_FORMAT_R8G8B8A8_SNORM); + STR(VK_FORMAT_R8G8B8A8_USCALED); + STR(VK_FORMAT_R8G8B8A8_SSCALED); + STR(VK_FORMAT_R8G8B8A8_UINT); + STR(VK_FORMAT_R8G8B8A8_SINT); + STR(VK_FORMAT_R8G8B8A8_SRGB); + STR(VK_FORMAT_B8G8R8A8_UNORM); + STR(VK_FORMAT_B8G8R8A8_SNORM); + STR(VK_FORMAT_B8G8R8A8_USCALED); + STR(VK_FORMAT_B8G8R8A8_SSCALED); + STR(VK_FORMAT_B8G8R8A8_UINT); + STR(VK_FORMAT_B8G8R8A8_SINT); + STR(VK_FORMAT_B8G8R8A8_SRGB); + STR(VK_FORMAT_A8B8G8R8_UNORM_PACK32); + STR(VK_FORMAT_A8B8G8R8_SNORM_PACK32); + STR(VK_FORMAT_A8B8G8R8_USCALED_PACK32); + STR(VK_FORMAT_A8B8G8R8_SSCALED_PACK32); + STR(VK_FORMAT_A8B8G8R8_UINT_PACK32); + STR(VK_FORMAT_A8B8G8R8_SINT_PACK32); + STR(VK_FORMAT_A8B8G8R8_SRGB_PACK32); + STR(VK_FORMAT_A2R10G10B10_UNORM_PACK32); + STR(VK_FORMAT_A2R10G10B10_SNORM_PACK32); + STR(VK_FORMAT_A2R10G10B10_USCALED_PACK32); + STR(VK_FORMAT_A2R10G10B10_SSCALED_PACK32); + STR(VK_FORMAT_A2R10G10B10_UINT_PACK32); + STR(VK_FORMAT_A2R10G10B10_SINT_PACK32); + STR(VK_FORMAT_A2B10G10R10_UNORM_PACK32); + STR(VK_FORMAT_A2B10G10R10_SNORM_PACK32); + STR(VK_FORMAT_A2B10G10R10_USCALED_PACK32); + STR(VK_FORMAT_A2B10G10R10_SSCALED_PACK32); + STR(VK_FORMAT_A2B10G10R10_UINT_PACK32); + STR(VK_FORMAT_A2B10G10R10_SINT_PACK32); + STR(VK_FORMAT_R16_UNORM); + STR(VK_FORMAT_R16_SNORM); + STR(VK_FORMAT_R16_USCALED); + STR(VK_FORMAT_R16_SSCALED); + STR(VK_FORMAT_R16_UINT); + STR(VK_FORMAT_R16_SINT); + STR(VK_FORMAT_R16_SFLOAT); + STR(VK_FORMAT_R16G16_UNORM); + STR(VK_FORMAT_R16G16_SNORM); + STR(VK_FORMAT_R16G16_USCALED); + STR(VK_FORMAT_R16G16_SSCALED); + STR(VK_FORMAT_R16G16_UINT); + STR(VK_FORMAT_R16G16_SINT); + STR(VK_FORMAT_R16G16_SFLOAT); + STR(VK_FORMAT_R16G16B16_UNORM); + STR(VK_FORMAT_R16G16B16_SNORM); + STR(VK_FORMAT_R16G16B16_USCALED); + STR(VK_FORMAT_R16G16B16_SSCALED); + STR(VK_FORMAT_R16G16B16_UINT); + STR(VK_FORMAT_R16G16B16_SINT); + STR(VK_FORMAT_R16G16B16_SFLOAT); + STR(VK_FORMAT_R16G16B16A16_UNORM); + STR(VK_FORMAT_R16G16B16A16_SNORM); + 
STR(VK_FORMAT_R16G16B16A16_USCALED); + STR(VK_FORMAT_R16G16B16A16_SSCALED); + STR(VK_FORMAT_R16G16B16A16_UINT); + STR(VK_FORMAT_R16G16B16A16_SINT); + STR(VK_FORMAT_R16G16B16A16_SFLOAT); + STR(VK_FORMAT_R32_UINT); + STR(VK_FORMAT_R32_SINT); + STR(VK_FORMAT_R32_SFLOAT); + STR(VK_FORMAT_R32G32_UINT); + STR(VK_FORMAT_R32G32_SINT); + STR(VK_FORMAT_R32G32_SFLOAT); + STR(VK_FORMAT_R32G32B32_UINT); + STR(VK_FORMAT_R32G32B32_SINT); + STR(VK_FORMAT_R32G32B32_SFLOAT); + STR(VK_FORMAT_R32G32B32A32_UINT); + STR(VK_FORMAT_R32G32B32A32_SINT); + STR(VK_FORMAT_R32G32B32A32_SFLOAT); + STR(VK_FORMAT_R64_UINT); + STR(VK_FORMAT_R64_SINT); + STR(VK_FORMAT_R64_SFLOAT); + STR(VK_FORMAT_R64G64_UINT); + STR(VK_FORMAT_R64G64_SINT); + STR(VK_FORMAT_R64G64_SFLOAT); + STR(VK_FORMAT_R64G64B64_UINT); + STR(VK_FORMAT_R64G64B64_SINT); + STR(VK_FORMAT_R64G64B64_SFLOAT); + STR(VK_FORMAT_R64G64B64A64_UINT); + STR(VK_FORMAT_R64G64B64A64_SINT); + STR(VK_FORMAT_R64G64B64A64_SFLOAT); + STR(VK_FORMAT_B10G11R11_UFLOAT_PACK32); + STR(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32); + STR(VK_FORMAT_D16_UNORM); + STR(VK_FORMAT_X8_D24_UNORM_PACK32); + STR(VK_FORMAT_D32_SFLOAT); + STR(VK_FORMAT_S8_UINT); + STR(VK_FORMAT_D16_UNORM_S8_UINT); + STR(VK_FORMAT_D24_UNORM_S8_UINT); + STR(VK_FORMAT_D32_SFLOAT_S8_UINT); + STR(VK_FORMAT_BC1_RGB_UNORM_BLOCK); + STR(VK_FORMAT_BC1_RGB_SRGB_BLOCK); + STR(VK_FORMAT_BC1_RGBA_UNORM_BLOCK); + STR(VK_FORMAT_BC1_RGBA_SRGB_BLOCK); + STR(VK_FORMAT_BC2_UNORM_BLOCK); + STR(VK_FORMAT_BC2_SRGB_BLOCK); + STR(VK_FORMAT_BC3_UNORM_BLOCK); + STR(VK_FORMAT_BC3_SRGB_BLOCK); + STR(VK_FORMAT_BC4_UNORM_BLOCK); + STR(VK_FORMAT_BC4_SNORM_BLOCK); + STR(VK_FORMAT_BC5_UNORM_BLOCK); + STR(VK_FORMAT_BC5_SNORM_BLOCK); + STR(VK_FORMAT_BC6H_UFLOAT_BLOCK); + STR(VK_FORMAT_BC6H_SFLOAT_BLOCK); + STR(VK_FORMAT_BC7_UNORM_BLOCK); + STR(VK_FORMAT_BC7_SRGB_BLOCK); + STR(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK); + STR(VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK); + STR(VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK); + 
STR(VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK); + STR(VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK); + STR(VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK); + STR(VK_FORMAT_EAC_R11_UNORM_BLOCK); + STR(VK_FORMAT_EAC_R11_SNORM_BLOCK); + STR(VK_FORMAT_EAC_R11G11_UNORM_BLOCK); + STR(VK_FORMAT_EAC_R11G11_SNORM_BLOCK); + STR(VK_FORMAT_ASTC_4x4_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_4x4_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_5x4_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_5x4_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_5x5_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_5x5_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_6x5_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_6x5_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_6x6_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_6x6_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_8x5_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_8x5_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_8x6_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_8x6_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_8x8_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_8x8_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_10x5_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_10x5_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_10x6_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_10x6_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_10x8_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_10x8_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_10x10_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_10x10_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_12x10_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_12x10_SRGB_BLOCK); + STR(VK_FORMAT_ASTC_12x12_UNORM_BLOCK); + STR(VK_FORMAT_ASTC_12x12_SRGB_BLOCK); +#undef STR + default: + return "UNKNOWN_FORMAT"; + } +} + +const char* to_string(VkPhysicalDeviceType type) { + switch (type) { +#define STR(r) \ + case r: \ + return #r + STR(VK_PHYSICAL_DEVICE_TYPE_OTHER); + STR(VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU); + STR(VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU); + STR(VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU); + STR(VK_PHYSICAL_DEVICE_TYPE_CPU); +#undef STR + default: + return "UNKNOWN_DEVICE"; + } +} + +const char* to_string(VkSharingMode sharing_mode) { + switch (sharing_mode) { +#define STR(r) \ + case r: \ + return #r + STR(VK_SHARING_MODE_EXCLUSIVE); + 
STR(VK_SHARING_MODE_CONCURRENT); +#undef STR + default: + return "UNKNOWN_SHARING_MODE"; + } +} + +const char* to_string(VkResult result) { + switch (result) { +#define STR(r) \ + case r: \ + return #r + STR(VK_SUCCESS); + STR(VK_NOT_READY); + STR(VK_TIMEOUT); + STR(VK_EVENT_SET); + STR(VK_EVENT_RESET); + STR(VK_INCOMPLETE); + STR(VK_ERROR_OUT_OF_HOST_MEMORY); + STR(VK_ERROR_OUT_OF_DEVICE_MEMORY); + STR(VK_ERROR_INITIALIZATION_FAILED); + STR(VK_ERROR_DEVICE_LOST); + STR(VK_ERROR_MEMORY_MAP_FAILED); + STR(VK_ERROR_LAYER_NOT_PRESENT); + STR(VK_ERROR_EXTENSION_NOT_PRESENT); + STR(VK_ERROR_FEATURE_NOT_PRESENT); + STR(VK_ERROR_INCOMPATIBLE_DRIVER); + STR(VK_ERROR_TOO_MANY_OBJECTS); + STR(VK_ERROR_FORMAT_NOT_SUPPORTED); + STR(VK_ERROR_SURFACE_LOST_KHR); + STR(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); + STR(VK_SUBOPTIMAL_KHR); + STR(VK_ERROR_OUT_OF_DATE_KHR); + STR(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); + STR(VK_ERROR_VALIDATION_FAILED_EXT); +#undef STR + default: + return "UNKNOWN_RESULT"; + } +} + +std::string to_flags_string(VkImageUsageFlags flags) { + std::string result; +#define OR_FLAG(f) \ + if (flags & f) { \ + if (!result.empty()) { \ + result += " | "; \ + } \ + result += #f; \ + } + OR_FLAG(VK_IMAGE_USAGE_TRANSFER_SRC_BIT); + OR_FLAG(VK_IMAGE_USAGE_TRANSFER_DST_BIT); + OR_FLAG(VK_IMAGE_USAGE_SAMPLED_BIT); + OR_FLAG(VK_IMAGE_USAGE_STORAGE_BIT); + OR_FLAG(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + OR_FLAG(VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); + OR_FLAG(VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT); + OR_FLAG(VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT); +#undef OR_FLAG + return result; +} + +std::string to_flags_string(VkSurfaceTransformFlagBitsKHR flags) { + std::string result; +#define OR_FLAG(f) \ + if (flags & f) { \ + if (!result.empty()) { \ + result += " | "; \ + } \ + result += #f; \ + } + OR_FLAG(VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR); + OR_FLAG(VK_SURFACE_TRANSFORM_ROTATE_90_BIT_KHR); + OR_FLAG(VK_SURFACE_TRANSFORM_ROTATE_180_BIT_KHR); + 
OR_FLAG(VK_SURFACE_TRANSFORM_ROTATE_270_BIT_KHR); + OR_FLAG(VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_BIT_KHR); + OR_FLAG(VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_90_BIT_KHR); + OR_FLAG(VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_180_BIT_KHR); + OR_FLAG(VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_270_BIT_KHR); + OR_FLAG(VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR); +#undef OR_FLAG + return result; +} + +const char* to_string(VkColorSpaceKHR color_space) { + switch (color_space) { +#define STR(r) \ + case r: \ + return #r + STR(VK_COLORSPACE_SRGB_NONLINEAR_KHR); +#undef STR + default: + return "UNKNOWN_COLORSPACE"; + } +} + +const char* to_string(VkPresentModeKHR present_mode) { + switch (present_mode) { +#define STR(r) \ + case r: \ + return #r + STR(VK_PRESENT_MODE_IMMEDIATE_KHR); + STR(VK_PRESENT_MODE_MAILBOX_KHR); + STR(VK_PRESENT_MODE_FIFO_KHR); + STR(VK_PRESENT_MODE_FIFO_RELAXED_KHR); +#undef STR + default: + return "UNKNOWN_PRESENT_MODE"; + } +} + +void FatalVulkanError(std::string error) { + xe::FatalError( + error + + "\nEnsure you have the latest drivers for your GPU and that it supports " + "Vulkan. 
See http://xenia.jp/faq/ for more information and a list "
+      "of supported GPUs.");
+}
+
+void CheckResult(VkResult result, const char* action) {
+  if (result) {
+    XELOGE("Vulkan check: %s returned %s", action, to_string(result));
+  }
+  assert_true(result == VK_SUCCESS, action);
+}
+
+std::pair<bool, std::vector<const char*>> CheckRequirements(
+    const std::vector<Requirement>& requirements,
+    const std::vector<LayerInfo>& layer_infos) {
+  bool any_missing = false;
+  std::vector<const char*> enabled_layers;
+  for (auto& requirement : requirements) {
+    bool found = false;
+    for (size_t j = 0; j < layer_infos.size(); ++j) {
+      auto layer_name = layer_infos[j].properties.layerName;
+      auto layer_version =
+          Version::Parse(layer_infos[j].properties.specVersion);
+      if (requirement.name == layer_name) {
+        found = true;
+        if (requirement.min_version > layer_infos[j].properties.specVersion) {
+          if (requirement.is_optional) {
+            XELOGVK("- optional layer %s (%s) version mismatch", layer_name,
+                    layer_version.pretty_string.c_str());
+            continue;
+          }
+          XELOGE("ERROR: required layer %s (%s) version mismatch", layer_name,
+                 layer_version.pretty_string.c_str());
+          any_missing = true;
+          break;
+        }
+        XELOGVK("- enabling layer %s (%s)", layer_name,
+                layer_version.pretty_string.c_str());
+        enabled_layers.push_back(layer_name);
+        break;
+      }
+    }
+    if (!found) {
+      if (requirement.is_optional) {
+        XELOGVK("- optional layer %s not found", requirement.name.c_str());
+      } else {
+        XELOGE("ERROR: required layer %s not found", requirement.name.c_str());
+        any_missing = true;
+      }
+    }
+  }
+  return {!any_missing, enabled_layers};
+}
+
+std::pair<bool, std::vector<const char*>> CheckRequirements(
+    const std::vector<Requirement>& requirements,
+    const std::vector<VkExtensionProperties>& extension_properties) {
+  bool any_missing = false;
+  std::vector<const char*> enabled_extensions;
+  for (auto& requirement : requirements) {
+    bool found = false;
+    for (size_t j = 0; j < extension_properties.size(); ++j) {
+      auto extension_name = extension_properties[j].extensionName;
+      auto extension_version =
+          Version::Parse(extension_properties[j].specVersion);
+      if (requirement.name == extension_name) {
+        found = true;
+        if (requirement.min_version > extension_properties[j].specVersion) {
+          if (requirement.is_optional) {
+            XELOGVK("- optional extension %s (%s) version mismatch",
+                    extension_name, extension_version.pretty_string.c_str());
+            continue;
+          }
+          XELOGE("ERROR: required extension %s (%s) version mismatch",
+                 extension_name, extension_version.pretty_string.c_str());
+          any_missing = true;
+          break;
+        }
+        XELOGVK("- enabling extension %s (%s)", extension_name,
+                extension_version.pretty_string.c_str());
+        enabled_extensions.push_back(extension_name);
+        break;
+      }
+    }
+    if (!found) {
+      if (requirement.is_optional) {
+        XELOGVK("- optional extension %s not found", requirement.name.c_str());
+      } else {
+        XELOGE("ERROR: required extension %s not found",
+               requirement.name.c_str());
+        any_missing = true;
+      }
+    }
+  }
+  return {!any_missing, enabled_extensions};
+}
+
+}  // namespace vulkan
+}  // namespace ui
+}  // namespace xe
diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h
new file mode 100644
index 000000000..fcf9e4f8f
--- /dev/null
+++ b/src/xenia/ui/vulkan/vulkan_util.h
@@ -0,0 +1,101 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved. *
+ * Released under the BSD license - see LICENSE in the root for more details.
*
+ ******************************************************************************
+ */
+
+#ifndef XENIA_UI_VULKAN_VULKAN_UTIL_H_
+#define XENIA_UI_VULKAN_VULKAN_UTIL_H_
+
+#include <string>
+#include <vector>
+
+#include "xenia/ui/vulkan/vulkan.h"
+
+namespace xe {
+namespace ui {
+class Window;
+}  // namespace ui
+}  // namespace xe
+
+namespace xe {
+namespace ui {
+namespace vulkan {
+
+struct Version {
+  uint32_t major;
+  uint32_t minor;
+  uint32_t patch;
+  std::string pretty_string;
+
+  static uint32_t Make(uint32_t major, uint32_t minor, uint32_t patch);
+
+  static Version Parse(uint32_t value);
+};
+
+const char* to_string(VkFormat format);
+const char* to_string(VkPhysicalDeviceType type);
+const char* to_string(VkSharingMode sharing_mode);
+const char* to_string(VkResult result);
+
+std::string to_flags_string(VkImageUsageFlags flags);
+std::string to_flags_string(VkSurfaceTransformFlagBitsKHR flags);
+
+const char* to_string(VkColorSpaceKHR color_space);
+const char* to_string(VkPresentModeKHR present_mode);
+
+// Throws a fatal error with some Vulkan help text.
+void FatalVulkanError(std::string error);
+
+// Logs and asserts, expecting the result to be VK_SUCCESS.
+void CheckResult(VkResult result, const char* action);
+
+struct LayerInfo {
+  VkLayerProperties properties;
+  std::vector<VkExtensionProperties> extensions;
+};
+
+struct DeviceInfo {
+  VkPhysicalDevice handle;
+  VkPhysicalDeviceProperties properties;
+  VkPhysicalDeviceFeatures features;
+  VkPhysicalDeviceMemoryProperties memory_properties;
+  std::vector<VkQueueFamilyProperties> queue_family_properties;
+  std::vector<VkBool32> queue_family_supports_present;
+  std::vector<LayerInfo> layers;
+  std::vector<VkExtensionProperties> extensions;
+};
+
+// Defines a requirement for a layer or extension, used to both verify and
+// enable them on initialization.
+struct Requirement {
+  // Layer or extension name.
+  std::string name;
+  // Minimum required spec version of the layer or extension.
+  uint32_t min_version;
+  // True if the requirement is optional (will not cause verification to fail).
+  bool is_optional;
+};
+
+// Gets a list of enabled layer names based on the given layer requirements and
+// available layer info.
+// Returns a boolean indicating whether all required layers are present.
+std::pair<bool, std::vector<const char*>> CheckRequirements(
+    const std::vector<Requirement>& requirements,
+    const std::vector<LayerInfo>& layer_infos);
+
+// Gets a list of enabled extension names based on the given extension
+// requirements and available extensions.
+// Returns a boolean indicating whether all required extensions are present.
+std::pair<bool, std::vector<const char*>> CheckRequirements(
+    const std::vector<Requirement>& requirements,
+    const std::vector<VkExtensionProperties>& extension_properties);
+
+}  // namespace vulkan
+}  // namespace ui
+}  // namespace xe
+
+#endif  // XENIA_UI_VULKAN_VULKAN_UTIL_H_
diff --git a/src/xenia/ui/vulkan/vulkan_window_demo.cc b/src/xenia/ui/vulkan/vulkan_window_demo.cc
new file mode 100644
index 000000000..fad5e90e2
--- /dev/null
+++ b/src/xenia/ui/vulkan/vulkan_window_demo.cc
@@ -0,0 +1,30 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved. *
+ * Released under the BSD license - see LICENSE in the root for more details.
* + ****************************************************************************** + */ + +#include +#include + +#include "xenia/base/main.h" +#include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/window.h" + +namespace xe { +namespace ui { + +int window_demo_main(const std::vector& args); + +std::unique_ptr CreateDemoGraphicsProvider(Window* window) { + return xe::ui::vulkan::VulkanProvider::Create(window); +} + +} // namespace ui +} // namespace xe + +DEFINE_ENTRY_POINT(L"xenia-ui-window-vulkan-demo", + L"xenia-ui-window-vulkan-demo", xe::ui::window_demo_main); diff --git a/src/xenia/ui/window_demo.cc b/src/xenia/ui/window_demo.cc index 75c74931b..454864a92 100644 --- a/src/xenia/ui/window_demo.cc +++ b/src/xenia/ui/window_demo.cc @@ -87,11 +87,10 @@ int window_demo_main(const std::vector& args) { }); window->on_closed.AddListener( - [&loop, &graphics_provider](xe::ui::UIEvent* e) { + [&loop, &window, &graphics_provider](xe::ui::UIEvent* e) { loop->Quit(); + Profiler::Shutdown(); XELOGI("User-initiated death!"); - graphics_provider.reset(); - exit(1); }); loop->on_quit.AddListener([&window](xe::ui::UIEvent* e) { window.reset(); }); @@ -116,11 +115,9 @@ int window_demo_main(const std::vector& args) { // Wait until we are exited. 
loop->AwaitQuit(); - loop->PostSynchronous([&graphics_provider]() { graphics_provider.reset(); }); window.reset(); loop.reset(); - Profiler::Dump(); - Profiler::Shutdown(); + graphics_provider.reset(); return 0; } diff --git a/third_party/spirv-tools b/third_party/spirv-tools index 224348faf..4d2f2239b 160000 --- a/third_party/spirv-tools +++ b/third_party/spirv-tools @@ -1 +1 @@ -Subproject commit 224348faf0616b5bea635141f4b28ee0faab3002 +Subproject commit 4d2f2239bf896dc14127e25011f41ac79d687052 diff --git a/third_party/spirv-tools.lua b/third_party/spirv-tools.lua index 90d0e151a..4218ff08e 100644 --- a/third_party/spirv-tools.lua +++ b/third_party/spirv-tools.lua @@ -16,9 +16,7 @@ project("spirv-tools") "spirv-tools/external/include/headers/GLSL.std.450.h", "spirv-tools/external/include/headers/OpenCL.std.h", "spirv-tools/external/include/headers/spirv.h", - "spirv-tools/include/libspirv/libspirv.h", - "spirv-tools/include/util/bitutils.h", - "spirv-tools/include/util/hex_float.h", + "spirv-tools/include/spirv-tools/libspirv.h", "spirv-tools/source/assembly_grammar.cpp", "spirv-tools/source/assembly_grammar.h", "spirv-tools/source/binary.cpp", @@ -26,10 +24,9 @@ project("spirv-tools") "spirv-tools/source/diagnostic.cpp", "spirv-tools/source/diagnostic.h", "spirv-tools/source/disassemble.cpp", - "spirv-tools/source/endian.cpp", - "spirv-tools/source/endian.h", "spirv-tools/source/ext_inst.cpp", "spirv-tools/source/ext_inst.h", + "spirv-tools/source/instruction.cpp", "spirv-tools/source/instruction.h", "spirv-tools/source/opcode.cpp", "spirv-tools/source/opcode.h", @@ -41,6 +38,8 @@ project("spirv-tools") "spirv-tools/source/print.h", "spirv-tools/source/spirv_constant.h", "spirv-tools/source/spirv_definition.h", + "spirv-tools/source/spirv_endian.cpp", + "spirv-tools/source/spirv_endian.h", "spirv-tools/source/spirv_operands.h", "spirv-tools/source/table.cpp", "spirv-tools/source/table.h", @@ -50,5 +49,13 @@ project("spirv-tools") 
"spirv-tools/source/text_handler.h", "spirv-tools/source/validate.cpp", "spirv-tools/source/validate.h", + "spirv-tools/source/validate_cfg.cpp", "spirv-tools/source/validate_id.cpp", + "spirv-tools/source/validate_instruction.cpp", + "spirv-tools/source/validate_layout.cpp", + "spirv-tools/source/validate_passes.h", + "spirv-tools/source/validate_ssa.cpp", + "spirv-tools/source/validate_types.cpp", + "spirv-tools/source/util/bitutils.h", + "spirv-tools/source/util/hex_float.h", }) diff --git a/third_party/spirv/GLSL.std.450.h b/third_party/spirv/GLSL.std.450.h index ed6f8b671..df31092be 100644 --- a/third_party/spirv/GLSL.std.450.h +++ b/third_party/spirv/GLSL.std.450.h @@ -1,5 +1,5 @@ /* -** Copyright (c) 2014-2015 The Khronos Group Inc. +** Copyright (c) 2014-2016 The Khronos Group Inc. ** ** Permission is hereby granted, free of charge, to any person obtaining a copy ** of this software and/or associated documentation files (the "Materials"), @@ -13,7 +13,7 @@ ** ** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS ** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ ** ** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -27,105 +27,105 @@ #ifndef GLSLstd450_H #define GLSLstd450_H -namespace spv { +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 1; -const int GLSLstd450Version = 99; -const int GLSLstd450Revision = 3; +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use -enum class GLSLstd450 : unsigned { - Bad = 0, // Don't use + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, - Round = 1, - RoundEven = 2, - Trunc = 3, - FAbs = 4, - SAbs = 5, - FSign = 6, - SSign = 7, - Floor = 8, - Ceil = 9, - Fract = 10, + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, - Radians = 11, - Degrees = 12, - Sin = 13, - Cos = 14, - Tan = 15, - Asin = 16, - Acos = 17, - Atan = 18, - Sinh = 19, - Cosh = 20, - Tanh = 21, - Asinh = 22, - Acosh = 23, - Atanh = 24, - Atan2 = 25, + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, - Pow = 26, - Exp = 27, - Log = 28, - Exp2 = 29, - Log2 = 30, - Sqrt = 31, - InverseSqrt = 32, + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, - Determinant = 33, - MatrixInverse = 34, 
+ GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, - Modf = 35, // second operand needs an OpVariable to write to - ModfStruct = 36, // no OpVariable operand - FMin = 37, - UMin = 38, - SMin = 39, - FMax = 40, - UMax = 41, - SMax = 42, - FClamp = 43, - UClamp = 44, - SClamp = 45, - FMix = 46, - IMix = 47, - Step = 48, - SmoothStep = 49, + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, - Fma = 50, - Frexp = 51, // second operand needs an OpVariable to write to - FrexpStruct = 52, // no OpVariable operand - Ldexp = 53, + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, - PackSnorm4x8 = 54, - PackUnorm4x8 = 55, - PackSnorm2x16 = 56, - PackUnorm2x16 = 57, - PackHalf2x16 = 58, - PackDouble2x32 = 59, - UnpackSnorm2x16 = 60, - UnpackUnorm2x16 = 61, - UnpackHalf2x16 = 62, - UnpackSnorm4x8 = 63, - UnpackUnorm4x8 = 64, - UnpackDouble2x32 = 65, + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, - Length = 66, - Distance = 67, - Cross = 68, - Normalize = 69, - FaceForward = 70, - Reflect = 71, - 
Refract = 72, + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, - FindILsb = 73, - FindSMsb = 74, - FindUMsb = 75, + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, - InterpolateAtCentroid = 76, - InterpolateAtSample = 77, - InterpolateAtOffset = 78, + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, - Count + GLSLstd450Count }; -} // end namespace spv - #endif // #ifndef GLSLstd450_H diff --git a/third_party/spirv/GLSL.std.450.hpp11 b/third_party/spirv/GLSL.std.450.hpp11 new file mode 100644 index 000000000..526912006 --- /dev/null +++ b/third_party/spirv/GLSL.std.450.hpp11 @@ -0,0 +1,135 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLstd450_HPP +#define GLSLstd450_HPP + +namespace spv { + +constexpr int GLSLstd450Version = 100; +constexpr int GLSLstd450Revision = 1; + +enum class GLSLstd450 { + kBad = 0, // Don't use + + kRound = 1, + kRoundEven = 2, + kTrunc = 3, + kFAbs = 4, + kSAbs = 5, + kFSign = 6, + kSSign = 7, + kFloor = 8, + kCeil = 9, + kFract = 10, + + kRadians = 11, + kDegrees = 12, + kSin = 13, + kCos = 14, + kTan = 15, + kAsin = 16, + kAcos = 17, + kAtan = 18, + kSinh = 19, + kCosh = 20, + kTanh = 21, + kAsinh = 22, + kAcosh = 23, + kAtanh = 24, + kAtan2 = 25, + + kPow = 26, + kExp = 27, + kLog = 28, + kExp2 = 29, + kLog2 = 30, + kSqrt = 31, + kInverseSqrt = 32, + + kDeterminant = 33, + kMatrixInverse = 34, + + kModf = 35, // second operand needs an OpVariable to write to + kModfStruct = 36, // no OpVariable operand + kFMin = 37, + kUMin = 38, + kSMin = 39, + kFMax = 40, + kUMax = 41, + kSMax = 42, + kFClamp = 43, + kUClamp = 44, + kSClamp = 45, + kFMix = 46, + kIMix = 47, // Reserved + kStep = 48, + kSmoothStep = 49, + + kFma = 50, + kFrexp = 51, // second operand needs an OpVariable to write to + kFrexpStruct = 52, // no OpVariable operand + kLdexp = 53, + + kPackSnorm4x8 = 54, + kPackUnorm4x8 = 55, + kPackSnorm2x16 = 56, + kPackUnorm2x16 = 57, + kPackHalf2x16 = 58, + kPackDouble2x32 = 59, + kUnpackSnorm2x16 = 60, + kUnpackUnorm2x16 = 61, + kUnpackHalf2x16 = 62, + kUnpackSnorm4x8 = 63, + kUnpackUnorm4x8 = 64, + kUnpackDouble2x32 = 65, + + kLength = 66, + kDistance = 67, + kCross = 68, + kNormalize = 69, + kFaceForward = 70, + kReflect = 71, + kRefract = 72, + + kFindILsb = 73, + kFindSMsb = 74, + kFindUMsb = 75, + + kInterpolateAtCentroid = 76, + kInterpolateAtSample = 77, + 
kInterpolateAtOffset = 78, + + kNMin = 79, + kNMax = 80, + kNClamp = 81, + + kCount +}; + +} // namespace spv + +#endif // #ifndef GLSLstd450_HPP diff --git a/third_party/spirv/OpenCL.std.h b/third_party/spirv/OpenCL.std.h new file mode 100644 index 000000000..af29c527e --- /dev/null +++ b/third_party/spirv/OpenCL.std.h @@ -0,0 +1,272 @@ +/* +** Copyright (c) 2015-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/ + +// +// Author: Boaz Ouriel, Intel +// + +namespace OpenCLLIB { + +enum Entrypoints { + + // math functions + Acos = 0, + Acosh = 1, + Acospi = 2, + Asin = 3, + Asinh = 4, + Asinpi = 5, + Atan = 6, + Atan2 = 7, + Atanh = 8, + Atanpi = 9, + Atan2pi = 10, + Cbrt = 11, + Ceil = 12, + Copysign = 13, + Cos = 14, + Cosh = 15, + Cospi = 16, + Erfc = 17, + Erf = 18, + Exp = 19, + Exp2 = 20, + Exp10 = 21, + Expm1 = 22, + Fabs = 23, + Fdim = 24, + Floor = 25, + Fma = 26, + Fmax = 27, + Fmin = 28, + Fmod = 29, + Fract = 30, + Frexp = 31, + Hypot = 32, + Ilogb = 33, + Ldexp = 34, + Lgamma = 35, + Lgamma_r = 36, + Log = 37, + Log2 = 38, + Log10 = 39, + Log1p = 40, + Logb = 41, + Mad = 42, + Maxmag = 43, + Minmag = 44, + Modf = 45, + Nan = 46, + Nextafter = 47, + Pow = 48, + Pown = 49, + Powr = 50, + Remainder = 51, + Remquo = 52, + Rint = 53, + Rootn = 54, + Round = 55, + Rsqrt = 56, + Sin = 57, + Sincos = 58, + Sinh = 59, + Sinpi = 60, + Sqrt = 61, + Tan = 62, + Tanh = 63, + Tanpi = 64, + Tgamma = 65, + Trunc = 66, + Half_cos = 67, + Half_divide = 68, + Half_exp = 69, + Half_exp2 = 70, + Half_exp10 = 71, + Half_log = 72, + Half_log2 = 73, + Half_log10 = 74, + Half_powr = 75, + Half_recip = 76, + Half_rsqrt = 77, + Half_sin = 78, + Half_sqrt = 79, + Half_tan = 80, + Native_cos = 81, + Native_divide = 82, + Native_exp = 83, + Native_exp2 = 84, + Native_exp10 = 85, + Native_log = 86, + Native_log2 = 87, + Native_log10 = 88, + Native_powr = 89, + Native_recip = 90, + Native_rsqrt = 91, + Native_sin = 92, + Native_sqrt = 93, + Native_tan = 94, + + // Common + FClamp = 95, + Degrees = 96, + FMax_common = 97, + FMin_common = 98, + Mix = 99, + Radians = 100, + Step = 101, + Smoothstep = 102, + Sign = 103, + + // Geometrics + Cross = 104, + Distance = 105, + Length = 106, + Normalize = 107, + Fast_distance = 108, + Fast_length = 109, + Fast_normalize = 110, + + // Images - Deprecated + Read_imagef = 111, + Read_imagei = 112, + Read_imageui = 113, + Read_imageh = 114, + + 
Read_imagef_samplerless = 115, + Read_imagei_samplerless = 116, + Read_imageui_samplerless = 117, + Read_imageh_samplerless = 118, + + Write_imagef = 119, + Write_imagei = 120, + Write_imageui = 121, + Write_imageh = 122, + Read_imagef_mipmap_lod = 123, + Read_imagei_mipmap_lod = 124, + Read_imageui_mipmap_lod = 125, + Read_imagef_mipmap_grad = 126, + Read_imagei_mipmap_grad = 127, + Read_imageui_mipmap_grad = 128, + + // Image write with LOD + Write_imagef_mipmap_lod = 129, + Write_imagei_mipmap_lod = 130, + Write_imageui_mipmap_lod = 131, + + // Images - Deprecated + Get_image_width = 132, + Get_image_height = 133, + Get_image_depth = 134, + Get_image_channel_data_type = 135, + Get_image_channel_order = 136, + Get_image_dim = 137, + Get_image_array_size = 138, + Get_image_num_samples = 139, + Get_image_num_mip_levels = 140, + + // Integers + SAbs = 141, + SAbs_diff = 142, + SAdd_sat = 143, + UAdd_sat = 144, + SHadd = 145, + UHadd = 146, + SRhadd = 147, + URhadd = 148, + SClamp = 149, + UClamp = 150, + Clz = 151, + Ctz = 152, + SMad_hi = 153, + UMad_sat = 154, + SMad_sat = 155, + SMax = 156, + UMax = 157, + SMin = 158, + UMin = 159, + SMul_hi = 160, + Rotate = 161, + SSub_sat = 162, + USub_sat = 163, + U_Upsample = 164, + S_Upsample = 165, + Popcount = 166, + SMad24 = 167, + UMad24 = 168, + SMul24 = 169, + UMul24 = 170, + + // Vector Loads/Stores + Vloadn = 171, + Vstoren = 172, + Vload_half = 173, + Vload_halfn = 174, + Vstore_half = 175, + Vstore_half_r = 176, + Vstore_halfn = 177, + Vstore_halfn_r = 178, + Vloada_halfn = 179, + Vstorea_halfn = 180, + Vstorea_halfn_r = 181, + + // Vector Misc + Shuffle = 182, + Shuffle2 = 183, + + // + Printf = 184, + Prefetch = 185, + + // Relationals + Bitselect = 186, + Select = 187, + + // pipes + Read_pipe = 188, + Write_pipe = 189, + Reserve_read_pipe = 190, + Reserve_write_pipe = 191, + Commit_read_pipe = 192, + Commit_write_pipe = 193, + Is_valid_reserve_id = 194, + Work_group_reserve_read_pipe = 195, + 
Work_group_reserve_write_pipe = 196, + Work_group_commit_read_pipe = 197, + Work_group_commit_write_pipe = 198, + Get_pipe_num_packets = 199, + Get_pipe_max_packets = 200, + + // more integers + UAbs = 201, + UAbs_diff = 202, + UMul_hi = 203, + UMad_hi = 204, +}; + + + +}; // end namespace OpenCL20 + diff --git a/third_party/spirv/spirv.h b/third_party/spirv/spirv.h index 136121600..d48488e94 100644 --- a/third_party/spirv/spirv.h +++ b/third_party/spirv/spirv.h @@ -1,877 +1,871 @@ -// Copyright (c) 2014-2015 The Khronos Group Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and/or associated documentation files (the "Materials"), -// to deal in the Materials without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Materials, and to permit persons to whom the -// Materials are furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Materials. -// -// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -// -// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -// IN THE MATERIALS. +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ -// This header is automatically generated by the same tool that creates -// the Binary Section of the SPIR-V specification. +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. 
+*/ -// Enumeration tokens for SPIR-V, in various styles: -// C, C++, C++11, JSON, Lua, Python -// -// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL -// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL -// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL -// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL -// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] -// -// Some tokens act like mask values, which can be OR'd together, -// while others are mutually exclusive. The mask-like ones have -// "Mask" in their name, and a parallel enum that has the shift -// amount (1 << x) for each corresponding enumerant. +/* +** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. 
+*/ -#ifndef spirv_H11 -#define spirv_H11 +#ifndef spirv_H +#define spirv_H -namespace spv { - -typedef unsigned int Id; +typedef unsigned int SpvId; #define SPV_VERSION 0x10000 -#define SPV_REVISION 2 +#define SPV_REVISION 3 -static const unsigned int MagicNumber = 0x07230203; -static const unsigned int Version = 0x00010000; -static const unsigned int Revision = 2; -static const unsigned int OpCodeMask = 0xffff; -static const unsigned int WordCountShift = 16; +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 0x00010000; +static const unsigned int SpvRevision = 3; +static const unsigned int SpvOpCodeMask = 0xffff; +static const unsigned int SpvWordCountShift = 16; -enum class SourceLanguage : unsigned { - Unknown = 0, - ESSL = 1, - GLSL = 2, - OpenCL_C = 3, - OpenCL_CPP = 4, -}; +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL_C = 3, + SpvSourceLanguageOpenCL_CPP = 4, +} SpvSourceLanguage; -enum class ExecutionModel : unsigned { - Vertex = 0, - TessellationControl = 1, - TessellationEvaluation = 2, - Geometry = 3, - Fragment = 4, - GLCompute = 5, - Kernel = 6, -}; +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, +} SpvExecutionModel; -enum class AddressingModel : unsigned { - Logical = 0, - Physical32 = 1, - Physical64 = 2, -}; +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, +} SpvAddressingModel; -enum class MemoryModel : unsigned { - Simple = 0, - GLSL450 = 1, - OpenCL = 2, -}; +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + SpvMemoryModelOpenCL = 2, 
+} SpvMemoryModel; -enum class ExecutionMode : unsigned { - Invocations = 0, - SpacingEqual = 1, - SpacingFractionalEven = 2, - SpacingFractionalOdd = 3, - VertexOrderCw = 4, - VertexOrderCcw = 5, - PixelCenterInteger = 6, - OriginUpperLeft = 7, - OriginLowerLeft = 8, - EarlyFragmentTests = 9, - PointMode = 10, - Xfb = 11, - DepthReplacing = 12, - DepthGreater = 14, - DepthLess = 15, - DepthUnchanged = 16, - LocalSize = 17, - LocalSizeHint = 18, - InputPoints = 19, - InputLines = 20, - InputLinesAdjacency = 21, - Triangles = 22, - InputTrianglesAdjacency = 23, - Quads = 24, - Isolines = 25, - OutputVertices = 26, - OutputPoints = 27, - OutputLineStrip = 28, - OutputTriangleStrip = 29, - VecTypeHint = 30, - ContractionOff = 31, -}; +typedef enum SpvExecutionMode_ { + SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthGreater = 14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + SpvExecutionModeQuads = 24, + SpvExecutionModeIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, +} SpvExecutionMode; -enum class StorageClass : 
unsigned { - UniformConstant = 0, - Input = 1, - Uniform = 2, - Output = 3, - Workgroup = 4, - CrossWorkgroup = 5, - Private = 6, - Function = 7, - Generic = 8, - PushConstant = 9, - AtomicCounter = 10, - Image = 11, -}; +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroup = 4, + SpvStorageClassCrossWorkgroup = 5, + SpvStorageClassPrivate = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPushConstant = 9, + SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, +} SpvStorageClass; -enum class Dim : unsigned { - Dim1D = 0, - Dim2D = 1, - Dim3D = 2, - Cube = 3, - Rect = 4, - Buffer = 5, - SubpassData = 6, -}; +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, + SpvDimSubpassData = 6, +} SpvDim; -enum class SamplerAddressingMode : unsigned { - None = 0, - ClampToEdge = 1, - Clamp = 2, - Repeat = 3, - RepeatMirrored = 4, -}; +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, + SpvSamplerAddressingModeRepeatMirrored = 4, +} SpvSamplerAddressingMode; -enum class SamplerFilterMode : unsigned { - Nearest = 0, - Linear = 1, -}; +typedef enum SpvSamplerFilterMode_ { + SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, +} SpvSamplerFilterMode; -enum class ImageFormat : unsigned { - Unknown = 0, - Rgba32f = 1, - Rgba16f = 2, - R32f = 3, - Rgba8 = 4, - Rgba8Snorm = 5, - Rg32f = 6, - Rg16f = 7, - R11fG11fB10f = 8, - R16f = 9, - Rgba16 = 10, - Rgb10A2 = 11, - Rg16 = 12, - Rg8 = 13, - R16 = 14, - R8 = 15, - Rgba16Snorm = 16, - Rg16Snorm = 17, - Rg8Snorm = 18, - R16Snorm = 19, - R8Snorm = 20, - Rgba32i = 21, - Rgba16i = 22, - Rgba8i = 23, - R32i = 24, - Rg32i = 25, - Rg16i = 
26, - Rg8i = 27, - R16i = 28, - R8i = 29, - Rgba32ui = 30, - Rgba16ui = 31, - Rgba8ui = 32, - R32ui = 33, - Rgb10a2ui = 34, - Rg32ui = 35, - Rg16ui = 36, - Rg8ui = 37, - R16ui = 38, - R8ui = 39, -}; +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, +} SpvImageFormat; -enum class ImageChannelOrder : unsigned { - R = 0, - A = 1, - RG = 2, - RA = 3, - RGB = 4, - RGBA = 5, - BGRA = 6, - ARGB = 7, - Intensity = 8, - Luminance = 9, - Rx = 10, - RGx = 11, - RGBx = 12, - Depth = 13, - DepthStencil = 14, - sRGB = 15, - sRGBx = 16, - sRGBA = 17, - sBGRA = 18, -}; +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + 
SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, +} SpvImageChannelOrder; -enum class ImageChannelDataType : unsigned { - SnormInt8 = 0, - SnormInt16 = 1, - UnormInt8 = 2, - UnormInt16 = 3, - UnormShort565 = 4, - UnormShort555 = 5, - UnormInt101010 = 6, - SignedInt8 = 7, - SignedInt16 = 8, - SignedInt32 = 9, - UnsignedInt8 = 10, - UnsignedInt16 = 11, - UnsignedInt32 = 12, - HalfFloat = 13, - Float = 14, - UnormInt24 = 15, - UnormInt101010_2 = 16, -}; +typedef enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 14, + SpvImageChannelDataTypeUnormInt24 = 15, + SpvImageChannelDataTypeUnormInt101010_2 = 16, +} SpvImageChannelDataType; -enum class ImageOperandsShift : unsigned { - Bias = 0, - Lod = 1, - Grad = 2, - ConstOffset = 3, - Offset = 4, - ConstOffsets = 5, - Sample = 6, - MinLod = 7, -}; +typedef enum SpvImageOperandsShift_ { + SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsConstOffsetShift = 3, + SpvImageOperandsOffsetShift = 4, + SpvImageOperandsConstOffsetsShift = 5, + 
SpvImageOperandsSampleShift = 6, + SpvImageOperandsMinLodShift = 7, +} SpvImageOperandsShift; -enum class ImageOperandsMask : unsigned { - MaskNone = 0, - Bias = 0x00000001, - Lod = 0x00000002, - Grad = 0x00000004, - ConstOffset = 0x00000008, - Offset = 0x00000010, - ConstOffsets = 0x00000020, - Sample = 0x00000040, - MinLod = 0x00000080, -}; +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsConstOffsetMask = 0x00000008, + SpvImageOperandsOffsetMask = 0x00000010, + SpvImageOperandsConstOffsetsMask = 0x00000020, + SpvImageOperandsSampleMask = 0x00000040, + SpvImageOperandsMinLodMask = 0x00000080, +} SpvImageOperandsMask; -enum class FPFastMathModeShift : unsigned { - NotNaN = 0, - NotInf = 1, - NSZ = 2, - AllowRecip = 3, - Fast = 4, -}; +typedef enum SpvFPFastMathModeShift_ { + SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, +} SpvFPFastMathModeShift; -enum class FPFastMathModeMask : unsigned { - MaskNone = 0, - NotNaN = 0x00000001, - NotInf = 0x00000002, - NSZ = 0x00000004, - AllowRecip = 0x00000008, - Fast = 0x00000010, -}; +typedef enum SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, +} SpvFPFastMathModeMask; -enum class FPRoundingMode : unsigned { - RTE = 0, - RTZ = 1, - RTP = 2, - RTN = 3, -}; +typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, +} SpvFPRoundingMode; -enum class LinkageType : unsigned { - Export = 0, - Import = 1, -}; +typedef enum 
SpvLinkageType_ { + SpvLinkageTypeExport = 0, + SpvLinkageTypeImport = 1, +} SpvLinkageType; -enum class AccessQualifier : unsigned { - ReadOnly = 0, - WriteOnly = 1, - ReadWrite = 2, -}; +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, +} SpvAccessQualifier; -enum class FunctionParameterAttribute : unsigned { - Zext = 0, - Sext = 1, - ByVal = 2, - Sret = 3, - NoAlias = 4, - NoCapture = 5, - NoWrite = 6, - NoReadWrite = 7, -}; +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeNoWrite = 6, + SpvFunctionParameterAttributeNoReadWrite = 7, +} SpvFunctionParameterAttribute; -enum class Decoration : unsigned { - RelaxedPrecision = 0, - SpecId = 1, - Block = 2, - BufferBlock = 3, - RowMajor = 4, - ColMajor = 5, - ArrayStride = 6, - MatrixStride = 7, - GLSLShared = 8, - GLSLPacked = 9, - CPacked = 10, - BuiltIn = 11, - NoPerspective = 13, - Flat = 14, - Patch = 15, - Centroid = 16, - Sample = 17, - Invariant = 18, - Restrict = 19, - Aliased = 20, - Volatile = 21, - Constant = 22, - Coherent = 23, - NonWritable = 24, - NonReadable = 25, - Uniform = 26, - SaturatedConversion = 28, - Stream = 29, - Location = 30, - Component = 31, - Index = 32, - Binding = 33, - DescriptorSet = 34, - Offset = 35, - XfbBuffer = 36, - XfbStride = 37, - FuncParamAttr = 38, - FPRoundingMode = 39, - FPFastMathMode = 40, - LinkageAttributes = 41, - NoContraction = 42, - InputAttachmentIndex = 43, - Alignment = 44, -}; +typedef enum SpvDecoration_ { + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + SpvDecorationBufferBlock = 3, + SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + 
SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + SpvDecorationBuiltIn = 11, + SpvDecorationNoPerspective = 13, + SpvDecorationFlat = 14, + SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonWritable = 24, + SpvDecorationNonReadable = 25, + SpvDecorationUniform = 26, + SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, + SpvDecorationNoContraction = 42, + SpvDecorationInputAttachmentIndex = 43, + SpvDecorationAlignment = 44, +} SpvDecoration; -enum class BuiltIn : unsigned { - Position = 0, - PointSize = 1, - ClipDistance = 3, - CullDistance = 4, - VertexId = 5, - InstanceId = 6, - PrimitiveId = 7, - InvocationId = 8, - Layer = 9, - ViewportIndex = 10, - TessLevelOuter = 11, - TessLevelInner = 12, - TessCoord = 13, - PatchVertices = 14, - FragCoord = 15, - PointCoord = 16, - FrontFacing = 17, - SampleId = 18, - SamplePosition = 19, - SampleMask = 20, - FragDepth = 22, - HelperInvocation = 23, - NumWorkgroups = 24, - WorkgroupSize = 25, - WorkgroupId = 26, - LocalInvocationId = 27, - GlobalInvocationId = 28, - LocalInvocationIndex = 29, - WorkDim = 30, - GlobalSize = 31, - EnqueuedWorkgroupSize = 32, - GlobalOffset = 33, - GlobalLinearId = 34, - SubgroupSize = 36, - SubgroupMaxSize = 37, - NumSubgroups = 38, - NumEnqueuedSubgroups = 39, - 
SubgroupId = 40, - SubgroupLocalInvocationId = 41, - VertexIndex = 42, - InstanceIndex = 43, -}; +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, + SpvBuiltInVertexIndex = 42, + SpvBuiltInInstanceIndex = 43, +} SpvBuiltIn; -enum class SelectionControlShift : unsigned { - Flatten = 0, - DontFlatten = 1, -}; +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + SpvSelectionControlDontFlattenShift = 1, +} SpvSelectionControlShift; -enum class SelectionControlMask : unsigned { - MaskNone = 0, - Flatten = 0x00000001, - DontFlatten = 0x00000002, -}; +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; -enum class 
LoopControlShift : unsigned { - Unroll = 0, - DontUnroll = 1, -}; +typedef enum SpvLoopControlShift_ { + SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, +} SpvLoopControlShift; -enum class LoopControlMask : unsigned { - MaskNone = 0, - Unroll = 0x00000001, - DontUnroll = 0x00000002, -}; +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, +} SpvLoopControlMask; -enum class FunctionControlShift : unsigned { - Inline = 0, - DontInline = 1, - Pure = 2, - Const = 3, -}; +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, +} SpvFunctionControlShift; -enum class FunctionControlMask : unsigned { - MaskNone = 0, - Inline = 0x00000001, - DontInline = 0x00000002, - Pure = 0x00000004, - Const = 0x00000008, -}; +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, +} SpvFunctionControlMask; -enum class MemorySemanticsShift : unsigned { - Acquire = 1, - Release = 2, - AcquireRelease = 3, - SequentiallyConsistent = 4, - UniformMemory = 6, - SubgroupMemory = 7, - WorkgroupMemory = 8, - CrossWorkgroupMemory = 9, - AtomicCounterMemory = 10, - ImageMemory = 11, -}; +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsAcquireShift = 1, + SpvMemorySemanticsReleaseShift = 2, + SpvMemorySemanticsAcquireReleaseShift = 3, + SpvMemorySemanticsSequentiallyConsistentShift = 4, + SpvMemorySemanticsUniformMemoryShift = 6, + SpvMemorySemanticsSubgroupMemoryShift = 7, + SpvMemorySemanticsWorkgroupMemoryShift = 8, + SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, + SpvMemorySemanticsAtomicCounterMemoryShift = 10, + 
SpvMemorySemanticsImageMemoryShift = 11, +} SpvMemorySemanticsShift; -enum class MemorySemanticsMask : unsigned { - MaskNone = 0, - Acquire = 0x00000002, - Release = 0x00000004, - AcquireRelease = 0x00000008, - SequentiallyConsistent = 0x00000010, - UniformMemory = 0x00000040, - SubgroupMemory = 0x00000080, - WorkgroupMemory = 0x00000100, - CrossWorkgroupMemory = 0x00000200, - AtomicCounterMemory = 0x00000400, - ImageMemory = 0x00000800, -}; +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsAcquireMask = 0x00000002, + SpvMemorySemanticsReleaseMask = 0x00000004, + SpvMemorySemanticsAcquireReleaseMask = 0x00000008, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, + SpvMemorySemanticsUniformMemoryMask = 0x00000040, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, + SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, + SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, + SpvMemorySemanticsImageMemoryMask = 0x00000800, +} SpvMemorySemanticsMask; -enum class MemoryAccessShift : unsigned { - Volatile = 0, - Aligned = 1, - Nontemporal = 2, -}; +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift = 0, + SpvMemoryAccessAlignedShift = 1, + SpvMemoryAccessNontemporalShift = 2, +} SpvMemoryAccessShift; -enum class MemoryAccessMask : unsigned { - MaskNone = 0, - Volatile = 0x00000001, - Aligned = 0x00000002, - Nontemporal = 0x00000004, -}; +typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, + SpvMemoryAccessNontemporalMask = 0x00000004, +} SpvMemoryAccessMask; -enum class Scope : unsigned { - CrossDevice = 0, - Device = 1, - Workgroup = 2, - Subgroup = 3, - Invocation = 4, -}; +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, + SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, +} SpvScope; 
-enum class GroupOperation : unsigned { - Reduce = 0, - InclusiveScan = 1, - ExclusiveScan = 2, -}; +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, +} SpvGroupOperation; -enum class KernelEnqueueFlags : unsigned { - NoWait = 0, - WaitKernel = 1, - WaitWorkGroup = 2, -}; +typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, +} SpvKernelEnqueueFlags; -enum class KernelProfilingInfoShift : unsigned { - CmdExecTime = 0, -}; +typedef enum SpvKernelProfilingInfoShift_ { + SpvKernelProfilingInfoCmdExecTimeShift = 0, +} SpvKernelProfilingInfoShift; -enum class KernelProfilingInfoMask : unsigned { - MaskNone = 0, - CmdExecTime = 0x00000001, -}; +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; -enum class Capability : unsigned { - Matrix = 0, - Shader = 1, - Geometry = 2, - Tessellation = 3, - Addresses = 4, - Linkage = 5, - Kernel = 6, - Vector16 = 7, - Float16Buffer = 8, - Float16 = 9, - Float64 = 10, - Int64 = 11, - Int64Atomics = 12, - ImageBasic = 13, - ImageReadWrite = 14, - ImageMipmap = 15, - Pipes = 17, - Groups = 18, - DeviceEnqueue = 19, - LiteralSampler = 20, - AtomicStorage = 21, - Int16 = 22, - TessellationPointSize = 23, - GeometryPointSize = 24, - ImageGatherExtended = 25, - StorageImageMultisample = 27, - UniformBufferArrayDynamicIndexing = 28, - SampledImageArrayDynamicIndexing = 29, - StorageBufferArrayDynamicIndexing = 30, - StorageImageArrayDynamicIndexing = 31, - ClipDistance = 32, - CullDistance = 33, - ImageCubeArray = 34, - SampleRateShading = 35, - ImageRect = 36, - SampledRect = 37, - GenericPointer = 38, - Int8 = 39, - InputAttachment = 40, - SparseResidency = 41, - MinLod = 42, - Sampled1D = 43, - Image1D = 44, - SampledCubeArray = 45, - 
SampledBuffer = 46, - ImageBuffer = 47, - ImageMSArray = 48, - StorageImageExtendedFormats = 49, - ImageQuery = 50, - DerivativeControl = 51, - InterpolationFunction = 52, - TransformFeedback = 53, - GeometryStreams = 54, - StorageImageReadWithoutFormat = 55, - StorageImageWriteWithoutFormat = 56, -}; +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation = 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityPipes = 17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, + SpvCapabilityTessellationPointSize = 23, + SpvCapabilityGeometryPointSize = 24, + SpvCapabilityImageGatherExtended = 25, + SpvCapabilityStorageImageMultisample = 27, + SpvCapabilityUniformBufferArrayDynamicIndexing = 28, + SpvCapabilitySampledImageArrayDynamicIndexing = 29, + SpvCapabilityStorageBufferArrayDynamicIndexing = 30, + SpvCapabilityStorageImageArrayDynamicIndexing = 31, + SpvCapabilityClipDistance = 32, + SpvCapabilityCullDistance = 33, + SpvCapabilityImageCubeArray = 34, + SpvCapabilitySampleRateShading = 35, + SpvCapabilityImageRect = 36, + SpvCapabilitySampledRect = 37, + SpvCapabilityGenericPointer = 38, + SpvCapabilityInt8 = 39, + SpvCapabilityInputAttachment = 40, + SpvCapabilitySparseResidency = 41, + SpvCapabilityMinLod = 42, + SpvCapabilitySampled1D = 43, + SpvCapabilityImage1D = 44, + SpvCapabilitySampledCubeArray = 45, + SpvCapabilitySampledBuffer = 46, + SpvCapabilityImageBuffer = 47, + SpvCapabilityImageMSArray = 48, + 
SpvCapabilityStorageImageExtendedFormats = 49, + SpvCapabilityImageQuery = 50, + SpvCapabilityDerivativeControl = 51, + SpvCapabilityInterpolationFunction = 52, + SpvCapabilityTransformFeedback = 53, + SpvCapabilityGeometryStreams = 54, + SpvCapabilityStorageImageReadWithoutFormat = 55, + SpvCapabilityStorageImageWriteWithoutFormat = 56, + SpvCapabilityMultiViewport = 57, +} SpvCapability; -enum class Op : unsigned { - OpNop = 0, - OpUndef = 1, - OpSourceContinued = 2, - OpSource = 3, - OpSourceExtension = 4, - OpName = 5, - OpMemberName = 6, - OpString = 7, - OpLine = 8, - OpExtension = 10, - OpExtInstImport = 11, - OpExtInst = 12, - OpMemoryModel = 14, - OpEntryPoint = 15, - OpExecutionMode = 16, - OpCapability = 17, - OpTypeVoid = 19, - OpTypeBool = 20, - OpTypeInt = 21, - OpTypeFloat = 22, - OpTypeVector = 23, - OpTypeMatrix = 24, - OpTypeImage = 25, - OpTypeSampler = 26, - OpTypeSampledImage = 27, - OpTypeArray = 28, - OpTypeRuntimeArray = 29, - OpTypeStruct = 30, - OpTypeOpaque = 31, - OpTypePointer = 32, - OpTypeFunction = 33, - OpTypeEvent = 34, - OpTypeDeviceEvent = 35, - OpTypeReserveId = 36, - OpTypeQueue = 37, - OpTypePipe = 38, - OpTypeForwardPointer = 39, - OpConstantTrue = 41, - OpConstantFalse = 42, - OpConstant = 43, - OpConstantComposite = 44, - OpConstantSampler = 45, - OpConstantNull = 46, - OpSpecConstantTrue = 48, - OpSpecConstantFalse = 49, - OpSpecConstant = 50, - OpSpecConstantComposite = 51, - OpSpecConstantOp = 52, - OpFunction = 54, - OpFunctionParameter = 55, - OpFunctionEnd = 56, - OpFunctionCall = 57, - OpVariable = 59, - OpImageTexelPointer = 60, - OpLoad = 61, - OpStore = 62, - OpCopyMemory = 63, - OpCopyMemorySized = 64, - OpAccessChain = 65, - OpInBoundsAccessChain = 66, - OpPtrAccessChain = 67, - OpArrayLength = 68, - OpGenericPtrMemSemantics = 69, - OpInBoundsPtrAccessChain = 70, - OpDecorate = 71, - OpMemberDecorate = 72, - OpDecorationGroup = 73, - OpGroupDecorate = 74, - OpGroupMemberDecorate = 75, - OpVectorExtractDynamic = 
77, - OpVectorInsertDynamic = 78, - OpVectorShuffle = 79, - OpCompositeConstruct = 80, - OpCompositeExtract = 81, - OpCompositeInsert = 82, - OpCopyObject = 83, - OpTranspose = 84, - OpSampledImage = 86, - OpImageSampleImplicitLod = 87, - OpImageSampleExplicitLod = 88, - OpImageSampleDrefImplicitLod = 89, - OpImageSampleDrefExplicitLod = 90, - OpImageSampleProjImplicitLod = 91, - OpImageSampleProjExplicitLod = 92, - OpImageSampleProjDrefImplicitLod = 93, - OpImageSampleProjDrefExplicitLod = 94, - OpImageFetch = 95, - OpImageGather = 96, - OpImageDrefGather = 97, - OpImageRead = 98, - OpImageWrite = 99, - OpImage = 100, - OpImageQueryFormat = 101, - OpImageQueryOrder = 102, - OpImageQuerySizeLod = 103, - OpImageQuerySize = 104, - OpImageQueryLod = 105, - OpImageQueryLevels = 106, - OpImageQuerySamples = 107, - OpConvertFToU = 109, - OpConvertFToS = 110, - OpConvertSToF = 111, - OpConvertUToF = 112, - OpUConvert = 113, - OpSConvert = 114, - OpFConvert = 115, - OpQuantizeToF16 = 116, - OpConvertPtrToU = 117, - OpSatConvertSToU = 118, - OpSatConvertUToS = 119, - OpConvertUToPtr = 120, - OpPtrCastToGeneric = 121, - OpGenericCastToPtr = 122, - OpGenericCastToPtrExplicit = 123, - OpBitcast = 124, - OpSNegate = 126, - OpFNegate = 127, - OpIAdd = 128, - OpFAdd = 129, - OpISub = 130, - OpFSub = 131, - OpIMul = 132, - OpFMul = 133, - OpUDiv = 134, - OpSDiv = 135, - OpFDiv = 136, - OpUMod = 137, - OpSRem = 138, - OpSMod = 139, - OpFRem = 140, - OpFMod = 141, - OpVectorTimesScalar = 142, - OpMatrixTimesScalar = 143, - OpVectorTimesMatrix = 144, - OpMatrixTimesVector = 145, - OpMatrixTimesMatrix = 146, - OpOuterProduct = 147, - OpDot = 148, - OpIAddCarry = 149, - OpISubBorrow = 150, - OpUMulExtended = 151, - OpSMulExtended = 152, - OpAny = 154, - OpAll = 155, - OpIsNan = 156, - OpIsInf = 157, - OpIsFinite = 158, - OpIsNormal = 159, - OpSignBitSet = 160, - OpLessOrGreater = 161, - OpOrdered = 162, - OpUnordered = 163, - OpLogicalEqual = 164, - OpLogicalNotEqual = 165, - 
OpLogicalOr = 166, - OpLogicalAnd = 167, - OpLogicalNot = 168, - OpSelect = 169, - OpIEqual = 170, - OpINotEqual = 171, - OpUGreaterThan = 172, - OpSGreaterThan = 173, - OpUGreaterThanEqual = 174, - OpSGreaterThanEqual = 175, - OpULessThan = 176, - OpSLessThan = 177, - OpULessThanEqual = 178, - OpSLessThanEqual = 179, - OpFOrdEqual = 180, - OpFUnordEqual = 181, - OpFOrdNotEqual = 182, - OpFUnordNotEqual = 183, - OpFOrdLessThan = 184, - OpFUnordLessThan = 185, - OpFOrdGreaterThan = 186, - OpFUnordGreaterThan = 187, - OpFOrdLessThanEqual = 188, - OpFUnordLessThanEqual = 189, - OpFOrdGreaterThanEqual = 190, - OpFUnordGreaterThanEqual = 191, - OpShiftRightLogical = 194, - OpShiftRightArithmetic = 195, - OpShiftLeftLogical = 196, - OpBitwiseOr = 197, - OpBitwiseXor = 198, - OpBitwiseAnd = 199, - OpNot = 200, - OpBitFieldInsert = 201, - OpBitFieldSExtract = 202, - OpBitFieldUExtract = 203, - OpBitReverse = 204, - OpBitCount = 205, - OpDPdx = 207, - OpDPdy = 208, - OpFwidth = 209, - OpDPdxFine = 210, - OpDPdyFine = 211, - OpFwidthFine = 212, - OpDPdxCoarse = 213, - OpDPdyCoarse = 214, - OpFwidthCoarse = 215, - OpEmitVertex = 218, - OpEndPrimitive = 219, - OpEmitStreamVertex = 220, - OpEndStreamPrimitive = 221, - OpControlBarrier = 224, - OpMemoryBarrier = 225, - OpAtomicLoad = 227, - OpAtomicStore = 228, - OpAtomicExchange = 229, - OpAtomicCompareExchange = 230, - OpAtomicCompareExchangeWeak = 231, - OpAtomicIIncrement = 232, - OpAtomicIDecrement = 233, - OpAtomicIAdd = 234, - OpAtomicISub = 235, - OpAtomicSMin = 236, - OpAtomicUMin = 237, - OpAtomicSMax = 238, - OpAtomicUMax = 239, - OpAtomicAnd = 240, - OpAtomicOr = 241, - OpAtomicXor = 242, - OpPhi = 245, - OpLoopMerge = 246, - OpSelectionMerge = 247, - OpLabel = 248, - OpBranch = 249, - OpBranchConditional = 250, - OpSwitch = 251, - OpKill = 252, - OpReturn = 253, - OpReturnValue = 254, - OpUnreachable = 255, - OpLifetimeStart = 256, - OpLifetimeStop = 257, - OpGroupAsyncCopy = 259, - OpGroupWaitEvents = 260, - 
OpGroupAll = 261, - OpGroupAny = 262, - OpGroupBroadcast = 263, - OpGroupIAdd = 264, - OpGroupFAdd = 265, - OpGroupFMin = 266, - OpGroupUMin = 267, - OpGroupSMin = 268, - OpGroupFMax = 269, - OpGroupUMax = 270, - OpGroupSMax = 271, - OpReadPipe = 274, - OpWritePipe = 275, - OpReservedReadPipe = 276, - OpReservedWritePipe = 277, - OpReserveReadPipePackets = 278, - OpReserveWritePipePackets = 279, - OpCommitReadPipe = 280, - OpCommitWritePipe = 281, - OpIsValidReserveId = 282, - OpGetNumPipePackets = 283, - OpGetMaxPipePackets = 284, - OpGroupReserveReadPipePackets = 285, - OpGroupReserveWritePipePackets = 286, - OpGroupCommitReadPipe = 287, - OpGroupCommitWritePipe = 288, - OpEnqueueMarker = 291, - OpEnqueueKernel = 292, - OpGetKernelNDrangeSubGroupCount = 293, - OpGetKernelNDrangeMaxSubGroupSize = 294, - OpGetKernelWorkGroupSize = 295, - OpGetKernelPreferredWorkGroupSizeMultiple = 296, - OpRetainEvent = 297, - OpReleaseEvent = 298, - OpCreateUserEvent = 299, - OpIsValidEvent = 300, - OpSetUserEventStatus = 301, - OpCaptureEventProfilingInfo = 302, - OpGetDefaultQueue = 303, - OpBuildNDRange = 304, - OpImageSparseSampleImplicitLod = 305, - OpImageSparseSampleExplicitLod = 306, - OpImageSparseSampleDrefImplicitLod = 307, - OpImageSparseSampleDrefExplicitLod = 308, - OpImageSparseSampleProjImplicitLod = 309, - OpImageSparseSampleProjExplicitLod = 310, - OpImageSparseSampleProjDrefImplicitLod = 311, - OpImageSparseSampleProjDrefExplicitLod = 312, - OpImageSparseFetch = 313, - OpImageSparseGather = 314, - OpImageSparseDrefGather = 315, - OpImageSparseTexelsResident = 316, - OpNoLine = 317, - OpAtomicFlagTestAndSet = 318, - OpAtomicFlagClear = 319, -}; +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSourceContinued = 2, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 
15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + 
SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImage = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + 
SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpGroupAsyncCopy = 259, + SpvOpGroupWaitEvents = 260, + 
SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, + SpvOpAtomicFlagClear = 319, + SpvOpImageSparseRead = 320, +} SpvOp; -// Overload operator| for mask bit combining +#endif // #ifndef spirv_H -inline ImageOperandsMask 
operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } -inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } -inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } -inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } -inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } -inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } -inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } -inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } - -} // end namespace spv - -#endif // #ifndef spirv_H11 diff --git a/third_party/spirv/spirv.hpp11 b/third_party/spirv/spirv.hpp11 new file mode 100644 index 000000000..03faaac38 --- /dev/null +++ b/third_party/spirv/spirv.hpp11 @@ -0,0 +1,880 @@ +// Copyright (c) 2014-2016 The Khronos Group Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. + +// This header is automatically generated by the same tool that creates +// the Binary Section of the SPIR-V specification. 
+ +// Enumeration tokens for SPIR-V, in various styles: +// C, C++, C++11, JSON, Lua, Python +// +// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +// +// Some tokens act like mask values, which can be OR'd together, +// while others are mutually exclusive. The mask-like ones have +// "Mask" in their name, and a parallel enum that has the shift +// amount (1 << x) for each corresponding enumerant. + +#ifndef spirv_HPP +#define spirv_HPP + +namespace spv { + +typedef unsigned int Id; + +#define SPV_VERSION 0x10000 +#define SPV_REVISION 3 + +static const unsigned int MagicNumber = 0x07230203; +static const unsigned int Version = 0x00010000; +static const unsigned int Revision = 3; +static const unsigned int OpCodeMask = 0xffff; +static const unsigned int WordCountShift = 16; + +enum class SourceLanguage : unsigned { + Unknown = 0, + ESSL = 1, + GLSL = 2, + OpenCL_C = 3, + OpenCL_CPP = 4, +}; + +enum class ExecutionModel : unsigned { + Vertex = 0, + TessellationControl = 1, + TessellationEvaluation = 2, + Geometry = 3, + Fragment = 4, + GLCompute = 5, + Kernel = 6, +}; + +enum class AddressingModel : unsigned { + Logical = 0, + Physical32 = 1, + Physical64 = 2, +}; + +enum class MemoryModel : unsigned { + Simple = 0, + GLSL450 = 1, + OpenCL = 2, +}; + +enum class ExecutionMode : unsigned { + Invocations = 0, + SpacingEqual = 1, + SpacingFractionalEven = 2, + SpacingFractionalOdd = 3, + VertexOrderCw = 4, + VertexOrderCcw = 5, + PixelCenterInteger = 6, + OriginUpperLeft = 7, + OriginLowerLeft = 8, + EarlyFragmentTests = 9, + PointMode = 10, + Xfb = 11, + DepthReplacing = 12, + DepthGreater = 14, + DepthLess = 15, + DepthUnchanged = 16, + LocalSize = 
17, + LocalSizeHint = 18, + InputPoints = 19, + InputLines = 20, + InputLinesAdjacency = 21, + Triangles = 22, + InputTrianglesAdjacency = 23, + Quads = 24, + Isolines = 25, + OutputVertices = 26, + OutputPoints = 27, + OutputLineStrip = 28, + OutputTriangleStrip = 29, + VecTypeHint = 30, + ContractionOff = 31, +}; + +enum class StorageClass : unsigned { + UniformConstant = 0, + Input = 1, + Uniform = 2, + Output = 3, + Workgroup = 4, + CrossWorkgroup = 5, + Private = 6, + Function = 7, + Generic = 8, + PushConstant = 9, + AtomicCounter = 10, + Image = 11, +}; + +enum class Dim : unsigned { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + Cube = 3, + Rect = 4, + Buffer = 5, + SubpassData = 6, +}; + +enum class SamplerAddressingMode : unsigned { + None = 0, + ClampToEdge = 1, + Clamp = 2, + Repeat = 3, + RepeatMirrored = 4, +}; + +enum class SamplerFilterMode : unsigned { + Nearest = 0, + Linear = 1, +}; + +enum class ImageFormat : unsigned { + Unknown = 0, + Rgba32f = 1, + Rgba16f = 2, + R32f = 3, + Rgba8 = 4, + Rgba8Snorm = 5, + Rg32f = 6, + Rg16f = 7, + R11fG11fB10f = 8, + R16f = 9, + Rgba16 = 10, + Rgb10A2 = 11, + Rg16 = 12, + Rg8 = 13, + R16 = 14, + R8 = 15, + Rgba16Snorm = 16, + Rg16Snorm = 17, + Rg8Snorm = 18, + R16Snorm = 19, + R8Snorm = 20, + Rgba32i = 21, + Rgba16i = 22, + Rgba8i = 23, + R32i = 24, + Rg32i = 25, + Rg16i = 26, + Rg8i = 27, + R16i = 28, + R8i = 29, + Rgba32ui = 30, + Rgba16ui = 31, + Rgba8ui = 32, + R32ui = 33, + Rgb10a2ui = 34, + Rg32ui = 35, + Rg16ui = 36, + Rg8ui = 37, + R16ui = 38, + R8ui = 39, +}; + +enum class ImageChannelOrder : unsigned { + R = 0, + A = 1, + RG = 2, + RA = 3, + RGB = 4, + RGBA = 5, + BGRA = 6, + ARGB = 7, + Intensity = 8, + Luminance = 9, + Rx = 10, + RGx = 11, + RGBx = 12, + Depth = 13, + DepthStencil = 14, + sRGB = 15, + sRGBx = 16, + sRGBA = 17, + sBGRA = 18, +}; + +enum class ImageChannelDataType : unsigned { + SnormInt8 = 0, + SnormInt16 = 1, + UnormInt8 = 2, + UnormInt16 = 3, + UnormShort565 = 4, + UnormShort555 = 5, + 
UnormInt101010 = 6, + SignedInt8 = 7, + SignedInt16 = 8, + SignedInt32 = 9, + UnsignedInt8 = 10, + UnsignedInt16 = 11, + UnsignedInt32 = 12, + HalfFloat = 13, + Float = 14, + UnormInt24 = 15, + UnormInt101010_2 = 16, +}; + +enum class ImageOperandsShift : unsigned { + Bias = 0, + Lod = 1, + Grad = 2, + ConstOffset = 3, + Offset = 4, + ConstOffsets = 5, + Sample = 6, + MinLod = 7, +}; + +enum class ImageOperandsMask : unsigned { + MaskNone = 0, + Bias = 0x00000001, + Lod = 0x00000002, + Grad = 0x00000004, + ConstOffset = 0x00000008, + Offset = 0x00000010, + ConstOffsets = 0x00000020, + Sample = 0x00000040, + MinLod = 0x00000080, +}; + +enum class FPFastMathModeShift : unsigned { + NotNaN = 0, + NotInf = 1, + NSZ = 2, + AllowRecip = 3, + Fast = 4, +}; + +enum class FPFastMathModeMask : unsigned { + MaskNone = 0, + NotNaN = 0x00000001, + NotInf = 0x00000002, + NSZ = 0x00000004, + AllowRecip = 0x00000008, + Fast = 0x00000010, +}; + +enum class FPRoundingMode : unsigned { + RTE = 0, + RTZ = 1, + RTP = 2, + RTN = 3, +}; + +enum class LinkageType : unsigned { + Export = 0, + Import = 1, +}; + +enum class AccessQualifier : unsigned { + ReadOnly = 0, + WriteOnly = 1, + ReadWrite = 2, +}; + +enum class FunctionParameterAttribute : unsigned { + Zext = 0, + Sext = 1, + ByVal = 2, + Sret = 3, + NoAlias = 4, + NoCapture = 5, + NoWrite = 6, + NoReadWrite = 7, +}; + +enum class Decoration : unsigned { + RelaxedPrecision = 0, + SpecId = 1, + Block = 2, + BufferBlock = 3, + RowMajor = 4, + ColMajor = 5, + ArrayStride = 6, + MatrixStride = 7, + GLSLShared = 8, + GLSLPacked = 9, + CPacked = 10, + BuiltIn = 11, + NoPerspective = 13, + Flat = 14, + Patch = 15, + Centroid = 16, + Sample = 17, + Invariant = 18, + Restrict = 19, + Aliased = 20, + Volatile = 21, + Constant = 22, + Coherent = 23, + NonWritable = 24, + NonReadable = 25, + Uniform = 26, + SaturatedConversion = 28, + Stream = 29, + Location = 30, + Component = 31, + Index = 32, + Binding = 33, + DescriptorSet = 34, + Offset = 
35, + XfbBuffer = 36, + XfbStride = 37, + FuncParamAttr = 38, + FPRoundingMode = 39, + FPFastMathMode = 40, + LinkageAttributes = 41, + NoContraction = 42, + InputAttachmentIndex = 43, + Alignment = 44, +}; + +enum class BuiltIn : unsigned { + Position = 0, + PointSize = 1, + ClipDistance = 3, + CullDistance = 4, + VertexId = 5, + InstanceId = 6, + PrimitiveId = 7, + InvocationId = 8, + Layer = 9, + ViewportIndex = 10, + TessLevelOuter = 11, + TessLevelInner = 12, + TessCoord = 13, + PatchVertices = 14, + FragCoord = 15, + PointCoord = 16, + FrontFacing = 17, + SampleId = 18, + SamplePosition = 19, + SampleMask = 20, + FragDepth = 22, + HelperInvocation = 23, + NumWorkgroups = 24, + WorkgroupSize = 25, + WorkgroupId = 26, + LocalInvocationId = 27, + GlobalInvocationId = 28, + LocalInvocationIndex = 29, + WorkDim = 30, + GlobalSize = 31, + EnqueuedWorkgroupSize = 32, + GlobalOffset = 33, + GlobalLinearId = 34, + SubgroupSize = 36, + SubgroupMaxSize = 37, + NumSubgroups = 38, + NumEnqueuedSubgroups = 39, + SubgroupId = 40, + SubgroupLocalInvocationId = 41, + VertexIndex = 42, + InstanceIndex = 43, +}; + +enum class SelectionControlShift : unsigned { + Flatten = 0, + DontFlatten = 1, +}; + +enum class SelectionControlMask : unsigned { + MaskNone = 0, + Flatten = 0x00000001, + DontFlatten = 0x00000002, +}; + +enum class LoopControlShift : unsigned { + Unroll = 0, + DontUnroll = 1, +}; + +enum class LoopControlMask : unsigned { + MaskNone = 0, + Unroll = 0x00000001, + DontUnroll = 0x00000002, +}; + +enum class FunctionControlShift : unsigned { + Inline = 0, + DontInline = 1, + Pure = 2, + Const = 3, +}; + +enum class FunctionControlMask : unsigned { + MaskNone = 0, + Inline = 0x00000001, + DontInline = 0x00000002, + Pure = 0x00000004, + Const = 0x00000008, +}; + +enum class MemorySemanticsShift : unsigned { + Acquire = 1, + Release = 2, + AcquireRelease = 3, + SequentiallyConsistent = 4, + UniformMemory = 6, + SubgroupMemory = 7, + WorkgroupMemory = 8, + 
CrossWorkgroupMemory = 9, + AtomicCounterMemory = 10, + ImageMemory = 11, +}; + +enum class MemorySemanticsMask : unsigned { + MaskNone = 0, + Acquire = 0x00000002, + Release = 0x00000004, + AcquireRelease = 0x00000008, + SequentiallyConsistent = 0x00000010, + UniformMemory = 0x00000040, + SubgroupMemory = 0x00000080, + WorkgroupMemory = 0x00000100, + CrossWorkgroupMemory = 0x00000200, + AtomicCounterMemory = 0x00000400, + ImageMemory = 0x00000800, +}; + +enum class MemoryAccessShift : unsigned { + Volatile = 0, + Aligned = 1, + Nontemporal = 2, +}; + +enum class MemoryAccessMask : unsigned { + MaskNone = 0, + Volatile = 0x00000001, + Aligned = 0x00000002, + Nontemporal = 0x00000004, +}; + +enum class Scope : unsigned { + CrossDevice = 0, + Device = 1, + Workgroup = 2, + Subgroup = 3, + Invocation = 4, +}; + +enum class GroupOperation : unsigned { + Reduce = 0, + InclusiveScan = 1, + ExclusiveScan = 2, +}; + +enum class KernelEnqueueFlags : unsigned { + NoWait = 0, + WaitKernel = 1, + WaitWorkGroup = 2, +}; + +enum class KernelProfilingInfoShift : unsigned { + CmdExecTime = 0, +}; + +enum class KernelProfilingInfoMask : unsigned { + MaskNone = 0, + CmdExecTime = 0x00000001, +}; + +enum class Capability : unsigned { + Matrix = 0, + Shader = 1, + Geometry = 2, + Tessellation = 3, + Addresses = 4, + Linkage = 5, + Kernel = 6, + Vector16 = 7, + Float16Buffer = 8, + Float16 = 9, + Float64 = 10, + Int64 = 11, + Int64Atomics = 12, + ImageBasic = 13, + ImageReadWrite = 14, + ImageMipmap = 15, + Pipes = 17, + Groups = 18, + DeviceEnqueue = 19, + LiteralSampler = 20, + AtomicStorage = 21, + Int16 = 22, + TessellationPointSize = 23, + GeometryPointSize = 24, + ImageGatherExtended = 25, + StorageImageMultisample = 27, + UniformBufferArrayDynamicIndexing = 28, + SampledImageArrayDynamicIndexing = 29, + StorageBufferArrayDynamicIndexing = 30, + StorageImageArrayDynamicIndexing = 31, + ClipDistance = 32, + CullDistance = 33, + ImageCubeArray = 34, + SampleRateShading = 35, + 
ImageRect = 36, + SampledRect = 37, + GenericPointer = 38, + Int8 = 39, + InputAttachment = 40, + SparseResidency = 41, + MinLod = 42, + Sampled1D = 43, + Image1D = 44, + SampledCubeArray = 45, + SampledBuffer = 46, + ImageBuffer = 47, + ImageMSArray = 48, + StorageImageExtendedFormats = 49, + ImageQuery = 50, + DerivativeControl = 51, + InterpolationFunction = 52, + TransformFeedback = 53, + GeometryStreams = 54, + StorageImageReadWithoutFormat = 55, + StorageImageWriteWithoutFormat = 56, + MultiViewport = 57, +}; + +enum class Op : unsigned { + OpNop = 0, + OpUndef = 1, + OpSourceContinued = 2, + OpSource = 3, + OpSourceExtension = 4, + OpName = 5, + OpMemberName = 6, + OpString = 7, + OpLine = 8, + OpExtension = 10, + OpExtInstImport = 11, + OpExtInst = 12, + OpMemoryModel = 14, + OpEntryPoint = 15, + OpExecutionMode = 16, + OpCapability = 17, + OpTypeVoid = 19, + OpTypeBool = 20, + OpTypeInt = 21, + OpTypeFloat = 22, + OpTypeVector = 23, + OpTypeMatrix = 24, + OpTypeImage = 25, + OpTypeSampler = 26, + OpTypeSampledImage = 27, + OpTypeArray = 28, + OpTypeRuntimeArray = 29, + OpTypeStruct = 30, + OpTypeOpaque = 31, + OpTypePointer = 32, + OpTypeFunction = 33, + OpTypeEvent = 34, + OpTypeDeviceEvent = 35, + OpTypeReserveId = 36, + OpTypeQueue = 37, + OpTypePipe = 38, + OpTypeForwardPointer = 39, + OpConstantTrue = 41, + OpConstantFalse = 42, + OpConstant = 43, + OpConstantComposite = 44, + OpConstantSampler = 45, + OpConstantNull = 46, + OpSpecConstantTrue = 48, + OpSpecConstantFalse = 49, + OpSpecConstant = 50, + OpSpecConstantComposite = 51, + OpSpecConstantOp = 52, + OpFunction = 54, + OpFunctionParameter = 55, + OpFunctionEnd = 56, + OpFunctionCall = 57, + OpVariable = 59, + OpImageTexelPointer = 60, + OpLoad = 61, + OpStore = 62, + OpCopyMemory = 63, + OpCopyMemorySized = 64, + OpAccessChain = 65, + OpInBoundsAccessChain = 66, + OpPtrAccessChain = 67, + OpArrayLength = 68, + OpGenericPtrMemSemantics = 69, + OpInBoundsPtrAccessChain = 70, + OpDecorate = 71, + 
OpMemberDecorate = 72, + OpDecorationGroup = 73, + OpGroupDecorate = 74, + OpGroupMemberDecorate = 75, + OpVectorExtractDynamic = 77, + OpVectorInsertDynamic = 78, + OpVectorShuffle = 79, + OpCompositeConstruct = 80, + OpCompositeExtract = 81, + OpCompositeInsert = 82, + OpCopyObject = 83, + OpTranspose = 84, + OpSampledImage = 86, + OpImageSampleImplicitLod = 87, + OpImageSampleExplicitLod = 88, + OpImageSampleDrefImplicitLod = 89, + OpImageSampleDrefExplicitLod = 90, + OpImageSampleProjImplicitLod = 91, + OpImageSampleProjExplicitLod = 92, + OpImageSampleProjDrefImplicitLod = 93, + OpImageSampleProjDrefExplicitLod = 94, + OpImageFetch = 95, + OpImageGather = 96, + OpImageDrefGather = 97, + OpImageRead = 98, + OpImageWrite = 99, + OpImage = 100, + OpImageQueryFormat = 101, + OpImageQueryOrder = 102, + OpImageQuerySizeLod = 103, + OpImageQuerySize = 104, + OpImageQueryLod = 105, + OpImageQueryLevels = 106, + OpImageQuerySamples = 107, + OpConvertFToU = 109, + OpConvertFToS = 110, + OpConvertSToF = 111, + OpConvertUToF = 112, + OpUConvert = 113, + OpSConvert = 114, + OpFConvert = 115, + OpQuantizeToF16 = 116, + OpConvertPtrToU = 117, + OpSatConvertSToU = 118, + OpSatConvertUToS = 119, + OpConvertUToPtr = 120, + OpPtrCastToGeneric = 121, + OpGenericCastToPtr = 122, + OpGenericCastToPtrExplicit = 123, + OpBitcast = 124, + OpSNegate = 126, + OpFNegate = 127, + OpIAdd = 128, + OpFAdd = 129, + OpISub = 130, + OpFSub = 131, + OpIMul = 132, + OpFMul = 133, + OpUDiv = 134, + OpSDiv = 135, + OpFDiv = 136, + OpUMod = 137, + OpSRem = 138, + OpSMod = 139, + OpFRem = 140, + OpFMod = 141, + OpVectorTimesScalar = 142, + OpMatrixTimesScalar = 143, + OpVectorTimesMatrix = 144, + OpMatrixTimesVector = 145, + OpMatrixTimesMatrix = 146, + OpOuterProduct = 147, + OpDot = 148, + OpIAddCarry = 149, + OpISubBorrow = 150, + OpUMulExtended = 151, + OpSMulExtended = 152, + OpAny = 154, + OpAll = 155, + OpIsNan = 156, + OpIsInf = 157, + OpIsFinite = 158, + OpIsNormal = 159, + OpSignBitSet = 
160, + OpLessOrGreater = 161, + OpOrdered = 162, + OpUnordered = 163, + OpLogicalEqual = 164, + OpLogicalNotEqual = 165, + OpLogicalOr = 166, + OpLogicalAnd = 167, + OpLogicalNot = 168, + OpSelect = 169, + OpIEqual = 170, + OpINotEqual = 171, + OpUGreaterThan = 172, + OpSGreaterThan = 173, + OpUGreaterThanEqual = 174, + OpSGreaterThanEqual = 175, + OpULessThan = 176, + OpSLessThan = 177, + OpULessThanEqual = 178, + OpSLessThanEqual = 179, + OpFOrdEqual = 180, + OpFUnordEqual = 181, + OpFOrdNotEqual = 182, + OpFUnordNotEqual = 183, + OpFOrdLessThan = 184, + OpFUnordLessThan = 185, + OpFOrdGreaterThan = 186, + OpFUnordGreaterThan = 187, + OpFOrdLessThanEqual = 188, + OpFUnordLessThanEqual = 189, + OpFOrdGreaterThanEqual = 190, + OpFUnordGreaterThanEqual = 191, + OpShiftRightLogical = 194, + OpShiftRightArithmetic = 195, + OpShiftLeftLogical = 196, + OpBitwiseOr = 197, + OpBitwiseXor = 198, + OpBitwiseAnd = 199, + OpNot = 200, + OpBitFieldInsert = 201, + OpBitFieldSExtract = 202, + OpBitFieldUExtract = 203, + OpBitReverse = 204, + OpBitCount = 205, + OpDPdx = 207, + OpDPdy = 208, + OpFwidth = 209, + OpDPdxFine = 210, + OpDPdyFine = 211, + OpFwidthFine = 212, + OpDPdxCoarse = 213, + OpDPdyCoarse = 214, + OpFwidthCoarse = 215, + OpEmitVertex = 218, + OpEndPrimitive = 219, + OpEmitStreamVertex = 220, + OpEndStreamPrimitive = 221, + OpControlBarrier = 224, + OpMemoryBarrier = 225, + OpAtomicLoad = 227, + OpAtomicStore = 228, + OpAtomicExchange = 229, + OpAtomicCompareExchange = 230, + OpAtomicCompareExchangeWeak = 231, + OpAtomicIIncrement = 232, + OpAtomicIDecrement = 233, + OpAtomicIAdd = 234, + OpAtomicISub = 235, + OpAtomicSMin = 236, + OpAtomicUMin = 237, + OpAtomicSMax = 238, + OpAtomicUMax = 239, + OpAtomicAnd = 240, + OpAtomicOr = 241, + OpAtomicXor = 242, + OpPhi = 245, + OpLoopMerge = 246, + OpSelectionMerge = 247, + OpLabel = 248, + OpBranch = 249, + OpBranchConditional = 250, + OpSwitch = 251, + OpKill = 252, + OpReturn = 253, + OpReturnValue = 254, + 
OpUnreachable = 255, + OpLifetimeStart = 256, + OpLifetimeStop = 257, + OpGroupAsyncCopy = 259, + OpGroupWaitEvents = 260, + OpGroupAll = 261, + OpGroupAny = 262, + OpGroupBroadcast = 263, + OpGroupIAdd = 264, + OpGroupFAdd = 265, + OpGroupFMin = 266, + OpGroupUMin = 267, + OpGroupSMin = 268, + OpGroupFMax = 269, + OpGroupUMax = 270, + OpGroupSMax = 271, + OpReadPipe = 274, + OpWritePipe = 275, + OpReservedReadPipe = 276, + OpReservedWritePipe = 277, + OpReserveReadPipePackets = 278, + OpReserveWritePipePackets = 279, + OpCommitReadPipe = 280, + OpCommitWritePipe = 281, + OpIsValidReserveId = 282, + OpGetNumPipePackets = 283, + OpGetMaxPipePackets = 284, + OpGroupReserveReadPipePackets = 285, + OpGroupReserveWritePipePackets = 286, + OpGroupCommitReadPipe = 287, + OpGroupCommitWritePipe = 288, + OpEnqueueMarker = 291, + OpEnqueueKernel = 292, + OpGetKernelNDrangeSubGroupCount = 293, + OpGetKernelNDrangeMaxSubGroupSize = 294, + OpGetKernelWorkGroupSize = 295, + OpGetKernelPreferredWorkGroupSizeMultiple = 296, + OpRetainEvent = 297, + OpReleaseEvent = 298, + OpCreateUserEvent = 299, + OpIsValidEvent = 300, + OpSetUserEventStatus = 301, + OpCaptureEventProfilingInfo = 302, + OpGetDefaultQueue = 303, + OpBuildNDRange = 304, + OpImageSparseSampleImplicitLod = 305, + OpImageSparseSampleExplicitLod = 306, + OpImageSparseSampleDrefImplicitLod = 307, + OpImageSparseSampleDrefExplicitLod = 308, + OpImageSparseSampleProjImplicitLod = 309, + OpImageSparseSampleProjExplicitLod = 310, + OpImageSparseSampleProjDrefImplicitLod = 311, + OpImageSparseSampleProjDrefExplicitLod = 312, + OpImageSparseFetch = 313, + OpImageSparseGather = 314, + OpImageSparseDrefGather = 315, + OpImageSparseTexelsResident = 316, + OpNoLine = 317, + OpAtomicFlagTestAndSet = 318, + OpAtomicFlagClear = 319, + OpImageSparseRead = 320, +}; + +// Overload operator| for mask bit combining + +inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | 
unsigned(b)); } +inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } +inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } +inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } +inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } +inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } +inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } +inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } + +} // end namespace spv + +#endif // #ifndef spirv_HPP + diff --git a/third_party/vulkan/icd-spv.h b/third_party/vulkan/icd-spv.h new file mode 100644 index 000000000..b6640a4b3 --- /dev/null +++ b/third_party/vulkan/icd-spv.h @@ -0,0 +1,42 @@ +/* + * + * Copyright (C) 2015-2016 Valve Corporation + * Copyright (C) 2015-2016 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Cody Northrop + * + */ + +#ifndef ICD_SPV_H +#define ICD_SPV_H + +#include + +#define ICD_SPV_MAGIC 0x07230203 +#define ICD_SPV_VERSION 99 + +struct icd_spv_header { + uint32_t magic; + uint32_t version; + uint32_t gen_magic; // Generator's magic number +}; + +#endif /* ICD_SPV_H */ diff --git a/third_party/vulkan/loader/cJSON.c b/third_party/vulkan/loader/cJSON.c new file mode 100644 index 000000000..097866032 --- /dev/null +++ b/third_party/vulkan/loader/cJSON.c @@ -0,0 +1,1358 @@ +/* + Copyright (c) 2009 Dave Gamble + Copyright (c) 2015-2016 The Khronos Group Inc. + Copyright (c) 2015-2016 Valve Corporation + Copyright (c) 2015-2016 LunarG, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +/* cJSON */ +/* JSON parser in C. */ + +#include +#include +#include +#include +#include +#include +#include +#include "cJSON.h" + +static const char *ep; + +const char *cJSON_GetErrorPtr(void) { return ep; } + +static void *(*cJSON_malloc)(size_t sz) = malloc; +static void (*cJSON_free)(void *ptr) = free; + +static char *cJSON_strdup(const char *str) { + size_t len; + char *copy; + + len = strlen(str) + 1; + if (!(copy = (char *)cJSON_malloc(len))) + return 0; + memcpy(copy, str, len); + return copy; +} + +void cJSON_InitHooks(cJSON_Hooks *hooks) { + if (!hooks) { /* Reset hooks */ + cJSON_malloc = malloc; + cJSON_free = free; + return; + } + + cJSON_malloc = (hooks->malloc_fn) ? hooks->malloc_fn : malloc; + cJSON_free = (hooks->free_fn) ? hooks->free_fn : free; +} + +/* Internal constructor. */ +static cJSON *cJSON_New_Item(void) { + cJSON *node = (cJSON *)cJSON_malloc(sizeof(cJSON)); + if (node) + memset(node, 0, sizeof(cJSON)); + return node; +} + +/* Delete a cJSON structure. */ +void cJSON_Delete(cJSON *c) { + cJSON *next; + while (c) { + next = c->next; + if (!(c->type & cJSON_IsReference) && c->child) + cJSON_Delete(c->child); + if (!(c->type & cJSON_IsReference) && c->valuestring) + cJSON_free(c->valuestring); + if (!(c->type & cJSON_StringIsConst) && c->string) + cJSON_free(c->string); + cJSON_free(c); + c = next; + } +} + +/* Parse the input text to generate a number, and populate the result into item. + */ +static const char *parse_number(cJSON *item, const char *num) { + double n = 0, sign = 1, scale = 0; + int subscale = 0, signsubscale = 1; + + if (*num == '-') + sign = -1, num++; /* Has sign? 
*/ + if (*num == '0') + num++; /* is zero */ + if (*num >= '1' && *num <= '9') + do + n = (n * 10.0) + (*num++ - '0'); + while (*num >= '0' && *num <= '9'); /* Number? */ + if (*num == '.' && num[1] >= '0' && num[1] <= '9') { + num++; + do + n = (n * 10.0) + (*num++ - '0'), scale--; + while (*num >= '0' && *num <= '9'); + } /* Fractional part? */ + if (*num == 'e' || *num == 'E') /* Exponent? */ + { + num++; + if (*num == '+') + num++; + else if (*num == '-') + signsubscale = -1, num++; /* With sign? */ + while (*num >= '0' && *num <= '9') + subscale = (subscale * 10) + (*num++ - '0'); /* Number? */ + } + + n = sign * n * + pow(10.0, (scale + subscale * signsubscale)); /* number = +/- + number.fraction * + 10^+/- exponent */ + + item->valuedouble = n; + item->valueint = (int)n; + item->type = cJSON_Number; + return num; +} + +static size_t pow2gt(size_t x) { + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return x + 1; +} + +typedef struct { + char *buffer; + size_t length; + size_t offset; +} printbuffer; + +static char *ensure(printbuffer *p, size_t needed) { + char *newbuffer; + size_t newsize; + if (!p || !p->buffer) + return 0; + needed += p->offset; + if (needed <= p->length) + return p->buffer + p->offset; + + newsize = pow2gt(needed); + newbuffer = (char *)cJSON_malloc(newsize); + if (!newbuffer) { + cJSON_free(p->buffer); + p->length = 0, p->buffer = 0; + return 0; + } + if (newbuffer) + memcpy(newbuffer, p->buffer, p->length); + cJSON_free(p->buffer); + p->length = newsize; + p->buffer = newbuffer; + return newbuffer + p->offset; +} + +static size_t update(printbuffer *p) { + char *str; + if (!p || !p->buffer) + return 0; + str = p->buffer + p->offset; + return p->offset + strlen(str); +} + +/* Render the number nicely from the given item into a string. 
*/ +static char *print_number(cJSON *item, printbuffer *p) { + char *str = 0; + double d = item->valuedouble; + if (d == 0) { + if (p) + str = ensure(p, 2); + else + str = (char *)cJSON_malloc(2); /* special case for 0. */ + if (str) + strcpy(str, "0"); + } else if (fabs(((double)item->valueint) - d) <= DBL_EPSILON && + d <= INT_MAX && d >= INT_MIN) { + if (p) + str = ensure(p, 21); + else + str = (char *)cJSON_malloc( + 21); /* 2^64+1 can be represented in 21 chars. */ + if (str) + sprintf(str, "%d", item->valueint); + } else { + if (p) + str = ensure(p, 64); + else + str = (char *)cJSON_malloc(64); /* This is a nice tradeoff. */ + if (str) { + if (fabs(floor(d) - d) <= DBL_EPSILON && fabs(d) < 1.0e60) + sprintf(str, "%.0f", d); + else if (fabs(d) < 1.0e-6 || fabs(d) > 1.0e9) + sprintf(str, "%e", d); + else + sprintf(str, "%f", d); + } + } + return str; +} + +static unsigned parse_hex4(const char *str) { + unsigned h = 0; + if (*str >= '0' && *str <= '9') + h += (*str) - '0'; + else if (*str >= 'A' && *str <= 'F') + h += 10 + (*str) - 'A'; + else if (*str >= 'a' && *str <= 'f') + h += 10 + (*str) - 'a'; + else + return 0; + h = h << 4; + str++; + if (*str >= '0' && *str <= '9') + h += (*str) - '0'; + else if (*str >= 'A' && *str <= 'F') + h += 10 + (*str) - 'A'; + else if (*str >= 'a' && *str <= 'f') + h += 10 + (*str) - 'a'; + else + return 0; + h = h << 4; + str++; + if (*str >= '0' && *str <= '9') + h += (*str) - '0'; + else if (*str >= 'A' && *str <= 'F') + h += 10 + (*str) - 'A'; + else if (*str >= 'a' && *str <= 'f') + h += 10 + (*str) - 'a'; + else + return 0; + h = h << 4; + str++; + if (*str >= '0' && *str <= '9') + h += (*str) - '0'; + else if (*str >= 'A' && *str <= 'F') + h += 10 + (*str) - 'A'; + else if (*str >= 'a' && *str <= 'f') + h += 10 + (*str) - 'a'; + else + return 0; + return h; +} + +/* Parse the input text into an unescaped cstring, and populate item. 
*/ +static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, + 0xF0, 0xF8, 0xFC}; +static const char *parse_string(cJSON *item, const char *str) { + const char *ptr = str + 1; + char *ptr2; + char *out; + int len = 0; + unsigned uc, uc2; + if (*str != '\"') { + ep = str; + return 0; + } /* not a string! */ + + while (*ptr != '\"' && *ptr && ++len) + if (*ptr++ == '\\') + ptr++; /* Skip escaped quotes. */ + + out = (char *)cJSON_malloc( + len + 1); /* This is how long we need for the string, roughly. */ + if (!out) + return 0; + + ptr = str + 1; + ptr2 = out; + while (*ptr != '\"' && *ptr) { + if (*ptr != '\\') + *ptr2++ = *ptr++; + else { + ptr++; + switch (*ptr) { + case 'b': + *ptr2++ = '\b'; + break; + case 'f': + *ptr2++ = '\f'; + break; + case 'n': + *ptr2++ = '\n'; + break; + case 'r': + *ptr2++ = '\r'; + break; + case 't': + *ptr2++ = '\t'; + break; + case 'u': /* transcode utf16 to utf8. */ + uc = parse_hex4(ptr + 1); + ptr += 4; /* get the unicode char. */ + + if ((uc >= 0xDC00 && uc <= 0xDFFF) || uc == 0) + break; /* check for invalid. */ + + if (uc >= 0xD800 && + uc <= 0xDBFF) /* UTF16 surrogate pairs. */ + { + if (ptr[1] != '\\' || ptr[2] != 'u') + break; /* missing second-half of surrogate. */ + uc2 = parse_hex4(ptr + 3); + ptr += 6; + if (uc2 < 0xDC00 || uc2 > 0xDFFF) + break; /* invalid second-half of surrogate. 
*/ + uc = 0x10000 + (((uc & 0x3FF) << 10) | (uc2 & 0x3FF)); + } + + len = 4; + if (uc < 0x80) + len = 1; + else if (uc < 0x800) + len = 2; + else if (uc < 0x10000) + len = 3; + ptr2 += len; + + switch (len) { + case 4: + *--ptr2 = ((uc | 0x80) & 0xBF); + uc >>= 6; + case 3: + *--ptr2 = ((uc | 0x80) & 0xBF); + uc >>= 6; + case 2: + *--ptr2 = ((uc | 0x80) & 0xBF); + uc >>= 6; + case 1: + *--ptr2 = (uc | firstByteMark[len]); + } + ptr2 += len; + break; + default: + *ptr2++ = *ptr; + break; + } + ptr++; + } + } + *ptr2 = 0; + if (*ptr == '\"') + ptr++; + item->valuestring = out; + item->type = cJSON_String; + return ptr; +} + +/* Render the cstring provided to an escaped version that can be printed. */ +static char *print_string_ptr(const char *str, printbuffer *p) { + const char *ptr; + char *ptr2; + char *out; + size_t len = 0, flag = 0; + unsigned char token; + + for (ptr = str; *ptr; ptr++) + flag |= ((*ptr > 0 && *ptr < 32) || (*ptr == '\"') || (*ptr == '\\')) + ? 1 + : 0; + if (!flag) { + len = ptr - str; + if (p) + out = ensure(p, len + 3); + else + out = (char *)cJSON_malloc(len + 3); + if (!out) + return 0; + ptr2 = out; + *ptr2++ = '\"'; + strcpy(ptr2, str); + ptr2[len] = '\"'; + ptr2[len + 1] = 0; + return out; + } + + if (!str) { + if (p) + out = ensure(p, 3); + else + out = (char *)cJSON_malloc(3); + if (!out) + return 0; + strcpy(out, "\"\""); + return out; + } + ptr = str; + while ((token = *ptr) && ++len) { + if (strchr("\"\\\b\f\n\r\t", token)) + len++; + else if (token < 32) + len += 5; + ptr++; + } + + if (p) + out = ensure(p, len + 3); + else + out = (char *)cJSON_malloc(len + 3); + if (!out) + return 0; + + ptr2 = out; + ptr = str; + *ptr2++ = '\"'; + while (*ptr) { + if ((unsigned char)*ptr > 31 && *ptr != '\"' && *ptr != '\\') + *ptr2++ = *ptr++; + else { + *ptr2++ = '\\'; + switch (token = *ptr++) { + case '\\': + *ptr2++ = '\\'; + break; + case '\"': + *ptr2++ = '\"'; + break; + case '\b': + *ptr2++ = 'b'; + break; + case '\f': + *ptr2++ = 'f'; 
+ break; + case '\n': + *ptr2++ = 'n'; + break; + case '\r': + *ptr2++ = 'r'; + break; + case '\t': + *ptr2++ = 't'; + break; + default: + sprintf(ptr2, "u%04x", token); + ptr2 += 5; + break; /* escape and print */ + } + } + } + *ptr2++ = '\"'; + *ptr2++ = 0; + return out; +} +/* Invote print_string_ptr (which is useful) on an item. */ +static char *print_string(cJSON *item, printbuffer *p) { + return print_string_ptr(item->valuestring, p); +} + +/* Predeclare these prototypes. */ +static const char *parse_value(cJSON *item, const char *value); +static char *print_value(cJSON *item, int depth, int fmt, printbuffer *p); +static const char *parse_array(cJSON *item, const char *value); +static char *print_array(cJSON *item, int depth, int fmt, printbuffer *p); +static const char *parse_object(cJSON *item, const char *value); +static char *print_object(cJSON *item, int depth, int fmt, printbuffer *p); + +/* Utility to jump whitespace and cr/lf */ +static const char *skip(const char *in) { + while (in && *in && (unsigned char)*in <= 32) + in++; + return in; +} + +/* Parse an object - create a new root, and populate. */ +cJSON *cJSON_ParseWithOpts(const char *value, const char **return_parse_end, + int require_null_terminated) { + const char *end = 0; + cJSON *c = cJSON_New_Item(); + ep = 0; + if (!c) + return 0; /* memory fail */ + + end = parse_value(c, skip(value)); + if (!end) { + cJSON_Delete(c); + return 0; + } /* parse failure. ep is set. */ + + /* if we require null-terminated JSON without appended garbage, skip and + * then check for a null terminator */ + if (require_null_terminated) { + end = skip(end); + if (*end) { + cJSON_Delete(c); + ep = end; + return 0; + } + } + if (return_parse_end) + *return_parse_end = end; + return c; +} +/* Default options for cJSON_Parse */ +cJSON *cJSON_Parse(const char *value) { + return cJSON_ParseWithOpts(value, 0, 0); +} + +/* Render a cJSON item/entity/structure to text. 
*/ +char *cJSON_Print(cJSON *item) { return print_value(item, 0, 1, 0); } +char *cJSON_PrintUnformatted(cJSON *item) { return print_value(item, 0, 0, 0); } + +char *cJSON_PrintBuffered(cJSON *item, int prebuffer, int fmt) { + printbuffer p; + p.buffer = (char *)cJSON_malloc(prebuffer); + p.length = prebuffer; + p.offset = 0; + return print_value(item, 0, fmt, &p); + return p.buffer; +} + +/* Parser core - when encountering text, process appropriately. */ +static const char *parse_value(cJSON *item, const char *value) { + if (!value) + return 0; /* Fail on null. */ + if (!strncmp(value, "null", 4)) { + item->type = cJSON_NULL; + return value + 4; + } + if (!strncmp(value, "false", 5)) { + item->type = cJSON_False; + return value + 5; + } + if (!strncmp(value, "true", 4)) { + item->type = cJSON_True; + item->valueint = 1; + return value + 4; + } + if (*value == '\"') { + return parse_string(item, value); + } + if (*value == '-' || (*value >= '0' && *value <= '9')) { + return parse_number(item, value); + } + if (*value == '[') { + return parse_array(item, value); + } + if (*value == '{') { + return parse_object(item, value); + } + + ep = value; + return 0; /* failure. */ +} + +/* Render a value to text. 
*/ +static char *print_value(cJSON *item, int depth, int fmt, printbuffer *p) { + char *out = 0; + if (!item) + return 0; + if (p) { + switch ((item->type) & 255) { + case cJSON_NULL: { + out = ensure(p, 5); + if (out) + strcpy(out, "null"); + break; + } + case cJSON_False: { + out = ensure(p, 6); + if (out) + strcpy(out, "false"); + break; + } + case cJSON_True: { + out = ensure(p, 5); + if (out) + strcpy(out, "true"); + break; + } + case cJSON_Number: + out = print_number(item, p); + break; + case cJSON_String: + out = print_string(item, p); + break; + case cJSON_Array: + out = print_array(item, depth, fmt, p); + break; + case cJSON_Object: + out = print_object(item, depth, fmt, p); + break; + } + } else { + switch ((item->type) & 255) { + case cJSON_NULL: + out = cJSON_strdup("null"); + break; + case cJSON_False: + out = cJSON_strdup("false"); + break; + case cJSON_True: + out = cJSON_strdup("true"); + break; + case cJSON_Number: + out = print_number(item, 0); + break; + case cJSON_String: + out = print_string(item, 0); + break; + case cJSON_Array: + out = print_array(item, depth, fmt, 0); + break; + case cJSON_Object: + out = print_object(item, depth, fmt, 0); + break; + } + } + return out; +} + +/* Build an array from input text. */ +static const char *parse_array(cJSON *item, const char *value) { + cJSON *child; + if (*value != '[') { + ep = value; + return 0; + } /* not an array! */ + + item->type = cJSON_Array; + value = skip(value + 1); + if (*value == ']') + return value + 1; /* empty array. */ + + item->child = child = cJSON_New_Item(); + if (!item->child) + return 0; /* memory fail */ + value = skip( + parse_value(child, skip(value))); /* skip any spacing, get the value. 
*/ + if (!value) + return 0; + + while (*value == ',') { + cJSON *new_item; + if (!(new_item = cJSON_New_Item())) + return 0; /* memory fail */ + child->next = new_item; + new_item->prev = child; + child = new_item; + value = skip(parse_value(child, skip(value + 1))); + if (!value) + return 0; /* memory fail */ + } + + if (*value == ']') + return value + 1; /* end of array */ + ep = value; + return 0; /* malformed. */ +} + +/* Render an array to text */ +static char *print_array(cJSON *item, int depth, int fmt, printbuffer *p) { + char **entries; + char *out = 0, *ptr, *ret; + size_t len = 5; + cJSON *child = item->child; + int numentries = 0, fail = 0, j = 0; + size_t tmplen = 0, i = 0; + + /* How many entries in the array? */ + while (child) + numentries++, child = child->next; + /* Explicitly handle numentries==0 */ + if (!numentries) { + if (p) + out = ensure(p, 3); + else + out = (char *)cJSON_malloc(3); + if (out) + strcpy(out, "[]"); + return out; + } + + if (p) { + /* Compose the output array. */ + i = p->offset; + ptr = ensure(p, 1); + if (!ptr) + return 0; + *ptr = '['; + p->offset++; + child = item->child; + while (child && !fail) { + print_value(child, depth + 1, fmt, p); + p->offset = update(p); + if (child->next) { + len = fmt ? 2 : 1; + ptr = ensure(p, len + 1); + if (!ptr) + return 0; + *ptr++ = ','; + if (fmt) + *ptr++ = ' '; + *ptr = 0; + p->offset += len; + } + child = child->next; + } + ptr = ensure(p, 2); + if (!ptr) + return 0; + *ptr++ = ']'; + *ptr = 0; + out = (p->buffer) + i; + } else { + /* Allocate an array to hold the values for each */ + entries = (char **)cJSON_malloc(numentries * sizeof(char *)); + if (!entries) + return 0; + memset(entries, 0, numentries * sizeof(char *)); + /* Retrieve all the results: */ + child = item->child; + while (child && !fail) { + ret = print_value(child, depth + 1, fmt, 0); + entries[i++] = ret; + if (ret) + len += strlen(ret) + 2 + (fmt ? 
1 : 0); + else + fail = 1; + child = child->next; + } + + /* If we didn't fail, try to malloc the output string */ + if (!fail) + out = (char *)cJSON_malloc(len); + /* If that fails, we fail. */ + if (!out) + fail = 1; + + /* Handle failure. */ + if (fail) { + for (j = 0; j < numentries; j++) + if (entries[j]) + cJSON_free(entries[j]); + cJSON_free(entries); + return 0; + } + + /* Compose the output array. */ + *out = '['; + ptr = out + 1; + *ptr = 0; + for (j = 0; j < numentries; j++) { + tmplen = strlen(entries[j]); + memcpy(ptr, entries[j], tmplen); + ptr += tmplen; + if (j != numentries - 1) { + *ptr++ = ','; + if (fmt) + *ptr++ = ' '; + *ptr = 0; + } + cJSON_free(entries[j]); + } + cJSON_free(entries); + *ptr++ = ']'; + *ptr++ = 0; + } + return out; +} + +/* Build an object from the text. */ +static const char *parse_object(cJSON *item, const char *value) { + cJSON *child; + if (*value != '{') { + ep = value; + return 0; + } /* not an object! */ + + item->type = cJSON_Object; + value = skip(value + 1); + if (*value == '}') + return value + 1; /* empty array. */ + + item->child = child = cJSON_New_Item(); + if (!item->child) + return 0; + value = skip(parse_string(child, skip(value))); + if (!value) + return 0; + child->string = child->valuestring; + child->valuestring = 0; + if (*value != ':') { + ep = value; + return 0; + } /* fail! */ + value = skip(parse_value( + child, skip(value + 1))); /* skip any spacing, get the value. */ + if (!value) + return 0; + + while (*value == ',') { + cJSON *new_item; + if (!(new_item = cJSON_New_Item())) + return 0; /* memory fail */ + child->next = new_item; + new_item->prev = child; + child = new_item; + value = skip(parse_string(child, skip(value + 1))); + if (!value) + return 0; + child->string = child->valuestring; + child->valuestring = 0; + if (*value != ':') { + ep = value; + return 0; + } /* fail! */ + value = skip(parse_value( + child, skip(value + 1))); /* skip any spacing, get the value. 
*/ + if (!value) + return 0; + } + + if (*value == '}') + return value + 1; /* end of array */ + ep = value; + return 0; /* malformed. */ +} + +/* Render an object to text. */ +static char *print_object(cJSON *item, int depth, int fmt, printbuffer *p) { + char **entries = 0, **names = 0; + char *out = 0, *ptr, *ret, *str; + int j; + cJSON *child = item->child; + int numentries = 0, fail = 0, k; + size_t tmplen = 0, i = 0, len = 7; + /* Count the number of entries. */ + while (child) + numentries++, child = child->next; + /* Explicitly handle empty object case */ + if (!numentries) { + if (p) + out = ensure(p, fmt ? depth + 4 : 3); + else + out = (char *)cJSON_malloc(fmt ? depth + 4 : 3); + if (!out) + return 0; + ptr = out; + *ptr++ = '{'; + if (fmt) { + *ptr++ = '\n'; + for (j = 0; j < depth - 1; j++) + *ptr++ = '\t'; + } + *ptr++ = '}'; + *ptr++ = 0; + return out; + } + if (p) { + /* Compose the output: */ + i = p->offset; + len = fmt ? 2 : 1; + ptr = ensure(p, len + 1); + if (!ptr) + return 0; + *ptr++ = '{'; + if (fmt) + *ptr++ = '\n'; + *ptr = 0; + p->offset += len; + child = item->child; + depth++; + while (child) { + if (fmt) { + ptr = ensure(p, depth); + if (!ptr) + return 0; + for (j = 0; j < depth; j++) + *ptr++ = '\t'; + p->offset += depth; + } + print_string_ptr(child->string, p); + p->offset = update(p); + + len = fmt ? 2 : 1; + ptr = ensure(p, len); + if (!ptr) + return 0; + *ptr++ = ':'; + if (fmt) + *ptr++ = '\t'; + p->offset += len; + + print_value(child, depth, fmt, p); + p->offset = update(p); + + len = (fmt ? 1 : 0) + (child->next ? 1 : 0); + ptr = ensure(p, len + 1); + if (!ptr) + return 0; + if (child->next) + *ptr++ = ','; + if (fmt) + *ptr++ = '\n'; + *ptr = 0; + p->offset += len; + child = child->next; + } + ptr = ensure(p, fmt ? 
(depth + 1) : 2); + if (!ptr) + return 0; + if (fmt) + for (j = 0; j < depth - 1; j++) + *ptr++ = '\t'; + *ptr++ = '}'; + *ptr = 0; + out = (p->buffer) + i; + } else { + /* Allocate space for the names and the objects */ + entries = (char **)cJSON_malloc(numentries * sizeof(char *)); + if (!entries) + return 0; + names = (char **)cJSON_malloc(numentries * sizeof(char *)); + if (!names) { + cJSON_free(entries); + return 0; + } + memset(entries, 0, sizeof(char *) * numentries); + memset(names, 0, sizeof(char *) * numentries); + + /* Collect all the results into our arrays: */ + child = item->child; + depth++; + if (fmt) + len += depth; + while (child) { + names[i] = str = print_string_ptr(child->string, 0); + entries[i++] = ret = print_value(child, depth, fmt, 0); + if (str && ret) + len += strlen(ret) + strlen(str) + 2 + (fmt ? 2 + depth : 0); + else + fail = 1; + child = child->next; + } + + /* Try to allocate the output string */ + if (!fail) + out = (char *)cJSON_malloc(len); + if (!out) + fail = 1; + + /* Handle failure */ + if (fail) { + for (j = 0; j < numentries; j++) { + if (names[i]) + cJSON_free(names[j]); + if (entries[j]) + cJSON_free(entries[j]); + } + cJSON_free(names); + cJSON_free(entries); + return 0; + } + + /* Compose the output: */ + *out = '{'; + ptr = out + 1; + if (fmt) + *ptr++ = '\n'; + *ptr = 0; + for (j = 0; j < numentries; j++) { + if (fmt) + for (k = 0; k < depth; k++) + *ptr++ = '\t'; + tmplen = strlen(names[j]); + memcpy(ptr, names[j], tmplen); + ptr += tmplen; + *ptr++ = ':'; + if (fmt) + *ptr++ = '\t'; + strcpy(ptr, entries[j]); + ptr += strlen(entries[j]); + if (j != numentries - 1) + *ptr++ = ','; + if (fmt) + *ptr++ = '\n'; + *ptr = 0; + cJSON_free(names[j]); + cJSON_free(entries[j]); + } + + cJSON_free(names); + cJSON_free(entries); + if (fmt) + for (j = 0; j < depth - 1; j++) + *ptr++ = '\t'; + *ptr++ = '}'; + *ptr++ = 0; + } + return out; +} + +/* Get Array size/item / object item. 
*/ +int cJSON_GetArraySize(cJSON *array) { + cJSON *c = array->child; + int i = 0; + while (c) + i++, c = c->next; + return i; +} +cJSON *cJSON_GetArrayItem(cJSON *array, int item) { + cJSON *c = array->child; + while (c && item > 0) + item--, c = c->next; + return c; +} +cJSON *cJSON_GetObjectItem(cJSON *object, const char *string) { + cJSON *c = object->child; + while (c && strcmp(c->string, string)) + c = c->next; + return c; +} + +/* Utility for array list handling. */ +static void suffix_object(cJSON *prev, cJSON *item) { + prev->next = item; + item->prev = prev; +} +/* Utility for handling references. */ +static cJSON *create_reference(cJSON *item) { + cJSON *ref = cJSON_New_Item(); + if (!ref) + return 0; + memcpy(ref, item, sizeof(cJSON)); + ref->string = 0; + ref->type |= cJSON_IsReference; + ref->next = ref->prev = 0; + return ref; +} + +/* Add item to array/object. */ +void cJSON_AddItemToArray(cJSON *array, cJSON *item) { + cJSON *c = array->child; + if (!item) + return; + if (!c) { + array->child = item; + } else { + while (c && c->next) + c = c->next; + suffix_object(c, item); + } +} +void cJSON_AddItemToObject(cJSON *object, const char *string, cJSON *item) { + if (!item) + return; + if (item->string) + cJSON_free(item->string); + item->string = cJSON_strdup(string); + cJSON_AddItemToArray(object, item); +} +void cJSON_AddItemToObjectCS(cJSON *object, const char *string, cJSON *item) { + if (!item) + return; + if (!(item->type & cJSON_StringIsConst) && item->string) + cJSON_free(item->string); + item->string = (char *)string; + item->type |= cJSON_StringIsConst; + cJSON_AddItemToArray(object, item); +} +void cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item) { + cJSON_AddItemToArray(array, create_reference(item)); +} +void cJSON_AddItemReferenceToObject(cJSON *object, const char *string, + cJSON *item) { + cJSON_AddItemToObject(object, string, create_reference(item)); +} + +cJSON *cJSON_DetachItemFromArray(cJSON *array, int which) { + cJSON *c 
= array->child; + while (c && which > 0) + c = c->next, which--; + if (!c) + return 0; + if (c->prev) + c->prev->next = c->next; + if (c->next) + c->next->prev = c->prev; + if (c == array->child) + array->child = c->next; + c->prev = c->next = 0; + return c; +} +void cJSON_DeleteItemFromArray(cJSON *array, int which) { + cJSON_Delete(cJSON_DetachItemFromArray(array, which)); +} +cJSON *cJSON_DetachItemFromObject(cJSON *object, const char *string) { + int i = 0; + cJSON *c = object->child; + while (c && strcmp(c->string, string)) + i++, c = c->next; + if (c) + return cJSON_DetachItemFromArray(object, i); + return 0; +} +void cJSON_DeleteItemFromObject(cJSON *object, const char *string) { + cJSON_Delete(cJSON_DetachItemFromObject(object, string)); +} + +/* Replace array/object items with new ones. */ +void cJSON_InsertItemInArray(cJSON *array, int which, cJSON *newitem) { + cJSON *c = array->child; + while (c && which > 0) + c = c->next, which--; + if (!c) { + cJSON_AddItemToArray(array, newitem); + return; + } + newitem->next = c; + newitem->prev = c->prev; + c->prev = newitem; + if (c == array->child) + array->child = newitem; + else + newitem->prev->next = newitem; +} +void cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem) { + cJSON *c = array->child; + while (c && which > 0) + c = c->next, which--; + if (!c) + return; + newitem->next = c->next; + newitem->prev = c->prev; + if (newitem->next) + newitem->next->prev = newitem; + if (c == array->child) + array->child = newitem; + else + newitem->prev->next = newitem; + c->next = c->prev = 0; + cJSON_Delete(c); +} +void cJSON_ReplaceItemInObject(cJSON *object, const char *string, + cJSON *newitem) { + int i = 0; + cJSON *c = object->child; + while (c && strcmp(c->string, string)) + i++, c = c->next; + if (c) { + newitem->string = cJSON_strdup(string); + cJSON_ReplaceItemInArray(object, i, newitem); + } +} + +/* Create basic types: */ +cJSON *cJSON_CreateNull(void) { + cJSON *item = cJSON_New_Item(); + 
if (item) + item->type = cJSON_NULL; + return item; +} +cJSON *cJSON_CreateTrue(void) { + cJSON *item = cJSON_New_Item(); + if (item) + item->type = cJSON_True; + return item; +} +cJSON *cJSON_CreateFalse(void) { + cJSON *item = cJSON_New_Item(); + if (item) + item->type = cJSON_False; + return item; +} +cJSON *cJSON_CreateBool(int b) { + cJSON *item = cJSON_New_Item(); + if (item) + item->type = b ? cJSON_True : cJSON_False; + return item; +} +cJSON *cJSON_CreateNumber(double num) { + cJSON *item = cJSON_New_Item(); + if (item) { + item->type = cJSON_Number; + item->valuedouble = num; + item->valueint = (int)num; + } + return item; +} +cJSON *cJSON_CreateString(const char *string) { + cJSON *item = cJSON_New_Item(); + if (item) { + item->type = cJSON_String; + item->valuestring = cJSON_strdup(string); + } + return item; +} +cJSON *cJSON_CreateArray(void) { + cJSON *item = cJSON_New_Item(); + if (item) + item->type = cJSON_Array; + return item; +} +cJSON *cJSON_CreateObject(void) { + cJSON *item = cJSON_New_Item(); + if (item) + item->type = cJSON_Object; + return item; +} + +/* Create Arrays: */ +cJSON *cJSON_CreateIntArray(const int *numbers, int count) { + int i; + cJSON *n = 0, *p = 0, *a = cJSON_CreateArray(); + for (i = 0; a && i < count; i++) { + n = cJSON_CreateNumber(numbers[i]); + if (!i) + a->child = n; + else + suffix_object(p, n); + p = n; + } + return a; +} +cJSON *cJSON_CreateFloatArray(const float *numbers, int count) { + int i; + cJSON *n = 0, *p = 0, *a = cJSON_CreateArray(); + for (i = 0; a && i < count; i++) { + n = cJSON_CreateNumber(numbers[i]); + if (!i) + a->child = n; + else + suffix_object(p, n); + p = n; + } + return a; +} +cJSON *cJSON_CreateDoubleArray(const double *numbers, int count) { + int i; + cJSON *n = 0, *p = 0, *a = cJSON_CreateArray(); + for (i = 0; a && i < count; i++) { + n = cJSON_CreateNumber(numbers[i]); + if (!i) + a->child = n; + else + suffix_object(p, n); + p = n; + } + return a; +} +cJSON 
*cJSON_CreateStringArray(const char **strings, int count) { + int i; + cJSON *n = 0, *p = 0, *a = cJSON_CreateArray(); + for (i = 0; a && i < count; i++) { + n = cJSON_CreateString(strings[i]); + if (!i) + a->child = n; + else + suffix_object(p, n); + p = n; + } + return a; +} + +/* Duplication */ +cJSON *cJSON_Duplicate(cJSON *item, int recurse) { + cJSON *newitem, *cptr, *nptr = 0, *newchild; + /* Bail on bad ptr */ + if (!item) + return 0; + /* Create new item */ + newitem = cJSON_New_Item(); + if (!newitem) + return 0; + /* Copy over all vars */ + newitem->type = item->type & (~cJSON_IsReference), + newitem->valueint = item->valueint, + newitem->valuedouble = item->valuedouble; + if (item->valuestring) { + newitem->valuestring = cJSON_strdup(item->valuestring); + if (!newitem->valuestring) { + cJSON_Delete(newitem); + return 0; + } + } + if (item->string) { + newitem->string = cJSON_strdup(item->string); + if (!newitem->string) { + cJSON_Delete(newitem); + return 0; + } + } + /* If non-recursive, then we're done! */ + if (!recurse) + return newitem; + /* Walk the ->next chain for the child. */ + cptr = item->child; + while (cptr) { + newchild = cJSON_Duplicate( + cptr, + 1); /* Duplicate (with recurse) each item in the ->next chain */ + if (!newchild) { + cJSON_Delete(newitem); + return 0; + } + if (nptr) { + nptr->next = newchild, newchild->prev = nptr; + nptr = newchild; + } /* If newitem->child already set, then crosswire ->prev and ->next and + move on */ + else { + newitem->child = newchild; + nptr = newchild; + } /* Set newitem->child and move to it */ + cptr = cptr->next; + } + return newitem; +} + +void cJSON_Minify(char *json) { + char *into = json; + while (*json) { + if (*json == ' ') + json++; + else if (*json == '\t') + json++; /* Whitespace characters. 
*/ + else if (*json == '\r') + json++; + else if (*json == '\n') + json++; + else if (*json == '/' && json[1] == '/') + while (*json && *json != '\n') + json++; /* double-slash comments, to end of line. */ + else if (*json == '/' && json[1] == '*') { + while (*json && !(*json == '*' && json[1] == '/')) + json++; + json += 2; + } /* multiline comments. */ + else if (*json == '\"') { + *into++ = *json++; + while (*json && *json != '\"') { + if (*json == '\\') + *into++ = *json++; + *into++ = *json++; + } + *into++ = *json++; + } /* string literals, which are \" sensitive. */ + else + *into++ = *json++; /* All other characters. */ + } + *into = 0; /* and null-terminate. */ +} diff --git a/third_party/vulkan/loader/cJSON.h b/third_party/vulkan/loader/cJSON.h new file mode 100644 index 000000000..e4c747c12 --- /dev/null +++ b/third_party/vulkan/loader/cJSON.h @@ -0,0 +1,189 @@ +/* + Copyright (c) 2009 Dave Gamble + Copyright (c) 2015-2016 The Khronos Group Inc. + Copyright (c) 2015-2016 Valve Corporation + Copyright (c) 2015-2016 LunarG, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#ifndef cJSON__h +#define cJSON__h + +#ifdef __cplusplus +extern "C" { +#endif + +/* cJSON Types: */ +#define cJSON_False 0 +#define cJSON_True 1 +#define cJSON_NULL 2 +#define cJSON_Number 3 +#define cJSON_String 4 +#define cJSON_Array 5 +#define cJSON_Object 6 + +#define cJSON_IsReference 256 +#define cJSON_StringIsConst 512 + +/* The cJSON structure: */ +typedef struct cJSON { + struct cJSON *next, *prev; /* next/prev allow you to walk array/object + chains. Alternatively, use + GetArraySize/GetArrayItem/GetObjectItem */ + struct cJSON *child; /* An array or object item will have a child pointer + pointing to a chain of the items in the + array/object. */ + + int type; /* The type of the item, as above. */ + + char *valuestring; /* The item's string, if type==cJSON_String */ + int valueint; /* The item's number, if type==cJSON_Number */ + double valuedouble; /* The item's number, if type==cJSON_Number */ + + char * + string; /* The item's name string, if this item is the child of, or is + in the list of subitems of an object. */ +} cJSON; + +typedef struct cJSON_Hooks { + void *(*malloc_fn)(size_t sz); + void (*free_fn)(void *ptr); +} cJSON_Hooks; + +/* Supply malloc, realloc and free functions to cJSON */ +extern void cJSON_InitHooks(cJSON_Hooks *hooks); + +/* Supply a block of JSON, and this returns a cJSON object you can interrogate. + * Call cJSON_Delete when finished. */ +extern cJSON *cJSON_Parse(const char *value); +/* Render a cJSON entity to text for transfer/storage. Free the char* when + * finished. */ +extern char *cJSON_Print(cJSON *item); +/* Render a cJSON entity to text for transfer/storage without any formatting. + * Free the char* when finished. 
*/ +extern char *cJSON_PrintUnformatted(cJSON *item); +/* Render a cJSON entity to text using a buffered strategy. prebuffer is a guess + * at the final size. guessing well reduces reallocation. fmt=0 gives + * unformatted, =1 gives formatted */ +extern char *cJSON_PrintBuffered(cJSON *item, int prebuffer, int fmt); +/* Delete a cJSON entity and all subentities. */ +extern void cJSON_Delete(cJSON *c); + +/* Returns the number of items in an array (or object). */ +extern int cJSON_GetArraySize(cJSON *array); +/* Retrieve item number "item" from array "array". Returns NULL if unsuccessful. + */ +extern cJSON *cJSON_GetArrayItem(cJSON *array, int item); +/* Get item "string" from object. Case insensitive. */ +extern cJSON *cJSON_GetObjectItem(cJSON *object, const char *string); + +/* For analysing failed parses. This returns a pointer to the parse error. + * You'll probably need to look a few chars back to make sense of it. Defined + * when cJSON_Parse() returns 0. 0 when cJSON_Parse() succeeds. */ +extern const char *cJSON_GetErrorPtr(void); + +/* These calls create a cJSON item of the appropriate type. */ +extern cJSON *cJSON_CreateNull(void); +extern cJSON *cJSON_CreateTrue(void); +extern cJSON *cJSON_CreateFalse(void); +extern cJSON *cJSON_CreateBool(int b); +extern cJSON *cJSON_CreateNumber(double num); +extern cJSON *cJSON_CreateString(const char *string); +extern cJSON *cJSON_CreateArray(void); +extern cJSON *cJSON_CreateObject(void); + +/* These utilities create an Array of count items. */ +extern cJSON *cJSON_CreateIntArray(const int *numbers, int count); +extern cJSON *cJSON_CreateFloatArray(const float *numbers, int count); +extern cJSON *cJSON_CreateDoubleArray(const double *numbers, int count); +extern cJSON *cJSON_CreateStringArray(const char **strings, int count); + +/* Append item to the specified array/object. 
*/ +extern void cJSON_AddItemToArray(cJSON *array, cJSON *item); +extern void cJSON_AddItemToObject(cJSON *object, const char *string, + cJSON *item); +extern void cJSON_AddItemToObjectCS( + cJSON *object, const char *string, + cJSON *item); /* Use this when string is definitely const (i.e. a literal, + or as good as), and will definitely survive the cJSON + object */ +/* Append reference to item to the specified array/object. Use this when you + * want to add an existing cJSON to a new cJSON, but don't want to corrupt your + * existing cJSON. */ +extern void cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item); +extern void cJSON_AddItemReferenceToObject(cJSON *object, const char *string, + cJSON *item); + +/* Remove/Detatch items from Arrays/Objects. */ +extern cJSON *cJSON_DetachItemFromArray(cJSON *array, int which); +extern void cJSON_DeleteItemFromArray(cJSON *array, int which); +extern cJSON *cJSON_DetachItemFromObject(cJSON *object, const char *string); +extern void cJSON_DeleteItemFromObject(cJSON *object, const char *string); + +/* Update array items. */ +extern void cJSON_InsertItemInArray( + cJSON *array, int which, + cJSON *newitem); /* Shifts pre-existing items to the right. */ +extern void cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem); +extern void cJSON_ReplaceItemInObject(cJSON *object, const char *string, + cJSON *newitem); + +/* Duplicate a cJSON item */ +extern cJSON *cJSON_Duplicate(cJSON *item, int recurse); +/* Duplicate will create a new, identical cJSON item to the one you pass, in new +memory that will +need to be released. With recurse!=0, it will duplicate any children connected +to the item. +The item->next and ->prev pointers are always zero on return from Duplicate. */ + +/* ParseWithOpts allows you to require (and check) that the JSON is null + * terminated, and to retrieve the pointer to the final byte parsed. 
*/ +extern cJSON *cJSON_ParseWithOpts(const char *value, + const char **return_parse_end, + int require_null_terminated); + +extern void cJSON_Minify(char *json); + +/* Macros for creating things quickly. */ +#define cJSON_AddNullToObject(object, name) \ + cJSON_AddItemToObject(object, name, cJSON_CreateNull()) +#define cJSON_AddTrueToObject(object, name) \ + cJSON_AddItemToObject(object, name, cJSON_CreateTrue()) +#define cJSON_AddFalseToObject(object, name) \ + cJSON_AddItemToObject(object, name, cJSON_CreateFalse()) +#define cJSON_AddBoolToObject(object, name, b) \ + cJSON_AddItemToObject(object, name, cJSON_CreateBool(b)) +#define cJSON_AddNumberToObject(object, name, n) \ + cJSON_AddItemToObject(object, name, cJSON_CreateNumber(n)) +#define cJSON_AddStringToObject(object, name, s) \ + cJSON_AddItemToObject(object, name, cJSON_CreateString(s)) + +/* When assigning an integer value, it needs to be propagated to valuedouble + * too. */ +#define cJSON_SetIntValue(object, val) \ + ((object) ? (object)->valueint = (object)->valuedouble = (val) : (val)) +#define cJSON_SetNumberValue(object, val) \ + ((object) ? (object)->valueint = (object)->valuedouble = (val) : (val)) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/vulkan/loader/debug_report.c b/third_party/vulkan/loader/debug_report.c new file mode 100644 index 000000000..232fa6d6b --- /dev/null +++ b/third_party/vulkan/loader/debug_report.c @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. + * Copyright (C) 2015-2016 Google Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. 
+ * + * Author: Courtney Goeltzenleuchter + * Author: Jon Ashburn + * + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#ifndef WIN32 +#include +#else +#endif +#include "vk_loader_platform.h" +#include "debug_report.h" +#include "vulkan/vk_layer.h" + +typedef void(VKAPI_PTR *PFN_stringCallback)(char *message); + +static const VkExtensionProperties debug_report_extension_info = { + .extensionName = VK_EXT_DEBUG_REPORT_EXTENSION_NAME, + .specVersion = VK_EXT_DEBUG_REPORT_SPEC_VERSION, +}; + +void debug_report_add_instance_extensions( + const struct loader_instance *inst, + struct loader_extension_list *ext_list) { + loader_add_to_ext_list(inst, ext_list, 1, &debug_report_extension_info); +} + +void debug_report_create_instance(struct loader_instance *ptr_instance, + const VkInstanceCreateInfo *pCreateInfo) { + ptr_instance->debug_report_enabled = false; + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_EXT_DEBUG_REPORT_EXTENSION_NAME) == 0) { + ptr_instance->debug_report_enabled = true; + return; + } + } +} + +VkResult +util_CreateDebugReportCallback(struct loader_instance *inst, + VkDebugReportCallbackCreateInfoEXT *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDebugReportCallbackEXT callback) { + VkLayerDbgFunctionNode *pNewDbgFuncNode; + if (pAllocator != NULL) { + pNewDbgFuncNode = (VkLayerDbgFunctionNode *)pAllocator->pfnAllocation( + pAllocator->pUserData, sizeof(VkLayerDbgFunctionNode), + sizeof(int *), VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + } else { + pNewDbgFuncNode = (VkLayerDbgFunctionNode *)loader_heap_alloc( + inst, sizeof(VkLayerDbgFunctionNode), + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + } + if (!pNewDbgFuncNode) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + pNewDbgFuncNode->msgCallback = callback; + pNewDbgFuncNode->pfnMsgCallback = pCreateInfo->pfnCallback; + pNewDbgFuncNode->msgFlags = pCreateInfo->flags; + pNewDbgFuncNode->pUserData = 
pCreateInfo->pUserData; + pNewDbgFuncNode->pNext = inst->DbgFunctionHead; + inst->DbgFunctionHead = pNewDbgFuncNode; + + return VK_SUCCESS; +} + +static VKAPI_ATTR VkResult VKAPI_CALL debug_report_CreateDebugReportCallback( + VkInstance instance, VkDebugReportCallbackCreateInfoEXT *pCreateInfo, + VkAllocationCallbacks *pAllocator, VkDebugReportCallbackEXT *pCallback) { + struct loader_instance *inst = loader_get_instance(instance); + loader_platform_thread_lock_mutex(&loader_lock); + VkResult result = inst->disp->CreateDebugReportCallbackEXT( + instance, pCreateInfo, pAllocator, pCallback); + if (result == VK_SUCCESS) { + result = util_CreateDebugReportCallback(inst, pCreateInfo, pAllocator, + *pCallback); + } + loader_platform_thread_unlock_mutex(&loader_lock); + return result; +} + +// Utility function to handle reporting +VkBool32 util_DebugReportMessage(const struct loader_instance *inst, + VkFlags msgFlags, + VkDebugReportObjectTypeEXT objectType, + uint64_t srcObject, size_t location, + int32_t msgCode, const char *pLayerPrefix, + const char *pMsg) { + VkBool32 bail = false; + VkLayerDbgFunctionNode *pTrav = inst->DbgFunctionHead; + while (pTrav) { + if (pTrav->msgFlags & msgFlags) { + if (pTrav->pfnMsgCallback(msgFlags, objectType, srcObject, location, + msgCode, pLayerPrefix, pMsg, + pTrav->pUserData)) { + bail = true; + } + } + pTrav = pTrav->pNext; + } + + return bail; +} + +void util_DestroyDebugReportCallback(struct loader_instance *inst, + VkDebugReportCallbackEXT callback, + const VkAllocationCallbacks *pAllocator) { + VkLayerDbgFunctionNode *pTrav = inst->DbgFunctionHead; + VkLayerDbgFunctionNode *pPrev = pTrav; + + while (pTrav) { + if (pTrav->msgCallback == callback) { + pPrev->pNext = pTrav->pNext; + if (inst->DbgFunctionHead == pTrav) + inst->DbgFunctionHead = pTrav->pNext; + if (pAllocator != NULL) { + pAllocator->pfnFree(pAllocator->pUserData, pTrav); + } else { + loader_heap_free(inst, pTrav); + } + break; + } + pPrev = pTrav; + pTrav = 
pTrav->pNext; + } +} + +static VKAPI_ATTR void VKAPI_CALL +debug_report_DestroyDebugReportCallback(VkInstance instance, + VkDebugReportCallbackEXT callback, + VkAllocationCallbacks *pAllocator) { + struct loader_instance *inst = loader_get_instance(instance); + loader_platform_thread_lock_mutex(&loader_lock); + + inst->disp->DestroyDebugReportCallbackEXT(instance, callback, pAllocator); + + util_DestroyDebugReportCallback(inst, callback, pAllocator); + + loader_platform_thread_unlock_mutex(&loader_lock); +} + +static VKAPI_ATTR void VKAPI_CALL debug_report_DebugReportMessage( + VkInstance instance, VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objType, uint64_t object, size_t location, + int32_t msgCode, const char *pLayerPrefix, const char *pMsg) { + struct loader_instance *inst = loader_get_instance(instance); + + inst->disp->DebugReportMessageEXT(instance, flags, objType, object, + location, msgCode, pLayerPrefix, pMsg); +} + +/* + * This is the instance chain terminator function + * for CreateDebugReportCallback + */ + +VKAPI_ATTR VkResult VKAPI_CALL loader_CreateDebugReportCallback( + VkInstance instance, const VkDebugReportCallbackCreateInfoEXT *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDebugReportCallbackEXT *pCallback) { + VkDebugReportCallbackEXT *icd_info; + const struct loader_icd *icd; + struct loader_instance *inst = (struct loader_instance *)instance; + VkResult res; + uint32_t storage_idx; + + icd_info = calloc(sizeof(VkDebugReportCallbackEXT), inst->total_icd_count); + if (!icd_info) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + storage_idx = 0; + for (icd = inst->icds; icd; icd = icd->next) { + if (!icd->CreateDebugReportCallbackEXT) { + continue; + } + + res = icd->CreateDebugReportCallbackEXT( + icd->instance, pCreateInfo, pAllocator, &icd_info[storage_idx]); + + if (res != VK_SUCCESS) { + break; + } + storage_idx++; + } + + /* roll back on errors */ + if (icd) { + storage_idx = 0; + for (icd = inst->icds; icd; icd 
= icd->next) { + if (icd_info[storage_idx]) { + icd->DestroyDebugReportCallbackEXT( + icd->instance, icd_info[storage_idx], pAllocator); + } + storage_idx++; + } + + return res; + } + + *(VkDebugReportCallbackEXT **)pCallback = icd_info; + + return VK_SUCCESS; +} + +/* + * This is the instance chain terminator function + * for DestroyDebugReportCallback + */ +VKAPI_ATTR void VKAPI_CALL +loader_DestroyDebugReportCallback(VkInstance instance, + VkDebugReportCallbackEXT callback, + const VkAllocationCallbacks *pAllocator) { + uint32_t storage_idx; + VkDebugReportCallbackEXT *icd_info; + const struct loader_icd *icd; + + struct loader_instance *inst = (struct loader_instance *)instance; + icd_info = *(VkDebugReportCallbackEXT **)&callback; + storage_idx = 0; + for (icd = inst->icds; icd; icd = icd->next) { + if (icd_info[storage_idx]) { + icd->DestroyDebugReportCallbackEXT( + icd->instance, icd_info[storage_idx], pAllocator); + } + storage_idx++; + } +} + +/* + * This is the instance chain terminator function + * for DebugReportMessage + */ +VKAPI_ATTR void VKAPI_CALL +loader_DebugReportMessage(VkInstance instance, VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objType, uint64_t object, + size_t location, int32_t msgCode, + const char *pLayerPrefix, const char *pMsg) { + const struct loader_icd *icd; + + struct loader_instance *inst = (struct loader_instance *)instance; + + loader_platform_thread_lock_mutex(&loader_lock); + for (icd = inst->icds; icd; icd = icd->next) { + if (icd->DebugReportMessageEXT != NULL) { + icd->DebugReportMessageEXT(icd->instance, flags, objType, object, + location, msgCode, pLayerPrefix, pMsg); + } + } + + /* + * Now that all ICDs have seen the message, call the necessary callbacks. + * Ignoring "bail" return value as there is nothing to bail from at this + * point. 
+ */ + + util_DebugReportMessage(inst, flags, objType, object, location, msgCode, + pLayerPrefix, pMsg); + + loader_platform_thread_unlock_mutex(&loader_lock); +} + +bool debug_report_instance_gpa(struct loader_instance *ptr_instance, + const char *name, void **addr) { + // debug_report is currently advertised to be supported by the loader, + // so always return the entry points if name matches and it's enabled + *addr = NULL; + + if (!strcmp("vkCreateDebugReportCallbackEXT", name)) { + *addr = ptr_instance->debug_report_enabled + ? (void *)debug_report_CreateDebugReportCallback + : NULL; + return true; + } + if (!strcmp("vkDestroyDebugReportCallbackEXT", name)) { + *addr = ptr_instance->debug_report_enabled + ? (void *)debug_report_DestroyDebugReportCallback + : NULL; + return true; + } + if (!strcmp("vkDebugReportMessageEXT", name)) { + *addr = ptr_instance->debug_report_enabled + ? (void *)debug_report_DebugReportMessage + : NULL; + return true; + } + return false; +} diff --git a/third_party/vulkan/loader/debug_report.h b/third_party/vulkan/loader/debug_report.h new file mode 100644 index 000000000..7b665a5f3 --- /dev/null +++ b/third_party/vulkan/loader/debug_report.h @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. + * Copyright (C) 2015-2016 Google Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. + * + * Author: Courtney Goeltzenleuchter + * Author: Jon Ashburn + * + */ + +#include "vk_loader_platform.h" +#include "loader.h" +/* + * CreateMsgCallback is global and needs to be + * applied to all layers and ICDs. + * What happens if a layer is enabled on both the instance chain + * as well as the device chain and a call to CreateMsgCallback is made? + * Do we need to make sure that each layer / driver only gets called once? + * Should a layer implementing support for CreateMsgCallback only be allowed (?) + * to live on one chain? Or maybe make it the application's responsibility. + * If the app enables DRAW_STATE on at both CreateInstance time and CreateDevice + * time, CreateMsgCallback will call the DRAW_STATE layer twice. Once via + * the instance chain and once via the device chain. 
+ * The loader should only return the DEBUG_REPORT extension as supported + * for the GetGlobalExtensionSupport call. That should help eliminate one + * duplication. + * Since the instance chain requires us iterating over the available ICDs + * and each ICD will have it's own unique MsgCallback object we need to + * track those objects to give back the right one. + * This also implies that the loader has to intercept vkDestroyObject and + * if the extension is enabled and the object type is a MsgCallback then + * we must translate the object into the proper ICD specific ones. + * DestroyObject works on a device chain. Should not be what's destroying + * the MsgCallback object. That needs to be an instance thing. So, since + * we used an instance to create it, we need a custom Destroy that also + * takes an instance. That way we can iterate over the ICDs properly. + * Example use: + * CreateInstance: DEBUG_REPORT + * Loader will create instance chain with enabled extensions. + * TODO: Should validation layers be enabled here? If not, they will not be in + * the instance chain. + * fn = GetProcAddr(INSTANCE, "vkCreateMsgCallback") -> point to loader's + * vkCreateMsgCallback + * App creates a callback object: fn(..., &MsgCallbackObject1) + * Have only established the instance chain so far. Loader will call the + * instance chain. + * Each layer in the instance chain will call down to the next layer, + * terminating with + * the CreateMsgCallback loader terminator function that creates the actual + * MsgCallbackObject1 object. + * The loader CreateMsgCallback terminator will iterate over the ICDs. + * Calling each ICD that supports vkCreateMsgCallback and collect answers in + * icd_msg_callback_map here. + * As result is sent back up the chain each layer has opportunity to record the + * callback operation and + * appropriate MsgCallback object. + * ... 
+ * Any reports matching the flags set in MsgCallbackObject1 will generate the + * defined callback behavior + * in the layer / ICD that initiated that report. + * ... + * CreateDevice: MemTracker:... + * App does not include DEBUG_REPORT as that is a global extension. + * TODO: GetExtensionSupport must not report DEBUG_REPORT when using instance. + * App MUST include any desired validation layers or they will not participate + * in the device call chain. + * App creates a callback object: fn(..., &MsgCallbackObject2) + * Loader's vkCreateMsgCallback is called. + * Loader sends call down instance chain - this is a global extension - any + * validation layer that was + * enabled at CreateInstance will be able to register the callback. Loader will + * iterate over the ICDs and + * will record the ICD's version of the MsgCallback2 object here. + * ... + * Any report will go to the layer's report function and it will check the flags + * for MsgCallbackObject1 + * and MsgCallbackObject2 and take the appropriate action as indicated by the + * app. + * ... + * App calls vkDestroyMsgCallback( MsgCallbackObject1 ) + * Loader's DestroyMsgCallback is where call starts. DestroyMsgCallback will be + * sent down instance chain + * ending in the loader's DestroyMsgCallback terminator which will iterate over + * the ICD's destroying each + * ICD version of that MsgCallback object and then destroy the loader's version + * of the object. + * Any reports generated after this will only have MsgCallbackObject2 available. 
+ */ + +void debug_report_add_instance_extensions( + const struct loader_instance *inst, struct loader_extension_list *ext_list); + +void debug_report_create_instance(struct loader_instance *ptr_instance, + const VkInstanceCreateInfo *pCreateInfo); + +bool debug_report_instance_gpa(struct loader_instance *ptr_instance, + const char *name, void **addr); + +VKAPI_ATTR VkResult VKAPI_CALL loader_CreateDebugReportCallback( + VkInstance instance, const VkDebugReportCallbackCreateInfoEXT *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDebugReportCallbackEXT *pCallback); + +VKAPI_ATTR void VKAPI_CALL +loader_DestroyDebugReportCallback(VkInstance instance, + VkDebugReportCallbackEXT callback, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR void VKAPI_CALL +loader_DebugReportMessage(VkInstance instance, VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objType, uint64_t object, + size_t location, int32_t msgCode, + const char *pLayerPrefix, const char *pMsg); + +VkResult +util_CreateDebugReportCallback(struct loader_instance *inst, + VkDebugReportCallbackCreateInfoEXT *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDebugReportCallbackEXT callback); + +void util_DestroyDebugReportCallback(struct loader_instance *inst, + VkDebugReportCallbackEXT callback, + const VkAllocationCallbacks *pAllocator); + +VkBool32 util_DebugReportMessage(const struct loader_instance *inst, + VkFlags msgFlags, + VkDebugReportObjectTypeEXT objectType, + uint64_t srcObject, size_t location, + int32_t msgCode, const char *pLayerPrefix, + const char *pMsg); diff --git a/third_party/vulkan/loader/dev_ext_trampoline.c b/third_party/vulkan/loader/dev_ext_trampoline.c new file mode 100644 index 000000000..b752086b5 --- /dev/null +++ b/third_party/vulkan/loader/dev_ext_trampoline.c @@ -0,0 +1,2038 @@ +/* + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. 
+ * + * Author: Jon Ashburn + */ + +#include "vk_loader_platform.h" +#include "loader.h" +#if defined(__linux__) +#pragma GCC optimize(3) // force gcc to use tail-calls +#endif + +VKAPI_ATTR void VKAPI_CALL vkDevExt0(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[0](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt1(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[1](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt2(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[2](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt3(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[3](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt4(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[4](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt5(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[5](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt6(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[6](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt7(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[7](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt8(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[8](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt9(VkDevice device) { + const struct 
loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[9](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt10(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[10](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt11(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[11](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt12(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[12](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt13(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[13](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt14(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[14](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt15(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[15](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt16(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[16](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt17(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[17](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt18(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[18](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt19(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = 
loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[19](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt20(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[20](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt21(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[21](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt22(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[22](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt23(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[23](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt24(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[24](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt25(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[25](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt26(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[26](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt27(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[27](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt28(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[28](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt29(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + 
disp->ext_dispatch.DevExt[29](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt30(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[30](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt31(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[31](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt32(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[32](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt33(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[33](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt34(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[34](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt35(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[35](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt36(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[36](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt37(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[37](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt38(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[38](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt39(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[39](device); +} + +VKAPI_ATTR 
void VKAPI_CALL vkDevExt40(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[40](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt41(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[41](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt42(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[42](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt43(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[43](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt44(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[44](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt45(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[45](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt46(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[46](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt47(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[47](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt48(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[48](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt49(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[49](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt50(VkDevice device) { + const 
struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[50](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt51(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[51](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt52(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[52](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt53(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[53](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt54(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[54](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt55(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[55](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt56(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[56](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt57(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[57](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt58(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[58](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt59(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[59](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt60(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = 
loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[60](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt61(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[61](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt62(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[62](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt63(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[63](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt64(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[64](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt65(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[65](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt66(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[66](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt67(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[67](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt68(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[68](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt69(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[69](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt70(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + 
disp->ext_dispatch.DevExt[70](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt71(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[71](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt72(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[72](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt73(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[73](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt74(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[74](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt75(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[75](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt76(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[76](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt77(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[77](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt78(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[78](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt79(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[79](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt80(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[80](device); +} + +VKAPI_ATTR 
void VKAPI_CALL vkDevExt81(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[81](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt82(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[82](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt83(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[83](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt84(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[84](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt85(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[85](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt86(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[86](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt87(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[87](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt88(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[88](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt89(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[89](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt90(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[90](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt91(VkDevice device) { + const 
struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[91](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt92(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[92](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt93(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[93](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt94(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[94](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt95(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[95](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt96(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[96](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt97(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[97](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt98(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[98](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt99(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[99](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt100(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[100](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt101(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = 
loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[101](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt102(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[102](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt103(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[103](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt104(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[104](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt105(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[105](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt106(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[106](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt107(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[107](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt108(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[108](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt109(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[109](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt110(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[110](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt111(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + 
disp->ext_dispatch.DevExt[111](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt112(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[112](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt113(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[113](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt114(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[114](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt115(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[115](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt116(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[116](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt117(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[117](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt118(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[118](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt119(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[119](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt120(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[120](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt121(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[121](device); 
+} + +VKAPI_ATTR void VKAPI_CALL vkDevExt122(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[122](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt123(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[123](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt124(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[124](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt125(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[125](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt126(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[126](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt127(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[127](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt128(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[128](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt129(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[129](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt130(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[130](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt131(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[131](device); +} + +VKAPI_ATTR void VKAPI_CALL 
vkDevExt132(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[132](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt133(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[133](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt134(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[134](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt135(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[135](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt136(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[136](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt137(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[137](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt138(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[138](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt139(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[139](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt140(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[140](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt141(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[141](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt142(VkDevice device) { + const 
struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[142](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt143(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[143](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt144(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[144](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt145(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[145](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt146(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[146](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt147(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[147](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt148(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[148](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt149(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[149](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt150(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[150](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt151(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[151](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt152(VkDevice device) { + const struct loader_dev_dispatch_table *disp; 
+ disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[152](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt153(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[153](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt154(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[154](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt155(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[155](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt156(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[156](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt157(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[157](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt158(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[158](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt159(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[159](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt160(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[160](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt161(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[161](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt162(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = 
loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[162](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt163(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[163](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt164(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[164](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt165(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[165](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt166(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[166](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt167(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[167](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt168(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[168](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt169(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[169](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt170(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[170](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt171(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[171](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt172(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + 
disp->ext_dispatch.DevExt[172](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt173(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[173](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt174(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[174](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt175(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[175](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt176(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[176](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt177(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[177](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt178(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[178](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt179(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[179](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt180(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[180](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt181(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[181](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt182(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[182](device); 
+} + +VKAPI_ATTR void VKAPI_CALL vkDevExt183(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[183](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt184(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[184](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt185(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[185](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt186(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[186](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt187(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[187](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt188(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[188](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt189(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[189](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt190(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[190](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt191(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[191](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt192(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[192](device); +} + +VKAPI_ATTR void VKAPI_CALL 
vkDevExt193(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[193](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt194(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[194](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt195(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[195](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt196(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[196](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt197(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[197](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt198(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[198](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt199(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[199](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt200(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[200](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt201(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[201](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt202(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[202](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt203(VkDevice device) { + const 
struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[203](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt204(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[204](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt205(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[205](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt206(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[206](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt207(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[207](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt208(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[208](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt209(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[209](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt210(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[210](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt211(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[211](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt212(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[212](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt213(VkDevice device) { + const struct loader_dev_dispatch_table *disp; 
+ disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[213](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt214(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[214](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt215(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[215](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt216(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[216](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt217(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[217](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt218(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[218](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt219(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[219](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt220(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[220](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt221(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[221](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt222(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[222](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt223(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = 
loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[223](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt224(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[224](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt225(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[225](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt226(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[226](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt227(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[227](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt228(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[228](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt229(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[229](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt230(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[230](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt231(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[231](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt232(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[232](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt233(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + 
disp->ext_dispatch.DevExt[233](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt234(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[234](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt235(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[235](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt236(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[236](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt237(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[237](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt238(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[238](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt239(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[239](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt240(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[240](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt241(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[241](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt242(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[242](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt243(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[243](device); 
+} + +VKAPI_ATTR void VKAPI_CALL vkDevExt244(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[244](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt245(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[245](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt246(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[246](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt247(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[247](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt248(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[248](device); +} + +VKAPI_ATTR void VKAPI_CALL vkDevExt249(VkDevice device) { + const struct loader_dev_dispatch_table *disp; + disp = loader_get_dev_dispatch(device); + disp->ext_dispatch.DevExt[249](device); +} + +void *loader_get_dev_ext_trampoline(uint32_t index) { + switch (index) { + case 0: + return vkDevExt0; + case 1: + return vkDevExt1; + case 2: + return vkDevExt2; + case 3: + return vkDevExt3; + case 4: + return vkDevExt4; + case 5: + return vkDevExt5; + case 6: + return vkDevExt6; + case 7: + return vkDevExt7; + case 8: + return vkDevExt8; + case 9: + return vkDevExt9; + case 10: + return vkDevExt10; + case 11: + return vkDevExt11; + case 12: + return vkDevExt12; + case 13: + return vkDevExt13; + case 14: + return vkDevExt14; + case 15: + return vkDevExt15; + case 16: + return vkDevExt16; + case 17: + return vkDevExt17; + case 18: + return vkDevExt18; + case 19: + return vkDevExt19; + case 20: + return vkDevExt20; + case 21: + return vkDevExt21; + case 22: + return vkDevExt22; + case 23: + return 
vkDevExt23; + case 24: + return vkDevExt24; + case 25: + return vkDevExt25; + case 26: + return vkDevExt26; + case 27: + return vkDevExt27; + case 28: + return vkDevExt28; + case 29: + return vkDevExt29; + case 30: + return vkDevExt30; + case 31: + return vkDevExt31; + case 32: + return vkDevExt32; + case 33: + return vkDevExt33; + case 34: + return vkDevExt34; + case 35: + return vkDevExt35; + case 36: + return vkDevExt36; + case 37: + return vkDevExt37; + case 38: + return vkDevExt38; + case 39: + return vkDevExt39; + case 40: + return vkDevExt40; + case 41: + return vkDevExt41; + case 42: + return vkDevExt42; + case 43: + return vkDevExt43; + case 44: + return vkDevExt44; + case 45: + return vkDevExt45; + case 46: + return vkDevExt46; + case 47: + return vkDevExt47; + case 48: + return vkDevExt48; + case 49: + return vkDevExt49; + case 50: + return vkDevExt50; + case 51: + return vkDevExt51; + case 52: + return vkDevExt52; + case 53: + return vkDevExt53; + case 54: + return vkDevExt54; + case 55: + return vkDevExt55; + case 56: + return vkDevExt56; + case 57: + return vkDevExt57; + case 58: + return vkDevExt58; + case 59: + return vkDevExt59; + case 60: + return vkDevExt60; + case 61: + return vkDevExt61; + case 62: + return vkDevExt62; + case 63: + return vkDevExt63; + case 64: + return vkDevExt64; + case 65: + return vkDevExt65; + case 66: + return vkDevExt66; + case 67: + return vkDevExt67; + case 68: + return vkDevExt68; + case 69: + return vkDevExt69; + case 70: + return vkDevExt70; + case 71: + return vkDevExt71; + case 72: + return vkDevExt72; + case 73: + return vkDevExt73; + case 74: + return vkDevExt74; + case 75: + return vkDevExt75; + case 76: + return vkDevExt76; + case 77: + return vkDevExt77; + case 78: + return vkDevExt78; + case 79: + return vkDevExt79; + case 80: + return vkDevExt80; + case 81: + return vkDevExt81; + case 82: + return vkDevExt82; + case 83: + return vkDevExt83; + case 84: + return vkDevExt84; + case 85: + return vkDevExt85; + 
case 86: + return vkDevExt86; + case 87: + return vkDevExt87; + case 88: + return vkDevExt88; + case 89: + return vkDevExt89; + case 90: + return vkDevExt90; + case 91: + return vkDevExt91; + case 92: + return vkDevExt92; + case 93: + return vkDevExt93; + case 94: + return vkDevExt94; + case 95: + return vkDevExt95; + case 96: + return vkDevExt96; + case 97: + return vkDevExt97; + case 98: + return vkDevExt98; + case 99: + return vkDevExt99; + case 100: + return vkDevExt100; + case 101: + return vkDevExt101; + case 102: + return vkDevExt102; + case 103: + return vkDevExt103; + case 104: + return vkDevExt104; + case 105: + return vkDevExt105; + case 106: + return vkDevExt106; + case 107: + return vkDevExt107; + case 108: + return vkDevExt108; + case 109: + return vkDevExt109; + case 110: + return vkDevExt110; + case 111: + return vkDevExt111; + case 112: + return vkDevExt112; + case 113: + return vkDevExt113; + case 114: + return vkDevExt114; + case 115: + return vkDevExt115; + case 116: + return vkDevExt116; + case 117: + return vkDevExt117; + case 118: + return vkDevExt118; + case 119: + return vkDevExt119; + case 120: + return vkDevExt120; + case 121: + return vkDevExt121; + case 122: + return vkDevExt122; + case 123: + return vkDevExt123; + case 124: + return vkDevExt124; + case 125: + return vkDevExt125; + case 126: + return vkDevExt126; + case 127: + return vkDevExt127; + case 128: + return vkDevExt128; + case 129: + return vkDevExt129; + case 130: + return vkDevExt130; + case 131: + return vkDevExt131; + case 132: + return vkDevExt132; + case 133: + return vkDevExt133; + case 134: + return vkDevExt134; + case 135: + return vkDevExt135; + case 136: + return vkDevExt136; + case 137: + return vkDevExt137; + case 138: + return vkDevExt138; + case 139: + return vkDevExt139; + case 140: + return vkDevExt140; + case 141: + return vkDevExt141; + case 142: + return vkDevExt142; + case 143: + return vkDevExt143; + case 144: + return vkDevExt144; + case 145: + return 
vkDevExt145; + case 146: + return vkDevExt146; + case 147: + return vkDevExt147; + case 148: + return vkDevExt148; + case 149: + return vkDevExt149; + case 150: + return vkDevExt150; + case 151: + return vkDevExt151; + case 152: + return vkDevExt152; + case 153: + return vkDevExt153; + case 154: + return vkDevExt154; + case 155: + return vkDevExt155; + case 156: + return vkDevExt156; + case 157: + return vkDevExt157; + case 158: + return vkDevExt158; + case 159: + return vkDevExt159; + case 160: + return vkDevExt160; + case 161: + return vkDevExt161; + case 162: + return vkDevExt162; + case 163: + return vkDevExt163; + case 164: + return vkDevExt164; + case 165: + return vkDevExt165; + case 166: + return vkDevExt166; + case 167: + return vkDevExt167; + case 168: + return vkDevExt168; + case 169: + return vkDevExt169; + case 170: + return vkDevExt170; + case 171: + return vkDevExt171; + case 172: + return vkDevExt172; + case 173: + return vkDevExt173; + case 174: + return vkDevExt174; + case 175: + return vkDevExt175; + case 176: + return vkDevExt176; + case 177: + return vkDevExt177; + case 178: + return vkDevExt178; + case 179: + return vkDevExt179; + case 180: + return vkDevExt180; + case 181: + return vkDevExt181; + case 182: + return vkDevExt182; + case 183: + return vkDevExt183; + case 184: + return vkDevExt184; + case 185: + return vkDevExt185; + case 186: + return vkDevExt186; + case 187: + return vkDevExt187; + case 188: + return vkDevExt188; + case 189: + return vkDevExt189; + case 190: + return vkDevExt190; + case 191: + return vkDevExt191; + case 192: + return vkDevExt192; + case 193: + return vkDevExt193; + case 194: + return vkDevExt194; + case 195: + return vkDevExt195; + case 196: + return vkDevExt196; + case 197: + return vkDevExt197; + case 198: + return vkDevExt198; + case 199: + return vkDevExt199; + case 200: + return vkDevExt200; + case 201: + return vkDevExt201; + case 202: + return vkDevExt202; + case 203: + return vkDevExt203; + case 204: + 
return vkDevExt204; + case 205: + return vkDevExt205; + case 206: + return vkDevExt206; + case 207: + return vkDevExt207; + case 208: + return vkDevExt208; + case 209: + return vkDevExt209; + case 210: + return vkDevExt210; + case 211: + return vkDevExt211; + case 212: + return vkDevExt212; + case 213: + return vkDevExt213; + case 214: + return vkDevExt214; + case 215: + return vkDevExt215; + case 216: + return vkDevExt216; + case 217: + return vkDevExt217; + case 218: + return vkDevExt218; + case 219: + return vkDevExt219; + case 220: + return vkDevExt220; + case 221: + return vkDevExt221; + case 222: + return vkDevExt222; + case 223: + return vkDevExt223; + case 224: + return vkDevExt224; + case 225: + return vkDevExt225; + case 226: + return vkDevExt226; + case 227: + return vkDevExt227; + case 228: + return vkDevExt228; + case 229: + return vkDevExt229; + case 230: + return vkDevExt230; + case 231: + return vkDevExt231; + case 232: + return vkDevExt232; + case 233: + return vkDevExt233; + case 234: + return vkDevExt234; + case 235: + return vkDevExt235; + case 236: + return vkDevExt236; + case 237: + return vkDevExt237; + case 238: + return vkDevExt238; + case 239: + return vkDevExt239; + case 240: + return vkDevExt240; + case 241: + return vkDevExt241; + case 242: + return vkDevExt242; + case 243: + return vkDevExt243; + case 244: + return vkDevExt244; + case 245: + return vkDevExt245; + case 246: + return vkDevExt246; + case 247: + return vkDevExt247; + case 248: + return vkDevExt248; + case 249: + return vkDevExt249; + } + return NULL; +} diff --git a/third_party/vulkan/loader/dirent_on_windows.c b/third_party/vulkan/loader/dirent_on_windows.c new file mode 100644 index 000000000..985fb6a1a --- /dev/null +++ b/third_party/vulkan/loader/dirent_on_windows.c @@ -0,0 +1,130 @@ +/* + + Implementation of POSIX directory browsing functions and types for Win32. + + Author: Kevlin Henney (kevlin@acm.org, kevlin@curbralan.com) + History: Created March 1997. 
Updated June 2003 and July 2012. + Rights: See end of file. + +*/ +#include +#include +#include /* _findfirst and _findnext set errno iff they return -1 */ +#include +#include +#include "vk_loader_platform.h" +#include "loader.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef ptrdiff_t handle_type; /* C99's intptr_t not sufficiently portable */ + +struct DIR { + handle_type handle; /* -1 for failed rewind */ + struct _finddata_t info; + struct dirent result; /* d_name null iff first time */ + char *name; /* null-terminated char string */ +}; + +DIR *opendir(const char *name) { + DIR *dir = 0; + + if (name && name[0]) { + size_t base_length = strlen(name); + const char *all = /* search pattern must end with suitable wildcard */ + strchr("/\\", name[base_length - 1]) ? "*" : "/*"; + + if ((dir = (DIR *)loader_tls_heap_alloc(sizeof *dir)) != 0 && + (dir->name = (char *)loader_tls_heap_alloc(base_length + + strlen(all) + 1)) != 0) { + strcat(strcpy(dir->name, name), all); + + if ((dir->handle = + (handle_type)_findfirst(dir->name, &dir->info)) != -1) { + dir->result.d_name = 0; + } else /* rollback */ + { + loader_tls_heap_free(dir->name); + loader_tls_heap_free(dir); + dir = 0; + } + } else /* rollback */ + { + loader_tls_heap_free(dir); + dir = 0; + errno = ENOMEM; + } + } else { + errno = EINVAL; + } + + return dir; +} + +int closedir(DIR *dir) { + int result = -1; + + if (dir) { + if (dir->handle != -1) { + result = _findclose(dir->handle); + } + + loader_tls_heap_free(dir->name); + loader_tls_heap_free(dir); + } + + if (result == -1) /* map all errors to EBADF */ + { + errno = EBADF; + } + + return result; +} + +struct dirent *readdir(DIR *dir) { + struct dirent *result = 0; + + if (dir && dir->handle != -1) { + if (!dir->result.d_name || _findnext(dir->handle, &dir->info) != -1) { + result = &dir->result; + result->d_name = dir->info.name; + } + } else { + errno = EBADF; + } + + return result; +} + +void rewinddir(DIR *dir) { + if (dir && dir->handle != 
-1) { + _findclose(dir->handle); + dir->handle = (handle_type)_findfirst(dir->name, &dir->info); + dir->result.d_name = 0; + } else { + errno = EBADF; + } +} + +#ifdef __cplusplus +} +#endif + +/* + + Copyright Kevlin Henney, 1997, 2003, 2012. All rights reserved. + Copyright (c) 2015 The Khronos Group Inc. + Copyright (c) 2015 Valve Corporation + Copyright (c) 2015 LunarG, Inc. + Permission to use, copy, modify, and distribute this software and its + documentation for any purpose is hereby granted without fee, provided + that this copyright and permissions notice appear in all copies and + derivatives. + + This software is supplied "as is" without express or implied warranty. + + But that said, if there are any problems please get in touch. + +*/ diff --git a/third_party/vulkan/loader/dirent_on_windows.h b/third_party/vulkan/loader/dirent_on_windows.h new file mode 100644 index 000000000..8600f8ef0 --- /dev/null +++ b/third_party/vulkan/loader/dirent_on_windows.h @@ -0,0 +1,51 @@ +#ifndef DIRENT_INCLUDED +#define DIRENT_INCLUDED + +/* + + Declaration of POSIX directory browsing functions and types for Win32. + + Author: Kevlin Henney (kevlin@acm.org, kevlin@curbralan.com) + History: Created March 1997. Updated June 2003. + Rights: See end of file. + +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct DIR DIR; + +struct dirent { + char *d_name; +}; + +DIR *opendir(const char *); +int closedir(DIR *); +struct dirent *readdir(DIR *); +void rewinddir(DIR *); + +/* + + Copyright Kevlin Henney, 1997, 2003. All rights reserved. + Copyright (c) 2015 The Khronos Group Inc. + Copyright (c) 2015 Valve Corporation + Copyright (c) 2015 LunarG, Inc. + + Permission to use, copy, modify, and distribute this software and its + documentation for any purpose is hereby granted without fee, provided + that this copyright and permissions notice appear in all copies and + derivatives. + + This software is supplied "as is" without express or implied warranty. 
+ + But that said, if there are any problems please get in touch. + +*/ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/vulkan/loader/gpa_helper.h b/third_party/vulkan/loader/gpa_helper.h new file mode 100644 index 000000000..664d3dbc5 --- /dev/null +++ b/third_party/vulkan/loader/gpa_helper.h @@ -0,0 +1,379 @@ +/* + * + * Copyright (c) 2015 The Khronos Group Inc. + * Copyright (c) 2015 Valve Corporation + * Copyright (c) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. 
+ * + * Author: Jon Ashburn + */ + +#include +#include "debug_report.h" +#include "wsi.h" + +static inline void *trampolineGetProcAddr(struct loader_instance *inst, + const char *funcName) { + // Don't include or check global functions + if (!strcmp(funcName, "vkGetInstanceProcAddr")) + return (PFN_vkVoidFunction)vkGetInstanceProcAddr; + if (!strcmp(funcName, "vkDestroyInstance")) + return (PFN_vkVoidFunction)vkDestroyInstance; + if (!strcmp(funcName, "vkEnumeratePhysicalDevices")) + return (PFN_vkVoidFunction)vkEnumeratePhysicalDevices; + if (!strcmp(funcName, "vkGetPhysicalDeviceFeatures")) + return (PFN_vkVoidFunction)vkGetPhysicalDeviceFeatures; + if (!strcmp(funcName, "vkGetPhysicalDeviceFormatProperties")) + return (PFN_vkVoidFunction)vkGetPhysicalDeviceFormatProperties; + if (!strcmp(funcName, "vkGetPhysicalDeviceImageFormatProperties")) + return (PFN_vkVoidFunction)vkGetPhysicalDeviceImageFormatProperties; + if (!strcmp(funcName, "vkGetPhysicalDeviceSparseImageFormatProperties")) + return ( + PFN_vkVoidFunction)vkGetPhysicalDeviceSparseImageFormatProperties; + if (!strcmp(funcName, "vkGetPhysicalDeviceProperties")) + return (PFN_vkVoidFunction)vkGetPhysicalDeviceProperties; + if (!strcmp(funcName, "vkGetPhysicalDeviceQueueFamilyProperties")) + return (PFN_vkVoidFunction)vkGetPhysicalDeviceQueueFamilyProperties; + if (!strcmp(funcName, "vkGetPhysicalDeviceMemoryProperties")) + return (PFN_vkVoidFunction)vkGetPhysicalDeviceMemoryProperties; + if (!strcmp(funcName, "vkEnumerateDeviceLayerProperties")) + return (PFN_vkVoidFunction)vkEnumerateDeviceLayerProperties; + if (!strcmp(funcName, "vkEnumerateDeviceExtensionProperties")) + return (PFN_vkVoidFunction)vkEnumerateDeviceExtensionProperties; + if (!strcmp(funcName, "vkCreateDevice")) + return (PFN_vkVoidFunction)vkCreateDevice; + if (!strcmp(funcName, "vkGetDeviceProcAddr")) + return (PFN_vkVoidFunction)vkGetDeviceProcAddr; + if (!strcmp(funcName, "vkDestroyDevice")) + return 
(PFN_vkVoidFunction)vkDestroyDevice; + if (!strcmp(funcName, "vkGetDeviceQueue")) + return (PFN_vkVoidFunction)vkGetDeviceQueue; + if (!strcmp(funcName, "vkQueueSubmit")) + return (PFN_vkVoidFunction)vkQueueSubmit; + if (!strcmp(funcName, "vkQueueWaitIdle")) + return (PFN_vkVoidFunction)vkQueueWaitIdle; + if (!strcmp(funcName, "vkDeviceWaitIdle")) + return (PFN_vkVoidFunction)vkDeviceWaitIdle; + if (!strcmp(funcName, "vkAllocateMemory")) + return (PFN_vkVoidFunction)vkAllocateMemory; + if (!strcmp(funcName, "vkFreeMemory")) + return (PFN_vkVoidFunction)vkFreeMemory; + if (!strcmp(funcName, "vkMapMemory")) + return (PFN_vkVoidFunction)vkMapMemory; + if (!strcmp(funcName, "vkUnmapMemory")) + return (PFN_vkVoidFunction)vkUnmapMemory; + if (!strcmp(funcName, "vkFlushMappedMemoryRanges")) + return (PFN_vkVoidFunction)vkFlushMappedMemoryRanges; + if (!strcmp(funcName, "vkInvalidateMappedMemoryRanges")) + return (PFN_vkVoidFunction)vkInvalidateMappedMemoryRanges; + if (!strcmp(funcName, "vkGetDeviceMemoryCommitment")) + return (PFN_vkVoidFunction)vkGetDeviceMemoryCommitment; + if (!strcmp(funcName, "vkGetImageSparseMemoryRequirements")) + return (PFN_vkVoidFunction)vkGetImageSparseMemoryRequirements; + if (!strcmp(funcName, "vkGetImageMemoryRequirements")) + return (PFN_vkVoidFunction)vkGetImageMemoryRequirements; + if (!strcmp(funcName, "vkGetBufferMemoryRequirements")) + return (PFN_vkVoidFunction)vkGetBufferMemoryRequirements; + if (!strcmp(funcName, "vkBindImageMemory")) + return (PFN_vkVoidFunction)vkBindImageMemory; + if (!strcmp(funcName, "vkBindBufferMemory")) + return (PFN_vkVoidFunction)vkBindBufferMemory; + if (!strcmp(funcName, "vkQueueBindSparse")) + return (PFN_vkVoidFunction)vkQueueBindSparse; + if (!strcmp(funcName, "vkCreateFence")) + return (PFN_vkVoidFunction)vkCreateFence; + if (!strcmp(funcName, "vkDestroyFence")) + return (PFN_vkVoidFunction)vkDestroyFence; + if (!strcmp(funcName, "vkGetFenceStatus")) + return (PFN_vkVoidFunction)vkGetFenceStatus; + 
if (!strcmp(funcName, "vkResetFences")) + return (PFN_vkVoidFunction)vkResetFences; + if (!strcmp(funcName, "vkWaitForFences")) + return (PFN_vkVoidFunction)vkWaitForFences; + if (!strcmp(funcName, "vkCreateSemaphore")) + return (PFN_vkVoidFunction)vkCreateSemaphore; + if (!strcmp(funcName, "vkDestroySemaphore")) + return (PFN_vkVoidFunction)vkDestroySemaphore; + if (!strcmp(funcName, "vkCreateEvent")) + return (PFN_vkVoidFunction)vkCreateEvent; + if (!strcmp(funcName, "vkDestroyEvent")) + return (PFN_vkVoidFunction)vkDestroyEvent; + if (!strcmp(funcName, "vkGetEventStatus")) + return (PFN_vkVoidFunction)vkGetEventStatus; + if (!strcmp(funcName, "vkSetEvent")) + return (PFN_vkVoidFunction)vkSetEvent; + if (!strcmp(funcName, "vkResetEvent")) + return (PFN_vkVoidFunction)vkResetEvent; + if (!strcmp(funcName, "vkCreateQueryPool")) + return (PFN_vkVoidFunction)vkCreateQueryPool; + if (!strcmp(funcName, "vkDestroyQueryPool")) + return (PFN_vkVoidFunction)vkDestroyQueryPool; + if (!strcmp(funcName, "vkGetQueryPoolResults")) + return (PFN_vkVoidFunction)vkGetQueryPoolResults; + if (!strcmp(funcName, "vkCreateBuffer")) + return (PFN_vkVoidFunction)vkCreateBuffer; + if (!strcmp(funcName, "vkDestroyBuffer")) + return (PFN_vkVoidFunction)vkDestroyBuffer; + if (!strcmp(funcName, "vkCreateBufferView")) + return (PFN_vkVoidFunction)vkCreateBufferView; + if (!strcmp(funcName, "vkDestroyBufferView")) + return (PFN_vkVoidFunction)vkDestroyBufferView; + if (!strcmp(funcName, "vkCreateImage")) + return (PFN_vkVoidFunction)vkCreateImage; + if (!strcmp(funcName, "vkDestroyImage")) + return (PFN_vkVoidFunction)vkDestroyImage; + if (!strcmp(funcName, "vkGetImageSubresourceLayout")) + return (PFN_vkVoidFunction)vkGetImageSubresourceLayout; + if (!strcmp(funcName, "vkCreateImageView")) + return (PFN_vkVoidFunction)vkCreateImageView; + if (!strcmp(funcName, "vkDestroyImageView")) + return (PFN_vkVoidFunction)vkDestroyImageView; + if (!strcmp(funcName, "vkCreateShaderModule")) + return 
(PFN_vkVoidFunction)vkCreateShaderModule; + if (!strcmp(funcName, "vkDestroyShaderModule")) + return (PFN_vkVoidFunction)vkDestroyShaderModule; + if (!strcmp(funcName, "vkCreatePipelineCache")) + return (PFN_vkVoidFunction)vkCreatePipelineCache; + if (!strcmp(funcName, "vkDestroyPipelineCache")) + return (PFN_vkVoidFunction)vkDestroyPipelineCache; + if (!strcmp(funcName, "vkGetPipelineCacheData")) + return (PFN_vkVoidFunction)vkGetPipelineCacheData; + if (!strcmp(funcName, "vkMergePipelineCaches")) + return (PFN_vkVoidFunction)vkMergePipelineCaches; + if (!strcmp(funcName, "vkCreateGraphicsPipelines")) + return (PFN_vkVoidFunction)vkCreateGraphicsPipelines; + if (!strcmp(funcName, "vkCreateComputePipelines")) + return (PFN_vkVoidFunction)vkCreateComputePipelines; + if (!strcmp(funcName, "vkDestroyPipeline")) + return (PFN_vkVoidFunction)vkDestroyPipeline; + if (!strcmp(funcName, "vkCreatePipelineLayout")) + return (PFN_vkVoidFunction)vkCreatePipelineLayout; + if (!strcmp(funcName, "vkDestroyPipelineLayout")) + return (PFN_vkVoidFunction)vkDestroyPipelineLayout; + if (!strcmp(funcName, "vkCreateSampler")) + return (PFN_vkVoidFunction)vkCreateSampler; + if (!strcmp(funcName, "vkDestroySampler")) + return (PFN_vkVoidFunction)vkDestroySampler; + if (!strcmp(funcName, "vkCreateDescriptorSetLayout")) + return (PFN_vkVoidFunction)vkCreateDescriptorSetLayout; + if (!strcmp(funcName, "vkDestroyDescriptorSetLayout")) + return (PFN_vkVoidFunction)vkDestroyDescriptorSetLayout; + if (!strcmp(funcName, "vkCreateDescriptorPool")) + return (PFN_vkVoidFunction)vkCreateDescriptorPool; + if (!strcmp(funcName, "vkDestroyDescriptorPool")) + return (PFN_vkVoidFunction)vkDestroyDescriptorPool; + if (!strcmp(funcName, "vkResetDescriptorPool")) + return (PFN_vkVoidFunction)vkResetDescriptorPool; + if (!strcmp(funcName, "vkAllocateDescriptorSets")) + return (PFN_vkVoidFunction)vkAllocateDescriptorSets; + if (!strcmp(funcName, "vkFreeDescriptorSets")) + return 
(PFN_vkVoidFunction)vkFreeDescriptorSets; + if (!strcmp(funcName, "vkUpdateDescriptorSets")) + return (PFN_vkVoidFunction)vkUpdateDescriptorSets; + if (!strcmp(funcName, "vkCreateFramebuffer")) + return (PFN_vkVoidFunction)vkCreateFramebuffer; + if (!strcmp(funcName, "vkDestroyFramebuffer")) + return (PFN_vkVoidFunction)vkDestroyFramebuffer; + if (!strcmp(funcName, "vkCreateRenderPass")) + return (PFN_vkVoidFunction)vkCreateRenderPass; + if (!strcmp(funcName, "vkDestroyRenderPass")) + return (PFN_vkVoidFunction)vkDestroyRenderPass; + if (!strcmp(funcName, "vkGetRenderAreaGranularity")) + return (PFN_vkVoidFunction)vkGetRenderAreaGranularity; + if (!strcmp(funcName, "vkCreateCommandPool")) + return (PFN_vkVoidFunction)vkCreateCommandPool; + if (!strcmp(funcName, "vkDestroyCommandPool")) + return (PFN_vkVoidFunction)vkDestroyCommandPool; + if (!strcmp(funcName, "vkResetCommandPool")) + return (PFN_vkVoidFunction)vkResetCommandPool; + if (!strcmp(funcName, "vkAllocateCommandBuffers")) + return (PFN_vkVoidFunction)vkAllocateCommandBuffers; + if (!strcmp(funcName, "vkFreeCommandBuffers")) + return (PFN_vkVoidFunction)vkFreeCommandBuffers; + if (!strcmp(funcName, "vkBeginCommandBuffer")) + return (PFN_vkVoidFunction)vkBeginCommandBuffer; + if (!strcmp(funcName, "vkEndCommandBuffer")) + return (PFN_vkVoidFunction)vkEndCommandBuffer; + if (!strcmp(funcName, "vkResetCommandBuffer")) + return (PFN_vkVoidFunction)vkResetCommandBuffer; + if (!strcmp(funcName, "vkCmdBindPipeline")) + return (PFN_vkVoidFunction)vkCmdBindPipeline; + if (!strcmp(funcName, "vkCmdBindDescriptorSets")) + return (PFN_vkVoidFunction)vkCmdBindDescriptorSets; + if (!strcmp(funcName, "vkCmdBindVertexBuffers")) + return (PFN_vkVoidFunction)vkCmdBindVertexBuffers; + if (!strcmp(funcName, "vkCmdBindIndexBuffer")) + return (PFN_vkVoidFunction)vkCmdBindIndexBuffer; + if (!strcmp(funcName, "vkCmdSetViewport")) + return (PFN_vkVoidFunction)vkCmdSetViewport; + if (!strcmp(funcName, "vkCmdSetScissor")) + return 
(PFN_vkVoidFunction)vkCmdSetScissor; + if (!strcmp(funcName, "vkCmdSetLineWidth")) + return (PFN_vkVoidFunction)vkCmdSetLineWidth; + if (!strcmp(funcName, "vkCmdSetDepthBias")) + return (PFN_vkVoidFunction)vkCmdSetDepthBias; + if (!strcmp(funcName, "vkCmdSetBlendConstants")) + return (PFN_vkVoidFunction)vkCmdSetBlendConstants; + if (!strcmp(funcName, "vkCmdSetDepthBounds")) + return (PFN_vkVoidFunction)vkCmdSetDepthBounds; + if (!strcmp(funcName, "vkCmdSetStencilCompareMask")) + return (PFN_vkVoidFunction)vkCmdSetStencilCompareMask; + if (!strcmp(funcName, "vkCmdSetStencilWriteMask")) + return (PFN_vkVoidFunction)vkCmdSetStencilWriteMask; + if (!strcmp(funcName, "vkCmdSetStencilReference")) + return (PFN_vkVoidFunction)vkCmdSetStencilReference; + if (!strcmp(funcName, "vkCmdDraw")) + return (PFN_vkVoidFunction)vkCmdDraw; + if (!strcmp(funcName, "vkCmdDrawIndexed")) + return (PFN_vkVoidFunction)vkCmdDrawIndexed; + if (!strcmp(funcName, "vkCmdDrawIndirect")) + return (PFN_vkVoidFunction)vkCmdDrawIndirect; + if (!strcmp(funcName, "vkCmdDrawIndexedIndirect")) + return (PFN_vkVoidFunction)vkCmdDrawIndexedIndirect; + if (!strcmp(funcName, "vkCmdDispatch")) + return (PFN_vkVoidFunction)vkCmdDispatch; + if (!strcmp(funcName, "vkCmdDispatchIndirect")) + return (PFN_vkVoidFunction)vkCmdDispatchIndirect; + if (!strcmp(funcName, "vkCmdCopyBuffer")) + return (PFN_vkVoidFunction)vkCmdCopyBuffer; + if (!strcmp(funcName, "vkCmdCopyImage")) + return (PFN_vkVoidFunction)vkCmdCopyImage; + if (!strcmp(funcName, "vkCmdBlitImage")) + return (PFN_vkVoidFunction)vkCmdBlitImage; + if (!strcmp(funcName, "vkCmdCopyBufferToImage")) + return (PFN_vkVoidFunction)vkCmdCopyBufferToImage; + if (!strcmp(funcName, "vkCmdCopyImageToBuffer")) + return (PFN_vkVoidFunction)vkCmdCopyImageToBuffer; + if (!strcmp(funcName, "vkCmdUpdateBuffer")) + return (PFN_vkVoidFunction)vkCmdUpdateBuffer; + if (!strcmp(funcName, "vkCmdFillBuffer")) + return (PFN_vkVoidFunction)vkCmdFillBuffer; + if (!strcmp(funcName, 
"vkCmdClearColorImage")) + return (PFN_vkVoidFunction)vkCmdClearColorImage; + if (!strcmp(funcName, "vkCmdClearDepthStencilImage")) + return (PFN_vkVoidFunction)vkCmdClearDepthStencilImage; + if (!strcmp(funcName, "vkCmdClearAttachments")) + return (PFN_vkVoidFunction)vkCmdClearAttachments; + if (!strcmp(funcName, "vkCmdResolveImage")) + return (PFN_vkVoidFunction)vkCmdResolveImage; + if (!strcmp(funcName, "vkCmdSetEvent")) + return (PFN_vkVoidFunction)vkCmdSetEvent; + if (!strcmp(funcName, "vkCmdResetEvent")) + return (PFN_vkVoidFunction)vkCmdResetEvent; + if (!strcmp(funcName, "vkCmdWaitEvents")) + return (PFN_vkVoidFunction)vkCmdWaitEvents; + if (!strcmp(funcName, "vkCmdPipelineBarrier")) + return (PFN_vkVoidFunction)vkCmdPipelineBarrier; + if (!strcmp(funcName, "vkCmdBeginQuery")) + return (PFN_vkVoidFunction)vkCmdBeginQuery; + if (!strcmp(funcName, "vkCmdEndQuery")) + return (PFN_vkVoidFunction)vkCmdEndQuery; + if (!strcmp(funcName, "vkCmdResetQueryPool")) + return (PFN_vkVoidFunction)vkCmdResetQueryPool; + if (!strcmp(funcName, "vkCmdWriteTimestamp")) + return (PFN_vkVoidFunction)vkCmdWriteTimestamp; + if (!strcmp(funcName, "vkCmdCopyQueryPoolResults")) + return (PFN_vkVoidFunction)vkCmdCopyQueryPoolResults; + if (!strcmp(funcName, "vkCmdPushConstants")) + return (PFN_vkVoidFunction)vkCmdPushConstants; + if (!strcmp(funcName, "vkCmdBeginRenderPass")) + return (PFN_vkVoidFunction)vkCmdBeginRenderPass; + if (!strcmp(funcName, "vkCmdNextSubpass")) + return (PFN_vkVoidFunction)vkCmdNextSubpass; + if (!strcmp(funcName, "vkCmdEndRenderPass")) + return (PFN_vkVoidFunction)vkCmdEndRenderPass; + if (!strcmp(funcName, "vkCmdExecuteCommands")) + return (PFN_vkVoidFunction)vkCmdExecuteCommands; + + // Instance extensions + void *addr; + if (debug_report_instance_gpa(inst, funcName, &addr)) + return addr; + + if (wsi_swapchain_instance_gpa(inst, funcName, &addr)) + return addr; + + addr = loader_dev_ext_gpa(inst, funcName); + return addr; +} + +static inline void 
*globalGetProcAddr(const char *name) { + if (!name || name[0] != 'v' || name[1] != 'k') + return NULL; + + name += 2; + if (!strcmp(name, "CreateInstance")) + return (void *)vkCreateInstance; + if (!strcmp(name, "EnumerateInstanceExtensionProperties")) + return (void *)vkEnumerateInstanceExtensionProperties; + if (!strcmp(name, "EnumerateInstanceLayerProperties")) + return (void *)vkEnumerateInstanceLayerProperties; + + return NULL; +} + +/* These functions require special handling by the loader. +* They are not just generic trampoline code entrypoints. +* Thus GPA must return loader entrypoint for these instead of first function +* in the chain. */ +static inline void *loader_non_passthrough_gipa(const char *name) { + if (!name || name[0] != 'v' || name[1] != 'k') + return NULL; + + name += 2; + if (!strcmp(name, "CreateInstance")) + return (void *)vkCreateInstance; + if (!strcmp(name, "DestroyInstance")) + return (void *)vkDestroyInstance; + if (!strcmp(name, "GetDeviceProcAddr")) + return (void *)vkGetDeviceProcAddr; + // remove once no longer locks + if (!strcmp(name, "EnumeratePhysicalDevices")) + return (void *)vkEnumeratePhysicalDevices; + if (!strcmp(name, "EnumerateDeviceExtensionProperties")) + return (void *)vkEnumerateDeviceExtensionProperties; + if (!strcmp(name, "EnumerateDeviceLayerProperties")) + return (void *)vkEnumerateDeviceLayerProperties; + if (!strcmp(name, "GetInstanceProcAddr")) + return (void *)vkGetInstanceProcAddr; + if (!strcmp(name, "CreateDevice")) + return (void *)vkCreateDevice; + + return NULL; +} + +static inline void *loader_non_passthrough_gdpa(const char *name) { + if (!name || name[0] != 'v' || name[1] != 'k') + return NULL; + + name += 2; + + if (!strcmp(name, "GetDeviceProcAddr")) + return (void *)vkGetDeviceProcAddr; + if (!strcmp(name, "DestroyDevice")) + return (void *)vkDestroyDevice; + if (!strcmp(name, "GetDeviceQueue")) + return (void *)vkGetDeviceQueue; + if (!strcmp(name, "AllocateCommandBuffers")) + return (void 
*)vkAllocateCommandBuffers; + + return NULL; +} diff --git a/third_party/vulkan/loader/loader.c b/third_party/vulkan/loader/loader.c new file mode 100644 index 000000000..5de0ffba3 --- /dev/null +++ b/third_party/vulkan/loader/loader.c @@ -0,0 +1,4504 @@ +/* + * + * Copyright (c) 2014-2016 The Khronos Group Inc. + * Copyright (c) 2014-2016 Valve Corporation + * Copyright (c) 2014-2016 LunarG, Inc. + * Copyright (C) 2015 Google Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. 
+ * + * Author: Jon Ashburn + * Author: Courtney Goeltzenleuchter + * + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include +#if defined(_WIN32) +#include "dirent_on_windows.h" +#else // _WIN32 +#include +#endif // _WIN32 +#include "vk_loader_platform.h" +#include "loader.h" +#include "gpa_helper.h" +#include "table_ops.h" +#include "debug_report.h" +#include "wsi.h" +#include "vulkan/vk_icd.h" +#include "cJSON.h" +#include "murmurhash.h" + +static loader_platform_dl_handle +loader_add_layer_lib(const struct loader_instance *inst, const char *chain_type, + struct loader_layer_properties *layer_prop); + +static void loader_remove_layer_lib(struct loader_instance *inst, + struct loader_layer_properties *layer_prop); + +struct loader_struct loader = {0}; +// TLS for instance for alloc/free callbacks +THREAD_LOCAL_DECL struct loader_instance *tls_instance; + +static bool loader_init_generic_list(const struct loader_instance *inst, + struct loader_generic_list *list_info, + size_t element_size); + +static size_t loader_platform_combine_path(char *dest, size_t len, ...); + +struct loader_phys_dev_per_icd { + uint32_t count; + VkPhysicalDevice *phys_devs; +}; + +enum loader_debug { + LOADER_INFO_BIT = 0x01, + LOADER_WARN_BIT = 0x02, + LOADER_PERF_BIT = 0x04, + LOADER_ERROR_BIT = 0x08, + LOADER_DEBUG_BIT = 0x10, +}; + +uint32_t g_loader_debug = 0; +uint32_t g_loader_log_msgs = 0; + +// thread safety lock for accessing global data structures such as "loader" +// all entrypoints on the instance chain need to be locked except GPA +// additionally CreateDevice and DestroyDevice needs to be locked +loader_platform_thread_mutex loader_lock; +loader_platform_thread_mutex loader_json_lock; + +const char *std_validation_str = "VK_LAYER_LUNARG_standard_validation"; + +// This table contains the loader's instance dispatch table, which contains +// default functions if no instance layers are activated. 
This contains +// pointers to "terminator functions". +const VkLayerInstanceDispatchTable instance_disp = { + .GetInstanceProcAddr = vkGetInstanceProcAddr, + .DestroyInstance = loader_DestroyInstance, + .EnumeratePhysicalDevices = loader_EnumeratePhysicalDevices, + .GetPhysicalDeviceFeatures = loader_GetPhysicalDeviceFeatures, + .GetPhysicalDeviceFormatProperties = + loader_GetPhysicalDeviceFormatProperties, + .GetPhysicalDeviceImageFormatProperties = + loader_GetPhysicalDeviceImageFormatProperties, + .GetPhysicalDeviceProperties = loader_GetPhysicalDeviceProperties, + .GetPhysicalDeviceQueueFamilyProperties = + loader_GetPhysicalDeviceQueueFamilyProperties, + .GetPhysicalDeviceMemoryProperties = + loader_GetPhysicalDeviceMemoryProperties, + .EnumerateDeviceExtensionProperties = + loader_EnumerateDeviceExtensionProperties, + .EnumerateDeviceLayerProperties = loader_EnumerateDeviceLayerProperties, + .GetPhysicalDeviceSparseImageFormatProperties = + loader_GetPhysicalDeviceSparseImageFormatProperties, + .DestroySurfaceKHR = loader_DestroySurfaceKHR, + .GetPhysicalDeviceSurfaceSupportKHR = + loader_GetPhysicalDeviceSurfaceSupportKHR, + .GetPhysicalDeviceSurfaceCapabilitiesKHR = + loader_GetPhysicalDeviceSurfaceCapabilitiesKHR, + .GetPhysicalDeviceSurfaceFormatsKHR = + loader_GetPhysicalDeviceSurfaceFormatsKHR, + .GetPhysicalDeviceSurfacePresentModesKHR = + loader_GetPhysicalDeviceSurfacePresentModesKHR, + .CreateDebugReportCallbackEXT = loader_CreateDebugReportCallback, + .DestroyDebugReportCallbackEXT = loader_DestroyDebugReportCallback, + .DebugReportMessageEXT = loader_DebugReportMessage, +#ifdef VK_USE_PLATFORM_MIR_KHR + .CreateMirSurfaceKHR = loader_CreateMirSurfaceKHR, + .GetPhysicalDeviceMirPresentationSupportKHR = + loader_GetPhysicalDeviceMirPresentationSupportKHR, +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + .CreateWaylandSurfaceKHR = loader_CreateWaylandSurfaceKHR, + .GetPhysicalDeviceWaylandPresentationSupportKHR = + 
loader_GetPhysicalDeviceWaylandPresentationSupportKHR, +#endif +#ifdef VK_USE_PLATFORM_WIN32_KHR + .CreateWin32SurfaceKHR = loader_CreateWin32SurfaceKHR, + .GetPhysicalDeviceWin32PresentationSupportKHR = + loader_GetPhysicalDeviceWin32PresentationSupportKHR, +#endif +#ifdef VK_USE_PLATFORM_XCB_KHR + .CreateXcbSurfaceKHR = loader_CreateXcbSurfaceKHR, + .GetPhysicalDeviceXcbPresentationSupportKHR = + loader_GetPhysicalDeviceXcbPresentationSupportKHR, +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR + .CreateXlibSurfaceKHR = loader_CreateXlibSurfaceKHR, + .GetPhysicalDeviceXlibPresentationSupportKHR = + loader_GetPhysicalDeviceXlibPresentationSupportKHR, +#endif +#ifdef VK_USE_PLATFORM_ANDROID_KHR + .CreateAndroidSurfaceKHR = loader_CreateAndroidSurfaceKHR, +#endif +}; + +LOADER_PLATFORM_THREAD_ONCE_DECLARATION(once_init); + +void *loader_heap_alloc(const struct loader_instance *instance, size_t size, + VkSystemAllocationScope alloc_scope) { + if (instance && instance->alloc_callbacks.pfnAllocation) { + /* TODO: What should default alignment be? 1, 4, 8, other? 
*/ + return instance->alloc_callbacks.pfnAllocation( + instance->alloc_callbacks.pUserData, size, sizeof(int), + alloc_scope); + } + return malloc(size); +} + +void loader_heap_free(const struct loader_instance *instance, void *pMemory) { + if (pMemory == NULL) + return; + if (instance && instance->alloc_callbacks.pfnFree) { + instance->alloc_callbacks.pfnFree(instance->alloc_callbacks.pUserData, + pMemory); + return; + } + free(pMemory); +} + +void *loader_heap_realloc(const struct loader_instance *instance, void *pMemory, + size_t orig_size, size_t size, + VkSystemAllocationScope alloc_scope) { + if (pMemory == NULL || orig_size == 0) + return loader_heap_alloc(instance, size, alloc_scope); + if (size == 0) { + loader_heap_free(instance, pMemory); + return NULL; + } + // TODO use the callback realloc function + if (instance && instance->alloc_callbacks.pfnAllocation) { + if (size <= orig_size) { + memset(((uint8_t *)pMemory) + size, 0, orig_size - size); + return pMemory; + } + /* TODO: What should default alignment be? 1, 4, 8, other? */ + void *new_ptr = instance->alloc_callbacks.pfnAllocation( + instance->alloc_callbacks.pUserData, size, sizeof(int), + alloc_scope); + if (!new_ptr) + return NULL; + memcpy(new_ptr, pMemory, orig_size); + instance->alloc_callbacks.pfnFree(instance->alloc_callbacks.pUserData, + pMemory); + return new_ptr; + } + return realloc(pMemory, size); +} + +void *loader_tls_heap_alloc(size_t size) { + return loader_heap_alloc(tls_instance, size, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); +} + +void loader_tls_heap_free(void *pMemory) { + loader_heap_free(tls_instance, pMemory); +} + +void loader_log(const struct loader_instance *inst, VkFlags msg_type, + int32_t msg_code, const char *format, ...) 
{ + char msg[512]; + va_list ap; + int ret; + + va_start(ap, format); + ret = vsnprintf(msg, sizeof(msg), format, ap); + if ((ret >= (int)sizeof(msg)) || ret < 0) { + msg[sizeof(msg) - 1] = '\0'; + } + va_end(ap); + + if (inst) { + util_DebugReportMessage(inst, msg_type, + VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT, + (uint64_t)inst, 0, msg_code, "loader", msg); + } + + if (!(msg_type & g_loader_log_msgs)) { + return; + } + +#if defined(WIN32) + OutputDebugString(msg); + OutputDebugString("\n"); +#endif + fputs(msg, stderr); + fputc('\n', stderr); +} + +#if defined(WIN32) +static char *loader_get_next_path(char *path); +/** +* Find the list of registry files (names within a key) in key "location". +* +* This function looks in the registry (hive = DEFAULT_VK_REGISTRY_HIVE) key as +*given in "location" +* for a list or name/values which are added to a returned list (function return +*value). +* The DWORD values within the key must be 0 or they are skipped. +* Function return is a string with a ';' separated list of filenames. +* Function return is NULL if no valid name/value pairs are found in the key, +* or the key is not found. +* +* \returns +* A string list of filenames as pointer. +* When done using the returned string list, pointer should be freed. 
+*/ +static char *loader_get_registry_files(const struct loader_instance *inst, + char *location) { + LONG rtn_value; + HKEY hive, key; + DWORD access_flags; + char name[2048]; + char *out = NULL; + char *loc = location; + char *next; + DWORD idx = 0; + DWORD name_size = sizeof(name); + DWORD value; + DWORD total_size = 4096; + DWORD value_size = sizeof(value); + + while (*loc) { + next = loader_get_next_path(loc); + hive = DEFAULT_VK_REGISTRY_HIVE; + access_flags = KEY_QUERY_VALUE; + rtn_value = RegOpenKeyEx(hive, loc, 0, access_flags, &key); + if (rtn_value != ERROR_SUCCESS) { + // We still couldn't find the key, so give up: + loc = next; + continue; + } + + while ((rtn_value = RegEnumValue(key, idx++, name, &name_size, NULL, + NULL, (LPBYTE)&value, &value_size)) == + ERROR_SUCCESS) { + if (value_size == sizeof(value) && value == 0) { + if (out == NULL) { + out = loader_heap_alloc( + inst, total_size, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + out[0] = '\0'; + } else if (strlen(out) + name_size + 1 > total_size) { + out = loader_heap_realloc( + inst, out, total_size, total_size * 2, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + total_size *= 2; + } + if (out == NULL) { + loader_log( + inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Out of memory, failed loader_get_registry_files"); + return NULL; + } + if (strlen(out) == 0) + snprintf(out, name_size + 1, "%s", name); + else + snprintf(out + strlen(out), name_size + 2, "%c%s", + PATH_SEPERATOR, name); + } + name_size = 2048; + } + loc = next; + } + + return out; +} + +#endif // WIN32 + +/** + * Combine path elements, separating each element with the platform-specific + * directory separator, and save the combined string to a destination buffer, + * not exceeding the given length. Path elements are given as variadic args, + * with a NULL element terminating the list. + * + * \returns the total length of the combined string, not including an ASCII + * NUL termination character. 
This length may exceed the available storage: + * in this case, the written string will be truncated to avoid a buffer + * overrun, and the return value will greater than or equal to the storage + * size. A NULL argument may be provided as the destination buffer in order + * to determine the required string length without actually writing a string. + */ + +static size_t loader_platform_combine_path(char *dest, size_t len, ...) { + size_t required_len = 0; + va_list ap; + const char *component; + + va_start(ap, len); + + while ((component = va_arg(ap, const char *))) { + if (required_len > 0) { + // This path element is not the first non-empty element; prepend + // a directory separator if space allows + if (dest && required_len + 1 < len) { + snprintf(dest + required_len, len - required_len, "%c", + DIRECTORY_SYMBOL); + } + required_len++; + } + + if (dest && required_len < len) { + strncpy(dest + required_len, component, len - required_len); + } + required_len += strlen(component); + } + + va_end(ap); + + // strncpy(3) won't add a NUL terminating byte in the event of truncation. + if (dest && required_len >= len) { + dest[len - 1] = '\0'; + } + + return required_len; +} + +/** + * Given string of three part form "maj.min.pat" convert to a vulkan version + * number. 
+ */ +static uint32_t loader_make_version(const char *vers_str) { + uint32_t vers = 0, major = 0, minor = 0, patch = 0; + char *minor_str = NULL; + char *patch_str = NULL; + char *cstr; + char *str; + + if (!vers_str) + return vers; + cstr = loader_stack_alloc(strlen(vers_str) + 1); + strcpy(cstr, vers_str); + while ((str = strchr(cstr, '.')) != NULL) { + if (minor_str == NULL) { + minor_str = str + 1; + *str = '\0'; + major = atoi(cstr); + } else if (patch_str == NULL) { + patch_str = str + 1; + *str = '\0'; + minor = atoi(minor_str); + } else { + return vers; + } + cstr = str + 1; + } + patch = atoi(patch_str); + + return VK_MAKE_VERSION(major, minor, patch); +} + +bool compare_vk_extension_properties(const VkExtensionProperties *op1, + const VkExtensionProperties *op2) { + return strcmp(op1->extensionName, op2->extensionName) == 0 ? true : false; +} + +/** + * Search the given ext_array for an extension + * matching the given vk_ext_prop + */ +bool has_vk_extension_property_array(const VkExtensionProperties *vk_ext_prop, + const uint32_t count, + const VkExtensionProperties *ext_array) { + for (uint32_t i = 0; i < count; i++) { + if (compare_vk_extension_properties(vk_ext_prop, &ext_array[i])) + return true; + } + return false; +} + +/** + * Search the given ext_list for an extension + * matching the given vk_ext_prop + */ +bool has_vk_extension_property(const VkExtensionProperties *vk_ext_prop, + const struct loader_extension_list *ext_list) { + for (uint32_t i = 0; i < ext_list->count; i++) { + if (compare_vk_extension_properties(&ext_list->list[i], vk_ext_prop)) + return true; + } + return false; +} + +static inline bool loader_is_layer_type_device(const enum layer_type type) { + if ((type & VK_LAYER_TYPE_DEVICE_EXPLICIT) || + (type & VK_LAYER_TYPE_DEVICE_IMPLICIT)) + return true; + return false; +} + +/* + * Search the given layer list for a layer matching the given layer name + */ +static struct loader_layer_properties * +loader_get_layer_property(const 
char *name, + const struct loader_layer_list *layer_list) { + for (uint32_t i = 0; i < layer_list->count; i++) { + const VkLayerProperties *item = &layer_list->list[i].info; + if (strcmp(name, item->layerName) == 0) + return &layer_list->list[i]; + } + return NULL; +} + +/** + * Get the next unused layer property in the list. Init the property to zero. + */ +static struct loader_layer_properties * +loader_get_next_layer_property(const struct loader_instance *inst, + struct loader_layer_list *layer_list) { + if (layer_list->capacity == 0) { + layer_list->list = + loader_heap_alloc(inst, sizeof(struct loader_layer_properties) * 64, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (layer_list->list == NULL) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Out of memory can't add any layer properties to list"); + return NULL; + } + memset(layer_list->list, 0, + sizeof(struct loader_layer_properties) * 64); + layer_list->capacity = sizeof(struct loader_layer_properties) * 64; + } + + // ensure enough room to add an entry + if ((layer_list->count + 1) * sizeof(struct loader_layer_properties) > + layer_list->capacity) { + layer_list->list = loader_heap_realloc( + inst, layer_list->list, layer_list->capacity, + layer_list->capacity * 2, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (layer_list->list == NULL) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "realloc failed for layer list"); + } + layer_list->capacity *= 2; + } + + layer_list->count++; + return &(layer_list->list[layer_list->count - 1]); +} + +/** + * Remove all layer properties entrys from the list + */ +void loader_delete_layer_properties(const struct loader_instance *inst, + struct loader_layer_list *layer_list) { + uint32_t i, j; + struct loader_device_extension_list *dev_ext_list; + if (!layer_list) + return; + + for (i = 0; i < layer_list->count; i++) { + loader_destroy_generic_list( + inst, (struct loader_generic_list *)&layer_list->list[i] + .instance_extension_list); + dev_ext_list = 
&layer_list->list[i].device_extension_list; + if (dev_ext_list->capacity > 0 && + dev_ext_list->list->entrypoint_count > 0) { + for (j = 0; j < dev_ext_list->list->entrypoint_count; j++) { + loader_heap_free(inst, dev_ext_list->list->entrypoints[j]); + } + loader_heap_free(inst, dev_ext_list->list->entrypoints); + } + loader_destroy_generic_list(inst, + (struct loader_generic_list *)dev_ext_list); + } + layer_list->count = 0; + + if (layer_list->capacity > 0) { + layer_list->capacity = 0; + loader_heap_free(inst, layer_list->list); + } +} + +static void loader_add_instance_extensions( + const struct loader_instance *inst, + const PFN_vkEnumerateInstanceExtensionProperties fp_get_props, + const char *lib_name, struct loader_extension_list *ext_list) { + uint32_t i, count = 0; + VkExtensionProperties *ext_props; + VkResult res; + + if (!fp_get_props) { + /* No EnumerateInstanceExtensionProperties defined */ + return; + } + + res = fp_get_props(NULL, &count, NULL); + if (res != VK_SUCCESS) { + loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0, + "Error getting Instance extension count from %s", lib_name); + return; + } + + if (count == 0) { + /* No ExtensionProperties to report */ + return; + } + + ext_props = loader_stack_alloc(count * sizeof(VkExtensionProperties)); + + res = fp_get_props(NULL, &count, ext_props); + if (res != VK_SUCCESS) { + loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0, + "Error getting Instance extensions from %s", lib_name); + return; + } + + for (i = 0; i < count; i++) { + char spec_version[64]; + + snprintf(spec_version, sizeof(spec_version), "%d.%d.%d", + VK_MAJOR(ext_props[i].specVersion), + VK_MINOR(ext_props[i].specVersion), + VK_PATCH(ext_props[i].specVersion)); + loader_log(inst, VK_DEBUG_REPORT_DEBUG_BIT_EXT, 0, + "Instance Extension: %s (%s) version %s", + ext_props[i].extensionName, lib_name, spec_version); + loader_add_to_ext_list(inst, ext_list, 1, &ext_props[i]); + } + + return; +} + +/* + * Initialize ext_list with the 
physical device extensions. + * The extension properties are passed as inputs in count and ext_props. + */ +static VkResult +loader_init_device_extensions(const struct loader_instance *inst, + struct loader_physical_device *phys_dev, + uint32_t count, VkExtensionProperties *ext_props, + struct loader_extension_list *ext_list) { + VkResult res; + uint32_t i; + + if (!loader_init_generic_list(inst, (struct loader_generic_list *)ext_list, + sizeof(VkExtensionProperties))) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + for (i = 0; i < count; i++) { + char spec_version[64]; + + snprintf(spec_version, sizeof(spec_version), "%d.%d.%d", + VK_MAJOR(ext_props[i].specVersion), + VK_MINOR(ext_props[i].specVersion), + VK_PATCH(ext_props[i].specVersion)); + loader_log(inst, VK_DEBUG_REPORT_DEBUG_BIT_EXT, 0, + "Device Extension: %s (%s) version %s", + ext_props[i].extensionName, + phys_dev->this_icd->this_icd_lib->lib_name, spec_version); + res = loader_add_to_ext_list(inst, ext_list, 1, &ext_props[i]); + if (res != VK_SUCCESS) + return res; + } + + return VK_SUCCESS; +} + +static VkResult loader_add_device_extensions( + const struct loader_instance *inst, struct loader_icd *icd, + VkPhysicalDevice physical_device, const char *lib_name, + struct loader_extension_list *ext_list) { + uint32_t i, count; + VkResult res; + VkExtensionProperties *ext_props; + + res = icd->EnumerateDeviceExtensionProperties(physical_device, NULL, &count, + NULL); + if (res == VK_SUCCESS && count > 0) { + ext_props = loader_stack_alloc(count * sizeof(VkExtensionProperties)); + if (!ext_props) + return VK_ERROR_OUT_OF_HOST_MEMORY; + res = icd->EnumerateDeviceExtensionProperties(physical_device, NULL, + &count, ext_props); + if (res != VK_SUCCESS) + return res; + for (i = 0; i < count; i++) { + char spec_version[64]; + + snprintf(spec_version, sizeof(spec_version), "%d.%d.%d", + VK_MAJOR(ext_props[i].specVersion), + VK_MINOR(ext_props[i].specVersion), + VK_PATCH(ext_props[i].specVersion)); + 
loader_log(inst, VK_DEBUG_REPORT_DEBUG_BIT_EXT, 0, + "Device Extension: %s (%s) version %s", + ext_props[i].extensionName, lib_name, spec_version); + res = loader_add_to_ext_list(inst, ext_list, 1, &ext_props[i]); + if (res != VK_SUCCESS) + return res; + } + } else { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Error getting physical device extension info count from " + "library %s", + lib_name); + return res; + } + + return VK_SUCCESS; +} + +static bool loader_init_generic_list(const struct loader_instance *inst, + struct loader_generic_list *list_info, + size_t element_size) { + list_info->capacity = 32 * element_size; + list_info->list = loader_heap_alloc(inst, list_info->capacity, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (list_info->list == NULL) { + return false; + } + memset(list_info->list, 0, list_info->capacity); + list_info->count = 0; + return true; +} + +void loader_destroy_generic_list(const struct loader_instance *inst, + struct loader_generic_list *list) { + loader_heap_free(inst, list->list); + list->count = 0; + list->capacity = 0; +} + +/* + * Append non-duplicate extension properties defined in props + * to the given ext_list. 
+ * Return + * Vk_SUCCESS on success + */ +VkResult loader_add_to_ext_list(const struct loader_instance *inst, + struct loader_extension_list *ext_list, + uint32_t prop_list_count, + const VkExtensionProperties *props) { + uint32_t i; + const VkExtensionProperties *cur_ext; + + if (ext_list->list == NULL || ext_list->capacity == 0) { + loader_init_generic_list(inst, (struct loader_generic_list *)ext_list, + sizeof(VkExtensionProperties)); + } + + if (ext_list->list == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + for (i = 0; i < prop_list_count; i++) { + cur_ext = &props[i]; + + // look for duplicates + if (has_vk_extension_property(cur_ext, ext_list)) { + continue; + } + + // add to list at end + // check for enough capacity + if (ext_list->count * sizeof(VkExtensionProperties) >= + ext_list->capacity) { + + ext_list->list = loader_heap_realloc( + inst, ext_list->list, ext_list->capacity, + ext_list->capacity * 2, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + + if (ext_list->list == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + // double capacity + ext_list->capacity *= 2; + } + + memcpy(&ext_list->list[ext_list->count], cur_ext, + sizeof(VkExtensionProperties)); + ext_list->count++; + } + return VK_SUCCESS; +} + +/* + * Append one extension property defined in props with entrypoints + * defined in entrys to the given ext_list. 
+ * Return + * Vk_SUCCESS on success + */ +VkResult +loader_add_to_dev_ext_list(const struct loader_instance *inst, + struct loader_device_extension_list *ext_list, + const VkExtensionProperties *props, + uint32_t entry_count, char **entrys) { + uint32_t idx; + if (ext_list->list == NULL || ext_list->capacity == 0) { + loader_init_generic_list(inst, (struct loader_generic_list *)ext_list, + sizeof(struct loader_dev_ext_props)); + } + + if (ext_list->list == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + idx = ext_list->count; + // add to list at end + // check for enough capacity + if (idx * sizeof(struct loader_dev_ext_props) >= ext_list->capacity) { + + ext_list->list = loader_heap_realloc( + inst, ext_list->list, ext_list->capacity, ext_list->capacity * 2, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + + if (ext_list->list == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + // double capacity + ext_list->capacity *= 2; + } + + memcpy(&ext_list->list[idx].props, props, + sizeof(struct loader_dev_ext_props)); + ext_list->list[idx].entrypoint_count = entry_count; + ext_list->list[idx].entrypoints = + loader_heap_alloc(inst, sizeof(char *) * entry_count, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (ext_list->list[idx].entrypoints == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + for (uint32_t i = 0; i < entry_count; i++) { + ext_list->list[idx].entrypoints[i] = loader_heap_alloc( + inst, strlen(entrys[i]) + 1, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (ext_list->list[idx].entrypoints[i] == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + strcpy(ext_list->list[idx].entrypoints[i], entrys[i]); + } + ext_list->count++; + + return VK_SUCCESS; +} + +/** + * Search the given search_list for any layers in the props list. + * Add these to the output layer_list. Don't add duplicates to the output + * layer_list. 
+ */ +static VkResult +loader_add_layer_names_to_list(const struct loader_instance *inst, + struct loader_layer_list *output_list, + uint32_t name_count, const char *const *names, + const struct loader_layer_list *search_list) { + struct loader_layer_properties *layer_prop; + VkResult err = VK_SUCCESS; + + for (uint32_t i = 0; i < name_count; i++) { + const char *search_target = names[i]; + layer_prop = loader_get_layer_property(search_target, search_list); + if (!layer_prop) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Unable to find layer %s", search_target); + err = VK_ERROR_LAYER_NOT_PRESENT; + continue; + } + + loader_add_to_layer_list(inst, output_list, 1, layer_prop); + } + + return err; +} + +/* + * Manage lists of VkLayerProperties + */ +static bool loader_init_layer_list(const struct loader_instance *inst, + struct loader_layer_list *list) { + list->capacity = 32 * sizeof(struct loader_layer_properties); + list->list = loader_heap_alloc(inst, list->capacity, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (list->list == NULL) { + return false; + } + memset(list->list, 0, list->capacity); + list->count = 0; + return true; +} + +void loader_destroy_layer_list(const struct loader_instance *inst, + struct loader_layer_list *layer_list) { + loader_heap_free(inst, layer_list->list); + layer_list->count = 0; + layer_list->capacity = 0; +} + +/* + * Manage list of layer libraries (loader_lib_info) + */ +static bool +loader_init_layer_library_list(const struct loader_instance *inst, + struct loader_layer_library_list *list) { + list->capacity = 32 * sizeof(struct loader_lib_info); + list->list = loader_heap_alloc(inst, list->capacity, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (list->list == NULL) { + return false; + } + memset(list->list, 0, list->capacity); + list->count = 0; + return true; +} + +void loader_destroy_layer_library_list(const struct loader_instance *inst, + struct loader_layer_library_list *list) { + for (uint32_t i = 0; i < 
list->count; i++) { + loader_heap_free(inst, list->list[i].lib_name); + } + loader_heap_free(inst, list->list); + list->count = 0; + list->capacity = 0; +} + +void loader_add_to_layer_library_list(const struct loader_instance *inst, + struct loader_layer_library_list *list, + uint32_t item_count, + const struct loader_lib_info *new_items) { + uint32_t i; + struct loader_lib_info *item; + + if (list->list == NULL || list->capacity == 0) { + loader_init_layer_library_list(inst, list); + } + + if (list->list == NULL) + return; + + for (i = 0; i < item_count; i++) { + item = (struct loader_lib_info *)&new_items[i]; + + // look for duplicates + for (uint32_t j = 0; j < list->count; j++) { + if (strcmp(list->list[i].lib_name, new_items->lib_name) == 0) { + continue; + } + } + + // add to list at end + // check for enough capacity + if (list->count * sizeof(struct loader_lib_info) >= list->capacity) { + + list->list = loader_heap_realloc( + inst, list->list, list->capacity, list->capacity * 2, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + // double capacity + list->capacity *= 2; + } + + memcpy(&list->list[list->count], item, sizeof(struct loader_lib_info)); + list->count++; + } +} + +/* + * Search the given layer list for a list + * matching the given VkLayerProperties + */ +bool has_vk_layer_property(const VkLayerProperties *vk_layer_prop, + const struct loader_layer_list *list) { + for (uint32_t i = 0; i < list->count; i++) { + if (strcmp(vk_layer_prop->layerName, list->list[i].info.layerName) == 0) + return true; + } + return false; +} + +/* + * Search the given layer list for a layer + * matching the given name + */ +bool has_layer_name(const char *name, const struct loader_layer_list *list) { + for (uint32_t i = 0; i < list->count; i++) { + if (strcmp(name, list->list[i].info.layerName) == 0) + return true; + } + return false; +} + +/* + * Append non-duplicate layer properties defined in prop_list + * to the given layer_info list + */ +void 
loader_add_to_layer_list(const struct loader_instance *inst, + struct loader_layer_list *list, + uint32_t prop_list_count, + const struct loader_layer_properties *props) { + uint32_t i; + struct loader_layer_properties *layer; + + if (list->list == NULL || list->capacity == 0) { + loader_init_layer_list(inst, list); + } + + if (list->list == NULL) + return; + + for (i = 0; i < prop_list_count; i++) { + layer = (struct loader_layer_properties *)&props[i]; + + // look for duplicates + if (has_vk_layer_property(&layer->info, list)) { + continue; + } + + // add to list at end + // check for enough capacity + if (list->count * sizeof(struct loader_layer_properties) >= + list->capacity) { + + list->list = loader_heap_realloc( + inst, list->list, list->capacity, list->capacity * 2, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + // double capacity + list->capacity *= 2; + } + + memcpy(&list->list[list->count], layer, + sizeof(struct loader_layer_properties)); + list->count++; + } +} + +/** + * Search the search_list for any layer with a name + * that matches the given name and a type that matches the given type + * Add all matching layers to the found_list + * Do not add if found loader_layer_properties is already + * on the found_list. 
+ */ +static void +loader_find_layer_name_add_list(const struct loader_instance *inst, + const char *name, const enum layer_type type, + const struct loader_layer_list *search_list, + struct loader_layer_list *found_list) { + bool found = false; + for (uint32_t i = 0; i < search_list->count; i++) { + struct loader_layer_properties *layer_prop = &search_list->list[i]; + if (0 == strcmp(layer_prop->info.layerName, name) && + (layer_prop->type & type)) { + /* Found a layer with the same name, add to found_list */ + loader_add_to_layer_list(inst, found_list, 1, layer_prop); + found = true; + } + } + if (!found) { + loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0, + "Warning, couldn't find layer name %s to activate", name); + } +} + +static VkExtensionProperties * +get_extension_property(const char *name, + const struct loader_extension_list *list) { + for (uint32_t i = 0; i < list->count; i++) { + if (strcmp(name, list->list[i].extensionName) == 0) + return &list->list[i]; + } + return NULL; +} + +static VkExtensionProperties * +get_dev_extension_property(const char *name, + const struct loader_device_extension_list *list) { + for (uint32_t i = 0; i < list->count; i++) { + if (strcmp(name, list->list[i].props.extensionName) == 0) + return &list->list[i].props; + } + return NULL; +} + +/* + * This function will return the pNext pointer of any + * CreateInfo extensions that are not loader extensions. + * This is used to skip past the loader extensions prepended + * to the list during CreateInstance and CreateDevice. 
+ */ +void *loader_strip_create_extensions(const void *pNext) { + VkLayerInstanceCreateInfo *create_info = (VkLayerInstanceCreateInfo *)pNext; + + while ( + create_info && + (create_info->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO || + create_info->sType == VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO)) { + create_info = (VkLayerInstanceCreateInfo *)create_info->pNext; + } + + return create_info; +} + +/* + * For Instance extensions implemented within the loader (i.e. DEBUG_REPORT + * the extension must provide two entry points for the loader to use: + * - "trampoline" entry point - this is the address returned by GetProcAddr + * and will always do what's necessary to support a global call. + * - "terminator" function - this function will be put at the end of the + * instance chain and will contain the necessary logic to call / process + * the extension for the appropriate ICDs that are available. + * There is no generic mechanism for including these functions, the references + * must be placed into the appropriate loader entry points. + * GetInstanceProcAddr: call extension GetInstanceProcAddr to check for + * GetProcAddr requests + * loader_coalesce_extensions(void) - add extension records to the list of + * global + * extension available to the app. + * instance_disp - add function pointer for terminator function to this array. + * The extension itself should be in a separate file that will be + * linked directly with the loader. 
+ */ + +void loader_get_icd_loader_instance_extensions( + const struct loader_instance *inst, struct loader_icd_libs *icd_libs, + struct loader_extension_list *inst_exts) { + struct loader_extension_list icd_exts; + loader_log(inst, VK_DEBUG_REPORT_DEBUG_BIT_EXT, 0, + "Build ICD instance extension list"); + // traverse scanned icd list adding non-duplicate extensions to the list + for (uint32_t i = 0; i < icd_libs->count; i++) { + loader_init_generic_list(inst, (struct loader_generic_list *)&icd_exts, + sizeof(VkExtensionProperties)); + loader_add_instance_extensions( + inst, icd_libs->list[i].EnumerateInstanceExtensionProperties, + icd_libs->list[i].lib_name, &icd_exts); + loader_add_to_ext_list(inst, inst_exts, icd_exts.count, icd_exts.list); + loader_destroy_generic_list(inst, + (struct loader_generic_list *)&icd_exts); + }; + + // Traverse loader's extensions, adding non-duplicate extensions to the list + wsi_add_instance_extensions(inst, inst_exts); + debug_report_add_instance_extensions(inst, inst_exts); +} + +struct loader_physical_device * +loader_get_physical_device(const VkPhysicalDevice physdev) { + uint32_t i; + for (struct loader_instance *inst = loader.instances; inst; + inst = inst->next) { + for (i = 0; i < inst->total_gpu_count; i++) { + // TODO this aliases physDevices within instances, need for this + // function to go away + if (inst->phys_devs[i].disp == + loader_get_instance_dispatch(physdev)) { + return &inst->phys_devs[i]; + } + } + } + return NULL; +} + +struct loader_icd *loader_get_icd_and_device(const VkDevice device, + struct loader_device **found_dev) { + *found_dev = NULL; + for (struct loader_instance *inst = loader.instances; inst; + inst = inst->next) { + for (struct loader_icd *icd = inst->icds; icd; icd = icd->next) { + for (struct loader_device *dev = icd->logical_device_list; dev; + dev = dev->next) + /* Value comparison of device prevents object wrapping by layers + */ + if (loader_get_dispatch(dev->device) == + 
loader_get_dispatch(device)) { + *found_dev = dev; + return icd; + } + } + } + return NULL; +} + +static void loader_destroy_logical_device(const struct loader_instance *inst, + struct loader_device *dev) { + loader_heap_free(inst, dev->app_extension_props); + loader_destroy_layer_list(inst, &dev->activated_layer_list); + loader_heap_free(inst, dev); +} + +static struct loader_device * +loader_add_logical_device(const struct loader_instance *inst, + struct loader_device **device_list) { + struct loader_device *new_dev; + + new_dev = loader_heap_alloc(inst, sizeof(struct loader_device), + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!new_dev) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Failed to alloc struct loader-device"); + return NULL; + } + + memset(new_dev, 0, sizeof(struct loader_device)); + + new_dev->next = *device_list; + *device_list = new_dev; + return new_dev; +} + +void loader_remove_logical_device(const struct loader_instance *inst, + struct loader_icd *icd, + struct loader_device *found_dev) { + struct loader_device *dev, *prev_dev; + + if (!icd || !found_dev) + return; + + prev_dev = NULL; + dev = icd->logical_device_list; + while (dev && dev != found_dev) { + prev_dev = dev; + dev = dev->next; + } + + if (prev_dev) + prev_dev->next = found_dev->next; + else + icd->logical_device_list = found_dev->next; + loader_destroy_logical_device(inst, found_dev); +} + +static void loader_icd_destroy(struct loader_instance *ptr_inst, + struct loader_icd *icd) { + ptr_inst->total_icd_count--; + for (struct loader_device *dev = icd->logical_device_list; dev;) { + struct loader_device *next_dev = dev->next; + loader_destroy_logical_device(ptr_inst, dev); + dev = next_dev; + } + + loader_heap_free(ptr_inst, icd); +} + +static struct loader_icd * +loader_icd_create(const struct loader_instance *inst) { + struct loader_icd *icd; + + icd = loader_heap_alloc(inst, sizeof(*icd), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!icd) + return NULL; + + 
memset(icd, 0, sizeof(*icd)); + + return icd; +} + +static struct loader_icd * +loader_icd_add(struct loader_instance *ptr_inst, + const struct loader_scanned_icds *icd_lib) { + struct loader_icd *icd; + + icd = loader_icd_create(ptr_inst); + if (!icd) + return NULL; + + icd->this_icd_lib = icd_lib; + icd->this_instance = ptr_inst; + + /* prepend to the list */ + icd->next = ptr_inst->icds; + ptr_inst->icds = icd; + ptr_inst->total_icd_count++; + + return icd; +} + +void loader_scanned_icd_clear(const struct loader_instance *inst, + struct loader_icd_libs *icd_libs) { + if (icd_libs->capacity == 0) + return; + for (uint32_t i = 0; i < icd_libs->count; i++) { + loader_platform_close_library(icd_libs->list[i].handle); + loader_heap_free(inst, icd_libs->list[i].lib_name); + } + loader_heap_free(inst, icd_libs->list); + icd_libs->capacity = 0; + icd_libs->count = 0; + icd_libs->list = NULL; +} + +static void loader_scanned_icd_init(const struct loader_instance *inst, + struct loader_icd_libs *icd_libs) { + loader_scanned_icd_clear(inst, icd_libs); + icd_libs->capacity = 8 * sizeof(struct loader_scanned_icds); + icd_libs->list = loader_heap_alloc(inst, icd_libs->capacity, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); +} + +static void loader_scanned_icd_add(const struct loader_instance *inst, + struct loader_icd_libs *icd_libs, + const char *filename, uint32_t api_version) { + loader_platform_dl_handle handle; + PFN_vkCreateInstance fp_create_inst; + PFN_vkEnumerateInstanceExtensionProperties fp_get_inst_ext_props; + PFN_vkGetInstanceProcAddr fp_get_proc_addr; + struct loader_scanned_icds *new_node; + + /* TODO implement ref counting of libraries, for now this function leaves + libraries open and the scanned_icd_clear closes them */ + // Used to call: dlopen(filename, RTLD_LAZY); + handle = loader_platform_open_library(filename); + if (!handle) { + loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0, + loader_platform_open_library_error(filename)); + return; + } + + 
fp_get_proc_addr = + loader_platform_get_proc_address(handle, "vk_icdGetInstanceProcAddr"); + if (!fp_get_proc_addr) { + // Use deprecated interface + fp_get_proc_addr = + loader_platform_get_proc_address(handle, "vkGetInstanceProcAddr"); + if (!fp_get_proc_addr) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + loader_platform_get_proc_address_error( + "vk_icdGetInstanceProcAddr")); + return; + } else { + loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0, + "Using deprecated ICD interface of " + "vkGetInstanceProcAddr instead of " + "vk_icdGetInstanceProcAddr"); + } + fp_create_inst = + loader_platform_get_proc_address(handle, "vkCreateInstance"); + if (!fp_create_inst) { + loader_log( + inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Couldn't get vkCreateInstance via dlsym/loadlibrary from ICD"); + return; + } + fp_get_inst_ext_props = loader_platform_get_proc_address( + handle, "vkEnumerateInstanceExtensionProperties"); + if (!fp_get_inst_ext_props) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Couldn't get vkEnumerateInstanceExtensionProperties " + "via dlsym/loadlibrary from ICD"); + return; + } + } else { + // Use newer interface + fp_create_inst = + (PFN_vkCreateInstance)fp_get_proc_addr(NULL, "vkCreateInstance"); + if (!fp_create_inst) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Couldn't get vkCreateInstance via " + "vk_icdGetInstanceProcAddr from ICD"); + return; + } + fp_get_inst_ext_props = + (PFN_vkEnumerateInstanceExtensionProperties)fp_get_proc_addr( + NULL, "vkEnumerateInstanceExtensionProperties"); + if (!fp_get_inst_ext_props) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Couldn't get vkEnumerateInstanceExtensionProperties " + "via vk_icdGetInstanceProcAddr from ICD"); + return; + } + } + + // check for enough capacity + if ((icd_libs->count * sizeof(struct loader_scanned_icds)) >= + icd_libs->capacity) { + + icd_libs->list = loader_heap_realloc( + inst, icd_libs->list, icd_libs->capacity, 
icd_libs->capacity * 2, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + // double capacity + icd_libs->capacity *= 2; + } + new_node = &(icd_libs->list[icd_libs->count]); + + new_node->handle = handle; + new_node->api_version = api_version; + new_node->GetInstanceProcAddr = fp_get_proc_addr; + new_node->EnumerateInstanceExtensionProperties = fp_get_inst_ext_props; + new_node->CreateInstance = fp_create_inst; + + new_node->lib_name = (char *)loader_heap_alloc( + inst, strlen(filename) + 1, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!new_node->lib_name) { + loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0, + "Out of memory can't add icd"); + return; + } + strcpy(new_node->lib_name, filename); + icd_libs->count++; +} + +static bool loader_icd_init_entrys(struct loader_icd *icd, VkInstance inst, + const PFN_vkGetInstanceProcAddr fp_gipa) { +/* initialize entrypoint function pointers */ + +#define LOOKUP_GIPA(func, required) \ + do { \ + icd->func = (PFN_vk##func)fp_gipa(inst, "vk" #func); \ + if (!icd->func && required) { \ + loader_log((struct loader_instance *)inst, \ + VK_DEBUG_REPORT_WARNING_BIT_EXT, 0, \ + loader_platform_get_proc_address_error("vk" #func)); \ + return false; \ + } \ + } while (0) + + LOOKUP_GIPA(GetDeviceProcAddr, true); + LOOKUP_GIPA(DestroyInstance, true); + LOOKUP_GIPA(EnumeratePhysicalDevices, true); + LOOKUP_GIPA(GetPhysicalDeviceFeatures, true); + LOOKUP_GIPA(GetPhysicalDeviceFormatProperties, true); + LOOKUP_GIPA(GetPhysicalDeviceImageFormatProperties, true); + LOOKUP_GIPA(CreateDevice, true); + LOOKUP_GIPA(GetPhysicalDeviceProperties, true); + LOOKUP_GIPA(GetPhysicalDeviceMemoryProperties, true); + LOOKUP_GIPA(GetPhysicalDeviceQueueFamilyProperties, true); + LOOKUP_GIPA(EnumerateDeviceExtensionProperties, true); + LOOKUP_GIPA(GetPhysicalDeviceSparseImageFormatProperties, true); + LOOKUP_GIPA(CreateDebugReportCallbackEXT, false); + LOOKUP_GIPA(DestroyDebugReportCallbackEXT, false); + LOOKUP_GIPA(GetPhysicalDeviceSurfaceSupportKHR, false); + 
    LOOKUP_GIPA(GetPhysicalDeviceSurfaceCapabilitiesKHR, false);
    LOOKUP_GIPA(GetPhysicalDeviceSurfaceFormatsKHR, false);
    LOOKUP_GIPA(GetPhysicalDeviceSurfacePresentModesKHR, false);
#ifdef VK_USE_PLATFORM_WIN32_KHR
    LOOKUP_GIPA(GetPhysicalDeviceWin32PresentationSupportKHR, false);
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
    LOOKUP_GIPA(GetPhysicalDeviceXcbPresentationSupportKHR, false);
#endif

#undef LOOKUP_GIPA

    return true;
}

/**
 * Parse the comma-separated VK_LOADER_DEBUG environment variable and set the
 * global debug and log-message masks accordingly.  No-op if g_loader_debug
 * was already set to a non-zero value.
 */
static void loader_debug_init(void) {
    const char *env, *orig;

    if (g_loader_debug > 0)
        return;

    g_loader_debug = 0;

    /* parse comma-separated debug options */
    orig = env = loader_getenv("VK_LOADER_DEBUG");
    while (env) {
        const char *p = strchr(env, ',');
        size_t len;

        if (p)
            len = p - env;
        else
            len = strlen(env);

        if (len > 0) {
            /* NOTE: strncmp with the token's own length means a token that is
             * a prefix of an option name also matches (e.g. "w" selects
             * "warn"). */
            if (strncmp(env, "all", len) == 0) {
                g_loader_debug = ~0u;
                g_loader_log_msgs = ~0u;
            } else if (strncmp(env, "warn", len) == 0) {
                g_loader_debug |= LOADER_WARN_BIT;
                g_loader_log_msgs |= VK_DEBUG_REPORT_WARNING_BIT_EXT;
            } else if (strncmp(env, "info", len) == 0) {
                g_loader_debug |= LOADER_INFO_BIT;
                g_loader_log_msgs |= VK_DEBUG_REPORT_INFORMATION_BIT_EXT;
            } else if (strncmp(env, "perf", len) == 0) {
                g_loader_debug |= LOADER_PERF_BIT;
                g_loader_log_msgs |= VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT;
            } else if (strncmp(env, "error", len) == 0) {
                g_loader_debug |= LOADER_ERROR_BIT;
                g_loader_log_msgs |= VK_DEBUG_REPORT_ERROR_BIT_EXT;
            } else if (strncmp(env, "debug", len) == 0) {
                g_loader_debug |= LOADER_DEBUG_BIT;
                g_loader_log_msgs |= VK_DEBUG_REPORT_DEBUG_BIT_EXT;
            }
        }

        if (!p)
            break;

        env = p + 1;
    }

    loader_free_getenv(orig);
}

/**
 * One-time global loader setup: creates the global mutexes, initializes
 * logging from the environment, and routes all cJSON allocations through the
 * loader's TLS heap helpers.
 */
void loader_initialize(void) {
    // initialize mutexes
    loader_platform_thread_create_mutex(&loader_lock);
    loader_platform_thread_create_mutex(&loader_json_lock);

    // initialize logging
    loader_debug_init();

    // initialize cJSON to use alloc callbacks
    cJSON_Hooks alloc_fns = {
        .malloc_fn = loader_tls_heap_alloc, .free_fn = loader_tls_heap_free,
    };
    cJSON_InitHooks(&alloc_fns);
}

/* Result of a manifest-file scan: a heap-allocated list of file names. */
struct loader_manifest_files {
    uint32_t count;        // number of entries in filename_list
    char **filename_list;  // array of heap-allocated manifest file names
};

/**
 * Get next file or dirname given a string list or registry key path
 *
 * \returns
 * A pointer to first char in the next path.
 * When no separator remains, the returned pointer is one past the end of the
 * current (final) path.
 * Note: input string is modified in some cases. PASS IN A COPY!
 */
static char *loader_get_next_path(char *path) {
    uint32_t len;
    char *next;

    if (path == NULL)
        return NULL;
    next = strchr(path, PATH_SEPERATOR);
    if (next == NULL) {
        len = (uint32_t)strlen(path);
        next = path + len;
    } else {
        /* terminate the current path in place and step past the separator */
        *next = '\0';
        next++;
    }

    return next;
}

/**
 * Given a path which is absolute or relative, expand the path if relative or
 * leave the path unmodified if absolute. The base path to prepend to relative
 * paths is given in rel_base.
 *
 * \returns
 * A string in out_fullpath of the full absolute path
 */
static void loader_expand_path(const char *path, const char *rel_base,
                               size_t out_size, char *out_fullpath) {
    if (loader_platform_is_path_absolute(path)) {
        // do not prepend a base to an absolute path
        rel_base = "";
    }

    loader_platform_combine_path(out_fullpath, out_size, rel_base, path, NULL);
}

/**
 * Given a filename (file) and a list of paths (dirs), try to find an existing
 * file in the paths. If filename already is a path then no
 * searching in the given paths.
 *
 * \returns
 * A string in out_fullpath of either the full path or file.
 * Falls back to copying the bare file name when no candidate path exists.
 */
static void loader_get_fullpath(const char *file, const char *dirs,
                                size_t out_size, char *out_fullpath) {
    if (!loader_platform_is_path(file) && *dirs) {
        char *dirs_copy, *dir, *next_dir;

        // dirs is const; loader_get_next_path mutates, so work on a copy
        dirs_copy = loader_stack_alloc(strlen(dirs) + 1);
        strcpy(dirs_copy, dirs);

        // find if file exists after prepending paths in given list
        for (dir = dirs_copy; *dir && (next_dir = loader_get_next_path(dir));
             dir = next_dir) {
            loader_platform_combine_path(out_fullpath, out_size, dir, file,
                                         NULL);
            if (loader_platform_file_exists(out_fullpath)) {
                return;
            }
        }
    }

    snprintf(out_fullpath, out_size, "%s", file);
}

/**
 * Read a JSON file into a buffer.
 *
 * \returns
 * A pointer to a cJSON object representing the JSON parse tree.
 * This returned buffer should be freed by caller.
 */
static cJSON *loader_get_json(const struct loader_instance *inst,
                              const char *filename) {
    FILE *file;
    char *json_buf;
    cJSON *json;
    size_t len;
    file = fopen(filename, "rb");
    if (!file) {
        loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0,
                   "Couldn't open JSON file %s", filename);
        return NULL;
    }
    // NOTE(review): an ftell failure (-1) is not checked here
    fseek(file, 0, SEEK_END);
    len = ftell(file);
    fseek(file, 0, SEEK_SET);
    // stack-scoped buffer; no explicit free is performed below
    json_buf = (char *)loader_stack_alloc(len + 1);
    if (json_buf == NULL) {
        loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0,
                   "Out of memory can't get JSON file");
        fclose(file);
        return NULL;
    }
    if (fread(json_buf, sizeof(char), len, file) != len) {
        loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0,
                   "fread failed can't get JSON file");
        fclose(file);
        return NULL;
    }
    fclose(file);
    json_buf[len] = '\0';

    // parse text from file
    json = cJSON_Parse(json_buf);
    if (json == NULL)
        loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0,
                   "Can't parse JSON file %s", filename);
    return json;
}

/**
 * Do a deep copy of the loader_layer_properties structure.
 */
static void loader_copy_layer_properties(const struct loader_instance *inst,
                                         struct loader_layer_properties *dst,
                                         struct loader_layer_properties *src) {
    uint32_t cnt, i;
    // shallow-copy everything first, then replace the owned pointer members
    memcpy(dst, src, sizeof(*src));
    dst->instance_extension_list.list =
        loader_heap_alloc(inst, sizeof(VkExtensionProperties) *
                                    src->instance_extension_list.count,
                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
    dst->instance_extension_list.capacity =
        sizeof(VkExtensionProperties) * src->instance_extension_list.count;
    memcpy(dst->instance_extension_list.list, src->instance_extension_list.list,
           dst->instance_extension_list.capacity);
    dst->device_extension_list.list =
        loader_heap_alloc(inst, sizeof(struct loader_dev_ext_props) *
                                    src->device_extension_list.count,
                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

    dst->device_extension_list.capacity =
        sizeof(struct loader_dev_ext_props) * src->device_extension_list.count;
    memcpy(dst->device_extension_list.list, src->device_extension_list.list,
           dst->device_extension_list.capacity);
    // NOTE(review): only the entrypoint strings of the FIRST device extension
    // are deep-copied here; later entries keep src's pointers via the memcpy
    if (src->device_extension_list.count > 0 &&
        src->device_extension_list.list->entrypoint_count > 0) {
        cnt = src->device_extension_list.list->entrypoint_count;
        dst->device_extension_list.list->entrypoints = loader_heap_alloc(
            inst, sizeof(char *) * cnt, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
        for (i = 0; i < cnt; i++) {
            dst->device_extension_list.list->entrypoints[i] = loader_heap_alloc(
                inst,
                strlen(src->device_extension_list.list->entrypoints[i]) + 1,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
            strcpy(dst->device_extension_list.list->entrypoints[i],
                   src->device_extension_list.list->entrypoints[i]);
        }
    }
}

/* Returns true if a layer named |name| exists in |layer_list|. */
static bool
loader_find_layer_name_list(const char *name,
                            const struct loader_layer_list *layer_list) {
    if (!layer_list)
        return false;
    for (uint32_t j = 0; j < layer_list->count; j++)
        if (!strcmp(name, layer_list->list[j].info.layerName))
            return true;
    return false;
}

/* Returns true if |name| appears in the array of layer-name pointers. */
static bool loader_find_layer_name(const char *name, uint32_t layer_count,
                                   const char **layer_list) {
    if (!layer_list)
        return false;
    for (uint32_t j = 0; j < layer_count; j++)
        if (!strcmp(name, layer_list[j]))
            return true;
    return false;
}

/* Returns true if |name| appears in the fixed-size 2-D layer-name array. */
static bool loader_find_layer_name_array(
    const char *name, uint32_t layer_count,
    const char layer_list[][VK_MAX_EXTENSION_NAME_SIZE]) {
    if (!layer_list)
        return false;
    for (uint32_t j = 0; j < layer_count; j++)
        if (!strcmp(name, layer_list[j]))
            return true;
    return false;
}

/**
 * Searches through an array of layer names (ppp_layer_names) looking for a
 * layer key_name.
 * If not found then simply returns updating nothing.
 * Otherwise, it uses expand_count, expand_names adding them to layer names.
 * Any duplicate (pre-existing) expand_names in layer names are removed.
 * Expand names are added to the back/end of the list of layer names.
 * @param inst
 * @param layer_count
 * @param ppp_layer_names
 */
void loader_expand_layer_names(
    const struct loader_instance *inst, const char *key_name,
    uint32_t expand_count,
    const char expand_names[][VK_MAX_EXTENSION_NAME_SIZE],
    uint32_t *layer_count, char ***ppp_layer_names) {
    char **pp_layer_names, **pp_src_layers = *ppp_layer_names;

    if (!loader_find_layer_name(key_name, *layer_count,
                                (const char **)pp_src_layers))
        return; // didn't find the key_name in the list

    // since the total number of layers may expand, allocate new memory for the
    // array of pointers
    pp_layer_names =
        loader_heap_alloc(inst, (expand_count + *layer_count) * sizeof(char *),
                          VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);

    loader_log(inst, VK_DEBUG_REPORT_INFORMATION_BIT_EXT, 0,
               "Found meta layer %s, replacing with actual layer group",
               key_name);
    // In place removal of any expand_names found in layer_name (remove
    // duplicates)
    // Also remove the key_name
    uint32_t src_idx, dst_idx, cnt = *layer_count;
    for (src_idx = 0; src_idx < *layer_count; src_idx++) {
        if (loader_find_layer_name_array(pp_src_layers[src_idx], expand_count,
                                         expand_names)) {
            pp_src_layers[src_idx] = NULL;
            cnt--;
        } else if (!strcmp(pp_src_layers[src_idx], key_name)) {
            pp_src_layers[src_idx] = NULL;
            cnt--;
        }
        pp_layer_names[src_idx] = pp_src_layers[src_idx];
    }
    // compact: pull later non-NULL entries forward to fill the holes
    for (dst_idx = 0; dst_idx < cnt; dst_idx++) {
        if (pp_layer_names[dst_idx] == NULL) {
            src_idx = dst_idx + 1;
            while (src_idx < *layer_count && pp_src_layers[src_idx] == NULL)
                src_idx++;
            if (src_idx < *layer_count && pp_src_layers[src_idx] != NULL)
                pp_layer_names[dst_idx] = pp_src_layers[src_idx];
        }
    }

    // Add the expand_names to layer_names
    src_idx = 0;
    for (dst_idx = cnt; dst_idx < cnt + expand_count; dst_idx++) {
        pp_layer_names[dst_idx] = (char *)&expand_names[src_idx++][0];
    }
    *layer_count = expand_count + cnt;
    *ppp_layer_names = pp_layer_names;
    return;
}

/**
 * Restores the layer name list and count into the pCreateInfo structure.
 * If is_device == true then pCreateInfo is a device structure else an instance
 * structure.
 * @param layer_count
 * @param layer_names
 * @param pCreateInfo
 */
void loader_unexpand_dev_layer_names(const struct loader_instance *inst,
                                     uint32_t layer_count, char **layer_names,
                                     char **layer_ptr,
                                     const VkDeviceCreateInfo *pCreateInfo) {
    // casts away const to write the application's original layer list back
    uint32_t *p_cnt = (uint32_t *)&pCreateInfo->enabledLayerCount;
    *p_cnt = layer_count;

    char ***p_ptr = (char ***)&pCreateInfo->ppEnabledLayerNames;
    // free the expanded array if it replaced the caller's original pointer
    if ((char **)pCreateInfo->ppEnabledLayerNames != layer_ptr)
        loader_heap_free(inst, (void *)pCreateInfo->ppEnabledLayerNames);
    *p_ptr = layer_ptr;
    for (uint32_t i = 0; i < layer_count; i++) {
        char **pp_str = (char **)&pCreateInfo->ppEnabledLayerNames[i];
        *pp_str = layer_names[i];
    }
}

/* Instance-side twin of loader_unexpand_dev_layer_names (same logic for
 * VkInstanceCreateInfo). */
void loader_unexpand_inst_layer_names(const struct loader_instance *inst,
                                      uint32_t layer_count, char **layer_names,
                                      char **layer_ptr,
                                      const VkInstanceCreateInfo *pCreateInfo) {
    uint32_t *p_cnt = (uint32_t *)&pCreateInfo->enabledLayerCount;
    *p_cnt = layer_count;

    char ***p_ptr = (char ***)&pCreateInfo->ppEnabledLayerNames;
    if ((char **)pCreateInfo->ppEnabledLayerNames != layer_ptr)
        loader_heap_free(inst, (void *)pCreateInfo->ppEnabledLayerNames);
    *p_ptr = layer_ptr;
    for (uint32_t i = 0; i < layer_count; i++) {
        char **pp_str = (char **)&pCreateInfo->ppEnabledLayerNames[i];
        *pp_str = layer_names[i];
    }
}

/**
 * Searches through the existing instance and device layer lists looking for
 * the set of required layer names. If found then it adds a meta property to the
 * layer list.
 * Assumes the required layers are the same for both instance and device lists.
 * @param inst
 * @param layer_count number of layers in layer_names
 * @param layer_names array of required layer names
 * @param layer_instance_list
 * @param layer_device_list
 */
static void loader_add_layer_property_meta(
    const struct loader_instance *inst, uint32_t layer_count,
    const char layer_names[][VK_MAX_EXTENSION_NAME_SIZE],
    struct loader_layer_list *layer_instance_list,
    struct loader_layer_list *layer_device_list) {
    uint32_t i, j;
    bool found;
    struct loader_layer_list *layer_list;

    // bail out unless at least one of the lists could contain all the names
    if (0 == layer_count ||
        NULL == layer_instance_list ||
        NULL == layer_device_list ||
        (layer_count > layer_instance_list->count &&
         layer_count > layer_device_list->count))
        return;

    // pass 0 = instance list, pass 1 = device list
    for (j = 0; j < 2; j++) {
        if (j == 0)
            layer_list = layer_instance_list;
        else
            layer_list = layer_device_list;
        found = true;
        for (i = 0; i < layer_count; i++) {
            if (loader_find_layer_name_list(layer_names[i], layer_list))
                continue;
            found = false;
            break;
        }

        struct loader_layer_properties *props;
        if (found) {
            props = loader_get_next_layer_property(inst, layer_list);
            props->type = VK_LAYER_TYPE_META_EXPLICT;
            strncpy(props->info.description, "LunarG Standard Validation Layer",
                    sizeof(props->info.description));
            props->info.implementationVersion = 1;
            strncpy(props->info.layerName, std_validation_str,
                    sizeof(props->info.layerName));
            // TODO what about specVersion? for now insert loader's built
            // version
            props->info.specVersion = VK_API_VERSION;
        }
    }
}

/**
 * Given a cJSON struct (json) of the top level JSON object from layer manifest
 * file, add entry to the layer_list.
 * Fill out the layer_properties in this list entry from the input cJSON object.
 *
 * \returns
 * void
 * layer_list has a new entry and initialized accordingly.
 * If the json input object does not have all the required fields no entry
 * is added to the list.
 */
static void
loader_add_layer_properties(const struct loader_instance *inst,
                            struct loader_layer_list *layer_instance_list,
                            struct loader_layer_list *layer_device_list,
                            cJSON *json, bool is_implicit, char *filename) {
    /* Fields in layer manifest file that are required:
     * (required) "file_format_version"
     * following are required in the "layer" object:
     * (required) "name"
     * (required) "type"
     * (required) "library_path"
     * (required) "api_version"
     * (required) "implementation_version"
     * (required) "description"
     * (required for implicit layers) "disable_environment"
     *
     * First get all required items and if any missing abort
     */

    cJSON *item, *layer_node, *ext_item;
    char *temp;
    char *name, *type, *library_path, *api_version;
    char *implementation_version, *description;
    cJSON *disable_environment;
    int i, j;
    VkExtensionProperties ext_prop;
    item = cJSON_GetObjectItem(json, "file_format_version");
    if (item == NULL) {
        return;
    }
    char *file_vers = cJSON_PrintUnformatted(item);
    loader_log(inst, VK_DEBUG_REPORT_INFORMATION_BIT_EXT, 0,
               "Found manifest file %s, version %s", filename, file_vers);
    if (strcmp(file_vers, "\"1.0.0\"") != 0)
        loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,
                   "Unexpected manifest file version (expected 1.0.0), may "
                   "cause errors");
    loader_tls_heap_free(file_vers);

    layer_node = cJSON_GetObjectItem(json, "layer");
    if (layer_node == NULL) {
        loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,
                   "Can't find \"layer\" object in manifest JSON file, "
                   "skipping this file");
        return;
    }

    // loop through all "layer" objects in the file
    do {
// Fetch a required child OBJECT of |node| into |var|; on failure log,
// advance to the next layer node, and continue the enclosing do-loop.
#define GET_JSON_OBJECT(node, var)                                            \
    {                                                                         \
        var = cJSON_GetObjectItem(node, #var);                                \
        if (var == NULL) {                                                    \
            layer_node = layer_node->next;                                    \
            loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,              \
                       "Didn't find required layer object %s in manifest "    \
                       "JSON file, skipping this layer",                      \
                       #var);                                                 \
            continue;                                                         \
        }                                                                     \
    }
// Fetch a required child VALUE of |node| as a stack-allocated string in
// |var| (the surrounding quotes printed by cJSON are stripped).
#define GET_JSON_ITEM(node, var)                                              \
    {                                                                         \
        item = cJSON_GetObjectItem(node, #var);                               \
        if (item == NULL) {                                                   \
            layer_node = layer_node->next;                                    \
            loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,              \
                       "Didn't find required layer value %s in manifest JSON "\
                       "file, skipping this layer",                           \
                       #var);                                                 \
            continue;                                                         \
        }                                                                     \
        temp = cJSON_Print(item);                                             \
        temp[strlen(temp) - 1] = '\0';                                        \
        var = loader_stack_alloc(strlen(temp) + 1);                           \
        strcpy(var, &temp[1]);                                                \
        loader_tls_heap_free(temp);                                           \
    }
        GET_JSON_ITEM(layer_node, name)
        GET_JSON_ITEM(layer_node, type)
        GET_JSON_ITEM(layer_node, library_path)
        GET_JSON_ITEM(layer_node, api_version)
        GET_JSON_ITEM(layer_node, implementation_version)
        GET_JSON_ITEM(layer_node, description)
        if (is_implicit) {
            GET_JSON_OBJECT(layer_node, disable_environment)
        }
#undef GET_JSON_ITEM
#undef GET_JSON_OBJECT

        // add list entry
        struct loader_layer_properties *props = NULL;
        if (!strcmp(type, "DEVICE")) {
            if (layer_device_list == NULL) {
                layer_node = layer_node->next;
                continue;
            }
            props = loader_get_next_layer_property(inst, layer_device_list);
            props->type = (is_implicit) ? VK_LAYER_TYPE_DEVICE_IMPLICIT
                                        : VK_LAYER_TYPE_DEVICE_EXPLICIT;
        }
        if (!strcmp(type, "INSTANCE")) {
            if (layer_instance_list == NULL) {
                layer_node = layer_node->next;
                continue;
            }
            props = loader_get_next_layer_property(inst, layer_instance_list);
            props->type = (is_implicit) ? VK_LAYER_TYPE_INSTANCE_IMPLICIT
                                        : VK_LAYER_TYPE_INSTANCE_EXPLICIT;
        }
        if (!strcmp(type, "GLOBAL")) {
            // GLOBAL layers prefer the instance list; a device copy is made
            // at the bottom of this loop
            if (layer_instance_list != NULL)
                props =
                    loader_get_next_layer_property(inst, layer_instance_list);
            else if (layer_device_list != NULL)
                props = loader_get_next_layer_property(inst, layer_device_list);
            else {
                layer_node = layer_node->next;
                continue;
            }
            props->type = (is_implicit) ? VK_LAYER_TYPE_GLOBAL_IMPLICIT
                                        : VK_LAYER_TYPE_GLOBAL_EXPLICIT;
        }

        if (props == NULL) {
            layer_node = layer_node->next;
            continue;
        }

        strncpy(props->info.layerName, name, sizeof(props->info.layerName));
        props->info.layerName[sizeof(props->info.layerName) - 1] = '\0';

        char *fullpath = props->lib_name;
        char *rel_base;
        if (loader_platform_is_path(library_path)) {
            // a relative or absolute path
            char *name_copy = loader_stack_alloc(strlen(filename) + 1);
            strcpy(name_copy, filename);
            rel_base = loader_platform_dirname(name_copy);
            loader_expand_path(library_path, rel_base, MAX_STRING_SIZE,
                               fullpath);
        } else {
            // a filename which is assumed in a system directory
            loader_get_fullpath(library_path, DEFAULT_VK_LAYERS_PATH,
                                MAX_STRING_SIZE, fullpath);
        }
        props->info.specVersion = loader_make_version(api_version);
        props->info.implementationVersion = atoi(implementation_version);
        strncpy((char *)props->info.description, description,
                sizeof(props->info.description));
        props->info.description[sizeof(props->info.description) - 1] = '\0';
        if (is_implicit) {
            strncpy(props->disable_env_var.name,
                    disable_environment->child->string,
                    sizeof(props->disable_env_var.name));
            props->disable_env_var
                .name[sizeof(props->disable_env_var.name) - 1] = '\0';
            strncpy(props->disable_env_var.value,
                    disable_environment->child->valuestring,
                    sizeof(props->disable_env_var.value));
            props->disable_env_var
                .value[sizeof(props->disable_env_var.value) - 1] = '\0';
        }

/**
 * Now get all optional items and objects and put in list:
 * functions
 * instance_extensions
 * device_extensions
 * enable_environment (implicit layers only)
 */
// Optional variants: missing entries simply leave |var| NULL / unchanged.
#define GET_JSON_OBJECT(node, var)                                            \
    { var = cJSON_GetObjectItem(node, #var); }
#define GET_JSON_ITEM(node, var)                                              \
    {                                                                         \
        item = cJSON_GetObjectItem(node, #var);                               \
        if (item != NULL) {                                                   \
            temp = cJSON_Print(item);                                         \
            temp[strlen(temp) - 1] = '\0';                                    \
            var = loader_stack_alloc(strlen(temp) + 1);                       \
            strcpy(var, &temp[1]);                                            \
            loader_tls_heap_free(temp);                                       \
        }                                                                     \
    }

        cJSON *instance_extensions, *device_extensions, *functions,
            *enable_environment;
        cJSON *entrypoints;
        char *vkGetInstanceProcAddr, *vkGetDeviceProcAddr, *spec_version;
        char **entry_array;
        vkGetInstanceProcAddr = NULL;
        vkGetDeviceProcAddr = NULL;
        spec_version = NULL;
        entrypoints = NULL;
        entry_array = NULL;
        /**
         * functions
         *     vkGetInstanceProcAddr
         *     vkGetDeviceProcAddr
         */
        GET_JSON_OBJECT(layer_node, functions)
        if (functions != NULL) {
            GET_JSON_ITEM(functions, vkGetInstanceProcAddr)
            GET_JSON_ITEM(functions, vkGetDeviceProcAddr)
            if (vkGetInstanceProcAddr != NULL)
                strncpy(props->functions.str_gipa, vkGetInstanceProcAddr,
                        sizeof(props->functions.str_gipa));
            props->functions.str_gipa[sizeof(props->functions.str_gipa) - 1] =
                '\0';
            if (vkGetDeviceProcAddr != NULL)
                strncpy(props->functions.str_gdpa, vkGetDeviceProcAddr,
                        sizeof(props->functions.str_gdpa));
            props->functions.str_gdpa[sizeof(props->functions.str_gdpa) - 1] =
                '\0';
        }
        /**
         * instance_extensions
         * array of
         *     name
         *     spec_version
         */
        GET_JSON_OBJECT(layer_node, instance_extensions)
        if (instance_extensions != NULL) {
            int count = cJSON_GetArraySize(instance_extensions);
            for (i = 0; i < count; i++) {
                ext_item = cJSON_GetArrayItem(instance_extensions, i);
                GET_JSON_ITEM(ext_item, name)
                GET_JSON_ITEM(ext_item, spec_version)
                if (name != NULL) {
                    strncpy(ext_prop.extensionName, name,
                            sizeof(ext_prop.extensionName));
                    ext_prop.extensionName[sizeof(ext_prop.extensionName) - 1] =
                        '\0';
                }
                ext_prop.specVersion = atoi(spec_version);
                loader_add_to_ext_list(inst, &props->instance_extension_list, 1,
                                       &ext_prop);
            }
        }
        /**
         * device_extensions
         * array of
         *     name
         *     spec_version
         *     entrypoints
         */
        GET_JSON_OBJECT(layer_node, device_extensions)
        if (device_extensions != NULL) {
            int count = cJSON_GetArraySize(device_extensions);
            for (i = 0; i < count; i++) {
                ext_item = cJSON_GetArrayItem(device_extensions, i);
                GET_JSON_ITEM(ext_item, name)
                GET_JSON_ITEM(ext_item, spec_version)
                if (name != NULL) {
                    strncpy(ext_prop.extensionName, name,
                            sizeof(ext_prop.extensionName));
                    ext_prop.extensionName[sizeof(ext_prop.extensionName) - 1] =
                        '\0';
                }
                ext_prop.specVersion = atoi(spec_version);
                // entrypoints = cJSON_GetObjectItem(ext_item, "entrypoints");
                GET_JSON_OBJECT(ext_item, entrypoints)
                int entry_count;
                if (entrypoints == NULL) {
                    loader_add_to_dev_ext_list(inst,
                                               &props->device_extension_list,
                                               &ext_prop, 0, NULL);
                    continue;
                }
                entry_count = cJSON_GetArraySize(entrypoints);
                if (entry_count)
                    entry_array = (char **)loader_stack_alloc(sizeof(char *) *
                                                              entry_count);
                for (j = 0; j < entry_count; j++) {
                    ext_item = cJSON_GetArrayItem(entrypoints, j);
                    if (ext_item != NULL) {
                        // strip the quotes cJSON_Print adds around strings
                        temp = cJSON_Print(ext_item);
                        temp[strlen(temp) - 1] = '\0';
                        entry_array[j] = loader_stack_alloc(strlen(temp) + 1);
                        strcpy(entry_array[j], &temp[1]);
                        loader_tls_heap_free(temp);
                    }
                }
                loader_add_to_dev_ext_list(inst, &props->device_extension_list,
                                           &ext_prop, entry_count, entry_array);
            }
        }
        if (is_implicit) {
            GET_JSON_OBJECT(layer_node, enable_environment)

            // enable_environment is optional
            if (enable_environment) {
                strncpy(props->enable_env_var.name,
                        enable_environment->child->string,
                        sizeof(props->enable_env_var.name));
                props->enable_env_var
                    .name[sizeof(props->enable_env_var.name) - 1] = '\0';
                strncpy(props->enable_env_var.value,
                        enable_environment->child->valuestring,
                        sizeof(props->enable_env_var.value));
                props->enable_env_var
                    .value[sizeof(props->enable_env_var.value) - 1] = '\0';
            }
        }
#undef GET_JSON_ITEM
#undef GET_JSON_OBJECT
        // for global layers need to add them to both device and instance list
        if (!strcmp(type, "GLOBAL")) {
            struct loader_layer_properties *dev_props;
            if (layer_instance_list == NULL || layer_device_list == NULL) {
                layer_node = layer_node->next;
                continue;
            }
            dev_props = loader_get_next_layer_property(inst, layer_device_list);
            // copy into device layer list
            loader_copy_layer_properties(inst, dev_props, props);
        }
        layer_node = layer_node->next;
    } while (layer_node != NULL);
    return;
}

/**
 * Find the Vulkan library manifest files.
 *
 * This function scans the location or env_override directories/files
 * for a list of JSON manifest files.  If env_override is non-NULL
 * and has a valid value. Then the location is ignored.  Otherwise
 * location is used to look for manifest files. The location
 * is interpreted as Registry path on Windows and a directory path(s)
 * on Linux.
 *
 * \returns
 * A string list of manifest files to be opened in out_files param.
 * List has a pointer to string for each manifest filename.
 * When done using the list in out_files, pointers should be freed.
 * Location or override string lists can be either files or directories as
 *follows:
 *            | location | override
 * --------------------------------
 * Win ICD    | files    | files
 * Win Layer  | files    | dirs
 * Linux ICD  | dirs     | files
 * Linux Layer| dirs     | dirs
 */
static void loader_get_manifest_files(const struct loader_instance *inst,
                                      const char *env_override, bool is_layer,
                                      const char *location,
                                      struct loader_manifest_files *out_files) {
    char *override = NULL;
    char *loc;
    char *file, *next_file, *name;
    size_t alloced_count = 64;  // initial filename_list capacity; doubled on growth
    char full_path[2048];
    DIR *sysdir = NULL;
    bool list_is_dirs = false;
    struct dirent *dent;

    out_files->count = 0;
    out_files->filename_list = NULL;

    if (env_override != NULL && (override = loader_getenv(env_override))) {
#if !defined(_WIN32)
        if (geteuid() != getuid()) {
            /* Don't allow setuid apps to use the env var: */
            loader_free_getenv(override);
            override = NULL;
        }
#endif
    }

    if (location == NULL) {
        loader_log(
            inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0,
            "Can't get manifest files with NULL location, env_override=%s",
            env_override);
        return;
    }

#if defined(_WIN32)
    list_is_dirs = (is_layer && override != NULL) ? true : false;
#else
    list_is_dirs = (override == NULL || is_layer) ? true : false;
#endif
    // Make a copy of the input we are using so it is not modified
    // Also handle getting the location(s) from registry on Windows
    if (override == NULL) {
        loc = loader_stack_alloc(strlen(location) + 1);
        if (loc == NULL) {
            loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0,
                       "Out of memory can't get manifest files");
            return;
        }
        strcpy(loc, location);
#if defined(_WIN32)
        loc = loader_get_registry_files(inst, loc);
        if (loc == NULL) {
            if (!is_layer) {
                loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0,
                           "Registry lookup failed can't get ICD manifest "
                           "files, do you have a Vulkan driver installed");
            } else {
                // warning only for layers
                loader_log(
                    inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,
                    "Registry lookup failed can't get layer manifest files");
            }
            return;
        }
#endif
    } else {
        loc = loader_stack_alloc(strlen(override) + 1);
        if (loc == NULL) {
            loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0,
                       "Out of memory can't get manifest files");
            return;
        }
        strcpy(loc, override);
        loader_free_getenv(override);
    }

    // Print out the paths being searched if debugging is enabled
    loader_log(inst, VK_DEBUG_REPORT_DEBUG_BIT_EXT, 0,
               "Searching the following paths for manifest files: %s\n", loc);

    file = loc;
    while (*file) {
        next_file = loader_get_next_path(file);
        if (list_is_dirs) {
            sysdir = opendir(file);
            name = NULL;
            if (sysdir) {
                dent = readdir(sysdir);
                if (dent == NULL)
                    // NOTE(review): breaking out here skips the closedir at
                    // the bottom of the loop and leaks |sysdir|
                    break;
                name = &(dent->d_name[0]);
                loader_get_fullpath(name, file, sizeof(full_path), full_path);
                name = full_path;
            }
        } else {
#if defined(_WIN32)
            name = file;
#else
            // only Linux has relative paths
            char *dir;
            // make a copy of location so it isn't modified
            dir = loader_stack_alloc(strlen(loc) + 1);
            if (dir == NULL) {
                loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0,
                           "Out of memory can't get manifest files");
                return;
            }
            strcpy(dir, loc);

            loader_get_fullpath(file, dir, sizeof(full_path), full_path);

            name = full_path;
#endif
        }
        while (name) {
            /* Look for files ending with ".json" suffix */
            uint32_t nlen = (uint32_t)strlen(name);
            const char *suf = name + nlen - 5;
            if ((nlen > 5) && !strncmp(suf, ".json", 5)) {
                if (out_files->count == 0) {
                    out_files->filename_list =
                        loader_heap_alloc(inst, alloced_count * sizeof(char *),
                                          VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
                } else if (out_files->count == alloced_count) {
                    // grow the list geometrically
                    out_files->filename_list =
                        loader_heap_realloc(inst, out_files->filename_list,
                                            alloced_count * sizeof(char *),
                                            alloced_count * sizeof(char *) * 2,
                                            VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
                    alloced_count *= 2;
                }
                if (out_files->filename_list == NULL) {
                    loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0,
                               "Out of memory can't alloc manifest file list");
                    return;
                }
                out_files->filename_list[out_files->count] = loader_heap_alloc(
                    inst, strlen(name) + 1, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
                if (out_files->filename_list[out_files->count] == NULL) {
                    loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0,
                               "Out of memory can't get manifest files");
                    return;
                }
                strcpy(out_files->filename_list[out_files->count], name);
                out_files->count++;
            } else if (!list_is_dirs) {
                loader_log(
                    inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,
                    "Skipping manifest file %s, file name must end in .json",
                    name);
            }
            if (list_is_dirs) {
                dent = readdir(sysdir);
                if (dent == NULL)
                    break;
                name = &(dent->d_name[0]);
                loader_get_fullpath(name, file, sizeof(full_path), full_path);
                name = full_path;
            } else {
                break;
            }
        }
        if (sysdir)
            closedir(sysdir);
        file = next_file;
    }
    return;
}

/* Placeholder init/teardown hooks for the scanned-ICD library list. */
void loader_init_icd_lib_list() {}

void loader_destroy_icd_lib_list() {}
/**
 * Try to find the Vulkan ICD driver(s).
 *
 * This function scans the default system loader path(s) or path
 * specified by the \c VK_ICD_FILENAMES environment variable in
 * order to find loadable VK ICDs manifest files. From these
 * manifest files it finds the ICD libraries.
 *
 * \returns
 * a list of icds that were discovered
 */
void loader_icd_scan(const struct loader_instance *inst,
                     struct loader_icd_libs *icds) {
    char *file_str;
    struct loader_manifest_files manifest_files;

    loader_scanned_icd_init(inst, icds);
    // Get a list of manifest files for ICDs
    loader_get_manifest_files(inst, "VK_ICD_FILENAMES", false,
                              DEFAULT_VK_DRIVERS_INFO, &manifest_files);
    if (manifest_files.count == 0)
        return;
    loader_platform_thread_lock_mutex(&loader_json_lock);
    for (uint32_t i = 0; i < manifest_files.count; i++) {
        file_str = manifest_files.filename_list[i];
        if (file_str == NULL)
            continue;

        cJSON *json;
        json = loader_get_json(inst, file_str);
        if (!json)
            continue;
        cJSON *item, *itemICD;
        item = cJSON_GetObjectItem(json, "file_format_version");
        if (item == NULL) {
            // NOTE(review): this early return skips cJSON_Delete(json) and
            // frees neither file_str nor the remaining manifest file names
            loader_platform_thread_unlock_mutex(&loader_json_lock);
            return;
        }
        char *file_vers = cJSON_Print(item);
        loader_log(inst, VK_DEBUG_REPORT_INFORMATION_BIT_EXT, 0,
                   "Found manifest file %s, version %s", file_str, file_vers);
        if (strcmp(file_vers, "\"1.0.0\"") != 0)
            loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,
                       "Unexpected manifest file version (expected 1.0.0), may "
                       "cause errors");
        loader_tls_heap_free(file_vers);
        itemICD = cJSON_GetObjectItem(json, "ICD");
        if (itemICD != NULL) {
            item = cJSON_GetObjectItem(itemICD, "library_path");
            if (item != NULL) {
                char *temp = cJSON_Print(item);
                if (!temp || strlen(temp) == 0) {
                    loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,
                               "Can't find \"library_path\" in ICD JSON file "
                               "%s, skipping",
                               file_str);
                    loader_tls_heap_free(temp);
                    loader_heap_free(inst, file_str);
                    cJSON_Delete(json);
                    continue;
                }
                // strip out extra quotes
                temp[strlen(temp) - 1] = '\0';
                char *library_path = loader_stack_alloc(strlen(temp) + 1);
                strcpy(library_path, &temp[1]);
                loader_tls_heap_free(temp);
                if (!library_path || strlen(library_path) == 0) {
                    loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,
                               "Can't find \"library_path\" in ICD JSON file "
                               "%s, skipping",
                               file_str);
                    loader_heap_free(inst, file_str);
                    cJSON_Delete(json);
                    continue;
                }
                char fullpath[MAX_STRING_SIZE];
                // Print out the paths being searched if debugging is enabled
                loader_log(
                    inst, VK_DEBUG_REPORT_DEBUG_BIT_EXT, 0,
                    "Searching for ICD drivers named %s default dir %s\n",
                    library_path, DEFAULT_VK_DRIVERS_PATH);
                if (loader_platform_is_path(library_path)) {
                    // a relative or absolute path
                    char *name_copy = loader_stack_alloc(strlen(file_str) + 1);
                    char *rel_base;
                    strcpy(name_copy, file_str);
                    rel_base = loader_platform_dirname(name_copy);
                    loader_expand_path(library_path, rel_base, sizeof(fullpath),
                                       fullpath);
                } else {
                    // a filename which is assumed in a system directory
                    loader_get_fullpath(library_path, DEFAULT_VK_DRIVERS_PATH,
                                        sizeof(fullpath), fullpath);
                }

                uint32_t vers = 0;
                item = cJSON_GetObjectItem(itemICD, "api_version");
                if (item != NULL) {
                    temp = cJSON_Print(item);
                    vers = loader_make_version(temp);
                    loader_tls_heap_free(temp);
                }
                loader_scanned_icd_add(inst, icds, fullpath, vers);
            } else
                loader_log(inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,
                           "Can't find \"library_path\" object in ICD JSON "
                           "file %s, skipping",
                           file_str);
        } else
            loader_log(
                inst, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,
                "Can't find \"ICD\" object in ICD JSON file %s, skipping",
                file_str);

        loader_heap_free(inst, file_str);
        cJSON_Delete(json);
    }
    loader_heap_free(inst, manifest_files.filename_list);
    loader_platform_thread_unlock_mutex(&loader_json_lock);
}

/**
 * Scan explicit and implicit layer manifest files and (re)populate the given
 * instance and device layer lists.  Explicit layers honor the LAYERS_PATH_ENV
 * override; implicit layers never do.
 */
void loader_layer_scan(const struct loader_instance *inst,
                       struct loader_layer_list *instance_layers,
                       struct loader_layer_list *device_layers) {
    char *file_str;
    struct loader_manifest_files
        manifest_files[2]; // [0] = explicit, [1] = implicit
    cJSON *json;
    uint32_t i;
    uint32_t implicit;

    // Get a list of manifest files for explicit layers
    loader_get_manifest_files(inst, LAYERS_PATH_ENV, true,
                              DEFAULT_VK_ELAYERS_INFO, &manifest_files[0]);
    // Pass NULL for environment variable override - implicit layers are not
    // overridden by LAYERS_PATH_ENV
    loader_get_manifest_files(inst, NULL, true, DEFAULT_VK_ILAYERS_INFO,
                              &manifest_files[1]);
    if (manifest_files[0].count == 0 && manifest_files[1].count == 0)
        return;

#if 0 // TODO
    /**
     * We need a list of the layer libraries, not just a list of
     * the layer properties (a layer library could expose more than
     * one layer property). This list of scanned layers would be
     * used to check for global and physicaldevice layer properties.
     */
    if (!loader_init_layer_library_list(&loader.scanned_layer_libraries)) {
        loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0,
                   "Alloc for layer list failed: %s line: %d", __FILE__, __LINE__);
        return;
    }
#endif

    /* cleanup any previously scanned libraries */
    loader_delete_layer_properties(inst, instance_layers);
    loader_delete_layer_properties(inst, device_layers);

    loader_platform_thread_lock_mutex(&loader_json_lock);
    for (implicit = 0; implicit < 2; implicit++) {
        for (i = 0; i < manifest_files[implicit].count; i++) {
            file_str = manifest_files[implicit].filename_list[i];
            if (file_str == NULL)
                continue;

            // parse file into JSON struct
            json = loader_get_json(inst, file_str);
            if (!json) {
                continue;
            }

            // TODO error if device layers expose instance_extensions
            // TODO error if instance layers expose device extensions
            loader_add_layer_properties(inst, instance_layers, device_layers,
                                        json, (implicit == 1), file_str);

            loader_heap_free(inst, file_str);
            cJSON_Delete(json);
        }
    }
    if (manifest_files[0].count != 0)
        loader_heap_free(inst, manifest_files[0].filename_list);

    if (manifest_files[1].count != 0)
        loader_heap_free(inst, manifest_files[1].filename_list);

    // add a meta layer for validation if the validation layers are all present
    loader_add_layer_property_meta(
        inst, sizeof(std_validation_names) / sizeof(std_validation_names[0]),
        std_validation_names, instance_layers, device_layers);

    loader_platform_thread_unlock_mutex(&loader_json_lock);
}

/**
 * Internal GetInstanceProcAddr used at the bottom of the instance call chain:
 * special-cases the three loader-owned entry points, then falls back to the
 * instance dispatch table.
 */
static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
loader_gpa_instance_internal(VkInstance inst, const char *pName) {
    if (!strcmp(pName, "vkGetInstanceProcAddr"))
        return (void *)loader_gpa_instance_internal;
    if (!strcmp(pName, "vkCreateInstance"))
        return (void *)loader_CreateInstance;
    if (!strcmp(pName, "vkCreateDevice"))
        return (void *)loader_create_device_terminator;

    // inst is not wrapped
    if (inst == VK_NULL_HANDLE) {
        return NULL;
    }
    // dispatch table pointer is stored as the first field of the handle
    VkLayerInstanceDispatchTable *disp_table =
        *(VkLayerInstanceDispatchTable **)inst;
    void *addr;

    if (disp_table == NULL)
        return NULL;

    addr = loader_lookup_instance_dispatch_table(disp_table, pName);
    if (addr) {
        return addr;
    }

    if (disp_table->GetInstanceProcAddr == NULL) {
        return NULL;
    }
    return disp_table->GetInstanceProcAddr(inst, pName);
}

/**
 * Initialize device_ext dispatch table entry as follows:
 * If dev == NULL find all logical devices created within this instance and
 * init the entry (given by idx) in the ext dispatch table.
 * If dev != NULL only initialize the entry in the given dev's dispatch table.
 * The initialization value is gotten by calling down the device chain with
 * GDPA.
 * If GDPA returns NULL then don't initialize the dispatch table entry.
+ */ +static void loader_init_dispatch_dev_ext_entry(struct loader_instance *inst, + struct loader_device *dev, + uint32_t idx, + const char *funcName) + +{ + void *gdpa_value; + if (dev != NULL) { + gdpa_value = dev->loader_dispatch.core_dispatch.GetDeviceProcAddr( + dev->device, funcName); + if (gdpa_value != NULL) + dev->loader_dispatch.ext_dispatch.DevExt[idx] = + (PFN_vkDevExt)gdpa_value; + } else { + for (uint32_t i = 0; i < inst->total_icd_count; i++) { + struct loader_icd *icd = &inst->icds[i]; + struct loader_device *dev = icd->logical_device_list; + while (dev) { + gdpa_value = + dev->loader_dispatch.core_dispatch.GetDeviceProcAddr( + dev->device, funcName); + if (gdpa_value != NULL) + dev->loader_dispatch.ext_dispatch.DevExt[idx] = + (PFN_vkDevExt)gdpa_value; + dev = dev->next; + } + } + } +} + +/** + * Find all dev extension in the hash table and initialize the dispatch table + * for dev for each of those extension entrypoints found in hash table. + + */ +static void loader_init_dispatch_dev_ext(struct loader_instance *inst, + struct loader_device *dev) { + for (uint32_t i = 0; i < MAX_NUM_DEV_EXTS; i++) { + if (inst->disp_hash[i].func_name != NULL) + loader_init_dispatch_dev_ext_entry(inst, dev, i, + inst->disp_hash[i].func_name); + } +} + +static bool loader_check_icds_for_address(struct loader_instance *inst, + const char *funcName) { + struct loader_icd *icd; + icd = inst->icds; + while (icd) { + if (icd->this_icd_lib->GetInstanceProcAddr(icd->instance, funcName)) + // this icd supports funcName + return true; + icd = icd->next; + } + + return false; +} + +static void loader_free_dev_ext_table(struct loader_instance *inst) { + for (uint32_t i = 0; i < MAX_NUM_DEV_EXTS; i++) { + loader_heap_free(inst, inst->disp_hash[i].func_name); + loader_heap_free(inst, inst->disp_hash[i].list.index); + } + memset(inst->disp_hash, 0, sizeof(inst->disp_hash)); +} + +static bool loader_add_dev_ext_table(struct loader_instance *inst, + uint32_t *ptr_idx, const char 
*funcName) { + uint32_t i; + uint32_t idx = *ptr_idx; + struct loader_dispatch_hash_list *list = &inst->disp_hash[idx].list; + + if (!inst->disp_hash[idx].func_name) { + // no entry here at this idx, so use it + assert(list->capacity == 0); + inst->disp_hash[idx].func_name = (char *)loader_heap_alloc( + inst, strlen(funcName) + 1, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (inst->disp_hash[idx].func_name == NULL) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "loader_add_dev_ext_table() can't allocate memory for " + "func_name"); + return false; + } + strncpy(inst->disp_hash[idx].func_name, funcName, strlen(funcName) + 1); + return true; + } + + // check for enough capacity + if (list->capacity == 0) { + list->index = loader_heap_alloc(inst, 8 * sizeof(*(list->index)), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (list->index == NULL) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "loader_add_dev_ext_table() can't allocate list memory"); + return false; + } + list->capacity = 8 * sizeof(*(list->index)); + } else if (list->capacity < (list->count + 1) * sizeof(*(list->index))) { + list->index = loader_heap_realloc(inst, list->index, list->capacity, + list->capacity * 2, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (list->index == NULL) { + loader_log( + inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "loader_add_dev_ext_table() can't reallocate list memory"); + return false; + } + list->capacity *= 2; + } + + // find an unused index in the hash table and use it + i = (idx + 1) % MAX_NUM_DEV_EXTS; + do { + if (!inst->disp_hash[i].func_name) { + assert(inst->disp_hash[i].list.capacity == 0); + inst->disp_hash[i].func_name = + (char *)loader_heap_alloc(inst, strlen(funcName) + 1, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (inst->disp_hash[i].func_name == NULL) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "loader_add_dev_ext_table() can't rallocate " + "func_name memory"); + return false; + } + strncpy(inst->disp_hash[i].func_name, funcName, 
+ strlen(funcName) + 1); + list->index[list->count] = i; + list->count++; + *ptr_idx = i; + return true; + } + i = (i + 1) % MAX_NUM_DEV_EXTS; + } while (i != idx); + + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "loader_add_dev_ext_table() couldn't insert into hash table; is " + "it full?"); + return false; +} + +static bool loader_name_in_dev_ext_table(struct loader_instance *inst, + uint32_t *idx, const char *funcName) { + uint32_t alt_idx; + if (inst->disp_hash[*idx].func_name && + !strcmp(inst->disp_hash[*idx].func_name, funcName)) + return true; + + // funcName wasn't at the primary spot in the hash table + // search the list of secondary locations (shallow search, not deep search) + for (uint32_t i = 0; i < inst->disp_hash[*idx].list.count; i++) { + alt_idx = inst->disp_hash[*idx].list.index[i]; + if (!strcmp(inst->disp_hash[*idx].func_name, funcName)) { + *idx = alt_idx; + return true; + } + } + + return false; +} + +/** + * This function returns generic trampoline code address for unknown entry + * points. + * Presumably, these unknown entry points (as given by funcName) are device + * extension entrypoints. A hash table is used to keep a list of unknown entry + * points and their mapping to the device extension dispatch table + * (struct loader_dev_ext_dispatch_table). + * \returns + * For a given entry point string (funcName), if an existing mapping is found + * the + * trampoline address for that mapping is returned. Otherwise, this unknown + * entry point + * has not been seen yet. Next check if a layer or ICD supports it. If so then + * a + * new entry in the hash table is initialized and that trampoline address for + * the new entry is returned. Null is returned if the hash table is full or + * if no discovered layer or ICD returns a non-NULL GetProcAddr for it. 
+ */ +void *loader_dev_ext_gpa(struct loader_instance *inst, const char *funcName) { + uint32_t idx; + uint32_t seed = 0; + + idx = murmurhash(funcName, strlen(funcName), seed) % MAX_NUM_DEV_EXTS; + + if (loader_name_in_dev_ext_table(inst, &idx, funcName)) + // found funcName already in hash + return loader_get_dev_ext_trampoline(idx); + + // Check if funcName is supported in either ICDs or a layer library + if (!loader_check_icds_for_address(inst, funcName)) { + // TODO Add check in layer libraries for support of address + // if support found in layers continue on + return NULL; + } + + if (loader_add_dev_ext_table(inst, &idx, funcName)) { + // successfully added new table entry + // init any dev dispatch table entrys as needed + loader_init_dispatch_dev_ext_entry(inst, NULL, idx, funcName); + return loader_get_dev_ext_trampoline(idx); + } + + return NULL; +} + +struct loader_instance *loader_get_instance(const VkInstance instance) { + /* look up the loader_instance in our list by comparing dispatch tables, as + * there is no guarantee the instance is still a loader_instance* after any + * layers which wrap the instance object. + */ + const VkLayerInstanceDispatchTable *disp; + struct loader_instance *ptr_instance = NULL; + disp = loader_get_instance_dispatch(instance); + for (struct loader_instance *inst = loader.instances; inst; + inst = inst->next) { + if (inst->disp == disp) { + ptr_instance = inst; + break; + } + } + return ptr_instance; +} + +static loader_platform_dl_handle +loader_add_layer_lib(const struct loader_instance *inst, const char *chain_type, + struct loader_layer_properties *layer_prop) { + struct loader_lib_info *new_layer_lib_list, *my_lib; + size_t new_alloc_size; + /* + * TODO: We can now track this information in the + * scanned_layer_libraries list. 
+ */ + for (uint32_t i = 0; i < loader.loaded_layer_lib_count; i++) { + if (strcmp(loader.loaded_layer_lib_list[i].lib_name, + layer_prop->lib_name) == 0) { + /* Have already loaded this library, just increment ref count */ + loader.loaded_layer_lib_list[i].ref_count++; + loader_log(inst, VK_DEBUG_REPORT_DEBUG_BIT_EXT, 0, + "%s Chain: Increment layer reference count for layer " + "library %s", + chain_type, layer_prop->lib_name); + return loader.loaded_layer_lib_list[i].lib_handle; + } + } + + /* Haven't seen this library so load it */ + new_alloc_size = 0; + if (loader.loaded_layer_lib_capacity == 0) + new_alloc_size = 8 * sizeof(struct loader_lib_info); + else if (loader.loaded_layer_lib_capacity <= + loader.loaded_layer_lib_count * sizeof(struct loader_lib_info)) + new_alloc_size = loader.loaded_layer_lib_capacity * 2; + + if (new_alloc_size) { + new_layer_lib_list = loader_heap_realloc( + inst, loader.loaded_layer_lib_list, + loader.loaded_layer_lib_capacity, new_alloc_size, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!new_layer_lib_list) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "loader: realloc failed in loader_add_layer_lib"); + return NULL; + } + loader.loaded_layer_lib_capacity = new_alloc_size; + loader.loaded_layer_lib_list = new_layer_lib_list; + } else + new_layer_lib_list = loader.loaded_layer_lib_list; + my_lib = &new_layer_lib_list[loader.loaded_layer_lib_count]; + + strncpy(my_lib->lib_name, layer_prop->lib_name, sizeof(my_lib->lib_name)); + my_lib->lib_name[sizeof(my_lib->lib_name) - 1] = '\0'; + my_lib->ref_count = 0; + my_lib->lib_handle = NULL; + + if ((my_lib->lib_handle = loader_platform_open_library(my_lib->lib_name)) == + NULL) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + loader_platform_open_library_error(my_lib->lib_name)); + return NULL; + } else { + loader_log(inst, VK_DEBUG_REPORT_DEBUG_BIT_EXT, 0, + "Chain: %s: Loading layer library %s", chain_type, + layer_prop->lib_name); + } + 
loader.loaded_layer_lib_count++; + my_lib->ref_count++; + + return my_lib->lib_handle; +} + +static void +loader_remove_layer_lib(struct loader_instance *inst, + struct loader_layer_properties *layer_prop) { + uint32_t idx = loader.loaded_layer_lib_count; + struct loader_lib_info *new_layer_lib_list, *my_lib = NULL; + + for (uint32_t i = 0; i < loader.loaded_layer_lib_count; i++) { + if (strcmp(loader.loaded_layer_lib_list[i].lib_name, + layer_prop->lib_name) == 0) { + /* found matching library */ + idx = i; + my_lib = &loader.loaded_layer_lib_list[i]; + break; + } + } + + if (idx == loader.loaded_layer_lib_count) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Unable to unref library %s", layer_prop->lib_name); + return; + } + + if (my_lib) { + my_lib->ref_count--; + if (my_lib->ref_count > 0) { + loader_log(inst, VK_DEBUG_REPORT_DEBUG_BIT_EXT, 0, + "Decrement reference count for layer library %s", + layer_prop->lib_name); + return; + } + } + loader_platform_close_library(my_lib->lib_handle); + loader_log(inst, VK_DEBUG_REPORT_DEBUG_BIT_EXT, 0, + "Unloading layer library %s", layer_prop->lib_name); + + /* Need to remove unused library from list */ + new_layer_lib_list = + loader_heap_alloc(inst, loader.loaded_layer_lib_capacity, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!new_layer_lib_list) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "loader: heap alloc failed loader_remove_layer_library"); + return; + } + + if (idx > 0) { + /* Copy records before idx */ + memcpy(new_layer_lib_list, &loader.loaded_layer_lib_list[0], + sizeof(struct loader_lib_info) * idx); + } + if (idx < (loader.loaded_layer_lib_count - 1)) { + /* Copy records after idx */ + memcpy(&new_layer_lib_list[idx], &loader.loaded_layer_lib_list[idx + 1], + sizeof(struct loader_lib_info) * + (loader.loaded_layer_lib_count - idx - 1)); + } + + loader_heap_free(inst, loader.loaded_layer_lib_list); + loader.loaded_layer_lib_count--; + loader.loaded_layer_lib_list = 
new_layer_lib_list; +} + +/** + * Go through the search_list and find any layers which match type. If layer + * type match is found in then add it to ext_list. + */ +static void +loader_add_layer_implicit(const struct loader_instance *inst, + const enum layer_type type, + struct loader_layer_list *list, + const struct loader_layer_list *search_list) { + bool enable; + char *env_value; + uint32_t i; + for (i = 0; i < search_list->count; i++) { + const struct loader_layer_properties *prop = &search_list->list[i]; + if (prop->type & type) { + /* Found an implicit layer, see if it should be enabled */ + enable = false; + + // if no enable_environment variable is specified, this implicit + // layer + // should always be enabled. Otherwise check if the variable is set + if (prop->enable_env_var.name[0] == 0) { + enable = true; + } else { + env_value = loader_getenv(prop->enable_env_var.name); + if (env_value && !strcmp(prop->enable_env_var.value, env_value)) + enable = true; + loader_free_getenv(env_value); + } + + // disable_environment has priority, i.e. if both enable and disable + // environment variables are set, the layer is disabled. Implicit + // layers + // are required to have a disable_environment variables + env_value = loader_getenv(prop->disable_env_var.name); + if (env_value) + enable = false; + loader_free_getenv(env_value); + + if (enable) + loader_add_to_layer_list(inst, list, 1, prop); + } + } +} + +/** + * Get the layer name(s) from the env_name environment variable. If layer + * is found in search_list then add it to layer_list. But only add it to + * layer_list if type matches. 
+ */ +static void loader_add_layer_env(const struct loader_instance *inst, + const enum layer_type type, + const char *env_name, + struct loader_layer_list *layer_list, + const struct loader_layer_list *search_list) { + char *layerEnv; + char *next, *name; + + layerEnv = loader_getenv(env_name); + if (layerEnv == NULL) { + return; + } + name = loader_stack_alloc(strlen(layerEnv) + 1); + if (name == NULL) { + return; + } + strcpy(name, layerEnv); + + loader_free_getenv(layerEnv); + + while (name && *name) { + next = loader_get_next_path(name); + if (!strcmp(std_validation_str, name)) { + /* add meta list of layers + don't attempt to remove duplicate layers already added by app or + env var + */ + loader_log(inst, VK_DEBUG_REPORT_INFORMATION_BIT_EXT, 0, + "Expanding meta layer %s found in environment variable", + std_validation_str); + for (uint32_t i = 0; i < sizeof(std_validation_names) / + sizeof(std_validation_names[0]); + i++) { + loader_find_layer_name_add_list(inst, std_validation_names[i], + type, search_list, layer_list); + } + } else { + loader_find_layer_name_add_list(inst, name, type, search_list, + layer_list); + } + name = next; + } + + return; +} + +void loader_deactivate_instance_layers(struct loader_instance *instance) { + /* Create instance chain of enabled layers */ + for (uint32_t i = 0; i < instance->activated_layer_list.count; i++) { + struct loader_layer_properties *layer_prop = + &instance->activated_layer_list.list[i]; + + loader_remove_layer_lib(instance, layer_prop); + } + loader_destroy_layer_list(instance, &instance->activated_layer_list); +} + +VkResult +loader_enable_instance_layers(struct loader_instance *inst, + const VkInstanceCreateInfo *pCreateInfo, + const struct loader_layer_list *instance_layers) { + VkResult err; + + assert(inst && "Cannot have null instance"); + + if (!loader_init_layer_list(inst, &inst->activated_layer_list)) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Failed to alloc Instance activated layer 
list"); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + /* Add any implicit layers first */ + loader_add_layer_implicit(inst, VK_LAYER_TYPE_INSTANCE_IMPLICIT, + &inst->activated_layer_list, instance_layers); + + /* Add any layers specified via environment variable next */ + loader_add_layer_env(inst, VK_LAYER_TYPE_INSTANCE_EXPLICIT, + "VK_INSTANCE_LAYERS", &inst->activated_layer_list, + instance_layers); + + /* Add layers specified by the application */ + err = loader_add_layer_names_to_list( + inst, &inst->activated_layer_list, pCreateInfo->enabledLayerCount, + pCreateInfo->ppEnabledLayerNames, instance_layers); + + return err; +} + +/* + * Given the list of layers to activate in the loader_instance + * structure. This function will add a VkLayerInstanceCreateInfo + * structure to the VkInstanceCreateInfo.pNext pointer. + * Each activated layer will have it's own VkLayerInstanceLink + * structure that tells the layer what Get*ProcAddr to call to + * get function pointers to the next layer down. + * Once the chain info has been created this function will + * execute the CreateInstance call chain. Each layer will + * then have an opportunity in it's CreateInstance function + * to setup it's dispatch table when the lower layer returns + * successfully. + * Each layer can wrap or not-wrap the returned VkInstance object + * as it sees fit. + * The instance chain is terminated by a loader function + * that will call CreateInstance on all available ICD's and + * cache those VkInstance objects for future use. 
+ */ +VkResult loader_create_instance_chain(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + struct loader_instance *inst, + VkInstance *created_instance) { + uint32_t activated_layers = 0; + VkLayerInstanceCreateInfo chain_info; + VkLayerInstanceLink *layer_instance_link_info = NULL; + VkInstanceCreateInfo loader_create_info; + VkResult res; + + PFN_vkGetInstanceProcAddr nextGIPA = loader_gpa_instance_internal; + PFN_vkGetInstanceProcAddr fpGIPA = loader_gpa_instance_internal; + + memcpy(&loader_create_info, pCreateInfo, sizeof(VkInstanceCreateInfo)); + + if (inst->activated_layer_list.count > 0) { + + chain_info.u.pLayerInfo = NULL; + chain_info.pNext = pCreateInfo->pNext; + chain_info.sType = VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO; + chain_info.function = VK_LAYER_LINK_INFO; + loader_create_info.pNext = &chain_info; + + layer_instance_link_info = loader_stack_alloc( + sizeof(VkLayerInstanceLink) * inst->activated_layer_list.count); + if (!layer_instance_link_info) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Failed to alloc Instance objects for layer"); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + /* Create instance chain of enabled layers */ + for (int32_t i = inst->activated_layer_list.count - 1; i >= 0; i--) { + struct loader_layer_properties *layer_prop = + &inst->activated_layer_list.list[i]; + loader_platform_dl_handle lib_handle; + + lib_handle = loader_add_layer_lib(inst, "instance", layer_prop); + if (!lib_handle) + continue; + if ((fpGIPA = layer_prop->functions.get_instance_proc_addr) == + NULL) { + if (layer_prop->functions.str_gipa == NULL || + strlen(layer_prop->functions.str_gipa) == 0) { + fpGIPA = (PFN_vkGetInstanceProcAddr) + loader_platform_get_proc_address( + lib_handle, "vkGetInstanceProcAddr"); + layer_prop->functions.get_instance_proc_addr = fpGIPA; + } else + fpGIPA = (PFN_vkGetInstanceProcAddr) + loader_platform_get_proc_address( + lib_handle, layer_prop->functions.str_gipa); + if 
(!fpGIPA) { + loader_log( + inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Failed to find vkGetInstanceProcAddr in layer %s", + layer_prop->lib_name); + continue; + } + } + + layer_instance_link_info[activated_layers].pNext = + chain_info.u.pLayerInfo; + layer_instance_link_info[activated_layers] + .pfnNextGetInstanceProcAddr = nextGIPA; + chain_info.u.pLayerInfo = + &layer_instance_link_info[activated_layers]; + nextGIPA = fpGIPA; + + loader_log(inst, VK_DEBUG_REPORT_INFORMATION_BIT_EXT, 0, + "Insert instance layer %s (%s)", + layer_prop->info.layerName, layer_prop->lib_name); + + activated_layers++; + } + } + + PFN_vkCreateInstance fpCreateInstance = + (PFN_vkCreateInstance)nextGIPA(*created_instance, "vkCreateInstance"); + if (fpCreateInstance) { + VkLayerInstanceCreateInfo instance_create_info; + + instance_create_info.sType = + VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO; + instance_create_info.function = VK_LAYER_INSTANCE_INFO; + + instance_create_info.u.instanceInfo.instance_info = inst; + instance_create_info.u.instanceInfo.pfnNextGetInstanceProcAddr = + nextGIPA; + + instance_create_info.pNext = loader_create_info.pNext; + loader_create_info.pNext = &instance_create_info; + + res = + fpCreateInstance(&loader_create_info, pAllocator, created_instance); + } else { + // Couldn't find CreateInstance function! 
+ res = VK_ERROR_INITIALIZATION_FAILED; + } + + if (res != VK_SUCCESS) { + // TODO: Need to clean up here + } else { + loader_init_instance_core_dispatch_table(inst->disp, nextGIPA, + *created_instance); + } + + return res; +} + +void loader_activate_instance_layer_extensions(struct loader_instance *inst, + VkInstance created_inst) { + + loader_init_instance_extension_dispatch_table( + inst->disp, inst->disp->GetInstanceProcAddr, created_inst); +} + +static VkResult +loader_enable_device_layers(const struct loader_instance *inst, + struct loader_icd *icd, + struct loader_layer_list *activated_layer_list, + const VkDeviceCreateInfo *pCreateInfo, + const struct loader_layer_list *device_layers) + +{ + VkResult err; + + assert(activated_layer_list && "Cannot have null output layer list"); + + if (activated_layer_list->list == NULL || + activated_layer_list->capacity == 0) { + loader_init_layer_list(inst, activated_layer_list); + } + + if (activated_layer_list->list == NULL) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Failed to alloc device activated layer list"); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + /* Add any implicit layers first */ + loader_add_layer_implicit(inst, VK_LAYER_TYPE_DEVICE_IMPLICIT, + activated_layer_list, device_layers); + + /* Add any layers specified via environment variable next */ + loader_add_layer_env(inst, VK_LAYER_TYPE_DEVICE_EXPLICIT, + "VK_DEVICE_LAYERS", activated_layer_list, + device_layers); + + /* Add layers specified by the application */ + err = loader_add_layer_names_to_list( + inst, activated_layer_list, pCreateInfo->enabledLayerCount, + pCreateInfo->ppEnabledLayerNames, device_layers); + + return err; +} + +VKAPI_ATTR VkResult VKAPI_CALL +loader_create_device_terminator(VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDevice *pDevice) { + struct loader_physical_device *phys_dev; + phys_dev = loader_get_physical_device(physicalDevice); + + 
VkLayerDeviceCreateInfo *chain_info = + (VkLayerDeviceCreateInfo *)pCreateInfo->pNext; + while (chain_info && + !(chain_info->sType == VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO && + chain_info->function == VK_LAYER_DEVICE_INFO)) { + chain_info = (VkLayerDeviceCreateInfo *)chain_info->pNext; + } + assert(chain_info != NULL); + + struct loader_device *dev = + (struct loader_device *)chain_info->u.deviceInfo.device_info; + PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = + chain_info->u.deviceInfo.pfnNextGetInstanceProcAddr; + PFN_vkCreateDevice fpCreateDevice = + (PFN_vkCreateDevice)fpGetInstanceProcAddr(phys_dev->this_icd->instance, + "vkCreateDevice"); + if (fpCreateDevice == NULL) { + return VK_ERROR_INITIALIZATION_FAILED; + } + + VkDeviceCreateInfo localCreateInfo; + memcpy(&localCreateInfo, pCreateInfo, sizeof(localCreateInfo)); + localCreateInfo.pNext = loader_strip_create_extensions(pCreateInfo->pNext); + + /* + * NOTE: Need to filter the extensions to only those + * supported by the ICD. + * No ICD will advertise support for layers. An ICD + * library could support a layer, but it would be + * independent of the actual ICD, just in the same library. 
+ */ + char **filtered_extension_names = NULL; + filtered_extension_names = + loader_stack_alloc(pCreateInfo->enabledExtensionCount * sizeof(char *)); + if (!filtered_extension_names) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + localCreateInfo.enabledLayerCount = 0; + localCreateInfo.ppEnabledLayerNames = NULL; + + localCreateInfo.enabledExtensionCount = 0; + localCreateInfo.ppEnabledExtensionNames = + (const char *const *)filtered_extension_names; + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + const char *extension_name = pCreateInfo->ppEnabledExtensionNames[i]; + VkExtensionProperties *prop = get_extension_property( + extension_name, &phys_dev->device_extension_cache); + if (prop) { + filtered_extension_names[localCreateInfo.enabledExtensionCount] = + (char *)extension_name; + localCreateInfo.enabledExtensionCount++; + } + } + + VkDevice localDevice; + // TODO: Why does fpCreateDevice behave differently than + // this_icd->CreateDevice? + // VkResult res = fpCreateDevice(phys_dev->phys_dev, &localCreateInfo, + // pAllocator, &localDevice); + VkResult res = phys_dev->this_icd->CreateDevice( + phys_dev->phys_dev, &localCreateInfo, pAllocator, &localDevice); + + if (res != VK_SUCCESS) { + return res; + } + + *pDevice = localDevice; + + /* Init dispatch pointer in new device object */ + loader_init_dispatch(*pDevice, &dev->loader_dispatch); + + return res; +} + +VkResult loader_create_device_chain(VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + struct loader_instance *inst, + struct loader_icd *icd, + struct loader_device *dev) { + uint32_t activated_layers = 0; + VkLayerDeviceLink *layer_device_link_info; + VkLayerDeviceCreateInfo chain_info; + VkLayerDeviceCreateInfo device_info; + VkDeviceCreateInfo loader_create_info; + VkResult res; + + PFN_vkGetDeviceProcAddr fpGDPA, nextGDPA = icd->GetDeviceProcAddr; + PFN_vkGetInstanceProcAddr fpGIPA, nextGIPA = 
loader_gpa_instance_internal; + + memcpy(&loader_create_info, pCreateInfo, sizeof(VkDeviceCreateInfo)); + + chain_info.sType = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO; + chain_info.function = VK_LAYER_LINK_INFO; + chain_info.u.pLayerInfo = NULL; + chain_info.pNext = pCreateInfo->pNext; + + layer_device_link_info = loader_stack_alloc( + sizeof(VkLayerDeviceLink) * dev->activated_layer_list.count); + if (!layer_device_link_info) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Failed to alloc Device objects for layer"); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + /* + * This structure is used by loader_create_device_terminator + * so that it can intialize the device dispatch table pointer + * in the device object returned by the ICD. Without this + * structure the code wouldn't know where the loader's device_info + * structure is located. + */ + device_info.sType = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO; + device_info.function = VK_LAYER_DEVICE_INFO; + device_info.pNext = &chain_info; + device_info.u.deviceInfo.device_info = dev; + device_info.u.deviceInfo.pfnNextGetInstanceProcAddr = + icd->this_icd_lib->GetInstanceProcAddr; + + loader_create_info.pNext = &device_info; + + if (dev->activated_layer_list.count > 0) { + /* Create instance chain of enabled layers */ + for (int32_t i = dev->activated_layer_list.count - 1; i >= 0; i--) { + struct loader_layer_properties *layer_prop = + &dev->activated_layer_list.list[i]; + loader_platform_dl_handle lib_handle; + + lib_handle = loader_add_layer_lib(inst, "device", layer_prop); + if (!lib_handle) + continue; + if ((fpGIPA = layer_prop->functions.get_instance_proc_addr) == + NULL) { + if (layer_prop->functions.str_gipa == NULL || + strlen(layer_prop->functions.str_gipa) == 0) { + fpGIPA = (PFN_vkGetInstanceProcAddr) + loader_platform_get_proc_address( + lib_handle, "vkGetInstanceProcAddr"); + layer_prop->functions.get_instance_proc_addr = fpGIPA; + } else + fpGIPA = (PFN_vkGetInstanceProcAddr) + 
loader_platform_get_proc_address( + lib_handle, layer_prop->functions.str_gipa); + if (!fpGIPA) { + loader_log( + inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Failed to find vkGetInstanceProcAddr in layer %s", + layer_prop->lib_name); + continue; + } + } + if ((fpGDPA = layer_prop->functions.get_device_proc_addr) == NULL) { + if (layer_prop->functions.str_gdpa == NULL || + strlen(layer_prop->functions.str_gdpa) == 0) { + fpGDPA = (PFN_vkGetDeviceProcAddr) + loader_platform_get_proc_address(lib_handle, + "vkGetDeviceProcAddr"); + layer_prop->functions.get_device_proc_addr = fpGDPA; + } else + fpGDPA = (PFN_vkGetDeviceProcAddr) + loader_platform_get_proc_address( + lib_handle, layer_prop->functions.str_gdpa); + if (!fpGDPA) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Failed to find vkGetDeviceProcAddr in layer %s", + layer_prop->lib_name); + continue; + } + } + + layer_device_link_info[activated_layers].pNext = + chain_info.u.pLayerInfo; + layer_device_link_info[activated_layers] + .pfnNextGetInstanceProcAddr = nextGIPA; + layer_device_link_info[activated_layers].pfnNextGetDeviceProcAddr = + nextGDPA; + chain_info.u.pLayerInfo = &layer_device_link_info[activated_layers]; + nextGIPA = fpGIPA; + nextGDPA = fpGDPA; + + loader_log(inst, VK_DEBUG_REPORT_INFORMATION_BIT_EXT, 0, + "Insert device layer %s (%s)", + layer_prop->info.layerName, layer_prop->lib_name); + + activated_layers++; + } + } + + PFN_vkCreateDevice fpCreateDevice = + (PFN_vkCreateDevice)nextGIPA((VkInstance)inst, "vkCreateDevice"); + if (fpCreateDevice) { + res = fpCreateDevice(physicalDevice, &loader_create_info, pAllocator, + &dev->device); + } else { + // Couldn't find CreateDevice function! 
+ return VK_ERROR_INITIALIZATION_FAILED; + } + + /* Initialize device dispatch table */ + loader_init_device_dispatch_table(&dev->loader_dispatch, nextGDPA, + dev->device); + + return res; +} + +VkResult loader_validate_layers(const struct loader_instance *inst, + const uint32_t layer_count, + const char *const *ppEnabledLayerNames, + const struct loader_layer_list *list) { + struct loader_layer_properties *prop; + + for (uint32_t i = 0; i < layer_count; i++) { + VkStringErrorFlags result = + vk_string_validate(MaxLoaderStringLength, ppEnabledLayerNames[i]); + if (result != VK_STRING_ERROR_NONE) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Loader: Device ppEnabledLayerNames contains string " + "that is too long or is badly formed"); + return VK_ERROR_LAYER_NOT_PRESENT; + } + + prop = loader_get_layer_property(ppEnabledLayerNames[i], list); + if (!prop) { + return VK_ERROR_LAYER_NOT_PRESENT; + } + } + return VK_SUCCESS; +} + +VkResult loader_validate_instance_extensions( + const struct loader_instance *inst, + const struct loader_extension_list *icd_exts, + const struct loader_layer_list *instance_layer, + const VkInstanceCreateInfo *pCreateInfo) { + + VkExtensionProperties *extension_prop; + struct loader_layer_properties *layer_prop; + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + VkStringErrorFlags result = vk_string_validate( + MaxLoaderStringLength, pCreateInfo->ppEnabledExtensionNames[i]); + if (result != VK_STRING_ERROR_NONE) { + loader_log(inst, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Loader: Instance ppEnabledExtensionNames contains " + "string that is too long or is badly formed"); + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + + extension_prop = get_extension_property( + pCreateInfo->ppEnabledExtensionNames[i], icd_exts); + + if (extension_prop) { + continue; + } + + extension_prop = NULL; + + /* Not in global list, search layer extension lists */ + for (uint32_t j = 0; j < pCreateInfo->enabledLayerCount; j++) { + 
layer_prop = loader_get_layer_property( + pCreateInfo->ppEnabledLayerNames[i], instance_layer); + if (!layer_prop) { + /* Should NOT get here, loader_validate_layers + * should have already filtered this case out. + */ + continue; + } + + extension_prop = + get_extension_property(pCreateInfo->ppEnabledExtensionNames[i], + &layer_prop->instance_extension_list); + if (extension_prop) { + /* Found the extension in one of the layers enabled by the app. + */ + break; + } + } + + if (!extension_prop) { + /* Didn't find extension name in any of the global layers, error out + */ + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + } + return VK_SUCCESS; +} + +VkResult loader_validate_device_extensions( + struct loader_physical_device *phys_dev, + const struct loader_layer_list *activated_device_layers, + const VkDeviceCreateInfo *pCreateInfo) { + VkExtensionProperties *extension_prop; + struct loader_layer_properties *layer_prop; + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + + VkStringErrorFlags result = vk_string_validate( + MaxLoaderStringLength, pCreateInfo->ppEnabledExtensionNames[i]); + if (result != VK_STRING_ERROR_NONE) { + loader_log(phys_dev->this_instance, VK_DEBUG_REPORT_ERROR_BIT_EXT, + 0, "Loader: Device ppEnabledExtensionNames contains " + "string that is too long or is badly formed"); + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + + const char *extension_name = pCreateInfo->ppEnabledExtensionNames[i]; + extension_prop = get_extension_property( + extension_name, &phys_dev->device_extension_cache); + + if (extension_prop) { + continue; + } + + /* Not in global list, search activated layer extension lists */ + for (uint32_t j = 0; j < activated_device_layers->count; j++) { + layer_prop = &activated_device_layers->list[j]; + + extension_prop = get_dev_extension_property( + extension_name, &layer_prop->device_extension_list); + if (extension_prop) { + /* Found the extension in one of the layers enabled by the app. 
+ */ + break; + } + } + + if (!extension_prop) { + /* Didn't find extension name in any of the device layers, error out + */ + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + } + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkInstance *pInstance) { + struct loader_icd *icd; + VkExtensionProperties *prop; + char **filtered_extension_names = NULL; + VkInstanceCreateInfo icd_create_info; + VkResult res = VK_SUCCESS; + bool success = false; + + VkLayerInstanceCreateInfo *chain_info = + (VkLayerInstanceCreateInfo *)pCreateInfo->pNext; + while ( + chain_info && + !(chain_info->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO && + chain_info->function == VK_LAYER_INSTANCE_INFO)) { + chain_info = (VkLayerInstanceCreateInfo *)chain_info->pNext; + } + assert(chain_info != NULL); + + struct loader_instance *ptr_instance = + (struct loader_instance *)chain_info->u.instanceInfo.instance_info; + memcpy(&icd_create_info, pCreateInfo, sizeof(icd_create_info)); + + icd_create_info.enabledLayerCount = 0; + icd_create_info.ppEnabledLayerNames = NULL; + + // strip off the VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO entries + icd_create_info.pNext = loader_strip_create_extensions(pCreateInfo->pNext); + + /* + * NOTE: Need to filter the extensions to only those + * supported by the ICD. + * No ICD will advertise support for layers. An ICD + * library could support a layer, but it would be + * independent of the actual ICD, just in the same library. 
+ */ + filtered_extension_names = + loader_stack_alloc(pCreateInfo->enabledExtensionCount * sizeof(char *)); + if (!filtered_extension_names) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + icd_create_info.ppEnabledExtensionNames = + (const char *const *)filtered_extension_names; + + for (uint32_t i = 0; i < ptr_instance->icd_libs.count; i++) { + icd = loader_icd_add(ptr_instance, &ptr_instance->icd_libs.list[i]); + if (icd) { + icd_create_info.enabledExtensionCount = 0; + struct loader_extension_list icd_exts; + + loader_log(ptr_instance, VK_DEBUG_REPORT_DEBUG_BIT_EXT, 0, + "Build ICD instance extension list"); + // traverse scanned icd list adding non-duplicate extensions to the + // list + loader_init_generic_list(ptr_instance, + (struct loader_generic_list *)&icd_exts, + sizeof(VkExtensionProperties)); + loader_add_instance_extensions( + ptr_instance, + icd->this_icd_lib->EnumerateInstanceExtensionProperties, + icd->this_icd_lib->lib_name, &icd_exts); + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + prop = get_extension_property( + pCreateInfo->ppEnabledExtensionNames[i], &icd_exts); + if (prop) { + filtered_extension_names[icd_create_info + .enabledExtensionCount] = + (char *)pCreateInfo->ppEnabledExtensionNames[i]; + icd_create_info.enabledExtensionCount++; + } + } + + loader_destroy_generic_list( + ptr_instance, (struct loader_generic_list *)&icd_exts); + + res = ptr_instance->icd_libs.list[i].CreateInstance( + &icd_create_info, pAllocator, &(icd->instance)); + if (res == VK_SUCCESS) + success = loader_icd_init_entrys( + icd, icd->instance, + ptr_instance->icd_libs.list[i].GetInstanceProcAddr); + + if (res != VK_SUCCESS || !success) { + ptr_instance->icds = ptr_instance->icds->next; + loader_icd_destroy(ptr_instance, icd); + loader_log(ptr_instance, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "ICD ignored: failed to CreateInstance and find " + "entrypoints with ICD"); + } + } + } + + /* + * If no ICDs were added to instance list and res is 
unchanged + * from it's initial value, the loader was unable to find + * a suitable ICD. + */ + if (ptr_instance->icds == NULL) { + if (res == VK_SUCCESS) { + return VK_ERROR_INCOMPATIBLE_DRIVER; + } else { + return res; + } + } + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +loader_DestroyInstance(VkInstance instance, + const VkAllocationCallbacks *pAllocator) { + struct loader_instance *ptr_instance = loader_instance(instance); + struct loader_icd *icds = ptr_instance->icds; + struct loader_icd *next_icd; + + // Remove this instance from the list of instances: + struct loader_instance *prev = NULL; + struct loader_instance *next = loader.instances; + while (next != NULL) { + if (next == ptr_instance) { + // Remove this instance from the list: + if (prev) + prev->next = next->next; + else + loader.instances = next->next; + break; + } + prev = next; + next = next->next; + } + + while (icds) { + if (icds->instance) { + icds->DestroyInstance(icds->instance, pAllocator); + } + next_icd = icds->next; + icds->instance = VK_NULL_HANDLE; + loader_icd_destroy(ptr_instance, icds); + + icds = next_icd; + } + loader_delete_layer_properties(ptr_instance, + &ptr_instance->device_layer_list); + loader_delete_layer_properties(ptr_instance, + &ptr_instance->instance_layer_list); + loader_scanned_icd_clear(ptr_instance, &ptr_instance->icd_libs); + loader_destroy_generic_list( + ptr_instance, (struct loader_generic_list *)&ptr_instance->ext_list); + for (uint32_t i = 0; i < ptr_instance->total_gpu_count; i++) + loader_destroy_generic_list( + ptr_instance, + (struct loader_generic_list *)&ptr_instance->phys_devs[i] + .device_extension_cache); + loader_heap_free(ptr_instance, ptr_instance->phys_devs); + loader_free_dev_ext_table(ptr_instance); +} + +VkResult +loader_init_physical_device_info(struct loader_instance *ptr_instance) { + struct loader_icd *icd; + uint32_t i, j, idx, count = 0; + VkResult res; + struct loader_phys_dev_per_icd *phys_devs; + + 
ptr_instance->total_gpu_count = 0; + phys_devs = (struct loader_phys_dev_per_icd *)loader_stack_alloc( + sizeof(struct loader_phys_dev_per_icd) * ptr_instance->total_icd_count); + if (!phys_devs) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + icd = ptr_instance->icds; + for (i = 0; i < ptr_instance->total_icd_count; i++) { + assert(icd); + res = icd->EnumeratePhysicalDevices(icd->instance, &phys_devs[i].count, + NULL); + if (res != VK_SUCCESS) + return res; + count += phys_devs[i].count; + icd = icd->next; + } + + ptr_instance->phys_devs = + (struct loader_physical_device *)loader_heap_alloc( + ptr_instance, count * sizeof(struct loader_physical_device), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!ptr_instance->phys_devs) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + icd = ptr_instance->icds; + + struct loader_physical_device *inst_phys_devs = ptr_instance->phys_devs; + idx = 0; + for (i = 0; i < ptr_instance->total_icd_count; i++) { + assert(icd); + + phys_devs[i].phys_devs = (VkPhysicalDevice *)loader_stack_alloc( + phys_devs[i].count * sizeof(VkPhysicalDevice)); + if (!phys_devs[i].phys_devs) { + loader_heap_free(ptr_instance, ptr_instance->phys_devs); + ptr_instance->phys_devs = NULL; + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + res = icd->EnumeratePhysicalDevices( + icd->instance, &(phys_devs[i].count), phys_devs[i].phys_devs); + if ((res == VK_SUCCESS)) { + ptr_instance->total_gpu_count += phys_devs[i].count; + for (j = 0; j < phys_devs[i].count; j++) { + + // initialize the loader's physicalDevice object + loader_set_dispatch((void *)&inst_phys_devs[idx], + ptr_instance->disp); + inst_phys_devs[idx].this_instance = ptr_instance; + inst_phys_devs[idx].this_icd = icd; + inst_phys_devs[idx].phys_dev = phys_devs[i].phys_devs[j]; + memset(&inst_phys_devs[idx].device_extension_cache, 0, + sizeof(struct loader_extension_list)); + + idx++; + } + } else { + loader_heap_free(ptr_instance, ptr_instance->phys_devs); + ptr_instance->phys_devs = NULL; + return res; + } + + icd = 
icd->next; + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +loader_EnumeratePhysicalDevices(VkInstance instance, + uint32_t *pPhysicalDeviceCount, + VkPhysicalDevice *pPhysicalDevices) { + uint32_t i; + uint32_t copy_count = 0; + struct loader_instance *ptr_instance = (struct loader_instance *)instance; + VkResult res = VK_SUCCESS; + + if (ptr_instance->total_gpu_count == 0) { + res = loader_init_physical_device_info(ptr_instance); + } + + *pPhysicalDeviceCount = ptr_instance->total_gpu_count; + if (!pPhysicalDevices) { + return res; + } + + copy_count = (ptr_instance->total_gpu_count < *pPhysicalDeviceCount) + ? ptr_instance->total_gpu_count + : *pPhysicalDeviceCount; + for (i = 0; i < copy_count; i++) { + pPhysicalDevices[i] = (VkPhysicalDevice)&ptr_instance->phys_devs[i]; + } + *pPhysicalDeviceCount = copy_count; + + if (copy_count < ptr_instance->total_gpu_count) { + return VK_INCOMPLETE; + } + + return res; +} + +VKAPI_ATTR void VKAPI_CALL +loader_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties *pProperties) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + if (icd->GetPhysicalDeviceProperties) + icd->GetPhysicalDeviceProperties(phys_dev->phys_dev, pProperties); +} + +VKAPI_ATTR void VKAPI_CALL loader_GetPhysicalDeviceQueueFamilyProperties( + VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount, + VkQueueFamilyProperties *pProperties) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + if (icd->GetPhysicalDeviceQueueFamilyProperties) + icd->GetPhysicalDeviceQueueFamilyProperties( + phys_dev->phys_dev, pQueueFamilyPropertyCount, pProperties); +} + +VKAPI_ATTR void VKAPI_CALL loader_GetPhysicalDeviceMemoryProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties *pProperties) 
{ + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + if (icd->GetPhysicalDeviceMemoryProperties) + icd->GetPhysicalDeviceMemoryProperties(phys_dev->phys_dev, pProperties); +} + +VKAPI_ATTR void VKAPI_CALL +loader_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures *pFeatures) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + if (icd->GetPhysicalDeviceFeatures) + icd->GetPhysicalDeviceFeatures(phys_dev->phys_dev, pFeatures); +} + +VKAPI_ATTR void VKAPI_CALL +loader_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties *pFormatInfo) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + if (icd->GetPhysicalDeviceFormatProperties) + icd->GetPhysicalDeviceFormatProperties(phys_dev->phys_dev, format, + pFormatInfo); +} + +VKAPI_ATTR VkResult VKAPI_CALL loader_GetPhysicalDeviceImageFormatProperties( + VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, + VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, + VkImageFormatProperties *pImageFormatProperties) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + if (!icd->GetPhysicalDeviceImageFormatProperties) + return VK_ERROR_INITIALIZATION_FAILED; + + return icd->GetPhysicalDeviceImageFormatProperties( + phys_dev->phys_dev, format, type, tiling, usage, flags, + pImageFormatProperties); +} + +VKAPI_ATTR void VKAPI_CALL loader_GetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, + VkSampleCountFlagBits samples, VkImageUsageFlags usage, + VkImageTiling tiling, 
uint32_t *pNumProperties, + VkSparseImageFormatProperties *pProperties) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + if (icd->GetPhysicalDeviceSparseImageFormatProperties) + icd->GetPhysicalDeviceSparseImageFormatProperties( + phys_dev->phys_dev, format, type, samples, usage, tiling, + pNumProperties, pProperties); +} + +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateDevice(VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDevice *pDevice) { + struct loader_physical_device *phys_dev; + struct loader_icd *icd; + struct loader_device *dev; + struct loader_instance *inst; + struct loader_layer_list activated_layer_list = {0}; + VkResult res; + + assert(pCreateInfo->queueCreateInfoCount >= 1); + + // TODO this only works for one physical device per instance + // once CreateDevice layer bootstrapping is done via DeviceCreateInfo + // hopefully don't need this anymore in trampoline code + phys_dev = loader_get_physical_device(physicalDevice); + icd = phys_dev->this_icd; + if (!icd) + return VK_ERROR_INITIALIZATION_FAILED; + + inst = phys_dev->this_instance; + + if (!icd->CreateDevice) { + return VK_ERROR_INITIALIZATION_FAILED; + } + + /* validate any app enabled layers are available */ + if (pCreateInfo->enabledLayerCount > 0) { + res = loader_validate_layers(inst, pCreateInfo->enabledLayerCount, + pCreateInfo->ppEnabledLayerNames, + &inst->device_layer_list); + if (res != VK_SUCCESS) { + return res; + } + } + + /* Get the physical device extensions if they haven't been retrieved yet */ + if (phys_dev->device_extension_cache.capacity == 0) { + if (!loader_init_generic_list( + inst, + (struct loader_generic_list *)&phys_dev->device_extension_cache, + sizeof(VkExtensionProperties))) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + res = loader_add_device_extensions( + inst, icd, phys_dev->phys_dev, + 
phys_dev->this_icd->this_icd_lib->lib_name, + &phys_dev->device_extension_cache); + if (res != VK_SUCCESS) { + return res; + } + } + + /* convert any meta layers to the actual layers makes a copy of layer name*/ + uint32_t saved_layer_count = pCreateInfo->enabledLayerCount; + char **saved_layer_names; + char **saved_layer_ptr; + saved_layer_names = + loader_stack_alloc(sizeof(char *) * pCreateInfo->enabledLayerCount); + for (uint32_t i = 0; i < saved_layer_count; i++) { + saved_layer_names[i] = (char *)pCreateInfo->ppEnabledLayerNames[i]; + } + saved_layer_ptr = (char **)pCreateInfo->ppEnabledLayerNames; + + loader_expand_layer_names( + inst, std_validation_str, + sizeof(std_validation_names) / sizeof(std_validation_names[0]), + std_validation_names, (uint32_t *)&pCreateInfo->enabledLayerCount, + (char ***)&pCreateInfo->ppEnabledLayerNames); + + /* fetch a list of all layers activated, explicit and implicit */ + res = loader_enable_device_layers(inst, icd, &activated_layer_list, + pCreateInfo, &inst->device_layer_list); + if (res != VK_SUCCESS) { + loader_unexpand_dev_layer_names(inst, saved_layer_count, + saved_layer_names, saved_layer_ptr, + pCreateInfo); + return res; + } + + /* make sure requested extensions to be enabled are supported */ + res = loader_validate_device_extensions(phys_dev, &activated_layer_list, + pCreateInfo); + if (res != VK_SUCCESS) { + loader_unexpand_dev_layer_names(inst, saved_layer_count, + saved_layer_names, saved_layer_ptr, + pCreateInfo); + loader_destroy_generic_list( + inst, (struct loader_generic_list *)&activated_layer_list); + return res; + } + + dev = loader_add_logical_device(inst, &icd->logical_device_list); + if (dev == NULL) { + loader_unexpand_dev_layer_names(inst, saved_layer_count, + saved_layer_names, saved_layer_ptr, + pCreateInfo); + loader_destroy_generic_list( + inst, (struct loader_generic_list *)&activated_layer_list); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + /* move the locally filled layer list into the 
device, and pass ownership of + * the memory */ + dev->activated_layer_list.capacity = activated_layer_list.capacity; + dev->activated_layer_list.count = activated_layer_list.count; + dev->activated_layer_list.list = activated_layer_list.list; + memset(&activated_layer_list, 0, sizeof(activated_layer_list)); + + /* activate any layers on device chain which terminates with device*/ + res = loader_enable_device_layers(inst, icd, &dev->activated_layer_list, + pCreateInfo, &inst->device_layer_list); + if (res != VK_SUCCESS) { + loader_unexpand_dev_layer_names(inst, saved_layer_count, + saved_layer_names, saved_layer_ptr, + pCreateInfo); + loader_remove_logical_device(inst, icd, dev); + return res; + } + + res = loader_create_device_chain(physicalDevice, pCreateInfo, pAllocator, + inst, icd, dev); + if (res != VK_SUCCESS) { + loader_unexpand_dev_layer_names(inst, saved_layer_count, + saved_layer_names, saved_layer_ptr, + pCreateInfo); + loader_remove_logical_device(inst, icd, dev); + return res; + } + + *pDevice = dev->device; + + /* initialize any device extension dispatch entry's from the instance list*/ + loader_init_dispatch_dev_ext(inst, dev); + + /* initialize WSI device extensions as part of core dispatch since loader + * has + * dedicated trampoline code for these*/ + loader_init_device_extension_dispatch_table( + &dev->loader_dispatch, + dev->loader_dispatch.core_dispatch.GetDeviceProcAddr, *pDevice); + + loader_unexpand_dev_layer_names(inst, saved_layer_count, saved_layer_names, + saved_layer_ptr, pCreateInfo); + return res; +} + +/** + * Get an instance level or global level entry point address. + * @param instance + * @param pName + * @return + * If instance == NULL returns a global level functions only + * If instance is valid returns a trampoline entry point for all dispatchable + * Vulkan + * functions both core and extensions. 
+ */ +LOADER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vkGetInstanceProcAddr(VkInstance instance, const char *pName) { + + void *addr; + + addr = globalGetProcAddr(pName); + if (instance == VK_NULL_HANDLE) { + // get entrypoint addresses that are global (no dispatchable object) + + return addr; + } else { + // if a global entrypoint return NULL + if (addr) + return NULL; + } + + struct loader_instance *ptr_instance = loader_get_instance(instance); + if (ptr_instance == NULL) + return NULL; + // Return trampoline code for non-global entrypoints including any + // extensions. + // Device extensions are returned if a layer or ICD supports the extension. + // Instance extensions are returned if the extension is enabled and the + // loader + // or someone else supports the extension + return trampolineGetProcAddr(ptr_instance, pName); +} + +/** + * Get a device level or global level entry point address. + * @param device + * @param pName + * @return + * If device is valid, returns a device relative entry point for device level + * entry points both core and extensions. + * Device relative means call down the device chain. + */ +LOADER_EXPORT VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vkGetDeviceProcAddr(VkDevice device, const char *pName) { + void *addr; + + /* for entrypoints that loader must handle (ie non-dispatchable or create + object) + make sure the loader entrypoint is returned */ + addr = loader_non_passthrough_gdpa(pName); + if (addr) { + return addr; + } + + /* Although CreateDevice is on device chain it's dispatchable object isn't + * a VkDevice or child of VkDevice so return NULL. 
+ */ + if (!strcmp(pName, "CreateDevice")) + return NULL; + + /* return the dispatch table entrypoint for the fastest case */ + const VkLayerDispatchTable *disp_table = *(VkLayerDispatchTable **)device; + if (disp_table == NULL) + return NULL; + + addr = loader_lookup_device_dispatch_table(disp_table, pName); + if (addr) + return addr; + + if (disp_table->GetDeviceProcAddr == NULL) + return NULL; + return disp_table->GetDeviceProcAddr(device, pName); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkEnumerateInstanceExtensionProperties(const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) { + struct loader_extension_list *global_ext_list = NULL; + struct loader_layer_list instance_layers; + struct loader_extension_list icd_extensions; + struct loader_icd_libs icd_libs; + uint32_t copy_size; + + tls_instance = NULL; + memset(&icd_extensions, 0, sizeof(icd_extensions)); + memset(&instance_layers, 0, sizeof(instance_layers)); + loader_platform_thread_once(&once_init, loader_initialize); + + /* get layer libraries if needed */ + if (pLayerName && strlen(pLayerName) != 0) { + if (vk_string_validate(MaxLoaderStringLength, pLayerName) == + VK_STRING_ERROR_NONE) { + loader_layer_scan(NULL, &instance_layers, NULL); + for (uint32_t i = 0; i < instance_layers.count; i++) { + struct loader_layer_properties *props = + &instance_layers.list[i]; + if (strcmp(props->info.layerName, pLayerName) == 0) { + global_ext_list = &props->instance_extension_list; + } + } + } else { + assert(VK_FALSE && "vkEnumerateInstanceExtensionProperties: " + "pLayerName is too long or is badly formed"); + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + } else { + /* Scan/discover all ICD libraries */ + memset(&icd_libs, 0, sizeof(struct loader_icd_libs)); + loader_icd_scan(NULL, &icd_libs); + /* get extensions from all ICD's, merge so no duplicates */ + loader_get_icd_loader_instance_extensions(NULL, &icd_libs, + &icd_extensions); + loader_scanned_icd_clear(NULL, 
&icd_libs); + global_ext_list = &icd_extensions; + } + + if (global_ext_list == NULL) { + loader_destroy_layer_list(NULL, &instance_layers); + return VK_ERROR_LAYER_NOT_PRESENT; + } + + if (pProperties == NULL) { + *pPropertyCount = global_ext_list->count; + loader_destroy_layer_list(NULL, &instance_layers); + loader_destroy_generic_list( + NULL, (struct loader_generic_list *)&icd_extensions); + return VK_SUCCESS; + } + + copy_size = *pPropertyCount < global_ext_list->count + ? *pPropertyCount + : global_ext_list->count; + for (uint32_t i = 0; i < copy_size; i++) { + memcpy(&pProperties[i], &global_ext_list->list[i], + sizeof(VkExtensionProperties)); + } + *pPropertyCount = copy_size; + loader_destroy_generic_list(NULL, + (struct loader_generic_list *)&icd_extensions); + + if (copy_size < global_ext_list->count) { + loader_destroy_layer_list(NULL, &instance_layers); + return VK_INCOMPLETE; + } + + loader_destroy_layer_list(NULL, &instance_layers); + return VK_SUCCESS; +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkEnumerateInstanceLayerProperties(uint32_t *pPropertyCount, + VkLayerProperties *pProperties) { + + struct loader_layer_list instance_layer_list; + tls_instance = NULL; + + loader_platform_thread_once(&once_init, loader_initialize); + + uint32_t copy_size; + + /* get layer libraries */ + memset(&instance_layer_list, 0, sizeof(instance_layer_list)); + loader_layer_scan(NULL, &instance_layer_list, NULL); + + if (pProperties == NULL) { + *pPropertyCount = instance_layer_list.count; + loader_destroy_layer_list(NULL, &instance_layer_list); + return VK_SUCCESS; + } + + copy_size = (*pPropertyCount < instance_layer_list.count) + ? 
*pPropertyCount + : instance_layer_list.count; + for (uint32_t i = 0; i < copy_size; i++) { + memcpy(&pProperties[i], &instance_layer_list.list[i].info, + sizeof(VkLayerProperties)); + } + + *pPropertyCount = copy_size; + loader_destroy_layer_list(NULL, &instance_layer_list); + + if (copy_size < instance_layer_list.count) { + return VK_INCOMPLETE; + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +loader_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, + const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) { + struct loader_physical_device *phys_dev; + uint32_t copy_size; + + uint32_t count; + struct loader_device_extension_list *dev_ext_list = NULL; + struct loader_layer_list implicit_layer_list; + + // TODO fix this aliases physical devices + phys_dev = loader_get_physical_device(physicalDevice); + + /* get layer libraries if needed */ + if (pLayerName && strlen(pLayerName) != 0) { + if (vk_string_validate(MaxLoaderStringLength, pLayerName) == + VK_STRING_ERROR_NONE) { + for (uint32_t i = 0; + i < phys_dev->this_instance->device_layer_list.count; i++) { + struct loader_layer_properties *props = + &phys_dev->this_instance->device_layer_list.list[i]; + if (strcmp(props->info.layerName, pLayerName) == 0) { + dev_ext_list = &props->device_extension_list; + } + } + count = (dev_ext_list == NULL) ? 0 : dev_ext_list->count; + if (pProperties == NULL) { + *pPropertyCount = count; + return VK_SUCCESS; + } + + copy_size = *pPropertyCount < count ? 
*pPropertyCount : count; + for (uint32_t i = 0; i < copy_size; i++) { + memcpy(&pProperties[i], &dev_ext_list->list[i].props, + sizeof(VkExtensionProperties)); + } + *pPropertyCount = copy_size; + + if (copy_size < count) { + return VK_INCOMPLETE; + } + } else { + loader_log(phys_dev->this_instance, VK_DEBUG_REPORT_ERROR_BIT_EXT, + 0, "vkEnumerateDeviceExtensionProperties: pLayerName " + "is too long or is badly formed"); + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + return VK_SUCCESS; + } else { + /* this case is during the call down the instance chain with pLayerName + * == NULL*/ + struct loader_icd *icd = phys_dev->this_icd; + uint32_t icd_ext_count = *pPropertyCount; + VkResult res; + + /* get device extensions */ + res = icd->EnumerateDeviceExtensionProperties( + phys_dev->phys_dev, NULL, &icd_ext_count, pProperties); + if (res != VK_SUCCESS) + return res; + + loader_init_layer_list(phys_dev->this_instance, &implicit_layer_list); + + loader_add_layer_implicit( + phys_dev->this_instance, VK_LAYER_TYPE_INSTANCE_IMPLICIT, + &implicit_layer_list, + &phys_dev->this_instance->instance_layer_list); + /* we need to determine which implicit layers are active, + * and then add their extensions. This can't be cached as + * it depends on results of environment variables (which can change). + */ + if (pProperties != NULL) { + /* initialize dev_extension list within the physicalDevice object */ + res = loader_init_device_extensions( + phys_dev->this_instance, phys_dev, icd_ext_count, pProperties, + &phys_dev->device_extension_cache); + if (res != VK_SUCCESS) + return res; + + /* we need to determine which implicit layers are active, + * and then add their extensions. This can't be cached as + * it depends on results of environment variables (which can + * change). 
+ */ + struct loader_extension_list all_exts = {0}; + loader_add_to_ext_list(phys_dev->this_instance, &all_exts, + phys_dev->device_extension_cache.count, + phys_dev->device_extension_cache.list); + + loader_init_layer_list(phys_dev->this_instance, + &implicit_layer_list); + + loader_add_layer_implicit( + phys_dev->this_instance, VK_LAYER_TYPE_INSTANCE_IMPLICIT, + &implicit_layer_list, + &phys_dev->this_instance->instance_layer_list); + + for (uint32_t i = 0; i < implicit_layer_list.count; i++) { + for ( + uint32_t j = 0; + j < implicit_layer_list.list[i].device_extension_list.count; + j++) { + loader_add_to_ext_list(phys_dev->this_instance, &all_exts, + 1, + &implicit_layer_list.list[i] + .device_extension_list.list[j] + .props); + } + } + uint32_t capacity = *pPropertyCount; + VkExtensionProperties *props = pProperties; + + for (uint32_t i = 0; i < all_exts.count && i < capacity; i++) { + props[i] = all_exts.list[i]; + } + /* wasn't enough space for the extensions, we did partial copy now + * return VK_INCOMPLETE */ + if (capacity < all_exts.count) { + res = VK_INCOMPLETE; + } else { + *pPropertyCount = all_exts.count; + } + loader_destroy_generic_list( + phys_dev->this_instance, + (struct loader_generic_list *)&all_exts); + } else { + /* just return the count; need to add in the count of implicit layer + * extensions + * don't worry about duplicates being added in the count */ + *pPropertyCount = icd_ext_count; + + for (uint32_t i = 0; i < implicit_layer_list.count; i++) { + *pPropertyCount += + implicit_layer_list.list[i].device_extension_list.count; + } + res = VK_SUCCESS; + } + + loader_destroy_generic_list( + phys_dev->this_instance, + (struct loader_generic_list *)&implicit_layer_list); + return res; + } +} + +VKAPI_ATTR VkResult VKAPI_CALL +loader_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, + uint32_t *pPropertyCount, + VkLayerProperties *pProperties) { + uint32_t copy_size; + struct loader_physical_device *phys_dev; + // TODO fix this, 
aliases physical devices + phys_dev = loader_get_physical_device(physicalDevice); + uint32_t count = phys_dev->this_instance->device_layer_list.count; + + if (pProperties == NULL) { + *pPropertyCount = count; + return VK_SUCCESS; + } + + copy_size = (*pPropertyCount < count) ? *pPropertyCount : count; + for (uint32_t i = 0; i < copy_size; i++) { + memcpy(&pProperties[i], + &(phys_dev->this_instance->device_layer_list.list[i].info), + sizeof(VkLayerProperties)); + } + *pPropertyCount = copy_size; + + if (copy_size < count) { + return VK_INCOMPLETE; + } + + return VK_SUCCESS; +} + +VkStringErrorFlags vk_string_validate(const int max_length, const char *utf8) { + VkStringErrorFlags result = VK_STRING_ERROR_NONE; + int num_char_bytes; + int i, j; + + for (i = 0; i < max_length; i++) { + if (utf8[i] == 0) { + break; + } else if ((utf8[i] >= 0x20) && (utf8[i] < 0x7f)) { + num_char_bytes = 0; + } else if ((utf8[i] & UTF8_ONE_BYTE_MASK) == UTF8_ONE_BYTE_CODE) { + num_char_bytes = 1; + } else if ((utf8[i] & UTF8_TWO_BYTE_MASK) == UTF8_TWO_BYTE_CODE) { + num_char_bytes = 2; + } else if ((utf8[i] & UTF8_THREE_BYTE_MASK) == UTF8_THREE_BYTE_CODE) { + num_char_bytes = 3; + } else { + result = VK_STRING_ERROR_BAD_DATA; + } + + // Validate the following num_char_bytes of data + for (j = 0; (j < num_char_bytes) && (i < max_length); j++) { + if (++i == max_length) { + result |= VK_STRING_ERROR_LENGTH; + break; + } + if ((utf8[i] & UTF8_DATA_BYTE_MASK) != UTF8_DATA_BYTE_CODE) { + result |= VK_STRING_ERROR_BAD_DATA; + } + } + } + return result; +} diff --git a/third_party/vulkan/loader/loader.h b/third_party/vulkan/loader/loader.h new file mode 100644 index 000000000..06c8961f6 --- /dev/null +++ b/third_party/vulkan/loader/loader.h @@ -0,0 +1,551 @@ +/* + * + * Copyright (c) 2014-2016 The Khronos Group Inc. + * Copyright (c) 2014-2016 Valve Corporation + * Copyright (c) 2014-2016 LunarG, Inc. + * Copyright (C) 2015 Google Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. 
+ * + * Author: Jon Ashburn + * Author: Courtney Goeltzenleuchter + * Author: Chia-I Wu + * Author: Chia-I Wu + * Author: Mark Lobodzinski + * + */ + +#ifndef LOADER_H +#define LOADER_H + +#include +#include + + +#include +#include +#include + +#if defined(__GNUC__) && __GNUC__ >= 4 +#define LOADER_EXPORT __attribute__((visibility("default"))) +#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590) +#define LOADER_EXPORT __attribute__((visibility("default"))) +#else +#define LOADER_EXPORT +#endif + +#define MAX_STRING_SIZE 1024 +#define VK_MAJOR(version) (version >> 22) +#define VK_MINOR(version) ((version >> 12) & 0x3ff) +#define VK_PATCH(version) (version & 0xfff) + +enum layer_type { + VK_LAYER_TYPE_DEVICE_EXPLICIT = 0x1, + VK_LAYER_TYPE_INSTANCE_EXPLICIT = 0x2, + VK_LAYER_TYPE_GLOBAL_EXPLICIT = 0x3, // instance and device layer, bitwise + VK_LAYER_TYPE_DEVICE_IMPLICIT = 0x4, + VK_LAYER_TYPE_INSTANCE_IMPLICIT = 0x8, + VK_LAYER_TYPE_GLOBAL_IMPLICIT = 0xc, // instance and device layer, bitwise + VK_LAYER_TYPE_META_EXPLICT = 0x10, +}; + +typedef enum VkStringErrorFlagBits { + VK_STRING_ERROR_NONE = 0x00000000, + VK_STRING_ERROR_LENGTH = 0x00000001, + VK_STRING_ERROR_BAD_DATA = 0x00000002, +} VkStringErrorFlagBits; +typedef VkFlags VkStringErrorFlags; + +static const int MaxLoaderStringLength = 256; +static const char UTF8_ONE_BYTE_CODE = 0xC0; +static const char UTF8_ONE_BYTE_MASK = 0xE0; +static const char UTF8_TWO_BYTE_CODE = 0xE0; +static const char UTF8_TWO_BYTE_MASK = 0xF0; +static const char UTF8_THREE_BYTE_CODE = 0xF0; +static const char UTF8_THREE_BYTE_MASK = 0xF8; +static const char UTF8_DATA_BYTE_CODE = 0x80; +static const char UTF8_DATA_BYTE_MASK = 0xC0; + +static const char std_validation_names[9][VK_MAX_EXTENSION_NAME_SIZE] = { + "VK_LAYER_LUNARG_threading", "VK_LAYER_LUNARG_param_checker", + "VK_LAYER_LUNARG_device_limits", "VK_LAYER_LUNARG_object_tracker", + "VK_LAYER_LUNARG_image", "VK_LAYER_LUNARG_mem_tracker", + "VK_LAYER_LUNARG_draw_state", 
"VK_LAYER_LUNARG_swapchain", + "VK_LAYER_GOOGLE_unique_objects"}; + +// form of all dynamic lists/arrays +// only the list element should be changed +struct loader_generic_list { + size_t capacity; + uint32_t count; + void *list; +}; + +struct loader_extension_list { + size_t capacity; + uint32_t count; + VkExtensionProperties *list; +}; + +struct loader_dev_ext_props { + VkExtensionProperties props; + uint32_t entrypoint_count; + char **entrypoints; +}; + +struct loader_device_extension_list { + size_t capacity; + uint32_t count; + struct loader_dev_ext_props *list; +}; + +struct loader_name_value { + char name[MAX_STRING_SIZE]; + char value[MAX_STRING_SIZE]; +}; + +struct loader_lib_info { + char lib_name[MAX_STRING_SIZE]; + uint32_t ref_count; + loader_platform_dl_handle lib_handle; +}; + +struct loader_layer_functions { + char str_gipa[MAX_STRING_SIZE]; + char str_gdpa[MAX_STRING_SIZE]; + PFN_vkGetInstanceProcAddr get_instance_proc_addr; + PFN_vkGetDeviceProcAddr get_device_proc_addr; +}; + +struct loader_layer_properties { + VkLayerProperties info; + enum layer_type type; + char lib_name[MAX_STRING_SIZE]; + struct loader_layer_functions functions; + struct loader_extension_list instance_extension_list; + struct loader_device_extension_list device_extension_list; + struct loader_name_value disable_env_var; + struct loader_name_value enable_env_var; +}; + +struct loader_layer_list { + size_t capacity; + uint32_t count; + struct loader_layer_properties *list; +}; + +struct loader_layer_library_list { + size_t capacity; + uint32_t count; + struct loader_lib_info *list; +}; + +struct loader_dispatch_hash_list { + size_t capacity; + uint32_t count; + uint32_t *index; // index into the dev_ext dispatch table +}; + +#define MAX_NUM_DEV_EXTS 250 +// loader_dispatch_hash_entry and loader_dev_ext_dispatch_table.DevExt have one +// to one +// correspondence; one loader_dispatch_hash_entry for one DevExt dispatch entry. 
+// Also have a one to one correspondence with functions in dev_ext_trampoline.c +struct loader_dispatch_hash_entry { + char *func_name; + struct loader_dispatch_hash_list list; // to handle hashing collisions +}; + +typedef void(VKAPI_PTR *PFN_vkDevExt)(VkDevice device); +struct loader_dev_ext_dispatch_table { + PFN_vkDevExt DevExt[MAX_NUM_DEV_EXTS]; +}; + +struct loader_dev_dispatch_table { + VkLayerDispatchTable core_dispatch; + struct loader_dev_ext_dispatch_table ext_dispatch; +}; + +/* per CreateDevice structure */ +struct loader_device { + struct loader_dev_dispatch_table loader_dispatch; + VkDevice device; // device object from the icd + + uint32_t app_extension_count; + VkExtensionProperties *app_extension_props; + + struct loader_layer_list activated_layer_list; + + struct loader_device *next; +}; + +/* per ICD structure */ +struct loader_icd { + // pointers to find other structs + const struct loader_scanned_icds *this_icd_lib; + const struct loader_instance *this_instance; + + struct loader_device *logical_device_list; + VkInstance instance; // instance object from the icd + PFN_vkGetDeviceProcAddr GetDeviceProcAddr; + PFN_vkDestroyInstance DestroyInstance; + PFN_vkEnumeratePhysicalDevices EnumeratePhysicalDevices; + PFN_vkGetPhysicalDeviceFeatures GetPhysicalDeviceFeatures; + PFN_vkGetPhysicalDeviceFormatProperties GetPhysicalDeviceFormatProperties; + PFN_vkGetPhysicalDeviceImageFormatProperties + GetPhysicalDeviceImageFormatProperties; + PFN_vkCreateDevice CreateDevice; + PFN_vkGetPhysicalDeviceProperties GetPhysicalDeviceProperties; + PFN_vkGetPhysicalDeviceQueueFamilyProperties + GetPhysicalDeviceQueueFamilyProperties; + PFN_vkGetPhysicalDeviceMemoryProperties GetPhysicalDeviceMemoryProperties; + PFN_vkEnumerateDeviceExtensionProperties EnumerateDeviceExtensionProperties; + PFN_vkGetPhysicalDeviceSparseImageFormatProperties + GetPhysicalDeviceSparseImageFormatProperties; + PFN_vkCreateDebugReportCallbackEXT CreateDebugReportCallbackEXT; + 
PFN_vkDestroyDebugReportCallbackEXT DestroyDebugReportCallbackEXT; + PFN_vkDebugReportMessageEXT DebugReportMessageEXT; + PFN_vkGetPhysicalDeviceSurfaceSupportKHR GetPhysicalDeviceSurfaceSupportKHR; + PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR + GetPhysicalDeviceSurfaceCapabilitiesKHR; + PFN_vkGetPhysicalDeviceSurfaceFormatsKHR GetPhysicalDeviceSurfaceFormatsKHR; + PFN_vkGetPhysicalDeviceSurfacePresentModesKHR + GetPhysicalDeviceSurfacePresentModesKHR; +#ifdef VK_USE_PLATFORM_WIN32_KHR + PFN_vkGetPhysicalDeviceWin32PresentationSupportKHR + GetPhysicalDeviceWin32PresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_MIR_KHR + PFN_vkGetPhysicalDeviceMirPresentationSupportKHR + GetPhysicalDeviceMirPresentvationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR + GetPhysicalDeviceWaylandPresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_XCB_KHR + PFN_vkGetPhysicalDeviceXcbPresentationSupportKHR + GetPhysicalDeviceXcbPresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR + PFN_vkGetPhysicalDeviceXlibPresentationSupportKHR + GetPhysicalDeviceXlibPresentationSupportKHR; +#endif + + struct loader_icd *next; +}; + +/* per ICD library structure */ +struct loader_icd_libs { + size_t capacity; + uint32_t count; + struct loader_scanned_icds *list; +}; + +/* per instance structure */ +struct loader_instance { + VkLayerInstanceDispatchTable *disp; // must be first entry in structure + + uint32_t total_gpu_count; + struct loader_physical_device *phys_devs; + uint32_t total_icd_count; + struct loader_icd *icds; + struct loader_instance *next; + struct loader_extension_list ext_list; // icds and loaders extensions + struct loader_icd_libs icd_libs; + struct loader_layer_list instance_layer_list; + struct loader_layer_list device_layer_list; + struct loader_dispatch_hash_entry disp_hash[MAX_NUM_DEV_EXTS]; + + struct loader_msg_callback_map_entry *icd_msg_callback_map; + + struct loader_layer_list 
activated_layer_list; + + VkInstance instance; + + bool debug_report_enabled; + VkLayerDbgFunctionNode *DbgFunctionHead; + + VkAllocationCallbacks alloc_callbacks; + + bool wsi_surface_enabled; +#ifdef VK_USE_PLATFORM_WIN32_KHR + bool wsi_win32_surface_enabled; +#endif +#ifdef VK_USE_PLATFORM_MIR_KHR + bool wsi_mir_surface_enabled; +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + bool wsi_wayland_surface_enabled; +#endif +#ifdef VK_USE_PLATFORM_XCB_KHR + bool wsi_xcb_surface_enabled; +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR + bool wsi_xlib_surface_enabled; +#endif +#ifdef VK_USE_PLATFORM_ANDROID_KHR + bool wsi_android_surface_enabled; +#endif +}; + +/* per enumerated PhysicalDevice structure */ +struct loader_physical_device { + VkLayerInstanceDispatchTable *disp; // must be first entry in structure + struct loader_instance *this_instance; + struct loader_icd *this_icd; + VkPhysicalDevice phys_dev; // object from ICD + /* + * Fill in the cache of available device extensions from + * this physical device. 
This cache can be used during CreateDevice + */ + struct loader_extension_list device_extension_cache; +}; + +struct loader_struct { + struct loader_instance *instances; + + unsigned int loaded_layer_lib_count; + size_t loaded_layer_lib_capacity; + struct loader_lib_info *loaded_layer_lib_list; + // TODO add ref counting of ICD libraries + // TODO use this struct loader_layer_library_list scanned_layer_libraries; + // TODO add list of icd libraries for ref counting them for closure +}; + +struct loader_scanned_icds { + char *lib_name; + loader_platform_dl_handle handle; + uint32_t api_version; + PFN_vkGetInstanceProcAddr GetInstanceProcAddr; + PFN_vkCreateInstance CreateInstance; + PFN_vkEnumerateInstanceExtensionProperties + EnumerateInstanceExtensionProperties; +}; + +static inline struct loader_instance *loader_instance(VkInstance instance) { + return (struct loader_instance *)instance; +} + +static inline void loader_set_dispatch(void *obj, const void *data) { + *((const void **)obj) = data; +} + +static inline VkLayerDispatchTable *loader_get_dispatch(const void *obj) { + return *((VkLayerDispatchTable **)obj); +} + +static inline struct loader_dev_dispatch_table * +loader_get_dev_dispatch(const void *obj) { + return *((struct loader_dev_dispatch_table **)obj); +} + +static inline VkLayerInstanceDispatchTable * +loader_get_instance_dispatch(const void *obj) { + return *((VkLayerInstanceDispatchTable **)obj); +} + +static inline void loader_init_dispatch(void *obj, const void *data) { +#ifdef DEBUG + assert(valid_loader_magic_value(obj) && + "Incompatible ICD, first dword must be initialized to " + "ICD_LOADER_MAGIC. 
See loader/README.md for details."); +#endif + + loader_set_dispatch(obj, data); +} + +/* global variables used across files */ +extern struct loader_struct loader; +extern THREAD_LOCAL_DECL struct loader_instance *tls_instance; +extern LOADER_PLATFORM_THREAD_ONCE_DEFINITION(once_init); +extern loader_platform_thread_mutex loader_lock; +extern loader_platform_thread_mutex loader_json_lock; +extern const VkLayerInstanceDispatchTable instance_disp; +extern const char *std_validation_str; + +struct loader_msg_callback_map_entry { + VkDebugReportCallbackEXT icd_obj; + VkDebugReportCallbackEXT loader_obj; +}; + +void loader_log(const struct loader_instance *inst, VkFlags msg_type, + int32_t msg_code, const char *format, ...); + +bool compare_vk_extension_properties(const VkExtensionProperties *op1, + const VkExtensionProperties *op2); + +VkResult loader_validate_layers(const struct loader_instance *inst, + const uint32_t layer_count, + const char *const *ppEnabledLayerNames, + const struct loader_layer_list *list); + +VkResult loader_validate_instance_extensions( + const struct loader_instance *inst, + const struct loader_extension_list *icd_exts, + const struct loader_layer_list *instance_layer, + const VkInstanceCreateInfo *pCreateInfo); + +/* instance layer chain termination entrypoint definitions */ +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkInstance *pInstance); + +VKAPI_ATTR void VKAPI_CALL +loader_DestroyInstance(VkInstance instance, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL +loader_EnumeratePhysicalDevices(VkInstance instance, + uint32_t *pPhysicalDeviceCount, + VkPhysicalDevice *pPhysicalDevices); + +VKAPI_ATTR void VKAPI_CALL +loader_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures *pFeatures); + +VKAPI_ATTR void VKAPI_CALL +loader_GetPhysicalDeviceFormatProperties(VkPhysicalDevice 
physicalDevice, + VkFormat format, + VkFormatProperties *pFormatInfo); + +VKAPI_ATTR VkResult VKAPI_CALL loader_GetPhysicalDeviceImageFormatProperties( + VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, + VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, + VkImageFormatProperties *pImageFormatProperties); + +VKAPI_ATTR void VKAPI_CALL loader_GetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, + VkSampleCountFlagBits samples, VkImageUsageFlags usage, + VkImageTiling tiling, uint32_t *pNumProperties, + VkSparseImageFormatProperties *pProperties); + +VKAPI_ATTR void VKAPI_CALL +loader_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties *pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL +loader_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, + const char *pLayerName, + uint32_t *pCount, + VkExtensionProperties *pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL +loader_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, + uint32_t *pCount, + VkLayerProperties *pProperties); + +VKAPI_ATTR void VKAPI_CALL loader_GetPhysicalDeviceQueueFamilyProperties( + VkPhysicalDevice physicalDevice, uint32_t *pCount, + VkQueueFamilyProperties *pProperties); + +VKAPI_ATTR void VKAPI_CALL loader_GetPhysicalDeviceMemoryProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties *pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL +loader_create_device_terminator(VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDevice *pDevice); + +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkDevice *pDevice); + +/* helper function definitions */ +void loader_initialize(void); +bool has_vk_extension_property_array(const 
VkExtensionProperties *vk_ext_prop, + const uint32_t count, + const VkExtensionProperties *ext_array); +bool has_vk_extension_property(const VkExtensionProperties *vk_ext_prop, + const struct loader_extension_list *ext_list); + +VkResult loader_add_to_ext_list(const struct loader_instance *inst, + struct loader_extension_list *ext_list, + uint32_t prop_list_count, + const VkExtensionProperties *props); +void loader_destroy_generic_list(const struct loader_instance *inst, + struct loader_generic_list *list); +void loader_delete_layer_properties(const struct loader_instance *inst, + struct loader_layer_list *layer_list); +void loader_expand_layer_names( + const struct loader_instance *inst, const char *key_name, + uint32_t expand_count, + const char expand_names[][VK_MAX_EXTENSION_NAME_SIZE], + uint32_t *layer_count, char ***ppp_layer_names); +void loader_unexpand_dev_layer_names(const struct loader_instance *inst, + uint32_t layer_count, char **layer_names, + char **layer_ptr, + const VkDeviceCreateInfo *pCreateInfo); +void loader_unexpand_inst_layer_names(const struct loader_instance *inst, + uint32_t layer_count, char **layer_names, + char **layer_ptr, + const VkInstanceCreateInfo *pCreateInfo); +void loader_add_to_layer_list(const struct loader_instance *inst, + struct loader_layer_list *list, + uint32_t prop_list_count, + const struct loader_layer_properties *props); +void loader_scanned_icd_clear(const struct loader_instance *inst, + struct loader_icd_libs *icd_libs); +void loader_icd_scan(const struct loader_instance *inst, + struct loader_icd_libs *icds); +void loader_layer_scan(const struct loader_instance *inst, + struct loader_layer_list *instance_layers, + struct loader_layer_list *device_layers); +void loader_get_icd_loader_instance_extensions( + const struct loader_instance *inst, struct loader_icd_libs *icd_libs, + struct loader_extension_list *inst_exts); +struct loader_icd *loader_get_icd_and_device(const VkDevice device, + struct loader_device 
**found_dev); +void *loader_dev_ext_gpa(struct loader_instance *inst, const char *funcName); +void *loader_get_dev_ext_trampoline(uint32_t index); +struct loader_instance *loader_get_instance(const VkInstance instance); +void loader_remove_logical_device(const struct loader_instance *inst, + struct loader_icd *icd, + struct loader_device *found_dev); +VkResult +loader_enable_instance_layers(struct loader_instance *inst, + const VkInstanceCreateInfo *pCreateInfo, + const struct loader_layer_list *instance_layers); +void loader_deactivate_instance_layers(struct loader_instance *instance); + +VkResult loader_create_instance_chain(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + struct loader_instance *inst, + VkInstance *created_instance); + +void loader_activate_instance_layer_extensions(struct loader_instance *inst, + VkInstance created_inst); + +void *loader_heap_alloc(const struct loader_instance *instance, size_t size, + VkSystemAllocationScope allocationScope); + +void loader_heap_free(const struct loader_instance *instance, void *pMemory); + +void *loader_tls_heap_alloc(size_t size); + +void loader_tls_heap_free(void *pMemory); + +VkStringErrorFlags vk_string_validate(const int max_length, + const char *char_array); + +#endif /* LOADER_H */ diff --git a/third_party/vulkan/loader/murmurhash.c b/third_party/vulkan/loader/murmurhash.c new file mode 100644 index 000000000..5e5d0de64 --- /dev/null +++ b/third_party/vulkan/loader/murmurhash.c @@ -0,0 +1,97 @@ + +/** + * `murmurhash.h' - murmurhash + * + * copyright (c) 2014 joseph werle + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. 
+ */ + +#include +#include +#include +#include "murmurhash.h" + +uint32_t murmurhash(const char *key, size_t len, uint32_t seed) { + uint32_t c1 = 0xcc9e2d51; + uint32_t c2 = 0x1b873593; + uint32_t r1 = 15; + uint32_t r2 = 13; + uint32_t m = 5; + uint32_t n = 0xe6546b64; + uint32_t h = 0; + uint32_t k = 0; + uint8_t *d = (uint8_t *)key; // 32 bit extract from `key' + const uint32_t *chunks = NULL; + const uint8_t *tail = NULL; // tail - last 8 bytes + int i = 0; + int l = (int)len / 4; // chunk length + + h = seed; + + chunks = (const uint32_t *)(d + l * 4); // body + tail = (const uint8_t *)(d + l * 4); // last 8 byte chunk of `key' + + // for each 4 byte chunk of `key' + for (i = -l; i != 0; ++i) { + // next 4 byte chunk of `key' + k = chunks[i]; + + // encode next 4 byte chunk of `key' + k *= c1; + k = (k << r1) | (k >> (32 - r1)); + k *= c2; + + // append to hash + h ^= k; + h = (h << r2) | (h >> (32 - r2)); + h = h * m + n; + } + + k = 0; + + // remainder + switch (len & 3) { // `len % 4' + case 3: + k ^= (tail[2] << 16); + case 2: + k ^= (tail[1] << 8); + + case 1: + k ^= tail[0]; + k *= c1; + k = (k << r1) | (k >> (32 - r1)); + k *= c2; + h ^= k; + } + + h ^= len; + + h ^= (h >> 16); + h *= 0x85ebca6b; + h ^= (h >> 13); + h *= 0xc2b2ae35; + h ^= (h >> 16); + + return h; +} diff --git a/third_party/vulkan/loader/murmurhash.h b/third_party/vulkan/loader/murmurhash.h new file mode 100644 index 000000000..775532e8b --- /dev/null +++ b/third_party/vulkan/loader/murmurhash.h @@ -0,0 +1,52 @@ + +/** + * `murmurhash.h' - murmurhash + * + * copyright (c) 2014 joseph werle + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. 
+ */ + +#ifndef MURMURHASH_H +#define MURMURHASH_H 1 + +#include + +#define MURMURHASH_VERSION "0.0.3" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Returns a murmur hash of `key' based on `seed' + * using the MurmurHash3 algorithm + */ + +uint32_t murmurhash(const char *key, size_t len, uint32_t seed); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/vulkan/loader/premake5.lua b/third_party/vulkan/loader/premake5.lua new file mode 100644 index 000000000..35675f232 --- /dev/null +++ b/third_party/vulkan/loader/premake5.lua @@ -0,0 +1,24 @@ +group("third_party") +project("vulkan-loader") + uuid("07d77359-1618-43e6-8a4a-0ee9ddc5fa6a") + kind("StaticLib") + language("C++") + + defines({ + "_LIB", + }) + removedefines({ + "_UNICODE", + "UNICODE", + }) + includedirs({ + ".", + }) + recursive_platform_files() + + filter("platforms:Windows") + warnings("Off") -- Too many warnings. + characterset("MBCS") + defines({ + "VK_USE_PLATFORM_WIN32_KHR", + }) diff --git a/third_party/vulkan/loader/table_ops.h b/third_party/vulkan/loader/table_ops.h new file mode 100644 index 000000000..4bf8b410a --- /dev/null +++ b/third_party/vulkan/loader/table_ops.h @@ -0,0 +1,710 @@ +/* + * + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. + * Copyright (C) 2016 Google Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. 
+ * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. + * + * Author: Courtney Goeltzenleuchter + * Author: Jon Ashburn + * Author: Ian Elliott + * Author: Tony Barbour + */ + +#include +#include +#include +#include "loader.h" +#include "vk_loader_platform.h" + +static VkResult vkDevExtError(VkDevice dev) { + struct loader_device *found_dev; + struct loader_icd *icd = loader_get_icd_and_device(dev, &found_dev); + + if (icd) + loader_log(icd->this_instance, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, + "Bad destination in loader trampoline dispatch," + "Are layers and extensions that you are calling enabled?"); + return VK_ERROR_EXTENSION_NOT_PRESENT; +} + +static inline void +loader_init_device_dispatch_table(struct loader_dev_dispatch_table *dev_table, + PFN_vkGetDeviceProcAddr gpa, VkDevice dev) { + VkLayerDispatchTable *table = &dev_table->core_dispatch; + for (uint32_t i = 0; i < MAX_NUM_DEV_EXTS; i++) + dev_table->ext_dispatch.DevExt[i] = (PFN_vkDevExt)vkDevExtError; + + table->GetDeviceProcAddr = + (PFN_vkGetDeviceProcAddr)gpa(dev, "vkGetDeviceProcAddr"); + table->DestroyDevice = (PFN_vkDestroyDevice)gpa(dev, "vkDestroyDevice"); + table->GetDeviceQueue = (PFN_vkGetDeviceQueue)gpa(dev, "vkGetDeviceQueue"); + table->QueueSubmit = (PFN_vkQueueSubmit)gpa(dev, "vkQueueSubmit"); + table->QueueWaitIdle = (PFN_vkQueueWaitIdle)gpa(dev, "vkQueueWaitIdle"); + table->DeviceWaitIdle = (PFN_vkDeviceWaitIdle)gpa(dev, "vkDeviceWaitIdle"); + table->AllocateMemory = (PFN_vkAllocateMemory)gpa(dev, "vkAllocateMemory"); + table->FreeMemory = 
(PFN_vkFreeMemory)gpa(dev, "vkFreeMemory"); + table->MapMemory = (PFN_vkMapMemory)gpa(dev, "vkMapMemory"); + table->UnmapMemory = (PFN_vkUnmapMemory)gpa(dev, "vkUnmapMemory"); + table->FlushMappedMemoryRanges = + (PFN_vkFlushMappedMemoryRanges)gpa(dev, "vkFlushMappedMemoryRanges"); + table->InvalidateMappedMemoryRanges = + (PFN_vkInvalidateMappedMemoryRanges)gpa( + dev, "vkInvalidateMappedMemoryRanges"); + table->GetDeviceMemoryCommitment = (PFN_vkGetDeviceMemoryCommitment)gpa( + dev, "vkGetDeviceMemoryCommitment"); + table->GetImageSparseMemoryRequirements = + (PFN_vkGetImageSparseMemoryRequirements)gpa( + dev, "vkGetImageSparseMemoryRequirements"); + table->GetBufferMemoryRequirements = (PFN_vkGetBufferMemoryRequirements)gpa( + dev, "vkGetBufferMemoryRequirements"); + table->GetImageMemoryRequirements = (PFN_vkGetImageMemoryRequirements)gpa( + dev, "vkGetImageMemoryRequirements"); + table->BindBufferMemory = + (PFN_vkBindBufferMemory)gpa(dev, "vkBindBufferMemory"); + table->BindImageMemory = + (PFN_vkBindImageMemory)gpa(dev, "vkBindImageMemory"); + table->QueueBindSparse = + (PFN_vkQueueBindSparse)gpa(dev, "vkQueueBindSparse"); + table->CreateFence = (PFN_vkCreateFence)gpa(dev, "vkCreateFence"); + table->DestroyFence = (PFN_vkDestroyFence)gpa(dev, "vkDestroyFence"); + table->ResetFences = (PFN_vkResetFences)gpa(dev, "vkResetFences"); + table->GetFenceStatus = (PFN_vkGetFenceStatus)gpa(dev, "vkGetFenceStatus"); + table->WaitForFences = (PFN_vkWaitForFences)gpa(dev, "vkWaitForFences"); + table->CreateSemaphore = + (PFN_vkCreateSemaphore)gpa(dev, "vkCreateSemaphore"); + table->DestroySemaphore = + (PFN_vkDestroySemaphore)gpa(dev, "vkDestroySemaphore"); + table->CreateEvent = (PFN_vkCreateEvent)gpa(dev, "vkCreateEvent"); + table->DestroyEvent = (PFN_vkDestroyEvent)gpa(dev, "vkDestroyEvent"); + table->GetEventStatus = (PFN_vkGetEventStatus)gpa(dev, "vkGetEventStatus"); + table->SetEvent = (PFN_vkSetEvent)gpa(dev, "vkSetEvent"); + table->ResetEvent = 
(PFN_vkResetEvent)gpa(dev, "vkResetEvent"); + table->CreateQueryPool = + (PFN_vkCreateQueryPool)gpa(dev, "vkCreateQueryPool"); + table->DestroyQueryPool = + (PFN_vkDestroyQueryPool)gpa(dev, "vkDestroyQueryPool"); + table->GetQueryPoolResults = + (PFN_vkGetQueryPoolResults)gpa(dev, "vkGetQueryPoolResults"); + table->CreateBuffer = (PFN_vkCreateBuffer)gpa(dev, "vkCreateBuffer"); + table->DestroyBuffer = (PFN_vkDestroyBuffer)gpa(dev, "vkDestroyBuffer"); + table->CreateBufferView = + (PFN_vkCreateBufferView)gpa(dev, "vkCreateBufferView"); + table->DestroyBufferView = + (PFN_vkDestroyBufferView)gpa(dev, "vkDestroyBufferView"); + table->CreateImage = (PFN_vkCreateImage)gpa(dev, "vkCreateImage"); + table->DestroyImage = (PFN_vkDestroyImage)gpa(dev, "vkDestroyImage"); + table->GetImageSubresourceLayout = (PFN_vkGetImageSubresourceLayout)gpa( + dev, "vkGetImageSubresourceLayout"); + table->CreateImageView = + (PFN_vkCreateImageView)gpa(dev, "vkCreateImageView"); + table->DestroyImageView = + (PFN_vkDestroyImageView)gpa(dev, "vkDestroyImageView"); + table->CreateShaderModule = + (PFN_vkCreateShaderModule)gpa(dev, "vkCreateShaderModule"); + table->DestroyShaderModule = + (PFN_vkDestroyShaderModule)gpa(dev, "vkDestroyShaderModule"); + table->CreatePipelineCache = + (PFN_vkCreatePipelineCache)gpa(dev, "vkCreatePipelineCache"); + table->DestroyPipelineCache = + (PFN_vkDestroyPipelineCache)gpa(dev, "vkDestroyPipelineCache"); + table->GetPipelineCacheData = + (PFN_vkGetPipelineCacheData)gpa(dev, "vkGetPipelineCacheData"); + table->MergePipelineCaches = + (PFN_vkMergePipelineCaches)gpa(dev, "vkMergePipelineCaches"); + table->CreateGraphicsPipelines = + (PFN_vkCreateGraphicsPipelines)gpa(dev, "vkCreateGraphicsPipelines"); + table->CreateComputePipelines = + (PFN_vkCreateComputePipelines)gpa(dev, "vkCreateComputePipelines"); + table->DestroyPipeline = + (PFN_vkDestroyPipeline)gpa(dev, "vkDestroyPipeline"); + table->CreatePipelineLayout = + (PFN_vkCreatePipelineLayout)gpa(dev, 
"vkCreatePipelineLayout"); + table->DestroyPipelineLayout = + (PFN_vkDestroyPipelineLayout)gpa(dev, "vkDestroyPipelineLayout"); + table->CreateSampler = (PFN_vkCreateSampler)gpa(dev, "vkCreateSampler"); + table->DestroySampler = (PFN_vkDestroySampler)gpa(dev, "vkDestroySampler"); + table->CreateDescriptorSetLayout = (PFN_vkCreateDescriptorSetLayout)gpa( + dev, "vkCreateDescriptorSetLayout"); + table->DestroyDescriptorSetLayout = (PFN_vkDestroyDescriptorSetLayout)gpa( + dev, "vkDestroyDescriptorSetLayout"); + table->CreateDescriptorPool = + (PFN_vkCreateDescriptorPool)gpa(dev, "vkCreateDescriptorPool"); + table->DestroyDescriptorPool = + (PFN_vkDestroyDescriptorPool)gpa(dev, "vkDestroyDescriptorPool"); + table->ResetDescriptorPool = + (PFN_vkResetDescriptorPool)gpa(dev, "vkResetDescriptorPool"); + table->AllocateDescriptorSets = + (PFN_vkAllocateDescriptorSets)gpa(dev, "vkAllocateDescriptorSets"); + table->FreeDescriptorSets = + (PFN_vkFreeDescriptorSets)gpa(dev, "vkFreeDescriptorSets"); + table->UpdateDescriptorSets = + (PFN_vkUpdateDescriptorSets)gpa(dev, "vkUpdateDescriptorSets"); + table->CreateFramebuffer = + (PFN_vkCreateFramebuffer)gpa(dev, "vkCreateFramebuffer"); + table->DestroyFramebuffer = + (PFN_vkDestroyFramebuffer)gpa(dev, "vkDestroyFramebuffer"); + table->CreateRenderPass = + (PFN_vkCreateRenderPass)gpa(dev, "vkCreateRenderPass"); + table->DestroyRenderPass = + (PFN_vkDestroyRenderPass)gpa(dev, "vkDestroyRenderPass"); + table->GetRenderAreaGranularity = + (PFN_vkGetRenderAreaGranularity)gpa(dev, "vkGetRenderAreaGranularity"); + table->CreateCommandPool = + (PFN_vkCreateCommandPool)gpa(dev, "vkCreateCommandPool"); + table->DestroyCommandPool = + (PFN_vkDestroyCommandPool)gpa(dev, "vkDestroyCommandPool"); + table->ResetCommandPool = + (PFN_vkResetCommandPool)gpa(dev, "vkResetCommandPool"); + table->AllocateCommandBuffers = + (PFN_vkAllocateCommandBuffers)gpa(dev, "vkAllocateCommandBuffers"); + table->FreeCommandBuffers = + 
(PFN_vkFreeCommandBuffers)gpa(dev, "vkFreeCommandBuffers"); + table->BeginCommandBuffer = + (PFN_vkBeginCommandBuffer)gpa(dev, "vkBeginCommandBuffer"); + table->EndCommandBuffer = + (PFN_vkEndCommandBuffer)gpa(dev, "vkEndCommandBuffer"); + table->ResetCommandBuffer = + (PFN_vkResetCommandBuffer)gpa(dev, "vkResetCommandBuffer"); + table->CmdBindPipeline = + (PFN_vkCmdBindPipeline)gpa(dev, "vkCmdBindPipeline"); + table->CmdSetViewport = (PFN_vkCmdSetViewport)gpa(dev, "vkCmdSetViewport"); + table->CmdSetScissor = (PFN_vkCmdSetScissor)gpa(dev, "vkCmdSetScissor"); + table->CmdSetLineWidth = + (PFN_vkCmdSetLineWidth)gpa(dev, "vkCmdSetLineWidth"); + table->CmdSetDepthBias = + (PFN_vkCmdSetDepthBias)gpa(dev, "vkCmdSetDepthBias"); + table->CmdSetBlendConstants = + (PFN_vkCmdSetBlendConstants)gpa(dev, "vkCmdSetBlendConstants"); + table->CmdSetDepthBounds = + (PFN_vkCmdSetDepthBounds)gpa(dev, "vkCmdSetDepthBounds"); + table->CmdSetStencilCompareMask = + (PFN_vkCmdSetStencilCompareMask)gpa(dev, "vkCmdSetStencilCompareMask"); + table->CmdSetStencilWriteMask = + (PFN_vkCmdSetStencilWriteMask)gpa(dev, "vkCmdSetStencilWriteMask"); + table->CmdSetStencilReference = + (PFN_vkCmdSetStencilReference)gpa(dev, "vkCmdSetStencilReference"); + table->CmdBindDescriptorSets = + (PFN_vkCmdBindDescriptorSets)gpa(dev, "vkCmdBindDescriptorSets"); + table->CmdBindVertexBuffers = + (PFN_vkCmdBindVertexBuffers)gpa(dev, "vkCmdBindVertexBuffers"); + table->CmdBindIndexBuffer = + (PFN_vkCmdBindIndexBuffer)gpa(dev, "vkCmdBindIndexBuffer"); + table->CmdDraw = (PFN_vkCmdDraw)gpa(dev, "vkCmdDraw"); + table->CmdDrawIndexed = (PFN_vkCmdDrawIndexed)gpa(dev, "vkCmdDrawIndexed"); + table->CmdDrawIndirect = + (PFN_vkCmdDrawIndirect)gpa(dev, "vkCmdDrawIndirect"); + table->CmdDrawIndexedIndirect = + (PFN_vkCmdDrawIndexedIndirect)gpa(dev, "vkCmdDrawIndexedIndirect"); + table->CmdDispatch = (PFN_vkCmdDispatch)gpa(dev, "vkCmdDispatch"); + table->CmdDispatchIndirect = + (PFN_vkCmdDispatchIndirect)gpa(dev, 
"vkCmdDispatchIndirect"); + table->CmdCopyBuffer = (PFN_vkCmdCopyBuffer)gpa(dev, "vkCmdCopyBuffer"); + table->CmdCopyImage = (PFN_vkCmdCopyImage)gpa(dev, "vkCmdCopyImage"); + table->CmdBlitImage = (PFN_vkCmdBlitImage)gpa(dev, "vkCmdBlitImage"); + table->CmdCopyBufferToImage = + (PFN_vkCmdCopyBufferToImage)gpa(dev, "vkCmdCopyBufferToImage"); + table->CmdCopyImageToBuffer = + (PFN_vkCmdCopyImageToBuffer)gpa(dev, "vkCmdCopyImageToBuffer"); + table->CmdUpdateBuffer = + (PFN_vkCmdUpdateBuffer)gpa(dev, "vkCmdUpdateBuffer"); + table->CmdFillBuffer = (PFN_vkCmdFillBuffer)gpa(dev, "vkCmdFillBuffer"); + table->CmdClearColorImage = + (PFN_vkCmdClearColorImage)gpa(dev, "vkCmdClearColorImage"); + table->CmdClearDepthStencilImage = (PFN_vkCmdClearDepthStencilImage)gpa( + dev, "vkCmdClearDepthStencilImage"); + table->CmdClearAttachments = + (PFN_vkCmdClearAttachments)gpa(dev, "vkCmdClearAttachments"); + table->CmdResolveImage = + (PFN_vkCmdResolveImage)gpa(dev, "vkCmdResolveImage"); + table->CmdSetEvent = (PFN_vkCmdSetEvent)gpa(dev, "vkCmdSetEvent"); + table->CmdResetEvent = (PFN_vkCmdResetEvent)gpa(dev, "vkCmdResetEvent"); + table->CmdWaitEvents = (PFN_vkCmdWaitEvents)gpa(dev, "vkCmdWaitEvents"); + table->CmdPipelineBarrier = + (PFN_vkCmdPipelineBarrier)gpa(dev, "vkCmdPipelineBarrier"); + table->CmdBeginQuery = (PFN_vkCmdBeginQuery)gpa(dev, "vkCmdBeginQuery"); + table->CmdEndQuery = (PFN_vkCmdEndQuery)gpa(dev, "vkCmdEndQuery"); + table->CmdResetQueryPool = + (PFN_vkCmdResetQueryPool)gpa(dev, "vkCmdResetQueryPool"); + table->CmdWriteTimestamp = + (PFN_vkCmdWriteTimestamp)gpa(dev, "vkCmdWriteTimestamp"); + table->CmdCopyQueryPoolResults = + (PFN_vkCmdCopyQueryPoolResults)gpa(dev, "vkCmdCopyQueryPoolResults"); + table->CmdPushConstants = + (PFN_vkCmdPushConstants)gpa(dev, "vkCmdPushConstants"); + table->CmdBeginRenderPass = + (PFN_vkCmdBeginRenderPass)gpa(dev, "vkCmdBeginRenderPass"); + table->CmdNextSubpass = (PFN_vkCmdNextSubpass)gpa(dev, "vkCmdNextSubpass"); + 
table->CmdEndRenderPass = + (PFN_vkCmdEndRenderPass)gpa(dev, "vkCmdEndRenderPass"); + table->CmdExecuteCommands = + (PFN_vkCmdExecuteCommands)gpa(dev, "vkCmdExecuteCommands"); +} + +static inline void loader_init_device_extension_dispatch_table( + struct loader_dev_dispatch_table *dev_table, PFN_vkGetDeviceProcAddr gpa, + VkDevice dev) { + VkLayerDispatchTable *table = &dev_table->core_dispatch; + table->AcquireNextImageKHR = + (PFN_vkAcquireNextImageKHR)gpa(dev, "vkAcquireNextImageKHR"); + table->CreateSwapchainKHR = + (PFN_vkCreateSwapchainKHR)gpa(dev, "vkCreateSwapchainKHR"); + table->DestroySwapchainKHR = + (PFN_vkDestroySwapchainKHR)gpa(dev, "vkDestroySwapchainKHR"); + table->GetSwapchainImagesKHR = + (PFN_vkGetSwapchainImagesKHR)gpa(dev, "vkGetSwapchainImagesKHR"); + table->QueuePresentKHR = + (PFN_vkQueuePresentKHR)gpa(dev, "vkQueuePresentKHR"); +} + +static inline void * +loader_lookup_device_dispatch_table(const VkLayerDispatchTable *table, + const char *name) { + if (!name || name[0] != 'v' || name[1] != 'k') + return NULL; + + name += 2; + if (!strcmp(name, "GetDeviceProcAddr")) + return (void *)table->GetDeviceProcAddr; + if (!strcmp(name, "DestroyDevice")) + return (void *)table->DestroyDevice; + if (!strcmp(name, "GetDeviceQueue")) + return (void *)table->GetDeviceQueue; + if (!strcmp(name, "QueueSubmit")) + return (void *)table->QueueSubmit; + if (!strcmp(name, "QueueWaitIdle")) + return (void *)table->QueueWaitIdle; + if (!strcmp(name, "DeviceWaitIdle")) + return (void *)table->DeviceWaitIdle; + if (!strcmp(name, "AllocateMemory")) + return (void *)table->AllocateMemory; + if (!strcmp(name, "FreeMemory")) + return (void *)table->FreeMemory; + if (!strcmp(name, "MapMemory")) + return (void *)table->MapMemory; + if (!strcmp(name, "UnmapMemory")) + return (void *)table->UnmapMemory; + if (!strcmp(name, "FlushMappedMemoryRanges")) + return (void *)table->FlushMappedMemoryRanges; + if (!strcmp(name, "InvalidateMappedMemoryRanges")) + return (void 
*)table->InvalidateMappedMemoryRanges; + if (!strcmp(name, "GetDeviceMemoryCommitment")) + return (void *)table->GetDeviceMemoryCommitment; + if (!strcmp(name, "GetImageSparseMemoryRequirements")) + return (void *)table->GetImageSparseMemoryRequirements; + if (!strcmp(name, "GetBufferMemoryRequirements")) + return (void *)table->GetBufferMemoryRequirements; + if (!strcmp(name, "GetImageMemoryRequirements")) + return (void *)table->GetImageMemoryRequirements; + if (!strcmp(name, "BindBufferMemory")) + return (void *)table->BindBufferMemory; + if (!strcmp(name, "BindImageMemory")) + return (void *)table->BindImageMemory; + if (!strcmp(name, "QueueBindSparse")) + return (void *)table->QueueBindSparse; + if (!strcmp(name, "CreateFence")) + return (void *)table->CreateFence; + if (!strcmp(name, "DestroyFence")) + return (void *)table->DestroyFence; + if (!strcmp(name, "ResetFences")) + return (void *)table->ResetFences; + if (!strcmp(name, "GetFenceStatus")) + return (void *)table->GetFenceStatus; + if (!strcmp(name, "WaitForFences")) + return (void *)table->WaitForFences; + if (!strcmp(name, "CreateSemaphore")) + return (void *)table->CreateSemaphore; + if (!strcmp(name, "DestroySemaphore")) + return (void *)table->DestroySemaphore; + if (!strcmp(name, "CreateEvent")) + return (void *)table->CreateEvent; + if (!strcmp(name, "DestroyEvent")) + return (void *)table->DestroyEvent; + if (!strcmp(name, "GetEventStatus")) + return (void *)table->GetEventStatus; + if (!strcmp(name, "SetEvent")) + return (void *)table->SetEvent; + if (!strcmp(name, "ResetEvent")) + return (void *)table->ResetEvent; + if (!strcmp(name, "CreateQueryPool")) + return (void *)table->CreateQueryPool; + if (!strcmp(name, "DestroyQueryPool")) + return (void *)table->DestroyQueryPool; + if (!strcmp(name, "GetQueryPoolResults")) + return (void *)table->GetQueryPoolResults; + if (!strcmp(name, "CreateBuffer")) + return (void *)table->CreateBuffer; + if (!strcmp(name, "DestroyBuffer")) + return (void 
*)table->DestroyBuffer; + if (!strcmp(name, "CreateBufferView")) + return (void *)table->CreateBufferView; + if (!strcmp(name, "DestroyBufferView")) + return (void *)table->DestroyBufferView; + if (!strcmp(name, "CreateImage")) + return (void *)table->CreateImage; + if (!strcmp(name, "DestroyImage")) + return (void *)table->DestroyImage; + if (!strcmp(name, "GetImageSubresourceLayout")) + return (void *)table->GetImageSubresourceLayout; + if (!strcmp(name, "CreateImageView")) + return (void *)table->CreateImageView; + if (!strcmp(name, "DestroyImageView")) + return (void *)table->DestroyImageView; + if (!strcmp(name, "CreateShaderModule")) + return (void *)table->CreateShaderModule; + if (!strcmp(name, "DestroyShaderModule")) + return (void *)table->DestroyShaderModule; + if (!strcmp(name, "CreatePipelineCache")) + return (void *)table->CreatePipelineCache; + if (!strcmp(name, "DestroyPipelineCache")) + return (void *)table->DestroyPipelineCache; + if (!strcmp(name, "GetPipelineCacheData")) + return (void *)table->GetPipelineCacheData; + if (!strcmp(name, "MergePipelineCaches")) + return (void *)table->MergePipelineCaches; + if (!strcmp(name, "CreateGraphicsPipelines")) + return (void *)table->CreateGraphicsPipelines; + if (!strcmp(name, "CreateComputePipelines")) + return (void *)table->CreateComputePipelines; + if (!strcmp(name, "DestroyPipeline")) + return (void *)table->DestroyPipeline; + if (!strcmp(name, "CreatePipelineLayout")) + return (void *)table->CreatePipelineLayout; + if (!strcmp(name, "DestroyPipelineLayout")) + return (void *)table->DestroyPipelineLayout; + if (!strcmp(name, "CreateSampler")) + return (void *)table->CreateSampler; + if (!strcmp(name, "DestroySampler")) + return (void *)table->DestroySampler; + if (!strcmp(name, "CreateDescriptorSetLayout")) + return (void *)table->CreateDescriptorSetLayout; + if (!strcmp(name, "DestroyDescriptorSetLayout")) + return (void *)table->DestroyDescriptorSetLayout; + if (!strcmp(name, "CreateDescriptorPool")) + return (void
*)table->CreateDescriptorPool; + if (!strcmp(name, "DestroyDescriptorPool")) + return (void *)table->DestroyDescriptorPool; + if (!strcmp(name, "ResetDescriptorPool")) + return (void *)table->ResetDescriptorPool; + if (!strcmp(name, "AllocateDescriptorSets")) + return (void *)table->AllocateDescriptorSets; + if (!strcmp(name, "FreeDescriptorSets")) + return (void *)table->FreeDescriptorSets; + if (!strcmp(name, "UpdateDescriptorSets")) + return (void *)table->UpdateDescriptorSets; + if (!strcmp(name, "CreateFramebuffer")) + return (void *)table->CreateFramebuffer; + if (!strcmp(name, "DestroyFramebuffer")) + return (void *)table->DestroyFramebuffer; + if (!strcmp(name, "CreateRenderPass")) + return (void *)table->CreateRenderPass; + if (!strcmp(name, "DestroyRenderPass")) + return (void *)table->DestroyRenderPass; + if (!strcmp(name, "GetRenderAreaGranularity")) + return (void *)table->GetRenderAreaGranularity; + if (!strcmp(name, "CreateCommandPool")) + return (void *)table->CreateCommandPool; + if (!strcmp(name, "DestroyCommandPool")) + return (void *)table->DestroyCommandPool; + if (!strcmp(name, "ResetCommandPool")) + return (void *)table->ResetCommandPool; + if (!strcmp(name, "AllocateCommandBuffers")) + return (void *)table->AllocateCommandBuffers; + if (!strcmp(name, "FreeCommandBuffers")) + return (void *)table->FreeCommandBuffers; + if (!strcmp(name, "BeginCommandBuffer")) + return (void *)table->BeginCommandBuffer; + if (!strcmp(name, "EndCommandBuffer")) + return (void *)table->EndCommandBuffer; + if (!strcmp(name, "ResetCommandBuffer")) + return (void *)table->ResetCommandBuffer; + if (!strcmp(name, "CmdBindPipeline")) + return (void *)table->CmdBindPipeline; + if (!strcmp(name, "CmdSetViewport")) + return (void *)table->CmdSetViewport; + if (!strcmp(name, "CmdSetScissor")) + return (void *)table->CmdSetScissor; + if (!strcmp(name, "CmdSetLineWidth")) + return (void *)table->CmdSetLineWidth; + if (!strcmp(name, "CmdSetDepthBias")) + return (void 
*)table->CmdSetDepthBias; + if (!strcmp(name, "CmdSetBlendConstants")) + return (void *)table->CmdSetBlendConstants; + if (!strcmp(name, "CmdSetDepthBounds")) + return (void *)table->CmdSetDepthBounds; + if (!strcmp(name, "CmdSetStencilCompareMask")) + return (void *)table->CmdSetStencilCompareMask; + if (!strcmp(name, "CmdSetStencilWriteMask")) + return (void *)table->CmdSetStencilWriteMask; + if (!strcmp(name, "CmdSetStencilReference")) + return (void *)table->CmdSetStencilReference; + if (!strcmp(name, "CmdBindDescriptorSets")) + return (void *)table->CmdBindDescriptorSets; + if (!strcmp(name, "CmdBindVertexBuffers")) + return (void *)table->CmdBindVertexBuffers; + if (!strcmp(name, "CmdBindIndexBuffer")) + return (void *)table->CmdBindIndexBuffer; + if (!strcmp(name, "CmdDraw")) + return (void *)table->CmdDraw; + if (!strcmp(name, "CmdDrawIndexed")) + return (void *)table->CmdDrawIndexed; + if (!strcmp(name, "CmdDrawIndirect")) + return (void *)table->CmdDrawIndirect; + if (!strcmp(name, "CmdDrawIndexedIndirect")) + return (void *)table->CmdDrawIndexedIndirect; + if (!strcmp(name, "CmdDispatch")) + return (void *)table->CmdDispatch; + if (!strcmp(name, "CmdDispatchIndirect")) + return (void *)table->CmdDispatchIndirect; + if (!strcmp(name, "CmdCopyBuffer")) + return (void *)table->CmdCopyBuffer; + if (!strcmp(name, "CmdCopyImage")) + return (void *)table->CmdCopyImage; + if (!strcmp(name, "CmdBlitImage")) + return (void *)table->CmdBlitImage; + if (!strcmp(name, "CmdCopyBufferToImage")) + return (void *)table->CmdCopyBufferToImage; + if (!strcmp(name, "CmdCopyImageToBuffer")) + return (void *)table->CmdCopyImageToBuffer; + if (!strcmp(name, "CmdUpdateBuffer")) + return (void *)table->CmdUpdateBuffer; + if (!strcmp(name, "CmdFillBuffer")) + return (void *)table->CmdFillBuffer; + if (!strcmp(name, "CmdClearColorImage")) + return (void *)table->CmdClearColorImage; + if (!strcmp(name, "CmdClearDepthStencilImage")) + return (void *)table->CmdClearDepthStencilImage;
+ if (!strcmp(name, "CmdClearAttachments")) + return (void *)table->CmdClearAttachments; + if (!strcmp(name, "CmdResolveImage")) + return (void *)table->CmdResolveImage; + if (!strcmp(name, "CmdSetEvent")) + return (void *)table->CmdSetEvent; + if (!strcmp(name, "CmdResetEvent")) + return (void *)table->CmdResetEvent; + if (!strcmp(name, "CmdWaitEvents")) + return (void *)table->CmdWaitEvents; + if (!strcmp(name, "CmdPipelineBarrier")) + return (void *)table->CmdPipelineBarrier; + if (!strcmp(name, "CmdBeginQuery")) + return (void *)table->CmdBeginQuery; + if (!strcmp(name, "CmdEndQuery")) + return (void *)table->CmdEndQuery; + if (!strcmp(name, "CmdResetQueryPool")) + return (void *)table->CmdResetQueryPool; + if (!strcmp(name, "CmdWriteTimestamp")) + return (void *)table->CmdWriteTimestamp; + if (!strcmp(name, "CmdCopyQueryPoolResults")) + return (void *)table->CmdCopyQueryPoolResults; + if (!strcmp(name, "CmdPushConstants")) + return (void *)table->CmdPushConstants; + if (!strcmp(name, "CmdBeginRenderPass")) + return (void *)table->CmdBeginRenderPass; + if (!strcmp(name, "CmdNextSubpass")) + return (void *)table->CmdNextSubpass; + if (!strcmp(name, "CmdEndRenderPass")) + return (void *)table->CmdEndRenderPass; + if (!strcmp(name, "CmdExecuteCommands")) + return (void *)table->CmdExecuteCommands; + + return NULL; +} + +static inline void +loader_init_instance_core_dispatch_table(VkLayerInstanceDispatchTable *table, + PFN_vkGetInstanceProcAddr gpa, + VkInstance inst) { + table->GetInstanceProcAddr = + (PFN_vkGetInstanceProcAddr)gpa(inst, "vkGetInstanceProcAddr"); + table->DestroyInstance = + (PFN_vkDestroyInstance)gpa(inst, "vkDestroyInstance"); + table->EnumeratePhysicalDevices = + (PFN_vkEnumeratePhysicalDevices)gpa(inst, "vkEnumeratePhysicalDevices"); + table->GetPhysicalDeviceFeatures = (PFN_vkGetPhysicalDeviceFeatures)gpa( + inst, "vkGetPhysicalDeviceFeatures"); + table->GetPhysicalDeviceImageFormatProperties = + 
(PFN_vkGetPhysicalDeviceImageFormatProperties)gpa( + inst, "vkGetPhysicalDeviceImageFormatProperties"); + table->GetPhysicalDeviceFormatProperties = + (PFN_vkGetPhysicalDeviceFormatProperties)gpa( + inst, "vkGetPhysicalDeviceFormatProperties"); + table->GetPhysicalDeviceSparseImageFormatProperties = + (PFN_vkGetPhysicalDeviceSparseImageFormatProperties)gpa( + inst, "vkGetPhysicalDeviceSparseImageFormatProperties"); + table->GetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties)gpa( + inst, "vkGetPhysicalDeviceProperties"); + table->GetPhysicalDeviceQueueFamilyProperties = + (PFN_vkGetPhysicalDeviceQueueFamilyProperties)gpa( + inst, "vkGetPhysicalDeviceQueueFamilyProperties"); + table->GetPhysicalDeviceMemoryProperties = + (PFN_vkGetPhysicalDeviceMemoryProperties)gpa( + inst, "vkGetPhysicalDeviceMemoryProperties"); + table->EnumerateDeviceExtensionProperties = + (PFN_vkEnumerateDeviceExtensionProperties)gpa( + inst, "vkEnumerateDeviceExtensionProperties"); + table->EnumerateDeviceLayerProperties = + (PFN_vkEnumerateDeviceLayerProperties)gpa( + inst, "vkEnumerateDeviceLayerProperties"); +} + +static inline void loader_init_instance_extension_dispatch_table( + VkLayerInstanceDispatchTable *table, PFN_vkGetInstanceProcAddr gpa, + VkInstance inst) { + table->DestroySurfaceKHR = + (PFN_vkDestroySurfaceKHR)gpa(inst, "vkDestroySurfaceKHR"); + table->CreateDebugReportCallbackEXT = + (PFN_vkCreateDebugReportCallbackEXT)gpa( + inst, "vkCreateDebugReportCallbackEXT"); + table->DestroyDebugReportCallbackEXT = + (PFN_vkDestroyDebugReportCallbackEXT)gpa( + inst, "vkDestroyDebugReportCallbackEXT"); + table->DebugReportMessageEXT = + (PFN_vkDebugReportMessageEXT)gpa(inst, "vkDebugReportMessageEXT"); + table->GetPhysicalDeviceSurfaceSupportKHR = + (PFN_vkGetPhysicalDeviceSurfaceSupportKHR)gpa( + inst, "vkGetPhysicalDeviceSurfaceSupportKHR"); + table->GetPhysicalDeviceSurfaceCapabilitiesKHR = + (PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR)gpa( + inst, 
"vkGetPhysicalDeviceSurfaceCapabilitiesKHR"); + table->GetPhysicalDeviceSurfaceFormatsKHR = + (PFN_vkGetPhysicalDeviceSurfaceFormatsKHR)gpa( + inst, "vkGetPhysicalDeviceSurfaceFormatsKHR"); + table->GetPhysicalDeviceSurfacePresentModesKHR = + (PFN_vkGetPhysicalDeviceSurfacePresentModesKHR)gpa( + inst, "vkGetPhysicalDeviceSurfacePresentModesKHR"); +#ifdef VK_USE_PLATFORM_MIR_KHR + table->CreateMirSurfaceKHR = + (PFN_vkCreateMirSurfaceKHR)gpa(inst, "vkCreateMirSurfaceKHR"); + table->GetPhysicalDeviceMirPresentationSupportKHR = + (PFN_vkGetPhysicalDeviceMirPresentationSupportKHR)gpa( + inst, "vkGetPhysicalDeviceMirPresentationSupportKHR"); +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + table->CreateWaylandSurfaceKHR = + (PFN_vkCreateWaylandSurfaceKHR)gpa(inst, "vkCreateWaylandSurfaceKHR"); + table->GetPhysicalDeviceWaylandPresentationSupportKHR = + (PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR)gpa( + inst, "vkGetPhysicalDeviceWaylandPresentationSupportKHR"); +#endif +#ifdef VK_USE_PLATFORM_WIN32_KHR + table->CreateWin32SurfaceKHR = + (PFN_vkCreateWin32SurfaceKHR)gpa(inst, "vkCreateWin32SurfaceKHR"); + table->GetPhysicalDeviceWin32PresentationSupportKHR = + (PFN_vkGetPhysicalDeviceWin32PresentationSupportKHR)gpa( + inst, "vkGetPhysicalDeviceWin32PresentationSupportKHR"); +#endif +#ifdef VK_USE_PLATFORM_XCB_KHR + table->CreateXcbSurfaceKHR = + (PFN_vkCreateXcbSurfaceKHR)gpa(inst, "vkCreateXcbSurfaceKHR"); + table->GetPhysicalDeviceXcbPresentationSupportKHR = + (PFN_vkGetPhysicalDeviceXcbPresentationSupportKHR)gpa( + inst, "vkGetPhysicalDeviceXcbPresentationSupportKHR"); +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR + table->CreateXlibSurfaceKHR = + (PFN_vkCreateXlibSurfaceKHR)gpa(inst, "vkCreateXlibSurfaceKHR"); + table->GetPhysicalDeviceXlibPresentationSupportKHR = + (PFN_vkGetPhysicalDeviceXlibPresentationSupportKHR)gpa( + inst, "vkGetPhysicalDeviceXlibPresentationSupportKHR"); +#endif +} + +static inline void * +loader_lookup_instance_dispatch_table(const 
VkLayerInstanceDispatchTable *table, + const char *name) { + if (!name || name[0] != 'v' || name[1] != 'k') + return NULL; + + name += 2; + if (!strcmp(name, "DestroyInstance")) + return (void *)table->DestroyInstance; + if (!strcmp(name, "EnumeratePhysicalDevices")) + return (void *)table->EnumeratePhysicalDevices; + if (!strcmp(name, "GetPhysicalDeviceFeatures")) + return (void *)table->GetPhysicalDeviceFeatures; + if (!strcmp(name, "GetPhysicalDeviceImageFormatProperties")) + return (void *)table->GetPhysicalDeviceImageFormatProperties; + if (!strcmp(name, "GetPhysicalDeviceFormatProperties")) + return (void *)table->GetPhysicalDeviceFormatProperties; + if (!strcmp(name, "GetPhysicalDeviceSparseImageFormatProperties")) + return (void *)table->GetPhysicalDeviceSparseImageFormatProperties; + if (!strcmp(name, "GetPhysicalDeviceProperties")) + return (void *)table->GetPhysicalDeviceProperties; + if (!strcmp(name, "GetPhysicalDeviceQueueFamilyProperties")) + return (void *)table->GetPhysicalDeviceQueueFamilyProperties; + if (!strcmp(name, "GetPhysicalDeviceMemoryProperties")) + return (void *)table->GetPhysicalDeviceMemoryProperties; + if (!strcmp(name, "GetInstanceProcAddr")) + return (void *)table->GetInstanceProcAddr; + if (!strcmp(name, "EnumerateDeviceExtensionProperties")) + return (void *)table->EnumerateDeviceExtensionProperties; + if (!strcmp(name, "EnumerateDeviceLayerProperties")) + return (void *)table->EnumerateDeviceLayerProperties; + if (!strcmp(name, "DestroySurfaceKHR")) + return (void *)table->DestroySurfaceKHR; + if (!strcmp(name, "GetPhysicalDeviceSurfaceSupportKHR")) + return (void *)table->GetPhysicalDeviceSurfaceSupportKHR; + if (!strcmp(name, "GetPhysicalDeviceSurfaceCapabilitiesKHR")) + return (void *)table->GetPhysicalDeviceSurfaceCapabilitiesKHR; + if (!strcmp(name, "GetPhysicalDeviceSurfaceFormatsKHR")) + return (void *)table->GetPhysicalDeviceSurfaceFormatsKHR; + if (!strcmp(name, "GetPhysicalDeviceSurfacePresentModesKHR")) + return 
(void *)table->GetPhysicalDeviceSurfacePresentModesKHR; +#ifdef VK_USE_PLATFORM_MIR_KHR + if (!strcmp(name, "CreateMirSurfaceKHR")) + return (void *)table->CreateMirSurfaceKHR; + if (!strcmp(name, "GetPhysicalDeviceMirPresentationSupportKHR")) + return (void *)table->GetPhysicalDeviceMirPresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + if (!strcmp(name, "CreateWaylandSurfaceKHR")) + return (void *)table->CreateWaylandSurfaceKHR; + if (!strcmp(name, "GetPhysicalDeviceWaylandPresentationSupportKHR")) + return (void *)table->GetPhysicalDeviceWaylandPresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_WIN32_KHR + if (!strcmp(name, "CreateWin32SurfaceKHR")) + return (void *)table->CreateWin32SurfaceKHR; + if (!strcmp(name, "GetPhysicalDeviceWin32PresentationSupportKHR")) + return (void *)table->GetPhysicalDeviceWin32PresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_XCB_KHR + if (!strcmp(name, "CreateXcbSurfaceKHR")) + return (void *)table->CreateXcbSurfaceKHR; + if (!strcmp(name, "GetPhysicalDeviceXcbPresentationSupportKHR")) + return (void *)table->GetPhysicalDeviceXcbPresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR + if (!strcmp(name, "CreateXlibSurfaceKHR")) + return (void *)table->CreateXlibSurfaceKHR; + if (!strcmp(name, "GetPhysicalDeviceXlibPresentationSupportKHR")) + return (void *)table->GetPhysicalDeviceXlibPresentationSupportKHR; +#endif + if (!strcmp(name, "CreateDebugReportCallbackEXT")) + return (void *)table->CreateDebugReportCallbackEXT; + if (!strcmp(name, "DestroyDebugReportCallbackEXT")) + return (void *)table->DestroyDebugReportCallbackEXT; + if (!strcmp(name, "DebugReportMessageEXT")) + return (void *)table->DebugReportMessageEXT; + + return NULL; +} diff --git a/third_party/vulkan/loader/trampoline.c b/third_party/vulkan/loader/trampoline.c new file mode 100644 index 000000000..dfd2c0001 --- /dev/null +++ b/third_party/vulkan/loader/trampoline.c @@ -0,0 +1,1731 @@ +/* + * + * Copyright (c) 2015-2016 The 
Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. + * Copyright (C) 2015 Google Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. 
+ * + * Author: Courtney Goeltzenleuchter + * Author: Jon Ashburn + * Author: Tony Barbour + * Author: Chia-I Wu + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> + +#include "vk_loader_platform.h" +#include "loader.h" +#include "debug_report.h" +#include "wsi.h" + +/* Trampoline entrypoints */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateInstance(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkInstance *pInstance) { + struct loader_instance *ptr_instance = NULL; + VkInstance created_instance = VK_NULL_HANDLE; + VkResult res = VK_ERROR_INITIALIZATION_FAILED; + VkDebugReportCallbackEXT instance_callback = VK_NULL_HANDLE; + void *pNext = (void *)pCreateInfo->pNext; + + loader_platform_thread_once(&once_init, loader_initialize); + +#if 0 + if (pAllocator) { + ptr_instance = (struct loader_instance *) pAllocator->pfnAllocation( + pAllocator->pUserData, + sizeof(struct loader_instance), + sizeof(int *), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + } else { +#endif + ptr_instance = + (struct loader_instance *)malloc(sizeof(struct loader_instance)); + //} + if (ptr_instance == NULL) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + tls_instance = ptr_instance; + loader_platform_thread_lock_mutex(&loader_lock); + memset(ptr_instance, 0, sizeof(struct loader_instance)); +#if 0 + if (pAllocator) { + ptr_instance->alloc_callbacks = *pAllocator; + } +#endif + + /* + * Look for a debug report create info structure + * and setup a callback if found.
+ */ + while (pNext) { + if (((VkInstanceCreateInfo *)pNext)->sType == + VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT) { + instance_callback = (VkDebugReportCallbackEXT)ptr_instance; + if (util_CreateDebugReportCallback(ptr_instance, pNext, NULL, + instance_callback)) { + loader_heap_free(ptr_instance, ptr_instance); + loader_platform_thread_unlock_mutex(&loader_lock); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + pNext = (void *)((VkInstanceCreateInfo *)pNext)->pNext; + } + + /* Due to implicit layers need to get layer list even if + * enabledLayerCount == 0 and VK_INSTANCE_LAYERS is unset. For now always + * get layer list (both instance and device) via loader_layer_scan(). */ + memset(&ptr_instance->instance_layer_list, 0, + sizeof(ptr_instance->instance_layer_list)); + memset(&ptr_instance->device_layer_list, 0, + sizeof(ptr_instance->device_layer_list)); + loader_layer_scan(ptr_instance, &ptr_instance->instance_layer_list, + &ptr_instance->device_layer_list); + + /* validate the app requested layers to be enabled */ + if (pCreateInfo->enabledLayerCount > 0) { + res = + loader_validate_layers(ptr_instance, pCreateInfo->enabledLayerCount, + pCreateInfo->ppEnabledLayerNames, + &ptr_instance->instance_layer_list); + if (res != VK_SUCCESS) { + util_DestroyDebugReportCallback(ptr_instance, instance_callback, + NULL); + loader_heap_free(ptr_instance, ptr_instance); + loader_platform_thread_unlock_mutex(&loader_lock); + return res; + } + } + + /* convert any meta layers to the actual layers makes a copy of layer name*/ + uint32_t saved_layer_count = pCreateInfo->enabledLayerCount; + char **saved_layer_names; + char **saved_layer_ptr; + saved_layer_names = + loader_stack_alloc(sizeof(char *) * pCreateInfo->enabledLayerCount); + for (uint32_t i = 0; i < saved_layer_count; i++) { + saved_layer_names[i] = (char *)pCreateInfo->ppEnabledLayerNames[i]; + } + saved_layer_ptr = (char **)pCreateInfo->ppEnabledLayerNames; + + loader_expand_layer_names( + ptr_instance, 
/* NOTE(review): vendored Khronos Vulkan loader (third_party/vulkan/loader/
 * trampoline.c) carried inside a git patch; leading "+" markers are diff
 * syntax, not C. Code is kept byte-identical — comments only. This span is
 * the tail of vkCreateInstance: ICD scan, instance-extension validation,
 * dispatch-table allocation, instance-layer activation, and the instance
 * chain creation, with each failure path unwinding everything built so far
 * (layer lists, scanned ICDs, ext list, temp debug callback, loader_lock,
 * heap allocations) before returning. Also contains the vkDestroyInstance /
 * vkEnumeratePhysicalDevices / vkGetPhysicalDeviceFeatures trampolines;
 * loader_lock is held where global loader state is touched. */
std_validation_str, + sizeof(std_validation_names) / sizeof(std_validation_names[0]), + std_validation_names, (uint32_t *)&pCreateInfo->enabledLayerCount, + (char ***)&pCreateInfo->ppEnabledLayerNames); + + /* Scan/discover all ICD libraries */ + memset(&ptr_instance->icd_libs, 0, sizeof(ptr_instance->icd_libs)); + loader_icd_scan(ptr_instance, &ptr_instance->icd_libs); + + /* get extensions from all ICD's, merge so no duplicates, then validate */ + loader_get_icd_loader_instance_extensions( + ptr_instance, &ptr_instance->icd_libs, &ptr_instance->ext_list); + res = loader_validate_instance_extensions( + ptr_instance, &ptr_instance->ext_list, + &ptr_instance->instance_layer_list, pCreateInfo); + if (res != VK_SUCCESS) { + loader_unexpand_inst_layer_names(ptr_instance, saved_layer_count, + saved_layer_names, saved_layer_ptr, + pCreateInfo); + loader_delete_layer_properties(ptr_instance, + &ptr_instance->device_layer_list); + loader_delete_layer_properties(ptr_instance, + &ptr_instance->instance_layer_list); + loader_scanned_icd_clear(ptr_instance, &ptr_instance->icd_libs); + loader_destroy_generic_list( + ptr_instance, + (struct loader_generic_list *)&ptr_instance->ext_list); + util_DestroyDebugReportCallback(ptr_instance, instance_callback, NULL); + loader_platform_thread_unlock_mutex(&loader_lock); + loader_heap_free(ptr_instance, ptr_instance); + return res; + } + + ptr_instance->disp = + loader_heap_alloc(ptr_instance, sizeof(VkLayerInstanceDispatchTable), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (ptr_instance->disp == NULL) { + loader_unexpand_inst_layer_names(ptr_instance, saved_layer_count, + saved_layer_names, saved_layer_ptr, + pCreateInfo); + loader_delete_layer_properties(ptr_instance, + &ptr_instance->device_layer_list); + loader_delete_layer_properties(ptr_instance, + &ptr_instance->instance_layer_list); + loader_scanned_icd_clear(ptr_instance, &ptr_instance->icd_libs); + loader_destroy_generic_list( + ptr_instance, + (struct loader_generic_list
*)&ptr_instance->ext_list); + util_DestroyDebugReportCallback(ptr_instance, instance_callback, NULL); + loader_platform_thread_unlock_mutex(&loader_lock); + loader_heap_free(ptr_instance, ptr_instance); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + memcpy(ptr_instance->disp, &instance_disp, sizeof(instance_disp)); + ptr_instance->next = loader.instances; + loader.instances = ptr_instance; + + /* activate any layers on instance chain */ + res = loader_enable_instance_layers(ptr_instance, pCreateInfo, + &ptr_instance->instance_layer_list); + if (res != VK_SUCCESS) { + loader_unexpand_inst_layer_names(ptr_instance, saved_layer_count, + saved_layer_names, saved_layer_ptr, + pCreateInfo); + loader_delete_layer_properties(ptr_instance, + &ptr_instance->device_layer_list); + loader_delete_layer_properties(ptr_instance, + &ptr_instance->instance_layer_list); + loader_scanned_icd_clear(ptr_instance, &ptr_instance->icd_libs); + loader_destroy_generic_list( + ptr_instance, + (struct loader_generic_list *)&ptr_instance->ext_list); + loader.instances = ptr_instance->next; + util_DestroyDebugReportCallback(ptr_instance, instance_callback, NULL); + loader_platform_thread_unlock_mutex(&loader_lock); + loader_heap_free(ptr_instance, ptr_instance->disp); + loader_heap_free(ptr_instance, ptr_instance); + return res; + } + + created_instance = (VkInstance)ptr_instance; + res = loader_create_instance_chain(pCreateInfo, pAllocator, ptr_instance, + &created_instance); + + if (res == VK_SUCCESS) { + wsi_create_instance(ptr_instance, pCreateInfo); + debug_report_create_instance(ptr_instance, pCreateInfo); + + *pInstance = created_instance; + + /* + * Finally have the layers in place and everyone has seen + * the CreateInstance command go by. This allows the layer's + * GetInstanceProcAddr functions to return valid extension functions + * if enabled. + */ + loader_activate_instance_layer_extensions(ptr_instance, *pInstance); + } else { + // TODO: cleanup here.
+ } + + /* Remove temporary debug_report callback */ + util_DestroyDebugReportCallback(ptr_instance, instance_callback, NULL); + loader_unexpand_inst_layer_names(ptr_instance, saved_layer_count, + saved_layer_names, saved_layer_ptr, + pCreateInfo); + loader_platform_thread_unlock_mutex(&loader_lock); + return res; +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyInstance(VkInstance instance, + const VkAllocationCallbacks *pAllocator) { + const VkLayerInstanceDispatchTable *disp; + struct loader_instance *ptr_instance = NULL; + disp = loader_get_instance_dispatch(instance); + + loader_platform_thread_lock_mutex(&loader_lock); + + /* TODO: Do we need a temporary callback here to catch cleanup issues? */ + + ptr_instance = loader_get_instance(instance); + disp->DestroyInstance(instance, pAllocator); + + loader_deactivate_instance_layers(ptr_instance); + loader_heap_free(ptr_instance, ptr_instance->disp); + loader_heap_free(ptr_instance, ptr_instance); + loader_platform_thread_unlock_mutex(&loader_lock); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkEnumeratePhysicalDevices(VkInstance instance, uint32_t *pPhysicalDeviceCount, + VkPhysicalDevice *pPhysicalDevices) { + const VkLayerInstanceDispatchTable *disp; + VkResult res; + disp = loader_get_instance_dispatch(instance); + + loader_platform_thread_lock_mutex(&loader_lock); + res = disp->EnumeratePhysicalDevices(instance, pPhysicalDeviceCount, + pPhysicalDevices); + loader_platform_thread_unlock_mutex(&loader_lock); + return res; +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkGetPhysicalDeviceFeatures(VkPhysicalDevice gpu, + VkPhysicalDeviceFeatures *pFeatures) { + const VkLayerInstanceDispatchTable *disp; + + disp = loader_get_instance_dispatch(gpu); + disp->GetPhysicalDeviceFeatures(gpu, pFeatures); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkGetPhysicalDeviceFormatProperties(VkPhysicalDevice gpu, VkFormat format, + VkFormatProperties *pFormatInfo) { + const VkLayerInstanceDispatchTable
/* NOTE(review): vendored Vulkan loader trampolines inside a git patch ("+"
 * markers are diff syntax). Code byte-identical — comments only. This span:
 * physical-device query trampolines (format/image-format/queue-family/memory
 * properties) that forward through the instance dispatch table without
 * locking, plus vkCreateDevice / vkDestroyDevice and the device
 * extension/layer enumeration entry points, all of which take loader_lock
 * because they mutate or read shared loader bookkeeping (loader_device
 * lists, manifest-derived layer data). Note vkEnumerateDeviceExtension-
 * Properties dispatches down the chain only when pLayerName is empty;
 * otherwise it answers from the loader's manifest data. */
*disp; + + disp = loader_get_instance_dispatch(gpu); + disp->GetPhysicalDeviceFormatProperties(gpu, format, pFormatInfo); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkGetPhysicalDeviceImageFormatProperties( + VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, + VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, + VkImageFormatProperties *pImageFormatProperties) { + const VkLayerInstanceDispatchTable *disp; + + disp = loader_get_instance_dispatch(physicalDevice); + return disp->GetPhysicalDeviceImageFormatProperties( + physicalDevice, format, type, tiling, usage, flags, + pImageFormatProperties); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkGetPhysicalDeviceProperties(VkPhysicalDevice gpu, + VkPhysicalDeviceProperties *pProperties) { + const VkLayerInstanceDispatchTable *disp; + + disp = loader_get_instance_dispatch(gpu); + disp->GetPhysicalDeviceProperties(gpu, pProperties); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkGetPhysicalDeviceQueueFamilyProperties( + VkPhysicalDevice gpu, uint32_t *pQueueFamilyPropertyCount, + VkQueueFamilyProperties *pQueueProperties) { + const VkLayerInstanceDispatchTable *disp; + + disp = loader_get_instance_dispatch(gpu); + disp->GetPhysicalDeviceQueueFamilyProperties(gpu, pQueueFamilyPropertyCount, + pQueueProperties); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceMemoryProperties( + VkPhysicalDevice gpu, VkPhysicalDeviceMemoryProperties *pMemoryProperties) { + const VkLayerInstanceDispatchTable *disp; + + disp = loader_get_instance_dispatch(gpu); + disp->GetPhysicalDeviceMemoryProperties(gpu, pMemoryProperties); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkDevice *pDevice) { + VkResult res; + + loader_platform_thread_lock_mutex(&loader_lock); + + res = loader_CreateDevice(gpu, pCreateInfo, pAllocator, pDevice); + +
loader_platform_thread_unlock_mutex(&loader_lock); + return res; +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + struct loader_device *dev; + + loader_platform_thread_lock_mutex(&loader_lock); + + struct loader_icd *icd = loader_get_icd_and_device(device, &dev); + const struct loader_instance *inst = icd->this_instance; + disp = loader_get_dispatch(device); + + disp->DestroyDevice(device, pAllocator); + dev->device = NULL; + loader_remove_logical_device(inst, icd, dev); + + loader_platform_thread_unlock_mutex(&loader_lock); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkEnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, + const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) { + VkResult res; + + loader_platform_thread_lock_mutex(&loader_lock); + + /* If pLayerName == NULL, then querying ICD extensions, pass this call + down the instance chain which will terminate in the ICD. This allows + layers to filter the extensions coming back up the chain. + If pLayerName != NULL then get layer extensions from manifest file.
*/ + if (pLayerName == NULL || strlen(pLayerName) == 0) { + const VkLayerInstanceDispatchTable *disp; + + disp = loader_get_instance_dispatch(physicalDevice); + res = disp->EnumerateDeviceExtensionProperties( + physicalDevice, NULL, pPropertyCount, pProperties); + } else { + res = loader_EnumerateDeviceExtensionProperties( + physicalDevice, pLayerName, pPropertyCount, pProperties); + } + + loader_platform_thread_unlock_mutex(&loader_lock); + return res; +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkEnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, + uint32_t *pPropertyCount, + VkLayerProperties *pProperties) { + VkResult res; + + loader_platform_thread_lock_mutex(&loader_lock); + + /* Don't dispatch this call down the instance chain, want all device layers + enumerated and instance chain may not contain all device layers */ + res = loader_EnumerateDeviceLayerProperties(physicalDevice, pPropertyCount, + pProperties); + loader_platform_thread_unlock_mutex(&loader_lock); + return res; +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkGetDeviceQueue(VkDevice device, uint32_t queueNodeIndex, uint32_t queueIndex, + VkQueue *pQueue) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->GetDeviceQueue(device, queueNodeIndex, queueIndex, pQueue); + loader_set_dispatch(*pQueue, disp); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, + VkFence fence) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(queue); + + return disp->QueueSubmit(queue, submitCount, pSubmits, fence); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitIdle(VkQueue queue) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(queue); + + return disp->QueueWaitIdle(queue); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle(VkDevice device) { + const VkLayerDispatchTable *disp; + + disp =
/* NOTE(review): vendored Vulkan loader trampolines inside a git patch ("+"
 * markers are diff syntax). Code byte-identical — comments only. This span:
 * pure pass-through device trampolines — vkDeviceWaitIdle tail, memory
 * (Allocate/Free/Map/Unmap/Flush/Invalidate/GetCommitment), buffer/image
 * binding and memory-requirement queries, sparse-image queries and
 * vkQueueBindSparse, and the fence API through vkWaitForFences. Each looks
 * up the device/queue dispatch table via loader_get_dispatch and forwards
 * unchanged; no loader_lock is taken on these hot paths. */
loader_get_dispatch(device); + + return disp->DeviceWaitIdle(device); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkAllocateMemory(VkDevice device, const VkMemoryAllocateInfo *pAllocateInfo, + const VkAllocationCallbacks *pAllocator, + VkDeviceMemory *pMemory) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->AllocateMemory(device, pAllocateInfo, pAllocator, pMemory); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkFreeMemory(VkDevice device, VkDeviceMemory mem, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->FreeMemory(device, mem, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkMapMemory(VkDevice device, VkDeviceMemory mem, VkDeviceSize offset, + VkDeviceSize size, VkFlags flags, void **ppData) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->MapMemory(device, mem, offset, size, flags, ppData); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkUnmapMemory(VkDevice device, VkDeviceMemory mem) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->UnmapMemory(device, mem); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkFlushMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->FlushMappedMemoryRanges(device, memoryRangeCount, + pMemoryRanges); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkInvalidateMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->InvalidateMappedMemoryRanges(device, memoryRangeCount, + pMemoryRanges); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkGetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory
memory, + VkDeviceSize *pCommittedMemoryInBytes) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->GetDeviceMemoryCommitment(device, memory, pCommittedMemoryInBytes); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkBindBufferMemory(VkDevice device, VkBuffer buffer, VkDeviceMemory mem, + VkDeviceSize offset) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->BindBufferMemory(device, buffer, mem, offset); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkBindImageMemory(VkDevice device, VkImage image, VkDeviceMemory mem, + VkDeviceSize offset) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->BindImageMemory(device, image, mem, offset); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkGetBufferMemoryRequirements(VkDevice device, VkBuffer buffer, + VkMemoryRequirements *pMemoryRequirements) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->GetBufferMemoryRequirements(device, buffer, pMemoryRequirements); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkGetImageMemoryRequirements(VkDevice device, VkImage image, + VkMemoryRequirements *pMemoryRequirements) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->GetImageMemoryRequirements(device, image, pMemoryRequirements); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL vkGetImageSparseMemoryRequirements( + VkDevice device, VkImage image, uint32_t *pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements *pSparseMemoryRequirements) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->GetImageSparseMemoryRequirements(device, image, + pSparseMemoryRequirementCount, + pSparseMemoryRequirements); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkGetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, +
VkSampleCountFlagBits samples, VkImageUsageFlags usage, + VkImageTiling tiling, uint32_t *pPropertyCount, + VkSparseImageFormatProperties *pProperties) { + const VkLayerInstanceDispatchTable *disp; + + disp = loader_get_instance_dispatch(physicalDevice); + + disp->GetPhysicalDeviceSparseImageFormatProperties( + physicalDevice, format, type, samples, usage, tiling, pPropertyCount, + pProperties); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkQueueBindSparse(VkQueue queue, uint32_t bindInfoCount, + const VkBindSparseInfo *pBindInfo, VkFence fence) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(queue); + + return disp->QueueBindSparse(queue, bindInfoCount, pBindInfo, fence); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateFence(VkDevice device, const VkFenceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkFence *pFence) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateFence(device, pCreateInfo, pAllocator, pFence); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyFence(VkDevice device, VkFence fence, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyFence(device, fence, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkResetFences(VkDevice device, uint32_t fenceCount, const VkFence *pFences) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->ResetFences(device, fenceCount, pFences); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkGetFenceStatus(VkDevice device, VkFence fence) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->GetFenceStatus(device, fence); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkWaitForFences(VkDevice device, uint32_t fenceCount, const VkFence *pFences, + VkBool32 waitAll, uint64_t timeout) { + const VkLayerDispatchTable
/* NOTE(review): vendored Vulkan loader trampolines inside a git patch ("+"
 * markers are diff syntax). Code byte-identical — comments only. This span:
 * vkWaitForFences tail, then the semaphore, event, query-pool, buffer,
 * buffer-view, image, and image-view create/destroy/query trampolines.
 * All are stateless one-call forwards through the device dispatch table
 * obtained from loader_get_dispatch(device). */
*disp; + + disp = loader_get_dispatch(device); + + return disp->WaitForFences(device, fenceCount, pFences, waitAll, timeout); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateSemaphore(VkDevice device, const VkSemaphoreCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSemaphore *pSemaphore) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateSemaphore(device, pCreateInfo, pAllocator, pSemaphore); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroySemaphore(VkDevice device, VkSemaphore semaphore, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroySemaphore(device, semaphore, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateEvent(VkDevice device, const VkEventCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkEvent *pEvent) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateEvent(device, pCreateInfo, pAllocator, pEvent); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyEvent(VkDevice device, VkEvent event, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyEvent(device, event, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkGetEventStatus(VkDevice device, VkEvent event) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->GetEventStatus(device, event); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkSetEvent(VkDevice device, VkEvent event) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->SetEvent(device, event); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkResetEvent(VkDevice device, VkEvent event) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return
disp->ResetEvent(device, event); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkQueryPool *pQueryPool) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateQueryPool(device, pCreateInfo, pAllocator, pQueryPool); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyQueryPool(VkDevice device, VkQueryPool queryPool, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyQueryPool(device, queryPool, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkGetQueryPoolResults(VkDevice device, VkQueryPool queryPool, + uint32_t firstQuery, uint32_t queryCount, size_t dataSize, + void *pData, VkDeviceSize stride, + VkQueryResultFlags flags) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->GetQueryPoolResults(device, queryPool, firstQuery, queryCount, + dataSize, pData, stride, flags); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateBuffer(VkDevice device, const VkBufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateBuffer(device, pCreateInfo, pAllocator, pBuffer); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyBuffer(VkDevice device, VkBuffer buffer, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyBuffer(device, buffer, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateBufferView(VkDevice device, const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *pView) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + +
return disp->CreateBufferView(device, pCreateInfo, pAllocator, pView); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyBufferView(VkDevice device, VkBufferView bufferView, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyBufferView(device, bufferView, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkImage *pImage) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateImage(device, pCreateInfo, pAllocator, pImage); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyImage(VkDevice device, VkImage image, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyImage(device, image, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkGetImageSubresourceLayout(VkDevice device, VkImage image, + const VkImageSubresource *pSubresource, + VkSubresourceLayout *pLayout) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->GetImageSubresourceLayout(device, image, pSubresource, pLayout); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateImageView(VkDevice device, const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkImageView *pView) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateImageView(device, pCreateInfo, pAllocator, pView); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyImageView(VkDevice device, VkImageView imageView, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyImageView(device, imageView, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL
/* NOTE(review): vendored Vulkan loader trampolines inside a git patch ("+"
 * markers are diff syntax). Code byte-identical — comments only. This span:
 * shader-module, pipeline-cache (create/destroy/get-data/merge),
 * graphics/compute pipeline, pipeline-layout, sampler, descriptor-set-layout,
 * and descriptor-pool trampolines. All forward straight through the device
 * dispatch table with no loader-side state. */
+vkCreateShaderModule(VkDevice device, + const VkShaderModuleCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkShaderModule *pShader) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateShaderModule(device, pCreateInfo, pAllocator, pShader); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyShaderModule(VkDevice device, VkShaderModule shaderModule, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyShaderModule(device, shaderModule, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreatePipelineCache(VkDevice device, + const VkPipelineCacheCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineCache *pPipelineCache) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreatePipelineCache(device, pCreateInfo, pAllocator, + pPipelineCache); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyPipelineCache(VkDevice device, VkPipelineCache pipelineCache, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyPipelineCache(device, pipelineCache, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkGetPipelineCacheData(VkDevice device, VkPipelineCache pipelineCache, + size_t *pDataSize, void *pData) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->GetPipelineCacheData(device, pipelineCache, pDataSize, pData); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkMergePipelineCaches(VkDevice device, VkPipelineCache dstCache, + uint32_t srcCacheCount, + const VkPipelineCache *pSrcCaches) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->MergePipelineCaches(device, dstCache, srcCacheCount, + pSrcCaches); +} + +LOADER_EXPORT VKAPI_ATTR
VkResult VKAPI_CALL +vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkGraphicsPipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateGraphicsPipelines(device, pipelineCache, createInfoCount, + pCreateInfos, pAllocator, pPipelines); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateComputePipelines(VkDevice device, VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkComputePipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateComputePipelines(device, pipelineCache, createInfoCount, + pCreateInfos, pAllocator, pPipelines); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyPipeline(VkDevice device, VkPipeline pipeline, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyPipeline(device, pipeline, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreatePipelineLayout(VkDevice device, + const VkPipelineLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineLayout *pPipelineLayout) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreatePipelineLayout(device, pCreateInfo, pAllocator, + pPipelineLayout); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyPipelineLayout(VkDevice device, VkPipelineLayout pipelineLayout, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyPipelineLayout(device, pipelineLayout, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateSampler(VkDevice device, const
VkSamplerCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkSampler *pSampler) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateSampler(device, pCreateInfo, pAllocator, pSampler); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroySampler(VkDevice device, VkSampler sampler, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroySampler(device, sampler, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateDescriptorSetLayout(VkDevice device, + const VkDescriptorSetLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorSetLayout *pSetLayout) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateDescriptorSetLayout(device, pCreateInfo, pAllocator, + pSetLayout); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyDescriptorSetLayout(VkDevice device, + VkDescriptorSetLayout descriptorSetLayout, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyDescriptorSetLayout(device, descriptorSetLayout, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateDescriptorPool(VkDevice device, + const VkDescriptorPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorPool *pDescriptorPool) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateDescriptorPool(device, pCreateInfo, pAllocator, + pDescriptorPool); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyDescriptorPool(VkDevice device, VkDescriptorPool descriptorPool, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyDescriptorPool(device, descriptorPool, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR
/* NOTE(review): vendored Vulkan loader trampolines inside a git patch ("+"
 * markers are diff syntax). Code byte-identical — comments only. This span:
 * descriptor-pool reset, descriptor-set allocate/free/update, framebuffer
 * and render-pass create/destroy, command-pool ops, and the command-buffer
 * lifecycle. Notably, vkAllocateCommandBuffers writes the loader dispatch
 * pointer into each returned (dispatchable) VkCommandBuffer via
 * loader_init_dispatch, guarded by a NULL check per handle; Begin/End/Reset
 * then dispatch through that per-command-buffer table. */
VkResult VKAPI_CALL +vkResetDescriptorPool(VkDevice device, VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->ResetDescriptorPool(device, descriptorPool, flags); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkAllocateDescriptorSets(VkDevice device, + const VkDescriptorSetAllocateInfo *pAllocateInfo, + VkDescriptorSet *pDescriptorSets) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->AllocateDescriptorSets(device, pAllocateInfo, pDescriptorSets); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkFreeDescriptorSets(VkDevice device, VkDescriptorPool descriptorPool, + uint32_t descriptorSetCount, + const VkDescriptorSet *pDescriptorSets) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->FreeDescriptorSets(device, descriptorPool, descriptorSetCount, + pDescriptorSets); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkUpdateDescriptorSets(VkDevice device, uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet *pDescriptorCopies) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->UpdateDescriptorSets(device, descriptorWriteCount, pDescriptorWrites, + descriptorCopyCount, pDescriptorCopies); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateFramebuffer(VkDevice device, const VkFramebufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFramebuffer *pFramebuffer) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateFramebuffer(device, pCreateInfo, pAllocator, + pFramebuffer); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyFramebuffer(VkDevice device, VkFramebuffer framebuffer, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable
*disp; + + disp = loader_get_dispatch(device); + + disp->DestroyFramebuffer(device, framebuffer, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateRenderPass(VkDevice device, const VkRenderPassCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkRenderPass *pRenderPass) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateRenderPass(device, pCreateInfo, pAllocator, pRenderPass); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyRenderPass(VkDevice device, VkRenderPass renderPass, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyRenderPass(device, renderPass, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkGetRenderAreaGranularity(VkDevice device, VkRenderPass renderPass, + VkExtent2D *pGranularity) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->GetRenderAreaGranularity(device, renderPass, pGranularity); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateCommandPool(VkDevice device, const VkCommandPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkCommandPool *pCommandPool) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->CreateCommandPool(device, pCreateInfo, pAllocator, + pCommandPool); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroyCommandPool(VkDevice device, VkCommandPool commandPool, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->DestroyCommandPool(device, commandPool, pAllocator); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkResetCommandPool(VkDevice device, VkCommandPool commandPool, + VkCommandPoolResetFlags flags) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + return disp->ResetCommandPool(device,
commandPool, flags); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkAllocateCommandBuffers(VkDevice device, + const VkCommandBufferAllocateInfo *pAllocateInfo, + VkCommandBuffer *pCommandBuffers) { + const VkLayerDispatchTable *disp; + VkResult res; + + disp = loader_get_dispatch(device); + + res = disp->AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers); + if (res == VK_SUCCESS) { + for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) { + if (pCommandBuffers[i]) { + loader_init_dispatch(pCommandBuffers[i], disp); + } + } + } + + return res; +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkFreeCommandBuffers(VkDevice device, VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer *pCommandBuffers) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(device); + + disp->FreeCommandBuffers(device, commandPool, commandBufferCount, + pCommandBuffers); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkBeginCommandBuffer(VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo *pBeginInfo) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + return disp->BeginCommandBuffer(commandBuffer, pBeginInfo); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkEndCommandBuffer(VkCommandBuffer commandBuffer) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + return disp->EndCommandBuffer(commandBuffer); +} + +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkResetCommandBuffer(VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + return disp->ResetCommandBuffer(commandBuffer, flags); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdBindPipeline(VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + +
/* NOTE(review): vendored Vulkan loader trampolines inside a git patch ("+"
 * markers are diff syntax). Code byte-identical — comments only. This span:
 * vkCmdBindPipeline tail, the dynamic-state setters (viewport, scissor,
 * line width, depth bias, blend constants, depth bounds, stencil masks/
 * reference), descriptor-set/index-buffer/vertex-buffer binds, the draw and
 * indirect-draw commands, and vkCmdDispatch — each a direct forward through
 * the command buffer's dispatch table. Ends with a declaration fragment
 * whose function continues past this chunk. */
disp->CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, + uint32_t viewportCount, const VkViewport *pViewports) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdSetViewport(commandBuffer, firstViewport, viewportCount, + pViewports); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, + uint32_t scissorCount, const VkRect2D *pScissors) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdSetScissor(commandBuffer, firstScissor, scissorCount, pScissors); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdSetLineWidth(commandBuffer, lineWidth); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, + float depthBiasClamp, float depthBiasSlopeFactor) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdSetDepthBias(commandBuffer, depthBiasConstantFactor, + depthBiasClamp, depthBiasSlopeFactor); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdSetBlendConstants(VkCommandBuffer commandBuffer, + const float blendConstants[4]) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdSetBlendConstants(commandBuffer, blendConstants); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, + float maxDepthBounds) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdSetDepthBounds(commandBuffer, minDepthBounds, maxDepthBounds); +} + +LOADER_EXPORT VKAPI_ATTR void
VKAPI_CALL +vkCmdSetStencilCompareMask(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, uint32_t compareMask) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdSetStencilCompareMask(commandBuffer, faceMask, compareMask); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdSetStencilWriteMask(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, uint32_t writeMask) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdSetStencilWriteMask(commandBuffer, faceMask, writeMask); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdSetStencilReference(VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, uint32_t reference) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdSetStencilReference(commandBuffer, faceMask, reference); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdBindDescriptorSets( + VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount, + const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount, + const uint32_t *pDynamicOffsets) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdBindDescriptorSets(commandBuffer, pipelineBindPoint, layout, + firstSet, descriptorSetCount, pDescriptorSets, + dynamicOffsetCount, pDynamicOffsets); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, + VkDeviceSize offset, VkIndexType indexType) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdBindIndexBuffer(commandBuffer, buffer, offset, indexType); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdBindVertexBuffers(VkCommandBuffer commandBuffer, uint32_t firstBinding, + uint32_t bindingCount, const VkBuffer *pBuffers, + const VkDeviceSize
*pOffsets) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdBindVertexBuffers(commandBuffer, firstBinding, bindingCount, + pBuffers, pOffsets); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, + uint32_t instanceCount, uint32_t firstVertex, + uint32_t firstInstance) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdDraw(commandBuffer, vertexCount, instanceCount, firstVertex, + firstInstance); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, + uint32_t instanceCount, uint32_t firstIndex, + int32_t vertexOffset, uint32_t firstInstance) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdDrawIndexed(commandBuffer, indexCount, instanceCount, firstIndex, + vertexOffset, firstInstance); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, + VkDeviceSize offset, uint32_t drawCount, uint32_t stride) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, + VkDeviceSize offset, uint32_t drawCount, + uint32_t stride) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, + stride); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, + uint32_t z) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdDispatch(commandBuffer, x, y, z); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL
+vkCmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, + VkDeviceSize offset) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdDispatchIndirect(commandBuffer, buffer, offset); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdCopyBuffer(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, + VkBuffer dstBuffer, uint32_t regionCount, + const VkBufferCopy *pRegions) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, regionCount, + pRegions); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdCopyImage(VkCommandBuffer commandBuffer, VkImage srcImage, + VkImageLayout srcImageLayout, VkImage dstImage, + VkImageLayout dstImageLayout, uint32_t regionCount, + const VkImageCopy *pRegions) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdCopyImage(commandBuffer, srcImage, srcImageLayout, dstImage, + dstImageLayout, regionCount, pRegions); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdBlitImage(VkCommandBuffer commandBuffer, VkImage srcImage, + VkImageLayout srcImageLayout, VkImage dstImage, + VkImageLayout dstImageLayout, uint32_t regionCount, + const VkImageBlit *pRegions, VkFilter filter) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdBlitImage(commandBuffer, srcImage, srcImageLayout, dstImage, + dstImageLayout, regionCount, pRegions, filter); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdCopyBufferToImage(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, + VkImage dstImage, VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy *pRegions) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdCopyBufferToImage(commandBuffer, srcBuffer, dstImage, + dstImageLayout, regionCount, pRegions); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL 
+vkCmdCopyImageToBuffer(VkCommandBuffer commandBuffer, VkImage srcImage, + VkImageLayout srcImageLayout, VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy *pRegions) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdCopyImageToBuffer(commandBuffer, srcImage, srcImageLayout, + dstBuffer, regionCount, pRegions); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, + VkDeviceSize dstOffset, VkDeviceSize dataSize, + const uint32_t *pData) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdUpdateBuffer(commandBuffer, dstBuffer, dstOffset, dataSize, pData); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, + VkDeviceSize dstOffset, VkDeviceSize size, uint32_t data) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdFillBuffer(commandBuffer, dstBuffer, dstOffset, size, data); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image, + VkImageLayout imageLayout, const VkClearColorValue *pColor, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdClearColorImage(commandBuffer, image, imageLayout, pColor, + rangeCount, pRanges); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue *pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdClearDepthStencilImage(commandBuffer, image, imageLayout, + pDepthStencil, rangeCount, pRanges); +} + +LOADER_EXPORT VKAPI_ATTR void 
VKAPI_CALL +vkCmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, + const VkClearAttachment *pAttachments, uint32_t rectCount, + const VkClearRect *pRects) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdClearAttachments(commandBuffer, attachmentCount, pAttachments, + rectCount, pRects); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdResolveImage(VkCommandBuffer commandBuffer, VkImage srcImage, + VkImageLayout srcImageLayout, VkImage dstImage, + VkImageLayout dstImageLayout, uint32_t regionCount, + const VkImageResolve *pRegions) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdResolveImage(commandBuffer, srcImage, srcImageLayout, dstImage, + dstImageLayout, regionCount, pRegions); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdSetEvent(VkCommandBuffer commandBuffer, VkEvent event, + VkPipelineStageFlags stageMask) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdSetEvent(commandBuffer, event, stageMask); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdResetEvent(VkCommandBuffer commandBuffer, VkEvent event, + VkPipelineStageFlags stageMask) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdResetEvent(commandBuffer, event, stageMask); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, + const VkEvent *pEvents, VkPipelineStageFlags sourceStageMask, + VkPipelineStageFlags dstStageMask, uint32_t memoryBarrierCount, + const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdWaitEvents(commandBuffer, eventCount, 
pEvents, sourceStageMask, + dstStageMask, memoryBarrierCount, pMemoryBarriers, + bufferMemoryBarrierCount, pBufferMemoryBarriers, + imageMemoryBarrierCount, pImageMemoryBarriers); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( + VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags, + uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdPipelineBarrier( + commandBuffer, srcStageMask, dstStageMask, dependencyFlags, + memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount, + pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, + uint32_t slot, VkFlags flags) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdBeginQuery(commandBuffer, queryPool, slot, flags); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, + uint32_t slot) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdEndQuery(commandBuffer, queryPool, slot); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, + uint32_t firstQuery, uint32_t queryCount) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdResetQueryPool(commandBuffer, queryPool, firstQuery, queryCount); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdWriteTimestamp(VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + 
VkQueryPool queryPool, uint32_t slot) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdWriteTimestamp(commandBuffer, pipelineStage, queryPool, slot); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, + uint32_t firstQuery, uint32_t queryCount, + VkBuffer dstBuffer, VkDeviceSize dstOffset, + VkDeviceSize stride, VkFlags flags) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdCopyQueryPoolResults(commandBuffer, queryPool, firstQuery, + queryCount, dstBuffer, dstOffset, stride, + flags); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, + VkShaderStageFlags stageFlags, uint32_t offset, + uint32_t size, const void *pValues) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdPushConstants(commandBuffer, layout, stageFlags, offset, size, + pValues); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo *pRenderPassBegin, + VkSubpassContents contents) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdBeginRenderPass(commandBuffer, pRenderPassBegin, contents); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdNextSubpass(commandBuffer, contents); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdEndRenderPass(VkCommandBuffer commandBuffer) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdEndRenderPass(commandBuffer); +} + +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkCmdExecuteCommands(VkCommandBuffer commandBuffer, + uint32_t 
commandBuffersCount, + const VkCommandBuffer *pCommandBuffers) { + const VkLayerDispatchTable *disp; + + disp = loader_get_dispatch(commandBuffer); + + disp->CmdExecuteCommands(commandBuffer, commandBuffersCount, + pCommandBuffers); +} diff --git a/third_party/vulkan/loader/vk_loader_platform.h b/third_party/vulkan/loader/vk_loader_platform.h new file mode 100644 index 000000000..5fcc74023 --- /dev/null +++ b/third_party/vulkan/loader/vk_loader_platform.h @@ -0,0 +1,449 @@ +/* + * + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. 
+ * + * Author: Ian Elliot + * Author: Jon Ashburn + * + */ +#pragma once + +#if defined(_WIN32) +// WinSock2.h must be included *BEFORE* windows.h +#include +#endif // _WIN32 + +#include "vulkan/vk_platform.h" +#include "vulkan/vk_sdk_platform.h" + +#if defined(__linux__) +/* Linux-specific common code: */ + +// Headers: +//#define _GNU_SOURCE 1 +// TBD: Are the contents of the following file used? +#include +// Note: The following file is for dynamic loading: +#include +#include +#include +#include +#include +#include +#include + +// VK Library Filenames, Paths, etc.: +#define PATH_SEPERATOR ':' +#define DIRECTORY_SYMBOL '/' + +#define VULKAN_ICDCONF_DIR \ + "/" \ + "vulkan" \ + "/" \ + "icd.d" +#define VULKAN_ICD_DIR \ + "/" \ + "vulkan" \ + "/" \ + "icd" +#define VULKAN_ELAYERCONF_DIR \ + "/" \ + "vulkan" \ + "/" \ + "explicit_layer.d" +#define VULKAN_ILAYERCONF_DIR \ + "/" \ + "vulkan" \ + "/" \ + "implicit_layer.d" +#define VULKAN_LAYER_DIR \ + "/" \ + "vulkan" \ + "/" \ + "layer" + +#if defined(LOCALPREFIX) +#define LOCAL_DRIVERS_INFO \ + LOCALPREFIX "/" SYSCONFDIR VULKAN_ICDCONF_DIR ":" LOCALPREFIX \ + "/" DATADIR VULKAN_ICDCONF_DIR ":" +#define LOCAL_ELAYERS_INFO \ + LOCALPREFIX "/" SYSCONFDIR VULKAN_ELAYERCONF_DIR ":" LOCALPREFIX \ + "/" DATADIR VULKAN_ELAYERCONF_DIR ":" +#define LOCAL_ILAYERS_INFO \ + LOCALPREFIX "/" SYSCONFDIR VULKAN_ILAYERCONF_DIR ":" LOCALPREFIX \ + "/" DATADIR VULKAN_ILAYERCONF_DIR ":" +#else +#define LOCAL_DRIVERS_INFO +#define LOCAL_ELAYERS_INFO +#define LOCAL_ILAYERS_INFO +#endif + +#define DEFAULT_VK_DRIVERS_INFO \ + LOCAL_DRIVERS_INFO \ + "/" SYSCONFDIR VULKAN_ICDCONF_DIR ":" \ + "/usr/" DATADIR VULKAN_ICDCONF_DIR +#define DEFAULT_VK_DRIVERS_PATH "" +#define DEFAULT_VK_ELAYERS_INFO \ + LOCAL_ELAYERS_INFO \ + "/" SYSCONFDIR VULKAN_ELAYERCONF_DIR ":" \ + "/usr/" DATADIR VULKAN_ELAYERCONF_DIR ":" +#define DEFAULT_VK_ILAYERS_INFO \ + LOCAL_ILAYERS_INFO \ + "/" SYSCONFDIR VULKAN_ILAYERCONF_DIR ":" \ + "/usr/" DATADIR 
VULKAN_ILAYERCONF_DIR +#define DEFAULT_VK_LAYERS_PATH "" +#define LAYERS_PATH_ENV "VK_LAYER_PATH" + +// C99: +#define PRINTF_SIZE_T_SPECIFIER "%zu" + +// File IO +static inline bool loader_platform_file_exists(const char *path) { + if (access(path, F_OK)) + return false; + else + return true; +} + +static inline bool loader_platform_is_path_absolute(const char *path) { + if (path[0] == '/') + return true; + else + return false; +} + +static inline char *loader_platform_dirname(char *path) { + return dirname(path); +} + +// Environment variables + +static inline char *loader_getenv(const char *name) { return getenv(name); } + +static inline void loader_free_getenv(const char *val) {} + +// Dynamic Loading of libraries: +typedef void *loader_platform_dl_handle; +static inline loader_platform_dl_handle +loader_platform_open_library(const char *libPath) { + return dlopen(libPath, RTLD_LAZY | RTLD_LOCAL); +} +static inline const char * +loader_platform_open_library_error(const char *libPath) { + return dlerror(); +} +static inline void +loader_platform_close_library(loader_platform_dl_handle library) { + dlclose(library); +} +static inline void * +loader_platform_get_proc_address(loader_platform_dl_handle library, + const char *name) { + assert(library); + assert(name); + return dlsym(library, name); +} +static inline const char * +loader_platform_get_proc_address_error(const char *name) { + return dlerror(); +} + +// Threads: +typedef pthread_t loader_platform_thread; +#define THREAD_LOCAL_DECL __thread +#define LOADER_PLATFORM_THREAD_ONCE_DECLARATION(var) \ + pthread_once_t var = PTHREAD_ONCE_INIT; +#define LOADER_PLATFORM_THREAD_ONCE_DEFINITION(var) pthread_once_t var; +static inline void loader_platform_thread_once(pthread_once_t *ctl, + void (*func)(void)) { + assert(func != NULL); + assert(ctl != NULL); + pthread_once(ctl, func); +} + +// Thread IDs: +typedef pthread_t loader_platform_thread_id; +static inline loader_platform_thread_id 
loader_platform_get_thread_id() { + return pthread_self(); +} + +// Thread mutex: +typedef pthread_mutex_t loader_platform_thread_mutex; +static inline void +loader_platform_thread_create_mutex(loader_platform_thread_mutex *pMutex) { + pthread_mutex_init(pMutex, NULL); +} +static inline void +loader_platform_thread_lock_mutex(loader_platform_thread_mutex *pMutex) { + pthread_mutex_lock(pMutex); +} +static inline void +loader_platform_thread_unlock_mutex(loader_platform_thread_mutex *pMutex) { + pthread_mutex_unlock(pMutex); +} +static inline void +loader_platform_thread_delete_mutex(loader_platform_thread_mutex *pMutex) { + pthread_mutex_destroy(pMutex); +} +typedef pthread_cond_t loader_platform_thread_cond; +static inline void +loader_platform_thread_init_cond(loader_platform_thread_cond *pCond) { + pthread_cond_init(pCond, NULL); +} +static inline void +loader_platform_thread_cond_wait(loader_platform_thread_cond *pCond, + loader_platform_thread_mutex *pMutex) { + pthread_cond_wait(pCond, pMutex); +} +static inline void +loader_platform_thread_cond_broadcast(loader_platform_thread_cond *pCond) { + pthread_cond_broadcast(pCond); +} + +#define loader_stack_alloc(size) alloca(size) + +#elif defined(_WIN32) // defined(__linux__) +/* Windows-specific common code: */ +// WinBase.h defines CreateSemaphore and synchapi.h defines CreateEvent +// undefine them to avoid conflicts with VkLayerDispatchTable struct members. 
+#ifdef CreateSemaphore +#undef CreateSemaphore +#endif +#ifdef CreateEvent +#undef CreateEvent +#endif +#include +#include +#include +#include +#include +#include +#ifdef __cplusplus +#include +#include +using namespace std; +#endif // __cplusplus + +// VK Library Filenames, Paths, etc.: +#define PATH_SEPERATOR ';' +#define DIRECTORY_SYMBOL '\\' +#define DEFAULT_VK_REGISTRY_HIVE HKEY_LOCAL_MACHINE +#define DEFAULT_VK_DRIVERS_INFO "SOFTWARE\\Khronos\\Vulkan\\Drivers" +// TODO: Are these the correct paths +#define DEFAULT_VK_DRIVERS_PATH "C:\\Windows\\System32;C:\\Windows\\SysWow64" +#define DEFAULT_VK_ELAYERS_INFO "SOFTWARE\\Khronos\\Vulkan\\ExplicitLayers" +#define DEFAULT_VK_ILAYERS_INFO "SOFTWARE\\Khronos\\Vulkan\\ImplicitLayers" +#define DEFAULT_VK_LAYERS_PATH "C:\\Windows\\System32;C:\\Windows\\SysWow64" +#define LAYERS_PATH_ENV "VK_LAYER_PATH" + +#define PRINTF_SIZE_T_SPECIFIER "%Iu" + +// File IO +static bool loader_platform_file_exists(const char *path) { + if ((_access(path, 0)) == -1) + return false; + else + return true; +} + +static bool loader_platform_is_path_absolute(const char *path) { + return !PathIsRelative(path); +} + +// WIN32 runtime doesn't have dirname(). +static inline char *loader_platform_dirname(char *path) { + char *current, *next; + + // TODO/TBD: Do we need to deal with the Windows's ":" character? + + for (current = path; *current != '\0'; current = next) { + next = strchr(current, DIRECTORY_SYMBOL); + if (next == NULL) { + if (current != path) + *(current - 1) = '\0'; + return path; + } else { + // Point one character past the DIRECTORY_SYMBOL: + next++; + } + } + return path; +} + +// WIN32 runtime doesn't have basename(). +// Microsoft also doesn't have basename(). Paths are different on Windows, and +// so this is just a temporary solution in order to get us compiling, so that we +// can test some scenarios, and develop the correct solution for Windows. 
+// TODO: Develop a better, permanent solution for Windows, to replace this +// temporary code: +static char *loader_platform_basename(char *pathname) { + char *current, *next; + + // TODO/TBD: Do we need to deal with the Windows's ":" character? + + for (current = pathname; *current != '\0'; current = next) { + next = strchr(current, DIRECTORY_SYMBOL); + if (next == NULL) { + // No more DIRECTORY_SYMBOL's so return p: + return current; + } else { + // Point one character past the DIRECTORY_SYMBOL: + next++; + } + } + // We shouldn't get to here, but this makes the compiler happy: + return current; +} + +// Environment variables + +static inline char *loader_getenv(const char *name) { + char *retVal; + DWORD valSize; + + valSize = GetEnvironmentVariableA(name, NULL, 0); + + // valSize DOES include the null terminator, so for any set variable + // will always be at least 1. If it's 0, the variable wasn't set. + if (valSize == 0) + return NULL; + + // TODO; FIXME This should be using any app defined memory allocation + retVal = (char *)malloc(valSize); + + GetEnvironmentVariableA(name, retVal, valSize); + + return retVal; +} + +static inline void loader_free_getenv(const char *val) { free((void *)val); } + +// Dynamic Loading: +typedef HMODULE loader_platform_dl_handle; +static loader_platform_dl_handle +loader_platform_open_library(const char *libPath) { + return LoadLibrary(libPath); +} +static char *loader_platform_open_library_error(const char *libPath) { + static char errorMsg[120]; + snprintf(errorMsg, 119, "Failed to open dynamic library \"%s\"", libPath); + return errorMsg; +} +static void loader_platform_close_library(loader_platform_dl_handle library) { + FreeLibrary(library); +} +static void *loader_platform_get_proc_address(loader_platform_dl_handle library, + const char *name) { + assert(library); + assert(name); + return GetProcAddress(library, name); +} +static char *loader_platform_get_proc_address_error(const char *name) { + static char 
errorMsg[120]; + snprintf(errorMsg, 119, "Failed to find function \"%s\" in dynamic library", + name); + return errorMsg; +} + +// Threads: +typedef HANDLE loader_platform_thread; +#define THREAD_LOCAL_DECL __declspec(thread) +#define LOADER_PLATFORM_THREAD_ONCE_DECLARATION(var) \ + INIT_ONCE var = INIT_ONCE_STATIC_INIT; +#define LOADER_PLATFORM_THREAD_ONCE_DEFINITION(var) INIT_ONCE var; +static BOOL CALLBACK +InitFuncWrapper(PINIT_ONCE InitOnce, PVOID Parameter, PVOID *Context) { + void (*func)(void) = (void (*)(void))Parameter; + func(); + return TRUE; +} + +static void loader_platform_thread_once(void *ctl, void (*func)(void)) { + assert(func != NULL); + assert(ctl != NULL); + InitOnceExecuteOnce((PINIT_ONCE)ctl, InitFuncWrapper, func, NULL); +} + +// Thread IDs: +typedef DWORD loader_platform_thread_id; +static loader_platform_thread_id loader_platform_get_thread_id() { + return GetCurrentThreadId(); +} + +// Thread mutex: +typedef CRITICAL_SECTION loader_platform_thread_mutex; +static void +loader_platform_thread_create_mutex(loader_platform_thread_mutex *pMutex) { + InitializeCriticalSection(pMutex); +} +static void +loader_platform_thread_lock_mutex(loader_platform_thread_mutex *pMutex) { + EnterCriticalSection(pMutex); +} +static void +loader_platform_thread_unlock_mutex(loader_platform_thread_mutex *pMutex) { + LeaveCriticalSection(pMutex); +} +static void +loader_platform_thread_delete_mutex(loader_platform_thread_mutex *pMutex) { + DeleteCriticalSection(pMutex); +} +typedef CONDITION_VARIABLE loader_platform_thread_cond; +static void +loader_platform_thread_init_cond(loader_platform_thread_cond *pCond) { + InitializeConditionVariable(pCond); +} +static void +loader_platform_thread_cond_wait(loader_platform_thread_cond *pCond, + loader_platform_thread_mutex *pMutex) { + SleepConditionVariableCS(pCond, pMutex, INFINITE); +} +static void +loader_platform_thread_cond_broadcast(loader_platform_thread_cond *pCond) { + WakeAllConditionVariable(pCond); +} + +// 
Windows Registry: +char *loader_get_registry_string(const HKEY hive, const LPCTSTR sub_key, + const char *value); + +#define loader_stack_alloc(size) _alloca(size) +#else // defined(_WIN32) + +#error The "loader_platform.h" file must be modified for this OS. + +// NOTE: In order to support another OS, an #elif needs to be added (above the +// "#else // defined(_WIN32)") for that OS, and OS-specific versions of the +// contents of this file must be created. + +// NOTE: Other OS-specific changes are also needed for this OS. Search for +// files with "WIN32" in it, as a quick way to find files that must be changed. + +#endif // defined(_WIN32) + +// returns true if the given string appears to be a relative or absolute +// path, as opposed to a bare filename. +static inline bool loader_platform_is_path(const char *path) { + return strchr(path, DIRECTORY_SYMBOL) != NULL; +} diff --git a/third_party/vulkan/loader/wsi.c b/third_party/vulkan/loader/wsi.c new file mode 100644 index 000000000..05945fb50 --- /dev/null +++ b/third_party/vulkan/loader/wsi.c @@ -0,0 +1,1092 @@ +/* + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. 
+ * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. + * + * Author: Ian Elliott + * Author: Jon Ashburn + * Author: Ian Elliott + * Author: Mark Lobodzinski + */ + +//#define _ISOC11_SOURCE /* for aligned_alloc() */ +#define _GNU_SOURCE +#include +#include +#include "vk_loader_platform.h" +#include "loader.h" +#include "wsi.h" +#include + +static const VkExtensionProperties wsi_surface_extension_info = { + .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, + .specVersion = VK_KHR_SURFACE_SPEC_VERSION, +}; + +#ifdef VK_USE_PLATFORM_WIN32_KHR +static const VkExtensionProperties wsi_win32_surface_extension_info = { + .extensionName = VK_KHR_WIN32_SURFACE_EXTENSION_NAME, + .specVersion = VK_KHR_WIN32_SURFACE_SPEC_VERSION, +}; +#endif // VK_USE_PLATFORM_WIN32_KHR + +#ifdef VK_USE_PLATFORM_MIR_KHR +static const VkExtensionProperties wsi_mir_surface_extension_info = { + .extensionName = VK_KHR_MIR_SURFACE_EXTENSION_NAME, + .specVersion = VK_KHR_MIR_SURFACE_SPEC_VERSION, +}; +#endif // VK_USE_PLATFORM_MIR_KHR + +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +static const VkExtensionProperties wsi_wayland_surface_extension_info = { + .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, + .specVersion = VK_KHR_WAYLAND_SURFACE_SPEC_VERSION, +}; +#endif // VK_USE_PLATFORM_WAYLAND_KHR + +#ifdef VK_USE_PLATFORM_XCB_KHR +static const VkExtensionProperties wsi_xcb_surface_extension_info = { + .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, + .specVersion = VK_KHR_XCB_SURFACE_SPEC_VERSION, +}; +#endif // VK_USE_PLATFORM_XCB_KHR + +#ifdef 
VK_USE_PLATFORM_XLIB_KHR +static const VkExtensionProperties wsi_xlib_surface_extension_info = { + .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME, + .specVersion = VK_KHR_XLIB_SURFACE_SPEC_VERSION, +}; +#endif // VK_USE_PLATFORM_XLIB_KHR + +#ifdef VK_USE_PLATFORM_ANDROID_KHR +static const VkExtensionProperties wsi_android_surface_extension_info = { + .extensionName = VK_KHR_ANDROID_SURFACE_EXTENSION_NAME, + .specVersion = VK_KHR_ANDROID_SURFACE_REVISION, +}; +#endif // VK_USE_PLATFORM_ANDROID_KHR + +void wsi_add_instance_extensions(const struct loader_instance *inst, + struct loader_extension_list *ext_list) { + loader_add_to_ext_list(inst, ext_list, 1, &wsi_surface_extension_info); +#ifdef VK_USE_PLATFORM_WIN32_KHR + loader_add_to_ext_list(inst, ext_list, 1, + &wsi_win32_surface_extension_info); +#endif // VK_USE_PLATFORM_WIN32_KHR +#ifdef VK_USE_PLATFORM_MIR_KHR + loader_add_to_ext_list(inst, ext_list, 1, &wsi_mir_surface_extension_info); +#endif // VK_USE_PLATFORM_MIR_KHR +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + loader_add_to_ext_list(inst, ext_list, 1, + &wsi_wayland_surface_extension_info); +#endif // VK_USE_PLATFORM_WAYLAND_KHR +#ifdef VK_USE_PLATFORM_XCB_KHR + loader_add_to_ext_list(inst, ext_list, 1, &wsi_xcb_surface_extension_info); +#endif // VK_USE_PLATFORM_XCB_KHR +#ifdef VK_USE_PLATFORM_XLIB_KHR + loader_add_to_ext_list(inst, ext_list, 1, &wsi_xlib_surface_extension_info); +#endif // VK_USE_PLATFORM_XLIB_KHR +#ifdef VK_USE_PLATFORM_ANDROID_KHR + loader_add_to_ext_list(inst, ext_list, 1, + &wsi_android_surface_extension_info); +#endif // VK_USE_PLATFORM_ANDROID_KHR +} + +void wsi_create_instance(struct loader_instance *ptr_instance, + const VkInstanceCreateInfo *pCreateInfo) { + ptr_instance->wsi_surface_enabled = false; + +#ifdef VK_USE_PLATFORM_WIN32_KHR + ptr_instance->wsi_win32_surface_enabled = true; +#endif // VK_USE_PLATFORM_WIN32_KHR +#ifdef VK_USE_PLATFORM_MIR_KHR + ptr_instance->wsi_mir_surface_enabled = false; +#endif // 
VK_USE_PLATFORM_MIR_KHR +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + ptr_instance->wsi_wayland_surface_enabled = false; +#endif // VK_USE_PLATFORM_WAYLAND_KHR +#ifdef VK_USE_PLATFORM_XCB_KHR + ptr_instance->wsi_xcb_surface_enabled = false; +#endif // VK_USE_PLATFORM_XCB_KHR +#ifdef VK_USE_PLATFORM_XLIB_KHR + ptr_instance->wsi_xlib_surface_enabled = false; +#endif // VK_USE_PLATFORM_XLIB_KHR +#ifdef VK_USE_PLATFORM_ANDROID_KHR + ptr_instance->wsi_android_surface_enabled = false; +#endif // VK_USE_PLATFORM_ANDROID_KHR + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_KHR_SURFACE_EXTENSION_NAME) == 0) { + ptr_instance->wsi_surface_enabled = true; + continue; + } +#ifdef VK_USE_PLATFORM_WIN32_KHR + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_KHR_WIN32_SURFACE_EXTENSION_NAME) == 0) { + ptr_instance->wsi_win32_surface_enabled = true; + continue; + } +#endif // VK_USE_PLATFORM_WIN32_KHR +#ifdef VK_USE_PLATFORM_MIR_KHR + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_KHR_MIR_SURFACE_EXTENSION_NAME) == 0) { + ptr_instance->wsi_mir_surface_enabled = true; + continue; + } +#endif // VK_USE_PLATFORM_MIR_KHR +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME) == 0) { + ptr_instance->wsi_wayland_surface_enabled = true; + continue; + } +#endif // VK_USE_PLATFORM_WAYLAND_KHR +#ifdef VK_USE_PLATFORM_XCB_KHR + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_KHR_XCB_SURFACE_EXTENSION_NAME) == 0) { + ptr_instance->wsi_xcb_surface_enabled = true; + continue; + } +#endif // VK_USE_PLATFORM_XCB_KHR +#ifdef VK_USE_PLATFORM_XLIB_KHR + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_KHR_XLIB_SURFACE_EXTENSION_NAME) == 0) { + ptr_instance->wsi_xlib_surface_enabled = true; + continue; + } +#endif // VK_USE_PLATFORM_XLIB_KHR +#ifdef VK_USE_PLATFORM_ANDROID_KHR + if 
(strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_KHR_ANDROID_SURFACE_EXTENSION_NAME) == 0) { + ptr_instance->wsi_android_surface_enabled = true; + continue; + } +#endif // VK_USE_PLATFORM_ANDROID_KHR + } +} + +/* + * Functions for the VK_KHR_surface extension: + */ + +/* + * This is the trampoline entrypoint + * for DestroySurfaceKHR + */ +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroySurfaceKHR(VkInstance instance, VkSurfaceKHR surface, + const VkAllocationCallbacks *pAllocator) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(instance); + disp->DestroySurfaceKHR(instance, surface, pAllocator); +} + +/* + * This is the instance chain terminator function + * for DestroySurfaceKHR + */ +VKAPI_ATTR void VKAPI_CALL +loader_DestroySurfaceKHR(VkInstance instance, VkSurfaceKHR surface, + const VkAllocationCallbacks *pAllocator) { + struct loader_instance *ptr_instance = loader_get_instance(instance); + + loader_heap_free(ptr_instance, (void *)surface); +} + +/* + * This is the trampoline entrypoint + * for GetPhysicalDeviceSurfaceSupportKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkGetPhysicalDeviceSurfaceSupportKHR(VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR surface, + VkBool32 *pSupported) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(physicalDevice); + VkResult res = disp->GetPhysicalDeviceSurfaceSupportKHR( + physicalDevice, queueFamilyIndex, surface, pSupported); + return res; +} + +/* + * This is the instance chain terminator function + * for GetPhysicalDeviceSurfaceSupportKHR + */ +VKAPI_ATTR VkResult VKAPI_CALL +loader_GetPhysicalDeviceSurfaceSupportKHR(VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR surface, + VkBool32 *pSupported) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + assert(pSupported && + 
"GetPhysicalDeviceSurfaceSupportKHR: Error, null pSupported"); + *pSupported = false; + + assert(icd->GetPhysicalDeviceSurfaceSupportKHR && + "loader: null GetPhysicalDeviceSurfaceSupportKHR ICD pointer"); + + return icd->GetPhysicalDeviceSurfaceSupportKHR( + phys_dev->phys_dev, queueFamilyIndex, surface, pSupported); +} + +/* + * This is the trampoline entrypoint + * for GetPhysicalDeviceSurfaceCapabilitiesKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkGetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, + VkSurfaceCapabilitiesKHR *pSurfaceCapabilities) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(physicalDevice); + VkResult res = disp->GetPhysicalDeviceSurfaceCapabilitiesKHR( + physicalDevice, surface, pSurfaceCapabilities); + return res; +} + +/* + * This is the instance chain terminator function + * for GetPhysicalDeviceSurfaceCapabilitiesKHR + */ +VKAPI_ATTR VkResult VKAPI_CALL loader_GetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, + VkSurfaceCapabilitiesKHR *pSurfaceCapabilities) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + assert(pSurfaceCapabilities && "GetPhysicalDeviceSurfaceCapabilitiesKHR: " + "Error, null pSurfaceCapabilities"); + + assert(icd->GetPhysicalDeviceSurfaceCapabilitiesKHR && + "loader: null GetPhysicalDeviceSurfaceCapabilitiesKHR ICD pointer"); + + return icd->GetPhysicalDeviceSurfaceCapabilitiesKHR( + phys_dev->phys_dev, surface, pSurfaceCapabilities); +} + +/* + * This is the trampoline entrypoint + * for GetPhysicalDeviceSurfaceFormatsKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkGetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormatKHR *pSurfaceFormats) { + const VkLayerInstanceDispatchTable 
*disp; + disp = loader_get_instance_dispatch(physicalDevice); + VkResult res = disp->GetPhysicalDeviceSurfaceFormatsKHR( + physicalDevice, surface, pSurfaceFormatCount, pSurfaceFormats); + return res; +} + +/* + * This is the instance chain terminator function + * for GetPhysicalDeviceSurfaceFormatsKHR + */ +VKAPI_ATTR VkResult VKAPI_CALL +loader_GetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormatKHR *pSurfaceFormats) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + assert( + pSurfaceFormatCount && + "GetPhysicalDeviceSurfaceFormatsKHR: Error, null pSurfaceFormatCount"); + + assert(icd->GetPhysicalDeviceSurfaceFormatsKHR && + "loader: null GetPhysicalDeviceSurfaceFormatsKHR ICD pointer"); + + return icd->GetPhysicalDeviceSurfaceFormatsKHR( + phys_dev->phys_dev, surface, pSurfaceFormatCount, pSurfaceFormats); +} + +/* + * This is the trampoline entrypoint + * for GetPhysicalDeviceSurfacePresentModesKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkGetPhysicalDeviceSurfacePresentModesKHR(VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t *pPresentModeCount, + VkPresentModeKHR *pPresentModes) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(physicalDevice); + VkResult res = disp->GetPhysicalDeviceSurfacePresentModesKHR( + physicalDevice, surface, pPresentModeCount, pPresentModes); + return res; +} + +/* + * This is the instance chain terminator function + * for GetPhysicalDeviceSurfacePresentModesKHR + */ +VKAPI_ATTR VkResult VKAPI_CALL loader_GetPhysicalDeviceSurfacePresentModesKHR( + VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, + uint32_t *pPresentModeCount, VkPresentModeKHR *pPresentModes) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd 
= phys_dev->this_icd; + + assert(pPresentModeCount && "GetPhysicalDeviceSurfacePresentModesKHR: " + "Error, null pPresentModeCount"); + + assert(icd->GetPhysicalDeviceSurfacePresentModesKHR && + "loader: null GetPhysicalDeviceSurfacePresentModesKHR ICD pointer"); + + return icd->GetPhysicalDeviceSurfacePresentModesKHR( + phys_dev->phys_dev, surface, pPresentModeCount, pPresentModes); +} + +/* + * Functions for the VK_KHR_swapchain extension: + */ + +/* + * This is the trampoline entrypoint + * for CreateSwapchainKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateSwapchainKHR(VkDevice device, + const VkSwapchainCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSwapchainKHR *pSwapchain) { + const VkLayerDispatchTable *disp; + disp = loader_get_dispatch(device); + VkResult res = + disp->CreateSwapchainKHR(device, pCreateInfo, pAllocator, pSwapchain); + return res; +} + +/* + * This is the trampoline entrypoint + * for DestroySwapchainKHR + */ +LOADER_EXPORT VKAPI_ATTR void VKAPI_CALL +vkDestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain, + const VkAllocationCallbacks *pAllocator) { + const VkLayerDispatchTable *disp; + disp = loader_get_dispatch(device); + disp->DestroySwapchainKHR(device, swapchain, pAllocator); +} + +/* + * This is the trampoline entrypoint + * for GetSwapchainImagesKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkGetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain, + uint32_t *pSwapchainImageCount, + VkImage *pSwapchainImages) { + const VkLayerDispatchTable *disp; + disp = loader_get_dispatch(device); + VkResult res = disp->GetSwapchainImagesKHR( + device, swapchain, pSwapchainImageCount, pSwapchainImages); + return res; +} + +/* + * This is the trampoline entrypoint + * for AcquireNextImageKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkAcquireNextImageKHR(VkDevice device, VkSwapchainKHR swapchain, + uint64_t timeout, VkSemaphore semaphore, VkFence fence, + uint32_t 
*pImageIndex) { + const VkLayerDispatchTable *disp; + disp = loader_get_dispatch(device); + VkResult res = disp->AcquireNextImageKHR(device, swapchain, timeout, + semaphore, fence, pImageIndex); + return res; +} + +/* + * This is the trampoline entrypoint + * for QueuePresentKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR *pPresentInfo) { + const VkLayerDispatchTable *disp; + disp = loader_get_dispatch(queue); + VkResult res = disp->QueuePresentKHR(queue, pPresentInfo); + return res; +} + +#ifdef VK_USE_PLATFORM_WIN32_KHR + +/* + * Functions for the VK_KHR_win32_surface extension: + */ + +/* + * This is the trampoline entrypoint + * for CreateWin32SurfaceKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateWin32SurfaceKHR(VkInstance instance, + const VkWin32SurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(instance); + VkResult res; + + res = disp->CreateWin32SurfaceKHR(instance, pCreateInfo, pAllocator, + pSurface); + return res; +} + +/* + * This is the instance chain terminator function + * for CreateWin32SurfaceKHR + */ +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateWin32SurfaceKHR(VkInstance instance, + const VkWin32SurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface) { + struct loader_instance *ptr_instance = loader_get_instance(instance); + VkIcdSurfaceWin32 *pIcdSurface = NULL; + + pIcdSurface = loader_heap_alloc(ptr_instance, sizeof(VkIcdSurfaceWin32), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (pIcdSurface == NULL) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + pIcdSurface->base.platform = VK_ICD_WSI_PLATFORM_WIN32; + pIcdSurface->hinstance = pCreateInfo->hinstance; + pIcdSurface->hwnd = pCreateInfo->hwnd; + + *pSurface = (VkSurfaceKHR)pIcdSurface; + + return VK_SUCCESS; +} + +/* + * This is 
the trampoline entrypoint + * for GetPhysicalDeviceWin32PresentationSupportKHR + */ +LOADER_EXPORT VKAPI_ATTR VkBool32 VKAPI_CALL +vkGetPhysicalDeviceWin32PresentationSupportKHR(VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(physicalDevice); + VkBool32 res = disp->GetPhysicalDeviceWin32PresentationSupportKHR( + physicalDevice, queueFamilyIndex); + return res; +} + +/* + * This is the instance chain terminator function + * for GetPhysicalDeviceWin32PresentationSupportKHR + */ +VKAPI_ATTR VkBool32 VKAPI_CALL +loader_GetPhysicalDeviceWin32PresentationSupportKHR( + VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + assert(icd->GetPhysicalDeviceWin32PresentationSupportKHR && + "loader: null GetPhysicalDeviceWin32PresentationSupportKHR ICD " + "pointer"); + + return icd->GetPhysicalDeviceWin32PresentationSupportKHR(phys_dev->phys_dev, + queueFamilyIndex); +} +#endif // VK_USE_PLATFORM_WIN32_KHR + +#ifdef VK_USE_PLATFORM_MIR_KHR + +/* + * Functions for the VK_KHR_mir_surface extension: + */ + +/* + * This is the trampoline entrypoint + * for CreateMirSurfaceKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateMirSurfaceKHR(VkInstance instance, + const VkMirSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(instance); + VkResult res; + + res = + disp->CreateMirSurfaceKHR(instance, pCreateInfo, pAllocator, pSurface); + return res; +} + +/* + * This is the instance chain terminator function + * for CreateMirSurfaceKHR + */ +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateMirSurfaceKHR(VkInstance instance, + const VkMirSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks 
*pAllocator, + VkSurfaceKHR *pSurface) { + struct loader_instance *ptr_instance = loader_get_instance(instance); + VkIcdSurfaceMir *pIcdSurface = NULL; + + pIcdSurface = loader_heap_alloc(ptr_instance, sizeof(VkIcdSurfaceMir), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (pIcdSurface == NULL) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + pIcdSurface->base.platform = VK_ICD_WSI_PLATFORM_MIR; + pIcdSurface->connection = pCreateInfo->connection; + pIcdSurface->mirSurface = pCreateInfo->mirSurface; + + *pSurface = (VkSurfaceKHR)pIcdSurface; + + return VK_SUCCESS; +} + +/* + * This is the trampoline entrypoint + * for GetPhysicalDeviceMirPresentationSupportKHR + */ +LOADER_EXPORT VKAPI_ATTR VkBool32 VKAPI_CALL +vkGetPhysicalDeviceMirPresentationSupportKHR(VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + MirConnection *connection) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(physicalDevice); + VkBool32 res = disp->GetPhysicalDeviceMirPresentationSupportKHR( + physicalDevice, queueFamilyIndex, connection); + return res; +} + +/* + * This is the instance chain terminator function + * for GetPhysicalDeviceMirPresentationSupportKHR + */ +VKAPI_ATTR VkBool32 VKAPI_CALL +loader_GetPhysicalDeviceMirPresentationSupportKHR( + VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, + MirConnection *connection) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + assert( + icd->GetPhysicalDeviceMirPresentationSupportKHR && + "loader: null GetPhysicalDeviceMirPresentationSupportKHR ICD pointer"); + + return icd->GetPhysicalDeviceMirPresentationSupportKHR( + phys_dev->phys_dev, queueFamilyIndex, connection); +} +#endif // VK_USE_PLATFORM_MIR_KHR + +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + +/* + * Functions for the VK_KHR_wayland_surface extension: + */ + +/* + * This is the trampoline entrypoint + * for CreateWaylandSurfaceKHR + */ 
+LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateWaylandSurfaceKHR(VkInstance instance, + const VkWaylandSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(instance); + VkResult res; + + res = disp->CreateWaylandSurfaceKHR(instance, pCreateInfo, pAllocator, + pSurface); + return res; +} + +/* + * This is the instance chain terminator function + * for CreateWaylandSurfaceKHR + */ +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateWaylandSurfaceKHR(VkInstance instance, + const VkWaylandSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface) { + struct loader_instance *ptr_instance = loader_get_instance(instance); + VkIcdSurfaceWayland *pIcdSurface = NULL; + + pIcdSurface = loader_heap_alloc(ptr_instance, sizeof(VkIcdSurfaceWayland), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (pIcdSurface == NULL) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + pIcdSurface->base.platform = VK_ICD_WSI_PLATFORM_WAYLAND; + pIcdSurface->display = pCreateInfo->display; + pIcdSurface->surface = pCreateInfo->surface; + + *pSurface = (VkSurfaceKHR)pIcdSurface; + + return VK_SUCCESS; +} + +/* + * This is the trampoline entrypoint + * for GetPhysicalDeviceWaylandPresentationSupportKHR + */ +LOADER_EXPORT VKAPI_ATTR VkBool32 VKAPI_CALL +vkGetPhysicalDeviceWaylandPresentationSupportKHR( + VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, + struct wl_display *display) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(physicalDevice); + VkBool32 res = disp->GetPhysicalDeviceWaylandPresentationSupportKHR( + physicalDevice, queueFamilyIndex, display); + return res; +} + +/* + * This is the instance chain terminator function + * for GetPhysicalDeviceWaylandPresentationSupportKHR + */ +VKAPI_ATTR VkBool32 VKAPI_CALL +loader_GetPhysicalDeviceWaylandPresentationSupportKHR( +
VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, + struct wl_display *display) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + assert(icd->GetPhysicalDeviceWaylandPresentationSupportKHR && + "loader: null GetPhysicalDeviceWaylandPresentationSupportKHR ICD " + "pointer"); + + return icd->GetPhysicalDeviceWaylandPresentationSupportKHR( + phys_dev->phys_dev, queueFamilyIndex, display); +} +#endif // VK_USE_PLATFORM_WAYLAND_KHR + +#ifdef VK_USE_PLATFORM_XCB_KHR + +/* + * Functions for the VK_KHR_xcb_surface extension: + */ + +/* + * This is the trampoline entrypoint + * for CreateXcbSurfaceKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateXcbSurfaceKHR(VkInstance instance, + const VkXcbSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(instance); + VkResult res; + + res = + disp->CreateXcbSurfaceKHR(instance, pCreateInfo, pAllocator, pSurface); + return res; +} + +/* + * This is the instance chain terminator function + * for CreateXcbSurfaceKHR + */ +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateXcbSurfaceKHR(VkInstance instance, + const VkXcbSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface) { + struct loader_instance *ptr_instance = loader_get_instance(instance); + VkIcdSurfaceXcb *pIcdSurface = NULL; + + pIcdSurface = loader_heap_alloc(ptr_instance, sizeof(VkIcdSurfaceXcb), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (pIcdSurface == NULL) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + pIcdSurface->base.platform = VK_ICD_WSI_PLATFORM_XCB; + pIcdSurface->connection = pCreateInfo->connection; + pIcdSurface->window = pCreateInfo->window; + + *pSurface = (VkSurfaceKHR)pIcdSurface; + + return VK_SUCCESS; +} + +/* + * This is the trampoline entrypoint + * for 
GetPhysicalDeviceXcbPresentationSupportKHR + */ +LOADER_EXPORT VKAPI_ATTR VkBool32 VKAPI_CALL +vkGetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + xcb_connection_t *connection, + xcb_visualid_t visual_id) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(physicalDevice); + VkBool32 res = disp->GetPhysicalDeviceXcbPresentationSupportKHR( + physicalDevice, queueFamilyIndex, connection, visual_id); + return res; +} + +/* + * This is the instance chain terminator function + * for GetPhysicalDeviceXcbPresentationSupportKHR + */ +VKAPI_ATTR VkBool32 VKAPI_CALL +loader_GetPhysicalDeviceXcbPresentationSupportKHR( + VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, + xcb_connection_t *connection, xcb_visualid_t visual_id) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + assert( + icd->GetPhysicalDeviceXcbPresentationSupportKHR && + "loader: null GetPhysicalDeviceXcbPresentationSupportKHR ICD pointer"); + + return icd->GetPhysicalDeviceXcbPresentationSupportKHR( + phys_dev->phys_dev, queueFamilyIndex, connection, visual_id); +} +#endif // VK_USE_PLATFORM_XCB_KHR + +#ifdef VK_USE_PLATFORM_XLIB_KHR + +/* + * Functions for the VK_KHR_xlib_surface extension: + */ + +/* + * This is the trampoline entrypoint + * for CreateXlibSurfaceKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateXlibSurfaceKHR(VkInstance instance, + const VkXlibSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(instance); + VkResult res; + + res = + disp->CreateXlibSurfaceKHR(instance, pCreateInfo, pAllocator, pSurface); + return res; +} + +/* + * This is the instance chain terminator function + * for CreateXlibSurfaceKHR + */ +VKAPI_ATTR VkResult VKAPI_CALL 
+loader_CreateXlibSurfaceKHR(VkInstance instance, + const VkXlibSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface) { + struct loader_instance *ptr_instance = loader_get_instance(instance); + VkIcdSurfaceXlib *pIcdSurface = NULL; + + pIcdSurface = loader_heap_alloc(ptr_instance, sizeof(VkIcdSurfaceXlib), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (pIcdSurface == NULL) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + pIcdSurface->base.platform = VK_ICD_WSI_PLATFORM_XLIB; + pIcdSurface->dpy = pCreateInfo->dpy; + pIcdSurface->window = pCreateInfo->window; + + *pSurface = (VkSurfaceKHR)pIcdSurface; + + return VK_SUCCESS; +} + +/* + * This is the trampoline entrypoint + * for GetPhysicalDeviceXlibPresentationSupportKHR + */ +LOADER_EXPORT VKAPI_ATTR VkBool32 VKAPI_CALL +vkGetPhysicalDeviceXlibPresentationSupportKHR(VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + Display *dpy, VisualID visualID) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(physicalDevice); + VkBool32 res = disp->GetPhysicalDeviceXlibPresentationSupportKHR( + physicalDevice, queueFamilyIndex, dpy, visualID); + return res; +} + +/* + * This is the instance chain terminator function + * for GetPhysicalDeviceXlibPresentationSupportKHR + */ +VKAPI_ATTR VkBool32 VKAPI_CALL +loader_GetPhysicalDeviceXlibPresentationSupportKHR( + VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, Display *dpy, + VisualID visualID) { + struct loader_physical_device *phys_dev = + (struct loader_physical_device *)physicalDevice; + struct loader_icd *icd = phys_dev->this_icd; + + assert( + icd->GetPhysicalDeviceXlibPresentationSupportKHR && + "loader: null GetPhysicalDeviceXlibPresentationSupportKHR ICD pointer"); + + return icd->GetPhysicalDeviceXlibPresentationSupportKHR( + phys_dev->phys_dev, queueFamilyIndex, dpy, visualID); +} +#endif // VK_USE_PLATFORM_XLIB_KHR + +#ifdef VK_USE_PLATFORM_ANDROID_KHR + +/* + * 
Functions for the VK_KHR_android_surface extension: + */ + +/* + * This is the trampoline entrypoint + * for CreateAndroidSurfaceKHR + */ +LOADER_EXPORT VKAPI_ATTR VkResult VKAPI_CALL +vkCreateAndroidSurfaceKHR(VkInstance instance, ANativeWindow *window, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface) { + const VkLayerInstanceDispatchTable *disp; + disp = loader_get_instance_dispatch(instance); + VkResult res; + + res = disp->CreateAndroidSurfaceKHR(instance, window, pAllocator, pSurface); + return res; +} + +/* + * This is the instance chain terminator function + * for CreateAndroidSurfaceKHR + */ +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateAndroidSurfaceKHR(VkInstance instance, ANativeWindow *window, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface) { + struct loader_instance *ptr_instance = loader_get_instance(instance); + VkIcdSurfaceAndroid *pIcdSurface = NULL; + + pIcdSurface = loader_heap_alloc(ptr_instance, sizeof(VkIcdSurfaceAndroid), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (pIcdSurface == NULL) { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + pIcdSurface->base.platform = VK_ICD_WSI_PLATFORM_ANDROID; + pIcdSurface->window = window; + + *pSurface = (VkSurfaceKHR)pIcdSurface; + + return VK_SUCCESS; +} + +#endif // VK_USE_PLATFORM_ANDROID_KHR + +bool wsi_swapchain_instance_gpa(struct loader_instance *ptr_instance, + const char *name, void **addr) { + *addr = NULL; + + /* + * Functions for the VK_KHR_surface extension: + */ + if (!strcmp("vkDestroySurfaceKHR", name)) { + *addr = ptr_instance->wsi_surface_enabled ? (void *)vkDestroySurfaceKHR + : NULL; + return true; + } + if (!strcmp("vkGetPhysicalDeviceSurfaceSupportKHR", name)) { + *addr = ptr_instance->wsi_surface_enabled + ? (void *)vkGetPhysicalDeviceSurfaceSupportKHR + : NULL; + return true; + } + if (!strcmp("vkGetPhysicalDeviceSurfaceCapabilitiesKHR", name)) { + *addr = ptr_instance->wsi_surface_enabled + ?
(void *)vkGetPhysicalDeviceSurfaceCapabilitiesKHR + : NULL; + return true; + } + if (!strcmp("vkGetPhysicalDeviceSurfaceFormatsKHR", name)) { + *addr = ptr_instance->wsi_surface_enabled + ? (void *)vkGetPhysicalDeviceSurfaceFormatsKHR + : NULL; + return true; + } + if (!strcmp("vkGetPhysicalDeviceSurfacePresentModesKHR", name)) { + *addr = ptr_instance->wsi_surface_enabled + ? (void *)vkGetPhysicalDeviceSurfacePresentModesKHR + : NULL; + return true; + } + + /* + * Functions for the VK_KHR_swapchain extension: + * + * Note: This is a device extension, and its functions are statically + * exported from the loader. Per Khronos decisions, the the loader's GIPA + * function will return the trampoline function for such device-extension + * functions, regardless of whether the extension has been enabled. + */ + if (!strcmp("vkCreateSwapchainKHR", name)) { + *addr = (void *)vkCreateSwapchainKHR; + return true; + } + if (!strcmp("vkDestroySwapchainKHR", name)) { + *addr = (void *)vkDestroySwapchainKHR; + return true; + } + if (!strcmp("vkGetSwapchainImagesKHR", name)) { + *addr = (void *)vkGetSwapchainImagesKHR; + return true; + } + if (!strcmp("vkAcquireNextImageKHR", name)) { + *addr = (void *)vkAcquireNextImageKHR; + return true; + } + if (!strcmp("vkQueuePresentKHR", name)) { + *addr = (void *)vkQueuePresentKHR; + return true; + } + +#ifdef VK_USE_PLATFORM_WIN32_KHR + /* + * Functions for the VK_KHR_win32_surface extension: + */ + if (!strcmp("vkCreateWin32SurfaceKHR", name)) { + *addr = ptr_instance->wsi_win32_surface_enabled + ? (void *)vkCreateWin32SurfaceKHR + : NULL; + return true; + } + if (!strcmp("vkGetPhysicalDeviceWin32PresentationSupportKHR", name)) { + *addr = ptr_instance->wsi_win32_surface_enabled + ? 
(void *)vkGetPhysicalDeviceWin32PresentationSupportKHR + : NULL; + return true; + } +#endif // VK_USE_PLATFORM_WIN32_KHR +#ifdef VK_USE_PLATFORM_MIR_KHR + /* + * Functions for the VK_KHR_mir_surface extension: + */ + if (!strcmp("vkCreateMirSurfaceKHR", name)) { + *addr = ptr_instance->wsi_mir_surface_enabled + ? (void *)vkCreateMirSurfaceKHR + : NULL; + return true; + } + if (!strcmp("vkGetPhysicalDeviceMirPresentationSupportKHR", name)) { + *addr = ptr_instance->wsi_mir_surface_enabled + ? (void *)vkGetPhysicalDeviceMirPresentationSupportKHR + : NULL; + return true; + } +#endif // VK_USE_PLATFORM_MIR_KHR +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + /* + * Functions for the VK_KHR_wayland_surface extension: + */ + if (!strcmp("vkCreateWaylandSurfaceKHR", name)) { + *addr = ptr_instance->wsi_wayland_surface_enabled + ? (void *)vkCreateWaylandSurfaceKHR + : NULL; + return true; + } + if (!strcmp("vkGetPhysicalDeviceWaylandPresentationSupportKHR", name)) { + *addr = + ptr_instance->wsi_wayland_surface_enabled + ? (void *)vkGetPhysicalDeviceWaylandPresentationSupportKHR + : NULL; + return true; + } +#endif // VK_USE_PLATFORM_WAYLAND_KHR +#ifdef VK_USE_PLATFORM_XCB_KHR + /* + * Functions for the VK_KHR_xcb_surface extension: + */ + if (!strcmp("vkCreateXcbSurfaceKHR", name)) { + *addr = ptr_instance->wsi_xcb_surface_enabled + ? (void *)vkCreateXcbSurfaceKHR + : NULL; + return true; + } + if (!strcmp("vkGetPhysicalDeviceXcbPresentationSupportKHR", name)) { + *addr = + ptr_instance->wsi_xcb_surface_enabled + ? (void *)vkGetPhysicalDeviceXcbPresentationSupportKHR + : NULL; + return true; + } +#endif // VK_USE_PLATFORM_XCB_KHR +#ifdef VK_USE_PLATFORM_XLIB_KHR + /* + * Functions for the VK_KHR_xlib_surface extension: + */ + if (!strcmp("vkCreateXlibSurfaceKHR", name)) { + *addr = ptr_instance->wsi_xlib_surface_enabled + ?
(void *)vkCreateXlibSurfaceKHR + : NULL; + return true; + } + if (!strcmp("vkGetPhysicalDeviceXlibPresentationSupportKHR", + name)) { + *addr = + ptr_instance->wsi_xlib_surface_enabled + ? (void *)vkGetPhysicalDeviceXlibPresentationSupportKHR + : NULL; + return true; + } +#endif // VK_USE_PLATFORM_XLIB_KHR +#ifdef VK_USE_PLATFORM_ANDROID_KHR + /* + * Functions for the VK_KHR_android_surface extension: + */ + if (!strcmp("vkCreateAndroidSurfaceKHR", name)) { + *addr = ptr_instance->wsi_android_surface_enabled + ? (void *)vkCreateAndroidSurfaceKHR + : NULL; + return true; + } +#endif // VK_USE_PLATFORM_ANDROID_KHR + + return false; + } diff --git a/third_party/vulkan/loader/wsi.h b/third_party/vulkan/loader/wsi.h new file mode 100644 index 000000000..c0213313d --- /dev/null +++ b/third_party/vulkan/loader/wsi.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. + * + * Author: Ian Elliott + * + */ + +#include "vk_loader_platform.h" +#include "loader.h" + +bool wsi_swapchain_instance_gpa(struct loader_instance *ptr_instance, + const char *name, void **addr); +void wsi_add_instance_extensions(const struct loader_instance *inst, + struct loader_extension_list *ext_list); + +void wsi_create_instance(struct loader_instance *ptr_instance, + const VkInstanceCreateInfo *pCreateInfo); + +VKAPI_ATTR void VKAPI_CALL +loader_DestroySurfaceKHR(VkInstance instance, VkSurfaceKHR surface, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL +loader_GetPhysicalDeviceSurfaceSupportKHR(VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR surface, + VkBool32 *pSupported); + +VKAPI_ATTR VkResult VKAPI_CALL loader_GetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, + VkSurfaceCapabilitiesKHR *pSurfaceCapabilities); + +VKAPI_ATTR VkResult VKAPI_CALL +loader_GetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormatKHR *pSurfaceFormats); + +VKAPI_ATTR VkResult VKAPI_CALL +loader_GetPhysicalDeviceSurfacePresentModesKHR(VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t *pPresentModeCount, + VkPresentModeKHR *pPresentModes); + +#ifdef VK_USE_PLATFORM_WIN32_KHR +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateWin32SurfaceKHR(VkInstance instance, + const VkWin32SurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface); +VKAPI_ATTR VkBool32 VKAPI_CALL +loader_GetPhysicalDeviceWin32PresentationSupportKHR( + VkPhysicalDevice physicalDevice, 
uint32_t queueFamilyIndex); +#endif +#ifdef VK_USE_PLATFORM_MIR_KHR +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateMirSurfaceKHR(VkInstance instance, + const VkMirSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface); +VKAPI_ATTR VkBool32 VKAPI_CALL +loader_GetPhysicalDeviceMirPresentationSupportKHR( + VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, + MirConnection *connection); +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateWaylandSurfaceKHR(VkInstance instance, + const VkWaylandSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface); +VKAPI_ATTR VkBool32 VKAPI_CALL +loader_GetPhysicalDeviceWaylandPresentationSupportKHR( + VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, + struct wl_display *display); +#endif +#ifdef VK_USE_PLATFORM_XCB_KHR +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateXcbSurfaceKHR(VkInstance instance, + const VkXcbSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface); + +VKAPI_ATTR VkBool32 VKAPI_CALL +loader_GetPhysicalDeviceXcbPresentationSupportKHR( + VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, + xcb_connection_t *connection, xcb_visualid_t visual_id); +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR +VKAPI_ATTR VkResult VKAPI_CALL +loader_CreateXlibSurfaceKHR(VkInstance instance, + const VkXlibSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface); +VKAPI_ATTR VkBool32 VKAPI_CALL +loader_GetPhysicalDeviceXlibPresentationSupportKHR( + VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, Display *dpy, + VisualID visualID); +#endif diff --git a/third_party/vulkan/vk_debug_marker_layer.h b/third_party/vulkan/vk_debug_marker_layer.h new file mode 100644 index 000000000..e882b02b4 --- /dev/null +++ b/third_party/vulkan/vk_debug_marker_layer.h @@ -0,0 +1,44 @@ +// +// File: 
vk_debug_marker_layer.h +// +/* + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. + * + * Authors: + * Jon Ashburn + * Courtney Goeltzenleuchter + */ + +#pragma once + +#include "vulkan.h" +#include "vk_lunarg_debug_marker.h" +#include "vk_layer.h" + +typedef struct VkLayerDebugMarkerDispatchTable_ { + PFN_vkCmdDbgMarkerBegin CmdDbgMarkerBegin; + PFN_vkCmdDbgMarkerEnd CmdDbgMarkerEnd; + PFN_vkDbgSetObjectTag DbgSetObjectTag; + PFN_vkDbgSetObjectName DbgSetObjectName; +} VkLayerDebugMarkerDispatchTable; diff --git a/third_party/vulkan/vk_icd.h b/third_party/vulkan/vk_icd.h new file mode 100644 index 000000000..60b29e037 --- /dev/null +++ b/third_party/vulkan/vk_icd.h @@ -0,0 +1,114 @@ +// +// File: vk_icd.h +// +/* + * Copyright (c) 2015-2016 The Khronos Group Inc. 
+ * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. + * + */ + +#ifndef VKICD_H +#define VKICD_H + +#include "vk_platform.h" + +/* + * The ICD must reserve space for a pointer for the loader's dispatch + * table, at the start of . + * The ICD must initialize this variable using the SET_LOADER_MAGIC_VALUE macro. 
+ */ + +#define ICD_LOADER_MAGIC 0x01CDC0DE + +typedef union _VK_LOADER_DATA { + uintptr_t loaderMagic; + void *loaderData; +} VK_LOADER_DATA; + +static inline void set_loader_magic_value(void *pNewObject) { + VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject; + loader_info->loaderMagic = ICD_LOADER_MAGIC; +} + +static inline bool valid_loader_magic_value(void *pNewObject) { + const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject; + return (loader_info->loaderMagic & 0xffffffff) == ICD_LOADER_MAGIC; +} + +/* + * Windows and Linux ICDs will treat VkSurfaceKHR as a pointer to a struct that + * contains the platform-specific connection and surface information. + */ +typedef enum _VkIcdWsiPlatform { + VK_ICD_WSI_PLATFORM_MIR, + VK_ICD_WSI_PLATFORM_WAYLAND, + VK_ICD_WSI_PLATFORM_WIN32, + VK_ICD_WSI_PLATFORM_XCB, + VK_ICD_WSI_PLATFORM_XLIB, +} VkIcdWsiPlatform; + +typedef struct _VkIcdSurfaceBase { + VkIcdWsiPlatform platform; +} VkIcdSurfaceBase; + +#ifdef VK_USE_PLATFORM_MIR_KHR +typedef struct _VkIcdSurfaceMir { + VkIcdSurfaceBase base; + MirConnection *connection; + MirSurface *mirSurface; +} VkIcdSurfaceMir; +#endif // VK_USE_PLATFORM_MIR_KHR + +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +typedef struct _VkIcdSurfaceWayland { + VkIcdSurfaceBase base; + struct wl_display *display; + struct wl_surface *surface; +} VkIcdSurfaceWayland; +#endif // VK_USE_PLATFORM_WAYLAND_KHR + +#ifdef VK_USE_PLATFORM_WIN32_KHR +typedef struct _VkIcdSurfaceWin32 { + VkIcdSurfaceBase base; + HINSTANCE hinstance; + HWND hwnd; +} VkIcdSurfaceWin32; +#endif // VK_USE_PLATFORM_WIN32_KHR + +#ifdef VK_USE_PLATFORM_XCB_KHR +typedef struct _VkIcdSurfaceXcb { + VkIcdSurfaceBase base; + xcb_connection_t *connection; + xcb_window_t window; +} VkIcdSurfaceXcb; +#endif // VK_USE_PLATFORM_XCB_KHR + +#ifdef VK_USE_PLATFORM_XLIB_KHR +typedef struct _VkIcdSurfaceXlib { + VkIcdSurfaceBase base; + Display *dpy; + Window window; +} VkIcdSurfaceXlib; +#endif // VK_USE_PLATFORM_XLIB_KHR + +#endif 
// VKICD_H diff --git a/third_party/vulkan/vk_layer.h b/third_party/vulkan/vk_layer.h new file mode 100644 index 000000000..248704340 --- /dev/null +++ b/third_party/vulkan/vk_layer.h @@ -0,0 +1,313 @@ +// +// File: vk_layer.h +// +/* + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. 
+ * + */ + +/* Need to define dispatch table + * Core struct can then have ptr to dispatch table at the top + * Along with object ptrs for current and next OBJ + */ +#pragma once + +#include "vulkan.h" +#include "vk_lunarg_debug_marker.h" +#if defined(__GNUC__) && __GNUC__ >= 4 +#define VK_LAYER_EXPORT __attribute__((visibility("default"))) +#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590) +#define VK_LAYER_EXPORT __attribute__((visibility("default"))) +#else +#define VK_LAYER_EXPORT +#endif + +typedef struct VkLayerDispatchTable_ { + PFN_vkGetDeviceProcAddr GetDeviceProcAddr; + PFN_vkDestroyDevice DestroyDevice; + PFN_vkGetDeviceQueue GetDeviceQueue; + PFN_vkQueueSubmit QueueSubmit; + PFN_vkQueueWaitIdle QueueWaitIdle; + PFN_vkDeviceWaitIdle DeviceWaitIdle; + PFN_vkAllocateMemory AllocateMemory; + PFN_vkFreeMemory FreeMemory; + PFN_vkMapMemory MapMemory; + PFN_vkUnmapMemory UnmapMemory; + PFN_vkFlushMappedMemoryRanges FlushMappedMemoryRanges; + PFN_vkInvalidateMappedMemoryRanges InvalidateMappedMemoryRanges; + PFN_vkGetDeviceMemoryCommitment GetDeviceMemoryCommitment; + PFN_vkGetImageSparseMemoryRequirements GetImageSparseMemoryRequirements; + PFN_vkGetImageMemoryRequirements GetImageMemoryRequirements; + PFN_vkGetBufferMemoryRequirements GetBufferMemoryRequirements; + PFN_vkBindImageMemory BindImageMemory; + PFN_vkBindBufferMemory BindBufferMemory; + PFN_vkQueueBindSparse QueueBindSparse; + PFN_vkCreateFence CreateFence; + PFN_vkDestroyFence DestroyFence; + PFN_vkGetFenceStatus GetFenceStatus; + PFN_vkResetFences ResetFences; + PFN_vkWaitForFences WaitForFences; + PFN_vkCreateSemaphore CreateSemaphore; + PFN_vkDestroySemaphore DestroySemaphore; + PFN_vkCreateEvent CreateEvent; + PFN_vkDestroyEvent DestroyEvent; + PFN_vkGetEventStatus GetEventStatus; + PFN_vkSetEvent SetEvent; + PFN_vkResetEvent ResetEvent; + PFN_vkCreateQueryPool CreateQueryPool; + PFN_vkDestroyQueryPool DestroyQueryPool; + PFN_vkGetQueryPoolResults GetQueryPoolResults; + PFN_vkCreateBuffer 
CreateBuffer; + PFN_vkDestroyBuffer DestroyBuffer; + PFN_vkCreateBufferView CreateBufferView; + PFN_vkDestroyBufferView DestroyBufferView; + PFN_vkCreateImage CreateImage; + PFN_vkDestroyImage DestroyImage; + PFN_vkGetImageSubresourceLayout GetImageSubresourceLayout; + PFN_vkCreateImageView CreateImageView; + PFN_vkDestroyImageView DestroyImageView; + PFN_vkCreateShaderModule CreateShaderModule; + PFN_vkDestroyShaderModule DestroyShaderModule; + PFN_vkCreatePipelineCache CreatePipelineCache; + PFN_vkDestroyPipelineCache DestroyPipelineCache; + PFN_vkGetPipelineCacheData GetPipelineCacheData; + PFN_vkMergePipelineCaches MergePipelineCaches; + PFN_vkCreateGraphicsPipelines CreateGraphicsPipelines; + PFN_vkCreateComputePipelines CreateComputePipelines; + PFN_vkDestroyPipeline DestroyPipeline; + PFN_vkCreatePipelineLayout CreatePipelineLayout; + PFN_vkDestroyPipelineLayout DestroyPipelineLayout; + PFN_vkCreateSampler CreateSampler; + PFN_vkDestroySampler DestroySampler; + PFN_vkCreateDescriptorSetLayout CreateDescriptorSetLayout; + PFN_vkDestroyDescriptorSetLayout DestroyDescriptorSetLayout; + PFN_vkCreateDescriptorPool CreateDescriptorPool; + PFN_vkDestroyDescriptorPool DestroyDescriptorPool; + PFN_vkResetDescriptorPool ResetDescriptorPool; + PFN_vkAllocateDescriptorSets AllocateDescriptorSets; + PFN_vkFreeDescriptorSets FreeDescriptorSets; + PFN_vkUpdateDescriptorSets UpdateDescriptorSets; + PFN_vkCreateFramebuffer CreateFramebuffer; + PFN_vkDestroyFramebuffer DestroyFramebuffer; + PFN_vkCreateRenderPass CreateRenderPass; + PFN_vkDestroyRenderPass DestroyRenderPass; + PFN_vkGetRenderAreaGranularity GetRenderAreaGranularity; + PFN_vkCreateCommandPool CreateCommandPool; + PFN_vkDestroyCommandPool DestroyCommandPool; + PFN_vkResetCommandPool ResetCommandPool; + PFN_vkAllocateCommandBuffers AllocateCommandBuffers; + PFN_vkFreeCommandBuffers FreeCommandBuffers; + PFN_vkBeginCommandBuffer BeginCommandBuffer; + PFN_vkEndCommandBuffer EndCommandBuffer; + 
PFN_vkResetCommandBuffer ResetCommandBuffer; + PFN_vkCmdBindPipeline CmdBindPipeline; + PFN_vkCmdBindDescriptorSets CmdBindDescriptorSets; + PFN_vkCmdBindVertexBuffers CmdBindVertexBuffers; + PFN_vkCmdBindIndexBuffer CmdBindIndexBuffer; + PFN_vkCmdSetViewport CmdSetViewport; + PFN_vkCmdSetScissor CmdSetScissor; + PFN_vkCmdSetLineWidth CmdSetLineWidth; + PFN_vkCmdSetDepthBias CmdSetDepthBias; + PFN_vkCmdSetBlendConstants CmdSetBlendConstants; + PFN_vkCmdSetDepthBounds CmdSetDepthBounds; + PFN_vkCmdSetStencilCompareMask CmdSetStencilCompareMask; + PFN_vkCmdSetStencilWriteMask CmdSetStencilWriteMask; + PFN_vkCmdSetStencilReference CmdSetStencilReference; + PFN_vkCmdDraw CmdDraw; + PFN_vkCmdDrawIndexed CmdDrawIndexed; + PFN_vkCmdDrawIndirect CmdDrawIndirect; + PFN_vkCmdDrawIndexedIndirect CmdDrawIndexedIndirect; + PFN_vkCmdDispatch CmdDispatch; + PFN_vkCmdDispatchIndirect CmdDispatchIndirect; + PFN_vkCmdCopyBuffer CmdCopyBuffer; + PFN_vkCmdCopyImage CmdCopyImage; + PFN_vkCmdBlitImage CmdBlitImage; + PFN_vkCmdCopyBufferToImage CmdCopyBufferToImage; + PFN_vkCmdCopyImageToBuffer CmdCopyImageToBuffer; + PFN_vkCmdUpdateBuffer CmdUpdateBuffer; + PFN_vkCmdFillBuffer CmdFillBuffer; + PFN_vkCmdClearColorImage CmdClearColorImage; + PFN_vkCmdClearDepthStencilImage CmdClearDepthStencilImage; + PFN_vkCmdClearAttachments CmdClearAttachments; + PFN_vkCmdResolveImage CmdResolveImage; + PFN_vkCmdSetEvent CmdSetEvent; + PFN_vkCmdResetEvent CmdResetEvent; + PFN_vkCmdWaitEvents CmdWaitEvents; + PFN_vkCmdPipelineBarrier CmdPipelineBarrier; + PFN_vkCmdBeginQuery CmdBeginQuery; + PFN_vkCmdEndQuery CmdEndQuery; + PFN_vkCmdResetQueryPool CmdResetQueryPool; + PFN_vkCmdWriteTimestamp CmdWriteTimestamp; + PFN_vkCmdCopyQueryPoolResults CmdCopyQueryPoolResults; + PFN_vkCmdPushConstants CmdPushConstants; + PFN_vkCmdBeginRenderPass CmdBeginRenderPass; + PFN_vkCmdNextSubpass CmdNextSubpass; + PFN_vkCmdEndRenderPass CmdEndRenderPass; + PFN_vkCmdExecuteCommands CmdExecuteCommands; + 
PFN_vkCreateSwapchainKHR CreateSwapchainKHR; + PFN_vkDestroySwapchainKHR DestroySwapchainKHR; + PFN_vkGetSwapchainImagesKHR GetSwapchainImagesKHR; + PFN_vkAcquireNextImageKHR AcquireNextImageKHR; + PFN_vkQueuePresentKHR QueuePresentKHR; +} VkLayerDispatchTable; + +typedef struct VkLayerInstanceDispatchTable_ { + PFN_vkGetInstanceProcAddr GetInstanceProcAddr; + PFN_vkDestroyInstance DestroyInstance; + PFN_vkEnumeratePhysicalDevices EnumeratePhysicalDevices; + PFN_vkGetPhysicalDeviceFeatures GetPhysicalDeviceFeatures; + PFN_vkGetPhysicalDeviceImageFormatProperties + GetPhysicalDeviceImageFormatProperties; + PFN_vkGetPhysicalDeviceFormatProperties GetPhysicalDeviceFormatProperties; + PFN_vkGetPhysicalDeviceSparseImageFormatProperties + GetPhysicalDeviceSparseImageFormatProperties; + PFN_vkGetPhysicalDeviceProperties GetPhysicalDeviceProperties; + PFN_vkGetPhysicalDeviceQueueFamilyProperties + GetPhysicalDeviceQueueFamilyProperties; + PFN_vkGetPhysicalDeviceMemoryProperties GetPhysicalDeviceMemoryProperties; + PFN_vkEnumerateDeviceExtensionProperties EnumerateDeviceExtensionProperties; + PFN_vkEnumerateDeviceLayerProperties EnumerateDeviceLayerProperties; + PFN_vkDestroySurfaceKHR DestroySurfaceKHR; + PFN_vkGetPhysicalDeviceSurfaceSupportKHR GetPhysicalDeviceSurfaceSupportKHR; + PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR + GetPhysicalDeviceSurfaceCapabilitiesKHR; + PFN_vkGetPhysicalDeviceSurfaceFormatsKHR GetPhysicalDeviceSurfaceFormatsKHR; + PFN_vkGetPhysicalDeviceSurfacePresentModesKHR + GetPhysicalDeviceSurfacePresentModesKHR; + PFN_vkCreateDebugReportCallbackEXT CreateDebugReportCallbackEXT; + PFN_vkDestroyDebugReportCallbackEXT DestroyDebugReportCallbackEXT; + PFN_vkDebugReportMessageEXT DebugReportMessageEXT; +#ifdef VK_USE_PLATFORM_MIR_KHR + PFN_vkCreateMirSurfaceKHR CreateMirSurfaceKHR; + PFN_vkGetPhysicalDeviceMirPresentationSupportKHR + GetPhysicalDeviceMirPresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + PFN_vkCreateWaylandSurfaceKHR 
CreateWaylandSurfaceKHR; + PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR + GetPhysicalDeviceWaylandPresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_WIN32_KHR + PFN_vkCreateWin32SurfaceKHR CreateWin32SurfaceKHR; + PFN_vkGetPhysicalDeviceWin32PresentationSupportKHR + GetPhysicalDeviceWin32PresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_XCB_KHR + PFN_vkCreateXcbSurfaceKHR CreateXcbSurfaceKHR; + PFN_vkGetPhysicalDeviceXcbPresentationSupportKHR + GetPhysicalDeviceXcbPresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR + PFN_vkCreateXlibSurfaceKHR CreateXlibSurfaceKHR; + PFN_vkGetPhysicalDeviceXlibPresentationSupportKHR + GetPhysicalDeviceXlibPresentationSupportKHR; +#endif +#ifdef VK_USE_PLATFORM_ANDROID_KHR + PFN_vkCreateAndroidSurfaceKHR CreateAndroidSurfaceKHR; +#endif +} VkLayerInstanceDispatchTable; + +// LL node for tree of dbg callback functions +typedef struct VkLayerDbgFunctionNode_ { + VkDebugReportCallbackEXT msgCallback; + PFN_vkDebugReportCallbackEXT pfnMsgCallback; + VkFlags msgFlags; + void *pUserData; + struct VkLayerDbgFunctionNode_ *pNext; +} VkLayerDbgFunctionNode; + +typedef enum VkLayerDbgAction_ { + VK_DBG_LAYER_ACTION_IGNORE = 0x0, + VK_DBG_LAYER_ACTION_CALLBACK = 0x1, + VK_DBG_LAYER_ACTION_LOG_MSG = 0x2, + VK_DBG_LAYER_ACTION_BREAK = 0x4, + VK_DBG_LAYER_ACTION_DEBUG_OUTPUT = 0x8, +} VkLayerDbgAction; + +// ------------------------------------------------------------------------------------------------ +// CreateInstance and CreateDevice support structures + +typedef enum VkLayerFunction_ { + VK_LAYER_LINK_INFO = 0, + VK_LAYER_DEVICE_INFO = 1, + VK_LAYER_INSTANCE_INFO = 2 +} VkLayerFunction; + +/* + * When creating the device chain the loader needs to pass + * down information about it's device structure needed at + * the end of the chain. Passing the data via the + * VkLayerInstanceInfo avoids issues with finding the + * exact instance being used. 
+ */ +typedef struct VkLayerInstanceInfo_ { + void *instance_info; + PFN_vkGetInstanceProcAddr pfnNextGetInstanceProcAddr; +} VkLayerInstanceInfo; + +typedef struct VkLayerInstanceLink_ { + struct VkLayerInstanceLink_ *pNext; + PFN_vkGetInstanceProcAddr pfnNextGetInstanceProcAddr; +} VkLayerInstanceLink; + +/* + * When creating the device chain the loader needs to pass + * down information about it's device structure needed at + * the end of the chain. Passing the data via the + * VkLayerDeviceInfo avoids issues with finding the + * exact instance being used. + */ +typedef struct VkLayerDeviceInfo_ { + void *device_info; + PFN_vkGetInstanceProcAddr pfnNextGetInstanceProcAddr; +} VkLayerDeviceInfo; + +typedef struct { + VkStructureType sType; // VK_STRUCTURE_TYPE_LAYER_INSTANCE_CREATE_INFO + const void *pNext; + VkLayerFunction function; + union { + VkLayerInstanceLink *pLayerInfo; + VkLayerInstanceInfo instanceInfo; + } u; +} VkLayerInstanceCreateInfo; + +typedef struct VkLayerDeviceLink_ { + struct VkLayerDeviceLink_ *pNext; + PFN_vkGetInstanceProcAddr pfnNextGetInstanceProcAddr; + PFN_vkGetDeviceProcAddr pfnNextGetDeviceProcAddr; +} VkLayerDeviceLink; + +typedef struct { + VkStructureType sType; // VK_STRUCTURE_TYPE_LAYER_DEVICE_CREATE_INFO + const void *pNext; + VkLayerFunction function; + union { + VkLayerDeviceLink *pLayerInfo; + VkLayerDeviceInfo deviceInfo; + } u; +} VkLayerDeviceCreateInfo; + +// ------------------------------------------------------------------------------------------------ +// API functions diff --git a/third_party/vulkan/vk_lunarg_debug_marker.h b/third_party/vulkan/vk_lunarg_debug_marker.h new file mode 100644 index 000000000..edff2b9ee --- /dev/null +++ b/third_party/vulkan/vk_lunarg_debug_marker.h @@ -0,0 +1,98 @@ +// +// File: vk_lunarg_debug_marker.h +// +/* + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. 
+ * + * Authors: + * Jon Ashburn + * Courtney Goeltzenleuchter + */ + +#ifndef __VK_DEBUG_MARKER_H__ +#define __VK_DEBUG_MARKER_H__ + +#include "vulkan.h" + +#define VK_DEBUG_MARKER_EXTENSION_NUMBER 6 +#define VK_DEBUG_MARKER_EXTENSION_REVISION 1 +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/* +*************************************************************************************************** +* DebugMarker Vulkan Extension API +*************************************************************************************************** +*/ + +#define DEBUG_MARKER_EXTENSION_NAME "VK_LUNARG_DEBUG_MARKER" + +// ------------------------------------------------------------------------------------------------ +// Enumerations + +#define VK_DEBUG_MARKER_ENUM_EXTEND(type, id) \ + ((type)(VK_DEBUG_MARKER_EXTENSION_NUMBER * -1000 + (id))) + +#define VK_OBJECT_INFO_TYPE_DBG_OBJECT_TAG \ + VK_DEBUG_MARKER_ENUM_EXTEND(VkDbgObjectInfoType, 0) +#define VK_OBJECT_INFO_TYPE_DBG_OBJECT_NAME \ + VK_DEBUG_MARKER_ENUM_EXTEND(VkDbgObjectInfoType, 1) + +// ------------------------------------------------------------------------------------------------ +// API functions + +typedef void(VKAPI_PTR *PFN_vkCmdDbgMarkerBegin)(VkCommandBuffer commandBuffer, + const char *pMarker); +typedef void(VKAPI_PTR *PFN_vkCmdDbgMarkerEnd)(VkCommandBuffer commandBuffer); +typedef VkResult(VKAPI_PTR *PFN_vkDbgSetObjectTag)( + VkDevice device, VkDebugReportObjectTypeEXT objType, uint64_t object, + size_t tagSize, const void *pTag); +typedef VkResult(VKAPI_PTR *PFN_vkDbgSetObjectName)( + VkDevice device, VkDebugReportObjectTypeEXT objType, uint64_t object, + size_t nameSize, const char *pName); + +#ifndef VK_NO_PROTOTYPES + +// DebugMarker extension entrypoints +VKAPI_ATTR void VKAPI_CALL +vkCmdDbgMarkerBegin(VkCommandBuffer commandBuffer, const char *pMarker); + +VKAPI_ATTR void VKAPI_CALL vkCmdDbgMarkerEnd(VkCommandBuffer commandBuffer); + +VKAPI_ATTR VkResult VKAPI_CALL +vkDbgSetObjectTag(VkDevice 
device, VkDebugReportObjectTypeEXT objType, + uint64_t object, size_t tagSize, const void *pTag); + +VKAPI_ATTR VkResult VKAPI_CALL +vkDbgSetObjectName(VkDevice device, VkDebugReportObjectTypeEXT objType, + uint64_t object, size_t nameSize, const char *pName); + +#endif // VK_NO_PROTOTYPES + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // __VK_DEBUG_MARKER_H__ diff --git a/third_party/vulkan/vk_platform.h b/third_party/vulkan/vk_platform.h new file mode 100644 index 000000000..a53e725a9 --- /dev/null +++ b/third_party/vulkan/vk_platform.h @@ -0,0 +1,127 @@ +// +// File: vk_platform.h +// +/* +** Copyright (c) 2014-2015 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+*/ + + +#ifndef __VK_PLATFORM_H__ +#define __VK_PLATFORM_H__ + +#ifdef __cplusplus +extern "C" +{ +#endif // __cplusplus + +/* +*************************************************************************************************** +* Platform-specific directives and type declarations +*************************************************************************************************** +*/ + +/* Platform-specific calling convention macros. + * + * Platforms should define these so that Vulkan clients call Vulkan commands + * with the same calling conventions that the Vulkan implementation expects. + * + * VKAPI_ATTR - Placed before the return type in function declarations. + * Useful for C++11 and GCC/Clang-style function attribute syntax. + * VKAPI_CALL - Placed after the return type in function declarations. + * Useful for MSVC-style calling convention syntax. + * VKAPI_PTR - Placed between the '(' and '*' in function pointer types. + * + * Function declaration: VKAPI_ATTR void VKAPI_CALL vkCommand(void); + * Function pointer type: typedef void (VKAPI_PTR *PFN_vkCommand)(void); + */ +#if defined(_WIN32) + // On Windows, Vulkan commands use the stdcall convention + #define VKAPI_ATTR + #define VKAPI_CALL __stdcall + #define VKAPI_PTR VKAPI_CALL +#elif defined(__ANDROID__) && defined(__ARM_EABI__) && !defined(__ARM_ARCH_7A__) + // Android does not support Vulkan in native code using the "armeabi" ABI. + #error "Vulkan requires the 'armeabi-v7a' or 'armeabi-v7a-hard' ABI on 32-bit ARM CPUs" +#elif defined(__ANDROID__) && defined(__ARM_ARCH_7A__) + // On Android/ARMv7a, Vulkan functions use the armeabi-v7a-hard calling + // convention, even if the application's native code is compiled with the + // armeabi-v7a calling convention. 
+ #define VKAPI_ATTR __attribute__((pcs("aapcs-vfp"))) + #define VKAPI_CALL + #define VKAPI_PTR VKAPI_ATTR +#else + // On other platforms, use the default calling convention + #define VKAPI_ATTR + #define VKAPI_CALL + #define VKAPI_PTR +#endif + +#include + +#if !defined(VK_NO_STDINT_H) + #if defined(_MSC_VER) && (_MSC_VER < 1600) + typedef signed __int8 int8_t; + typedef unsigned __int8 uint8_t; + typedef signed __int16 int16_t; + typedef unsigned __int16 uint16_t; + typedef signed __int32 int32_t; + typedef unsigned __int32 uint32_t; + typedef signed __int64 int64_t; + typedef unsigned __int64 uint64_t; + #else + #include + #endif +#endif // !defined(VK_NO_STDINT_H) + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +// Platform-specific headers required by platform window system extensions. +// These are enabled prior to #including "vulkan.h". The same enable then +// controls inclusion of the extension interfaces in vulkan.h. + +#ifdef VK_USE_PLATFORM_ANDROID_KHR +#include +#endif + +#ifdef VK_USE_PLATFORM_MIR_KHR +#include +#endif + +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +#include +#endif + +#ifdef VK_USE_PLATFORM_WIN32_KHR +#include +#endif + +#ifdef VK_USE_PLATFORM_XLIB_KHR +#include +#endif + +#ifdef VK_USE_PLATFORM_XCB_KHR +#include +#endif + +#endif // __VK_PLATFORM_H__ diff --git a/third_party/vulkan/vk_sdk_platform.h b/third_party/vulkan/vk_sdk_platform.h new file mode 100644 index 000000000..f79396bac --- /dev/null +++ b/third_party/vulkan/vk_sdk_platform.h @@ -0,0 +1,53 @@ +// +// File: vk_sdk_platform.h +// +/* + * Copyright (c) 2015-2016 The Khronos Group Inc. + * Copyright (c) 2015-2016 Valve Corporation + * Copyright (c) 2015-2016 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and/or associated documentation files (the "Materials"), to + * deal in the Materials without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Materials, and to permit persons to whom the Materials are + * furnished to do so, subject to the following conditions: + * + * The above copyright notice(s) and this permission notice shall be included in + * all copies or substantial portions of the Materials. + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE + * USE OR OTHER DEALINGS IN THE MATERIALS. + */ + +#ifndef VK_SDK_PLATFORM_H +#define VK_SDK_PLATFORM_H + +#if defined(_WIN32) +#define NOMINMAX +#ifndef __cplusplus +#undef inline +#define inline __inline +#endif // __cplusplus + +#if (defined(_MSC_VER) && _MSC_VER < 1900 /*vs2015*/) +// C99: +// Microsoft didn't implement C99 in Visual Studio; but started adding it with +// VS2013. However, VS2013 still didn't have snprintf(). The following is a +// work-around (Note: The _CRT_SECURE_NO_WARNINGS macro must be set in the +// "CMakeLists.txt" file). +// NOTE: This is fixed in Visual Studio 2015. 
+#define snprintf _snprintf +#endif + +#define strdup _strdup + +#endif // _WIN32 + +#endif // VK_SDK_PLATFORM_H diff --git a/third_party/vulkan/vulkan.h b/third_party/vulkan/vulkan.h new file mode 100644 index 000000000..cd6a71ac1 --- /dev/null +++ b/third_party/vulkan/vulkan.h @@ -0,0 +1,3775 @@ +#ifndef __vulkan_h_ +#define __vulkan_h_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** Copyright (c) 2015-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* +** This header is generated from the Khronos Vulkan XML API Registry. 
+** +*/ + + +#define VK_VERSION_1_0 1 +#include "vk_platform.h" + +#define VK_MAKE_VERSION(major, minor, patch) \ + (((major) << 22) | ((minor) << 12) | (patch)) + +// Vulkan API version supported by this file +#define VK_API_VERSION VK_MAKE_VERSION(1, 0, 3) + +#define VK_VERSION_MAJOR(version) ((uint32_t)(version) >> 22) +#define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff) +#define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff) + +#define VK_NULL_HANDLE 0 + + + +#define VK_DEFINE_HANDLE(object) typedef struct object##_T* object; + + +#if defined(__LP64__) || defined(_WIN64) || defined(__x86_64__) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) + #define VK_DEFINE_NON_DISPATCHABLE_HANDLE(object) typedef struct object##_T *object; +#else + #define VK_DEFINE_NON_DISPATCHABLE_HANDLE(object) typedef uint64_t object; +#endif + + + +typedef uint32_t VkFlags; +typedef uint32_t VkBool32; +typedef uint64_t VkDeviceSize; +typedef uint32_t VkSampleMask; + +VK_DEFINE_HANDLE(VkInstance) +VK_DEFINE_HANDLE(VkPhysicalDevice) +VK_DEFINE_HANDLE(VkDevice) +VK_DEFINE_HANDLE(VkQueue) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSemaphore) +VK_DEFINE_HANDLE(VkCommandBuffer) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFence) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDeviceMemory) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBuffer) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkImage) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkEvent) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkQueryPool) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBufferView) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkImageView) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkShaderModule) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineCache) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineLayout) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkRenderPass) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipeline) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorSetLayout) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSampler) 
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorPool) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorSet) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFramebuffer) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkCommandPool) + +#define VK_LOD_CLAMP_NONE 1000.0f +#define VK_REMAINING_MIP_LEVELS (~0U) +#define VK_REMAINING_ARRAY_LAYERS (~0U) +#define VK_WHOLE_SIZE (~0ULL) +#define VK_ATTACHMENT_UNUSED (~0U) +#define VK_TRUE 1 +#define VK_FALSE 0 +#define VK_QUEUE_FAMILY_IGNORED (~0U) +#define VK_SUBPASS_EXTERNAL (~0U) +#define VK_MAX_PHYSICAL_DEVICE_NAME_SIZE 256 +#define VK_UUID_SIZE 16 +#define VK_MAX_MEMORY_TYPES 32 +#define VK_MAX_MEMORY_HEAPS 16 +#define VK_MAX_EXTENSION_NAME_SIZE 256 +#define VK_MAX_DESCRIPTION_SIZE 256 + + +typedef enum VkPipelineCacheHeaderVersion { + VK_PIPELINE_CACHE_HEADER_VERSION_ONE = 1, + VK_PIPELINE_CACHE_HEADER_VERSION_BEGIN_RANGE = VK_PIPELINE_CACHE_HEADER_VERSION_ONE, + VK_PIPELINE_CACHE_HEADER_VERSION_END_RANGE = VK_PIPELINE_CACHE_HEADER_VERSION_ONE, + VK_PIPELINE_CACHE_HEADER_VERSION_RANGE_SIZE = (VK_PIPELINE_CACHE_HEADER_VERSION_ONE - VK_PIPELINE_CACHE_HEADER_VERSION_ONE + 1), + VK_PIPELINE_CACHE_HEADER_VERSION_MAX_ENUM = 0x7FFFFFFF +} VkPipelineCacheHeaderVersion; + +typedef enum VkResult { + VK_SUCCESS = 0, + VK_NOT_READY = 1, + VK_TIMEOUT = 2, + VK_EVENT_SET = 3, + VK_EVENT_RESET = 4, + VK_INCOMPLETE = 5, + VK_ERROR_OUT_OF_HOST_MEMORY = -1, + VK_ERROR_OUT_OF_DEVICE_MEMORY = -2, + VK_ERROR_INITIALIZATION_FAILED = -3, + VK_ERROR_DEVICE_LOST = -4, + VK_ERROR_MEMORY_MAP_FAILED = -5, + VK_ERROR_LAYER_NOT_PRESENT = -6, + VK_ERROR_EXTENSION_NOT_PRESENT = -7, + VK_ERROR_FEATURE_NOT_PRESENT = -8, + VK_ERROR_INCOMPATIBLE_DRIVER = -9, + VK_ERROR_TOO_MANY_OBJECTS = -10, + VK_ERROR_FORMAT_NOT_SUPPORTED = -11, + VK_ERROR_SURFACE_LOST_KHR = -1000000000, + VK_ERROR_NATIVE_WINDOW_IN_USE_KHR = -1000000001, + VK_SUBOPTIMAL_KHR = 1000001003, + VK_ERROR_OUT_OF_DATE_KHR = -1000001004, + VK_ERROR_INCOMPATIBLE_DISPLAY_KHR = -1000003001, + VK_ERROR_VALIDATION_FAILED_EXT 
= -1000011001, + VK_RESULT_BEGIN_RANGE = VK_ERROR_FORMAT_NOT_SUPPORTED, + VK_RESULT_END_RANGE = VK_INCOMPLETE, + VK_RESULT_RANGE_SIZE = (VK_INCOMPLETE - VK_ERROR_FORMAT_NOT_SUPPORTED + 1), + VK_RESULT_MAX_ENUM = 0x7FFFFFFF +} VkResult; + +typedef enum VkStructureType { + VK_STRUCTURE_TYPE_APPLICATION_INFO = 0, + VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 1, + VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO = 2, + VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO = 3, + VK_STRUCTURE_TYPE_SUBMIT_INFO = 4, + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO = 5, + VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE = 6, + VK_STRUCTURE_TYPE_BIND_SPARSE_INFO = 7, + VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 8, + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 9, + VK_STRUCTURE_TYPE_EVENT_CREATE_INFO = 10, + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO = 11, + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 12, + VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO = 13, + VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO = 14, + VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO = 15, + VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 16, + VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO = 17, + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 18, + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 19, + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO = 20, + VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO = 21, + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO = 22, + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO = 23, + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO = 24, + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO = 25, + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO = 26, + VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO = 27, + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 28, + VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 29, + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 30, + VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 31, + 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 32, + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO = 33, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO = 34, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET = 35, + VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET = 36, + VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 37, + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 38, + VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO = 39, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO = 40, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO = 41, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO = 42, + VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 43, + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 44, + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 45, + VK_STRUCTURE_TYPE_MEMORY_BARRIER = 46, + VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO = 47, + VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO = 48, + VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR = 1000001000, + VK_STRUCTURE_TYPE_PRESENT_INFO_KHR = 1000001001, + VK_STRUCTURE_TYPE_DISPLAY_MODE_CREATE_INFO_KHR = 1000002000, + VK_STRUCTURE_TYPE_DISPLAY_SURFACE_CREATE_INFO_KHR = 1000002001, + VK_STRUCTURE_TYPE_DISPLAY_PRESENT_INFO_KHR = 1000003000, + VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR = 1000004000, + VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR = 1000005000, + VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR = 1000006000, + VK_STRUCTURE_TYPE_MIR_SURFACE_CREATE_INFO_KHR = 1000007000, + VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR = 1000008000, + VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR = 1000009000, + VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = 1000011000, + VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, + VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO, + VK_STRUCTURE_TYPE_RANGE_SIZE = (VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), + VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkStructureType; + +typedef enum VkSystemAllocationScope { + 
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND = 0, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT = 1, + VK_SYSTEM_ALLOCATION_SCOPE_CACHE = 2, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE = 3, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE = 4, + VK_SYSTEM_ALLOCATION_SCOPE_BEGIN_RANGE = VK_SYSTEM_ALLOCATION_SCOPE_COMMAND, + VK_SYSTEM_ALLOCATION_SCOPE_END_RANGE = VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE, + VK_SYSTEM_ALLOCATION_SCOPE_RANGE_SIZE = (VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND + 1), + VK_SYSTEM_ALLOCATION_SCOPE_MAX_ENUM = 0x7FFFFFFF +} VkSystemAllocationScope; + +typedef enum VkInternalAllocationType { + VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE = 0, + VK_INTERNAL_ALLOCATION_TYPE_BEGIN_RANGE = VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE, + VK_INTERNAL_ALLOCATION_TYPE_END_RANGE = VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE, + VK_INTERNAL_ALLOCATION_TYPE_RANGE_SIZE = (VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE - VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE + 1), + VK_INTERNAL_ALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkInternalAllocationType; + +typedef enum VkFormat { + VK_FORMAT_UNDEFINED = 0, + VK_FORMAT_R4G4_UNORM_PACK8 = 1, + VK_FORMAT_R4G4B4A4_UNORM_PACK16 = 2, + VK_FORMAT_B4G4R4A4_UNORM_PACK16 = 3, + VK_FORMAT_R5G6B5_UNORM_PACK16 = 4, + VK_FORMAT_B5G6R5_UNORM_PACK16 = 5, + VK_FORMAT_R5G5B5A1_UNORM_PACK16 = 6, + VK_FORMAT_B5G5R5A1_UNORM_PACK16 = 7, + VK_FORMAT_A1R5G5B5_UNORM_PACK16 = 8, + VK_FORMAT_R8_UNORM = 9, + VK_FORMAT_R8_SNORM = 10, + VK_FORMAT_R8_USCALED = 11, + VK_FORMAT_R8_SSCALED = 12, + VK_FORMAT_R8_UINT = 13, + VK_FORMAT_R8_SINT = 14, + VK_FORMAT_R8_SRGB = 15, + VK_FORMAT_R8G8_UNORM = 16, + VK_FORMAT_R8G8_SNORM = 17, + VK_FORMAT_R8G8_USCALED = 18, + VK_FORMAT_R8G8_SSCALED = 19, + VK_FORMAT_R8G8_UINT = 20, + VK_FORMAT_R8G8_SINT = 21, + VK_FORMAT_R8G8_SRGB = 22, + VK_FORMAT_R8G8B8_UNORM = 23, + VK_FORMAT_R8G8B8_SNORM = 24, + VK_FORMAT_R8G8B8_USCALED = 25, + VK_FORMAT_R8G8B8_SSCALED = 26, + VK_FORMAT_R8G8B8_UINT = 27, + VK_FORMAT_R8G8B8_SINT = 28, + VK_FORMAT_R8G8B8_SRGB = 29, + 
VK_FORMAT_B8G8R8_UNORM = 30, + VK_FORMAT_B8G8R8_SNORM = 31, + VK_FORMAT_B8G8R8_USCALED = 32, + VK_FORMAT_B8G8R8_SSCALED = 33, + VK_FORMAT_B8G8R8_UINT = 34, + VK_FORMAT_B8G8R8_SINT = 35, + VK_FORMAT_B8G8R8_SRGB = 36, + VK_FORMAT_R8G8B8A8_UNORM = 37, + VK_FORMAT_R8G8B8A8_SNORM = 38, + VK_FORMAT_R8G8B8A8_USCALED = 39, + VK_FORMAT_R8G8B8A8_SSCALED = 40, + VK_FORMAT_R8G8B8A8_UINT = 41, + VK_FORMAT_R8G8B8A8_SINT = 42, + VK_FORMAT_R8G8B8A8_SRGB = 43, + VK_FORMAT_B8G8R8A8_UNORM = 44, + VK_FORMAT_B8G8R8A8_SNORM = 45, + VK_FORMAT_B8G8R8A8_USCALED = 46, + VK_FORMAT_B8G8R8A8_SSCALED = 47, + VK_FORMAT_B8G8R8A8_UINT = 48, + VK_FORMAT_B8G8R8A8_SINT = 49, + VK_FORMAT_B8G8R8A8_SRGB = 50, + VK_FORMAT_A8B8G8R8_UNORM_PACK32 = 51, + VK_FORMAT_A8B8G8R8_SNORM_PACK32 = 52, + VK_FORMAT_A8B8G8R8_USCALED_PACK32 = 53, + VK_FORMAT_A8B8G8R8_SSCALED_PACK32 = 54, + VK_FORMAT_A8B8G8R8_UINT_PACK32 = 55, + VK_FORMAT_A8B8G8R8_SINT_PACK32 = 56, + VK_FORMAT_A8B8G8R8_SRGB_PACK32 = 57, + VK_FORMAT_A2R10G10B10_UNORM_PACK32 = 58, + VK_FORMAT_A2R10G10B10_SNORM_PACK32 = 59, + VK_FORMAT_A2R10G10B10_USCALED_PACK32 = 60, + VK_FORMAT_A2R10G10B10_SSCALED_PACK32 = 61, + VK_FORMAT_A2R10G10B10_UINT_PACK32 = 62, + VK_FORMAT_A2R10G10B10_SINT_PACK32 = 63, + VK_FORMAT_A2B10G10R10_UNORM_PACK32 = 64, + VK_FORMAT_A2B10G10R10_SNORM_PACK32 = 65, + VK_FORMAT_A2B10G10R10_USCALED_PACK32 = 66, + VK_FORMAT_A2B10G10R10_SSCALED_PACK32 = 67, + VK_FORMAT_A2B10G10R10_UINT_PACK32 = 68, + VK_FORMAT_A2B10G10R10_SINT_PACK32 = 69, + VK_FORMAT_R16_UNORM = 70, + VK_FORMAT_R16_SNORM = 71, + VK_FORMAT_R16_USCALED = 72, + VK_FORMAT_R16_SSCALED = 73, + VK_FORMAT_R16_UINT = 74, + VK_FORMAT_R16_SINT = 75, + VK_FORMAT_R16_SFLOAT = 76, + VK_FORMAT_R16G16_UNORM = 77, + VK_FORMAT_R16G16_SNORM = 78, + VK_FORMAT_R16G16_USCALED = 79, + VK_FORMAT_R16G16_SSCALED = 80, + VK_FORMAT_R16G16_UINT = 81, + VK_FORMAT_R16G16_SINT = 82, + VK_FORMAT_R16G16_SFLOAT = 83, + VK_FORMAT_R16G16B16_UNORM = 84, + VK_FORMAT_R16G16B16_SNORM = 85, + VK_FORMAT_R16G16B16_USCALED = 
86, + VK_FORMAT_R16G16B16_SSCALED = 87, + VK_FORMAT_R16G16B16_UINT = 88, + VK_FORMAT_R16G16B16_SINT = 89, + VK_FORMAT_R16G16B16_SFLOAT = 90, + VK_FORMAT_R16G16B16A16_UNORM = 91, + VK_FORMAT_R16G16B16A16_SNORM = 92, + VK_FORMAT_R16G16B16A16_USCALED = 93, + VK_FORMAT_R16G16B16A16_SSCALED = 94, + VK_FORMAT_R16G16B16A16_UINT = 95, + VK_FORMAT_R16G16B16A16_SINT = 96, + VK_FORMAT_R16G16B16A16_SFLOAT = 97, + VK_FORMAT_R32_UINT = 98, + VK_FORMAT_R32_SINT = 99, + VK_FORMAT_R32_SFLOAT = 100, + VK_FORMAT_R32G32_UINT = 101, + VK_FORMAT_R32G32_SINT = 102, + VK_FORMAT_R32G32_SFLOAT = 103, + VK_FORMAT_R32G32B32_UINT = 104, + VK_FORMAT_R32G32B32_SINT = 105, + VK_FORMAT_R32G32B32_SFLOAT = 106, + VK_FORMAT_R32G32B32A32_UINT = 107, + VK_FORMAT_R32G32B32A32_SINT = 108, + VK_FORMAT_R32G32B32A32_SFLOAT = 109, + VK_FORMAT_R64_UINT = 110, + VK_FORMAT_R64_SINT = 111, + VK_FORMAT_R64_SFLOAT = 112, + VK_FORMAT_R64G64_UINT = 113, + VK_FORMAT_R64G64_SINT = 114, + VK_FORMAT_R64G64_SFLOAT = 115, + VK_FORMAT_R64G64B64_UINT = 116, + VK_FORMAT_R64G64B64_SINT = 117, + VK_FORMAT_R64G64B64_SFLOAT = 118, + VK_FORMAT_R64G64B64A64_UINT = 119, + VK_FORMAT_R64G64B64A64_SINT = 120, + VK_FORMAT_R64G64B64A64_SFLOAT = 121, + VK_FORMAT_B10G11R11_UFLOAT_PACK32 = 122, + VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 = 123, + VK_FORMAT_D16_UNORM = 124, + VK_FORMAT_X8_D24_UNORM_PACK32 = 125, + VK_FORMAT_D32_SFLOAT = 126, + VK_FORMAT_S8_UINT = 127, + VK_FORMAT_D16_UNORM_S8_UINT = 128, + VK_FORMAT_D24_UNORM_S8_UINT = 129, + VK_FORMAT_D32_SFLOAT_S8_UINT = 130, + VK_FORMAT_BC1_RGB_UNORM_BLOCK = 131, + VK_FORMAT_BC1_RGB_SRGB_BLOCK = 132, + VK_FORMAT_BC1_RGBA_UNORM_BLOCK = 133, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK = 134, + VK_FORMAT_BC2_UNORM_BLOCK = 135, + VK_FORMAT_BC2_SRGB_BLOCK = 136, + VK_FORMAT_BC3_UNORM_BLOCK = 137, + VK_FORMAT_BC3_SRGB_BLOCK = 138, + VK_FORMAT_BC4_UNORM_BLOCK = 139, + VK_FORMAT_BC4_SNORM_BLOCK = 140, + VK_FORMAT_BC5_UNORM_BLOCK = 141, + VK_FORMAT_BC5_SNORM_BLOCK = 142, + VK_FORMAT_BC6H_UFLOAT_BLOCK = 143, + 
VK_FORMAT_BC6H_SFLOAT_BLOCK = 144, + VK_FORMAT_BC7_UNORM_BLOCK = 145, + VK_FORMAT_BC7_SRGB_BLOCK = 146, + VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK = 147, + VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK = 148, + VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK = 149, + VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK = 150, + VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK = 151, + VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK = 152, + VK_FORMAT_EAC_R11_UNORM_BLOCK = 153, + VK_FORMAT_EAC_R11_SNORM_BLOCK = 154, + VK_FORMAT_EAC_R11G11_UNORM_BLOCK = 155, + VK_FORMAT_EAC_R11G11_SNORM_BLOCK = 156, + VK_FORMAT_ASTC_4x4_UNORM_BLOCK = 157, + VK_FORMAT_ASTC_4x4_SRGB_BLOCK = 158, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK = 159, + VK_FORMAT_ASTC_5x4_SRGB_BLOCK = 160, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK = 161, + VK_FORMAT_ASTC_5x5_SRGB_BLOCK = 162, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK = 163, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK = 164, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK = 165, + VK_FORMAT_ASTC_6x6_SRGB_BLOCK = 166, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK = 167, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK = 168, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK = 169, + VK_FORMAT_ASTC_8x6_SRGB_BLOCK = 170, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK = 171, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK = 172, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK = 173, + VK_FORMAT_ASTC_10x5_SRGB_BLOCK = 174, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK = 175, + VK_FORMAT_ASTC_10x6_SRGB_BLOCK = 176, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK = 177, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK = 178, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK = 179, + VK_FORMAT_ASTC_10x10_SRGB_BLOCK = 180, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK = 181, + VK_FORMAT_ASTC_12x10_SRGB_BLOCK = 182, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK = 183, + VK_FORMAT_ASTC_12x12_SRGB_BLOCK = 184, + VK_FORMAT_BEGIN_RANGE = VK_FORMAT_UNDEFINED, + VK_FORMAT_END_RANGE = VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + VK_FORMAT_RANGE_SIZE = (VK_FORMAT_ASTC_12x12_SRGB_BLOCK - VK_FORMAT_UNDEFINED + 1), + VK_FORMAT_MAX_ENUM = 0x7FFFFFFF +} VkFormat; + +typedef enum VkImageType { + VK_IMAGE_TYPE_1D = 0, + VK_IMAGE_TYPE_2D = 1, + VK_IMAGE_TYPE_3D = 2, + 
VK_IMAGE_TYPE_BEGIN_RANGE = VK_IMAGE_TYPE_1D, + VK_IMAGE_TYPE_END_RANGE = VK_IMAGE_TYPE_3D, + VK_IMAGE_TYPE_RANGE_SIZE = (VK_IMAGE_TYPE_3D - VK_IMAGE_TYPE_1D + 1), + VK_IMAGE_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkImageType; + +typedef enum VkImageTiling { + VK_IMAGE_TILING_OPTIMAL = 0, + VK_IMAGE_TILING_LINEAR = 1, + VK_IMAGE_TILING_BEGIN_RANGE = VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_TILING_END_RANGE = VK_IMAGE_TILING_LINEAR, + VK_IMAGE_TILING_RANGE_SIZE = (VK_IMAGE_TILING_LINEAR - VK_IMAGE_TILING_OPTIMAL + 1), + VK_IMAGE_TILING_MAX_ENUM = 0x7FFFFFFF +} VkImageTiling; + +typedef enum VkPhysicalDeviceType { + VK_PHYSICAL_DEVICE_TYPE_OTHER = 0, + VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU = 1, + VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU = 2, + VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU = 3, + VK_PHYSICAL_DEVICE_TYPE_CPU = 4, + VK_PHYSICAL_DEVICE_TYPE_BEGIN_RANGE = VK_PHYSICAL_DEVICE_TYPE_OTHER, + VK_PHYSICAL_DEVICE_TYPE_END_RANGE = VK_PHYSICAL_DEVICE_TYPE_CPU, + VK_PHYSICAL_DEVICE_TYPE_RANGE_SIZE = (VK_PHYSICAL_DEVICE_TYPE_CPU - VK_PHYSICAL_DEVICE_TYPE_OTHER + 1), + VK_PHYSICAL_DEVICE_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkPhysicalDeviceType; + +typedef enum VkQueryType { + VK_QUERY_TYPE_OCCLUSION = 0, + VK_QUERY_TYPE_PIPELINE_STATISTICS = 1, + VK_QUERY_TYPE_TIMESTAMP = 2, + VK_QUERY_TYPE_BEGIN_RANGE = VK_QUERY_TYPE_OCCLUSION, + VK_QUERY_TYPE_END_RANGE = VK_QUERY_TYPE_TIMESTAMP, + VK_QUERY_TYPE_RANGE_SIZE = (VK_QUERY_TYPE_TIMESTAMP - VK_QUERY_TYPE_OCCLUSION + 1), + VK_QUERY_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkQueryType; + +typedef enum VkSharingMode { + VK_SHARING_MODE_EXCLUSIVE = 0, + VK_SHARING_MODE_CONCURRENT = 1, + VK_SHARING_MODE_BEGIN_RANGE = VK_SHARING_MODE_EXCLUSIVE, + VK_SHARING_MODE_END_RANGE = VK_SHARING_MODE_CONCURRENT, + VK_SHARING_MODE_RANGE_SIZE = (VK_SHARING_MODE_CONCURRENT - VK_SHARING_MODE_EXCLUSIVE + 1), + VK_SHARING_MODE_MAX_ENUM = 0x7FFFFFFF +} VkSharingMode; + +typedef enum VkImageLayout { + VK_IMAGE_LAYOUT_UNDEFINED = 0, + VK_IMAGE_LAYOUT_GENERAL = 1, + 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL = 2, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL = 3, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL = 4, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL = 5, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL = 6, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL = 7, + VK_IMAGE_LAYOUT_PREINITIALIZED = 8, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR = 1000001002, + VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_END_RANGE = VK_IMAGE_LAYOUT_PREINITIALIZED, + VK_IMAGE_LAYOUT_RANGE_SIZE = (VK_IMAGE_LAYOUT_PREINITIALIZED - VK_IMAGE_LAYOUT_UNDEFINED + 1), + VK_IMAGE_LAYOUT_MAX_ENUM = 0x7FFFFFFF +} VkImageLayout; + +typedef enum VkImageViewType { + VK_IMAGE_VIEW_TYPE_1D = 0, + VK_IMAGE_VIEW_TYPE_2D = 1, + VK_IMAGE_VIEW_TYPE_3D = 2, + VK_IMAGE_VIEW_TYPE_CUBE = 3, + VK_IMAGE_VIEW_TYPE_1D_ARRAY = 4, + VK_IMAGE_VIEW_TYPE_2D_ARRAY = 5, + VK_IMAGE_VIEW_TYPE_CUBE_ARRAY = 6, + VK_IMAGE_VIEW_TYPE_BEGIN_RANGE = VK_IMAGE_VIEW_TYPE_1D, + VK_IMAGE_VIEW_TYPE_END_RANGE = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY, + VK_IMAGE_VIEW_TYPE_RANGE_SIZE = (VK_IMAGE_VIEW_TYPE_CUBE_ARRAY - VK_IMAGE_VIEW_TYPE_1D + 1), + VK_IMAGE_VIEW_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkImageViewType; + +typedef enum VkComponentSwizzle { + VK_COMPONENT_SWIZZLE_IDENTITY = 0, + VK_COMPONENT_SWIZZLE_ZERO = 1, + VK_COMPONENT_SWIZZLE_ONE = 2, + VK_COMPONENT_SWIZZLE_R = 3, + VK_COMPONENT_SWIZZLE_G = 4, + VK_COMPONENT_SWIZZLE_B = 5, + VK_COMPONENT_SWIZZLE_A = 6, + VK_COMPONENT_SWIZZLE_BEGIN_RANGE = VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_END_RANGE = VK_COMPONENT_SWIZZLE_A, + VK_COMPONENT_SWIZZLE_RANGE_SIZE = (VK_COMPONENT_SWIZZLE_A - VK_COMPONENT_SWIZZLE_IDENTITY + 1), + VK_COMPONENT_SWIZZLE_MAX_ENUM = 0x7FFFFFFF +} VkComponentSwizzle; + +typedef enum VkVertexInputRate { + VK_VERTEX_INPUT_RATE_VERTEX = 0, + VK_VERTEX_INPUT_RATE_INSTANCE = 1, + VK_VERTEX_INPUT_RATE_BEGIN_RANGE = VK_VERTEX_INPUT_RATE_VERTEX, + VK_VERTEX_INPUT_RATE_END_RANGE = VK_VERTEX_INPUT_RATE_INSTANCE, + 
VK_VERTEX_INPUT_RATE_RANGE_SIZE = (VK_VERTEX_INPUT_RATE_INSTANCE - VK_VERTEX_INPUT_RATE_VERTEX + 1), + VK_VERTEX_INPUT_RATE_MAX_ENUM = 0x7FFFFFFF +} VkVertexInputRate; + +typedef enum VkPrimitiveTopology { + VK_PRIMITIVE_TOPOLOGY_POINT_LIST = 0, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST = 1, + VK_PRIMITIVE_TOPOLOGY_LINE_STRIP = 2, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST = 3, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP = 4, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN = 5, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY = 6, + VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY = 7, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY = 8, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY = 9, + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST = 10, + VK_PRIMITIVE_TOPOLOGY_BEGIN_RANGE = VK_PRIMITIVE_TOPOLOGY_POINT_LIST, + VK_PRIMITIVE_TOPOLOGY_END_RANGE = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + VK_PRIMITIVE_TOPOLOGY_RANGE_SIZE = (VK_PRIMITIVE_TOPOLOGY_PATCH_LIST - VK_PRIMITIVE_TOPOLOGY_POINT_LIST + 1), + VK_PRIMITIVE_TOPOLOGY_MAX_ENUM = 0x7FFFFFFF +} VkPrimitiveTopology; + +typedef enum VkPolygonMode { + VK_POLYGON_MODE_FILL = 0, + VK_POLYGON_MODE_LINE = 1, + VK_POLYGON_MODE_POINT = 2, + VK_POLYGON_MODE_BEGIN_RANGE = VK_POLYGON_MODE_FILL, + VK_POLYGON_MODE_END_RANGE = VK_POLYGON_MODE_POINT, + VK_POLYGON_MODE_RANGE_SIZE = (VK_POLYGON_MODE_POINT - VK_POLYGON_MODE_FILL + 1), + VK_POLYGON_MODE_MAX_ENUM = 0x7FFFFFFF +} VkPolygonMode; + +typedef enum VkFrontFace { + VK_FRONT_FACE_COUNTER_CLOCKWISE = 0, + VK_FRONT_FACE_CLOCKWISE = 1, + VK_FRONT_FACE_BEGIN_RANGE = VK_FRONT_FACE_COUNTER_CLOCKWISE, + VK_FRONT_FACE_END_RANGE = VK_FRONT_FACE_CLOCKWISE, + VK_FRONT_FACE_RANGE_SIZE = (VK_FRONT_FACE_CLOCKWISE - VK_FRONT_FACE_COUNTER_CLOCKWISE + 1), + VK_FRONT_FACE_MAX_ENUM = 0x7FFFFFFF +} VkFrontFace; + +typedef enum VkCompareOp { + VK_COMPARE_OP_NEVER = 0, + VK_COMPARE_OP_LESS = 1, + VK_COMPARE_OP_EQUAL = 2, + VK_COMPARE_OP_LESS_OR_EQUAL = 3, + VK_COMPARE_OP_GREATER = 4, + VK_COMPARE_OP_NOT_EQUAL = 5, + 
VK_COMPARE_OP_GREATER_OR_EQUAL = 6, + VK_COMPARE_OP_ALWAYS = 7, + VK_COMPARE_OP_BEGIN_RANGE = VK_COMPARE_OP_NEVER, + VK_COMPARE_OP_END_RANGE = VK_COMPARE_OP_ALWAYS, + VK_COMPARE_OP_RANGE_SIZE = (VK_COMPARE_OP_ALWAYS - VK_COMPARE_OP_NEVER + 1), + VK_COMPARE_OP_MAX_ENUM = 0x7FFFFFFF +} VkCompareOp; + +typedef enum VkStencilOp { + VK_STENCIL_OP_KEEP = 0, + VK_STENCIL_OP_ZERO = 1, + VK_STENCIL_OP_REPLACE = 2, + VK_STENCIL_OP_INCREMENT_AND_CLAMP = 3, + VK_STENCIL_OP_DECREMENT_AND_CLAMP = 4, + VK_STENCIL_OP_INVERT = 5, + VK_STENCIL_OP_INCREMENT_AND_WRAP = 6, + VK_STENCIL_OP_DECREMENT_AND_WRAP = 7, + VK_STENCIL_OP_BEGIN_RANGE = VK_STENCIL_OP_KEEP, + VK_STENCIL_OP_END_RANGE = VK_STENCIL_OP_DECREMENT_AND_WRAP, + VK_STENCIL_OP_RANGE_SIZE = (VK_STENCIL_OP_DECREMENT_AND_WRAP - VK_STENCIL_OP_KEEP + 1), + VK_STENCIL_OP_MAX_ENUM = 0x7FFFFFFF +} VkStencilOp; + +typedef enum VkLogicOp { + VK_LOGIC_OP_CLEAR = 0, + VK_LOGIC_OP_AND = 1, + VK_LOGIC_OP_AND_REVERSE = 2, + VK_LOGIC_OP_COPY = 3, + VK_LOGIC_OP_AND_INVERTED = 4, + VK_LOGIC_OP_NO_OP = 5, + VK_LOGIC_OP_XOR = 6, + VK_LOGIC_OP_OR = 7, + VK_LOGIC_OP_NOR = 8, + VK_LOGIC_OP_EQUIVALENT = 9, + VK_LOGIC_OP_INVERT = 10, + VK_LOGIC_OP_OR_REVERSE = 11, + VK_LOGIC_OP_COPY_INVERTED = 12, + VK_LOGIC_OP_OR_INVERTED = 13, + VK_LOGIC_OP_NAND = 14, + VK_LOGIC_OP_SET = 15, + VK_LOGIC_OP_BEGIN_RANGE = VK_LOGIC_OP_CLEAR, + VK_LOGIC_OP_END_RANGE = VK_LOGIC_OP_SET, + VK_LOGIC_OP_RANGE_SIZE = (VK_LOGIC_OP_SET - VK_LOGIC_OP_CLEAR + 1), + VK_LOGIC_OP_MAX_ENUM = 0x7FFFFFFF +} VkLogicOp; + +typedef enum VkBlendFactor { + VK_BLEND_FACTOR_ZERO = 0, + VK_BLEND_FACTOR_ONE = 1, + VK_BLEND_FACTOR_SRC_COLOR = 2, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR = 3, + VK_BLEND_FACTOR_DST_COLOR = 4, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR = 5, + VK_BLEND_FACTOR_SRC_ALPHA = 6, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA = 7, + VK_BLEND_FACTOR_DST_ALPHA = 8, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA = 9, + VK_BLEND_FACTOR_CONSTANT_COLOR = 10, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR = 
11, + VK_BLEND_FACTOR_CONSTANT_ALPHA = 12, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA = 13, + VK_BLEND_FACTOR_SRC_ALPHA_SATURATE = 14, + VK_BLEND_FACTOR_SRC1_COLOR = 15, + VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR = 16, + VK_BLEND_FACTOR_SRC1_ALPHA = 17, + VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA = 18, + VK_BLEND_FACTOR_BEGIN_RANGE = VK_BLEND_FACTOR_ZERO, + VK_BLEND_FACTOR_END_RANGE = VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, + VK_BLEND_FACTOR_RANGE_SIZE = (VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA - VK_BLEND_FACTOR_ZERO + 1), + VK_BLEND_FACTOR_MAX_ENUM = 0x7FFFFFFF +} VkBlendFactor; + +typedef enum VkBlendOp { + VK_BLEND_OP_ADD = 0, + VK_BLEND_OP_SUBTRACT = 1, + VK_BLEND_OP_REVERSE_SUBTRACT = 2, + VK_BLEND_OP_MIN = 3, + VK_BLEND_OP_MAX = 4, + VK_BLEND_OP_BEGIN_RANGE = VK_BLEND_OP_ADD, + VK_BLEND_OP_END_RANGE = VK_BLEND_OP_MAX, + VK_BLEND_OP_RANGE_SIZE = (VK_BLEND_OP_MAX - VK_BLEND_OP_ADD + 1), + VK_BLEND_OP_MAX_ENUM = 0x7FFFFFFF +} VkBlendOp; + +typedef enum VkDynamicState { + VK_DYNAMIC_STATE_VIEWPORT = 0, + VK_DYNAMIC_STATE_SCISSOR = 1, + VK_DYNAMIC_STATE_LINE_WIDTH = 2, + VK_DYNAMIC_STATE_DEPTH_BIAS = 3, + VK_DYNAMIC_STATE_BLEND_CONSTANTS = 4, + VK_DYNAMIC_STATE_DEPTH_BOUNDS = 5, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK = 6, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK = 7, + VK_DYNAMIC_STATE_STENCIL_REFERENCE = 8, + VK_DYNAMIC_STATE_BEGIN_RANGE = VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_END_RANGE = VK_DYNAMIC_STATE_STENCIL_REFERENCE, + VK_DYNAMIC_STATE_RANGE_SIZE = (VK_DYNAMIC_STATE_STENCIL_REFERENCE - VK_DYNAMIC_STATE_VIEWPORT + 1), + VK_DYNAMIC_STATE_MAX_ENUM = 0x7FFFFFFF +} VkDynamicState; + +typedef enum VkFilter { + VK_FILTER_NEAREST = 0, + VK_FILTER_LINEAR = 1, + VK_FILTER_BEGIN_RANGE = VK_FILTER_NEAREST, + VK_FILTER_END_RANGE = VK_FILTER_LINEAR, + VK_FILTER_RANGE_SIZE = (VK_FILTER_LINEAR - VK_FILTER_NEAREST + 1), + VK_FILTER_MAX_ENUM = 0x7FFFFFFF +} VkFilter; + +typedef enum VkSamplerMipmapMode { + VK_SAMPLER_MIPMAP_MODE_NEAREST = 0, + VK_SAMPLER_MIPMAP_MODE_LINEAR = 1, + 
VK_SAMPLER_MIPMAP_MODE_BEGIN_RANGE = VK_SAMPLER_MIPMAP_MODE_NEAREST, + VK_SAMPLER_MIPMAP_MODE_END_RANGE = VK_SAMPLER_MIPMAP_MODE_LINEAR, + VK_SAMPLER_MIPMAP_MODE_RANGE_SIZE = (VK_SAMPLER_MIPMAP_MODE_LINEAR - VK_SAMPLER_MIPMAP_MODE_NEAREST + 1), + VK_SAMPLER_MIPMAP_MODE_MAX_ENUM = 0x7FFFFFFF +} VkSamplerMipmapMode; + +typedef enum VkSamplerAddressMode { + VK_SAMPLER_ADDRESS_MODE_REPEAT = 0, + VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT = 1, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE = 2, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER = 3, + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE = 4, + VK_SAMPLER_ADDRESS_MODE_BEGIN_RANGE = VK_SAMPLER_ADDRESS_MODE_REPEAT, + VK_SAMPLER_ADDRESS_MODE_END_RANGE = VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, + VK_SAMPLER_ADDRESS_MODE_RANGE_SIZE = (VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE - VK_SAMPLER_ADDRESS_MODE_REPEAT + 1), + VK_SAMPLER_ADDRESS_MODE_MAX_ENUM = 0x7FFFFFFF +} VkSamplerAddressMode; + +typedef enum VkBorderColor { + VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK = 0, + VK_BORDER_COLOR_INT_TRANSPARENT_BLACK = 1, + VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK = 2, + VK_BORDER_COLOR_INT_OPAQUE_BLACK = 3, + VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE = 4, + VK_BORDER_COLOR_INT_OPAQUE_WHITE = 5, + VK_BORDER_COLOR_BEGIN_RANGE = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, + VK_BORDER_COLOR_END_RANGE = VK_BORDER_COLOR_INT_OPAQUE_WHITE, + VK_BORDER_COLOR_RANGE_SIZE = (VK_BORDER_COLOR_INT_OPAQUE_WHITE - VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK + 1), + VK_BORDER_COLOR_MAX_ENUM = 0x7FFFFFFF +} VkBorderColor; + +typedef enum VkDescriptorType { + VK_DESCRIPTOR_TYPE_SAMPLER = 0, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER = 1, + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE = 2, + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE = 3, + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER = 4, + VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER = 5, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER = 6, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER = 7, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC = 8, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC 
= 9, + VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = 10, + VK_DESCRIPTOR_TYPE_BEGIN_RANGE = VK_DESCRIPTOR_TYPE_SAMPLER, + VK_DESCRIPTOR_TYPE_END_RANGE = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, + VK_DESCRIPTOR_TYPE_RANGE_SIZE = (VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT - VK_DESCRIPTOR_TYPE_SAMPLER + 1), + VK_DESCRIPTOR_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkDescriptorType; + +typedef enum VkAttachmentLoadOp { + VK_ATTACHMENT_LOAD_OP_LOAD = 0, + VK_ATTACHMENT_LOAD_OP_CLEAR = 1, + VK_ATTACHMENT_LOAD_OP_DONT_CARE = 2, + VK_ATTACHMENT_LOAD_OP_BEGIN_RANGE = VK_ATTACHMENT_LOAD_OP_LOAD, + VK_ATTACHMENT_LOAD_OP_END_RANGE = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_LOAD_OP_RANGE_SIZE = (VK_ATTACHMENT_LOAD_OP_DONT_CARE - VK_ATTACHMENT_LOAD_OP_LOAD + 1), + VK_ATTACHMENT_LOAD_OP_MAX_ENUM = 0x7FFFFFFF +} VkAttachmentLoadOp; + +typedef enum VkAttachmentStoreOp { + VK_ATTACHMENT_STORE_OP_STORE = 0, + VK_ATTACHMENT_STORE_OP_DONT_CARE = 1, + VK_ATTACHMENT_STORE_OP_BEGIN_RANGE = VK_ATTACHMENT_STORE_OP_STORE, + VK_ATTACHMENT_STORE_OP_END_RANGE = VK_ATTACHMENT_STORE_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_RANGE_SIZE = (VK_ATTACHMENT_STORE_OP_DONT_CARE - VK_ATTACHMENT_STORE_OP_STORE + 1), + VK_ATTACHMENT_STORE_OP_MAX_ENUM = 0x7FFFFFFF +} VkAttachmentStoreOp; + +typedef enum VkPipelineBindPoint { + VK_PIPELINE_BIND_POINT_GRAPHICS = 0, + VK_PIPELINE_BIND_POINT_COMPUTE = 1, + VK_PIPELINE_BIND_POINT_BEGIN_RANGE = VK_PIPELINE_BIND_POINT_GRAPHICS, + VK_PIPELINE_BIND_POINT_END_RANGE = VK_PIPELINE_BIND_POINT_COMPUTE, + VK_PIPELINE_BIND_POINT_RANGE_SIZE = (VK_PIPELINE_BIND_POINT_COMPUTE - VK_PIPELINE_BIND_POINT_GRAPHICS + 1), + VK_PIPELINE_BIND_POINT_MAX_ENUM = 0x7FFFFFFF +} VkPipelineBindPoint; + +typedef enum VkCommandBufferLevel { + VK_COMMAND_BUFFER_LEVEL_PRIMARY = 0, + VK_COMMAND_BUFFER_LEVEL_SECONDARY = 1, + VK_COMMAND_BUFFER_LEVEL_BEGIN_RANGE = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + VK_COMMAND_BUFFER_LEVEL_END_RANGE = VK_COMMAND_BUFFER_LEVEL_SECONDARY, + VK_COMMAND_BUFFER_LEVEL_RANGE_SIZE = 
(VK_COMMAND_BUFFER_LEVEL_SECONDARY - VK_COMMAND_BUFFER_LEVEL_PRIMARY + 1), + VK_COMMAND_BUFFER_LEVEL_MAX_ENUM = 0x7FFFFFFF +} VkCommandBufferLevel; + +typedef enum VkIndexType { + VK_INDEX_TYPE_UINT16 = 0, + VK_INDEX_TYPE_UINT32 = 1, + VK_INDEX_TYPE_BEGIN_RANGE = VK_INDEX_TYPE_UINT16, + VK_INDEX_TYPE_END_RANGE = VK_INDEX_TYPE_UINT32, + VK_INDEX_TYPE_RANGE_SIZE = (VK_INDEX_TYPE_UINT32 - VK_INDEX_TYPE_UINT16 + 1), + VK_INDEX_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkIndexType; + +typedef enum VkSubpassContents { + VK_SUBPASS_CONTENTS_INLINE = 0, + VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS = 1, + VK_SUBPASS_CONTENTS_BEGIN_RANGE = VK_SUBPASS_CONTENTS_INLINE, + VK_SUBPASS_CONTENTS_END_RANGE = VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS, + VK_SUBPASS_CONTENTS_RANGE_SIZE = (VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS - VK_SUBPASS_CONTENTS_INLINE + 1), + VK_SUBPASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF +} VkSubpassContents; + +typedef VkFlags VkInstanceCreateFlags; + +typedef enum VkFormatFeatureFlagBits { + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT = 0x00000001, + VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT = 0x00000002, + VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT = 0x00000004, + VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT = 0x00000008, + VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT = 0x00000010, + VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT = 0x00000020, + VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT = 0x00000040, + VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT = 0x00000080, + VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT = 0x00000100, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000200, + VK_FORMAT_FEATURE_BLIT_SRC_BIT = 0x00000400, + VK_FORMAT_FEATURE_BLIT_DST_BIT = 0x00000800, + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT = 0x00001000, +} VkFormatFeatureFlagBits; +typedef VkFlags VkFormatFeatureFlags; + +typedef enum VkImageUsageFlagBits { + VK_IMAGE_USAGE_TRANSFER_SRC_BIT = 0x00000001, + VK_IMAGE_USAGE_TRANSFER_DST_BIT = 0x00000002, + VK_IMAGE_USAGE_SAMPLED_BIT = 0x00000004, + 
VK_IMAGE_USAGE_STORAGE_BIT = 0x00000008, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT = 0x00000010, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000020, + VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT = 0x00000040, + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT = 0x00000080, +} VkImageUsageFlagBits; +typedef VkFlags VkImageUsageFlags; + +typedef enum VkImageCreateFlagBits { + VK_IMAGE_CREATE_SPARSE_BINDING_BIT = 0x00000001, + VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, + VK_IMAGE_CREATE_SPARSE_ALIASED_BIT = 0x00000004, + VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000008, + VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000010, +} VkImageCreateFlagBits; +typedef VkFlags VkImageCreateFlags; + +typedef enum VkSampleCountFlagBits { + VK_SAMPLE_COUNT_1_BIT = 0x00000001, + VK_SAMPLE_COUNT_2_BIT = 0x00000002, + VK_SAMPLE_COUNT_4_BIT = 0x00000004, + VK_SAMPLE_COUNT_8_BIT = 0x00000008, + VK_SAMPLE_COUNT_16_BIT = 0x00000010, + VK_SAMPLE_COUNT_32_BIT = 0x00000020, + VK_SAMPLE_COUNT_64_BIT = 0x00000040, +} VkSampleCountFlagBits; +typedef VkFlags VkSampleCountFlags; + +typedef enum VkQueueFlagBits { + VK_QUEUE_GRAPHICS_BIT = 0x00000001, + VK_QUEUE_COMPUTE_BIT = 0x00000002, + VK_QUEUE_TRANSFER_BIT = 0x00000004, + VK_QUEUE_SPARSE_BINDING_BIT = 0x00000008, +} VkQueueFlagBits; +typedef VkFlags VkQueueFlags; + +typedef enum VkMemoryPropertyFlagBits { + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT = 0x00000001, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT = 0x00000002, + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT = 0x00000004, + VK_MEMORY_PROPERTY_HOST_CACHED_BIT = 0x00000008, + VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT = 0x00000010, +} VkMemoryPropertyFlagBits; +typedef VkFlags VkMemoryPropertyFlags; + +typedef enum VkMemoryHeapFlagBits { + VK_MEMORY_HEAP_DEVICE_LOCAL_BIT = 0x00000001, +} VkMemoryHeapFlagBits; +typedef VkFlags VkMemoryHeapFlags; +typedef VkFlags VkDeviceCreateFlags; +typedef VkFlags VkDeviceQueueCreateFlags; + +typedef enum VkPipelineStageFlagBits { + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT = 0x00000001, + 
VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT = 0x00000002, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT = 0x00000004, + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT = 0x00000008, + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT = 0x00000010, + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT = 0x00000020, + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT = 0x00000040, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT = 0x00000080, + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT = 0x00000100, + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT = 0x00000200, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT = 0x00000400, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT = 0x00000800, + VK_PIPELINE_STAGE_TRANSFER_BIT = 0x00001000, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT = 0x00002000, + VK_PIPELINE_STAGE_HOST_BIT = 0x00004000, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT = 0x00008000, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT = 0x00010000, +} VkPipelineStageFlagBits; +typedef VkFlags VkPipelineStageFlags; +typedef VkFlags VkMemoryMapFlags; + +typedef enum VkImageAspectFlagBits { + VK_IMAGE_ASPECT_COLOR_BIT = 0x00000001, + VK_IMAGE_ASPECT_DEPTH_BIT = 0x00000002, + VK_IMAGE_ASPECT_STENCIL_BIT = 0x00000004, + VK_IMAGE_ASPECT_METADATA_BIT = 0x00000008, +} VkImageAspectFlagBits; +typedef VkFlags VkImageAspectFlags; + +typedef enum VkSparseImageFormatFlagBits { + VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT = 0x00000001, + VK_SPARSE_IMAGE_FORMAT_ALIGNED_MIP_SIZE_BIT = 0x00000002, + VK_SPARSE_IMAGE_FORMAT_NONSTANDARD_BLOCK_SIZE_BIT = 0x00000004, +} VkSparseImageFormatFlagBits; +typedef VkFlags VkSparseImageFormatFlags; + +typedef enum VkSparseMemoryBindFlagBits { + VK_SPARSE_MEMORY_BIND_METADATA_BIT = 0x00000001, +} VkSparseMemoryBindFlagBits; +typedef VkFlags VkSparseMemoryBindFlags; + +typedef enum VkFenceCreateFlagBits { + VK_FENCE_CREATE_SIGNALED_BIT = 0x00000001, +} VkFenceCreateFlagBits; +typedef VkFlags VkFenceCreateFlags; +typedef VkFlags VkSemaphoreCreateFlags; +typedef VkFlags VkEventCreateFlags; +typedef VkFlags VkQueryPoolCreateFlags; + 
+typedef enum VkQueryPipelineStatisticFlagBits { + VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT = 0x00000001, + VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT = 0x00000002, + VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT = 0x00000004, + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT = 0x00000008, + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT = 0x00000010, + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT = 0x00000020, + VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT = 0x00000040, + VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT = 0x00000080, + VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT = 0x00000100, + VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT = 0x00000200, + VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT = 0x00000400, +} VkQueryPipelineStatisticFlagBits; +typedef VkFlags VkQueryPipelineStatisticFlags; + +typedef enum VkQueryResultFlagBits { + VK_QUERY_RESULT_64_BIT = 0x00000001, + VK_QUERY_RESULT_WAIT_BIT = 0x00000002, + VK_QUERY_RESULT_WITH_AVAILABILITY_BIT = 0x00000004, + VK_QUERY_RESULT_PARTIAL_BIT = 0x00000008, +} VkQueryResultFlagBits; +typedef VkFlags VkQueryResultFlags; + +typedef enum VkBufferCreateFlagBits { + VK_BUFFER_CREATE_SPARSE_BINDING_BIT = 0x00000001, + VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, + VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004, +} VkBufferCreateFlagBits; +typedef VkFlags VkBufferCreateFlags; + +typedef enum VkBufferUsageFlagBits { + VK_BUFFER_USAGE_TRANSFER_SRC_BIT = 0x00000001, + VK_BUFFER_USAGE_TRANSFER_DST_BIT = 0x00000002, + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT = 0x00000004, + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT = 0x00000008, + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT = 0x00000010, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT = 0x00000020, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT = 0x00000040, + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT = 0x00000080, + 
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT = 0x00000100, +} VkBufferUsageFlagBits; +typedef VkFlags VkBufferUsageFlags; +typedef VkFlags VkBufferViewCreateFlags; +typedef VkFlags VkImageViewCreateFlags; +typedef VkFlags VkShaderModuleCreateFlags; +typedef VkFlags VkPipelineCacheCreateFlags; + +typedef enum VkPipelineCreateFlagBits { + VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT = 0x00000001, + VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT = 0x00000002, + VK_PIPELINE_CREATE_DERIVATIVE_BIT = 0x00000004, +} VkPipelineCreateFlagBits; +typedef VkFlags VkPipelineCreateFlags; +typedef VkFlags VkPipelineShaderStageCreateFlags; + +typedef enum VkShaderStageFlagBits { + VK_SHADER_STAGE_VERTEX_BIT = 0x00000001, + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT = 0x00000002, + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT = 0x00000004, + VK_SHADER_STAGE_GEOMETRY_BIT = 0x00000008, + VK_SHADER_STAGE_FRAGMENT_BIT = 0x00000010, + VK_SHADER_STAGE_COMPUTE_BIT = 0x00000020, + VK_SHADER_STAGE_ALL_GRAPHICS = 0x1F, + VK_SHADER_STAGE_ALL = 0x7FFFFFFF, +} VkShaderStageFlagBits; +typedef VkFlags VkPipelineVertexInputStateCreateFlags; +typedef VkFlags VkPipelineInputAssemblyStateCreateFlags; +typedef VkFlags VkPipelineTessellationStateCreateFlags; +typedef VkFlags VkPipelineViewportStateCreateFlags; +typedef VkFlags VkPipelineRasterizationStateCreateFlags; + +typedef enum VkCullModeFlagBits { + VK_CULL_MODE_NONE = 0, + VK_CULL_MODE_FRONT_BIT = 0x00000001, + VK_CULL_MODE_BACK_BIT = 0x00000002, + VK_CULL_MODE_FRONT_AND_BACK = 0x3, +} VkCullModeFlagBits; +typedef VkFlags VkCullModeFlags; +typedef VkFlags VkPipelineMultisampleStateCreateFlags; +typedef VkFlags VkPipelineDepthStencilStateCreateFlags; +typedef VkFlags VkPipelineColorBlendStateCreateFlags; + +typedef enum VkColorComponentFlagBits { + VK_COLOR_COMPONENT_R_BIT = 0x00000001, + VK_COLOR_COMPONENT_G_BIT = 0x00000002, + VK_COLOR_COMPONENT_B_BIT = 0x00000004, + VK_COLOR_COMPONENT_A_BIT = 0x00000008, +} VkColorComponentFlagBits; +typedef VkFlags 
VkColorComponentFlags; +typedef VkFlags VkPipelineDynamicStateCreateFlags; +typedef VkFlags VkPipelineLayoutCreateFlags; +typedef VkFlags VkShaderStageFlags; +typedef VkFlags VkSamplerCreateFlags; +typedef VkFlags VkDescriptorSetLayoutCreateFlags; + +typedef enum VkDescriptorPoolCreateFlagBits { + VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT = 0x00000001, +} VkDescriptorPoolCreateFlagBits; +typedef VkFlags VkDescriptorPoolCreateFlags; +typedef VkFlags VkDescriptorPoolResetFlags; +typedef VkFlags VkFramebufferCreateFlags; +typedef VkFlags VkRenderPassCreateFlags; + +typedef enum VkAttachmentDescriptionFlagBits { + VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT = 0x00000001, +} VkAttachmentDescriptionFlagBits; +typedef VkFlags VkAttachmentDescriptionFlags; +typedef VkFlags VkSubpassDescriptionFlags; + +typedef enum VkAccessFlagBits { + VK_ACCESS_INDIRECT_COMMAND_READ_BIT = 0x00000001, + VK_ACCESS_INDEX_READ_BIT = 0x00000002, + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT = 0x00000004, + VK_ACCESS_UNIFORM_READ_BIT = 0x00000008, + VK_ACCESS_INPUT_ATTACHMENT_READ_BIT = 0x00000010, + VK_ACCESS_SHADER_READ_BIT = 0x00000020, + VK_ACCESS_SHADER_WRITE_BIT = 0x00000040, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT = 0x00000080, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT = 0x00000100, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT = 0x00000200, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT = 0x00000400, + VK_ACCESS_TRANSFER_READ_BIT = 0x00000800, + VK_ACCESS_TRANSFER_WRITE_BIT = 0x00001000, + VK_ACCESS_HOST_READ_BIT = 0x00002000, + VK_ACCESS_HOST_WRITE_BIT = 0x00004000, + VK_ACCESS_MEMORY_READ_BIT = 0x00008000, + VK_ACCESS_MEMORY_WRITE_BIT = 0x00010000, +} VkAccessFlagBits; +typedef VkFlags VkAccessFlags; + +typedef enum VkDependencyFlagBits { + VK_DEPENDENCY_BY_REGION_BIT = 0x00000001, +} VkDependencyFlagBits; +typedef VkFlags VkDependencyFlags; + +typedef enum VkCommandPoolCreateFlagBits { + VK_COMMAND_POOL_CREATE_TRANSIENT_BIT = 0x00000001, + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT = 
0x00000002, +} VkCommandPoolCreateFlagBits; +typedef VkFlags VkCommandPoolCreateFlags; + +typedef enum VkCommandPoolResetFlagBits { + VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT = 0x00000001, +} VkCommandPoolResetFlagBits; +typedef VkFlags VkCommandPoolResetFlags; + +typedef enum VkCommandBufferUsageFlagBits { + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT = 0x00000001, + VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT = 0x00000002, + VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT = 0x00000004, +} VkCommandBufferUsageFlagBits; +typedef VkFlags VkCommandBufferUsageFlags; + +typedef enum VkQueryControlFlagBits { + VK_QUERY_CONTROL_PRECISE_BIT = 0x00000001, +} VkQueryControlFlagBits; +typedef VkFlags VkQueryControlFlags; + +typedef enum VkCommandBufferResetFlagBits { + VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT = 0x00000001, +} VkCommandBufferResetFlagBits; +typedef VkFlags VkCommandBufferResetFlags; + +typedef enum VkStencilFaceFlagBits { + VK_STENCIL_FACE_FRONT_BIT = 0x00000001, + VK_STENCIL_FACE_BACK_BIT = 0x00000002, + VK_STENCIL_FRONT_AND_BACK = 0x3, +} VkStencilFaceFlagBits; +typedef VkFlags VkStencilFaceFlags; + +typedef void* (VKAPI_PTR *PFN_vkAllocationFunction)( + void* pUserData, + size_t size, + size_t alignment, + VkSystemAllocationScope allocationScope); + +typedef void* (VKAPI_PTR *PFN_vkReallocationFunction)( + void* pUserData, + void* pOriginal, + size_t size, + size_t alignment, + VkSystemAllocationScope allocationScope); + +typedef void (VKAPI_PTR *PFN_vkFreeFunction)( + void* pUserData, + void* pMemory); + +typedef void (VKAPI_PTR *PFN_vkInternalAllocationNotification)( + void* pUserData, + size_t size, + VkInternalAllocationType allocationType, + VkSystemAllocationScope allocationScope); + +typedef void (VKAPI_PTR *PFN_vkInternalFreeNotification)( + void* pUserData, + size_t size, + VkInternalAllocationType allocationType, + VkSystemAllocationScope allocationScope); + +typedef void (VKAPI_PTR *PFN_vkVoidFunction)(void); + +typedef struct 
VkApplicationInfo { + VkStructureType sType; + const void* pNext; + const char* pApplicationName; + uint32_t applicationVersion; + const char* pEngineName; + uint32_t engineVersion; + uint32_t apiVersion; +} VkApplicationInfo; + +typedef struct VkInstanceCreateInfo { + VkStructureType sType; + const void* pNext; + VkInstanceCreateFlags flags; + const VkApplicationInfo* pApplicationInfo; + uint32_t enabledLayerCount; + const char* const* ppEnabledLayerNames; + uint32_t enabledExtensionCount; + const char* const* ppEnabledExtensionNames; +} VkInstanceCreateInfo; + +typedef struct VkAllocationCallbacks { + void* pUserData; + PFN_vkAllocationFunction pfnAllocation; + PFN_vkReallocationFunction pfnReallocation; + PFN_vkFreeFunction pfnFree; + PFN_vkInternalAllocationNotification pfnInternalAllocation; + PFN_vkInternalFreeNotification pfnInternalFree; +} VkAllocationCallbacks; + +typedef struct VkPhysicalDeviceFeatures { + VkBool32 robustBufferAccess; + VkBool32 fullDrawIndexUint32; + VkBool32 imageCubeArray; + VkBool32 independentBlend; + VkBool32 geometryShader; + VkBool32 tessellationShader; + VkBool32 sampleRateShading; + VkBool32 dualSrcBlend; + VkBool32 logicOp; + VkBool32 multiDrawIndirect; + VkBool32 drawIndirectFirstInstance; + VkBool32 depthClamp; + VkBool32 depthBiasClamp; + VkBool32 fillModeNonSolid; + VkBool32 depthBounds; + VkBool32 wideLines; + VkBool32 largePoints; + VkBool32 alphaToOne; + VkBool32 multiViewport; + VkBool32 samplerAnisotropy; + VkBool32 textureCompressionETC2; + VkBool32 textureCompressionASTC_LDR; + VkBool32 textureCompressionBC; + VkBool32 occlusionQueryPrecise; + VkBool32 pipelineStatisticsQuery; + VkBool32 vertexPipelineStoresAndAtomics; + VkBool32 fragmentStoresAndAtomics; + VkBool32 shaderTessellationAndGeometryPointSize; + VkBool32 shaderImageGatherExtended; + VkBool32 shaderStorageImageExtendedFormats; + VkBool32 shaderStorageImageMultisample; + VkBool32 shaderStorageImageReadWithoutFormat; + VkBool32 
shaderStorageImageWriteWithoutFormat; + VkBool32 shaderUniformBufferArrayDynamicIndexing; + VkBool32 shaderSampledImageArrayDynamicIndexing; + VkBool32 shaderStorageBufferArrayDynamicIndexing; + VkBool32 shaderStorageImageArrayDynamicIndexing; + VkBool32 shaderClipDistance; + VkBool32 shaderCullDistance; + VkBool32 shaderFloat64; + VkBool32 shaderInt64; + VkBool32 shaderInt16; + VkBool32 shaderResourceResidency; + VkBool32 shaderResourceMinLod; + VkBool32 sparseBinding; + VkBool32 sparseResidencyBuffer; + VkBool32 sparseResidencyImage2D; + VkBool32 sparseResidencyImage3D; + VkBool32 sparseResidency2Samples; + VkBool32 sparseResidency4Samples; + VkBool32 sparseResidency8Samples; + VkBool32 sparseResidency16Samples; + VkBool32 sparseResidencyAliased; + VkBool32 variableMultisampleRate; + VkBool32 inheritedQueries; +} VkPhysicalDeviceFeatures; + +typedef struct VkFormatProperties { + VkFormatFeatureFlags linearTilingFeatures; + VkFormatFeatureFlags optimalTilingFeatures; + VkFormatFeatureFlags bufferFeatures; +} VkFormatProperties; + +typedef struct VkExtent3D { + uint32_t width; + uint32_t height; + uint32_t depth; +} VkExtent3D; + +typedef struct VkImageFormatProperties { + VkExtent3D maxExtent; + uint32_t maxMipLevels; + uint32_t maxArrayLayers; + VkSampleCountFlags sampleCounts; + VkDeviceSize maxResourceSize; +} VkImageFormatProperties; + +typedef struct VkPhysicalDeviceLimits { + uint32_t maxImageDimension1D; + uint32_t maxImageDimension2D; + uint32_t maxImageDimension3D; + uint32_t maxImageDimensionCube; + uint32_t maxImageArrayLayers; + uint32_t maxTexelBufferElements; + uint32_t maxUniformBufferRange; + uint32_t maxStorageBufferRange; + uint32_t maxPushConstantsSize; + uint32_t maxMemoryAllocationCount; + uint32_t maxSamplerAllocationCount; + VkDeviceSize bufferImageGranularity; + VkDeviceSize sparseAddressSpaceSize; + uint32_t maxBoundDescriptorSets; + uint32_t maxPerStageDescriptorSamplers; + uint32_t maxPerStageDescriptorUniformBuffers; + uint32_t 
maxPerStageDescriptorStorageBuffers; + uint32_t maxPerStageDescriptorSampledImages; + uint32_t maxPerStageDescriptorStorageImages; + uint32_t maxPerStageDescriptorInputAttachments; + uint32_t maxPerStageResources; + uint32_t maxDescriptorSetSamplers; + uint32_t maxDescriptorSetUniformBuffers; + uint32_t maxDescriptorSetUniformBuffersDynamic; + uint32_t maxDescriptorSetStorageBuffers; + uint32_t maxDescriptorSetStorageBuffersDynamic; + uint32_t maxDescriptorSetSampledImages; + uint32_t maxDescriptorSetStorageImages; + uint32_t maxDescriptorSetInputAttachments; + uint32_t maxVertexInputAttributes; + uint32_t maxVertexInputBindings; + uint32_t maxVertexInputAttributeOffset; + uint32_t maxVertexInputBindingStride; + uint32_t maxVertexOutputComponents; + uint32_t maxTessellationGenerationLevel; + uint32_t maxTessellationPatchSize; + uint32_t maxTessellationControlPerVertexInputComponents; + uint32_t maxTessellationControlPerVertexOutputComponents; + uint32_t maxTessellationControlPerPatchOutputComponents; + uint32_t maxTessellationControlTotalOutputComponents; + uint32_t maxTessellationEvaluationInputComponents; + uint32_t maxTessellationEvaluationOutputComponents; + uint32_t maxGeometryShaderInvocations; + uint32_t maxGeometryInputComponents; + uint32_t maxGeometryOutputComponents; + uint32_t maxGeometryOutputVertices; + uint32_t maxGeometryTotalOutputComponents; + uint32_t maxFragmentInputComponents; + uint32_t maxFragmentOutputAttachments; + uint32_t maxFragmentDualSrcAttachments; + uint32_t maxFragmentCombinedOutputResources; + uint32_t maxComputeSharedMemorySize; + uint32_t maxComputeWorkGroupCount[3]; + uint32_t maxComputeWorkGroupInvocations; + uint32_t maxComputeWorkGroupSize[3]; + uint32_t subPixelPrecisionBits; + uint32_t subTexelPrecisionBits; + uint32_t mipmapPrecisionBits; + uint32_t maxDrawIndexedIndexValue; + uint32_t maxDrawIndirectCount; + float maxSamplerLodBias; + float maxSamplerAnisotropy; + uint32_t maxViewports; + uint32_t 
maxViewportDimensions[2]; + float viewportBoundsRange[2]; + uint32_t viewportSubPixelBits; + size_t minMemoryMapAlignment; + VkDeviceSize minTexelBufferOffsetAlignment; + VkDeviceSize minUniformBufferOffsetAlignment; + VkDeviceSize minStorageBufferOffsetAlignment; + int32_t minTexelOffset; + uint32_t maxTexelOffset; + int32_t minTexelGatherOffset; + uint32_t maxTexelGatherOffset; + float minInterpolationOffset; + float maxInterpolationOffset; + uint32_t subPixelInterpolationOffsetBits; + uint32_t maxFramebufferWidth; + uint32_t maxFramebufferHeight; + uint32_t maxFramebufferLayers; + VkSampleCountFlags framebufferColorSampleCounts; + VkSampleCountFlags framebufferDepthSampleCounts; + VkSampleCountFlags framebufferStencilSampleCounts; + VkSampleCountFlags framebufferNoAttachmentsSampleCounts; + uint32_t maxColorAttachments; + VkSampleCountFlags sampledImageColorSampleCounts; + VkSampleCountFlags sampledImageIntegerSampleCounts; + VkSampleCountFlags sampledImageDepthSampleCounts; + VkSampleCountFlags sampledImageStencilSampleCounts; + VkSampleCountFlags storageImageSampleCounts; + uint32_t maxSampleMaskWords; + VkBool32 timestampComputeAndGraphics; + float timestampPeriod; + uint32_t maxClipDistances; + uint32_t maxCullDistances; + uint32_t maxCombinedClipAndCullDistances; + uint32_t discreteQueuePriorities; + float pointSizeRange[2]; + float lineWidthRange[2]; + float pointSizeGranularity; + float lineWidthGranularity; + VkBool32 strictLines; + VkBool32 standardSampleLocations; + VkDeviceSize optimalBufferCopyOffsetAlignment; + VkDeviceSize optimalBufferCopyRowPitchAlignment; + VkDeviceSize nonCoherentAtomSize; +} VkPhysicalDeviceLimits; + +typedef struct VkPhysicalDeviceSparseProperties { + VkBool32 residencyStandard2DBlockShape; + VkBool32 residencyStandard2DMultisampleBlockShape; + VkBool32 residencyStandard3DBlockShape; + VkBool32 residencyAlignedMipSize; + VkBool32 residencyNonResidentStrict; +} VkPhysicalDeviceSparseProperties; + +typedef struct 
VkPhysicalDeviceProperties { + uint32_t apiVersion; + uint32_t driverVersion; + uint32_t vendorID; + uint32_t deviceID; + VkPhysicalDeviceType deviceType; + char deviceName[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; + uint8_t pipelineCacheUUID[VK_UUID_SIZE]; + VkPhysicalDeviceLimits limits; + VkPhysicalDeviceSparseProperties sparseProperties; +} VkPhysicalDeviceProperties; + +typedef struct VkQueueFamilyProperties { + VkQueueFlags queueFlags; + uint32_t queueCount; + uint32_t timestampValidBits; + VkExtent3D minImageTransferGranularity; +} VkQueueFamilyProperties; + +typedef struct VkMemoryType { + VkMemoryPropertyFlags propertyFlags; + uint32_t heapIndex; +} VkMemoryType; + +typedef struct VkMemoryHeap { + VkDeviceSize size; + VkMemoryHeapFlags flags; +} VkMemoryHeap; + +typedef struct VkPhysicalDeviceMemoryProperties { + uint32_t memoryTypeCount; + VkMemoryType memoryTypes[VK_MAX_MEMORY_TYPES]; + uint32_t memoryHeapCount; + VkMemoryHeap memoryHeaps[VK_MAX_MEMORY_HEAPS]; +} VkPhysicalDeviceMemoryProperties; + +typedef struct VkDeviceQueueCreateInfo { + VkStructureType sType; + const void* pNext; + VkDeviceQueueCreateFlags flags; + uint32_t queueFamilyIndex; + uint32_t queueCount; + const float* pQueuePriorities; +} VkDeviceQueueCreateInfo; + +typedef struct VkDeviceCreateInfo { + VkStructureType sType; + const void* pNext; + VkDeviceCreateFlags flags; + uint32_t queueCreateInfoCount; + const VkDeviceQueueCreateInfo* pQueueCreateInfos; + uint32_t enabledLayerCount; + const char* const* ppEnabledLayerNames; + uint32_t enabledExtensionCount; + const char* const* ppEnabledExtensionNames; + const VkPhysicalDeviceFeatures* pEnabledFeatures; +} VkDeviceCreateInfo; + +typedef struct VkExtensionProperties { + char extensionName[VK_MAX_EXTENSION_NAME_SIZE]; + uint32_t specVersion; +} VkExtensionProperties; + +typedef struct VkLayerProperties { + char layerName[VK_MAX_EXTENSION_NAME_SIZE]; + uint32_t specVersion; + uint32_t implementationVersion; + char 
description[VK_MAX_DESCRIPTION_SIZE]; +} VkLayerProperties; + +typedef struct VkSubmitInfo { + VkStructureType sType; + const void* pNext; + uint32_t waitSemaphoreCount; + const VkSemaphore* pWaitSemaphores; + const VkPipelineStageFlags* pWaitDstStageMask; + uint32_t commandBufferCount; + const VkCommandBuffer* pCommandBuffers; + uint32_t signalSemaphoreCount; + const VkSemaphore* pSignalSemaphores; +} VkSubmitInfo; + +typedef struct VkMemoryAllocateInfo { + VkStructureType sType; + const void* pNext; + VkDeviceSize allocationSize; + uint32_t memoryTypeIndex; +} VkMemoryAllocateInfo; + +typedef struct VkMappedMemoryRange { + VkStructureType sType; + const void* pNext; + VkDeviceMemory memory; + VkDeviceSize offset; + VkDeviceSize size; +} VkMappedMemoryRange; + +typedef struct VkMemoryRequirements { + VkDeviceSize size; + VkDeviceSize alignment; + uint32_t memoryTypeBits; +} VkMemoryRequirements; + +typedef struct VkSparseImageFormatProperties { + VkImageAspectFlags aspectMask; + VkExtent3D imageGranularity; + VkSparseImageFormatFlags flags; +} VkSparseImageFormatProperties; + +typedef struct VkSparseImageMemoryRequirements { + VkSparseImageFormatProperties formatProperties; + uint32_t imageMipTailFirstLod; + VkDeviceSize imageMipTailSize; + VkDeviceSize imageMipTailOffset; + VkDeviceSize imageMipTailStride; +} VkSparseImageMemoryRequirements; + +typedef struct VkSparseMemoryBind { + VkDeviceSize resourceOffset; + VkDeviceSize size; + VkDeviceMemory memory; + VkDeviceSize memoryOffset; + VkSparseMemoryBindFlags flags; +} VkSparseMemoryBind; + +typedef struct VkSparseBufferMemoryBindInfo { + VkBuffer buffer; + uint32_t bindCount; + const VkSparseMemoryBind* pBinds; +} VkSparseBufferMemoryBindInfo; + +typedef struct VkSparseImageOpaqueMemoryBindInfo { + VkImage image; + uint32_t bindCount; + const VkSparseMemoryBind* pBinds; +} VkSparseImageOpaqueMemoryBindInfo; + +typedef struct VkImageSubresource { + VkImageAspectFlags aspectMask; + uint32_t mipLevel; + uint32_t 
arrayLayer; +} VkImageSubresource; + +typedef struct VkOffset3D { + int32_t x; + int32_t y; + int32_t z; +} VkOffset3D; + +typedef struct VkSparseImageMemoryBind { + VkImageSubresource subresource; + VkOffset3D offset; + VkExtent3D extent; + VkDeviceMemory memory; + VkDeviceSize memoryOffset; + VkSparseMemoryBindFlags flags; +} VkSparseImageMemoryBind; + +typedef struct VkSparseImageMemoryBindInfo { + VkImage image; + uint32_t bindCount; + const VkSparseImageMemoryBind* pBinds; +} VkSparseImageMemoryBindInfo; + +typedef struct VkBindSparseInfo { + VkStructureType sType; + const void* pNext; + uint32_t waitSemaphoreCount; + const VkSemaphore* pWaitSemaphores; + uint32_t bufferBindCount; + const VkSparseBufferMemoryBindInfo* pBufferBinds; + uint32_t imageOpaqueBindCount; + const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds; + uint32_t imageBindCount; + const VkSparseImageMemoryBindInfo* pImageBinds; + uint32_t signalSemaphoreCount; + const VkSemaphore* pSignalSemaphores; +} VkBindSparseInfo; + +typedef struct VkFenceCreateInfo { + VkStructureType sType; + const void* pNext; + VkFenceCreateFlags flags; +} VkFenceCreateInfo; + +typedef struct VkSemaphoreCreateInfo { + VkStructureType sType; + const void* pNext; + VkSemaphoreCreateFlags flags; +} VkSemaphoreCreateInfo; + +typedef struct VkEventCreateInfo { + VkStructureType sType; + const void* pNext; + VkEventCreateFlags flags; +} VkEventCreateInfo; + +typedef struct VkQueryPoolCreateInfo { + VkStructureType sType; + const void* pNext; + VkQueryPoolCreateFlags flags; + VkQueryType queryType; + uint32_t queryCount; + VkQueryPipelineStatisticFlags pipelineStatistics; +} VkQueryPoolCreateInfo; + +typedef struct VkBufferCreateInfo { + VkStructureType sType; + const void* pNext; + VkBufferCreateFlags flags; + VkDeviceSize size; + VkBufferUsageFlags usage; + VkSharingMode sharingMode; + uint32_t queueFamilyIndexCount; + const uint32_t* pQueueFamilyIndices; +} VkBufferCreateInfo; + +typedef struct 
VkBufferViewCreateInfo { + VkStructureType sType; + const void* pNext; + VkBufferViewCreateFlags flags; + VkBuffer buffer; + VkFormat format; + VkDeviceSize offset; + VkDeviceSize range; +} VkBufferViewCreateInfo; + +typedef struct VkImageCreateInfo { + VkStructureType sType; + const void* pNext; + VkImageCreateFlags flags; + VkImageType imageType; + VkFormat format; + VkExtent3D extent; + uint32_t mipLevels; + uint32_t arrayLayers; + VkSampleCountFlagBits samples; + VkImageTiling tiling; + VkImageUsageFlags usage; + VkSharingMode sharingMode; + uint32_t queueFamilyIndexCount; + const uint32_t* pQueueFamilyIndices; + VkImageLayout initialLayout; +} VkImageCreateInfo; + +typedef struct VkSubresourceLayout { + VkDeviceSize offset; + VkDeviceSize size; + VkDeviceSize rowPitch; + VkDeviceSize arrayPitch; + VkDeviceSize depthPitch; +} VkSubresourceLayout; + +typedef struct VkComponentMapping { + VkComponentSwizzle r; + VkComponentSwizzle g; + VkComponentSwizzle b; + VkComponentSwizzle a; +} VkComponentMapping; + +typedef struct VkImageSubresourceRange { + VkImageAspectFlags aspectMask; + uint32_t baseMipLevel; + uint32_t levelCount; + uint32_t baseArrayLayer; + uint32_t layerCount; +} VkImageSubresourceRange; + +typedef struct VkImageViewCreateInfo { + VkStructureType sType; + const void* pNext; + VkImageViewCreateFlags flags; + VkImage image; + VkImageViewType viewType; + VkFormat format; + VkComponentMapping components; + VkImageSubresourceRange subresourceRange; +} VkImageViewCreateInfo; + +typedef struct VkShaderModuleCreateInfo { + VkStructureType sType; + const void* pNext; + VkShaderModuleCreateFlags flags; + size_t codeSize; + const uint32_t* pCode; +} VkShaderModuleCreateInfo; + +typedef struct VkPipelineCacheCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineCacheCreateFlags flags; + size_t initialDataSize; + const void* pInitialData; +} VkPipelineCacheCreateInfo; + +typedef struct VkSpecializationMapEntry { + uint32_t constantID; + 
uint32_t offset; + size_t size; +} VkSpecializationMapEntry; + +typedef struct VkSpecializationInfo { + uint32_t mapEntryCount; + const VkSpecializationMapEntry* pMapEntries; + size_t dataSize; + const void* pData; +} VkSpecializationInfo; + +typedef struct VkPipelineShaderStageCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineShaderStageCreateFlags flags; + VkShaderStageFlagBits stage; + VkShaderModule module; + const char* pName; + const VkSpecializationInfo* pSpecializationInfo; +} VkPipelineShaderStageCreateInfo; + +typedef struct VkVertexInputBindingDescription { + uint32_t binding; + uint32_t stride; + VkVertexInputRate inputRate; +} VkVertexInputBindingDescription; + +typedef struct VkVertexInputAttributeDescription { + uint32_t location; + uint32_t binding; + VkFormat format; + uint32_t offset; +} VkVertexInputAttributeDescription; + +typedef struct VkPipelineVertexInputStateCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineVertexInputStateCreateFlags flags; + uint32_t vertexBindingDescriptionCount; + const VkVertexInputBindingDescription* pVertexBindingDescriptions; + uint32_t vertexAttributeDescriptionCount; + const VkVertexInputAttributeDescription* pVertexAttributeDescriptions; +} VkPipelineVertexInputStateCreateInfo; + +typedef struct VkPipelineInputAssemblyStateCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineInputAssemblyStateCreateFlags flags; + VkPrimitiveTopology topology; + VkBool32 primitiveRestartEnable; +} VkPipelineInputAssemblyStateCreateInfo; + +typedef struct VkPipelineTessellationStateCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineTessellationStateCreateFlags flags; + uint32_t patchControlPoints; +} VkPipelineTessellationStateCreateInfo; + +typedef struct VkViewport { + float x; + float y; + float width; + float height; + float minDepth; + float maxDepth; +} VkViewport; + +typedef struct VkOffset2D { + int32_t x; + int32_t y; +} VkOffset2D; + +typedef 
struct VkExtent2D { + uint32_t width; + uint32_t height; +} VkExtent2D; + +typedef struct VkRect2D { + VkOffset2D offset; + VkExtent2D extent; +} VkRect2D; + +typedef struct VkPipelineViewportStateCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineViewportStateCreateFlags flags; + uint32_t viewportCount; + const VkViewport* pViewports; + uint32_t scissorCount; + const VkRect2D* pScissors; +} VkPipelineViewportStateCreateInfo; + +typedef struct VkPipelineRasterizationStateCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineRasterizationStateCreateFlags flags; + VkBool32 depthClampEnable; + VkBool32 rasterizerDiscardEnable; + VkPolygonMode polygonMode; + VkCullModeFlags cullMode; + VkFrontFace frontFace; + VkBool32 depthBiasEnable; + float depthBiasConstantFactor; + float depthBiasClamp; + float depthBiasSlopeFactor; + float lineWidth; +} VkPipelineRasterizationStateCreateInfo; + +typedef struct VkPipelineMultisampleStateCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineMultisampleStateCreateFlags flags; + VkSampleCountFlagBits rasterizationSamples; + VkBool32 sampleShadingEnable; + float minSampleShading; + const VkSampleMask* pSampleMask; + VkBool32 alphaToCoverageEnable; + VkBool32 alphaToOneEnable; +} VkPipelineMultisampleStateCreateInfo; + +typedef struct VkStencilOpState { + VkStencilOp failOp; + VkStencilOp passOp; + VkStencilOp depthFailOp; + VkCompareOp compareOp; + uint32_t compareMask; + uint32_t writeMask; + uint32_t reference; +} VkStencilOpState; + +typedef struct VkPipelineDepthStencilStateCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineDepthStencilStateCreateFlags flags; + VkBool32 depthTestEnable; + VkBool32 depthWriteEnable; + VkCompareOp depthCompareOp; + VkBool32 depthBoundsTestEnable; + VkBool32 stencilTestEnable; + VkStencilOpState front; + VkStencilOpState back; + float minDepthBounds; + float maxDepthBounds; +} VkPipelineDepthStencilStateCreateInfo; + +typedef 
struct VkPipelineColorBlendAttachmentState { + VkBool32 blendEnable; + VkBlendFactor srcColorBlendFactor; + VkBlendFactor dstColorBlendFactor; + VkBlendOp colorBlendOp; + VkBlendFactor srcAlphaBlendFactor; + VkBlendFactor dstAlphaBlendFactor; + VkBlendOp alphaBlendOp; + VkColorComponentFlags colorWriteMask; +} VkPipelineColorBlendAttachmentState; + +typedef struct VkPipelineColorBlendStateCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineColorBlendStateCreateFlags flags; + VkBool32 logicOpEnable; + VkLogicOp logicOp; + uint32_t attachmentCount; + const VkPipelineColorBlendAttachmentState* pAttachments; + float blendConstants[4]; +} VkPipelineColorBlendStateCreateInfo; + +typedef struct VkPipelineDynamicStateCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineDynamicStateCreateFlags flags; + uint32_t dynamicStateCount; + const VkDynamicState* pDynamicStates; +} VkPipelineDynamicStateCreateInfo; + +typedef struct VkGraphicsPipelineCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineCreateFlags flags; + uint32_t stageCount; + const VkPipelineShaderStageCreateInfo* pStages; + const VkPipelineVertexInputStateCreateInfo* pVertexInputState; + const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState; + const VkPipelineTessellationStateCreateInfo* pTessellationState; + const VkPipelineViewportStateCreateInfo* pViewportState; + const VkPipelineRasterizationStateCreateInfo* pRasterizationState; + const VkPipelineMultisampleStateCreateInfo* pMultisampleState; + const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState; + const VkPipelineColorBlendStateCreateInfo* pColorBlendState; + const VkPipelineDynamicStateCreateInfo* pDynamicState; + VkPipelineLayout layout; + VkRenderPass renderPass; + uint32_t subpass; + VkPipeline basePipelineHandle; + int32_t basePipelineIndex; +} VkGraphicsPipelineCreateInfo; + +typedef struct VkComputePipelineCreateInfo { + VkStructureType sType; + const void* pNext; + 
VkPipelineCreateFlags flags; + VkPipelineShaderStageCreateInfo stage; + VkPipelineLayout layout; + VkPipeline basePipelineHandle; + int32_t basePipelineIndex; +} VkComputePipelineCreateInfo; + +typedef struct VkPushConstantRange { + VkShaderStageFlags stageFlags; + uint32_t offset; + uint32_t size; +} VkPushConstantRange; + +typedef struct VkPipelineLayoutCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineLayoutCreateFlags flags; + uint32_t setLayoutCount; + const VkDescriptorSetLayout* pSetLayouts; + uint32_t pushConstantRangeCount; + const VkPushConstantRange* pPushConstantRanges; +} VkPipelineLayoutCreateInfo; + +typedef struct VkSamplerCreateInfo { + VkStructureType sType; + const void* pNext; + VkSamplerCreateFlags flags; + VkFilter magFilter; + VkFilter minFilter; + VkSamplerMipmapMode mipmapMode; + VkSamplerAddressMode addressModeU; + VkSamplerAddressMode addressModeV; + VkSamplerAddressMode addressModeW; + float mipLodBias; + VkBool32 anisotropyEnable; + float maxAnisotropy; + VkBool32 compareEnable; + VkCompareOp compareOp; + float minLod; + float maxLod; + VkBorderColor borderColor; + VkBool32 unnormalizedCoordinates; +} VkSamplerCreateInfo; + +typedef struct VkDescriptorSetLayoutBinding { + uint32_t binding; + VkDescriptorType descriptorType; + uint32_t descriptorCount; + VkShaderStageFlags stageFlags; + const VkSampler* pImmutableSamplers; +} VkDescriptorSetLayoutBinding; + +typedef struct VkDescriptorSetLayoutCreateInfo { + VkStructureType sType; + const void* pNext; + VkDescriptorSetLayoutCreateFlags flags; + uint32_t bindingCount; + const VkDescriptorSetLayoutBinding* pBindings; +} VkDescriptorSetLayoutCreateInfo; + +typedef struct VkDescriptorPoolSize { + VkDescriptorType type; + uint32_t descriptorCount; +} VkDescriptorPoolSize; + +typedef struct VkDescriptorPoolCreateInfo { + VkStructureType sType; + const void* pNext; + VkDescriptorPoolCreateFlags flags; + uint32_t maxSets; + uint32_t poolSizeCount; + const 
VkDescriptorPoolSize* pPoolSizes; +} VkDescriptorPoolCreateInfo; + +typedef struct VkDescriptorSetAllocateInfo { + VkStructureType sType; + const void* pNext; + VkDescriptorPool descriptorPool; + uint32_t descriptorSetCount; + const VkDescriptorSetLayout* pSetLayouts; +} VkDescriptorSetAllocateInfo; + +typedef struct VkDescriptorImageInfo { + VkSampler sampler; + VkImageView imageView; + VkImageLayout imageLayout; +} VkDescriptorImageInfo; + +typedef struct VkDescriptorBufferInfo { + VkBuffer buffer; + VkDeviceSize offset; + VkDeviceSize range; +} VkDescriptorBufferInfo; + +typedef struct VkWriteDescriptorSet { + VkStructureType sType; + const void* pNext; + VkDescriptorSet dstSet; + uint32_t dstBinding; + uint32_t dstArrayElement; + uint32_t descriptorCount; + VkDescriptorType descriptorType; + const VkDescriptorImageInfo* pImageInfo; + const VkDescriptorBufferInfo* pBufferInfo; + const VkBufferView* pTexelBufferView; +} VkWriteDescriptorSet; + +typedef struct VkCopyDescriptorSet { + VkStructureType sType; + const void* pNext; + VkDescriptorSet srcSet; + uint32_t srcBinding; + uint32_t srcArrayElement; + VkDescriptorSet dstSet; + uint32_t dstBinding; + uint32_t dstArrayElement; + uint32_t descriptorCount; +} VkCopyDescriptorSet; + +typedef struct VkFramebufferCreateInfo { + VkStructureType sType; + const void* pNext; + VkFramebufferCreateFlags flags; + VkRenderPass renderPass; + uint32_t attachmentCount; + const VkImageView* pAttachments; + uint32_t width; + uint32_t height; + uint32_t layers; +} VkFramebufferCreateInfo; + +typedef struct VkAttachmentDescription { + VkAttachmentDescriptionFlags flags; + VkFormat format; + VkSampleCountFlagBits samples; + VkAttachmentLoadOp loadOp; + VkAttachmentStoreOp storeOp; + VkAttachmentLoadOp stencilLoadOp; + VkAttachmentStoreOp stencilStoreOp; + VkImageLayout initialLayout; + VkImageLayout finalLayout; +} VkAttachmentDescription; + +typedef struct VkAttachmentReference { + uint32_t attachment; + VkImageLayout layout; +} 
VkAttachmentReference; + +typedef struct VkSubpassDescription { + VkSubpassDescriptionFlags flags; + VkPipelineBindPoint pipelineBindPoint; + uint32_t inputAttachmentCount; + const VkAttachmentReference* pInputAttachments; + uint32_t colorAttachmentCount; + const VkAttachmentReference* pColorAttachments; + const VkAttachmentReference* pResolveAttachments; + const VkAttachmentReference* pDepthStencilAttachment; + uint32_t preserveAttachmentCount; + const uint32_t* pPreserveAttachments; +} VkSubpassDescription; + +typedef struct VkSubpassDependency { + uint32_t srcSubpass; + uint32_t dstSubpass; + VkPipelineStageFlags srcStageMask; + VkPipelineStageFlags dstStageMask; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; + VkDependencyFlags dependencyFlags; +} VkSubpassDependency; + +typedef struct VkRenderPassCreateInfo { + VkStructureType sType; + const void* pNext; + VkRenderPassCreateFlags flags; + uint32_t attachmentCount; + const VkAttachmentDescription* pAttachments; + uint32_t subpassCount; + const VkSubpassDescription* pSubpasses; + uint32_t dependencyCount; + const VkSubpassDependency* pDependencies; +} VkRenderPassCreateInfo; + +typedef struct VkCommandPoolCreateInfo { + VkStructureType sType; + const void* pNext; + VkCommandPoolCreateFlags flags; + uint32_t queueFamilyIndex; +} VkCommandPoolCreateInfo; + +typedef struct VkCommandBufferAllocateInfo { + VkStructureType sType; + const void* pNext; + VkCommandPool commandPool; + VkCommandBufferLevel level; + uint32_t commandBufferCount; +} VkCommandBufferAllocateInfo; + +typedef struct VkCommandBufferInheritanceInfo { + VkStructureType sType; + const void* pNext; + VkRenderPass renderPass; + uint32_t subpass; + VkFramebuffer framebuffer; + VkBool32 occlusionQueryEnable; + VkQueryControlFlags queryFlags; + VkQueryPipelineStatisticFlags pipelineStatistics; +} VkCommandBufferInheritanceInfo; + +typedef struct VkCommandBufferBeginInfo { + VkStructureType sType; + const void* pNext; + 
VkCommandBufferUsageFlags flags; + const VkCommandBufferInheritanceInfo* pInheritanceInfo; +} VkCommandBufferBeginInfo; + +typedef struct VkBufferCopy { + VkDeviceSize srcOffset; + VkDeviceSize dstOffset; + VkDeviceSize size; +} VkBufferCopy; + +typedef struct VkImageSubresourceLayers { + VkImageAspectFlags aspectMask; + uint32_t mipLevel; + uint32_t baseArrayLayer; + uint32_t layerCount; +} VkImageSubresourceLayers; + +typedef struct VkImageCopy { + VkImageSubresourceLayers srcSubresource; + VkOffset3D srcOffset; + VkImageSubresourceLayers dstSubresource; + VkOffset3D dstOffset; + VkExtent3D extent; +} VkImageCopy; + +typedef struct VkImageBlit { + VkImageSubresourceLayers srcSubresource; + VkOffset3D srcOffsets[2]; + VkImageSubresourceLayers dstSubresource; + VkOffset3D dstOffsets[2]; +} VkImageBlit; + +typedef struct VkBufferImageCopy { + VkDeviceSize bufferOffset; + uint32_t bufferRowLength; + uint32_t bufferImageHeight; + VkImageSubresourceLayers imageSubresource; + VkOffset3D imageOffset; + VkExtent3D imageExtent; +} VkBufferImageCopy; + +typedef union VkClearColorValue { + float float32[4]; + int32_t int32[4]; + uint32_t uint32[4]; +} VkClearColorValue; + +typedef struct VkClearDepthStencilValue { + float depth; + uint32_t stencil; +} VkClearDepthStencilValue; + +typedef union VkClearValue { + VkClearColorValue color; + VkClearDepthStencilValue depthStencil; +} VkClearValue; + +typedef struct VkClearAttachment { + VkImageAspectFlags aspectMask; + uint32_t colorAttachment; + VkClearValue clearValue; +} VkClearAttachment; + +typedef struct VkClearRect { + VkRect2D rect; + uint32_t baseArrayLayer; + uint32_t layerCount; +} VkClearRect; + +typedef struct VkImageResolve { + VkImageSubresourceLayers srcSubresource; + VkOffset3D srcOffset; + VkImageSubresourceLayers dstSubresource; + VkOffset3D dstOffset; + VkExtent3D extent; +} VkImageResolve; + +typedef struct VkMemoryBarrier { + VkStructureType sType; + const void* pNext; + VkAccessFlags srcAccessMask; + 
VkAccessFlags dstAccessMask; +} VkMemoryBarrier; + +typedef struct VkBufferMemoryBarrier { + VkStructureType sType; + const void* pNext; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; + uint32_t srcQueueFamilyIndex; + uint32_t dstQueueFamilyIndex; + VkBuffer buffer; + VkDeviceSize offset; + VkDeviceSize size; +} VkBufferMemoryBarrier; + +typedef struct VkImageMemoryBarrier { + VkStructureType sType; + const void* pNext; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; + VkImageLayout oldLayout; + VkImageLayout newLayout; + uint32_t srcQueueFamilyIndex; + uint32_t dstQueueFamilyIndex; + VkImage image; + VkImageSubresourceRange subresourceRange; +} VkImageMemoryBarrier; + +typedef struct VkRenderPassBeginInfo { + VkStructureType sType; + const void* pNext; + VkRenderPass renderPass; + VkFramebuffer framebuffer; + VkRect2D renderArea; + uint32_t clearValueCount; + const VkClearValue* pClearValues; +} VkRenderPassBeginInfo; + +typedef struct VkDispatchIndirectCommand { + uint32_t x; + uint32_t y; + uint32_t z; +} VkDispatchIndirectCommand; + +typedef struct VkDrawIndexedIndirectCommand { + uint32_t indexCount; + uint32_t instanceCount; + uint32_t firstIndex; + int32_t vertexOffset; + uint32_t firstInstance; +} VkDrawIndexedIndirectCommand; + +typedef struct VkDrawIndirectCommand { + uint32_t vertexCount; + uint32_t instanceCount; + uint32_t firstVertex; + uint32_t firstInstance; +} VkDrawIndirectCommand; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance); +typedef void (VKAPI_PTR *PFN_vkDestroyInstance)(VkInstance instance, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures* 
pFeatures); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties* pImageFormatProperties); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceQueueFamilyProperties)(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties); +typedef PFN_vkVoidFunction (VKAPI_PTR *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char* pName); +typedef PFN_vkVoidFunction (VKAPI_PTR *PFN_vkGetDeviceProcAddr)(VkDevice device, const char* pName); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDevice* pDevice); +typedef void (VKAPI_PTR *PFN_vkDestroyDevice)(VkDevice device, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceExtensionProperties)(const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char* pLayerName, uint32_t* pPropertyCount, VkExtensionProperties* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceLayerProperties)(uint32_t* pPropertyCount, VkLayerProperties* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkEnumerateDeviceLayerProperties)(VkPhysicalDevice physicalDevice, 
uint32_t* pPropertyCount, VkLayerProperties* pProperties); +typedef void (VKAPI_PTR *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue* pQueue); +typedef VkResult (VKAPI_PTR *PFN_vkQueueSubmit)(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence); +typedef VkResult (VKAPI_PTR *PFN_vkQueueWaitIdle)(VkQueue queue); +typedef VkResult (VKAPI_PTR *PFN_vkDeviceWaitIdle)(VkDevice device); +typedef VkResult (VKAPI_PTR *PFN_vkAllocateMemory)(VkDevice device, const VkMemoryAllocateInfo* pAllocateInfo, const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMemory); +typedef void (VKAPI_PTR *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory memory, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory memory, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData); +typedef void (VKAPI_PTR *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory memory); +typedef VkResult (VKAPI_PTR *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges); +typedef VkResult (VKAPI_PTR *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange* pMemoryRanges); +typedef void (VKAPI_PTR *PFN_vkGetDeviceMemoryCommitment)(VkDevice device, VkDeviceMemory memory, VkDeviceSize* pCommittedMemoryInBytes); +typedef VkResult (VKAPI_PTR *PFN_vkBindBufferMemory)(VkDevice device, VkBuffer buffer, VkDeviceMemory memory, VkDeviceSize memoryOffset); +typedef VkResult (VKAPI_PTR *PFN_vkBindImageMemory)(VkDevice device, VkImage image, VkDeviceMemory memory, VkDeviceSize memoryOffset); +typedef void (VKAPI_PTR *PFN_vkGetBufferMemoryRequirements)(VkDevice device, VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements); +typedef void (VKAPI_PTR *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkImage image, VkMemoryRequirements* 
pMemoryRequirements); +typedef void (VKAPI_PTR *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements* pSparseMemoryRequirements); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceSparseImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t* pPropertyCount, VkSparseImageFormatProperties* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkQueueBindSparse)(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo* pBindInfo, VkFence fence); +typedef VkResult (VKAPI_PTR *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkFence* pFence); +typedef void (VKAPI_PTR *PFN_vkDestroyFence)(VkDevice device, VkFence fence, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); +typedef VkResult (VKAPI_PTR *PFN_vkGetFenceStatus)(VkDevice device, VkFence fence); +typedef VkResult (VKAPI_PTR *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, uint64_t timeout); +typedef VkResult (VKAPI_PTR *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSemaphore* pSemaphore); +typedef void (VKAPI_PTR *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore semaphore, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateEvent)(VkDevice device, const VkEventCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkEvent* pEvent); +typedef void (VKAPI_PTR *PFN_vkDestroyEvent)(VkDevice device, VkEvent event, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkGetEventStatus)(VkDevice device, VkEvent event); +typedef VkResult (VKAPI_PTR 
*PFN_vkSetEvent)(VkDevice device, VkEvent event); +typedef VkResult (VKAPI_PTR *PFN_vkResetEvent)(VkDevice device, VkEvent event); +typedef VkResult (VKAPI_PTR *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkQueryPool* pQueryPool); +typedef void (VKAPI_PTR *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount, size_t dataSize, void* pData, VkDeviceSize stride, VkQueryResultFlags flags); +typedef VkResult (VKAPI_PTR *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkBuffer* pBuffer); +typedef void (VKAPI_PTR *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkBufferView* pView); +typedef void (VKAPI_PTR *PFN_vkDestroyBufferView)(VkDevice device, VkBufferView bufferView, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkImage* pImage); +typedef void (VKAPI_PTR *PFN_vkDestroyImage)(VkDevice device, VkImage image, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource* pSubresource, VkSubresourceLayout* pLayout); +typedef VkResult (VKAPI_PTR *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkImageView* pView); +typedef void (VKAPI_PTR *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView, const VkAllocationCallbacks* 
pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkShaderModule* pShaderModule); +typedef void (VKAPI_PTR *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipelineCache* pPipelineCache); +typedef void (VKAPI_PTR *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, size_t* pDataSize, void* pData); +typedef VkResult (VKAPI_PTR *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache dstCache, uint32_t srcCacheCount, const VkPipelineCache* pSrcCaches); +typedef VkResult (VKAPI_PTR *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkGraphicsPipelineCreateInfo* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); +typedef VkResult (VKAPI_PTR *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkComputePipelineCreateInfo* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); +typedef void (VKAPI_PTR *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipelineLayout* pPipelineLayout); +typedef void (VKAPI_PTR *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelineLayout pipelineLayout, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR 
*PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSampler* pSampler); +typedef void (VKAPI_PTR *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorSetLayout* pSetLayout); +typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDescriptorPool)(VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorPool* pDescriptorPool); +typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorPoolResetFlags flags); +typedef VkResult (VKAPI_PTR *PFN_vkAllocateDescriptorSets)(VkDevice device, const VkDescriptorSetAllocateInfo* pAllocateInfo, VkDescriptorSet* pDescriptorSets); +typedef VkResult (VKAPI_PTR *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t descriptorSetCount, const VkDescriptorSet* pDescriptorSets); +typedef void (VKAPI_PTR *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies); +typedef VkResult (VKAPI_PTR *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkFramebuffer* pFramebuffer); +typedef void (VKAPI_PTR *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer, const 
VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass); +typedef void (VKAPI_PTR *PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D* pGranularity); +typedef VkResult (VKAPI_PTR *PFN_vkCreateCommandPool)(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkCommandPool* pCommandPool); +typedef void (VKAPI_PTR *PFN_vkDestroyCommandPool)(VkDevice device, VkCommandPool commandPool, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkResetCommandPool)(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags); +typedef VkResult (VKAPI_PTR *PFN_vkAllocateCommandBuffers)(VkDevice device, const VkCommandBufferAllocateInfo* pAllocateInfo, VkCommandBuffer* pCommandBuffers); +typedef void (VKAPI_PTR *PFN_vkFreeCommandBuffers)(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers); +typedef VkResult (VKAPI_PTR *PFN_vkBeginCommandBuffer)(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo* pBeginInfo); +typedef VkResult (VKAPI_PTR *PFN_vkEndCommandBuffer)(VkCommandBuffer commandBuffer); +typedef VkResult (VKAPI_PTR *PFN_vkResetCommandBuffer)(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags); +typedef void (VKAPI_PTR *PFN_vkCmdBindPipeline)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); +typedef void (VKAPI_PTR *PFN_vkCmdSetViewport)(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount, const VkViewport* pViewports); +typedef void (VKAPI_PTR *PFN_vkCmdSetScissor)(VkCommandBuffer commandBuffer, uint32_t 
firstScissor, uint32_t scissorCount, const VkRect2D* pScissors); +typedef void (VKAPI_PTR *PFN_vkCmdSetLineWidth)(VkCommandBuffer commandBuffer, float lineWidth); +typedef void (VKAPI_PTR *PFN_vkCmdSetDepthBias)(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp, float depthBiasSlopeFactor); +typedef void (VKAPI_PTR *PFN_vkCmdSetBlendConstants)(VkCommandBuffer commandBuffer, const float blendConstants[4]); +typedef void (VKAPI_PTR *PFN_vkCmdSetDepthBounds)(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds); +typedef void (VKAPI_PTR *PFN_vkCmdSetStencilCompareMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask); +typedef void (VKAPI_PTR *PFN_vkCmdSetStencilWriteMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask); +typedef void (VKAPI_PTR *PFN_vkCmdSetStencilReference)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference); +typedef void (VKAPI_PTR *PFN_vkCmdBindDescriptorSets)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets); +typedef void (VKAPI_PTR *PFN_vkCmdBindIndexBuffer)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); +typedef void (VKAPI_PTR *PFN_vkCmdBindVertexBuffers)(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets); +typedef void (VKAPI_PTR *PFN_vkCmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); +typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexed)(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); +typedef void (VKAPI_PTR 
*PFN_vkCmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride); +typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride); +typedef void (VKAPI_PTR *PFN_vkCmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z); +typedef void (VKAPI_PTR *PFN_vkCmdDispatchIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset); +typedef void (VKAPI_PTR *PFN_vkCmdCopyBuffer)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferCopy* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdCopyImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageCopy* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdBlitImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkFilter filter); +typedef void (VKAPI_PTR *PFN_vkCmdCopyBufferToImage)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdCopyImageToBuffer)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdUpdateBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const uint32_t* pData); +typedef void (VKAPI_PTR *PFN_vkCmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize size, uint32_t data); +typedef void (VKAPI_PTR *PFN_vkCmdClearColorImage)(VkCommandBuffer 
commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); +typedef void (VKAPI_PTR *PFN_vkCmdClearDepthStencilImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges); +typedef void (VKAPI_PTR *PFN_vkCmdClearAttachments)(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment* pAttachments, uint32_t rectCount, const VkClearRect* pRects); +typedef void (VKAPI_PTR *PFN_vkCmdResolveImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageResolve* pRegions); +typedef void (VKAPI_PTR *PFN_vkCmdSetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); +typedef void (VKAPI_PTR *PFN_vkCmdResetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); +typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, uint32_t memoryBarrierCount, const VkMemoryBarrier* pMemoryBarriers, uint32_t bufferMemoryBarrierCount, const VkBufferMemoryBarrier* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier* pImageMemoryBarriers); +typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags, uint32_t memoryBarrierCount, const VkMemoryBarrier* pMemoryBarriers, uint32_t bufferMemoryBarrierCount, const VkBufferMemoryBarrier* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier* pImageMemoryBarriers); +typedef void (VKAPI_PTR *PFN_vkCmdBeginQuery)(VkCommandBuffer 
commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags); +typedef void (VKAPI_PTR *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query); +typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount); +typedef void (VKAPI_PTR *PFN_vkCmdWriteTimestamp)(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkQueryPool queryPool, uint32_t query); +typedef void (VKAPI_PTR *PFN_vkCmdCopyQueryPoolResults)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags); +typedef void (VKAPI_PTR *PFN_vkCmdPushConstants)(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, const void* pValues); +typedef void (VKAPI_PTR *PFN_vkCmdBeginRenderPass)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, VkSubpassContents contents); +typedef void (VKAPI_PTR *PFN_vkCmdNextSubpass)(VkCommandBuffer commandBuffer, VkSubpassContents contents); +typedef void (VKAPI_PTR *PFN_vkCmdEndRenderPass)(VkCommandBuffer commandBuffer); +typedef void (VKAPI_PTR *PFN_vkCmdExecuteCommands)(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateInstance( + const VkInstanceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkInstance* pInstance); + +VKAPI_ATTR void VKAPI_CALL vkDestroyInstance( + VkInstance instance, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDevices( + VkInstance instance, + uint32_t* pPhysicalDeviceCount, + VkPhysicalDevice* pPhysicalDevices); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFeatures( + VkPhysicalDevice 
physicalDevice, + VkPhysicalDeviceFeatures* pFeatures); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties* pFormatProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageCreateFlags flags, + VkImageFormatProperties* pImageFormatProperties); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties* pProperties); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyProperties( + VkPhysicalDevice physicalDevice, + uint32_t* pQueueFamilyPropertyCount, + VkQueueFamilyProperties* pQueueFamilyProperties); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceMemoryProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties* pMemoryProperties); + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr( + VkInstance instance, + const char* pName); + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr( + VkDevice device, + const char* pName); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDevice* pDevice); + +VKAPI_ATTR void VKAPI_CALL vkDestroyDevice( + VkDevice device, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceExtensionProperties( + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceLayerProperties( + uint32_t* pPropertyCount, + 
VkLayerProperties* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceLayerProperties( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkLayerProperties* pProperties); + +VKAPI_ATTR void VKAPI_CALL vkGetDeviceQueue( + VkDevice device, + uint32_t queueFamilyIndex, + uint32_t queueIndex, + VkQueue* pQueue); + +VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence fence); + +VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitIdle( + VkQueue queue); + +VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle( + VkDevice device); + +VKAPI_ATTR VkResult VKAPI_CALL vkAllocateMemory( + VkDevice device, + const VkMemoryAllocateInfo* pAllocateInfo, + const VkAllocationCallbacks* pAllocator, + VkDeviceMemory* pMemory); + +VKAPI_ATTR void VKAPI_CALL vkFreeMemory( + VkDevice device, + VkDeviceMemory memory, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkMapMemory( + VkDevice device, + VkDeviceMemory memory, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void** ppData); + +VKAPI_ATTR void VKAPI_CALL vkUnmapMemory( + VkDevice device, + VkDeviceMemory memory); + +VKAPI_ATTR VkResult VKAPI_CALL vkFlushMappedMemoryRanges( + VkDevice device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange* pMemoryRanges); + +VKAPI_ATTR VkResult VKAPI_CALL vkInvalidateMappedMemoryRanges( + VkDevice device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange* pMemoryRanges); + +VKAPI_ATTR void VKAPI_CALL vkGetDeviceMemoryCommitment( + VkDevice device, + VkDeviceMemory memory, + VkDeviceSize* pCommittedMemoryInBytes); + +VKAPI_ATTR VkResult VKAPI_CALL vkBindBufferMemory( + VkDevice device, + VkBuffer buffer, + VkDeviceMemory memory, + VkDeviceSize memoryOffset); + +VKAPI_ATTR VkResult VKAPI_CALL vkBindImageMemory( + VkDevice device, + VkImage image, + VkDeviceMemory memory, + VkDeviceSize memoryOffset); + +VKAPI_ATTR void VKAPI_CALL 
vkGetBufferMemoryRequirements( + VkDevice device, + VkBuffer buffer, + VkMemoryRequirements* pMemoryRequirements); + +VKAPI_ATTR void VKAPI_CALL vkGetImageMemoryRequirements( + VkDevice device, + VkImage image, + VkMemoryRequirements* pMemoryRequirements); + +VKAPI_ATTR void VKAPI_CALL vkGetImageSparseMemoryRequirements( + VkDevice device, + VkImage image, + uint32_t* pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements* pSparseMemoryRequirements); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkSampleCountFlagBits samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t* pPropertyCount, + VkSparseImageFormatProperties* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkQueueBindSparse( + VkQueue queue, + uint32_t bindInfoCount, + const VkBindSparseInfo* pBindInfo, + VkFence fence); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateFence( + VkDevice device, + const VkFenceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkFence* pFence); + +VKAPI_ATTR void VKAPI_CALL vkDestroyFence( + VkDevice device, + VkFence fence, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkResetFences( + VkDevice device, + uint32_t fenceCount, + const VkFence* pFences); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetFenceStatus( + VkDevice device, + VkFence fence); + +VKAPI_ATTR VkResult VKAPI_CALL vkWaitForFences( + VkDevice device, + uint32_t fenceCount, + const VkFence* pFences, + VkBool32 waitAll, + uint64_t timeout); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore( + VkDevice device, + const VkSemaphoreCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSemaphore* pSemaphore); + +VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore( + VkDevice device, + VkSemaphore semaphore, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateEvent( + VkDevice device, + const 
VkEventCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkEvent* pEvent); + +VKAPI_ATTR void VKAPI_CALL vkDestroyEvent( + VkDevice device, + VkEvent event, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetEventStatus( + VkDevice device, + VkEvent event); + +VKAPI_ATTR VkResult VKAPI_CALL vkSetEvent( + VkDevice device, + VkEvent event); + +VKAPI_ATTR VkResult VKAPI_CALL vkResetEvent( + VkDevice device, + VkEvent event); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateQueryPool( + VkDevice device, + const VkQueryPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkQueryPool* pQueryPool); + +VKAPI_ATTR void VKAPI_CALL vkDestroyQueryPool( + VkDevice device, + VkQueryPool queryPool, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetQueryPoolResults( + VkDevice device, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + size_t dataSize, + void* pData, + VkDeviceSize stride, + VkQueryResultFlags flags); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateBuffer( + VkDevice device, + const VkBufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkBuffer* pBuffer); + +VKAPI_ATTR void VKAPI_CALL vkDestroyBuffer( + VkDevice device, + VkBuffer buffer, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateBufferView( + VkDevice device, + const VkBufferViewCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkBufferView* pView); + +VKAPI_ATTR void VKAPI_CALL vkDestroyBufferView( + VkDevice device, + VkBufferView bufferView, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateImage( + VkDevice device, + const VkImageCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkImage* pImage); + +VKAPI_ATTR void VKAPI_CALL vkDestroyImage( + VkDevice device, + VkImage image, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR void VKAPI_CALL 
vkGetImageSubresourceLayout( + VkDevice device, + VkImage image, + const VkImageSubresource* pSubresource, + VkSubresourceLayout* pLayout); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateImageView( + VkDevice device, + const VkImageViewCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkImageView* pView); + +VKAPI_ATTR void VKAPI_CALL vkDestroyImageView( + VkDevice device, + VkImageView imageView, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateShaderModule( + VkDevice device, + const VkShaderModuleCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkShaderModule* pShaderModule); + +VKAPI_ATTR void VKAPI_CALL vkDestroyShaderModule( + VkDevice device, + VkShaderModule shaderModule, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineCache( + VkDevice device, + const VkPipelineCacheCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineCache* pPipelineCache); + +VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineCache( + VkDevice device, + VkPipelineCache pipelineCache, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineCacheData( + VkDevice device, + VkPipelineCache pipelineCache, + size_t* pDataSize, + void* pData); + +VKAPI_ATTR VkResult VKAPI_CALL vkMergePipelineCaches( + VkDevice device, + VkPipelineCache dstCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines( + VkDevice device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines( + VkDevice device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkComputePipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines); + 
+VKAPI_ATTR void VKAPI_CALL vkDestroyPipeline( + VkDevice device, + VkPipeline pipeline, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineLayout( + VkDevice device, + const VkPipelineLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineLayout* pPipelineLayout); + +VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineLayout( + VkDevice device, + VkPipelineLayout pipelineLayout, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSampler( + VkDevice device, + const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSampler* pSampler); + +VKAPI_ATTR void VKAPI_CALL vkDestroySampler( + VkDevice device, + VkSampler sampler, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorSetLayout( + VkDevice device, + const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDescriptorSetLayout* pSetLayout); + +VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorSetLayout( + VkDevice device, + VkDescriptorSetLayout descriptorSetLayout, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorPool( + VkDevice device, + const VkDescriptorPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDescriptorPool* pDescriptorPool); + +VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorPool( + VkDevice device, + VkDescriptorPool descriptorPool, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkResetDescriptorPool( + VkDevice device, + VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags); + +VKAPI_ATTR VkResult VKAPI_CALL vkAllocateDescriptorSets( + VkDevice device, + const VkDescriptorSetAllocateInfo* pAllocateInfo, + VkDescriptorSet* pDescriptorSets); + +VKAPI_ATTR VkResult VKAPI_CALL vkFreeDescriptorSets( + VkDevice device, + VkDescriptorPool descriptorPool, + uint32_t 
descriptorSetCount, + const VkDescriptorSet* pDescriptorSets); + +VKAPI_ATTR void VKAPI_CALL vkUpdateDescriptorSets( + VkDevice device, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet* pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet* pDescriptorCopies); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateFramebuffer( + VkDevice device, + const VkFramebufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkFramebuffer* pFramebuffer); + +VKAPI_ATTR void VKAPI_CALL vkDestroyFramebuffer( + VkDevice device, + VkFramebuffer framebuffer, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateRenderPass( + VkDevice device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +VKAPI_ATTR void VKAPI_CALL vkDestroyRenderPass( + VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR void VKAPI_CALL vkGetRenderAreaGranularity( + VkDevice device, + VkRenderPass renderPass, + VkExtent2D* pGranularity); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool( + VkDevice device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCommandPool); + +VKAPI_ATTR void VKAPI_CALL vkDestroyCommandPool( + VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags); + +VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers( + VkDevice device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers); + +VKAPI_ATTR void VKAPI_CALL vkFreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); + +VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer( + VkCommandBuffer 
commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo); + +VKAPI_ATTR VkResult VKAPI_CALL vkEndCommandBuffer( + VkCommandBuffer commandBuffer); + +VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags); + +VKAPI_ATTR void VKAPI_CALL vkCmdBindPipeline( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline pipeline); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetViewport( + VkCommandBuffer commandBuffer, + uint32_t firstViewport, + uint32_t viewportCount, + const VkViewport* pViewports); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetScissor( + VkCommandBuffer commandBuffer, + uint32_t firstScissor, + uint32_t scissorCount, + const VkRect2D* pScissors); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetLineWidth( + VkCommandBuffer commandBuffer, + float lineWidth); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBias( + VkCommandBuffer commandBuffer, + float depthBiasConstantFactor, + float depthBiasClamp, + float depthBiasSlopeFactor); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetBlendConstants( + VkCommandBuffer commandBuffer, + const float blendConstants[4]); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBounds( + VkCommandBuffer commandBuffer, + float minDepthBounds, + float maxDepthBounds); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilCompareMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t compareMask); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilWriteMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t writeMask); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilReference( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t reference); + +VKAPI_ATTR void VKAPI_CALL vkCmdBindDescriptorSets( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout layout, + uint32_t firstSet, + uint32_t descriptorSetCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const 
uint32_t* pDynamicOffsets); + +VKAPI_ATTR void VKAPI_CALL vkCmdBindIndexBuffer( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkIndexType indexType); + +VKAPI_ATTR void VKAPI_CALL vkCmdBindVertexBuffers( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets); + +VKAPI_ATTR void VKAPI_CALL vkCmdDraw( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance); + +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance); + +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +VKAPI_ATTR void VKAPI_CALL vkCmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z); + +VKAPI_ATTR void VKAPI_CALL vkCmdDispatchIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions); + +VKAPI_ATTR void VKAPI_CALL vkCmdBlitImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageBlit* 
pRegions, + VkFilter filter); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions); + +VKAPI_ATTR void VKAPI_CALL vkCmdUpdateBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const uint32_t* pData); + +VKAPI_ATTR void VKAPI_CALL vkCmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data); + +VKAPI_ATTR void VKAPI_CALL vkCmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +VKAPI_ATTR void VKAPI_CALL vkCmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges); + +VKAPI_ATTR void VKAPI_CALL vkCmdClearAttachments( + VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkClearAttachment* pAttachments, + uint32_t rectCount, + const VkClearRect* pRects); + +VKAPI_ATTR void VKAPI_CALL vkCmdResolveImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageResolve* pRegions); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetEvent( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask); + +VKAPI_ATTR void VKAPI_CALL vkCmdResetEvent( + VkCommandBuffer commandBuffer, + VkEvent event, + 
VkPipelineStageFlags stageMask); + +VKAPI_ATTR void VKAPI_CALL vkCmdWaitEvents( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers); + +VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( + VkCommandBuffer commandBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask, + VkDependencyFlags dependencyFlags, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers); + +VKAPI_ATTR void VKAPI_CALL vkCmdBeginQuery( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags); + +VKAPI_ATTR void VKAPI_CALL vkCmdEndQuery( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query); + +VKAPI_ATTR void VKAPI_CALL vkCmdResetQueryPool( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount); + +VKAPI_ATTR void VKAPI_CALL vkCmdWriteTimestamp( + VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t query); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyQueryPoolResults( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize stride, + VkQueryResultFlags flags); + +VKAPI_ATTR void VKAPI_CALL vkCmdPushConstants( + VkCommandBuffer commandBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t offset, + uint32_t size, + const void* 
pValues); + +VKAPI_ATTR void VKAPI_CALL vkCmdBeginRenderPass( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents); + +VKAPI_ATTR void VKAPI_CALL vkCmdNextSubpass( + VkCommandBuffer commandBuffer, + VkSubpassContents contents); + +VKAPI_ATTR void VKAPI_CALL vkCmdEndRenderPass( + VkCommandBuffer commandBuffer); + +VKAPI_ATTR void VKAPI_CALL vkCmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers); +#endif + +#define VK_KHR_surface 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSurfaceKHR) + +#define VK_KHR_SURFACE_SPEC_VERSION 25 +#define VK_KHR_SURFACE_EXTENSION_NAME "VK_KHR_surface" + + +typedef enum VkColorSpaceKHR { + VK_COLORSPACE_SRGB_NONLINEAR_KHR = 0, + VK_COLORSPACE_BEGIN_RANGE = VK_COLORSPACE_SRGB_NONLINEAR_KHR, + VK_COLORSPACE_END_RANGE = VK_COLORSPACE_SRGB_NONLINEAR_KHR, + VK_COLORSPACE_RANGE_SIZE = (VK_COLORSPACE_SRGB_NONLINEAR_KHR - VK_COLORSPACE_SRGB_NONLINEAR_KHR + 1), + VK_COLORSPACE_MAX_ENUM = 0x7FFFFFFF +} VkColorSpaceKHR; + +typedef enum VkPresentModeKHR { + VK_PRESENT_MODE_IMMEDIATE_KHR = 0, + VK_PRESENT_MODE_MAILBOX_KHR = 1, + VK_PRESENT_MODE_FIFO_KHR = 2, + VK_PRESENT_MODE_FIFO_RELAXED_KHR = 3, + VK_PRESENT_MODE_BEGIN_RANGE = VK_PRESENT_MODE_IMMEDIATE_KHR, + VK_PRESENT_MODE_END_RANGE = VK_PRESENT_MODE_FIFO_RELAXED_KHR, + VK_PRESENT_MODE_RANGE_SIZE = (VK_PRESENT_MODE_FIFO_RELAXED_KHR - VK_PRESENT_MODE_IMMEDIATE_KHR + 1), + VK_PRESENT_MODE_MAX_ENUM = 0x7FFFFFFF +} VkPresentModeKHR; + + +typedef enum VkSurfaceTransformFlagBitsKHR { + VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR = 0x00000001, + VK_SURFACE_TRANSFORM_ROTATE_90_BIT_KHR = 0x00000002, + VK_SURFACE_TRANSFORM_ROTATE_180_BIT_KHR = 0x00000004, + VK_SURFACE_TRANSFORM_ROTATE_270_BIT_KHR = 0x00000008, + VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_BIT_KHR = 0x00000010, + VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_90_BIT_KHR = 0x00000020, + 
VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_180_BIT_KHR = 0x00000040, + VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_270_BIT_KHR = 0x00000080, + VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR = 0x00000100, +} VkSurfaceTransformFlagBitsKHR; +typedef VkFlags VkSurfaceTransformFlagsKHR; + +typedef enum VkCompositeAlphaFlagBitsKHR { + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR = 0x00000001, + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR = 0x00000002, + VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR = 0x00000004, + VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR = 0x00000008, +} VkCompositeAlphaFlagBitsKHR; +typedef VkFlags VkCompositeAlphaFlagsKHR; + +typedef struct VkSurfaceCapabilitiesKHR { + uint32_t minImageCount; + uint32_t maxImageCount; + VkExtent2D currentExtent; + VkExtent2D minImageExtent; + VkExtent2D maxImageExtent; + uint32_t maxImageArrayLayers; + VkSurfaceTransformFlagsKHR supportedTransforms; + VkSurfaceTransformFlagBitsKHR currentTransform; + VkCompositeAlphaFlagsKHR supportedCompositeAlpha; + VkImageUsageFlags supportedUsageFlags; +} VkSurfaceCapabilitiesKHR; + +typedef struct VkSurfaceFormatKHR { + VkFormat format; + VkColorSpaceKHR colorSpace; +} VkSurfaceFormatKHR; + + +typedef void (VKAPI_PTR *PFN_vkDestroySurfaceKHR)(VkInstance instance, VkSurfaceKHR surface, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, VkSurfaceKHR surface, VkBool32* pSupported); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceFormatsKHR)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, uint32_t* pSurfaceFormatCount, VkSurfaceFormatKHR* pSurfaceFormats); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfacePresentModesKHR)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, 
uint32_t* pPresentModeCount, VkPresentModeKHR* pPresentModes); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkDestroySurfaceKHR( + VkInstance instance, + VkSurfaceKHR surface, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR surface, + VkBool32* pSupported); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormatsKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes); +#endif + +#define VK_KHR_swapchain 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSwapchainKHR) + +#define VK_KHR_SWAPCHAIN_SPEC_VERSION 67 +#define VK_KHR_SWAPCHAIN_EXTENSION_NAME "VK_KHR_swapchain" + +typedef VkFlags VkSwapchainCreateFlagsKHR; + +typedef struct VkSwapchainCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkSwapchainCreateFlagsKHR flags; + VkSurfaceKHR surface; + uint32_t minImageCount; + VkFormat imageFormat; + VkColorSpaceKHR imageColorSpace; + VkExtent2D imageExtent; + uint32_t imageArrayLayers; + VkImageUsageFlags imageUsage; + VkSharingMode imageSharingMode; + uint32_t queueFamilyIndexCount; + const uint32_t* pQueueFamilyIndices; + VkSurfaceTransformFlagBitsKHR preTransform; + VkCompositeAlphaFlagBitsKHR compositeAlpha; + VkPresentModeKHR presentMode; + VkBool32 clipped; + VkSwapchainKHR oldSwapchain; +} VkSwapchainCreateInfoKHR; + +typedef struct VkPresentInfoKHR { + VkStructureType sType; + const void* pNext; + uint32_t waitSemaphoreCount; + 
const VkSemaphore* pWaitSemaphores; + uint32_t swapchainCount; + const VkSwapchainKHR* pSwapchains; + const uint32_t* pImageIndices; + VkResult* pResults; +} VkPresentInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateSwapchainKHR)(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain); +typedef void (VKAPI_PTR *PFN_vkDestroySwapchainKHR)(VkDevice device, VkSwapchainKHR swapchain, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainImagesKHR)(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pSwapchainImageCount, VkImage* pSwapchainImages); +typedef VkResult (VKAPI_PTR *PFN_vkAcquireNextImageKHR)(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, VkFence fence, uint32_t* pImageIndex); +typedef VkResult (VKAPI_PTR *PFN_vkQueuePresentKHR)(VkQueue queue, const VkPresentInfoKHR* pPresentInfo); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR( + VkDevice device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSwapchainKHR* pSwapchain); + +VKAPI_ATTR void VKAPI_CALL vkDestroySwapchainKHR( + VkDevice device, + VkSwapchainKHR swapchain, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainImagesKHR( + VkDevice device, + VkSwapchainKHR swapchain, + uint32_t* pSwapchainImageCount, + VkImage* pSwapchainImages); + +VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR( + VkDevice device, + VkSwapchainKHR swapchain, + uint64_t timeout, + VkSemaphore semaphore, + VkFence fence, + uint32_t* pImageIndex); + +VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR( + VkQueue queue, + const VkPresentInfoKHR* pPresentInfo); +#endif + +#define VK_KHR_display 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayKHR) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayModeKHR) + +#define VK_KHR_DISPLAY_SPEC_VERSION 21 +#define 
VK_KHR_DISPLAY_EXTENSION_NAME "VK_KHR_display" + + +typedef enum VkDisplayPlaneAlphaFlagBitsKHR { + VK_DISPLAY_PLANE_ALPHA_OPAQUE_BIT_KHR = 0x00000001, + VK_DISPLAY_PLANE_ALPHA_GLOBAL_BIT_KHR = 0x00000002, + VK_DISPLAY_PLANE_ALPHA_PER_PIXEL_BIT_KHR = 0x00000004, + VK_DISPLAY_PLANE_ALPHA_PER_PIXEL_PREMULTIPLIED_BIT_KHR = 0x00000008, +} VkDisplayPlaneAlphaFlagBitsKHR; +typedef VkFlags VkDisplayModeCreateFlagsKHR; +typedef VkFlags VkDisplayPlaneAlphaFlagsKHR; +typedef VkFlags VkDisplaySurfaceCreateFlagsKHR; + +typedef struct VkDisplayPropertiesKHR { + VkDisplayKHR display; + const char* displayName; + VkExtent2D physicalDimensions; + VkExtent2D physicalResolution; + VkSurfaceTransformFlagsKHR supportedTransforms; + VkBool32 planeReorderPossible; + VkBool32 persistentContent; +} VkDisplayPropertiesKHR; + +typedef struct VkDisplayModeParametersKHR { + VkExtent2D visibleRegion; + uint32_t refreshRate; +} VkDisplayModeParametersKHR; + +typedef struct VkDisplayModePropertiesKHR { + VkDisplayModeKHR displayMode; + VkDisplayModeParametersKHR parameters; +} VkDisplayModePropertiesKHR; + +typedef struct VkDisplayModeCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkDisplayModeCreateFlagsKHR flags; + VkDisplayModeParametersKHR parameters; +} VkDisplayModeCreateInfoKHR; + +typedef struct VkDisplayPlaneCapabilitiesKHR { + VkDisplayPlaneAlphaFlagsKHR supportedAlpha; + VkOffset2D minSrcPosition; + VkOffset2D maxSrcPosition; + VkExtent2D minSrcExtent; + VkExtent2D maxSrcExtent; + VkOffset2D minDstPosition; + VkOffset2D maxDstPosition; + VkExtent2D minDstExtent; + VkExtent2D maxDstExtent; +} VkDisplayPlaneCapabilitiesKHR; + +typedef struct VkDisplayPlanePropertiesKHR { + VkDisplayKHR currentDisplay; + uint32_t currentStackIndex; +} VkDisplayPlanePropertiesKHR; + +typedef struct VkDisplaySurfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkDisplaySurfaceCreateFlagsKHR flags; + VkDisplayModeKHR displayMode; + uint32_t planeIndex; + uint32_t 
planeStackIndex; + VkSurfaceTransformFlagBitsKHR transform; + float globalAlpha; + VkDisplayPlaneAlphaFlagBitsKHR alphaMode; + VkExtent2D imageExtent; +} VkDisplaySurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceDisplayPropertiesKHR)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPropertiesKHR* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceDisplayPlanePropertiesKHR)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPlanePropertiesKHR* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkGetDisplayPlaneSupportedDisplaysKHR)(VkPhysicalDevice physicalDevice, uint32_t planeIndex, uint32_t* pDisplayCount, VkDisplayKHR* pDisplays); +typedef VkResult (VKAPI_PTR *PFN_vkGetDisplayModePropertiesKHR)(VkPhysicalDevice physicalDevice, VkDisplayKHR display, uint32_t* pPropertyCount, VkDisplayModePropertiesKHR* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDisplayModeKHR)(VkPhysicalDevice physicalDevice, VkDisplayKHR display, const VkDisplayModeCreateInfoKHR*pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDisplayModeKHR* pMode); +typedef VkResult (VKAPI_PTR *PFN_vkGetDisplayPlaneCapabilitiesKHR)(VkPhysicalDevice physicalDevice, VkDisplayModeKHR mode, uint32_t planeIndex, VkDisplayPlaneCapabilitiesKHR* pCapabilities); +typedef VkResult (VKAPI_PTR *PFN_vkCreateDisplayPlaneSurfaceKHR)(VkInstance instance, const VkDisplaySurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceDisplayPropertiesKHR( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkDisplayPropertiesKHR* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceDisplayPlanePropertiesKHR( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkDisplayPlanePropertiesKHR* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDisplayPlaneSupportedDisplaysKHR( + 
VkPhysicalDevice physicalDevice, + uint32_t planeIndex, + uint32_t* pDisplayCount, + VkDisplayKHR* pDisplays); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDisplayModePropertiesKHR( + VkPhysicalDevice physicalDevice, + VkDisplayKHR display, + uint32_t* pPropertyCount, + VkDisplayModePropertiesKHR* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDisplayModeKHR( + VkPhysicalDevice physicalDevice, + VkDisplayKHR display, + const VkDisplayModeCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDisplayModeKHR* pMode); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDisplayPlaneCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkDisplayModeKHR mode, + uint32_t planeIndex, + VkDisplayPlaneCapabilitiesKHR* pCapabilities); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDisplayPlaneSurfaceKHR( + VkInstance instance, + const VkDisplaySurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); +#endif + +#define VK_KHR_display_swapchain 1 +#define VK_KHR_DISPLAY_SWAPCHAIN_SPEC_VERSION 9 +#define VK_KHR_DISPLAY_SWAPCHAIN_EXTENSION_NAME "VK_KHR_display_swapchain" + +typedef struct VkDisplayPresentInfoKHR { + VkStructureType sType; + const void* pNext; + VkRect2D srcRect; + VkRect2D dstRect; + VkBool32 persistent; +} VkDisplayPresentInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateSharedSwapchainsKHR)(VkDevice device, uint32_t swapchainCount, const VkSwapchainCreateInfoKHR* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchains); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSharedSwapchainsKHR( + VkDevice device, + uint32_t swapchainCount, + const VkSwapchainCreateInfoKHR* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkSwapchainKHR* pSwapchains); +#endif + +#ifdef VK_USE_PLATFORM_XLIB_KHR +#define VK_KHR_xlib_surface 1 +#include + +#define VK_KHR_XLIB_SURFACE_SPEC_VERSION 6 +#define VK_KHR_XLIB_SURFACE_EXTENSION_NAME "VK_KHR_xlib_surface" + +typedef 
VkFlags VkXlibSurfaceCreateFlagsKHR; + +typedef struct VkXlibSurfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkXlibSurfaceCreateFlagsKHR flags; + Display* dpy; + Window window; +} VkXlibSurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateXlibSurfaceKHR)(VkInstance instance, const VkXlibSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceXlibPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, Display* dpy, VisualID visualID); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateXlibSurfaceKHR( + VkInstance instance, + const VkXlibSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); + +VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceXlibPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + Display* dpy, + VisualID visualID); +#endif +#endif /* VK_USE_PLATFORM_XLIB_KHR */ + +#ifdef VK_USE_PLATFORM_XCB_KHR +#define VK_KHR_xcb_surface 1 +#include + +#define VK_KHR_XCB_SURFACE_SPEC_VERSION 6 +#define VK_KHR_XCB_SURFACE_EXTENSION_NAME "VK_KHR_xcb_surface" + +typedef VkFlags VkXcbSurfaceCreateFlagsKHR; + +typedef struct VkXcbSurfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkXcbSurfaceCreateFlagsKHR flags; + xcb_connection_t* connection; + xcb_window_t window; +} VkXcbSurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateXcbSurfaceKHR)(VkInstance instance, const VkXcbSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceXcbPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, xcb_connection_t* connection, xcb_visualid_t visual_id); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateXcbSurfaceKHR( + VkInstance instance, + 
const VkXcbSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); + +VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceXcbPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + xcb_connection_t* connection, + xcb_visualid_t visual_id); +#endif +#endif /* VK_USE_PLATFORM_XCB_KHR */ + +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +#define VK_KHR_wayland_surface 1 +#include + +#define VK_KHR_WAYLAND_SURFACE_SPEC_VERSION 5 +#define VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME "VK_KHR_wayland_surface" + +typedef VkFlags VkWaylandSurfaceCreateFlagsKHR; + +typedef struct VkWaylandSurfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkWaylandSurfaceCreateFlagsKHR flags; + struct wl_display* display; + struct wl_surface* surface; +} VkWaylandSurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateWaylandSurfaceKHR)(VkInstance instance, const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, struct wl_display* display); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateWaylandSurfaceKHR( + VkInstance instance, + const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); + +VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceWaylandPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + struct wl_display* display); +#endif +#endif /* VK_USE_PLATFORM_WAYLAND_KHR */ + +#ifdef VK_USE_PLATFORM_MIR_KHR +#define VK_KHR_mir_surface 1 +#include + +#define VK_KHR_MIR_SURFACE_SPEC_VERSION 4 +#define VK_KHR_MIR_SURFACE_EXTENSION_NAME "VK_KHR_mir_surface" + +typedef VkFlags VkMirSurfaceCreateFlagsKHR; + +typedef struct VkMirSurfaceCreateInfoKHR { + VkStructureType sType; + const 
void* pNext; + VkMirSurfaceCreateFlagsKHR flags; + MirConnection* connection; + MirSurface* mirSurface; +} VkMirSurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateMirSurfaceKHR)(VkInstance instance, const VkMirSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceMirPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, MirConnection* connection); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateMirSurfaceKHR( + VkInstance instance, + const VkMirSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); + +VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceMirPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + MirConnection* connection); +#endif +#endif /* VK_USE_PLATFORM_MIR_KHR */ + +#ifdef VK_USE_PLATFORM_ANDROID_KHR +#define VK_KHR_android_surface 1 +#include + +#define VK_KHR_ANDROID_SURFACE_SPEC_VERSION 6 +#define VK_KHR_ANDROID_SURFACE_EXTENSION_NAME "VK_KHR_android_surface" + +typedef VkFlags VkAndroidSurfaceCreateFlagsKHR; + +typedef struct VkAndroidSurfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkAndroidSurfaceCreateFlagsKHR flags; + ANativeWindow* window; +} VkAndroidSurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateAndroidSurfaceKHR)(VkInstance instance, const VkAndroidSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateAndroidSurfaceKHR( + VkInstance instance, + const VkAndroidSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); +#endif +#endif /* VK_USE_PLATFORM_ANDROID_KHR */ + +#ifdef VK_USE_PLATFORM_WIN32_KHR +#define VK_KHR_win32_surface 1 +#include + +#define VK_KHR_WIN32_SURFACE_SPEC_VERSION 5 
+#define VK_KHR_WIN32_SURFACE_EXTENSION_NAME "VK_KHR_win32_surface" + +typedef VkFlags VkWin32SurfaceCreateFlagsKHR; + +typedef struct VkWin32SurfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkWin32SurfaceCreateFlagsKHR flags; + HINSTANCE hinstance; + HWND hwnd; +} VkWin32SurfaceCreateInfoKHR; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateWin32SurfaceKHR)(VkInstance instance, const VkWin32SurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceWin32PresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateWin32SurfaceKHR( + VkInstance instance, + const VkWin32SurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); + +VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceWin32PresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex); +#endif +#endif /* VK_USE_PLATFORM_WIN32_KHR */ + +#define VK_EXT_debug_report 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugReportCallbackEXT) + +#define VK_EXT_DEBUG_REPORT_SPEC_VERSION 1 +#define VK_EXT_DEBUG_REPORT_EXTENSION_NAME "VK_EXT_debug_report" + + +typedef enum VkDebugReportObjectTypeEXT { + VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT = 0, + VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT = 1, + VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT = 2, + VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT = 3, + VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT = 4, + VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT = 5, + VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT = 6, + VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT = 7, + VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT = 8, + VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT = 9, + VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT = 10, + VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT = 11, + VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT = 12, + 
VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT = 13, + VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT = 14, + VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT = 15, + VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT = 16, + VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT = 17, + VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT = 18, + VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT = 19, + VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT = 20, + VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT = 21, + VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT = 22, + VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT = 23, + VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT = 24, + VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT = 25, + VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT = 26, + VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT = 27, + VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT = 28, +} VkDebugReportObjectTypeEXT; + +typedef enum VkDebugReportErrorEXT { + VK_DEBUG_REPORT_ERROR_NONE_EXT = 0, + VK_DEBUG_REPORT_ERROR_CALLBACK_REF_EXT = 1, +} VkDebugReportErrorEXT; + + +typedef enum VkDebugReportFlagBitsEXT { + VK_DEBUG_REPORT_INFORMATION_BIT_EXT = 0x00000001, + VK_DEBUG_REPORT_WARNING_BIT_EXT = 0x00000002, + VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT = 0x00000004, + VK_DEBUG_REPORT_ERROR_BIT_EXT = 0x00000008, + VK_DEBUG_REPORT_DEBUG_BIT_EXT = 0x00000010, +} VkDebugReportFlagBitsEXT; +typedef VkFlags VkDebugReportFlagsEXT; + +typedef VkBool32 (VKAPI_PTR *PFN_vkDebugReportCallbackEXT)( + VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objectType, + uint64_t object, + size_t location, + int32_t messageCode, + const char* pLayerPrefix, + const char* pMessage, + void* pUserData); + + +typedef struct VkDebugReportCallbackCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkDebugReportFlagsEXT flags; + PFN_vkDebugReportCallbackEXT pfnCallback; + void* pUserData; +} VkDebugReportCallbackCreateInfoEXT; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateDebugReportCallbackEXT)(VkInstance instance, const 
VkDebugReportCallbackCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugReportCallbackEXT* pCallback); +typedef void (VKAPI_PTR *PFN_vkDestroyDebugReportCallbackEXT)(VkInstance instance, VkDebugReportCallbackEXT callback, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkDebugReportMessageEXT)(VkInstance instance, VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, uint64_t object, size_t location, int32_t messageCode, const char* pLayerPrefix, const char* pMessage); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDebugReportCallbackEXT( + VkInstance instance, + const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDebugReportCallbackEXT* pCallback); + +VKAPI_ATTR void VKAPI_CALL vkDestroyDebugReportCallbackEXT( + VkInstance instance, + VkDebugReportCallbackEXT callback, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR void VKAPI_CALL vkDebugReportMessageEXT( + VkInstance instance, + VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objectType, + uint64_t object, + size_t location, + int32_t messageCode, + const char* pLayerPrefix, + const char* pMessage); +#endif + +#ifdef __cplusplus +} +#endif + +#endif From a97fa36512ed67f8e9224d928012bfc08182cc7d Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Wed, 17 Feb 2016 19:33:14 -0800 Subject: [PATCH 002/145] Fixing swap chain scissoring; surface is still not resized right. 
--- src/xenia/ui/vulkan/vulkan_immediate_drawer.cc | 8 +++++--- src/xenia/ui/vulkan/vulkan_immediate_drawer.h | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index 97b31de98..ae44cab00 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -614,6 +614,8 @@ void VulkanImmediateDrawer::Begin(int render_target_width, auto swap_chain = context_->swap_chain(); assert_null(current_cmd_buffer_); current_cmd_buffer_ = swap_chain->render_cmd_buffer(); + current_render_target_width_ = render_target_width; + current_render_target_height_ = render_target_height; // Viewport changes only once per batch. VkViewport viewport; @@ -704,15 +706,15 @@ void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { VkRect2D scissor; if (draw.scissor) { scissor.offset.x = draw.scissor_rect[0]; - scissor.offset.y = swap_chain->surface_height() - + scissor.offset.y = current_render_target_height_ - (draw.scissor_rect[1] + draw.scissor_rect[3]); scissor.extent.width = draw.scissor_rect[2]; scissor.extent.height = draw.scissor_rect[3]; } else { scissor.offset.x = 0; scissor.offset.y = 0; - scissor.extent.width = swap_chain->surface_width(); - scissor.extent.height = swap_chain->surface_height(); + scissor.extent.width = current_render_target_width_; + scissor.extent.height = current_render_target_height_; } vkCmdSetScissor(current_cmd_buffer_, 0, 1, &scissor); diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h index 004804e66..d14a6eb7c 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h @@ -60,6 +60,8 @@ class VulkanImmediateDrawer : public ImmediateDrawer { bool batch_has_index_buffer_ = false; VkCommandBuffer current_cmd_buffer_ = nullptr; + int current_render_target_width_ = 0; + int 
current_render_target_height_ = 0; }; } // namespace vulkan From ca5902c111be8aec2f7861d0282dcdd077276096 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Wed, 17 Feb 2016 19:43:42 -0800 Subject: [PATCH 003/145] Recreate swap chain on window resize. --- src/xenia/ui/vulkan/vulkan_context.cc | 11 ++++ src/xenia/ui/vulkan/vulkan_swap_chain.cc | 67 +++++++++++++++--------- src/xenia/ui/vulkan/vulkan_swap_chain.h | 8 +++ 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc index 5d82f4f46..9dd9c7d58 100644 --- a/src/xenia/ui/vulkan/vulkan_context.cc +++ b/src/xenia/ui/vulkan/vulkan_context.cc @@ -115,6 +115,17 @@ void VulkanContext::BeginSwap() { auto provider = static_cast(provider_); auto device = provider->device(); + // If we have a window see if it's been resized since we last swapped. + // If it has been, we'll need to reinitialize the swap chain before we + // start touching it. + if (target_window_) { + if (target_window_->width() != swap_chain_->surface_width() || + target_window_->height() != swap_chain_->surface_height()) { + // Resized! + swap_chain_->Reinitialize(); + } + } + // Acquire the next image and set it up for use. 
swap_chain_->Begin(); diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.cc b/src/xenia/ui/vulkan/vulkan_swap_chain.cc index ec640d92f..cb088bb75 100644 --- a/src/xenia/ui/vulkan/vulkan_swap_chain.cc +++ b/src/xenia/ui/vulkan/vulkan_swap_chain.cc @@ -32,31 +32,7 @@ namespace vulkan { VulkanSwapChain::VulkanSwapChain(VulkanInstance* instance, VulkanDevice* device) : instance_(instance), device_(device) {} -VulkanSwapChain::~VulkanSwapChain() { - for (auto& buffer : buffers_) { - DestroyBuffer(&buffer); - } - if (image_available_semaphore_) { - vkDestroySemaphore(*device_, image_available_semaphore_, nullptr); - } - if (render_pass_) { - vkDestroyRenderPass(*device_, render_pass_, nullptr); - } - if (render_cmd_buffer_) { - vkFreeCommandBuffers(*device_, cmd_pool_, 1, &render_cmd_buffer_); - } - if (cmd_pool_) { - vkDestroyCommandPool(*device_, cmd_pool_, nullptr); - } - // images_ doesn't need to be cleaned up as the swapchain does it implicitly. - if (handle) { - vkDestroySwapchainKHR(*device_, handle, nullptr); - handle = nullptr; - } - if (surface_) { - vkDestroySurfaceKHR(*instance_, surface_, nullptr); - } -} +VulkanSwapChain::~VulkanSwapChain() { Shutdown(); } bool VulkanSwapChain::Initialize(VkSurfaceKHR surface) { surface_ = surface; @@ -338,6 +314,47 @@ void VulkanSwapChain::DestroyBuffer(Buffer* buffer) { buffer->image = nullptr; } +bool VulkanSwapChain::Reinitialize() { + // Hacky, but stash the surface so we can reuse it. + auto surface = surface_; + surface_ = nullptr; + Shutdown(); + return Initialize(surface); +} + +void VulkanSwapChain::Shutdown() { + // TODO(benvanik): properly wait for a clean state. 
+ for (auto& buffer : buffers_) { + DestroyBuffer(&buffer); + } + buffers_.clear(); + if (image_available_semaphore_) { + vkDestroySemaphore(*device_, image_available_semaphore_, nullptr); + image_available_semaphore_ = nullptr; + } + if (render_pass_) { + vkDestroyRenderPass(*device_, render_pass_, nullptr); + render_pass_ = nullptr; + } + if (render_cmd_buffer_) { + vkFreeCommandBuffers(*device_, cmd_pool_, 1, &render_cmd_buffer_); + render_cmd_buffer_ = nullptr; + } + if (cmd_pool_) { + vkDestroyCommandPool(*device_, cmd_pool_, nullptr); + cmd_pool_ = nullptr; + } + // images_ doesn't need to be cleaned up as the swapchain does it implicitly. + if (handle) { + vkDestroySwapchainKHR(*device_, handle, nullptr); + handle = nullptr; + } + if (surface_) { + vkDestroySurfaceKHR(*instance_, surface_, nullptr); + surface_ = nullptr; + } +} + bool VulkanSwapChain::Begin() { // Get the index of the next available swapchain image. auto err = diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.h b/src/xenia/ui/vulkan/vulkan_swap_chain.h index 18bb26cee..1d1f578c3 100644 --- a/src/xenia/ui/vulkan/vulkan_swap_chain.h +++ b/src/xenia/ui/vulkan/vulkan_swap_chain.h @@ -41,7 +41,12 @@ class VulkanSwapChain { // Render command buffer, active inside the render pass from Begin to End. VkCommandBuffer render_cmd_buffer() const { return render_cmd_buffer_; } + // Initializes the swap chain with the given WSI surface. bool Initialize(VkSurfaceKHR surface); + // Reinitializes the swap chain with the initial surface. + // The surface will be retained but all other swap chain resources will be + // torn down and recreated with the new surface properties (size/etc). + bool Reinitialize(); // Begins the swap operation, preparing state for rendering. bool Begin(); @@ -58,6 +63,9 @@ class VulkanSwapChain { bool InitializeBuffer(Buffer* buffer, VkImage target_image); void DestroyBuffer(Buffer* buffer); + // Safely releases all swap chain resources. 
+ void Shutdown(); + VulkanInstance* instance_ = nullptr; VulkanDevice* device_ = nullptr; From 05107d2d3ef78342fc54bc65447517d670230a02 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Wed, 17 Feb 2016 21:17:12 -0800 Subject: [PATCH 004/145] Skeleton xenia::gpu::vulkan implementation, enough to start trace viewer. --- premake5.lua | 1 + src/xenia/cpu/hir/value.cc | 8 + src/xenia/gpu/trace_viewer.cc | 3 +- src/xenia/gpu/trace_viewer.h | 6 +- src/xenia/gpu/vulkan/premake5.lua | 125 ++++ .../gpu/vulkan/vulkan_command_processor.cc | 585 ++++++++++++++++++ .../gpu/vulkan/vulkan_command_processor.h | 165 +++++ src/xenia/gpu/vulkan/vulkan_gpu_flags.cc | 10 + src/xenia/gpu/vulkan/vulkan_gpu_flags.h | 15 + .../gpu/vulkan/vulkan_graphics_system.cc | 87 +++ src/xenia/gpu/vulkan/vulkan_graphics_system.h | 43 ++ src/xenia/gpu/vulkan/vulkan_shader.cc | 27 + src/xenia/gpu/vulkan/vulkan_shader.h | 33 + .../gpu/vulkan/vulkan_trace_dump_main.cc | 76 +++ .../gpu/vulkan/vulkan_trace_viewer_main.cc | 76 +++ .../ui/vulkan/vulkan_immediate_drawer.cc | 8 +- 16 files changed, 1260 insertions(+), 8 deletions(-) create mode 100644 src/xenia/gpu/vulkan/premake5.lua create mode 100644 src/xenia/gpu/vulkan/vulkan_command_processor.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_command_processor.h create mode 100644 src/xenia/gpu/vulkan/vulkan_gpu_flags.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_gpu_flags.h create mode 100644 src/xenia/gpu/vulkan/vulkan_graphics_system.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_graphics_system.h create mode 100644 src/xenia/gpu/vulkan/vulkan_shader.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_shader.h create mode 100644 src/xenia/gpu/vulkan/vulkan_trace_dump_main.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc diff --git a/premake5.lua b/premake5.lua index 94df2fde5..7c429828b 100644 --- a/premake5.lua +++ b/premake5.lua @@ -183,6 +183,7 @@ solution("xenia") include("src/xenia/debug/ui") include("src/xenia/gpu") 
include("src/xenia/gpu/gl4") + include("src/xenia/gpu/vulkan") include("src/xenia/hid") include("src/xenia/hid/nop") include("src/xenia/kernel") diff --git a/src/xenia/cpu/hir/value.cc b/src/xenia/cpu/hir/value.cc index 0c6f62f55..dc7e8cf64 100644 --- a/src/xenia/cpu/hir/value.cc +++ b/src/xenia/cpu/hir/value.cc @@ -1023,6 +1023,14 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned, } } } + break; + case FLOAT32_TYPE: + assert_false(is_unsigned); + assert_false(saturate); + for (int i = 0; i < 4; i++) { + constant.v128.f32[i] -= other->constant.v128.f32[i]; + } + break; default: assert_unhandled_case(type); break; diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index 317675709..7ce20c7ca 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -1439,7 +1439,8 @@ void TraceViewer::DrawStateUI() { } ImGui::EndChild(); } - if (ImGui::CollapsingHeader("Vertex Shader Output")) { + if (ImGui::CollapsingHeader("Vertex Shader Output") && + QueryVSOutputElementSize()) { auto size = QueryVSOutputSize(); auto el_size = QueryVSOutputElementSize(); if (size > 0) { diff --git a/src/xenia/gpu/trace_viewer.h b/src/xenia/gpu/trace_viewer.h index 5a5c1b104..6f7c900fc 100644 --- a/src/xenia/gpu/trace_viewer.h +++ b/src/xenia/gpu/trace_viewer.h @@ -54,9 +54,9 @@ class TraceViewer { virtual uintptr_t GetTextureEntry(const TextureInfo& texture_info, const SamplerInfo& sampler_info) = 0; - virtual size_t QueryVSOutputSize() = 0; - virtual size_t QueryVSOutputElementSize() = 0; - virtual bool QueryVSOutput(void* buffer, size_t size) = 0; + virtual size_t QueryVSOutputSize() { return 0; } + virtual size_t QueryVSOutputElementSize() { return 0; } + virtual bool QueryVSOutput(void* buffer, size_t size) { return false; } virtual bool Setup(); diff --git a/src/xenia/gpu/vulkan/premake5.lua b/src/xenia/gpu/vulkan/premake5.lua new file mode 100644 index 000000000..5a89101e2 --- /dev/null +++ 
b/src/xenia/gpu/vulkan/premake5.lua @@ -0,0 +1,125 @@ +project_root = "../../../.." +include(project_root.."/tools/build") + +group("src") +project("xenia-gpu-vulkan") + uuid("717590b4-f579-4162-8f23-0624e87d6cca") + kind("StaticLib") + language("C++") + links({ + "vulkan-loader", + "xenia-base", + "xenia-gpu", + "xenia-ui", + "xenia-ui-spirv", + "xenia-ui-vulkan", + "xxhash", + }) + defines({ + }) + includedirs({ + project_root.."/third_party/gflags/src", + }) + local_platform_files() + +-- TODO(benvanik): kill this and move to the debugger UI. +group("src") +project("xenia-gpu-vulkan-trace-viewer") + uuid("86a1dddc-a26a-4885-8c55-cf745225d93e") + kind("WindowedApp") + language("C++") + links({ + "gflags", + "imgui", + "vulkan-loader", + "xenia-apu", + "xenia-apu-nop", + "xenia-apu-xaudio2", + "xenia-base", + "xenia-core", + "xenia-cpu", + "xenia-cpu-backend-x64", + "xenia-gpu", + "xenia-gpu-vulkan", + "xenia-hid-nop", + "xenia-hid-winkey", + "xenia-hid-xinput", + "xenia-kernel", + "xenia-ui", + "xenia-ui-spirv", + "xenia-ui-vulkan", + "xenia-vfs", + }) + flags({ + "WinMain", -- Use WinMain instead of main. + }) + defines({ + }) + includedirs({ + project_root.."/third_party/gflags/src", + }) + files({ + "vulkan_trace_viewer_main.cc", + "../../base/main_"..platform_suffix..".cc", + }) + + filter("platforms:Windows") + -- Only create the .user file if it doesn't already exist. 
+ local user_file = project_root.."/build/xenia-gpu-vulkan-trace-viewer.vcxproj.user" + if not os.isfile(user_file) then + debugdir(project_root) + debugargs({ + "--flagfile=scratch/flags.txt", + "2>&1", + "1>scratch/stdout-trace-viewer.txt", + }) + end + +group("src") +project("xenia-gpu-vulkan-trace-dump") + uuid("0dd0dd1c-b321-494d-ab9a-6c062f0c65cc") + kind("ConsoleApp") + language("C++") + links({ + "gflags", + "imgui", + "vulkan-loader", + "xenia-apu", + "xenia-apu-nop", + "xenia-apu-xaudio2", + "xenia-base", + "xenia-core", + "xenia-cpu", + "xenia-cpu-backend-x64", + "xenia-gpu", + "xenia-gpu-vulkan", + "xenia-hid-nop", + "xenia-hid-winkey", + "xenia-hid-xinput", + "xenia-kernel", + "xenia-ui", + "xenia-ui-spirv", + "xenia-ui-vulkan", + "xenia-vfs", + }) + defines({ + }) + includedirs({ + project_root.."/third_party/gflags/src", + }) + files({ + "vulkan_trace_dump_main.cc", + "../../base/main_"..platform_suffix..".cc", + }) + + filter("platforms:Windows") + -- Only create the .user file if it doesn't already exist. + local user_file = project_root.."/build/xenia-gpu-vulkan-trace-dump.vcxproj.user" + if not os.isfile(user_file) then + debugdir(project_root) + debugargs({ + "--flagfile=scratch/flags.txt", + "2>&1", + "1>scratch/stdout-trace-dump.txt", + }) + end diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc new file mode 100644 index 000000000..3320d2927 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -0,0 +1,585 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_command_processor.h" + +#include + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/sampler_info.h" +#include "xenia/gpu/texture_info.h" +#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" +#include "xenia/gpu/vulkan/vulkan_graphics_system.h" +#include "xenia/gpu/xenos.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +using namespace xe::gpu::xenos; + +VulkanCommandProcessor::VulkanCommandProcessor( + VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state) + : CommandProcessor(graphics_system, kernel_state) {} + +VulkanCommandProcessor::~VulkanCommandProcessor() = default; + +void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); } + +bool VulkanCommandProcessor::SetupContext() { + if (!CommandProcessor::SetupContext()) { + XELOGE("Unable to initialize base command processor context"); + return false; + } + + return true; +} + +void VulkanCommandProcessor::ShutdownContext() { + CommandProcessor::ShutdownContext(); +} + +void VulkanCommandProcessor::MakeCoherent() { + RegisterFile* regs = register_file_; + auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32; + + CommandProcessor::MakeCoherent(); + + if (status_host & 0x80000000ul) { + // scratch_buffer_.ClearCache(); + } +} + +void VulkanCommandProcessor::PrepareForWait() { + SCOPE_profile_cpu_f("gpu"); + + CommandProcessor::PrepareForWait(); + + // TODO(benvanik): fences and fancy stuff. We should figure out a way to + // make interrupt callbacks from the GPU so that we don't have to do a full + // synchronize here. 
+ // glFlush(); + // glFinish(); + + context_->ClearCurrent(); +} + +void VulkanCommandProcessor::ReturnFromWait() { + context_->MakeCurrent(); + + CommandProcessor::ReturnFromWait(); +} + +void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, + uint32_t frontbuffer_width, + uint32_t frontbuffer_height) { + // Ensure we issue any pending draws. + // draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent); + + // Need to finish to be sure the other context sees the right data. + // TODO(benvanik): prevent this? fences? + // glFinish(); + + if (context_->WasLost()) { + // We've lost the context due to a TDR. + // TODO: Dump the current commands to a tracefile. + assert_always(); + } + + // Remove any dead textures, etc. + // texture_cache_.Scavenge(); +} + +Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type, + uint32_t guest_address, + const uint32_t* host_address, + uint32_t dword_count) { + // return shader_cache_.LookupOrInsertShader(shader_type, host_address, + // dword_count); + return nullptr; +} + +bool VulkanCommandProcessor::IssueDraw(PrimitiveType prim_type, + uint32_t index_count, + IndexBufferInfo* index_buffer_info) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + + // Skip all drawing for now - what did you expect? :) + return true; + + bool draw_valid = false; + // if (index_buffer_info) { + // draw_valid = draw_batcher_.BeginDrawElements(prim_type, index_count, + // index_buffer_info->format); + //} else { + // draw_valid = draw_batcher_.BeginDrawArrays(prim_type, index_count); + //} + if (!draw_valid) { + return false; + } + + auto& regs = *register_file_; + + auto enable_mode = + static_cast(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); + if (enable_mode == ModeControl::kIgnore) { + // Ignored. + // draw_batcher_.DiscardDraw(); + return true; + } else if (enable_mode == ModeControl::kCopy) { + // Special copy handling. 
+ // draw_batcher_.DiscardDraw(); + return IssueCopy(); + } + +#define CHECK_ISSUE_UPDATE_STATUS(status, mismatch, error_message) \ + { \ + if (status == UpdateStatus::kError) { \ + XELOGE(error_message); \ + /*draw_batcher_.DiscardDraw(); */ \ + return false; \ + } else if (status == UpdateStatus::kMismatch) { \ + mismatch = true; \ + } \ + } + + UpdateStatus status; + bool mismatch = false; + status = UpdateShaders(prim_type); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to prepare draw shaders"); + status = UpdateRenderTargets(); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render targets"); + // if (!active_framebuffer_) { + // // No framebuffer, so nothing we do will actually have an effect. + // // Treat it as a no-op. + // // TODO(benvanik): if we have a vs export, still allow it to go. + // draw_batcher_.DiscardDraw(); + // return true; + //} + + status = UpdateState(prim_type); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render state"); + status = PopulateSamplers(); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, + "Unable to prepare draw samplers"); + + status = PopulateIndexBuffer(index_buffer_info); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup index buffer"); + status = PopulateVertexBuffers(); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup vertex buffers"); + + // if (!draw_batcher_.CommitDraw()) { + // return false; + //} + + // draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent); + if (context_->WasLost()) { + // This draw lost us the context. This typically isn't hit. 
+ assert_always(); + return false; + } + + return true; +} + +bool VulkanCommandProcessor::SetShadowRegister(uint32_t* dest, + uint32_t register_name) { + uint32_t value = register_file_->values[register_name].u32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + +bool VulkanCommandProcessor::SetShadowRegister(float* dest, + uint32_t register_name) { + float value = register_file_->values[register_name].f32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + +VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateShaders( + PrimitiveType prim_type) { + auto& regs = update_shaders_regs_; + + // These are the constant base addresses/ranges for shaders. + // We have these hardcoded right now cause nothing seems to differ. + assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == + 0x000FF000 || + register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); + assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == + 0x000FF100 || + register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + + bool dirty = false; + dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); + dirty |= SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); + dirty |= regs.vertex_shader != active_vertex_shader_; + dirty |= regs.pixel_shader != active_pixel_shader_; + dirty |= regs.prim_type != prim_type; + if (!dirty) { + return UpdateStatus::kCompatible; + } + regs.vertex_shader = static_cast(active_vertex_shader_); + regs.pixel_shader = static_cast(active_pixel_shader_); + regs.prim_type = prim_type; + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +VulkanCommandProcessor::UpdateStatus +VulkanCommandProcessor::UpdateRenderTargets() { + auto& regs = update_render_targets_regs_; + + bool dirty = false; + dirty |= 
SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); + dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + dirty |= SetShadowRegister(®s.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); + dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); + dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); + dirty |= SetShadowRegister(®s.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); + dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); + dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); + dirty |= + SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); + dirty |= SetShadowRegister(®s.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateState( + PrimitiveType prim_type) { + bool mismatch = false; + +#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ + { \ + if (status == UpdateStatus::kError) { \ + XELOGE(error_message); \ + return status; \ + } else if (status == UpdateStatus::kMismatch) { \ + mismatch = true; \ + } \ + } + + UpdateStatus status; + status = UpdateViewportState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state"); + status = UpdateRasterizerState(prim_type); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state"); + status = UpdateBlendState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state"); + status = UpdateDepthStencilState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state"); + + return mismatch ? 
UpdateStatus::kMismatch : UpdateStatus::kCompatible; +} + +VulkanCommandProcessor::UpdateStatus +VulkanCommandProcessor::UpdateViewportState() { + auto& regs = update_viewport_state_regs_; + + bool dirty = false; + // dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl, + // XE_GPU_REG_PA_CL_CLIP_CNTL); + dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + dirty |= SetShadowRegister(®s.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL); + dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.pa_sc_window_offset, + XE_GPU_REG_PA_SC_WINDOW_OFFSET); + dirty |= SetShadowRegister(®s.pa_sc_window_scissor_tl, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); + dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); + dirty |= SetShadowRegister(®s.pa_cl_vport_xoffset, + XE_GPU_REG_PA_CL_VPORT_XOFFSET); + dirty |= SetShadowRegister(®s.pa_cl_vport_yoffset, + XE_GPU_REG_PA_CL_VPORT_YOFFSET); + dirty |= SetShadowRegister(®s.pa_cl_vport_zoffset, + XE_GPU_REG_PA_CL_VPORT_ZOFFSET); + dirty |= SetShadowRegister(®s.pa_cl_vport_xscale, + XE_GPU_REG_PA_CL_VPORT_XSCALE); + dirty |= SetShadowRegister(®s.pa_cl_vport_yscale, + XE_GPU_REG_PA_CL_VPORT_YSCALE); + dirty |= SetShadowRegister(®s.pa_cl_vport_zscale, + XE_GPU_REG_PA_CL_VPORT_ZSCALE); + + // Much of this state machine is extracted from: + // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c + // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0. + // = false: multiply the X, Y coordinates by 1/W0. + // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. + // = false: multiply the Z coordinate by 1/W0. 
+ // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to + // get 1/W0. + // draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f, + // (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f, + // (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f); + + // Done in VS, no need to flush state. + // if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) { + // draw_batcher_.set_window_scalar(1.0f, 1.0f); + //} else { + // draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f); + //} + + if (!dirty) { + return UpdateStatus::kCompatible; + } + + return UpdateStatus::kMismatch; +} + +VulkanCommandProcessor::UpdateStatus +VulkanCommandProcessor::UpdateRasterizerState(PrimitiveType prim_type) { + auto& regs = update_rasterizer_state_regs_; + + bool dirty = false; + dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_tl, + XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL); + dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_br, + XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); + dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, + XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); + dirty |= regs.prim_type != prim_type; + if (!dirty) { + return UpdateStatus::kCompatible; + } + + regs.prim_type = prim_type; + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +VulkanCommandProcessor::UpdateStatus +VulkanCommandProcessor::UpdateBlendState() { + auto& reg_file = *register_file_; + auto& regs = update_blend_state_regs_; + + // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE + // Deprecated in GL, implemented in shader. 
+ // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; + // uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32; + // draw_batcher_.set_alpha_test((color_control & 0x4) != 0, // + // ALPAHTESTENABLE + // color_control & 0x7, // ALPHAFUNC + // reg_file[XE_GPU_REG_RB_ALPHA_REF].f32); + + bool dirty = false; + dirty |= + SetShadowRegister(®s.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0); + dirty |= + SetShadowRegister(®s.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1); + dirty |= + SetShadowRegister(®s.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); + dirty |= + SetShadowRegister(®s.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); + dirty |= SetShadowRegister(®s.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED); + dirty |= SetShadowRegister(®s.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN); + dirty |= SetShadowRegister(®s.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE); + dirty |= SetShadowRegister(®s.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +VulkanCommandProcessor::UpdateStatus +VulkanCommandProcessor::UpdateDepthStencilState() { + auto& regs = update_depth_stencil_state_regs_; + + bool dirty = false; + dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); + dirty |= + SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +VulkanCommandProcessor::UpdateStatus +VulkanCommandProcessor::PopulateIndexBuffer( + IndexBufferInfo* index_buffer_info) { + auto& regs = *register_file_; + if (!index_buffer_info || !index_buffer_info->guest_base) { + // No index buffer or auto draw. 
+ return UpdateStatus::kCompatible; + } + auto& info = *index_buffer_info; + +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + + // Min/max index ranges for clamping. This is often [0g,FFFF|FFFFFF]. + // All indices should be clamped to [min,max]. May be a way to do this in GL. + uint32_t min_index = regs[XE_GPU_REG_VGT_MIN_VTX_INDX].u32; + uint32_t max_index = regs[XE_GPU_REG_VGT_MAX_VTX_INDX].u32; + assert_true(min_index == 0); + assert_true(max_index == 0xFFFF || max_index == 0xFFFFFF); + + assert_true(info.endianness == Endian::k8in16 || + info.endianness == Endian::k8in32); + + trace_writer_.WriteMemoryRead(info.guest_base, info.length); + + return UpdateStatus::kCompatible; +} + +VulkanCommandProcessor::UpdateStatus +VulkanCommandProcessor::PopulateVertexBuffers() { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + + auto& regs = *register_file_; + assert_not_null(active_vertex_shader_); + + for (const auto& vertex_binding : active_vertex_shader_->vertex_bindings()) { + int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + + (vertex_binding.fetch_constant / 3) * 6; + const auto group = reinterpret_cast(®s.values[r]); + const xe_gpu_vertex_fetch_t* fetch = nullptr; + switch (vertex_binding.fetch_constant % 3) { + case 0: + fetch = &group->vertex_fetch_0; + break; + case 1: + fetch = &group->vertex_fetch_1; + break; + case 2: + fetch = &group->vertex_fetch_2; + break; + } + assert_true(fetch->endian == 2); + + size_t valid_range = size_t(fetch->size * 4); + + trace_writer_.WriteMemoryRead(fetch->address << 2, valid_range); + } + + return UpdateStatus::kCompatible; +} + +VulkanCommandProcessor::UpdateStatus +VulkanCommandProcessor::PopulateSamplers() { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + + bool mismatch = false; + + // VS and PS samplers are shared, but may be used exclusively. + // We walk each and setup lazily. 
+ bool has_setup_sampler[32] = {false}; + + // Vertex texture samplers. + for (auto& texture_binding : active_vertex_shader_->texture_bindings()) { + if (has_setup_sampler[texture_binding.fetch_constant]) { + continue; + } + has_setup_sampler[texture_binding.fetch_constant] = true; + auto status = PopulateSampler(texture_binding); + if (status == UpdateStatus::kError) { + return status; + } else if (status == UpdateStatus::kMismatch) { + mismatch = true; + } + } + + // Pixel shader texture sampler. + for (auto& texture_binding : active_pixel_shader_->texture_bindings()) { + if (has_setup_sampler[texture_binding.fetch_constant]) { + continue; + } + has_setup_sampler[texture_binding.fetch_constant] = true; + auto status = PopulateSampler(texture_binding); + if (status == UpdateStatus::kError) { + return UpdateStatus::kError; + } else if (status == UpdateStatus::kMismatch) { + mismatch = true; + } + } + + return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible; +} + +VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler( + const Shader::TextureBinding& texture_binding) { + auto& regs = *register_file_; + int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + + texture_binding.fetch_constant * 6; + auto group = reinterpret_cast(®s.values[r]); + auto& fetch = group->texture_fetch; + + // ? 
+ if (!fetch.type) { + return UpdateStatus::kCompatible; + } + assert_true(fetch.type == 0x2); + + TextureInfo texture_info; + if (!TextureInfo::Prepare(fetch, &texture_info)) { + XELOGE("Unable to parse texture fetcher info"); + return UpdateStatus::kCompatible; // invalid texture used + } + SamplerInfo sampler_info; + if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr, + &sampler_info)) { + XELOGE("Unable to parse sampler info"); + return UpdateStatus::kCompatible; // invalid texture used + } + + trace_writer_.WriteMemoryRead(texture_info.guest_address, + texture_info.input_length); + + return UpdateStatus::kCompatible; +} + +bool VulkanCommandProcessor::IssueCopy() { + SCOPE_profile_cpu_f("gpu"); + return true; +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h new file mode 100644 index 000000000..493345410 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -0,0 +1,165 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_COMMAND_PROCESSOR_H_ +#define XENIA_GPU_VULKAN_COMMAND_PROCESSOR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xenia/base/threading.h" +#include "xenia/gpu/command_processor.h" +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" +#include "xenia/gpu/xenos.h" +#include "xenia/kernel/xthread.h" +#include "xenia/memory.h" +#include "xenia/ui/vulkan/vulkan_context.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanGraphicsSystem; + +class VulkanCommandProcessor : public CommandProcessor { + public: + VulkanCommandProcessor(VulkanGraphicsSystem* graphics_system, + kernel::KernelState* kernel_state); + ~VulkanCommandProcessor() override; + + void ClearCaches() override; + + private: + enum class UpdateStatus { + kCompatible, + kMismatch, + kError, + }; + + bool SetupContext() override; + void ShutdownContext() override; + + void MakeCoherent() override; + void PrepareForWait() override; + void ReturnFromWait() override; + + void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, + uint32_t frontbuffer_height) override; + + Shader* LoadShader(ShaderType shader_type, uint32_t guest_address, + const uint32_t* host_address, + uint32_t dword_count) override; + + bool IssueDraw(PrimitiveType prim_type, uint32_t index_count, + IndexBufferInfo* index_buffer_info) override; + UpdateStatus UpdateShaders(PrimitiveType prim_type); + UpdateStatus UpdateRenderTargets(); + UpdateStatus UpdateState(PrimitiveType prim_type); + UpdateStatus UpdateViewportState(); + UpdateStatus UpdateRasterizerState(PrimitiveType prim_type); + UpdateStatus UpdateBlendState(); + UpdateStatus UpdateDepthStencilState(); + UpdateStatus PopulateIndexBuffer(IndexBufferInfo* index_buffer_info); + UpdateStatus 
PopulateVertexBuffers(); + UpdateStatus PopulateSamplers(); + UpdateStatus PopulateSampler(const Shader::TextureBinding& texture_binding); + bool IssueCopy() override; + + SpirvShaderTranslator shader_translator_; + + private: + bool SetShadowRegister(uint32_t* dest, uint32_t register_name); + bool SetShadowRegister(float* dest, uint32_t register_name); + struct UpdateRenderTargetsRegisters { + uint32_t rb_modecontrol; + uint32_t rb_surface_info; + uint32_t rb_color_info; + uint32_t rb_color1_info; + uint32_t rb_color2_info; + uint32_t rb_color3_info; + uint32_t rb_color_mask; + uint32_t rb_depthcontrol; + uint32_t rb_stencilrefmask; + uint32_t rb_depth_info; + + UpdateRenderTargetsRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_render_targets_regs_; + struct UpdateViewportStateRegisters { + // uint32_t pa_cl_clip_cntl; + uint32_t rb_surface_info; + uint32_t pa_cl_vte_cntl; + uint32_t pa_su_sc_mode_cntl; + uint32_t pa_sc_window_offset; + uint32_t pa_sc_window_scissor_tl; + uint32_t pa_sc_window_scissor_br; + float pa_cl_vport_xoffset; + float pa_cl_vport_yoffset; + float pa_cl_vport_zoffset; + float pa_cl_vport_xscale; + float pa_cl_vport_yscale; + float pa_cl_vport_zscale; + + UpdateViewportStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_viewport_state_regs_; + struct UpdateRasterizerStateRegisters { + uint32_t pa_su_sc_mode_cntl; + uint32_t pa_sc_screen_scissor_tl; + uint32_t pa_sc_screen_scissor_br; + uint32_t multi_prim_ib_reset_index; + PrimitiveType prim_type; + + UpdateRasterizerStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_rasterizer_state_regs_; + struct UpdateBlendStateRegisters { + uint32_t rb_blendcontrol[4]; + float rb_blend_rgba[4]; + + UpdateBlendStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_blend_state_regs_; + struct UpdateDepthStencilStateRegisters { + 
uint32_t rb_depthcontrol; + uint32_t rb_stencilrefmask; + + UpdateDepthStencilStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_depth_stencil_state_regs_; + struct UpdateShadersRegisters { + PrimitiveType prim_type; + uint32_t pa_su_sc_mode_cntl; + uint32_t sq_program_cntl; + uint32_t sq_context_misc; + VulkanShader* vertex_shader; + VulkanShader* pixel_shader; + + UpdateShadersRegisters() { Reset(); } + void Reset() { + sq_program_cntl = 0; + vertex_shader = pixel_shader = nullptr; + } + } update_shaders_regs_; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_COMMAND_PROCESSOR_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc b/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc new file mode 100644 index 000000000..675e60476 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc @@ -0,0 +1,10 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h b/src/xenia/gpu/vulkan/vulkan_gpu_flags.h new file mode 100644 index 000000000..c78637a47 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_gpu_flags.h @@ -0,0 +1,15 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ +#define XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ + +#include + +#endif // XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc new file mode 100644 index 000000000..74ec57849 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc @@ -0,0 +1,87 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_graphics_system.h" + +#include +#include + +#include "xenia/base/logging.h" +#include "xenia/base/profiling.h" +#include "xenia/cpu/processor.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" +#include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/window.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +VulkanGraphicsSystem::VulkanGraphicsSystem() = default; + +VulkanGraphicsSystem::~VulkanGraphicsSystem() = default; + +X_STATUS VulkanGraphicsSystem::Setup(cpu::Processor* processor, + kernel::KernelState* kernel_state, + ui::Window* target_window) { + // Must create the provider so we can create contexts. 
+ provider_ = xe::ui::vulkan::VulkanProvider::Create(target_window); + + auto result = GraphicsSystem::Setup(processor, kernel_state, target_window); + if (result) { + return result; + } + + display_context_ = reinterpret_cast( + target_window->context()); + + return X_STATUS_SUCCESS; +} + +void VulkanGraphicsSystem::Shutdown() { GraphicsSystem::Shutdown(); } + +std::unique_ptr +VulkanGraphicsSystem::CreateCommandProcessor() { + return std::unique_ptr( + new VulkanCommandProcessor(this, kernel_state_)); +} + +void VulkanGraphicsSystem::Swap(xe::ui::UIEvent* e) { + if (!command_processor_) { + return; + } + // Check for pending swap. + auto& swap_state = command_processor_->swap_state(); + { + std::lock_guard lock(swap_state.mutex); + if (swap_state.pending) { + swap_state.pending = false; + std::swap(swap_state.front_buffer_texture, + swap_state.back_buffer_texture); + } + } + + if (!swap_state.front_buffer_texture) { + // Not yet ready. + return; + } + + // Blit the frontbuffer. + // display_context_->blitter()->BlitTexture2D( + // static_cast(swap_state.front_buffer_texture), + // Rect2D(0, 0, swap_state.width, swap_state.height), + // Rect2D(0, 0, target_window_->width(), target_window_->height()), + // GL_LINEAR, false); +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_graphics_system.h b/src/xenia/gpu/vulkan/vulkan_graphics_system.h new file mode 100644 index 000000000..e486452aa --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_graphics_system.h @@ -0,0 +1,43 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_VULKAN_GRAPHICS_SYSTEM_H_ +#define XENIA_GPU_VULKAN_VULKAN_GRAPHICS_SYSTEM_H_ + +#include + +#include "xenia/gpu/graphics_system.h" +#include "xenia/ui/vulkan/vulkan_context.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanGraphicsSystem : public GraphicsSystem { + public: + VulkanGraphicsSystem(); + ~VulkanGraphicsSystem() override; + + X_STATUS Setup(cpu::Processor* processor, kernel::KernelState* kernel_state, + ui::Window* target_window) override; + void Shutdown() override; + + private: + std::unique_ptr CreateCommandProcessor() override; + + void Swap(xe::ui::UIEvent* e) override; + + xe::ui::vulkan::VulkanContext* display_context_ = nullptr; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_VULKAN_GRAPHICS_SYSTEM_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_shader.cc b/src/xenia/gpu/vulkan/vulkan_shader.cc new file mode 100644 index 000000000..00b68af42 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_shader.cc @@ -0,0 +1,27 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_shader.h" + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +VulkanShader::VulkanShader(ShaderType shader_type, uint64_t data_hash, + const uint32_t* dword_ptr, uint32_t dword_count) + : Shader(shader_type, data_hash, dword_ptr, dword_count) {} + +VulkanShader::~VulkanShader() = default; + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_shader.h b/src/xenia/gpu/vulkan/vulkan_shader.h new file mode 100644 index 000000000..9277ae44f --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_shader.h @@ -0,0 +1,33 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_VULKAN_SHADER_H_ +#define XENIA_GPU_VULKAN_VULKAN_SHADER_H_ + +#include + +#include "xenia/gpu/shader.h" +#include "xenia/ui/vulkan/vulkan_context.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanShader : public Shader { + public: + VulkanShader(ShaderType shader_type, uint64_t data_hash, + const uint32_t* dword_ptr, uint32_t dword_count); + ~VulkanShader() override; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_VULKAN_SHADER_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_trace_dump_main.cc b/src/xenia/gpu/vulkan/vulkan_trace_dump_main.cc new file mode 100644 index 000000000..6099a7cf1 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_trace_dump_main.cc @@ -0,0 +1,76 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/base/logging.h" +#include "xenia/base/main.h" +#include "xenia/gpu/trace_dump.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/gpu/vulkan/vulkan_graphics_system.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +using namespace xe::gpu::xenos; + +class VulkanTraceDump : public TraceDump { + public: + std::unique_ptr CreateGraphicsSystem() override { + return std::unique_ptr(new VulkanGraphicsSystem()); + } + + uintptr_t GetColorRenderTarget(uint32_t pitch, MsaaSamples samples, + uint32_t base, + ColorRenderTargetFormat format) override { + auto command_processor = static_cast( + graphics_system_->command_processor()); + // return command_processor->GetColorRenderTarget(pitch, samples, base, + // format); + return 0; + } + + uintptr_t GetDepthRenderTarget(uint32_t pitch, MsaaSamples samples, + uint32_t base, + DepthRenderTargetFormat format) override { + auto command_processor = static_cast( + graphics_system_->command_processor()); + // return command_processor->GetDepthRenderTarget(pitch, samples, base, + // format); + return 0; + } + + uintptr_t GetTextureEntry(const TextureInfo& texture_info, + const SamplerInfo& sampler_info) override { + auto command_processor = static_cast( + graphics_system_->command_processor()); + + // auto entry_view = + // command_processor->texture_cache()->Demand(texture_info, + // sampler_info); + // if (!entry_view) { + // return 0; + //} + // auto texture = entry_view->texture; + // return static_cast(texture->handle); + return 0; + } +}; + +int trace_dump_main(const std::vector& args) { + VulkanTraceDump trace_dump; + return trace_dump.Main(args); +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +DEFINE_ENTRY_POINT(L"xenia-gpu-vulkan-trace-dump", + L"xenia-gpu-vulkan-trace-dump some.trace", + xe::gpu::vulkan::trace_dump_main); diff --git 
a/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc b/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc new file mode 100644 index 000000000..b2cc8c30a --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc @@ -0,0 +1,76 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/base/logging.h" +#include "xenia/base/main.h" +#include "xenia/gpu/trace_viewer.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/gpu/vulkan/vulkan_graphics_system.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +using namespace xe::gpu::xenos; + +class VulkanTraceViewer : public TraceViewer { + public: + std::unique_ptr CreateGraphicsSystem() override { + return std::unique_ptr(new VulkanGraphicsSystem()); + } + + uintptr_t GetColorRenderTarget(uint32_t pitch, MsaaSamples samples, + uint32_t base, + ColorRenderTargetFormat format) override { + auto command_processor = static_cast( + graphics_system_->command_processor()); + // return command_processor->GetColorRenderTarget(pitch, samples, base, + // format); + return 0; + } + + uintptr_t GetDepthRenderTarget(uint32_t pitch, MsaaSamples samples, + uint32_t base, + DepthRenderTargetFormat format) override { + auto command_processor = static_cast( + graphics_system_->command_processor()); + // return command_processor->GetDepthRenderTarget(pitch, samples, base, + // format); + return 0; + } + + uintptr_t GetTextureEntry(const TextureInfo& texture_info, + const SamplerInfo& sampler_info) override { + auto command_processor = static_cast( + graphics_system_->command_processor()); + + // auto entry_view = 
+ // command_processor->texture_cache()->Demand(texture_info, + // sampler_info); + // if (!entry_view) { + // return 0; + //} + // auto texture = entry_view->texture; + // return static_cast(texture->handle); + return 0; + } +}; + +int trace_viewer_main(const std::vector& args) { + VulkanTraceViewer trace_viewer; + return trace_viewer.Main(args); +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +DEFINE_ENTRY_POINT(L"xenia-gpu-vulkan-trace-viewer", + L"xenia-gpu-vulkan-trace-viewer some.trace", + xe::gpu::vulkan::trace_viewer_main); diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index ae44cab00..463e7ece0 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -686,13 +686,13 @@ void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { } // Setup texture binding. - VkDescriptorSet texture_set = nullptr; auto texture = reinterpret_cast(draw.texture_handle); if (texture) { - texture_set = texture->descriptor_set(); + auto texture_set = texture->descriptor_set(); + vkCmdBindDescriptorSets(current_cmd_buffer_, + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout_, + 0, 1, &texture_set, 0, nullptr); } - vkCmdBindDescriptorSets(current_cmd_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_layout_, 0, 1, &texture_set, 0, nullptr); // Use push constants for our per-draw changes. // Here, the restrict_texture_samples uniform. From 5ab0af9e6daab53f91e7e3c710ca5fa539dc4abe Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Wed, 17 Feb 2016 23:47:11 -0800 Subject: [PATCH 005/145] Implementing shader constant register map construction. 
--- src/xenia/gpu/shader.h | 21 +++++++++++++++++++++ src/xenia/gpu/shader_translator.cc | 24 +++++++++++++++++++++++- src/xenia/gpu/shader_translator.h | 4 +++- 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index ebe491d65..e3e4d7072 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -493,6 +493,21 @@ class Shader { ParsedTextureFetchInstruction fetch_instr; }; + struct ConstantRegisterMap { + // Bitmap of all kConstantFloat registers read by the shader. + // Any shader can only read up to 256 of the 512, and the base is dependent + // on the shader type. Each bit corresponds to a storage index from the type + // base, so bit 0 in a vertex shader is register 0, and bit 0 in a fragment + // shader is register 256. + uint64_t float_bitmap[256 / 64]; + // Bitmap of all kConstantInt registers read by the shader. + // Each bit corresponds to a storage index [0-31]. + uint32_t int_bitmap; + // Bitmap of all kConstantBool registers read by the shader. + // Each bit corresponds to a storage index [0-31]. + uint32_t bool_bitmap; + }; + Shader(ShaderType shader_type, uint64_t ucode_data_hash, const uint32_t* ucode_dwords, size_t ucode_dword_count); virtual ~Shader(); @@ -518,6 +533,11 @@ class Shader { return texture_bindings_; } + // Bitmaps of all constant registers accessed by the shader. + const ConstantRegisterMap& constant_register_map() const { + return constant_register_map_; + } + // Returns true if the given color target index [0-3]. 
bool writes_color_target(int i) const { return writes_color_targets_[i]; } @@ -564,6 +584,7 @@ class Shader { std::vector vertex_bindings_; std::vector texture_bindings_; + ConstantRegisterMap constant_register_map_ = {0}; bool writes_color_targets_[4] = {false, false, false, false}; bool is_valid_ = false; diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 4991833ab..68a70d5fb 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -53,13 +53,14 @@ void ShaderTranslator::Reset() { total_attrib_count_ = 0; vertex_bindings_.clear(); texture_bindings_.clear(); + std::memset(&constant_register_map_, 0, sizeof(constant_register_map_)); for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) { writes_color_targets_[i] = false; } } bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) { - // FIXME: This is kind of silly. + // DEPRECATED: remove this codepath when GL4 goes away. Reset(); shader_type_ = shader->type(); @@ -129,6 +130,7 @@ bool ShaderTranslator::Translate(Shader* shader) { shader->ucode_disassembly_ = ucode_disasm_buffer_.to_string(); shader->vertex_bindings_ = std::move(vertex_bindings_); shader->texture_bindings_ = std::move(texture_bindings_); + shader->constant_register_map_ = std::move(constant_register_map_); for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) { shader->writes_color_targets_[i] = writes_color_targets_[i]; } @@ -488,6 +490,7 @@ void ShaderTranslator::TranslateControlFlowCondExec( i.instruction_count = cf.count(); i.type = ParsedExecInstruction::Type::kConditional; i.bool_constant_index = cf.bool_address(); + constant_register_map_.bool_bitmap |= 1 << i.bool_constant_index; i.condition = cf.condition(); switch (cf.opcode()) { case ControlFlowOpcode::kCondExec: @@ -527,6 +530,7 @@ void ShaderTranslator::TranslateControlFlowLoopStart( ParsedLoopStartInstruction i; i.dword_index = cf_index_; i.loop_constant_index = cf.loop_id(); + 
constant_register_map_.int_bitmap |= 1 << i.loop_constant_index; i.is_repeat = cf.is_repeat(); i.loop_skip_address = cf.address(); @@ -542,6 +546,7 @@ void ShaderTranslator::TranslateControlFlowLoopEnd( i.is_predicated_break = cf.is_predicated_break(); i.predicate_condition = cf.condition(); i.loop_constant_index = cf.loop_id(); + constant_register_map_.int_bitmap |= 1 << i.loop_constant_index; i.loop_body_address = cf.address(); i.Disassemble(&ucode_disasm_buffer_); @@ -562,6 +567,7 @@ void ShaderTranslator::TranslateControlFlowCondCall( } else { i.type = ParsedCallInstruction::Type::kConditional; i.bool_constant_index = cf.bool_address(); + constant_register_map_.bool_bitmap |= 1 << i.bool_constant_index; i.condition = cf.condition(); } @@ -593,6 +599,7 @@ void ShaderTranslator::TranslateControlFlowCondJmp( } else { i.type = ParsedJumpInstruction::Type::kConditional; i.bool_constant_index = cf.bool_address(); + constant_register_map_.bool_bitmap |= 1 << i.bool_constant_index; i.condition = cf.condition(); } @@ -1150,6 +1157,14 @@ void ShaderTranslator::ParseAluVectorInstruction( for (int j = 0; j < i.operand_count; ++j) { ParseAluInstructionOperand( op, j + 1, opcode_info.src_swizzle_component_count, &i.operands[j]); + + // Track constant float register loads. + if (i.operands[j].storage_source == + InstructionStorageSource::kConstantFloat) { + auto register_index = i.operands[j].storage_index; + constant_register_map_.float_bitmap[register_index / 64] |= + 1ull << (register_index % 64); + } } i.Disassemble(&ucode_disasm_buffer_); @@ -1243,9 +1258,16 @@ void ShaderTranslator::ParseAluScalarInstruction( uint32_t reg2 = (static_cast(op.scalar_opcode()) & 1) | (src3_swizzle & 0x3C) | (op.src_is_temp(3) << 1); int const_slot = (op.src_is_temp(1) || op.src_is_temp(2)) ? 1 : 0; + ParseAluInstructionOperandSpecial( op, InstructionStorageSource::kConstantFloat, op.src_reg(3), op.src_negate(3), 0, swiz_a, &i.operands[0]); + + // Track constant float register loads. 
+ auto register_index = i.operands[0].storage_index; + constant_register_map_.float_bitmap[register_index / 64] |= + 1ull << (register_index % 64); + ParseAluInstructionOperandSpecial(op, InstructionStorageSource::kRegister, reg2, op.src_negate(3), const_slot, swiz_b, &i.operands[1]); diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index d1d731926..8c8a8c176 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -27,8 +27,9 @@ class ShaderTranslator { virtual ~ShaderTranslator(); // Gathers all vertex/texture bindings. Implicitly called in Translate. - // TODO: Move this functionality to Shader. + // DEPRECATED(benvanik): remove this when shader cache is removed. bool GatherAllBindingInformation(Shader* shader); + bool Translate(Shader* shader); protected: @@ -191,6 +192,7 @@ class ShaderTranslator { int total_attrib_count_ = 0; std::vector vertex_bindings_; std::vector texture_bindings_; + Shader::ConstantRegisterMap constant_register_map_ = {0}; bool writes_color_targets_[4] = {false, false, false, false}; static const AluOpcodeInfo alu_vector_opcode_infos_[0x20]; From 618ca80011b90a0cb776fcc52129484c67c6d72b Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 18 Feb 2016 01:06:28 -0800 Subject: [PATCH 006/145] Count packed bytes used by a constant map. 
--- src/xenia/base/math.h | 16 ++++++++++++++++ src/xenia/gpu/shader.h | 7 +++++-- src/xenia/gpu/shader_translator.cc | 27 ++++++++++++++++++++++++--- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/xenia/base/math.h b/src/xenia/base/math.h index 824a1d746..662ceced7 100644 --- a/src/xenia/base/math.h +++ b/src/xenia/base/math.h @@ -64,6 +64,22 @@ constexpr uint32_t select_bits(uint32_t value, uint32_t a, uint32_t b) { return (value & make_bitmask(a, b)) >> a; } +inline uint32_t bit_count(uint32_t v) { + v = v - ((v >> 1) & 0x55555555); + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; +} + +inline uint32_t bit_count(uint64_t v) { + v = (v & 0x5555555555555555LU) + (v >> 1 & 0x5555555555555555LU); + v = (v & 0x3333333333333333LU) + (v >> 2 & 0x3333333333333333LU); + v = v + (v >> 4) & 0x0F0F0F0F0F0F0F0FLU; + v = v + (v >> 8); + v = v + (v >> 16); + v = v + (v >> 32) & 0x0000007F; + return static_cast(v); +} + // lzcnt instruction, typed for integers of all sizes. // The number of leading zero bits in the value parameter. If value is zero, the // return value is the size of the input operand (8, 16, 32, or 64). If the most diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index e3e4d7072..476369e53 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -504,8 +504,11 @@ class Shader { // Each bit corresponds to a storage index [0-31]. uint32_t int_bitmap; // Bitmap of all kConstantBool registers read by the shader. - // Each bit corresponds to a storage index [0-31]. - uint32_t bool_bitmap; + // Each bit corresponds to a storage index [0-255]. + uint32_t bool_bitmap[256 / 32]; + + // Computed byte count of all registers required when packed. 
+ uint32_t packed_byte_length; }; Shader(ShaderType shader_type, uint64_t ucode_data_hash, diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 68a70d5fb..1c9f31962 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -125,6 +125,24 @@ bool ShaderTranslator::Translate(Shader* shader) { TranslateBlocks(); + // Compute total bytes used by the register map. + // This saves us work later when we need to pack them. + constant_register_map_.packed_byte_length = 0; + for (int i = 0; i < 4; ++i) { + // Each bit indicates a vec4 (4 floats). + constant_register_map_.packed_byte_length += + 4 * 4 * xe::bit_count(constant_register_map_.float_bitmap[i]); + } + // Each bit indicates a single word. + constant_register_map_.packed_byte_length += + 4 * xe::bit_count(constant_register_map_.int_bitmap); + // Direct map between words and words we upload. + for (int i = 0; i < 4; ++i) { + if (constant_register_map_.bool_bitmap[i]) { + constant_register_map_.packed_byte_length += 4; + } + } + shader->errors_ = std::move(errors_); shader->translated_binary_ = CompleteTranslation(); shader->ucode_disassembly_ = ucode_disasm_buffer_.to_string(); @@ -490,7 +508,8 @@ void ShaderTranslator::TranslateControlFlowCondExec( i.instruction_count = cf.count(); i.type = ParsedExecInstruction::Type::kConditional; i.bool_constant_index = cf.bool_address(); - constant_register_map_.bool_bitmap |= 1 << i.bool_constant_index; + constant_register_map_.bool_bitmap[i.bool_constant_index / 32] |= + 1 << (i.bool_constant_index % 32); i.condition = cf.condition(); switch (cf.opcode()) { case ControlFlowOpcode::kCondExec: @@ -567,7 +586,8 @@ void ShaderTranslator::TranslateControlFlowCondCall( } else { i.type = ParsedCallInstruction::Type::kConditional; i.bool_constant_index = cf.bool_address(); - constant_register_map_.bool_bitmap |= 1 << i.bool_constant_index; + constant_register_map_.bool_bitmap[i.bool_constant_index / 32] |= + 1 << 
(i.bool_constant_index % 32); i.condition = cf.condition(); } @@ -599,7 +619,8 @@ void ShaderTranslator::TranslateControlFlowCondJmp( } else { i.type = ParsedJumpInstruction::Type::kConditional; i.bool_constant_index = cf.bool_address(); - constant_register_map_.bool_bitmap |= 1 << i.bool_constant_index; + constant_register_map_.bool_bitmap[i.bool_constant_index / 32] |= + 1 << (i.bool_constant_index % 32); i.condition = cf.condition(); } From ecd257b34c80ace83535ed7b025d0c71266db4cc Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 18 Feb 2016 01:06:38 -0800 Subject: [PATCH 007/145] Fix headers include guards. --- src/xenia/gpu/gl4/gl4_command_processor.h | 6 +++--- src/xenia/gpu/vulkan/vulkan_command_processor.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/xenia/gpu/gl4/gl4_command_processor.h b/src/xenia/gpu/gl4/gl4_command_processor.h index 1ebd12a1e..e16edc4cb 100644 --- a/src/xenia/gpu/gl4/gl4_command_processor.h +++ b/src/xenia/gpu/gl4/gl4_command_processor.h @@ -7,8 +7,8 @@ ****************************************************************************** */ -#ifndef XENIA_GPU_GL4_COMMAND_PROCESSOR_H_ -#define XENIA_GPU_GL4_COMMAND_PROCESSOR_H_ +#ifndef XENIA_GPU_GL4_GL4_COMMAND_PROCESSOR_H_ +#define XENIA_GPU_GL4_GL4_COMMAND_PROCESSOR_H_ #include #include @@ -232,4 +232,4 @@ class GL4CommandProcessor : public CommandProcessor { } // namespace gpu } // namespace xe -#endif // XENIA_GPU_GL4_COMMAND_PROCESSOR_H_ +#endif // XENIA_GPU_GL4_GL4_COMMAND_PROCESSOR_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 493345410..babbc9ab3 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -7,8 +7,8 @@ ****************************************************************************** */ -#ifndef XENIA_GPU_VULKAN_COMMAND_PROCESSOR_H_ -#define XENIA_GPU_VULKAN_COMMAND_PROCESSOR_H_ +#ifndef 
XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ +#define XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ #include #include @@ -162,4 +162,4 @@ class VulkanCommandProcessor : public CommandProcessor { } // namespace gpu } // namespace xe -#endif // XENIA_GPU_VULKAN_COMMAND_PROCESSOR_H_ +#endif // XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ From ea959b52fdc84566881099aa5110ab97e1069607 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 18 Feb 2016 16:40:02 -0800 Subject: [PATCH 008/145] Adding a subset of the glslang repo dealing with spirv. The main repo is a mess. --- premake5.lua | 1 + third_party/glslang-spirv.lua | 29 + third_party/glslang-spirv/GLSL.std.450.h | 131 + third_party/glslang-spirv/GlslangToSpv.cpp | 3902 +++++++++++++++++ third_party/glslang-spirv/GlslangToSpv.h | 43 + third_party/glslang-spirv/InReadableOrder.cpp | 116 + third_party/glslang-spirv/SPVRemapper.cpp | 1231 ++++++ third_party/glslang-spirv/SPVRemapper.h | 288 ++ third_party/glslang-spirv/SpvBuilder.cpp | 2344 ++++++++++ third_party/glslang-spirv/SpvBuilder.h | 576 +++ third_party/glslang-spirv/disassemble.cpp | 576 +++ third_party/glslang-spirv/disassemble.h | 56 + third_party/glslang-spirv/doc.cpp | 2711 ++++++++++++ third_party/glslang-spirv/doc.h | 261 ++ third_party/glslang-spirv/spirv.hpp | 879 ++++ third_party/glslang-spirv/spvIR.h | 403 ++ 16 files changed, 13547 insertions(+) create mode 100644 third_party/glslang-spirv.lua create mode 100644 third_party/glslang-spirv/GLSL.std.450.h create mode 100644 third_party/glslang-spirv/GlslangToSpv.cpp create mode 100644 third_party/glslang-spirv/GlslangToSpv.h create mode 100644 third_party/glslang-spirv/InReadableOrder.cpp create mode 100644 third_party/glslang-spirv/SPVRemapper.cpp create mode 100644 third_party/glslang-spirv/SPVRemapper.h create mode 100644 third_party/glslang-spirv/SpvBuilder.cpp create mode 100644 third_party/glslang-spirv/SpvBuilder.h create mode 100644 third_party/glslang-spirv/disassemble.cpp create mode 100644 
third_party/glslang-spirv/disassemble.h create mode 100644 third_party/glslang-spirv/doc.cpp create mode 100644 third_party/glslang-spirv/doc.h create mode 100644 third_party/glslang-spirv/spirv.hpp create mode 100644 third_party/glslang-spirv/spvIR.h diff --git a/premake5.lua b/premake5.lua index 7c429828b..d0c2261f3 100644 --- a/premake5.lua +++ b/premake5.lua @@ -165,6 +165,7 @@ solution("xenia") include("third_party/capstone.lua") include("third_party/gflags.lua") include("third_party/glew.lua") + include("third_party/glslang-spirv.lua") include("third_party/imgui.lua") include("third_party/libav.lua") include("third_party/snappy.lua") diff --git a/third_party/glslang-spirv.lua b/third_party/glslang-spirv.lua new file mode 100644 index 000000000..a3a237eac --- /dev/null +++ b/third_party/glslang-spirv.lua @@ -0,0 +1,29 @@ +group("third_party") +project("glslang-spirv") + uuid("1cc8f45e-91e2-4daf-a55e-666bf8b5e6b2") + kind("StaticLib") + language("C++") + links({ + }) + defines({ + "_LIB", + }) + includedirs({ + }) + files({ + "glslang-spirv/disassemble.cpp", + "glslang-spirv/disassemble.h", + "glslang-spirv/doc.cpp", + "glslang-spirv/doc.h", + "glslang-spirv/GLSL.std.450.h", + -- Disabled until required. + -- "glslang-spirv/GlslangToSpv.cpp", + -- "glslang-spirv/GlslangToSpv.h", + "glslang-spirv/InReadableOrder.cpp", + "glslang-spirv/spirv.hpp", + "glslang-spirv/SpvBuilder.cpp", + "glslang-spirv/SpvBuilder.h", + "glslang-spirv/spvIR.h", + "glslang-spirv/SPVRemapper.cpp", + "glslang-spirv/SPVRemapper.h", + }) diff --git a/third_party/glslang-spirv/GLSL.std.450.h b/third_party/glslang-spirv/GLSL.std.450.h new file mode 100644 index 000000000..df31092be --- /dev/null +++ b/third_party/glslang-spirv/GLSL.std.450.h @@ -0,0 +1,131 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 1; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, 
+ GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/third_party/glslang-spirv/GlslangToSpv.cpp b/third_party/glslang-spirv/GlslangToSpv.cpp new file mode 100644 index 000000000..864902ccb --- /dev/null +++ b/third_party/glslang-spirv/GlslangToSpv.cpp @@ -0,0 +1,3902 @@ +// +//Copyright (C) 2014-2015 LunarG, Inc. +//Copyright (C) 2015-2016 Google, Inc. +// +//All rights reserved. +// +//Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions +//are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +//POSSIBILITY OF SUCH DAMAGE. + +// +// Author: John Kessenich, LunarG +// +// Visit the nodes in the glslang intermediate tree representation to +// translate them to SPIR-V. +// + +#include "spirv.hpp" +#include "GlslangToSpv.h" +#include "SpvBuilder.h" +namespace spv { + #include "GLSL.std.450.h" +} + +// Glslang includes +#include "../glslang/MachineIndependent/localintermediate.h" +#include "../glslang/MachineIndependent/SymbolTable.h" +#include "../glslang/Include/Common.h" + +#include +#include +#include +#include +#include +#include + +namespace { + +// For low-order part of the generator's magic number. Bump up +// when there is a change in the style (e.g., if SSA form changes, +// or a different instruction sequence to do something gets used). +const int GeneratorVersion = 1; + +// +// The main holder of information for translating glslang to SPIR-V. +// +// Derives from the AST walking base class. 
+// +class TGlslangToSpvTraverser : public glslang::TIntermTraverser { +public: + TGlslangToSpvTraverser(const glslang::TIntermediate*); + virtual ~TGlslangToSpvTraverser(); + + bool visitAggregate(glslang::TVisit, glslang::TIntermAggregate*); + bool visitBinary(glslang::TVisit, glslang::TIntermBinary*); + void visitConstantUnion(glslang::TIntermConstantUnion*); + bool visitSelection(glslang::TVisit, glslang::TIntermSelection*); + bool visitSwitch(glslang::TVisit, glslang::TIntermSwitch*); + void visitSymbol(glslang::TIntermSymbol* symbol); + bool visitUnary(glslang::TVisit, glslang::TIntermUnary*); + bool visitLoop(glslang::TVisit, glslang::TIntermLoop*); + bool visitBranch(glslang::TVisit visit, glslang::TIntermBranch*); + + void dumpSpv(std::vector& out); + +protected: + spv::Decoration TranslateInterpolationDecoration(const glslang::TQualifier& qualifier); + spv::BuiltIn TranslateBuiltInDecoration(glslang::TBuiltInVariable); + spv::ImageFormat TranslateImageFormat(const glslang::TType& type); + spv::Id createSpvVariable(const glslang::TIntermSymbol*); + spv::Id getSampledType(const glslang::TSampler&); + spv::Id convertGlslangToSpvType(const glslang::TType& type); + spv::Id convertGlslangToSpvType(const glslang::TType& type, glslang::TLayoutPacking, const glslang::TQualifier&); + spv::Id makeArraySizeId(const glslang::TArraySizes&, int dim); + spv::Id accessChainLoad(const glslang::TType& type); + glslang::TLayoutPacking getExplicitLayout(const glslang::TType& type) const; + int getArrayStride(const glslang::TType& arrayType, glslang::TLayoutPacking, glslang::TLayoutMatrix); + int getMatrixStride(const glslang::TType& matrixType, glslang::TLayoutPacking, glslang::TLayoutMatrix); + void updateMemberOffset(const glslang::TType& structType, const glslang::TType& memberType, int& currentOffset, int& nextOffset, glslang::TLayoutPacking, glslang::TLayoutMatrix); + + bool isShaderEntrypoint(const glslang::TIntermAggregate* node); + void makeFunctions(const 
glslang::TIntermSequence&); + void makeGlobalInitializers(const glslang::TIntermSequence&); + void visitFunctions(const glslang::TIntermSequence&); + void handleFunctionEntry(const glslang::TIntermAggregate* node); + void translateArguments(const glslang::TIntermAggregate& node, std::vector& arguments); + void translateArguments(glslang::TIntermUnary& node, std::vector& arguments); + spv::Id createImageTextureFunctionCall(glslang::TIntermOperator* node); + spv::Id handleUserFunctionCall(const glslang::TIntermAggregate*); + + spv::Id createBinaryOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, spv::Id left, spv::Id right, glslang::TBasicType typeProxy, bool reduceComparison = true); + spv::Id createBinaryMatrixOperation(spv::Op, spv::Decoration precision, spv::Id typeId, spv::Id left, spv::Id right); + spv::Id createUnaryOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, spv::Id operand,glslang::TBasicType typeProxy); + spv::Id createUnaryMatrixOperation(spv::Op, spv::Decoration precision, spv::Id typeId, spv::Id operand,glslang::TBasicType typeProxy); + spv::Id createConversion(glslang::TOperator op, spv::Decoration precision, spv::Id destTypeId, spv::Id operand); + spv::Id makeSmearedConstant(spv::Id constant, int vectorSize); + spv::Id createAtomicOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy); + spv::Id createMiscOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy); + spv::Id createNoArgOperation(glslang::TOperator op); + spv::Id getSymbolId(const glslang::TIntermSymbol* node); + void addDecoration(spv::Id id, spv::Decoration dec); + void addDecoration(spv::Id id, spv::Decoration dec, unsigned value); + void addMemberDecoration(spv::Id id, int member, spv::Decoration dec); + void addMemberDecoration(spv::Id id, int member, spv::Decoration dec, unsigned 
value); + spv::Id createSpvSpecConstant(const glslang::TIntermTyped&); + spv::Id createSpvConstant(const glslang::TType& type, const glslang::TConstUnionArray&, int& nextConst, bool specConstant); + bool isTrivialLeaf(const glslang::TIntermTyped* node); + bool isTrivial(const glslang::TIntermTyped* node); + spv::Id createShortCircuit(glslang::TOperator, glslang::TIntermTyped& left, glslang::TIntermTyped& right); + + spv::Function* shaderEntry; + spv::Instruction* entryPoint; + int sequenceDepth; + + // There is a 1:1 mapping between a spv builder and a module; this is thread safe + spv::Builder builder; + bool inMain; + bool mainTerminated; + bool linkageOnly; // true when visiting the set of objects in the AST present only for establishing interface, whether or not they were statically used + std::set iOSet; // all input/output variables from either static use or declaration of interface + const glslang::TIntermediate* glslangIntermediate; + spv::Id stdBuiltins; + + std::unordered_map symbolValues; + std::unordered_set constReadOnlyParameters; // set of formal function parameters that have glslang qualifier constReadOnly, so we know they are not local function "const" that are write-once + std::unordered_map functionMap; + std::unordered_map structMap[glslang::ElpCount][glslang::ElmCount]; + std::unordered_map > memberRemapper; // for mapping glslang block indices to spv indices (e.g., due to hidden members) + std::stack breakForLoop; // false means break for switch +}; + +// +// Helper functions for translating glslang representations to SPIR-V enumerants. +// + +// Translate glslang profile to SPIR-V source language. 
+spv::SourceLanguage TranslateSourceLanguage(EProfile profile) +{ + switch (profile) { + case ENoProfile: + case ECoreProfile: + case ECompatibilityProfile: + return spv::SourceLanguageGLSL; + case EEsProfile: + return spv::SourceLanguageESSL; + default: + return spv::SourceLanguageUnknown; + } +} + +// Translate glslang language (stage) to SPIR-V execution model. +spv::ExecutionModel TranslateExecutionModel(EShLanguage stage) +{ + switch (stage) { + case EShLangVertex: return spv::ExecutionModelVertex; + case EShLangTessControl: return spv::ExecutionModelTessellationControl; + case EShLangTessEvaluation: return spv::ExecutionModelTessellationEvaluation; + case EShLangGeometry: return spv::ExecutionModelGeometry; + case EShLangFragment: return spv::ExecutionModelFragment; + case EShLangCompute: return spv::ExecutionModelGLCompute; + default: + assert(0); + return spv::ExecutionModelFragment; + } +} + +// Translate glslang type to SPIR-V storage class. +spv::StorageClass TranslateStorageClass(const glslang::TType& type) +{ + if (type.getQualifier().isPipeInput()) + return spv::StorageClassInput; + else if (type.getQualifier().isPipeOutput()) + return spv::StorageClassOutput; + else if (type.getQualifier().isUniformOrBuffer()) { + if (type.getQualifier().layoutPushConstant) + return spv::StorageClassPushConstant; + if (type.getBasicType() == glslang::EbtBlock) + return spv::StorageClassUniform; + else if (type.getBasicType() == glslang::EbtAtomicUint) + return spv::StorageClassAtomicCounter; + else + return spv::StorageClassUniformConstant; + // TODO: how are we distuingishing between default and non-default non-writable uniforms? Do default uniforms even exist? 
+ } else { + switch (type.getQualifier().storage) { + case glslang::EvqShared: return spv::StorageClassWorkgroup; break; + case glslang::EvqGlobal: return spv::StorageClassPrivate; + case glslang::EvqConstReadOnly: return spv::StorageClassFunction; + case glslang::EvqTemporary: return spv::StorageClassFunction; + default: + assert(0); + return spv::StorageClassFunction; + } + } +} + +// Translate glslang sampler type to SPIR-V dimensionality. +spv::Dim TranslateDimensionality(const glslang::TSampler& sampler) +{ + switch (sampler.dim) { + case glslang::Esd1D: return spv::Dim1D; + case glslang::Esd2D: return spv::Dim2D; + case glslang::Esd3D: return spv::Dim3D; + case glslang::EsdCube: return spv::DimCube; + case glslang::EsdRect: return spv::DimRect; + case glslang::EsdBuffer: return spv::DimBuffer; + case glslang::EsdSubpass: return spv::DimSubpassData; + default: + assert(0); + return spv::Dim2D; + } +} + +// Translate glslang type to SPIR-V precision decorations. +spv::Decoration TranslatePrecisionDecoration(const glslang::TType& type) +{ + switch (type.getQualifier().precision) { + case glslang::EpqLow: return spv::DecorationRelaxedPrecision; + case glslang::EpqMedium: return spv::DecorationRelaxedPrecision; + default: + return spv::NoPrecision; + } +} + +// Translate glslang type to SPIR-V block decorations. +spv::Decoration TranslateBlockDecoration(const glslang::TType& type) +{ + if (type.getBasicType() == glslang::EbtBlock) { + switch (type.getQualifier().storage) { + case glslang::EvqUniform: return spv::DecorationBlock; + case glslang::EvqBuffer: return spv::DecorationBufferBlock; + case glslang::EvqVaryingIn: return spv::DecorationBlock; + case glslang::EvqVaryingOut: return spv::DecorationBlock; + default: + assert(0); + break; + } + } + + return (spv::Decoration)spv::BadValue; +} + +// Translate glslang type to SPIR-V layout decorations. 
+spv::Decoration TranslateLayoutDecoration(const glslang::TType& type, glslang::TLayoutMatrix matrixLayout) +{ + if (type.isMatrix()) { + switch (matrixLayout) { + case glslang::ElmRowMajor: + return spv::DecorationRowMajor; + case glslang::ElmColumnMajor: + return spv::DecorationColMajor; + default: + // opaque layouts don't need a majorness + return (spv::Decoration)spv::BadValue; + } + } else { + switch (type.getBasicType()) { + default: + return (spv::Decoration)spv::BadValue; + break; + case glslang::EbtBlock: + switch (type.getQualifier().storage) { + case glslang::EvqUniform: + case glslang::EvqBuffer: + switch (type.getQualifier().layoutPacking) { + case glslang::ElpShared: return spv::DecorationGLSLShared; + case glslang::ElpPacked: return spv::DecorationGLSLPacked; + default: + return (spv::Decoration)spv::BadValue; + } + case glslang::EvqVaryingIn: + case glslang::EvqVaryingOut: + assert(type.getQualifier().layoutPacking == glslang::ElpNone); + return (spv::Decoration)spv::BadValue; + default: + assert(0); + return (spv::Decoration)spv::BadValue; + } + } + } +} + +// Translate glslang type to SPIR-V interpolation decorations. +// Returns spv::Decoration(spv::BadValue) when no decoration +// should be applied. +spv::Decoration TGlslangToSpvTraverser::TranslateInterpolationDecoration(const glslang::TQualifier& qualifier) +{ + if (qualifier.smooth) { + // Smooth decoration doesn't exist in SPIR-V 1.0 + return (spv::Decoration)spv::BadValue; + } + if (qualifier.nopersp) + return spv::DecorationNoPerspective; + else if (qualifier.patch) + return spv::DecorationPatch; + else if (qualifier.flat) + return spv::DecorationFlat; + else if (qualifier.centroid) + return spv::DecorationCentroid; + else if (qualifier.sample) { + builder.addCapability(spv::CapabilitySampleRateShading); + return spv::DecorationSample; + } else + return (spv::Decoration)spv::BadValue; +} + +// If glslang type is invariant, return SPIR-V invariant decoration. 
+spv::Decoration TranslateInvariantDecoration(const glslang::TQualifier& qualifier) +{ + if (qualifier.invariant) + return spv::DecorationInvariant; + else + return (spv::Decoration)spv::BadValue; +} + +// Translate glslang built-in variable to SPIR-V built in decoration. +spv::BuiltIn TGlslangToSpvTraverser::TranslateBuiltInDecoration(glslang::TBuiltInVariable builtIn) +{ + switch (builtIn) { + case glslang::EbvPointSize: + switch (glslangIntermediate->getStage()) { + case EShLangGeometry: + builder.addCapability(spv::CapabilityGeometryPointSize); + break; + case EShLangTessControl: + case EShLangTessEvaluation: + builder.addCapability(spv::CapabilityTessellationPointSize); + break; + } + return spv::BuiltInPointSize; + + case glslang::EbvClipDistance: + builder.addCapability(spv::CapabilityClipDistance); + return spv::BuiltInClipDistance; + + case glslang::EbvCullDistance: + builder.addCapability(spv::CapabilityCullDistance); + return spv::BuiltInCullDistance; + + case glslang::EbvViewportIndex: + // TODO: builder.addCapability(spv::CapabilityMultiViewport); + return spv::BuiltInViewportIndex; + + case glslang::EbvSampleId: + builder.addCapability(spv::CapabilitySampleRateShading); + return spv::BuiltInSampleId; + + case glslang::EbvSamplePosition: + builder.addCapability(spv::CapabilitySampleRateShading); + return spv::BuiltInSamplePosition; + + case glslang::EbvSampleMask: + builder.addCapability(spv::CapabilitySampleRateShading); + return spv::BuiltInSampleMask; + + case glslang::EbvPosition: return spv::BuiltInPosition; + case glslang::EbvVertexId: return spv::BuiltInVertexId; + case glslang::EbvInstanceId: return spv::BuiltInInstanceId; + case glslang::EbvVertexIndex: return spv::BuiltInVertexIndex; + case glslang::EbvInstanceIndex: return spv::BuiltInInstanceIndex; + case glslang::EbvBaseVertex: + case glslang::EbvBaseInstance: + case glslang::EbvDrawId: + // TODO: Add SPIR-V builtin ID. 
+ spv::MissingFunctionality("Draw parameters"); + return (spv::BuiltIn)spv::BadValue; + case glslang::EbvPrimitiveId: return spv::BuiltInPrimitiveId; + case glslang::EbvInvocationId: return spv::BuiltInInvocationId; + case glslang::EbvLayer: return spv::BuiltInLayer; + case glslang::EbvTessLevelInner: return spv::BuiltInTessLevelInner; + case glslang::EbvTessLevelOuter: return spv::BuiltInTessLevelOuter; + case glslang::EbvTessCoord: return spv::BuiltInTessCoord; + case glslang::EbvPatchVertices: return spv::BuiltInPatchVertices; + case glslang::EbvFragCoord: return spv::BuiltInFragCoord; + case glslang::EbvPointCoord: return spv::BuiltInPointCoord; + case glslang::EbvFace: return spv::BuiltInFrontFacing; + case glslang::EbvFragDepth: return spv::BuiltInFragDepth; + case glslang::EbvHelperInvocation: return spv::BuiltInHelperInvocation; + case glslang::EbvNumWorkGroups: return spv::BuiltInNumWorkgroups; + case glslang::EbvWorkGroupSize: return spv::BuiltInWorkgroupSize; + case glslang::EbvWorkGroupId: return spv::BuiltInWorkgroupId; + case glslang::EbvLocalInvocationId: return spv::BuiltInLocalInvocationId; + case glslang::EbvLocalInvocationIndex: return spv::BuiltInLocalInvocationIndex; + case glslang::EbvGlobalInvocationId: return spv::BuiltInGlobalInvocationId; + default: return (spv::BuiltIn)spv::BadValue; + } +} + +// Translate glslang image layout format to SPIR-V image format. 
+spv::ImageFormat TGlslangToSpvTraverser::TranslateImageFormat(const glslang::TType& type) +{ + assert(type.getBasicType() == glslang::EbtSampler); + + // Check for capabilities + switch (type.getQualifier().layoutFormat) { + case glslang::ElfRg32f: + case glslang::ElfRg16f: + case glslang::ElfR11fG11fB10f: + case glslang::ElfR16f: + case glslang::ElfRgba16: + case glslang::ElfRgb10A2: + case glslang::ElfRg16: + case glslang::ElfRg8: + case glslang::ElfR16: + case glslang::ElfR8: + case glslang::ElfRgba16Snorm: + case glslang::ElfRg16Snorm: + case glslang::ElfRg8Snorm: + case glslang::ElfR16Snorm: + case glslang::ElfR8Snorm: + + case glslang::ElfRg32i: + case glslang::ElfRg16i: + case glslang::ElfRg8i: + case glslang::ElfR16i: + case glslang::ElfR8i: + + case glslang::ElfRgb10a2ui: + case glslang::ElfRg32ui: + case glslang::ElfRg16ui: + case glslang::ElfRg8ui: + case glslang::ElfR16ui: + case glslang::ElfR8ui: + builder.addCapability(spv::CapabilityStorageImageExtendedFormats); + break; + + default: + break; + } + + // do the translation + switch (type.getQualifier().layoutFormat) { + case glslang::ElfNone: return spv::ImageFormatUnknown; + case glslang::ElfRgba32f: return spv::ImageFormatRgba32f; + case glslang::ElfRgba16f: return spv::ImageFormatRgba16f; + case glslang::ElfR32f: return spv::ImageFormatR32f; + case glslang::ElfRgba8: return spv::ImageFormatRgba8; + case glslang::ElfRgba8Snorm: return spv::ImageFormatRgba8Snorm; + case glslang::ElfRg32f: return spv::ImageFormatRg32f; + case glslang::ElfRg16f: return spv::ImageFormatRg16f; + case glslang::ElfR11fG11fB10f: return spv::ImageFormatR11fG11fB10f; + case glslang::ElfR16f: return spv::ImageFormatR16f; + case glslang::ElfRgba16: return spv::ImageFormatRgba16; + case glslang::ElfRgb10A2: return spv::ImageFormatRgb10A2; + case glslang::ElfRg16: return spv::ImageFormatRg16; + case glslang::ElfRg8: return spv::ImageFormatRg8; + case glslang::ElfR16: return spv::ImageFormatR16; + case glslang::ElfR8: return 
spv::ImageFormatR8; + case glslang::ElfRgba16Snorm: return spv::ImageFormatRgba16Snorm; + case glslang::ElfRg16Snorm: return spv::ImageFormatRg16Snorm; + case glslang::ElfRg8Snorm: return spv::ImageFormatRg8Snorm; + case glslang::ElfR16Snorm: return spv::ImageFormatR16Snorm; + case glslang::ElfR8Snorm: return spv::ImageFormatR8Snorm; + case glslang::ElfRgba32i: return spv::ImageFormatRgba32i; + case glslang::ElfRgba16i: return spv::ImageFormatRgba16i; + case glslang::ElfRgba8i: return spv::ImageFormatRgba8i; + case glslang::ElfR32i: return spv::ImageFormatR32i; + case glslang::ElfRg32i: return spv::ImageFormatRg32i; + case glslang::ElfRg16i: return spv::ImageFormatRg16i; + case glslang::ElfRg8i: return spv::ImageFormatRg8i; + case glslang::ElfR16i: return spv::ImageFormatR16i; + case glslang::ElfR8i: return spv::ImageFormatR8i; + case glslang::ElfRgba32ui: return spv::ImageFormatRgba32ui; + case glslang::ElfRgba16ui: return spv::ImageFormatRgba16ui; + case glslang::ElfRgba8ui: return spv::ImageFormatRgba8ui; + case glslang::ElfR32ui: return spv::ImageFormatR32ui; + case glslang::ElfRg32ui: return spv::ImageFormatRg32ui; + case glslang::ElfRg16ui: return spv::ImageFormatRg16ui; + case glslang::ElfRgb10a2ui: return spv::ImageFormatRgb10a2ui; + case glslang::ElfRg8ui: return spv::ImageFormatRg8ui; + case glslang::ElfR16ui: return spv::ImageFormatR16ui; + case glslang::ElfR8ui: return spv::ImageFormatR8ui; + default: return (spv::ImageFormat)spv::BadValue; + } +} + +// Return whether or not the given type is something that should be tied to a +// descriptor set. +bool IsDescriptorResource(const glslang::TType& type) +{ + // uniform and buffer blocks are included + if (type.getBasicType() == glslang::EbtBlock) + return type.getQualifier().isUniformOrBuffer(); + + // non block... + // basically samplerXXX/subpass/sampler/texture are all included + // if they are the global-scope-class, not the function parameter + // (or local, if they ever exist) class. 
+ if (type.getBasicType() == glslang::EbtSampler) + return type.getQualifier().isUniformOrBuffer(); + + // None of the above. + return false; +} + +void InheritQualifiers(glslang::TQualifier& child, const glslang::TQualifier& parent) +{ + if (child.layoutMatrix == glslang::ElmNone) + child.layoutMatrix = parent.layoutMatrix; + + if (parent.invariant) + child.invariant = true; + if (parent.nopersp) + child.nopersp = true; + if (parent.flat) + child.flat = true; + if (parent.centroid) + child.centroid = true; + if (parent.patch) + child.patch = true; + if (parent.sample) + child.sample = true; + + child.layoutLocation = parent.layoutLocation; +} + +bool HasNonLayoutQualifiers(const glslang::TQualifier& qualifier) +{ + // This should list qualifiers that simultaneously satisfy: + // - struct members can inherit from a struct declaration + // - affect decorations on the struct members (note smooth does not, and expecting something like volatile to affect the whole object) + // - are not part of the offset/std430/etc or row/column-major layout + return qualifier.invariant || qualifier.nopersp || qualifier.flat || qualifier.centroid || qualifier.patch || qualifier.sample || qualifier.hasLocation(); +} + +// +// Implement the TGlslangToSpvTraverser class.
+// + +TGlslangToSpvTraverser::TGlslangToSpvTraverser(const glslang::TIntermediate* glslangIntermediate) + : TIntermTraverser(true, false, true), shaderEntry(0), sequenceDepth(0), + builder((glslang::GetKhronosToolId() << 16) | GeneratorVersion), + inMain(false), mainTerminated(false), linkageOnly(false), + glslangIntermediate(glslangIntermediate) +{ + spv::ExecutionModel executionModel = TranslateExecutionModel(glslangIntermediate->getStage()); + + builder.clearAccessChain(); + builder.setSource(TranslateSourceLanguage(glslangIntermediate->getProfile()), glslangIntermediate->getVersion()); + stdBuiltins = builder.import("GLSL.std.450"); + builder.setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450); + shaderEntry = builder.makeMain(); + entryPoint = builder.addEntryPoint(executionModel, shaderEntry, "main"); + + // Add the source extensions + const auto& sourceExtensions = glslangIntermediate->getRequestedExtensions(); + for (auto it = sourceExtensions.begin(); it != sourceExtensions.end(); ++it) + builder.addSourceExtension(it->c_str()); + + // Add the top-level modes for this shader. 
+ + if (glslangIntermediate->getXfbMode()) { + builder.addCapability(spv::CapabilityTransformFeedback); + builder.addExecutionMode(shaderEntry, spv::ExecutionModeXfb); + } + + unsigned int mode; + switch (glslangIntermediate->getStage()) { + case EShLangVertex: + builder.addCapability(spv::CapabilityShader); + break; + + case EShLangTessControl: + builder.addCapability(spv::CapabilityTessellation); + builder.addExecutionMode(shaderEntry, spv::ExecutionModeOutputVertices, glslangIntermediate->getVertices()); + break; + + case EShLangTessEvaluation: + builder.addCapability(spv::CapabilityTessellation); + switch (glslangIntermediate->getInputPrimitive()) { + case glslang::ElgTriangles: mode = spv::ExecutionModeTriangles; break; + case glslang::ElgQuads: mode = spv::ExecutionModeQuads; break; + case glslang::ElgIsolines: mode = spv::ExecutionModeIsolines; break; + default: mode = spv::BadValue; break; + } + if (mode != spv::BadValue) + builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode); + + switch (glslangIntermediate->getVertexSpacing()) { + case glslang::EvsEqual: mode = spv::ExecutionModeSpacingEqual; break; + case glslang::EvsFractionalEven: mode = spv::ExecutionModeSpacingFractionalEven; break; + case glslang::EvsFractionalOdd: mode = spv::ExecutionModeSpacingFractionalOdd; break; + default: mode = spv::BadValue; break; + } + if (mode != spv::BadValue) + builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode); + + switch (glslangIntermediate->getVertexOrder()) { + case glslang::EvoCw: mode = spv::ExecutionModeVertexOrderCw; break; + case glslang::EvoCcw: mode = spv::ExecutionModeVertexOrderCcw; break; + default: mode = spv::BadValue; break; + } + if (mode != spv::BadValue) + builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode); + + if (glslangIntermediate->getPointMode()) + builder.addExecutionMode(shaderEntry, spv::ExecutionModePointMode); + break; + + case EShLangGeometry: + builder.addCapability(spv::CapabilityGeometry); + switch 
(glslangIntermediate->getInputPrimitive()) { + case glslang::ElgPoints: mode = spv::ExecutionModeInputPoints; break; + case glslang::ElgLines: mode = spv::ExecutionModeInputLines; break; + case glslang::ElgLinesAdjacency: mode = spv::ExecutionModeInputLinesAdjacency; break; + case glslang::ElgTriangles: mode = spv::ExecutionModeTriangles; break; + case glslang::ElgTrianglesAdjacency: mode = spv::ExecutionModeInputTrianglesAdjacency; break; + default: mode = spv::BadValue; break; + } + if (mode != spv::BadValue) + builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode); + + builder.addExecutionMode(shaderEntry, spv::ExecutionModeInvocations, glslangIntermediate->getInvocations()); + + switch (glslangIntermediate->getOutputPrimitive()) { + case glslang::ElgPoints: mode = spv::ExecutionModeOutputPoints; break; + case glslang::ElgLineStrip: mode = spv::ExecutionModeOutputLineStrip; break; + case glslang::ElgTriangleStrip: mode = spv::ExecutionModeOutputTriangleStrip; break; + default: mode = spv::BadValue; break; + } + if (mode != spv::BadValue) + builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode); + builder.addExecutionMode(shaderEntry, spv::ExecutionModeOutputVertices, glslangIntermediate->getVertices()); + break; + + case EShLangFragment: + builder.addCapability(spv::CapabilityShader); + if (glslangIntermediate->getPixelCenterInteger()) + builder.addExecutionMode(shaderEntry, spv::ExecutionModePixelCenterInteger); + + if (glslangIntermediate->getOriginUpperLeft()) + builder.addExecutionMode(shaderEntry, spv::ExecutionModeOriginUpperLeft); + else + builder.addExecutionMode(shaderEntry, spv::ExecutionModeOriginLowerLeft); + + if (glslangIntermediate->getEarlyFragmentTests()) + builder.addExecutionMode(shaderEntry, spv::ExecutionModeEarlyFragmentTests); + + switch(glslangIntermediate->getDepth()) { + case glslang::EldGreater: mode = spv::ExecutionModeDepthGreater; break; + case glslang::EldLess: mode = spv::ExecutionModeDepthLess; break; + default: 
mode = spv::BadValue; break; + } + if (mode != spv::BadValue) + builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode); + + if (glslangIntermediate->getDepth() != glslang::EldUnchanged && glslangIntermediate->isDepthReplacing()) + builder.addExecutionMode(shaderEntry, spv::ExecutionModeDepthReplacing); + break; + + case EShLangCompute: + builder.addCapability(spv::CapabilityShader); + builder.addExecutionMode(shaderEntry, spv::ExecutionModeLocalSize, glslangIntermediate->getLocalSize(0), + glslangIntermediate->getLocalSize(1), + glslangIntermediate->getLocalSize(2)); + break; + + default: + break; + } + +} + +// Finish everything and dump +void TGlslangToSpvTraverser::dumpSpv(std::vector& out) +{ + // finish off the entry-point SPV instruction by adding the Input/Output + for (auto it : iOSet) + entryPoint->addIdOperand(it); + + builder.dump(out); +} + +TGlslangToSpvTraverser::~TGlslangToSpvTraverser() +{ + if (! mainTerminated) { + spv::Block* lastMainBlock = shaderEntry->getLastBlock(); + builder.setBuildPoint(lastMainBlock); + builder.leaveFunction(); + } +} + +// +// Implement the traversal functions. +// +// Return true from interior nodes to have the external traversal +// continue on to children. Return false if children were +// already processed. +// + +// +// Symbols can turn into +// - uniform/input reads +// - output writes +// - complex lvalue base setups: foo.bar[3].... , where we see foo and start up an access chain +// - something simple that degenerates into the last bullet +// +void TGlslangToSpvTraverser::visitSymbol(glslang::TIntermSymbol* symbol) +{ + // getSymbolId() will set up all the IO decorations on the first call. + // Formal function parameters were mapped during makeFunctions(). 
+ spv::Id id = getSymbolId(symbol); + + // Include all "static use" and "linkage only" interface variables on the OpEntryPoint instruction + if (builder.isPointer(id)) { + spv::StorageClass sc = builder.getStorageClass(id); + if (sc == spv::StorageClassInput || sc == spv::StorageClassOutput) + iOSet.insert(id); + } + + // Only process non-linkage-only nodes for generating actual static uses + if (! linkageOnly || symbol->getQualifier().isSpecConstant()) { + // Prepare to generate code for the access + + // L-value chains will be computed left to right. We're on the symbol now, + // which is the left-most part of the access chain, so now is "clear" time, + // followed by setting the base. + builder.clearAccessChain(); + + // For now, we consider all user variables as being in memory, so they are pointers, + // except for + // A) "const in" arguments to a function, which are an intermediate object. + // See comments in handleUserFunctionCall(). + // B) Specialization constants (normal constant don't even come in as a variable), + // These are also pure R-values. 
+ glslang::TQualifier qualifier = symbol->getQualifier(); + if ((qualifier.storage == glslang::EvqConstReadOnly && constReadOnlyParameters.find(symbol->getId()) != constReadOnlyParameters.end()) || + qualifier.isSpecConstant()) + builder.setAccessChainRValue(id); + else + builder.setAccessChainLValue(id); + } +} + +bool TGlslangToSpvTraverser::visitBinary(glslang::TVisit /* visit */, glslang::TIntermBinary* node) +{ + // First, handle special cases + switch (node->getOp()) { + case glslang::EOpAssign: + case glslang::EOpAddAssign: + case glslang::EOpSubAssign: + case glslang::EOpMulAssign: + case glslang::EOpVectorTimesMatrixAssign: + case glslang::EOpVectorTimesScalarAssign: + case glslang::EOpMatrixTimesScalarAssign: + case glslang::EOpMatrixTimesMatrixAssign: + case glslang::EOpDivAssign: + case glslang::EOpModAssign: + case glslang::EOpAndAssign: + case glslang::EOpInclusiveOrAssign: + case glslang::EOpExclusiveOrAssign: + case glslang::EOpLeftShiftAssign: + case glslang::EOpRightShiftAssign: + // A bin-op assign "a += b" means the same thing as "a = a + b" + // where a is evaluated before b. For a simple assignment, GLSL + // says to evaluate the left before the right. So, always, left + // node then right node. 
+ { + // get the left l-value, save it away + builder.clearAccessChain(); + node->getLeft()->traverse(this); + spv::Builder::AccessChain lValue = builder.getAccessChain(); + + // evaluate the right + builder.clearAccessChain(); + node->getRight()->traverse(this); + spv::Id rValue = accessChainLoad(node->getRight()->getType()); + + if (node->getOp() != glslang::EOpAssign) { + // the left is also an r-value + builder.setAccessChain(lValue); + spv::Id leftRValue = accessChainLoad(node->getLeft()->getType()); + + // do the operation + rValue = createBinaryOperation(node->getOp(), TranslatePrecisionDecoration(node->getType()), + convertGlslangToSpvType(node->getType()), leftRValue, rValue, + node->getType().getBasicType()); + + // these all need their counterparts in createBinaryOperation() + assert(rValue != spv::NoResult); + } + + // store the result + builder.setAccessChain(lValue); + builder.accessChainStore(rValue); + + // assignments are expressions having an rValue after they are evaluated... + builder.clearAccessChain(); + builder.setAccessChainRValue(rValue); + } + return false; + case glslang::EOpIndexDirect: + case glslang::EOpIndexDirectStruct: + { + // Get the left part of the access chain. + node->getLeft()->traverse(this); + + // Add the next element in the chain + + int index = node->getRight()->getAsConstantUnion()->getConstArray()[0].getIConst(); + if (node->getLeft()->getBasicType() == glslang::EbtBlock && node->getOp() == glslang::EOpIndexDirectStruct) { + // This may be, e.g., an anonymous block-member selection, which generally need + // index remapping due to hidden members in anonymous blocks. + std::vector& remapper = memberRemapper[node->getLeft()->getType().getStruct()]; + assert(remapper.size() > 0); + index = remapper[index]; + } + + if (! 
node->getLeft()->getType().isArray() && + node->getLeft()->getType().isVector() && + node->getOp() == glslang::EOpIndexDirect) { + // This is essentially a hard-coded vector swizzle of size 1, + // so short circuit the access-chain stuff with a swizzle. + std::vector swizzle; + swizzle.push_back(node->getRight()->getAsConstantUnion()->getConstArray()[0].getIConst()); + builder.accessChainPushSwizzle(swizzle, convertGlslangToSpvType(node->getLeft()->getType())); + } else { + // normal case for indexing array or structure or block + builder.accessChainPush(builder.makeIntConstant(index)); + } + } + return false; + case glslang::EOpIndexIndirect: + { + // Structure or array or vector indirection. + // Will use native SPIR-V access-chain for struct and array indirection; + // matrices are arrays of vectors, so will also work for a matrix. + // Will use the access chain's 'component' for variable index into a vector. + + // This adapter is building access chains left to right. + // Set up the access chain to the left. + node->getLeft()->traverse(this); + + // save it so that computing the right side doesn't trash it + spv::Builder::AccessChain partial = builder.getAccessChain(); + + // compute the next index in the chain + builder.clearAccessChain(); + node->getRight()->traverse(this); + spv::Id index = accessChainLoad(node->getRight()->getType()); + + // restore the saved access chain + builder.setAccessChain(partial); + + if (! 
node->getLeft()->getType().isArray() && node->getLeft()->getType().isVector()) + builder.accessChainPushComponent(index, convertGlslangToSpvType(node->getLeft()->getType())); + else + builder.accessChainPush(index); + } + return false; + case glslang::EOpVectorSwizzle: + { + node->getLeft()->traverse(this); + glslang::TIntermSequence& swizzleSequence = node->getRight()->getAsAggregate()->getSequence(); + std::vector swizzle; + for (int i = 0; i < (int)swizzleSequence.size(); ++i) + swizzle.push_back(swizzleSequence[i]->getAsConstantUnion()->getConstArray()[0].getIConst()); + builder.accessChainPushSwizzle(swizzle, convertGlslangToSpvType(node->getLeft()->getType())); + } + return false; + case glslang::EOpLogicalOr: + case glslang::EOpLogicalAnd: + { + + // These may require short circuiting, but can sometimes be done as straight + // binary operations. The right operand must be short circuited if it has + // side effects, and should probably be if it is complex. + if (isTrivial(node->getRight()->getAsTyped())) + break; // handle below as a normal binary operation + // otherwise, we need to do dynamic short circuiting on the right operand + spv::Id result = createShortCircuit(node->getOp(), *node->getLeft()->getAsTyped(), *node->getRight()->getAsTyped()); + builder.clearAccessChain(); + builder.setAccessChainRValue(result); + } + return false; + default: + break; + } + + // Assume generic binary op... + + // get left operand + builder.clearAccessChain(); + node->getLeft()->traverse(this); + spv::Id left = accessChainLoad(node->getLeft()->getType()); + + // get right operand + builder.clearAccessChain(); + node->getRight()->traverse(this); + spv::Id right = accessChainLoad(node->getRight()->getType()); + + // get result + spv::Id result = createBinaryOperation(node->getOp(), TranslatePrecisionDecoration(node->getType()), + convertGlslangToSpvType(node->getType()), left, right, + node->getLeft()->getType().getBasicType()); + + builder.clearAccessChain(); + if (!
result) { + spv::MissingFunctionality("unknown glslang binary operation"); + return true; // pick up a child as the place-holder result + } else { + builder.setAccessChainRValue(result); + return false; + } +} + +bool TGlslangToSpvTraverser::visitUnary(glslang::TVisit /* visit */, glslang::TIntermUnary* node) +{ + spv::Id result = spv::NoResult; + + // try texturing first + result = createImageTextureFunctionCall(node); + if (result != spv::NoResult) { + builder.clearAccessChain(); + builder.setAccessChainRValue(result); + + return false; // done with this node + } + + // Non-texturing. + + if (node->getOp() == glslang::EOpArrayLength) { + // Quite special; won't want to evaluate the operand. + + // Normal .length() would have been constant folded by the front-end. + // So, this has to be block.lastMember.length(). + // SPV wants "block" and member number as the operands, go get them. + assert(node->getOperand()->getType().isRuntimeSizedArray()); + glslang::TIntermTyped* block = node->getOperand()->getAsBinaryNode()->getLeft(); + block->traverse(this); + unsigned int member = node->getOperand()->getAsBinaryNode()->getRight()->getAsConstantUnion()->getConstArray()[0].getUConst(); + spv::Id length = builder.createArrayLength(builder.accessChainGetLValue(), member); + + builder.clearAccessChain(); + builder.setAccessChainRValue(length); + + return false; + } + + // Start by evaluating the operand + + builder.clearAccessChain(); + node->getOperand()->traverse(this); + + spv::Id operand = spv::NoResult; + + if (node->getOp() == glslang::EOpAtomicCounterIncrement || + node->getOp() == glslang::EOpAtomicCounterDecrement || + node->getOp() == glslang::EOpAtomicCounter || + node->getOp() == glslang::EOpInterpolateAtCentroid) + operand = builder.accessChainGetLValue(); // Special case l-value operands + else + operand = accessChainLoad(node->getOperand()->getType()); + + spv::Decoration precision = TranslatePrecisionDecoration(node->getType()); + + // it could be a 
conversion + if (! result) + result = createConversion(node->getOp(), precision, convertGlslangToSpvType(node->getType()), operand); + + // if not, then possibly an operation + if (! result) + result = createUnaryOperation(node->getOp(), precision, convertGlslangToSpvType(node->getType()), operand, node->getOperand()->getBasicType()); + + if (result) { + builder.clearAccessChain(); + builder.setAccessChainRValue(result); + + return false; // done with this node + } + + // it must be a special case, check... + switch (node->getOp()) { + case glslang::EOpPostIncrement: + case glslang::EOpPostDecrement: + case glslang::EOpPreIncrement: + case glslang::EOpPreDecrement: + { + // we need the integer value "1" or the floating point "1.0" to add/subtract + spv::Id one = node->getBasicType() == glslang::EbtFloat ? + builder.makeFloatConstant(1.0F) : + builder.makeIntConstant(1); + glslang::TOperator op; + if (node->getOp() == glslang::EOpPreIncrement || + node->getOp() == glslang::EOpPostIncrement) + op = glslang::EOpAdd; + else + op = glslang::EOpSub; + + spv::Id result = createBinaryOperation(op, TranslatePrecisionDecoration(node->getType()), + convertGlslangToSpvType(node->getType()), operand, one, + node->getType().getBasicType()); + assert(result != spv::NoResult); + + // The result of operation is always stored, but conditionally the + // consumed result. The consumed result is always an r-value. 
+ builder.accessChainStore(result); + builder.clearAccessChain(); + if (node->getOp() == glslang::EOpPreIncrement || + node->getOp() == glslang::EOpPreDecrement) + builder.setAccessChainRValue(result); + else + builder.setAccessChainRValue(operand); + } + + return false; + + case glslang::EOpEmitStreamVertex: + builder.createNoResultOp(spv::OpEmitStreamVertex, operand); + return false; + case glslang::EOpEndStreamPrimitive: + builder.createNoResultOp(spv::OpEndStreamPrimitive, operand); + return false; + + default: + spv::MissingFunctionality("unknown glslang unary"); + return true; // pick up operand as placeholder result + } +} + +bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TIntermAggregate* node) +{ + spv::Id result = spv::NoResult; + + // try texturing + result = createImageTextureFunctionCall(node); + if (result != spv::NoResult) { + builder.clearAccessChain(); + builder.setAccessChainRValue(result); + + return false; + } else if (node->getOp() == glslang::EOpImageStore) { + // "imageStore" is a special case, which has no result + return false; + } + + glslang::TOperator binOp = glslang::EOpNull; + bool reduceComparison = true; + bool isMatrix = false; + bool noReturnValue = false; + bool atomic = false; + + assert(node->getOp()); + + spv::Decoration precision = TranslatePrecisionDecoration(node->getType()); + + switch (node->getOp()) { + case glslang::EOpSequence: + { + if (preVisit) + ++sequenceDepth; + else + --sequenceDepth; + + if (sequenceDepth == 1) { + // If this is the parent node of all the functions, we want to see them + // early, so all call points have actual SPIR-V functions to reference. + // In all cases, still let the traverser visit the children for us. + makeFunctions(node->getAsAggregate()->getSequence()); + + // Also, we want all globals initializers to go into the entry of main(), before + // anything else gets there, so visit out of order, doing them all now. 
+ makeGlobalInitializers(node->getAsAggregate()->getSequence()); + + // Initializers are done, don't want to visit again, but functions link objects need to be processed, + // so do them manually. + visitFunctions(node->getAsAggregate()->getSequence()); + + return false; + } + + return true; + } + case glslang::EOpLinkerObjects: + { + if (visit == glslang::EvPreVisit) + linkageOnly = true; + else + linkageOnly = false; + + return true; + } + case glslang::EOpComma: + { + // processing from left to right naturally leaves the right-most + // lying around in the access chain + glslang::TIntermSequence& glslangOperands = node->getSequence(); + for (int i = 0; i < (int)glslangOperands.size(); ++i) + glslangOperands[i]->traverse(this); + + return false; + } + case glslang::EOpFunction: + if (visit == glslang::EvPreVisit) { + if (isShaderEntrypoint(node)) { + inMain = true; + builder.setBuildPoint(shaderEntry->getLastBlock()); + } else { + handleFunctionEntry(node); + } + } else { + if (inMain) + mainTerminated = true; + builder.leaveFunction(); + inMain = false; + } + + return true; + case glslang::EOpParameters: + // Parameters will have been consumed by EOpFunction processing, but not + // the body, so we still visited the function node's children, making this + // child redundant. 
+ return false; + case glslang::EOpFunctionCall: + { + if (node->isUserDefined()) + result = handleUserFunctionCall(node); + //assert(result); // this can happen for bad shaders because the call graph completeness checking is not yet done + if (result) { + builder.clearAccessChain(); + builder.setAccessChainRValue(result); + } else + spv::MissingFunctionality("missing user function; linker needs to catch that"); + + return false; + } + case glslang::EOpConstructMat2x2: + case glslang::EOpConstructMat2x3: + case glslang::EOpConstructMat2x4: + case glslang::EOpConstructMat3x2: + case glslang::EOpConstructMat3x3: + case glslang::EOpConstructMat3x4: + case glslang::EOpConstructMat4x2: + case glslang::EOpConstructMat4x3: + case glslang::EOpConstructMat4x4: + case glslang::EOpConstructDMat2x2: + case glslang::EOpConstructDMat2x3: + case glslang::EOpConstructDMat2x4: + case glslang::EOpConstructDMat3x2: + case glslang::EOpConstructDMat3x3: + case glslang::EOpConstructDMat3x4: + case glslang::EOpConstructDMat4x2: + case glslang::EOpConstructDMat4x3: + case glslang::EOpConstructDMat4x4: + isMatrix = true; + // fall through + case glslang::EOpConstructFloat: + case glslang::EOpConstructVec2: + case glslang::EOpConstructVec3: + case glslang::EOpConstructVec4: + case glslang::EOpConstructDouble: + case glslang::EOpConstructDVec2: + case glslang::EOpConstructDVec3: + case glslang::EOpConstructDVec4: + case glslang::EOpConstructBool: + case glslang::EOpConstructBVec2: + case glslang::EOpConstructBVec3: + case glslang::EOpConstructBVec4: + case glslang::EOpConstructInt: + case glslang::EOpConstructIVec2: + case glslang::EOpConstructIVec3: + case glslang::EOpConstructIVec4: + case glslang::EOpConstructUint: + case glslang::EOpConstructUVec2: + case glslang::EOpConstructUVec3: + case glslang::EOpConstructUVec4: + case glslang::EOpConstructStruct: + case glslang::EOpConstructTextureSampler: + { + std::vector arguments; + translateArguments(*node, arguments); + spv::Id resultTypeId = 
convertGlslangToSpvType(node->getType()); + spv::Id constructed; + if (node->getOp() == glslang::EOpConstructTextureSampler) + constructed = builder.createOp(spv::OpSampledImage, resultTypeId, arguments); + else if (node->getOp() == glslang::EOpConstructStruct || node->getType().isArray()) { + std::vector constituents; + for (int c = 0; c < (int)arguments.size(); ++c) + constituents.push_back(arguments[c]); + constructed = builder.createCompositeConstruct(resultTypeId, constituents); + } else if (isMatrix) + constructed = builder.createMatrixConstructor(precision, arguments, resultTypeId); + else + constructed = builder.createConstructor(precision, arguments, resultTypeId); + + builder.clearAccessChain(); + builder.setAccessChainRValue(constructed); + + return false; + } + + // These six are component-wise compares with component-wise results. + // Forward on to createBinaryOperation(), requesting a vector result. + case glslang::EOpLessThan: + case glslang::EOpGreaterThan: + case glslang::EOpLessThanEqual: + case glslang::EOpGreaterThanEqual: + case glslang::EOpVectorEqual: + case glslang::EOpVectorNotEqual: + { + // Map the operation to a binary + binOp = node->getOp(); + reduceComparison = false; + switch (node->getOp()) { + case glslang::EOpVectorEqual: binOp = glslang::EOpVectorEqual; break; + case glslang::EOpVectorNotEqual: binOp = glslang::EOpVectorNotEqual; break; + default: binOp = node->getOp(); break; + } + + break; + } + case glslang::EOpMul: + // component-wise matrix multiply + binOp = glslang::EOpMul; + break; + case glslang::EOpOuterProduct: + // two vectors multiplied to make a matrix + binOp = glslang::EOpOuterProduct; + break; + case glslang::EOpDot: + { + // for scalar dot product, use multiply + glslang::TIntermSequence& glslangOperands = node->getSequence(); + if (!
glslangOperands[0]->getAsTyped()->isVector()) + binOp = glslang::EOpMul; + break; + } + case glslang::EOpMod: + // when an aggregate, this is the floating-point mod built-in function, + // which can be emitted by the one in createBinaryOperation() + binOp = glslang::EOpMod; + break; + case glslang::EOpEmitVertex: + case glslang::EOpEndPrimitive: + case glslang::EOpBarrier: + case glslang::EOpMemoryBarrier: + case glslang::EOpMemoryBarrierAtomicCounter: + case glslang::EOpMemoryBarrierBuffer: + case glslang::EOpMemoryBarrierImage: + case glslang::EOpMemoryBarrierShared: + case glslang::EOpGroupMemoryBarrier: + noReturnValue = true; + // These all have 0 operands and will naturally finish up in the code below for 0 operands + break; + + case glslang::EOpAtomicAdd: + case glslang::EOpAtomicMin: + case glslang::EOpAtomicMax: + case glslang::EOpAtomicAnd: + case glslang::EOpAtomicOr: + case glslang::EOpAtomicXor: + case glslang::EOpAtomicExchange: + case glslang::EOpAtomicCompSwap: + atomic = true; + break; + + default: + break; + } + + // + // See if it maps to a regular operation. + // + if (binOp != glslang::EOpNull) { + glslang::TIntermTyped* left = node->getSequence()[0]->getAsTyped(); + glslang::TIntermTyped* right = node->getSequence()[1]->getAsTyped(); + assert(left && right); + + builder.clearAccessChain(); + left->traverse(this); + spv::Id leftId = accessChainLoad(left->getType()); + + builder.clearAccessChain(); + right->traverse(this); + spv::Id rightId = accessChainLoad(right->getType()); + + result = createBinaryOperation(binOp, precision, + convertGlslangToSpvType(node->getType()), leftId, rightId, + left->getType().getBasicType(), reduceComparison); + + // code above should only make binOp that exists in createBinaryOperation + assert(result != spv::NoResult); + builder.clearAccessChain(); + builder.setAccessChainRValue(result); + + return false; + } + + // + // Create the list of operands. 
+ // + glslang::TIntermSequence& glslangOperands = node->getSequence(); + std::vector operands; + for (int arg = 0; arg < (int)glslangOperands.size(); ++arg) { + builder.clearAccessChain(); + glslangOperands[arg]->traverse(this); + + // special case l-value operands; there are just a few + bool lvalue = false; + switch (node->getOp()) { + case glslang::EOpFrexp: + case glslang::EOpModf: + if (arg == 1) + lvalue = true; + break; + case glslang::EOpInterpolateAtSample: + case glslang::EOpInterpolateAtOffset: + if (arg == 0) + lvalue = true; + break; + case glslang::EOpAtomicAdd: + case glslang::EOpAtomicMin: + case glslang::EOpAtomicMax: + case glslang::EOpAtomicAnd: + case glslang::EOpAtomicOr: + case glslang::EOpAtomicXor: + case glslang::EOpAtomicExchange: + case glslang::EOpAtomicCompSwap: + if (arg == 0) + lvalue = true; + break; + case glslang::EOpAddCarry: + case glslang::EOpSubBorrow: + if (arg == 2) + lvalue = true; + break; + case glslang::EOpUMulExtended: + case glslang::EOpIMulExtended: + if (arg >= 2) + lvalue = true; + break; + default: + break; + } + if (lvalue) + operands.push_back(builder.accessChainGetLValue()); + else + operands.push_back(accessChainLoad(glslangOperands[arg]->getAsTyped()->getType())); + } + + if (atomic) { + // Handle all atomics + result = createAtomicOperation(node->getOp(), precision, convertGlslangToSpvType(node->getType()), operands, node->getBasicType()); + } else { + // Pass through to generic operations. + switch (glslangOperands.size()) { + case 0: + result = createNoArgOperation(node->getOp()); + break; + case 1: + result = createUnaryOperation(node->getOp(), precision, convertGlslangToSpvType(node->getType()), operands.front(), glslangOperands[0]->getAsTyped()->getBasicType()); + break; + default: + result = createMiscOperation(node->getOp(), precision, convertGlslangToSpvType(node->getType()), operands, node->getBasicType()); + break; + } + } + + if (noReturnValue) + return false; + + if (! 
result) { + spv::MissingFunctionality("unknown glslang aggregate"); + return true; // pick up a child as a placeholder operand + } else { + builder.clearAccessChain(); + builder.setAccessChainRValue(result); + return false; + } +} + +bool TGlslangToSpvTraverser::visitSelection(glslang::TVisit /* visit */, glslang::TIntermSelection* node) +{ + // This path handles both if-then-else and ?: + // The if-then-else has a node type of void, while + // ?: has a non-void node type + spv::Id result = 0; + if (node->getBasicType() != glslang::EbtVoid) { + // don't handle this as just on-the-fly temporaries, because there will be two names + // and better to leave SSA to later passes + result = builder.createVariable(spv::StorageClassFunction, convertGlslangToSpvType(node->getType())); + } + + // emit the condition before doing anything with selection + node->getCondition()->traverse(this); + + // make an "if" based on the value created by the condition + spv::Builder::If ifBuilder(accessChainLoad(node->getCondition()->getType()), builder); + + if (node->getTrueBlock()) { + // emit the "then" statement + node->getTrueBlock()->traverse(this); + if (result) + builder.createStore(accessChainLoad(node->getTrueBlock()->getAsTyped()->getType()), result); + } + + if (node->getFalseBlock()) { + ifBuilder.makeBeginElse(); + // emit the "else" statement + node->getFalseBlock()->traverse(this); + if (result) + builder.createStore(accessChainLoad(node->getFalseBlock()->getAsTyped()->getType()), result); + } + + ifBuilder.makeEndIf(); + + if (result) { + // GLSL only has r-values as the result of a :?, but + // if we have an l-value, that can be more efficient if it will + // become the base of a complex r-value expression, because the + // next layer copies r-values into memory to use the access-chain mechanism + builder.clearAccessChain(); + builder.setAccessChainLValue(result); + } + + return false; +} + +bool TGlslangToSpvTraverser::visitSwitch(glslang::TVisit /* visit */, 
glslang::TIntermSwitch* node) +{ + // emit and get the condition before doing anything with switch + node->getCondition()->traverse(this); + spv::Id selector = accessChainLoad(node->getCondition()->getAsTyped()->getType()); + + // browse the children to sort out code segments + int defaultSegment = -1; + std::vector codeSegments; + glslang::TIntermSequence& sequence = node->getBody()->getSequence(); + std::vector caseValues; + std::vector valueIndexToSegment(sequence.size()); // note: probably not all are used, it is an overestimate + for (glslang::TIntermSequence::iterator c = sequence.begin(); c != sequence.end(); ++c) { + TIntermNode* child = *c; + if (child->getAsBranchNode() && child->getAsBranchNode()->getFlowOp() == glslang::EOpDefault) + defaultSegment = (int)codeSegments.size(); + else if (child->getAsBranchNode() && child->getAsBranchNode()->getFlowOp() == glslang::EOpCase) { + valueIndexToSegment[caseValues.size()] = (int)codeSegments.size(); + caseValues.push_back(child->getAsBranchNode()->getExpression()->getAsConstantUnion()->getConstArray()[0].getIConst()); + } else + codeSegments.push_back(child); + } + + // handle the case where the last code segment is missing, due to no code + // statements between the last case and the end of the switch statement + if ((caseValues.size() && (int)codeSegments.size() == valueIndexToSegment[caseValues.size() - 1]) || + (int)codeSegments.size() == defaultSegment) + codeSegments.push_back(nullptr); + + // make the switch statement + std::vector segmentBlocks; // returned, as the blocks allocated in the call + builder.makeSwitch(selector, (int)codeSegments.size(), caseValues, valueIndexToSegment, defaultSegment, segmentBlocks); + + // emit all the code in the segments + breakForLoop.push(false); + for (unsigned int s = 0; s < codeSegments.size(); ++s) { + builder.nextSwitchSegment(segmentBlocks, s); + if (codeSegments[s]) + codeSegments[s]->traverse(this); + else + builder.addSwitchBreak(); + } + breakForLoop.pop(); + 
+ builder.endSwitch(segmentBlocks); + + return false; +} + +void TGlslangToSpvTraverser::visitConstantUnion(glslang::TIntermConstantUnion* node) +{ + int nextConst = 0; + spv::Id constant = createSpvConstant(node->getType(), node->getConstArray(), nextConst, false); + + builder.clearAccessChain(); + builder.setAccessChainRValue(constant); +} + +bool TGlslangToSpvTraverser::visitLoop(glslang::TVisit /* visit */, glslang::TIntermLoop* node) +{ + auto blocks = builder.makeNewLoop(); + builder.createBranch(&blocks.head); + // Spec requires back edges to target header blocks, and every header block + // must dominate its merge block. Make a header block first to ensure these + // conditions are met. By definition, it will contain OpLoopMerge, followed + // by a block-ending branch. But we don't want to put any other body/test + // instructions in it, since the body/test may have arbitrary instructions, + // including merges of its own. + builder.setBuildPoint(&blocks.head); + builder.createLoopMerge(&blocks.merge, &blocks.continue_target, spv::LoopControlMaskNone); + if (node->testFirst() && node->getTest()) { + spv::Block& test = builder.makeNewBlock(); + builder.createBranch(&test); + + builder.setBuildPoint(&test); + node->getTest()->traverse(this); + spv::Id condition = + accessChainLoad(node->getTest()->getType()); + builder.createConditionalBranch(condition, &blocks.body, &blocks.merge); + + builder.setBuildPoint(&blocks.body); + breakForLoop.push(true); + if (node->getBody()) + node->getBody()->traverse(this); + builder.createBranch(&blocks.continue_target); + breakForLoop.pop(); + + builder.setBuildPoint(&blocks.continue_target); + if (node->getTerminal()) + node->getTerminal()->traverse(this); + builder.createBranch(&blocks.head); + } else { + builder.createBranch(&blocks.body); + + breakForLoop.push(true); + builder.setBuildPoint(&blocks.body); + if (node->getBody()) + node->getBody()->traverse(this); + builder.createBranch(&blocks.continue_target); + 
breakForLoop.pop(); + + builder.setBuildPoint(&blocks.continue_target); + if (node->getTerminal()) + node->getTerminal()->traverse(this); + if (node->getTest()) { + node->getTest()->traverse(this); + spv::Id condition = + accessChainLoad(node->getTest()->getType()); + builder.createConditionalBranch(condition, &blocks.head, &blocks.merge); + } else { + // TODO: unless there was a break/return/discard instruction + // somewhere in the body, this is an infinite loop, so we should + // issue a warning. + builder.createBranch(&blocks.head); + } + } + builder.setBuildPoint(&blocks.merge); + builder.closeLoop(); + return false; +} + +bool TGlslangToSpvTraverser::visitBranch(glslang::TVisit /* visit */, glslang::TIntermBranch* node) +{ + if (node->getExpression()) + node->getExpression()->traverse(this); + + switch (node->getFlowOp()) { + case glslang::EOpKill: + builder.makeDiscard(); + break; + case glslang::EOpBreak: + if (breakForLoop.top()) + builder.createLoopExit(); + else + builder.addSwitchBreak(); + break; + case glslang::EOpContinue: + builder.createLoopContinue(); + break; + case glslang::EOpReturn: + if (node->getExpression()) + builder.makeReturn(false, accessChainLoad(node->getExpression()->getType())); + else + builder.makeReturn(false); + + builder.clearAccessChain(); + break; + + default: + assert(0); + break; + } + + return false; +} + +spv::Id TGlslangToSpvTraverser::createSpvVariable(const glslang::TIntermSymbol* node) +{ + // First, steer off constants, which are not SPIR-V variables, but + // can still have a mapping to a SPIR-V Id. + // This includes specialization constants. 
+ if (node->getQualifier().storage == glslang::EvqConst) { + return createSpvSpecConstant(*node); + } + + // Now, handle actual variables + spv::StorageClass storageClass = TranslateStorageClass(node->getType()); + spv::Id spvType = convertGlslangToSpvType(node->getType()); + + const char* name = node->getName().c_str(); + if (glslang::IsAnonymous(name)) + name = ""; + + return builder.createVariable(storageClass, spvType, name); +} + +// Return type Id of the sampled type. +spv::Id TGlslangToSpvTraverser::getSampledType(const glslang::TSampler& sampler) +{ + switch (sampler.type) { + case glslang::EbtFloat: return builder.makeFloatType(32); + case glslang::EbtInt: return builder.makeIntType(32); + case glslang::EbtUint: return builder.makeUintType(32); + default: + assert(0); + return builder.makeFloatType(32); + } +} + +// Convert from a glslang type to an SPV type, by calling into a +// recursive version of this function. This establishes the inherited +// layout state rooted from the top-level type. +spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& type) +{ + return convertGlslangToSpvType(type, getExplicitLayout(type), type.getQualifier()); +} + +// Do full recursive conversion of an arbitrary glslang type to a SPIR-V Id. +// explicitLayout can be kept the same throughout the hierarchical recursive walk. +spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& type, glslang::TLayoutPacking explicitLayout, const glslang::TQualifier& qualifier) +{ + spv::Id spvType = spv::NoResult; + + switch (type.getBasicType()) { + case glslang::EbtVoid: + spvType = builder.makeVoidType(); + assert (! type.isArray()); + break; + case glslang::EbtFloat: + spvType = builder.makeFloatType(32); + break; + case glslang::EbtDouble: + spvType = builder.makeFloatType(64); + break; + case glslang::EbtBool: + // "transparent" bool doesn't exist in SPIR-V. The GLSL convention is + // a 32-bit int where non-0 means true. 
+ if (explicitLayout != glslang::ElpNone) + spvType = builder.makeUintType(32); + else + spvType = builder.makeBoolType(); + break; + case glslang::EbtInt: + spvType = builder.makeIntType(32); + break; + case glslang::EbtUint: + spvType = builder.makeUintType(32); + break; + case glslang::EbtAtomicUint: + spv::TbdFunctionality("Is atomic_uint an opaque handle in the uniform storage class, or an addresses in the atomic storage class?"); + spvType = builder.makeUintType(32); + break; + case glslang::EbtSampler: + { + const glslang::TSampler& sampler = type.getSampler(); + if (sampler.sampler) { + // pure sampler + spvType = builder.makeSamplerType(); + } else { + // an image is present, make its type + spvType = builder.makeImageType(getSampledType(sampler), TranslateDimensionality(sampler), sampler.shadow, sampler.arrayed, sampler.ms, + sampler.image ? 2 : 1, TranslateImageFormat(type)); + if (sampler.combined) { + // already has both image and sampler, make the combined type + spvType = builder.makeSampledImageType(spvType); + } + } + } + break; + case glslang::EbtStruct: + case glslang::EbtBlock: + { + // If we've seen this struct type, return it + const glslang::TTypeList* glslangStruct = type.getStruct(); + std::vector structFields; + + // Try to share structs for different layouts, but not yet for other + // kinds of qualification (primarily not yet including interpolant qualification). + if (! HasNonLayoutQualifiers(qualifier)) + spvType = structMap[explicitLayout][qualifier.layoutMatrix][glslangStruct]; + if (spvType != spv::NoResult) + break; + + // else, we haven't seen it... 
+ + // Create a vector of struct types for SPIR-V to consume + int memberDelta = 0; // how much the member's index changes from glslang to SPIR-V, normally 0, except sometimes for blocks + if (type.getBasicType() == glslang::EbtBlock) + memberRemapper[glslangStruct].resize(glslangStruct->size()); + int locationOffset = 0; // for use across struct members, when they are called recursively + for (int i = 0; i < (int)glslangStruct->size(); i++) { + glslang::TType& glslangType = *(*glslangStruct)[i].type; + if (glslangType.hiddenMember()) { + ++memberDelta; + if (type.getBasicType() == glslang::EbtBlock) + memberRemapper[glslangStruct][i] = -1; + } else { + if (type.getBasicType() == glslang::EbtBlock) + memberRemapper[glslangStruct][i] = i - memberDelta; + // modify just this child's view of the qualifier + glslang::TQualifier subQualifier = glslangType.getQualifier(); + InheritQualifiers(subQualifier, qualifier); + if (qualifier.hasLocation()) { + subQualifier.layoutLocation += locationOffset; + locationOffset += glslangIntermediate->computeTypeLocationSize(glslangType); + } + structFields.push_back(convertGlslangToSpvType(glslangType, explicitLayout, subQualifier)); + } + } + + // Make the SPIR-V type + spvType = builder.makeStructType(structFields, type.getTypeName().c_str()); + if (! 
HasNonLayoutQualifiers(qualifier)) + structMap[explicitLayout][qualifier.layoutMatrix][glslangStruct] = spvType; + + // Name and decorate the non-hidden members + int offset = -1; + locationOffset = 0; // for use within the members of this struct, right now + for (int i = 0; i < (int)glslangStruct->size(); i++) { + glslang::TType& glslangType = *(*glslangStruct)[i].type; + int member = i; + if (type.getBasicType() == glslang::EbtBlock) + member = memberRemapper[glslangStruct][i]; + + // modify just this child's view of the qualifier + glslang::TQualifier subQualifier = glslangType.getQualifier(); + InheritQualifiers(subQualifier, qualifier); + + // using -1 above to indicate a hidden member + if (member >= 0) { + builder.addMemberName(spvType, member, glslangType.getFieldName().c_str()); + addMemberDecoration(spvType, member, TranslateLayoutDecoration(glslangType, subQualifier.layoutMatrix)); + addMemberDecoration(spvType, member, TranslatePrecisionDecoration(glslangType)); + addMemberDecoration(spvType, member, TranslateInterpolationDecoration(subQualifier)); + addMemberDecoration(spvType, member, TranslateInvariantDecoration(subQualifier)); + if (qualifier.hasLocation()) { + builder.addMemberDecoration(spvType, member, spv::DecorationLocation, qualifier.layoutLocation + locationOffset); + locationOffset += glslangIntermediate->computeTypeLocationSize(glslangType); + } + if (glslangType.getQualifier().hasComponent()) + builder.addMemberDecoration(spvType, member, spv::DecorationComponent, glslangType.getQualifier().layoutComponent); + if (glslangType.getQualifier().hasXfbOffset()) + builder.addMemberDecoration(spvType, member, spv::DecorationOffset, glslangType.getQualifier().layoutXfbOffset); + else if (explicitLayout != glslang::ElpNone) { + // figure out what to do with offset, which is accumulating + int nextOffset; + updateMemberOffset(type, glslangType, offset, nextOffset, explicitLayout, subQualifier.layoutMatrix); + if (offset >= 0) + 
builder.addMemberDecoration(spvType, member, spv::DecorationOffset, offset); + offset = nextOffset; + } + + if (glslangType.isMatrix() && explicitLayout != glslang::ElpNone) + builder.addMemberDecoration(spvType, member, spv::DecorationMatrixStride, getMatrixStride(glslangType, explicitLayout, subQualifier.layoutMatrix)); + + // built-in variable decorations + spv::BuiltIn builtIn = TranslateBuiltInDecoration(glslangType.getQualifier().builtIn); + if (builtIn != spv::BadValue) + addMemberDecoration(spvType, member, spv::DecorationBuiltIn, (int)builtIn); + } + } + + // Decorate the structure + addDecoration(spvType, TranslateLayoutDecoration(type, qualifier.layoutMatrix)); + addDecoration(spvType, TranslateBlockDecoration(type)); + if (type.getQualifier().hasStream()) { + builder.addCapability(spv::CapabilityGeometryStreams); + builder.addDecoration(spvType, spv::DecorationStream, type.getQualifier().layoutStream); + } + if (glslangIntermediate->getXfbMode()) { + builder.addCapability(spv::CapabilityTransformFeedback); + if (type.getQualifier().hasXfbStride()) + builder.addDecoration(spvType, spv::DecorationXfbStride, type.getQualifier().layoutXfbStride); + if (type.getQualifier().hasXfbBuffer()) + builder.addDecoration(spvType, spv::DecorationXfbBuffer, type.getQualifier().layoutXfbBuffer); + } + } + break; + default: + assert(0); + break; + } + + if (type.isMatrix()) + spvType = builder.makeMatrixType(spvType, type.getMatrixCols(), type.getMatrixRows()); + else { + // If this variable has a vector element count greater than 1, create a SPIR-V vector + if (type.getVectorSize() > 1) + spvType = builder.makeVectorType(spvType, type.getVectorSize()); + } + + if (type.isArray()) { + int stride = 0; // keep this 0 unless doing an explicit layout; 0 will mean no decoration, no stride + + // Do all but the outer dimension + if (type.getArraySizes()->getNumDims() > 1) { + // We need to decorate array strides for types needing explicit layout, except blocks. 
+ if (explicitLayout != glslang::ElpNone && type.getBasicType() != glslang::EbtBlock) { + // Use a dummy glslang type for querying internal strides of + // arrays of arrays, but using just a one-dimensional array. + glslang::TType simpleArrayType(type, 0); // deference type of the array + while (simpleArrayType.getArraySizes().getNumDims() > 1) + simpleArrayType.getArraySizes().dereference(); + + // Will compute the higher-order strides here, rather than making a whole + // pile of types and doing repetitive recursion on their contents. + stride = getArrayStride(simpleArrayType, explicitLayout, qualifier.layoutMatrix); + } + + // make the arrays + for (int dim = type.getArraySizes()->getNumDims() - 1; dim > 0; --dim) { + spvType = builder.makeArrayType(spvType, makeArraySizeId(*type.getArraySizes(), dim), stride); + if (stride > 0) + builder.addDecoration(spvType, spv::DecorationArrayStride, stride); + stride *= type.getArraySizes()->getDimSize(dim); + } + } else { + // single-dimensional array, and don't yet have stride + + // We need to decorate array strides for types needing explicit layout, except blocks. + if (explicitLayout != glslang::ElpNone && type.getBasicType() != glslang::EbtBlock) + stride = getArrayStride(type, explicitLayout, qualifier.layoutMatrix); + } + + // Do the outer dimension, which might not be known for a runtime-sized array + if (type.isRuntimeSizedArray()) { + spvType = builder.makeRuntimeArray(spvType); + } else { + assert(type.getOuterArraySize() > 0); + spvType = builder.makeArrayType(spvType, makeArraySizeId(*type.getArraySizes(), 0), stride); + } + if (stride > 0) + builder.addDecoration(spvType, spv::DecorationArrayStride, stride); + } + + return spvType; +} + +// Turn the expression forming the array size into an id. +// This is not quite trivial, because of specialization constants. +// Sometimes, a raw constant is turned into an Id, and sometimes +// a specialization constant expression is. 
+spv::Id TGlslangToSpvTraverser::makeArraySizeId(const glslang::TArraySizes& arraySizes, int dim) +{ + // First, see if this is sized with a node, meaning a specialization constant: + glslang::TIntermTyped* specNode = arraySizes.getDimNode(dim); + if (specNode != nullptr) { + builder.clearAccessChain(); + specNode->traverse(this); + return accessChainLoad(specNode->getAsTyped()->getType()); + } + + // Otherwise, need a compile-time (front end) size, get it: + int size = arraySizes.getDimSize(dim); + assert(size > 0); + return builder.makeUintConstant(size); +} + +// Wrap the builder's accessChainLoad to: +// - localize handling of RelaxedPrecision +// - use the SPIR-V inferred type instead of another conversion of the glslang type +// (avoids unnecessary work and possible type punning for structures) +// - do conversion of concrete to abstract type +spv::Id TGlslangToSpvTraverser::accessChainLoad(const glslang::TType& type) +{ + spv::Id nominalTypeId = builder.accessChainGetInferredType(); + spv::Id loadedId = builder.accessChainLoad(TranslatePrecisionDecoration(type), nominalTypeId); + + // Need to convert to abstract types when necessary + if (builder.isScalarType(nominalTypeId) && type.getBasicType() == glslang::EbtBool && nominalTypeId != builder.makeBoolType()) + loadedId = builder.createBinOp(spv::OpINotEqual, builder.makeBoolType(), loadedId, builder.makeUintConstant(0)); + + return loadedId; +} + +// Decide whether or not this type should be +// decorated with offsets and strides, and if so +// whether std140 or std430 rules should be applied. 
+glslang::TLayoutPacking TGlslangToSpvTraverser::getExplicitLayout(const glslang::TType& type) const +{ + // has to be a block + if (type.getBasicType() != glslang::EbtBlock) + return glslang::ElpNone; + + // has to be a uniform or buffer block + if (type.getQualifier().storage != glslang::EvqUniform && + type.getQualifier().storage != glslang::EvqBuffer) + return glslang::ElpNone; + + // return the layout to use + switch (type.getQualifier().layoutPacking) { + case glslang::ElpStd140: + case glslang::ElpStd430: + return type.getQualifier().layoutPacking; + default: + return glslang::ElpNone; + } +} + +// Given an array type, returns the integer stride required for that array +int TGlslangToSpvTraverser::getArrayStride(const glslang::TType& arrayType, glslang::TLayoutPacking explicitLayout, glslang::TLayoutMatrix matrixLayout) +{ + int size; + int stride; + glslangIntermediate->getBaseAlignment(arrayType, size, stride, explicitLayout == glslang::ElpStd140, matrixLayout == glslang::ElmRowMajor); + + return stride; +} + +// Given a matrix type, or array (of array) of matrixes type, returns the integer stride required for that matrix +// when used as a member of an interface block +int TGlslangToSpvTraverser::getMatrixStride(const glslang::TType& matrixType, glslang::TLayoutPacking explicitLayout, glslang::TLayoutMatrix matrixLayout) +{ + glslang::TType elementType; + elementType.shallowCopy(matrixType); + elementType.clearArraySizes(); + + int size; + int stride; + glslangIntermediate->getBaseAlignment(elementType, size, stride, explicitLayout == glslang::ElpStd140, matrixLayout == glslang::ElmRowMajor); + + return stride; +} + +// Given a member type of a struct, realign the current offset for it, and compute +// the next (not yet aligned) offset for the next member, which will get aligned +// on the next call. +// 'currentOffset' should be passed in already initialized, ready to modify, and reflecting +// the migration of data from nextOffset -> currentOffset. 
It should be -1 on the first call. +// -1 means a non-forced member offset (no decoration needed). +void TGlslangToSpvTraverser::updateMemberOffset(const glslang::TType& /*structType*/, const glslang::TType& memberType, int& currentOffset, int& nextOffset, + glslang::TLayoutPacking explicitLayout, glslang::TLayoutMatrix matrixLayout) +{ + // this will get a positive value when deemed necessary + nextOffset = -1; + + // override anything in currentOffset with user-set offset + if (memberType.getQualifier().hasOffset()) + currentOffset = memberType.getQualifier().layoutOffset; + + // It could be that current linker usage in glslang updated all the layoutOffset, + // in which case the following code does not matter. But, that's not quite right + // once cross-compilation unit GLSL validation is done, as the original user + // settings are needed in layoutOffset, and then the following will come into play. + + if (explicitLayout == glslang::ElpNone) { + if (! memberType.getQualifier().hasOffset()) + currentOffset = -1; + + return; + } + + // Getting this far means we need explicit offsets + if (currentOffset < 0) + currentOffset = 0; + + // Now, currentOffset is valid (either 0, or from a previous nextOffset), + // but possibly not yet correctly aligned. + + int memberSize; + int dummyStride; + int memberAlignment = glslangIntermediate->getBaseAlignment(memberType, memberSize, dummyStride, explicitLayout == glslang::ElpStd140, matrixLayout == glslang::ElmRowMajor); + glslang::RoundToPow2(currentOffset, memberAlignment); + nextOffset = currentOffset + memberSize; +} + +bool TGlslangToSpvTraverser::isShaderEntrypoint(const glslang::TIntermAggregate* node) +{ + return node->getName() == "main("; +} + +// Make all the functions, skeletally, without actually visiting their bodies. 
+void TGlslangToSpvTraverser::makeFunctions(const glslang::TIntermSequence& glslFunctions) +{ + for (int f = 0; f < (int)glslFunctions.size(); ++f) { + glslang::TIntermAggregate* glslFunction = glslFunctions[f]->getAsAggregate(); + if (! glslFunction || glslFunction->getOp() != glslang::EOpFunction || isShaderEntrypoint(glslFunction)) + continue; + + // We're on a user function. Set up the basic interface for the function now, + // so that it's available to call. + // Translating the body will happen later. + // + // Typically (except for a "const in" parameter), an address will be passed to the + // function. What it is an address of varies: + // + // - "in" parameters not marked as "const" can be written to without modifying the argument, + // so that write needs to be to a copy, hence the address of a copy works. + // + // - "const in" parameters can just be the r-value, as no writes need occur. + // + // - "out" and "inout" arguments can't be done as direct pointers, because GLSL has + // copy-in/copy-out semantics. They can be handled though with a pointer to a copy. 
+ + std::vector paramTypes; + std::vector paramPrecisions; + glslang::TIntermSequence& parameters = glslFunction->getSequence()[0]->getAsAggregate()->getSequence(); + + for (int p = 0; p < (int)parameters.size(); ++p) { + const glslang::TType& paramType = parameters[p]->getAsTyped()->getType(); + spv::Id typeId = convertGlslangToSpvType(paramType); + if (paramType.getQualifier().storage != glslang::EvqConstReadOnly) + typeId = builder.makePointer(spv::StorageClassFunction, typeId); + else + constReadOnlyParameters.insert(parameters[p]->getAsSymbolNode()->getId()); + paramPrecisions.push_back(TranslatePrecisionDecoration(paramType)); + paramTypes.push_back(typeId); + } + + spv::Block* functionBlock; + spv::Function *function = builder.makeFunctionEntry(TranslatePrecisionDecoration(glslFunction->getType()), + convertGlslangToSpvType(glslFunction->getType()), + glslFunction->getName().c_str(), paramTypes, paramPrecisions, &functionBlock); + + // Track function to emit/call later + functionMap[glslFunction->getName().c_str()] = function; + + // Set the parameter id's + for (int p = 0; p < (int)parameters.size(); ++p) { + symbolValues[parameters[p]->getAsSymbolNode()->getId()] = function->getParamId(p); + // give a name too + builder.addName(function->getParamId(p), parameters[p]->getAsSymbolNode()->getName().c_str()); + } + } +} + +// Process all the initializers, while skipping the functions and link objects +void TGlslangToSpvTraverser::makeGlobalInitializers(const glslang::TIntermSequence& initializers) +{ + builder.setBuildPoint(shaderEntry->getLastBlock()); + for (int i = 0; i < (int)initializers.size(); ++i) { + glslang::TIntermAggregate* initializer = initializers[i]->getAsAggregate(); + if (initializer && initializer->getOp() != glslang::EOpFunction && initializer->getOp() != glslang::EOpLinkerObjects) { + + // We're on a top-level node that's not a function. Treat as an initializer, whose + // code goes into the beginning of main. 
+ initializer->traverse(this); + } + } +} + +// Process all the functions, while skipping initializers. +void TGlslangToSpvTraverser::visitFunctions(const glslang::TIntermSequence& glslFunctions) +{ + for (int f = 0; f < (int)glslFunctions.size(); ++f) { + glslang::TIntermAggregate* node = glslFunctions[f]->getAsAggregate(); + if (node && (node->getOp() == glslang::EOpFunction || node->getOp() == glslang ::EOpLinkerObjects)) + node->traverse(this); + } +} + +void TGlslangToSpvTraverser::handleFunctionEntry(const glslang::TIntermAggregate* node) +{ + // SPIR-V functions should already be in the functionMap from the prepass + // that called makeFunctions(). + spv::Function* function = functionMap[node->getName().c_str()]; + spv::Block* functionBlock = function->getEntryBlock(); + builder.setBuildPoint(functionBlock); +} + +void TGlslangToSpvTraverser::translateArguments(const glslang::TIntermAggregate& node, std::vector& arguments) +{ + const glslang::TIntermSequence& glslangArguments = node.getSequence(); + + glslang::TSampler sampler = {}; + bool cubeCompare = false; + if (node.isTexture()) { + sampler = glslangArguments[0]->getAsTyped()->getType().getSampler(); + cubeCompare = sampler.dim == glslang::EsdCube && sampler.arrayed && sampler.shadow; + } + + for (int i = 0; i < (int)glslangArguments.size(); ++i) { + builder.clearAccessChain(); + glslangArguments[i]->traverse(this); + + // Special case l-value operands + bool lvalue = false; + switch (node.getOp()) { + case glslang::EOpImageAtomicAdd: + case glslang::EOpImageAtomicMin: + case glslang::EOpImageAtomicMax: + case glslang::EOpImageAtomicAnd: + case glslang::EOpImageAtomicOr: + case glslang::EOpImageAtomicXor: + case glslang::EOpImageAtomicExchange: + case glslang::EOpImageAtomicCompSwap: + if (i == 0) + lvalue = true; + break; + case glslang::EOpSparseTexture: + if ((cubeCompare && i == 3) || (! 
cubeCompare && i == 2)) + lvalue = true; + break; + case glslang::EOpSparseTextureClamp: + if ((cubeCompare && i == 4) || (! cubeCompare && i == 3)) + lvalue = true; + break; + case glslang::EOpSparseTextureLod: + case glslang::EOpSparseTextureOffset: + if (i == 3) + lvalue = true; + break; + case glslang::EOpSparseTextureFetch: + if ((sampler.dim != glslang::EsdRect && i == 3) || (sampler.dim == glslang::EsdRect && i == 2)) + lvalue = true; + break; + case glslang::EOpSparseTextureFetchOffset: + if ((sampler.dim != glslang::EsdRect && i == 4) || (sampler.dim == glslang::EsdRect && i == 3)) + lvalue = true; + break; + case glslang::EOpSparseTextureLodOffset: + case glslang::EOpSparseTextureGrad: + case glslang::EOpSparseTextureOffsetClamp: + if (i == 4) + lvalue = true; + break; + case glslang::EOpSparseTextureGradOffset: + case glslang::EOpSparseTextureGradClamp: + if (i == 5) + lvalue = true; + break; + case glslang::EOpSparseTextureGradOffsetClamp: + if (i == 6) + lvalue = true; + break; + case glslang::EOpSparseTextureGather: + if ((sampler.shadow && i == 3) || (! sampler.shadow && i == 2)) + lvalue = true; + break; + case glslang::EOpSparseTextureGatherOffset: + case glslang::EOpSparseTextureGatherOffsets: + if ((sampler.shadow && i == 4) || (! sampler.shadow && i == 3)) + lvalue = true; + break; + default: + break; + } + + if (lvalue) + arguments.push_back(builder.accessChainGetLValue()); + else + arguments.push_back(accessChainLoad(glslangArguments[i]->getAsTyped()->getType())); + } +} + +void TGlslangToSpvTraverser::translateArguments(glslang::TIntermUnary& node, std::vector& arguments) +{ + builder.clearAccessChain(); + node.getOperand()->traverse(this); + arguments.push_back(accessChainLoad(node.getOperand()->getType())); +} + +spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermOperator* node) +{ + if (! node->isImage() && ! 
node->isTexture()) { + return spv::NoResult; + } + + // Process a GLSL texturing op (will be SPV image) + const glslang::TSampler sampler = node->getAsAggregate() ? node->getAsAggregate()->getSequence()[0]->getAsTyped()->getType().getSampler() + : node->getAsUnaryNode()->getOperand()->getAsTyped()->getType().getSampler(); + std::vector arguments; + if (node->getAsAggregate()) + translateArguments(*node->getAsAggregate(), arguments); + else + translateArguments(*node->getAsUnaryNode(), arguments); + spv::Decoration precision = TranslatePrecisionDecoration(node->getType()); + + spv::Builder::TextureParameters params = { }; + params.sampler = arguments[0]; + + glslang::TCrackedTextureOp cracked; + node->crackTexture(sampler, cracked); + + // Check for queries + if (cracked.query) { + // a sampled image needs to have the image extracted first + if (builder.isSampledImage(params.sampler)) + params.sampler = builder.createUnaryOp(spv::OpImage, builder.getImageType(params.sampler), params.sampler); + switch (node->getOp()) { + case glslang::EOpImageQuerySize: + case glslang::EOpTextureQuerySize: + if (arguments.size() > 1) { + params.lod = arguments[1]; + return builder.createTextureQueryCall(spv::OpImageQuerySizeLod, params); + } else + return builder.createTextureQueryCall(spv::OpImageQuerySize, params); + case glslang::EOpImageQuerySamples: + case glslang::EOpTextureQuerySamples: + return builder.createTextureQueryCall(spv::OpImageQuerySamples, params); + case glslang::EOpTextureQueryLod: + params.coords = arguments[1]; + return builder.createTextureQueryCall(spv::OpImageQueryLod, params); + case glslang::EOpTextureQueryLevels: + return builder.createTextureQueryCall(spv::OpImageQueryLevels, params); + case glslang::EOpSparseTexelsResident: + return builder.createUnaryOp(spv::OpImageSparseTexelsResident, builder.makeBoolType(), arguments[0]); + default: + assert(0); + break; + } + } + + // Check for image functions other than queries + if (node->isImage()) { + 
std::vector operands; + auto opIt = arguments.begin(); + operands.push_back(*(opIt++)); + + // Handle subpass operations + // TODO: GLSL should change to have the "MS" only on the type rather than the + // built-in function. + if (cracked.subpass) { + // add on the (0,0) coordinate + spv::Id zero = builder.makeIntConstant(0); + std::vector comps; + comps.push_back(zero); + comps.push_back(zero); + operands.push_back(builder.makeCompositeConstant(builder.makeVectorType(builder.makeIntType(32), 2), comps)); + if (sampler.ms) { + operands.push_back(spv::ImageOperandsSampleMask); + operands.push_back(*(opIt++)); + } + return builder.createOp(spv::OpImageRead, convertGlslangToSpvType(node->getType()), operands); + } + + operands.push_back(*(opIt++)); + if (node->getOp() == glslang::EOpImageLoad) { + if (sampler.ms) { + operands.push_back(spv::ImageOperandsSampleMask); + operands.push_back(*opIt); + } + return builder.createOp(spv::OpImageRead, convertGlslangToSpvType(node->getType()), operands); + if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown) + builder.addCapability(spv::CapabilityStorageImageReadWithoutFormat); + } else if (node->getOp() == glslang::EOpImageStore) { + if (sampler.ms) { + operands.push_back(*(opIt + 1)); + operands.push_back(spv::ImageOperandsSampleMask); + operands.push_back(*opIt); + } else + operands.push_back(*opIt); + builder.createNoResultOp(spv::OpImageWrite, operands); + if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown) + builder.addCapability(spv::CapabilityStorageImageWriteWithoutFormat); + return spv::NoResult; + } else if (node->isSparseImage()) { + spv::MissingFunctionality("sparse image functions"); + return spv::NoResult; + } else { + // Process image atomic operations + + // GLSL "IMAGE_PARAMS" will involve in constructing an image texel pointer and this pointer, + // as the first source operand, is required by SPIR-V atomic operations. 
+ operands.push_back(sampler.ms ? *(opIt++) : builder.makeUintConstant(0)); // For non-MS, the value should be 0 + + spv::Id resultTypeId = builder.makePointer(spv::StorageClassImage, convertGlslangToSpvType(node->getType())); + spv::Id pointer = builder.createOp(spv::OpImageTexelPointer, resultTypeId, operands); + + std::vector operands; + operands.push_back(pointer); + for (; opIt != arguments.end(); ++opIt) + operands.push_back(*opIt); + + return createAtomicOperation(node->getOp(), precision, convertGlslangToSpvType(node->getType()), operands, node->getBasicType()); + } + } + + // Check for texture functions other than queries + bool sparse = node->isSparseTexture(); + bool cubeCompare = sampler.dim == glslang::EsdCube && sampler.arrayed && sampler.shadow; + + // check for bias argument + bool bias = false; + if (! cracked.lod && ! cracked.gather && ! cracked.grad && ! cracked.fetch && ! cubeCompare) { + int nonBiasArgCount = 2; + if (cracked.offset) + ++nonBiasArgCount; + if (cracked.grad) + nonBiasArgCount += 2; + if (cracked.lodClamp) + ++nonBiasArgCount; + if (sparse) + ++nonBiasArgCount; + + if ((int)arguments.size() > nonBiasArgCount) + bias = true; + } + + // set the rest of the arguments + + params.coords = arguments[1]; + int extraArgs = 0; + bool noImplicitLod = false; + + // sort out where Dref is coming from + if (cubeCompare) { + params.Dref = arguments[2]; + ++extraArgs; + } else if (sampler.shadow && cracked.gather) { + params.Dref = arguments[2]; + ++extraArgs; + } else if (sampler.shadow) { + std::vector indexes; + int comp; + if (cracked.proj) + comp = 2; // "The resulting 3rd component of P in the shadow forms is used as Dref" + else + comp = builder.getNumComponents(params.coords) - 1; + indexes.push_back(comp); + params.Dref = builder.createCompositeExtract(params.coords, builder.getScalarTypeId(builder.getTypeId(params.coords)), indexes); + } + if (cracked.lod) { + params.lod = arguments[2]; + ++extraArgs; + } else if 
(glslangIntermediate->getStage() != EShLangFragment) { + // we need to invent the default lod for an explicit lod instruction for a non-fragment stage + noImplicitLod = true; + } + if (sampler.ms) { + params.sample = arguments[2]; // For MS, "sample" should be specified + ++extraArgs; + } + if (cracked.grad) { + params.gradX = arguments[2 + extraArgs]; + params.gradY = arguments[3 + extraArgs]; + extraArgs += 2; + } + if (cracked.offset) { + params.offset = arguments[2 + extraArgs]; + ++extraArgs; + } else if (cracked.offsets) { + params.offsets = arguments[2 + extraArgs]; + ++extraArgs; + } + if (cracked.lodClamp) { + params.lodClamp = arguments[2 + extraArgs]; + ++extraArgs; + } + if (sparse) { + params.texelOut = arguments[2 + extraArgs]; + ++extraArgs; + } + if (bias) { + params.bias = arguments[2 + extraArgs]; + ++extraArgs; + } + if (cracked.gather && ! sampler.shadow) { + // default component is 0, if missing, otherwise an argument + if (2 + extraArgs < (int)arguments.size()) { + params.comp = arguments[2 + extraArgs]; + ++extraArgs; + } else { + params.comp = builder.makeIntConstant(0); + } + } + + return builder.createTextureCall(precision, convertGlslangToSpvType(node->getType()), sparse, cracked.fetch, cracked.proj, cracked.gather, noImplicitLod, params); +} + +spv::Id TGlslangToSpvTraverser::handleUserFunctionCall(const glslang::TIntermAggregate* node) +{ + // Grab the function's pointer from the previously created function + spv::Function* function = functionMap[node->getName().c_str()]; + if (! function) + return 0; + + const glslang::TIntermSequence& glslangArgs = node->getSequence(); + const glslang::TQualifierList& qualifiers = node->getQualifierList(); + + // See comments in makeFunctions() for details about the semantics for parameter passing. + // + // These imply we need a four step process: + // 1. Evaluate the arguments + // 2. Allocate and make copies of in, out, and inout arguments + // 3. Make the call + // 4. 
Copy back the results + + // 1. Evaluate the arguments + std::vector lValues; + std::vector rValues; + std::vector argTypes; + for (int a = 0; a < (int)glslangArgs.size(); ++a) { + // build l-value + builder.clearAccessChain(); + glslangArgs[a]->traverse(this); + argTypes.push_back(&glslangArgs[a]->getAsTyped()->getType()); + // keep outputs as l-values, evaluate input-only as r-values + if (qualifiers[a] != glslang::EvqConstReadOnly) { + // save l-value + lValues.push_back(builder.getAccessChain()); + } else { + // process r-value + rValues.push_back(accessChainLoad(*argTypes.back())); + } + } + + // 2. Allocate space for anything needing a copy, and if it's "in" or "inout" + // copy the original into that space. + // + // Also, build up the list of actual arguments to pass in for the call + int lValueCount = 0; + int rValueCount = 0; + std::vector spvArgs; + for (int a = 0; a < (int)glslangArgs.size(); ++a) { + spv::Id arg; + if (qualifiers[a] != glslang::EvqConstReadOnly) { + // need space to hold the copy + const glslang::TType& paramType = glslangArgs[a]->getAsTyped()->getType(); + arg = builder.createVariable(spv::StorageClassFunction, convertGlslangToSpvType(paramType), "param"); + if (qualifiers[a] == glslang::EvqIn || qualifiers[a] == glslang::EvqInOut) { + // need to copy the input into output space + builder.setAccessChain(lValues[lValueCount]); + spv::Id copy = accessChainLoad(*argTypes[a]); + builder.createStore(copy, arg); + } + ++lValueCount; + } else { + arg = rValues[rValueCount]; + ++rValueCount; + } + spvArgs.push_back(arg); + } + + // 3. Make the call. + spv::Id result = builder.createFunctionCall(function, spvArgs); + builder.setPrecision(result, TranslatePrecisionDecoration(node->getType())); + + // 4. Copy back out an "out" arguments. 
+ lValueCount = 0; + for (int a = 0; a < (int)glslangArgs.size(); ++a) { + if (qualifiers[a] != glslang::EvqConstReadOnly) { + if (qualifiers[a] == glslang::EvqOut || qualifiers[a] == glslang::EvqInOut) { + spv::Id copy = builder.createLoad(spvArgs[a]); + builder.setAccessChain(lValues[lValueCount]); + builder.accessChainStore(copy); + } + ++lValueCount; + } + } + + return result; +} + +// Translate AST operation to SPV operation, already having SPV-based operands/types. +spv::Id TGlslangToSpvTraverser::createBinaryOperation(glslang::TOperator op, spv::Decoration precision, + spv::Id typeId, spv::Id left, spv::Id right, + glslang::TBasicType typeProxy, bool reduceComparison) +{ + bool isUnsigned = typeProxy == glslang::EbtUint; + bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble; + + spv::Op binOp = spv::OpNop; + bool needMatchingVectors = true; // for non-matrix ops, would a scalar need to smear to match a vector? + bool comparison = false; + + switch (op) { + case glslang::EOpAdd: + case glslang::EOpAddAssign: + if (isFloat) + binOp = spv::OpFAdd; + else + binOp = spv::OpIAdd; + break; + case glslang::EOpSub: + case glslang::EOpSubAssign: + if (isFloat) + binOp = spv::OpFSub; + else + binOp = spv::OpISub; + break; + case glslang::EOpMul: + case glslang::EOpMulAssign: + if (isFloat) + binOp = spv::OpFMul; + else + binOp = spv::OpIMul; + break; + case glslang::EOpVectorTimesScalar: + case glslang::EOpVectorTimesScalarAssign: + if (isFloat) { + if (builder.isVector(right)) + std::swap(left, right); + assert(builder.isScalar(right)); + needMatchingVectors = false; + binOp = spv::OpVectorTimesScalar; + } else + binOp = spv::OpIMul; + break; + case glslang::EOpVectorTimesMatrix: + case glslang::EOpVectorTimesMatrixAssign: + binOp = spv::OpVectorTimesMatrix; + break; + case glslang::EOpMatrixTimesVector: + binOp = spv::OpMatrixTimesVector; + break; + case glslang::EOpMatrixTimesScalar: + case glslang::EOpMatrixTimesScalarAssign: + binOp = 
spv::OpMatrixTimesScalar; + break; + case glslang::EOpMatrixTimesMatrix: + case glslang::EOpMatrixTimesMatrixAssign: + binOp = spv::OpMatrixTimesMatrix; + break; + case glslang::EOpOuterProduct: + binOp = spv::OpOuterProduct; + needMatchingVectors = false; + break; + + case glslang::EOpDiv: + case glslang::EOpDivAssign: + if (isFloat) + binOp = spv::OpFDiv; + else if (isUnsigned) + binOp = spv::OpUDiv; + else + binOp = spv::OpSDiv; + break; + case glslang::EOpMod: + case glslang::EOpModAssign: + if (isFloat) + binOp = spv::OpFMod; + else if (isUnsigned) + binOp = spv::OpUMod; + else + binOp = spv::OpSMod; + break; + case glslang::EOpRightShift: + case glslang::EOpRightShiftAssign: + if (isUnsigned) + binOp = spv::OpShiftRightLogical; + else + binOp = spv::OpShiftRightArithmetic; + break; + case glslang::EOpLeftShift: + case glslang::EOpLeftShiftAssign: + binOp = spv::OpShiftLeftLogical; + break; + case glslang::EOpAnd: + case glslang::EOpAndAssign: + binOp = spv::OpBitwiseAnd; + break; + case glslang::EOpLogicalAnd: + needMatchingVectors = false; + binOp = spv::OpLogicalAnd; + break; + case glslang::EOpInclusiveOr: + case glslang::EOpInclusiveOrAssign: + binOp = spv::OpBitwiseOr; + break; + case glslang::EOpLogicalOr: + needMatchingVectors = false; + binOp = spv::OpLogicalOr; + break; + case glslang::EOpExclusiveOr: + case glslang::EOpExclusiveOrAssign: + binOp = spv::OpBitwiseXor; + break; + case glslang::EOpLogicalXor: + needMatchingVectors = false; + binOp = spv::OpLogicalNotEqual; + break; + + case glslang::EOpLessThan: + case glslang::EOpGreaterThan: + case glslang::EOpLessThanEqual: + case glslang::EOpGreaterThanEqual: + case glslang::EOpEqual: + case glslang::EOpNotEqual: + case glslang::EOpVectorEqual: + case glslang::EOpVectorNotEqual: + comparison = true; + break; + default: + break; + } + + // handle mapped binary operations (should be non-comparison) + if (binOp != spv::OpNop) { + assert(comparison == false); + if (builder.isMatrix(left) || 
builder.isMatrix(right)) + return createBinaryMatrixOperation(binOp, precision, typeId, left, right); + + // No matrix involved; make both operands be the same number of components, if needed + if (needMatchingVectors) + builder.promoteScalar(precision, left, right); + + return builder.setPrecision(builder.createBinOp(binOp, typeId, left, right), precision); + } + + if (! comparison) + return 0; + + // Handle comparison instructions + + if (reduceComparison && (builder.isVector(left) || builder.isMatrix(left) || builder.isAggregate(left))) { + assert(op == glslang::EOpEqual || op == glslang::EOpNotEqual); + + return builder.createCompositeCompare(precision, left, right, op == glslang::EOpEqual); + } + + switch (op) { + case glslang::EOpLessThan: + if (isFloat) + binOp = spv::OpFOrdLessThan; + else if (isUnsigned) + binOp = spv::OpULessThan; + else + binOp = spv::OpSLessThan; + break; + case glslang::EOpGreaterThan: + if (isFloat) + binOp = spv::OpFOrdGreaterThan; + else if (isUnsigned) + binOp = spv::OpUGreaterThan; + else + binOp = spv::OpSGreaterThan; + break; + case glslang::EOpLessThanEqual: + if (isFloat) + binOp = spv::OpFOrdLessThanEqual; + else if (isUnsigned) + binOp = spv::OpULessThanEqual; + else + binOp = spv::OpSLessThanEqual; + break; + case glslang::EOpGreaterThanEqual: + if (isFloat) + binOp = spv::OpFOrdGreaterThanEqual; + else if (isUnsigned) + binOp = spv::OpUGreaterThanEqual; + else + binOp = spv::OpSGreaterThanEqual; + break; + case glslang::EOpEqual: + case glslang::EOpVectorEqual: + if (isFloat) + binOp = spv::OpFOrdEqual; + else + binOp = spv::OpIEqual; + break; + case glslang::EOpNotEqual: + case glslang::EOpVectorNotEqual: + if (isFloat) + binOp = spv::OpFOrdNotEqual; + else + binOp = spv::OpINotEqual; + break; + default: + break; + } + + if (binOp != spv::OpNop) + return builder.setPrecision(builder.createBinOp(binOp, typeId, left, right), precision); + + return 0; +} + +// +// Translate AST matrix operation to SPV operation, already 
having SPV-based operands/types. +// These can be any of: +// +// matrix * scalar +// scalar * matrix +// matrix * matrix linear algebraic +// matrix * vector +// vector * matrix +// matrix * matrix componentwise +// matrix op matrix op in {+, -, /} +// matrix op scalar op in {+, -, /} +// scalar op matrix op in {+, -, /} +// +spv::Id TGlslangToSpvTraverser::createBinaryMatrixOperation(spv::Op op, spv::Decoration precision, spv::Id typeId, spv::Id left, spv::Id right) +{ + bool firstClass = true; + + // First, handle first-class matrix operations (* and matrix/scalar) + switch (op) { + case spv::OpFDiv: + if (builder.isMatrix(left) && builder.isScalar(right)) { + // turn matrix / scalar into a multiply... + right = builder.createBinOp(spv::OpFDiv, builder.getTypeId(right), builder.makeFloatConstant(1.0F), right); + op = spv::OpMatrixTimesScalar; + } else + firstClass = false; + break; + case spv::OpMatrixTimesScalar: + if (builder.isMatrix(right)) + std::swap(left, right); + assert(builder.isScalar(right)); + break; + case spv::OpVectorTimesMatrix: + assert(builder.isVector(left)); + assert(builder.isMatrix(right)); + break; + case spv::OpMatrixTimesVector: + assert(builder.isMatrix(left)); + assert(builder.isVector(right)); + break; + case spv::OpMatrixTimesMatrix: + assert(builder.isMatrix(left)); + assert(builder.isMatrix(right)); + break; + default: + firstClass = false; + break; + } + + if (firstClass) + return builder.setPrecision(builder.createBinOp(op, typeId, left, right), precision); + + // Handle component-wise +, -, *, and / for all combinations of type. + // The result type of all of them is the same type as the (a) matrix operand. + // The algorithm is to: + // - break the matrix(es) into vectors + // - smear any scalar to a vector + // - do vector operations + // - make a matrix out the vector results + switch (op) { + case spv::OpFAdd: + case spv::OpFSub: + case spv::OpFDiv: + case spv::OpFMul: + { + // one time set up... 
+ bool leftMat = builder.isMatrix(left); + bool rightMat = builder.isMatrix(right); + unsigned int numCols = leftMat ? builder.getNumColumns(left) : builder.getNumColumns(right); + int numRows = leftMat ? builder.getNumRows(left) : builder.getNumRows(right); + spv::Id scalarType = builder.getScalarTypeId(typeId); + spv::Id vecType = builder.makeVectorType(scalarType, numRows); + std::vector results; + spv::Id smearVec = spv::NoResult; + if (builder.isScalar(left)) + smearVec = builder.smearScalar(precision, left, vecType); + else if (builder.isScalar(right)) + smearVec = builder.smearScalar(precision, right, vecType); + + // do each vector op + for (unsigned int c = 0; c < numCols; ++c) { + std::vector indexes; + indexes.push_back(c); + spv::Id leftVec = leftMat ? builder.createCompositeExtract( left, vecType, indexes) : smearVec; + spv::Id rightVec = rightMat ? builder.createCompositeExtract(right, vecType, indexes) : smearVec; + results.push_back(builder.createBinOp(op, vecType, leftVec, rightVec)); + builder.setPrecision(results.back(), precision); + } + + // put the pieces together + return builder.setPrecision(builder.createCompositeConstruct(typeId, results), precision); + } + default: + assert(0); + return spv::NoResult; + } +} + +spv::Id TGlslangToSpvTraverser::createUnaryOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, spv::Id operand, glslang::TBasicType typeProxy) +{ + spv::Op unaryOp = spv::OpNop; + int libCall = -1; + bool isUnsigned = typeProxy == glslang::EbtUint; + bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble; + + switch (op) { + case glslang::EOpNegative: + if (isFloat) { + unaryOp = spv::OpFNegate; + if (builder.isMatrixType(typeId)) + return createUnaryMatrixOperation(unaryOp, precision, typeId, operand, typeProxy); + } else + unaryOp = spv::OpSNegate; + break; + + case glslang::EOpLogicalNot: + case glslang::EOpVectorLogicalNot: + unaryOp = spv::OpLogicalNot; + break; + case 
glslang::EOpBitwiseNot: + unaryOp = spv::OpNot; + break; + + case glslang::EOpDeterminant: + libCall = spv::GLSLstd450Determinant; + break; + case glslang::EOpMatrixInverse: + libCall = spv::GLSLstd450MatrixInverse; + break; + case glslang::EOpTranspose: + unaryOp = spv::OpTranspose; + break; + + case glslang::EOpRadians: + libCall = spv::GLSLstd450Radians; + break; + case glslang::EOpDegrees: + libCall = spv::GLSLstd450Degrees; + break; + case glslang::EOpSin: + libCall = spv::GLSLstd450Sin; + break; + case glslang::EOpCos: + libCall = spv::GLSLstd450Cos; + break; + case glslang::EOpTan: + libCall = spv::GLSLstd450Tan; + break; + case glslang::EOpAcos: + libCall = spv::GLSLstd450Acos; + break; + case glslang::EOpAsin: + libCall = spv::GLSLstd450Asin; + break; + case glslang::EOpAtan: + libCall = spv::GLSLstd450Atan; + break; + + case glslang::EOpAcosh: + libCall = spv::GLSLstd450Acosh; + break; + case glslang::EOpAsinh: + libCall = spv::GLSLstd450Asinh; + break; + case glslang::EOpAtanh: + libCall = spv::GLSLstd450Atanh; + break; + case glslang::EOpTanh: + libCall = spv::GLSLstd450Tanh; + break; + case glslang::EOpCosh: + libCall = spv::GLSLstd450Cosh; + break; + case glslang::EOpSinh: + libCall = spv::GLSLstd450Sinh; + break; + + case glslang::EOpLength: + libCall = spv::GLSLstd450Length; + break; + case glslang::EOpNormalize: + libCall = spv::GLSLstd450Normalize; + break; + + case glslang::EOpExp: + libCall = spv::GLSLstd450Exp; + break; + case glslang::EOpLog: + libCall = spv::GLSLstd450Log; + break; + case glslang::EOpExp2: + libCall = spv::GLSLstd450Exp2; + break; + case glslang::EOpLog2: + libCall = spv::GLSLstd450Log2; + break; + case glslang::EOpSqrt: + libCall = spv::GLSLstd450Sqrt; + break; + case glslang::EOpInverseSqrt: + libCall = spv::GLSLstd450InverseSqrt; + break; + + case glslang::EOpFloor: + libCall = spv::GLSLstd450Floor; + break; + case glslang::EOpTrunc: + libCall = spv::GLSLstd450Trunc; + break; + case glslang::EOpRound: + libCall = 
spv::GLSLstd450Round; + break; + case glslang::EOpRoundEven: + libCall = spv::GLSLstd450RoundEven; + break; + case glslang::EOpCeil: + libCall = spv::GLSLstd450Ceil; + break; + case glslang::EOpFract: + libCall = spv::GLSLstd450Fract; + break; + + case glslang::EOpIsNan: + unaryOp = spv::OpIsNan; + break; + case glslang::EOpIsInf: + unaryOp = spv::OpIsInf; + break; + + case glslang::EOpFloatBitsToInt: + case glslang::EOpFloatBitsToUint: + case glslang::EOpIntBitsToFloat: + case glslang::EOpUintBitsToFloat: + unaryOp = spv::OpBitcast; + break; + + case glslang::EOpPackSnorm2x16: + libCall = spv::GLSLstd450PackSnorm2x16; + break; + case glslang::EOpUnpackSnorm2x16: + libCall = spv::GLSLstd450UnpackSnorm2x16; + break; + case glslang::EOpPackUnorm2x16: + libCall = spv::GLSLstd450PackUnorm2x16; + break; + case glslang::EOpUnpackUnorm2x16: + libCall = spv::GLSLstd450UnpackUnorm2x16; + break; + case glslang::EOpPackHalf2x16: + libCall = spv::GLSLstd450PackHalf2x16; + break; + case glslang::EOpUnpackHalf2x16: + libCall = spv::GLSLstd450UnpackHalf2x16; + break; + case glslang::EOpPackSnorm4x8: + libCall = spv::GLSLstd450PackSnorm4x8; + break; + case glslang::EOpUnpackSnorm4x8: + libCall = spv::GLSLstd450UnpackSnorm4x8; + break; + case glslang::EOpPackUnorm4x8: + libCall = spv::GLSLstd450PackUnorm4x8; + break; + case glslang::EOpUnpackUnorm4x8: + libCall = spv::GLSLstd450UnpackUnorm4x8; + break; + case glslang::EOpPackDouble2x32: + libCall = spv::GLSLstd450PackDouble2x32; + break; + case glslang::EOpUnpackDouble2x32: + libCall = spv::GLSLstd450UnpackDouble2x32; + break; + + case glslang::EOpDPdx: + unaryOp = spv::OpDPdx; + break; + case glslang::EOpDPdy: + unaryOp = spv::OpDPdy; + break; + case glslang::EOpFwidth: + unaryOp = spv::OpFwidth; + break; + case glslang::EOpDPdxFine: + builder.addCapability(spv::CapabilityDerivativeControl); + unaryOp = spv::OpDPdxFine; + break; + case glslang::EOpDPdyFine: + builder.addCapability(spv::CapabilityDerivativeControl); + unaryOp = 
spv::OpDPdyFine; + break; + case glslang::EOpFwidthFine: + builder.addCapability(spv::CapabilityDerivativeControl); + unaryOp = spv::OpFwidthFine; + break; + case glslang::EOpDPdxCoarse: + builder.addCapability(spv::CapabilityDerivativeControl); + unaryOp = spv::OpDPdxCoarse; + break; + case glslang::EOpDPdyCoarse: + builder.addCapability(spv::CapabilityDerivativeControl); + unaryOp = spv::OpDPdyCoarse; + break; + case glslang::EOpFwidthCoarse: + builder.addCapability(spv::CapabilityDerivativeControl); + unaryOp = spv::OpFwidthCoarse; + break; + case glslang::EOpInterpolateAtCentroid: + builder.addCapability(spv::CapabilityInterpolationFunction); + libCall = spv::GLSLstd450InterpolateAtCentroid; + break; + case glslang::EOpAny: + unaryOp = spv::OpAny; + break; + case glslang::EOpAll: + unaryOp = spv::OpAll; + break; + + case glslang::EOpAbs: + if (isFloat) + libCall = spv::GLSLstd450FAbs; + else + libCall = spv::GLSLstd450SAbs; + break; + case glslang::EOpSign: + if (isFloat) + libCall = spv::GLSLstd450FSign; + else + libCall = spv::GLSLstd450SSign; + break; + + case glslang::EOpAtomicCounterIncrement: + case glslang::EOpAtomicCounterDecrement: + case glslang::EOpAtomicCounter: + { + // Handle all of the atomics in one place, in createAtomicOperation() + std::vector operands; + operands.push_back(operand); + return createAtomicOperation(op, precision, typeId, operands, typeProxy); + } + + case glslang::EOpBitFieldReverse: + unaryOp = spv::OpBitReverse; + break; + case glslang::EOpBitCount: + unaryOp = spv::OpBitCount; + break; + case glslang::EOpFindLSB: + libCall = spv::GLSLstd450FindILsb; + break; + case glslang::EOpFindMSB: + if (isUnsigned) + libCall = spv::GLSLstd450FindUMsb; + else + libCall = spv::GLSLstd450FindSMsb; + break; + + default: + return 0; + } + + spv::Id id; + if (libCall >= 0) { + std::vector args; + args.push_back(operand); + id = builder.createBuiltinCall(typeId, stdBuiltins, libCall, args); + } else + id = builder.createUnaryOp(unaryOp, 
typeId, operand); + + return builder.setPrecision(id, precision); +} + +// Create a unary operation on a matrix +spv::Id TGlslangToSpvTraverser::createUnaryMatrixOperation(spv::Op op, spv::Decoration precision, spv::Id typeId, spv::Id operand, glslang::TBasicType /* typeProxy */) +{ + // Handle unary operations vector by vector. + // The result type is the same type as the original type. + // The algorithm is to: + // - break the matrix into vectors + // - apply the operation to each vector + // - make a matrix out the vector results + + // get the types sorted out + int numCols = builder.getNumColumns(operand); + int numRows = builder.getNumRows(operand); + spv::Id scalarType = builder.getScalarTypeId(typeId); + spv::Id vecType = builder.makeVectorType(scalarType, numRows); + std::vector results; + + // do each vector op + for (int c = 0; c < numCols; ++c) { + std::vector indexes; + indexes.push_back(c); + spv::Id vec = builder.createCompositeExtract(operand, vecType, indexes); + results.push_back(builder.createUnaryOp(op, vecType, vec)); + builder.setPrecision(results.back(), precision); + } + + // put the pieces together + return builder.setPrecision(builder.createCompositeConstruct(typeId, results), precision); +} + +spv::Id TGlslangToSpvTraverser::createConversion(glslang::TOperator op, spv::Decoration precision, spv::Id destType, spv::Id operand) +{ + spv::Op convOp = spv::OpNop; + spv::Id zero = 0; + spv::Id one = 0; + + int vectorSize = builder.isVectorType(destType) ? 
builder.getNumTypeComponents(destType) : 0; + + switch (op) { + case glslang::EOpConvIntToBool: + case glslang::EOpConvUintToBool: + zero = builder.makeUintConstant(0); + zero = makeSmearedConstant(zero, vectorSize); + return builder.createBinOp(spv::OpINotEqual, destType, operand, zero); + + case glslang::EOpConvFloatToBool: + zero = builder.makeFloatConstant(0.0F); + zero = makeSmearedConstant(zero, vectorSize); + return builder.createBinOp(spv::OpFOrdNotEqual, destType, operand, zero); + + case glslang::EOpConvDoubleToBool: + zero = builder.makeDoubleConstant(0.0); + zero = makeSmearedConstant(zero, vectorSize); + return builder.createBinOp(spv::OpFOrdNotEqual, destType, operand, zero); + + case glslang::EOpConvBoolToFloat: + convOp = spv::OpSelect; + zero = builder.makeFloatConstant(0.0); + one = builder.makeFloatConstant(1.0); + break; + case glslang::EOpConvBoolToDouble: + convOp = spv::OpSelect; + zero = builder.makeDoubleConstant(0.0); + one = builder.makeDoubleConstant(1.0); + break; + case glslang::EOpConvBoolToInt: + zero = builder.makeIntConstant(0); + one = builder.makeIntConstant(1); + convOp = spv::OpSelect; + break; + case glslang::EOpConvBoolToUint: + zero = builder.makeUintConstant(0); + one = builder.makeUintConstant(1); + convOp = spv::OpSelect; + break; + + case glslang::EOpConvIntToFloat: + case glslang::EOpConvIntToDouble: + convOp = spv::OpConvertSToF; + break; + + case glslang::EOpConvUintToFloat: + case glslang::EOpConvUintToDouble: + convOp = spv::OpConvertUToF; + break; + + case glslang::EOpConvDoubleToFloat: + case glslang::EOpConvFloatToDouble: + convOp = spv::OpFConvert; + break; + + case glslang::EOpConvFloatToInt: + case glslang::EOpConvDoubleToInt: + convOp = spv::OpConvertFToS; + break; + + case glslang::EOpConvUintToInt: + case glslang::EOpConvIntToUint: + convOp = spv::OpBitcast; + break; + + case glslang::EOpConvFloatToUint: + case glslang::EOpConvDoubleToUint: + convOp = spv::OpConvertFToU; + break; + default: + break; + } + + 
spv::Id result = 0; + if (convOp == spv::OpNop) + return result; + + if (convOp == spv::OpSelect) { + zero = makeSmearedConstant(zero, vectorSize); + one = makeSmearedConstant(one, vectorSize); + result = builder.createTriOp(convOp, destType, operand, one, zero); + } else + result = builder.createUnaryOp(convOp, destType, operand); + + return builder.setPrecision(result, precision); +} + +spv::Id TGlslangToSpvTraverser::makeSmearedConstant(spv::Id constant, int vectorSize) +{ + if (vectorSize == 0) + return constant; + + spv::Id vectorTypeId = builder.makeVectorType(builder.getTypeId(constant), vectorSize); + std::vector components; + for (int c = 0; c < vectorSize; ++c) + components.push_back(constant); + return builder.makeCompositeConstant(vectorTypeId, components); +} + +// For glslang ops that map to SPV atomic opCodes +spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv::Decoration /*precision*/, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy) +{ + spv::Op opCode = spv::OpNop; + + switch (op) { + case glslang::EOpAtomicAdd: + case glslang::EOpImageAtomicAdd: + opCode = spv::OpAtomicIAdd; + break; + case glslang::EOpAtomicMin: + case glslang::EOpImageAtomicMin: + opCode = typeProxy == glslang::EbtUint ? spv::OpAtomicUMin : spv::OpAtomicSMin; + break; + case glslang::EOpAtomicMax: + case glslang::EOpImageAtomicMax: + opCode = typeProxy == glslang::EbtUint ? 
spv::OpAtomicUMax : spv::OpAtomicSMax; + break; + case glslang::EOpAtomicAnd: + case glslang::EOpImageAtomicAnd: + opCode = spv::OpAtomicAnd; + break; + case glslang::EOpAtomicOr: + case glslang::EOpImageAtomicOr: + opCode = spv::OpAtomicOr; + break; + case glslang::EOpAtomicXor: + case glslang::EOpImageAtomicXor: + opCode = spv::OpAtomicXor; + break; + case glslang::EOpAtomicExchange: + case glslang::EOpImageAtomicExchange: + opCode = spv::OpAtomicExchange; + break; + case glslang::EOpAtomicCompSwap: + case glslang::EOpImageAtomicCompSwap: + opCode = spv::OpAtomicCompareExchange; + break; + case glslang::EOpAtomicCounterIncrement: + opCode = spv::OpAtomicIIncrement; + break; + case glslang::EOpAtomicCounterDecrement: + opCode = spv::OpAtomicIDecrement; + break; + case glslang::EOpAtomicCounter: + opCode = spv::OpAtomicLoad; + break; + default: + assert(0); + break; + } + + // Sort out the operands + // - mapping from glslang -> SPV + // - there are extra SPV operands with no glslang source + // - compare-exchange swaps the value and comparator + // - compare-exchange has an extra memory semantics + std::vector spvAtomicOperands; // hold the spv operands + auto opIt = operands.begin(); // walk the glslang operands + spvAtomicOperands.push_back(*(opIt++)); + spvAtomicOperands.push_back(builder.makeUintConstant(spv::ScopeDevice)); // TBD: what is the correct scope? + spvAtomicOperands.push_back(builder.makeUintConstant(spv::MemorySemanticsMaskNone)); // TBD: what are the correct memory semantics? + if (opCode == spv::OpAtomicCompareExchange) { + // There are 2 memory semantics for compare-exchange. And the operand order of "comparator" and "new value" in GLSL + // differs from that in SPIR-V. Hence, special processing is required. 
+ spvAtomicOperands.push_back(builder.makeUintConstant(spv::MemorySemanticsMaskNone)); + spvAtomicOperands.push_back(*(opIt + 1)); + spvAtomicOperands.push_back(*opIt); + opIt += 2; + } + + // Add the rest of the operands, skipping any that were dealt with above. + for (; opIt != operands.end(); ++opIt) + spvAtomicOperands.push_back(*opIt); + + return builder.createOp(opCode, typeId, spvAtomicOperands); +} + +spv::Id TGlslangToSpvTraverser::createMiscOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy) +{ + bool isUnsigned = typeProxy == glslang::EbtUint; + bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble; + + spv::Op opCode = spv::OpNop; + int libCall = -1; + size_t consumedOperands = operands.size(); + spv::Id typeId0 = 0; + if (consumedOperands > 0) + typeId0 = builder.getTypeId(operands[0]); + spv::Id frexpIntType = 0; + + switch (op) { + case glslang::EOpMin: + if (isFloat) + libCall = spv::GLSLstd450FMin; + else if (isUnsigned) + libCall = spv::GLSLstd450UMin; + else + libCall = spv::GLSLstd450SMin; + builder.promoteScalar(precision, operands.front(), operands.back()); + break; + case glslang::EOpModf: + libCall = spv::GLSLstd450Modf; + break; + case glslang::EOpMax: + if (isFloat) + libCall = spv::GLSLstd450FMax; + else if (isUnsigned) + libCall = spv::GLSLstd450UMax; + else + libCall = spv::GLSLstd450SMax; + builder.promoteScalar(precision, operands.front(), operands.back()); + break; + case glslang::EOpPow: + libCall = spv::GLSLstd450Pow; + break; + case glslang::EOpDot: + opCode = spv::OpDot; + break; + case glslang::EOpAtan: + libCall = spv::GLSLstd450Atan2; + break; + + case glslang::EOpClamp: + if (isFloat) + libCall = spv::GLSLstd450FClamp; + else if (isUnsigned) + libCall = spv::GLSLstd450UClamp; + else + libCall = spv::GLSLstd450SClamp; + builder.promoteScalar(precision, operands.front(), operands[1]); + builder.promoteScalar(precision, 
operands.front(), operands[2]); + break; + case glslang::EOpMix: + if (isFloat) + libCall = spv::GLSLstd450FMix; + else { + opCode = spv::OpSelect; + spv::MissingFunctionality("translating integer mix to OpSelect"); + } + builder.promoteScalar(precision, operands.front(), operands.back()); + break; + case glslang::EOpStep: + libCall = spv::GLSLstd450Step; + builder.promoteScalar(precision, operands.front(), operands.back()); + break; + case glslang::EOpSmoothStep: + libCall = spv::GLSLstd450SmoothStep; + builder.promoteScalar(precision, operands[0], operands[2]); + builder.promoteScalar(precision, operands[1], operands[2]); + break; + + case glslang::EOpDistance: + libCall = spv::GLSLstd450Distance; + break; + case glslang::EOpCross: + libCall = spv::GLSLstd450Cross; + break; + case glslang::EOpFaceForward: + libCall = spv::GLSLstd450FaceForward; + break; + case glslang::EOpReflect: + libCall = spv::GLSLstd450Reflect; + break; + case glslang::EOpRefract: + libCall = spv::GLSLstd450Refract; + break; + case glslang::EOpInterpolateAtSample: + builder.addCapability(spv::CapabilityInterpolationFunction); + libCall = spv::GLSLstd450InterpolateAtSample; + break; + case glslang::EOpInterpolateAtOffset: + builder.addCapability(spv::CapabilityInterpolationFunction); + libCall = spv::GLSLstd450InterpolateAtOffset; + break; + case glslang::EOpAddCarry: + opCode = spv::OpIAddCarry; + typeId = builder.makeStructResultType(typeId0, typeId0); + consumedOperands = 2; + break; + case glslang::EOpSubBorrow: + opCode = spv::OpISubBorrow; + typeId = builder.makeStructResultType(typeId0, typeId0); + consumedOperands = 2; + break; + case glslang::EOpUMulExtended: + opCode = spv::OpUMulExtended; + typeId = builder.makeStructResultType(typeId0, typeId0); + consumedOperands = 2; + break; + case glslang::EOpIMulExtended: + opCode = spv::OpSMulExtended; + typeId = builder.makeStructResultType(typeId0, typeId0); + consumedOperands = 2; + break; + case glslang::EOpBitfieldExtract: + if 
(isUnsigned) + opCode = spv::OpBitFieldUExtract; + else + opCode = spv::OpBitFieldSExtract; + break; + case glslang::EOpBitfieldInsert: + opCode = spv::OpBitFieldInsert; + break; + + case glslang::EOpFma: + libCall = spv::GLSLstd450Fma; + break; + case glslang::EOpFrexp: + libCall = spv::GLSLstd450FrexpStruct; + if (builder.getNumComponents(operands[0]) == 1) + frexpIntType = builder.makeIntegerType(32, true); + else + frexpIntType = builder.makeVectorType(builder.makeIntegerType(32, true), builder.getNumComponents(operands[0])); + typeId = builder.makeStructResultType(typeId0, frexpIntType); + consumedOperands = 1; + break; + case glslang::EOpLdexp: + libCall = spv::GLSLstd450Ldexp; + break; + + default: + return 0; + } + + spv::Id id = 0; + if (libCall >= 0) { + // Use an extended instruction from the standard library. + // Construct the call arguments, without modifying the original operands vector. + // We might need the remaining arguments, e.g. in the EOpFrexp case. + std::vector callArguments(operands.begin(), operands.begin() + consumedOperands); + id = builder.createBuiltinCall(typeId, stdBuiltins, libCall, callArguments); + } else { + switch (consumedOperands) { + case 0: + // should all be handled by visitAggregate and createNoArgOperation + assert(0); + return 0; + case 1: + // should all be handled by createUnaryOperation + assert(0); + return 0; + case 2: + id = builder.createBinOp(opCode, typeId, operands[0], operands[1]); + break; + default: + // anything 3 or over doesn't have l-value operands, so all should be consumed + assert(consumedOperands == operands.size()); + id = builder.createOp(opCode, typeId, operands); + break; + } + } + + // Decode the return types that were structures + switch (op) { + case glslang::EOpAddCarry: + case glslang::EOpSubBorrow: + builder.createStore(builder.createCompositeExtract(id, typeId0, 1), operands[2]); + id = builder.createCompositeExtract(id, typeId0, 0); + break; + case glslang::EOpUMulExtended: + case 
glslang::EOpIMulExtended: + builder.createStore(builder.createCompositeExtract(id, typeId0, 0), operands[3]); + builder.createStore(builder.createCompositeExtract(id, typeId0, 1), operands[2]); + break; + case glslang::EOpFrexp: + assert(operands.size() == 2); + builder.createStore(builder.createCompositeExtract(id, frexpIntType, 1), operands[1]); + id = builder.createCompositeExtract(id, typeId0, 0); + break; + default: + break; + } + + return builder.setPrecision(id, precision); +} + +// Intrinsics with no arguments, no return value, and no precision. +spv::Id TGlslangToSpvTraverser::createNoArgOperation(glslang::TOperator op) +{ + // TODO: get the barrier operands correct + + switch (op) { + case glslang::EOpEmitVertex: + builder.createNoResultOp(spv::OpEmitVertex); + return 0; + case glslang::EOpEndPrimitive: + builder.createNoResultOp(spv::OpEndPrimitive); + return 0; + case glslang::EOpBarrier: + builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsAllMemory); + builder.createControlBarrier(spv::ScopeDevice, spv::ScopeDevice, spv::MemorySemanticsMaskNone); + return 0; + case glslang::EOpMemoryBarrier: + builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsAllMemory); + return 0; + case glslang::EOpMemoryBarrierAtomicCounter: + builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsAtomicCounterMemoryMask); + return 0; + case glslang::EOpMemoryBarrierBuffer: + builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsUniformMemoryMask); + return 0; + case glslang::EOpMemoryBarrierImage: + builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsImageMemoryMask); + return 0; + case glslang::EOpMemoryBarrierShared: + builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsWorkgroupMemoryMask); + return 0; + case glslang::EOpGroupMemoryBarrier: + builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsCrossWorkgroupMemoryMask); + return 0; + default: + spv::MissingFunctionality("unknown 
operation with no arguments"); + return 0; + } +} + +spv::Id TGlslangToSpvTraverser::getSymbolId(const glslang::TIntermSymbol* symbol) +{ + auto iter = symbolValues.find(symbol->getId()); + spv::Id id; + if (symbolValues.end() != iter) { + id = iter->second; + return id; + } + + // it was not found, create it + id = createSpvVariable(symbol); + symbolValues[symbol->getId()] = id; + + if (! symbol->getType().isStruct()) { + addDecoration(id, TranslatePrecisionDecoration(symbol->getType())); + addDecoration(id, TranslateInterpolationDecoration(symbol->getType().getQualifier())); + if (symbol->getType().getQualifier().hasSpecConstantId()) + addDecoration(id, spv::DecorationSpecId, symbol->getType().getQualifier().layoutSpecConstantId); + if (symbol->getQualifier().hasLocation()) + builder.addDecoration(id, spv::DecorationLocation, symbol->getQualifier().layoutLocation); + if (symbol->getQualifier().hasIndex()) + builder.addDecoration(id, spv::DecorationIndex, symbol->getQualifier().layoutIndex); + if (symbol->getQualifier().hasComponent()) + builder.addDecoration(id, spv::DecorationComponent, symbol->getQualifier().layoutComponent); + if (glslangIntermediate->getXfbMode()) { + builder.addCapability(spv::CapabilityTransformFeedback); + if (symbol->getQualifier().hasXfbStride()) + builder.addDecoration(id, spv::DecorationXfbStride, symbol->getQualifier().layoutXfbStride); + if (symbol->getQualifier().hasXfbBuffer()) + builder.addDecoration(id, spv::DecorationXfbBuffer, symbol->getQualifier().layoutXfbBuffer); + if (symbol->getQualifier().hasXfbOffset()) + builder.addDecoration(id, spv::DecorationOffset, symbol->getQualifier().layoutXfbOffset); + } + } + + addDecoration(id, TranslateInvariantDecoration(symbol->getType().getQualifier())); + if (symbol->getQualifier().hasStream()) { + builder.addCapability(spv::CapabilityGeometryStreams); + builder.addDecoration(id, spv::DecorationStream, symbol->getQualifier().layoutStream); + } + if (symbol->getQualifier().hasSet()) + 
builder.addDecoration(id, spv::DecorationDescriptorSet, symbol->getQualifier().layoutSet); + else if (IsDescriptorResource(symbol->getType())) { + // default to 0 + builder.addDecoration(id, spv::DecorationDescriptorSet, 0); + } + if (symbol->getQualifier().hasBinding()) + builder.addDecoration(id, spv::DecorationBinding, symbol->getQualifier().layoutBinding); + if (symbol->getQualifier().hasAttachment()) + builder.addDecoration(id, spv::DecorationInputAttachmentIndex, symbol->getQualifier().layoutAttachment); + if (glslangIntermediate->getXfbMode()) { + builder.addCapability(spv::CapabilityTransformFeedback); + if (symbol->getQualifier().hasXfbStride()) + builder.addDecoration(id, spv::DecorationXfbStride, symbol->getQualifier().layoutXfbStride); + if (symbol->getQualifier().hasXfbBuffer()) + builder.addDecoration(id, spv::DecorationXfbBuffer, symbol->getQualifier().layoutXfbBuffer); + } + + // built-in variable decorations + spv::BuiltIn builtIn = TranslateBuiltInDecoration(symbol->getQualifier().builtIn); + if (builtIn != spv::BadValue) + addDecoration(id, spv::DecorationBuiltIn, (int)builtIn); + + return id; +} + +// If 'dec' is valid, add no-operand decoration to an object +void TGlslangToSpvTraverser::addDecoration(spv::Id id, spv::Decoration dec) +{ + if (dec != spv::BadValue) + builder.addDecoration(id, dec); +} + +// If 'dec' is valid, add a one-operand decoration to an object +void TGlslangToSpvTraverser::addDecoration(spv::Id id, spv::Decoration dec, unsigned value) +{ + if (dec != spv::BadValue) + builder.addDecoration(id, dec, value); +} + +// If 'dec' is valid, add a no-operand decoration to a struct member +void TGlslangToSpvTraverser::addMemberDecoration(spv::Id id, int member, spv::Decoration dec) +{ + if (dec != spv::BadValue) + builder.addMemberDecoration(id, (unsigned)member, dec); +} + +// If 'dec' is valid, add a one-operand decoration to a struct member +void TGlslangToSpvTraverser::addMemberDecoration(spv::Id id, int member, spv::Decoration 
dec, unsigned value) +{ + if (dec != spv::BadValue) + builder.addMemberDecoration(id, (unsigned)member, dec, value); +} + +// Make a full tree of instructions to build a SPIR-V specialization constant, +// or regular constant if possible. +// +// TBD: this is not yet done, nor verified to be the best design, it does do the leaf symbols though +// +// Recursively walk the nodes. The nodes form a tree whose leaves are +// regular constants, which themselves are trees that createSpvConstant() +// recursively walks. So, this function walks the "top" of the tree: +// - emit specialization constant-building instructions for specConstant +// - when running into a non-spec-constant, switch to createSpvConstant() +spv::Id TGlslangToSpvTraverser::createSpvSpecConstant(const glslang::TIntermTyped& node) +{ + assert(node.getQualifier().storage == glslang::EvqConst); + + if (! node.getQualifier().specConstant) { + // hand off to the non-spec-constant path + assert(node.getAsConstantUnion() != nullptr || node.getAsSymbolNode() != nullptr); + int nextConst = 0; + return createSpvConstant(node.getType(), node.getAsConstantUnion() ? node.getAsConstantUnion()->getConstArray() : node.getAsSymbolNode()->getConstArray(), + nextConst, false); + } + + // We now know we have a specialization constant to build + + if (node.getAsSymbolNode() && node.getQualifier().hasSpecConstantId()) { + // this is a direct literal assigned to a layout(constant_id=) declaration + int nextConst = 0; + return createSpvConstant(node.getType(), node.getAsConstantUnion() ? node.getAsConstantUnion()->getConstArray() : node.getAsSymbolNode()->getConstArray(), + nextConst, true); + } else { + // gl_WorkgroupSize is a special case until the front-end handles hierarchical specialization constants, + // even then, it's specialization ids are handled by special case syntax in GLSL: layout(local_size_x = ... 
+ if (node.getType().getQualifier().builtIn == glslang::EbvWorkGroupSize) { + std::vector dimConstId; + for (int dim = 0; dim < 3; ++dim) { + bool specConst = (glslangIntermediate->getLocalSizeSpecId(dim) != glslang::TQualifier::layoutNotSet); + dimConstId.push_back(builder.makeUintConstant(glslangIntermediate->getLocalSize(dim), specConst)); + if (specConst) + addDecoration(dimConstId.back(), spv::DecorationSpecId, glslangIntermediate->getLocalSizeSpecId(dim)); + } + return builder.makeCompositeConstant(builder.makeVectorType(builder.makeUintType(32), 3), dimConstId, true); + } else { + spv::MissingFunctionality("specialization-constant expression trees"); + return spv::NoResult; + } + } +} + +// Use 'consts' as the flattened glslang source of scalar constants to recursively +// build the aggregate SPIR-V constant. +// +// If there are not enough elements present in 'consts', 0 will be substituted; +// an empty 'consts' can be used to create a fully zeroed SPIR-V constant. +// +spv::Id TGlslangToSpvTraverser::createSpvConstant(const glslang::TType& glslangType, const glslang::TConstUnionArray& consts, int& nextConst, bool specConstant) +{ + // vector of constants for SPIR-V + std::vector spvConsts; + + // Type is used for struct and array constants + spv::Id typeId = convertGlslangToSpvType(glslangType); + + if (glslangType.isArray()) { + glslang::TType elementType(glslangType, 0); + for (int i = 0; i < glslangType.getOuterArraySize(); ++i) + spvConsts.push_back(createSpvConstant(elementType, consts, nextConst, false)); + } else if (glslangType.isMatrix()) { + glslang::TType vectorType(glslangType, 0); + for (int col = 0; col < glslangType.getMatrixCols(); ++col) + spvConsts.push_back(createSpvConstant(vectorType, consts, nextConst, false)); + } else if (glslangType.getStruct()) { + glslang::TVector::const_iterator iter; + for (iter = glslangType.getStruct()->begin(); iter != glslangType.getStruct()->end(); ++iter) + 
spvConsts.push_back(createSpvConstant(*iter->type, consts, nextConst, false)); + } else if (glslangType.isVector()) { + for (unsigned int i = 0; i < (unsigned int)glslangType.getVectorSize(); ++i) { + bool zero = nextConst >= consts.size(); + switch (glslangType.getBasicType()) { + case glslang::EbtInt: + spvConsts.push_back(builder.makeIntConstant(zero ? 0 : consts[nextConst].getIConst())); + break; + case glslang::EbtUint: + spvConsts.push_back(builder.makeUintConstant(zero ? 0 : consts[nextConst].getUConst())); + break; + case glslang::EbtFloat: + spvConsts.push_back(builder.makeFloatConstant(zero ? 0.0F : (float)consts[nextConst].getDConst())); + break; + case glslang::EbtDouble: + spvConsts.push_back(builder.makeDoubleConstant(zero ? 0.0 : consts[nextConst].getDConst())); + break; + case glslang::EbtBool: + spvConsts.push_back(builder.makeBoolConstant(zero ? false : consts[nextConst].getBConst())); + break; + default: + assert(0); + break; + } + ++nextConst; + } + } else { + // we have a non-aggregate (scalar) constant + bool zero = nextConst >= consts.size(); + spv::Id scalar = 0; + switch (glslangType.getBasicType()) { + case glslang::EbtInt: + scalar = builder.makeIntConstant(zero ? 0 : consts[nextConst].getIConst(), specConstant); + break; + case glslang::EbtUint: + scalar = builder.makeUintConstant(zero ? 0 : consts[nextConst].getUConst(), specConstant); + break; + case glslang::EbtFloat: + scalar = builder.makeFloatConstant(zero ? 0.0F : (float)consts[nextConst].getDConst(), specConstant); + break; + case glslang::EbtDouble: + scalar = builder.makeDoubleConstant(zero ? 0.0 : consts[nextConst].getDConst(), specConstant); + break; + case glslang::EbtBool: + scalar = builder.makeBoolConstant(zero ? 
false : consts[nextConst].getBConst(), specConstant); + break; + default: + assert(0); + break; + } + ++nextConst; + return scalar; + } + + return builder.makeCompositeConstant(typeId, spvConsts); +} + +// Return true if the node is a constant or symbol whose reading has no +// non-trivial observable cost or effect. +bool TGlslangToSpvTraverser::isTrivialLeaf(const glslang::TIntermTyped* node) +{ + // don't know what this is + if (node == nullptr) + return false; + + // a constant is safe + if (node->getAsConstantUnion() != nullptr) + return true; + + // not a symbol means non-trivial + if (node->getAsSymbolNode() == nullptr) + return false; + + // a symbol, depends on what's being read + switch (node->getType().getQualifier().storage) { + case glslang::EvqTemporary: + case glslang::EvqGlobal: + case glslang::EvqIn: + case glslang::EvqInOut: + case glslang::EvqConst: + case glslang::EvqConstReadOnly: + case glslang::EvqUniform: + return true; + default: + return false; + } +} + +// A node is trivial if it is a single operation with no side effects. +// Error on the side of saying non-trivial. +// Return true if trivial. +bool TGlslangToSpvTraverser::isTrivial(const glslang::TIntermTyped* node) +{ + if (node == nullptr) + return false; + + // symbols and constants are trivial + if (isTrivialLeaf(node)) + return true; + + // otherwise, it needs to be a simple operation or one or two leaf nodes + + // not a simple operation + const glslang::TIntermBinary* binaryNode = node->getAsBinaryNode(); + const glslang::TIntermUnary* unaryNode = node->getAsUnaryNode(); + if (binaryNode == nullptr && unaryNode == nullptr) + return false; + + // not on leaf nodes + if (binaryNode && (! isTrivialLeaf(binaryNode->getLeft()) || ! isTrivialLeaf(binaryNode->getRight()))) + return false; + + if (unaryNode && ! 
isTrivialLeaf(unaryNode->getOperand())) { + return false; + } + + switch (node->getAsOperator()->getOp()) { + case glslang::EOpLogicalNot: + case glslang::EOpConvIntToBool: + case glslang::EOpConvUintToBool: + case glslang::EOpConvFloatToBool: + case glslang::EOpConvDoubleToBool: + case glslang::EOpEqual: + case glslang::EOpNotEqual: + case glslang::EOpLessThan: + case glslang::EOpGreaterThan: + case glslang::EOpLessThanEqual: + case glslang::EOpGreaterThanEqual: + case glslang::EOpIndexDirect: + case glslang::EOpIndexDirectStruct: + case glslang::EOpLogicalXor: + case glslang::EOpAny: + case glslang::EOpAll: + return true; + default: + return false; + } +} + +// Emit short-circuiting code, where 'right' is never evaluated unless +// the left side is true (for &&) or false (for ||). +spv::Id TGlslangToSpvTraverser::createShortCircuit(glslang::TOperator op, glslang::TIntermTyped& left, glslang::TIntermTyped& right) +{ + spv::Id boolTypeId = builder.makeBoolType(); + + // emit left operand + builder.clearAccessChain(); + left.traverse(this); + spv::Id leftId = builder.accessChainLoad(spv::NoPrecision, boolTypeId); + + // Operands to accumulate OpPhi operands + std::vector phiOperands; + // accumulate left operand's phi information + phiOperands.push_back(leftId); + phiOperands.push_back(builder.getBuildPoint()->getId()); + + // Make the two kinds of operation symmetric with a "!" + // || => emit "if (! 
left) result = right" + // && => emit "if ( left) result = right" + // + // TODO: this runtime "not" for || could be avoided by adding functionality + // to 'builder' to have an "else" without an "then" + if (op == glslang::EOpLogicalOr) + leftId = builder.createUnaryOp(spv::OpLogicalNot, boolTypeId, leftId); + + // make an "if" based on the left value + spv::Builder::If ifBuilder(leftId, builder); + + // emit right operand as the "then" part of the "if" + builder.clearAccessChain(); + right.traverse(this); + spv::Id rightId = builder.accessChainLoad(spv::NoPrecision, boolTypeId); + + // accumulate left operand's phi information + phiOperands.push_back(rightId); + phiOperands.push_back(builder.getBuildPoint()->getId()); + + // finish the "if" + ifBuilder.makeEndIf(); + + // phi together the two results + return builder.createOp(spv::OpPhi, boolTypeId, phiOperands); +} + +}; // end anonymous namespace + +namespace glslang { + +void GetSpirvVersion(std::string& version) +{ + const int bufSize = 100; + char buf[bufSize]; + snprintf(buf, bufSize, "0x%08x, Revision %d", spv::Version, spv::Revision); + version = buf; +} + +// Write SPIR-V out to a binary file +void OutputSpv(const std::vector& spirv, const char* baseName) +{ + std::ofstream out; + out.open(baseName, std::ios::binary | std::ios::out); + for (int i = 0; i < (int)spirv.size(); ++i) { + unsigned int word = spirv[i]; + out.write((const char*)&word, 4); + } + out.close(); +} + +// +// Set up the glslang traversal +// +void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector& spirv) +{ + TIntermNode* root = intermediate.getTreeRoot(); + + if (root == 0) + return; + + glslang::GetThreadPoolAllocator().push(); + + TGlslangToSpvTraverser it(&intermediate); + + root->traverse(&it); + + it.dumpSpv(spirv); + + glslang::GetThreadPoolAllocator().pop(); +} + +}; // end namespace glslang diff --git a/third_party/glslang-spirv/GlslangToSpv.h b/third_party/glslang-spirv/GlslangToSpv.h new file mode 100644 
index 000000000..d8a18893c --- /dev/null +++ b/third_party/glslang-spirv/GlslangToSpv.h @@ -0,0 +1,43 @@ +// +//Copyright (C) 2014 LunarG, Inc. +// +//All rights reserved. +// +//Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions +//are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +//POSSIBILITY OF SUCH DAMAGE. 
+ +#include "../glslang/Include/intermediate.h" + +namespace glslang { + +void GetSpirvVersion(std::string&); +void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector& spirv); +void OutputSpv(const std::vector& spirv, const char* baseName); + +}; diff --git a/third_party/glslang-spirv/InReadableOrder.cpp b/third_party/glslang-spirv/InReadableOrder.cpp new file mode 100644 index 000000000..9180dc8c7 --- /dev/null +++ b/third_party/glslang-spirv/InReadableOrder.cpp @@ -0,0 +1,116 @@ +// +//Copyright (C) 2016 Google, Inc. +// +//All rights reserved. +// +//Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions +//are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +//POSSIBILITY OF SUCH DAMAGE. + +// +// Author: Dejan Mircevski, Google +// + +// The SPIR-V spec requires code blocks to appear in an order satisfying the +// dominator-tree direction (ie, dominator before the dominated). This is, +// actually, easy to achieve: any pre-order CFG traversal algorithm will do it. +// Because such algorithms visit a block only after traversing some path to it +// from the root, they necessarily visit the block's idom first. +// +// But not every graph-traversal algorithm outputs blocks in an order that +// appears logical to human readers. The problem is that unrelated branches may +// be interspersed with each other, and merge blocks may come before some of the +// branches being merged. +// +// A good, human-readable order of blocks may be achieved by performing +// depth-first search but delaying merge nodes until after all their branches +// have been visited. This is implemented below by the inReadableOrder() +// function. + +#include "spvIR.h" + +#include +#include + +using spv::Block; +using spv::Id; + +namespace { +// Traverses CFG in a readable order, invoking a pre-set callback on each block. +// Use by calling visit() on the root block. +class ReadableOrderTraverser { +public: + explicit ReadableOrderTraverser(std::function callback) : callback_(callback) {} + // Visits the block if it hasn't been visited already and isn't currently + // being delayed. Invokes callback(block), then descends into its + // successors. 
Delays merge-block and continue-block processing until all + // the branches have been completed. + void visit(Block* block) + { + assert(block); + if (visited_[block] || delayed_[block]) + return; + callback_(block); + visited_[block] = true; + Block* mergeBlock = nullptr; + Block* continueBlock = nullptr; + auto mergeInst = block->getMergeInstruction(); + if (mergeInst) { + Id mergeId = mergeInst->getIdOperand(0); + mergeBlock = block->getParent().getParent().getInstruction(mergeId)->getBlock(); + delayed_[mergeBlock] = true; + if (mergeInst->getOpCode() == spv::OpLoopMerge) { + Id continueId = mergeInst->getIdOperand(1); + continueBlock = + block->getParent().getParent().getInstruction(continueId)->getBlock(); + delayed_[continueBlock] = true; + } + } + for (const auto succ : block->getSuccessors()) + visit(succ); + if (continueBlock) { + delayed_[continueBlock] = false; + visit(continueBlock); + } + if (mergeBlock) { + delayed_[mergeBlock] = false; + visit(mergeBlock); + } + } + +private: + std::function callback_; + // Whether a block has already been visited or is being delayed. + std::unordered_map visited_, delayed_; +}; +} + +void spv::inReadableOrder(Block* root, std::function callback) +{ + ReadableOrderTraverser(callback).visit(root); +} diff --git a/third_party/glslang-spirv/SPVRemapper.cpp b/third_party/glslang-spirv/SPVRemapper.cpp new file mode 100644 index 000000000..a76720a0b --- /dev/null +++ b/third_party/glslang-spirv/SPVRemapper.cpp @@ -0,0 +1,1231 @@ +// +//Copyright (C) 2015 LunarG, Inc. +// +//All rights reserved. +// +//Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions +//are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +//POSSIBILITY OF SUCH DAMAGE. +// + +#include "SPVRemapper.h" +#include "doc.h" + +#if !defined (use_cpp11) +// ... not supported before C++11 +#else // defined (use_cpp11) + +#include +#include + +namespace spv { + + // By default, just abort on error. Can be overridden via RegisterErrorHandler + spirvbin_t::errorfn_t spirvbin_t::errorHandler = [](const std::string&) { exit(5); }; + // By default, eat log messages. 
Can be overridden via RegisterLogHandler + spirvbin_t::logfn_t spirvbin_t::logHandler = [](const std::string&) { }; + + // This can be overridden to provide other message behavior if needed + void spirvbin_t::msg(int minVerbosity, int indent, const std::string& txt) const + { + if (verbose >= minVerbosity) + logHandler(std::string(indent, ' ') + txt); + } + + // hash opcode, with special handling for OpExtInst + std::uint32_t spirvbin_t::asOpCodeHash(unsigned word) + { + const spv::Op opCode = asOpCode(word); + + std::uint32_t offset = 0; + + switch (opCode) { + case spv::OpExtInst: + offset += asId(word + 4); break; + default: + break; + } + + return opCode * 19 + offset; // 19 = small prime + } + + spirvbin_t::range_t spirvbin_t::literalRange(spv::Op opCode) const + { + static const int maxCount = 1<<30; + + switch (opCode) { + case spv::OpTypeFloat: // fall through... + case spv::OpTypePointer: return range_t(2, 3); + case spv::OpTypeInt: return range_t(2, 4); + // TODO: case spv::OpTypeImage: + // TODO: case spv::OpTypeSampledImage: + case spv::OpTypeSampler: return range_t(3, 8); + case spv::OpTypeVector: // fall through + case spv::OpTypeMatrix: // ... + case spv::OpTypePipe: return range_t(3, 4); + case spv::OpConstant: return range_t(3, maxCount); + default: return range_t(0, 0); + } + } + + spirvbin_t::range_t spirvbin_t::typeRange(spv::Op opCode) const + { + static const int maxCount = 1<<30; + + if (isConstOp(opCode)) + return range_t(1, 2); + + switch (opCode) { + case spv::OpTypeVector: // fall through + case spv::OpTypeMatrix: // ... + case spv::OpTypeSampler: // ... + case spv::OpTypeArray: // ... + case spv::OpTypeRuntimeArray: // ... 
+ case spv::OpTypePipe: return range_t(2, 3); + case spv::OpTypeStruct: // fall through + case spv::OpTypeFunction: return range_t(2, maxCount); + case spv::OpTypePointer: return range_t(3, 4); + default: return range_t(0, 0); + } + } + + spirvbin_t::range_t spirvbin_t::constRange(spv::Op opCode) const + { + static const int maxCount = 1<<30; + + switch (opCode) { + case spv::OpTypeArray: // fall through... + case spv::OpTypeRuntimeArray: return range_t(3, 4); + case spv::OpConstantComposite: return range_t(3, maxCount); + default: return range_t(0, 0); + } + } + + // Is this an opcode we should remove when using --strip? + bool spirvbin_t::isStripOp(spv::Op opCode) const + { + switch (opCode) { + case spv::OpSource: + case spv::OpSourceExtension: + case spv::OpName: + case spv::OpMemberName: + case spv::OpLine: return true; + default: return false; + } + } + + bool spirvbin_t::isFlowCtrl(spv::Op opCode) const + { + switch (opCode) { + case spv::OpBranchConditional: + case spv::OpBranch: + case spv::OpSwitch: + case spv::OpLoopMerge: + case spv::OpSelectionMerge: + case spv::OpLabel: + case spv::OpFunction: + case spv::OpFunctionEnd: return true; + default: return false; + } + } + + bool spirvbin_t::isTypeOp(spv::Op opCode) const + { + switch (opCode) { + case spv::OpTypeVoid: + case spv::OpTypeBool: + case spv::OpTypeInt: + case spv::OpTypeFloat: + case spv::OpTypeVector: + case spv::OpTypeMatrix: + case spv::OpTypeImage: + case spv::OpTypeSampler: + case spv::OpTypeArray: + case spv::OpTypeRuntimeArray: + case spv::OpTypeStruct: + case spv::OpTypeOpaque: + case spv::OpTypePointer: + case spv::OpTypeFunction: + case spv::OpTypeEvent: + case spv::OpTypeDeviceEvent: + case spv::OpTypeReserveId: + case spv::OpTypeQueue: + case spv::OpTypeSampledImage: + case spv::OpTypePipe: return true; + default: return false; + } + } + + bool spirvbin_t::isConstOp(spv::Op opCode) const + { + switch (opCode) { + case spv::OpConstantNull: error("unimplemented constant type"); + case 
spv::OpConstantSampler: error("unimplemented constant type"); + + case spv::OpConstantTrue: + case spv::OpConstantFalse: + case spv::OpConstantComposite: + case spv::OpConstant: return true; + default: return false; + } + } + + const auto inst_fn_nop = [](spv::Op, unsigned) { return false; }; + const auto op_fn_nop = [](spv::Id&) { }; + + // g++ doesn't like these defined in the class proper in an anonymous namespace. + // Dunno why. Also MSVC doesn't like the constexpr keyword. Also dunno why. + // Defining them externally seems to please both compilers, so, here they are. + const spv::Id spirvbin_t::unmapped = spv::Id(-10000); + const spv::Id spirvbin_t::unused = spv::Id(-10001); + const int spirvbin_t::header_size = 5; + + spv::Id spirvbin_t::nextUnusedId(spv::Id id) + { + while (isNewIdMapped(id)) // search for an unused ID + ++id; + + return id; + } + + spv::Id spirvbin_t::localId(spv::Id id, spv::Id newId) + { + assert(id != spv::NoResult && newId != spv::NoResult); + + if (id >= idMapL.size()) + idMapL.resize(id+1, unused); + + if (newId != unmapped && newId != unused) { + if (isOldIdUnused(id)) + error(std::string("ID unused in module: ") + std::to_string(id)); + + if (!isOldIdUnmapped(id)) + error(std::string("ID already mapped: ") + std::to_string(id) + " -> " + + std::to_string(localId(id))); + + if (isNewIdMapped(newId)) + error(std::string("ID already used in module: ") + std::to_string(newId)); + + msg(4, 4, std::string("map: ") + std::to_string(id) + " -> " + std::to_string(newId)); + setMapped(newId); + largestNewId = std::max(largestNewId, newId); + } + + return idMapL[id] = newId; + } + + // Parse a literal string from the SPIR binary and return it as an std::string + // Due to C++11 RValue references, this doesn't copy the result string. 
+ std::string spirvbin_t::literalString(unsigned word) const + { + std::string literal; + + literal.reserve(16); + + const char* bytes = reinterpret_cast(spv.data() + word); + + while (bytes && *bytes) + literal += *bytes++; + + return literal; + } + + + void spirvbin_t::applyMap() + { + msg(3, 2, std::string("Applying map: ")); + + // Map local IDs through the ID map + process(inst_fn_nop, // ignore instructions + [this](spv::Id& id) { + id = localId(id); + assert(id != unused && id != unmapped); + } + ); + } + + + // Find free IDs for anything we haven't mapped + void spirvbin_t::mapRemainder() + { + msg(3, 2, std::string("Remapping remainder: ")); + + spv::Id unusedId = 1; // can't use 0: that's NoResult + spirword_t maxBound = 0; + + for (spv::Id id = 0; id < idMapL.size(); ++id) { + if (isOldIdUnused(id)) + continue; + + // Find a new mapping for any used but unmapped IDs + if (isOldIdUnmapped(id)) + localId(id, unusedId = nextUnusedId(unusedId)); + + if (isOldIdUnmapped(id)) + error(std::string("old ID not mapped: ") + std::to_string(id)); + + // Track max bound + maxBound = std::max(maxBound, localId(id) + 1); + } + + bound(maxBound); // reset header ID bound to as big as it now needs to be + } + + void spirvbin_t::stripDebug() + { + if ((options & STRIP) == 0) + return; + + // build local Id and name maps + process( + [&](spv::Op opCode, unsigned start) { + // remember opcodes we want to strip later + if (isStripOp(opCode)) + stripInst(start); + return true; + }, + op_fn_nop); + } + + void spirvbin_t::buildLocalMaps() + { + msg(2, 2, std::string("build local maps: ")); + + mapped.clear(); + idMapL.clear(); +// preserve nameMap, so we don't clear that. 
+ fnPos.clear(); + fnPosDCE.clear(); + fnCalls.clear(); + typeConstPos.clear(); + typeConstPosR.clear(); + entryPoint = spv::NoResult; + largestNewId = 0; + + idMapL.resize(bound(), unused); + + int fnStart = 0; + spv::Id fnRes = spv::NoResult; + + // build local Id and name maps + process( + [&](spv::Op opCode, unsigned start) { + // remember opcodes we want to strip later + if ((options & STRIP) && isStripOp(opCode)) + stripInst(start); + + if (opCode == spv::Op::OpName) { + const spv::Id target = asId(start+1); + const std::string name = literalString(start+2); + nameMap[name] = target; + + } else if (opCode == spv::Op::OpFunctionCall) { + ++fnCalls[asId(start + 3)]; + } else if (opCode == spv::Op::OpEntryPoint) { + entryPoint = asId(start + 2); + } else if (opCode == spv::Op::OpFunction) { + if (fnStart != 0) + error("nested function found"); + fnStart = start; + fnRes = asId(start + 2); + } else if (opCode == spv::Op::OpFunctionEnd) { + assert(fnRes != spv::NoResult); + if (fnStart == 0) + error("function end without function start"); + fnPos[fnRes] = range_t(fnStart, start + asWordCount(start)); + fnStart = 0; + } else if (isConstOp(opCode)) { + assert(asId(start + 2) != spv::NoResult); + typeConstPos.insert(start); + typeConstPosR[asId(start + 2)] = start; + } else if (isTypeOp(opCode)) { + assert(asId(start + 1) != spv::NoResult); + typeConstPos.insert(start); + typeConstPosR[asId(start + 1)] = start; + } + + return false; + }, + + [this](spv::Id& id) { localId(id, unmapped); } + ); + } + + // Validate the SPIR header + void spirvbin_t::validate() const + { + msg(2, 2, std::string("validating: ")); + + if (spv.size() < header_size) + error("file too short: "); + + if (magic() != spv::MagicNumber) + error("bad magic number"); + + // field 1 = version + // field 2 = generator magic + // field 3 = result bound + + if (schemaNum() != 0) + error("bad schema, must be 0"); + } + + + int spirvbin_t::processInstruction(unsigned word, instfn_t instFn, idfn_t idFn) + 
{ + const auto instructionStart = word; + const unsigned wordCount = asWordCount(instructionStart); + const spv::Op opCode = asOpCode(instructionStart); + const int nextInst = word++ + wordCount; + + if (nextInst > int(spv.size())) + error("spir instruction terminated too early"); + + // Base for computing number of operands; will be updated as more is learned + unsigned numOperands = wordCount - 1; + + if (instFn(opCode, instructionStart)) + return nextInst; + + // Read type and result ID from instruction desc table + if (spv::InstructionDesc[opCode].hasType()) { + idFn(asId(word++)); + --numOperands; + } + + if (spv::InstructionDesc[opCode].hasResult()) { + idFn(asId(word++)); + --numOperands; + } + + // Extended instructions: currently, assume everything is an ID. + // TODO: add whatever data we need for exceptions to that + if (opCode == spv::OpExtInst) { + word += 2; // instruction set, and instruction from set + numOperands -= 2; + + for (unsigned op=0; op < numOperands; ++op) + idFn(asId(word++)); // ID + + return nextInst; + } + + // Store IDs from instruction in our map + for (int op = 0; numOperands > 0; ++op, --numOperands) { + switch (spv::InstructionDesc[opCode].operands.getClass(op)) { + case spv::OperandId: + idFn(asId(word++)); + break; + + case spv::OperandVariableIds: + for (unsigned i = 0; i < numOperands; ++i) + idFn(asId(word++)); + return nextInst; + + case spv::OperandVariableLiterals: + // for clarity + // if (opCode == spv::OpDecorate && asDecoration(word - 1) == spv::DecorationBuiltIn) { + // ++word; + // --numOperands; + // } + // word += numOperands; + return nextInst; + + case spv::OperandVariableLiteralId: + while (numOperands > 0) { + ++word; // immediate + idFn(asId(word++)); // ID + numOperands -= 2; + } + return nextInst; + + case spv::OperandLiteralString: { + const int stringWordCount = literalStringWords(literalString(word)); + word += stringWordCount; + numOperands -= (stringWordCount-1); // -1 because for() header 
post-decrements + break; + } + + // Execution mode might have extra literal operands. Skip them. + case spv::OperandExecutionMode: + return nextInst; + + // Single word operands we simply ignore, as they hold no IDs + case spv::OperandLiteralNumber: + case spv::OperandSource: + case spv::OperandExecutionModel: + case spv::OperandAddressing: + case spv::OperandMemory: + case spv::OperandStorage: + case spv::OperandDimensionality: + case spv::OperandSamplerAddressingMode: + case spv::OperandSamplerFilterMode: + case spv::OperandSamplerImageFormat: + case spv::OperandImageChannelOrder: + case spv::OperandImageChannelDataType: + case spv::OperandImageOperands: + case spv::OperandFPFastMath: + case spv::OperandFPRoundingMode: + case spv::OperandLinkageType: + case spv::OperandAccessQualifier: + case spv::OperandFuncParamAttr: + case spv::OperandDecoration: + case spv::OperandBuiltIn: + case spv::OperandSelect: + case spv::OperandLoop: + case spv::OperandFunction: + case spv::OperandMemorySemantics: + case spv::OperandMemoryAccess: + case spv::OperandScope: + case spv::OperandGroupOperation: + case spv::OperandKernelEnqueueFlags: + case spv::OperandKernelProfilingInfo: + case spv::OperandCapability: + ++word; + break; + + default: + assert(0 && "Unhandled Operand Class"); + break; + } + } + + return nextInst; + } + + // Make a pass over all the instructions and process them given appropriate functions + spirvbin_t& spirvbin_t::process(instfn_t instFn, idfn_t idFn, unsigned begin, unsigned end) + { + // For efficiency, reserve name map space. It can grow if needed. + nameMap.reserve(32); + + // If begin or end == 0, use defaults + begin = (begin == 0 ? header_size : begin); + end = (end == 0 ? unsigned(spv.size()) : end); + + // basic parsing and InstructionDesc table borrowed from SpvDisassemble.cpp... 
+ unsigned nextInst = unsigned(spv.size()); + + for (unsigned word = begin; word < end; word = nextInst) + nextInst = processInstruction(word, instFn, idFn); + + return *this; + } + + // Apply global name mapping to a single module + void spirvbin_t::mapNames() + { + static const std::uint32_t softTypeIdLimit = 3011; // small prime. TODO: get from options + static const std::uint32_t firstMappedID = 3019; // offset into ID space + + for (const auto& name : nameMap) { + std::uint32_t hashval = 1911; + for (const char c : name.first) + hashval = hashval * 1009 + c; + + if (isOldIdUnmapped(name.second)) + localId(name.second, nextUnusedId(hashval % softTypeIdLimit + firstMappedID)); + } + } + + // Map fn contents to IDs of similar functions in other modules + void spirvbin_t::mapFnBodies() + { + static const std::uint32_t softTypeIdLimit = 19071; // small prime. TODO: get from options + static const std::uint32_t firstMappedID = 6203; // offset into ID space + + // Initial approach: go through some high priority opcodes first and assign them + // hash values. + + spv::Id fnId = spv::NoResult; + std::vector instPos; + instPos.reserve(unsigned(spv.size()) / 16); // initial estimate; can grow if needed. + + // Build local table of instruction start positions + process( + [&](spv::Op, unsigned start) { instPos.push_back(start); return true; }, + op_fn_nop); + + // Window size for context-sensitive canonicalization values + // Emperical best size from a single data set. TODO: Would be a good tunable. + // We essentially perform a little convolution around each instruction, + // to capture the flavor of nearby code, to hopefully match to similar + // code in other modules. 
+ static const unsigned windowSize = 2; + + for (unsigned entry = 0; entry < unsigned(instPos.size()); ++entry) { + const unsigned start = instPos[entry]; + const spv::Op opCode = asOpCode(start); + + if (opCode == spv::OpFunction) + fnId = asId(start + 2); + + if (opCode == spv::OpFunctionEnd) + fnId = spv::NoResult; + + if (fnId != spv::NoResult) { // if inside a function + if (spv::InstructionDesc[opCode].hasResult()) { + const unsigned word = start + (spv::InstructionDesc[opCode].hasType() ? 2 : 1); + const spv::Id resId = asId(word); + std::uint32_t hashval = fnId * 17; // small prime + + for (unsigned i = entry-1; i >= entry-windowSize; --i) { + if (asOpCode(instPos[i]) == spv::OpFunction) + break; + hashval = hashval * 30103 + asOpCodeHash(instPos[i]); // 30103 = semiarbitrary prime + } + + for (unsigned i = entry; i <= entry + windowSize; ++i) { + if (asOpCode(instPos[i]) == spv::OpFunctionEnd) + break; + hashval = hashval * 30103 + asOpCodeHash(instPos[i]); // 30103 = semiarbitrary prime + } + + if (isOldIdUnmapped(resId)) + localId(resId, nextUnusedId(hashval % softTypeIdLimit + firstMappedID)); + } + } + } + + spv::Op thisOpCode(spv::OpNop); + std::unordered_map opCounter; + int idCounter(0); + fnId = spv::NoResult; + + process( + [&](spv::Op opCode, unsigned start) { + switch (opCode) { + case spv::OpFunction: + // Reset counters at each function + idCounter = 0; + opCounter.clear(); + fnId = asId(start + 2); + break; + + case spv::OpImageSampleImplicitLod: + case spv::OpImageSampleExplicitLod: + case spv::OpImageSampleDrefImplicitLod: + case spv::OpImageSampleDrefExplicitLod: + case spv::OpImageSampleProjImplicitLod: + case spv::OpImageSampleProjExplicitLod: + case spv::OpImageSampleProjDrefImplicitLod: + case spv::OpImageSampleProjDrefExplicitLod: + case spv::OpDot: + case spv::OpCompositeExtract: + case spv::OpCompositeInsert: + case spv::OpVectorShuffle: + case spv::OpLabel: + case spv::OpVariable: + + case spv::OpAccessChain: + case spv::OpLoad: + 
case spv::OpStore: + case spv::OpCompositeConstruct: + case spv::OpFunctionCall: + ++opCounter[opCode]; + idCounter = 0; + thisOpCode = opCode; + break; + default: + thisOpCode = spv::OpNop; + } + + return false; + }, + + [&](spv::Id& id) { + if (thisOpCode != spv::OpNop) { + ++idCounter; + const std::uint32_t hashval = opCounter[thisOpCode] * thisOpCode * 50047 + idCounter + fnId * 117; + + if (isOldIdUnmapped(id)) + localId(id, nextUnusedId(hashval % softTypeIdLimit + firstMappedID)); + } + }); + } + + // EXPERIMENTAL: forward IO and uniform load/stores into operands + // This produces invalid Schema-0 SPIRV + void spirvbin_t::forwardLoadStores() + { + idset_t fnLocalVars; // set of function local vars + idmap_t idMap; // Map of load result IDs to what they load + + // EXPERIMENTAL: Forward input and access chain loads into consumptions + process( + [&](spv::Op opCode, unsigned start) { + // Add inputs and uniforms to the map + if ((opCode == spv::OpVariable && asWordCount(start) == 4) && + (spv[start+3] == spv::StorageClassUniform || + spv[start+3] == spv::StorageClassUniformConstant || + spv[start+3] == spv::StorageClassInput)) + fnLocalVars.insert(asId(start+2)); + + if (opCode == spv::OpAccessChain && fnLocalVars.count(asId(start+3)) > 0) + fnLocalVars.insert(asId(start+2)); + + if (opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) { + idMap[asId(start+2)] = asId(start+3); + stripInst(start); + } + + return false; + }, + + [&](spv::Id& id) { if (idMap.find(id) != idMap.end()) id = idMap[id]; } + ); + + // EXPERIMENTAL: Implicit output stores + fnLocalVars.clear(); + idMap.clear(); + + process( + [&](spv::Op opCode, unsigned start) { + // Add inputs and uniforms to the map + if ((opCode == spv::OpVariable && asWordCount(start) == 4) && + (spv[start+3] == spv::StorageClassOutput)) + fnLocalVars.insert(asId(start+2)); + + if (opCode == spv::OpStore && fnLocalVars.count(asId(start+1)) > 0) { + idMap[asId(start+2)] = asId(start+1); + stripInst(start); 
+ } + + return false; + }, + op_fn_nop); + + process( + inst_fn_nop, + [&](spv::Id& id) { if (idMap.find(id) != idMap.end()) id = idMap[id]; } + ); + + strip(); // strip out data we decided to eliminate + } + + // optimize loads and stores + void spirvbin_t::optLoadStore() + { + idset_t fnLocalVars; // candidates for removal (only locals) + idmap_t idMap; // Map of load result IDs to what they load + blockmap_t blockMap; // Map of IDs to blocks they first appear in + int blockNum = 0; // block count, to avoid crossing flow control + + // Find all the function local pointers stored at most once, and not via access chains + process( + [&](spv::Op opCode, unsigned start) { + const int wordCount = asWordCount(start); + + // Count blocks, so we can avoid crossing flow control + if (isFlowCtrl(opCode)) + ++blockNum; + + // Add local variables to the map + if ((opCode == spv::OpVariable && spv[start+3] == spv::StorageClassFunction && asWordCount(start) == 4)) { + fnLocalVars.insert(asId(start+2)); + return true; + } + + // Ignore process vars referenced via access chain + if ((opCode == spv::OpAccessChain || opCode == spv::OpInBoundsAccessChain) && fnLocalVars.count(asId(start+3)) > 0) { + fnLocalVars.erase(asId(start+3)); + idMap.erase(asId(start+3)); + return true; + } + + if (opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) { + const spv::Id varId = asId(start+3); + + // Avoid loads before stores + if (idMap.find(varId) == idMap.end()) { + fnLocalVars.erase(varId); + idMap.erase(varId); + } + + // don't do for volatile references + if (wordCount > 4 && (spv[start+4] & spv::MemoryAccessVolatileMask)) { + fnLocalVars.erase(varId); + idMap.erase(varId); + } + + // Handle flow control + if (blockMap.find(varId) == blockMap.end()) { + blockMap[varId] = blockNum; // track block we found it in. 
+ } else if (blockMap[varId] != blockNum) { + fnLocalVars.erase(varId); // Ignore if crosses flow control + idMap.erase(varId); + } + + return true; + } + + if (opCode == spv::OpStore && fnLocalVars.count(asId(start+1)) > 0) { + const spv::Id varId = asId(start+1); + + if (idMap.find(varId) == idMap.end()) { + idMap[varId] = asId(start+2); + } else { + // Remove if it has more than one store to the same pointer + fnLocalVars.erase(varId); + idMap.erase(varId); + } + + // don't do for volatile references + if (wordCount > 3 && (spv[start+3] & spv::MemoryAccessVolatileMask)) { + fnLocalVars.erase(asId(start+3)); + idMap.erase(asId(start+3)); + } + + // Handle flow control + if (blockMap.find(varId) == blockMap.end()) { + blockMap[varId] = blockNum; // track block we found it in. + } else if (blockMap[varId] != blockNum) { + fnLocalVars.erase(varId); // Ignore if crosses flow control + idMap.erase(varId); + } + + return true; + } + + return false; + }, + + // If local var id used anywhere else, don't eliminate + [&](spv::Id& id) { + if (fnLocalVars.count(id) > 0) { + fnLocalVars.erase(id); + idMap.erase(id); + } + } + ); + + process( + [&](spv::Op opCode, unsigned start) { + if (opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) + idMap[asId(start+2)] = idMap[asId(start+3)]; + return false; + }, + op_fn_nop); + + // Chase replacements to their origins, in case there is a chain such as: + // 2 = store 1 + // 3 = load 2 + // 4 = store 3 + // 5 = load 4 + // We want to replace uses of 5 with 1. 
+ for (const auto& idPair : idMap) { + spv::Id id = idPair.first; + while (idMap.find(id) != idMap.end()) // Chase to end of chain + id = idMap[id]; + + idMap[idPair.first] = id; // replace with final result + } + + // Remove the load/store/variables for the ones we've discovered + process( + [&](spv::Op opCode, unsigned start) { + if ((opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) || + (opCode == spv::OpStore && fnLocalVars.count(asId(start+1)) > 0) || + (opCode == spv::OpVariable && fnLocalVars.count(asId(start+2)) > 0)) { + + stripInst(start); + return true; + } + + return false; + }, + + [&](spv::Id& id) { + if (idMap.find(id) != idMap.end()) id = idMap[id]; + } + ); + + strip(); // strip out data we decided to eliminate + } + + // remove bodies of uncalled functions + void spirvbin_t::dceFuncs() + { + msg(3, 2, std::string("Removing Dead Functions: ")); + + // TODO: There are more efficient ways to do this. + bool changed = true; + + while (changed) { + changed = false; + + for (auto fn = fnPos.begin(); fn != fnPos.end(); ) { + if (fn->first == entryPoint) { // don't DCE away the entry point! 
+ ++fn; + continue; + } + + const auto call_it = fnCalls.find(fn->first); + + if (call_it == fnCalls.end() || call_it->second == 0) { + changed = true; + stripRange.push_back(fn->second); + fnPosDCE.insert(*fn); + + // decrease counts of called functions + process( + [&](spv::Op opCode, unsigned start) { + if (opCode == spv::Op::OpFunctionCall) { + const auto call_it = fnCalls.find(asId(start + 3)); + if (call_it != fnCalls.end()) { + if (--call_it->second <= 0) + fnCalls.erase(call_it); + } + } + + return true; + }, + op_fn_nop, + fn->second.first, + fn->second.second); + + fn = fnPos.erase(fn); + } else ++fn; + } + } + } + + // remove unused function variables + decorations + void spirvbin_t::dceVars() + { + msg(3, 2, std::string("DCE Vars: ")); + + std::unordered_map varUseCount; + + // Count function variable use + process( + [&](spv::Op opCode, unsigned start) { + if (opCode == spv::OpVariable) { ++varUseCount[asId(start+2)]; return true; } + return false; + }, + + [&](spv::Id& id) { if (varUseCount[id]) ++varUseCount[id]; } + ); + + // Remove single-use function variables + associated decorations and names + process( + [&](spv::Op opCode, unsigned start) { + if ((opCode == spv::OpVariable && varUseCount[asId(start+2)] == 1) || + (opCode == spv::OpDecorate && varUseCount[asId(start+1)] == 1) || + (opCode == spv::OpName && varUseCount[asId(start+1)] == 1)) { + stripInst(start); + } + return true; + }, + op_fn_nop); + } + + // remove unused types + void spirvbin_t::dceTypes() + { + std::vector isType(bound(), false); + + // for speed, make O(1) way to get to type query (map is log(n)) + for (const auto typeStart : typeConstPos) + isType[asTypeConstId(typeStart)] = true; + + std::unordered_map typeUseCount; + + // Count total type usage + process(inst_fn_nop, + [&](spv::Id& id) { if (isType[id]) ++typeUseCount[id]; } + ); + + // Remove types from deleted code + for (const auto& fn : fnPosDCE) + process(inst_fn_nop, + [&](spv::Id& id) { if (isType[id]) 
--typeUseCount[id]; }, + fn.second.first, fn.second.second); + + // Remove single reference types + for (const auto typeStart : typeConstPos) { + const spv::Id typeId = asTypeConstId(typeStart); + if (typeUseCount[typeId] == 1) { + --typeUseCount[typeId]; + stripInst(typeStart); + } + } + } + + +#ifdef NOTDEF + bool spirvbin_t::matchType(const spirvbin_t::globaltypes_t& globalTypes, spv::Id lt, spv::Id gt) const + { + // Find the local type id "lt" and global type id "gt" + const auto lt_it = typeConstPosR.find(lt); + if (lt_it == typeConstPosR.end()) + return false; + + const auto typeStart = lt_it->second; + + // Search for entry in global table + const auto gtype = globalTypes.find(gt); + if (gtype == globalTypes.end()) + return false; + + const auto& gdata = gtype->second; + + // local wordcount and opcode + const int wordCount = asWordCount(typeStart); + const spv::Op opCode = asOpCode(typeStart); + + // no type match if opcodes don't match, or operand count doesn't match + if (opCode != opOpCode(gdata[0]) || wordCount != opWordCount(gdata[0])) + return false; + + const unsigned numOperands = wordCount - 2; // all types have a result + + const auto cmpIdRange = [&](range_t range) { + for (int x=range.first; xsecond; + } + + // Hash types to canonical values. This can return ID collisions (it's a bit + // inevitable): it's up to the caller to handle that gracefully. 
+ std::uint32_t spirvbin_t::hashType(unsigned typeStart) const + { + const unsigned wordCount = asWordCount(typeStart); + const spv::Op opCode = asOpCode(typeStart); + + switch (opCode) { + case spv::OpTypeVoid: return 0; + case spv::OpTypeBool: return 1; + case spv::OpTypeInt: return 3 + (spv[typeStart+3]); + case spv::OpTypeFloat: return 5; + case spv::OpTypeVector: + return 6 + hashType(typePos(spv[typeStart+2])) * (spv[typeStart+3] - 1); + case spv::OpTypeMatrix: + return 30 + hashType(typePos(spv[typeStart+2])) * (spv[typeStart+3] - 1); + case spv::OpTypeImage: + return 120 + hashType(typePos(spv[typeStart+2])) + + spv[typeStart+3] + // dimensionality + spv[typeStart+4] * 8 * 16 + // depth + spv[typeStart+5] * 4 * 16 + // arrayed + spv[typeStart+6] * 2 * 16 + // multisampled + spv[typeStart+7] * 1 * 16; // format + case spv::OpTypeSampler: + return 500; + case spv::OpTypeSampledImage: + return 502; + case spv::OpTypeArray: + return 501 + hashType(typePos(spv[typeStart+2])) * spv[typeStart+3]; + case spv::OpTypeRuntimeArray: + return 5000 + hashType(typePos(spv[typeStart+2])); + case spv::OpTypeStruct: + { + std::uint32_t hash = 10000; + for (unsigned w=2; w < wordCount; ++w) + hash += w * hashType(typePos(spv[typeStart+w])); + return hash; + } + + case spv::OpTypeOpaque: return 6000 + spv[typeStart+2]; + case spv::OpTypePointer: return 100000 + hashType(typePos(spv[typeStart+3])); + case spv::OpTypeFunction: + { + std::uint32_t hash = 200000; + for (unsigned w=2; w < wordCount; ++w) + hash += w * hashType(typePos(spv[typeStart+w])); + return hash; + } + + case spv::OpTypeEvent: return 300000; + case spv::OpTypeDeviceEvent: return 300001; + case spv::OpTypeReserveId: return 300002; + case spv::OpTypeQueue: return 300003; + case spv::OpTypePipe: return 300004; + + case spv::OpConstantNull: return 300005; + case spv::OpConstantSampler: return 300006; + + case spv::OpConstantTrue: return 300007; + case spv::OpConstantFalse: return 300008; + case 
spv::OpConstantComposite: + { + std::uint32_t hash = 300011 + hashType(typePos(spv[typeStart+1])); + for (unsigned w=3; w < wordCount; ++w) + hash += w * hashType(typePos(spv[typeStart+w])); + return hash; + } + case spv::OpConstant: + { + std::uint32_t hash = 400011 + hashType(typePos(spv[typeStart+1])); + for (unsigned w=3; w < wordCount; ++w) + hash += w * spv[typeStart+w]; + return hash; + } + + default: + error("unknown type opcode"); + return 0; + } + } + + void spirvbin_t::mapTypeConst() + { + globaltypes_t globalTypeMap; + + msg(3, 2, std::string("Remapping Consts & Types: ")); + + static const std::uint32_t softTypeIdLimit = 3011; // small prime. TODO: get from options + static const std::uint32_t firstMappedID = 8; // offset into ID space + + for (auto& typeStart : typeConstPos) { + const spv::Id resId = asTypeConstId(typeStart); + const std::uint32_t hashval = hashType(typeStart); + + if (isOldIdUnmapped(resId)) + localId(resId, nextUnusedId(hashval % softTypeIdLimit + firstMappedID)); + } + } + + + // Strip a single binary by removing ranges given in stripRange + void spirvbin_t::strip() + { + if (stripRange.empty()) // nothing to do + return; + + // Sort strip ranges in order of traversal + std::sort(stripRange.begin(), stripRange.end()); + + // Allocate a new binary big enough to hold old binary + // We'll step this iterator through the strip ranges as we go through the binary + auto strip_it = stripRange.begin(); + + int strippedPos = 0; + for (unsigned word = 0; word < unsigned(spv.size()); ++word) { + if (strip_it != stripRange.end() && word >= strip_it->second) + ++strip_it; + + if (strip_it == stripRange.end() || word < strip_it->first || word >= strip_it->second) + spv[strippedPos++] = spv[word]; + } + + spv.resize(strippedPos); + stripRange.clear(); + + buildLocalMaps(); + } + + // Strip a single binary by removing ranges given in stripRange + void spirvbin_t::remap(std::uint32_t opts) + { + options = opts; + + // Set up opcode tables from 
SpvDoc + spv::Parameterize(); + + validate(); // validate header + buildLocalMaps(); + + msg(3, 4, std::string("ID bound: ") + std::to_string(bound())); + + strip(); // strip out data we decided to eliminate + + if (options & OPT_LOADSTORE) optLoadStore(); + if (options & OPT_FWD_LS) forwardLoadStores(); + if (options & DCE_FUNCS) dceFuncs(); + if (options & DCE_VARS) dceVars(); + if (options & DCE_TYPES) dceTypes(); + if (options & MAP_TYPES) mapTypeConst(); + if (options & MAP_NAMES) mapNames(); + if (options & MAP_FUNCS) mapFnBodies(); + + mapRemainder(); // map any unmapped IDs + applyMap(); // Now remap each shader to the new IDs we've come up with + strip(); // strip out data we decided to eliminate + } + + // remap from a memory image + void spirvbin_t::remap(std::vector& in_spv, std::uint32_t opts) + { + spv.swap(in_spv); + remap(opts); + spv.swap(in_spv); + } + +} // namespace SPV + +#endif // defined (use_cpp11) + diff --git a/third_party/glslang-spirv/SPVRemapper.h b/third_party/glslang-spirv/SPVRemapper.h new file mode 100644 index 000000000..e5e8e1bde --- /dev/null +++ b/third_party/glslang-spirv/SPVRemapper.h @@ -0,0 +1,288 @@ +// +//Copyright (C) 2015 LunarG, Inc. +// +//All rights reserved. +// +//Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions +//are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +//POSSIBILITY OF SUCH DAMAGE. +// + +#ifndef SPIRVREMAPPER_H +#define SPIRVREMAPPER_H + +#include +#include +#include + +namespace spv { + +// MSVC defines __cplusplus as an older value, even when it supports almost all of 11. +// We handle that here by making our own symbol. 
+#if __cplusplus >= 201103L || _MSC_VER >= 1700 +# define use_cpp11 1 +#endif + +class spirvbin_base_t +{ +public: + enum Options { + NONE = 0, + STRIP = (1<<0), + MAP_TYPES = (1<<1), + MAP_NAMES = (1<<2), + MAP_FUNCS = (1<<3), + DCE_FUNCS = (1<<4), + DCE_VARS = (1<<5), + DCE_TYPES = (1<<6), + OPT_LOADSTORE = (1<<7), + OPT_FWD_LS = (1<<8), // EXPERIMENTAL: PRODUCES INVALID SCHEMA-0 SPIRV + MAP_ALL = (MAP_TYPES | MAP_NAMES | MAP_FUNCS), + DCE_ALL = (DCE_FUNCS | DCE_VARS | DCE_TYPES), + OPT_ALL = (OPT_LOADSTORE), + + ALL_BUT_STRIP = (MAP_ALL | DCE_ALL | OPT_ALL), + DO_EVERYTHING = (STRIP | ALL_BUT_STRIP) + }; +}; + +} // namespace SPV + +#if !defined (use_cpp11) +#include + +namespace spv { +class spirvbin_t : public spirvbin_base_t +{ +public: + spirvbin_t(int /*verbose = 0*/) { } + + void remap(std::vector& /*spv*/, unsigned int /*opts = 0*/) + { + printf("Tool not compiled for C++11, which is required for SPIR-V remapping.\n"); + exit(5); + } +}; + +} // namespace SPV + +#else // defined (use_cpp11) + +#include +#include +#include +#include +#include +#include +#include + +#include "spirv.hpp" +#include "spvIR.h" + +namespace spv { + +// class to hold SPIR-V binary data for remapping, DCE, and debug stripping +class spirvbin_t : public spirvbin_base_t +{ +public: + spirvbin_t(int verbose = 0) : entryPoint(spv::NoResult), largestNewId(0), verbose(verbose) { } + + // remap on an existing binary in memory + void remap(std::vector& spv, std::uint32_t opts = DO_EVERYTHING); + + // Type for error/log handler functions + typedef std::function errorfn_t; + typedef std::function logfn_t; + + // Register error/log handling functions (can be lambda fn / functor / etc) + static void registerErrorHandler(errorfn_t handler) { errorHandler = handler; } + static void registerLogHandler(logfn_t handler) { logHandler = handler; } + +protected: + // This can be overridden to provide other message behavior if needed + virtual void msg(int minVerbosity, int indent, const std::string& 
txt) const; + +private: + // Local to global, or global to local ID map + typedef std::unordered_map idmap_t; + typedef std::unordered_set idset_t; + typedef std::unordered_map blockmap_t; + + void remap(std::uint32_t opts = DO_EVERYTHING); + + // Map of names to IDs + typedef std::unordered_map namemap_t; + + typedef std::uint32_t spirword_t; + + typedef std::pair range_t; + typedef std::function idfn_t; + typedef std::function instfn_t; + + // Special Values for ID map: + static const spv::Id unmapped; // unchanged from default value + static const spv::Id unused; // unused ID + static const int header_size; // SPIR header = 5 words + + class id_iterator_t; + + // For mapping type entries between different shaders + typedef std::vector typeentry_t; + typedef std::map globaltypes_t; + + // A set that preserves position order, and a reverse map + typedef std::set posmap_t; + typedef std::unordered_map posmap_rev_t; + + // handle error + void error(const std::string& txt) const { errorHandler(txt); } + + bool isConstOp(spv::Op opCode) const; + bool isTypeOp(spv::Op opCode) const; + bool isStripOp(spv::Op opCode) const; + bool isFlowCtrl(spv::Op opCode) const; + range_t literalRange(spv::Op opCode) const; + range_t typeRange(spv::Op opCode) const; + range_t constRange(spv::Op opCode) const; + + spv::Id& asId(unsigned word) { return spv[word]; } + const spv::Id& asId(unsigned word) const { return spv[word]; } + spv::Op asOpCode(unsigned word) const { return opOpCode(spv[word]); } + std::uint32_t asOpCodeHash(unsigned word); + spv::Decoration asDecoration(unsigned word) const { return spv::Decoration(spv[word]); } + unsigned asWordCount(unsigned word) const { return opWordCount(spv[word]); } + spv::Id asTypeConstId(unsigned word) const { return asId(word + (isTypeOp(asOpCode(word)) ? 
1 : 2)); } + unsigned typePos(spv::Id id) const; + + static unsigned opWordCount(spirword_t data) { return data >> spv::WordCountShift; } + static spv::Op opOpCode(spirword_t data) { return spv::Op(data & spv::OpCodeMask); } + + // Header access & set methods + spirword_t magic() const { return spv[0]; } // return magic number + spirword_t bound() const { return spv[3]; } // return Id bound from header + spirword_t bound(spirword_t b) { return spv[3] = b; }; + spirword_t genmagic() const { return spv[2]; } // generator magic + spirword_t genmagic(spirword_t m) { return spv[2] = m; } + spirword_t schemaNum() const { return spv[4]; } // schema number from header + + // Mapping fns: get + spv::Id localId(spv::Id id) const { return idMapL[id]; } + + // Mapping fns: set + inline spv::Id localId(spv::Id id, spv::Id newId); + void countIds(spv::Id id); + + // Return next unused new local ID. + // NOTE: boost::dynamic_bitset would be more efficient due to find_next(), + // which std::vector doens't have. 
+ inline spv::Id nextUnusedId(spv::Id id); + + void buildLocalMaps(); + std::string literalString(unsigned word) const; // Return literal as a std::string + int literalStringWords(const std::string& str) const { return (int(str.size())+4)/4; } + + bool isNewIdMapped(spv::Id newId) const { return isMapped(newId); } + bool isOldIdUnmapped(spv::Id oldId) const { return localId(oldId) == unmapped; } + bool isOldIdUnused(spv::Id oldId) const { return localId(oldId) == unused; } + bool isOldIdMapped(spv::Id oldId) const { return !isOldIdUnused(oldId) && !isOldIdUnmapped(oldId); } + bool isFunction(spv::Id oldId) const { return fnPos.find(oldId) != fnPos.end(); } + + // bool matchType(const globaltypes_t& globalTypes, spv::Id lt, spv::Id gt) const; + // spv::Id findType(const globaltypes_t& globalTypes, spv::Id lt) const; + std::uint32_t hashType(unsigned typeStart) const; + + spirvbin_t& process(instfn_t, idfn_t, unsigned begin = 0, unsigned end = 0); + int processInstruction(unsigned word, instfn_t, idfn_t); + + void validate() const; + void mapTypeConst(); + void mapFnBodies(); + void optLoadStore(); + void dceFuncs(); + void dceVars(); + void dceTypes(); + void mapNames(); + void foldIds(); // fold IDs to smallest space + void forwardLoadStores(); // load store forwarding (EXPERIMENTAL) + void offsetIds(); // create relative offset IDs + + void applyMap(); // remap per local name map + void mapRemainder(); // map any IDs we haven't touched yet + void stripDebug(); // strip debug info + void strip(); // remove debug symbols + + std::vector spv; // SPIR words + + namemap_t nameMap; // ID names from OpName + + // Since we want to also do binary ops, we can't use std::vector. we could use + // boost::dynamic_bitset, but we're trying to avoid a boost dependency. 
+ typedef std::uint64_t bits_t; + std::vector mapped; // which new IDs have been mapped + static const int mBits = sizeof(bits_t) * 4; + + bool isMapped(spv::Id id) const { return id < maxMappedId() && ((mapped[id/mBits] & (1LL<<(id%mBits))) != 0); } + void setMapped(spv::Id id) { resizeMapped(id); mapped[id/mBits] |= (1LL<<(id%mBits)); } + void resizeMapped(spv::Id id) { if (id >= maxMappedId()) mapped.resize(id/mBits+1, 0); } + size_t maxMappedId() const { return mapped.size() * mBits; } + + // Add a strip range for a given instruction starting at 'start' + // Note: avoiding brace initializers to please older versions os MSVC. + void stripInst(unsigned start) { stripRange.push_back(range_t(start, start + asWordCount(start))); } + + // Function start and end. use unordered_map because we'll have + // many fewer functions than IDs. + std::unordered_map fnPos; + std::unordered_map fnPosDCE; // deleted functions + + // Which functions are called, anywhere in the module, with a call count + std::unordered_map fnCalls; + + posmap_t typeConstPos; // word positions that define types & consts (ordered) + posmap_rev_t typeConstPosR; // reverse map from IDs to positions + + std::vector idMapL; // ID {M}ap from {L}ocal to {G}lobal IDs + + spv::Id entryPoint; // module entry point + spv::Id largestNewId; // biggest new ID we have mapped anything to + + // Sections of the binary to strip, given as [begin,end) + std::vector stripRange; + + // processing options: + std::uint32_t options; + int verbose; // verbosity level + + static errorfn_t errorHandler; + static logfn_t logHandler; +}; + +} // namespace SPV + +#endif // defined (use_cpp11) +#endif // SPIRVREMAPPER_H diff --git a/third_party/glslang-spirv/SpvBuilder.cpp b/third_party/glslang-spirv/SpvBuilder.cpp new file mode 100644 index 000000000..0a2fa2139 --- /dev/null +++ b/third_party/glslang-spirv/SpvBuilder.cpp @@ -0,0 +1,2344 @@ +// +//Copyright (C) 2014-2015 LunarG, Inc. +//Copyright (C) 2015-2016 Google, Inc. 
+// +//All rights reserved. +// +//Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions +//are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +//POSSIBILITY OF SUCH DAMAGE. + +// +// Author: John Kessenich, LunarG +// + +// +// Helper for making SPIR-V IR. Generally, this is documented in the header +// SpvBuilder.h. 
+// + +#include +#include +#include + +#include + +#include "SpvBuilder.h" + +#ifndef _WIN32 + #include +#endif + +namespace spv { + +Builder::Builder(unsigned int magicNumber) : + source(SourceLanguageUnknown), + sourceVersion(0), + addressModel(AddressingModelLogical), + memoryModel(MemoryModelGLSL450), + builderNumber(magicNumber), + buildPoint(0), + uniqueId(0), + mainFunction(0) +{ + clearAccessChain(); +} + +Builder::~Builder() +{ +} + +Id Builder::import(const char* name) +{ + Instruction* import = new Instruction(getUniqueId(), NoType, OpExtInstImport); + import->addStringOperand(name); + + imports.push_back(std::unique_ptr(import)); + return import->getResultId(); +} + +// For creating new groupedTypes (will return old type if the requested one was already made). +Id Builder::makeVoidType() +{ + Instruction* type; + if (groupedTypes[OpTypeVoid].size() == 0) { + type = new Instruction(getUniqueId(), NoType, OpTypeVoid); + groupedTypes[OpTypeVoid].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + } else + type = groupedTypes[OpTypeVoid].back(); + + return type->getResultId(); +} + +Id Builder::makeBoolType() +{ + Instruction* type; + if (groupedTypes[OpTypeBool].size() == 0) { + type = new Instruction(getUniqueId(), NoType, OpTypeBool); + groupedTypes[OpTypeBool].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + } else + type = groupedTypes[OpTypeBool].back(); + + return type->getResultId(); +} + +Id Builder::makeSamplerType() +{ + Instruction* type; + if (groupedTypes[OpTypeSampler].size() == 0) { + type = new Instruction(getUniqueId(), NoType, OpTypeSampler); + groupedTypes[OpTypeSampler].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + } else + type = groupedTypes[OpTypeSampler].back(); + + return type->getResultId(); +} + +Id Builder::makePointer(StorageClass storageClass, Id 
pointee) +{ + // try to find it + Instruction* type; + for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) { + type = groupedTypes[OpTypePointer][t]; + if (type->getImmediateOperand(0) == (unsigned)storageClass && + type->getIdOperand(1) == pointee) + return type->getResultId(); + } + + // not found, make it + type = new Instruction(getUniqueId(), NoType, OpTypePointer); + type->addImmediateOperand(storageClass); + type->addIdOperand(pointee); + groupedTypes[OpTypePointer].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + + return type->getResultId(); +} + +Id Builder::makeIntegerType(int width, bool hasSign) +{ + // try to find it + Instruction* type; + for (int t = 0; t < (int)groupedTypes[OpTypeInt].size(); ++t) { + type = groupedTypes[OpTypeInt][t]; + if (type->getImmediateOperand(0) == (unsigned)width && + type->getImmediateOperand(1) == (hasSign ? 1u : 0u)) + return type->getResultId(); + } + + // not found, make it + type = new Instruction(getUniqueId(), NoType, OpTypeInt); + type->addImmediateOperand(width); + type->addImmediateOperand(hasSign ? 
1 : 0); + groupedTypes[OpTypeInt].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + + // deal with capabilities + switch (width) { + case 16: + addCapability(CapabilityInt16); + break; + case 64: + addCapability(CapabilityInt64); + break; + default: + break; + } + + return type->getResultId(); +} + +Id Builder::makeFloatType(int width) +{ + // try to find it + Instruction* type; + for (int t = 0; t < (int)groupedTypes[OpTypeFloat].size(); ++t) { + type = groupedTypes[OpTypeFloat][t]; + if (type->getImmediateOperand(0) == (unsigned)width) + return type->getResultId(); + } + + // not found, make it + type = new Instruction(getUniqueId(), NoType, OpTypeFloat); + type->addImmediateOperand(width); + groupedTypes[OpTypeFloat].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + + // deal with capabilities + switch (width) { + case 16: + addCapability(CapabilityFloat16); + break; + case 64: + addCapability(CapabilityFloat64); + break; + default: + break; + } + + return type->getResultId(); +} + +// Make a struct without checking for duplication. +// See makeStructResultType() for non-decorated structs +// needed as the result of some instructions, which does +// check for duplicates. +Id Builder::makeStructType(const std::vector& members, const char* name) +{ + // Don't look for previous one, because in the general case, + // structs can be duplicated except for decorations. + + // not found, make it + Instruction* type = new Instruction(getUniqueId(), NoType, OpTypeStruct); + for (int op = 0; op < (int)members.size(); ++op) + type->addIdOperand(members[op]); + groupedTypes[OpTypeStruct].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + addName(type->getResultId(), name); + + return type->getResultId(); +} + +// Make a struct for the simple results of several instructions, +// checking for duplication. 
+Id Builder::makeStructResultType(Id type0, Id type1) +{ + // try to find it + Instruction* type; + for (int t = 0; t < (int)groupedTypes[OpTypeStruct].size(); ++t) { + type = groupedTypes[OpTypeStruct][t]; + if (type->getNumOperands() != 2) + continue; + if (type->getIdOperand(0) != type0 || + type->getIdOperand(1) != type1) + continue; + return type->getResultId(); + } + + // not found, make it + std::vector members; + members.push_back(type0); + members.push_back(type1); + + return makeStructType(members, "ResType"); +} + +Id Builder::makeVectorType(Id component, int size) +{ + // try to find it + Instruction* type; + for (int t = 0; t < (int)groupedTypes[OpTypeVector].size(); ++t) { + type = groupedTypes[OpTypeVector][t]; + if (type->getIdOperand(0) == component && + type->getImmediateOperand(1) == (unsigned)size) + return type->getResultId(); + } + + // not found, make it + type = new Instruction(getUniqueId(), NoType, OpTypeVector); + type->addIdOperand(component); + type->addImmediateOperand(size); + groupedTypes[OpTypeVector].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + + return type->getResultId(); +} + +Id Builder::makeMatrixType(Id component, int cols, int rows) +{ + assert(cols <= maxMatrixSize && rows <= maxMatrixSize); + + Id column = makeVectorType(component, rows); + + // try to find it + Instruction* type; + for (int t = 0; t < (int)groupedTypes[OpTypeMatrix].size(); ++t) { + type = groupedTypes[OpTypeMatrix][t]; + if (type->getIdOperand(0) == column && + type->getImmediateOperand(1) == (unsigned)cols) + return type->getResultId(); + } + + // not found, make it + type = new Instruction(getUniqueId(), NoType, OpTypeMatrix); + type->addIdOperand(column); + type->addImmediateOperand(cols); + groupedTypes[OpTypeMatrix].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + + return type->getResultId(); +} + +// TODO: performance: track 
arrays per stride +// If a stride is supplied (non-zero) make an array. +// If no stride (0), reuse previous array types. +// 'size' is an Id of a constant or specialization constant of the array size +Id Builder::makeArrayType(Id element, Id sizeId, int stride) +{ + Instruction* type; + if (stride == 0) { + // try to find existing type + for (int t = 0; t < (int)groupedTypes[OpTypeArray].size(); ++t) { + type = groupedTypes[OpTypeArray][t]; + if (type->getIdOperand(0) == element && + type->getIdOperand(1) == sizeId) + return type->getResultId(); + } + } + + // not found, make it + type = new Instruction(getUniqueId(), NoType, OpTypeArray); + type->addIdOperand(element); + type->addIdOperand(sizeId); + groupedTypes[OpTypeArray].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + + return type->getResultId(); +} + +Id Builder::makeRuntimeArray(Id element) +{ + Instruction* type = new Instruction(getUniqueId(), NoType, OpTypeRuntimeArray); + type->addIdOperand(element); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + + return type->getResultId(); +} + +Id Builder::makeFunctionType(Id returnType, const std::vector& paramTypes) +{ + // try to find it + Instruction* type; + for (int t = 0; t < (int)groupedTypes[OpTypeFunction].size(); ++t) { + type = groupedTypes[OpTypeFunction][t]; + if (type->getIdOperand(0) != returnType || (int)paramTypes.size() != type->getNumOperands() - 1) + continue; + bool mismatch = false; + for (int p = 0; p < (int)paramTypes.size(); ++p) { + if (paramTypes[p] != type->getIdOperand(p + 1)) { + mismatch = true; + break; + } + } + if (! 
mismatch) + return type->getResultId(); + } + + // not found, make it + type = new Instruction(getUniqueId(), NoType, OpTypeFunction); + type->addIdOperand(returnType); + for (int p = 0; p < (int)paramTypes.size(); ++p) + type->addIdOperand(paramTypes[p]); + groupedTypes[OpTypeFunction].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + + return type->getResultId(); +} + +Id Builder::makeImageType(Id sampledType, Dim dim, bool depth, bool arrayed, bool ms, unsigned sampled, ImageFormat format) +{ + // try to find it + Instruction* type; + for (int t = 0; t < (int)groupedTypes[OpTypeImage].size(); ++t) { + type = groupedTypes[OpTypeImage][t]; + if (type->getIdOperand(0) == sampledType && + type->getImmediateOperand(1) == (unsigned int)dim && + type->getImmediateOperand(2) == ( depth ? 1u : 0u) && + type->getImmediateOperand(3) == (arrayed ? 1u : 0u) && + type->getImmediateOperand(4) == ( ms ? 1u : 0u) && + type->getImmediateOperand(5) == sampled && + type->getImmediateOperand(6) == (unsigned int)format) + return type->getResultId(); + } + + // not found, make it + type = new Instruction(getUniqueId(), NoType, OpTypeImage); + type->addIdOperand(sampledType); + type->addImmediateOperand( dim); + type->addImmediateOperand( depth ? 1 : 0); + type->addImmediateOperand(arrayed ? 1 : 0); + type->addImmediateOperand( ms ? 
1 : 0); + type->addImmediateOperand(sampled); + type->addImmediateOperand((unsigned int)format); + + groupedTypes[OpTypeImage].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + + // deal with capabilities + switch (dim) { + case DimBuffer: + if (sampled) + addCapability(CapabilitySampledBuffer); + else + addCapability(CapabilityImageBuffer); + break; + case Dim1D: + if (sampled) + addCapability(CapabilitySampled1D); + else + addCapability(CapabilityImage1D); + break; + case DimCube: + if (arrayed) { + if (sampled) + addCapability(CapabilitySampledCubeArray); + else + addCapability(CapabilityImageCubeArray); + } + break; + case DimRect: + if (sampled) + addCapability(CapabilitySampledRect); + else + addCapability(CapabilityImageRect); + break; + case DimSubpassData: + addCapability(CapabilityInputAttachment); + break; + default: + break; + } + + if (ms) { + if (arrayed) + addCapability(CapabilityImageMSArray); + if (! sampled) + addCapability(CapabilityStorageImageMultisample); + } + + return type->getResultId(); +} + +Id Builder::makeSampledImageType(Id imageType) +{ + // try to find it + Instruction* type; + for (int t = 0; t < (int)groupedTypes[OpTypeSampledImage].size(); ++t) { + type = groupedTypes[OpTypeSampledImage][t]; + if (type->getIdOperand(0) == imageType) + return type->getResultId(); + } + + // not found, make it + type = new Instruction(getUniqueId(), NoType, OpTypeSampledImage); + type->addIdOperand(imageType); + + groupedTypes[OpTypeSampledImage].push_back(type); + constantsTypesGlobals.push_back(std::unique_ptr(type)); + module.mapInstruction(type); + + return type->getResultId(); +} + +Id Builder::getDerefTypeId(Id resultId) const +{ + Id typeId = getTypeId(resultId); + assert(isPointerType(typeId)); + + return module.getInstruction(typeId)->getImmediateOperand(1); +} + +Op Builder::getMostBasicTypeClass(Id typeId) const +{ + Instruction* instr = module.getInstruction(typeId); + + Op 
typeClass = instr->getOpCode(); + switch (typeClass) + { + case OpTypeVoid: + case OpTypeBool: + case OpTypeInt: + case OpTypeFloat: + case OpTypeStruct: + return typeClass; + case OpTypeVector: + case OpTypeMatrix: + case OpTypeArray: + case OpTypeRuntimeArray: + return getMostBasicTypeClass(instr->getIdOperand(0)); + case OpTypePointer: + return getMostBasicTypeClass(instr->getIdOperand(1)); + default: + assert(0); + return OpTypeFloat; + } +} + +int Builder::getNumTypeConstituents(Id typeId) const +{ + Instruction* instr = module.getInstruction(typeId); + + switch (instr->getOpCode()) + { + case OpTypeBool: + case OpTypeInt: + case OpTypeFloat: + return 1; + case OpTypeVector: + case OpTypeMatrix: + return instr->getImmediateOperand(1); + case OpTypeArray: + { + Id lengthId = instr->getImmediateOperand(1); + return module.getInstruction(lengthId)->getImmediateOperand(0); + } + case OpTypeStruct: + return instr->getNumOperands(); + default: + assert(0); + return 1; + } +} + +// Return the lowest-level type of scalar that an homogeneous composite is made out of. +// Typically, this is just to find out if something is made out of ints or floats. +// However, it includes returning a structure, if say, it is an array of structure. +Id Builder::getScalarTypeId(Id typeId) const +{ + Instruction* instr = module.getInstruction(typeId); + + Op typeClass = instr->getOpCode(); + switch (typeClass) + { + case OpTypeVoid: + case OpTypeBool: + case OpTypeInt: + case OpTypeFloat: + case OpTypeStruct: + return instr->getResultId(); + case OpTypeVector: + case OpTypeMatrix: + case OpTypeArray: + case OpTypeRuntimeArray: + case OpTypePointer: + return getScalarTypeId(getContainedTypeId(typeId)); + default: + assert(0); + return NoResult; + } +} + +// Return the type of 'member' of a composite. 
+Id Builder::getContainedTypeId(Id typeId, int member) const +{ + Instruction* instr = module.getInstruction(typeId); + + Op typeClass = instr->getOpCode(); + switch (typeClass) + { + case OpTypeVector: + case OpTypeMatrix: + case OpTypeArray: + case OpTypeRuntimeArray: + return instr->getIdOperand(0); + case OpTypePointer: + return instr->getIdOperand(1); + case OpTypeStruct: + return instr->getIdOperand(member); + default: + assert(0); + return NoResult; + } +} + +// Return the immediately contained type of a given composite type. +Id Builder::getContainedTypeId(Id typeId) const +{ + return getContainedTypeId(typeId, 0); +} + +// See if a scalar constant of this type has already been created, so it +// can be reused rather than duplicated. (Required by the specification). +Id Builder::findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned value) const +{ + Instruction* constant; + for (int i = 0; i < (int)groupedConstants[typeClass].size(); ++i) { + constant = groupedConstants[typeClass][i]; + if (constant->getOpCode() == opcode && + constant->getTypeId() == typeId && + constant->getImmediateOperand(0) == value) + return constant->getResultId(); + } + + return 0; +} + +// Version of findScalarConstant (see above) for scalars that take two operands (e.g. a 'double'). +Id Builder::findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned v1, unsigned v2) const +{ + Instruction* constant; + for (int i = 0; i < (int)groupedConstants[typeClass].size(); ++i) { + constant = groupedConstants[typeClass][i]; + if (constant->getOpCode() == opcode && + constant->getTypeId() == typeId && + constant->getImmediateOperand(0) == v1 && + constant->getImmediateOperand(1) == v2) + return constant->getResultId(); + } + + return 0; +} + +// Return true if consuming 'opcode' means consuming a constant. +// "constant" here means after final transform to executable code, +// the value consumed will be a constant, so includes specialization. 
+bool Builder::isConstantOpCode(Op opcode) const +{ + switch (opcode) { + case OpUndef: + case OpConstantTrue: + case OpConstantFalse: + case OpConstant: + case OpConstantComposite: + case OpConstantSampler: + case OpConstantNull: + case OpSpecConstantTrue: + case OpSpecConstantFalse: + case OpSpecConstant: + case OpSpecConstantComposite: + case OpSpecConstantOp: + return true; + default: + return false; + } +} + +Id Builder::makeBoolConstant(bool b, bool specConstant) +{ + Id typeId = makeBoolType(); + Instruction* constant; + Op opcode = specConstant ? (b ? OpSpecConstantTrue : OpSpecConstantFalse) : (b ? OpConstantTrue : OpConstantFalse); + + // See if we already made it. Applies only to regular constants, because specialization constants + // must remain distinct for the purpose of applying a SpecId decoration. + if (! specConstant) { + Id existing = 0; + for (int i = 0; i < (int)groupedConstants[OpTypeBool].size(); ++i) { + constant = groupedConstants[OpTypeBool][i]; + if (constant->getTypeId() == typeId && constant->getOpCode() == opcode) + existing = constant->getResultId(); + } + + if (existing) + return existing; + } + + // Make it + Instruction* c = new Instruction(getUniqueId(), typeId, opcode); + constantsTypesGlobals.push_back(std::unique_ptr(c)); + groupedConstants[OpTypeBool].push_back(c); + module.mapInstruction(c); + + return c->getResultId(); +} + +Id Builder::makeIntConstant(Id typeId, unsigned value, bool specConstant) +{ + Op opcode = specConstant ? OpSpecConstant : OpConstant; + + // See if we already made it. Applies only to regular constants, because specialization constants + // must remain distinct for the purpose of applying a SpecId decoration. + if (! 
specConstant) { + Id existing = findScalarConstant(OpTypeInt, opcode, typeId, value); + if (existing) + return existing; + } + + Instruction* c = new Instruction(getUniqueId(), typeId, opcode); + c->addImmediateOperand(value); + constantsTypesGlobals.push_back(std::unique_ptr(c)); + groupedConstants[OpTypeInt].push_back(c); + module.mapInstruction(c); + + return c->getResultId(); +} + +Id Builder::makeFloatConstant(float f, bool specConstant) +{ + Op opcode = specConstant ? OpSpecConstant : OpConstant; + Id typeId = makeFloatType(32); + unsigned value = *(unsigned int*)&f; + + // See if we already made it. Applies only to regular constants, because specialization constants + // must remain distinct for the purpose of applying a SpecId decoration. + if (! specConstant) { + Id existing = findScalarConstant(OpTypeFloat, opcode, typeId, value); + if (existing) + return existing; + } + + Instruction* c = new Instruction(getUniqueId(), typeId, opcode); + c->addImmediateOperand(value); + constantsTypesGlobals.push_back(std::unique_ptr(c)); + groupedConstants[OpTypeFloat].push_back(c); + module.mapInstruction(c); + + return c->getResultId(); +} + +Id Builder::makeDoubleConstant(double d, bool specConstant) +{ + Op opcode = specConstant ? OpSpecConstant : OpConstant; + Id typeId = makeFloatType(64); + unsigned long long value = *(unsigned long long*)&d; + unsigned op1 = value & 0xFFFFFFFF; + unsigned op2 = value >> 32; + + // See if we already made it. Applies only to regular constants, because specialization constants + // must remain distinct for the purpose of applying a SpecId decoration. + if (! 
specConstant) { + Id existing = findScalarConstant(OpTypeFloat, opcode, typeId, op1, op2); + if (existing) + return existing; + } + + Instruction* c = new Instruction(getUniqueId(), typeId, opcode); + c->addImmediateOperand(op1); + c->addImmediateOperand(op2); + constantsTypesGlobals.push_back(std::unique_ptr(c)); + groupedConstants[OpTypeFloat].push_back(c); + module.mapInstruction(c); + + return c->getResultId(); +} + +Id Builder::findCompositeConstant(Op typeClass, std::vector& comps) const +{ + Instruction* constant = 0; + bool found = false; + for (int i = 0; i < (int)groupedConstants[typeClass].size(); ++i) { + constant = groupedConstants[typeClass][i]; + + // same shape? + if (constant->getNumOperands() != (int)comps.size()) + continue; + + // same contents? + bool mismatch = false; + for (int op = 0; op < constant->getNumOperands(); ++op) { + if (constant->getIdOperand(op) != comps[op]) { + mismatch = true; + break; + } + } + if (! mismatch) { + found = true; + break; + } + } + + return found ? constant->getResultId() : NoResult; +} + +// Comments in header +Id Builder::makeCompositeConstant(Id typeId, std::vector& members, bool specConstant) +{ + Op opcode = specConstant ? OpSpecConstantComposite : OpConstantComposite; + assert(typeId); + Op typeClass = getTypeClass(typeId); + + switch (typeClass) { + case OpTypeVector: + case OpTypeArray: + case OpTypeStruct: + case OpTypeMatrix: + break; + default: + assert(0); + return makeFloatConstant(0.0); + } + + if (! 
specConstant) { + Id existing = findCompositeConstant(typeClass, members); + if (existing) + return existing; + } + + Instruction* c = new Instruction(getUniqueId(), typeId, opcode); + for (int op = 0; op < (int)members.size(); ++op) + c->addIdOperand(members[op]); + constantsTypesGlobals.push_back(std::unique_ptr(c)); + groupedConstants[typeClass].push_back(c); + module.mapInstruction(c); + + return c->getResultId(); +} + +Instruction* Builder::addEntryPoint(ExecutionModel model, Function* function, const char* name) +{ + Instruction* entryPoint = new Instruction(OpEntryPoint); + entryPoint->addImmediateOperand(model); + entryPoint->addIdOperand(function->getId()); + entryPoint->addStringOperand(name); + + entryPoints.push_back(std::unique_ptr(entryPoint)); + + return entryPoint; +} + +// Currently relying on the fact that all 'value' of interest are small non-negative values. +void Builder::addExecutionMode(Function* entryPoint, ExecutionMode mode, int value1, int value2, int value3) +{ + Instruction* instr = new Instruction(OpExecutionMode); + instr->addIdOperand(entryPoint->getId()); + instr->addImmediateOperand(mode); + if (value1 >= 0) + instr->addImmediateOperand(value1); + if (value2 >= 0) + instr->addImmediateOperand(value2); + if (value3 >= 0) + instr->addImmediateOperand(value3); + + executionModes.push_back(std::unique_ptr(instr)); +} + +void Builder::addName(Id id, const char* string) +{ + Instruction* name = new Instruction(OpName); + name->addIdOperand(id); + name->addStringOperand(string); + + names.push_back(std::unique_ptr(name)); +} + +void Builder::addMemberName(Id id, int memberNumber, const char* string) +{ + Instruction* name = new Instruction(OpMemberName); + name->addIdOperand(id); + name->addImmediateOperand(memberNumber); + name->addStringOperand(string); + + names.push_back(std::unique_ptr(name)); +} + +void Builder::addLine(Id target, Id fileName, int lineNum, int column) +{ + Instruction* line = new Instruction(OpLine); + 
line->addIdOperand(target); + line->addIdOperand(fileName); + line->addImmediateOperand(lineNum); + line->addImmediateOperand(column); + + lines.push_back(std::unique_ptr(line)); +} + +void Builder::addDecoration(Id id, Decoration decoration, int num) +{ + if (decoration == (spv::Decoration)spv::BadValue) + return; + Instruction* dec = new Instruction(OpDecorate); + dec->addIdOperand(id); + dec->addImmediateOperand(decoration); + if (num >= 0) + dec->addImmediateOperand(num); + + decorations.push_back(std::unique_ptr(dec)); +} + +void Builder::addMemberDecoration(Id id, unsigned int member, Decoration decoration, int num) +{ + Instruction* dec = new Instruction(OpMemberDecorate); + dec->addIdOperand(id); + dec->addImmediateOperand(member); + dec->addImmediateOperand(decoration); + if (num >= 0) + dec->addImmediateOperand(num); + + decorations.push_back(std::unique_ptr(dec)); +} + +// Comments in header +Function* Builder::makeMain() +{ + assert(! mainFunction); + + Block* entry; + std::vector params; + std::vector precisions; + + mainFunction = makeFunctionEntry(NoPrecision, makeVoidType(), "main", params, precisions, &entry); + + return mainFunction; +} + +// Comments in header +Function* Builder::makeFunctionEntry(Decoration precision, Id returnType, const char* name, + const std::vector& paramTypes, const std::vector& precisions, Block **entry) +{ + // Make the function and initial instructions in it + Id typeId = makeFunctionType(returnType, paramTypes); + Id firstParamId = paramTypes.size() == 0 ? 
0 : getUniqueIds((int)paramTypes.size()); + Function* function = new Function(getUniqueId(), returnType, typeId, firstParamId, module); + + // Set up the precisions + setPrecision(function->getId(), precision); + for (unsigned p = 0; p < (unsigned)precisions.size(); ++p) + setPrecision(firstParamId + p, precisions[p]); + + // CFG + if (entry) { + *entry = new Block(getUniqueId(), *function); + function->addBlock(*entry); + setBuildPoint(*entry); + } + + if (name) + addName(function->getId(), name); + + functions.push_back(std::unique_ptr(function)); + + return function; +} + +// Comments in header +void Builder::makeReturn(bool implicit, Id retVal) +{ + if (retVal) { + Instruction* inst = new Instruction(NoResult, NoType, OpReturnValue); + inst->addIdOperand(retVal); + buildPoint->addInstruction(std::unique_ptr(inst)); + } else + buildPoint->addInstruction(std::unique_ptr(new Instruction(NoResult, NoType, OpReturn))); + + if (! implicit) + createAndSetNoPredecessorBlock("post-return"); +} + +// Comments in header +void Builder::leaveFunction() +{ + Block* block = buildPoint; + Function& function = buildPoint->getParent(); + assert(block); + + // If our function did not contain a return, add a return void now. + if (! 
block->isTerminated()) { + if (function.getReturnType() == makeVoidType()) + makeReturn(true); + else { + makeReturn(true, createUndefined(function.getReturnType())); + } + } +} + +// Comments in header +void Builder::makeDiscard() +{ + buildPoint->addInstruction(std::unique_ptr(new Instruction(OpKill))); + createAndSetNoPredecessorBlock("post-discard"); +} + +// Comments in header +Id Builder::createVariable(StorageClass storageClass, Id type, const char* name) +{ + Id pointerType = makePointer(storageClass, type); + Instruction* inst = new Instruction(getUniqueId(), pointerType, OpVariable); + inst->addImmediateOperand(storageClass); + + switch (storageClass) { + case StorageClassFunction: + // Validation rules require the declaration in the entry block + buildPoint->getParent().addLocalVariable(std::unique_ptr(inst)); + break; + + default: + constantsTypesGlobals.push_back(std::unique_ptr(inst)); + module.mapInstruction(inst); + break; + } + + if (name) + addName(inst->getResultId(), name); + + return inst->getResultId(); +} + +// Comments in header +Id Builder::createUndefined(Id type) +{ + Instruction* inst = new Instruction(getUniqueId(), type, OpUndef); + buildPoint->addInstruction(std::unique_ptr(inst)); + return inst->getResultId(); +} + +// Comments in header +void Builder::createStore(Id rValue, Id lValue) +{ + Instruction* store = new Instruction(OpStore); + store->addIdOperand(lValue); + store->addIdOperand(rValue); + buildPoint->addInstruction(std::unique_ptr(store)); +} + +// Comments in header +Id Builder::createLoad(Id lValue) +{ + Instruction* load = new Instruction(getUniqueId(), getDerefTypeId(lValue), OpLoad); + load->addIdOperand(lValue); + buildPoint->addInstruction(std::unique_ptr(load)); + + return load->getResultId(); +} + +// Comments in header +Id Builder::createAccessChain(StorageClass storageClass, Id base, std::vector& offsets) +{ + // Figure out the final resulting type. 
+ spv::Id typeId = getTypeId(base); + assert(isPointerType(typeId) && offsets.size() > 0); + typeId = getContainedTypeId(typeId); + for (int i = 0; i < (int)offsets.size(); ++i) { + if (isStructType(typeId)) { + assert(isConstantScalar(offsets[i])); + typeId = getContainedTypeId(typeId, getConstantScalar(offsets[i])); + } else + typeId = getContainedTypeId(typeId, offsets[i]); + } + typeId = makePointer(storageClass, typeId); + + // Make the instruction + Instruction* chain = new Instruction(getUniqueId(), typeId, OpAccessChain); + chain->addIdOperand(base); + for (int i = 0; i < (int)offsets.size(); ++i) + chain->addIdOperand(offsets[i]); + buildPoint->addInstruction(std::unique_ptr(chain)); + + return chain->getResultId(); +} + +Id Builder::createArrayLength(Id base, unsigned int member) +{ + Instruction* length = new Instruction(getUniqueId(), makeIntType(32), OpArrayLength); + length->addIdOperand(base); + length->addImmediateOperand(member); + buildPoint->addInstruction(std::unique_ptr(length)); + + return length->getResultId(); +} + +Id Builder::createCompositeExtract(Id composite, Id typeId, unsigned index) +{ + Instruction* extract = new Instruction(getUniqueId(), typeId, OpCompositeExtract); + extract->addIdOperand(composite); + extract->addImmediateOperand(index); + buildPoint->addInstruction(std::unique_ptr(extract)); + + return extract->getResultId(); +} + +Id Builder::createCompositeExtract(Id composite, Id typeId, std::vector& indexes) +{ + Instruction* extract = new Instruction(getUniqueId(), typeId, OpCompositeExtract); + extract->addIdOperand(composite); + for (int i = 0; i < (int)indexes.size(); ++i) + extract->addImmediateOperand(indexes[i]); + buildPoint->addInstruction(std::unique_ptr(extract)); + + return extract->getResultId(); +} + +Id Builder::createCompositeInsert(Id object, Id composite, Id typeId, unsigned index) +{ + Instruction* insert = new Instruction(getUniqueId(), typeId, OpCompositeInsert); + insert->addIdOperand(object); + 
insert->addIdOperand(composite); + insert->addImmediateOperand(index); + buildPoint->addInstruction(std::unique_ptr(insert)); + + return insert->getResultId(); +} + +Id Builder::createCompositeInsert(Id object, Id composite, Id typeId, std::vector& indexes) +{ + Instruction* insert = new Instruction(getUniqueId(), typeId, OpCompositeInsert); + insert->addIdOperand(object); + insert->addIdOperand(composite); + for (int i = 0; i < (int)indexes.size(); ++i) + insert->addImmediateOperand(indexes[i]); + buildPoint->addInstruction(std::unique_ptr(insert)); + + return insert->getResultId(); +} + +Id Builder::createVectorExtractDynamic(Id vector, Id typeId, Id componentIndex) +{ + Instruction* extract = new Instruction(getUniqueId(), typeId, OpVectorExtractDynamic); + extract->addIdOperand(vector); + extract->addIdOperand(componentIndex); + buildPoint->addInstruction(std::unique_ptr(extract)); + + return extract->getResultId(); +} + +Id Builder::createVectorInsertDynamic(Id vector, Id typeId, Id component, Id componentIndex) +{ + Instruction* insert = new Instruction(getUniqueId(), typeId, OpVectorInsertDynamic); + insert->addIdOperand(vector); + insert->addIdOperand(component); + insert->addIdOperand(componentIndex); + buildPoint->addInstruction(std::unique_ptr(insert)); + + return insert->getResultId(); +} + +// An opcode that has no operands, no result id, and no type +void Builder::createNoResultOp(Op opCode) +{ + Instruction* op = new Instruction(opCode); + buildPoint->addInstruction(std::unique_ptr(op)); +} + +// An opcode that has one operand, no result id, and no type +void Builder::createNoResultOp(Op opCode, Id operand) +{ + Instruction* op = new Instruction(opCode); + op->addIdOperand(operand); + buildPoint->addInstruction(std::unique_ptr(op)); +} + +// An opcode that has one operand, no result id, and no type +void Builder::createNoResultOp(Op opCode, const std::vector& operands) +{ + Instruction* op = new Instruction(opCode); + for (auto operand : operands) + 
op->addIdOperand(operand); + buildPoint->addInstruction(std::unique_ptr(op)); +} + +void Builder::createControlBarrier(Scope execution, Scope memory, MemorySemanticsMask semantics) +{ + Instruction* op = new Instruction(OpControlBarrier); + op->addImmediateOperand(makeUintConstant(execution)); + op->addImmediateOperand(makeUintConstant(memory)); + op->addImmediateOperand(makeUintConstant(semantics)); + buildPoint->addInstruction(std::unique_ptr(op)); +} + +void Builder::createMemoryBarrier(unsigned executionScope, unsigned memorySemantics) +{ + Instruction* op = new Instruction(OpMemoryBarrier); + op->addImmediateOperand(makeUintConstant(executionScope)); + op->addImmediateOperand(makeUintConstant(memorySemantics)); + buildPoint->addInstruction(std::unique_ptr(op)); +} + +// An opcode that has one operands, a result id, and a type +Id Builder::createUnaryOp(Op opCode, Id typeId, Id operand) +{ + Instruction* op = new Instruction(getUniqueId(), typeId, opCode); + op->addIdOperand(operand); + buildPoint->addInstruction(std::unique_ptr(op)); + + return op->getResultId(); +} + +Id Builder::createBinOp(Op opCode, Id typeId, Id left, Id right) +{ + Instruction* op = new Instruction(getUniqueId(), typeId, opCode); + op->addIdOperand(left); + op->addIdOperand(right); + buildPoint->addInstruction(std::unique_ptr(op)); + + return op->getResultId(); +} + +Id Builder::createTriOp(Op opCode, Id typeId, Id op1, Id op2, Id op3) +{ + Instruction* op = new Instruction(getUniqueId(), typeId, opCode); + op->addIdOperand(op1); + op->addIdOperand(op2); + op->addIdOperand(op3); + buildPoint->addInstruction(std::unique_ptr(op)); + + return op->getResultId(); +} + +Id Builder::createOp(Op opCode, Id typeId, const std::vector& operands) +{ + Instruction* op = new Instruction(getUniqueId(), typeId, opCode); + for (auto operand : operands) + op->addIdOperand(operand); + buildPoint->addInstruction(std::unique_ptr(op)); + + return op->getResultId(); +} + +Id 
Builder::createFunctionCall(spv::Function* function, std::vector& args) +{ + Instruction* op = new Instruction(getUniqueId(), function->getReturnType(), OpFunctionCall); + op->addIdOperand(function->getId()); + for (int a = 0; a < (int)args.size(); ++a) + op->addIdOperand(args[a]); + buildPoint->addInstruction(std::unique_ptr(op)); + + return op->getResultId(); +} + +// Comments in header +Id Builder::createRvalueSwizzle(Decoration precision, Id typeId, Id source, std::vector& channels) +{ + if (channels.size() == 1) + return setPrecision(createCompositeExtract(source, typeId, channels.front()), precision); + + Instruction* swizzle = new Instruction(getUniqueId(), typeId, OpVectorShuffle); + assert(isVector(source)); + swizzle->addIdOperand(source); + swizzle->addIdOperand(source); + for (int i = 0; i < (int)channels.size(); ++i) + swizzle->addImmediateOperand(channels[i]); + buildPoint->addInstruction(std::unique_ptr(swizzle)); + + return setPrecision(swizzle->getResultId(), precision); +} + +// Comments in header +Id Builder::createLvalueSwizzle(Id typeId, Id target, Id source, std::vector& channels) +{ + assert(getNumComponents(source) == (int)channels.size()); + if (channels.size() == 1 && getNumComponents(source) == 1) + return createCompositeInsert(source, target, typeId, channels.front()); + + Instruction* swizzle = new Instruction(getUniqueId(), typeId, OpVectorShuffle); + assert(isVector(source)); + assert(isVector(target)); + swizzle->addIdOperand(target); + swizzle->addIdOperand(source); + + // Set up an identity shuffle from the base value to the result value + unsigned int components[4]; + int numTargetComponents = getNumComponents(target); + for (int i = 0; i < numTargetComponents; ++i) + components[i] = i; + + // Punch in the l-value swizzle + for (int i = 0; i < (int)channels.size(); ++i) + components[channels[i]] = numTargetComponents + i; + + // finish the instruction with these components selectors + for (int i = 0; i < numTargetComponents; ++i) 
+ swizzle->addImmediateOperand(components[i]); + buildPoint->addInstruction(std::unique_ptr(swizzle)); + + return swizzle->getResultId(); +} + +// Comments in header +void Builder::promoteScalar(Decoration precision, Id& left, Id& right) +{ + int direction = getNumComponents(right) - getNumComponents(left); + + if (direction > 0) + left = smearScalar(precision, left, makeVectorType(getTypeId(left), getNumComponents(right))); + else if (direction < 0) + right = smearScalar(precision, right, makeVectorType(getTypeId(right), getNumComponents(left))); + + return; +} + +// Comments in header +Id Builder::smearScalar(Decoration precision, Id scalar, Id vectorType) +{ + assert(getNumComponents(scalar) == 1); + assert(getTypeId(scalar) == getScalarTypeId(vectorType)); + + int numComponents = getNumTypeComponents(vectorType); + if (numComponents == 1) + return scalar; + + Instruction* smear = new Instruction(getUniqueId(), vectorType, OpCompositeConstruct); + for (int c = 0; c < numComponents; ++c) + smear->addIdOperand(scalar); + buildPoint->addInstruction(std::unique_ptr(smear)); + + return setPrecision(smear->getResultId(), precision); +} + +// Comments in header +Id Builder::createBuiltinCall(Id resultType, Id builtins, int entryPoint, std::vector& args) +{ + Instruction* inst = new Instruction(getUniqueId(), resultType, OpExtInst); + inst->addIdOperand(builtins); + inst->addImmediateOperand(entryPoint); + for (int arg = 0; arg < (int)args.size(); ++arg) + inst->addIdOperand(args[arg]); + + buildPoint->addInstruction(std::unique_ptr(inst)); + + return inst->getResultId(); +} + +// Accept all parameters needed to create a texture instruction. +// Create the correct instruction based on the inputs, and make the call. 
+Id Builder::createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj, bool gather, bool noImplicitLod, const TextureParameters& parameters) +{ + static const int maxTextureArgs = 10; + Id texArgs[maxTextureArgs] = {}; + + // + // Set up the fixed arguments + // + int numArgs = 0; + bool explicitLod = false; + texArgs[numArgs++] = parameters.sampler; + texArgs[numArgs++] = parameters.coords; + if (parameters.Dref) + texArgs[numArgs++] = parameters.Dref; + if (parameters.comp) + texArgs[numArgs++] = parameters.comp; + + // + // Set up the optional arguments + // + int optArgNum = numArgs; // track which operand, if it exists, is the mask of optional arguments + ++numArgs; // speculatively make room for the mask operand + ImageOperandsMask mask = ImageOperandsMaskNone; // the mask operand + if (parameters.bias) { + mask = (ImageOperandsMask)(mask | ImageOperandsBiasMask); + texArgs[numArgs++] = parameters.bias; + } + if (parameters.lod) { + mask = (ImageOperandsMask)(mask | ImageOperandsLodMask); + texArgs[numArgs++] = parameters.lod; + explicitLod = true; + } else if (parameters.gradX) { + mask = (ImageOperandsMask)(mask | ImageOperandsGradMask); + texArgs[numArgs++] = parameters.gradX; + texArgs[numArgs++] = parameters.gradY; + explicitLod = true; + } else if (noImplicitLod && ! fetch && ! 
gather) { + // have to explicitly use lod of 0 if not allowed to have them be implicit, and + // we would otherwise be about to issue an implicit instruction + mask = (ImageOperandsMask)(mask | ImageOperandsLodMask); + texArgs[numArgs++] = makeFloatConstant(0.0); + explicitLod = true; + } + if (parameters.offset) { + if (isConstant(parameters.offset)) + mask = (ImageOperandsMask)(mask | ImageOperandsConstOffsetMask); + else + mask = (ImageOperandsMask)(mask | ImageOperandsOffsetMask); + texArgs[numArgs++] = parameters.offset; + } + if (parameters.offsets) { + mask = (ImageOperandsMask)(mask | ImageOperandsConstOffsetsMask); + texArgs[numArgs++] = parameters.offsets; + } + if (parameters.sample) { + mask = (ImageOperandsMask)(mask | ImageOperandsSampleMask); + texArgs[numArgs++] = parameters.sample; + } + if (parameters.lodClamp) { + // capability if this bit is used + addCapability(CapabilityMinLod); + + mask = (ImageOperandsMask)(mask | ImageOperandsMinLodMask); + texArgs[numArgs++] = parameters.lodClamp; + } + if (mask == ImageOperandsMaskNone) + --numArgs; // undo speculative reservation for the mask argument + else + texArgs[optArgNum] = mask; + + // + // Set up the instruction + // + Op opCode = OpNop; // All paths below need to set this + if (fetch) { + if (sparse) + opCode = OpImageSparseFetch; + else + opCode = OpImageFetch; + } else if (gather) { + if (parameters.Dref) + if (sparse) + opCode = OpImageSparseDrefGather; + else + opCode = OpImageDrefGather; + else + if (sparse) + opCode = OpImageSparseGather; + else + opCode = OpImageGather; + } else if (explicitLod) { + if (parameters.Dref) { + if (proj) + if (sparse) + opCode = OpImageSparseSampleProjDrefExplicitLod; + else + opCode = OpImageSampleProjDrefExplicitLod; + else + if (sparse) + opCode = OpImageSparseSampleDrefExplicitLod; + else + opCode = OpImageSampleDrefExplicitLod; + } else { + if (proj) + if (sparse) + opCode = OpImageSparseSampleProjExplicitLod; + else + opCode = 
OpImageSampleProjExplicitLod; + else + if (sparse) + opCode = OpImageSparseSampleExplicitLod; + else + opCode = OpImageSampleExplicitLod; + } + } else { + if (parameters.Dref) { + if (proj) + if (sparse) + opCode = OpImageSparseSampleProjDrefImplicitLod; + else + opCode = OpImageSampleProjDrefImplicitLod; + else + if (sparse) + opCode = OpImageSparseSampleDrefImplicitLod; + else + opCode = OpImageSampleDrefImplicitLod; + } else { + if (proj) + if (sparse) + opCode = OpImageSparseSampleProjImplicitLod; + else + opCode = OpImageSampleProjImplicitLod; + else + if (sparse) + opCode = OpImageSparseSampleImplicitLod; + else + opCode = OpImageSampleImplicitLod; + } + } + + // See if the result type is expecting a smeared result. + // This happens when a legacy shadow*() call is made, which + // gets a vec4 back instead of a float. + Id smearedType = resultType; + if (! isScalarType(resultType)) { + switch (opCode) { + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + resultType = getScalarTypeId(resultType); + break; + default: + break; + } + } + + Id typeId0 = 0; + Id typeId1 = 0; + + if (sparse) { + typeId0 = resultType; + typeId1 = getDerefTypeId(parameters.texelOut); + resultType = makeStructResultType(typeId0, typeId1); + } + + // Build the SPIR-V instruction + Instruction* textureInst = new Instruction(getUniqueId(), resultType, opCode); + for (int op = 0; op < optArgNum; ++op) + textureInst->addIdOperand(texArgs[op]); + if (optArgNum < numArgs) + textureInst->addImmediateOperand(texArgs[optArgNum]); + for (int op = optArgNum + 1; op < numArgs; ++op) + textureInst->addIdOperand(texArgs[op]); + setPrecision(textureInst->getResultId(), precision); + buildPoint->addInstruction(std::unique_ptr(textureInst)); + + Id resultId = textureInst->getResultId(); + + if (sparse) { + // set capability + addCapability(CapabilitySparseResidency); + + // Decode the return type 
that was a special structure + createStore(createCompositeExtract(resultId, typeId1, 1), parameters.texelOut); + resultId = createCompositeExtract(resultId, typeId0, 0); + setPrecision(resultId, precision); + } else { + // When a smear is needed, do it, as per what was computed + // above when resultType was changed to a scalar type. + if (resultType != smearedType) + resultId = smearScalar(precision, resultId, smearedType); + } + + return resultId; +} + +// Comments in header +Id Builder::createTextureQueryCall(Op opCode, const TextureParameters& parameters) +{ + // All these need a capability + addCapability(CapabilityImageQuery); + + // Figure out the result type + Id resultType = 0; + switch (opCode) { + case OpImageQuerySize: + case OpImageQuerySizeLod: + { + int numComponents = 0; + switch (getTypeDimensionality(getImageType(parameters.sampler))) { + case Dim1D: + case DimBuffer: + numComponents = 1; + break; + case Dim2D: + case DimCube: + case DimRect: + case DimSubpassData: + numComponents = 2; + break; + case Dim3D: + numComponents = 3; + break; + + default: + assert(0); + break; + } + if (isArrayedImageType(getImageType(parameters.sampler))) + ++numComponents; + if (numComponents == 1) + resultType = makeIntType(32); + else + resultType = makeVectorType(makeIntType(32), numComponents); + + break; + } + case OpImageQueryLod: + resultType = makeVectorType(makeFloatType(32), 2); + break; + case OpImageQueryLevels: + case OpImageQuerySamples: + resultType = makeIntType(32); + break; + default: + assert(0); + break; + } + + Instruction* query = new Instruction(getUniqueId(), resultType, opCode); + query->addIdOperand(parameters.sampler); + if (parameters.coords) + query->addIdOperand(parameters.coords); + if (parameters.lod) + query->addIdOperand(parameters.lod); + buildPoint->addInstruction(std::unique_ptr(query)); + + return query->getResultId(); +} + +// External comments in header. +// Operates recursively to visit the composite's hierarchy. 
+Id Builder::createCompositeCompare(Decoration precision, Id value1, Id value2, bool equal) +{ + Id boolType = makeBoolType(); + Id valueType = getTypeId(value1); + + Id resultId = NoResult; + + int numConstituents = getNumTypeConstituents(valueType); + + // Scalars and Vectors + + if (isScalarType(valueType) || isVectorType(valueType)) { + assert(valueType == getTypeId(value2)); + // These just need a single comparison, just have + // to figure out what it is. + Op op; + switch (getMostBasicTypeClass(valueType)) { + case OpTypeFloat: + op = equal ? OpFOrdEqual : OpFOrdNotEqual; + break; + case OpTypeInt: + default: + op = equal ? OpIEqual : OpINotEqual; + break; + case OpTypeBool: + op = equal ? OpLogicalEqual : OpLogicalNotEqual; + precision = NoPrecision; + break; + } + + if (isScalarType(valueType)) { + // scalar + resultId = createBinOp(op, boolType, value1, value2); + } else { + // vector + resultId = createBinOp(op, makeVectorType(boolType, numConstituents), value1, value2); + setPrecision(resultId, precision); + // reduce vector compares... + resultId = createUnaryOp(equal ? OpAll : OpAny, boolType, resultId); + } + + return setPrecision(resultId, precision); + } + + // Only structs, arrays, and matrices should be left. + // They share in common the reduction operation across their constituents. 
+ assert(isAggregateType(valueType) || isMatrixType(valueType)); + + // Compare each pair of constituents + for (int constituent = 0; constituent < numConstituents; ++constituent) { + std::vector indexes(1, constituent); + Id constituentType1 = getContainedTypeId(getTypeId(value1), constituent); + Id constituentType2 = getContainedTypeId(getTypeId(value2), constituent); + Id constituent1 = createCompositeExtract(value1, constituentType1, indexes); + Id constituent2 = createCompositeExtract(value2, constituentType2, indexes); + + Id subResultId = createCompositeCompare(precision, constituent1, constituent2, equal); + + if (constituent == 0) + resultId = subResultId; + else + resultId = setPrecision(createBinOp(equal ? OpLogicalAnd : OpLogicalOr, boolType, resultId, subResultId), precision); + } + + return resultId; +} + +// OpCompositeConstruct +Id Builder::createCompositeConstruct(Id typeId, std::vector& constituents) +{ + assert(isAggregateType(typeId) || (getNumTypeConstituents(typeId) > 1 && getNumTypeConstituents(typeId) == (int)constituents.size())); + + Instruction* op = new Instruction(getUniqueId(), typeId, OpCompositeConstruct); + for (int c = 0; c < (int)constituents.size(); ++c) + op->addIdOperand(constituents[c]); + buildPoint->addInstruction(std::unique_ptr(op)); + + return op->getResultId(); +} + +// Vector or scalar constructor +Id Builder::createConstructor(Decoration precision, const std::vector& sources, Id resultTypeId) +{ + Id result = NoResult; + unsigned int numTargetComponents = getNumTypeComponents(resultTypeId); + unsigned int targetComponent = 0; + + // Special case: when calling a vector constructor with a single scalar + // argument, smear the scalar + if (sources.size() == 1 && isScalar(sources[0]) && numTargetComponents > 1) + return smearScalar(precision, sources[0], resultTypeId); + + Id scalarTypeId = getScalarTypeId(resultTypeId); + std::vector constituents; // accumulate the arguments for OpCompositeConstruct + for (unsigned int i 
= 0; i < sources.size(); ++i) { + assert(! isAggregate(sources[i])); + unsigned int sourceSize = getNumComponents(sources[i]); + unsigned int sourcesToUse = sourceSize; + if (sourcesToUse + targetComponent > numTargetComponents) + sourcesToUse = numTargetComponents - targetComponent; + + for (unsigned int s = 0; s < sourcesToUse; ++s) { + Id arg = sources[i]; + if (sourceSize > 1) { + std::vector swiz; + swiz.push_back(s); + arg = createRvalueSwizzle(precision, scalarTypeId, arg, swiz); + } + + if (numTargetComponents > 1) + constituents.push_back(arg); + else + result = arg; + ++targetComponent; + } + + if (targetComponent >= numTargetComponents) + break; + } + + if (constituents.size() > 0) + result = createCompositeConstruct(resultTypeId, constituents); + + return setPrecision(result, precision); +} + +// Comments in header +Id Builder::createMatrixConstructor(Decoration precision, const std::vector& sources, Id resultTypeId) +{ + Id componentTypeId = getScalarTypeId(resultTypeId); + int numCols = getTypeNumColumns(resultTypeId); + int numRows = getTypeNumRows(resultTypeId); + + // Will use a two step process + // 1. make a compile-time 2D array of values + // 2. construct a matrix from that array + + // Step 1. 
+ + // initialize the array to the identity matrix + Id ids[maxMatrixSize][maxMatrixSize]; + Id one = makeFloatConstant(1.0); + Id zero = makeFloatConstant(0.0); + for (int col = 0; col < 4; ++col) { + for (int row = 0; row < 4; ++row) { + if (col == row) + ids[col][row] = one; + else + ids[col][row] = zero; + } + } + + // modify components as dictated by the arguments + if (sources.size() == 1 && isScalar(sources[0])) { + // a single scalar; resets the diagonals + for (int col = 0; col < 4; ++col) + ids[col][col] = sources[0]; + } else if (isMatrix(sources[0])) { + // constructing from another matrix; copy over the parts that exist in both the argument and constructee + Id matrix = sources[0]; + int minCols = std::min(numCols, getNumColumns(matrix)); + int minRows = std::min(numRows, getNumRows(matrix)); + for (int col = 0; col < minCols; ++col) { + std::vector indexes; + indexes.push_back(col); + for (int row = 0; row < minRows; ++row) { + indexes.push_back(row); + ids[col][row] = createCompositeExtract(matrix, componentTypeId, indexes); + indexes.pop_back(); + setPrecision(ids[col][row], precision); + } + } + } else { + // fill in the matrix in column-major order with whatever argument components are available + int row = 0; + int col = 0; + + for (int arg = 0; arg < (int)sources.size(); ++arg) { + Id argComp = sources[arg]; + for (int comp = 0; comp < getNumComponents(sources[arg]); ++comp) { + if (getNumComponents(sources[arg]) > 1) { + argComp = createCompositeExtract(sources[arg], componentTypeId, comp); + setPrecision(argComp, precision); + } + ids[col][row++] = argComp; + if (row == numRows) { + row = 0; + col++; + } + } + } + } + + + // Step 2: Construct a matrix from that array. + // First make the column vectors, then make the matrix. 
+ + // make the column vectors + Id columnTypeId = getContainedTypeId(resultTypeId); + std::vector matrixColumns; + for (int col = 0; col < numCols; ++col) { + std::vector vectorComponents; + for (int row = 0; row < numRows; ++row) + vectorComponents.push_back(ids[col][row]); + Id column = createCompositeConstruct(columnTypeId, vectorComponents); + setPrecision(column, precision); + matrixColumns.push_back(column); + } + + // make the matrix + return setPrecision(createCompositeConstruct(resultTypeId, matrixColumns), precision); +} + +// Comments in header +Builder::If::If(Id cond, Builder& gb) : + builder(gb), + condition(cond), + elseBlock(0) +{ + function = &builder.getBuildPoint()->getParent(); + + // make the blocks, but only put the then-block into the function, + // the else-block and merge-block will be added later, in order, after + // earlier code is emitted + thenBlock = new Block(builder.getUniqueId(), *function); + mergeBlock = new Block(builder.getUniqueId(), *function); + + // Save the current block, so that we can add in the flow control split when + // makeEndIf is called. 
+ headerBlock = builder.getBuildPoint(); + + function->addBlock(thenBlock); + builder.setBuildPoint(thenBlock); +} + +// Comments in header +void Builder::If::makeBeginElse() +{ + // Close out the "then" by having it jump to the mergeBlock + builder.createBranch(mergeBlock); + + // Make the first else block and add it to the function + elseBlock = new Block(builder.getUniqueId(), *function); + function->addBlock(elseBlock); + + // Start building the else block + builder.setBuildPoint(elseBlock); +} + +// Comments in header +void Builder::If::makeEndIf() +{ + // jump to the merge block + builder.createBranch(mergeBlock); + + // Go back to the headerBlock and make the flow control split + builder.setBuildPoint(headerBlock); + builder.createSelectionMerge(mergeBlock, SelectionControlMaskNone); + if (elseBlock) + builder.createConditionalBranch(condition, thenBlock, elseBlock); + else + builder.createConditionalBranch(condition, thenBlock, mergeBlock); + + // add the merge block to the function + function->addBlock(mergeBlock); + builder.setBuildPoint(mergeBlock); +} + +// Comments in header +void Builder::makeSwitch(Id selector, int numSegments, std::vector& caseValues, std::vector& valueIndexToSegment, int defaultSegment, + std::vector& segmentBlocks) +{ + Function& function = buildPoint->getParent(); + + // make all the blocks + for (int s = 0; s < numSegments; ++s) + segmentBlocks.push_back(new Block(getUniqueId(), function)); + + Block* mergeBlock = new Block(getUniqueId(), function); + + // make and insert the switch's selection-merge instruction + createSelectionMerge(mergeBlock, SelectionControlMaskNone); + + // make the switch instruction + Instruction* switchInst = new Instruction(NoResult, NoType, OpSwitch); + switchInst->addIdOperand(selector); + auto defaultOrMerge = (defaultSegment >= 0) ? 
segmentBlocks[defaultSegment] : mergeBlock; + switchInst->addIdOperand(defaultOrMerge->getId()); + defaultOrMerge->addPredecessor(buildPoint); + for (int i = 0; i < (int)caseValues.size(); ++i) { + switchInst->addImmediateOperand(caseValues[i]); + switchInst->addIdOperand(segmentBlocks[valueIndexToSegment[i]]->getId()); + segmentBlocks[valueIndexToSegment[i]]->addPredecessor(buildPoint); + } + buildPoint->addInstruction(std::unique_ptr(switchInst)); + + // push the merge block + switchMerges.push(mergeBlock); +} + +// Comments in header +void Builder::addSwitchBreak() +{ + // branch to the top of the merge block stack + createBranch(switchMerges.top()); + createAndSetNoPredecessorBlock("post-switch-break"); +} + +// Comments in header +void Builder::nextSwitchSegment(std::vector& segmentBlock, int nextSegment) +{ + int lastSegment = nextSegment - 1; + if (lastSegment >= 0) { + // Close out previous segment by jumping, if necessary, to next segment + if (! buildPoint->isTerminated()) + createBranch(segmentBlock[nextSegment]); + } + Block* block = segmentBlock[nextSegment]; + block->getParent().addBlock(block); + setBuildPoint(block); +} + +// Comments in header +void Builder::endSwitch(std::vector& /*segmentBlock*/) +{ + // Close out previous segment by jumping, if necessary, to next segment + if (! buildPoint->isTerminated()) + addSwitchBreak(); + + switchMerges.top()->getParent().addBlock(switchMerges.top()); + setBuildPoint(switchMerges.top()); + + switchMerges.pop(); +} + +Block& Builder::makeNewBlock() +{ + Function& function = buildPoint->getParent(); + auto block = new Block(getUniqueId(), function); + function.addBlock(block); + return *block; +} + +Builder::LoopBlocks& Builder::makeNewLoop() +{ + // Older MSVC versions don't allow inlining of blocks below. 
+ LoopBlocks blocks = {makeNewBlock(), makeNewBlock(), makeNewBlock(), makeNewBlock()}; + loops.push(blocks); + return loops.top(); +} + +void Builder::createLoopContinue() +{ + createBranch(&loops.top().continue_target); + // Set up a block for dead code. + createAndSetNoPredecessorBlock("post-loop-continue"); +} + +void Builder::createLoopExit() +{ + createBranch(&loops.top().merge); + // Set up a block for dead code. + createAndSetNoPredecessorBlock("post-loop-break"); +} + +void Builder::closeLoop() +{ + loops.pop(); +} + +void Builder::clearAccessChain() +{ + accessChain.base = NoResult; + accessChain.indexChain.clear(); + accessChain.instr = NoResult; + accessChain.swizzle.clear(); + accessChain.component = NoResult; + accessChain.preSwizzleBaseType = NoType; + accessChain.isRValue = false; +} + +// Comments in header +void Builder::accessChainPushSwizzle(std::vector& swizzle, Id preSwizzleBaseType) +{ + // swizzles can be stacked in GLSL, but simplified to a single + // one here; the base type doesn't change + if (accessChain.preSwizzleBaseType == NoType) + accessChain.preSwizzleBaseType = preSwizzleBaseType; + + // if needed, propagate the swizzle for the current access chain + if (accessChain.swizzle.size()) { + std::vector oldSwizzle = accessChain.swizzle; + accessChain.swizzle.resize(0); + for (unsigned int i = 0; i < swizzle.size(); ++i) { + accessChain.swizzle.push_back(oldSwizzle[swizzle[i]]); + } + } else + accessChain.swizzle = swizzle; + + // determine if we need to track this swizzle anymore + simplifyAccessChainSwizzle(); +} + +// Comments in header +void Builder::accessChainStore(Id rvalue) +{ + assert(accessChain.isRValue == false); + + transferAccessChainSwizzle(true); + Id base = collapseAccessChain(); + + if (accessChain.swizzle.size() && accessChain.component != NoResult) + MissingFunctionality("simultaneous l-value swizzle and dynamic component selection"); + + // If swizzle still exists, it is out-of-order or not full, we must load the 
target vector, + // extract and insert elements to perform writeMask and/or swizzle. + Id source = NoResult; + if (accessChain.swizzle.size()) { + Id tempBaseId = createLoad(base); + source = createLvalueSwizzle(getTypeId(tempBaseId), tempBaseId, rvalue, accessChain.swizzle); + } + + // dynamic component selection + if (accessChain.component != NoResult) { + Id tempBaseId = (source == NoResult) ? createLoad(base) : source; + source = createVectorInsertDynamic(tempBaseId, getTypeId(tempBaseId), rvalue, accessChain.component); + } + + if (source == NoResult) + source = rvalue; + + createStore(source, base); +} + +// Comments in header +Id Builder::accessChainLoad(Decoration precision, Id resultType) +{ + Id id; + + if (accessChain.isRValue) { + // transfer access chain, but keep it static, so we can stay in registers + transferAccessChainSwizzle(false); + if (accessChain.indexChain.size() > 0) { + Id swizzleBase = accessChain.preSwizzleBaseType != NoType ? accessChain.preSwizzleBaseType : resultType; + + // if all the accesses are constants, we can use OpCompositeExtract + std::vector indexes; + bool constant = true; + for (int i = 0; i < (int)accessChain.indexChain.size(); ++i) { + if (isConstantScalar(accessChain.indexChain[i])) + indexes.push_back(getConstantScalar(accessChain.indexChain[i])); + else { + constant = false; + break; + } + } + + if (constant) + id = createCompositeExtract(accessChain.base, swizzleBase, indexes); + else { + // make a new function variable for this r-value + Id lValue = createVariable(StorageClassFunction, getTypeId(accessChain.base), "indexable"); + + // store into it + createStore(accessChain.base, lValue); + + // move base to the new variable + accessChain.base = lValue; + accessChain.isRValue = false; + + // load through the access chain + id = createLoad(collapseAccessChain()); + } + setPrecision(id, precision); + } else + id = accessChain.base; // no precision, it was set when this was defined + } else { + 
transferAccessChainSwizzle(true); + // load through the access chain + id = createLoad(collapseAccessChain()); + setPrecision(id, precision); + } + + // Done, unless there are swizzles to do + if (accessChain.swizzle.size() == 0 && accessChain.component == NoResult) + return id; + + // Do remaining swizzling + // First, static swizzling + if (accessChain.swizzle.size()) { + // static swizzle + Id swizzledType = getScalarTypeId(getTypeId(id)); + if (accessChain.swizzle.size() > 1) + swizzledType = makeVectorType(swizzledType, (int)accessChain.swizzle.size()); + id = createRvalueSwizzle(precision, swizzledType, id, accessChain.swizzle); + } + + // dynamic single-component selection + if (accessChain.component != NoResult) + id = setPrecision(createVectorExtractDynamic(id, resultType, accessChain.component), precision); + + return id; +} + +Id Builder::accessChainGetLValue() +{ + assert(accessChain.isRValue == false); + + transferAccessChainSwizzle(true); + Id lvalue = collapseAccessChain(); + + // If swizzle exists, it is out-of-order or not full, we must load the target vector, + // extract and insert elements to perform writeMask and/or swizzle. This does not + // go with getting a direct l-value pointer. + assert(accessChain.swizzle.size() == 0); + assert(accessChain.component == NoResult); + + return lvalue; +} + +// comment in header +Id Builder::accessChainGetInferredType() +{ + // anything to operate on? + if (accessChain.base == NoResult) + return NoType; + Id type = getTypeId(accessChain.base); + + // do initial dereference + if (! 
accessChain.isRValue) + type = getContainedTypeId(type); + + // dereference each index + for (auto deref : accessChain.indexChain) { + if (isStructType(type)) + type = getContainedTypeId(type, getConstantScalar(deref)); + else + type = getContainedTypeId(type); + } + + // dereference swizzle + if (accessChain.swizzle.size() == 1) + type = getContainedTypeId(type); + else if (accessChain.swizzle.size() > 1) + type = makeVectorType(getContainedTypeId(type), + static_cast(accessChain.swizzle.size())); + + // dereference component selection + if (accessChain.component) + type = getContainedTypeId(type); + + return type; +} + +void Builder::dump(std::vector& out) const +{ + // Header, before first instructions: + out.push_back(MagicNumber); + out.push_back(Version); + out.push_back(builderNumber); + out.push_back(uniqueId + 1); + out.push_back(0); + + // Capabilities + for (auto cap : capabilities) { + Instruction capInst(0, 0, OpCapability); + capInst.addImmediateOperand(cap); + capInst.dump(out); + } + + // TBD: OpExtension ... 
+ + dumpInstructions(out, imports); + Instruction memInst(0, 0, OpMemoryModel); + memInst.addImmediateOperand(addressModel); + memInst.addImmediateOperand(memoryModel); + memInst.dump(out); + + // Instructions saved up while building: + dumpInstructions(out, entryPoints); + dumpInstructions(out, executionModes); + + // Debug instructions + if (source != SourceLanguageUnknown) { + Instruction sourceInst(0, 0, OpSource); + sourceInst.addImmediateOperand(source); + sourceInst.addImmediateOperand(sourceVersion); + sourceInst.dump(out); + } + for (int e = 0; e < (int)extensions.size(); ++e) { + Instruction extInst(0, 0, OpSourceExtension); + extInst.addStringOperand(extensions[e]); + extInst.dump(out); + } + dumpInstructions(out, names); + dumpInstructions(out, lines); + + // Annotation instructions + dumpInstructions(out, decorations); + + dumpInstructions(out, constantsTypesGlobals); + dumpInstructions(out, externals); + + // The functions + module.dump(out); +} + +// +// Protected methods. +// + +// Turn the described access chain in 'accessChain' into an instruction +// computing its address. This *cannot* include complex swizzles, which must +// be handled after this is called, but it does include swizzles that select +// an individual element, as a single address of a scalar type can be +// computed by an OpAccessChain instruction. +Id Builder::collapseAccessChain() +{ + assert(accessChain.isRValue == false); + + if (accessChain.indexChain.size() > 0) { + if (accessChain.instr == 0) { + StorageClass storageClass = (StorageClass)module.getStorageClass(getTypeId(accessChain.base)); + accessChain.instr = createAccessChain(storageClass, accessChain.base, accessChain.indexChain); + } + + return accessChain.instr; + } else + return accessChain.base; + + // note that non-trivial swizzling is left pending... +} + +// clear out swizzle if it is redundant, that is reselecting the same components +// that would be present without the swizzle. 
+void Builder::simplifyAccessChainSwizzle() +{ + // If the swizzle has fewer components than the vector, it is subsetting, and must stay + // to preserve that fact. + if (getNumTypeComponents(accessChain.preSwizzleBaseType) > (int)accessChain.swizzle.size()) + return; + + // if components are out of order, it is a swizzle + for (unsigned int i = 0; i < accessChain.swizzle.size(); ++i) { + if (i != accessChain.swizzle[i]) + return; + } + + // otherwise, there is no need to track this swizzle + accessChain.swizzle.clear(); + if (accessChain.component == NoResult) + accessChain.preSwizzleBaseType = NoType; +} + +// To the extent any swizzling can become part of the chain +// of accesses instead of a post operation, make it so. +// If 'dynamic' is true, include transfering a non-static component index, +// otherwise, only transfer static indexes. +// +// Also, Boolean vectors are likely to be special. While +// for external storage, they should only be integer types, +// function-local bool vectors could use sub-word indexing, +// so keep that as a separate Insert/Extract on a loaded vector. +void Builder::transferAccessChainSwizzle(bool dynamic) +{ + // too complex? + if (accessChain.swizzle.size() > 1) + return; + + // non existent? + if (accessChain.swizzle.size() == 0 && accessChain.component == NoResult) + return; + + // single component... 
+ + // skip doing it for Boolean vectors + if (isBoolType(getContainedTypeId(accessChain.preSwizzleBaseType))) + return; + + if (accessChain.swizzle.size() == 1) { + // handle static component + accessChain.indexChain.push_back(makeUintConstant(accessChain.swizzle.front())); + accessChain.swizzle.clear(); + // note, the only valid remaining dynamic access would be to this one + // component, so don't bother even looking at accessChain.component + accessChain.preSwizzleBaseType = NoType; + accessChain.component = NoResult; + } else if (dynamic && accessChain.component != NoResult) { + // handle dynamic component + accessChain.indexChain.push_back(accessChain.component); + accessChain.preSwizzleBaseType = NoType; + accessChain.component = NoResult; + } +} + +// Utility method for creating a new block and setting the insert point to +// be in it. This is useful for flow-control operations that need a "dummy" +// block proceeding them (e.g. instructions after a discard, etc). +void Builder::createAndSetNoPredecessorBlock(const char* /*name*/) +{ + Block* block = new Block(getUniqueId(), buildPoint->getParent()); + block->setUnreachable(); + buildPoint->getParent().addBlock(block); + setBuildPoint(block); + + //if (name) + // addName(block->getId(), name); +} + +// Comments in header +void Builder::createBranch(Block* block) +{ + Instruction* branch = new Instruction(OpBranch); + branch->addIdOperand(block->getId()); + buildPoint->addInstruction(std::unique_ptr(branch)); + block->addPredecessor(buildPoint); +} + +void Builder::createSelectionMerge(Block* mergeBlock, unsigned int control) +{ + Instruction* merge = new Instruction(OpSelectionMerge); + merge->addIdOperand(mergeBlock->getId()); + merge->addImmediateOperand(control); + buildPoint->addInstruction(std::unique_ptr(merge)); +} + +void Builder::createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control) +{ + Instruction* merge = new Instruction(OpLoopMerge); + 
merge->addIdOperand(mergeBlock->getId()); + merge->addIdOperand(continueBlock->getId()); + merge->addImmediateOperand(control); + buildPoint->addInstruction(std::unique_ptr(merge)); +} + +void Builder::createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock) +{ + Instruction* branch = new Instruction(OpBranchConditional); + branch->addIdOperand(condition); + branch->addIdOperand(thenBlock->getId()); + branch->addIdOperand(elseBlock->getId()); + buildPoint->addInstruction(std::unique_ptr(branch)); + thenBlock->addPredecessor(buildPoint); + elseBlock->addPredecessor(buildPoint); +} + +void Builder::dumpInstructions(std::vector& out, const std::vector >& instructions) const +{ + for (int i = 0; i < (int)instructions.size(); ++i) { + instructions[i]->dump(out); + } +} + +void TbdFunctionality(const char* tbd) +{ + static std::unordered_set issued; + + if (issued.find(tbd) == issued.end()) { + printf("TBD functionality: %s\n", tbd); + issued.insert(tbd); + } +} + +void MissingFunctionality(const char* fun) +{ + printf("Missing functionality: %s\n", fun); +} + +}; // end spv namespace diff --git a/third_party/glslang-spirv/SpvBuilder.h b/third_party/glslang-spirv/SpvBuilder.h new file mode 100644 index 000000000..d6dc61218 --- /dev/null +++ b/third_party/glslang-spirv/SpvBuilder.h @@ -0,0 +1,576 @@ +// +//Copyright (C) 2014-2015 LunarG, Inc. +//Copyright (C) 2015-2016 Google, Inc. +// +//All rights reserved. +// +//Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions +//are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. 
Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +//POSSIBILITY OF SUCH DAMAGE. + +// +// Author: John Kessenich, LunarG +// + +// +// "Builder" is an interface to fully build SPIR-V IR. Allocate one of +// these to build (a thread safe) internal SPIR-V representation (IR), +// and then dump it as a binary stream according to the SPIR-V specification. +// +// A Builder has a 1:1 relationship with a SPIR-V module. 
+// + +#pragma once +#ifndef SpvBuilder_H +#define SpvBuilder_H + +#include "spirv.hpp" +#include "spvIR.h" + +#include +#include +#include +#include +#include + +namespace spv { + +class Builder { +public: + Builder(unsigned int userNumber); + virtual ~Builder(); + + static const int maxMatrixSize = 4; + + void setSource(spv::SourceLanguage lang, int version) + { + source = lang; + sourceVersion = version; + } + void addSourceExtension(const char* ext) { extensions.push_back(ext); } + Id import(const char*); + void setMemoryModel(spv::AddressingModel addr, spv::MemoryModel mem) + { + addressModel = addr; + memoryModel = mem; + } + + void addCapability(spv::Capability cap) { capabilities.insert(cap); } + + // To get a new for anything needing a new one. + Id getUniqueId() { return ++uniqueId; } + + // To get a set of new s, e.g., for a set of function parameters + Id getUniqueIds(int numIds) + { + Id id = uniqueId + 1; + uniqueId += numIds; + return id; + } + + // For creating new types (will return old type if the requested one was already made). + Id makeVoidType(); + Id makeBoolType(); + Id makePointer(StorageClass, Id type); + Id makeIntegerType(int width, bool hasSign); // generic + Id makeIntType(int width) { return makeIntegerType(width, true); } + Id makeUintType(int width) { return makeIntegerType(width, false); } + Id makeFloatType(int width); + Id makeStructType(const std::vector& members, const char*); + Id makeStructResultType(Id type0, Id type1); + Id makeVectorType(Id component, int size); + Id makeMatrixType(Id component, int cols, int rows); + Id makeArrayType(Id element, Id sizeId, int stride); // 0 stride means no stride decoration + Id makeRuntimeArray(Id element); + Id makeFunctionType(Id returnType, const std::vector& paramTypes); + Id makeImageType(Id sampledType, Dim, bool depth, bool arrayed, bool ms, unsigned sampled, ImageFormat format); + Id makeSamplerType(); + Id makeSampledImageType(Id imageType); + + // For querying about types. 
+ Id getTypeId(Id resultId) const { return module.getTypeId(resultId); } + Id getDerefTypeId(Id resultId) const; + Op getOpCode(Id id) const { return module.getInstruction(id)->getOpCode(); } + Op getTypeClass(Id typeId) const { return getOpCode(typeId); } + Op getMostBasicTypeClass(Id typeId) const; + int getNumComponents(Id resultId) const { return getNumTypeComponents(getTypeId(resultId)); } + int getNumTypeConstituents(Id typeId) const; + int getNumTypeComponents(Id typeId) const { return getNumTypeConstituents(typeId); } + Id getScalarTypeId(Id typeId) const; + Id getContainedTypeId(Id typeId) const; + Id getContainedTypeId(Id typeId, int) const; + StorageClass getTypeStorageClass(Id typeId) const { return module.getStorageClass(typeId); } + ImageFormat getImageTypeFormat(Id typeId) const { return (ImageFormat)module.getInstruction(typeId)->getImmediateOperand(6); } + + bool isPointer(Id resultId) const { return isPointerType(getTypeId(resultId)); } + bool isScalar(Id resultId) const { return isScalarType(getTypeId(resultId)); } + bool isVector(Id resultId) const { return isVectorType(getTypeId(resultId)); } + bool isMatrix(Id resultId) const { return isMatrixType(getTypeId(resultId)); } + bool isAggregate(Id resultId) const { return isAggregateType(getTypeId(resultId)); } + bool isSampledImage(Id resultId) const { return isSampledImageType(getTypeId(resultId)); } + + bool isBoolType(Id typeId) const { return groupedTypes[OpTypeBool].size() > 0 && typeId == groupedTypes[OpTypeBool].back()->getResultId(); } + bool isPointerType(Id typeId) const { return getTypeClass(typeId) == OpTypePointer; } + bool isScalarType(Id typeId) const { return getTypeClass(typeId) == OpTypeFloat || getTypeClass(typeId) == OpTypeInt || getTypeClass(typeId) == OpTypeBool; } + bool isVectorType(Id typeId) const { return getTypeClass(typeId) == OpTypeVector; } + bool isMatrixType(Id typeId) const { return getTypeClass(typeId) == OpTypeMatrix; } + bool isStructType(Id typeId) const { 
return getTypeClass(typeId) == OpTypeStruct; } + bool isArrayType(Id typeId) const { return getTypeClass(typeId) == OpTypeArray; } + bool isAggregateType(Id typeId) const { return isArrayType(typeId) || isStructType(typeId); } + bool isImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeImage; } + bool isSamplerType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampler; } + bool isSampledImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampledImage; } + + bool isConstantOpCode(Op opcode) const; + bool isConstant(Id resultId) const { return isConstantOpCode(getOpCode(resultId)); } + bool isConstantScalar(Id resultId) const { return getOpCode(resultId) == OpConstant; } + unsigned int getConstantScalar(Id resultId) const { return module.getInstruction(resultId)->getImmediateOperand(0); } + StorageClass getStorageClass(Id resultId) const { return getTypeStorageClass(getTypeId(resultId)); } + + int getTypeNumColumns(Id typeId) const + { + assert(isMatrixType(typeId)); + return getNumTypeConstituents(typeId); + } + int getNumColumns(Id resultId) const { return getTypeNumColumns(getTypeId(resultId)); } + int getTypeNumRows(Id typeId) const + { + assert(isMatrixType(typeId)); + return getNumTypeComponents(getContainedTypeId(typeId)); + } + int getNumRows(Id resultId) const { return getTypeNumRows(getTypeId(resultId)); } + + Dim getTypeDimensionality(Id typeId) const + { + assert(isImageType(typeId)); + return (Dim)module.getInstruction(typeId)->getImmediateOperand(1); + } + Id getImageType(Id resultId) const + { + Id typeId = getTypeId(resultId); + assert(isImageType(typeId) || isSampledImageType(typeId)); + return isSampledImageType(typeId) ? 
module.getInstruction(typeId)->getIdOperand(0) : typeId; + } + bool isArrayedImageType(Id typeId) const + { + assert(isImageType(typeId)); + return module.getInstruction(typeId)->getImmediateOperand(3) != 0; + } + + // For making new constants (will return old constant if the requested one was already made). + Id makeBoolConstant(bool b, bool specConstant = false); + Id makeIntConstant(int i, bool specConstant = false) { return makeIntConstant(makeIntType(32), (unsigned)i, specConstant); } + Id makeUintConstant(unsigned u, bool specConstant = false) { return makeIntConstant(makeUintType(32), u, specConstant); } + Id makeFloatConstant(float f, bool specConstant = false); + Id makeDoubleConstant(double d, bool specConstant = false); + + // Turn the array of constants into a proper spv constant of the requested type. + Id makeCompositeConstant(Id type, std::vector& comps, bool specConst = false); + + // Methods for adding information outside the CFG. + Instruction* addEntryPoint(ExecutionModel, Function*, const char* name); + void addExecutionMode(Function*, ExecutionMode mode, int value1 = -1, int value2 = -1, int value3 = -1); + void addName(Id, const char* name); + void addMemberName(Id, int member, const char* name); + void addLine(Id target, Id fileName, int line, int column); + void addDecoration(Id, Decoration, int num = -1); + void addMemberDecoration(Id, unsigned int member, Decoration, int num = -1); + + // At the end of what block do the next create*() instructions go? + void setBuildPoint(Block* bp) { buildPoint = bp; } + Block* getBuildPoint() const { return buildPoint; } + + // Make the main function. The returned pointer is only valid + // for the lifetime of this builder. + Function* makeMain(); + + // Make a shader-style function, and create its entry block if entry is non-zero. + // Return the function, pass back the entry. + // The returned pointer is only valid for the lifetime of this builder. 
+ Function* makeFunctionEntry(Decoration precision, Id returnType, const char* name, const std::vector& paramTypes, + const std::vector& precisions, Block **entry = 0); + + // Create a return. An 'implicit' return is one not appearing in the source + // code. In the case of an implicit return, no post-return block is inserted. + void makeReturn(bool implicit, Id retVal = 0); + + // Generate all the code needed to finish up a function. + void leaveFunction(); + + // Create a discard. + void makeDiscard(); + + // Create a global or function local or IO variable. + Id createVariable(StorageClass, Id type, const char* name = 0); + + // Create an intermediate with an undefined value. + Id createUndefined(Id type); + + // Store into an Id and return the l-value + void createStore(Id rValue, Id lValue); + + // Load from an Id and return it + Id createLoad(Id lValue); + + // Create an OpAccessChain instruction + Id createAccessChain(StorageClass, Id base, std::vector& offsets); + + // Create an OpArrayLength instruction + Id createArrayLength(Id base, unsigned int member); + + // Create an OpCompositeExtract instruction + Id createCompositeExtract(Id composite, Id typeId, unsigned index); + Id createCompositeExtract(Id composite, Id typeId, std::vector& indexes); + Id createCompositeInsert(Id object, Id composite, Id typeId, unsigned index); + Id createCompositeInsert(Id object, Id composite, Id typeId, std::vector& indexes); + + Id createVectorExtractDynamic(Id vector, Id typeId, Id componentIndex); + Id createVectorInsertDynamic(Id vector, Id typeId, Id component, Id componentIndex); + + void createNoResultOp(Op); + void createNoResultOp(Op, Id operand); + void createNoResultOp(Op, const std::vector& operands); + void createControlBarrier(Scope execution, Scope memory, MemorySemanticsMask); + void createMemoryBarrier(unsigned executionScope, unsigned memorySemantics); + Id createUnaryOp(Op, Id typeId, Id operand); + Id createBinOp(Op, Id typeId, Id operand1, Id 
operand2); + Id createTriOp(Op, Id typeId, Id operand1, Id operand2, Id operand3); + Id createOp(Op, Id typeId, const std::vector& operands); + Id createFunctionCall(spv::Function*, std::vector&); + + // Take an rvalue (source) and a set of channels to extract from it to + // make a new rvalue, which is returned. + Id createRvalueSwizzle(Decoration precision, Id typeId, Id source, std::vector& channels); + + // Take a copy of an lvalue (target) and a source of components, and set the + // source components into the lvalue where the 'channels' say to put them. + // An updated version of the target is returned. + // (No true lvalue or stores are used.) + Id createLvalueSwizzle(Id typeId, Id target, Id source, std::vector& channels); + + // If both the id and precision are valid, the id + // gets tagged with the requested precision. + // The passed in id is always the returned id, to simplify use patterns. + Id setPrecision(Id id, Decoration precision) + { + if (precision != NoPrecision && id != NoResult) + addDecoration(id, precision); + + return id; + } + + // Can smear a scalar to a vector for the following forms: + // - promoteScalar(scalar, vector) // smear scalar to width of vector + // - promoteScalar(vector, scalar) // smear scalar to width of vector + // - promoteScalar(pointer, scalar) // smear scalar to width of what pointer points to + // - promoteScalar(scalar, scalar) // do nothing + // Other forms are not allowed. + // + // Generally, the type of 'scalar' does not need to be the same type as the components in 'vector'. + // The type of the created vector is a vector of components of the same type as the scalar. + // + // Note: One of the arguments will change, with the result coming back that way rather than + // through the return value. + void promoteScalar(Decoration precision, Id& left, Id& right); + + // Make a value by smearing the scalar to fill the type. + // vectorType should be the correct type for making a vector of scalarVal. 
+ // (No conversions are done.) + Id smearScalar(Decoration precision, Id scalarVal, Id vectorType); + + // Create a call to a built-in function. + Id createBuiltinCall(Id resultType, Id builtins, int entryPoint, std::vector& args); + + // List of parameters used to create a texture operation + struct TextureParameters { + Id sampler; + Id coords; + Id bias; + Id lod; + Id Dref; + Id offset; + Id offsets; + Id gradX; + Id gradY; + Id sample; + Id comp; + Id texelOut; + Id lodClamp; + }; + + // Select the correct texture operation based on all inputs, and emit the correct instruction + Id createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj, bool gather, bool noImplicit, const TextureParameters&); + + // Emit the OpTextureQuery* instruction that was passed in. + // Figure out the right return value and type, and return it. + Id createTextureQueryCall(Op, const TextureParameters&); + + Id createSamplePositionCall(Decoration precision, Id, Id); + + Id createBitFieldExtractCall(Decoration precision, Id, Id, Id, bool isSigned); + Id createBitFieldInsertCall(Decoration precision, Id, Id, Id, Id); + + // Reduction comparison for composites: For equal and not-equal resulting in a scalar. + Id createCompositeCompare(Decoration precision, Id, Id, bool /* true if for equal, false if for not-equal */); + + // OpCompositeConstruct + Id createCompositeConstruct(Id typeId, std::vector& constituents); + + // vector or scalar constructor + Id createConstructor(Decoration precision, const std::vector& sources, Id resultTypeId); + + // matrix constructor + Id createMatrixConstructor(Decoration precision, const std::vector& sources, Id constructee); + + // Helper to use for building nested control flow with if-then-else. 
+ class If { + public: + If(Id condition, Builder& builder); + ~If() {} + + void makeBeginElse(); + void makeEndIf(); + + private: + If(const If&); + If& operator=(If&); + + Builder& builder; + Id condition; + Function* function; + Block* headerBlock; + Block* thenBlock; + Block* elseBlock; + Block* mergeBlock; + }; + + // Make a switch statement. A switch has 'numSegments' of pieces of code, not containing + // any case/default labels, all separated by one or more case/default labels. Each possible + // case value v is a jump to the caseValues[v] segment. The defaultSegment is also in this + // number space. How to compute the value is given by 'condition', as in switch(condition). + // + // The SPIR-V Builder will maintain the stack of post-switch merge blocks for nested switches. + // + // Use a defaultSegment < 0 if there is no default segment (to branch to post switch). + // + // Returns the right set of basic blocks to start each code segment with, so that the caller's + // recursion stack can hold the memory for it. + // + void makeSwitch(Id condition, int numSegments, std::vector& caseValues, std::vector& valueToSegment, int defaultSegment, + std::vector& segmentBB); // return argument + + // Add a branch to the innermost switch's merge block. + void addSwitchBreak(); + + // Move to the next code segment, passing in the return argument in makeSwitch() + void nextSwitchSegment(std::vector& segmentBB, int segment); + + // Finish off the innermost switch. + void endSwitch(std::vector& segmentBB); + + struct LoopBlocks { + Block &head, &body, &merge, &continue_target; + }; + + // Start a new loop and prepare the builder to generate code for it. Until + // closeLoop() is called for this loop, createLoopContinue() and + // createLoopExit() will target its corresponding blocks. + LoopBlocks& makeNewLoop(); + + // Create a new block in the function containing the build point. Memory is + // owned by the function object. 
+ Block& makeNewBlock(); + + // Add a branch to the continue_target of the current (innermost) loop. + void createLoopContinue(); + + // Add an exit (e.g. "break") from the innermost loop that we're currently + // in. + void createLoopExit(); + + // Close the innermost loop that you're in + void closeLoop(); + + // + // Access chain design for an R-Value vs. L-Value: + // + // There is a single access chain the builder is building at + // any particular time. Such a chain can be used to either to a load or + // a store, when desired. + // + // Expressions can be r-values, l-values, or both, or only r-values: + // a[b.c].d = .... // l-value + // ... = a[b.c].d; // r-value, that also looks like an l-value + // ++a[b.c].d; // r-value and l-value + // (x + y)[2]; // r-value only, can't possibly be l-value + // + // Computing an r-value means generating code. Hence, + // r-values should only be computed when they are needed, not speculatively. + // + // Computing an l-value means saving away information for later use in the compiler, + // no code is generated until the l-value is later dereferenced. It is okay + // to speculatively generate an l-value, just not okay to speculatively dereference it. + // + // The base of the access chain (the left-most variable or expression + // from which everything is based) can be set either as an l-value + // or as an r-value. Most efficient would be to set an l-value if one + // is available. If an expression was evaluated, the resulting r-value + // can be set as the chain base. + // + // The users of this single access chain can save and restore if they + // want to nest or manage multiple chains. 
+ // + + struct AccessChain { + Id base; // for l-values, pointer to the base object, for r-values, the base object + std::vector indexChain; + Id instr; // cache the instruction that generates this access chain + std::vector swizzle; // each std::vector element selects the next GLSL component number + Id component; // a dynamic component index, can coexist with a swizzle, done after the swizzle, NoResult if not present + Id preSwizzleBaseType; // dereferenced type, before swizzle or component is applied; NoType unless a swizzle or component is present + bool isRValue; // true if 'base' is an r-value, otherwise, base is an l-value + }; + + // + // the SPIR-V builder maintains a single active chain that + // the following methods operated on + // + + // for external save and restore + AccessChain getAccessChain() { return accessChain; } + void setAccessChain(AccessChain newChain) { accessChain = newChain; } + + // clear accessChain + void clearAccessChain(); + + // set new base as an l-value base + void setAccessChainLValue(Id lValue) + { + assert(isPointer(lValue)); + accessChain.base = lValue; + } + + // set new base value as an r-value + void setAccessChainRValue(Id rValue) + { + accessChain.isRValue = true; + accessChain.base = rValue; + } + + // push offset onto the end of the chain + void accessChainPush(Id offset) + { + accessChain.indexChain.push_back(offset); + } + + // push new swizzle onto the end of any existing swizzle, merging into a single swizzle + void accessChainPushSwizzle(std::vector& swizzle, Id preSwizzleBaseType); + + // push a variable component selection onto the access chain; supporting only one, so unsided + void accessChainPushComponent(Id component, Id preSwizzleBaseType) + { + accessChain.component = component; + if (accessChain.preSwizzleBaseType == NoType) + accessChain.preSwizzleBaseType = preSwizzleBaseType; + } + + // use accessChain and swizzle to store value + void accessChainStore(Id rvalue); + + // use accessChain and swizzle 
to load an r-value + Id accessChainLoad(Decoration precision, Id ResultType); + + // get the direct pointer for an l-value + Id accessChainGetLValue(); + + // Get the inferred SPIR-V type of the result of the current access chain, + // based on the type of the base and the chain of dereferences. + Id accessChainGetInferredType(); + + void dump(std::vector&) const; + + void createBranch(Block* block); + void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock); + void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control); + + protected: + Id makeIntConstant(Id typeId, unsigned value, bool specConstant); + Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned value) const; + Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned v1, unsigned v2) const; + Id findCompositeConstant(Op typeClass, std::vector& comps) const; + Id collapseAccessChain(); + void transferAccessChainSwizzle(bool dynamic); + void simplifyAccessChainSwizzle(); + void createAndSetNoPredecessorBlock(const char*); + void createSelectionMerge(Block* mergeBlock, unsigned int control); + void dumpInstructions(std::vector&, const std::vector >&) const; + + SourceLanguage source; + int sourceVersion; + std::vector extensions; + AddressingModel addressModel; + MemoryModel memoryModel; + std::set capabilities; + int builderNumber; + Module module; + Block* buildPoint; + Id uniqueId; + Function* mainFunction; + AccessChain accessChain; + + // special blocks of instructions for output + std::vector > imports; + std::vector > entryPoints; + std::vector > executionModes; + std::vector > names; + std::vector > lines; + std::vector > decorations; + std::vector > constantsTypesGlobals; + std::vector > externals; + std::vector > functions; + + // not output, internally used for quick & dirty canonical (unique) creation + std::vector groupedConstants[OpConstant]; // all types appear before OpConstant + std::vector groupedTypes[OpConstant]; + + 
// stack of switches + std::stack switchMerges; + + // Our loop stack. + std::stack loops; +}; // end Builder class + +// Use for non-fatal notes about what's not complete +void TbdFunctionality(const char*); + +// Use for fatal missing functionality +void MissingFunctionality(const char*); + +}; // end spv namespace + +#endif // SpvBuilder_H diff --git a/third_party/glslang-spirv/disassemble.cpp b/third_party/glslang-spirv/disassemble.cpp new file mode 100644 index 000000000..b2d30bec8 --- /dev/null +++ b/third_party/glslang-spirv/disassemble.cpp @@ -0,0 +1,576 @@ +// +//Copyright (C) 2014-2015 LunarG, Inc. +// +//All rights reserved. +// +//Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions +//are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +//POSSIBILITY OF SUCH DAMAGE. + +// +// Author: John Kessenich, LunarG +// + +// +// Disassembler for SPIR-V. +// + +#include +#include +#include +#include +#include +#include +#include + +namespace spv { + // Include C-based headers that don't have a namespace + #include "GLSL.std.450.h" +} +const char* GlslStd450DebugNames[spv::GLSLstd450Count]; + +#include "disassemble.h" +#include "doc.h" + +namespace spv { + +void Kill(std::ostream& out, const char* message) +{ + out << std::endl << "Disassembly failed: " << message << std::endl; + exit(1); +} + +// used to identify the extended instruction library imported when printing +enum ExtInstSet { + GLSL450Inst, + OpenCLExtInst, +}; + +// Container class for a single instance of a SPIR-V stream, with methods for disassembly. +class SpirvStream { +public: + SpirvStream(std::ostream& out, const std::vector& stream) : out(out), stream(stream), word(0), nextNestedControl(0) { } + virtual ~SpirvStream() { } + + void validate(); + void processInstructions(); + +protected: + SpirvStream(const SpirvStream&); + SpirvStream& operator=(const SpirvStream&); + Op getOpCode(int id) const { return idInstruction[id] ? 
(Op)(stream[idInstruction[id]] & OpCodeMask) : OpNop; } + + // Output methods + void outputIndent(); + void formatId(Id id, std::stringstream&); + void outputResultId(Id id); + void outputTypeId(Id id); + void outputId(Id id); + void outputMask(OperandClass operandClass, unsigned mask); + void disassembleImmediates(int numOperands); + void disassembleIds(int numOperands); + int disassembleString(); + void disassembleInstruction(Id resultId, Id typeId, Op opCode, int numOperands); + + // Data + std::ostream& out; // where to write the disassembly + const std::vector& stream; // the actual word stream + int size; // the size of the word stream + int word; // the next word of the stream to read + + // map each to the instruction that created it + Id bound; + std::vector idInstruction; // the word offset into the stream where the instruction for result [id] starts; 0 if not yet seen (forward reference or function parameter) + + std::vector idDescriptor; // the best text string known for explaining the + + // schema + unsigned int schema; + + // stack of structured-merge points + std::stack nestedControl; + Id nextNestedControl; // need a slight delay for when we are nested +}; + +void SpirvStream::validate() +{ + size = (int)stream.size(); + if (size < 4) + Kill(out, "stream is too short"); + + // Magic number + if (stream[word++] != MagicNumber) { + out << "Bad magic number"; + return; + } + + // Version + out << "// Module Version " << std::hex << stream[word++] << std::endl; + + // Generator's magic number + out << "// Generated by (magic number): " << std::hex << stream[word++] << std::dec << std::endl; + + // Result bound + bound = stream[word++]; + idInstruction.resize(bound); + idDescriptor.resize(bound); + out << "// Id's are bound by " << bound << std::endl; + out << std::endl; + + // Reserved schema, must be 0 for now + schema = stream[word++]; + if (schema != 0) + Kill(out, "bad schema, must be 0"); +} + +// Loop over all the instructions, in order, 
processing each. +// Boiler plate for each is handled here directly, the rest is dispatched. +void SpirvStream::processInstructions() +{ + // Instructions + while (word < size) { + int instructionStart = word; + + // Instruction wordCount and opcode + unsigned int firstWord = stream[word]; + unsigned wordCount = firstWord >> WordCountShift; + Op opCode = (Op)(firstWord & OpCodeMask); + int nextInst = word + wordCount; + ++word; + + // Presence of full instruction + if (nextInst > size) + Kill(out, "stream instruction terminated too early"); + + // Base for computing number of operands; will be updated as more is learned + unsigned numOperands = wordCount - 1; + + // Type + Id typeId = 0; + if (InstructionDesc[opCode].hasType()) { + typeId = stream[word++]; + --numOperands; + } + + // Result + Id resultId = 0; + if (InstructionDesc[opCode].hasResult()) { + resultId = stream[word++]; + --numOperands; + + // save instruction for future reference + idInstruction[resultId] = instructionStart; + } + + outputResultId(resultId); + outputTypeId(typeId); + outputIndent(); + + // Hand off the Op and all its operands + disassembleInstruction(resultId, typeId, opCode, numOperands); + if (word != nextInst) { + out << " ERROR, incorrect number of operands consumed. 
At " << word << " instead of " << nextInst << " instruction start was " << instructionStart; + word = nextInst; + } + out << std::endl; + } +} + +void SpirvStream::outputIndent() +{ + for (int i = 0; i < (int)nestedControl.size(); ++i) + out << " "; +} + +void SpirvStream::formatId(Id id, std::stringstream& idStream) +{ + if (id >= bound) + Kill(out, "Bad "); + + if (id != 0) { + idStream << id; + if (idDescriptor[id].size() > 0) + idStream << "(" << idDescriptor[id] << ")"; + } +} + +void SpirvStream::outputResultId(Id id) +{ + const int width = 16; + std::stringstream idStream; + formatId(id, idStream); + out << std::setw(width) << std::right << idStream.str(); + if (id != 0) + out << ":"; + else + out << " "; + + if (nestedControl.size() && id == nestedControl.top()) + nestedControl.pop(); +} + +void SpirvStream::outputTypeId(Id id) +{ + const int width = 12; + std::stringstream idStream; + formatId(id, idStream); + out << std::setw(width) << std::right << idStream.str() << " "; +} + +void SpirvStream::outputId(Id id) +{ + if (id >= bound) + Kill(out, "Bad "); + + out << id; + if (idDescriptor[id].size() > 0) + out << "(" << idDescriptor[id] << ")"; +} + +void SpirvStream::outputMask(OperandClass operandClass, unsigned mask) +{ + if (mask == 0) + out << "None"; + else { + for (int m = 0; m < OperandClassParams[operandClass].ceiling; ++m) { + if (mask & (1 << m)) + out << OperandClassParams[operandClass].getName(m) << " "; + } + } +} + +void SpirvStream::disassembleImmediates(int numOperands) +{ + for (int i = 0; i < numOperands; ++i) { + out << stream[word++]; + if (i < numOperands - 1) + out << " "; + } +} + +void SpirvStream::disassembleIds(int numOperands) +{ + for (int i = 0; i < numOperands; ++i) { + outputId(stream[word++]); + if (i < numOperands - 1) + out << " "; + } +} + +// return the number of operands consumed by the string +int SpirvStream::disassembleString() +{ + int startWord = word; + + out << " \""; + + const char* wordString; + bool done = 
false; + do { + unsigned int content = stream[word]; + wordString = (const char*)&content; + for (int charCount = 0; charCount < 4; ++charCount) { + if (*wordString == 0) { + done = true; + break; + } + out << *(wordString++); + } + ++word; + } while (! done); + + out << "\""; + + return word - startWord; +} + +void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode, int numOperands) +{ + // Process the opcode + + out << (OpcodeString(opCode) + 2); // leave out the "Op" + + if (opCode == OpLoopMerge || opCode == OpSelectionMerge) + nextNestedControl = stream[word]; + else if (opCode == OpBranchConditional || opCode == OpSwitch) { + if (nextNestedControl) { + nestedControl.push(nextNestedControl); + nextNestedControl = 0; + } + } else if (opCode == OpExtInstImport) { + idDescriptor[resultId] = (const char*)(&stream[word]); + } + else { + if (idDescriptor[resultId].size() == 0) { + switch (opCode) { + case OpTypeInt: + idDescriptor[resultId] = "int"; + break; + case OpTypeFloat: + idDescriptor[resultId] = "float"; + break; + case OpTypeBool: + idDescriptor[resultId] = "bool"; + break; + case OpTypeStruct: + idDescriptor[resultId] = "struct"; + break; + case OpTypePointer: + idDescriptor[resultId] = "ptr"; + break; + case OpTypeVector: + if (idDescriptor[stream[word]].size() > 0) + idDescriptor[resultId].append(idDescriptor[stream[word]].begin(), idDescriptor[stream[word]].begin() + 1); + idDescriptor[resultId].append("vec"); + switch (stream[word + 1]) { + case 2: idDescriptor[resultId].append("2"); break; + case 3: idDescriptor[resultId].append("3"); break; + case 4: idDescriptor[resultId].append("4"); break; + case 8: idDescriptor[resultId].append("8"); break; + case 16: idDescriptor[resultId].append("16"); break; + case 32: idDescriptor[resultId].append("32"); break; + default: break; + } + break; + default: + break; + } + } + } + + // Process the operands. Note, a new context-dependent set could be + // swapped in mid-traversal. 
+ + // Handle images specially, so can put out helpful strings. + if (opCode == OpTypeImage) { + out << " "; + disassembleIds(1); + out << " " << DimensionString((Dim)stream[word++]); + out << (stream[word++] != 0 ? " depth" : ""); + out << (stream[word++] != 0 ? " array" : ""); + out << (stream[word++] != 0 ? " multi-sampled" : ""); + switch (stream[word++]) { + case 0: out << " runtime"; break; + case 1: out << " sampled"; break; + case 2: out << " nonsampled"; break; + } + out << " format:" << ImageFormatString((ImageFormat)stream[word++]); + + if (numOperands == 8) { + out << " " << AccessQualifierString(stream[word++]); + } + return; + } + + // Handle all the parameterized operands + for (int op = 0; op < InstructionDesc[opCode].operands.getNum() && numOperands > 0; ++op) { + out << " "; + OperandClass operandClass = InstructionDesc[opCode].operands.getClass(op); + switch (operandClass) { + case OperandId: + case OperandScope: + case OperandMemorySemantics: + disassembleIds(1); + --numOperands; + // Get names for printing "(XXX)" for readability, *after* this id + if (opCode == OpName) + idDescriptor[stream[word - 1]] = (const char*)(&stream[word]); + break; + case OperandVariableIds: + disassembleIds(numOperands); + return; + case OperandImageOperands: + outputMask(OperandImageOperands, stream[word++]); + --numOperands; + disassembleIds(numOperands); + return; + case OperandOptionalLiteral: + case OperandVariableLiterals: + if ((opCode == OpDecorate && stream[word - 1] == DecorationBuiltIn) || + (opCode == OpMemberDecorate && stream[word - 1] == DecorationBuiltIn)) { + out << BuiltInString(stream[word++]); + --numOperands; + ++op; + } + disassembleImmediates(numOperands); + return; + case OperandVariableIdLiteral: + while (numOperands > 0) { + out << std::endl; + outputResultId(0); + outputTypeId(0); + outputIndent(); + out << " Type "; + disassembleIds(1); + out << ", member "; + disassembleImmediates(1); + numOperands -= 2; + } + return; + case 
OperandVariableLiteralId: + while (numOperands > 0) { + out << std::endl; + outputResultId(0); + outputTypeId(0); + outputIndent(); + out << " case "; + disassembleImmediates(1); + out << ": "; + disassembleIds(1); + numOperands -= 2; + } + return; + case OperandLiteralNumber: + disassembleImmediates(1); + --numOperands; + if (opCode == OpExtInst) { + ExtInstSet extInstSet = GLSL450Inst; + if (0 == memcmp("OpenCL", (const char*)(idDescriptor[stream[word-2]].c_str()), 6)) { + extInstSet = OpenCLExtInst; + } + unsigned entrypoint = stream[word - 1]; + if (extInstSet == GLSL450Inst) { + if (entrypoint < GLSLstd450Count) { + out << "(" << GlslStd450DebugNames[entrypoint] << ")"; + } + } + } + break; + case OperandOptionalLiteralString: + case OperandLiteralString: + numOperands -= disassembleString(); + break; + default: + assert(operandClass >= OperandSource && operandClass < OperandOpcode); + + if (OperandClassParams[operandClass].bitmask) + outputMask(operandClass, stream[word++]); + else + out << OperandClassParams[operandClass].getName(stream[word++]); + --numOperands; + + break; + } + } + + return; +} + +void GLSLstd450GetDebugNames(const char** names) +{ + for (int i = 0; i < GLSLstd450Count; ++i) + names[i] = "Unknown"; + + names[GLSLstd450Round] = "Round"; + names[GLSLstd450RoundEven] = "RoundEven"; + names[GLSLstd450Trunc] = "Trunc"; + names[GLSLstd450FAbs] = "FAbs"; + names[GLSLstd450SAbs] = "SAbs"; + names[GLSLstd450FSign] = "FSign"; + names[GLSLstd450SSign] = "SSign"; + names[GLSLstd450Floor] = "Floor"; + names[GLSLstd450Ceil] = "Ceil"; + names[GLSLstd450Fract] = "Fract"; + names[GLSLstd450Radians] = "Radians"; + names[GLSLstd450Degrees] = "Degrees"; + names[GLSLstd450Sin] = "Sin"; + names[GLSLstd450Cos] = "Cos"; + names[GLSLstd450Tan] = "Tan"; + names[GLSLstd450Asin] = "Asin"; + names[GLSLstd450Acos] = "Acos"; + names[GLSLstd450Atan] = "Atan"; + names[GLSLstd450Sinh] = "Sinh"; + names[GLSLstd450Cosh] = "Cosh"; + names[GLSLstd450Tanh] = "Tanh"; + 
names[GLSLstd450Asinh] = "Asinh"; + names[GLSLstd450Acosh] = "Acosh"; + names[GLSLstd450Atanh] = "Atanh"; + names[GLSLstd450Atan2] = "Atan2"; + names[GLSLstd450Pow] = "Pow"; + names[GLSLstd450Exp] = "Exp"; + names[GLSLstd450Log] = "Log"; + names[GLSLstd450Exp2] = "Exp2"; + names[GLSLstd450Log2] = "Log2"; + names[GLSLstd450Sqrt] = "Sqrt"; + names[GLSLstd450InverseSqrt] = "InverseSqrt"; + names[GLSLstd450Determinant] = "Determinant"; + names[GLSLstd450MatrixInverse] = "MatrixInverse"; + names[GLSLstd450Modf] = "Modf"; + names[GLSLstd450ModfStruct] = "ModfStruct"; + names[GLSLstd450FMin] = "FMin"; + names[GLSLstd450SMin] = "SMin"; + names[GLSLstd450UMin] = "UMin"; + names[GLSLstd450FMax] = "FMax"; + names[GLSLstd450SMax] = "SMax"; + names[GLSLstd450UMax] = "UMax"; + names[GLSLstd450FClamp] = "FClamp"; + names[GLSLstd450SClamp] = "SClamp"; + names[GLSLstd450UClamp] = "UClamp"; + names[GLSLstd450FMix] = "FMix"; + names[GLSLstd450Step] = "Step"; + names[GLSLstd450SmoothStep] = "SmoothStep"; + names[GLSLstd450Fma] = "Fma"; + names[GLSLstd450Frexp] = "Frexp"; + names[GLSLstd450FrexpStruct] = "FrexpStruct"; + names[GLSLstd450Ldexp] = "Ldexp"; + names[GLSLstd450PackSnorm4x8] = "PackSnorm4x8"; + names[GLSLstd450PackUnorm4x8] = "PackUnorm4x8"; + names[GLSLstd450PackSnorm2x16] = "PackSnorm2x16"; + names[GLSLstd450PackUnorm2x16] = "PackUnorm2x16"; + names[GLSLstd450PackHalf2x16] = "PackHalf2x16"; + names[GLSLstd450PackDouble2x32] = "PackDouble2x32"; + names[GLSLstd450UnpackSnorm2x16] = "UnpackSnorm2x16"; + names[GLSLstd450UnpackUnorm2x16] = "UnpackUnorm2x16"; + names[GLSLstd450UnpackHalf2x16] = "UnpackHalf2x16"; + names[GLSLstd450UnpackSnorm4x8] = "UnpackSnorm4x8"; + names[GLSLstd450UnpackUnorm4x8] = "UnpackUnorm4x8"; + names[GLSLstd450UnpackDouble2x32] = "UnpackDouble2x32"; + names[GLSLstd450Length] = "Length"; + names[GLSLstd450Distance] = "Distance"; + names[GLSLstd450Cross] = "Cross"; + names[GLSLstd450Normalize] = "Normalize"; + names[GLSLstd450FaceForward] = "FaceForward"; 
+ names[GLSLstd450Reflect] = "Reflect"; + names[GLSLstd450Refract] = "Refract"; + names[GLSLstd450FindILsb] = "FindILsb"; + names[GLSLstd450FindSMsb] = "FindSMsb"; + names[GLSLstd450FindUMsb] = "FindUMsb"; + names[GLSLstd450InterpolateAtCentroid] = "InterpolateAtCentroid"; + names[GLSLstd450InterpolateAtSample] = "InterpolateAtSample"; + names[GLSLstd450InterpolateAtOffset] = "InterpolateAtOffset"; +} + +void Disassemble(std::ostream& out, const std::vector& stream) +{ + SpirvStream SpirvStream(out, stream); + GLSLstd450GetDebugNames(GlslStd450DebugNames); + SpirvStream.validate(); + SpirvStream.processInstructions(); +} + +}; // end namespace spv diff --git a/third_party/glslang-spirv/disassemble.h b/third_party/glslang-spirv/disassemble.h new file mode 100644 index 000000000..be537a371 --- /dev/null +++ b/third_party/glslang-spirv/disassemble.h @@ -0,0 +1,56 @@ +// +//Copyright (C) 2014-2015 LunarG, Inc. +// +//All rights reserved. +// +//Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions +//are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +//POSSIBILITY OF SUCH DAMAGE. + +// +// Author: John Kessenich, LunarG +// + +// +// Disassembler for SPIR-V. +// + +#pragma once +#ifndef disassembler_H +#define disassembler_H + +#include +#include + +namespace spv { + + void Disassemble(std::ostream& out, const std::vector&); + +}; // end namespace spv + +#endif // disassembler_H diff --git a/third_party/glslang-spirv/doc.cpp b/third_party/glslang-spirv/doc.cpp new file mode 100644 index 000000000..7cf1c87f0 --- /dev/null +++ b/third_party/glslang-spirv/doc.cpp @@ -0,0 +1,2711 @@ +// +//Copyright (C) 2014-2015 LunarG, Inc. +// +//All rights reserved. +// +//Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions +//are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +//POSSIBILITY OF SUCH DAMAGE. + +// +// Author: John Kessenich, LunarG +// + +// +// 1) Programatically fill in instruction/operand information. +// This can be used for disassembly, printing documentation, etc. +// +// 2) Print documentation from this parameterization. +// + +#include "doc.h" + +#include +#include +#include + +namespace spv { + +// +// Whole set of functions that translate enumerants to their text strings for +// the specification (or their sanitized versions for auto-generating the +// spirv headers. +// +// Also, the ceilings are declared next to these, to help keep them in sync. 
+// Ceilings should be +// - one more than the maximum value an enumerant takes on, for non-mask enumerants +// (for non-sparse enums, this is the number of enumurants) +// - the number of bits consumed by the set of masks +// (for non-sparse mask enums, this is the number of enumurants) +// + +const int SourceLanguageCeiling = 5; + +const char* SourceString(int source) +{ + switch (source) { + case 0: return "Unknown"; + case 1: return "ESSL"; + case 2: return "GLSL"; + case 3: return "OpenCL_C"; + case 4: return "OpenCL_CPP"; + + case SourceLanguageCeiling: + default: return "Bad"; + } +} + +const int ExecutionModelCeiling = 7; + +const char* ExecutionModelString(int model) +{ + switch (model) { + case 0: return "Vertex"; + case 1: return "TessellationControl"; + case 2: return "TessellationEvaluation"; + case 3: return "Geometry"; + case 4: return "Fragment"; + case 5: return "GLCompute"; + case 6: return "Kernel"; + + case ExecutionModelCeiling: + default: return "Bad"; + } +} + +const int AddressingModelCeiling = 3; + +const char* AddressingString(int addr) +{ + switch (addr) { + case 0: return "Logical"; + case 1: return "Physical32"; + case 2: return "Physical64"; + + case AddressingModelCeiling: + default: return "Bad"; + } +} + +const int MemoryModelCeiling = 3; + +const char* MemoryString(int mem) +{ + switch (mem) { + case 0: return "Simple"; + case 1: return "GLSL450"; + case 2: return "OpenCL"; + + case MemoryModelCeiling: + default: return "Bad"; + } +} + +const int ExecutionModeCeiling = 33; + +const char* ExecutionModeString(int mode) +{ + switch (mode) { + case 0: return "Invocations"; + case 1: return "SpacingEqual"; + case 2: return "SpacingFractionalEven"; + case 3: return "SpacingFractionalOdd"; + case 4: return "VertexOrderCw"; + case 5: return "VertexOrderCcw"; + case 6: return "PixelCenterInteger"; + case 7: return "OriginUpperLeft"; + case 8: return "OriginLowerLeft"; + case 9: return "EarlyFragmentTests"; + case 10: return "PointMode"; + 
case 11: return "Xfb"; + case 12: return "DepthReplacing"; + case 13: return "Bad"; + case 14: return "DepthGreater"; + case 15: return "DepthLess"; + case 16: return "DepthUnchanged"; + case 17: return "LocalSize"; + case 18: return "LocalSizeHint"; + case 19: return "InputPoints"; + case 20: return "InputLines"; + case 21: return "InputLinesAdjacency"; + case 22: return "Triangles"; + case 23: return "InputTrianglesAdjacency"; + case 24: return "Quads"; + case 25: return "Isolines"; + case 26: return "OutputVertices"; + case 27: return "OutputPoints"; + case 28: return "OutputLineStrip"; + case 29: return "OutputTriangleStrip"; + case 30: return "VecTypeHint"; + case 31: return "ContractionOff"; + case 32: return "Bad"; + + case ExecutionModeCeiling: + default: return "Bad"; + } +} + +const int StorageClassCeiling = 12; + +const char* StorageClassString(int StorageClass) +{ + switch (StorageClass) { + case 0: return "UniformConstant"; + case 1: return "Input"; + case 2: return "Uniform"; + case 3: return "Output"; + case 4: return "Workgroup"; + case 5: return "CrossWorkgroup"; + case 6: return "Private"; + case 7: return "Function"; + case 8: return "Generic"; + case 9: return "PushConstant"; + case 10: return "AtomicCounter"; + case 11: return "Image"; + + case StorageClassCeiling: + default: return "Bad"; + } +} + +const int DecorationCeiling = 45; + +const char* DecorationString(int decoration) +{ + switch (decoration) { + case 0: return "RelaxedPrecision"; + case 1: return "SpecId"; + case 2: return "Block"; + case 3: return "BufferBlock"; + case 4: return "RowMajor"; + case 5: return "ColMajor"; + case 6: return "ArrayStride"; + case 7: return "MatrixStride"; + case 8: return "GLSLShared"; + case 9: return "GLSLPacked"; + case 10: return "CPacked"; + case 11: return "BuiltIn"; + case 12: return "Bad"; + case 13: return "NoPerspective"; + case 14: return "Flat"; + case 15: return "Patch"; + case 16: return "Centroid"; + case 17: return "Sample"; + case 18: 
return "Invariant"; + case 19: return "Restrict"; + case 20: return "Aliased"; + case 21: return "Volatile"; + case 22: return "Constant"; + case 23: return "Coherent"; + case 24: return "NonWritable"; + case 25: return "NonReadable"; + case 26: return "Uniform"; + case 27: return "Bad"; + case 28: return "SaturatedConversion"; + case 29: return "Stream"; + case 30: return "Location"; + case 31: return "Component"; + case 32: return "Index"; + case 33: return "Binding"; + case 34: return "DescriptorSet"; + case 35: return "Offset"; + case 36: return "XfbBuffer"; + case 37: return "XfbStride"; + case 38: return "FuncParamAttr"; + case 39: return "FP Rounding Mode"; + case 40: return "FP Fast Math Mode"; + case 41: return "Linkage Attributes"; + case 42: return "NoContraction"; + case 43: return "InputAttachmentIndex"; + case 44: return "Alignment"; + + case DecorationCeiling: + default: return "Bad"; + } +} + +const int BuiltInCeiling = 44; + +const char* BuiltInString(int builtIn) +{ + switch (builtIn) { + case 0: return "Position"; + case 1: return "PointSize"; + case 2: return "Bad"; + case 3: return "ClipDistance"; + case 4: return "CullDistance"; + case 5: return "VertexId"; + case 6: return "InstanceId"; + case 7: return "PrimitiveId"; + case 8: return "InvocationId"; + case 9: return "Layer"; + case 10: return "ViewportIndex"; + case 11: return "TessLevelOuter"; + case 12: return "TessLevelInner"; + case 13: return "TessCoord"; + case 14: return "PatchVertices"; + case 15: return "FragCoord"; + case 16: return "PointCoord"; + case 17: return "FrontFacing"; + case 18: return "SampleId"; + case 19: return "SamplePosition"; + case 20: return "SampleMask"; + case 21: return "Bad"; + case 22: return "FragDepth"; + case 23: return "HelperInvocation"; + case 24: return "NumWorkgroups"; + case 25: return "WorkgroupSize"; + case 26: return "WorkgroupId"; + case 27: return "LocalInvocationId"; + case 28: return "GlobalInvocationId"; + case 29: return 
"LocalInvocationIndex"; + case 30: return "WorkDim"; + case 31: return "GlobalSize"; + case 32: return "EnqueuedWorkgroupSize"; + case 33: return "GlobalOffset"; + case 34: return "GlobalLinearId"; + case 35: return "Bad"; + case 36: return "SubgroupSize"; + case 37: return "SubgroupMaxSize"; + case 38: return "NumSubgroups"; + case 39: return "NumEnqueuedSubgroups"; + case 40: return "SubgroupId"; + case 41: return "SubgroupLocalInvocationId"; + case 42: return "VertexIndex"; // TBD: put next to VertexId? + case 43: return "InstanceIndex"; // TBD: put next to InstanceId? + + case BuiltInCeiling: + default: return "Bad"; + } +} + +const int DimensionCeiling = 7; + +const char* DimensionString(int dim) +{ + switch (dim) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "Cube"; + case 4: return "Rect"; + case 5: return "Buffer"; + case 6: return "SubpassData"; + + case DimensionCeiling: + default: return "Bad"; + } +} + +const int SamplerAddressingModeCeiling = 5; + +const char* SamplerAddressingModeString(int mode) +{ + switch (mode) { + case 0: return "None"; + case 1: return "ClampToEdge"; + case 2: return "Clamp"; + case 3: return "Repeat"; + case 4: return "RepeatMirrored"; + + case SamplerAddressingModeCeiling: + default: return "Bad"; + } +} + +const int SamplerFilterModeCeiling = 2; + +const char* SamplerFilterModeString(int mode) +{ + switch (mode) { + case 0: return "Nearest"; + case 1: return "Linear"; + + case SamplerFilterModeCeiling: + default: return "Bad"; + } +} + +const int ImageFormatCeiling = 40; + +const char* ImageFormatString(int format) +{ + switch (format) { + case 0: return "Unknown"; + + // ES/Desktop float + case 1: return "Rgba32f"; + case 2: return "Rgba16f"; + case 3: return "R32f"; + case 4: return "Rgba8"; + case 5: return "Rgba8Snorm"; + + // Desktop float + case 6: return "Rg32f"; + case 7: return "Rg16f"; + case 8: return "R11fG11fB10f"; + case 9: return "R16f"; + case 10: return "Rgba16"; + 
case 11: return "Rgb10A2"; + case 12: return "Rg16"; + case 13: return "Rg8"; + case 14: return "R16"; + case 15: return "R8"; + case 16: return "Rgba16Snorm"; + case 17: return "Rg16Snorm"; + case 18: return "Rg8Snorm"; + case 19: return "R16Snorm"; + case 20: return "R8Snorm"; + + // ES/Desktop int + case 21: return "Rgba32i"; + case 22: return "Rgba16i"; + case 23: return "Rgba8i"; + case 24: return "R32i"; + + // Desktop int + case 25: return "Rg32i"; + case 26: return "Rg16i"; + case 27: return "Rg8i"; + case 28: return "R16i"; + case 29: return "R8i"; + + // ES/Desktop uint + case 30: return "Rgba32ui"; + case 31: return "Rgba16ui"; + case 32: return "Rgba8ui"; + case 33: return "R32ui"; + + // Desktop uint + case 34: return "Rgb10a2ui"; + case 35: return "Rg32ui"; + case 36: return "Rg16ui"; + case 37: return "Rg8ui"; + case 38: return "R16ui"; + case 39: return "R8ui"; + + case ImageFormatCeiling: + default: + return "Bad"; + } +} + +const int ImageChannelOrderCeiling = 19; + +const char* ImageChannelOrderString(int format) +{ + switch (format) { + case 0: return "R"; + case 1: return "A"; + case 2: return "RG"; + case 3: return "RA"; + case 4: return "RGB"; + case 5: return "RGBA"; + case 6: return "BGRA"; + case 7: return "ARGB"; + case 8: return "Intensity"; + case 9: return "Luminance"; + case 10: return "Rx"; + case 11: return "RGx"; + case 12: return "RGBx"; + case 13: return "Depth"; + case 14: return "DepthStencil"; + case 15: return "sRGB"; + case 16: return "sRGBx"; + case 17: return "sRGBA"; + case 18: return "sBGRA"; + + case ImageChannelOrderCeiling: + default: + return "Bad"; + } +} + +const int ImageChannelDataTypeCeiling = 17; + +const char* ImageChannelDataTypeString(int type) +{ + switch (type) + { + case 0: return "SnormInt8"; + case 1: return "SnormInt16"; + case 2: return "UnormInt8"; + case 3: return "UnormInt16"; + case 4: return "UnormShort565"; + case 5: return "UnormShort555"; + case 6: return "UnormInt101010"; + case 7: return 
"SignedInt8"; + case 8: return "SignedInt16"; + case 9: return "SignedInt32"; + case 10: return "UnsignedInt8"; + case 11: return "UnsignedInt16"; + case 12: return "UnsignedInt32"; + case 13: return "HalfFloat"; + case 14: return "Float"; + case 15: return "UnormInt24"; + case 16: return "UnormInt101010_2"; + + case ImageChannelDataTypeCeiling: + default: + return "Bad"; + } +} + +const int ImageOperandsCeiling = 8; + +const char* ImageOperandsString(int format) +{ + switch (format) { + case 0: return "Bias"; + case 1: return "Lod"; + case 2: return "Grad"; + case 3: return "ConstOffset"; + case 4: return "Offset"; + case 5: return "ConstOffsets"; + case 6: return "Sample"; + case 7: return "MinLod"; + + case ImageOperandsCeiling: + default: + return "Bad"; + } +} + +const int FPFastMathCeiling = 5; + +const char* FPFastMathString(int mode) +{ + switch (mode) { + case 0: return "NotNaN"; + case 1: return "NotInf"; + case 2: return "NSZ"; + case 3: return "AllowRecip"; + case 4: return "Fast"; + + case FPFastMathCeiling: + default: return "Bad"; + } +} + +const int FPRoundingModeCeiling = 4; + +const char* FPRoundingModeString(int mode) +{ + switch (mode) { + case 0: return "RTE"; + case 1: return "RTZ"; + case 2: return "RTP"; + case 3: return "RTN"; + + case FPRoundingModeCeiling: + default: return "Bad"; + } +} + +const int LinkageTypeCeiling = 2; + +const char* LinkageTypeString(int type) +{ + switch (type) { + case 0: return "Export"; + case 1: return "Import"; + + case LinkageTypeCeiling: + default: return "Bad"; + } +} + +const int FuncParamAttrCeiling = 8; + +const char* FuncParamAttrString(int attr) +{ + switch (attr) { + case 0: return "Zext"; + case 1: return "Sext"; + case 2: return "ByVal"; + case 3: return "Sret"; + case 4: return "NoAlias"; + case 5: return "NoCapture"; + case 6: return "NoWrite"; + case 7: return "NoReadWrite"; + + case FuncParamAttrCeiling: + default: return "Bad"; + } +} + +const int AccessQualifierCeiling = 3; + +const char* 
AccessQualifierString(int attr) +{ + switch (attr) { + case 0: return "ReadOnly"; + case 1: return "WriteOnly"; + case 2: return "ReadWrite"; + + case AccessQualifierCeiling: + default: return "Bad"; + } +} + +const int SelectControlCeiling = 2; + +const char* SelectControlString(int cont) +{ + switch (cont) { + case 0: return "Flatten"; + case 1: return "DontFlatten"; + + case SelectControlCeiling: + default: return "Bad"; + } +} + +const int LoopControlCeiling = 2; + +const char* LoopControlString(int cont) +{ + switch (cont) { + case 0: return "Unroll"; + case 1: return "DontUnroll"; + + case LoopControlCeiling: + default: return "Bad"; + } +} + +const int FunctionControlCeiling = 4; + +const char* FunctionControlString(int cont) +{ + switch (cont) { + case 0: return "Inline"; + case 1: return "DontInline"; + case 2: return "Pure"; + case 3: return "Const"; + + case FunctionControlCeiling: + default: return "Bad"; + } +} + +const int MemorySemanticsCeiling = 12; + +const char* MemorySemanticsString(int mem) +{ + // Note: No bits set (None) means "Relaxed" + switch (mem) { + case 0: return "Bad"; // Note: this is a placeholder for 'Consume' + case 1: return "Acquire"; + case 2: return "Release"; + case 3: return "AcquireRelease"; + case 4: return "SequentiallyConsistent"; + case 5: return "Bad"; // Note: reserved for future expansion + case 6: return "UniformMemory"; + case 7: return "SubgroupMemory"; + case 8: return "WorkgroupMemory"; + case 9: return "CrossWorkgroupMemory"; + case 10: return "AtomicCounterMemory"; + case 11: return "ImageMemory"; + + case MemorySemanticsCeiling: + default: return "Bad"; + } +} + +const int MemoryAccessCeiling = 3; + +const char* MemoryAccessString(int mem) +{ + switch (mem) { + case 0: return "Volatile"; + case 1: return "Aligned"; + case 2: return "Nontemporal"; + + case MemoryAccessCeiling: + default: return "Bad"; + } +} + +const int ScopeCeiling = 5; + +const char* ScopeString(int mem) +{ + switch (mem) { + case 0: return 
"CrossDevice"; + case 1: return "Device"; + case 2: return "Workgroup"; + case 3: return "Subgroup"; + case 4: return "Invocation"; + + case ScopeCeiling: + default: return "Bad"; + } +} + +const int GroupOperationCeiling = 3; + +const char* GroupOperationString(int gop) +{ + + switch (gop) + { + case 0: return "Reduce"; + case 1: return "InclusiveScan"; + case 2: return "ExclusiveScan"; + + case GroupOperationCeiling: + default: return "Bad"; + } +} + +const int KernelEnqueueFlagsCeiling = 3; + +const char* KernelEnqueueFlagsString(int flag) +{ + switch (flag) + { + case 0: return "NoWait"; + case 1: return "WaitKernel"; + case 2: return "WaitWorkGroup"; + + case KernelEnqueueFlagsCeiling: + default: return "Bad"; + } +} + +const int KernelProfilingInfoCeiling = 1; + +const char* KernelProfilingInfoString(int info) +{ + switch (info) + { + case 0: return "CmdExecTime"; + + case KernelProfilingInfoCeiling: + default: return "Bad"; + } +} + +const int CapabilityCeiling = 58; + +const char* CapabilityString(int info) +{ + switch (info) + { + case 0: return "Matrix"; + case 1: return "Shader"; + case 2: return "Geometry"; + case 3: return "Tessellation"; + case 4: return "Addresses"; + case 5: return "Linkage"; + case 6: return "Kernel"; + case 7: return "Vector16"; + case 8: return "Float16Buffer"; + case 9: return "Float16"; + case 10: return "Float64"; + case 11: return "Int64"; + case 12: return "Int64Atomics"; + case 13: return "ImageBasic"; + case 14: return "ImageReadWrite"; + case 15: return "ImageMipmap"; + case 16: return "Bad"; + case 17: return "Pipes"; + case 18: return "Groups"; + case 19: return "DeviceEnqueue"; + case 20: return "LiteralSampler"; + case 21: return "AtomicStorage"; + case 22: return "Int16"; + case 23: return "TessellationPointSize"; + case 24: return "GeometryPointSize"; + case 25: return "ImageGatherExtended"; + case 26: return "Bad"; + case 27: return "StorageImageMultisample"; + case 28: return "UniformBufferArrayDynamicIndexing"; + 
case 29: return "SampledImageArrayDynamicIndexing"; + case 30: return "StorageBufferArrayDynamicIndexing"; + case 31: return "StorageImageArrayDynamicIndexing"; + case 32: return "ClipDistance"; + case 33: return "CullDistance"; + case 34: return "ImageCubeArray"; + case 35: return "SampleRateShading"; + case 36: return "ImageRect"; + case 37: return "SampledRect"; + case 38: return "GenericPointer"; + case 39: return "Int8"; + case 40: return "InputAttachment"; + case 41: return "SparseResidency"; + case 42: return "MinLod"; + case 43: return "Sampled1D"; + case 44: return "Image1D"; + case 45: return "SampledCubeArray"; + case 46: return "SampledBuffer"; + case 47: return "ImageBuffer"; + case 48: return "ImageMSArray"; + case 49: return "StorageImageExtendedFormats"; + case 50: return "ImageQuery"; + case 51: return "DerivativeControl"; + case 52: return "InterpolationFunction"; + case 53: return "TransformFeedback"; + case 54: return "GeometryStreams"; + case 55: return "StorageImageReadWithoutFormat"; + case 56: return "StorageImageWriteWithoutFormat"; + case 57: return "MultiViewport"; + + case CapabilityCeiling: + default: return "Bad"; + } +} + +const char* OpcodeString(int op) +{ + switch (op) { + case 0: return "OpNop"; + case 1: return "OpUndef"; + case 2: return "OpSourceContinued"; + case 3: return "OpSource"; + case 4: return "OpSourceExtension"; + case 5: return "OpName"; + case 6: return "OpMemberName"; + case 7: return "OpString"; + case 8: return "OpLine"; + case 9: return "Bad"; + case 10: return "OpExtension"; + case 11: return "OpExtInstImport"; + case 12: return "OpExtInst"; + case 13: return "Bad"; + case 14: return "OpMemoryModel"; + case 15: return "OpEntryPoint"; + case 16: return "OpExecutionMode"; + case 17: return "OpCapability"; + case 18: return "Bad"; + case 19: return "OpTypeVoid"; + case 20: return "OpTypeBool"; + case 21: return "OpTypeInt"; + case 22: return "OpTypeFloat"; + case 23: return "OpTypeVector"; + case 24: return 
"OpTypeMatrix"; + case 25: return "OpTypeImage"; + case 26: return "OpTypeSampler"; + case 27: return "OpTypeSampledImage"; + case 28: return "OpTypeArray"; + case 29: return "OpTypeRuntimeArray"; + case 30: return "OpTypeStruct"; + case 31: return "OpTypeOpaque"; + case 32: return "OpTypePointer"; + case 33: return "OpTypeFunction"; + case 34: return "OpTypeEvent"; + case 35: return "OpTypeDeviceEvent"; + case 36: return "OpTypeReserveId"; + case 37: return "OpTypeQueue"; + case 38: return "OpTypePipe"; + case 39: return "OpTypeForwardPointer"; + case 40: return "Bad"; + case 41: return "OpConstantTrue"; + case 42: return "OpConstantFalse"; + case 43: return "OpConstant"; + case 44: return "OpConstantComposite"; + case 45: return "OpConstantSampler"; + case 46: return "OpConstantNull"; + case 47: return "Bad"; + case 48: return "OpSpecConstantTrue"; + case 49: return "OpSpecConstantFalse"; + case 50: return "OpSpecConstant"; + case 51: return "OpSpecConstantComposite"; + case 52: return "OpSpecConstantOp"; + case 53: return "Bad"; + case 54: return "OpFunction"; + case 55: return "OpFunctionParameter"; + case 56: return "OpFunctionEnd"; + case 57: return "OpFunctionCall"; + case 58: return "Bad"; + case 59: return "OpVariable"; + case 60: return "OpImageTexelPointer"; + case 61: return "OpLoad"; + case 62: return "OpStore"; + case 63: return "OpCopyMemory"; + case 64: return "OpCopyMemorySized"; + case 65: return "OpAccessChain"; + case 66: return "OpInBoundsAccessChain"; + case 67: return "OpPtrAccessChain"; + case 68: return "OpArrayLength"; + case 69: return "OpGenericPtrMemSemantics"; + case 70: return "OpInBoundsPtrAccessChain"; + case 71: return "OpDecorate"; + case 72: return "OpMemberDecorate"; + case 73: return "OpDecorationGroup"; + case 74: return "OpGroupDecorate"; + case 75: return "OpGroupMemberDecorate"; + case 76: return "Bad"; + case 77: return "OpVectorExtractDynamic"; + case 78: return "OpVectorInsertDynamic"; + case 79: return 
"OpVectorShuffle"; + case 80: return "OpCompositeConstruct"; + case 81: return "OpCompositeExtract"; + case 82: return "OpCompositeInsert"; + case 83: return "OpCopyObject"; + case 84: return "OpTranspose"; + case 85: return "Bad"; + case 86: return "OpSampledImage"; + case 87: return "OpImageSampleImplicitLod"; + case 88: return "OpImageSampleExplicitLod"; + case 89: return "OpImageSampleDrefImplicitLod"; + case 90: return "OpImageSampleDrefExplicitLod"; + case 91: return "OpImageSampleProjImplicitLod"; + case 92: return "OpImageSampleProjExplicitLod"; + case 93: return "OpImageSampleProjDrefImplicitLod"; + case 94: return "OpImageSampleProjDrefExplicitLod"; + case 95: return "OpImageFetch"; + case 96: return "OpImageGather"; + case 97: return "OpImageDrefGather"; + case 98: return "OpImageRead"; + case 99: return "OpImageWrite"; + case 100: return "OpImage"; + case 101: return "OpImageQueryFormat"; + case 102: return "OpImageQueryOrder"; + case 103: return "OpImageQuerySizeLod"; + case 104: return "OpImageQuerySize"; + case 105: return "OpImageQueryLod"; + case 106: return "OpImageQueryLevels"; + case 107: return "OpImageQuerySamples"; + case 108: return "Bad"; + case 109: return "OpConvertFToU"; + case 110: return "OpConvertFToS"; + case 111: return "OpConvertSToF"; + case 112: return "OpConvertUToF"; + case 113: return "OpUConvert"; + case 114: return "OpSConvert"; + case 115: return "OpFConvert"; + case 116: return "OpQuantizeToF16"; + case 117: return "OpConvertPtrToU"; + case 118: return "OpSatConvertSToU"; + case 119: return "OpSatConvertUToS"; + case 120: return "OpConvertUToPtr"; + case 121: return "OpPtrCastToGeneric"; + case 122: return "OpGenericCastToPtr"; + case 123: return "OpGenericCastToPtrExplicit"; + case 124: return "OpBitcast"; + case 125: return "Bad"; + case 126: return "OpSNegate"; + case 127: return "OpFNegate"; + case 128: return "OpIAdd"; + case 129: return "OpFAdd"; + case 130: return "OpISub"; + case 131: return "OpFSub"; + case 132: 
return "OpIMul"; + case 133: return "OpFMul"; + case 134: return "OpUDiv"; + case 135: return "OpSDiv"; + case 136: return "OpFDiv"; + case 137: return "OpUMod"; + case 138: return "OpSRem"; + case 139: return "OpSMod"; + case 140: return "OpFRem"; + case 141: return "OpFMod"; + case 142: return "OpVectorTimesScalar"; + case 143: return "OpMatrixTimesScalar"; + case 144: return "OpVectorTimesMatrix"; + case 145: return "OpMatrixTimesVector"; + case 146: return "OpMatrixTimesMatrix"; + case 147: return "OpOuterProduct"; + case 148: return "OpDot"; + case 149: return "OpIAddCarry"; + case 150: return "OpISubBorrow"; + case 151: return "OpUMulExtended"; + case 152: return "OpSMulExtended"; + case 153: return "Bad"; + case 154: return "OpAny"; + case 155: return "OpAll"; + case 156: return "OpIsNan"; + case 157: return "OpIsInf"; + case 158: return "OpIsFinite"; + case 159: return "OpIsNormal"; + case 160: return "OpSignBitSet"; + case 161: return "OpLessOrGreater"; + case 162: return "OpOrdered"; + case 163: return "OpUnordered"; + case 164: return "OpLogicalEqual"; + case 165: return "OpLogicalNotEqual"; + case 166: return "OpLogicalOr"; + case 167: return "OpLogicalAnd"; + case 168: return "OpLogicalNot"; + case 169: return "OpSelect"; + case 170: return "OpIEqual"; + case 171: return "OpINotEqual"; + case 172: return "OpUGreaterThan"; + case 173: return "OpSGreaterThan"; + case 174: return "OpUGreaterThanEqual"; + case 175: return "OpSGreaterThanEqual"; + case 176: return "OpULessThan"; + case 177: return "OpSLessThan"; + case 178: return "OpULessThanEqual"; + case 179: return "OpSLessThanEqual"; + case 180: return "OpFOrdEqual"; + case 181: return "OpFUnordEqual"; + case 182: return "OpFOrdNotEqual"; + case 183: return "OpFUnordNotEqual"; + case 184: return "OpFOrdLessThan"; + case 185: return "OpFUnordLessThan"; + case 186: return "OpFOrdGreaterThan"; + case 187: return "OpFUnordGreaterThan"; + case 188: return "OpFOrdLessThanEqual"; + case 189: return 
"OpFUnordLessThanEqual"; + case 190: return "OpFOrdGreaterThanEqual"; + case 191: return "OpFUnordGreaterThanEqual"; + case 192: return "Bad"; + case 193: return "Bad"; + case 194: return "OpShiftRightLogical"; + case 195: return "OpShiftRightArithmetic"; + case 196: return "OpShiftLeftLogical"; + case 197: return "OpBitwiseOr"; + case 198: return "OpBitwiseXor"; + case 199: return "OpBitwiseAnd"; + case 200: return "OpNot"; + case 201: return "OpBitFieldInsert"; + case 202: return "OpBitFieldSExtract"; + case 203: return "OpBitFieldUExtract"; + case 204: return "OpBitReverse"; + case 205: return "OpBitCount"; + case 206: return "Bad"; + case 207: return "OpDPdx"; + case 208: return "OpDPdy"; + case 209: return "OpFwidth"; + case 210: return "OpDPdxFine"; + case 211: return "OpDPdyFine"; + case 212: return "OpFwidthFine"; + case 213: return "OpDPdxCoarse"; + case 214: return "OpDPdyCoarse"; + case 215: return "OpFwidthCoarse"; + case 216: return "Bad"; + case 217: return "Bad"; + case 218: return "OpEmitVertex"; + case 219: return "OpEndPrimitive"; + case 220: return "OpEmitStreamVertex"; + case 221: return "OpEndStreamPrimitive"; + case 222: return "Bad"; + case 223: return "Bad"; + case 224: return "OpControlBarrier"; + case 225: return "OpMemoryBarrier"; + case 226: return "Bad"; + case 227: return "OpAtomicLoad"; + case 228: return "OpAtomicStore"; + case 229: return "OpAtomicExchange"; + case 230: return "OpAtomicCompareExchange"; + case 231: return "OpAtomicCompareExchangeWeak"; + case 232: return "OpAtomicIIncrement"; + case 233: return "OpAtomicIDecrement"; + case 234: return "OpAtomicIAdd"; + case 235: return "OpAtomicISub"; + case 236: return "OpAtomicSMin"; + case 237: return "OpAtomicUMin"; + case 238: return "OpAtomicSMax"; + case 239: return "OpAtomicUMax"; + case 240: return "OpAtomicAnd"; + case 241: return "OpAtomicOr"; + case 242: return "OpAtomicXor"; + case 243: return "Bad"; + case 244: return "Bad"; + case 245: return "OpPhi"; + case 246: 
return "OpLoopMerge"; + case 247: return "OpSelectionMerge"; + case 248: return "OpLabel"; + case 249: return "OpBranch"; + case 250: return "OpBranchConditional"; + case 251: return "OpSwitch"; + case 252: return "OpKill"; + case 253: return "OpReturn"; + case 254: return "OpReturnValue"; + case 255: return "OpUnreachable"; + case 256: return "OpLifetimeStart"; + case 257: return "OpLifetimeStop"; + case 258: return "Bad"; + case 259: return "OpGroupAsyncCopy"; + case 260: return "OpGroupWaitEvents"; + case 261: return "OpGroupAll"; + case 262: return "OpGroupAny"; + case 263: return "OpGroupBroadcast"; + case 264: return "OpGroupIAdd"; + case 265: return "OpGroupFAdd"; + case 266: return "OpGroupFMin"; + case 267: return "OpGroupUMin"; + case 268: return "OpGroupSMin"; + case 269: return "OpGroupFMax"; + case 270: return "OpGroupUMax"; + case 271: return "OpGroupSMax"; + case 272: return "Bad"; + case 273: return "Bad"; + case 274: return "OpReadPipe"; + case 275: return "OpWritePipe"; + case 276: return "OpReservedReadPipe"; + case 277: return "OpReservedWritePipe"; + case 278: return "OpReserveReadPipePackets"; + case 279: return "OpReserveWritePipePackets"; + case 280: return "OpCommitReadPipe"; + case 281: return "OpCommitWritePipe"; + case 282: return "OpIsValidReserveId"; + case 283: return "OpGetNumPipePackets"; + case 284: return "OpGetMaxPipePackets"; + case 285: return "OpGroupReserveReadPipePackets"; + case 286: return "OpGroupReserveWritePipePackets"; + case 287: return "OpGroupCommitReadPipe"; + case 288: return "OpGroupCommitWritePipe"; + case 289: return "Bad"; + case 290: return "Bad"; + case 291: return "OpEnqueueMarker"; + case 292: return "OpEnqueueKernel"; + case 293: return "OpGetKernelNDrangeSubGroupCount"; + case 294: return "OpGetKernelNDrangeMaxSubGroupSize"; + case 295: return "OpGetKernelWorkGroupSize"; + case 296: return "OpGetKernelPreferredWorkGroupSizeMultiple"; + case 297: return "OpRetainEvent"; + case 298: return 
"OpReleaseEvent"; + case 299: return "OpCreateUserEvent"; + case 300: return "OpIsValidEvent"; + case 301: return "OpSetUserEventStatus"; + case 302: return "OpCaptureEventProfilingInfo"; + case 303: return "OpGetDefaultQueue"; + case 304: return "OpBuildNDRange"; + case 305: return "OpImageSparseSampleImplicitLod"; + case 306: return "OpImageSparseSampleExplicitLod"; + case 307: return "OpImageSparseSampleDrefImplicitLod"; + case 308: return "OpImageSparseSampleDrefExplicitLod"; + case 309: return "OpImageSparseSampleProjImplicitLod"; + case 310: return "OpImageSparseSampleProjExplicitLod"; + case 311: return "OpImageSparseSampleProjDrefImplicitLod"; + case 312: return "OpImageSparseSampleProjDrefExplicitLod"; + case 313: return "OpImageSparseFetch"; + case 314: return "OpImageSparseGather"; + case 315: return "OpImageSparseDrefGather"; + case 316: return "OpImageSparseTexelsResident"; + case 317: return "OpNoLine"; + case 318: return "OpAtomicFlagTestAndSet"; + case 319: return "OpAtomicFlagClear"; + case 320: return "OpImageSparseRead"; + + case OpcodeCeiling: + default: + return "Bad"; + } +} + +// The set of objects that hold all the instruction/operand +// parameterization information. 
+InstructionParameters InstructionDesc[OpcodeCeiling]; +OperandParameters ExecutionModeOperands[ExecutionModeCeiling]; +OperandParameters DecorationOperands[DecorationCeiling]; + +EnumDefinition OperandClassParams[OperandCount]; +EnumParameters ExecutionModelParams[ExecutionModelCeiling]; +EnumParameters AddressingParams[AddressingModelCeiling]; +EnumParameters MemoryParams[MemoryModelCeiling]; +EnumParameters ExecutionModeParams[ExecutionModeCeiling]; +EnumParameters StorageParams[StorageClassCeiling]; +EnumParameters SamplerAddressingModeParams[SamplerAddressingModeCeiling]; +EnumParameters SamplerFilterModeParams[SamplerFilterModeCeiling]; +EnumParameters ImageFormatParams[ImageFormatCeiling]; +EnumParameters ImageChannelOrderParams[ImageChannelOrderCeiling]; +EnumParameters ImageChannelDataTypeParams[ImageChannelDataTypeCeiling]; +EnumParameters ImageOperandsParams[ImageOperandsCeiling]; +EnumParameters FPFastMathParams[FPFastMathCeiling]; +EnumParameters FPRoundingModeParams[FPRoundingModeCeiling]; +EnumParameters LinkageTypeParams[LinkageTypeCeiling]; +EnumParameters DecorationParams[DecorationCeiling]; +EnumParameters BuiltInParams[BuiltInCeiling]; +EnumParameters DimensionalityParams[DimensionCeiling]; +EnumParameters FuncParamAttrParams[FuncParamAttrCeiling]; +EnumParameters AccessQualifierParams[AccessQualifierCeiling]; +EnumParameters GroupOperationParams[GroupOperationCeiling]; +EnumParameters LoopControlParams[FunctionControlCeiling]; +EnumParameters SelectionControlParams[SelectControlCeiling]; +EnumParameters FunctionControlParams[FunctionControlCeiling]; +EnumParameters MemorySemanticsParams[MemorySemanticsCeiling]; +EnumParameters MemoryAccessParams[MemoryAccessCeiling]; +EnumParameters ScopeParams[ScopeCeiling]; +EnumParameters KernelEnqueueFlagsParams[KernelEnqueueFlagsCeiling]; +EnumParameters KernelProfilingInfoParams[KernelProfilingInfoCeiling]; +EnumParameters CapabilityParams[CapabilityCeiling]; + +// Set up all the parameterizing 
descriptions of the opcodes, operands, etc. +void Parameterize() +{ + // only do this once. + static bool initialized = false; + if (initialized) + return; + initialized = true; + + // Exceptions to having a result and a resulting type . + // (Everything is initialized to have both). + + InstructionDesc[OpNop].setResultAndType(false, false); + InstructionDesc[OpSource].setResultAndType(false, false); + InstructionDesc[OpSourceContinued].setResultAndType(false, false); + InstructionDesc[OpSourceExtension].setResultAndType(false, false); + InstructionDesc[OpExtension].setResultAndType(false, false); + InstructionDesc[OpExtInstImport].setResultAndType(true, false); + InstructionDesc[OpCapability].setResultAndType(false, false); + InstructionDesc[OpMemoryModel].setResultAndType(false, false); + InstructionDesc[OpEntryPoint].setResultAndType(false, false); + InstructionDesc[OpExecutionMode].setResultAndType(false, false); + InstructionDesc[OpTypeVoid].setResultAndType(true, false); + InstructionDesc[OpTypeBool].setResultAndType(true, false); + InstructionDesc[OpTypeInt].setResultAndType(true, false); + InstructionDesc[OpTypeFloat].setResultAndType(true, false); + InstructionDesc[OpTypeVector].setResultAndType(true, false); + InstructionDesc[OpTypeMatrix].setResultAndType(true, false); + InstructionDesc[OpTypeImage].setResultAndType(true, false); + InstructionDesc[OpTypeSampler].setResultAndType(true, false); + InstructionDesc[OpTypeSampledImage].setResultAndType(true, false); + InstructionDesc[OpTypeArray].setResultAndType(true, false); + InstructionDesc[OpTypeRuntimeArray].setResultAndType(true, false); + InstructionDesc[OpTypeStruct].setResultAndType(true, false); + InstructionDesc[OpTypeOpaque].setResultAndType(true, false); + InstructionDesc[OpTypePointer].setResultAndType(true, false); + InstructionDesc[OpTypeForwardPointer].setResultAndType(false, false); + InstructionDesc[OpTypeFunction].setResultAndType(true, false); + 
InstructionDesc[OpTypeEvent].setResultAndType(true, false); + InstructionDesc[OpTypeDeviceEvent].setResultAndType(true, false); + InstructionDesc[OpTypeReserveId].setResultAndType(true, false); + InstructionDesc[OpTypeQueue].setResultAndType(true, false); + InstructionDesc[OpTypePipe].setResultAndType(true, false); + InstructionDesc[OpFunctionEnd].setResultAndType(false, false); + InstructionDesc[OpStore].setResultAndType(false, false); + InstructionDesc[OpImageWrite].setResultAndType(false, false); + InstructionDesc[OpDecorationGroup].setResultAndType(true, false); + InstructionDesc[OpDecorate].setResultAndType(false, false); + InstructionDesc[OpMemberDecorate].setResultAndType(false, false); + InstructionDesc[OpGroupDecorate].setResultAndType(false, false); + InstructionDesc[OpGroupMemberDecorate].setResultAndType(false, false); + InstructionDesc[OpName].setResultAndType(false, false); + InstructionDesc[OpMemberName].setResultAndType(false, false); + InstructionDesc[OpString].setResultAndType(true, false); + InstructionDesc[OpLine].setResultAndType(false, false); + InstructionDesc[OpNoLine].setResultAndType(false, false); + InstructionDesc[OpCopyMemory].setResultAndType(false, false); + InstructionDesc[OpCopyMemorySized].setResultAndType(false, false); + InstructionDesc[OpEmitVertex].setResultAndType(false, false); + InstructionDesc[OpEndPrimitive].setResultAndType(false, false); + InstructionDesc[OpEmitStreamVertex].setResultAndType(false, false); + InstructionDesc[OpEndStreamPrimitive].setResultAndType(false, false); + InstructionDesc[OpControlBarrier].setResultAndType(false, false); + InstructionDesc[OpMemoryBarrier].setResultAndType(false, false); + InstructionDesc[OpAtomicStore].setResultAndType(false, false); + InstructionDesc[OpLoopMerge].setResultAndType(false, false); + InstructionDesc[OpSelectionMerge].setResultAndType(false, false); + InstructionDesc[OpLabel].setResultAndType(true, false); + InstructionDesc[OpBranch].setResultAndType(false, false); + 
InstructionDesc[OpBranchConditional].setResultAndType(false, false); + InstructionDesc[OpSwitch].setResultAndType(false, false); + InstructionDesc[OpKill].setResultAndType(false, false); + InstructionDesc[OpReturn].setResultAndType(false, false); + InstructionDesc[OpReturnValue].setResultAndType(false, false); + InstructionDesc[OpUnreachable].setResultAndType(false, false); + InstructionDesc[OpLifetimeStart].setResultAndType(false, false); + InstructionDesc[OpLifetimeStop].setResultAndType(false, false); + InstructionDesc[OpCommitReadPipe].setResultAndType(false, false); + InstructionDesc[OpCommitWritePipe].setResultAndType(false, false); + InstructionDesc[OpGroupCommitWritePipe].setResultAndType(false, false); + InstructionDesc[OpGroupCommitReadPipe].setResultAndType(false, false); + InstructionDesc[OpCaptureEventProfilingInfo].setResultAndType(false, false); + InstructionDesc[OpSetUserEventStatus].setResultAndType(false, false); + InstructionDesc[OpRetainEvent].setResultAndType(false, false); + InstructionDesc[OpReleaseEvent].setResultAndType(false, false); + InstructionDesc[OpGroupWaitEvents].setResultAndType(false, false); + InstructionDesc[OpAtomicFlagClear].setResultAndType(false, false); + + // Specific additional context-dependent operands + + ExecutionModeOperands[ExecutionModeInvocations].push(OperandLiteralNumber, "'Number of <>'"); + + ExecutionModeOperands[ExecutionModeLocalSize].push(OperandLiteralNumber, "'x size'"); + ExecutionModeOperands[ExecutionModeLocalSize].push(OperandLiteralNumber, "'y size'"); + ExecutionModeOperands[ExecutionModeLocalSize].push(OperandLiteralNumber, "'z size'"); + + ExecutionModeOperands[ExecutionModeLocalSizeHint].push(OperandLiteralNumber, "'x size'"); + ExecutionModeOperands[ExecutionModeLocalSizeHint].push(OperandLiteralNumber, "'y size'"); + ExecutionModeOperands[ExecutionModeLocalSizeHint].push(OperandLiteralNumber, "'z size'"); + + ExecutionModeOperands[ExecutionModeOutputVertices].push(OperandLiteralNumber, 
"'Vertex count'"); + ExecutionModeOperands[ExecutionModeVecTypeHint].push(OperandLiteralNumber, "'Vector type'"); + + DecorationOperands[DecorationStream].push(OperandLiteralNumber, "'Stream Number'"); + DecorationOperands[DecorationLocation].push(OperandLiteralNumber, "'Location'"); + DecorationOperands[DecorationComponent].push(OperandLiteralNumber, "'Component'"); + DecorationOperands[DecorationIndex].push(OperandLiteralNumber, "'Index'"); + DecorationOperands[DecorationBinding].push(OperandLiteralNumber, "'Binding Point'"); + DecorationOperands[DecorationDescriptorSet].push(OperandLiteralNumber, "'Descriptor Set'"); + DecorationOperands[DecorationOffset].push(OperandLiteralNumber, "'Byte Offset'"); + DecorationOperands[DecorationXfbBuffer].push(OperandLiteralNumber, "'XFB Buffer Number'"); + DecorationOperands[DecorationXfbStride].push(OperandLiteralNumber, "'XFB Stride'"); + DecorationOperands[DecorationArrayStride].push(OperandLiteralNumber, "'Array Stride'"); + DecorationOperands[DecorationMatrixStride].push(OperandLiteralNumber, "'Matrix Stride'"); + DecorationOperands[DecorationBuiltIn].push(OperandLiteralNumber, "See <>"); + DecorationOperands[DecorationFPRoundingMode].push(OperandFPRoundingMode, "'Floating-Point Rounding Mode'"); + DecorationOperands[DecorationFPFastMathMode].push(OperandFPFastMath, "'Fast-Math Mode'"); + DecorationOperands[DecorationLinkageAttributes].push(OperandLiteralString, "'Name'"); + DecorationOperands[DecorationLinkageAttributes].push(OperandLinkageType, "'Linkage Type'"); + DecorationOperands[DecorationFuncParamAttr].push(OperandFuncParamAttr, "'Function Parameter Attribute'"); + DecorationOperands[DecorationSpecId].push(OperandLiteralNumber, "'Specialization Constant ID'"); + DecorationOperands[DecorationInputAttachmentIndex].push(OperandLiteralNumber, "'Attachment Index'"); + DecorationOperands[DecorationAlignment].push(OperandLiteralNumber, "'Alignment'"); + + OperandClassParams[OperandSource].set(SourceLanguageCeiling, 
SourceString, 0); + OperandClassParams[OperandExecutionModel].set(ExecutionModelCeiling, ExecutionModelString, ExecutionModelParams); + OperandClassParams[OperandAddressing].set(AddressingModelCeiling, AddressingString, AddressingParams); + OperandClassParams[OperandMemory].set(MemoryModelCeiling, MemoryString, MemoryParams); + OperandClassParams[OperandExecutionMode].set(ExecutionModeCeiling, ExecutionModeString, ExecutionModeParams); + OperandClassParams[OperandExecutionMode].setOperands(ExecutionModeOperands); + OperandClassParams[OperandStorage].set(StorageClassCeiling, StorageClassString, StorageParams); + OperandClassParams[OperandDimensionality].set(DimensionCeiling, DimensionString, DimensionalityParams); + OperandClassParams[OperandSamplerAddressingMode].set(SamplerAddressingModeCeiling, SamplerAddressingModeString, SamplerAddressingModeParams); + OperandClassParams[OperandSamplerFilterMode].set(SamplerFilterModeCeiling, SamplerFilterModeString, SamplerFilterModeParams); + OperandClassParams[OperandSamplerImageFormat].set(ImageFormatCeiling, ImageFormatString, ImageFormatParams); + OperandClassParams[OperandImageChannelOrder].set(ImageChannelOrderCeiling, ImageChannelOrderString, ImageChannelOrderParams); + OperandClassParams[OperandImageChannelDataType].set(ImageChannelDataTypeCeiling, ImageChannelDataTypeString, ImageChannelDataTypeParams); + OperandClassParams[OperandImageOperands].set(ImageOperandsCeiling, ImageOperandsString, ImageOperandsParams, true); + OperandClassParams[OperandFPFastMath].set(FPFastMathCeiling, FPFastMathString, FPFastMathParams, true); + OperandClassParams[OperandFPRoundingMode].set(FPRoundingModeCeiling, FPRoundingModeString, FPRoundingModeParams); + OperandClassParams[OperandLinkageType].set(LinkageTypeCeiling, LinkageTypeString, LinkageTypeParams); + OperandClassParams[OperandFuncParamAttr].set(FuncParamAttrCeiling, FuncParamAttrString, FuncParamAttrParams); + 
OperandClassParams[OperandAccessQualifier].set(AccessQualifierCeiling, AccessQualifierString, AccessQualifierParams); + OperandClassParams[OperandDecoration].set(DecorationCeiling, DecorationString, DecorationParams); + OperandClassParams[OperandDecoration].setOperands(DecorationOperands); + OperandClassParams[OperandBuiltIn].set(BuiltInCeiling, BuiltInString, BuiltInParams); + OperandClassParams[OperandSelect].set(SelectControlCeiling, SelectControlString, SelectionControlParams, true); + OperandClassParams[OperandLoop].set(LoopControlCeiling, LoopControlString, LoopControlParams, true); + OperandClassParams[OperandFunction].set(FunctionControlCeiling, FunctionControlString, FunctionControlParams, true); + OperandClassParams[OperandMemorySemantics].set(MemorySemanticsCeiling, MemorySemanticsString, MemorySemanticsParams, true); + OperandClassParams[OperandMemoryAccess].set(MemoryAccessCeiling, MemoryAccessString, MemoryAccessParams, true); + OperandClassParams[OperandScope].set(ScopeCeiling, ScopeString, ScopeParams); + OperandClassParams[OperandGroupOperation].set(GroupOperationCeiling, GroupOperationString, GroupOperationParams); + OperandClassParams[OperandKernelEnqueueFlags].set(KernelEnqueueFlagsCeiling, KernelEnqueueFlagsString, KernelEnqueueFlagsParams); + OperandClassParams[OperandKernelProfilingInfo].set(KernelProfilingInfoCeiling, KernelProfilingInfoString, KernelProfilingInfoParams, true); + OperandClassParams[OperandCapability].set(CapabilityCeiling, CapabilityString, CapabilityParams); + OperandClassParams[OperandOpcode].set(OpcodeCeiling, OpcodeString, 0); + + CapabilityParams[CapabilityShader].caps.push_back(CapabilityMatrix); + CapabilityParams[CapabilityGeometry].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityTessellation].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityVector16].caps.push_back(CapabilityKernel); + CapabilityParams[CapabilityFloat16Buffer].caps.push_back(CapabilityKernel); + 
CapabilityParams[CapabilityInt64Atomics].caps.push_back(CapabilityInt64); + CapabilityParams[CapabilityImageBasic].caps.push_back(CapabilityKernel); + CapabilityParams[CapabilityImageReadWrite].caps.push_back(CapabilityImageBasic); + CapabilityParams[CapabilityImageMipmap].caps.push_back(CapabilityImageBasic); + CapabilityParams[CapabilityPipes].caps.push_back(CapabilityKernel); + CapabilityParams[CapabilityDeviceEnqueue].caps.push_back(CapabilityKernel); + CapabilityParams[CapabilityLiteralSampler].caps.push_back(CapabilityKernel); + CapabilityParams[CapabilityAtomicStorage].caps.push_back(CapabilityShader); + CapabilityParams[CapabilitySampleRateShading].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityTessellationPointSize].caps.push_back(CapabilityTessellation); + CapabilityParams[CapabilityGeometryPointSize].caps.push_back(CapabilityGeometry); + CapabilityParams[CapabilityImageGatherExtended].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityStorageImageExtendedFormats].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityStorageImageMultisample].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityUniformBufferArrayDynamicIndexing].caps.push_back(CapabilityShader); + CapabilityParams[CapabilitySampledImageArrayDynamicIndexing].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityStorageBufferArrayDynamicIndexing].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityStorageImageArrayDynamicIndexing].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityClipDistance].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityCullDistance].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityGenericPointer].caps.push_back(CapabilityAddresses); + CapabilityParams[CapabilityInt8].caps.push_back(CapabilityKernel); + CapabilityParams[CapabilityInputAttachment].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityMinLod].caps.push_back(CapabilityShader); + 
CapabilityParams[CapabilitySparseResidency].caps.push_back(CapabilityShader); + CapabilityParams[CapabilitySampled1D].caps.push_back(CapabilityShader); + CapabilityParams[CapabilitySampledRect].caps.push_back(CapabilityShader); + CapabilityParams[CapabilitySampledBuffer].caps.push_back(CapabilityShader); + CapabilityParams[CapabilitySampledCubeArray].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityImageMSArray].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityImage1D].caps.push_back(CapabilitySampled1D); + CapabilityParams[CapabilityImageRect].caps.push_back(CapabilitySampledRect); + CapabilityParams[CapabilityImageBuffer].caps.push_back(CapabilitySampledBuffer); + CapabilityParams[CapabilityImageCubeArray].caps.push_back(CapabilitySampledCubeArray); + CapabilityParams[CapabilityImageQuery].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityDerivativeControl].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityInterpolationFunction].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityTransformFeedback].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityGeometryStreams].caps.push_back(CapabilityGeometry); + CapabilityParams[CapabilityStorageImageReadWithoutFormat].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityStorageImageWriteWithoutFormat].caps.push_back(CapabilityShader); + CapabilityParams[CapabilityMultiViewport].caps.push_back(CapabilityGeometry); + + AddressingParams[AddressingModelPhysical32].caps.push_back(CapabilityAddresses); + AddressingParams[AddressingModelPhysical64].caps.push_back(CapabilityAddresses); + + MemoryParams[MemoryModelSimple].caps.push_back(CapabilityShader); + MemoryParams[MemoryModelGLSL450].caps.push_back(CapabilityShader); + MemoryParams[MemoryModelOpenCL].caps.push_back(CapabilityKernel); + + MemorySemanticsParams[MemorySemanticsUniformMemoryShift].caps.push_back(CapabilityShader); + 
MemorySemanticsParams[MemorySemanticsAtomicCounterMemoryShift].caps.push_back(CapabilityAtomicStorage); + + ExecutionModelParams[ExecutionModelVertex].caps.push_back(CapabilityShader); + ExecutionModelParams[ExecutionModelTessellationControl].caps.push_back(CapabilityTessellation); + ExecutionModelParams[ExecutionModelTessellationEvaluation].caps.push_back(CapabilityTessellation); + ExecutionModelParams[ExecutionModelGeometry].caps.push_back(CapabilityGeometry); + ExecutionModelParams[ExecutionModelFragment].caps.push_back(CapabilityShader); + ExecutionModelParams[ExecutionModelGLCompute].caps.push_back(CapabilityShader); + ExecutionModelParams[ExecutionModelKernel].caps.push_back(CapabilityKernel); + + // Storage capabilites + StorageParams[StorageClassInput].caps.push_back(CapabilityShader); + StorageParams[StorageClassUniform].caps.push_back(CapabilityShader); + StorageParams[StorageClassOutput].caps.push_back(CapabilityShader); + StorageParams[StorageClassPrivate].caps.push_back(CapabilityShader); + StorageParams[StorageClassGeneric].caps.push_back(CapabilityKernel); + StorageParams[StorageClassAtomicCounter].caps.push_back(CapabilityAtomicStorage); + StorageParams[StorageClassPushConstant].caps.push_back(CapabilityShader); + + // Sampler Filter & Addressing mode capabilities + SamplerAddressingModeParams[SamplerAddressingModeNone].caps.push_back(CapabilityKernel); + SamplerAddressingModeParams[SamplerAddressingModeClampToEdge].caps.push_back(CapabilityKernel); + SamplerAddressingModeParams[SamplerAddressingModeClamp].caps.push_back(CapabilityKernel); + SamplerAddressingModeParams[SamplerAddressingModeRepeat].caps.push_back(CapabilityKernel); + SamplerAddressingModeParams[SamplerAddressingModeRepeatMirrored].caps.push_back(CapabilityKernel); + + SamplerFilterModeParams[SamplerFilterModeNearest].caps.push_back(CapabilityKernel); + SamplerFilterModeParams[SamplerFilterModeLinear].caps.push_back(CapabilityKernel); + + // image format capabilities + + // ES/Desktop 
float + ImageFormatParams[ImageFormatRgba32f].caps.push_back(CapabilityShader); + ImageFormatParams[ImageFormatRgba16f].caps.push_back(CapabilityShader); + ImageFormatParams[ImageFormatR32f].caps.push_back(CapabilityShader); + ImageFormatParams[ImageFormatRgba8].caps.push_back(CapabilityShader); + ImageFormatParams[ImageFormatRgba8Snorm].caps.push_back(CapabilityShader); + + // Desktop float + ImageFormatParams[ImageFormatRg32f].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRg16f].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatR11fG11fB10f].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatR16f].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRgba16].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRgb10A2].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRg16].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRg8].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatR16].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatR8].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRgba16Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRg16Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRg8Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatR16Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatR8Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); + + // ES/Desktop int + ImageFormatParams[ImageFormatRgba32i].caps.push_back(CapabilityShader); + ImageFormatParams[ImageFormatRgba16i].caps.push_back(CapabilityShader); + 
ImageFormatParams[ImageFormatRgba8i].caps.push_back(CapabilityShader); + ImageFormatParams[ImageFormatR32i].caps.push_back(CapabilityShader); + + // Desktop int + ImageFormatParams[ImageFormatRg32i].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRg16i].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRg8i].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatR16i].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatR8i].caps.push_back(CapabilityStorageImageExtendedFormats); + + // ES/Desktop uint + ImageFormatParams[ImageFormatRgba32ui].caps.push_back(CapabilityShader); + ImageFormatParams[ImageFormatRgba16ui].caps.push_back(CapabilityShader); + ImageFormatParams[ImageFormatRgba8ui].caps.push_back(CapabilityShader); + ImageFormatParams[ImageFormatR32ui].caps.push_back(CapabilityShader); + + // Desktop uint + ImageFormatParams[ImageFormatRgb10a2ui].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRg32ui].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRg16ui].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatRg8ui].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatR16ui].caps.push_back(CapabilityStorageImageExtendedFormats); + ImageFormatParams[ImageFormatR8ui].caps.push_back(CapabilityStorageImageExtendedFormats); + + // image channel order capabilities + for (int i = 0; i < ImageChannelOrderCeiling; ++i) { + ImageChannelOrderParams[i].caps.push_back(CapabilityKernel); + } + + // image channel type capabilities + for (int i = 0; i < ImageChannelDataTypeCeiling; ++i) { + ImageChannelDataTypeParams[i].caps.push_back(CapabilityKernel); + } + + // image lookup operands + ImageOperandsParams[ImageOperandsBiasShift].caps.push_back(CapabilityShader); + 
ImageOperandsParams[ImageOperandsOffsetShift].caps.push_back(CapabilityImageGatherExtended); + ImageOperandsParams[ImageOperandsMinLodShift].caps.push_back(CapabilityMinLod); + + // fast math flags capabilities + for (int i = 0; i < FPFastMathCeiling; ++i) { + FPFastMathParams[i].caps.push_back(CapabilityKernel); + } + + // fp rounding mode capabilities + for (int i = 0; i < FPRoundingModeCeiling; ++i) { + FPRoundingModeParams[i].caps.push_back(CapabilityKernel); + } + + // linkage types + for (int i = 0; i < LinkageTypeCeiling; ++i) { + LinkageTypeParams[i].caps.push_back(CapabilityLinkage); + } + + // function argument types + for (int i = 0; i < FuncParamAttrCeiling; ++i) { + FuncParamAttrParams[i].caps.push_back(CapabilityKernel); + } + + // function argument types + for (int i = 0; i < AccessQualifierCeiling; ++i) { + AccessQualifierParams[i].caps.push_back(CapabilityKernel); + } + + ExecutionModeParams[ExecutionModeInvocations].caps.push_back(CapabilityGeometry); + ExecutionModeParams[ExecutionModeSpacingEqual].caps.push_back(CapabilityTessellation); + ExecutionModeParams[ExecutionModeSpacingFractionalEven].caps.push_back(CapabilityTessellation); + ExecutionModeParams[ExecutionModeSpacingFractionalOdd].caps.push_back(CapabilityTessellation); + ExecutionModeParams[ExecutionModeVertexOrderCw].caps.push_back(CapabilityTessellation); + ExecutionModeParams[ExecutionModeVertexOrderCcw].caps.push_back(CapabilityTessellation); + ExecutionModeParams[ExecutionModePixelCenterInteger].caps.push_back(CapabilityShader); + ExecutionModeParams[ExecutionModeOriginUpperLeft].caps.push_back(CapabilityShader); + ExecutionModeParams[ExecutionModeOriginLowerLeft].caps.push_back(CapabilityShader); + ExecutionModeParams[ExecutionModeEarlyFragmentTests].caps.push_back(CapabilityShader); + ExecutionModeParams[ExecutionModePointMode].caps.push_back(CapabilityTessellation); + ExecutionModeParams[ExecutionModeXfb].caps.push_back(CapabilityTransformFeedback); + 
ExecutionModeParams[ExecutionModeDepthReplacing].caps.push_back(CapabilityShader); + ExecutionModeParams[ExecutionModeDepthGreater].caps.push_back(CapabilityShader); + ExecutionModeParams[ExecutionModeDepthLess].caps.push_back(CapabilityShader); + ExecutionModeParams[ExecutionModeDepthUnchanged].caps.push_back(CapabilityShader); + ExecutionModeParams[ExecutionModeLocalSizeHint].caps.push_back(CapabilityKernel); + ExecutionModeParams[ExecutionModeInputPoints].caps.push_back(CapabilityGeometry); + ExecutionModeParams[ExecutionModeInputLines].caps.push_back(CapabilityGeometry); + ExecutionModeParams[ExecutionModeInputLinesAdjacency].caps.push_back(CapabilityGeometry); + ExecutionModeParams[ExecutionModeTriangles].caps.push_back(CapabilityGeometry); + ExecutionModeParams[ExecutionModeTriangles].caps.push_back(CapabilityTessellation); + ExecutionModeParams[ExecutionModeInputTrianglesAdjacency].caps.push_back(CapabilityGeometry); + ExecutionModeParams[ExecutionModeQuads].caps.push_back(CapabilityTessellation); + ExecutionModeParams[ExecutionModeIsolines].caps.push_back(CapabilityTessellation); + ExecutionModeParams[ExecutionModeOutputVertices].caps.push_back(CapabilityGeometry); + ExecutionModeParams[ExecutionModeOutputVertices].caps.push_back(CapabilityTessellation); + ExecutionModeParams[ExecutionModeOutputPoints].caps.push_back(CapabilityGeometry); + ExecutionModeParams[ExecutionModeOutputLineStrip].caps.push_back(CapabilityGeometry); + ExecutionModeParams[ExecutionModeOutputTriangleStrip].caps.push_back(CapabilityGeometry); + ExecutionModeParams[ExecutionModeVecTypeHint].caps.push_back(CapabilityKernel); + ExecutionModeParams[ExecutionModeContractionOff].caps.push_back(CapabilityKernel); + + DecorationParams[DecorationRelaxedPrecision].caps.push_back(CapabilityShader); + DecorationParams[DecorationBlock].caps.push_back(CapabilityShader); + DecorationParams[DecorationBufferBlock].caps.push_back(CapabilityShader); + 
DecorationParams[DecorationRowMajor].caps.push_back(CapabilityMatrix); + DecorationParams[DecorationColMajor].caps.push_back(CapabilityMatrix); + DecorationParams[DecorationGLSLShared].caps.push_back(CapabilityShader); + DecorationParams[DecorationGLSLPacked].caps.push_back(CapabilityShader); + DecorationParams[DecorationNoPerspective].caps.push_back(CapabilityShader); + DecorationParams[DecorationFlat].caps.push_back(CapabilityShader); + DecorationParams[DecorationPatch].caps.push_back(CapabilityTessellation); + DecorationParams[DecorationCentroid].caps.push_back(CapabilityShader); + DecorationParams[DecorationSample].caps.push_back(CapabilitySampleRateShading); + DecorationParams[DecorationInvariant].caps.push_back(CapabilityShader); + DecorationParams[DecorationConstant].caps.push_back(CapabilityKernel); + DecorationParams[DecorationUniform].caps.push_back(CapabilityShader); + DecorationParams[DecorationCPacked].caps.push_back(CapabilityKernel); + DecorationParams[DecorationSaturatedConversion].caps.push_back(CapabilityKernel); + DecorationParams[DecorationStream].caps.push_back(CapabilityGeometryStreams); + DecorationParams[DecorationLocation].caps.push_back(CapabilityShader); + DecorationParams[DecorationComponent].caps.push_back(CapabilityShader); + DecorationParams[DecorationOffset].caps.push_back(CapabilityShader); + DecorationParams[DecorationIndex].caps.push_back(CapabilityShader); + DecorationParams[DecorationBinding].caps.push_back(CapabilityShader); + DecorationParams[DecorationDescriptorSet].caps.push_back(CapabilityShader); + DecorationParams[DecorationXfbBuffer].caps.push_back(CapabilityTransformFeedback); + DecorationParams[DecorationXfbStride].caps.push_back(CapabilityTransformFeedback); + DecorationParams[DecorationArrayStride].caps.push_back(CapabilityShader); + DecorationParams[DecorationMatrixStride].caps.push_back(CapabilityMatrix); + DecorationParams[DecorationFuncParamAttr].caps.push_back(CapabilityKernel); + 
DecorationParams[DecorationFPRoundingMode].caps.push_back(CapabilityKernel); + DecorationParams[DecorationFPFastMathMode].caps.push_back(CapabilityKernel); + DecorationParams[DecorationLinkageAttributes].caps.push_back(CapabilityLinkage); + DecorationParams[DecorationSpecId].caps.push_back(CapabilityShader); + DecorationParams[DecorationNoContraction].caps.push_back(CapabilityShader); + DecorationParams[DecorationInputAttachmentIndex].caps.push_back(CapabilityInputAttachment); + DecorationParams[DecorationAlignment].caps.push_back(CapabilityKernel); + + BuiltInParams[BuiltInPosition].caps.push_back(CapabilityShader); + BuiltInParams[BuiltInPointSize].caps.push_back(CapabilityShader); + BuiltInParams[BuiltInClipDistance].caps.push_back(CapabilityClipDistance); + BuiltInParams[BuiltInCullDistance].caps.push_back(CapabilityCullDistance); + + BuiltInParams[BuiltInVertexId].caps.push_back(CapabilityShader); + BuiltInParams[BuiltInVertexId].desc = "Vertex ID, which takes on values 0, 1, 2, . . . ."; + + BuiltInParams[BuiltInInstanceId].caps.push_back(CapabilityShader); + BuiltInParams[BuiltInInstanceId].desc = "Instance ID, which takes on values 0, 1, 2, . . . ."; + + BuiltInParams[BuiltInVertexIndex].caps.push_back(CapabilityShader); + BuiltInParams[BuiltInVertexIndex].desc = "Vertex index, which takes on values base, base+1, base+2, . . . ."; + + BuiltInParams[BuiltInInstanceIndex].caps.push_back(CapabilityShader); + BuiltInParams[BuiltInInstanceIndex].desc = "Instance index, which takes on values base, base+1, base+2, . . . 
."; + + BuiltInParams[BuiltInPrimitiveId].caps.push_back(CapabilityGeometry); + BuiltInParams[BuiltInPrimitiveId].caps.push_back(CapabilityTessellation); + BuiltInParams[BuiltInInvocationId].caps.push_back(CapabilityGeometry); + BuiltInParams[BuiltInInvocationId].caps.push_back(CapabilityTessellation); + BuiltInParams[BuiltInLayer].caps.push_back(CapabilityGeometry); + BuiltInParams[BuiltInViewportIndex].caps.push_back(CapabilityMultiViewport); + BuiltInParams[BuiltInTessLevelOuter].caps.push_back(CapabilityTessellation); + BuiltInParams[BuiltInTessLevelInner].caps.push_back(CapabilityTessellation); + BuiltInParams[BuiltInTessCoord].caps.push_back(CapabilityTessellation); + BuiltInParams[BuiltInPatchVertices].caps.push_back(CapabilityTessellation); + BuiltInParams[BuiltInFragCoord].caps.push_back(CapabilityShader); + BuiltInParams[BuiltInPointCoord].caps.push_back(CapabilityShader); + BuiltInParams[BuiltInFrontFacing].caps.push_back(CapabilityShader); + BuiltInParams[BuiltInSampleId].caps.push_back(CapabilitySampleRateShading); + BuiltInParams[BuiltInSamplePosition].caps.push_back(CapabilitySampleRateShading); + BuiltInParams[BuiltInSampleMask].caps.push_back(CapabilitySampleRateShading); + BuiltInParams[BuiltInFragDepth].caps.push_back(CapabilityShader); + BuiltInParams[BuiltInHelperInvocation].caps.push_back(CapabilityShader); + BuiltInParams[BuiltInWorkDim].caps.push_back(CapabilityKernel); + BuiltInParams[BuiltInGlobalSize].caps.push_back(CapabilityKernel); + BuiltInParams[BuiltInEnqueuedWorkgroupSize].caps.push_back(CapabilityKernel); + BuiltInParams[BuiltInGlobalOffset].caps.push_back(CapabilityKernel); + BuiltInParams[BuiltInGlobalLinearId].caps.push_back(CapabilityKernel); + + BuiltInParams[BuiltInSubgroupSize].caps.push_back(CapabilityKernel); + BuiltInParams[BuiltInSubgroupMaxSize].caps.push_back(CapabilityKernel); + BuiltInParams[BuiltInNumSubgroups].caps.push_back(CapabilityKernel); + 
BuiltInParams[BuiltInNumEnqueuedSubgroups].caps.push_back(CapabilityKernel); + BuiltInParams[BuiltInSubgroupId].caps.push_back(CapabilityKernel); + BuiltInParams[BuiltInSubgroupLocalInvocationId].caps.push_back(CapabilityKernel); + + DimensionalityParams[Dim1D].caps.push_back(CapabilitySampled1D); + DimensionalityParams[DimCube].caps.push_back(CapabilityShader); + DimensionalityParams[DimRect].caps.push_back(CapabilitySampledRect); + DimensionalityParams[DimBuffer].caps.push_back(CapabilitySampledBuffer); + DimensionalityParams[DimSubpassData].caps.push_back(CapabilityInputAttachment); + + // Group Operations + for (int i = 0; i < GroupOperationCeiling; ++i) { + GroupOperationParams[i].caps.push_back(CapabilityKernel); + } + + // Enqueue flags + for (int i = 0; i < KernelEnqueueFlagsCeiling; ++i) { + KernelEnqueueFlagsParams[i].caps.push_back(CapabilityKernel); + } + + // Profiling info + KernelProfilingInfoParams[0].caps.push_back(CapabilityKernel); + + // set name of operator, an initial set of style operands, and the description + + InstructionDesc[OpSource].operands.push(OperandSource, ""); + InstructionDesc[OpSource].operands.push(OperandLiteralNumber, "'Version'"); + InstructionDesc[OpSource].operands.push(OperandId, "'File'", true); + InstructionDesc[OpSource].operands.push(OperandLiteralString, "'Source'", true); + + InstructionDesc[OpSourceContinued].operands.push(OperandLiteralString, "'Continued Source'"); + + InstructionDesc[OpSourceExtension].operands.push(OperandLiteralString, "'Extension'"); + + InstructionDesc[OpName].operands.push(OperandId, "'Target'"); + InstructionDesc[OpName].operands.push(OperandLiteralString, "'Name'"); + + InstructionDesc[OpMemberName].operands.push(OperandId, "'Type'"); + InstructionDesc[OpMemberName].operands.push(OperandLiteralNumber, "'Member'"); + InstructionDesc[OpMemberName].operands.push(OperandLiteralString, "'Name'"); + + InstructionDesc[OpString].operands.push(OperandLiteralString, "'String'"); + + 
InstructionDesc[OpLine].operands.push(OperandId, "'File'"); + InstructionDesc[OpLine].operands.push(OperandLiteralNumber, "'Line'"); + InstructionDesc[OpLine].operands.push(OperandLiteralNumber, "'Column'"); + + InstructionDesc[OpExtension].operands.push(OperandLiteralString, "'Name'"); + + InstructionDesc[OpExtInstImport].operands.push(OperandLiteralString, "'Name'"); + + InstructionDesc[OpCapability].operands.push(OperandCapability, "'Capability'"); + + InstructionDesc[OpMemoryModel].operands.push(OperandAddressing, ""); + InstructionDesc[OpMemoryModel].operands.push(OperandMemory, ""); + + InstructionDesc[OpEntryPoint].operands.push(OperandExecutionModel, ""); + InstructionDesc[OpEntryPoint].operands.push(OperandId, "'Entry Point'"); + InstructionDesc[OpEntryPoint].operands.push(OperandLiteralString, "'Name'"); + InstructionDesc[OpEntryPoint].operands.push(OperandVariableIds, "'Interface'"); + + InstructionDesc[OpExecutionMode].operands.push(OperandId, "'Entry Point'"); + InstructionDesc[OpExecutionMode].operands.push(OperandExecutionMode, "'Mode'"); + InstructionDesc[OpExecutionMode].operands.push(OperandOptionalLiteral, "See <>"); + + InstructionDesc[OpTypeInt].operands.push(OperandLiteralNumber, "'Width'"); + InstructionDesc[OpTypeInt].operands.push(OperandLiteralNumber, "'Signedness'"); + + InstructionDesc[OpTypeFloat].operands.push(OperandLiteralNumber, "'Width'"); + + InstructionDesc[OpTypeVector].operands.push(OperandId, "'Component Type'"); + InstructionDesc[OpTypeVector].operands.push(OperandLiteralNumber, "'Component Count'"); + + InstructionDesc[OpTypeMatrix].capabilities.push_back(CapabilityMatrix); + InstructionDesc[OpTypeMatrix].operands.push(OperandId, "'Column Type'"); + InstructionDesc[OpTypeMatrix].operands.push(OperandLiteralNumber, "'Column Count'"); + + InstructionDesc[OpTypeImage].operands.push(OperandId, "'Sampled Type'"); + InstructionDesc[OpTypeImage].operands.push(OperandDimensionality, ""); + 
InstructionDesc[OpTypeImage].operands.push(OperandLiteralNumber, "'Depth'"); + InstructionDesc[OpTypeImage].operands.push(OperandLiteralNumber, "'Arrayed'"); + InstructionDesc[OpTypeImage].operands.push(OperandLiteralNumber, "'MS'"); + InstructionDesc[OpTypeImage].operands.push(OperandLiteralNumber, "'Sampled'"); + InstructionDesc[OpTypeImage].operands.push(OperandSamplerImageFormat, ""); + InstructionDesc[OpTypeImage].operands.push(OperandAccessQualifier, "", true); + + InstructionDesc[OpTypeSampledImage].operands.push(OperandId, "'Image Type'"); + + InstructionDesc[OpTypeArray].operands.push(OperandId, "'Element Type'"); + InstructionDesc[OpTypeArray].operands.push(OperandId, "'Length'"); + + InstructionDesc[OpTypeRuntimeArray].capabilities.push_back(CapabilityShader); + InstructionDesc[OpTypeRuntimeArray].operands.push(OperandId, "'Element Type'"); + + InstructionDesc[OpTypeStruct].operands.push(OperandVariableIds, "'Member 0 type', +\n'member 1 type', +\n..."); + + InstructionDesc[OpTypeOpaque].capabilities.push_back(CapabilityKernel); + InstructionDesc[OpTypeOpaque].operands.push(OperandLiteralString, "The name of the opaque type."); + + InstructionDesc[OpTypePointer].operands.push(OperandStorage, ""); + InstructionDesc[OpTypePointer].operands.push(OperandId, "'Type'"); + + InstructionDesc[OpTypeForwardPointer].capabilities.push_back(CapabilityAddresses); + InstructionDesc[OpTypeForwardPointer].operands.push(OperandId, "'Pointer Type'"); + InstructionDesc[OpTypeForwardPointer].operands.push(OperandStorage, ""); + + InstructionDesc[OpTypeEvent].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpTypeDeviceEvent].capabilities.push_back(CapabilityDeviceEnqueue); + + InstructionDesc[OpTypeReserveId].capabilities.push_back(CapabilityPipes); + + InstructionDesc[OpTypeQueue].capabilities.push_back(CapabilityDeviceEnqueue); + + InstructionDesc[OpTypePipe].operands.push(OperandAccessQualifier, "'Qualifier'"); + 
InstructionDesc[OpTypePipe].capabilities.push_back(CapabilityPipes); + + InstructionDesc[OpTypeFunction].operands.push(OperandId, "'Return Type'"); + InstructionDesc[OpTypeFunction].operands.push(OperandVariableIds, "'Parameter 0 Type', +\n'Parameter 1 Type', +\n..."); + + InstructionDesc[OpConstant].operands.push(OperandVariableLiterals, "'Value'"); + + InstructionDesc[OpConstantComposite].operands.push(OperandVariableIds, "'Constituents'"); + + InstructionDesc[OpConstantSampler].capabilities.push_back(CapabilityLiteralSampler); + InstructionDesc[OpConstantSampler].operands.push(OperandSamplerAddressingMode, ""); + InstructionDesc[OpConstantSampler].operands.push(OperandLiteralNumber, "'Param'"); + InstructionDesc[OpConstantSampler].operands.push(OperandSamplerFilterMode, ""); + + InstructionDesc[OpSpecConstant].operands.push(OperandVariableLiterals, "'Value'"); + + InstructionDesc[OpSpecConstantComposite].operands.push(OperandVariableIds, "'Constituents'"); + + InstructionDesc[OpSpecConstantOp].operands.push(OperandLiteralNumber, "'Opcode'"); + InstructionDesc[OpSpecConstantOp].operands.push(OperandVariableIds, "'Operands'"); + + InstructionDesc[OpVariable].operands.push(OperandStorage, ""); + InstructionDesc[OpVariable].operands.push(OperandId, "'Initializer'", true); + + InstructionDesc[OpFunction].operands.push(OperandFunction, ""); + InstructionDesc[OpFunction].operands.push(OperandId, "'Function Type'"); + + InstructionDesc[OpFunctionCall].operands.push(OperandId, "'Function'"); + InstructionDesc[OpFunctionCall].operands.push(OperandVariableIds, "'Argument 0', +\n'Argument 1', +\n..."); + + InstructionDesc[OpExtInst].operands.push(OperandId, "'Set'"); + InstructionDesc[OpExtInst].operands.push(OperandLiteralNumber, "'Instruction'"); + InstructionDesc[OpExtInst].operands.push(OperandVariableIds, "'Operand 1', +\n'Operand 2', +\n..."); + + InstructionDesc[OpLoad].operands.push(OperandId, "'Pointer'"); + 
InstructionDesc[OpLoad].operands.push(OperandMemoryAccess, "", true); + + InstructionDesc[OpStore].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpStore].operands.push(OperandId, "'Object'"); + InstructionDesc[OpStore].operands.push(OperandMemoryAccess, "", true); + + InstructionDesc[OpPhi].operands.push(OperandVariableIds, "'Variable, Parent, ...'"); + + InstructionDesc[OpDecorate].operands.push(OperandId, "'Target'"); + InstructionDesc[OpDecorate].operands.push(OperandDecoration, ""); + InstructionDesc[OpDecorate].operands.push(OperandVariableLiterals, "See <>."); + + InstructionDesc[OpMemberDecorate].operands.push(OperandId, "'Structure Type'"); + InstructionDesc[OpMemberDecorate].operands.push(OperandLiteralNumber, "'Member'"); + InstructionDesc[OpMemberDecorate].operands.push(OperandDecoration, ""); + InstructionDesc[OpMemberDecorate].operands.push(OperandVariableLiterals, "See <>."); + + InstructionDesc[OpGroupDecorate].operands.push(OperandId, "'Decoration Group'"); + InstructionDesc[OpGroupDecorate].operands.push(OperandVariableIds, "'Targets'"); + + InstructionDesc[OpGroupMemberDecorate].operands.push(OperandId, "'Decoration Group'"); + InstructionDesc[OpGroupMemberDecorate].operands.push(OperandVariableIdLiteral, "'Targets'"); + + InstructionDesc[OpVectorExtractDynamic].operands.push(OperandId, "'Vector'"); + InstructionDesc[OpVectorExtractDynamic].operands.push(OperandId, "'Index'"); + + InstructionDesc[OpVectorInsertDynamic].operands.push(OperandId, "'Vector'"); + InstructionDesc[OpVectorInsertDynamic].operands.push(OperandId, "'Component'"); + InstructionDesc[OpVectorInsertDynamic].operands.push(OperandId, "'Index'"); + + InstructionDesc[OpVectorShuffle].operands.push(OperandId, "'Vector 1'"); + InstructionDesc[OpVectorShuffle].operands.push(OperandId, "'Vector 2'"); + InstructionDesc[OpVectorShuffle].operands.push(OperandVariableLiterals, "'Components'"); + + InstructionDesc[OpCompositeConstruct].operands.push(OperandVariableIds, 
"'Constituents'"); + + InstructionDesc[OpCompositeExtract].operands.push(OperandId, "'Composite'"); + InstructionDesc[OpCompositeExtract].operands.push(OperandVariableLiterals, "'Indexes'"); + + InstructionDesc[OpCompositeInsert].operands.push(OperandId, "'Object'"); + InstructionDesc[OpCompositeInsert].operands.push(OperandId, "'Composite'"); + InstructionDesc[OpCompositeInsert].operands.push(OperandVariableLiterals, "'Indexes'"); + + InstructionDesc[OpCopyObject].operands.push(OperandId, "'Operand'"); + + InstructionDesc[OpCopyMemory].operands.push(OperandId, "'Target'"); + InstructionDesc[OpCopyMemory].operands.push(OperandId, "'Source'"); + InstructionDesc[OpCopyMemory].operands.push(OperandMemoryAccess, "", true); + + InstructionDesc[OpCopyMemorySized].operands.push(OperandId, "'Target'"); + InstructionDesc[OpCopyMemorySized].operands.push(OperandId, "'Source'"); + InstructionDesc[OpCopyMemorySized].operands.push(OperandId, "'Size'"); + InstructionDesc[OpCopyMemorySized].operands.push(OperandMemoryAccess, "", true); + + InstructionDesc[OpCopyMemorySized].capabilities.push_back(CapabilityAddresses); + + InstructionDesc[OpSampledImage].operands.push(OperandId, "'Image'"); + InstructionDesc[OpSampledImage].operands.push(OperandId, "'Sampler'"); + + InstructionDesc[OpImage].operands.push(OperandId, "'Sampled Image'"); + + InstructionDesc[OpImageRead].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageRead].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageRead].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageRead].operands.push(OperandVariableIds, "", true); + + InstructionDesc[OpImageWrite].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageWrite].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageWrite].operands.push(OperandId, "'Texel'"); + InstructionDesc[OpImageWrite].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageWrite].operands.push(OperandVariableIds, 
"", true); + + InstructionDesc[OpImageSampleImplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSampleImplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSampleImplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSampleImplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSampleImplicitLod].capabilities.push_back(CapabilityShader); + + InstructionDesc[OpImageSampleExplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSampleExplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSampleExplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSampleExplicitLod].operands.push(OperandVariableIds, "", true); + + InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandId, "'D~ref~'"); + InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSampleDrefImplicitLod].capabilities.push_back(CapabilityShader); + + InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandId, "'D~ref~'"); + InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSampleDrefExplicitLod].capabilities.push_back(CapabilityShader); + + InstructionDesc[OpImageSampleProjImplicitLod].operands.push(OperandId, "'Sampled 
Image'"); + InstructionDesc[OpImageSampleProjImplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSampleProjImplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSampleProjImplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSampleProjImplicitLod].capabilities.push_back(CapabilityShader); + + InstructionDesc[OpImageSampleProjExplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSampleProjExplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSampleProjExplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSampleProjExplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSampleProjExplicitLod].capabilities.push_back(CapabilityShader); + + InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandId, "'D~ref~'"); + InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSampleProjDrefImplicitLod].capabilities.push_back(CapabilityShader); + + InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandId, "'D~ref~'"); + InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSampleProjDrefExplicitLod].capabilities.push_back(CapabilityShader); + + 
InstructionDesc[OpImageFetch].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageFetch].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageFetch].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageFetch].operands.push(OperandVariableIds, "", true); + + InstructionDesc[OpImageGather].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageGather].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageGather].operands.push(OperandId, "'Component'"); + InstructionDesc[OpImageGather].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageGather].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageGather].capabilities.push_back(CapabilityShader); + + InstructionDesc[OpImageDrefGather].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageDrefGather].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageDrefGather].operands.push(OperandId, "'D~ref~'"); + InstructionDesc[OpImageDrefGather].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageDrefGather].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageDrefGather].capabilities.push_back(CapabilityShader); + + InstructionDesc[OpImageSparseSampleImplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSparseSampleImplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSparseSampleImplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSparseSampleImplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSparseSampleImplicitLod].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageSparseSampleExplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSparseSampleExplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSparseSampleExplicitLod].operands.push(OperandImageOperands, 
"", true); + InstructionDesc[OpImageSparseSampleExplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSparseSampleExplicitLod].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandId, "'D~ref~'"); + InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSparseSampleDrefImplicitLod].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandId, "'D~ref~'"); + InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSparseSampleDrefExplicitLod].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageSparseSampleProjImplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSparseSampleProjImplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSparseSampleProjImplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSparseSampleProjImplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSparseSampleProjImplicitLod].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageSparseSampleProjExplicitLod].operands.push(OperandId, "'Sampled Image'"); + 
InstructionDesc[OpImageSparseSampleProjExplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSparseSampleProjExplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSparseSampleProjExplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSparseSampleProjExplicitLod].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandId, "'D~ref~'"); + InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandId, "'D~ref~'"); + InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageSparseFetch].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageSparseFetch].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSparseFetch].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSparseFetch].operands.push(OperandVariableIds, "", true); + 
InstructionDesc[OpImageSparseFetch].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageSparseGather].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSparseGather].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSparseGather].operands.push(OperandId, "'Component'"); + InstructionDesc[OpImageSparseGather].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSparseGather].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSparseGather].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageSparseDrefGather].operands.push(OperandId, "'Sampled Image'"); + InstructionDesc[OpImageSparseDrefGather].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSparseDrefGather].operands.push(OperandId, "'D~ref~'"); + InstructionDesc[OpImageSparseDrefGather].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSparseDrefGather].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSparseDrefGather].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageSparseRead].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageSparseRead].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageSparseRead].operands.push(OperandImageOperands, "", true); + InstructionDesc[OpImageSparseRead].operands.push(OperandVariableIds, "", true); + InstructionDesc[OpImageSparseRead].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageSparseTexelsResident].operands.push(OperandId, "'Resident Code'"); + InstructionDesc[OpImageSparseTexelsResident].capabilities.push_back(CapabilitySparseResidency); + + InstructionDesc[OpImageQuerySizeLod].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageQuerySizeLod].operands.push(OperandId, "'Level of Detail'"); + InstructionDesc[OpImageQuerySizeLod].capabilities.push_back(CapabilityKernel); + 
InstructionDesc[OpImageQuerySizeLod].capabilities.push_back(CapabilityImageQuery); + + InstructionDesc[OpImageQuerySize].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageQuerySize].capabilities.push_back(CapabilityKernel); + InstructionDesc[OpImageQuerySize].capabilities.push_back(CapabilityImageQuery); + + InstructionDesc[OpImageQueryLod].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageQueryLod].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageQueryLod].capabilities.push_back(CapabilityImageQuery); + + InstructionDesc[OpImageQueryLevels].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageQueryLevels].capabilities.push_back(CapabilityKernel); + InstructionDesc[OpImageQueryLevels].capabilities.push_back(CapabilityImageQuery); + + InstructionDesc[OpImageQuerySamples].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageQuerySamples].capabilities.push_back(CapabilityKernel); + InstructionDesc[OpImageQuerySamples].capabilities.push_back(CapabilityImageQuery); + + InstructionDesc[OpImageQueryFormat].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageQueryFormat].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpImageQueryOrder].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageQueryOrder].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpAccessChain].operands.push(OperandId, "'Base'"); + InstructionDesc[OpAccessChain].operands.push(OperandVariableIds, "'Indexes'"); + + InstructionDesc[OpInBoundsAccessChain].operands.push(OperandId, "'Base'"); + InstructionDesc[OpInBoundsAccessChain].operands.push(OperandVariableIds, "'Indexes'"); + + InstructionDesc[OpPtrAccessChain].operands.push(OperandId, "'Base'"); + InstructionDesc[OpPtrAccessChain].operands.push(OperandId, "'Element'"); + InstructionDesc[OpPtrAccessChain].operands.push(OperandVariableIds, "'Indexes'"); + InstructionDesc[OpPtrAccessChain].capabilities.push_back(CapabilityAddresses); + + 
InstructionDesc[OpInBoundsPtrAccessChain].operands.push(OperandId, "'Base'"); + InstructionDesc[OpInBoundsPtrAccessChain].operands.push(OperandId, "'Element'"); + InstructionDesc[OpInBoundsPtrAccessChain].operands.push(OperandVariableIds, "'Indexes'"); + InstructionDesc[OpInBoundsPtrAccessChain].capabilities.push_back(CapabilityAddresses); + + InstructionDesc[OpSNegate].operands.push(OperandId, "'Operand'"); + + InstructionDesc[OpFNegate].operands.push(OperandId, "'Operand'"); + + InstructionDesc[OpNot].operands.push(OperandId, "'Operand'"); + + InstructionDesc[OpAny].operands.push(OperandId, "'Vector'"); + + InstructionDesc[OpAll].operands.push(OperandId, "'Vector'"); + + InstructionDesc[OpConvertFToU].operands.push(OperandId, "'Float Value'"); + + InstructionDesc[OpConvertFToS].operands.push(OperandId, "'Float Value'"); + + InstructionDesc[OpConvertSToF].operands.push(OperandId, "'Signed Value'"); + + InstructionDesc[OpConvertUToF].operands.push(OperandId, "'Unsigned Value'"); + + InstructionDesc[OpUConvert].operands.push(OperandId, "'Unsigned Value'"); + + InstructionDesc[OpSConvert].operands.push(OperandId, "'Signed Value'"); + + InstructionDesc[OpFConvert].operands.push(OperandId, "'Float Value'"); + + InstructionDesc[OpSatConvertSToU].operands.push(OperandId, "'Signed Value'"); + InstructionDesc[OpSatConvertSToU].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpSatConvertUToS].operands.push(OperandId, "'Unsigned Value'"); + InstructionDesc[OpSatConvertUToS].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpConvertPtrToU].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpConvertPtrToU].capabilities.push_back(CapabilityAddresses); + + InstructionDesc[OpConvertUToPtr].operands.push(OperandId, "'Integer Value'"); + InstructionDesc[OpConvertUToPtr].capabilities.push_back(CapabilityAddresses); + + InstructionDesc[OpPtrCastToGeneric].operands.push(OperandId, "'Pointer'"); + 
InstructionDesc[OpPtrCastToGeneric].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpGenericCastToPtr].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpGenericCastToPtr].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpGenericCastToPtrExplicit].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpGenericCastToPtrExplicit].operands.push(OperandStorage, "'Storage'"); + InstructionDesc[OpGenericCastToPtrExplicit].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpGenericPtrMemSemantics].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpGenericPtrMemSemantics].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpBitcast].operands.push(OperandId, "'Operand'"); + + InstructionDesc[OpQuantizeToF16].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpTranspose].capabilities.push_back(CapabilityMatrix); + InstructionDesc[OpTranspose].operands.push(OperandId, "'Matrix'"); + + InstructionDesc[OpIsNan].operands.push(OperandId, "'x'"); + + InstructionDesc[OpIsInf].operands.push(OperandId, "'x'"); + + InstructionDesc[OpIsFinite].capabilities.push_back(CapabilityKernel); + InstructionDesc[OpIsFinite].operands.push(OperandId, "'x'"); + + InstructionDesc[OpIsNormal].capabilities.push_back(CapabilityKernel); + InstructionDesc[OpIsNormal].operands.push(OperandId, "'x'"); + + InstructionDesc[OpSignBitSet].capabilities.push_back(CapabilityKernel); + InstructionDesc[OpSignBitSet].operands.push(OperandId, "'x'"); + + InstructionDesc[OpLessOrGreater].capabilities.push_back(CapabilityKernel); + InstructionDesc[OpLessOrGreater].operands.push(OperandId, "'x'"); + InstructionDesc[OpLessOrGreater].operands.push(OperandId, "'y'"); + + InstructionDesc[OpOrdered].capabilities.push_back(CapabilityKernel); + InstructionDesc[OpOrdered].operands.push(OperandId, "'x'"); + InstructionDesc[OpOrdered].operands.push(OperandId, "'y'"); + + InstructionDesc[OpUnordered].capabilities.push_back(CapabilityKernel); + 
InstructionDesc[OpUnordered].operands.push(OperandId, "'x'"); + InstructionDesc[OpUnordered].operands.push(OperandId, "'y'"); + + InstructionDesc[OpArrayLength].operands.push(OperandId, "'Structure'"); + InstructionDesc[OpArrayLength].operands.push(OperandLiteralNumber, "'Array member'"); + InstructionDesc[OpArrayLength].capabilities.push_back(CapabilityShader); + + InstructionDesc[OpIAdd].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpIAdd].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFAdd].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFAdd].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpISub].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpISub].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFSub].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFSub].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpIMul].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpIMul].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFMul].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFMul].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpUDiv].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpUDiv].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpSDiv].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpSDiv].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFDiv].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFDiv].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpUMod].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpUMod].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpSRem].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpSRem].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpSMod].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpSMod].operands.push(OperandId, "'Operand 2'"); + 
+ InstructionDesc[OpFRem].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFRem].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFMod].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFMod].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpVectorTimesScalar].operands.push(OperandId, "'Vector'"); + InstructionDesc[OpVectorTimesScalar].operands.push(OperandId, "'Scalar'"); + + InstructionDesc[OpMatrixTimesScalar].capabilities.push_back(CapabilityMatrix); + InstructionDesc[OpMatrixTimesScalar].operands.push(OperandId, "'Matrix'"); + InstructionDesc[OpMatrixTimesScalar].operands.push(OperandId, "'Scalar'"); + + InstructionDesc[OpVectorTimesMatrix].capabilities.push_back(CapabilityMatrix); + InstructionDesc[OpVectorTimesMatrix].operands.push(OperandId, "'Vector'"); + InstructionDesc[OpVectorTimesMatrix].operands.push(OperandId, "'Matrix'"); + + InstructionDesc[OpMatrixTimesVector].capabilities.push_back(CapabilityMatrix); + InstructionDesc[OpMatrixTimesVector].operands.push(OperandId, "'Matrix'"); + InstructionDesc[OpMatrixTimesVector].operands.push(OperandId, "'Vector'"); + + InstructionDesc[OpMatrixTimesMatrix].capabilities.push_back(CapabilityMatrix); + InstructionDesc[OpMatrixTimesMatrix].operands.push(OperandId, "'LeftMatrix'"); + InstructionDesc[OpMatrixTimesMatrix].operands.push(OperandId, "'RightMatrix'"); + + InstructionDesc[OpOuterProduct].capabilities.push_back(CapabilityMatrix); + InstructionDesc[OpOuterProduct].operands.push(OperandId, "'Vector 1'"); + InstructionDesc[OpOuterProduct].operands.push(OperandId, "'Vector 2'"); + + InstructionDesc[OpDot].operands.push(OperandId, "'Vector 1'"); + InstructionDesc[OpDot].operands.push(OperandId, "'Vector 2'"); + + InstructionDesc[OpIAddCarry].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpIAddCarry].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpISubBorrow].operands.push(OperandId, "'Operand 1'"); + 
InstructionDesc[OpISubBorrow].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpUMulExtended].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpUMulExtended].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpSMulExtended].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpSMulExtended].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpShiftRightLogical].operands.push(OperandId, "'Base'"); + InstructionDesc[OpShiftRightLogical].operands.push(OperandId, "'Shift'"); + + InstructionDesc[OpShiftRightArithmetic].operands.push(OperandId, "'Base'"); + InstructionDesc[OpShiftRightArithmetic].operands.push(OperandId, "'Shift'"); + + InstructionDesc[OpShiftLeftLogical].operands.push(OperandId, "'Base'"); + InstructionDesc[OpShiftLeftLogical].operands.push(OperandId, "'Shift'"); + + InstructionDesc[OpLogicalOr].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpLogicalOr].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpLogicalAnd].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpLogicalAnd].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpLogicalEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpLogicalEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpLogicalNotEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpLogicalNotEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpLogicalNot].operands.push(OperandId, "'Operand'"); + + InstructionDesc[OpBitwiseOr].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpBitwiseOr].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpBitwiseXor].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpBitwiseXor].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpBitwiseAnd].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpBitwiseAnd].operands.push(OperandId, "'Operand 2'"); + + 
InstructionDesc[OpBitFieldInsert].capabilities.push_back(CapabilityShader); + InstructionDesc[OpBitFieldInsert].operands.push(OperandId, "'Base'"); + InstructionDesc[OpBitFieldInsert].operands.push(OperandId, "'Insert'"); + InstructionDesc[OpBitFieldInsert].operands.push(OperandId, "'Offset'"); + InstructionDesc[OpBitFieldInsert].operands.push(OperandId, "'Count'"); + + InstructionDesc[OpBitFieldSExtract].capabilities.push_back(CapabilityShader); + InstructionDesc[OpBitFieldSExtract].operands.push(OperandId, "'Base'"); + InstructionDesc[OpBitFieldSExtract].operands.push(OperandId, "'Offset'"); + InstructionDesc[OpBitFieldSExtract].operands.push(OperandId, "'Count'"); + + InstructionDesc[OpBitFieldUExtract].capabilities.push_back(CapabilityShader); + InstructionDesc[OpBitFieldUExtract].operands.push(OperandId, "'Base'"); + InstructionDesc[OpBitFieldUExtract].operands.push(OperandId, "'Offset'"); + InstructionDesc[OpBitFieldUExtract].operands.push(OperandId, "'Count'"); + + InstructionDesc[OpBitReverse].capabilities.push_back(CapabilityShader); + InstructionDesc[OpBitReverse].operands.push(OperandId, "'Base'"); + + InstructionDesc[OpBitCount].operands.push(OperandId, "'Base'"); + + InstructionDesc[OpSelect].operands.push(OperandId, "'Condition'"); + InstructionDesc[OpSelect].operands.push(OperandId, "'Object 1'"); + InstructionDesc[OpSelect].operands.push(OperandId, "'Object 2'"); + + InstructionDesc[OpIEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpIEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFOrdEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFOrdEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFUnordEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFUnordEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpINotEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpINotEqual].operands.push(OperandId, "'Operand 2'"); + + 
InstructionDesc[OpFOrdNotEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFOrdNotEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFUnordNotEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFUnordNotEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpULessThan].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpULessThan].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpSLessThan].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpSLessThan].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFOrdLessThan].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFOrdLessThan].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFUnordLessThan].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFUnordLessThan].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpUGreaterThan].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpUGreaterThan].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpSGreaterThan].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpSGreaterThan].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFOrdGreaterThan].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFOrdGreaterThan].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFUnordGreaterThan].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFUnordGreaterThan].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpULessThanEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpULessThanEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpSLessThanEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpSLessThanEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFOrdLessThanEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFOrdLessThanEqual].operands.push(OperandId, "'Operand 
2'"); + + InstructionDesc[OpFUnordLessThanEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFUnordLessThanEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpUGreaterThanEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpUGreaterThanEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpSGreaterThanEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpSGreaterThanEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFOrdGreaterThanEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFOrdGreaterThanEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpFUnordGreaterThanEqual].operands.push(OperandId, "'Operand 1'"); + InstructionDesc[OpFUnordGreaterThanEqual].operands.push(OperandId, "'Operand 2'"); + + InstructionDesc[OpDPdx].capabilities.push_back(CapabilityShader); + InstructionDesc[OpDPdx].operands.push(OperandId, "'P'"); + + InstructionDesc[OpDPdy].capabilities.push_back(CapabilityShader); + InstructionDesc[OpDPdy].operands.push(OperandId, "'P'"); + + InstructionDesc[OpFwidth].capabilities.push_back(CapabilityShader); + InstructionDesc[OpFwidth].operands.push(OperandId, "'P'"); + + InstructionDesc[OpDPdxFine].capabilities.push_back(CapabilityDerivativeControl); + InstructionDesc[OpDPdxFine].operands.push(OperandId, "'P'"); + + InstructionDesc[OpDPdyFine].capabilities.push_back(CapabilityDerivativeControl); + InstructionDesc[OpDPdyFine].operands.push(OperandId, "'P'"); + + InstructionDesc[OpFwidthFine].capabilities.push_back(CapabilityDerivativeControl); + InstructionDesc[OpFwidthFine].operands.push(OperandId, "'P'"); + + InstructionDesc[OpDPdxCoarse].capabilities.push_back(CapabilityDerivativeControl); + InstructionDesc[OpDPdxCoarse].operands.push(OperandId, "'P'"); + + InstructionDesc[OpDPdyCoarse].capabilities.push_back(CapabilityDerivativeControl); + InstructionDesc[OpDPdyCoarse].operands.push(OperandId, "'P'"); + + 
InstructionDesc[OpFwidthCoarse].capabilities.push_back(CapabilityDerivativeControl); + InstructionDesc[OpFwidthCoarse].operands.push(OperandId, "'P'"); + + InstructionDesc[OpEmitVertex].capabilities.push_back(CapabilityGeometry); + + InstructionDesc[OpEndPrimitive].capabilities.push_back(CapabilityGeometry); + + InstructionDesc[OpEmitStreamVertex].operands.push(OperandId, "'Stream'"); + InstructionDesc[OpEmitStreamVertex].capabilities.push_back(CapabilityGeometryStreams); + + InstructionDesc[OpEndStreamPrimitive].operands.push(OperandId, "'Stream'"); + InstructionDesc[OpEndStreamPrimitive].capabilities.push_back(CapabilityGeometryStreams); + + InstructionDesc[OpControlBarrier].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpControlBarrier].operands.push(OperandScope, "'Memory'"); + InstructionDesc[OpControlBarrier].operands.push(OperandMemorySemantics, "'Semantics'"); + + InstructionDesc[OpMemoryBarrier].operands.push(OperandScope, "'Memory'"); + InstructionDesc[OpMemoryBarrier].operands.push(OperandMemorySemantics, "'Semantics'"); + + InstructionDesc[OpImageTexelPointer].operands.push(OperandId, "'Image'"); + InstructionDesc[OpImageTexelPointer].operands.push(OperandId, "'Coordinate'"); + InstructionDesc[OpImageTexelPointer].operands.push(OperandId, "'Sample'"); + + InstructionDesc[OpAtomicLoad].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicLoad].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicLoad].operands.push(OperandMemorySemantics, "'Semantics'"); + + InstructionDesc[OpAtomicStore].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicStore].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicStore].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicStore].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpAtomicExchange].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicExchange].operands.push(OperandScope, "'Scope'"); + 
InstructionDesc[OpAtomicExchange].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicExchange].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpAtomicCompareExchange].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicCompareExchange].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicCompareExchange].operands.push(OperandMemorySemantics, "'Equal'"); + InstructionDesc[OpAtomicCompareExchange].operands.push(OperandMemorySemantics, "'Unequal'"); + InstructionDesc[OpAtomicCompareExchange].operands.push(OperandId, "'Value'"); + InstructionDesc[OpAtomicCompareExchange].operands.push(OperandId, "'Comparator'"); + + InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandMemorySemantics, "'Equal'"); + InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandMemorySemantics, "'Unequal'"); + InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandId, "'Value'"); + InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandId, "'Comparator'"); + InstructionDesc[OpAtomicCompareExchangeWeak].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpAtomicIIncrement].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicIIncrement].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicIIncrement].operands.push(OperandMemorySemantics, "'Semantics'"); + + InstructionDesc[OpAtomicIDecrement].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicIDecrement].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicIDecrement].operands.push(OperandMemorySemantics, "'Semantics'"); + + InstructionDesc[OpAtomicIAdd].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicIAdd].operands.push(OperandScope, "'Scope'"); + 
InstructionDesc[OpAtomicIAdd].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicIAdd].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpAtomicISub].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicISub].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicISub].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicISub].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpAtomicUMin].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicUMin].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicUMin].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicUMin].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpAtomicUMax].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicUMax].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicUMax].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicUMax].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpAtomicSMin].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicSMin].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicSMin].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicSMin].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpAtomicSMax].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicSMax].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicSMax].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicSMax].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpAtomicAnd].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicAnd].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicAnd].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicAnd].operands.push(OperandId, "'Value'"); + + 
InstructionDesc[OpAtomicOr].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicOr].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicOr].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicOr].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpAtomicXor].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicXor].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicXor].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicXor].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpAtomicFlagTestAndSet].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicFlagTestAndSet].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicFlagTestAndSet].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicFlagTestAndSet].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpAtomicFlagClear].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpAtomicFlagClear].operands.push(OperandScope, "'Scope'"); + InstructionDesc[OpAtomicFlagClear].operands.push(OperandMemorySemantics, "'Semantics'"); + InstructionDesc[OpAtomicFlagClear].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpLoopMerge].operands.push(OperandId, "'Merge Block'"); + InstructionDesc[OpLoopMerge].operands.push(OperandId, "'Continue Target'"); + InstructionDesc[OpLoopMerge].operands.push(OperandLoop, ""); + + InstructionDesc[OpSelectionMerge].operands.push(OperandId, "'Merge Block'"); + InstructionDesc[OpSelectionMerge].operands.push(OperandSelect, ""); + + InstructionDesc[OpBranch].operands.push(OperandId, "'Target Label'"); + + InstructionDesc[OpBranchConditional].operands.push(OperandId, "'Condition'"); + InstructionDesc[OpBranchConditional].operands.push(OperandId, "'True Label'"); + InstructionDesc[OpBranchConditional].operands.push(OperandId, "'False Label'"); + 
InstructionDesc[OpBranchConditional].operands.push(OperandVariableLiterals, "'Branch weights'"); + + InstructionDesc[OpSwitch].operands.push(OperandId, "'Selector'"); + InstructionDesc[OpSwitch].operands.push(OperandId, "'Default'"); + InstructionDesc[OpSwitch].operands.push(OperandVariableLiteralId, "'Target'"); + + InstructionDesc[OpKill].capabilities.push_back(CapabilityShader); + + InstructionDesc[OpReturnValue].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpLifetimeStart].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpLifetimeStart].operands.push(OperandLiteralNumber, "'Size'"); + InstructionDesc[OpLifetimeStart].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpLifetimeStop].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpLifetimeStop].operands.push(OperandLiteralNumber, "'Size'"); + InstructionDesc[OpLifetimeStop].capabilities.push_back(CapabilityKernel); + + InstructionDesc[OpGroupAsyncCopy].capabilities.push_back(CapabilityKernel); + InstructionDesc[OpGroupAsyncCopy].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Destination'"); + InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Source'"); + InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Num Elements'"); + InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Stride'"); + InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Event'"); + + InstructionDesc[OpGroupWaitEvents].capabilities.push_back(CapabilityKernel); + InstructionDesc[OpGroupWaitEvents].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupWaitEvents].operands.push(OperandId, "'Num Events'"); + InstructionDesc[OpGroupWaitEvents].operands.push(OperandId, "'Events List'"); + + InstructionDesc[OpGroupAll].capabilities.push_back(CapabilityGroups); + InstructionDesc[OpGroupAll].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupAll].operands.push(OperandId, 
"'Predicate'"); + + InstructionDesc[OpGroupAny].capabilities.push_back(CapabilityGroups); + InstructionDesc[OpGroupAny].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupAny].operands.push(OperandId, "'Predicate'"); + + InstructionDesc[OpGroupBroadcast].capabilities.push_back(CapabilityGroups); + InstructionDesc[OpGroupBroadcast].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupBroadcast].operands.push(OperandId, "'Value'"); + InstructionDesc[OpGroupBroadcast].operands.push(OperandId, "'LocalId'"); + + InstructionDesc[OpGroupIAdd].capabilities.push_back(CapabilityGroups); + InstructionDesc[OpGroupIAdd].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupIAdd].operands.push(OperandGroupOperation, "'Operation'"); + InstructionDesc[OpGroupIAdd].operands.push(OperandId, "'X'"); + + InstructionDesc[OpGroupFAdd].capabilities.push_back(CapabilityGroups); + InstructionDesc[OpGroupFAdd].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupFAdd].operands.push(OperandGroupOperation, "'Operation'"); + InstructionDesc[OpGroupFAdd].operands.push(OperandId, "'X'"); + + InstructionDesc[OpGroupUMin].capabilities.push_back(CapabilityGroups); + InstructionDesc[OpGroupUMin].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupUMin].operands.push(OperandGroupOperation, "'Operation'"); + InstructionDesc[OpGroupUMin].operands.push(OperandId, "'X'"); + + InstructionDesc[OpGroupSMin].capabilities.push_back(CapabilityGroups); + InstructionDesc[OpGroupSMin].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupSMin].operands.push(OperandGroupOperation, "'Operation'"); + InstructionDesc[OpGroupSMin].operands.push(OperandId, "X"); + + InstructionDesc[OpGroupFMin].capabilities.push_back(CapabilityGroups); + InstructionDesc[OpGroupFMin].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupFMin].operands.push(OperandGroupOperation, "'Operation'"); + 
InstructionDesc[OpGroupFMin].operands.push(OperandId, "X"); + + InstructionDesc[OpGroupUMax].capabilities.push_back(CapabilityGroups); + InstructionDesc[OpGroupUMax].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupUMax].operands.push(OperandGroupOperation, "'Operation'"); + InstructionDesc[OpGroupUMax].operands.push(OperandId, "X"); + + InstructionDesc[OpGroupSMax].capabilities.push_back(CapabilityGroups); + InstructionDesc[OpGroupSMax].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupSMax].operands.push(OperandGroupOperation, "'Operation'"); + InstructionDesc[OpGroupSMax].operands.push(OperandId, "X"); + + InstructionDesc[OpGroupFMax].capabilities.push_back(CapabilityGroups); + InstructionDesc[OpGroupFMax].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupFMax].operands.push(OperandGroupOperation, "'Operation'"); + InstructionDesc[OpGroupFMax].operands.push(OperandId, "X"); + + InstructionDesc[OpReadPipe].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpReadPipe].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpReadPipe].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpReadPipe].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpReadPipe].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpWritePipe].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpWritePipe].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpWritePipe].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpWritePipe].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpWritePipe].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpReservedReadPipe].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Reserve Id'"); + InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Index'"); + 
InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpReservedWritePipe].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Reserve Id'"); + InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Index'"); + InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Pointer'"); + InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpReserveReadPipePackets].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpReserveReadPipePackets].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpReserveReadPipePackets].operands.push(OperandId, "'Num Packets'"); + InstructionDesc[OpReserveReadPipePackets].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpReserveReadPipePackets].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpReserveWritePipePackets].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpReserveWritePipePackets].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpReserveWritePipePackets].operands.push(OperandId, "'Num Packets'"); + InstructionDesc[OpReserveWritePipePackets].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpReserveWritePipePackets].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpCommitReadPipe].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpCommitReadPipe].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpCommitReadPipe].operands.push(OperandId, "'Reserve Id'"); + InstructionDesc[OpCommitReadPipe].operands.push(OperandId, "'Packet Size'"); + 
InstructionDesc[OpCommitReadPipe].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpCommitWritePipe].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpCommitWritePipe].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpCommitWritePipe].operands.push(OperandId, "'Reserve Id'"); + InstructionDesc[OpCommitWritePipe].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpCommitWritePipe].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpIsValidReserveId].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpIsValidReserveId].operands.push(OperandId, "'Reserve Id'"); + + InstructionDesc[OpGetNumPipePackets].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpGetNumPipePackets].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpGetNumPipePackets].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpGetNumPipePackets].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpGetMaxPipePackets].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpGetMaxPipePackets].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpGetMaxPipePackets].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpGetMaxPipePackets].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpGroupReserveReadPipePackets].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandId, "'Num Packets'"); + InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpGroupReserveWritePipePackets].capabilities.push_back(CapabilityPipes); + 
InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandId, "'Num Packets'"); + InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpGroupCommitReadPipe].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandId, "'Reserve Id'"); + InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpGroupCommitWritePipe].capabilities.push_back(CapabilityPipes); + InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandScope, "'Execution'"); + InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandId, "'Pipe'"); + InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandId, "'Reserve Id'"); + InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandId, "'Packet Size'"); + InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandId, "'Packet Alignment'"); + + InstructionDesc[OpBuildNDRange].capabilities.push_back(CapabilityDeviceEnqueue); + InstructionDesc[OpBuildNDRange].operands.push(OperandId, "'GlobalWorkSize'"); + InstructionDesc[OpBuildNDRange].operands.push(OperandId, "'LocalWorkSize'"); + InstructionDesc[OpBuildNDRange].operands.push(OperandId, "'GlobalWorkOffset'"); + + InstructionDesc[OpGetDefaultQueue].capabilities.push_back(CapabilityDeviceEnqueue); + + InstructionDesc[OpCaptureEventProfilingInfo].capabilities.push_back(CapabilityDeviceEnqueue); + + 
InstructionDesc[OpCaptureEventProfilingInfo].operands.push(OperandId, "'Event'"); + InstructionDesc[OpCaptureEventProfilingInfo].operands.push(OperandId, "'Profiling Info'"); + InstructionDesc[OpCaptureEventProfilingInfo].operands.push(OperandId, "'Value'"); + + InstructionDesc[OpSetUserEventStatus].capabilities.push_back(CapabilityDeviceEnqueue); + + InstructionDesc[OpSetUserEventStatus].operands.push(OperandId, "'Event'"); + InstructionDesc[OpSetUserEventStatus].operands.push(OperandId, "'Status'"); + + InstructionDesc[OpIsValidEvent].capabilities.push_back(CapabilityDeviceEnqueue); + InstructionDesc[OpIsValidEvent].operands.push(OperandId, "'Event'"); + + InstructionDesc[OpCreateUserEvent].capabilities.push_back(CapabilityDeviceEnqueue); + + InstructionDesc[OpRetainEvent].capabilities.push_back(CapabilityDeviceEnqueue); + InstructionDesc[OpRetainEvent].operands.push(OperandId, "'Event'"); + + InstructionDesc[OpReleaseEvent].capabilities.push_back(CapabilityDeviceEnqueue); + InstructionDesc[OpReleaseEvent].operands.push(OperandId, "'Event'"); + + InstructionDesc[OpGetKernelWorkGroupSize].capabilities.push_back(CapabilityDeviceEnqueue); + InstructionDesc[OpGetKernelWorkGroupSize].operands.push(OperandId, "'Invoke'"); + InstructionDesc[OpGetKernelWorkGroupSize].operands.push(OperandId, "'Param'"); + InstructionDesc[OpGetKernelWorkGroupSize].operands.push(OperandId, "'Param Size'"); + InstructionDesc[OpGetKernelWorkGroupSize].operands.push(OperandId, "'Param Align'"); + + InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].capabilities.push_back(CapabilityDeviceEnqueue); + InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].operands.push(OperandId, "'Invoke'"); + InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].operands.push(OperandId, "'Param'"); + InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].operands.push(OperandId, "'Param Size'"); + InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].operands.push(OperandId, "'Param 
Align'"); + + InstructionDesc[OpGetKernelNDrangeSubGroupCount].capabilities.push_back(CapabilityDeviceEnqueue); + InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'ND Range'"); + InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'Invoke'"); + InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'Param'"); + InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'Param Size'"); + InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'Param Align'"); + + InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].capabilities.push_back(CapabilityDeviceEnqueue); + InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'ND Range'"); + InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'Invoke'"); + InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'Param'"); + InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'Param Size'"); + InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'Param Align'"); + + InstructionDesc[OpEnqueueKernel].capabilities.push_back(CapabilityDeviceEnqueue); + InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Queue'"); + InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Flags'"); + InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'ND Range'"); + InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Num Events'"); + InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Wait Events'"); + InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Ret Event'"); + InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Invoke'"); + InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Param'"); + InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Param Size'"); + InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Param Align'"); + 
InstructionDesc[OpEnqueueKernel].operands.push(OperandVariableIds, "'Local Size'"); + + InstructionDesc[OpEnqueueMarker].capabilities.push_back(CapabilityDeviceEnqueue); + InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Queue'"); + InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Num Events'"); + InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Wait Events'"); + InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Ret Event'"); +} + +}; // end spv namespace diff --git a/third_party/glslang-spirv/doc.h b/third_party/glslang-spirv/doc.h new file mode 100644 index 000000000..948b6fe04 --- /dev/null +++ b/third_party/glslang-spirv/doc.h @@ -0,0 +1,261 @@ +// +//Copyright (C) 2014-2015 LunarG, Inc. +// +//All rights reserved. +// +//Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions +//are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +//POSSIBILITY OF SUCH DAMAGE. + +// +// Author: John Kessenich, LunarG +// + +// +// Parameterize the SPIR-V enumerants. +// + +#include "spirv.hpp" + +#include + +namespace spv { + +// Fill in all the parameters +void Parameterize(); + +// Return the English names of all the enums. +const char* SourceString(int); +const char* AddressingString(int); +const char* MemoryString(int); +const char* ExecutionModelString(int); +const char* ExecutionModeString(int); +const char* StorageClassString(int); +const char* DecorationString(int); +const char* BuiltInString(int); +const char* DimensionString(int); +const char* SelectControlString(int); +const char* LoopControlString(int); +const char* FunctionControlString(int); +const char* SamplerAddressingModeString(int); +const char* SamplerFilterModeString(int); +const char* ImageFormatString(int); +const char* ImageChannelOrderString(int); +const char* ImageChannelTypeString(int); +const char* ImageOperands(int); +const char* FPFastMathString(int); +const char* FPRoundingModeString(int); +const char* LinkageTypeString(int); +const char* FuncParamAttrString(int); +const char* AccessQualifierString(int); +const char* MemorySemanticsString(int); +const char* MemoryAccessString(int); +const char* ExecutionScopeString(int); +const char* GroupOperationString(int); +const char* KernelEnqueueFlagsString(int); +const char* KernelProfilingInfoString(int); +const char* CapabilityString(int); +const char* OpcodeString(int); + +// For 
grouping opcodes into subsections +enum OpcodeClass { + OpClassMisc, + OpClassDebug, + OpClassAnnotate, + OpClassExtension, + OpClassMode, + OpClassType, + OpClassConstant, + OpClassMemory, + OpClassFunction, + OpClassImage, + OpClassConvert, + OpClassComposite, + OpClassArithmetic, + OpClassBit, + OpClassRelationalLogical, + OpClassDerivative, + OpClassFlowControl, + OpClassAtomic, + OpClassPrimitive, + OpClassBarrier, + OpClassGroup, + OpClassDeviceSideEnqueue, + OpClassPipe, + + OpClassCount, + OpClassMissing // all instructions start out as missing +}; + +// For parameterizing operands. +enum OperandClass { + OperandNone, + OperandId, + OperandVariableIds, + OperandOptionalLiteral, + OperandOptionalLiteralString, + OperandVariableLiterals, + OperandVariableIdLiteral, + OperandVariableLiteralId, + OperandLiteralNumber, + OperandLiteralString, + OperandSource, + OperandExecutionModel, + OperandAddressing, + OperandMemory, + OperandExecutionMode, + OperandStorage, + OperandDimensionality, + OperandSamplerAddressingMode, + OperandSamplerFilterMode, + OperandSamplerImageFormat, + OperandImageChannelOrder, + OperandImageChannelDataType, + OperandImageOperands, + OperandFPFastMath, + OperandFPRoundingMode, + OperandLinkageType, + OperandAccessQualifier, + OperandFuncParamAttr, + OperandDecoration, + OperandBuiltIn, + OperandSelect, + OperandLoop, + OperandFunction, + OperandMemorySemantics, + OperandMemoryAccess, + OperandScope, + OperandGroupOperation, + OperandKernelEnqueueFlags, + OperandKernelProfilingInfo, + OperandCapability, + + OperandOpcode, + + OperandCount +}; + +// Any specific enum can have a set of capabilities that allow it: +typedef std::vector EnumCaps; + +// Parameterize a set of operands with their OperandClass(es) and descriptions. 
+class OperandParameters { +public: + OperandParameters() { } + void push(OperandClass oc, const char* d, bool opt = false) + { + opClass.push_back(oc); + desc.push_back(d); + optional.push_back(opt); + } + void setOptional(); + OperandClass getClass(int op) const { return opClass[op]; } + const char* getDesc(int op) const { return desc[op]; } + bool isOptional(int op) const { return optional[op]; } + int getNum() const { return (int)opClass.size(); } + +protected: + std::vector opClass; + std::vector desc; + std::vector optional; +}; + +// Parameterize an enumerant +class EnumParameters { +public: + EnumParameters() : desc(0) { } + EnumCaps caps; + const char* desc; +}; + +// Parameterize a set of enumerants that form an enum +class EnumDefinition : public EnumParameters { +public: + EnumDefinition() : + ceiling(0), bitmask(false), getName(0), enumParams(0), operandParams(0) { } + void set(int ceil, const char* (*name)(int), EnumParameters* ep, bool mask = false) + { + ceiling = ceil; + getName = name; + bitmask = mask; + enumParams = ep; + } + void setOperands(OperandParameters* op) { operandParams = op; } + int ceiling; // ceiling of enumerants + bool bitmask; // true if these enumerants combine into a bitmask + const char* (*getName)(int); // a function that returns the name for each enumerant value (or shift) + EnumParameters* enumParams; // parameters for each individual enumerant + OperandParameters* operandParams; // sets of operands +}; + +// Parameterize an instruction's logical format, including its known set of operands, +// per OperandParameters above. 
+class InstructionParameters { +public: + InstructionParameters() : + opDesc("TBD"), + opClass(OpClassMissing), + typePresent(true), // most normal, only exceptions have to be spelled out + resultPresent(true) // most normal, only exceptions have to be spelled out + { } + + void setResultAndType(bool r, bool t) + { + resultPresent = r; + typePresent = t; + } + + bool hasResult() const { return resultPresent != 0; } + bool hasType() const { return typePresent != 0; } + + const char* opDesc; + EnumCaps capabilities; + OpcodeClass opClass; + OperandParameters operands; + +protected: + int typePresent : 1; + int resultPresent : 1; +}; + +const int OpcodeCeiling = 321; + +// The set of objects that hold all the instruction/operand +// parameterization information. +extern InstructionParameters InstructionDesc[]; + +// These hold definitions of the enumerants used for operands +extern EnumDefinition OperandClassParams[]; + +const char* GetOperandDesc(OperandClass operand); +void PrintImmediateRow(int imm, const char* name, const EnumParameters* enumParams, bool caps, bool hex = false); +const char* AccessQualifierString(int attr); + +void PrintOperands(const OperandParameters& operands, int reservedOperands); + +}; // end namespace spv diff --git a/third_party/glslang-spirv/spirv.hpp b/third_party/glslang-spirv/spirv.hpp new file mode 100644 index 000000000..526781fc8 --- /dev/null +++ b/third_party/glslang-spirv/spirv.hpp @@ -0,0 +1,879 @@ +// Copyright (c) 2014-2016 The Khronos Group Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. + +// This header is automatically generated by the same tool that creates +// the Binary Section of the SPIR-V specification. 
+ +// Enumeration tokens for SPIR-V, in various styles: +// C, C++, C++11, JSON, Lua, Python +// +// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +// +// Some tokens act like mask values, which can be OR'd together, +// while others are mutually exclusive. The mask-like ones have +// "Mask" in their name, and a parallel enum that has the shift +// amount (1 << x) for each corresponding enumerant. + +#ifndef spirv_HPP +#define spirv_HPP + +namespace spv { + +typedef unsigned int Id; + +#define SPV_VERSION 0x10000 +#define SPV_REVISION 3 + +static const unsigned int MagicNumber = 0x07230203; +static const unsigned int Version = 0x00010000; +static const unsigned int Revision = 3; +static const unsigned int OpCodeMask = 0xffff; +static const unsigned int WordCountShift = 16; + +enum SourceLanguage { + SourceLanguageUnknown = 0, + SourceLanguageESSL = 1, + SourceLanguageGLSL = 2, + SourceLanguageOpenCL_C = 3, + SourceLanguageOpenCL_CPP = 4, +}; + +enum ExecutionModel { + ExecutionModelVertex = 0, + ExecutionModelTessellationControl = 1, + ExecutionModelTessellationEvaluation = 2, + ExecutionModelGeometry = 3, + ExecutionModelFragment = 4, + ExecutionModelGLCompute = 5, + ExecutionModelKernel = 6, +}; + +enum AddressingModel { + AddressingModelLogical = 0, + AddressingModelPhysical32 = 1, + AddressingModelPhysical64 = 2, +}; + +enum MemoryModel { + MemoryModelSimple = 0, + MemoryModelGLSL450 = 1, + MemoryModelOpenCL = 2, +}; + +enum ExecutionMode { + ExecutionModeInvocations = 0, + ExecutionModeSpacingEqual = 1, + ExecutionModeSpacingFractionalEven = 2, + ExecutionModeSpacingFractionalOdd = 3, + ExecutionModeVertexOrderCw = 4, + 
ExecutionModeVertexOrderCcw = 5, + ExecutionModePixelCenterInteger = 6, + ExecutionModeOriginUpperLeft = 7, + ExecutionModeOriginLowerLeft = 8, + ExecutionModeEarlyFragmentTests = 9, + ExecutionModePointMode = 10, + ExecutionModeXfb = 11, + ExecutionModeDepthReplacing = 12, + ExecutionModeDepthGreater = 14, + ExecutionModeDepthLess = 15, + ExecutionModeDepthUnchanged = 16, + ExecutionModeLocalSize = 17, + ExecutionModeLocalSizeHint = 18, + ExecutionModeInputPoints = 19, + ExecutionModeInputLines = 20, + ExecutionModeInputLinesAdjacency = 21, + ExecutionModeTriangles = 22, + ExecutionModeInputTrianglesAdjacency = 23, + ExecutionModeQuads = 24, + ExecutionModeIsolines = 25, + ExecutionModeOutputVertices = 26, + ExecutionModeOutputPoints = 27, + ExecutionModeOutputLineStrip = 28, + ExecutionModeOutputTriangleStrip = 29, + ExecutionModeVecTypeHint = 30, + ExecutionModeContractionOff = 31, +}; + +enum StorageClass { + StorageClassUniformConstant = 0, + StorageClassInput = 1, + StorageClassUniform = 2, + StorageClassOutput = 3, + StorageClassWorkgroup = 4, + StorageClassCrossWorkgroup = 5, + StorageClassPrivate = 6, + StorageClassFunction = 7, + StorageClassGeneric = 8, + StorageClassPushConstant = 9, + StorageClassAtomicCounter = 10, + StorageClassImage = 11, +}; + +enum Dim { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + DimCube = 3, + DimRect = 4, + DimBuffer = 5, + DimSubpassData = 6, +}; + +enum SamplerAddressingMode { + SamplerAddressingModeNone = 0, + SamplerAddressingModeClampToEdge = 1, + SamplerAddressingModeClamp = 2, + SamplerAddressingModeRepeat = 3, + SamplerAddressingModeRepeatMirrored = 4, +}; + +enum SamplerFilterMode { + SamplerFilterModeNearest = 0, + SamplerFilterModeLinear = 1, +}; + +enum ImageFormat { + ImageFormatUnknown = 0, + ImageFormatRgba32f = 1, + ImageFormatRgba16f = 2, + ImageFormatR32f = 3, + ImageFormatRgba8 = 4, + ImageFormatRgba8Snorm = 5, + ImageFormatRg32f = 6, + ImageFormatRg16f = 7, + ImageFormatR11fG11fB10f = 8, + ImageFormatR16f = 9, 
+ ImageFormatRgba16 = 10, + ImageFormatRgb10A2 = 11, + ImageFormatRg16 = 12, + ImageFormatRg8 = 13, + ImageFormatR16 = 14, + ImageFormatR8 = 15, + ImageFormatRgba16Snorm = 16, + ImageFormatRg16Snorm = 17, + ImageFormatRg8Snorm = 18, + ImageFormatR16Snorm = 19, + ImageFormatR8Snorm = 20, + ImageFormatRgba32i = 21, + ImageFormatRgba16i = 22, + ImageFormatRgba8i = 23, + ImageFormatR32i = 24, + ImageFormatRg32i = 25, + ImageFormatRg16i = 26, + ImageFormatRg8i = 27, + ImageFormatR16i = 28, + ImageFormatR8i = 29, + ImageFormatRgba32ui = 30, + ImageFormatRgba16ui = 31, + ImageFormatRgba8ui = 32, + ImageFormatR32ui = 33, + ImageFormatRgb10a2ui = 34, + ImageFormatRg32ui = 35, + ImageFormatRg16ui = 36, + ImageFormatRg8ui = 37, + ImageFormatR16ui = 38, + ImageFormatR8ui = 39, +}; + +enum ImageChannelOrder { + ImageChannelOrderR = 0, + ImageChannelOrderA = 1, + ImageChannelOrderRG = 2, + ImageChannelOrderRA = 3, + ImageChannelOrderRGB = 4, + ImageChannelOrderRGBA = 5, + ImageChannelOrderBGRA = 6, + ImageChannelOrderARGB = 7, + ImageChannelOrderIntensity = 8, + ImageChannelOrderLuminance = 9, + ImageChannelOrderRx = 10, + ImageChannelOrderRGx = 11, + ImageChannelOrderRGBx = 12, + ImageChannelOrderDepth = 13, + ImageChannelOrderDepthStencil = 14, + ImageChannelOrdersRGB = 15, + ImageChannelOrdersRGBx = 16, + ImageChannelOrdersRGBA = 17, + ImageChannelOrdersBGRA = 18, +}; + +enum ImageChannelDataType { + ImageChannelDataTypeSnormInt8 = 0, + ImageChannelDataTypeSnormInt16 = 1, + ImageChannelDataTypeUnormInt8 = 2, + ImageChannelDataTypeUnormInt16 = 3, + ImageChannelDataTypeUnormShort565 = 4, + ImageChannelDataTypeUnormShort555 = 5, + ImageChannelDataTypeUnormInt101010 = 6, + ImageChannelDataTypeSignedInt8 = 7, + ImageChannelDataTypeSignedInt16 = 8, + ImageChannelDataTypeSignedInt32 = 9, + ImageChannelDataTypeUnsignedInt8 = 10, + ImageChannelDataTypeUnsignedInt16 = 11, + ImageChannelDataTypeUnsignedInt32 = 12, + ImageChannelDataTypeHalfFloat = 13, + ImageChannelDataTypeFloat = 14, + 
ImageChannelDataTypeUnormInt24 = 15, + ImageChannelDataTypeUnormInt101010_2 = 16, +}; + +enum ImageOperandsShift { + ImageOperandsBiasShift = 0, + ImageOperandsLodShift = 1, + ImageOperandsGradShift = 2, + ImageOperandsConstOffsetShift = 3, + ImageOperandsOffsetShift = 4, + ImageOperandsConstOffsetsShift = 5, + ImageOperandsSampleShift = 6, + ImageOperandsMinLodShift = 7, +}; + +enum ImageOperandsMask { + ImageOperandsMaskNone = 0, + ImageOperandsBiasMask = 0x00000001, + ImageOperandsLodMask = 0x00000002, + ImageOperandsGradMask = 0x00000004, + ImageOperandsConstOffsetMask = 0x00000008, + ImageOperandsOffsetMask = 0x00000010, + ImageOperandsConstOffsetsMask = 0x00000020, + ImageOperandsSampleMask = 0x00000040, + ImageOperandsMinLodMask = 0x00000080, +}; + +enum FPFastMathModeShift { + FPFastMathModeNotNaNShift = 0, + FPFastMathModeNotInfShift = 1, + FPFastMathModeNSZShift = 2, + FPFastMathModeAllowRecipShift = 3, + FPFastMathModeFastShift = 4, +}; + +enum FPFastMathModeMask { + FPFastMathModeMaskNone = 0, + FPFastMathModeNotNaNMask = 0x00000001, + FPFastMathModeNotInfMask = 0x00000002, + FPFastMathModeNSZMask = 0x00000004, + FPFastMathModeAllowRecipMask = 0x00000008, + FPFastMathModeFastMask = 0x00000010, +}; + +enum FPRoundingMode { + FPRoundingModeRTE = 0, + FPRoundingModeRTZ = 1, + FPRoundingModeRTP = 2, + FPRoundingModeRTN = 3, +}; + +enum LinkageType { + LinkageTypeExport = 0, + LinkageTypeImport = 1, +}; + +enum AccessQualifier { + AccessQualifierReadOnly = 0, + AccessQualifierWriteOnly = 1, + AccessQualifierReadWrite = 2, +}; + +enum FunctionParameterAttribute { + FunctionParameterAttributeZext = 0, + FunctionParameterAttributeSext = 1, + FunctionParameterAttributeByVal = 2, + FunctionParameterAttributeSret = 3, + FunctionParameterAttributeNoAlias = 4, + FunctionParameterAttributeNoCapture = 5, + FunctionParameterAttributeNoWrite = 6, + FunctionParameterAttributeNoReadWrite = 7, +}; + +enum Decoration { + DecorationRelaxedPrecision = 0, + DecorationSpecId = 
1, + DecorationBlock = 2, + DecorationBufferBlock = 3, + DecorationRowMajor = 4, + DecorationColMajor = 5, + DecorationArrayStride = 6, + DecorationMatrixStride = 7, + DecorationGLSLShared = 8, + DecorationGLSLPacked = 9, + DecorationCPacked = 10, + DecorationBuiltIn = 11, + DecorationNoPerspective = 13, + DecorationFlat = 14, + DecorationPatch = 15, + DecorationCentroid = 16, + DecorationSample = 17, + DecorationInvariant = 18, + DecorationRestrict = 19, + DecorationAliased = 20, + DecorationVolatile = 21, + DecorationConstant = 22, + DecorationCoherent = 23, + DecorationNonWritable = 24, + DecorationNonReadable = 25, + DecorationUniform = 26, + DecorationSaturatedConversion = 28, + DecorationStream = 29, + DecorationLocation = 30, + DecorationComponent = 31, + DecorationIndex = 32, + DecorationBinding = 33, + DecorationDescriptorSet = 34, + DecorationOffset = 35, + DecorationXfbBuffer = 36, + DecorationXfbStride = 37, + DecorationFuncParamAttr = 38, + DecorationFPRoundingMode = 39, + DecorationFPFastMathMode = 40, + DecorationLinkageAttributes = 41, + DecorationNoContraction = 42, + DecorationInputAttachmentIndex = 43, + DecorationAlignment = 44, +}; + +enum BuiltIn { + BuiltInPosition = 0, + BuiltInPointSize = 1, + BuiltInClipDistance = 3, + BuiltInCullDistance = 4, + BuiltInVertexId = 5, + BuiltInInstanceId = 6, + BuiltInPrimitiveId = 7, + BuiltInInvocationId = 8, + BuiltInLayer = 9, + BuiltInViewportIndex = 10, + BuiltInTessLevelOuter = 11, + BuiltInTessLevelInner = 12, + BuiltInTessCoord = 13, + BuiltInPatchVertices = 14, + BuiltInFragCoord = 15, + BuiltInPointCoord = 16, + BuiltInFrontFacing = 17, + BuiltInSampleId = 18, + BuiltInSamplePosition = 19, + BuiltInSampleMask = 20, + BuiltInFragDepth = 22, + BuiltInHelperInvocation = 23, + BuiltInNumWorkgroups = 24, + BuiltInWorkgroupSize = 25, + BuiltInWorkgroupId = 26, + BuiltInLocalInvocationId = 27, + BuiltInGlobalInvocationId = 28, + BuiltInLocalInvocationIndex = 29, + BuiltInWorkDim = 30, + BuiltInGlobalSize 
= 31, + BuiltInEnqueuedWorkgroupSize = 32, + BuiltInGlobalOffset = 33, + BuiltInGlobalLinearId = 34, + BuiltInSubgroupSize = 36, + BuiltInSubgroupMaxSize = 37, + BuiltInNumSubgroups = 38, + BuiltInNumEnqueuedSubgroups = 39, + BuiltInSubgroupId = 40, + BuiltInSubgroupLocalInvocationId = 41, + BuiltInVertexIndex = 42, + BuiltInInstanceIndex = 43, +}; + +enum SelectionControlShift { + SelectionControlFlattenShift = 0, + SelectionControlDontFlattenShift = 1, +}; + +enum SelectionControlMask { + SelectionControlMaskNone = 0, + SelectionControlFlattenMask = 0x00000001, + SelectionControlDontFlattenMask = 0x00000002, +}; + +enum LoopControlShift { + LoopControlUnrollShift = 0, + LoopControlDontUnrollShift = 1, +}; + +enum LoopControlMask { + LoopControlMaskNone = 0, + LoopControlUnrollMask = 0x00000001, + LoopControlDontUnrollMask = 0x00000002, +}; + +enum FunctionControlShift { + FunctionControlInlineShift = 0, + FunctionControlDontInlineShift = 1, + FunctionControlPureShift = 2, + FunctionControlConstShift = 3, +}; + +enum FunctionControlMask { + FunctionControlMaskNone = 0, + FunctionControlInlineMask = 0x00000001, + FunctionControlDontInlineMask = 0x00000002, + FunctionControlPureMask = 0x00000004, + FunctionControlConstMask = 0x00000008, +}; + +enum MemorySemanticsShift { + MemorySemanticsAcquireShift = 1, + MemorySemanticsReleaseShift = 2, + MemorySemanticsAcquireReleaseShift = 3, + MemorySemanticsSequentiallyConsistentShift = 4, + MemorySemanticsUniformMemoryShift = 6, + MemorySemanticsSubgroupMemoryShift = 7, + MemorySemanticsWorkgroupMemoryShift = 8, + MemorySemanticsCrossWorkgroupMemoryShift = 9, + MemorySemanticsAtomicCounterMemoryShift = 10, + MemorySemanticsImageMemoryShift = 11, +}; + +enum MemorySemanticsMask { + MemorySemanticsMaskNone = 0, + MemorySemanticsAcquireMask = 0x00000002, + MemorySemanticsReleaseMask = 0x00000004, + MemorySemanticsAcquireReleaseMask = 0x00000008, + MemorySemanticsSequentiallyConsistentMask = 0x00000010, + 
MemorySemanticsUniformMemoryMask = 0x00000040, + MemorySemanticsSubgroupMemoryMask = 0x00000080, + MemorySemanticsWorkgroupMemoryMask = 0x00000100, + MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + MemorySemanticsAtomicCounterMemoryMask = 0x00000400, + MemorySemanticsImageMemoryMask = 0x00000800, +}; + +enum MemoryAccessShift { + MemoryAccessVolatileShift = 0, + MemoryAccessAlignedShift = 1, + MemoryAccessNontemporalShift = 2, +}; + +enum MemoryAccessMask { + MemoryAccessMaskNone = 0, + MemoryAccessVolatileMask = 0x00000001, + MemoryAccessAlignedMask = 0x00000002, + MemoryAccessNontemporalMask = 0x00000004, +}; + +enum Scope { + ScopeCrossDevice = 0, + ScopeDevice = 1, + ScopeWorkgroup = 2, + ScopeSubgroup = 3, + ScopeInvocation = 4, +}; + +enum GroupOperation { + GroupOperationReduce = 0, + GroupOperationInclusiveScan = 1, + GroupOperationExclusiveScan = 2, +}; + +enum KernelEnqueueFlags { + KernelEnqueueFlagsNoWait = 0, + KernelEnqueueFlagsWaitKernel = 1, + KernelEnqueueFlagsWaitWorkGroup = 2, +}; + +enum KernelProfilingInfoShift { + KernelProfilingInfoCmdExecTimeShift = 0, +}; + +enum KernelProfilingInfoMask { + KernelProfilingInfoMaskNone = 0, + KernelProfilingInfoCmdExecTimeMask = 0x00000001, +}; + +enum Capability { + CapabilityMatrix = 0, + CapabilityShader = 1, + CapabilityGeometry = 2, + CapabilityTessellation = 3, + CapabilityAddresses = 4, + CapabilityLinkage = 5, + CapabilityKernel = 6, + CapabilityVector16 = 7, + CapabilityFloat16Buffer = 8, + CapabilityFloat16 = 9, + CapabilityFloat64 = 10, + CapabilityInt64 = 11, + CapabilityInt64Atomics = 12, + CapabilityImageBasic = 13, + CapabilityImageReadWrite = 14, + CapabilityImageMipmap = 15, + CapabilityPipes = 17, + CapabilityGroups = 18, + CapabilityDeviceEnqueue = 19, + CapabilityLiteralSampler = 20, + CapabilityAtomicStorage = 21, + CapabilityInt16 = 22, + CapabilityTessellationPointSize = 23, + CapabilityGeometryPointSize = 24, + CapabilityImageGatherExtended = 25, + 
CapabilityStorageImageMultisample = 27, + CapabilityUniformBufferArrayDynamicIndexing = 28, + CapabilitySampledImageArrayDynamicIndexing = 29, + CapabilityStorageBufferArrayDynamicIndexing = 30, + CapabilityStorageImageArrayDynamicIndexing = 31, + CapabilityClipDistance = 32, + CapabilityCullDistance = 33, + CapabilityImageCubeArray = 34, + CapabilitySampleRateShading = 35, + CapabilityImageRect = 36, + CapabilitySampledRect = 37, + CapabilityGenericPointer = 38, + CapabilityInt8 = 39, + CapabilityInputAttachment = 40, + CapabilitySparseResidency = 41, + CapabilityMinLod = 42, + CapabilitySampled1D = 43, + CapabilityImage1D = 44, + CapabilitySampledCubeArray = 45, + CapabilitySampledBuffer = 46, + CapabilityImageBuffer = 47, + CapabilityImageMSArray = 48, + CapabilityStorageImageExtendedFormats = 49, + CapabilityImageQuery = 50, + CapabilityDerivativeControl = 51, + CapabilityInterpolationFunction = 52, + CapabilityTransformFeedback = 53, + CapabilityGeometryStreams = 54, + CapabilityStorageImageReadWithoutFormat = 55, + CapabilityStorageImageWriteWithoutFormat = 56, + CapabilityMultiViewport = 57, +}; + +enum Op { + OpNop = 0, + OpUndef = 1, + OpSourceContinued = 2, + OpSource = 3, + OpSourceExtension = 4, + OpName = 5, + OpMemberName = 6, + OpString = 7, + OpLine = 8, + OpExtension = 10, + OpExtInstImport = 11, + OpExtInst = 12, + OpMemoryModel = 14, + OpEntryPoint = 15, + OpExecutionMode = 16, + OpCapability = 17, + OpTypeVoid = 19, + OpTypeBool = 20, + OpTypeInt = 21, + OpTypeFloat = 22, + OpTypeVector = 23, + OpTypeMatrix = 24, + OpTypeImage = 25, + OpTypeSampler = 26, + OpTypeSampledImage = 27, + OpTypeArray = 28, + OpTypeRuntimeArray = 29, + OpTypeStruct = 30, + OpTypeOpaque = 31, + OpTypePointer = 32, + OpTypeFunction = 33, + OpTypeEvent = 34, + OpTypeDeviceEvent = 35, + OpTypeReserveId = 36, + OpTypeQueue = 37, + OpTypePipe = 38, + OpTypeForwardPointer = 39, + OpConstantTrue = 41, + OpConstantFalse = 42, + OpConstant = 43, + OpConstantComposite = 44, + 
OpConstantSampler = 45, + OpConstantNull = 46, + OpSpecConstantTrue = 48, + OpSpecConstantFalse = 49, + OpSpecConstant = 50, + OpSpecConstantComposite = 51, + OpSpecConstantOp = 52, + OpFunction = 54, + OpFunctionParameter = 55, + OpFunctionEnd = 56, + OpFunctionCall = 57, + OpVariable = 59, + OpImageTexelPointer = 60, + OpLoad = 61, + OpStore = 62, + OpCopyMemory = 63, + OpCopyMemorySized = 64, + OpAccessChain = 65, + OpInBoundsAccessChain = 66, + OpPtrAccessChain = 67, + OpArrayLength = 68, + OpGenericPtrMemSemantics = 69, + OpInBoundsPtrAccessChain = 70, + OpDecorate = 71, + OpMemberDecorate = 72, + OpDecorationGroup = 73, + OpGroupDecorate = 74, + OpGroupMemberDecorate = 75, + OpVectorExtractDynamic = 77, + OpVectorInsertDynamic = 78, + OpVectorShuffle = 79, + OpCompositeConstruct = 80, + OpCompositeExtract = 81, + OpCompositeInsert = 82, + OpCopyObject = 83, + OpTranspose = 84, + OpSampledImage = 86, + OpImageSampleImplicitLod = 87, + OpImageSampleExplicitLod = 88, + OpImageSampleDrefImplicitLod = 89, + OpImageSampleDrefExplicitLod = 90, + OpImageSampleProjImplicitLod = 91, + OpImageSampleProjExplicitLod = 92, + OpImageSampleProjDrefImplicitLod = 93, + OpImageSampleProjDrefExplicitLod = 94, + OpImageFetch = 95, + OpImageGather = 96, + OpImageDrefGather = 97, + OpImageRead = 98, + OpImageWrite = 99, + OpImage = 100, + OpImageQueryFormat = 101, + OpImageQueryOrder = 102, + OpImageQuerySizeLod = 103, + OpImageQuerySize = 104, + OpImageQueryLod = 105, + OpImageQueryLevels = 106, + OpImageQuerySamples = 107, + OpConvertFToU = 109, + OpConvertFToS = 110, + OpConvertSToF = 111, + OpConvertUToF = 112, + OpUConvert = 113, + OpSConvert = 114, + OpFConvert = 115, + OpQuantizeToF16 = 116, + OpConvertPtrToU = 117, + OpSatConvertSToU = 118, + OpSatConvertUToS = 119, + OpConvertUToPtr = 120, + OpPtrCastToGeneric = 121, + OpGenericCastToPtr = 122, + OpGenericCastToPtrExplicit = 123, + OpBitcast = 124, + OpSNegate = 126, + OpFNegate = 127, + OpIAdd = 128, + OpFAdd = 129, + 
OpISub = 130, + OpFSub = 131, + OpIMul = 132, + OpFMul = 133, + OpUDiv = 134, + OpSDiv = 135, + OpFDiv = 136, + OpUMod = 137, + OpSRem = 138, + OpSMod = 139, + OpFRem = 140, + OpFMod = 141, + OpVectorTimesScalar = 142, + OpMatrixTimesScalar = 143, + OpVectorTimesMatrix = 144, + OpMatrixTimesVector = 145, + OpMatrixTimesMatrix = 146, + OpOuterProduct = 147, + OpDot = 148, + OpIAddCarry = 149, + OpISubBorrow = 150, + OpUMulExtended = 151, + OpSMulExtended = 152, + OpAny = 154, + OpAll = 155, + OpIsNan = 156, + OpIsInf = 157, + OpIsFinite = 158, + OpIsNormal = 159, + OpSignBitSet = 160, + OpLessOrGreater = 161, + OpOrdered = 162, + OpUnordered = 163, + OpLogicalEqual = 164, + OpLogicalNotEqual = 165, + OpLogicalOr = 166, + OpLogicalAnd = 167, + OpLogicalNot = 168, + OpSelect = 169, + OpIEqual = 170, + OpINotEqual = 171, + OpUGreaterThan = 172, + OpSGreaterThan = 173, + OpUGreaterThanEqual = 174, + OpSGreaterThanEqual = 175, + OpULessThan = 176, + OpSLessThan = 177, + OpULessThanEqual = 178, + OpSLessThanEqual = 179, + OpFOrdEqual = 180, + OpFUnordEqual = 181, + OpFOrdNotEqual = 182, + OpFUnordNotEqual = 183, + OpFOrdLessThan = 184, + OpFUnordLessThan = 185, + OpFOrdGreaterThan = 186, + OpFUnordGreaterThan = 187, + OpFOrdLessThanEqual = 188, + OpFUnordLessThanEqual = 189, + OpFOrdGreaterThanEqual = 190, + OpFUnordGreaterThanEqual = 191, + OpShiftRightLogical = 194, + OpShiftRightArithmetic = 195, + OpShiftLeftLogical = 196, + OpBitwiseOr = 197, + OpBitwiseXor = 198, + OpBitwiseAnd = 199, + OpNot = 200, + OpBitFieldInsert = 201, + OpBitFieldSExtract = 202, + OpBitFieldUExtract = 203, + OpBitReverse = 204, + OpBitCount = 205, + OpDPdx = 207, + OpDPdy = 208, + OpFwidth = 209, + OpDPdxFine = 210, + OpDPdyFine = 211, + OpFwidthFine = 212, + OpDPdxCoarse = 213, + OpDPdyCoarse = 214, + OpFwidthCoarse = 215, + OpEmitVertex = 218, + OpEndPrimitive = 219, + OpEmitStreamVertex = 220, + OpEndStreamPrimitive = 221, + OpControlBarrier = 224, + OpMemoryBarrier = 225, + OpAtomicLoad = 
227, + OpAtomicStore = 228, + OpAtomicExchange = 229, + OpAtomicCompareExchange = 230, + OpAtomicCompareExchangeWeak = 231, + OpAtomicIIncrement = 232, + OpAtomicIDecrement = 233, + OpAtomicIAdd = 234, + OpAtomicISub = 235, + OpAtomicSMin = 236, + OpAtomicUMin = 237, + OpAtomicSMax = 238, + OpAtomicUMax = 239, + OpAtomicAnd = 240, + OpAtomicOr = 241, + OpAtomicXor = 242, + OpPhi = 245, + OpLoopMerge = 246, + OpSelectionMerge = 247, + OpLabel = 248, + OpBranch = 249, + OpBranchConditional = 250, + OpSwitch = 251, + OpKill = 252, + OpReturn = 253, + OpReturnValue = 254, + OpUnreachable = 255, + OpLifetimeStart = 256, + OpLifetimeStop = 257, + OpGroupAsyncCopy = 259, + OpGroupWaitEvents = 260, + OpGroupAll = 261, + OpGroupAny = 262, + OpGroupBroadcast = 263, + OpGroupIAdd = 264, + OpGroupFAdd = 265, + OpGroupFMin = 266, + OpGroupUMin = 267, + OpGroupSMin = 268, + OpGroupFMax = 269, + OpGroupUMax = 270, + OpGroupSMax = 271, + OpReadPipe = 274, + OpWritePipe = 275, + OpReservedReadPipe = 276, + OpReservedWritePipe = 277, + OpReserveReadPipePackets = 278, + OpReserveWritePipePackets = 279, + OpCommitReadPipe = 280, + OpCommitWritePipe = 281, + OpIsValidReserveId = 282, + OpGetNumPipePackets = 283, + OpGetMaxPipePackets = 284, + OpGroupReserveReadPipePackets = 285, + OpGroupReserveWritePipePackets = 286, + OpGroupCommitReadPipe = 287, + OpGroupCommitWritePipe = 288, + OpEnqueueMarker = 291, + OpEnqueueKernel = 292, + OpGetKernelNDrangeSubGroupCount = 293, + OpGetKernelNDrangeMaxSubGroupSize = 294, + OpGetKernelWorkGroupSize = 295, + OpGetKernelPreferredWorkGroupSizeMultiple = 296, + OpRetainEvent = 297, + OpReleaseEvent = 298, + OpCreateUserEvent = 299, + OpIsValidEvent = 300, + OpSetUserEventStatus = 301, + OpCaptureEventProfilingInfo = 302, + OpGetDefaultQueue = 303, + OpBuildNDRange = 304, + OpImageSparseSampleImplicitLod = 305, + OpImageSparseSampleExplicitLod = 306, + OpImageSparseSampleDrefImplicitLod = 307, + OpImageSparseSampleDrefExplicitLod = 308, + 
OpImageSparseSampleProjImplicitLod = 309, + OpImageSparseSampleProjExplicitLod = 310, + OpImageSparseSampleProjDrefImplicitLod = 311, + OpImageSparseSampleProjDrefExplicitLod = 312, + OpImageSparseFetch = 313, + OpImageSparseGather = 314, + OpImageSparseDrefGather = 315, + OpImageSparseTexelsResident = 316, + OpNoLine = 317, + OpAtomicFlagTestAndSet = 318, + OpAtomicFlagClear = 319, + OpImageSparseRead = 320, +}; + +// Overload operator| for mask bit combining + +inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } +inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } +inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } +inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } +inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } +inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } +inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } +inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } + +} // end namespace spv + +#endif // #ifndef spirv_HPP diff --git a/third_party/glslang-spirv/spvIR.h b/third_party/glslang-spirv/spvIR.h new file mode 100644 index 000000000..98f4971b4 --- /dev/null +++ b/third_party/glslang-spirv/spvIR.h @@ -0,0 +1,403 @@ +// +//Copyright (C) 2014 LunarG, Inc. +// +//All rights reserved. 
+// +//Redistribution and use in source and binary forms, with or without +//modification, are permitted provided that the following conditions +//are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +//POSSIBILITY OF SUCH DAMAGE. + +// +// Author: John Kessenich, LunarG +// + +// SPIRV-IR +// +// Simple in-memory representation (IR) of SPIRV. Just for holding +// Each function's CFG of blocks. 
Has this hierarchy: +// - Module, which is a list of +// - Function, which is a list of +// - Block, which is a list of +// - Instruction +// + +#pragma once +#ifndef spvIR_H +#define spvIR_H + +#include "spirv.hpp" + +#include +#include +#include +#include +#include +#include + +namespace spv { + +class Block; +class Function; +class Module; + +const Id NoResult = 0; +const Id NoType = 0; + +const unsigned int BadValue = 0xFFFFFFFF; +const Decoration NoPrecision = (Decoration)BadValue; +const MemorySemanticsMask MemorySemanticsAllMemory = + (MemorySemanticsMask)(MemorySemanticsAcquireMask | + MemorySemanticsReleaseMask | + MemorySemanticsAcquireReleaseMask | + MemorySemanticsSequentiallyConsistentMask | + MemorySemanticsUniformMemoryMask | + MemorySemanticsSubgroupMemoryMask | + MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask | + MemorySemanticsAtomicCounterMemoryMask | + MemorySemanticsImageMemoryMask); + +// +// SPIR-V IR instruction. +// + +class Instruction { +public: + Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode), block(nullptr) { } + explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode), block(nullptr) { } + virtual ~Instruction() {} + void addIdOperand(Id id) { operands.push_back(id); } + void addImmediateOperand(unsigned int immediate) { operands.push_back(immediate); } + void addStringOperand(const char* str) + { + originalString = str; + unsigned int word; + char* wordString = (char*)&word; + char* wordPtr = wordString; + int charCount = 0; + char c; + do { + c = *(str++); + *(wordPtr++) = c; + ++charCount; + if (charCount == 4) { + addImmediateOperand(word); + wordPtr = wordString; + charCount = 0; + } + } while (c != 0); + + // deal with partial last word + if (charCount > 0) { + // pad with 0s + for (; charCount < 4; ++charCount) + *(wordPtr++) = 0; + addImmediateOperand(word); + } + } + void setBlock(Block* b) { block = b; } + Block* 
getBlock() const { return block; } + Op getOpCode() const { return opCode; } + int getNumOperands() const { return (int)operands.size(); } + Id getResultId() const { return resultId; } + Id getTypeId() const { return typeId; } + Id getIdOperand(int op) const { return operands[op]; } + unsigned int getImmediateOperand(int op) const { return operands[op]; } + const char* getStringOperand() const { return originalString.c_str(); } + + // Write out the binary form. + void dump(std::vector& out) const + { + // Compute the wordCount + unsigned int wordCount = 1; + if (typeId) + ++wordCount; + if (resultId) + ++wordCount; + wordCount += (unsigned int)operands.size(); + + // Write out the beginning of the instruction + out.push_back(((wordCount) << WordCountShift) | opCode); + if (typeId) + out.push_back(typeId); + if (resultId) + out.push_back(resultId); + + // Write out the operands + for (int op = 0; op < (int)operands.size(); ++op) + out.push_back(operands[op]); + } + +protected: + Instruction(const Instruction&); + Id resultId; + Id typeId; + Op opCode; + std::vector operands; + std::string originalString; // could be optimized away; convenience for getting string operand + Block* block; +}; + +// +// SPIR-V IR block. +// + +class Block { +public: + Block(Id id, Function& parent); + virtual ~Block() + { + } + + Id getId() { return instructions.front()->getResultId(); } + + Function& getParent() const { return parent; } + void addInstruction(std::unique_ptr inst); + void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);} + void addLocalVariable(std::unique_ptr inst) { localVariables.push_back(std::move(inst)); } + const std::vector& getPredecessors() const { return predecessors; } + const std::vector& getSuccessors() const { return successors; } + void setUnreachable() { unreachable = true; } + bool isUnreachable() const { return unreachable; } + // Returns the block's merge instruction, if one exists (otherwise null). 
+ const Instruction* getMergeInstruction() const { + if (instructions.size() < 2) return nullptr; + const Instruction* nextToLast = (instructions.cend() - 2)->get(); + switch (nextToLast->getOpCode()) { + case OpSelectionMerge: + case OpLoopMerge: + return nextToLast; + default: + return nullptr; + } + return nullptr; + } + + bool isTerminated() const + { + switch (instructions.back()->getOpCode()) { + case OpBranch: + case OpBranchConditional: + case OpSwitch: + case OpKill: + case OpReturn: + case OpReturnValue: + return true; + default: + return false; + } + } + + void dump(std::vector& out) const + { + instructions[0]->dump(out); + for (int i = 0; i < (int)localVariables.size(); ++i) + localVariables[i]->dump(out); + for (int i = 1; i < (int)instructions.size(); ++i) + instructions[i]->dump(out); + } + +protected: + Block(const Block&); + Block& operator=(Block&); + + // To enforce keeping parent and ownership in sync: + friend Function; + + std::vector > instructions; + std::vector predecessors, successors; + std::vector > localVariables; + Function& parent; + + // track whether this block is known to be uncreachable (not necessarily + // true for all unreachable blocks, but should be set at least + // for the extraneous ones introduced by the builder). + bool unreachable; +}; + +// Traverses the control-flow graph rooted at root in an order suited for +// readable code generation. Invokes callback at every node in the traversal +// order. +void inReadableOrder(Block* root, std::function callback); + +// +// SPIR-V IR Function. 
+// + +class Function { +public: + Function(Id id, Id resultType, Id functionType, Id firstParam, Module& parent); + virtual ~Function() + { + for (int i = 0; i < (int)parameterInstructions.size(); ++i) + delete parameterInstructions[i]; + + for (int i = 0; i < (int)blocks.size(); ++i) + delete blocks[i]; + } + Id getId() const { return functionInstruction.getResultId(); } + Id getParamId(int p) { return parameterInstructions[p]->getResultId(); } + + void addBlock(Block* block) { blocks.push_back(block); } + void removeBlock(Block* block) + { + auto found = find(blocks.begin(), blocks.end(), block); + assert(found != blocks.end()); + blocks.erase(found); + delete block; + } + + Module& getParent() const { return parent; } + Block* getEntryBlock() const { return blocks.front(); } + Block* getLastBlock() const { return blocks.back(); } + void addLocalVariable(std::unique_ptr inst); + Id getReturnType() const { return functionInstruction.getTypeId(); } + void dump(std::vector& out) const + { + // OpFunction + functionInstruction.dump(out); + + // OpFunctionParameter + for (int p = 0; p < (int)parameterInstructions.size(); ++p) + parameterInstructions[p]->dump(out); + + // Blocks + inReadableOrder(blocks[0], [&out](const Block* b) { b->dump(out); }); + Instruction end(0, 0, OpFunctionEnd); + end.dump(out); + } + +protected: + Function(const Function&); + Function& operator=(Function&); + + Module& parent; + Instruction functionInstruction; + std::vector parameterInstructions; + std::vector blocks; +}; + +// +// SPIR-V IR Module. 
+// + +class Module { +public: + Module() {} + virtual ~Module() + { + // TODO delete things + } + + void addFunction(Function *fun) { functions.push_back(fun); } + + void mapInstruction(Instruction *instruction) + { + spv::Id resultId = instruction->getResultId(); + // map the instruction's result id + if (resultId >= idToInstruction.size()) + idToInstruction.resize(resultId + 16); + idToInstruction[resultId] = instruction; + } + + Instruction* getInstruction(Id id) const { return idToInstruction[id]; } + spv::Id getTypeId(Id resultId) const { return idToInstruction[resultId]->getTypeId(); } + StorageClass getStorageClass(Id typeId) const + { + assert(idToInstruction[typeId]->getOpCode() == spv::OpTypePointer); + return (StorageClass)idToInstruction[typeId]->getImmediateOperand(0); + } + + void dump(std::vector& out) const + { + for (int f = 0; f < (int)functions.size(); ++f) + functions[f]->dump(out); + } + +protected: + Module(const Module&); + std::vector functions; + + // map from result id to instruction having that result id + std::vector idToInstruction; + + // map from a result id to its type id +}; + +// +// Implementation (it's here due to circular type definitions). 
+// + +// Add both +// - the OpFunction instruction +// - all the OpFunctionParameter instructions +__inline Function::Function(Id id, Id resultType, Id functionType, Id firstParamId, Module& parent) + : parent(parent), functionInstruction(id, resultType, OpFunction) +{ + // OpFunction + functionInstruction.addImmediateOperand(FunctionControlMaskNone); + functionInstruction.addIdOperand(functionType); + parent.mapInstruction(&functionInstruction); + parent.addFunction(this); + + // OpFunctionParameter + Instruction* typeInst = parent.getInstruction(functionType); + int numParams = typeInst->getNumOperands() - 1; + for (int p = 0; p < numParams; ++p) { + Instruction* param = new Instruction(firstParamId + p, typeInst->getIdOperand(p + 1), OpFunctionParameter); + parent.mapInstruction(param); + parameterInstructions.push_back(param); + } +} + +__inline void Function::addLocalVariable(std::unique_ptr inst) +{ + Instruction* raw_instruction = inst.get(); + blocks[0]->addLocalVariable(std::move(inst)); + parent.mapInstruction(raw_instruction); +} + +__inline Block::Block(Id id, Function& parent) : parent(parent), unreachable(false) +{ + instructions.push_back(std::unique_ptr(new Instruction(id, NoType, OpLabel))); + instructions.back()->setBlock(this); + parent.getParent().mapInstruction(instructions.back().get()); +} + +__inline void Block::addInstruction(std::unique_ptr inst) +{ + Instruction* raw_instruction = inst.get(); + instructions.push_back(std::move(inst)); + raw_instruction->setBlock(this); + if (raw_instruction->getResultId()) + parent.getParent().mapInstruction(raw_instruction); +} + +}; // end spv namespace + +#endif // spvIR_H From 00594da41735e556b1734ffae01fa13cd499d319 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 18 Feb 2016 16:40:45 -0800 Subject: [PATCH 009/145] Adding mutliple queue/shared queue support. 
--- src/xenia/ui/vulkan/vulkan_context.cc | 24 +++++++------------- src/xenia/ui/vulkan/vulkan_context.h | 1 - src/xenia/ui/vulkan/vulkan_device.cc | 28 ++++++++++++++++++++++-- src/xenia/ui/vulkan/vulkan_device.h | 17 ++++++++++++++ src/xenia/ui/vulkan/vulkan_swap_chain.cc | 21 +++++++++++++----- 5 files changed, 66 insertions(+), 25 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc index 9dd9c7d58..a2c5998f4 100644 --- a/src/xenia/ui/vulkan/vulkan_context.cc +++ b/src/xenia/ui/vulkan/vulkan_context.cc @@ -35,28 +35,18 @@ VulkanContext::VulkanContext(VulkanProvider* provider, Window* target_window) VulkanContext::~VulkanContext() { auto provider = static_cast(provider_); auto device = provider->device(); - vkQueueWaitIdle(device->primary_queue()); + { + std::lock_guard queue_lock(device->primary_queue_mutex()); + vkQueueWaitIdle(device->primary_queue()); + } immediate_drawer_.reset(); swap_chain_.reset(); - if (cmd_pool_) { - vkDestroyCommandPool(*device, cmd_pool_, nullptr); - } } bool VulkanContext::Initialize() { auto provider = static_cast(provider_); auto device = provider->device(); - // All context-specific commands will be allocated from this. - // We may want to have additional pools for different rendering subsystems. - VkCommandPoolCreateInfo cmd_pool_info; - cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - cmd_pool_info.pNext = nullptr; - cmd_pool_info.queueFamilyIndex = device->queue_family_index(); - cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; - auto err = vkCreateCommandPool(*device, &cmd_pool_info, nullptr, &cmd_pool_); - CheckResult(err, "vkCreateCommandPool"); - if (target_window_) { // Create swap chain used to present to the window. 
VkSurfaceKHR surface = nullptr; @@ -68,8 +58,8 @@ bool VulkanContext::Initialize() { create_info.hinstance = static_cast(target_window_->native_platform_handle()); create_info.hwnd = static_cast(target_window_->native_handle()); - err = vkCreateWin32SurfaceKHR(*provider->instance(), &create_info, nullptr, - &surface); + auto err = vkCreateWin32SurfaceKHR(*provider->instance(), &create_info, + nullptr, &surface); CheckResult(err, "vkCreateWin32SurfaceKHR"); #else #error Platform not yet implemented. @@ -130,6 +120,7 @@ void VulkanContext::BeginSwap() { swap_chain_->Begin(); // TODO(benvanik): use a fence instead? May not be possible with target image. + std::lock_guard queue_lock(device->primary_queue_mutex()); auto err = vkQueueWaitIdle(device->primary_queue()); CheckResult(err, "vkQueueWaitIdle"); } @@ -145,6 +136,7 @@ void VulkanContext::EndSwap() { // Wait until the queue is idle. // TODO(benvanik): is this required? + std::lock_guard queue_lock(device->primary_queue_mutex()); auto err = vkQueueWaitIdle(device->primary_queue()); CheckResult(err, "vkQueueWaitIdle"); } diff --git a/src/xenia/ui/vulkan/vulkan_context.h b/src/xenia/ui/vulkan/vulkan_context.h index 1893ca287..f8ec41f05 100644 --- a/src/xenia/ui/vulkan/vulkan_context.h +++ b/src/xenia/ui/vulkan/vulkan_context.h @@ -53,7 +53,6 @@ class VulkanContext : public GraphicsContext { std::unique_ptr swap_chain_; std::unique_ptr immediate_drawer_; - VkCommandPool cmd_pool_ = nullptr; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_device.cc b/src/xenia/ui/vulkan/vulkan_device.cc index 8f862f444..c7ca1d974 100644 --- a/src/xenia/ui/vulkan/vulkan_device.cc +++ b/src/xenia/ui/vulkan/vulkan_device.cc @@ -118,8 +118,8 @@ bool VulkanDevice::Initialize(DeviceInfo device_info) { if (queue_flags & VK_QUEUE_GRAPHICS_BIT) { // Can do graphics and present - good! ideal_queue_family_index = static_cast(i); - // TODO(benvanik): pick a higher queue count? - queue_count = 1; + // Grab all the queues we can. 
+ queue_count = device_info.queue_family_properties[i].queueCount; break; } } @@ -136,6 +136,8 @@ bool VulkanDevice::Initialize(DeviceInfo device_info) { queue_info.queueFamilyIndex = ideal_queue_family_index; queue_info.queueCount = queue_count; std::vector queue_priorities(queue_count); + // Prioritize the primary queue. + queue_priorities[0] = 1.0f; queue_info.pQueuePriorities = queue_priorities.data(); VkDeviceCreateInfo create_info; @@ -179,10 +181,32 @@ bool VulkanDevice::Initialize(DeviceInfo device_info) { // Get the primary queue used for most submissions/etc. vkGetDeviceQueue(handle, queue_family_index_, 0, &primary_queue_); + // Get all additional queues, if we got any. + for (uint32_t i = 0; i < queue_count - 1; ++i) { + VkQueue queue; + vkGetDeviceQueue(handle, queue_family_index_, i, &queue); + free_queues_.push_back(queue); + } + XELOGVK("Device initialized successfully!"); return true; } +VkQueue VulkanDevice::AcquireQueue() { + std::lock_guard lock(queue_mutex_); + if (free_queues_.empty()) { + return nullptr; + } + auto queue = free_queues_.back(); + free_queues_.pop_back(); + return queue; +} + +void VulkanDevice::ReleaseQueue(VkQueue queue) { + std::lock_guard lock(queue_mutex_); + free_queues_.push_back(queue); +} + VkDeviceMemory VulkanDevice::AllocateMemory( const VkMemoryRequirements& requirements, VkFlags required_properties) { // Search memory types to find one matching our requirements and our diff --git a/src/xenia/ui/vulkan/vulkan_device.h b/src/xenia/ui/vulkan/vulkan_device.h index f1194d662..e9b12e3fc 100644 --- a/src/xenia/ui/vulkan/vulkan_device.h +++ b/src/xenia/ui/vulkan/vulkan_device.h @@ -11,6 +11,7 @@ #define XENIA_UI_VULKAN_VULKAN_DEVICE_H_ #include +#include #include #include @@ -57,9 +58,23 @@ class VulkanDevice { bool Initialize(DeviceInfo device_info); uint32_t queue_family_index() const { return queue_family_index_; } + std::mutex& primary_queue_mutex() { return queue_mutex_; } + // Access to the primary queue must be 
synchronized with primary_queue_mutex. VkQueue primary_queue() const { return primary_queue_; } const DeviceInfo& device_info() const { return device_info_; } + // Acquires a queue for exclusive use by the caller. + // The queue will not be touched by any other code until it's returned with + // ReleaseQueue. + // Not all devices support queues or only support a limited number. If this + // returns null the primary_queue should be used with the + // primary_queue_mutex. + // This method is thread safe. + VkQueue AcquireQueue(); + // Releases a queue back to the device pool. + // This method is thread safe. + void ReleaseQueue(VkQueue queue); + // Allocates memory of the given size matching the required properties. VkDeviceMemory AllocateMemory( const VkMemoryRequirements& requirements, @@ -73,7 +88,9 @@ class VulkanDevice { DeviceInfo device_info_; uint32_t queue_family_index_ = 0; + std::mutex queue_mutex_; VkQueue primary_queue_ = nullptr; + std::vector free_queues_; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.cc b/src/xenia/ui/vulkan/vulkan_swap_chain.cc index cb088bb75..47d246d18 100644 --- a/src/xenia/ui/vulkan/vulkan_swap_chain.cc +++ b/src/xenia/ui/vulkan/vulkan_swap_chain.cc @@ -373,12 +373,15 @@ bool VulkanSwapChain::Begin() { wait_submit_info.pCommandBuffers = nullptr; wait_submit_info.signalSemaphoreCount = 0; wait_submit_info.pSignalSemaphores = nullptr; - err = vkQueueSubmit(device_->primary_queue(), 1, &wait_submit_info, nullptr); + { + std::lock_guard queue_lock(device_->primary_queue_mutex()); + err = + vkQueueSubmit(device_->primary_queue(), 1, &wait_submit_info, nullptr); + } CheckResult(err, "vkQueueSubmit"); // Reset all command buffers. - vkResetCommandBuffer(render_cmd_buffer_, - VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + vkResetCommandBuffer(render_cmd_buffer_, 0); auto& current_buffer = buffers_[current_buffer_index_]; // Build the command buffer that will execute all queued rendering buffers. 
@@ -484,8 +487,11 @@ bool VulkanSwapChain::End() { render_submit_info.pCommandBuffers = &render_cmd_buffer_; render_submit_info.signalSemaphoreCount = 0; render_submit_info.pSignalSemaphores = nullptr; - err = - vkQueueSubmit(device_->primary_queue(), 1, &render_submit_info, nullptr); + { + std::lock_guard queue_lock(device_->primary_queue_mutex()); + err = vkQueueSubmit(device_->primary_queue(), 1, &render_submit_info, + nullptr); + } CheckResult(err, "vkQueueSubmit"); // Queue the present of our current image. @@ -500,7 +506,10 @@ bool VulkanSwapChain::End() { present_info.pSwapchains = swap_chains; present_info.pImageIndices = swap_chain_image_indices; present_info.pResults = nullptr; - err = vkQueuePresentKHR(device_->primary_queue(), &present_info); + { + std::lock_guard queue_lock(device_->primary_queue_mutex()); + err = vkQueuePresentKHR(device_->primary_queue(), &present_info); + } switch (err) { case VK_SUCCESS: break; From 35e08d94281d6ec8a2fd2550c9a5f6bb6b39822c Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 18 Feb 2016 16:42:00 -0800 Subject: [PATCH 010/145] Switching from fork to main glslang spirv builder. 
--- src/xenia/gpu/premake5.lua | 2 + src/xenia/gpu/shader_translator.cc | 4 +- src/xenia/gpu/shader_translator.h | 7 + src/xenia/gpu/spirv_shader_translator.cc | 138 +- src/xenia/gpu/spirv_shader_translator.h | 18 +- src/xenia/gpu/vulkan/vulkan_shader.cc | 2 + src/xenia/gpu/vulkan/vulkan_shader.h | 7 + src/xenia/ui/spirv/premake5.lua | 1 + src/xenia/ui/spirv/spirv_emitter.cc | 2241 ---------------------- src/xenia/ui/spirv/spirv_emitter.h | 731 ------- src/xenia/ui/spirv/spirv_ir.h | 421 ---- src/xenia/ui/spirv/spirv_optimizer.cc | 22 - src/xenia/ui/spirv/spirv_optimizer.h | 31 - 13 files changed, 140 insertions(+), 3485 deletions(-) delete mode 100644 src/xenia/ui/spirv/spirv_emitter.cc delete mode 100644 src/xenia/ui/spirv/spirv_emitter.h delete mode 100644 src/xenia/ui/spirv/spirv_ir.h delete mode 100644 src/xenia/ui/spirv/spirv_optimizer.cc delete mode 100644 src/xenia/ui/spirv/spirv_optimizer.h diff --git a/src/xenia/gpu/premake5.lua b/src/xenia/gpu/premake5.lua index e63184edb..1f6a1eea6 100644 --- a/src/xenia/gpu/premake5.lua +++ b/src/xenia/gpu/premake5.lua @@ -7,6 +7,7 @@ project("xenia-gpu") kind("StaticLib") language("C++") links({ + "glslang-spirv", "snappy", "spirv-tools", "xenia-base", @@ -29,6 +30,7 @@ project("xenia-gpu-shader-compiler") language("C++") links({ "gflags", + "glslang-spirv", "spirv-tools", "xenia-base", "xenia-gpu", diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 1c9f31962..f117619cd 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -137,7 +137,7 @@ bool ShaderTranslator::Translate(Shader* shader) { constant_register_map_.packed_byte_length += 4 * xe::bit_count(constant_register_map_.int_bitmap); // Direct map between words and words we upload. 
- for (int i = 0; i < 4; ++i) { + for (int i = 0; i < 8; ++i) { if (constant_register_map_.bool_bitmap[i]) { constant_register_map_.packed_byte_length += 4; } @@ -161,6 +161,8 @@ bool ShaderTranslator::Translate(Shader* shader) { } } + PostTranslation(shader); + return shader->is_valid_; } diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index 8c8a8c176..21bae4a53 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -71,6 +71,13 @@ class ShaderTranslator { return std::vector(); } + // Handles post-translation tasks when the shader has been fully translated. + virtual void PostTranslation(Shader* shader) {} + // Sets the host disassembly on a shader. + void set_host_disassembly(Shader* shader, std::string value) { + shader->host_disassembly_ = std::move(value); + } + // Handles translation for control flow label addresses. // This is triggered once for each label required (due to control flow // operations) before any of the instructions within the target exec. diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index b9af44c22..52848cedd 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -11,124 +11,182 @@ #include +#include "xenia/base/logging.h" + namespace xe { namespace gpu { +using spv::GLSLstd450; +using spv::Id; +using spv::Op; + SpirvShaderTranslator::SpirvShaderTranslator() = default; SpirvShaderTranslator::~SpirvShaderTranslator() = default; void SpirvShaderTranslator::StartTranslation() { - auto& e = emitter_; + // Create a new builder. + builder_ = std::make_unique(0xFFFFFFFF); + auto& b = *builder_; - auto fn = e.MakeMainEntry(); - auto float_1_0 = e.MakeFloatConstant(1.0f); - auto acos = e.CreateGlslStd450InstructionCall( - spv::Decoration::Invariant, e.MakeFloatType(32), spv::GLSLstd450::kAcos, - {float_1_0}); - e.MakeReturn(true); + // Import required modules. 
+ glsl_std_450_instruction_set_ = b.import("GLSL.std.450"); + + // Configure environment. + b.setSource(spv::SourceLanguage::SourceLanguageUnknown, 0); + b.setMemoryModel(spv::AddressingModel::AddressingModelLogical, + spv::MemoryModel::MemoryModelGLSL450); + b.addCapability(spv::Capability::CapabilityShader); + b.addCapability(spv::Capability::CapabilityGenericPointer); + if (is_vertex_shader()) { + b.addCapability(spv::Capability::CapabilityClipDistance); + b.addCapability(spv::Capability::CapabilityCullDistance); + } + if (is_pixel_shader()) { + b.addCapability(spv::Capability::CapabilityDerivativeControl); + } + + // main() entry point. + auto mainFn = b.makeMain(); + if (is_vertex_shader()) { + b.addEntryPoint(spv::ExecutionModel::ExecutionModelVertex, mainFn, "main"); + } else { + b.addEntryPoint(spv::ExecutionModel::ExecutionModelFragment, mainFn, + "main"); + b.addExecutionMode(mainFn, spv::ExecutionModeOriginUpperLeft); + } + + // TODO(benvanik): transform feedback. + if (false) { + b.addCapability(spv::Capability::CapabilityTransformFeedback); + b.addExecutionMode(mainFn, spv::ExecutionMode::ExecutionModeXfb); + } + + auto float_1_0 = b.makeFloatConstant(2.0f); + auto acos = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, b.makeFloatType(32), + GLSLstd450::kAcos, {float_1_0}); } std::vector SpirvShaderTranslator::CompleteTranslation() { - auto& e = emitter_; + auto& b = *builder_; + + b.makeReturn(false); std::vector spirv_words; - e.Serialize(spirv_words); + b.dump(spirv_words); + // Cleanup builder. + builder_.reset(); + + // Copy bytes out. + // TODO(benvanik): avoid copy? std::vector spirv_bytes; spirv_bytes.resize(spirv_words.size() * 4); std::memcpy(spirv_bytes.data(), spirv_words.data(), spirv_bytes.size()); return spirv_bytes; } +void SpirvShaderTranslator::PostTranslation(Shader* shader) { + // TODO(benvanik): only if needed? could be slowish. 
+ auto disasm = disassembler_.Disassemble( + reinterpret_cast(shader->translated_binary().data()), + shader->translated_binary().size() / 4); + if (disasm->has_error()) { + XELOGE("Failed to disassemble SPIRV - invalid?"); + return; + } + set_host_disassembly(shader, disasm->to_string()); +} + void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) { - auto& e = emitter_; + auto& b = *builder_; EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessControlFlowNopInstruction() { - auto& e = emitter_; + auto& b = *builder_; EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessExecInstructionBegin( const ParsedExecInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessExecInstructionEnd( const ParsedExecInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessLoopStartInstruction( const ParsedLoopStartInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessLoopEndInstruction( const ParsedLoopEndInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessCallInstruction( const ParsedCallInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessReturnInstruction( const ParsedReturnInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessJumpInstruction( const ParsedJumpInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessAllocInstruction( const ParsedAllocInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; 
EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessVertexFetchInstruction( const ParsedVertexFetchInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessTextureFetchInstruction( const ParsedTextureFetchInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessAluInstruction( const ParsedAluInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; switch (instr.type) { case ParsedAluInstruction::Type::kNop: - e.CreateNop(); + b.createNoResultOp(spv::Op::OpNop); break; case ParsedAluInstruction::Type::kVector: ProcessVectorAluInstruction(instr); @@ -141,14 +199,14 @@ void SpirvShaderTranslator::ProcessAluInstruction( void SpirvShaderTranslator::ProcessVectorAluInstruction( const ParsedAluInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessScalarAluInstruction( const ParsedAluInstruction& instr) { - auto& e = emitter_; + auto& b = *builder_; spv::Id value_id = LoadFromOperand(instr.operands[0]); @@ -157,11 +215,19 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( EmitUnimplementedTranslationError(); } -spv::Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { - auto& e = emitter_; +Id SpirvShaderTranslator::CreateGlslStd450InstructionCall( + spv::Decoration precision, Id result_type, GLSLstd450 instruction_ordinal, + std::vector args) { + return builder_->createBuiltinCall(result_type, glsl_std_450_instruction_set_, + static_cast(instruction_ordinal), + args); +} - spv::Id current_type_id = e.MakeFloatType(32); - spv::Id current_value_id = e.CreateUndefined(current_type_id); +spv::Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { + auto& b = *builder_; + + spv::Id current_type_id = b.makeFloatType(32); + spv::Id current_value_id 
= b.createUndefined(current_type_id); // storage_addressing_mode switch (op.storage_source) { @@ -186,13 +252,13 @@ spv::Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { } if (op.is_absolute_value) { - current_value_id = e.CreateGlslStd450InstructionCall( - spv::Decoration::RelaxedPrecision, current_type_id, - spv::GLSLstd450::kFAbs, {current_value_id}); + current_value_id = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationRelaxedPrecision, current_type_id, + GLSLstd450::kFAbs, {current_value_id}); } if (op.is_negated) { current_value_id = - e.CreateUnaryOp(spv::Op::OpFNegate, current_type_id, current_value_id); + b.createUnaryOp(spv::Op::OpFNegate, current_type_id, current_value_id); } // swizzle @@ -202,7 +268,7 @@ spv::Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { void SpirvShaderTranslator::StoreToResult(spv::Id source_value_id, const InstructionResult& result) { - auto& e = emitter_; + auto& b = *builder_; if (result.storage_target == InstructionStorageTarget::kNone) { // No-op? @@ -236,7 +302,7 @@ void SpirvShaderTranslator::StoreToResult(spv::Id source_value_id, } spv::Id current_value_id = source_value_id; - spv::Id current_type_id = e.GetTypeId(source_value_id); + spv::Id current_type_id = b.getTypeId(source_value_id); // Clamp the input value. if (result.is_clamped) { @@ -248,7 +314,7 @@ void SpirvShaderTranslator::StoreToResult(spv::Id source_value_id, // swizzle // Convert to the appropriate type, if needed. 
- spv::Id desired_type_id = e.MakeFloatType(32); + spv::Id desired_type_id = b.makeFloatType(32); if (current_value_id != desired_type_id) { EmitTranslationError("Type conversion on storage not yet implemented"); } diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index fc068a33d..2b233103b 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -14,8 +14,10 @@ #include #include +#include "third_party/glslang-spirv/SpvBuilder.h" +#include "third_party/spirv/GLSL.std.450.hpp11" #include "xenia/gpu/shader_translator.h" -#include "xenia/ui/spirv/spirv_emitter.h" +#include "xenia/ui/spirv/spirv_disassembler.h" namespace xe { namespace gpu { @@ -28,6 +30,7 @@ class SpirvShaderTranslator : public ShaderTranslator { protected: void StartTranslation() override; std::vector CompleteTranslation() override; + void PostTranslation(Shader* shader) override; void ProcessLabel(uint32_t cf_index) override; void ProcessControlFlowNopInstruction() override; @@ -48,9 +51,16 @@ class SpirvShaderTranslator : public ShaderTranslator { void ProcessAluInstruction(const ParsedAluInstruction& instr) override; private: + void SetupPushConstants(); + void ProcessVectorAluInstruction(const ParsedAluInstruction& instr); void ProcessScalarAluInstruction(const ParsedAluInstruction& instr); + // Creates a call to the given GLSL intrinsic. + spv::Id SpirvShaderTranslator::CreateGlslStd450InstructionCall( + spv::Decoration precision, spv::Id result_type, + spv::GLSLstd450 instruction_ordinal, std::vector args); + // Loads an operand into a value. // The value returned will be in the form described in the operand (number of // components, etc). @@ -60,7 +70,11 @@ class SpirvShaderTranslator : public ShaderTranslator { // the proper components will be selected. 
void StoreToResult(spv::Id source_value_id, const InstructionResult& result); - xe::ui::spirv::SpirvEmitter emitter_; + xe::ui::spirv::SpirvDisassembler disassembler_; + + // TODO(benvanik): replace with something better, make reusable, etc. + std::unique_ptr builder_; + spv::Id glsl_std_450_instruction_set_ = 0; }; } // namespace gpu diff --git a/src/xenia/gpu/vulkan/vulkan_shader.cc b/src/xenia/gpu/vulkan/vulkan_shader.cc index 00b68af42..8624480a3 100644 --- a/src/xenia/gpu/vulkan/vulkan_shader.cc +++ b/src/xenia/gpu/vulkan/vulkan_shader.cc @@ -22,6 +22,8 @@ VulkanShader::VulkanShader(ShaderType shader_type, uint64_t data_hash, VulkanShader::~VulkanShader() = default; +bool VulkanShader::Prepare() { return true; } + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_shader.h b/src/xenia/gpu/vulkan/vulkan_shader.h index 9277ae44f..cc1d51e2a 100644 --- a/src/xenia/gpu/vulkan/vulkan_shader.h +++ b/src/xenia/gpu/vulkan/vulkan_shader.h @@ -24,6 +24,13 @@ class VulkanShader : public Shader { VulkanShader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr, uint32_t dword_count); ~VulkanShader() override; + + VkShaderModule shader_module() const { return shader_module_; } + + bool Prepare(); + + private: + VkShaderModule shader_module_ = nullptr; }; } // namespace vulkan diff --git a/src/xenia/ui/spirv/premake5.lua b/src/xenia/ui/spirv/premake5.lua index 94e52a0d5..423ad7bb6 100644 --- a/src/xenia/ui/spirv/premake5.lua +++ b/src/xenia/ui/spirv/premake5.lua @@ -7,6 +7,7 @@ project("xenia-ui-spirv") kind("StaticLib") language("C++") links({ + "glslang-spirv", "spirv-tools", "xenia-base", }) diff --git a/src/xenia/ui/spirv/spirv_emitter.cc b/src/xenia/ui/spirv/spirv_emitter.cc deleted file mode 100644 index 6be5b0a62..000000000 --- a/src/xenia/ui/spirv/spirv_emitter.cc +++ /dev/null @@ -1,2241 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 
Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -// Contents originally forked from: -// https://github.com/KhronosGroup/glslang/ -// -// Copyright (C) 2014 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -#include "xenia/ui/spirv/spirv_emitter.h" - -#include - -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" - -namespace xe { -namespace ui { -namespace spirv { - -SpirvEmitter::SpirvEmitter() { ClearAccessChain(); } - -SpirvEmitter::~SpirvEmitter() = default; - -Id SpirvEmitter::ImportExtendedInstructions(const char* name) { - auto import = - new Instruction(AllocateUniqueId(), NoType, Op::OpExtInstImport); - import->AddStringOperand(name); - - imports_.push_back(import); - return import->result_id(); -} - -// For creating new grouped_types_ (will return old type if the requested one -// was already made). 
-Id SpirvEmitter::MakeVoidType() { - Instruction* type; - auto& grouped_type = grouped_types_[static_cast(Op::OpTypeVoid)]; - if (grouped_type.empty()) { - type = new Instruction(AllocateUniqueId(), NoType, Op::OpTypeVoid); - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - } else { - type = grouped_type.back(); - } - - return type->result_id(); -} - -Id SpirvEmitter::MakeBoolType() { - Instruction* type; - auto& grouped_type = grouped_types_[static_cast(Op::OpTypeBool)]; - if (grouped_type.empty()) { - type = new Instruction(AllocateUniqueId(), NoType, Op::OpTypeBool); - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - } else { - type = grouped_type.back(); - } - - return type->result_id(); -} - -Id SpirvEmitter::MakeSamplerType() { - Instruction* type; - auto& grouped_type = grouped_types_[static_cast(Op::OpTypeSampler)]; - if (grouped_type.empty()) { - type = new Instruction(AllocateUniqueId(), NoType, Op::OpTypeSampler); - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - } else { - type = grouped_type.back(); - } - return type->result_id(); -} - -Id SpirvEmitter::MakePointer(spv::StorageClass storage_class, Id pointee) { - // try to find it - auto& grouped_type = grouped_types_[static_cast(Op::OpTypePointer)]; - for (auto& type : grouped_type) { - if (type->immediate_operand(0) == (unsigned)storage_class && - type->id_operand(1) == pointee) { - return type->result_id(); - } - } - - // not found, make it - auto type = new Instruction(AllocateUniqueId(), NoType, Op::OpTypePointer); - type->AddImmediateOperand(storage_class); - type->AddIdOperand(pointee); - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - - return type->result_id(); -} - -Id SpirvEmitter::MakeIntegerType(int bit_width, bool is_signed) { - // try to find it - auto& 
grouped_type = grouped_types_[static_cast(Op::OpTypeInt)]; - for (auto& type : grouped_type) { - if (type->immediate_operand(0) == (unsigned)bit_width && - type->immediate_operand(1) == (is_signed ? 1u : 0u)) { - return type->result_id(); - } - } - - // not found, make it - auto type = new Instruction(AllocateUniqueId(), NoType, Op::OpTypeInt); - type->AddImmediateOperand(bit_width); - type->AddImmediateOperand(is_signed ? 1 : 0); - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - - return type->result_id(); -} - -Id SpirvEmitter::MakeFloatType(int bit_width) { - // try to find it - auto& grouped_type = grouped_types_[static_cast(Op::OpTypeFloat)]; - for (auto& type : grouped_type) { - if (type->immediate_operand(0) == (unsigned)bit_width) { - return type->result_id(); - } - } - - // not found, make it - auto type = new Instruction(AllocateUniqueId(), NoType, Op::OpTypeFloat); - type->AddImmediateOperand(bit_width); - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - - return type->result_id(); -} - -// Make a struct without checking for duplication. -// See makeStructResultType() for non-decorated structs -// needed as the result of some instructions, which does -// check for duplicates. -Id SpirvEmitter::MakeStructType(std::initializer_list members, - const char* name) { - // Don't look for previous one, because in the general case, - // structs can be duplicated except for decorations. 
- - // not found, make it - Instruction* type = - new Instruction(AllocateUniqueId(), NoType, Op::OpTypeStruct); - type->AddIdOperands(members); - auto& grouped_type = grouped_types_[static_cast(Op::OpTypeStruct)]; - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - AddName(type->result_id(), name); - - return type->result_id(); -} - -// Make a struct for the simple results of several instructions, -// checking for duplication. -Id SpirvEmitter::MakePairStructType(Id type0, Id type1) { - // try to find it - auto& grouped_type = grouped_types_[static_cast(Op::OpTypeStruct)]; - for (auto& type : grouped_type) { - if (type->operand_count() != 2) { - continue; - } - if (type->id_operand(0) != type0 || type->id_operand(1) != type1) { - continue; - } - return type->result_id(); - } - - // not found, make it - return MakeStructType({type0, type1}, "ResType"); -} - -Id SpirvEmitter::MakeVectorType(Id component_type, int component_count) { - // try to find it - auto& grouped_type = grouped_types_[static_cast(Op::OpTypeVector)]; - for (auto& type : grouped_type) { - if (type->id_operand(0) == component_type && - type->immediate_operand(1) == (unsigned)component_count) { - return type->result_id(); - } - } - - // not found, make it - auto type = new Instruction(AllocateUniqueId(), NoType, Op::OpTypeVector); - type->AddIdOperand(component_type); - type->AddImmediateOperand(component_count); - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - - return type->result_id(); -} - -Id SpirvEmitter::MakeMatrix2DType(Id component_type, int cols, int rows) { - assert(cols <= kMaxMatrixSize && rows <= kMaxMatrixSize); - - Id column = MakeVectorType(component_type, rows); - - // try to find it - auto& grouped_type = grouped_types_[static_cast(Op::OpTypeMatrix)]; - for (auto& type : grouped_type) { - if (type->id_operand(0) == column && - type->immediate_operand(1) == 
(unsigned)cols) { - return type->result_id(); - } - } - - // not found, make it - auto type = new Instruction(AllocateUniqueId(), NoType, Op::OpTypeMatrix); - type->AddIdOperand(column); - type->AddImmediateOperand(cols); - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - - return type->result_id(); -} - -Id SpirvEmitter::MakeArrayType(Id element_type, int length) { - // First, we need a constant instruction for the size - Id length_id = MakeUintConstant(length); - - // try to find existing type - auto& grouped_type = grouped_types_[static_cast(Op::OpTypeArray)]; - for (auto& type : grouped_type) { - if (type->id_operand(0) == element_type && - type->id_operand(1) == length_id) { - return type->result_id(); - } - } - - // not found, make it - auto type = new Instruction(AllocateUniqueId(), NoType, Op::OpTypeArray); - type->AddIdOperand(element_type); - type->AddIdOperand(length_id); - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - - return type->result_id(); -} - -Id SpirvEmitter::MakeRuntimeArray(Id element_type) { - auto type = - new Instruction(AllocateUniqueId(), NoType, Op::OpTypeRuntimeArray); - type->AddIdOperand(element_type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - - return type->result_id(); -} - -Id SpirvEmitter::MakeFunctionType(Id return_type, - std::initializer_list param_types) { - // try to find it - auto& grouped_type = grouped_types_[static_cast(Op::OpTypeFunction)]; - for (auto& type : grouped_type) { - if (type->id_operand(0) == return_type && - param_types.size() == type->operand_count() - 1) { - bool mismatch = false; - for (int i = 0; i < param_types.size(); ++i) { - if (type->id_operand(i + 1) != *(param_types.begin() + i)) { - mismatch = true; - break; - } - } - if (!mismatch) { - return type->result_id(); - } - } - } - - // not found, make it - auto type = new 
Instruction(AllocateUniqueId(), NoType, Op::OpTypeFunction); - type->AddIdOperand(return_type); - type->AddIdOperands(param_types); - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - - return type->result_id(); -} - -Id SpirvEmitter::MakeImageType(Id sampled_type, spv::Dim dim, bool has_depth, - bool is_arrayed, bool is_multisampled, - int sampled, spv::ImageFormat format) { - // try to find it - auto& grouped_type = grouped_types_[static_cast(Op::OpTypeImage)]; - for (auto& type : grouped_type) { - if (type->id_operand(0) == sampled_type && - type->immediate_operand(1) == (unsigned int)dim && - type->immediate_operand(2) == (has_depth ? 1u : 0u) && - type->immediate_operand(3) == (is_arrayed ? 1u : 0u) && - type->immediate_operand(4) == (is_multisampled ? 1u : 0u) && - type->immediate_operand(5) == sampled && - type->immediate_operand(6) == static_cast(format)) { - return type->result_id(); - } - } - - // not found, make it - auto type = new Instruction(AllocateUniqueId(), NoType, Op::OpTypeImage); - type->AddIdOperand(sampled_type); - type->AddImmediateOperand(dim); - type->AddImmediateOperand(has_depth ? 1 : 0); - type->AddImmediateOperand(is_arrayed ? 1 : 0); - type->AddImmediateOperand(is_multisampled ? 
1 : 0); - type->AddImmediateOperand(sampled); - type->AddImmediateOperand(format); - - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - - return type->result_id(); -} - -Id SpirvEmitter::MakeSampledImageType(Id image_type) { - // try to find it - auto& grouped_type = grouped_types_[static_cast(Op::OpTypeSampledImage)]; - for (auto& type : grouped_type) { - if (type->id_operand(0) == image_type) { - return type->result_id(); - } - } - - // not found, make it - auto type = - new Instruction(AllocateUniqueId(), NoType, Op::OpTypeSampledImage); - type->AddIdOperand(image_type); - - grouped_type.push_back(type); - constants_types_globals_.push_back(type); - module_.MapInstruction(type); - - return type->result_id(); -} - -Id SpirvEmitter::GetDerefTypeId(Id result_id) const { - Id type_id = GetTypeId(result_id); - assert(IsPointerType(type_id)); - return module_.instruction(type_id)->immediate_operand(1); -} - -Op SpirvEmitter::GetMostBasicTypeClass(Id type_id) const { - auto instr = module_.instruction(type_id); - - Op type_class = instr->opcode(); - switch (type_class) { - case Op::OpTypeVoid: - case Op::OpTypeBool: - case Op::OpTypeInt: - case Op::OpTypeFloat: - case Op::OpTypeStruct: - return type_class; - case Op::OpTypeVector: - case Op::OpTypeMatrix: - case Op::OpTypeArray: - case Op::OpTypeRuntimeArray: - return GetMostBasicTypeClass(instr->id_operand(0)); - case Op::OpTypePointer: - return GetMostBasicTypeClass(instr->id_operand(1)); - default: - assert(0); - return Op::OpTypeFloat; - } -} - -int SpirvEmitter::GetTypeComponentCount(Id type_id) const { - auto instr = module_.instruction(type_id); - - switch (instr->opcode()) { - case Op::OpTypeBool: - case Op::OpTypeInt: - case Op::OpTypeFloat: - return 1; - case Op::OpTypeVector: - case Op::OpTypeMatrix: - return instr->immediate_operand(1); - default: - assert(0); - return 1; - } -} - -// Return the lowest-level type of scalar that an homogeneous 
composite is made -// out of. -// Typically, this is just to find out if something is made out of ints or -// floats. -// However, it includes returning a structure, if say, it is an array of -// structure. -Id SpirvEmitter::GetScalarTypeId(Id type_id) const { - auto instr = module_.instruction(type_id); - - Op type_class = instr->opcode(); - switch (type_class) { - case Op::OpTypeVoid: - case Op::OpTypeBool: - case Op::OpTypeInt: - case Op::OpTypeFloat: - case Op::OpTypeStruct: - return instr->result_id(); - case Op::OpTypeVector: - case Op::OpTypeMatrix: - case Op::OpTypeArray: - case Op::OpTypeRuntimeArray: - case Op::OpTypePointer: - return GetScalarTypeId(GetContainedTypeId(type_id)); - default: - assert(0); - return NoResult; - } -} - -// Return the type of 'member' of a composite. -Id SpirvEmitter::GetContainedTypeId(Id type_id, int member) const { - auto instr = module_.instruction(type_id); - - Op type_class = instr->opcode(); - switch (type_class) { - case Op::OpTypeVector: - case Op::OpTypeMatrix: - case Op::OpTypeArray: - case Op::OpTypeRuntimeArray: - return instr->id_operand(0); - case Op::OpTypePointer: - return instr->id_operand(1); - case Op::OpTypeStruct: - return instr->id_operand(member); - default: - assert(0); - return NoResult; - } -} - -// Return the immediately contained type of a given composite type. -Id SpirvEmitter::GetContainedTypeId(Id type_id) const { - return GetContainedTypeId(type_id, 0); -} - -// See if a scalar constant of this type has already been created, so it -// can be reused rather than duplicated. (Required by the specification). 
-Id SpirvEmitter::FindScalarConstant(Op type_class, Op opcode, Id type_id, - uint32_t value) const { - auto& grouped_constant = grouped_constants_[static_cast(type_class)]; - for (auto constant : grouped_constant) { - if (constant->opcode() == opcode && constant->type_id() == type_id && - constant->immediate_operand(0) == value) { - return constant->result_id(); - } - } - return 0; -} - -// Version of findScalarConstant (see above) for scalars that take two operands -// (e.g. a 'double'). -Id SpirvEmitter::FindScalarConstant(Op type_class, Op opcode, Id type_id, - uint32_t v1, uint32_t v2) const { - auto& grouped_constant = grouped_constants_[static_cast(type_class)]; - for (auto constant : grouped_constant) { - if (constant->opcode() == opcode && constant->type_id() == type_id && - constant->immediate_operand(0) == v1 && - constant->immediate_operand(1) == v2) { - return constant->result_id(); - } - } - return 0; -} - -// Return true if consuming 'opcode' means consuming a constant. -// "constant" here means after final transform to executable code, -// the value consumed will be a constant, so includes specialization. -bool SpirvEmitter::IsConstantOpCode(Op opcode) const { - switch (opcode) { - case Op::OpUndef: - case Op::OpConstantTrue: - case Op::OpConstantFalse: - case Op::OpConstant: - case Op::OpConstantComposite: - case Op::OpConstantSampler: - case Op::OpConstantNull: - case Op::OpSpecConstantTrue: - case Op::OpSpecConstantFalse: - case Op::OpSpecConstant: - case Op::OpSpecConstantComposite: - case Op::OpSpecConstantOp: - return true; - default: - return false; - } -} - -Id SpirvEmitter::MakeBoolConstant(bool value, bool is_spec_constant) { - Id type_id = MakeBoolType(); - Op opcode = is_spec_constant - ? (value ? Op::OpSpecConstantTrue : Op::OpSpecConstantFalse) - : (value ? 
Op::OpConstantTrue : Op::OpConstantFalse); - - // See if we already made it - Id existing = 0; - auto& grouped_constant = grouped_constants_[static_cast(Op::OpTypeBool)]; - for (auto& constant : grouped_constant) { - if (constant->type_id() == type_id && constant->opcode() == opcode) { - return constant->result_id(); - } - } - - // Make it - auto c = new Instruction(AllocateUniqueId(), type_id, opcode); - constants_types_globals_.push_back(c); - grouped_constants_[static_cast(Op::OpTypeBool)].push_back(c); - module_.MapInstruction(c); - - return c->result_id(); -} - -Id SpirvEmitter::MakeIntegerConstant(Id type_id, uint32_t value, - bool is_spec_constant) { - Op opcode = is_spec_constant ? Op::OpSpecConstant : Op::OpConstant; - Id existing = FindScalarConstant(Op::OpTypeInt, opcode, type_id, value); - if (existing) { - return existing; - } - - auto c = new Instruction(AllocateUniqueId(), type_id, opcode); - c->AddImmediateOperand(value); - constants_types_globals_.push_back(c); - grouped_constants_[static_cast(Op::OpTypeInt)].push_back(c); - module_.MapInstruction(c); - - return c->result_id(); -} - -Id SpirvEmitter::MakeFloatConstant(float value, bool is_spec_constant) { - Op opcode = is_spec_constant ? Op::OpSpecConstant : Op::OpConstant; - Id type_id = MakeFloatType(32); - uint32_t uint32_value = *reinterpret_cast(&value); - Id existing = - FindScalarConstant(Op::OpTypeFloat, opcode, type_id, uint32_value); - if (existing) { - return existing; - } - - auto c = new Instruction(AllocateUniqueId(), type_id, opcode); - c->AddImmediateOperand(uint32_value); - constants_types_globals_.push_back(c); - grouped_constants_[static_cast(Op::OpTypeFloat)].push_back(c); - module_.MapInstruction(c); - - return c->result_id(); -} - -Id SpirvEmitter::MakeDoubleConstant(double value, bool is_spec_constant) { - Op opcode = is_spec_constant ? 
Op::OpSpecConstant : Op::OpConstant; - Id type_id = MakeFloatType(64); - uint64_t uint64_value = *reinterpret_cast(&value); - uint32_t op1 = static_cast(uint64_value & 0xFFFFFFFF); - uint32_t op2 = static_cast(uint64_value >> 32); - Id existing = FindScalarConstant(Op::OpTypeFloat, opcode, type_id, op1, op2); - if (existing) { - return existing; - } - - auto c = new Instruction(AllocateUniqueId(), type_id, opcode); - c->AddImmediateOperand(op1); - c->AddImmediateOperand(op2); - constants_types_globals_.push_back(c); - grouped_constants_[static_cast(Op::OpTypeFloat)].push_back(c); - module_.MapInstruction(c); - - return c->result_id(); -} - -Id SpirvEmitter::FindCompositeConstant( - Op type_class, std::initializer_list components) const { - auto& grouped_constant = grouped_constants_[static_cast(type_class)]; - for (auto& constant : grouped_constant) { - // same shape? - if (constant->operand_count() != components.size()) { - continue; - } - - // same contents? - bool mismatch = false; - for (int op = 0; op < constant->operand_count(); ++op) { - if (constant->id_operand(op) != *(components.begin() + op)) { - mismatch = true; - break; - } - } - if (!mismatch) { - return constant->result_id(); - } - } - - return NoResult; -} - -Id SpirvEmitter::MakeCompositeConstant(Id type_id, - std::initializer_list components) { - assert(type_id); - Op type_class = GetTypeClass(type_id); - - switch (type_class) { - case Op::OpTypeVector: - case Op::OpTypeArray: - case Op::OpTypeStruct: - case Op::OpTypeMatrix: - break; - default: - assert(0); - return MakeFloatConstant(0.0); - } - - Id existing = FindCompositeConstant(type_class, components); - if (existing) { - return existing; - } - - auto c = - new Instruction(AllocateUniqueId(), type_id, Op::OpConstantComposite); - c->AddIdOperands(components); - constants_types_globals_.push_back(c); - grouped_constants_[static_cast(type_class)].push_back(c); - module_.MapInstruction(c); - - return c->result_id(); -} - -Instruction* 
SpirvEmitter::AddEntryPoint(spv::ExecutionModel execution_model, - Function* entry_point, - const char* name) { - auto instr = new Instruction(Op::OpEntryPoint); - instr->AddImmediateOperand(execution_model); - instr->AddIdOperand(entry_point->id()); - instr->AddStringOperand(name); - - entry_points_.push_back(instr); - - return instr; -} - -// Currently relying on the fact that all 'value' of interest are small -// non-negative values. -void SpirvEmitter::AddExecutionMode(Function* entry_point, - spv::ExecutionMode execution_mode, - int value1, int value2, int value3) { - auto instr = new Instruction(Op::OpExecutionMode); - instr->AddIdOperand(entry_point->id()); - instr->AddImmediateOperand(execution_mode); - if (value1 >= 0) { - instr->AddImmediateOperand(value1); - } - if (value2 >= 0) { - instr->AddImmediateOperand(value2); - } - if (value3 >= 0) { - instr->AddImmediateOperand(value3); - } - - execution_modes_.push_back(instr); -} - -void SpirvEmitter::AddName(Id target_id, const char* value) { - if (!value) { - return; - } - auto instr = new Instruction(Op::OpName); - instr->AddIdOperand(target_id); - instr->AddStringOperand(value); - - names_.push_back(instr); -} - -void SpirvEmitter::AddMemberName(Id target_id, int member, const char* value) { - if (!value) { - return; - } - auto instr = new Instruction(Op::OpMemberName); - instr->AddIdOperand(target_id); - instr->AddImmediateOperand(member); - instr->AddStringOperand(value); - - names_.push_back(instr); -} - -void SpirvEmitter::AddLine(Id target_id, Id file_name, int line_number, - int column_number) { - auto instr = new Instruction(Op::OpLine); - instr->AddIdOperand(target_id); - instr->AddIdOperand(file_name); - instr->AddImmediateOperand(line_number); - instr->AddImmediateOperand(column_number); - - lines_.push_back(instr); -} - -void SpirvEmitter::AddDecoration(Id target_id, spv::Decoration decoration, - int num) { - if (decoration == static_cast(BadValue)) { - return; - } - auto instr = new 
Instruction(Op::OpDecorate); - instr->AddIdOperand(target_id); - instr->AddImmediateOperand(decoration); - if (num >= 0) { - instr->AddImmediateOperand(num); - } - - decorations_.push_back(instr); -} - -void SpirvEmitter::AddMemberDecoration(Id target_id, int member, - spv::Decoration decoration, int num) { - auto instr = new Instruction(Op::OpMemberDecorate); - instr->AddIdOperand(target_id); - instr->AddImmediateOperand(member); - instr->AddImmediateOperand(decoration); - if (num >= 0) { - instr->AddImmediateOperand(num); - } - - decorations_.push_back(instr); -} - -Function* SpirvEmitter::MakeMainEntry() { - assert(!main_function_); - Block* entry = nullptr; - main_function_ = MakeFunctionEntry(MakeVoidType(), "main", {}, &entry); - return main_function_; -} - -Function* SpirvEmitter::MakeFunctionEntry(Id return_type, const char* name, - std::initializer_list param_types, - Block** entry) { - Id type_id = MakeFunctionType(return_type, param_types); - Id first_param_id = - param_types.size() ? AllocateUniqueIds((int)param_types.size()) : 0; - auto function = new Function(AllocateUniqueId(), return_type, type_id, - first_param_id, module_); - if (entry) { - *entry = new Block(AllocateUniqueId(), *function); - function->push_block(*entry); - set_build_point(*entry); - } - AddName(function->id(), name); - return function; -} - -void SpirvEmitter::MakeReturn(bool implicit, Id return_value) { - if (return_value) { - auto inst = new Instruction(NoResult, NoType, Op::OpReturnValue); - inst->AddIdOperand(return_value); - build_point_->AddInstruction(inst); - } else { - build_point_->AddInstruction( - new Instruction(NoResult, NoType, Op::OpReturn)); - } - - if (!implicit) { - CreateAndSetNoPredecessorBlock("post-return"); - } -} - -void SpirvEmitter::LeaveFunction() { - Block* block = build_point_; - Function& function = build_point_->parent(); - assert(block); - - // If our function did not contain a return, add a return void now. 
- if (!block->is_terminated()) { - // Whether we're in an unreachable (non-entry) block. - bool unreachable = - function.entry_block() != block && !block->predecessor_count(); - - if (unreachable) { - // Given that this block is at the end of a function, it must be right - // after an explicit return, just remove it. - function.pop_block(block); - } else { - // We'll add a return instruction at the end of the current block, - // which for a non-void function is really error recovery (?), as the - // source being translated should have had an explicit return, which would - // have been followed by an unreachable block, which was handled above. - if (function.return_type() == MakeVoidType()) { - MakeReturn(true); - } else { - MakeReturn(true, CreateUndefined(function.return_type())); - } - } - } -} - -void SpirvEmitter::MakeDiscard() { - build_point_->AddInstruction(new Instruction(Op::OpKill)); - CreateAndSetNoPredecessorBlock("post-discard"); -} - -Id SpirvEmitter::CreateVariable(spv::StorageClass storage_class, Id type, - const char* name) { - Id pointer_type = MakePointer(storage_class, type); - auto instr = - new Instruction(AllocateUniqueId(), pointer_type, Op::OpVariable); - instr->AddImmediateOperand(storage_class); - - switch (storage_class) { - case spv::StorageClass::Function: - // Validation rules require the declaration in the entry block. 
- build_point_->parent().AddLocalVariable(instr); - break; - default: - constants_types_globals_.push_back(instr); - module_.MapInstruction(instr); - break; - } - - AddName(instr->result_id(), name); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateUndefined(Id type) { - auto instr = new Instruction(AllocateUniqueId(), type, Op::OpUndef); - build_point_->AddInstruction(instr); - return instr->result_id(); -} - -void SpirvEmitter::CreateStore(Id pointer_id, Id value_id) { - auto instr = new Instruction(Op::OpStore); - instr->AddIdOperand(pointer_id); - instr->AddIdOperand(value_id); - build_point_->AddInstruction(instr); -} - -Id SpirvEmitter::CreateLoad(Id pointer_id) { - auto instr = new Instruction(AllocateUniqueId(), GetDerefTypeId(pointer_id), - Op::OpLoad); - instr->AddIdOperand(pointer_id); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateAccessChain(spv::StorageClass storage_class, Id base_id, - std::vector index_ids) { - // Figure out the final resulting type. 
- auto base_type_id = GetTypeId(base_id); - assert(IsPointerType(base_type_id) && index_ids.size()); - auto type_id = GetContainedTypeId(base_type_id); - for (auto index_id : index_ids) { - if (IsStructType(type_id)) { - assert(IsConstantScalar(index_id)); - type_id = GetContainedTypeId(type_id, GetConstantScalar(index_id)); - } else { - type_id = GetContainedTypeId(type_id, index_id); - } - } - auto chain_type_id = MakePointer(storage_class, type_id); - - // Make the instruction - auto instr = - new Instruction(AllocateUniqueId(), chain_type_id, Op::OpAccessChain); - instr->AddIdOperand(base_id); - instr->AddIdOperands(index_ids); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateArrayLength(Id struct_id, int array_member) { - auto instr = - new Instruction(AllocateUniqueId(), MakeIntType(32), Op::OpArrayLength); - instr->AddIdOperand(struct_id); - instr->AddImmediateOperand(array_member); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateCompositeExtract(Id composite, Id type_id, - uint32_t index) { - auto instr = - new Instruction(AllocateUniqueId(), type_id, Op::OpCompositeExtract); - instr->AddIdOperand(composite); - instr->AddImmediateOperand(index); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateCompositeExtract(Id composite, Id type_id, - std::vector indices) { - auto instr = - new Instruction(AllocateUniqueId(), type_id, Op::OpCompositeExtract); - instr->AddIdOperand(composite); - instr->AddImmediateOperands(indices); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateCompositeInsert(Id object, Id composite, Id type_id, - uint32_t index) { - auto instr = - new Instruction(AllocateUniqueId(), type_id, Op::OpCompositeInsert); - instr->AddIdOperand(object); - instr->AddIdOperand(composite); - instr->AddImmediateOperand(index); - build_point_->AddInstruction(instr); 
- - return instr->result_id(); -} - -Id SpirvEmitter::CreateCompositeInsert(Id object, Id composite, Id type_id, - std::vector indices) { - auto instr = - new Instruction(AllocateUniqueId(), type_id, Op::OpCompositeInsert); - instr->AddIdOperand(object); - instr->AddIdOperand(composite); - instr->AddImmediateOperands(indices); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateVectorExtractDynamic(Id vector, Id type_id, - Id component_index) { - auto instr = - new Instruction(AllocateUniqueId(), type_id, Op::OpVectorExtractDynamic); - instr->AddIdOperand(vector); - instr->AddIdOperand(component_index); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateVectorInsertDynamic(Id vector, Id type_id, Id component, - Id component_index) { - auto instr = - new Instruction(AllocateUniqueId(), type_id, Op::OpVectorInsertDynamic); - instr->AddIdOperand(vector); - instr->AddIdOperand(component); - instr->AddIdOperand(component_index); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -void SpirvEmitter::CreateNop() { - auto instr = new Instruction(spv::Op::OpNop); - build_point_->AddInstruction(instr); -} - -void SpirvEmitter::CreateControlBarrier( - spv::Scope execution_scope, spv::Scope memory_scope, - spv::MemorySemanticsMask memory_semantics) { - auto instr = new Instruction(Op::OpControlBarrier); - instr->AddImmediateOperand(MakeUintConstant(execution_scope)); - instr->AddImmediateOperand(MakeUintConstant(memory_scope)); - instr->AddImmediateOperand(MakeUintConstant(memory_semantics)); - build_point_->AddInstruction(instr); -} - -void SpirvEmitter::CreateMemoryBarrier( - spv::Scope execution_scope, spv::MemorySemanticsMask memory_semantics) { - auto instr = new Instruction(Op::OpMemoryBarrier); - instr->AddImmediateOperand(MakeUintConstant(execution_scope)); - instr->AddImmediateOperand(MakeUintConstant(memory_semantics)); - 
build_point_->AddInstruction(instr); -} - -Id SpirvEmitter::CreateUnaryOp(Op opcode, Id type_id, Id operand) { - auto instr = new Instruction(AllocateUniqueId(), type_id, opcode); - instr->AddIdOperand(operand); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateBinOp(Op opcode, Id type_id, Id left, Id right) { - auto instr = new Instruction(AllocateUniqueId(), type_id, opcode); - instr->AddIdOperand(left); - instr->AddIdOperand(right); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateTriOp(Op opcode, Id type_id, Id op1, Id op2, Id op3) { - auto instr = new Instruction(AllocateUniqueId(), type_id, opcode); - instr->AddIdOperand(op1); - instr->AddIdOperand(op2); - instr->AddIdOperand(op3); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateOp(Op opcode, Id type_id, - const std::vector& operands) { - auto instr = new Instruction(AllocateUniqueId(), type_id, opcode); - instr->AddIdOperands(operands); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateFunctionCall(Function* function, - std::vector args) { - auto instr = new Instruction(AllocateUniqueId(), function->return_type(), - Op::OpFunctionCall); - instr->AddIdOperand(function->id()); - instr->AddIdOperands(args); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateSwizzle(Id type_id, Id source, - std::vector channels) { - if (channels.size() == 1) { - return CreateCompositeExtract(source, type_id, channels.front()); - } - auto instr = - new Instruction(AllocateUniqueId(), type_id, Op::OpVectorShuffle); - assert(IsVector(source)); - instr->AddIdOperand(source); - instr->AddIdOperand(source); - instr->AddImmediateOperands(channels); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateLvalueSwizzle(Id type_id, Id target, Id source, 
- std::vector channels) { - assert(GetComponentCount(source) == channels.size()); - if (channels.size() == 1 && GetComponentCount(source) == 1) { - return CreateCompositeInsert(source, target, type_id, channels.front()); - } - - auto instr = - new Instruction(AllocateUniqueId(), type_id, Op::OpVectorShuffle); - assert(IsVector(source)); - assert(IsVector(target)); - instr->AddIdOperand(target); - instr->AddIdOperand(source); - - // Set up an identity shuffle from the base value to the result value. - uint32_t components[4] = {0, 1, 2, 3}; - - // Punch in the l-value swizzle. - int component_count = GetComponentCount(target); - for (int i = 0; i < (int)channels.size(); ++i) { - components[channels[i]] = component_count + i; - } - - // finish the instruction with these components selectors. - for (int i = 0; i < component_count; ++i) { - instr->AddImmediateOperand(components[i]); - } - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -void SpirvEmitter::PromoteScalar(spv::Decoration precision, Id& left, - Id& right) { - int direction = GetComponentCount(right) - GetComponentCount(left); - if (direction > 0) { - left = SmearScalar(precision, left, GetTypeId(right)); - } else if (direction < 0) { - right = SmearScalar(precision, right, GetTypeId(left)); - } -} - -Id SpirvEmitter::SmearScalar(spv::Decoration precision, Id scalar_value, - Id vector_type_id) { - assert(GetComponentCount(scalar_value) == 1); - int component_count = GetTypeComponentCount(vector_type_id); - if (component_count == 1) { - return scalar_value; - } - - auto instr = new Instruction(AllocateUniqueId(), vector_type_id, - Op::OpCompositeConstruct); - for (int i = 0; i < component_count; ++i) { - instr->AddIdOperand(scalar_value); - } - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateExtendedInstructionCall(spv::Decoration precision, - Id result_type, - Id instruction_set, - int instruction_ordinal, - std::initializer_list args) 
{ - auto instr = new Instruction(AllocateUniqueId(), result_type, Op::OpExtInst); - instr->AddIdOperand(instruction_set); - instr->AddImmediateOperand(instruction_ordinal); - instr->AddIdOperands(args); - - build_point_->AddInstruction(instr); - return instr->result_id(); -} - -Id SpirvEmitter::CreateGlslStd450InstructionCall( - spv::Decoration precision, Id result_type, - spv::GLSLstd450 instruction_ordinal, std::initializer_list args) { - if (!glsl_std_450_instruction_set_) { - glsl_std_450_instruction_set_ = ImportExtendedInstructions("GLSL.std.450"); - } - return CreateExtendedInstructionCall( - precision, result_type, glsl_std_450_instruction_set_, - static_cast(instruction_ordinal), args); -} - -// Accept all parameters needed to create a texture instruction. -// Create the correct instruction based on the inputs, and make the call. -Id SpirvEmitter::CreateTextureCall(spv::Decoration precision, Id result_type, - bool fetch, bool proj, bool gather, - const TextureParameters& parameters) { - static const int kMaxTextureArgs = 10; - Id tex_args[kMaxTextureArgs] = {}; - - // Set up the fixed arguments. - int arg_count = 0; - bool is_explicit = false; - tex_args[arg_count++] = parameters.sampler; - tex_args[arg_count++] = parameters.coords; - if (parameters.depth_ref) { - tex_args[arg_count++] = parameters.depth_ref; - } - if (parameters.comp) { - tex_args[arg_count++] = parameters.comp; - } - - // Set up the optional arguments. - int opt_arg_index = arg_count; // track which operand, if it exists, is the - // mask of optional arguments speculatively - // make room for the mask operand. 
- ++arg_count; - auto mask = spv::ImageOperandsMask::MaskNone; // the mask operand - if (parameters.bias) { - mask = mask | spv::ImageOperandsMask::Bias; - tex_args[arg_count++] = parameters.bias; - } - if (parameters.lod) { - mask = mask | spv::ImageOperandsMask::Lod; - tex_args[arg_count++] = parameters.lod; - is_explicit = true; - } - if (parameters.grad_x) { - mask = mask | spv::ImageOperandsMask::Grad; - tex_args[arg_count++] = parameters.grad_x; - tex_args[arg_count++] = parameters.grad_y; - is_explicit = true; - } - if (parameters.offset) { - if (IsConstant(parameters.offset)) { - mask = mask | spv::ImageOperandsMask::ConstOffset; - } else { - mask = mask | spv::ImageOperandsMask::Offset; - } - tex_args[arg_count++] = parameters.offset; - } - if (parameters.offsets) { - mask = mask | spv::ImageOperandsMask::ConstOffsets; - tex_args[arg_count++] = parameters.offsets; - } - if (parameters.sample) { - mask = mask | spv::ImageOperandsMask::Sample; - tex_args[arg_count++] = parameters.sample; - } - if (mask == spv::ImageOperandsMask::MaskNone) { - --arg_count; // undo speculative reservation for the mask argument - } else { - tex_args[opt_arg_index] = static_cast(mask); - } - - // Set up the instruction. 
- Op opcode; - opcode = Op::OpImageSampleImplicitLod; - if (fetch) { - opcode = Op::OpImageFetch; - } else if (gather) { - if (parameters.depth_ref) { - opcode = Op::OpImageDrefGather; - } else { - opcode = Op::OpImageGather; - } - } else if (is_explicit) { - if (parameters.depth_ref) { - if (proj) { - opcode = Op::OpImageSampleProjDrefExplicitLod; - } else { - opcode = Op::OpImageSampleDrefExplicitLod; - } - } else { - if (proj) { - opcode = Op::OpImageSampleProjExplicitLod; - } else { - opcode = Op::OpImageSampleExplicitLod; - } - } - } else { - if (parameters.depth_ref) { - if (proj) { - opcode = Op::OpImageSampleProjDrefImplicitLod; - } else { - opcode = Op::OpImageSampleDrefImplicitLod; - } - } else { - if (proj) { - opcode = Op::OpImageSampleProjImplicitLod; - } else { - opcode = Op::OpImageSampleImplicitLod; - } - } - } - - // See if the result type is expecting a smeared result. - // This happens when a legacy shadow*() call is made, which gets a vec4 back - // instead of a float. - Id smeared_type = result_type; - if (!IsScalarType(result_type)) { - switch (opcode) { - case Op::OpImageSampleDrefImplicitLod: - case Op::OpImageSampleDrefExplicitLod: - case Op::OpImageSampleProjDrefImplicitLod: - case Op::OpImageSampleProjDrefExplicitLod: - result_type = GetScalarTypeId(result_type); - break; - default: - break; - } - } - - // Build the SPIR-V instruction - auto instr = new Instruction(AllocateUniqueId(), result_type, opcode); - for (int op = 0; op < opt_arg_index; ++op) { - instr->AddIdOperand(tex_args[op]); - } - if (opt_arg_index < arg_count) { - instr->AddImmediateOperand(tex_args[opt_arg_index]); - } - for (int op = opt_arg_index + 1; op < arg_count; ++op) { - instr->AddIdOperand(tex_args[op]); - } - SetPrecision(instr->result_id(), precision); - build_point_->AddInstruction(instr); - - Id result_id = instr->result_id(); - - // When a smear is needed, do it, as per what was computed above when - // result_type was changed to a scalar type. 
- if (result_type != smeared_type) { - result_id = SmearScalar(precision, result_id, smeared_type); - } - - return result_id; -} - -Id SpirvEmitter::CreateTextureQueryCall(Op opcode, - const TextureParameters& parameters) { - // Figure out the result type. - Id result_type = 0; - switch (opcode) { - case Op::OpImageQuerySize: - case Op::OpImageQuerySizeLod: { - int component_count; - switch (GetTypeDimensionality(GetImageType(parameters.sampler))) { - case spv::Dim::Dim1D: - case spv::Dim::Buffer: - component_count = 1; - break; - case spv::Dim::Dim2D: - case spv::Dim::Cube: - case spv::Dim::Rect: - component_count = 2; - break; - case spv::Dim::Dim3D: - component_count = 3; - break; - case spv::Dim::SubpassData: - CheckNotImplemented("input-attachment dim"); - break; - default: - assert(0); - break; - } - if (IsArrayedImageType(GetImageType(parameters.sampler))) { - ++component_count; - } - if (component_count == 1) { - result_type = MakeIntType(32); - } else { - result_type = MakeVectorType(MakeIntType(32), component_count); - } - break; - } - case Op::OpImageQueryLod: - result_type = MakeVectorType(MakeFloatType(32), 2); - break; - case Op::OpImageQueryLevels: - case Op::OpImageQuerySamples: - result_type = MakeIntType(32); - break; - default: - assert(0); - break; - } - - auto instr = new Instruction(AllocateUniqueId(), result_type, opcode); - instr->AddIdOperand(parameters.sampler); - if (parameters.coords) { - instr->AddIdOperand(parameters.coords); - } - if (parameters.lod) { - instr->AddIdOperand(parameters.lod); - } - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateCompare(spv::Decoration precision, Id value1, Id value2, - bool is_equal) { - Id bool_type_id = MakeBoolType(); - Id value_type_id = GetTypeId(value1); - - assert(value_type_id == GetTypeId(value2)); - assert(!IsScalar(value1)); - - // Vectors. 
- if (IsVectorType(value_type_id)) { - Op op; - if (GetMostBasicTypeClass(value_type_id) == Op::OpTypeFloat) { - op = is_equal ? Op::OpFOrdEqual : Op::OpFOrdNotEqual; - } else { - op = is_equal ? Op::OpIEqual : Op::OpINotEqual; - } - - Id bool_vector_type_id = - MakeVectorType(bool_type_id, GetTypeComponentCount(value_type_id)); - Id bool_vector = CreateBinOp(op, bool_vector_type_id, value1, value2); - SetPrecision(bool_vector, precision); - - // Reduce vector compares with any() and all(). - op = is_equal ? Op::OpAll : Op::OpAny; - - return CreateUnaryOp(op, bool_type_id, bool_vector); - } - - CheckNotImplemented("Composite comparison of non-vectors"); - return NoResult; - - // Recursively handle aggregates, which include matrices, arrays, and - // structures - // and accumulate the results. - - // Matrices - - // Arrays - - // int numElements; - // const llvm::ArrayType* arrayType = - // llvm::dyn_cast(value1->getType()); - // if (arrayType) - // numElements = (int)arrayType->getNumElements(); - // else { - // // better be structure - // const llvm::StructType* structType = - // llvm::dyn_cast(value1->getType()); - // assert(structType); - // numElements = structType->getNumElements(); - //} - - // assert(numElements > 0); - - // for (int element = 0; element < numElements; ++element) { - // // Get intermediate comparison values - // llvm::Value* element1 = builder.CreateExtractValue(value1, element, - // "element1"); - // setInstructionPrecision(element1, precision); - // llvm::Value* element2 = builder.CreateExtractValue(value2, element, - // "element2"); - // setInstructionPrecision(element2, precision); - - // llvm::Value* subResult = createCompare(precision, element1, element2, - // equal, "comp"); - - // // Accumulate intermediate comparison - // if (element == 0) - // result = subResult; - // else { - // if (equal) - // result = builder.CreateAnd(result, subResult); - // else - // result = builder.CreateOr(result, subResult); - // 
setInstructionPrecision(result, precision); - // } - //} - - // return result; -} - -// OpCompositeConstruct -Id SpirvEmitter::CreateCompositeConstruct(Id type_id, - std::vector constituent_ids) { - assert(IsAggregateType(type_id) || - (GetTypeComponentCount(type_id) > 1 && - GetTypeComponentCount(type_id) == constituent_ids.size())); - - auto instr = - new Instruction(AllocateUniqueId(), type_id, Op::OpCompositeConstruct); - instr->AddIdOperands(constituent_ids); - build_point_->AddInstruction(instr); - - return instr->result_id(); -} - -Id SpirvEmitter::CreateConstructor(spv::Decoration precision, - std::vector source_ids, - Id result_type_id) { - Id result = 0; - int target_component_count = GetTypeComponentCount(result_type_id); - int target_component = 0; - - // Special case: when calling a vector constructor with a single scalar - // argument, smear the scalar - if (source_ids.size() == 1 && IsScalar(source_ids[0]) && - target_component_count > 1) { - return SmearScalar(precision, source_ids[0], result_type_id); - } - - // Accumulate the arguments for OpCompositeConstruct. 
- Id scalar_type_id = GetScalarTypeId(result_type_id); - std::vector constituent_ids; - for (auto source_id : source_ids) { - assert(!IsAggregate(source_id)); - int source_component_count = GetComponentCount(source_id); - int sources_to_use = source_component_count; - if (sources_to_use + target_component > target_component_count) { - sources_to_use = target_component_count - target_component; - } - for (int s = 0; s < sources_to_use; ++s) { - Id arg = source_id; - if (source_component_count > 1) { - arg = CreateSwizzle(scalar_type_id, arg, {static_cast(s)}); - } - if (target_component_count > 1) { - constituent_ids.push_back(arg); - } else { - result = arg; - } - ++target_component; - } - if (target_component >= target_component_count) { - break; - } - } - - if (!constituent_ids.empty()) { - result = CreateCompositeConstruct(result_type_id, constituent_ids); - } - - SetPrecision(result, precision); - - return result; -} - -Id SpirvEmitter::CreateMatrixConstructor(spv::Decoration precision, - std::vector sources, - Id result_type_id) { - Id component_type_id = GetScalarTypeId(result_type_id); - int column_count = GetTypeColumnCount(result_type_id); - int row_count = GetTypeRowCount(result_type_id); - - // Will use a two step process: - // 1. make a compile-time 2D array of values - // 2. construct a matrix from that array - - // Step 1. - - // Initialize the array to the identity matrix. - Id ids[kMaxMatrixSize][kMaxMatrixSize]; - Id one = MakeFloatConstant(1.0); - Id zero = MakeFloatConstant(0.0); - for (int col = 0; col < kMaxMatrixSize; ++col) { - for (int row = 0; row < kMaxMatrixSize; ++row) { - if (col == row) { - ids[col][row] = one; - } else { - ids[col][row] = zero; - } - } - } - - // Modify components as dictated by the arguments. - if (sources.size() == 1 && IsScalar(sources[0])) { - // A single scalar; resets the diagonals. 
- for (int col = 0; col < kMaxMatrixSize; ++col) { - ids[col][col] = sources[0]; - } - } else if (IsMatrix(sources[0])) { - // Constructing from another matrix; copy over the parts that exist in both - // the argument and constructee. - Id matrix = sources[0]; - int min_column_count = std::min(column_count, GetColumnCount(matrix)); - int min_row_count = std::min(row_count, GetRowCount(matrix)); - for (int col = 0; col < min_column_count; ++col) { - std::vector indexes; - indexes.push_back(col); - for (int row = 0; row < min_row_count; ++row) { - indexes.push_back(row); - ids[col][row] = - CreateCompositeExtract(matrix, component_type_id, indexes); - indexes.pop_back(); - SetPrecision(ids[col][row], precision); - } - } - } else { - // Fill in the matrix in column-major order with whatever argument - // components are available. - int row = 0; - int col = 0; - for (auto source : sources) { - Id arg_component = source; - for (int comp = 0; comp < GetComponentCount(source); ++comp) { - if (GetComponentCount(source) > 1) { - arg_component = - CreateCompositeExtract(source, component_type_id, comp); - SetPrecision(arg_component, precision); - } - ids[col][row++] = arg_component; - if (row == row_count) { - row = 0; - ++col; - } - } - } - } - - // Step 2: construct a matrix from that array. - - // Make the column vectors. - Id column_type_id = GetContainedTypeId(result_type_id); - std::vector matrix_columns; - for (int col = 0; col < column_count; ++col) { - std::vector vector_components; - for (int row = 0; row < row_count; ++row) { - vector_components.push_back(ids[col][row]); - } - matrix_columns.push_back( - CreateCompositeConstruct(column_type_id, vector_components)); - } - - // Make the matrix. 
- return CreateCompositeConstruct(result_type_id, matrix_columns); -} - -SpirvEmitter::If::If(SpirvEmitter& emitter, Id condition) - : emitter_(emitter), condition_(condition) { - function_ = &emitter_.build_point()->parent(); - - // make the blocks, but only put the then-block into the function, - // the else-block and merge-block will be added later, in order, after - // earlier code is emitted - then_block_ = new Block(emitter_.AllocateUniqueId(), *function_); - merge_block_ = new Block(emitter_.AllocateUniqueId(), *function_); - - // Save the current block, so that we can add in the flow control split when - // makeEndIf is called. - header_block_ = emitter_.build_point(); - - function_->push_block(then_block_); - emitter_.set_build_point(then_block_); -} - -void SpirvEmitter::If::MakeBeginElse() { - // Close out the "then" by having it jump to the merge_block - emitter_.CreateBranch(merge_block_); - - // Make the first else block and add it to the function - else_block_ = new Block(emitter_.AllocateUniqueId(), *function_); - function_->push_block(else_block_); - - // Start building the else block - emitter_.set_build_point(else_block_); -} - -void SpirvEmitter::If::MakeEndIf() { - // jump to the merge block - emitter_.CreateBranch(merge_block_); - - // Go back to the header_block and make the flow control split - emitter_.set_build_point(header_block_); - emitter_.CreateSelectionMerge(merge_block_, - spv::SelectionControlMask::MaskNone); - if (else_block_) { - emitter_.CreateConditionalBranch(condition_, then_block_, else_block_); - } else { - emitter_.CreateConditionalBranch(condition_, then_block_, merge_block_); - } - - // add the merge block to the function - function_->push_block(merge_block_); - emitter_.set_build_point(merge_block_); -} - -void SpirvEmitter::MakeSwitch(Id selector, int segment_count, - std::vector case_values, - std::vector value_index_to_segment, - int default_segment, - std::vector& segment_blocks) { - Function& function = 
build_point_->parent(); - - // Make all the blocks. - for (int s = 0; s < segment_count; ++s) { - segment_blocks.push_back(new Block(AllocateUniqueId(), function)); - } - - Block* merge_block = new Block(AllocateUniqueId(), function); - - // Make and insert the switch's selection-merge instruction. - CreateSelectionMerge(merge_block, spv::SelectionControlMask::MaskNone); - - // Make the switch instruction. - auto switchInst = new Instruction(NoResult, NoType, Op::OpSwitch); - switchInst->AddIdOperand(selector); - switchInst->AddIdOperand(default_segment >= 0 - ? segment_blocks[default_segment]->id() - : merge_block->id()); - for (size_t i = 0; i < case_values.size(); ++i) { - switchInst->AddImmediateOperand(case_values[i]); - switchInst->AddIdOperand(segment_blocks[value_index_to_segment[i]]->id()); - } - build_point_->AddInstruction(switchInst); - - // Push the merge block. - switch_merges_.push(merge_block); -} - -void SpirvEmitter::AddSwitchBreak() { - // Branch to the top of the merge block stack. - CreateBranch(switch_merges_.top()); - CreateAndSetNoPredecessorBlock("post-switch-break"); -} - -void SpirvEmitter::NextSwitchSegment(std::vector& segment_block, - int next_segment) { - int last_segment = next_segment - 1; - if (last_segment >= 0) { - // Close out previous segment by jumping, if necessary, to next segment. - if (!build_point_->is_terminated()) { - CreateBranch(segment_block[next_segment]); - } - } - Block* block = segment_block[next_segment]; - block->parent().push_block(block); - set_build_point(block); -} - -void SpirvEmitter::EndSwitch(std::vector& segment_block) { - // Close out previous segment by jumping, if necessary, to next segment. 
- if (!build_point_->is_terminated()) { - AddSwitchBreak(); - } - - switch_merges_.top()->parent().push_block(switch_merges_.top()); - set_build_point(switch_merges_.top()); - - switch_merges_.pop(); -} - -void SpirvEmitter::MakeNewLoop(bool test_first) { - loops_.push(Loop(*this, test_first)); - const Loop& loop = loops_.top(); - - // The loop test is always emitted before the loop body. - // But if the loop test executes at the bottom of the loop, then - // execute the test only on the second and subsequent iterations. - - // Remember the block that branches to the loop header. This - // is required for the test-after-body case. - Block* preheader = build_point(); - - // Branch into the loop - CreateBranch(loop.header); - - // Set ourselves inside the loop - loop.function->push_block(loop.header); - set_build_point(loop.header); - - if (!test_first) { - // Generate code to defer the loop test until the second and - // subsequent iterations. - - // It's always the first iteration when coming from the preheader. - // All other branches to this loop header will need to indicate "false", - // but we don't yet know where they will come from. - loop.is_first_iteration->AddIdOperand(MakeBoolConstant(true)); - loop.is_first_iteration->AddIdOperand(preheader->id()); - build_point()->AddInstruction(loop.is_first_iteration); - - // Mark the end of the structured loop. This must exist in the loop header - // block. - CreateLoopMerge(loop.merge, loop.header, spv::LoopControlMask::MaskNone); - - // Generate code to see if this is the first iteration of the loop. - // It needs to be in its own block, since the loop merge and - // the selection merge instructions can't both be in the same - // (header) block. 
- Block* firstIterationCheck = new Block(AllocateUniqueId(), *loop.function); - CreateBranch(firstIterationCheck); - loop.function->push_block(firstIterationCheck); - set_build_point(firstIterationCheck); - - // Control flow after this "if" normally reconverges at the loop body. - // However, the loop test has a "break branch" out of this selection - // construct because it can transfer control to the loop merge block. - CreateSelectionMerge(loop.body, spv::SelectionControlMask::MaskNone); - - Block* loopTest = new Block(AllocateUniqueId(), *loop.function); - CreateConditionalBranch(loop.is_first_iteration->result_id(), loop.body, - loopTest); - - loop.function->push_block(loopTest); - set_build_point(loopTest); - } -} - -void SpirvEmitter::CreateLoopTestBranch(Id condition) { - const Loop& loop = loops_.top(); - - // Generate the merge instruction. If the loop test executes before - // the body, then this is a loop merge. Otherwise the loop merge - // has already been generated and this is a conditional merge. - if (loop.test_first) { - CreateLoopMerge(loop.merge, loop.header, spv::LoopControlMask::MaskNone); - // Branching to the "body" block will keep control inside - // the loop. - CreateConditionalBranch(condition, loop.body, loop.merge); - loop.function->push_block(loop.body); - set_build_point(loop.body); - } else { - // The branch to the loop merge block is the allowed exception - // to the structured control flow. Otherwise, control flow will - // continue to loop.body block. Since that is already the target - // of a merge instruction, and a block can't be the target of more - // than one merge instruction, we need to make an intermediate block. - Block* stayInLoopBlock = new Block(AllocateUniqueId(), *loop.function); - CreateSelectionMerge(stayInLoopBlock, spv::SelectionControlMask::MaskNone); - - // This is the loop test. - CreateConditionalBranch(condition, stayInLoopBlock, loop.merge); - - // The dummy block just branches to the real loop body. 
- loop.function->push_block(stayInLoopBlock); - set_build_point(stayInLoopBlock); - CreateBranchToBody(); - } -} - -void SpirvEmitter::CreateBranchToBody() { - const Loop& loop = loops_.top(); - assert(loop.body); - - // This is a reconvergence of control flow, so no merge instruction - // is required. - CreateBranch(loop.body); - loop.function->push_block(loop.body); - set_build_point(loop.body); -} - -void SpirvEmitter::CreateLoopContinue() { - CreateBranchToLoopHeaderFromInside(loops_.top()); - // Set up a block for dead code. - CreateAndSetNoPredecessorBlock("post-loop-continue"); -} - -void SpirvEmitter::CreateLoopExit() { - CreateBranch(loops_.top().merge); - // Set up a block for dead code. - CreateAndSetNoPredecessorBlock("post-loop-break"); -} - -void SpirvEmitter::CloseLoop() { - const Loop& loop = loops_.top(); - - // Branch back to the top. - CreateBranchToLoopHeaderFromInside(loop); - - // Add the merge block and set the build point to it. - loop.function->push_block(loop.merge); - set_build_point(loop.merge); - - loops_.pop(); -} - -void SpirvEmitter::ClearAccessChain() { - access_chain_.base = NoResult; - access_chain_.index_chain.clear(); - access_chain_.instr = NoResult; - access_chain_.swizzle.clear(); - access_chain_.component = NoResult; - access_chain_.pre_swizzle_base_type = NoType; - access_chain_.is_rvalue = false; -} - -// Turn the described access chain in 'accessChain' into an instruction -// computing its address. This *cannot* include complex swizzles, which must -// be handled after this is called, but it does include swizzles that select -// an individual element, as a single address of a scalar type can be -// computed by an OpAccessChain instruction. 
-Id SpirvEmitter::CollapseAccessChain() { - assert(access_chain_.is_rvalue == false); - - if (!access_chain_.index_chain.empty()) { - if (!access_chain_.instr) { - auto storage_class = module_.storage_class(GetTypeId(access_chain_.base)); - access_chain_.instr = CreateAccessChain(storage_class, access_chain_.base, - access_chain_.index_chain); - } - return access_chain_.instr; - } else { - return access_chain_.base; - } - - // Note that non-trivial swizzling is left pending... -} - -// Clear out swizzle if it is redundant, that is reselecting the same components -// that would be present without the swizzle. -void SpirvEmitter::SimplifyAccessChainSwizzle() { - // If the swizzle has fewer components than the vector, it is subsetting, and - // must stay to preserve that fact. - if (GetTypeComponentCount(access_chain_.pre_swizzle_base_type) > - access_chain_.swizzle.size()) { - return; - } - - // If components are out of order, it is a swizzle. - for (size_t i = 0; i < access_chain_.swizzle.size(); ++i) { - if (i != access_chain_.swizzle[i]) { - return; - } - } - - // Otherwise, there is no need to track this swizzle. - access_chain_.swizzle.clear(); - if (access_chain_.component == NoResult) { - access_chain_.pre_swizzle_base_type = NoType; - } -} - -// To the extent any swizzling can become part of the chain -// of accesses instead of a post operation, make it so. -// If 'dynamic' is true, include transfering a non-static component index, -// otherwise, only transfer static indexes. -// -// Also, Boolean vectors are likely to be special. While -// for external storage, they should only be integer types, -// function-local bool vectors could use sub-word indexing, -// so keep that as a separate Insert/Extract on a loaded vector. -void SpirvEmitter::TransferAccessChainSwizzle(bool dynamic) { - // too complex? - if (access_chain_.swizzle.size() > 1) { - return; - } - - // non existent? 
- if (access_chain_.swizzle.empty() && access_chain_.component == NoResult) { - return; - } - - // single component... - - // skip doing it for Boolean vectors - if (IsBoolType(GetContainedTypeId(access_chain_.pre_swizzle_base_type))) { - return; - } - - if (access_chain_.swizzle.size() == 1) { - // handle static component - access_chain_.index_chain.push_back( - MakeUintConstant(access_chain_.swizzle.front())); - access_chain_.swizzle.clear(); - // note, the only valid remaining dynamic access would be to this one - // component, so don't bother even looking at access_chain_.component - access_chain_.pre_swizzle_base_type = NoType; - access_chain_.component = NoResult; - } else if (dynamic && access_chain_.component != NoResult) { - // handle dynamic component - access_chain_.index_chain.push_back(access_chain_.component); - access_chain_.pre_swizzle_base_type = NoType; - access_chain_.component = NoResult; - } -} - -void SpirvEmitter::PushAccessChainSwizzle(std::vector swizzle, - Id pre_swizzle_base_type) { - // Swizzles can be stacked in GLSL, but simplified to a single - // one here; the base type doesn't change. - if (access_chain_.pre_swizzle_base_type == NoType) { - access_chain_.pre_swizzle_base_type = pre_swizzle_base_type; - } - - // If needed, propagate the swizzle for the current access chain. - if (access_chain_.swizzle.size()) { - std::vector oldSwizzle = access_chain_.swizzle; - access_chain_.swizzle.resize(0); - for (unsigned int i = 0; i < swizzle.size(); ++i) { - access_chain_.swizzle.push_back(oldSwizzle[swizzle[i]]); - } - } else { - access_chain_.swizzle = swizzle; - } - - // Determine if we need to track this swizzle anymore. 
- SimplifyAccessChainSwizzle(); -} - -void SpirvEmitter::CreateAccessChainStore(Id rvalue) { - assert(access_chain_.is_rvalue == false); - - TransferAccessChainSwizzle(true); - Id base = CollapseAccessChain(); - - if (access_chain_.swizzle.size() && access_chain_.component != NoResult) { - CheckNotImplemented( - "simultaneous l-value swizzle and dynamic component selection"); - return; - } - - // If swizzle still exists, it is out-of-order or not full, we must load the - // target vector, extract and insert elements to perform writeMask and/or - // swizzle. - Id source = NoResult; - if (access_chain_.swizzle.size()) { - Id temp_base_id = CreateLoad(base); - source = CreateLvalueSwizzle(GetTypeId(temp_base_id), temp_base_id, rvalue, - access_chain_.swizzle); - } - - // Dynamic component selection. - if (access_chain_.component != NoResult) { - Id temp_base_id = (source == NoResult) ? CreateLoad(base) : source; - source = CreateVectorInsertDynamic(temp_base_id, GetTypeId(temp_base_id), - rvalue, access_chain_.component); - } - - if (source == NoResult) { - source = rvalue; - } - - CreateStore(source, base); -} - -Id SpirvEmitter::CreateAccessChainLoad(Id result_type_id) { - Id id; - - if (access_chain_.is_rvalue) { - // Transfer access chain, but keep it static, so we can stay in registers. - TransferAccessChainSwizzle(false); - if (!access_chain_.index_chain.empty()) { - Id swizzle_base_type_id = access_chain_.pre_swizzle_base_type != NoType - ? access_chain_.pre_swizzle_base_type - : result_type_id; - - // If all the accesses are constants we can use OpCompositeExtract. - std::vector indexes; - bool constant = true; - for (auto index : access_chain_.index_chain) { - if (IsConstantScalar(index)) { - indexes.push_back(GetConstantScalar(index)); - } else { - constant = false; - break; - } - } - - if (constant) { - id = CreateCompositeExtract(access_chain_.base, swizzle_base_type_id, - indexes); - } else { - // Make a new function variable for this r-value. 
- Id lvalue = CreateVariable(spv::StorageClass::Function, - GetTypeId(access_chain_.base), "indexable"); - - // Store into it. - CreateStore(access_chain_.base, lvalue); - - // Move base to the new variable. - access_chain_.base = lvalue; - access_chain_.is_rvalue = false; - - // Load through the access chain. - id = CreateLoad(CollapseAccessChain()); - } - } else { - id = access_chain_.base; - } - } else { - TransferAccessChainSwizzle(true); - // Load through the access chain. - id = CreateLoad(CollapseAccessChain()); - } - - // Done, unless there are swizzles to do. - if (access_chain_.swizzle.empty() && access_chain_.component == NoResult) { - return id; - } - - // Do remaining swizzling. - // First, static swizzling. - if (access_chain_.swizzle.size()) { - // Static swizzle. - Id swizzledType = GetScalarTypeId(GetTypeId(id)); - if (access_chain_.swizzle.size() > 1) { - swizzledType = - MakeVectorType(swizzledType, (int)access_chain_.swizzle.size()); - } - id = CreateSwizzle(swizzledType, id, access_chain_.swizzle); - } - - // Dynamic single-component selection. - if (access_chain_.component != NoResult) { - id = - CreateVectorExtractDynamic(id, result_type_id, access_chain_.component); - } - - return id; -} - -Id SpirvEmitter::CreateAccessChainLValue() { - assert(access_chain_.is_rvalue == false); - - TransferAccessChainSwizzle(true); - Id lvalue = CollapseAccessChain(); - - // If swizzle exists, it is out-of-order or not full, we must load the target - // vector, extract and insert elements to perform writeMask and/or swizzle. - // This does not go with getting a direct l-value pointer. 
- assert(access_chain_.swizzle.empty()); - assert(access_chain_.component == NoResult); - - return lvalue; -} - -void SpirvEmitter::Serialize(std::vector& out) const { - // Header, before first instructions: - out.push_back(spv::MagicNumber); - out.push_back(spv::Version); - out.push_back(builder_number_); - out.push_back(unique_id_ + 1); - out.push_back(0); - - for (auto capability : capabilities_) { - Instruction capInst(0, 0, Op::OpCapability); - capInst.AddImmediateOperand(capability); - capInst.Serialize(out); - } - - // TBD: OpExtension ... - - SerializeInstructions(out, imports_); - Instruction memInst(0, 0, Op::OpMemoryModel); - memInst.AddImmediateOperand(addressing_model_); - memInst.AddImmediateOperand(memory_model_); - memInst.Serialize(out); - - // Instructions saved up while building: - SerializeInstructions(out, entry_points_); - SerializeInstructions(out, execution_modes_); - - // Debug instructions: - if (source_language_ != spv::SourceLanguage::Unknown) { - Instruction sourceInst(0, 0, Op::OpSource); - sourceInst.AddImmediateOperand(source_language_); - sourceInst.AddImmediateOperand(source_version_); - sourceInst.Serialize(out); - } - for (auto extension : source_extensions_) { - Instruction extInst(0, 0, Op::OpSourceExtension); - extInst.AddStringOperand(extension); - extInst.Serialize(out); - } - SerializeInstructions(out, names_); - SerializeInstructions(out, lines_); - - // Annotation instructions: - SerializeInstructions(out, decorations_); - - SerializeInstructions(out, constants_types_globals_); - SerializeInstructions(out, externals_); - - // The functions: - module_.Serialize(out); -} - -void SpirvEmitter::SerializeInstructions( - std::vector& out, - const std::vector& instructions) const { - for (auto instruction : instructions) { - instruction->Serialize(out); - } -} - -// Utility method for creating a new block and setting the insert point to -// be in it. 
This is useful for flow-control operations that need a "dummy" -// block proceeding them (e.g. instructions after a discard, etc). -void SpirvEmitter::CreateAndSetNoPredecessorBlock(const char* name) { - Block* block = new Block(AllocateUniqueId(), build_point_->parent()); - block->set_unreachable(true); - build_point_->parent().push_block(block); - set_build_point(block); - - AddName(block->id(), name); -} - -void SpirvEmitter::CreateBranch(Block* block) { - auto instr = new Instruction(Op::OpBranch); - instr->AddIdOperand(block->id()); - build_point_->AddInstruction(instr); - block->AddPredecessor(build_point_); -} - -void SpirvEmitter::CreateSelectionMerge(Block* merge_block, - spv::SelectionControlMask control) { - auto instr = new Instruction(Op::OpSelectionMerge); - instr->AddIdOperand(merge_block->id()); - instr->AddImmediateOperand(control); - build_point_->AddInstruction(instr); -} - -void SpirvEmitter::CreateLoopMerge(Block* merge_block, Block* continueBlock, - spv::LoopControlMask control) { - auto instr = new Instruction(Op::OpLoopMerge); - instr->AddIdOperand(merge_block->id()); - instr->AddIdOperand(continueBlock->id()); - instr->AddImmediateOperand(control); - build_point_->AddInstruction(instr); -} - -void SpirvEmitter::CreateConditionalBranch(Id condition, Block* then_block, - Block* else_block) { - auto instr = new Instruction(Op::OpBranchConditional); - instr->AddIdOperand(condition); - instr->AddIdOperand(then_block->id()); - instr->AddIdOperand(else_block->id()); - build_point_->AddInstruction(instr); - then_block->AddPredecessor(build_point_); - else_block->AddPredecessor(build_point_); -} - -SpirvEmitter::Loop::Loop(SpirvEmitter& emitter, bool testFirstArg) - : function(&emitter.build_point()->parent()), - header(new Block(emitter.AllocateUniqueId(), *function)), - merge(new Block(emitter.AllocateUniqueId(), *function)), - body(new Block(emitter.AllocateUniqueId(), *function)), - test_first(testFirstArg), - is_first_iteration(nullptr) { - if 
(!test_first) { - // You may be tempted to rewrite this as - // new Instruction(builder.getUniqueId(), builder.makeBoolType(), OpPhi); - // This will cause subtle test failures because builder.getUniqueId(), - // and builder.makeBoolType() can then get run in a compiler-specific - // order making tests fail for certain configurations. - Id instructionId = emitter.AllocateUniqueId(); - is_first_iteration = - new Instruction(instructionId, emitter.MakeBoolType(), Op::OpPhi); - } -} - -// Create a branch to the header of the given loop, from inside -// the loop body. -// Adjusts the phi node for the first-iteration value if needeed. -void SpirvEmitter::CreateBranchToLoopHeaderFromInside(const Loop& loop) { - CreateBranch(loop.header); - if (loop.is_first_iteration) { - loop.is_first_iteration->AddIdOperand(MakeBoolConstant(false)); - loop.is_first_iteration->AddIdOperand(build_point()->id()); - } -} - -void SpirvEmitter::CheckNotImplemented(const char* message) { - xe::FatalError("Missing functionality: %s", message); -} - -} // namespace spirv -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/spirv/spirv_emitter.h b/src/xenia/ui/spirv/spirv_emitter.h deleted file mode 100644 index ccd0fcdbd..000000000 --- a/src/xenia/ui/spirv/spirv_emitter.h +++ /dev/null @@ -1,731 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -// Contents originally forked from: -// https://github.com/KhronosGroup/glslang/ -// -// Copyright (C) 2014 LunarG, Inc. -// -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -#ifndef XENIA_UI_SPIRV_SPIRV_EMITTER_H_ -#define XENIA_UI_SPIRV_SPIRV_EMITTER_H_ - -#include -#include -#include -#include - -#include "xenia/base/assert.h" -#include "xenia/ui/spirv/spirv_ir.h" -#include "xenia/ui/spirv/spirv_util.h" - -namespace xe { -namespace ui { -namespace spirv { - -class SpirvEmitter { - public: - SpirvEmitter(); - ~SpirvEmitter(); - - // Document what source language and text this module was translated from. 
- void SetSourceLanguage(spv::SourceLanguage language, int version) { - source_language_ = language; - source_version_ = version; - } - - // Document an extension to the source language. Informational only. - void AddSourceExtension(const char* ext) { - source_extensions_.push_back(ext); - } - - // Set addressing model and memory model for the entire module. - void SetMemoryModel(spv::AddressingModel addressing_model, - spv::MemoryModel memory_model) { - addressing_model_ = addressing_model; - memory_model_ = memory_model; - } - - // Declare a capability used by this module. - void DeclareCapability(spv::Capability cap) { capabilities_.push_back(cap); } - - // Import an extended set of instructions that can be later referenced by the - // returned id. - Id ImportExtendedInstructions(const char* name); - - // For creating new types (will return old type if the requested one was - // already made). - Id MakeVoidType(); - Id MakeBoolType(); - Id MakePointer(spv::StorageClass storage_class, Id pointee); - Id MakeIntegerType(int bit_width, bool is_signed); - Id MakeIntType(int bit_width) { return MakeIntegerType(bit_width, true); } - Id MakeUintType(int bit_width) { return MakeIntegerType(bit_width, false); } - Id MakeFloatType(int bit_width); - Id MakeStructType(std::initializer_list members, const char* name); - Id MakePairStructType(Id type0, Id type1); - Id MakeVectorType(Id component_type, int component_count); - Id MakeMatrix2DType(Id component_type, int cols, int rows); - Id MakeArrayType(Id element_type, int length); - Id MakeRuntimeArray(Id element_type); - Id MakeFunctionType(Id return_type, std::initializer_list param_types); - Id MakeImageType(Id sampled_type, spv::Dim dim, bool has_depth, - bool is_arrayed, bool is_multisampled, int sampled, - spv::ImageFormat format); - Id MakeSamplerType(); - Id MakeSampledImageType(Id image_type); - - // For querying about types. 
- Id GetTypeId(Id result_id) const { return module_.type_id(result_id); } - Id GetDerefTypeId(Id result_id) const; - Op GetOpcode(Id id) const { return module_.instruction(id)->opcode(); } - Op GetTypeClass(Id type_id) const { return GetOpcode(type_id); } - Op GetMostBasicTypeClass(Id type_id) const; - int GetComponentCount(Id result_id) const { - return GetTypeComponentCount(GetTypeId(result_id)); - } - int GetTypeComponentCount(Id type_id) const; - Id GetScalarTypeId(Id type_id) const; - Id GetContainedTypeId(Id type_id) const; - Id GetContainedTypeId(Id type_id, int member) const; - spv::StorageClass GetTypeStorageClass(Id type_id) const { - return module_.storage_class(type_id); - } - - bool IsPointer(Id result_id) const { - return IsPointerType(GetTypeId(result_id)); - } - bool IsScalar(Id result_id) const { - return IsScalarType(GetTypeId(result_id)); - } - bool IsVector(Id result_id) const { - return IsVectorType(GetTypeId(result_id)); - } - bool IsMatrix(Id result_id) const { - return IsMatrixType(GetTypeId(result_id)); - } - bool IsAggregate(Id result_id) const { - return IsAggregateType(GetTypeId(result_id)); - } - bool IsBoolType(Id type_id) const { - return grouped_types_[static_cast(spv::Op::OpTypeBool)].size() > 0 && - type_id == - grouped_types_[static_cast(spv::Op::OpTypeBool)] - .back() - ->result_id(); - } - - bool IsPointerType(Id type_id) const { - return GetTypeClass(type_id) == spv::Op::OpTypePointer; - } - bool IsScalarType(Id type_id) const { - return GetTypeClass(type_id) == spv::Op::OpTypeFloat || - GetTypeClass(type_id) == spv::Op::OpTypeInt || - GetTypeClass(type_id) == spv::Op::OpTypeBool; - } - bool IsVectorType(Id type_id) const { - return GetTypeClass(type_id) == spv::Op::OpTypeVector; - } - bool IsMatrixType(Id type_id) const { - return GetTypeClass(type_id) == spv::Op::OpTypeMatrix; - } - bool IsStructType(Id type_id) const { - return GetTypeClass(type_id) == spv::Op::OpTypeStruct; - } - bool IsArrayType(Id type_id) const { - 
return GetTypeClass(type_id) == spv::Op::OpTypeArray; - } - bool IsAggregateType(Id type_id) const { - return IsArrayType(type_id) || IsStructType(type_id); - } - bool IsImageType(Id type_id) const { - return GetTypeClass(type_id) == spv::Op::OpTypeImage; - } - bool IsSamplerType(Id type_id) const { - return GetTypeClass(type_id) == spv::Op::OpTypeSampler; - } - bool IsSampledImageType(Id type_id) const { - return GetTypeClass(type_id) == spv::Op::OpTypeSampledImage; - } - - bool IsConstantOpCode(Op opcode) const; - bool IsConstant(Id result_id) const { - return IsConstantOpCode(GetOpcode(result_id)); - } - bool IsConstantScalar(Id result_id) const { - return GetOpcode(result_id) == spv::Op::OpConstant; - } - uint32_t GetConstantScalar(Id result_id) const { - return module_.instruction(result_id)->immediate_operand(0); - } - spv::StorageClass GetStorageClass(Id result_id) const { - return GetTypeStorageClass(GetTypeId(result_id)); - } - - int GetTypeColumnCount(Id type_id) const { - assert(IsMatrixType(type_id)); - return GetTypeComponentCount(type_id); - } - int GetColumnCount(Id result_id) const { - return GetTypeColumnCount(GetTypeId(result_id)); - } - int GetTypeRowCount(Id type_id) const { - assert(IsMatrixType(type_id)); - return GetTypeComponentCount(GetContainedTypeId(type_id)); - } - int GetRowCount(Id result_id) const { - return GetTypeRowCount(GetTypeId(result_id)); - } - - spv::Dim GetTypeDimensionality(Id type_id) const { - assert(IsImageType(type_id)); - return static_cast( - module_.instruction(type_id)->immediate_operand(1)); - } - Id GetImageType(Id result_id) const { - Id type_id = GetTypeId(result_id); - assert(IsImageType(type_id) || IsSampledImageType(type_id)); - return IsSampledImageType(type_id) - ? 
module_.instruction(type_id)->id_operand(0) - : type_id; - } - bool IsArrayedImageType(Id type_id) const { - assert(IsImageType(type_id)); - return module_.instruction(type_id)->immediate_operand(3) != 0; - } - - // For making new constants (will return old constant if the requested one was - // already made). - Id MakeBoolConstant(bool value, bool is_spec_constant = false); - Id MakeIntConstant(int value, bool is_spec_constant = false) { - return MakeIntegerConstant(MakeIntType(32), static_cast(value), - is_spec_constant); - } - Id MakeUintConstant(uint32_t value, bool is_spec_constant = false) { - return MakeIntegerConstant(MakeUintType(32), value, is_spec_constant); - } - template - Id MakeUintConstant(T value, bool is_spec_constant = false) { - static_assert(sizeof(T) == sizeof(uint32_t), "Invalid type"); - return MakeIntegerConstant(MakeUintType(32), static_cast(value), - is_spec_constant); - } - Id MakeFloatConstant(float value, bool is_spec_constant = false); - Id MakeDoubleConstant(double value, bool is_spec_constant = false); - - // Turns the array of constants into a proper constant of the requested type. - Id MakeCompositeConstant(Id type, std::initializer_list components); - - // Declares an entry point and its execution model. - Instruction* AddEntryPoint(spv::ExecutionModel execution_model, - Function* entry_point, const char* name); - void AddExecutionMode(Function* entry_point, - spv::ExecutionMode execution_mode, int value1 = -1, - int value2 = -1, int value3 = -1); - void AddName(Id target_id, const char* name); - void AddMemberName(Id target_id, int member, const char* name); - void AddLine(Id target_id, Id file_name, int line_number, int column_number); - void AddDecoration(Id target_id, spv::Decoration decoration, int num = -1); - void AddMemberDecoration(Id target_id, int member, spv::Decoration, - int num = -1); - - // At the end of what block do the next create*() instructions go? 
- Block* build_point() const { return build_point_; } - void set_build_point(Block* build_point) { build_point_ = build_point; } - - // Makes the main function. - Function* MakeMainEntry(); - - // Makes a shader-style function, and create its entry block if entry is - // non-zero. - // Return the function, pass back the entry. - Function* MakeFunctionEntry(Id return_type, const char* name, - std::initializer_list param_types, - Block** entry = 0); - - // Creates a return statement. - // An 'implicit' return is one not appearing in the source code. In the case - // of an implicit return, no post-return block is inserted. - void MakeReturn(bool implicit, Id return_value = 0); - - // Generates all the code needed to finish up a function. - void LeaveFunction(); - - // Creates a fragment-shader discard (kill). - void MakeDiscard(); - - // Creates a global or function local or IO variable. - Id CreateVariable(spv::StorageClass storage_class, Id type, - const char* name = 0); - - // Creates an intermediate object whose value is undefined. - Id CreateUndefined(Id type); - - // Stores the given value into the specified pointer. - void CreateStore(Id pointer_id, Id value_id); - - // Loads the value from the given pointer. - Id CreateLoad(Id pointer_id); - - // Creates a pointer into a composite object that can be used with OpLoad and - // OpStore. - Id CreateAccessChain(spv::StorageClass storage_class, Id base_id, - std::vector index_ids); - - // Queries the length of a run-time array. 
- Id CreateArrayLength(Id struct_id, int array_member); - - Id CreateCompositeExtract(Id composite, Id type_id, uint32_t index); - Id CreateCompositeExtract(Id composite, Id type_id, - std::vector indexes); - Id CreateCompositeInsert(Id object, Id composite, Id type_id, uint32_t index); - Id CreateCompositeInsert(Id object, Id composite, Id type_id, - std::vector indexes); - - Id CreateVectorExtractDynamic(Id vector, Id type_id, Id component_index); - Id CreateVectorInsertDynamic(Id vector, Id type_id, Id component, - Id component_index); - - // Does nothing. - void CreateNop(); - - // Waits for other invocations of this module to reach the current point of - // execution. - void CreateControlBarrier(spv::Scope execution_scope, spv::Scope memory_scope, - spv::MemorySemanticsMask memory_semantics); - // Controls the order that memory accesses are observed. - void CreateMemoryBarrier(spv::Scope execution_scope, - spv::MemorySemanticsMask memory_semantics); - - Id CreateUnaryOp(Op opcode, Id type_id, Id operand); - Id CreateBinOp(Op opcode, Id type_id, Id operand1, Id operand2); - Id CreateTriOp(Op opcode, Id type_id, Id operand1, Id operand2, Id operand3); - Id CreateOp(Op opcode, Id type_id, const std::vector& operands); - Id CreateFunctionCall(Function* function, std::vector args); - - // Takes an rvalue (source) and a set of channels to extract from it to - // make a new rvalue. - Id CreateSwizzle(Id type_id, Id source, std::vector channels); - - // Takes a copy of an lvalue (target) and a source of components, and sets the - // source components into the lvalue where the 'channels' say to put them. - Id CreateLvalueSwizzle(Id type_id, Id target, Id source, - std::vector channels); - - // If the value passed in is an instruction and the precision is not EMpNone, - // it gets tagged with the requested precision. 
- void SetPrecision(Id value, spv::Decoration precision) { - CheckNotImplemented("setPrecision"); - } - - // Smears a scalar to a vector for the following forms: - // - PromoteScalar(scalar, vector) // smear scalar to width of vector - // - PromoteScalar(vector, scalar) // smear scalar to width of vector - // - PromoteScalar(pointer, scalar) // smear scalar to width of what pointer - // points to - // - PromoteScalar(scalar, scalar) // do nothing - // Other forms are not allowed. - // - // Note: One of the arguments will change, with the result coming back that - // way rather than through the return value. - void PromoteScalar(spv::Decoration precision, Id& left, Id& right); - - // Makes a value by smearing the scalar to fill the type. - Id SmearScalar(spv::Decoration precision, Id scalar_value, Id vector_type_id); - - // Executes an instruction in an imported set of extended instructions. - Id CreateExtendedInstructionCall(spv::Decoration precision, Id result_type, - Id instruction_set, int instruction_ordinal, - std::initializer_list args); - // Executes an instruction from the extended GLSL set. - Id CreateGlslStd450InstructionCall(spv::Decoration precision, Id result_type, - spv::GLSLstd450 instruction_ordinal, - std::initializer_list args); - - // List of parameters used to create a texture operation - struct TextureParameters { - Id sampler; - Id coords; - Id bias; - Id lod; - Id depth_ref; - Id offset; - Id offsets; - Id grad_x; - Id grad_y; - Id sample; - Id comp; - }; - - // Selects the correct texture operation based on all inputs, and emit the - // correct instruction. - Id CreateTextureCall(spv::Decoration precision, Id result_type, bool fetch, - bool proj, bool gather, - const TextureParameters& parameters); - - // Emits the OpTextureQuery* instruction that was passed in and figures out - // the right return value and type. 
- Id CreateTextureQueryCall(Op opcode, const TextureParameters& parameters); - - Id CreateSamplePositionCall(spv::Decoration precision, Id, Id); - Id CreateBitFieldExtractCall(spv::Decoration precision, Id, Id, Id, - bool isSigned); - Id CreateBitFieldInsertCall(spv::Decoration precision, Id, Id, Id, Id); - - // Reduction comparision for composites: For equal and not-equal resulting in - // a scalar. - Id CreateCompare(spv::Decoration precision, Id value1, Id value2, - bool is_equal); - - // OpCompositeConstruct - Id CreateCompositeConstruct(Id type_id, std::vector constituent_ids); - - // vector or scalar constructor - Id CreateConstructor(spv::Decoration precision, std::vector source_ids, - Id result_type_id); - - // matrix constructor - Id CreateMatrixConstructor(spv::Decoration precision, std::vector sources, - Id constructee); - - // Helper to use for building nested control flow with if-then-else. - class If { - public: - If(SpirvEmitter& emitter, Id condition); - ~If() = default; - - void MakeBeginElse(); - void MakeEndIf(); - - private: - If(const If&) = delete; - If& operator=(If&) = delete; - - SpirvEmitter& emitter_; - Id condition_; - Function* function_ = nullptr; - Block* header_block_ = nullptr; - Block* then_block_ = nullptr; - Block* else_block_ = nullptr; - Block* merge_block_ = nullptr; - }; - - // Makes a switch statement. - // A switch has 'numSegments' of pieces of code, not containing any - // case/default labels, all separated by one or more case/default labels. - // Each possible case value v is a jump to the caseValues[v] segment. The - // defaultSegment is also in this number space. How to compute the value is - // given by 'condition', as in switch(condition). - // - // The SPIR-V Builder will maintain the stack of post-switch merge blocks for - // nested switches. - // - // Use a defaultSegment < 0 if there is no default segment (to branch to post - // switch). 
- // - // Returns the right set of basic blocks to start each code segment with, so - // that the caller's recursion stack can hold the memory for it. - void MakeSwitch(Id condition, int segment_count, std::vector case_values, - std::vector value_index_to_segment, int default_segment, - std::vector& segment_blocks); - - // Adds a branch to the innermost switch's merge block. - void AddSwitchBreak(); - - // Move sto the next code segment, passing in the return argument in - // MakeSwitch(). - void NextSwitchSegment(std::vector& segment_block, int next_segment); - - // Finishes off the innermost switch. - void EndSwitch(std::vector& segment_block); - - // Starts the beginning of a new loop, and prepare the builder to - // generate code for the loop test. - // The test_first parameter is true when the loop test executes before - // the body (it is false for do-while loops). - void MakeNewLoop(bool test_first); - - // Adds the branch for the loop test, based on the given condition. - // The true branch goes to the first block in the loop body, and - // the false branch goes to the loop's merge block. The builder insertion - // point will be placed at the start of the body. - void CreateLoopTestBranch(Id condition); - - // Generates an unconditional branch to the loop body. - // The builder insertion point will be placed at the start of the body. - // Use this when there is no loop test. - void CreateBranchToBody(); - - // Adds a branch to the test of the current (innermost) loop. - // The way we generate code, that's also the loop header. - void CreateLoopContinue(); - - // Adds an exit (e.g. "break") for the innermost loop that you're in. - void CreateLoopExit(); - - // Close the innermost loop that you're in. - void CloseLoop(); - - // Access chain design for an R-Value vs. L-Value: - // - // There is a single access chain the builder is building at - // any particular time. Such a chain can be used to either to a load or - // a store, when desired. 
- // - // Expressions can be r-values, l-values, or both, or only r-values: - // a[b.c].d = .... // l-value - // ... = a[b.c].d; // r-value, that also looks like an l-value - // ++a[b.c].d; // r-value and l-value - // (x + y)[2]; // r-value only, can't possibly be l-value - // - // Computing an r-value means generating code. Hence, - // r-values should only be computed when they are needed, not speculatively. - // - // Computing an l-value means saving away information for later use in the - // compiler, - // no code is generated until the l-value is later dereferenced. It is okay - // to speculatively generate an l-value, just not okay to speculatively - // dereference it. - // - // The base of the access chain (the left-most variable or expression - // from which everything is based) can be set either as an l-value - // or as an r-value. Most efficient would be to set an l-value if one - // is available. If an expression was evaluated, the resulting r-value - // can be set as the chain base. - // - // The users of this single access chain can save and restore if they - // want to nest or manage multiple chains. 
- // - struct AccessChain { - Id base; // for l-values, pointer to the base object, for r-values, the - // base object - std::vector index_chain; - Id instr; // cache the instruction that generates this access chain - std::vector swizzle; // each std::vector element selects the next - // GLSL component number - Id component; // a dynamic component index, can coexist with a swizzle, - // done after the swizzle, NoResult if not present - Id pre_swizzle_base_type; // dereferenced type, before swizzle or component - // is - // applied; NoType unless a swizzle or component is - // present - bool is_rvalue; // true if 'base' is an r-value, otherwise, base is an - // l-value - }; - - // - // the SPIR-V builder maintains a single active chain that - // the following methods operated on - // - - // for external save and restore - AccessChain access_chain() { return access_chain_; } - void set_access_chain(AccessChain new_chain) { access_chain_ = new_chain; } - - void ClearAccessChain(); - - // set new base as an l-value base - void set_access_chain_lvalue(Id lvalue) { - assert(IsPointer(lvalue)); - access_chain_.base = lvalue; - } - - // set new base value as an r-value - void set_access_chain_rvalue(Id rvalue) { - access_chain_.is_rvalue = true; - access_chain_.base = rvalue; - } - - // push offset onto the end of the chain - void PushAccessChainOffset(Id offset) { - access_chain_.index_chain.push_back(offset); - } - - // push new swizzle onto the end of any existing swizzle, merging into a - // single swizzle - void PushAccessChainSwizzle(std::vector swizzle, - Id pre_swizzle_base_type); - - // push a variable component selection onto the access chain; supporting only - // one, so unsided - void PushAccessChainComponent(Id component, Id pre_swizzle_base_type) { - access_chain_.component = component; - if (access_chain_.pre_swizzle_base_type == NoType) { - access_chain_.pre_swizzle_base_type = pre_swizzle_base_type; - } - } - - // use accessChain and swizzle to store value 
- void CreateAccessChainStore(Id rvalue); - - // use accessChain and swizzle to load an r-value - Id CreateAccessChainLoad(Id result_type_id); - - // get the direct pointer for an l-value - Id CreateAccessChainLValue(); - - void Serialize(std::vector& out) const; - - private: - // Maximum dimension for column/row in a matrix. - static const int kMaxMatrixSize = 4; - - // Allocates a new . - Id AllocateUniqueId() { return ++unique_id_; } - - // Allocates a contiguous sequence of s. - Id AllocateUniqueIds(int count) { - Id id = unique_id_ + 1; - unique_id_ += count; - return id; - } - - Id MakeIntegerConstant(Id type_id, uint32_t value, bool is_spec_constant); - Id FindScalarConstant(Op type_class, Op opcode, Id type_id, - uint32_t value) const; - Id FindScalarConstant(Op type_class, Op opcode, Id type_id, uint32_t v1, - uint32_t v2) const; - Id FindCompositeConstant(Op type_class, - std::initializer_list components) const; - - Id CollapseAccessChain(); - void SimplifyAccessChainSwizzle(); - void TransferAccessChainSwizzle(bool dynamic); - - void SerializeInstructions( - std::vector& out, - const std::vector& instructions) const; - - void CreateAndSetNoPredecessorBlock(const char* name); - void CreateBranch(Block* block); - void CreateSelectionMerge(Block* merge_block, - spv::SelectionControlMask control); - void CreateLoopMerge(Block* merge_block, Block* continueBlock, - spv::LoopControlMask control); - void CreateConditionalBranch(Id condition, Block* then_block, - Block* else_block); - - struct Loop; // Defined below. - void CreateBranchToLoopHeaderFromInside(const Loop& loop); - - // Asserts on unimplemented functionality. 
- void CheckNotImplemented(const char* message); - - spv::SourceLanguage source_language_ = spv::SourceLanguage::Unknown; - int source_version_ = 0; - std::vector source_extensions_; - spv::AddressingModel addressing_model_ = spv::AddressingModel::Logical; - spv::MemoryModel memory_model_ = spv::MemoryModel::GLSL450; - std::vector capabilities_; - int builder_number_ = 0; - Module module_; - Block* build_point_ = nullptr; - Id unique_id_ = 0; - Function* main_function_ = nullptr; - AccessChain access_chain_; - Id glsl_std_450_instruction_set_ = 0; - - // special blocks of instructions for output - std::vector imports_; - std::vector entry_points_; - std::vector execution_modes_; - std::vector names_; - std::vector lines_; - std::vector decorations_; - std::vector constants_types_globals_; - std::vector externals_; - - // not output, internally used for quick & dirty canonical (unique) creation - // All types appear before OpConstant. - std::vector - grouped_constants_[static_cast(spv::Op::OpConstant)]; - std::vector - grouped_types_[static_cast(spv::Op::OpConstant)]; - - // Stack of switches. - std::stack switch_merges_; - - // Data that needs to be kept in order to properly handle loops. - struct Loop { - // Constructs a default Loop structure containing new header, merge, and - // body blocks for the current function. - // The test_first argument indicates whether the loop test executes at - // the top of the loop rather than at the bottom. In the latter case, - // also create a phi instruction whose value indicates whether we're on - // the first iteration of the loop. The phi instruction is initialized - // with no values or predecessor operands. - Loop(SpirvEmitter& emitter, bool test_first); - - // The function containing the loop. - Function* const function; - // The header is the first block generated for the loop. - // It dominates all the blocks in the loop, i.e. it is always - // executed before any others. 
- // If the loop test is executed before the body (as in "while" and - // "for" loops), then the header begins with the test code. - // Otherwise, the loop is a "do-while" loop and the header contains the - // start of the body of the loop (if the body exists). - Block* const header; - // The merge block marks the end of the loop. Control is transferred - // to the merge block when either the loop test fails, or when a - // nested "break" is encountered. - Block* const merge; - // The body block is the first basic block in the body of the loop, i.e. - // the code that is to be repeatedly executed, aside from loop control. - // This member is null until we generate code that references the loop - // body block. - Block* const body; - // True when the loop test executes before the body. - const bool test_first; - // When the test executes after the body, this is defined as the phi - // instruction that tells us whether we are on the first iteration of - // the loop. Otherwise this is null. This is non-const because - // it has to be initialized outside of the initializer-list. - Instruction* is_first_iteration; - }; - - // Our loop stack. - std::stack loops_; -}; - -} // namespace spirv -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_SPIRV_SPIRV_EMITTER_H_ diff --git a/src/xenia/ui/spirv/spirv_ir.h b/src/xenia/ui/spirv/spirv_ir.h deleted file mode 100644 index e75459844..000000000 --- a/src/xenia/ui/spirv/spirv_ir.h +++ /dev/null @@ -1,421 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -// Contents originally forked from: -// https://github.com/KhronosGroup/glslang/ -// -// Copyright (C) 2014 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// SPIRV-IR -// -// Simple in-memory representation (IR) of SPIRV. Just for holding -// Each function's CFG of blocks. 
Has this hierarchy: -// - Module, which is a list of -// - Function, which is a list of -// - Block, which is a list of -// - Instruction -// - -#ifndef XENIA_UI_SPIRV_SPIRV_IR_H_ -#define XENIA_UI_SPIRV_SPIRV_IR_H_ - -#include -#include - -#include "xenia/ui/spirv/spirv_util.h" - -namespace xe { -namespace ui { -namespace spirv { - -using spv::Id; -using spv::Op; - -class Function; -class Module; - -const Id NoResult = 0; -const Id NoType = 0; - -const uint32_t BadValue = 0xFFFFFFFF; -const spv::Decoration NoPrecision = static_cast(BadValue); -const spv::MemorySemanticsMask MemorySemanticsAllMemory = - static_cast(0x3FF); - -class Instruction { - public: - Instruction(Id result_id, Id type_id, Op opcode) - : result_id_(result_id), type_id_(type_id), opcode_(opcode) {} - explicit Instruction(Op opcode) : opcode_(opcode) {} - ~Instruction() = default; - - void AddIdOperand(Id id) { operands_.push_back(id); } - - void AddIdOperands(const std::vector& ids) { - for (auto id : ids) { - operands_.push_back(id); - } - } - void AddIdOperands(std::initializer_list ids) { - for (auto id : ids) { - operands_.push_back(id); - } - } - - void AddImmediateOperand(uint32_t immediate) { - operands_.push_back(immediate); - } - - template - void AddImmediateOperand(T immediate) { - static_assert(sizeof(T) == sizeof(uint32_t), "Invalid operand size"); - operands_.push_back(static_cast(immediate)); - } - - void AddImmediateOperands(const std::vector& immediates) { - for (auto immediate : immediates) { - operands_.push_back(immediate); - } - } - - void AddImmediateOperands(std::initializer_list immediates) { - for (auto immediate : immediates) { - operands_.push_back(immediate); - } - } - - void AddStringOperand(const char* str) { - original_string_ = str; - uint32_t word; - char* word_string = reinterpret_cast(&word); - char* word_ptr = word_string; - int char_count = 0; - char c; - do { - c = *(str++); - *(word_ptr++) = c; - ++char_count; - if (char_count == 4) { - 
AddImmediateOperand(word); - word_ptr = word_string; - char_count = 0; - } - } while (c != 0); - - // deal with partial last word - if (char_count > 0) { - // pad with 0s - for (; char_count < 4; ++char_count) { - *(word_ptr++) = 0; - } - AddImmediateOperand(word); - } - } - - Op opcode() const { return opcode_; } - int operand_count() const { return static_cast(operands_.size()); } - Id result_id() const { return result_id_; } - Id type_id() const { return type_id_; } - Id id_operand(int op) const { return operands_[op]; } - uint32_t immediate_operand(int op) const { return operands_[op]; } - const char* string_operand() const { return original_string_.c_str(); } - - // Write out the binary form. - void Serialize(std::vector& out) const { - uint32_t word_count = 1; - if (type_id_) { - ++word_count; - } - if (result_id_) { - ++word_count; - } - word_count += static_cast(operands_.size()); - - out.push_back((word_count << spv::WordCountShift) | - static_cast(opcode_)); - if (type_id_) { - out.push_back(type_id_); - } - if (result_id_) { - out.push_back(result_id_); - } - for (auto operand : operands_) { - out.push_back(operand); - } - } - - private: - Instruction(const Instruction&) = delete; - - Id result_id_ = NoResult; - Id type_id_ = NoType; - Op opcode_; - std::vector operands_; - std::string original_string_; // could be optimized away; convenience for - // getting string operand -}; - -class Block { - public: - Block(Id id, Function& parent); - ~Block() { - for (size_t i = 0; i < instructions_.size(); ++i) { - delete instructions_[i]; - } - for (size_t i = 0; i < local_variables_.size(); ++i) { - delete local_variables_[i]; - } - } - - Id id() { return instructions_.front()->result_id(); } - - Function& parent() const { return parent_; } - - void AddInstruction(Instruction* instr); - void AddLocalVariable(Instruction* instr) { - local_variables_.push_back(instr); - } - - void AddPredecessor(Block* predecessor) { - predecessors_.push_back(predecessor); - } - - 
int predecessor_count() const { - return static_cast(predecessors_.size()); - } - - bool is_unreachable() const { return unreachable_; } - void set_unreachable(bool value) { unreachable_ = value; } - - bool is_terminated() const { - switch (instructions_.back()->opcode()) { - case spv::Op::OpBranch: - case spv::Op::OpBranchConditional: - case spv::Op::OpSwitch: - case spv::Op::OpKill: - case spv::Op::OpReturn: - case spv::Op::OpReturnValue: - return true; - default: - return false; - } - } - - void Serialize(std::vector& out) const { - // skip the degenerate unreachable blocks - // TODO: code gen: skip all unreachable blocks (transitive closure) - // (but, until that's done safer to keep non-degenerate - // unreachable blocks, in case others depend on something) - if (unreachable_ && instructions_.size() <= 2) { - return; - } - - instructions_[0]->Serialize(out); - for (auto variable : local_variables_) { - variable->Serialize(out); - } - for (int i = 1; i < instructions_.size(); ++i) { - instructions_[i]->Serialize(out); - } - } - - private: - Block(const Block&) = delete; - Block& operator=(Block&) = delete; - - // To enforce keeping parent and ownership in sync: - friend Function; - - std::vector instructions_; - std::vector predecessors_; - std::vector local_variables_; - Function& parent_; - - // track whether this block is known to be uncreachable (not necessarily - // true for all unreachable blocks, but should be set at least - // for the extraneous ones introduced by the builder). 
- bool unreachable_; -}; - -class Function { - public: - Function(Id id, Id resultType, Id functionType, Id firstParam, - Module& parent); - ~Function() { - for (size_t i = 0; i < parameter_instructions_.size(); ++i) { - delete parameter_instructions_[i]; - } - for (size_t i = 0; i < blocks_.size(); ++i) { - delete blocks_[i]; - } - } - - Id id() const { return function_instruction_.result_id(); } - Id param_id(int p) { return parameter_instructions_[p]->result_id(); } - - void push_block(Block* block) { blocks_.push_back(block); } - void pop_block(Block* block) { blocks_.pop_back(); } - - Module& parent() const { return parent_; } - Block* entry_block() const { return blocks_.front(); } - Block* last_block() const { return blocks_.back(); } - - void AddLocalVariable(Instruction* instr); - - Id return_type() const { return function_instruction_.type_id(); } - - void Serialize(std::vector& out) const { - // OpFunction - function_instruction_.Serialize(out); - - // OpFunctionParameter - for (auto instruction : parameter_instructions_) { - instruction->Serialize(out); - } - - // Blocks - for (auto block : blocks_) { - block->Serialize(out); - } - - Instruction end(0, 0, spv::Op::OpFunctionEnd); - end.Serialize(out); - } - - private: - Function(const Function&) = delete; - Function& operator=(Function&) = delete; - - Module& parent_; - Instruction function_instruction_; - std::vector parameter_instructions_; - std::vector blocks_; -}; - -class Module { - public: - Module() = default; - ~Module() { - for (size_t i = 0; i < functions_.size(); ++i) { - delete functions_[i]; - } - } - - void AddFunction(Function* function) { functions_.push_back(function); } - - void MapInstruction(Instruction* instr) { - spv::Id result_id = instr->result_id(); - // Map the instruction's result id. 
- if (result_id >= id_to_instruction_.size()) { - id_to_instruction_.resize(result_id + 16); - } - id_to_instruction_[result_id] = instr; - } - - Instruction* instruction(Id id) const { return id_to_instruction_[id]; } - - spv::Id type_id(Id result_id) const { - return id_to_instruction_[result_id]->type_id(); - } - - spv::StorageClass storage_class(Id type_id) const { - return (spv::StorageClass)id_to_instruction_[type_id]->immediate_operand(0); - } - - void Serialize(std::vector& out) const { - for (auto function : functions_) { - function->Serialize(out); - } - } - - private: - Module(const Module&) = delete; - - std::vector functions_; - - // Maps from result id to instruction having that result id. - std::vector id_to_instruction_; -}; - -inline Function::Function(Id id, Id result_type_id, Id function_type_id, - Id first_param_id, Module& parent) - : parent_(parent), - function_instruction_(id, result_type_id, spv::Op::OpFunction) { - // OpFunction - function_instruction_.AddImmediateOperand( - static_cast(spv::FunctionControlMask::MaskNone)); - function_instruction_.AddIdOperand(function_type_id); - parent.MapInstruction(&function_instruction_); - parent.AddFunction(this); - - // OpFunctionParameter - Instruction* type_instr = parent.instruction(function_type_id); - int param_count = type_instr->operand_count() - 1; - for (int p = 0; p < param_count; ++p) { - auto param = - new Instruction(first_param_id + p, type_instr->id_operand(p + 1), - spv::Op::OpFunctionParameter); - parent.MapInstruction(param); - parameter_instructions_.push_back(param); - } -} - -inline void Function::AddLocalVariable(Instruction* instr) { - blocks_[0]->AddLocalVariable(instr); - parent_.MapInstruction(instr); -} - -inline Block::Block(Id id, Function& parent) - : parent_(parent), unreachable_(false) { - instructions_.push_back(new Instruction(id, NoType, spv::Op::OpLabel)); -} - -inline void Block::AddInstruction(Instruction* inst) { - instructions_.push_back(inst); - if 
(inst->result_id()) { - parent_.parent().MapInstruction(inst); - } -} - -} // namespace spirv -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_SPIRV_SPIRV_IR_H_ diff --git a/src/xenia/ui/spirv/spirv_optimizer.cc b/src/xenia/ui/spirv/spirv_optimizer.cc deleted file mode 100644 index f21026f14..000000000 --- a/src/xenia/ui/spirv/spirv_optimizer.cc +++ /dev/null @@ -1,22 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/spirv/spirv_optimizer.h" - -namespace xe { -namespace ui { -namespace spirv { - -SpirvOptimizer::SpirvOptimizer() = default; - -SpirvOptimizer::~SpirvOptimizer() = default; - -} // namespace spirv -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/spirv/spirv_optimizer.h b/src/xenia/ui/spirv/spirv_optimizer.h deleted file mode 100644 index b838feebc..000000000 --- a/src/xenia/ui/spirv/spirv_optimizer.h +++ /dev/null @@ -1,31 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_UI_SPIRV_SPIRV_OPTIMIZER_H_ -#define XENIA_UI_SPIRV_SPIRV_OPTIMIZER_H_ - -#include "xenia/ui/spirv/spirv_util.h" - -namespace xe { -namespace ui { -namespace spirv { - -class SpirvOptimizer { - public: - SpirvOptimizer(); - ~SpirvOptimizer(); - - private: -}; - -} // namespace spirv -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_SPIRV_SPIRV_OPTIMIZER_H_ From 4c4a641096f0012731dc225c9ff9e0f7ca98c289 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 18 Feb 2016 16:43:17 -0800 Subject: [PATCH 011/145] WIP rough sketch of vulkan backend structure. --- src/xenia/base/memory.cc | 4 + src/xenia/base/memory.h | 2 + src/xenia/gpu/vulkan/buffer_cache.cc | 330 ++++++++++ src/xenia/gpu/vulkan/buffer_cache.h | 111 ++++ src/xenia/gpu/vulkan/pipeline_cache.cc | 335 ++++++++++ src/xenia/gpu/vulkan/pipeline_cache.h | 167 +++++ src/xenia/gpu/vulkan/render_cache.cc | 45 ++ src/xenia/gpu/vulkan/render_cache.h | 46 ++ src/xenia/gpu/vulkan/texture_cache.cc | 37 ++ src/xenia/gpu/vulkan/texture_cache.h | 47 ++ .../gpu/vulkan/vulkan_command_processor.cc | 601 ++++++++---------- .../gpu/vulkan/vulkan_command_processor.h | 121 +--- src/xenia/ui/vulkan/fenced_pools.cc | 81 +++ src/xenia/ui/vulkan/fenced_pools.h | 200 ++++++ 14 files changed, 1691 insertions(+), 436 deletions(-) create mode 100644 src/xenia/gpu/vulkan/buffer_cache.cc create mode 100644 src/xenia/gpu/vulkan/buffer_cache.h create mode 100644 src/xenia/gpu/vulkan/pipeline_cache.cc create mode 100644 src/xenia/gpu/vulkan/pipeline_cache.h create mode 100644 src/xenia/gpu/vulkan/render_cache.cc create mode 100644 src/xenia/gpu/vulkan/render_cache.h create mode 100644 src/xenia/gpu/vulkan/texture_cache.cc create mode 100644 src/xenia/gpu/vulkan/texture_cache.h create mode 100644 src/xenia/ui/vulkan/fenced_pools.cc create mode 100644 src/xenia/ui/vulkan/fenced_pools.h diff --git a/src/xenia/base/memory.cc 
b/src/xenia/base/memory.cc index 5656b9798..f83b01d72 100644 --- a/src/xenia/base/memory.cc +++ b/src/xenia/base/memory.cc @@ -18,6 +18,10 @@ namespace xe { // http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/f2bc76cc65ffba51a141950f98e75364e49df874/entry/volk/kernels/volk/volk_32u_byteswap.h // http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/2c4c371885c31222362f70a1cd714415d1398021/entry/volk/kernels/volk/volk_64u_byteswap.h +void copy_128_aligned(void* dest, const void* src, size_t count) { + std::memcpy(dest, src, count * 16); +} + void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src, size_t count) { return copy_and_swap_16_unaligned(dest, src, count); diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h index 05517a8e8..183843416 100644 --- a/src/xenia/base/memory.h +++ b/src/xenia/base/memory.h @@ -121,6 +121,8 @@ inline void* low_address(void* address) { return reinterpret_cast(uint64_t(address) & 0xFFFFFFFF); } +void copy_128_aligned(void* dest, const void* src, size_t count); + void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src, size_t count); void copy_and_swap_16_unaligned(uint16_t* dest, const uint16_t* src, diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc new file mode 100644 index 000000000..4cace24ba --- /dev/null +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -0,0 +1,330 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/buffer_cache.h" + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/memory.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +using xe::ui::vulkan::CheckResult; + +BufferCache::BufferCache(RegisterFile* register_file, + ui::vulkan::VulkanDevice* device, size_t capacity) + : register_file_(register_file), + device_(*device), + transient_capacity_(capacity) { + // Uniform buffer. + VkBufferCreateInfo uniform_buffer_info; + uniform_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + uniform_buffer_info.pNext = nullptr; + uniform_buffer_info.flags = 0; + uniform_buffer_info.size = transient_capacity_; + uniform_buffer_info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + uniform_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + uniform_buffer_info.queueFamilyIndexCount = 0; + uniform_buffer_info.pQueueFamilyIndices = nullptr; + auto err = vkCreateBuffer(device_, &uniform_buffer_info, nullptr, + &transient_uniform_buffer_); + CheckResult(err, "vkCreateBuffer"); + + // Index buffer. + VkBufferCreateInfo index_buffer_info; + index_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + index_buffer_info.pNext = nullptr; + index_buffer_info.flags = 0; + index_buffer_info.size = transient_capacity_; + index_buffer_info.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + index_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + index_buffer_info.queueFamilyIndexCount = 0; + index_buffer_info.pQueueFamilyIndices = nullptr; + err = vkCreateBuffer(device_, &index_buffer_info, nullptr, + &transient_index_buffer_); + CheckResult(err, "vkCreateBuffer"); + + // Vertex buffer. 
+ VkBufferCreateInfo vertex_buffer_info; + vertex_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + vertex_buffer_info.pNext = nullptr; + vertex_buffer_info.flags = 0; + vertex_buffer_info.size = transient_capacity_; + vertex_buffer_info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + vertex_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + vertex_buffer_info.queueFamilyIndexCount = 0; + vertex_buffer_info.pQueueFamilyIndices = nullptr; + err = vkCreateBuffer(*device, &vertex_buffer_info, nullptr, + &transient_vertex_buffer_); + CheckResult(err, "vkCreateBuffer"); + + // Allocate the underlying buffer we use for all storage. + // We query all types and take the max alignment. + VkMemoryRequirements uniform_buffer_requirements; + VkMemoryRequirements index_buffer_requirements; + VkMemoryRequirements vertex_buffer_requirements; + vkGetBufferMemoryRequirements(device_, transient_uniform_buffer_, + &uniform_buffer_requirements); + vkGetBufferMemoryRequirements(device_, transient_index_buffer_, + &index_buffer_requirements); + vkGetBufferMemoryRequirements(device_, transient_vertex_buffer_, + &vertex_buffer_requirements); + uniform_buffer_alignment_ = uniform_buffer_requirements.alignment; + index_buffer_alignment_ = index_buffer_requirements.alignment; + vertex_buffer_alignment_ = vertex_buffer_requirements.alignment; + VkMemoryRequirements buffer_requirements; + buffer_requirements.size = transient_capacity_; + buffer_requirements.alignment = + std::max(uniform_buffer_requirements.alignment, + std::max(index_buffer_requirements.alignment, + vertex_buffer_requirements.alignment)); + buffer_requirements.memoryTypeBits = + uniform_buffer_requirements.memoryTypeBits | + index_buffer_requirements.memoryTypeBits | + vertex_buffer_requirements.memoryTypeBits; + transient_buffer_memory_ = device->AllocateMemory( + buffer_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + + // Alias all buffers to our memory. 
+ vkBindBufferMemory(device_, transient_uniform_buffer_, + transient_buffer_memory_, 0); + vkBindBufferMemory(device_, transient_index_buffer_, transient_buffer_memory_, + 0); + vkBindBufferMemory(device_, transient_vertex_buffer_, + transient_buffer_memory_, 0); + + // Map memory and keep it mapped while we use it. + err = vkMapMemory(device_, transient_buffer_memory_, 0, VK_WHOLE_SIZE, 0, + &transient_buffer_data_); + CheckResult(err, "vkMapMemory"); + + // Descriptor pool used for all of our cached descriptors. + // In the steady state we don't allocate anything, so these are all manually + // managed. + VkDescriptorPoolCreateInfo descriptor_pool_info; + descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + descriptor_pool_info.pNext = nullptr; + descriptor_pool_info.flags = + VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; + descriptor_pool_info.maxSets = 1; + VkDescriptorPoolSize pool_sizes[1]; + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + pool_sizes[0].descriptorCount = 2; + descriptor_pool_info.poolSizeCount = 1; + descriptor_pool_info.pPoolSizes = pool_sizes; + err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr, + &descriptor_pool_); + CheckResult(err, "vkCreateDescriptorPool"); + + // Create the descriptor set layout used for our uniform buffer. + // As it is a static binding that uses dynamic offsets during draws we can + // create this once and reuse it forever. 
+ VkDescriptorSetLayoutBinding vertex_uniform_binding; + vertex_uniform_binding.binding = 0; + vertex_uniform_binding.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + vertex_uniform_binding.descriptorCount = 1; + vertex_uniform_binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + vertex_uniform_binding.pImmutableSamplers = nullptr; + VkDescriptorSetLayoutBinding fragment_uniform_binding; + fragment_uniform_binding.binding = 1; + fragment_uniform_binding.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + fragment_uniform_binding.descriptorCount = 1; + fragment_uniform_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + fragment_uniform_binding.pImmutableSamplers = nullptr; + VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info; + descriptor_set_layout_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptor_set_layout_info.pNext = nullptr; + descriptor_set_layout_info.flags = 0; + VkDescriptorSetLayoutBinding uniform_bindings[] = { + vertex_uniform_binding, fragment_uniform_binding, + }; + descriptor_set_layout_info.bindingCount = + static_cast(xe::countof(uniform_bindings)); + descriptor_set_layout_info.pBindings = uniform_bindings; + err = vkCreateDescriptorSetLayout(device_, &descriptor_set_layout_info, + nullptr, &descriptor_set_layout_); + CheckResult(err, "vkCreateDescriptorSetLayout"); + + // Create the descriptor we'll use for the uniform buffer. + // This is what we hand out to everyone (who then also needs to use our + // offsets). 
+ VkDescriptorSetAllocateInfo set_alloc_info; + set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + set_alloc_info.pNext = nullptr; + set_alloc_info.descriptorPool = descriptor_pool_; + set_alloc_info.descriptorSetCount = 1; + set_alloc_info.pSetLayouts = &descriptor_set_layout_; + err = vkAllocateDescriptorSets(device_, &set_alloc_info, + &transient_descriptor_set_); + CheckResult(err, "vkAllocateDescriptorSets"); +} + +BufferCache::~BufferCache() { + vkFreeDescriptorSets(device_, descriptor_pool_, 1, + &transient_descriptor_set_); + vkDestroyDescriptorSetLayout(device_, descriptor_set_layout_, nullptr); + vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr); + vkUnmapMemory(device_, transient_buffer_memory_); + vkFreeMemory(device_, transient_buffer_memory_, nullptr); + vkDestroyBuffer(device_, transient_uniform_buffer_, nullptr); + vkDestroyBuffer(device_, transient_index_buffer_, nullptr); + vkDestroyBuffer(device_, transient_vertex_buffer_, nullptr); +} + +VkDeviceSize BufferCache::UploadConstantRegisters( + const Shader::ConstantRegisterMap& constant_register_map) { + // Allocate space in the buffer for our data. + auto offset = AllocateTransientData(uniform_buffer_alignment_, + constant_register_map.packed_byte_length); + if (offset == VK_WHOLE_SIZE) { + // OOM. + return VK_WHOLE_SIZE; + } + + // Run through registers and copy them into the buffer. + // TODO(benvanik): optimize this - it's hit twice every call. 
+ const auto& values = register_file_->values; + uint8_t* dest_ptr = + reinterpret_cast(transient_buffer_data_) + offset; + for (int i = 0; i < 4; ++i) { + auto piece = constant_register_map.float_bitmap[i]; + if (!piece) { + continue; + } + for (int j = 0, sh = 0; j < 64; ++j, sh << 1) { + if (piece & sh) { + xe::copy_128_aligned( + dest_ptr, + &values[XE_GPU_REG_SHADER_CONSTANT_000_X + i * 64 + j].f32, 1); + dest_ptr += 16; + } + } + } + for (int i = 0; i < 32; ++i) { + if (constant_register_map.int_bitmap & (1 << i)) { + xe::store(dest_ptr, + values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32); + dest_ptr += 4; + } + } + for (int i = 0; i < 8; ++i) { + if (constant_register_map.bool_bitmap[i]) { + xe::store( + dest_ptr, values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32); + dest_ptr += 4; + } + } + + return offset; +} + +std::pair BufferCache::UploadIndexBuffer( + const void* source_ptr, size_t source_length, IndexFormat format) { + // TODO(benvanik): check cache. + + // Allocate space in the buffer for our data. + auto offset = AllocateTransientData(index_buffer_alignment_, source_length); + if (offset == VK_WHOLE_SIZE) { + // OOM. + return {nullptr, VK_WHOLE_SIZE}; + } + + // Copy data into the buffer. + // TODO(benvanik): get min/max indices and pass back? + // TODO(benvanik): memcpy then use compute shaders to swap? + if (format == IndexFormat::kInt16) { + // Endian::k8in16, swap half-words. + xe::copy_and_swap_16_aligned( + reinterpret_cast(transient_buffer_data_) + offset, + reinterpret_cast(source_ptr), source_length / 2); + } else if (format == IndexFormat::kInt32) { + // Endian::k8in32, swap words. + xe::copy_and_swap_32_aligned( + reinterpret_cast(transient_buffer_data_) + offset, + reinterpret_cast(source_ptr), source_length / 4); + } + + return {transient_index_buffer_, offset}; +} + +std::pair BufferCache::UploadVertexBuffer( + const void* source_ptr, size_t source_length) { + // TODO(benvanik): check cache. 
+ + // Allocate space in the buffer for our data. + auto offset = AllocateTransientData(vertex_buffer_alignment_, source_length); + if (offset == VK_WHOLE_SIZE) { + // OOM. + return {nullptr, VK_WHOLE_SIZE}; + } + + // Copy data into the buffer. + // TODO(benvanik): memcpy then use compute shaders to swap? + // Endian::k8in32, swap words. + xe::copy_and_swap_32_aligned( + reinterpret_cast(transient_buffer_data_) + offset, + reinterpret_cast(source_ptr), source_length / 4); + + return {transient_vertex_buffer_, offset}; +} + +VkDeviceSize BufferCache::AllocateTransientData(size_t alignment, + size_t length) { + // Try to add to end, wrapping if required. + + // Check to ensure there is space. + if (false) { + // Consume all fences. + } + + // Slice off our bit. + + return VK_WHOLE_SIZE; +} + +void BufferCache::Flush(VkCommandBuffer command_buffer) { + // If we are flushing a big enough chunk queue up an event. + // We don't want to do this for everything but often enough so that we won't + // run out of space. + if (true) { + // VkEvent finish_event; + // vkCmdSetEvent(cmd_buffer, finish_event, + // VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + } + + // Flush memory. + // TODO(benvanik): subrange. + VkMappedMemoryRange dirty_range; + dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + dirty_range.pNext = nullptr; + dirty_range.memory = transient_buffer_memory_; + dirty_range.offset = 0; + dirty_range.size = transient_capacity_; + vkFlushMappedMemoryRanges(device_, 1, &dirty_range); +} + +void BufferCache::InvalidateCache() { + // TODO(benvanik): caching. +} + +void BufferCache::ClearCache() { + // TODO(benvanik): caching. 
+} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h new file mode 100644 index 000000000..661e30aa7 --- /dev/null +++ b/src/xenia/gpu/vulkan/buffer_cache.h @@ -0,0 +1,111 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_BUFFER_CACHE_H_ +#define XENIA_GPU_VULKAN_BUFFER_CACHE_H_ + +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/shader.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_device.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +// Efficiently manages buffers of various kinds. +// Used primarily for uploading index and vertex data from guest memory and +// transient data like shader constants. +class BufferCache { + public: + BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device, + size_t capacity); + ~BufferCache(); + + // Descriptor set containing the dynamic uniform buffer used for constant + // uploads. Used in conjunction with a dynamic offset returned by + // UploadConstantRegisters. + // The set contains two bindings: + // binding = 0: for use in vertex shaders + // binding = 1: for use in fragment shaders + VkDescriptorSet constant_descriptor_set() const { + return transient_descriptor_set_; + } + + // Uploads the constants specified in the register maps to the transient + // uniform storage buffer. + // The registers are tightly packed in order as [floats, ints, bools]. 
+ // Returns an offset that can be used with the transient_descriptor_set or + // VK_WHOLE_SIZE if the constants could not be uploaded (OOM). + VkDeviceSize UploadConstantRegisters( + const Shader::ConstantRegisterMap& constant_register_map); + + // Uploads index buffer data from guest memory, possibly eliding with + // recently uploaded data or cached copies. + // Returns a buffer and offset that can be used with vkCmdBindIndexBuffer. + // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). + std::pair UploadIndexBuffer(const void* source_ptr, + size_t source_length, + IndexFormat format); + + // Uploads vertex buffer data from guest memory, possibly eliding with + // recently uploaded data or cached copies. + // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers. + // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). + std::pair UploadVertexBuffer(const void* source_ptr, + size_t source_length); + + // Flushes all pending data to the GPU. + // Until this is called the GPU is not guaranteed to see any data. + // The given command buffer will be used to queue up events so that the + // cache can determine when data has been consumed. + void Flush(VkCommandBuffer command_buffer); + + // Marks the cache as potentially invalid. + // This is not as strong as ClearCache and is a hint that any and all data + // should be verified before being reused. + void InvalidateCache(); + + // Clears all cached content and prevents future elision with pending data. + void ClearCache(); + + private: + // Allocates a block of memory in the transient buffer. + // Returns VK_WHOLE_SIZE if requested amount of memory is not available. + VkDeviceSize AllocateTransientData(size_t alignment, size_t length); + + RegisterFile* register_file_ = nullptr; + VkDevice device_ = nullptr; + + // Staging ringbuffer we cycle through fast. Used for data we don't + // plan on keeping past the current frame. 
+ size_t transient_capacity_ = 0; + VkBuffer transient_uniform_buffer_ = nullptr; + VkBuffer transient_index_buffer_ = nullptr; + VkBuffer transient_vertex_buffer_ = nullptr; + VkDeviceMemory transient_buffer_memory_ = nullptr; + void* transient_buffer_data_ = nullptr; + + // Required alignemnts for our various types. + // All allocations must start at the appropriate alignment. + VkDeviceSize uniform_buffer_alignment_ = 0; + VkDeviceSize index_buffer_alignment_ = 0; + VkDeviceSize vertex_buffer_alignment_ = 0; + + VkDescriptorPool descriptor_pool_ = nullptr; + VkDescriptorSetLayout descriptor_set_layout_ = nullptr; + VkDescriptorSet transient_descriptor_set_ = nullptr; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_BUFFER_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc new file mode 100644 index 000000000..e09931833 --- /dev/null +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -0,0 +1,335 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/pipeline_cache.h" + +#include "third_party/xxhash/xxhash.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/memory.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +using xe::ui::vulkan::CheckResult; + +PipelineCache::PipelineCache(RegisterFile* register_file, + ui::vulkan::VulkanDevice* device) + : register_file_(register_file), device_(*device) {} + +PipelineCache::~PipelineCache() { + // Destroy all shaders. + for (auto it : shader_map_) { + delete it.second; + } + shader_map_.clear(); +} + +VulkanShader* PipelineCache::LoadShader(ShaderType shader_type, + uint32_t guest_address, + const uint32_t* host_address, + uint32_t dword_count) { + // Hash the input memory and lookup the shader. + uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); + auto it = shader_map_.find(data_hash); + if (it != shader_map_.end()) { + // Shader has been previously loaded. + return it->second; + } + + // Always create the shader and stash it away. + // We need to track it even if it fails translation so we know not to try + // again. + VulkanShader* shader = + new VulkanShader(shader_type, data_hash, host_address, dword_count); + shader_map_.insert({data_hash, shader}); + + // Perform translation. + // If this fails the shader will be marked as invalid and ignored later. + if (!shader_translator_.Translate(shader)) { + XELOGE("Shader translation failed; marking shader as ignored"); + return shader; + } + + // Prepare the shader for use (creates our VkShaderModule). + // It could still fail at this point. 
+ if (!shader->Prepare()) { + XELOGE("Shader preparation failed; marking shader as ignored"); + return shader; + } + + if (shader->is_valid()) { + XELOGGPU("Generated %s shader at 0x%.8X (%db):\n%s", + shader_type == ShaderType::kVertex ? "vertex" : "pixel", + guest_address, dword_count * 4, + shader->ucode_disassembly().c_str()); + } + + // Dump shader files if desired. + if (!FLAGS_dump_shaders.empty()) { + shader->Dump(FLAGS_dump_shaders, "vk"); + } + + return shader; +} + +bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, + VkRenderPass render_pass, + PrimitiveType primitive_type) { + return false; +} + +void PipelineCache::ClearCache() { + // TODO(benvanik): caching. +} + +bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { + uint32_t value = register_file_->values[register_name].u32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + +bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) { + float value = register_file_->values[register_name].f32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateShaders( + PrimitiveType prim_type) { + auto& regs = update_shaders_regs_; + + // These are the constant base addresses/ranges for shaders. + // We have these hardcoded right now cause nothing seems to differ. 
+ assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == + 0x000FF000 || + register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); + assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == + 0x000FF100 || + register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + + bool dirty = false; + dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); + dirty |= SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); + // dirty |= regs.vertex_shader != active_vertex_shader_; + // dirty |= regs.pixel_shader != active_pixel_shader_; + dirty |= regs.prim_type != prim_type; + if (!dirty) { + return UpdateStatus::kCompatible; + } + // regs.vertex_shader = static_cast(active_vertex_shader_); + // regs.pixel_shader = static_cast(active_pixel_shader_); + regs.prim_type = prim_type; + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargets() { + auto& regs = update_render_targets_regs_; + + bool dirty = false; + dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); + dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + dirty |= SetShadowRegister(®s.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); + dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); + dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); + dirty |= SetShadowRegister(®s.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); + dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); + dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); + dirty |= + SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); + dirty |= SetShadowRegister(®s.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + SCOPE_profile_cpu_f("gpu"); 
+ + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateState( + PrimitiveType prim_type) { + bool mismatch = false; + +#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ + { \ + if (status == UpdateStatus::kError) { \ + XELOGE(error_message); \ + return status; \ + } else if (status == UpdateStatus::kMismatch) { \ + mismatch = true; \ + } \ + } + + UpdateStatus status; + status = UpdateViewportState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state"); + status = UpdateRasterizerState(prim_type); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state"); + status = UpdateBlendState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state"); + status = UpdateDepthStencilState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state"); + + return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateViewportState() { + auto& regs = update_viewport_state_regs_; + + bool dirty = false; + // dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl, + // XE_GPU_REG_PA_CL_CLIP_CNTL); + dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + dirty |= SetShadowRegister(®s.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL); + dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.pa_sc_window_offset, + XE_GPU_REG_PA_SC_WINDOW_OFFSET); + dirty |= SetShadowRegister(®s.pa_sc_window_scissor_tl, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); + dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); + dirty |= SetShadowRegister(®s.pa_cl_vport_xoffset, + XE_GPU_REG_PA_CL_VPORT_XOFFSET); + dirty |= SetShadowRegister(®s.pa_cl_vport_yoffset, + XE_GPU_REG_PA_CL_VPORT_YOFFSET); + dirty |= SetShadowRegister(®s.pa_cl_vport_zoffset, + XE_GPU_REG_PA_CL_VPORT_ZOFFSET); + dirty |= 
SetShadowRegister(®s.pa_cl_vport_xscale, + XE_GPU_REG_PA_CL_VPORT_XSCALE); + dirty |= SetShadowRegister(®s.pa_cl_vport_yscale, + XE_GPU_REG_PA_CL_VPORT_YSCALE); + dirty |= SetShadowRegister(®s.pa_cl_vport_zscale, + XE_GPU_REG_PA_CL_VPORT_ZSCALE); + + // Much of this state machine is extracted from: + // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c + // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0. + // = false: multiply the X, Y coordinates by 1/W0. + // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. + // = false: multiply the Z coordinate by 1/W0. + // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to + // get 1/W0. + // draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f, + // (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f, + // (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f); + + // Done in VS, no need to flush state. 
+ // if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) { + // draw_batcher_.set_window_scalar(1.0f, 1.0f); + //} else { + // draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f); + //} + + if (!dirty) { + return UpdateStatus::kCompatible; + } + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState( + PrimitiveType prim_type) { + auto& regs = update_rasterizer_state_regs_; + + bool dirty = false; + dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_tl, + XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL); + dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_br, + XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); + dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, + XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); + dirty |= regs.prim_type != prim_type; + if (!dirty) { + return UpdateStatus::kCompatible; + } + + regs.prim_type = prim_type; + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateBlendState() { + auto& reg_file = *register_file_; + auto& regs = update_blend_state_regs_; + + // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE + // Deprecated in GL, implemented in shader. 
+ // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; + // uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32; + // draw_batcher_.set_alpha_test((color_control & 0x4) != 0, // + // ALPAHTESTENABLE + // color_control & 0x7, // ALPHAFUNC + // reg_file[XE_GPU_REG_RB_ALPHA_REF].f32); + + bool dirty = false; + dirty |= + SetShadowRegister(®s.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0); + dirty |= + SetShadowRegister(®s.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1); + dirty |= + SetShadowRegister(®s.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); + dirty |= + SetShadowRegister(®s.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); + dirty |= SetShadowRegister(®s.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED); + dirty |= SetShadowRegister(®s.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN); + dirty |= SetShadowRegister(®s.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE); + dirty |= SetShadowRegister(®s.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { + auto& regs = update_depth_stencil_state_regs_; + + bool dirty = false; + dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); + dirty |= + SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h new file mode 100644 index 000000000..56727e67a --- /dev/null +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -0,0 +1,167 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + 
****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_PIPELINE_CACHE_H_ +#define XENIA_GPU_VULKAN_PIPELINE_CACHE_H_ + +#include + +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/spirv/spirv_disassembler.h" +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_device.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +// Configures and caches pipelines based on render state. +// This is responsible for properly setting all state required for a draw +// including shaders, various blend/etc options, and input configuration. +class PipelineCache { + public: + PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); + ~PipelineCache(); + + // Loads a shader from the cache, possibly translating it. + VulkanShader* LoadShader(ShaderType shader_type, uint32_t guest_address, + const uint32_t* host_address, uint32_t dword_count); + + // Configures a pipeline using the current render state and the given render + // pass. If a previously available pipeline is available it will be used, + // otherwise a new one may be created. Any state that can be set dynamically + // in the command buffer is issued at this time. + // Returns whether the pipeline could be successfully created. + bool ConfigurePipeline(VkCommandBuffer command_buffer, + VkRenderPass render_pass, + PrimitiveType primitive_type); + + // Currently configured pipeline layout, if any. + VkPipelineLayout current_pipeline_layout() const { return nullptr; } + // Currently configured vertex shader, if any. 
+ VulkanShader* current_vertex_shader() const { return nullptr; } + // Currently configured pixel shader, if any. + VulkanShader* current_pixel_shader() const { return nullptr; } + + // Clears all cached content. + void ClearCache(); + + private: + // TODO(benvanik): geometry shader cache. + // TODO(benvanik): translated shader cache. + // TODO(benvanik): pipeline layouts. + // TODO(benvanik): pipeline cache. + + RegisterFile* register_file_ = nullptr; + VkDevice device_ = nullptr; + + SpirvShaderTranslator shader_translator_; + xe::ui::spirv::SpirvDisassembler disassembler_; + // All loaded shaders mapped by their guest hash key. + std::unordered_map shader_map_; + + private: + enum class UpdateStatus { + kCompatible, + kMismatch, + kError, + }; + + UpdateStatus UpdateShaders(PrimitiveType prim_type); + UpdateStatus UpdateRenderTargets(); + UpdateStatus UpdateState(PrimitiveType prim_type); + UpdateStatus UpdateViewportState(); + UpdateStatus UpdateRasterizerState(PrimitiveType prim_type); + UpdateStatus UpdateBlendState(); + UpdateStatus UpdateDepthStencilState(); + + bool SetShadowRegister(uint32_t* dest, uint32_t register_name); + bool SetShadowRegister(float* dest, uint32_t register_name); + + struct UpdateRenderTargetsRegisters { + uint32_t rb_modecontrol; + uint32_t rb_surface_info; + uint32_t rb_color_info; + uint32_t rb_color1_info; + uint32_t rb_color2_info; + uint32_t rb_color3_info; + uint32_t rb_color_mask; + uint32_t rb_depthcontrol; + uint32_t rb_stencilrefmask; + uint32_t rb_depth_info; + + UpdateRenderTargetsRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_render_targets_regs_; + struct UpdateViewportStateRegisters { + // uint32_t pa_cl_clip_cntl; + uint32_t rb_surface_info; + uint32_t pa_cl_vte_cntl; + uint32_t pa_su_sc_mode_cntl; + uint32_t pa_sc_window_offset; + uint32_t pa_sc_window_scissor_tl; + uint32_t pa_sc_window_scissor_br; + float pa_cl_vport_xoffset; + float pa_cl_vport_yoffset; + float 
pa_cl_vport_zoffset; + float pa_cl_vport_xscale; + float pa_cl_vport_yscale; + float pa_cl_vport_zscale; + + UpdateViewportStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_viewport_state_regs_; + struct UpdateRasterizerStateRegisters { + uint32_t pa_su_sc_mode_cntl; + uint32_t pa_sc_screen_scissor_tl; + uint32_t pa_sc_screen_scissor_br; + uint32_t multi_prim_ib_reset_index; + PrimitiveType prim_type; + + UpdateRasterizerStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_rasterizer_state_regs_; + struct UpdateBlendStateRegisters { + uint32_t rb_blendcontrol[4]; + float rb_blend_rgba[4]; + + UpdateBlendStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_blend_state_regs_; + struct UpdateDepthStencilStateRegisters { + uint32_t rb_depthcontrol; + uint32_t rb_stencilrefmask; + + UpdateDepthStencilStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_depth_stencil_state_regs_; + struct UpdateShadersRegisters { + PrimitiveType prim_type; + uint32_t pa_su_sc_mode_cntl; + uint32_t sq_program_cntl; + uint32_t sq_context_misc; + VulkanShader* vertex_shader; + VulkanShader* pixel_shader; + + UpdateShadersRegisters() { Reset(); } + void Reset() { + sq_program_cntl = 0; + vertex_shader = pixel_shader = nullptr; + } + } update_shaders_regs_; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_PIPELINE_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc new file mode 100644 index 000000000..fef05f11f --- /dev/null +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -0,0 +1,45 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben 
Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/render_cache.h" + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/memory.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +using xe::ui::vulkan::CheckResult; + +RenderCache::RenderCache(RegisterFile* register_file, + ui::vulkan::VulkanDevice* device) + : register_file_(register_file), device_(*device) {} + +RenderCache::~RenderCache() = default; + +VkRenderPass RenderCache::BeginRenderPass(VkCommandBuffer command_buffer) { + return nullptr; +} + +void RenderCache::EndRenderPass() { + // +} + +void RenderCache::ClearCache() { + // TODO(benvanik): caching. +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h new file mode 100644 index 000000000..fb7c84e6a --- /dev/null +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -0,0 +1,46 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_RENDER_CACHE_H_ +#define XENIA_GPU_VULKAN_RENDER_CACHE_H_ + +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/shader.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_device.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +// Configures and caches pipelines based on render state. +// This is responsible for properly setting all state required for a draw +// including shaders, various blend/etc options, and input configuration. +class RenderCache { + public: + RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); + ~RenderCache(); + + VkRenderPass BeginRenderPass(VkCommandBuffer command_buffer); + void EndRenderPass(); + + // Clears all cached content. + void ClearCache(); + + private: + RegisterFile* register_file_ = nullptr; + VkDevice device_ = nullptr; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_RENDER_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc new file mode 100644 index 000000000..bf95ef6a4 --- /dev/null +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -0,0 +1,37 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/texture_cache.h" + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/memory.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +using xe::ui::vulkan::CheckResult; + +TextureCache::TextureCache(RegisterFile* register_file, + ui::vulkan::VulkanDevice* device) + : register_file_(register_file), device_(*device) {} + +TextureCache::~TextureCache() = default; + +void TextureCache::ClearCache() { + // TODO(benvanik): caching. +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h new file mode 100644 index 000000000..3545fb72d --- /dev/null +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -0,0 +1,47 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ +#define XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ + +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/shader.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_device.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +// Configures and caches pipelines based on render state. +// This is responsible for properly setting all state required for a draw +// including shaders, various blend/etc options, and input configuration. 
+class TextureCache { + public: + TextureCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); + ~TextureCache(); + + // TODO(benvanik): UploadTexture. + // TODO(benvanik): Resolve. + // TODO(benvanik): ReadTexture. + + // Clears all cached content. + void ClearCache(); + + private: + RegisterFile* register_file_ = nullptr; + VkDevice device_ = nullptr; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 3320d2927..6490de44a 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -20,12 +20,16 @@ #include "xenia/gpu/vulkan/vulkan_gpu_flags.h" #include "xenia/gpu/vulkan/vulkan_graphics_system.h" #include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/vulkan_util.h" namespace xe { namespace gpu { namespace vulkan { using namespace xe::gpu::xenos; +using xe::ui::vulkan::CheckResult; + +constexpr size_t kDefaultBufferCacheCapacity = 256 * 1024 * 1024; VulkanCommandProcessor::VulkanCommandProcessor( VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state) @@ -33,7 +37,14 @@ VulkanCommandProcessor::VulkanCommandProcessor( VulkanCommandProcessor::~VulkanCommandProcessor() = default; -void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); } +void VulkanCommandProcessor::ClearCaches() { + CommandProcessor::ClearCaches(); + + buffer_cache_->ClearCache(); + pipeline_cache_->ClearCache(); + render_cache_->ClearCache(); + texture_cache_->ClearCache(); +} bool VulkanCommandProcessor::SetupContext() { if (!CommandProcessor::SetupContext()) { @@ -41,10 +52,47 @@ bool VulkanCommandProcessor::SetupContext() { return false; } + // Acquire our device and queue. 
+ auto context = static_cast(context_.get()); + device_ = context->device(); + queue_ = device_->AcquireQueue(); + if (!queue_) { + // Need to reuse primary queue (with locks). + queue_ = device_->primary_queue(); + queue_mutex_ = &device_->primary_queue_mutex(); + } + + // Setup fenced pools used for all our per-frame/per-draw resources. + command_buffer_pool_ = std::make_unique( + *device_, device_->queue_family_index(), VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + // Initialize the state machine caches. + buffer_cache_ = std::make_unique(register_file_, device_, + kDefaultBufferCacheCapacity); + pipeline_cache_ = std::make_unique(register_file_, device_); + render_cache_ = std::make_unique(register_file_, device_); + texture_cache_ = std::make_unique(register_file_, device_); + return true; } void VulkanCommandProcessor::ShutdownContext() { + // TODO(benvanik): wait until idle. + + buffer_cache_.reset(); + pipeline_cache_.reset(); + render_cache_.reset(); + texture_cache_.reset(); + + // Free all pools. This must come after all of our caches clean up. + command_buffer_pool_.reset(); + + // Release queue, if were using an acquired one. + if (!queue_mutex_) { + device_->ReleaseQueue(queue_); + queue_ = nullptr; + } + CommandProcessor::ShutdownContext(); } @@ -55,7 +103,8 @@ void VulkanCommandProcessor::MakeCoherent() { CommandProcessor::MakeCoherent(); if (status_host & 0x80000000ul) { - // scratch_buffer_.ClearCache(); + // TODO(benvanik): less-fine-grained clearing. 
+ buffer_cache_->InvalidateCache(); } } @@ -103,346 +152,167 @@ Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count) { - // return shader_cache_.LookupOrInsertShader(shader_type, host_address, - // dword_count); - return nullptr; + return pipeline_cache_->LoadShader(shader_type, guest_address, host_address, + dword_count); } -bool VulkanCommandProcessor::IssueDraw(PrimitiveType prim_type, +bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) { + auto& regs = *register_file_; + #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES - // Skip all drawing for now - what did you expect? :) - return true; - - bool draw_valid = false; - // if (index_buffer_info) { - // draw_valid = draw_batcher_.BeginDrawElements(prim_type, index_count, - // index_buffer_info->format); - //} else { - // draw_valid = draw_batcher_.BeginDrawArrays(prim_type, index_count); - //} - if (!draw_valid) { - return false; - } - - auto& regs = *register_file_; - auto enable_mode = static_cast(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); if (enable_mode == ModeControl::kIgnore) { // Ignored. - // draw_batcher_.DiscardDraw(); return true; } else if (enable_mode == ModeControl::kCopy) { // Special copy handling. - // draw_batcher_.DiscardDraw(); return IssueCopy(); } -#define CHECK_ISSUE_UPDATE_STATUS(status, mismatch, error_message) \ - { \ - if (status == UpdateStatus::kError) { \ - XELOGE(error_message); \ - /*draw_batcher_.DiscardDraw(); */ \ - return false; \ - } else if (status == UpdateStatus::kMismatch) { \ - mismatch = true; \ - } \ - } + // TODO(benvanik): bigger batches. 
+ command_buffer_pool_->BeginBatch(); + VkCommandBuffer command_buffer = command_buffer_pool_->AcquireEntry(); + VkCommandBufferBeginInfo command_buffer_begin_info; + command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + command_buffer_begin_info.pNext = nullptr; + command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + command_buffer_begin_info.pInheritanceInfo = nullptr; + auto err = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info); + CheckResult(err, "vkBeginCommandBuffer"); - UpdateStatus status; - bool mismatch = false; - status = UpdateShaders(prim_type); - CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to prepare draw shaders"); - status = UpdateRenderTargets(); - CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render targets"); - // if (!active_framebuffer_) { - // // No framebuffer, so nothing we do will actually have an effect. - // // Treat it as a no-op. - // // TODO(benvanik): if we have a vs export, still allow it to go. - // draw_batcher_.DiscardDraw(); - // return true; - //} - - status = UpdateState(prim_type); - CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render state"); - status = PopulateSamplers(); - CHECK_ISSUE_UPDATE_STATUS(status, mismatch, - "Unable to prepare draw samplers"); - - status = PopulateIndexBuffer(index_buffer_info); - CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup index buffer"); - status = PopulateVertexBuffers(); - CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup vertex buffers"); - - // if (!draw_batcher_.CommitDraw()) { - // return false; - //} - - // draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent); - if (context_->WasLost()) { - // This draw lost us the context. This typically isn't hit. - assert_always(); + // Begin the render pass. + // This will setup our framebuffer and begin the pass in the command buffer. 
+ VkRenderPass render_pass = render_cache_->BeginRenderPass(command_buffer); + if (!render_pass) { return false; } + // Configure the pipeline for drawing. + // This encodes all render state (blend, depth, etc), our shader stages, + // and our vertex input layout. + if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_pass, + primitive_type)) { + render_cache_->EndRenderPass(); + return false; + } + + // Upload the constants the shaders require. + auto vertex_shader = pipeline_cache_->current_vertex_shader(); + auto pixel_shader = pipeline_cache_->current_pixel_shader(); + auto vertex_constant_offset = buffer_cache_->UploadConstantRegisters( + vertex_shader->constant_register_map()); + auto pixel_constant_offset = buffer_cache_->UploadConstantRegisters( + pixel_shader->constant_register_map()); + if (vertex_constant_offset == VK_WHOLE_SIZE || + pixel_constant_offset == VK_WHOLE_SIZE) { + render_cache_->EndRenderPass(); + return false; + } + + // Configure constant uniform access to point at our offsets. + auto constant_descriptor_set = buffer_cache_->constant_descriptor_set(); + auto pipeline_layout = pipeline_cache_->current_pipeline_layout(); + uint32_t constant_offsets[2] = {static_cast(vertex_constant_offset), + static_cast(pixel_constant_offset)}; + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_layout, 0, 1, &constant_descriptor_set, + static_cast(xe::countof(constant_offsets)), + constant_offsets); + + // Upload and bind index buffer data (if we have any). + if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) { + render_cache_->EndRenderPass(); + return false; + } + + // Upload and bind all vertex buffer data. + if (!PopulateVertexBuffers(command_buffer, vertex_shader)) { + render_cache_->EndRenderPass(); + return false; + } + + // Upload and set descriptors for all textures. 
+ if (!PopulateSamplers(command_buffer, vertex_shader, pixel_shader)) { + render_cache_->EndRenderPass(); + return false; + } + +#if 0 + // Actually issue the draw. + if (!index_buffer_info) { + // Auto-indexed draw. + uint32_t instance_count = 1; + uint32_t first_vertex = 0; + uint32_t first_instance = 0; + vkCmdDraw(command_buffer, index_count, instance_count, first_vertex, + first_instance); + } else { + // Index buffer draw. + uint32_t instance_count = 1; + uint32_t first_index = + register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32; + uint32_t vertex_offset = 0; + uint32_t first_instance = 0; + vkCmdDrawIndexed(command_buffer, index_count, instance_count, first_index, + vertex_offset, first_instance); + } +#endif + + // End the rendering pass. + render_cache_->EndRenderPass(); + + // TODO(benvanik): bigger batches. + err = vkEndCommandBuffer(command_buffer); + CheckResult(err, "vkEndCommandBuffer"); + VkFence fence; + VkFenceCreateInfo fence_info; + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_info.pNext = nullptr; + fence_info.flags = 0; + vkCreateFence(*device_, &fence_info, nullptr, &fence); + command_buffer_pool_->EndBatch(fence); + VkSubmitInfo submit_info; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = nullptr; + submit_info.waitSemaphoreCount = 0; + submit_info.pWaitSemaphores = nullptr; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &command_buffer; + submit_info.signalSemaphoreCount = 0; + submit_info.pSignalSemaphores = nullptr; + if (queue_mutex_) { + queue_mutex_->lock(); + } + err = vkQueueSubmit(queue_, 1, &submit_info, fence); + if (queue_mutex_) { + queue_mutex_->unlock(); + } + CheckResult(err, "vkQueueSubmit"); + if (queue_mutex_) { + queue_mutex_->lock(); + } + vkQueueWaitIdle(queue_); + if (queue_mutex_) { + queue_mutex_->unlock(); + } + command_buffer_pool_->Scavenge(); + vkDestroyFence(*device_, fence, nullptr); + return true; } -bool 
VulkanCommandProcessor::SetShadowRegister(uint32_t* dest, - uint32_t register_name) { - uint32_t value = register_file_->values[register_name].u32; - if (*dest == value) { - return false; - } - *dest = value; - return true; -} - -bool VulkanCommandProcessor::SetShadowRegister(float* dest, - uint32_t register_name) { - float value = register_file_->values[register_name].f32; - if (*dest == value) { - return false; - } - *dest = value; - return true; -} - -VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateShaders( - PrimitiveType prim_type) { - auto& regs = update_shaders_regs_; - - // These are the constant base addresses/ranges for shaders. - // We have these hardcoded right now cause nothing seems to differ. - assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == - 0x000FF000 || - register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); - assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == - 0x000FF100 || - register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); - - bool dirty = false; - dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); - dirty |= SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); - dirty |= regs.vertex_shader != active_vertex_shader_; - dirty |= regs.pixel_shader != active_pixel_shader_; - dirty |= regs.prim_type != prim_type; - if (!dirty) { - return UpdateStatus::kCompatible; - } - regs.vertex_shader = static_cast(active_vertex_shader_); - regs.pixel_shader = static_cast(active_pixel_shader_); - regs.prim_type = prim_type; - - SCOPE_profile_cpu_f("gpu"); - - return UpdateStatus::kMismatch; -} - -VulkanCommandProcessor::UpdateStatus -VulkanCommandProcessor::UpdateRenderTargets() { - auto& regs = update_render_targets_regs_; - - bool dirty = false; - dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); - dirty |= 
SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); - dirty |= SetShadowRegister(®s.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); - dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); - dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); - dirty |= SetShadowRegister(®s.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); - dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); - dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); - dirty |= - SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); - dirty |= SetShadowRegister(®s.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - SCOPE_profile_cpu_f("gpu"); - - return UpdateStatus::kMismatch; -} - -VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateState( - PrimitiveType prim_type) { - bool mismatch = false; - -#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ - { \ - if (status == UpdateStatus::kError) { \ - XELOGE(error_message); \ - return status; \ - } else if (status == UpdateStatus::kMismatch) { \ - mismatch = true; \ - } \ - } - - UpdateStatus status; - status = UpdateViewportState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state"); - status = UpdateRasterizerState(prim_type); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state"); - status = UpdateBlendState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state"); - status = UpdateDepthStencilState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state"); - - return mismatch ? 
UpdateStatus::kMismatch : UpdateStatus::kCompatible; -} - -VulkanCommandProcessor::UpdateStatus -VulkanCommandProcessor::UpdateViewportState() { - auto& regs = update_viewport_state_regs_; - - bool dirty = false; - // dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl, - // XE_GPU_REG_PA_CL_CLIP_CNTL); - dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); - dirty |= SetShadowRegister(®s.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL); - dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.pa_sc_window_offset, - XE_GPU_REG_PA_SC_WINDOW_OFFSET); - dirty |= SetShadowRegister(®s.pa_sc_window_scissor_tl, - XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); - dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br, - XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); - dirty |= SetShadowRegister(®s.pa_cl_vport_xoffset, - XE_GPU_REG_PA_CL_VPORT_XOFFSET); - dirty |= SetShadowRegister(®s.pa_cl_vport_yoffset, - XE_GPU_REG_PA_CL_VPORT_YOFFSET); - dirty |= SetShadowRegister(®s.pa_cl_vport_zoffset, - XE_GPU_REG_PA_CL_VPORT_ZOFFSET); - dirty |= SetShadowRegister(®s.pa_cl_vport_xscale, - XE_GPU_REG_PA_CL_VPORT_XSCALE); - dirty |= SetShadowRegister(®s.pa_cl_vport_yscale, - XE_GPU_REG_PA_CL_VPORT_YSCALE); - dirty |= SetShadowRegister(®s.pa_cl_vport_zscale, - XE_GPU_REG_PA_CL_VPORT_ZSCALE); - - // Much of this state machine is extracted from: - // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c - // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html - // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf - - // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf - // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0. - // = false: multiply the X, Y coordinates by 1/W0. - // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. - // = false: multiply the Z coordinate by 1/W0. 
- // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to - // get 1/W0. - // draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f, - // (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f, - // (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f); - - // Done in VS, no need to flush state. - // if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) { - // draw_batcher_.set_window_scalar(1.0f, 1.0f); - //} else { - // draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f); - //} - - if (!dirty) { - return UpdateStatus::kCompatible; - } - - return UpdateStatus::kMismatch; -} - -VulkanCommandProcessor::UpdateStatus -VulkanCommandProcessor::UpdateRasterizerState(PrimitiveType prim_type) { - auto& regs = update_rasterizer_state_regs_; - - bool dirty = false; - dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_tl, - XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL); - dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_br, - XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); - dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, - XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); - dirty |= regs.prim_type != prim_type; - if (!dirty) { - return UpdateStatus::kCompatible; - } - - regs.prim_type = prim_type; - - SCOPE_profile_cpu_f("gpu"); - - return UpdateStatus::kMismatch; -} - -VulkanCommandProcessor::UpdateStatus -VulkanCommandProcessor::UpdateBlendState() { - auto& reg_file = *register_file_; - auto& regs = update_blend_state_regs_; - - // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE - // Deprecated in GL, implemented in shader. 
- // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; - // uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32; - // draw_batcher_.set_alpha_test((color_control & 0x4) != 0, // - // ALPAHTESTENABLE - // color_control & 0x7, // ALPHAFUNC - // reg_file[XE_GPU_REG_RB_ALPHA_REF].f32); - - bool dirty = false; - dirty |= - SetShadowRegister(®s.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0); - dirty |= - SetShadowRegister(®s.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1); - dirty |= - SetShadowRegister(®s.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); - dirty |= - SetShadowRegister(®s.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); - dirty |= SetShadowRegister(®s.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED); - dirty |= SetShadowRegister(®s.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN); - dirty |= SetShadowRegister(®s.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE); - dirty |= SetShadowRegister(®s.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - SCOPE_profile_cpu_f("gpu"); - - return UpdateStatus::kMismatch; -} - -VulkanCommandProcessor::UpdateStatus -VulkanCommandProcessor::UpdateDepthStencilState() { - auto& regs = update_depth_stencil_state_regs_; - - bool dirty = false; - dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); - dirty |= - SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - SCOPE_profile_cpu_f("gpu"); - - return UpdateStatus::kMismatch; -} - -VulkanCommandProcessor::UpdateStatus -VulkanCommandProcessor::PopulateIndexBuffer( - IndexBufferInfo* index_buffer_info) { +bool VulkanCommandProcessor::PopulateIndexBuffer( + VkCommandBuffer command_buffer, IndexBufferInfo* index_buffer_info) { auto& regs = *register_file_; if (!index_buffer_info || !index_buffer_info->guest_base) { // No index buffer or auto draw. 
- return UpdateStatus::kCompatible; + return true; } auto& info = *index_buffer_info; @@ -462,19 +332,44 @@ VulkanCommandProcessor::PopulateIndexBuffer( trace_writer_.WriteMemoryRead(info.guest_base, info.length); - return UpdateStatus::kCompatible; + // Upload (or get a cached copy of) the buffer. + const void* source_ptr = + memory_->TranslatePhysical(info.guest_base); + size_t source_length = + info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t) + : sizeof(uint16_t)); + auto buffer_ref = + buffer_cache_->UploadIndexBuffer(source_ptr, source_length, info.format); + if (buffer_ref.second == VK_WHOLE_SIZE) { + // Failed to upload buffer. + return false; + } + + // Bind the buffer. + VkIndexType index_type = info.format == IndexFormat::kInt32 + ? VK_INDEX_TYPE_UINT32 + : VK_INDEX_TYPE_UINT16; + vkCmdBindIndexBuffer(command_buffer, buffer_ref.first, buffer_ref.second, + index_type); + + return true; } -VulkanCommandProcessor::UpdateStatus -VulkanCommandProcessor::PopulateVertexBuffers() { +bool VulkanCommandProcessor::PopulateVertexBuffers( + VkCommandBuffer command_buffer, VulkanShader* vertex_shader) { + auto& regs = *register_file_; + #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES - auto& regs = *register_file_; - assert_not_null(active_vertex_shader_); + auto& vertex_bindings = vertex_shader->vertex_bindings(); + assert_true(vertex_bindings.size() <= 32); + VkBuffer all_buffers[32]; + VkDeviceSize all_buffer_offsets[32]; + uint32_t buffer_index = 0; - for (const auto& vertex_binding : active_vertex_shader_->vertex_bindings()) { + for (const auto& vertex_binding : vertex_bindings) { int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (vertex_binding.fetch_constant / 3) * 6; const auto group = reinterpret_cast(®s.values[r]); @@ -492,58 +387,72 @@ VulkanCommandProcessor::PopulateVertexBuffers() { } assert_true(fetch->endian == 2); + // TODO(benvanik): compute based on indices or vertex count. 
+ // THIS CAN BE MASSIVELY INCORRECT (too large). size_t valid_range = size_t(fetch->size * 4); trace_writer_.WriteMemoryRead(fetch->address << 2, valid_range); + + // Upload (or get a cached copy of) the buffer. + const void* source_ptr = + memory_->TranslatePhysical(fetch->address << 2); + size_t source_length = valid_range; + auto buffer_ref = + buffer_cache_->UploadVertexBuffer(source_ptr, source_length); + if (buffer_ref.second == VK_WHOLE_SIZE) { + // Failed to upload buffer. + return false; + } + + // Stash the buffer reference for our bulk bind at the end. + all_buffers[buffer_index] = buffer_ref.first; + all_buffer_offsets[buffer_index] = buffer_ref.second; + ++buffer_index; } - return UpdateStatus::kCompatible; + // Bind buffers. + vkCmdBindVertexBuffers(command_buffer, 0, buffer_index, all_buffers, + all_buffer_offsets); + + return true; } -VulkanCommandProcessor::UpdateStatus -VulkanCommandProcessor::PopulateSamplers() { +bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader) { #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES - bool mismatch = false; + bool any_failed = false; // VS and PS samplers are shared, but may be used exclusively. // We walk each and setup lazily. bool has_setup_sampler[32] = {false}; // Vertex texture samplers. - for (auto& texture_binding : active_vertex_shader_->texture_bindings()) { + for (auto& texture_binding : vertex_shader->texture_bindings()) { if (has_setup_sampler[texture_binding.fetch_constant]) { continue; } has_setup_sampler[texture_binding.fetch_constant] = true; - auto status = PopulateSampler(texture_binding); - if (status == UpdateStatus::kError) { - return status; - } else if (status == UpdateStatus::kMismatch) { - mismatch = true; - } + any_failed = PopulateSampler(command_buffer, texture_binding) || any_failed; } // Pixel shader texture sampler. 
- for (auto& texture_binding : active_pixel_shader_->texture_bindings()) { + for (auto& texture_binding : pixel_shader->texture_bindings()) { if (has_setup_sampler[texture_binding.fetch_constant]) { continue; } has_setup_sampler[texture_binding.fetch_constant] = true; - auto status = PopulateSampler(texture_binding); - if (status == UpdateStatus::kError) { - return UpdateStatus::kError; - } else if (status == UpdateStatus::kMismatch) { - mismatch = true; - } + any_failed = PopulateSampler(command_buffer, texture_binding) || any_failed; } - return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible; + return !any_failed; } -VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler( +bool VulkanCommandProcessor::PopulateSampler( + VkCommandBuffer command_buffer, const Shader::TextureBinding& texture_binding) { auto& regs = *register_file_; int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + @@ -553,30 +462,34 @@ VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler( // ? if (!fetch.type) { - return UpdateStatus::kCompatible; + return true; } assert_true(fetch.type == 0x2); TextureInfo texture_info; if (!TextureInfo::Prepare(fetch, &texture_info)) { XELOGE("Unable to parse texture fetcher info"); - return UpdateStatus::kCompatible; // invalid texture used + return true; // invalid texture used } SamplerInfo sampler_info; if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr, &sampler_info)) { XELOGE("Unable to parse sampler info"); - return UpdateStatus::kCompatible; // invalid texture used + return true; // invalid texture used } trace_writer_.WriteMemoryRead(texture_info.guest_address, texture_info.input_length); - return UpdateStatus::kCompatible; + // TODO(benvanik): texture cache lookup. + // TODO(benvanik): bind or return so PopulateSamplers can batch. + + return true; } bool VulkanCommandProcessor::IssueCopy() { SCOPE_profile_cpu_f("gpu"); + // TODO(benvanik): resolve. 
return true; } diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index babbc9ab3..c350f77b0 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -23,12 +23,17 @@ #include "xenia/base/threading.h" #include "xenia/gpu/command_processor.h" #include "xenia/gpu/register_file.h" -#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/buffer_cache.h" +#include "xenia/gpu/vulkan/pipeline_cache.h" +#include "xenia/gpu/vulkan/render_cache.h" +#include "xenia/gpu/vulkan/texture_cache.h" #include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/xthread.h" #include "xenia/memory.h" +#include "xenia/ui/vulkan/fenced_pools.h" #include "xenia/ui/vulkan/vulkan_context.h" +#include "xenia/ui/vulkan/vulkan_device.h" namespace xe { namespace gpu { @@ -45,12 +50,6 @@ class VulkanCommandProcessor : public CommandProcessor { void ClearCaches() override; private: - enum class UpdateStatus { - kCompatible, - kMismatch, - kError, - }; - bool SetupContext() override; void ShutdownContext() override; @@ -65,97 +64,35 @@ class VulkanCommandProcessor : public CommandProcessor { const uint32_t* host_address, uint32_t dword_count) override; - bool IssueDraw(PrimitiveType prim_type, uint32_t index_count, + bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) override; - UpdateStatus UpdateShaders(PrimitiveType prim_type); - UpdateStatus UpdateRenderTargets(); - UpdateStatus UpdateState(PrimitiveType prim_type); - UpdateStatus UpdateViewportState(); - UpdateStatus UpdateRasterizerState(PrimitiveType prim_type); - UpdateStatus UpdateBlendState(); - UpdateStatus UpdateDepthStencilState(); - UpdateStatus PopulateIndexBuffer(IndexBufferInfo* index_buffer_info); - UpdateStatus PopulateVertexBuffers(); - UpdateStatus PopulateSamplers(); - UpdateStatus PopulateSampler(const 
Shader::TextureBinding& texture_binding); + bool PopulateIndexBuffer(VkCommandBuffer command_buffer, + IndexBufferInfo* index_buffer_info); + bool PopulateVertexBuffers(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader); + bool PopulateSamplers(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader); + bool PopulateSampler(VkCommandBuffer command_buffer, + const Shader::TextureBinding& texture_binding); bool IssueCopy() override; - SpirvShaderTranslator shader_translator_; + xe::ui::vulkan::VulkanDevice* device_ = nullptr; - private: - bool SetShadowRegister(uint32_t* dest, uint32_t register_name); - bool SetShadowRegister(float* dest, uint32_t register_name); - struct UpdateRenderTargetsRegisters { - uint32_t rb_modecontrol; - uint32_t rb_surface_info; - uint32_t rb_color_info; - uint32_t rb_color1_info; - uint32_t rb_color2_info; - uint32_t rb_color3_info; - uint32_t rb_color_mask; - uint32_t rb_depthcontrol; - uint32_t rb_stencilrefmask; - uint32_t rb_depth_info; + // TODO(benvanik): abstract behind context? + // Queue used to submit work. This may be a dedicated queue for the command + // processor and no locking will be required for use. If a dedicated queue + // was not available this will be the device primary_queue and the + // queue_mutex must be used to synchronize access to it. 
+ VkQueue queue_ = nullptr; + std::mutex* queue_mutex_ = nullptr; - UpdateRenderTargetsRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_render_targets_regs_; - struct UpdateViewportStateRegisters { - // uint32_t pa_cl_clip_cntl; - uint32_t rb_surface_info; - uint32_t pa_cl_vte_cntl; - uint32_t pa_su_sc_mode_cntl; - uint32_t pa_sc_window_offset; - uint32_t pa_sc_window_scissor_tl; - uint32_t pa_sc_window_scissor_br; - float pa_cl_vport_xoffset; - float pa_cl_vport_yoffset; - float pa_cl_vport_zoffset; - float pa_cl_vport_xscale; - float pa_cl_vport_yscale; - float pa_cl_vport_zscale; + std::unique_ptr buffer_cache_; + std::unique_ptr pipeline_cache_; + std::unique_ptr render_cache_; + std::unique_ptr texture_cache_; - UpdateViewportStateRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_viewport_state_regs_; - struct UpdateRasterizerStateRegisters { - uint32_t pa_su_sc_mode_cntl; - uint32_t pa_sc_screen_scissor_tl; - uint32_t pa_sc_screen_scissor_br; - uint32_t multi_prim_ib_reset_index; - PrimitiveType prim_type; - - UpdateRasterizerStateRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_rasterizer_state_regs_; - struct UpdateBlendStateRegisters { - uint32_t rb_blendcontrol[4]; - float rb_blend_rgba[4]; - - UpdateBlendStateRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_blend_state_regs_; - struct UpdateDepthStencilStateRegisters { - uint32_t rb_depthcontrol; - uint32_t rb_stencilrefmask; - - UpdateDepthStencilStateRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_depth_stencil_state_regs_; - struct UpdateShadersRegisters { - PrimitiveType prim_type; - uint32_t pa_su_sc_mode_cntl; - uint32_t sq_program_cntl; - uint32_t sq_context_misc; - VulkanShader* vertex_shader; - VulkanShader* pixel_shader; - - UpdateShadersRegisters() { Reset(); } - void Reset() { - 
sq_program_cntl = 0; - vertex_shader = pixel_shader = nullptr; - } - } update_shaders_regs_; + std::unique_ptr command_buffer_pool_; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/fenced_pools.cc b/src/xenia/ui/vulkan/fenced_pools.cc new file mode 100644 index 000000000..0737b98c4 --- /dev/null +++ b/src/xenia/ui/vulkan/fenced_pools.cc @@ -0,0 +1,81 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/fenced_pools.h" + +#include "xenia/base/assert.h" +#include "xenia/base/math.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +namespace xe { +namespace ui { +namespace vulkan { + +using xe::ui::vulkan::CheckResult; + +CommandBufferPool::CommandBufferPool(VkDevice device, + uint32_t queue_family_index, + VkCommandBufferLevel level) + : BaseFencedPool(device), level_(level) { + // Create the pool used for allocating buffers. + // They are marked as transient (short-lived) and cycled frequently. + VkCommandPoolCreateInfo cmd_pool_info; + cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + cmd_pool_info.pNext = nullptr; + cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + cmd_pool_info.queueFamilyIndex = queue_family_index; + auto err = + vkCreateCommandPool(device_, &cmd_pool_info, nullptr, &command_pool_); + CheckResult(err, "vkCreateCommandPool"); + + // Allocate a bunch of command buffers to start. 
+ constexpr uint32_t kDefaultCount = 32; + VkCommandBufferAllocateInfo command_buffer_info; + command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + command_buffer_info.pNext = nullptr; + command_buffer_info.commandPool = command_pool_; + command_buffer_info.level = level; + command_buffer_info.commandBufferCount = kDefaultCount; + VkCommandBuffer command_buffers[kDefaultCount]; + err = + vkAllocateCommandBuffers(device_, &command_buffer_info, command_buffers); + CheckResult(err, "vkCreateCommandBuffer"); + for (size_t i = 0; i < xe::countof(command_buffers); ++i) { + PushEntry(command_buffers[i]); + } +} + +CommandBufferPool::~CommandBufferPool() { + vkDestroyCommandPool(device_, command_pool_, nullptr); + command_pool_ = nullptr; +} + +VkCommandBuffer CommandBufferPool::AllocateEntry() { + // TODO(benvanik): allocate a bunch at once? + VkCommandBufferAllocateInfo command_buffer_info; + command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + command_buffer_info.pNext = nullptr; + command_buffer_info.commandPool = command_pool_; + command_buffer_info.level = level_; + command_buffer_info.commandBufferCount = 1; + VkCommandBuffer command_buffer; + auto err = + vkAllocateCommandBuffers(device_, &command_buffer_info, &command_buffer); + CheckResult(err, "vkCreateCommandBuffer"); + return command_buffer; +} + +void CommandBufferPool::FreeEntry(VkCommandBuffer handle) { + vkFreeCommandBuffers(device_, command_pool_, 1, &handle); +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/fenced_pools.h b/src/xenia/ui/vulkan/fenced_pools.h new file mode 100644 index 000000000..52274a9d9 --- /dev/null +++ b/src/xenia/ui/vulkan/fenced_pools.h @@ -0,0 +1,200 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 
Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_FENCED_POOLS_H_ +#define XENIA_UI_VULKAN_FENCED_POOLS_H_ + +#include + +#include "xenia/base/assert.h" +#include "xenia/ui/vulkan/vulkan.h" + +namespace xe { +namespace ui { +namespace vulkan { + +// Simple pool for Vulkan homogenous objects that cannot be reused while +// in-flight. +// It batches pooled objects into groups and uses a vkQueueSubmit fence to +// indicate their availability. If no objects are free when one is requested +// the caller is expected to create them. +template +class BaseFencedPool { + public: + BaseFencedPool(VkDevice device) : device_(device) {} + + virtual ~BaseFencedPool() { + // TODO(benvanik): wait on fence until done. + assert_null(pending_batch_list_head_); + + // Run down free lists. + while (free_batch_list_head_) { + auto batch = free_batch_list_head_; + free_batch_list_head_ = batch->next; + delete batch; + } + while (free_entry_list_head_) { + auto entry = free_entry_list_head_; + free_entry_list_head_ = entry->next; + static_cast(this)->FreeEntry(entry->handle); + delete entry; + } + } + + // Checks all pending batches for completion and scavenges their entries. + // This should be called as frequently as reasonable. + void Scavenge() { + while (pending_batch_list_head_) { + auto batch = pending_batch_list_head_; + if (vkGetFenceStatus(device_, batch->fence) == VK_SUCCESS) { + // Batch has completed. Reclaim. 
+ pending_batch_list_head_ = batch->next; + if (batch == pending_batch_list_tail_) { + pending_batch_list_tail_ = nullptr; + } + batch->next = free_batch_list_head_; + free_batch_list_head_ = batch; + batch->entry_list_tail->next = free_entry_list_head_; + free_entry_list_head_ = batch->entry_list_head; + batch->entry_list_head = nullptr; + batch->entry_list_tail = nullptr; + } else { + // Batch is still in-flight. Since batches are executed in order we know + // no others after it could have completed, so early-exit. + return; + } + } + } + + // Begins a new batch. + // All entries acquired within this batch will be marked as in-use until + // the fence specified in EndBatch is signalled. + void BeginBatch() { + assert_null(open_batch_); + Batch* batch = nullptr; + if (free_batch_list_head_) { + // Reuse a batch. + batch = free_batch_list_head_; + free_batch_list_head_ = batch->next; + batch->next = nullptr; + } else { + // Allocate new batch. + batch = new Batch(); + batch->next = nullptr; + } + batch->entry_list_head = nullptr; + batch->entry_list_tail = nullptr; + batch->fence = nullptr; + open_batch_ = batch; + } + + // Attempts to acquire an entry from the pool in the current batch. + // If none are available a new one will be allocated. + HANDLE AcquireEntry() { + Entry* entry = nullptr; + if (free_entry_list_head_) { + // Slice off an entry from the free list. + entry = free_entry_list_head_; + free_entry_list_head_ = entry->next; + } else { + // No entry available; allocate new. + entry = new Entry(); + entry->handle = static_cast(this)->AllocateEntry(); + } + entry->next = nullptr; + if (!open_batch_->entry_list_head) { + open_batch_->entry_list_head = entry; + } + if (open_batch_->entry_list_tail) { + open_batch_->entry_list_tail->next = entry; + } + open_batch_->entry_list_tail = entry; + return entry->handle; + } + + // Ends the current batch using the given fence to indicate when the batch + // has completed execution on the GPU. 
+ void EndBatch(VkFence fence) { + assert_not_null(open_batch_); + + // Close and see if we have anything. + auto batch = open_batch_; + open_batch_ = nullptr; + if (!batch->entry_list_head) { + // Nothing to do. + batch->next = free_batch_list_head_; + free_batch_list_head_ = batch; + return; + } + + // Track the fence. + batch->fence = fence; + + // Append to the end of the batch list. + batch->next = nullptr; + if (!pending_batch_list_head_) { + pending_batch_list_head_ = batch; + } + if (pending_batch_list_tail_) { + pending_batch_list_tail_->next = batch; + } else { + pending_batch_list_tail_ = batch; + } + } + + protected: + void PushEntry(HANDLE handle) { + auto entry = new Entry(); + entry->next = free_entry_list_head_; + entry->handle = handle; + free_entry_list_head_ = entry; + } + + VkDevice device_ = nullptr; + + private: + struct Entry { + Entry* next; + HANDLE handle; + }; + struct Batch { + Batch* next; + Entry* entry_list_head; + Entry* entry_list_tail; + VkFence fence; + }; + + Batch* free_batch_list_head_ = nullptr; + Entry* free_entry_list_head_ = nullptr; + Batch* pending_batch_list_head_ = nullptr; + Batch* pending_batch_list_tail_ = nullptr; + Batch* open_batch_ = nullptr; +}; + +class CommandBufferPool + : public BaseFencedPool { + public: + CommandBufferPool(VkDevice device, uint32_t queue_family_index, + VkCommandBufferLevel level); + ~CommandBufferPool() override; + + protected: + friend class BaseFencedPool; + VkCommandBuffer AllocateEntry(); + void FreeEntry(VkCommandBuffer handle); + + VkCommandPool command_pool_ = nullptr; + VkCommandBufferLevel level_ = VK_COMMAND_BUFFER_LEVEL_PRIMARY; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_FENCED_POOLS_H_ From 3bf1c91eaa981ab070024b8fbc4a63c939c2477f Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 18 Feb 2016 16:52:52 -0800 Subject: [PATCH 012/145] Those are really pointers - need those bits. 
--- src/xenia/ui/imgui_drawer.cc | 2 +- src/xenia/ui/imgui_drawer.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xenia/ui/imgui_drawer.cc b/src/xenia/ui/imgui_drawer.cc index 87284ad82..62d3c1d6b 100644 --- a/src/xenia/ui/imgui_drawer.cc +++ b/src/xenia/ui/imgui_drawer.cc @@ -208,7 +208,7 @@ void ImGuiDrawer::RenderDrawLists(ImDrawData* data) { draw.count = cmd.ElemCount; draw.index_offset = index_offset; draw.texture_handle = - reinterpret_cast(cmd.TextureId) & 0xFFFFFFFF; + reinterpret_cast(cmd.TextureId) & ~kIgnoreAlpha; draw.alpha_blend = reinterpret_cast(cmd.TextureId) & kIgnoreAlpha ? false : true; diff --git a/src/xenia/ui/imgui_drawer.h b/src/xenia/ui/imgui_drawer.h index f6a58f8f0..d66edef7d 100644 --- a/src/xenia/ui/imgui_drawer.h +++ b/src/xenia/ui/imgui_drawer.h @@ -35,7 +35,7 @@ class ImGuiDrawer : public WindowListener { ImGuiIO& GetIO(); - static const uint64_t kIgnoreAlpha = (1ull << 32); + static const uint64_t kIgnoreAlpha = (1ull << 63); protected: void Initialize(); From f9ca0b86a8b317e254c432a1c9f5ea254310667b Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Thu, 18 Feb 2016 20:06:40 -0600 Subject: [PATCH 013/145] Query to see if the physical device supports a swapchain surface to silence a validation layer error. 
--- src/xenia/ui/vulkan/vulkan_swap_chain.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.cc b/src/xenia/ui/vulkan/vulkan_swap_chain.cc index 47d246d18..15d2795fd 100644 --- a/src/xenia/ui/vulkan/vulkan_swap_chain.cc +++ b/src/xenia/ui/vulkan/vulkan_swap_chain.cc @@ -37,9 +37,15 @@ VulkanSwapChain::~VulkanSwapChain() { Shutdown(); } bool VulkanSwapChain::Initialize(VkSurfaceKHR surface) { surface_ = surface; + VkBool32 surface_supported = false; + auto err = vkGetPhysicalDeviceSurfaceSupportKHR( + *device_, device_->queue_family_index(), surface, &surface_supported); + assert_true(surface_supported); + CheckResult(err, "vkGetPhysicalDeviceSurfaceSupportKHR"); + // Query supported target formats. uint32_t count = 0; - auto err = + err = vkGetPhysicalDeviceSurfaceFormatsKHR(*device_, surface_, &count, nullptr); CheckResult(err, "vkGetPhysicalDeviceSurfaceFormatsKHR"); std::vector surface_formats; From 52a6f795f76f4b1c7bc053d8c7aae8bc26d3958a Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 18 Feb 2016 18:18:00 -0800 Subject: [PATCH 014/145] Shader modules and plumbing. 
--- src/xenia/gpu/vulkan/pipeline_cache.cc | 6 ++- src/xenia/gpu/vulkan/pipeline_cache.h | 7 +--- src/xenia/gpu/vulkan/render_cache.cc | 16 ++++++-- src/xenia/gpu/vulkan/render_cache.h | 23 ++++++++++-- src/xenia/gpu/vulkan/texture_cache.h | 4 +- .../gpu/vulkan/vulkan_command_processor.cc | 16 ++++++-- src/xenia/gpu/vulkan/vulkan_gpu_flags.h | 2 + src/xenia/gpu/vulkan/vulkan_shader.cc | 37 ++++++++++++++++--- src/xenia/gpu/vulkan/vulkan_shader.h | 4 +- 9 files changed, 89 insertions(+), 26 deletions(-) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index e09931833..ec6c28eac 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -50,8 +50,8 @@ VulkanShader* PipelineCache::LoadShader(ShaderType shader_type, // Always create the shader and stash it away. // We need to track it even if it fails translation so we know not to try // again. - VulkanShader* shader = - new VulkanShader(shader_type, data_hash, host_address, dword_count); + VulkanShader* shader = new VulkanShader(device_, shader_type, data_hash, + host_address, dword_count); shader_map_.insert({data_hash, shader}); // Perform translation. @@ -85,6 +85,8 @@ VulkanShader* PipelineCache::LoadShader(ShaderType shader_type, bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, VkRenderPass render_pass, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader, PrimitiveType primitive_type) { return false; } diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 56727e67a..00e36ef12 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -42,15 +42,12 @@ class PipelineCache { // in the command buffer is issued at this time. // Returns whether the pipeline could be successfully created. 
bool ConfigurePipeline(VkCommandBuffer command_buffer, - VkRenderPass render_pass, + VkRenderPass render_pass, VulkanShader* vertex_shader, + VulkanShader* pixel_shader, PrimitiveType primitive_type); // Currently configured pipeline layout, if any. VkPipelineLayout current_pipeline_layout() const { return nullptr; } - // Currently configured vertex shader, if any. - VulkanShader* current_vertex_shader() const { return nullptr; } - // Currently configured pixel shader, if any. - VulkanShader* current_pixel_shader() const { return nullptr; } // Clears all cached content. void ClearCache(); diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index fef05f11f..de25fb2e3 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -28,12 +28,22 @@ RenderCache::RenderCache(RegisterFile* register_file, RenderCache::~RenderCache() = default; -VkRenderPass RenderCache::BeginRenderPass(VkCommandBuffer command_buffer) { - return nullptr; +VkRenderPass RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader) { + assert_null(current_command_buffer_); + current_command_buffer_ = command_buffer; + + // Lookup or construct a render pass compatible with our current state. 
+ VkRenderPass render_pass = nullptr; + + return render_pass; } void RenderCache::EndRenderPass() { - // + assert_not_null(current_command_buffer_); + auto command_buffer = current_command_buffer_; + current_command_buffer_ = nullptr; } void RenderCache::ClearCache() { diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index fb7c84e6a..ceeea2a07 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -12,6 +12,7 @@ #include "xenia/gpu/register_file.h" #include "xenia/gpu/shader.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/vulkan.h" #include "xenia/ui/vulkan/vulkan_device.h" @@ -20,15 +21,29 @@ namespace xe { namespace gpu { namespace vulkan { -// Configures and caches pipelines based on render state. -// This is responsible for properly setting all state required for a draw -// including shaders, various blend/etc options, and input configuration. +// Manages the virtualized EDRAM and the render target cache. +// +// On the 360 the render target is an opaque block of memory in EDRAM that's +// only accessible via resolves. We use this to our advantage to simulate +// something like it as best we can by having a shared backing memory with +// a multitude of views for each tile location in EDRAM. +// +// This allows us to have the same base address write to the same memory +// regardless of framebuffer format. Resolving then uses whatever format the +// resolve requests straight from the backing memory. class RenderCache { public: RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); ~RenderCache(); - VkRenderPass BeginRenderPass(VkCommandBuffer command_buffer); + // Begins a render pass targeting the state-specified framebuffer formats. + // The command buffer will be transitioned into the render pass phase. 
+ VkRenderPass BeginRenderPass(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader); + + // Ends the current render pass. + // The command buffer will be transitioned out of the render pass phase. void EndRenderPass(); // Clears all cached content. diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h index 3545fb72d..3f18a7be1 100644 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -20,9 +20,7 @@ namespace xe { namespace gpu { namespace vulkan { -// Configures and caches pipelines based on render state. -// This is responsible for properly setting all state required for a draw -// including shaders, various blend/etc options, and input configuration. +// class TextureCache { public: TextureCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 6490de44a..8047bd202 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -175,6 +175,16 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, return IssueCopy(); } + // Shaders will have already been defined by previous loads. + // We need the to do just about anything so validate here. + auto vertex_shader = static_cast(active_vertex_shader()); + auto pixel_shader = static_cast(active_pixel_shader()); + if (!vertex_shader || !vertex_shader->is_valid() || !pixel_shader || + !pixel_shader->is_valid()) { + // Skipped because we can't understand the shader. + return true; + } + // TODO(benvanik): bigger batches. command_buffer_pool_->BeginBatch(); VkCommandBuffer command_buffer = command_buffer_pool_->AcquireEntry(); @@ -188,7 +198,8 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, // Begin the render pass. 
// This will setup our framebuffer and begin the pass in the command buffer. - VkRenderPass render_pass = render_cache_->BeginRenderPass(command_buffer); + VkRenderPass render_pass = render_cache_->BeginRenderPass( + command_buffer, vertex_shader, pixel_shader); if (!render_pass) { return false; } @@ -197,14 +208,13 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, // This encodes all render state (blend, depth, etc), our shader stages, // and our vertex input layout. if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_pass, + vertex_shader, pixel_shader, primitive_type)) { render_cache_->EndRenderPass(); return false; } // Upload the constants the shaders require. - auto vertex_shader = pipeline_cache_->current_vertex_shader(); - auto pixel_shader = pipeline_cache_->current_pixel_shader(); auto vertex_constant_offset = buffer_cache_->UploadConstantRegisters( vertex_shader->constant_register_map()); auto pixel_constant_offset = buffer_cache_->UploadConstantRegisters( diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h b/src/xenia/gpu/vulkan/vulkan_gpu_flags.h index c78637a47..b5a00c74a 100644 --- a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h +++ b/src/xenia/gpu/vulkan/vulkan_gpu_flags.h @@ -12,4 +12,6 @@ #include +#define FINE_GRAINED_DRAW_SCOPES 1 + #endif // XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_shader.cc b/src/xenia/gpu/vulkan/vulkan_shader.cc index 8624480a3..b3c72abf3 100644 --- a/src/xenia/gpu/vulkan/vulkan_shader.cc +++ b/src/xenia/gpu/vulkan/vulkan_shader.cc @@ -9,20 +9,47 @@ #include "xenia/gpu/vulkan/vulkan_shader.h" +#include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" +#include "xenia/ui/vulkan/vulkan_util.h" namespace xe { namespace gpu { namespace vulkan { -VulkanShader::VulkanShader(ShaderType shader_type, uint64_t data_hash, - const uint32_t* dword_ptr, uint32_t dword_count) - : Shader(shader_type, data_hash, dword_ptr, dword_count) {} +using 
xe::ui::vulkan::CheckResult; -VulkanShader::~VulkanShader() = default; +VulkanShader::VulkanShader(VkDevice device, ShaderType shader_type, + uint64_t data_hash, const uint32_t* dword_ptr, + uint32_t dword_count) + : Shader(shader_type, data_hash, dword_ptr, dword_count), device_(device) {} -bool VulkanShader::Prepare() { return true; } +VulkanShader::~VulkanShader() { + if (shader_module_) { + vkDestroyShaderModule(device_, shader_module_, nullptr); + shader_module_ = nullptr; + } +} + +bool VulkanShader::Prepare() { + assert_null(shader_module_); + assert_true(is_valid()); + + // Create the shader module. + VkShaderModuleCreateInfo shader_info; + shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + shader_info.pNext = nullptr; + shader_info.flags = 0; + shader_info.codeSize = translated_binary_.size(); + shader_info.pCode = + reinterpret_cast(translated_binary_.data()); + auto err = + vkCreateShaderModule(device_, &shader_info, nullptr, &shader_module_); + CheckResult(err, "vkCreateShaderModule"); + + return true; +} } // namespace vulkan } // namespace gpu diff --git a/src/xenia/gpu/vulkan/vulkan_shader.h b/src/xenia/gpu/vulkan/vulkan_shader.h index cc1d51e2a..97dbd5822 100644 --- a/src/xenia/gpu/vulkan/vulkan_shader.h +++ b/src/xenia/gpu/vulkan/vulkan_shader.h @@ -21,15 +21,17 @@ namespace vulkan { class VulkanShader : public Shader { public: - VulkanShader(ShaderType shader_type, uint64_t data_hash, + VulkanShader(VkDevice device, ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr, uint32_t dword_count); ~VulkanShader() override; + // Available only if the shader is_valid and has been prepared. VkShaderModule shader_module() const { return shader_module_; } bool Prepare(); private: + VkDevice device_ = nullptr; VkShaderModule shader_module_ = nullptr; }; From 1dcc84a4724e0ee0af1e82ed4706cb250c9c30a5 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 18 Feb 2016 18:18:13 -0800 Subject: [PATCH 015/145] Starting render passes. 
--- src/xenia/gpu/vulkan/render_cache.cc | 34 ++++++++++++++++++++++++++++ src/xenia/gpu/vulkan/render_cache.h | 3 +++ 2 files changed, 37 insertions(+) diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index de25fb2e3..32d9349e5 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -37,6 +37,37 @@ VkRenderPass RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, // Lookup or construct a render pass compatible with our current state. VkRenderPass render_pass = nullptr; + // Begin render pass. + VkRenderPassBeginInfo render_pass_begin_info; + render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + render_pass_begin_info.pNext = nullptr; + render_pass_begin_info.renderPass = render_pass; + + // Target framebuffer. + // render_pass_begin_info.framebuffer = current_buffer.framebuffer; + + // Render into the entire buffer (or at least tell the API we are doing + // this). In theory it'd be better to clip this to the scissor region, but + // the docs warn anything but the full framebuffer may be slow. + render_pass_begin_info.renderArea.offset.x = 0; + render_pass_begin_info.renderArea.offset.y = 0; + // render_pass_begin_info.renderArea.extent.width = surface_width_; + // render_pass_begin_info.renderArea.extent.height = surface_height_; + + // Configure clear color, if clearing. 
+ VkClearValue color_clear_value; + color_clear_value.color.float32[0] = 238 / 255.0f; + color_clear_value.color.float32[1] = 238 / 255.0f; + color_clear_value.color.float32[2] = 238 / 255.0f; + color_clear_value.color.float32[3] = 1.0f; + VkClearValue clear_values[] = {color_clear_value}; + render_pass_begin_info.clearValueCount = + static_cast(xe::countof(clear_values)); + render_pass_begin_info.pClearValues = clear_values; + + vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info, + VK_SUBPASS_CONTENTS_INLINE); + return render_pass; } @@ -44,6 +75,9 @@ void RenderCache::EndRenderPass() { assert_not_null(current_command_buffer_); auto command_buffer = current_command_buffer_; current_command_buffer_ = nullptr; + + // End the render pass. + vkCmdEndRenderPass(command_buffer); } void RenderCache::ClearCache() { diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index ceeea2a07..2bdcef924 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -52,6 +52,9 @@ class RenderCache { private: RegisterFile* register_file_ = nullptr; VkDevice device_ = nullptr; + + // Only valid during a BeginRenderPass/EndRenderPass block. + VkCommandBuffer current_command_buffer_ = nullptr; }; } // namespace vulkan From 8820c735328acac0e336352dd879e64770ac872b Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Fri, 19 Feb 2016 10:38:11 -0800 Subject: [PATCH 016/145] A night's worth of work: documented EDRAM. Seems mostly right. --- src/xenia/gpu/vulkan/render_cache.h | 127 ++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index 2bdcef924..aeaa07264 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -31,6 +31,133 @@ namespace vulkan { // This allows us to have the same base address write to the same memory // regardless of framebuffer format. 
Resolving then uses whatever format the // resolve requests straight from the backing memory. +// +// EDRAM is a beast and we only approximate it as best we can. Basically, +// the 10MiB of EDRAM is composed of 2048 5120b tiles. Each tile is 80x16px. +// +-----+-----+-----+--- +// |tile0|tile1|tile2|... 2048 times +// +-----+-----+-----+--- +// Operations dealing with EDRAM deal in tile offsets, so base 0x100 is tile +// offset 256, 256*5120=1310720b into the buffer. All rendering operations are +// aligned to tiles so trying to draw at 256px wide will have a real width of +// 320px by rounding up to the next tile. +// +// MSAA and other settings will modify the exact pixel sizes, like 4X makes +// each tile effectively 40x8px, but they are still all 5120b. As we try to +// emulate this we adjust our viewport when rendering to stretch pixels as +// needed. +// +// The good news is that games cannot read EDRAM directly but must use a copy +// operation to get the data out. That gives us a chance to do whatever we +// need to (re-tile, etc) only when requested. +// +// To approximate the tiled EDRAM layout we use a single large chunk of memory. +// From this memory we create many VkImages (and VkImageViews) of various +// formats and dimensions as requested by the game. These are used as +// attachments during rendering and as sources during copies. They are also +// heavily aliased - lots of images will reference the same locations in the +// underlying EDRAM buffer. The only requirement is that there are no hazards +// with specific tiles (reading/writing the same tile through different images) +// and otherwise it should be ok *fingers crossed*. +// +// One complication is the copy/resolve process itself: we need to give back +// the data asked for in the format desired and where it goes is arbitrary +// (any address in physical memory). If the game is good we get resolves of +// EDRAM into fixed base addresses with scissored regions. 
If the game is bad +// we are broken. +// +// Resolves from EDRAM result in tiled textures - that's texture tiles, not +// EDRAM tiles. If we wanted to ensure byte-for-byte correctness we'd need to +// then tile the images as we wrote them out. For now, we just attempt to +// get the (X, Y) in linear space and do that. This really comes into play +// when multiple resolves write to the same texture or memory aliased by +// multiple textures - which is common due to predicated tiling. The examples +// below demonstrate what this looks like, but the important thing is that +// we are aware of partial textures and overlapping regions. +// +// TODO(benvanik): what, if any, barriers do we need? any transitions? +// +// Example with multiple render targets: +// Two color targets of 256x256px tightly packed in EDRAM: +// color target 0: base 0x0, pitch 320, scissor 0,0, 256x256 +// starts at tile 0, buffer offset 0 +// contains 64 tiles (320/80)*(256/16) +// color target 1: base 0x40, pitch 320, scissor 256,0, 256x256 +// starts at tile 64 (after color target 0), buffer offset 327680b +// contains 64 tiles +// In EDRAM each set of 64 tiles is contiguous: +// +------+------+ +------+------+------+ +// |ct0.0 |ct0.1 |...|ct0.63|ct1.0 |ct1.1 |... +// +------+------+ +------+------+------+ +// To render into these, we setup two VkImages: +// image 0: bound to buffer offset 0, 320x256x4=327680b +// image 1: bound to buffer offset 327680b, 320x256x4=327680b +// So when we render to them: +// +------+-+ scissored to 256x256, actually 320x256 +// | . | | <- . appears at some untiled offset in the buffer, but +// | | | consistent if aliased with the same format +// +------+-+ +// In theory, this gives us proper aliasing in most cases. 
+// +// Example with horizontal predicated tiling: +// Trying to render 1024x576 @4X MSAA, splitting into two regions +// horizontally: +// +----------+ +// | 1024x288 | +// +----------+ +// | 1024x288 | +// +----------+ +// EDRAM configured for 1056x288px with tile size 2112x567px (4X MSAA): +// color target 0: base 0x0, pitch 1080, 26x36 tiles +// First render (top): +// window offset 0,0 +// scissor 0,0, 1024x288 +// First resolve (top): +// RB_COPY_DEST_BASE 0x1F45D000 +// RB_COPY_DEST_PITCH pitch=1024, height=576 +// vertices: 0,0, 1024,0, 1024,288 +// Second render (bottom): +// window offset 0,-288 +// scissor 0,288, 1024x288 +// Second resolve (bottom): +// RB_COPY_DEST_BASE 0x1F57D000 (+1179648b) +// RB_COPY_DEST_PITCH pitch=1024, height=576 +// (exactly 1024x288*4b after first resolve) +// vertices: 0,288, 1024,288, 1024,576 +// Resolving here is easy as the textures are contiguous in memory. We can +// snoop in the first resolve with the dest height to know the total size, +// and in the second resolve see that it overlaps and place it in the +// existing target. 
+// +// Example with vertical predicated tiling: +// Trying to render 1280x720 @2X MSAA, splitting into two regions +// vertically: +// +-----+-----+ +// | 640 | 640 | +// | x | x | +// | 720 | 720 | +// +-----+-----+ +// EDRAM configured for 640x736px with tile size 640x1472px (2X MSAA): +// color target 0: base 0x0, pitch 640, 8x92 tiles +// First render (left): +// window offset 0,0 +// scissor 0,0, 640x720 +// First resolve (left): +// RB_COPY_DEST_BASE 0x1BC6D000 +// RB_COPY_DEST_PITCH pitch=1280, height=720 +// vertices: 0,0, 640,0, 640,720 +// Second render (right): +// window offset -640,0 +// scissor 640,0, 640x720 +// Second resolve (right): +// RB_COPY_DEST_BASE 0x1BC81000 (+81920b) +// RB_COPY_DEST_PITCH pitch=1280, height=720 +// vertices: 640,0, 1280,0, 1280,720 +// Resolving here is much more difficult as resolves are tiled and the right +// half of the texture is 81920b away: +// 81920/4bpp=20480px, /32 (texture tile size)=640px +// We know the texture size with the first resolve and with the second we +// must check for overlap then compute the offset (in both X and Y). class RenderCache { public: RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); From de1e4661ff538b20da3d3f72f87c13822b794404 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Fri, 19 Feb 2016 20:36:10 -0800 Subject: [PATCH 017/145] Not crashing (but also likely not working) EDRAM emulation. 
--- src/xenia/gpu/trace_dump.cc | 15 +- src/xenia/gpu/vulkan/pipeline_cache.cc | 2 +- src/xenia/gpu/vulkan/pipeline_cache.h | 4 +- src/xenia/gpu/vulkan/render_cache.cc | 713 +++++++++++++++++- src/xenia/gpu/vulkan/render_cache.h | 106 ++- .../gpu/vulkan/vulkan_command_processor.cc | 36 +- src/xenia/gpu/vulkan/vulkan_shader.h | 2 +- src/xenia/ui/vulkan/fenced_pools.h | 3 + src/xenia/ui/vulkan/vulkan.cc | 4 + src/xenia/ui/vulkan/vulkan.h | 1 + src/xenia/ui/vulkan/vulkan_context.cc | 2 +- src/xenia/ui/vulkan/vulkan_device.cc | 5 + 12 files changed, 851 insertions(+), 42 deletions(-) diff --git a/src/xenia/gpu/trace_dump.cc b/src/xenia/gpu/trace_dump.cc index b7804ebec..7535e0f01 100644 --- a/src/xenia/gpu/trace_dump.cc +++ b/src/xenia/gpu/trace_dump.cc @@ -13,6 +13,7 @@ #include "third_party/stb/stb_image_write.h" #include "xenia/base/logging.h" +#include "xenia/base/profiling.h" #include "xenia/base/string.h" #include "xenia/base/threading.h" #include "xenia/gpu/command_processor.h" @@ -189,10 +190,16 @@ void TraceDump::Run() { }); xe::threading::Fence capture_fence; + bool did_capture = false; loop_->PostDelayed( [&]() { // Capture. auto raw_image = window_->context()->Capture(); + if (!raw_image) { + // Failed to capture anything. + capture_fence.Signal(); + return; + } // Save framebuffer png. std::string png_path = xe::to_string(base_output_path_ + L".png"); @@ -201,6 +208,7 @@ void TraceDump::Run() { raw_image->data.data(), static_cast(raw_image->stride)); + did_capture = true; capture_fence.Signal(); }, 50); @@ -211,10 +219,13 @@ void TraceDump::Run() { loop_->Quit(); loop_->AwaitQuit(); - player_.reset(); - emulator_.reset(); + Profiler::Shutdown(); window_.reset(); loop_.reset(); + player_.reset(); + emulator_.reset(); + + // TODO(benvanik): die if failed to capture? 
} } // namespace gpu diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index ec6c28eac..5416aea7f 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -84,7 +84,7 @@ VulkanShader* PipelineCache::LoadShader(ShaderType shader_type, } bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, - VkRenderPass render_pass, + const RenderState* render_state, VulkanShader* vertex_shader, VulkanShader* pixel_shader, PrimitiveType primitive_type) { diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 00e36ef12..74c461504 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -14,6 +14,7 @@ #include "xenia/gpu/register_file.h" #include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/render_cache.h" #include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/xenos.h" #include "xenia/ui/spirv/spirv_disassembler.h" @@ -42,7 +43,8 @@ class PipelineCache { // in the command buffer is issued at this time. // Returns whether the pipeline could be successfully created. 
bool ConfigurePipeline(VkCommandBuffer command_buffer, - VkRenderPass render_pass, VulkanShader* vertex_shader, + const RenderState* render_state, + VulkanShader* vertex_shader, VulkanShader* pixel_shader, PrimitiveType primitive_type); diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 32d9349e5..b477d5633 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -9,6 +9,8 @@ #include "xenia/gpu/vulkan/render_cache.h" +#include + #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/memory.h" @@ -20,55 +22,711 @@ namespace xe { namespace gpu { namespace vulkan { +using namespace xe::gpu::xenos; using xe::ui::vulkan::CheckResult; +constexpr uint32_t kEdramBufferCapacity = 10 * 1024 * 1024; + +VkFormat ColorRenderTargetFormatToVkFormat(ColorRenderTargetFormat format) { + switch (format) { + case ColorRenderTargetFormat::k_8_8_8_8: + case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + return VK_FORMAT_R8G8B8A8_UNORM; + case ColorRenderTargetFormat::k_2_10_10_10: + case ColorRenderTargetFormat::k_2_10_10_10_unknown: + return VK_FORMAT_A2R10G10B10_UNORM_PACK32; + case ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown: + // WARNING: this is wrong, most likely - no float form in vulkan? 
+      XELOGW("Unsupported EDRAM format k_2_10_10_10_FLOAT used");
+      return VK_FORMAT_A2R10G10B10_SSCALED_PACK32;
+    case ColorRenderTargetFormat::k_16_16:
+      return VK_FORMAT_R16G16_UNORM;
+    case ColorRenderTargetFormat::k_16_16_16_16:
+      return VK_FORMAT_R16G16B16A16_UNORM;
+    case ColorRenderTargetFormat::k_16_16_FLOAT:
+      return VK_FORMAT_R16G16_SFLOAT;
+    case ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
+      return VK_FORMAT_R16G16B16A16_SFLOAT;
+    case ColorRenderTargetFormat::k_32_FLOAT:
+      return VK_FORMAT_R32_SFLOAT;
+    case ColorRenderTargetFormat::k_32_32_FLOAT:
+      return VK_FORMAT_R32G32_SFLOAT;
+    default:
+      assert_unhandled_case(format);
+      return VK_FORMAT_UNDEFINED;
+  }
+}
+
+VkFormat DepthRenderTargetFormatToVkFormat(DepthRenderTargetFormat format) {
+  switch (format) {
+    case DepthRenderTargetFormat::kD24S8:
+      return VK_FORMAT_D24_UNORM_S8_UINT;
+    case DepthRenderTargetFormat::kD24FS8:
+      // TODO(benvanik): some way to emulate? resolve-time flag?
+      XELOGW("Unsupported EDRAM format kD24FS8 used");
+      return VK_FORMAT_D24_UNORM_S8_UINT;
+    default:
+      return VK_FORMAT_UNDEFINED;
+  }
+}
+
+// Cached view into the EDRAM memory.
+// The image is aliased to a region of the edram_memory_ based on the tile
+// parameters.
+// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible
+// formats?
+class CachedTileView {
+ public:
+  // Key identifying the view in the cache.
+  TileViewKey key;
+  // Image mapped into EDRAM.
+  VkImage image = nullptr;
+  // Simple view on the image matching the format.
+  VkImageView image_view = nullptr;
+
+  CachedTileView(VkDevice device, VkDeviceMemory edram_memory,
+                 TileViewKey view_key);
+  ~CachedTileView();
+
+  bool IsEqual(const TileViewKey& other_key) const {
+    auto a = reinterpret_cast<const uint64_t*>(&key);
+    auto b = reinterpret_cast<const uint64_t*>(&other_key);
+    return *a == *b;
+  }
+
+ private:
+  VkDevice device_ = nullptr;
+};
+
+// Cached framebuffer referencing tile attachments.
+// Each framebuffer is specific to a render pass. Ugh.
+class CachedFramebuffer { + public: + // TODO(benvanik): optimized key? tile base + format for each? + + // Framebuffer with the attachments ready for use in the parent render pass. + VkFramebuffer handle = nullptr; + // Width of the framebuffer in pixels. + uint32_t width = 0; + // Height of the framebuffer in pixels. + uint32_t height = 0; + // References to color attachments, if used. + CachedTileView* color_attachments[4] = {nullptr}; + // Reference to depth/stencil attachment, if used. + CachedTileView* depth_stencil_attachment = nullptr; + + CachedFramebuffer(VkDevice device, VkRenderPass render_pass, + uint32_t surface_width, uint32_t surface_height, + CachedTileView* target_color_attachments[4], + CachedTileView* target_depth_stencil_attachment); + ~CachedFramebuffer(); + + bool IsCompatible(const RenderConfiguration& desired_config) const; + + private: + VkDevice device_ = nullptr; +}; + +// Cached render passes based on register states. +// Each render pass is dependent on the format, dimensions, and use of +// all attachments. The same render pass can be reused for multiple +// framebuffers pointing at various tile views, though those cached +// framebuffers are specific to the render pass. +class CachedRenderPass { + public: + // Configuration this pass was created with. + RenderConfiguration config; + // Initialized render pass for the register state. + VkRenderPass handle = nullptr; + // Cache of framebuffers for the various tile attachments. + std::vector cached_framebuffers; + + CachedRenderPass(VkDevice device, const RenderConfiguration& desired_config); + ~CachedRenderPass(); + + bool IsCompatible(const RenderConfiguration& desired_config) const; + + private: + VkDevice device_ = nullptr; +}; + +CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory, + TileViewKey view_key) + : device_(device), key(std::move(view_key)) { + // Map format to Vulkan. 
+ VkFormat vulkan_format = VK_FORMAT_UNDEFINED; + uint32_t bpp = 4; + if (key.color_or_depth) { + auto edram_format = static_cast(key.edram_format); + vulkan_format = ColorRenderTargetFormatToVkFormat(edram_format); + switch (edram_format) { + case ColorRenderTargetFormat::k_16_16_16_16: + case ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + case ColorRenderTargetFormat::k_32_32_FLOAT: + bpp = 8; + break; + default: + bpp = 4; + break; + } + } else { + auto edram_format = static_cast(key.edram_format); + vulkan_format = DepthRenderTargetFormatToVkFormat(edram_format); + } + assert_true(vulkan_format != VK_FORMAT_UNDEFINED); + assert_true(bpp == 4); + + // Create the image with the desired properties. + VkImageCreateInfo image_info; + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_info.pNext = nullptr; + // TODO(benvanik): exploit VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT so we can have + // multiple views. + image_info.flags = 0; + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.format = vulkan_format; + image_info.extent.width = key.tile_width * 80; + image_info.extent.height = key.tile_height * 16; + image_info.extent.depth = 1; + image_info.mipLevels = 1; + image_info.arrayLayers = 1; + // TODO(benvanik): native MSAA support? + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + image_info.usage |= key.color_or_depth + ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT + : VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_info.queueFamilyIndexCount = 0; + image_info.pQueueFamilyIndices = nullptr; + image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + auto err = vkCreateImage(device_, &image_info, nullptr, &image); + CheckResult(err, "vkCreateImage"); + + // Verify our assumptions about memory layout are correct. 
+ VkDeviceSize edram_offset = key.tile_offset * 5120; + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(device, image, &memory_requirements); + assert_true(edram_offset + memory_requirements.size <= kEdramBufferCapacity); + assert_true(edram_offset % memory_requirements.alignment == 0); + + // Bind to the region of EDRAM we occupy. + err = vkBindImageMemory(device_, image, edram_memory, edram_offset); + CheckResult(err, "vkBindImageMemory"); + + // Create the image view we'll use to attach it to a framebuffer. + VkImageViewCreateInfo image_view_info; + image_view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + image_view_info.pNext = nullptr; + image_view_info.flags = 0; + image_view_info.image = image; + image_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + image_view_info.format = image_info.format; + // TODO(benvanik): manipulate? may not be able to when attached. + image_view_info.components = { + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, + VK_COMPONENT_SWIZZLE_A, + }; + image_view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + if (key.color_or_depth) { + image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + } else { + image_view_info.subresourceRange.aspectMask = + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + } + err = vkCreateImageView(device_, &image_view_info, nullptr, &image_view); + CheckResult(err, "vkCreateImageView"); + + // TODO(benvanik): transition to general layout? 
+}
+
+CachedTileView::~CachedTileView() {
+  vkDestroyImageView(device_, image_view, nullptr);
+  vkDestroyImage(device_, image, nullptr);
+}
+
+CachedFramebuffer::CachedFramebuffer(
+    VkDevice device, VkRenderPass render_pass, uint32_t surface_width,
+    uint32_t surface_height, CachedTileView* target_color_attachments[4],
+    CachedTileView* target_depth_stencil_attachment)
+    : device_(device),
+      width(surface_width),
+      height(surface_height),
+      depth_stencil_attachment(target_depth_stencil_attachment) {
+  for (int i = 0; i < 4; ++i) {
+    color_attachments[i] = target_color_attachments[i];
+  }
+
+  // Create framebuffer.
+  VkImageView image_views[5] = {nullptr};
+  int image_view_count = 0;
+  for (int i = 0; i < 4; ++i) {
+    if (color_attachments[i]) {
+      image_views[image_view_count++] = color_attachments[i]->image_view;
+    }
+  }
+  if (depth_stencil_attachment) {
+    image_views[image_view_count++] = depth_stencil_attachment->image_view;
+  }
+  VkFramebufferCreateInfo framebuffer_info;
+  framebuffer_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
+  framebuffer_info.pNext = nullptr;
+  framebuffer_info.renderPass = render_pass;
+  framebuffer_info.attachmentCount = image_view_count;
+  framebuffer_info.pAttachments = image_views;
+  framebuffer_info.width = width;
+  framebuffer_info.height = height;
+  framebuffer_info.layers = 1;
+  auto err = vkCreateFramebuffer(device_, &framebuffer_info, nullptr, &handle);
+  CheckResult(err, "vkCreateFramebuffer");
+}
+
+CachedFramebuffer::~CachedFramebuffer() {
+  vkDestroyFramebuffer(device_, handle, nullptr);
+}
+
+bool CachedFramebuffer::IsCompatible(
+    const RenderConfiguration& desired_config) const {
+  // We already know all render pass things line up, so let's verify dimensions,
+  // edram offsets, etc. We need an exact match.
+  // TODO(benvanik): separate image views from images in tiles and store in fb?
+  for (int i = 0; i < 4; ++i) {
+    // Ensure that the attachment points to the same tile.
+ if (!color_attachments[i]) { + continue; + } + auto& color_info = color_attachments[i]->key; + auto& desired_color_info = desired_config.color[i]; + if (color_info.tile_offset != desired_color_info.edram_base || + color_info.edram_format != + static_cast(desired_color_info.format)) { + return false; + } + } + // Ensure depth attachment is correct. + if (depth_stencil_attachment && + (depth_stencil_attachment->key.tile_offset != + desired_config.depth_stencil.edram_base || + depth_stencil_attachment->key.edram_format != + static_cast(desired_config.depth_stencil.format))) { + return false; + } + return true; +} + +CachedRenderPass::CachedRenderPass(VkDevice device, + const RenderConfiguration& desired_config) + : device_(device) { + std::memcpy(&config, &desired_config, sizeof(config)); + + // Initialize all attachments to default unused. + // As we set layout(location=RT) in shaders we must always provide 4. + VkAttachmentDescription attachments[5]; + for (int i = 0; i < 4; ++i) { + attachments[i].flags = 0; + attachments[i].format = VK_FORMAT_UNDEFINED; + attachments[i].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[i].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[i].initialLayout = VK_IMAGE_LAYOUT_GENERAL; + attachments[i].finalLayout = VK_IMAGE_LAYOUT_GENERAL; + } + auto& depth_stencil_attachment = attachments[4]; + depth_stencil_attachment.flags = 0; + depth_stencil_attachment.format = VK_FORMAT_UNDEFINED; + depth_stencil_attachment.samples = VK_SAMPLE_COUNT_1_BIT; + depth_stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + depth_stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + depth_stencil_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + depth_stencil_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + depth_stencil_attachment.initialLayout 
= VK_IMAGE_LAYOUT_GENERAL; + depth_stencil_attachment.finalLayout = VK_IMAGE_LAYOUT_GENERAL; + VkAttachmentReference depth_stencil_attachment_ref; + depth_stencil_attachment_ref.attachment = VK_ATTACHMENT_UNUSED; + depth_stencil_attachment_ref.layout = VK_IMAGE_LAYOUT_GENERAL; + + // Configure attachments based on what's enabled. + VkAttachmentReference color_attachment_refs[4]; + for (int i = 0; i < 4; ++i) { + auto& color_config = config.color[i]; + // TODO(benvanik): see how loose we can be with these. + attachments[i].format = + ColorRenderTargetFormatToVkFormat(color_config.format); + auto& color_attachment_ref = color_attachment_refs[i]; + color_attachment_ref.attachment = i; + color_attachment_ref.layout = VK_IMAGE_LAYOUT_GENERAL; + } + auto& depth_config = config.depth_stencil; + depth_stencil_attachment_ref.attachment = 4; + depth_stencil_attachment.format = + DepthRenderTargetFormatToVkFormat(depth_config.format); + + // Single subpass that writes to our attachments. + VkSubpassDescription subpass_info; + subpass_info.flags = 0; + subpass_info.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass_info.inputAttachmentCount = 0; + subpass_info.pInputAttachments = nullptr; + subpass_info.colorAttachmentCount = 4; + subpass_info.pColorAttachments = color_attachment_refs; + subpass_info.pResolveAttachments = nullptr; + subpass_info.pDepthStencilAttachment = &depth_stencil_attachment_ref; + subpass_info.preserveAttachmentCount = 0; + subpass_info.pPreserveAttachments = nullptr; + + // Create the render pass. 
+ VkRenderPassCreateInfo render_pass_info; + render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + render_pass_info.pNext = nullptr; + render_pass_info.attachmentCount = 5; + render_pass_info.pAttachments = attachments; + render_pass_info.subpassCount = 1; + render_pass_info.pSubpasses = &subpass_info; + render_pass_info.dependencyCount = 0; + render_pass_info.pDependencies = nullptr; + auto err = vkCreateRenderPass(device_, &render_pass_info, nullptr, &handle); + CheckResult(err, "vkCreateRenderPass"); +} + +CachedRenderPass::~CachedRenderPass() { + for (auto framebuffer : cached_framebuffers) { + delete framebuffer; + } + cached_framebuffers.clear(); + + vkDestroyRenderPass(device_, handle, nullptr); +} + +bool CachedRenderPass::IsCompatible( + const RenderConfiguration& desired_config) const { + for (int i = 0; i < 4; ++i) { + // TODO(benvanik): allow compatible vulkan formats. + if (config.color[i].format != desired_config.color[i].format) { + return false; + } + } + if (config.depth_stencil.format != desired_config.depth_stencil.format) { + return false; + } + return true; +} + RenderCache::RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device) - : register_file_(register_file), device_(*device) {} + : register_file_(register_file), device_(*device) { + // Create the buffer we'll bind to our memory. + // We do this first so we can get the right memory type. 
+ VkBufferCreateInfo buffer_info; + buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_info.pNext = nullptr; + buffer_info.flags = 0; + buffer_info.size = kEdramBufferCapacity; + buffer_info.usage = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_info.queueFamilyIndexCount = 0; + buffer_info.pQueueFamilyIndices = nullptr; + auto err = vkCreateBuffer(*device, &buffer_info, nullptr, &edram_buffer_); + CheckResult(err, "vkCreateBuffer"); -RenderCache::~RenderCache() = default; + // Query requirements for the buffer. + // It should be 1:1. + VkMemoryRequirements buffer_requirements; + vkGetBufferMemoryRequirements(device_, edram_buffer_, &buffer_requirements); + assert_true(buffer_requirements.size == kEdramBufferCapacity); -VkRenderPass RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader) { + // Create a dummy image so we can see what memory bits it requires. + // They should overlap with the buffer requirements but are likely more + // strict. 
+ VkImageCreateInfo test_image_info; + test_image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + test_image_info.pNext = nullptr; + test_image_info.flags = 0; + test_image_info.imageType = VK_IMAGE_TYPE_2D; + test_image_info.format = VK_FORMAT_R8G8B8A8_UINT; + test_image_info.extent.width = 128; + test_image_info.extent.height = 128; + test_image_info.extent.depth = 1; + test_image_info.mipLevels = 1; + test_image_info.arrayLayers = 1; + test_image_info.samples = VK_SAMPLE_COUNT_1_BIT; + test_image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + test_image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + test_image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + test_image_info.queueFamilyIndexCount = 0; + test_image_info.pQueueFamilyIndices = nullptr; + test_image_info.initialLayout = VK_IMAGE_LAYOUT_GENERAL; + VkImage test_image = nullptr; + err = vkCreateImage(device_, &test_image_info, nullptr, &test_image); + CheckResult(err, "vkCreateImage"); + VkMemoryRequirements image_requirements; + vkGetImageMemoryRequirements(device_, test_image, &image_requirements); + vkDestroyImage(device_, test_image, nullptr); + assert_true((image_requirements.memoryTypeBits & + buffer_requirements.memoryTypeBits) != 0); + + // Allocate EDRAM memory. + VkMemoryRequirements memory_requirements; + memory_requirements.size = buffer_requirements.size; + memory_requirements.alignment = buffer_requirements.alignment; + memory_requirements.memoryTypeBits = image_requirements.memoryTypeBits; + // TODO(benvanik): do we need it host visible? + edram_memory_ = device->AllocateMemory(memory_requirements, 0); + + // Bind buffer to map our entire memory. + vkBindBufferMemory(device_, edram_buffer_, edram_memory_, 0); +} + +RenderCache::~RenderCache() { + // TODO(benvanik): wait for idle. + + // Dispose all render passes (and their framebuffers). 
+ for (auto render_pass : cached_render_passes_) { + delete render_pass; + } + cached_render_passes_.clear(); + + // Dispose all of our cached tile views. + for (auto tile_view : cached_tile_views_) { + delete tile_view; + } + cached_tile_views_.clear(); + + // Release underlying EDRAM memory. + vkDestroyBuffer(device_, edram_buffer_, nullptr); + vkFreeMemory(device_, edram_memory_, nullptr); +} + +const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader) { assert_null(current_command_buffer_); current_command_buffer_ = command_buffer; // Lookup or construct a render pass compatible with our current state. - VkRenderPass render_pass = nullptr; + auto config = &current_state_.config; + CachedRenderPass* render_pass = nullptr; + CachedFramebuffer* framebuffer = nullptr; + auto& regs = shadow_registers_; + bool dirty = false; + dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); + dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); + dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); + dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); + dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); + dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); + dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); + dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); + if (!dirty && current_state_.render_pass) { + // No registers have changed so we can reuse the previous render pass - + // just begin with what we had. + render_pass = current_state_.render_pass; + framebuffer = current_state_.framebuffer; + } else { + // Re-parse configuration. + if (!ParseConfiguration(config)) { + return nullptr; + } - // Begin render pass. 
+ // Lookup or generate a new render pass and framebuffer for the new state. + if (!ConfigureRenderPass(config, &render_pass, &framebuffer)) { + return nullptr; + } + current_state_.render_pass = render_pass; + current_state_.framebuffer = framebuffer; + } + if (!render_pass) { + return nullptr; + } + + // Setup render pass in command buffer. + // This is meant to preserve previous contents as we may be called + // repeatedly. VkRenderPassBeginInfo render_pass_begin_info; render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; render_pass_begin_info.pNext = nullptr; - render_pass_begin_info.renderPass = render_pass; - - // Target framebuffer. - // render_pass_begin_info.framebuffer = current_buffer.framebuffer; + render_pass_begin_info.renderPass = render_pass->handle; + render_pass_begin_info.framebuffer = framebuffer->handle; // Render into the entire buffer (or at least tell the API we are doing // this). In theory it'd be better to clip this to the scissor region, but // the docs warn anything but the full framebuffer may be slow. render_pass_begin_info.renderArea.offset.x = 0; render_pass_begin_info.renderArea.offset.y = 0; - // render_pass_begin_info.renderArea.extent.width = surface_width_; - // render_pass_begin_info.renderArea.extent.height = surface_height_; + render_pass_begin_info.renderArea.extent.width = config->surface_pitch_px; + render_pass_begin_info.renderArea.extent.height = config->surface_height_px; // Configure clear color, if clearing. - VkClearValue color_clear_value; - color_clear_value.color.float32[0] = 238 / 255.0f; - color_clear_value.color.float32[1] = 238 / 255.0f; - color_clear_value.color.float32[2] = 238 / 255.0f; - color_clear_value.color.float32[3] = 1.0f; - VkClearValue clear_values[] = {color_clear_value}; - render_pass_begin_info.clearValueCount = - static_cast<uint32_t>(xe::countof(clear_values)); - render_pass_begin_info.pClearValues = clear_values; + // TODO(benvanik): enable clearing here during resolve? 
+ render_pass_begin_info.clearValueCount = 0; + render_pass_begin_info.pClearValues = nullptr; + // Begin the render pass. vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); - return render_pass; + return &current_state_; +} + +bool RenderCache::ParseConfiguration(RenderConfiguration* config) { + auto& regs = shadow_registers_; + + // RB_MODECONTROL + // Rough mode control (color, color+depth, etc). + config->mode_control = static_cast<ModeControl>(regs.rb_modecontrol & 0x7); + + // RB_SURFACE_INFO + // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html + config->surface_pitch_px = regs.rb_surface_info & 0x3FFF; + config->surface_msaa = + static_cast<MsaaSamples>((regs.rb_surface_info >> 16) & 0x3); + + // TODO(benvanik): verify min/max so we don't go out of bounds. + // TODO(benvanik): has to be a good way to get height. + // Guess the height from the scissor height. + // It's wildly inaccurate, but I've never seen it be bigger than the + // EDRAM tiling. + uint32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF; + uint32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y; + config->surface_height_px = std::min(2560u, xe::round_up(ws_h, 16)); + + // Color attachment configuration. + if (config->mode_control == ModeControl::kColorDepth) { + uint32_t color_info[4] = { + regs.rb_color_info, regs.rb_color1_info, regs.rb_color2_info, + regs.rb_color3_info, + }; + for (int i = 0; i < 4; ++i) { + config->color[i].edram_base = color_info[i] & 0xFFF; + config->color[i].format = + static_cast<ColorRenderTargetFormat>((color_info[i] >> 16) & 0xF); + // We don't support GAMMA formats, so switch them to what we do support. 
+ switch (config->color[i].format) { + case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8; + break; + } + } + } else { + for (int i = 0; i < 4; ++i) { + config->color[i].edram_base = 0; + config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8; + } + } + + // Depth/stencil attachment configuration. + if (config->mode_control == ModeControl::kColorDepth || + config->mode_control == ModeControl::kDepth) { + config->depth_stencil.edram_base = regs.rb_depth_info & 0xFFF; + config->depth_stencil.format = + static_cast<DepthRenderTargetFormat>((regs.rb_depth_info >> 16) & 0x1); + } else { + config->depth_stencil.edram_base = 0; + config->depth_stencil.format = DepthRenderTargetFormat::kD24S8; + } + + return true; +} + +bool RenderCache::ConfigureRenderPass(RenderConfiguration* config, + CachedRenderPass** out_render_pass, + CachedFramebuffer** out_framebuffer) { + *out_render_pass = nullptr; + *out_framebuffer = nullptr; + + // TODO(benvanik): better lookup. + // Attempt to find the render pass in our cache. + CachedRenderPass* render_pass = nullptr; + for (auto cached_render_pass : cached_render_passes_) { + if (cached_render_pass->IsCompatible(*config)) { + // Found a match. + render_pass = cached_render_pass; + break; + } + } + + // If no render pass was found in the cache create a new one. + if (!render_pass) { + render_pass = new CachedRenderPass(device_, *config); + cached_render_passes_.push_back(render_pass); + } + + // TODO(benvanik): better lookup. + // Attempt to find the framebuffer in the render pass cache. + CachedFramebuffer* framebuffer = nullptr; + for (auto cached_framebuffer : render_pass->cached_framebuffers) { + if (cached_framebuffer->IsCompatible(*config)) { + // Found a match. + framebuffer = cached_framebuffer; + break; + } + } + + // If no framebuffer was found in the cache create a new one. 
+ if (!framebuffer) { + CachedTileView* target_color_attachments[4] = {nullptr, nullptr, nullptr, + nullptr}; + for (int i = 0; i < 4; ++i) { + TileViewKey color_key; + color_key.tile_offset = config->color[i].edram_base; + color_key.tile_width = config->surface_pitch_px / 80; + color_key.tile_height = config->surface_height_px / 16; + color_key.color_or_depth = 1; + color_key.edram_format = static_cast<uint16_t>(config->color[i].format); + target_color_attachments[i] = GetTileView(color_key); + if (!target_color_attachments) { + XELOGE("Failed to get tile view for color attachment"); + return false; + } + } + + TileViewKey depth_stencil_key; + depth_stencil_key.tile_offset = config->depth_stencil.edram_base; + depth_stencil_key.tile_width = config->surface_pitch_px / 80; + depth_stencil_key.tile_height = config->surface_height_px / 16; + depth_stencil_key.color_or_depth = 0; + depth_stencil_key.edram_format = + static_cast<uint16_t>(config->depth_stencil.format); + auto target_depth_stencil_attachment = GetTileView(depth_stencil_key); + if (!target_depth_stencil_attachment) { + XELOGE("Failed to get tile view for depth/stencil attachment"); + return false; + } + + framebuffer = new CachedFramebuffer( + device_, render_pass->handle, config->surface_pitch_px, + config->surface_height_px, target_color_attachments, + target_depth_stencil_attachment); + render_pass->cached_framebuffers.push_back(framebuffer); + } + + *out_render_pass = render_pass; + *out_framebuffer = framebuffer; + return true; +} + +CachedTileView* RenderCache::GetTileView(const TileViewKey& view_key) { + // Check the cache. + // TODO(benvanik): better lookup. + for (auto tile_view : cached_tile_views_) { + if (tile_view->IsEqual(view_key)) { + return tile_view; + } + } + + // Create a new tile and add to the cache. 
+ auto tile_view = new CachedTileView(device_, edram_memory_, view_key); + cached_tile_views_.push_back(tile_view); + return tile_view; } void RenderCache::EndRenderPass() { @@ -84,6 +742,15 @@ void RenderCache::ClearCache() { // TODO(benvanik): caching. } +bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { + uint32_t value = register_file_->values[register_name].u32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index aeaa07264..865b34cfd 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -21,6 +21,58 @@ namespace xe { namespace gpu { namespace vulkan { +// TODO(benvanik): make public API? +class CachedTileView; +class CachedFramebuffer; +class CachedRenderPass; + +// Uniquely identifies EDRAM tiles. +struct TileViewKey { + // Offset into EDRAM in 5120b tiles. + uint16_t tile_offset; + // Tile width of the view in base 80x16 tiles. + uint16_t tile_width; + // Tile height of the view in base 80x16 tiles. + uint16_t tile_height; + // 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat. + uint16_t color_or_depth : 1; + // Either ColorRenderTargetFormat or DepthRenderTargetFormat. + uint16_t edram_format : 15; +}; +static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed"); + +// Parsed render configuration from the current render state. +struct RenderConfiguration { + // Render mode (color+depth, depth-only, etc). + xenos::ModeControl mode_control; + // Target surface pitch, in pixels. + uint32_t surface_pitch_px; + // ESTIMATED target surface height, in pixels. + uint32_t surface_height_px; + // Surface MSAA setting. + MsaaSamples surface_msaa; + // Color attachments for the 4 render targets. 
+ struct { + uint32_t edram_base; + ColorRenderTargetFormat format; + } color[4]; + // Depth/stencil attachment. + struct { + uint32_t edram_base; + DepthRenderTargetFormat format; + } depth_stencil; +}; + +// Current render state based on the register-specified configuration. +struct RenderState { + // Parsed configuration. + RenderConfiguration config; + // Render pass (to be used with pipelines/etc). + CachedRenderPass* render_pass = nullptr; + // Target framebuffer bound to the render pass. + CachedFramebuffer* framebuffer = nullptr; +}; + // Manages the virtualized EDRAM and the render target cache. // // On the 360 the render target is an opaque block of memory in EDRAM that's @@ -165,9 +217,9 @@ class RenderCache { // Begins a render pass targeting the state-specified framebuffer formats. // The command buffer will be transitioned into the render pass phase. - VkRenderPass BeginRenderPass(VkCommandBuffer command_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader); + const RenderState* BeginRenderPass(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader); // Ends the current render pass. // The command buffer will be transitioned out of the render pass phase. @@ -177,9 +229,57 @@ class RenderCache { void ClearCache(); private: + // Parses the current state into a configuration object. + bool ParseConfiguration(RenderConfiguration* config); + + // Gets or creates a render pass and frame buffer for the given configuration. + // This attempts to reuse as much as possible across render passes and + // framebuffers. + bool ConfigureRenderPass(RenderConfiguration* config, + CachedRenderPass** out_render_pass, + CachedFramebuffer** out_framebuffer); + + // Gets or creates a tile view with the given parameters. + CachedTileView* GetTileView(const TileViewKey& view_key); + RegisterFile* register_file_ = nullptr; VkDevice device_ = nullptr; + // Entire 10MiB of EDRAM, aliased to hell by various VkImages. 
+ VkDeviceMemory edram_memory_ = nullptr; + // Buffer overlayed 1:1 with edram_memory_ to allow raw access. + VkBuffer edram_buffer_ = nullptr; + + // Cache of VkImage and VkImageView's for all of our EDRAM tilings. + // TODO(benvanik): non-linear lookup? Should only be a small number of these. + std::vector<CachedTileView*> cached_tile_views_; + + // Cache of render passes based on formats. + std::vector<CachedRenderPass*> cached_render_passes_; + + // Shadows of the registers that impact the render pass we choose. + // If the registers don't change between passes we can quickly reuse the + // previous one. + struct ShadowRegisters { + uint32_t rb_modecontrol; + uint32_t rb_surface_info; + uint32_t rb_color_info; + uint32_t rb_color1_info; + uint32_t rb_color2_info; + uint32_t rb_color3_info; + uint32_t rb_depth_info; + uint32_t pa_sc_window_scissor_tl; + uint32_t pa_sc_window_scissor_br; + + ShadowRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } shadow_registers_; + bool SetShadowRegister(uint32_t* dest, uint32_t register_name); + + // Configuration used for the current/previous Begin/End, representing the + // current shadow register state. + RenderState current_state_; + // Only valid during a BeginRenderPass/EndRenderPass block. VkCommandBuffer current_command_buffer_ = nullptr; }; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 8047bd202..e19e89c29 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -179,9 +179,17 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, // We need the to do just about anything so validate here. auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader()); auto pixel_shader = static_cast<VulkanShader*>(active_pixel_shader()); - if (!vertex_shader || !vertex_shader->is_valid() || !pixel_shader || - !pixel_shader->is_valid()) { - // Skipped because we can't understand the shader. 
+ if (!vertex_shader || !vertex_shader->is_valid()) { + // Always need a vertex shader. + return true; + } + // Depth-only mode doesn't need a pixel shader (we'll use a fake one). + if (enable_mode == ModeControl::kDepth) { + // Use a dummy pixel shader when required. + // TODO(benvanik): dummy pixel shader. + assert_not_null(pixel_shader); + } else if (!pixel_shader || !pixel_shader->is_valid()) { + // Need a pixel shader in normal color mode. return true; } @@ -198,16 +206,16 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, // Begin the render pass. // This will setup our framebuffer and begin the pass in the command buffer. - VkRenderPass render_pass = render_cache_->BeginRenderPass( + auto render_state = render_cache_->BeginRenderPass( command_buffer, vertex_shader, pixel_shader); - if (!render_pass) { + if (!render_state) { return false; } // Configure the pipeline for drawing. // This encodes all render state (blend, depth, etc), our shader stages, // and our vertex input layout. - if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_pass, + if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_state, vertex_shader, pixel_shader, primitive_type)) { render_cache_->EndRenderPass(); @@ -215,12 +223,14 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, } // Upload the constants the shaders require. - auto vertex_constant_offset = buffer_cache_->UploadConstantRegisters( + // These are optional, and if none are defined 0 will be returned. + VkDeviceSize vertex_constant_offset = buffer_cache_->UploadConstantRegisters( vertex_shader->constant_register_map()); - auto pixel_constant_offset = buffer_cache_->UploadConstantRegisters( + VkDeviceSize pixel_constant_offset = buffer_cache_->UploadConstantRegisters( pixel_shader->constant_register_map()); if (vertex_constant_offset == VK_WHOLE_SIZE || pixel_constant_offset == VK_WHOLE_SIZE) { + // Shader wants constants but we couldn't upload them. 
render_cache_->EndRenderPass(); return false; } @@ -307,11 +317,17 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, if (queue_mutex_) { queue_mutex_->lock(); } - vkQueueWaitIdle(queue_); + err = vkQueueWaitIdle(queue_); + CheckResult(err, "vkQueueWaitIdle"); + err = vkDeviceWaitIdle(*device_); + CheckResult(err, "vkDeviceWaitIdle"); if (queue_mutex_) { queue_mutex_->unlock(); } - command_buffer_pool_->Scavenge(); + while (command_buffer_pool_->has_pending()) { + command_buffer_pool_->Scavenge(); + xe::threading::MaybeYield(); + } vkDestroyFence(*device_, fence, nullptr); return true; diff --git a/src/xenia/gpu/vulkan/vulkan_shader.h b/src/xenia/gpu/vulkan/vulkan_shader.h index 97dbd5822..1dc55d8b0 100644 --- a/src/xenia/gpu/vulkan/vulkan_shader.h +++ b/src/xenia/gpu/vulkan/vulkan_shader.h @@ -31,7 +31,7 @@ class VulkanShader : public Shader { bool Prepare(); private: - VkDevice device_ = nullptr; + VkDevice device_ = nullptr; VkShaderModule shader_module_ = nullptr; }; diff --git a/src/xenia/ui/vulkan/fenced_pools.h b/src/xenia/ui/vulkan/fenced_pools.h index 52274a9d9..3bc7e30f6 100644 --- a/src/xenia/ui/vulkan/fenced_pools.h +++ b/src/xenia/ui/vulkan/fenced_pools.h @@ -47,6 +47,9 @@ class BaseFencedPool { } } + // True if one or more batches are still pending on the GPU. + bool has_pending() const { return pending_batch_list_head_ != nullptr; } + // Checks all pending batches for completion and scavenges their entries. // This should be called as frequently as reasonable. 
void Scavenge() { diff --git a/src/xenia/ui/vulkan/vulkan.cc b/src/xenia/ui/vulkan/vulkan.cc index ba889e109..77c8fb548 100644 --- a/src/xenia/ui/vulkan/vulkan.cc +++ b/src/xenia/ui/vulkan/vulkan.cc @@ -10,3 +10,7 @@ #include "xenia/ui/vulkan/vulkan.h" DEFINE_bool(vulkan_validation, false, "Enable Vulkan validation layers."); + +DEFINE_bool(vulkan_primary_queue_only, false, + "Force the use of the primary queue, ignoring any additional that " + "may be present."); diff --git a/src/xenia/ui/vulkan/vulkan.h b/src/xenia/ui/vulkan/vulkan.h index 697c9fa57..7a7e64f10 100644 --- a/src/xenia/ui/vulkan/vulkan.h +++ b/src/xenia/ui/vulkan/vulkan.h @@ -30,5 +30,6 @@ #define XELOGVK XELOGI DECLARE_bool(vulkan_validation); +DECLARE_bool(vulkan_primary_queue_only); #endif // XENIA_UI_VULKAN_VULKAN_H_ diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc index a2c5998f4..381fb0ab7 100644 --- a/src/xenia/ui/vulkan/vulkan_context.cc +++ b/src/xenia/ui/vulkan/vulkan_context.cc @@ -142,7 +142,7 @@ void VulkanContext::EndSwap() { } std::unique_ptr<RawImage> VulkanContext::Capture() { - assert_always(); + // TODO(benvanik): read back swap chain front buffer. return nullptr; } diff --git a/src/xenia/ui/vulkan/vulkan_device.cc b/src/xenia/ui/vulkan/vulkan_device.cc index c7ca1d974..ded29212d 100644 --- a/src/xenia/ui/vulkan/vulkan_device.cc +++ b/src/xenia/ui/vulkan/vulkan_device.cc @@ -129,6 +129,11 @@ bool VulkanDevice::Initialize(DeviceInfo device_info) { return false; } + // Some tools *cough* renderdoc *cough* can't handle multiple queues. + if (FLAGS_vulkan_primary_queue_only) { + queue_count = 1; + } + VkDeviceQueueCreateInfo queue_info; queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queue_info.pNext = nullptr; From 990b600f532f3ff99efbf3ad4f22a8a9521d5e7d Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Fri, 19 Feb 2016 21:08:50 -0800 Subject: [PATCH 018/145] Buffer uploads (without reclaiming, yet). 
--- src/xenia/gpu/vulkan/buffer_cache.cc | 92 +++++++++++++++++++++++++--- src/xenia/gpu/vulkan/buffer_cache.h | 12 +++- 2 files changed, 94 insertions(+), 10 deletions(-) diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc index 4cace24ba..5b9f8a182 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -22,6 +22,9 @@ namespace vulkan { using xe::ui::vulkan::CheckResult; +// Space kept between tail and head when wrapping. +constexpr VkDeviceSize kDeadZone = 4 * 1024; + BufferCache::BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device, size_t capacity) : register_file_(register_file), @@ -187,6 +190,42 @@ BufferCache::~BufferCache() { VkDeviceSize BufferCache::UploadConstantRegisters( const Shader::ConstantRegisterMap& constant_register_map) { + // Fat struct, including all registers: + // struct { + // vec4 float[512]; + // uint bool[8]; + // uint loop[32]; + // }; + size_t total_size = xe::round_up( + static_cast<size_t>((512 * 4 * 4) + (32 * 4) + (8 * 4)), + uniform_buffer_alignment_); + auto offset = AllocateTransientData(uniform_buffer_alignment_, total_size); + if (offset == VK_WHOLE_SIZE) { + // OOM. + return VK_WHOLE_SIZE; + } + + // Copy over all the registers. + const auto& values = register_file_->values; + uint8_t* dest_ptr = + reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset; + std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32, + (512 * 4 * 4)); + dest_ptr += 512 * 4 * 4; + std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, + 8 * 4); + dest_ptr += 8 * 4; + std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32, + 32 * 4); + dest_ptr += 32 * 4; + + return offset; + +// Packed upload code. +// This is not currently supported by the shaders, but would be awesome. +// We should be able to use this for any shader that does not do dynamic +// constant indexing. 
+#if 0 // Allocate space in the buffer for our data. auto offset = AllocateTransientData(uniform_buffer_alignment_, constant_register_map.packed_byte_length); @@ -230,6 +269,7 @@ VkDeviceSize BufferCache::UploadConstantRegisters( } return offset; +#endif // 0 } std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer( @@ -282,17 +322,53 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer( return {transient_vertex_buffer_, offset}; } -VkDeviceSize BufferCache::AllocateTransientData(size_t alignment, - size_t length) { - // Try to add to end, wrapping if required. - - // Check to ensure there is space. - if (false) { - // Consume all fences. +VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize alignment, + VkDeviceSize length) { + // Try fast path (if we have space). + VkDeviceSize offset = TryAllocateTransientData(alignment, length); + if (offset != VK_WHOLE_SIZE) { + return offset; } - // Slice off our bit. + // Ran out of easy allocations. + // Try consuming fences before we panic. + assert_always("Reclamation not yet implemented"); + // Try again. It may still fail if we didn't get enough space back. + return TryAllocateTransientData(alignment, length); +} + +VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize alignment, + VkDeviceSize length) { + if (transient_tail_offset_ >= transient_head_offset_) { + // Tail follows head, so things are easy: + // | H----T | + if (transient_tail_offset_ + length <= transient_capacity_) { + // Allocation fits from tail to end of buffer, so grow. + // | H----**T | + VkDeviceSize offset = transient_tail_offset_; + transient_tail_offset_ += length; + return offset; + } else if (length + kDeadZone <= transient_head_offset_) { + // Can't fit at the end, but can fit if we wrap around. 
+ // |**T H----....| + VkDeviceSize offset = 0; + transient_tail_offset_ = length; + return offset; + } + } else { + // Head follows tail, so we're reversed: + // |----T H---| + if (transient_tail_offset_ + length + kDeadZone <= transient_head_offset_) { + // Fits from tail to head. + // |----***T H---| + VkDeviceSize offset = transient_tail_offset_; + transient_tail_offset_ += length; + return offset; + } + } + + // No more space. return VK_WHOLE_SIZE; } diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h index 661e30aa7..af42f23d8 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.h +++ b/src/xenia/gpu/vulkan/buffer_cache.h @@ -78,8 +78,14 @@ class BufferCache { private: // Allocates a block of memory in the transient buffer. + // When memory is not available fences are checked and space is reclaimed. // Returns VK_WHOLE_SIZE if requested amount of memory is not available. - VkDeviceSize AllocateTransientData(size_t alignment, size_t length); + VkDeviceSize AllocateTransientData(VkDeviceSize alignment, + VkDeviceSize length); + // Tries to allocate a block of memory in the transient buffer. + // Returns VK_WHOLE_SIZE if requested amount of memory is not available. + VkDeviceSize TryAllocateTransientData(VkDeviceSize alignment, + VkDeviceSize length); RegisterFile* register_file_ = nullptr; VkDevice device_ = nullptr; @@ -92,8 +98,10 @@ class BufferCache { VkBuffer transient_vertex_buffer_ = nullptr; VkDeviceMemory transient_buffer_memory_ = nullptr; void* transient_buffer_data_ = nullptr; + VkDeviceSize transient_head_offset_ = 0; + VkDeviceSize transient_tail_offset_ = 0; - // Required alignemnts for our various types. + // Required alignments for our various types. // All allocations must start at the appropriate alignment. 
VkDeviceSize uniform_buffer_alignment_ = 0; VkDeviceSize index_buffer_alignment_ = 0; From 97174dbe4d69ad1ecfa1b270219868a10caee86e Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Fri, 19 Feb 2016 23:23:58 -0800 Subject: [PATCH 019/145] Skeleton leaky hacky hardcoded pipeline setup. --- src/xenia/gpu/vulkan/buffer_cache.h | 3 + src/xenia/gpu/vulkan/pipeline_cache.cc | 379 +++++++++++++++++- src/xenia/gpu/vulkan/pipeline_cache.h | 26 +- src/xenia/gpu/vulkan/render_cache.cc | 2 + src/xenia/gpu/vulkan/render_cache.h | 2 + src/xenia/gpu/vulkan/texture_cache.cc | 50 ++- src/xenia/gpu/vulkan/texture_cache.h | 9 + .../gpu/vulkan/vulkan_command_processor.cc | 57 +-- .../gpu/vulkan/vulkan_command_processor.h | 3 + 9 files changed, 495 insertions(+), 36 deletions(-) diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h index af42f23d8..9a264a80b 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.h +++ b/src/xenia/gpu/vulkan/buffer_cache.h @@ -38,6 +38,9 @@ class BufferCache { VkDescriptorSet constant_descriptor_set() const { return transient_descriptor_set_; } + VkDescriptorSetLayout constant_descriptor_set_layout() const { + return descriptor_set_layout_; + } // Uploads the constants specified in the register maps to the transient // uniform storage buffer. 
diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index 5416aea7f..aca0d72b5 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -23,11 +23,64 @@ namespace vulkan { using xe::ui::vulkan::CheckResult; -PipelineCache::PipelineCache(RegisterFile* register_file, - ui::vulkan::VulkanDevice* device) - : register_file_(register_file), device_(*device) {} +PipelineCache::PipelineCache( + RegisterFile* register_file, ui::vulkan::VulkanDevice* device, + VkDescriptorSetLayout uniform_descriptor_set_layout, + VkDescriptorSetLayout texture_descriptor_set_layout) + : register_file_(register_file), device_(*device) { + // Initialize the shared driver pipeline cache. + // We'll likely want to serialize this and reuse it, if that proves to be + // useful. If the shaders are expensive and this helps we could do it per + // game, otherwise a single shared cache for render state/etc. + VkPipelineCacheCreateInfo pipeline_cache_info; + pipeline_cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; + pipeline_cache_info.pNext = nullptr; + pipeline_cache_info.flags = 0; + pipeline_cache_info.initialDataSize = 0; + pipeline_cache_info.pInitialData = nullptr; + auto err = vkCreatePipelineCache(device_, &pipeline_cache_info, nullptr, + &pipeline_cache_); + CheckResult(err, "vkCreatePipelineCache"); + + // Descriptors used by the pipelines. + // These are the only ones we can ever bind. + VkDescriptorSetLayout set_layouts[] = { + // Per-draw constant register uniforms. + uniform_descriptor_set_layout, + // All texture bindings. + texture_descriptor_set_layout, + }; + + // Push constants used for draw parameters. + // We need to keep these under 128b across all stages. 
+ VkPushConstantRange push_constant_ranges[2]; + push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + push_constant_ranges[0].offset = 0; + push_constant_ranges[0].size = sizeof(float) * 16; + push_constant_ranges[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + push_constant_ranges[1].offset = sizeof(float) * 16; + push_constant_ranges[1].size = sizeof(int); + + // Shared pipeline layout. + VkPipelineLayoutCreateInfo pipeline_layout_info; + pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipeline_layout_info.pNext = nullptr; + pipeline_layout_info.flags = 0; + pipeline_layout_info.setLayoutCount = + static_cast(xe::countof(set_layouts)); + pipeline_layout_info.pSetLayouts = set_layouts; + pipeline_layout_info.pushConstantRangeCount = + static_cast(xe::countof(push_constant_ranges)); + pipeline_layout_info.pPushConstantRanges = push_constant_ranges; + err = vkCreatePipelineLayout(*device, &pipeline_layout_info, nullptr, + &pipeline_layout_); + CheckResult(err, "vkCreatePipelineLayout"); +} PipelineCache::~PipelineCache() { + vkDestroyPipelineLayout(device_, pipeline_layout_, nullptr); + vkDestroyPipelineCache(device_, pipeline_cache_, nullptr); + // Destroy all shaders. for (auto it : shader_map_) { delete it.second; @@ -88,13 +141,331 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader, PrimitiveType primitive_type) { - return false; + // Uh, yeah. This happened. 
+ + VkPipelineShaderStageCreateInfo pipeline_stages[3]; + uint32_t pipeline_stage_count = 0; + auto& vertex_pipeline_stage = pipeline_stages[pipeline_stage_count++]; + vertex_pipeline_stage.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + vertex_pipeline_stage.pNext = nullptr; + vertex_pipeline_stage.flags = 0; + vertex_pipeline_stage.stage = VK_SHADER_STAGE_VERTEX_BIT; + vertex_pipeline_stage.module = vertex_shader->shader_module(); + vertex_pipeline_stage.pName = "main"; + vertex_pipeline_stage.pSpecializationInfo = nullptr; + auto geometry_shader = GetGeometryShader(primitive_type); + if (geometry_shader) { + auto& geometry_pipeline_stage = pipeline_stages[pipeline_stage_count++]; + geometry_pipeline_stage.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + geometry_pipeline_stage.pNext = nullptr; + geometry_pipeline_stage.flags = 0; + geometry_pipeline_stage.stage = VK_SHADER_STAGE_GEOMETRY_BIT; + geometry_pipeline_stage.module = geometry_shader; + geometry_pipeline_stage.pName = "main"; + geometry_pipeline_stage.pSpecializationInfo = nullptr; + } + auto& pixel_pipeline_stage = pipeline_stages[pipeline_stage_count++]; + pixel_pipeline_stage.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + pixel_pipeline_stage.pNext = nullptr; + pixel_pipeline_stage.flags = 0; + pixel_pipeline_stage.stage = VK_SHADER_STAGE_FRAGMENT_BIT; + pixel_pipeline_stage.module = pixel_shader->shader_module(); + pixel_pipeline_stage.pName = "main"; + pixel_pipeline_stage.pSpecializationInfo = nullptr; + + VkPipelineVertexInputStateCreateInfo vertex_state_info; + vertex_state_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_state_info.pNext = nullptr; + VkVertexInputBindingDescription vertex_binding_descrs[64]; + uint32_t vertex_binding_count = 0; + VkVertexInputAttributeDescription vertex_attrib_descrs[64]; + uint32_t vertex_attrib_count = 0; + for (const auto& vertex_binding : vertex_shader->vertex_bindings()) { 
+ assert_true(vertex_binding_count < xe::countof(vertex_binding_descrs)); + auto& vertex_binding_descr = vertex_binding_descrs[vertex_binding_count++]; + vertex_binding_descr.binding = vertex_binding.binding_index; + vertex_binding_descr.stride = vertex_binding.stride_words * 4; + vertex_binding_descr.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + + for (const auto& attrib : vertex_binding.attributes) { + assert_true(vertex_attrib_count < xe::countof(vertex_attrib_descrs)); + auto& vertex_attrib_descr = vertex_attrib_descrs[vertex_attrib_count++]; + vertex_attrib_descr.location = attrib.attrib_index; + vertex_attrib_descr.binding = vertex_binding.binding_index; + vertex_attrib_descr.format = VK_FORMAT_UNDEFINED; + vertex_attrib_descr.offset = attrib.fetch_instr.attributes.offset * 4; + + bool is_signed = attrib.fetch_instr.attributes.is_signed; + bool is_integer = attrib.fetch_instr.attributes.is_integer; + switch (attrib.fetch_instr.attributes.data_format) { + case VertexFormat::k_8_8_8_8: + vertex_attrib_descr.format = + is_signed ? VK_FORMAT_R8G8B8A8_SNORM : VK_FORMAT_R8G8B8A8_UNORM; + break; + case VertexFormat::k_2_10_10_10: + vertex_attrib_descr.format = is_signed + ? VK_FORMAT_A2R10G10B10_SNORM_PACK32 + : VK_FORMAT_A2R10G10B10_UNORM_PACK32; + break; + case VertexFormat::k_10_11_11: + assert_always("unsupported?"); + vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; + break; + case VertexFormat::k_11_11_10: + assert_true(is_signed); + vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; + break; + case VertexFormat::k_16_16: + vertex_attrib_descr.format = + is_signed ? VK_FORMAT_R16G16_SNORM : VK_FORMAT_R16G16_UNORM; + break; + case VertexFormat::k_16_16_FLOAT: + vertex_attrib_descr.format = + is_signed ? VK_FORMAT_R16G16_SSCALED : VK_FORMAT_R16G16_USCALED; + break; + case VertexFormat::k_16_16_16_16: + vertex_attrib_descr.format = is_signed ? 
VK_FORMAT_R16G16B16A16_SNORM + : VK_FORMAT_R16G16B16A16_UNORM; + break; + case VertexFormat::k_16_16_16_16_FLOAT: + vertex_attrib_descr.format = is_signed + ? VK_FORMAT_R16G16B16A16_SSCALED + : VK_FORMAT_R16G16B16A16_USCALED; + break; + case VertexFormat::k_32: + vertex_attrib_descr.format = + is_signed ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT; + break; + case VertexFormat::k_32_32: + vertex_attrib_descr.format = + is_signed ? VK_FORMAT_R32G32_SINT : VK_FORMAT_R32G32_UINT; + break; + case VertexFormat::k_32_32_32_32: + vertex_attrib_descr.format = + is_signed ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32_UINT; + break; + case VertexFormat::k_32_FLOAT: + assert_true(is_signed); + vertex_attrib_descr.format = VK_FORMAT_R32_SFLOAT; + break; + case VertexFormat::k_32_32_FLOAT: + assert_true(is_signed); + vertex_attrib_descr.format = VK_FORMAT_R32G32_SFLOAT; + break; + case VertexFormat::k_32_32_32_FLOAT: + assert_true(is_signed); + vertex_attrib_descr.format = VK_FORMAT_R32G32B32_SFLOAT; + break; + case VertexFormat::k_32_32_32_32_FLOAT: + assert_true(is_signed); + vertex_attrib_descr.format = VK_FORMAT_R32G32B32A32_SFLOAT; + break; + default: + assert_unhandled_case(attrib.fetch_instr.attributes.data_format); + break; + } + } + } + vertex_state_info.vertexBindingDescriptionCount = vertex_binding_count; + vertex_state_info.pVertexBindingDescriptions = vertex_binding_descrs; + vertex_state_info.vertexAttributeDescriptionCount = vertex_attrib_count; + vertex_state_info.pVertexAttributeDescriptions = vertex_attrib_descrs; + + VkPipelineInputAssemblyStateCreateInfo input_info; + input_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_info.pNext = nullptr; + input_info.flags = 0; + input_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + input_info.primitiveRestartEnable = VK_FALSE; + + VkPipelineViewportStateCreateInfo viewport_state_info; + viewport_state_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + 
viewport_state_info.pNext = nullptr; + viewport_state_info.flags = 0; + VkViewport viewport; + viewport.x = 0; + viewport.y = 0; + viewport.width = 100; + viewport.height = 100; + viewport.minDepth = 0; + viewport.maxDepth = 1; + viewport_state_info.viewportCount = 1; + viewport_state_info.pViewports = &viewport; + VkRect2D scissor; + scissor.offset.x = 0; + scissor.offset.y = 0; + scissor.extent.width = 100; + scissor.extent.height = 100; + viewport_state_info.scissorCount = 1; + viewport_state_info.pScissors = &scissor; + + VkPipelineRasterizationStateCreateInfo rasterization_info; + rasterization_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_info.pNext = nullptr; + rasterization_info.flags = 0; + rasterization_info.depthClampEnable = VK_FALSE; + rasterization_info.rasterizerDiscardEnable = VK_FALSE; + rasterization_info.polygonMode = VK_POLYGON_MODE_FILL; + rasterization_info.cullMode = VK_CULL_MODE_BACK_BIT; + rasterization_info.frontFace = VK_FRONT_FACE_CLOCKWISE; + rasterization_info.depthBiasEnable = VK_FALSE; + rasterization_info.depthBiasConstantFactor = 0; + rasterization_info.depthBiasClamp = 0; + rasterization_info.depthBiasSlopeFactor = 0; + rasterization_info.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo multisample_info; + multisample_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisample_info.pNext = nullptr; + multisample_info.flags = 0; + multisample_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + multisample_info.sampleShadingEnable = VK_FALSE; + multisample_info.minSampleShading = 0; + multisample_info.pSampleMask = nullptr; + multisample_info.alphaToCoverageEnable = VK_FALSE; + multisample_info.alphaToOneEnable = VK_FALSE; + + VkPipelineDepthStencilStateCreateInfo depth_stencil_info; + depth_stencil_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + depth_stencil_info.pNext = nullptr; + depth_stencil_info.flags = 0; + 
depth_stencil_info.depthTestEnable = VK_FALSE; + depth_stencil_info.depthWriteEnable = VK_FALSE; + depth_stencil_info.depthCompareOp = VK_COMPARE_OP_ALWAYS; + depth_stencil_info.depthBoundsTestEnable = VK_FALSE; + depth_stencil_info.stencilTestEnable = VK_FALSE; + depth_stencil_info.front.failOp = VK_STENCIL_OP_KEEP; + depth_stencil_info.front.passOp = VK_STENCIL_OP_KEEP; + depth_stencil_info.front.depthFailOp = VK_STENCIL_OP_KEEP; + depth_stencil_info.front.compareOp = VK_COMPARE_OP_ALWAYS; + depth_stencil_info.front.compareMask = 0; + depth_stencil_info.front.writeMask = 0; + depth_stencil_info.front.reference = 0; + depth_stencil_info.back.failOp = VK_STENCIL_OP_KEEP; + depth_stencil_info.back.passOp = VK_STENCIL_OP_KEEP; + depth_stencil_info.back.depthFailOp = VK_STENCIL_OP_KEEP; + depth_stencil_info.back.compareOp = VK_COMPARE_OP_ALWAYS; + depth_stencil_info.back.compareMask = 0; + depth_stencil_info.back.writeMask = 0; + depth_stencil_info.back.reference = 0; + depth_stencil_info.minDepthBounds = 0; + depth_stencil_info.maxDepthBounds = 0; + + VkPipelineColorBlendStateCreateInfo blend_info; + blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + blend_info.pNext = nullptr; + blend_info.flags = 0; + blend_info.logicOpEnable = VK_FALSE; + blend_info.logicOp = VK_LOGIC_OP_NO_OP; + + VkPipelineColorBlendAttachmentState blend_attachments[1]; + blend_attachments[0].blendEnable = VK_TRUE; + blend_attachments[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; + blend_attachments[0].dstColorBlendFactor = + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + blend_attachments[0].colorBlendOp = VK_BLEND_OP_ADD; + blend_attachments[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; + blend_attachments[0].dstAlphaBlendFactor = + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + blend_attachments[0].alphaBlendOp = VK_BLEND_OP_ADD; + blend_attachments[0].colorWriteMask = 0xF; + blend_info.attachmentCount = + static_cast(xe::countof(blend_attachments)); + 
blend_info.pAttachments = blend_attachments; + std::memset(blend_info.blendConstants, 0, sizeof(blend_info.blendConstants)); + + VkPipelineDynamicStateCreateInfo dynamic_state_info; + dynamic_state_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state_info.pNext = nullptr; + dynamic_state_info.flags = 0; + // VkDynamicState dynamic_states[] = { + // VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + //}; + // dynamic_state_info.dynamicStateCount = + // static_cast(xe::countof(dynamic_states)); + // dynamic_state_info.pDynamicStates = dynamic_states; + dynamic_state_info.dynamicStateCount = 0; + dynamic_state_info.pDynamicStates = nullptr; + + VkGraphicsPipelineCreateInfo pipeline_info; + pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_info.pNext = nullptr; + pipeline_info.flags = VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT; + pipeline_info.stageCount = pipeline_stage_count; + pipeline_info.pStages = pipeline_stages; + pipeline_info.pVertexInputState = &vertex_state_info; + pipeline_info.pInputAssemblyState = &input_info; + pipeline_info.pTessellationState = nullptr; + pipeline_info.pViewportState = &viewport_state_info; + pipeline_info.pRasterizationState = &rasterization_info; + pipeline_info.pMultisampleState = &multisample_info; + pipeline_info.pDepthStencilState = &depth_stencil_info; + pipeline_info.pColorBlendState = &blend_info; + pipeline_info.pDynamicState = &dynamic_state_info; + pipeline_info.layout = pipeline_layout_; + pipeline_info.renderPass = render_state->render_pass_handle; + pipeline_info.subpass = 0; + pipeline_info.basePipelineHandle = nullptr; + pipeline_info.basePipelineIndex = 0; + + VkPipeline pipeline = nullptr; + auto err = vkCreateGraphicsPipelines(device_, nullptr, 1, &pipeline_info, + nullptr, &pipeline); + CheckResult(err, "vkCreateGraphicsPipelines"); + + // TODO(benvanik): don't leak pipelines >_> + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, 
pipeline); + + return true; } void PipelineCache::ClearCache() { // TODO(benvanik): caching. } +VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type) { + switch (primitive_type) { + case PrimitiveType::kLineList: + case PrimitiveType::kLineStrip: + case PrimitiveType::kTriangleList: + case PrimitiveType::kTriangleFan: + case PrimitiveType::kTriangleStrip: + // Supported directly - no need to emulate. + return nullptr; + case PrimitiveType::kPointList: + // TODO(benvanik): point list geometry shader. + return nullptr; + case PrimitiveType::kUnknown0x07: + assert_always("Unknown geometry type"); + return nullptr; + case PrimitiveType::kRectangleList: + // TODO(benvanik): rectangle list geometry shader. + return nullptr; + case PrimitiveType::kLineLoop: + // TODO(benvanik): line loop geometry shader. + return nullptr; + case PrimitiveType::kQuadList: + // TODO(benvanik): quad list geometry shader. + return nullptr; + case PrimitiveType::kQuadStrip: + // TODO(benvanik): quad strip geometry shader. + return nullptr; + default: + assert_unhandled_case(primitive_type); + return nullptr; + } +} + bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { uint32_t value = register_file_->values[register_name].u32; if (*dest == value) { diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 74c461504..aad43ca80 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -30,7 +30,9 @@ namespace vulkan { // including shaders, various blend/etc options, and input configuration. class PipelineCache { public: - PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); + PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device, + VkDescriptorSetLayout uniform_descriptor_set_layout, + VkDescriptorSetLayout texture_descriptor_set_layout); ~PipelineCache(); // Loads a shader from the cache, possibly translating it. 
@@ -48,26 +50,36 @@ class PipelineCache { VulkanShader* pixel_shader, PrimitiveType primitive_type); - // Currently configured pipeline layout, if any. - VkPipelineLayout current_pipeline_layout() const { return nullptr; } + // Pipeline layout shared by all pipelines. + VkPipelineLayout pipeline_layout() const { return pipeline_layout_; } // Clears all cached content. void ClearCache(); private: - // TODO(benvanik): geometry shader cache. - // TODO(benvanik): translated shader cache. - // TODO(benvanik): pipeline layouts. - // TODO(benvanik): pipeline cache. + // Gets a geometry shader used to emulate the given primitive type. + // Returns nullptr if the primitive doesn't need to be emulated. + VkShaderModule GetGeometryShader(PrimitiveType primitive_type); RegisterFile* register_file_ = nullptr; VkDevice device_ = nullptr; + // Reusable shader translator. SpirvShaderTranslator shader_translator_; + // Disassembler used to get the SPIRV disasm. Only used in debug. xe::ui::spirv::SpirvDisassembler disassembler_; // All loaded shaders mapped by their guest hash key. std::unordered_map shader_map_; + // Vulkan pipeline cache, which in theory helps us out. + // This can be serialized to disk and reused, if we want. + VkPipelineCache pipeline_cache_ = nullptr; + // Layout used for all pipelines describing our uniforms, textures, and push + // constants. + VkPipelineLayout pipeline_layout_ = nullptr; + + // TODO(benvanik): geometry shader cache. 
+ private: enum class UpdateStatus { kCompatible, diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index b477d5633..a9595741f 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -542,7 +542,9 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, return nullptr; } current_state_.render_pass = render_pass; + current_state_.render_pass_handle = render_pass->handle; current_state_.framebuffer = framebuffer; + current_state_.framebuffer_handle = framebuffer->handle; } if (!render_pass) { return nullptr; diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index 865b34cfd..4a1574e9b 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -69,8 +69,10 @@ struct RenderState { RenderConfiguration config; // Render pass (to be used with pipelines/etc). CachedRenderPass* render_pass = nullptr; + VkRenderPass render_pass_handle = nullptr; // Target framebuffer bound to the render pass. CachedFramebuffer* framebuffer = nullptr; + VkFramebuffer framebuffer_handle = nullptr; }; // Manages the virtualized EDRAM and the render target cache. diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index bf95ef6a4..ea051ca52 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -22,11 +22,57 @@ namespace vulkan { using xe::ui::vulkan::CheckResult; +constexpr uint32_t kMaxTextureSamplers = 32; + TextureCache::TextureCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device) - : register_file_(register_file), device_(*device) {} + : register_file_(register_file), device_(*device) { + // Descriptor pool used for all of our cached descriptors. 
+ VkDescriptorPoolCreateInfo descriptor_pool_info; + descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + descriptor_pool_info.pNext = nullptr; + descriptor_pool_info.flags = + VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; + descriptor_pool_info.maxSets = 256; + VkDescriptorPoolSize pool_sizes[1]; + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + pool_sizes[0].descriptorCount = 256; + descriptor_pool_info.poolSizeCount = 1; + descriptor_pool_info.pPoolSizes = pool_sizes; + auto err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr, + &descriptor_pool_); + CheckResult(err, "vkCreateDescriptorPool"); -TextureCache::~TextureCache() = default; + // Create the descriptor set layout used for rendering. + // We always have the same number of samplers but only some are used. + VkDescriptorSetLayoutBinding texture_bindings[1]; + for (int i = 0; i < 1; ++i) { + auto& texture_binding = texture_bindings[i]; + texture_binding.binding = 0; + texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + texture_binding.descriptorCount = kMaxTextureSamplers; + texture_binding.stageFlags = + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + texture_binding.pImmutableSamplers = nullptr; + } + VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info; + descriptor_set_layout_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptor_set_layout_info.pNext = nullptr; + descriptor_set_layout_info.flags = 0; + descriptor_set_layout_info.bindingCount = + static_cast(xe::countof(texture_bindings)); + descriptor_set_layout_info.pBindings = texture_bindings; + err = vkCreateDescriptorSetLayout(device_, &descriptor_set_layout_info, + nullptr, &texture_descriptor_set_layout_); + CheckResult(err, "vkCreateDescriptorSetLayout"); +} + +TextureCache::~TextureCache() { + vkDestroyDescriptorSetLayout(device_, texture_descriptor_set_layout_, + nullptr); + vkDestroyDescriptorPool(device_, 
descriptor_pool_, nullptr); +} void TextureCache::ClearCache() { // TODO(benvanik): caching. diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h index 3f18a7be1..34ae6f114 100644 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -26,6 +26,12 @@ class TextureCache { TextureCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); ~TextureCache(); + // Descriptor set layout containing all possible texture bindings. + // The set contains one descriptor for each texture sampler [0-31]. + VkDescriptorSetLayout texture_descriptor_set_layout() const { + return texture_descriptor_set_layout_; + } + // TODO(benvanik): UploadTexture. // TODO(benvanik): Resolve. // TODO(benvanik): ReadTexture. @@ -36,6 +42,9 @@ class TextureCache { private: RegisterFile* register_file_ = nullptr; VkDevice device_ = nullptr; + + VkDescriptorPool descriptor_pool_ = nullptr; + VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index e19e89c29..31460be79 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -69,9 +69,11 @@ bool VulkanCommandProcessor::SetupContext() { // Initialize the state machine caches. 
buffer_cache_ = std::make_unique(register_file_, device_, kDefaultBufferCacheCapacity); - pipeline_cache_ = std::make_unique(register_file_, device_); - render_cache_ = std::make_unique(register_file_, device_); texture_cache_ = std::make_unique(register_file_, device_); + pipeline_cache_ = std::make_unique( + register_file_, device_, buffer_cache_->constant_descriptor_set_layout(), + texture_cache_->texture_descriptor_set_layout()); + render_cache_ = std::make_unique(register_file_, device_); return true; } @@ -222,29 +224,12 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, return false; } - // Upload the constants the shaders require. - // These are optional, and if none are defined 0 will be returned. - VkDeviceSize vertex_constant_offset = buffer_cache_->UploadConstantRegisters( - vertex_shader->constant_register_map()); - VkDeviceSize pixel_constant_offset = buffer_cache_->UploadConstantRegisters( - pixel_shader->constant_register_map()); - if (vertex_constant_offset == VK_WHOLE_SIZE || - pixel_constant_offset == VK_WHOLE_SIZE) { - // Shader wants constants but we couldn't upload them. + // Pass registers to the shaders. + if (!PopulateConstants(command_buffer, vertex_shader, pixel_shader)) { render_cache_->EndRenderPass(); return false; } - // Configure constant uniform access to point at our offsets. - auto constant_descriptor_set = buffer_cache_->constant_descriptor_set(); - auto pipeline_layout = pipeline_cache_->current_pipeline_layout(); - uint32_t constant_offsets[2] = {static_cast(vertex_constant_offset), - static_cast(pixel_constant_offset)}; - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_layout, 0, 1, &constant_descriptor_set, - static_cast(xe::countof(constant_offsets)), - constant_offsets); - // Upload and bind index buffer data (if we have any). 
if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) { render_cache_->EndRenderPass(); @@ -263,7 +248,6 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, return false; } -#if 0 // Actually issue the draw. if (!index_buffer_info) { // Auto-indexed draw. @@ -282,7 +266,6 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, vkCmdDrawIndexed(command_buffer, index_count, instance_count, first_index, vertex_offset, first_instance); } -#endif // End the rendering pass. render_cache_->EndRenderPass(); @@ -333,6 +316,34 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, return true; } +bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader) { + // Upload the constants the shaders require. + // These are optional, and if none are defined 0 will be returned. + VkDeviceSize vertex_constant_offset = buffer_cache_->UploadConstantRegisters( + vertex_shader->constant_register_map()); + VkDeviceSize pixel_constant_offset = buffer_cache_->UploadConstantRegisters( + pixel_shader->constant_register_map()); + if (vertex_constant_offset == VK_WHOLE_SIZE || + pixel_constant_offset == VK_WHOLE_SIZE) { + // Shader wants constants but we couldn't upload them. + return false; + } + + // Configure constant uniform access to point at our offsets. 
+ auto constant_descriptor_set = buffer_cache_->constant_descriptor_set(); + auto pipeline_layout = pipeline_cache_->pipeline_layout(); + uint32_t constant_offsets[2] = {static_cast(vertex_constant_offset), + static_cast(pixel_constant_offset)}; + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_layout, 0, 1, &constant_descriptor_set, + static_cast(xe::countof(constant_offsets)), + constant_offsets); + + return true; +} + bool VulkanCommandProcessor::PopulateIndexBuffer( VkCommandBuffer command_buffer, IndexBufferInfo* index_buffer_info) { auto& regs = *register_file_; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index c350f77b0..179c31a73 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -66,6 +66,9 @@ class VulkanCommandProcessor : public CommandProcessor { bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) override; + bool PopulateConstants(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader); bool PopulateIndexBuffer(VkCommandBuffer command_buffer, IndexBufferInfo* index_buffer_info); bool PopulateVertexBuffers(VkCommandBuffer command_buffer, From 731ff527731a6699c2822f45f805bd4811b93b00 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 20 Feb 2016 12:59:18 -0800 Subject: [PATCH 020/145] Implementing a lot of the pipeline state setting. 
--- src/xenia/gpu/vulkan/buffer_cache.cc | 9 +- src/xenia/gpu/vulkan/buffer_cache.h | 6 +- src/xenia/gpu/vulkan/pipeline_cache.cc | 1089 +++++++++++------ src/xenia/gpu/vulkan/pipeline_cache.h | 134 +- .../gpu/vulkan/vulkan_command_processor.cc | 23 +- 5 files changed, 850 insertions(+), 411 deletions(-) diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc index 5b9f8a182..32c2cef4b 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -188,8 +188,9 @@ BufferCache::~BufferCache() { vkDestroyBuffer(device_, transient_vertex_buffer_, nullptr); } -VkDeviceSize BufferCache::UploadConstantRegisters( - const Shader::ConstantRegisterMap& constant_register_map) { +std::pair BufferCache::UploadConstantRegisters( + const Shader::ConstantRegisterMap& vertex_constant_register_map, + const Shader::ConstantRegisterMap& pixel_constant_register_map) { // Fat struct, including all registers: // struct { // vec4 float[512]; @@ -202,7 +203,7 @@ VkDeviceSize BufferCache::UploadConstantRegisters( auto offset = AllocateTransientData(uniform_buffer_alignment_, total_size); if (offset == VK_WHOLE_SIZE) { // OOM. - return VK_WHOLE_SIZE; + return {VK_WHOLE_SIZE, VK_WHOLE_SIZE}; } // Copy over all the registers. @@ -219,7 +220,7 @@ VkDeviceSize BufferCache::UploadConstantRegisters( 32 * 4); dest_ptr += 32 * 4; - return offset; + return {offset, offset}; // Packed upload code. // This is not currently supported by the shaders, but would be awesome. diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h index 9a264a80b..1c7330e52 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.h +++ b/src/xenia/gpu/vulkan/buffer_cache.h @@ -47,8 +47,10 @@ class BufferCache { // The registers are tightly packed in order as [floats, ints, bools]. // Returns an offset that can be used with the transient_descriptor_set or // VK_WHOLE_SIZE if the constants could not be uploaded (OOM). 
- VkDeviceSize UploadConstantRegisters( - const Shader::ConstantRegisterMap& constant_register_map); + // The returned offsets may alias. + std::pair UploadConstantRegisters( + const Shader::ConstantRegisterMap& vertex_constant_register_map, + const Shader::ConstantRegisterMap& pixel_constant_register_map); // Uploads index buffer data from guest memory, possibly eliding with // recently uploaded data or cached copies. diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index aca0d72b5..63bad5164 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -141,11 +141,435 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader, PrimitiveType primitive_type) { - // Uh, yeah. This happened. + // Perform a pass over all registers and state updating our cached structures. + // This will tell us if anything has changed that requires us to either build + // a new pipeline or use an existing one. + VkPipeline pipeline = nullptr; + auto update_status = UpdateState(vertex_shader, pixel_shader, primitive_type); + switch (update_status) { + case UpdateStatus::kCompatible: + // Requested pipeline is compatible with our previous one, so use that. + // Note that there still may be dynamic state that needs updating. + pipeline = current_pipeline_; + break; + case UpdateStatus::kMismatch: + // Pipeline state has changed. We need to either create a new one or find + // an old one that matches. + current_pipeline_ = nullptr; + break; + case UpdateStatus::kError: + // Error updating state - bail out. + // We are in an indeterminate state, so reset things for the next attempt. + current_pipeline_ = nullptr; + return false; + } + if (!pipeline) { + pipeline = GetPipeline(render_state); + current_pipeline_ = pipeline; + if (!pipeline) { + // Unable to create pipeline. 
+ return false; + } + } - VkPipelineShaderStageCreateInfo pipeline_stages[3]; - uint32_t pipeline_stage_count = 0; - auto& vertex_pipeline_stage = pipeline_stages[pipeline_stage_count++]; + // Bind the pipeline. + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + + // Issue all changed dynamic state information commands. + // TODO(benvanik): dynamic state is kept in the command buffer, so if we + // have issued it before (regardless of pipeline) we don't need to do it now. + // TODO(benvanik): track whether we have issued on the given command buffer. + bool full_dynamic_state = true; + if (!SetDynamicState(command_buffer, full_dynamic_state)) { + // Failed to update state. + return false; + } + + return true; +} + +void PipelineCache::ClearCache() { + // TODO(benvanik): caching. +} + +VkPipeline PipelineCache::GetPipeline(const RenderState* render_state) { + VkPipelineDynamicStateCreateInfo dynamic_state_info; + dynamic_state_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state_info.pNext = nullptr; + dynamic_state_info.flags = 0; + VkDynamicState dynamic_states[] = { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }; + dynamic_state_info.dynamicStateCount = + static_cast(xe::countof(dynamic_states)); + dynamic_state_info.pDynamicStates = dynamic_states; + + VkGraphicsPipelineCreateInfo pipeline_info; + pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_info.pNext = nullptr; + pipeline_info.flags = VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT; + pipeline_info.stageCount = update_shader_stages_stage_count_; + pipeline_info.pStages = update_shader_stages_info_; + pipeline_info.pVertexInputState = 
&update_vertex_input_state_info_; + pipeline_info.pInputAssemblyState = &update_input_assembly_state_info_; + pipeline_info.pTessellationState = nullptr; + pipeline_info.pViewportState = &update_viewport_state_info_; + pipeline_info.pRasterizationState = &update_rasterization_state_info_; + pipeline_info.pMultisampleState = &update_multisample_state_info_; + pipeline_info.pDepthStencilState = &update_depth_stencil_state_info_; + pipeline_info.pColorBlendState = &update_color_blend_state_info_; + pipeline_info.pDynamicState = &dynamic_state_info; + pipeline_info.layout = pipeline_layout_; + pipeline_info.renderPass = render_state->render_pass_handle; + pipeline_info.subpass = 0; + pipeline_info.basePipelineHandle = nullptr; + pipeline_info.basePipelineIndex = 0; + + VkPipeline pipeline = nullptr; + auto err = vkCreateGraphicsPipelines(device_, nullptr, 1, &pipeline_info, + nullptr, &pipeline); + CheckResult(err, "vkCreateGraphicsPipelines"); + + // TODO(benvanik): don't leak. + + return pipeline; +} + +VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type) { + switch (primitive_type) { + case PrimitiveType::kLineList: + case PrimitiveType::kLineStrip: + case PrimitiveType::kTriangleList: + case PrimitiveType::kTriangleFan: + case PrimitiveType::kTriangleStrip: + // Supported directly - no need to emulate. + return nullptr; + case PrimitiveType::kPointList: + // TODO(benvanik): point list geometry shader. + return nullptr; + case PrimitiveType::kUnknown0x07: + assert_always("Unknown geometry type"); + return nullptr; + case PrimitiveType::kRectangleList: + // TODO(benvanik): rectangle list geometry shader. + return nullptr; + case PrimitiveType::kLineLoop: + // TODO(benvanik): line loop geometry shader. + return nullptr; + case PrimitiveType::kQuadList: + // TODO(benvanik): quad list geometry shader. + return nullptr; + case PrimitiveType::kQuadStrip: + // TODO(benvanik): quad strip geometry shader. 
+ return nullptr; + default: + assert_unhandled_case(primitive_type); + return nullptr; + } +} + +bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, + bool full_update) { + auto& regs = set_dynamic_state_registers_; + + bool window_offset_dirty = SetShadowRegister(®s.pa_sc_window_offset, + XE_GPU_REG_PA_SC_WINDOW_OFFSET); + + // Window parameters. + // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h + // See r200UpdateWindow: + // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c + int16_t window_offset_x = 0; + int16_t window_offset_y = 0; + if ((regs.pa_su_sc_mode_cntl >> 16) & 1) { + window_offset_x = regs.pa_sc_window_offset & 0x7FFF; + window_offset_y = (regs.pa_sc_window_offset >> 16) & 0x7FFF; + if (window_offset_x & 0x4000) { + window_offset_x |= 0x8000; + } + if (window_offset_y & 0x4000) { + window_offset_y |= 0x8000; + } + } + + // VK_DYNAMIC_STATE_SCISSOR + bool scissor_state_dirty = full_update || window_offset_dirty; + scissor_state_dirty |= SetShadowRegister(®s.pa_sc_window_scissor_tl, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); + scissor_state_dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); + if (scissor_state_dirty) { + int32_t ws_x = regs.pa_sc_window_scissor_tl & 0x7FFF; + int32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF; + uint32_t ws_w = (regs.pa_sc_window_scissor_br & 0x7FFF) - ws_x; + uint32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y; + ws_x += window_offset_x; + ws_y += window_offset_y; + + VkRect2D scissor_rect; + scissor_rect.offset.x = ws_x; + scissor_rect.offset.y = ws_y; + scissor_rect.extent.width = ws_w; + scissor_rect.extent.height = ws_h; + vkCmdSetScissor(command_buffer, 0, 1, &scissor_rect); + } + + // VK_DYNAMIC_STATE_VIEWPORT + bool viewport_state_dirty = full_update || window_offset_dirty; + viewport_state_dirty |= + 
SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + viewport_state_dirty |= + SetShadowRegister(®s.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL); + viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_xoffset, + XE_GPU_REG_PA_CL_VPORT_XOFFSET); + viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_yoffset, + XE_GPU_REG_PA_CL_VPORT_YOFFSET); + viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_zoffset, + XE_GPU_REG_PA_CL_VPORT_ZOFFSET); + viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_xscale, + XE_GPU_REG_PA_CL_VPORT_XSCALE); + viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_yscale, + XE_GPU_REG_PA_CL_VPORT_YSCALE); + viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_zscale, + XE_GPU_REG_PA_CL_VPORT_ZSCALE); + if (viewport_state_dirty) { + // HACK: no clue where to get these values. + // RB_SURFACE_INFO + auto surface_msaa = + static_cast((regs.rb_surface_info >> 16) & 0x3); + // TODO(benvanik): ?? + float window_width_scalar = 1; + float window_height_scalar = 1; + switch (surface_msaa) { + case MsaaSamples::k1X: + break; + case MsaaSamples::k2X: + window_width_scalar = 2; + break; + case MsaaSamples::k4X: + window_width_scalar = 2; + window_height_scalar = 2; + break; + } + + // Whether each of the viewport settings are enabled. 
+ // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + bool vport_xscale_enable = (regs.pa_cl_vte_cntl & (1 << 0)) > 0; + bool vport_xoffset_enable = (regs.pa_cl_vte_cntl & (1 << 1)) > 0; + bool vport_yscale_enable = (regs.pa_cl_vte_cntl & (1 << 2)) > 0; + bool vport_yoffset_enable = (regs.pa_cl_vte_cntl & (1 << 3)) > 0; + bool vport_zscale_enable = (regs.pa_cl_vte_cntl & (1 << 4)) > 0; + bool vport_zoffset_enable = (regs.pa_cl_vte_cntl & (1 << 5)) > 0; + assert_true(vport_xscale_enable == vport_yscale_enable == + vport_zscale_enable == vport_xoffset_enable == + vport_yoffset_enable == vport_zoffset_enable); + + VkViewport viewport_rect; + viewport_rect.x = 0; + viewport_rect.y = 0; + viewport_rect.width = 100; + viewport_rect.height = 100; + viewport_rect.minDepth = 0; + viewport_rect.maxDepth = 1; + + if (vport_xscale_enable) { + float texel_offset_x = 0.0f; + float texel_offset_y = 0.0f; + float vox = vport_xoffset_enable ? regs.pa_cl_vport_xoffset : 0; + float voy = vport_yoffset_enable ? regs.pa_cl_vport_yoffset : 0; + float vsx = vport_xscale_enable ? regs.pa_cl_vport_xscale : 1; + float vsy = vport_yscale_enable ? regs.pa_cl_vport_yscale : 1; + window_width_scalar = window_height_scalar = 1; + float vpw = 2 * window_width_scalar * vsx; + float vph = -2 * window_height_scalar * vsy; + float vpx = window_width_scalar * vox - vpw / 2 + window_offset_x; + float vpy = window_height_scalar * voy - vph / 2 + window_offset_y; + viewport_rect.x = vpx + texel_offset_x; + viewport_rect.y = vpy + texel_offset_y; + viewport_rect.width = vpw; + viewport_rect.height = vph; + + // TODO(benvanik): depth range adjustment? + // float voz = vport_zoffset_enable ? regs.pa_cl_vport_zoffset : 0; + // float vsz = vport_zscale_enable ? 
regs.pa_cl_vport_zscale : 1; + } else { + float texel_offset_x = 0.0f; + float texel_offset_y = 0.0f; + float vpw = 2 * 2560.0f * window_width_scalar; + float vph = 2 * 2560.0f * window_height_scalar; + float vpx = -2560.0f * window_width_scalar + window_offset_x; + float vpy = -2560.0f * window_height_scalar + window_offset_y; + viewport_rect.x = vpx + texel_offset_x; + viewport_rect.y = vpy + texel_offset_y; + viewport_rect.width = vpw; + viewport_rect.height = vph; + } + float voz = vport_zoffset_enable ? regs.pa_cl_vport_zoffset : 0; + float vsz = vport_zscale_enable ? regs.pa_cl_vport_zscale : 1; + viewport_rect.minDepth = voz; + viewport_rect.maxDepth = voz + vsz; + + vkCmdSetViewport(command_buffer, 0, 1, &viewport_rect); + } + + // VK_DYNAMIC_STATE_BLEND_CONSTANTS + bool blend_constant_state_dirty = full_update; + blend_constant_state_dirty |= + SetShadowRegister(®s.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED); + blend_constant_state_dirty |= + SetShadowRegister(®s.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN); + blend_constant_state_dirty |= + SetShadowRegister(®s.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE); + blend_constant_state_dirty |= + SetShadowRegister(®s.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA); + if (blend_constant_state_dirty) { + vkCmdSetBlendConstants(command_buffer, regs.rb_blend_rgba); + } + + // VK_DYNAMIC_STATE_LINE_WIDTH + vkCmdSetLineWidth(command_buffer, 1.0f); + + // VK_DYNAMIC_STATE_DEPTH_BIAS + vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f); + + // VK_DYNAMIC_STATE_DEPTH_BOUNDS + vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f); + + // VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK + vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); + + // VK_DYNAMIC_STATE_STENCIL_REFERENCE + vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); + + // VK_DYNAMIC_STATE_STENCIL_WRITE_MASK + vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); + + // TODO(benvanik): push constants. 
+ + return true; +} + +bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { + uint32_t value = register_file_->values[register_name].u32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + +bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) { + float value = register_file_->values[register_name].f32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargets() { + auto& regs = update_render_targets_regs_; + + bool dirty = false; + dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); + dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + dirty |= SetShadowRegister(®s.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); + dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); + dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); + dirty |= SetShadowRegister(®s.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); + dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); + dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); + dirty |= + SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); + dirty |= SetShadowRegister(®s.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateState( + VulkanShader* vertex_shader, VulkanShader* pixel_shader, + PrimitiveType primitive_type) { + bool mismatch = false; + +#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ + { \ + if (status == UpdateStatus::kError) { \ + XELOGE(error_message); \ + return status; \ + } else if (status == UpdateStatus::kMismatch) { \ + mismatch = true; \ + } \ + } + + UpdateStatus status; + status = UpdateShaderStages(vertex_shader, pixel_shader, 
primitive_type); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update shader stages"); + status = UpdateVertexInputState(vertex_shader); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update vertex input state"); + status = UpdateInputAssemblyState(primitive_type); + CHECK_UPDATE_STATUS(status, mismatch, + "Unable to update input assembly state"); + status = UpdateViewportState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state"); + status = UpdateRasterizationState(primitive_type); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterization state"); + status = UpdateMultisampleState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update multisample state"); + status = UpdateDepthStencilState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state"); + status = UpdateColorBlendState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update color blend state"); + + return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( + VulkanShader* vertex_shader, VulkanShader* pixel_shader, + PrimitiveType primitive_type) { + auto& regs = update_shader_stages_regs_; + + // These are the constant base addresses/ranges for shaders. + // We have these hardcoded right now cause nothing seems to differ. 
+ assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == + 0x000FF000 || + register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); + assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == + 0x000FF100 || + register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + + bool dirty = false; + dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); + dirty |= SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); + // dirty |= regs.vertex_shader != active_vertex_shader_; + // dirty |= regs.pixel_shader != active_pixel_shader_; + dirty |= regs.prim_type != primitive_type; + if (!dirty) { + return UpdateStatus::kCompatible; + } + // regs.vertex_shader = static_cast(active_vertex_shader_); + // regs.pixel_shader = static_cast(active_pixel_shader_); + regs.prim_type = primitive_type; + + update_shader_stages_stage_count_ = 0; + + auto& vertex_pipeline_stage = + update_shader_stages_info_[update_shader_stages_stage_count_++]; vertex_pipeline_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; vertex_pipeline_stage.pNext = nullptr; @@ -154,9 +578,11 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, vertex_pipeline_stage.module = vertex_shader->shader_module(); vertex_pipeline_stage.pName = "main"; vertex_pipeline_stage.pSpecializationInfo = nullptr; + auto geometry_shader = GetGeometryShader(primitive_type); if (geometry_shader) { - auto& geometry_pipeline_stage = pipeline_stages[pipeline_stage_count++]; + auto& geometry_pipeline_stage = + update_shader_stages_info_[update_shader_stages_stage_count_++]; geometry_pipeline_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; geometry_pipeline_stage.pNext = nullptr; @@ -166,7 +592,9 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, geometry_pipeline_stage.pName = "main"; 
geometry_pipeline_stage.pSpecializationInfo = nullptr; } - auto& pixel_pipeline_stage = pipeline_stages[pipeline_stage_count++]; + + auto& pixel_pipeline_stage = + update_shader_stages_info_[update_shader_stages_stage_count_++]; pixel_pipeline_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; pixel_pipeline_stage.pNext = nullptr; @@ -176,13 +604,28 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, pixel_pipeline_stage.pName = "main"; pixel_pipeline_stage.pSpecializationInfo = nullptr; - VkPipelineVertexInputStateCreateInfo vertex_state_info; - vertex_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - vertex_state_info.pNext = nullptr; - VkVertexInputBindingDescription vertex_binding_descrs[64]; + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( + VulkanShader* vertex_shader) { + auto& regs = update_vertex_input_state_regs_; + auto& state_info = update_vertex_input_state_info_; + + bool dirty = false; + dirty |= vertex_shader != regs.vertex_shader; + if (!dirty) { + return UpdateStatus::kCompatible; + } + regs.vertex_shader = vertex_shader; + + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + state_info.pNext = nullptr; + state_info.flags = 0; + + auto& vertex_binding_descrs = update_vertex_input_state_binding_descrs_; + auto& vertex_attrib_descrs = update_vertex_input_state_attrib_descrs_; uint32_t vertex_binding_count = 0; - VkVertexInputAttributeDescription vertex_attrib_descrs[64]; uint32_t vertex_attrib_count = 0; for (const auto& vertex_binding : vertex_shader->vertex_bindings()) { assert_true(vertex_binding_count < xe::countof(vertex_binding_descrs)); @@ -270,366 +713,114 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, } } } - vertex_state_info.vertexBindingDescriptionCount = vertex_binding_count; - vertex_state_info.pVertexBindingDescriptions = vertex_binding_descrs; - 
vertex_state_info.vertexAttributeDescriptionCount = vertex_attrib_count; - vertex_state_info.pVertexAttributeDescriptions = vertex_attrib_descrs; - VkPipelineInputAssemblyStateCreateInfo input_info; - input_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - input_info.pNext = nullptr; - input_info.flags = 0; - input_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - input_info.primitiveRestartEnable = VK_FALSE; + state_info.vertexBindingDescriptionCount = vertex_binding_count; + state_info.pVertexBindingDescriptions = vertex_binding_descrs; + state_info.vertexAttributeDescriptionCount = vertex_attrib_count; + state_info.pVertexAttributeDescriptions = vertex_attrib_descrs; - VkPipelineViewportStateCreateInfo viewport_state_info; - viewport_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - viewport_state_info.pNext = nullptr; - viewport_state_info.flags = 0; - VkViewport viewport; - viewport.x = 0; - viewport.y = 0; - viewport.width = 100; - viewport.height = 100; - viewport.minDepth = 0; - viewport.maxDepth = 1; - viewport_state_info.viewportCount = 1; - viewport_state_info.pViewports = &viewport; - VkRect2D scissor; - scissor.offset.x = 0; - scissor.offset.y = 0; - scissor.extent.width = 100; - scissor.extent.height = 100; - viewport_state_info.scissorCount = 1; - viewport_state_info.pScissors = &scissor; - - VkPipelineRasterizationStateCreateInfo rasterization_info; - rasterization_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - rasterization_info.pNext = nullptr; - rasterization_info.flags = 0; - rasterization_info.depthClampEnable = VK_FALSE; - rasterization_info.rasterizerDiscardEnable = VK_FALSE; - rasterization_info.polygonMode = VK_POLYGON_MODE_FILL; - rasterization_info.cullMode = VK_CULL_MODE_BACK_BIT; - rasterization_info.frontFace = VK_FRONT_FACE_CLOCKWISE; - rasterization_info.depthBiasEnable = VK_FALSE; - rasterization_info.depthBiasConstantFactor = 0; - 
rasterization_info.depthBiasClamp = 0; - rasterization_info.depthBiasSlopeFactor = 0; - rasterization_info.lineWidth = 1.0f; - - VkPipelineMultisampleStateCreateInfo multisample_info; - multisample_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisample_info.pNext = nullptr; - multisample_info.flags = 0; - multisample_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - multisample_info.sampleShadingEnable = VK_FALSE; - multisample_info.minSampleShading = 0; - multisample_info.pSampleMask = nullptr; - multisample_info.alphaToCoverageEnable = VK_FALSE; - multisample_info.alphaToOneEnable = VK_FALSE; - - VkPipelineDepthStencilStateCreateInfo depth_stencil_info; - depth_stencil_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - depth_stencil_info.pNext = nullptr; - depth_stencil_info.flags = 0; - depth_stencil_info.depthTestEnable = VK_FALSE; - depth_stencil_info.depthWriteEnable = VK_FALSE; - depth_stencil_info.depthCompareOp = VK_COMPARE_OP_ALWAYS; - depth_stencil_info.depthBoundsTestEnable = VK_FALSE; - depth_stencil_info.stencilTestEnable = VK_FALSE; - depth_stencil_info.front.failOp = VK_STENCIL_OP_KEEP; - depth_stencil_info.front.passOp = VK_STENCIL_OP_KEEP; - depth_stencil_info.front.depthFailOp = VK_STENCIL_OP_KEEP; - depth_stencil_info.front.compareOp = VK_COMPARE_OP_ALWAYS; - depth_stencil_info.front.compareMask = 0; - depth_stencil_info.front.writeMask = 0; - depth_stencil_info.front.reference = 0; - depth_stencil_info.back.failOp = VK_STENCIL_OP_KEEP; - depth_stencil_info.back.passOp = VK_STENCIL_OP_KEEP; - depth_stencil_info.back.depthFailOp = VK_STENCIL_OP_KEEP; - depth_stencil_info.back.compareOp = VK_COMPARE_OP_ALWAYS; - depth_stencil_info.back.compareMask = 0; - depth_stencil_info.back.writeMask = 0; - depth_stencil_info.back.reference = 0; - depth_stencil_info.minDepthBounds = 0; - depth_stencil_info.maxDepthBounds = 0; - - VkPipelineColorBlendStateCreateInfo blend_info; - blend_info.sType = 
VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - blend_info.pNext = nullptr; - blend_info.flags = 0; - blend_info.logicOpEnable = VK_FALSE; - blend_info.logicOp = VK_LOGIC_OP_NO_OP; - - VkPipelineColorBlendAttachmentState blend_attachments[1]; - blend_attachments[0].blendEnable = VK_TRUE; - blend_attachments[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - blend_attachments[0].dstColorBlendFactor = - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - blend_attachments[0].colorBlendOp = VK_BLEND_OP_ADD; - blend_attachments[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - blend_attachments[0].dstAlphaBlendFactor = - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - blend_attachments[0].alphaBlendOp = VK_BLEND_OP_ADD; - blend_attachments[0].colorWriteMask = 0xF; - blend_info.attachmentCount = - static_cast(xe::countof(blend_attachments)); - blend_info.pAttachments = blend_attachments; - std::memset(blend_info.blendConstants, 0, sizeof(blend_info.blendConstants)); - - VkPipelineDynamicStateCreateInfo dynamic_state_info; - dynamic_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamic_state_info.pNext = nullptr; - dynamic_state_info.flags = 0; - // VkDynamicState dynamic_states[] = { - // VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, - //}; - // dynamic_state_info.dynamicStateCount = - // static_cast(xe::countof(dynamic_states)); - // dynamic_state_info.pDynamicStates = dynamic_states; - dynamic_state_info.dynamicStateCount = 0; - dynamic_state_info.pDynamicStates = nullptr; - - VkGraphicsPipelineCreateInfo pipeline_info; - pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - pipeline_info.pNext = nullptr; - pipeline_info.flags = VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT; - pipeline_info.stageCount = pipeline_stage_count; - pipeline_info.pStages = pipeline_stages; - pipeline_info.pVertexInputState = &vertex_state_info; - pipeline_info.pInputAssemblyState = &input_info; - pipeline_info.pTessellationState = nullptr; 
- pipeline_info.pViewportState = &viewport_state_info; - pipeline_info.pRasterizationState = &rasterization_info; - pipeline_info.pMultisampleState = &multisample_info; - pipeline_info.pDepthStencilState = &depth_stencil_info; - pipeline_info.pColorBlendState = &blend_info; - pipeline_info.pDynamicState = &dynamic_state_info; - pipeline_info.layout = pipeline_layout_; - pipeline_info.renderPass = render_state->render_pass_handle; - pipeline_info.subpass = 0; - pipeline_info.basePipelineHandle = nullptr; - pipeline_info.basePipelineIndex = 0; - - VkPipeline pipeline = nullptr; - auto err = vkCreateGraphicsPipelines(device_, nullptr, 1, &pipeline_info, - nullptr, &pipeline); - CheckResult(err, "vkCreateGraphicsPipelines"); - - // TODO(benvanik): don't leak pipelines >_> - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - - return true; + return UpdateStatus::kMismatch; } -void PipelineCache::ClearCache() { - // TODO(benvanik): caching. -} - -VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type) { - switch (primitive_type) { - case PrimitiveType::kLineList: - case PrimitiveType::kLineStrip: - case PrimitiveType::kTriangleList: - case PrimitiveType::kTriangleFan: - case PrimitiveType::kTriangleStrip: - // Supported directly - no need to emulate. - return nullptr; - case PrimitiveType::kPointList: - // TODO(benvanik): point list geometry shader. - return nullptr; - case PrimitiveType::kUnknown0x07: - assert_always("Unknown geometry type"); - return nullptr; - case PrimitiveType::kRectangleList: - // TODO(benvanik): rectangle list geometry shader. - return nullptr; - case PrimitiveType::kLineLoop: - // TODO(benvanik): line loop geometry shader. - return nullptr; - case PrimitiveType::kQuadList: - // TODO(benvanik): quad list geometry shader. - return nullptr; - case PrimitiveType::kQuadStrip: - // TODO(benvanik): quad strip geometry shader. 
- return nullptr; - default: - assert_unhandled_case(primitive_type); - return nullptr; - } -} - -bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { - uint32_t value = register_file_->values[register_name].u32; - if (*dest == value) { - return false; - } - *dest = value; - return true; -} - -bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) { - float value = register_file_->values[register_name].f32; - if (*dest == value) { - return false; - } - *dest = value; - return true; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateShaders( - PrimitiveType prim_type) { - auto& regs = update_shaders_regs_; - - // These are the constant base addresses/ranges for shaders. - // We have these hardcoded right now cause nothing seems to differ. - assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == - 0x000FF000 || - register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); - assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == - 0x000FF100 || - register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); +PipelineCache::UpdateStatus PipelineCache::UpdateInputAssemblyState( + PrimitiveType primitive_type) { + auto& regs = update_input_assembly_state_regs_; + auto& state_info = update_input_assembly_state_info_; bool dirty = false; + dirty |= primitive_type != regs.primitive_type; dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); - dirty |= SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); - // dirty |= regs.vertex_shader != active_vertex_shader_; - // dirty |= regs.pixel_shader != active_pixel_shader_; - dirty |= regs.prim_type != prim_type; + dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, + XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); if (!dirty) { return UpdateStatus::kCompatible; } - // regs.vertex_shader = static_cast(active_vertex_shader_); - // 
regs.pixel_shader = static_cast(active_pixel_shader_); - regs.prim_type = prim_type; + regs.primitive_type = primitive_type; - SCOPE_profile_cpu_f("gpu"); + state_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + state_info.pNext = nullptr; + state_info.flags = 0; - return UpdateStatus::kMismatch; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargets() { - auto& regs = update_render_targets_regs_; - - bool dirty = false; - dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); - dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); - dirty |= SetShadowRegister(®s.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); - dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); - dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); - dirty |= SetShadowRegister(®s.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); - dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); - dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); - dirty |= - SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); - dirty |= SetShadowRegister(®s.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); - if (!dirty) { - return UpdateStatus::kCompatible; + switch (primitive_type) { + case PrimitiveType::kPointList: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + break; + case PrimitiveType::kLineList: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + break; + case PrimitiveType::kLineStrip: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + break; + case PrimitiveType::kLineLoop: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + break; + case PrimitiveType::kTriangleList: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + break; + case PrimitiveType::kTriangleStrip: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + break; + case PrimitiveType::kTriangleFan: + state_info.topology = 
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + break; + case PrimitiveType::kRectangleList: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + break; + case PrimitiveType::kQuadList: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; + break; + default: + case PrimitiveType::kUnknown0x07: + XELOGE("unsupported primitive type %d", primitive_type); + assert_unhandled_case(primitive_type); + return UpdateStatus::kError; } - SCOPE_profile_cpu_f("gpu"); + // TODO(benvanik): anything we can do about this? Vulkan seems to only support + // first. + assert_zero(regs.pa_su_sc_mode_cntl & (1 << 19)); + // if (regs.pa_su_sc_mode_cntl & (1 << 19)) { + // glProvokingVertex(GL_LAST_VERTEX_CONVENTION); + // } else { + // glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); + // } + + if (regs.pa_su_sc_mode_cntl & (1 << 21)) { + state_info.primitiveRestartEnable = VK_TRUE; + } else { + state_info.primitiveRestartEnable = VK_FALSE; + } + // TODO(benvanik): no way to specify in Vulkan? + assert_true(regs.multi_prim_ib_reset_index == 0xFFFF || + regs.multi_prim_ib_reset_index == 0xFFFFFFFF); + // glPrimitiveRestartIndex(regs.multi_prim_ib_reset_index); return UpdateStatus::kMismatch; } -PipelineCache::UpdateStatus PipelineCache::UpdateState( - PrimitiveType prim_type) { - bool mismatch = false; - -#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ - { \ - if (status == UpdateStatus::kError) { \ - XELOGE(error_message); \ - return status; \ - } else if (status == UpdateStatus::kMismatch) { \ - mismatch = true; \ - } \ - } - - UpdateStatus status; - status = UpdateViewportState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state"); - status = UpdateRasterizerState(prim_type); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state"); - status = UpdateBlendState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state"); - status = UpdateDepthStencilState(); - CHECK_UPDATE_STATUS(status, mismatch, 
"Unable to update depth/stencil state"); - - return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible; -} - PipelineCache::UpdateStatus PipelineCache::UpdateViewportState() { - auto& regs = update_viewport_state_regs_; + auto& state_info = update_viewport_state_info_; - bool dirty = false; - // dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl, - // XE_GPU_REG_PA_CL_CLIP_CNTL); - dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); - dirty |= SetShadowRegister(®s.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL); - dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.pa_sc_window_offset, - XE_GPU_REG_PA_SC_WINDOW_OFFSET); - dirty |= SetShadowRegister(®s.pa_sc_window_scissor_tl, - XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); - dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br, - XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); - dirty |= SetShadowRegister(®s.pa_cl_vport_xoffset, - XE_GPU_REG_PA_CL_VPORT_XOFFSET); - dirty |= SetShadowRegister(®s.pa_cl_vport_yoffset, - XE_GPU_REG_PA_CL_VPORT_YOFFSET); - dirty |= SetShadowRegister(®s.pa_cl_vport_zoffset, - XE_GPU_REG_PA_CL_VPORT_ZOFFSET); - dirty |= SetShadowRegister(®s.pa_cl_vport_xscale, - XE_GPU_REG_PA_CL_VPORT_XSCALE); - dirty |= SetShadowRegister(®s.pa_cl_vport_yscale, - XE_GPU_REG_PA_CL_VPORT_YSCALE); - dirty |= SetShadowRegister(®s.pa_cl_vport_zscale, - XE_GPU_REG_PA_CL_VPORT_ZSCALE); + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + state_info.pNext = nullptr; + state_info.flags = 0; - // Much of this state machine is extracted from: - // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c - // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html - // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + state_info.viewportCount = 1; + state_info.scissorCount = 1; - // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf - // VTX_XY_FMT = true: 
the incoming X, Y have already been multiplied by 1/W0. - // = false: multiply the X, Y coordinates by 1/W0. - // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. - // = false: multiply the Z coordinate by 1/W0. - // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to - // get 1/W0. - // draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f, - // (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f, - // (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f); + // Ignored; set dynamically. + state_info.pViewports = nullptr; + state_info.pScissors = nullptr; - // Done in VS, no need to flush state. - // if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) { - // draw_batcher_.set_window_scalar(1.0f, 1.0f); - //} else { - // draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f); - //} - - if (!dirty) { - return UpdateStatus::kCompatible; - } - - return UpdateStatus::kMismatch; + return UpdateStatus::kCompatible; } -PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState( - PrimitiveType prim_type) { - auto& regs = update_rasterizer_state_regs_; +PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( + PrimitiveType primitive_type) { + auto& regs = update_rasterization_state_regs_; + auto& state_info = update_rasterization_state_info_; bool dirty = false; dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, @@ -640,21 +831,130 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState( XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); - dirty |= regs.prim_type != prim_type; if (!dirty) { return UpdateStatus::kCompatible; } - regs.prim_type = prim_type; + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + state_info.pNext = nullptr; + state_info.flags = 0; - SCOPE_profile_cpu_f("gpu"); + // TODO(benvanik): right setting? 
+ state_info.depthClampEnable = VK_FALSE; + + // TODO(benvanik): use in depth-only mode? + state_info.rasterizerDiscardEnable = VK_FALSE; + + bool poly_mode = ((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0; + if (poly_mode) { + uint32_t front_poly_mode = (regs.pa_su_sc_mode_cntl >> 5) & 0x7; + uint32_t back_poly_mode = (regs.pa_su_sc_mode_cntl >> 8) & 0x7; + // Vulkan only supports both matching. + assert_true(front_poly_mode == back_poly_mode); + static const VkPolygonMode kFillModes[3] = { + VK_POLYGON_MODE_POINT, VK_POLYGON_MODE_LINE, VK_POLYGON_MODE_FILL, + }; + state_info.polygonMode = kFillModes[front_poly_mode]; + } else { + state_info.polygonMode = VK_POLYGON_MODE_FILL; + } + + switch (regs.pa_su_sc_mode_cntl & 0x3) { + case 0: + state_info.cullMode = VK_CULL_MODE_NONE; + break; + case 1: + state_info.cullMode = VK_CULL_MODE_FRONT_BIT; + break; + case 2: + state_info.cullMode = VK_CULL_MODE_BACK_BIT; + break; + } + if (regs.pa_su_sc_mode_cntl & 0x4) { + state_info.frontFace = VK_FRONT_FACE_CLOCKWISE; + } else { + state_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + } + if (primitive_type == PrimitiveType::kRectangleList) { + // Rectangle lists aren't culled. There may be other things they skip too. 
+ state_info.cullMode = VK_CULL_MODE_NONE; + } + + state_info.depthBiasEnable = VK_FALSE; + + // Ignored; set dynamically: + state_info.depthBiasConstantFactor = 0; + state_info.depthBiasClamp = 0; + state_info.depthBiasSlopeFactor = 0; + state_info.lineWidth = 1.0f; return UpdateStatus::kMismatch; } -PipelineCache::UpdateStatus PipelineCache::UpdateBlendState() { - auto& reg_file = *register_file_; - auto& regs = update_blend_state_regs_; +PipelineCache::UpdateStatus PipelineCache::UpdateMultisampleState() { + auto& regs = update_multisample_state_regs_; + auto& state_info = update_multisample_state_info_; + + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + state_info.pNext = nullptr; + state_info.flags = 0; + + state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + state_info.sampleShadingEnable = VK_FALSE; + state_info.minSampleShading = 0; + state_info.pSampleMask = nullptr; + state_info.alphaToCoverageEnable = VK_FALSE; + state_info.alphaToOneEnable = VK_FALSE; + + return UpdateStatus::kCompatible; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { + auto& regs = update_depth_stencil_state_regs_; + auto& state_info = update_depth_stencil_state_info_; + + bool dirty = false; + dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); + dirty |= + SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + state_info.pNext = nullptr; + state_info.flags = 0; + + state_info.depthTestEnable = VK_FALSE; + state_info.depthWriteEnable = VK_FALSE; + state_info.depthCompareOp = VK_COMPARE_OP_ALWAYS; + state_info.depthBoundsTestEnable = VK_FALSE; + state_info.stencilTestEnable = VK_FALSE; + state_info.front.failOp = VK_STENCIL_OP_KEEP; + state_info.front.passOp = VK_STENCIL_OP_KEEP; + state_info.front.depthFailOp = VK_STENCIL_OP_KEEP; + 
state_info.front.compareOp = VK_COMPARE_OP_ALWAYS; + state_info.back.failOp = VK_STENCIL_OP_KEEP; + state_info.back.passOp = VK_STENCIL_OP_KEEP; + state_info.back.depthFailOp = VK_STENCIL_OP_KEEP; + state_info.back.compareOp = VK_COMPARE_OP_ALWAYS; + + // Ignored; set dynamically. + state_info.minDepthBounds = 0; + state_info.maxDepthBounds = 0; + state_info.front.compareMask = 0; + state_info.front.writeMask = 0; + state_info.front.reference = 0; + state_info.back.compareMask = 0; + state_info.back.writeMask = 0; + state_info.back.reference = 0; + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { + auto& regs = update_color_blend_state_regs_; + auto& state_info = update_color_blend_state_info_; // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE // Deprecated in GL, implemented in shader. @@ -666,6 +966,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendState() { // reg_file[XE_GPU_REG_RB_ALPHA_REF].f32); bool dirty = false; + dirty |= SetShadowRegister(®s.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL); + dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); dirty |= SetShadowRegister(®s.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0); dirty |= @@ -674,31 +976,80 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendState() { SetShadowRegister(®s.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); dirty |= SetShadowRegister(®s.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); - dirty |= SetShadowRegister(®s.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED); - dirty |= SetShadowRegister(®s.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN); - dirty |= SetShadowRegister(®s.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE); - dirty |= SetShadowRegister(®s.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA); if (!dirty) { return UpdateStatus::kCompatible; } - SCOPE_profile_cpu_f("gpu"); + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + state_info.pNext = nullptr; + 
state_info.flags = 0; - return UpdateStatus::kMismatch; -} + state_info.logicOpEnable = VK_FALSE; + state_info.logicOp = VK_LOGIC_OP_NO_OP; -PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { - auto& regs = update_depth_stencil_state_regs_; - - bool dirty = false; - dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); - dirty |= - SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); - if (!dirty) { - return UpdateStatus::kCompatible; + static const VkBlendFactor kBlendFactorMap[] = { + /* 0 */ VK_BLEND_FACTOR_ZERO, + /* 1 */ VK_BLEND_FACTOR_ONE, + /* 2 */ VK_BLEND_FACTOR_ZERO, // ? + /* 3 */ VK_BLEND_FACTOR_ZERO, // ? + /* 4 */ VK_BLEND_FACTOR_SRC_COLOR, + /* 5 */ VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, + /* 6 */ VK_BLEND_FACTOR_SRC_ALPHA, + /* 7 */ VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + /* 8 */ VK_BLEND_FACTOR_DST_COLOR, + /* 9 */ VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, + /* 10 */ VK_BLEND_FACTOR_DST_ALPHA, + /* 11 */ VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, + /* 12 */ VK_BLEND_FACTOR_CONSTANT_COLOR, + /* 13 */ VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + /* 14 */ VK_BLEND_FACTOR_CONSTANT_ALPHA, + /* 15 */ VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, + /* 16 */ VK_BLEND_FACTOR_SRC_ALPHA_SATURATE, + }; + static const VkBlendOp kBlendOpMap[] = { + /* 0 */ VK_BLEND_OP_ADD, + /* 1 */ VK_BLEND_OP_SUBTRACT, + /* 2 */ VK_BLEND_OP_MIN, + /* 3 */ VK_BLEND_OP_MAX, + /* 4 */ VK_BLEND_OP_REVERSE_SUBTRACT, + }; + auto& attachment_states = update_color_blend_attachment_states_; + for (int i = 0; i < 4; ++i) { + uint32_t blend_control = regs.rb_blendcontrol[i]; + auto& attachment_state = attachment_states[i]; + attachment_state.blendEnable = !(regs.rb_colorcontrol & 0x20); + // A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND + attachment_state.srcColorBlendFactor = + kBlendFactorMap[(blend_control & 0x0000001F) >> 0]; + // A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND + attachment_state.dstColorBlendFactor = + kBlendFactorMap[(blend_control & 
0x00001F00) >> 8]; + // A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN + attachment_state.colorBlendOp = + kBlendOpMap[(blend_control & 0x000000E0) >> 5]; + // A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND + attachment_state.srcAlphaBlendFactor = + kBlendFactorMap[(blend_control & 0x001F0000) >> 16]; + // A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND + attachment_state.dstAlphaBlendFactor = + kBlendFactorMap[(blend_control & 0x1F000000) >> 24]; + // A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN + attachment_state.alphaBlendOp = + kBlendOpMap[(blend_control & 0x00E00000) >> 21]; + // A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE + // Lines up with VkColorComponentFlagBits, where R=bit 1, G=bit 2, etc.. + uint32_t write_mask = (regs.rb_color_mask >> (i * 4)) & 0xF; + attachment_state.colorWriteMask = write_mask; } - SCOPE_profile_cpu_f("gpu"); + state_info.attachmentCount = 4; + state_info.pAttachments = attachment_states; + + // Ignored; set dynamically. + state_info.blendConstants[0] = 0.0f; + state_info.blendConstants[1] = 0.0f; + state_info.blendConstants[2] = 0.0f; + state_info.blendConstants[3] = 0.0f; return UpdateStatus::kMismatch; } diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index aad43ca80..1eb9d75ed 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -57,10 +57,19 @@ class PipelineCache { void ClearCache(); private: + // Creates or retrieves an existing pipeline for the currently configured + // state. + VkPipeline GetPipeline(const RenderState* render_state); + // Gets a geometry shader used to emulate the given primitive type. // Returns nullptr if the primitive doesn't need to be emulated. VkShaderModule GetGeometryShader(PrimitiveType primitive_type); + // Sets required dynamic state on the command buffer. + // Only state that has changed since the last call will be set unless + // full_update is true. 
+ bool SetDynamicState(VkCommandBuffer command_buffer, bool full_update); + RegisterFile* register_file_ = nullptr; VkDevice device_ = nullptr; @@ -80,6 +89,11 @@ class PipelineCache { // TODO(benvanik): geometry shader cache. + // Previously used pipeline. This matches our current state settings + // and allows us to quickly(ish) reuse the pipeline if no registers have + // changed. + VkPipeline current_pipeline_ = nullptr; + private: enum class UpdateStatus { kCompatible, @@ -87,13 +101,21 @@ class PipelineCache { kError, }; - UpdateStatus UpdateShaders(PrimitiveType prim_type); UpdateStatus UpdateRenderTargets(); - UpdateStatus UpdateState(PrimitiveType prim_type); + UpdateStatus UpdateState(VulkanShader* vertex_shader, + VulkanShader* pixel_shader, + PrimitiveType primitive_type); + + UpdateStatus UpdateShaderStages(VulkanShader* vertex_shader, + VulkanShader* pixel_shader, + PrimitiveType primitive_type); + UpdateStatus UpdateVertexInputState(VulkanShader* vertex_shader); + UpdateStatus UpdateInputAssemblyState(PrimitiveType primitive_type); UpdateStatus UpdateViewportState(); - UpdateStatus UpdateRasterizerState(PrimitiveType prim_type); - UpdateStatus UpdateBlendState(); + UpdateStatus UpdateRasterizationState(PrimitiveType primitive_type); + UpdateStatus UpdateMultisampleState(); UpdateStatus UpdateDepthStencilState(); + UpdateStatus UpdateColorBlendState(); bool SetShadowRegister(uint32_t* dest, uint32_t register_name); bool SetShadowRegister(float* dest, uint32_t register_name); @@ -113,6 +135,45 @@ class PipelineCache { UpdateRenderTargetsRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } } update_render_targets_regs_; + + struct UpdateShaderStagesRegisters { + PrimitiveType prim_type; + uint32_t pa_su_sc_mode_cntl; + uint32_t sq_program_cntl; + uint32_t sq_context_misc; + VulkanShader* vertex_shader; + VulkanShader* pixel_shader; + + UpdateShaderStagesRegisters() { Reset(); } + void Reset() { + sq_program_cntl = 0; + 
vertex_shader = pixel_shader = nullptr; + } + } update_shader_stages_regs_; + VkPipelineShaderStageCreateInfo update_shader_stages_info_[3]; + uint32_t update_shader_stages_stage_count_ = 0; + + struct UpdateVertexInputStateRegisters { + VulkanShader* vertex_shader; + + UpdateVertexInputStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_vertex_input_state_regs_; + VkPipelineVertexInputStateCreateInfo update_vertex_input_state_info_; + VkVertexInputBindingDescription update_vertex_input_state_binding_descrs_[64]; + VkVertexInputAttributeDescription + update_vertex_input_state_attrib_descrs_[64]; + + struct UpdateInputAssemblyStateRegisters { + PrimitiveType primitive_type; + uint32_t pa_su_sc_mode_cntl; + uint32_t multi_prim_ib_reset_index; + + UpdateInputAssemblyStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_input_assembly_state_regs_; + VkPipelineInputAssemblyStateCreateInfo update_input_assembly_state_info_; + struct UpdateViewportStateRegisters { // uint32_t pa_cl_clip_cntl; uint32_t rb_surface_info; @@ -131,23 +192,26 @@ class PipelineCache { UpdateViewportStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } } update_viewport_state_regs_; - struct UpdateRasterizerStateRegisters { + VkPipelineViewportStateCreateInfo update_viewport_state_info_; + + struct UpdateRasterizationStateRegisters { uint32_t pa_su_sc_mode_cntl; uint32_t pa_sc_screen_scissor_tl; uint32_t pa_sc_screen_scissor_br; uint32_t multi_prim_ib_reset_index; PrimitiveType prim_type; - UpdateRasterizerStateRegisters() { Reset(); } + UpdateRasterizationStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_rasterizer_state_regs_; - struct UpdateBlendStateRegisters { - uint32_t rb_blendcontrol[4]; - float rb_blend_rgba[4]; + } update_rasterization_state_regs_; + VkPipelineRasterizationStateCreateInfo update_rasterization_state_info_; - 
UpdateBlendStateRegisters() { Reset(); } + struct UpdateMultisampleStateeRegisters { + UpdateMultisampleStateeRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_blend_state_regs_; + } update_multisample_state_regs_; + VkPipelineMultisampleStateCreateInfo update_multisample_state_info_; + struct UpdateDepthStencilStateRegisters { uint32_t rb_depthcontrol; uint32_t rb_stencilrefmask; @@ -155,20 +219,40 @@ class PipelineCache { UpdateDepthStencilStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } } update_depth_stencil_state_regs_; - struct UpdateShadersRegisters { - PrimitiveType prim_type; - uint32_t pa_su_sc_mode_cntl; - uint32_t sq_program_cntl; - uint32_t sq_context_misc; - VulkanShader* vertex_shader; - VulkanShader* pixel_shader; + VkPipelineDepthStencilStateCreateInfo update_depth_stencil_state_info_; - UpdateShadersRegisters() { Reset(); } - void Reset() { - sq_program_cntl = 0; - vertex_shader = pixel_shader = nullptr; - } - } update_shaders_regs_; + struct UpdateColorBlendStateRegisters { + uint32_t rb_colorcontrol; + uint32_t rb_color_mask; + uint32_t rb_blendcontrol[4]; + + UpdateColorBlendStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_color_blend_state_regs_; + VkPipelineColorBlendStateCreateInfo update_color_blend_state_info_; + VkPipelineColorBlendAttachmentState update_color_blend_attachment_states_[4]; + + struct SetDynamicStateRegisters { + uint32_t pa_sc_window_offset; + + uint32_t pa_su_sc_mode_cntl; + uint32_t pa_sc_window_scissor_tl; + uint32_t pa_sc_window_scissor_br; + + uint32_t rb_surface_info; + uint32_t pa_cl_vte_cntl; + float pa_cl_vport_xoffset; + float pa_cl_vport_yoffset; + float pa_cl_vport_zoffset; + float pa_cl_vport_xscale; + float pa_cl_vport_yscale; + float pa_cl_vport_zscale; + + float rb_blend_rgba[4]; + + SetDynamicStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } 
set_dynamic_state_registers_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 31460be79..646b050fb 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -321,12 +321,11 @@ bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer, VulkanShader* pixel_shader) { // Upload the constants the shaders require. // These are optional, and if none are defined 0 will be returned. - VkDeviceSize vertex_constant_offset = buffer_cache_->UploadConstantRegisters( - vertex_shader->constant_register_map()); - VkDeviceSize pixel_constant_offset = buffer_cache_->UploadConstantRegisters( + auto constant_offsets = buffer_cache_->UploadConstantRegisters( + vertex_shader->constant_register_map(), pixel_shader->constant_register_map()); - if (vertex_constant_offset == VK_WHOLE_SIZE || - pixel_constant_offset == VK_WHOLE_SIZE) { + if (constant_offsets.first == VK_WHOLE_SIZE || + constant_offsets.second == VK_WHOLE_SIZE) { // Shader wants constants but we couldn't upload them. return false; } @@ -334,12 +333,14 @@ bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer, // Configure constant uniform access to point at our offsets. 
auto constant_descriptor_set = buffer_cache_->constant_descriptor_set(); auto pipeline_layout = pipeline_cache_->pipeline_layout(); - uint32_t constant_offsets[2] = {static_cast(vertex_constant_offset), - static_cast(pixel_constant_offset)}; - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_layout, 0, 1, &constant_descriptor_set, - static_cast(xe::countof(constant_offsets)), - constant_offsets); + uint32_t set_constant_offsets[2] = { + static_cast(constant_offsets.first), + static_cast(constant_offsets.second)}; + vkCmdBindDescriptorSets( + command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, + &constant_descriptor_set, + static_cast(xe::countof(set_constant_offsets)), + set_constant_offsets); return true; } From 5759f82276b051f934dd92c7f1b7462e319e0677 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 20 Feb 2016 15:00:11 -0800 Subject: [PATCH 021/145] Adding pipeline caching. --- src/xenia/gpu/vulkan/pipeline_cache.cc | 109 ++++++++++++++++--------- src/xenia/gpu/vulkan/pipeline_cache.h | 27 +++--- 2 files changed, 88 insertions(+), 48 deletions(-) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index 63bad5164..af4c218eb 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -78,6 +78,12 @@ PipelineCache::PipelineCache( } PipelineCache::~PipelineCache() { + // Destroy all pipelines. + for (auto it : cached_pipelines_) { + vkDestroyPipeline(device_, it.second, nullptr); + } + cached_pipelines_.clear(); + vkDestroyPipelineLayout(device_, pipeline_layout_, nullptr); vkDestroyPipelineCache(device_, pipeline_cache_, nullptr); @@ -164,7 +170,9 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, return false; } if (!pipeline) { - pipeline = GetPipeline(render_state); + // Should have a hash key produced by the UpdateState pass. 
+ uint64_t hash_key = XXH64_digest(&hash_state_); + pipeline = GetPipeline(render_state, hash_key); current_pipeline_ = pipeline; if (!pipeline) { // Unable to create pipeline. @@ -192,7 +200,15 @@ void PipelineCache::ClearCache() { // TODO(benvanik): caching. } -VkPipeline PipelineCache::GetPipeline(const RenderState* render_state) { +VkPipeline PipelineCache::GetPipeline(const RenderState* render_state, + uint64_t hash_key) { + // Lookup the pipeline in the cache. + auto it = cached_pipelines_.find(hash_key); + if (it != cached_pipelines_.end()) { + // Found existing pipeline. + return it->second; + } + VkPipelineDynamicStateCreateInfo dynamic_state_info; dynamic_state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; @@ -233,18 +249,19 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state) { pipeline_info.subpass = 0; pipeline_info.basePipelineHandle = nullptr; pipeline_info.basePipelineIndex = 0; - VkPipeline pipeline = nullptr; auto err = vkCreateGraphicsPipelines(device_, nullptr, 1, &pipeline_info, nullptr, &pipeline); CheckResult(err, "vkCreateGraphicsPipelines"); - // TODO(benvanik): don't leak. + // Add to cache with the hash key for reuse. + cached_pipelines_.insert({hash_key, pipeline}); return pipeline; } -VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type) { +VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type, + bool is_line_mode) { switch (primitive_type) { case PrimitiveType::kLineList: case PrimitiveType::kLineStrip: @@ -267,6 +284,11 @@ VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type) { return nullptr; case PrimitiveType::kQuadList: // TODO(benvanik): quad list geometry shader. + if (is_line_mode) { + // + } else { + // + } return nullptr; case PrimitiveType::kQuadStrip: // TODO(benvanik): quad strip geometry shader. 
@@ -455,6 +477,27 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, // TODO(benvanik): push constants. + bool push_constants_dirty = full_update; + push_constants_dirty |= + SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); + push_constants_dirty |= + SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); + + xenos::xe_gpu_program_cntl_t program_cntl; + program_cntl.dword_0 = regs.sq_program_cntl; + + // Populate a register in the pixel shader with frag coord. + int ps_param_gen = (regs.sq_context_misc >> 8) & 0xFF; + // draw_batcher_.set_ps_param_gen(program_cntl.param_gen ? ps_param_gen : -1); + + // Normal vertex shaders only, for now. + // TODO(benvanik): transform feedback/memexport. + // https://github.com/freedreno/freedreno/blob/master/includes/a2xx.xml.h + // 0 = normal + // 2 = point size + assert_true(program_cntl.vs_export_mode == 0 || + program_cntl.vs_export_mode == 2); + return true; } @@ -476,35 +519,14 @@ bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) { return true; } -PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargets() { - auto& regs = update_render_targets_regs_; - - bool dirty = false; - dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); - dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); - dirty |= SetShadowRegister(®s.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); - dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); - dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); - dirty |= SetShadowRegister(®s.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); - dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); - dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); - dirty |= - SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); - dirty |= SetShadowRegister(®s.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); - if (!dirty) { - 
return UpdateStatus::kCompatible; - } - - SCOPE_profile_cpu_f("gpu"); - - return UpdateStatus::kMismatch; -} - PipelineCache::UpdateStatus PipelineCache::UpdateState( VulkanShader* vertex_shader, VulkanShader* pixel_shader, PrimitiveType primitive_type) { bool mismatch = false; + // Reset hash so we can build it up. + XXH64_reset(&hash_state_, 0); + #define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ { \ if (status == UpdateStatus::kError) { \ @@ -554,17 +576,16 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( bool dirty = false; dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); - dirty |= SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); - // dirty |= regs.vertex_shader != active_vertex_shader_; - // dirty |= regs.pixel_shader != active_pixel_shader_; - dirty |= regs.prim_type != primitive_type; + dirty |= regs.vertex_shader != vertex_shader; + dirty |= regs.pixel_shader != pixel_shader; + dirty |= regs.primitive_type != primitive_type; + XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; } - // regs.vertex_shader = static_cast(active_vertex_shader_); - // regs.pixel_shader = static_cast(active_pixel_shader_); - regs.prim_type = primitive_type; + regs.vertex_shader = vertex_shader; + regs.pixel_shader = pixel_shader; + regs.primitive_type = primitive_type; update_shader_stages_stage_count_ = 0; @@ -579,7 +600,14 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( vertex_pipeline_stage.pName = "main"; vertex_pipeline_stage.pSpecializationInfo = nullptr; - auto geometry_shader = GetGeometryShader(primitive_type); + bool is_line_mode = false; + if (((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0) { + uint32_t front_poly_mode = (regs.pa_su_sc_mode_cntl >> 5) & 0x7; + if (front_poly_mode == 1) { + is_line_mode = true; + } + } + auto geometry_shader = 
GetGeometryShader(primitive_type, is_line_mode); if (geometry_shader) { auto& geometry_pipeline_stage = update_shader_stages_info_[update_shader_stages_stage_count_++]; @@ -614,6 +642,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( bool dirty = false; dirty |= vertex_shader != regs.vertex_shader; + XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; } @@ -733,6 +762,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateInputAssemblyState( XE_GPU_REG_PA_SU_SC_MODE_CNTL); dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); + XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; } @@ -831,6 +861,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); + XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; } @@ -917,6 +948,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); dirty |= SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); + XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; } @@ -976,6 +1008,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { SetShadowRegister(®s.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); dirty |= SetShadowRegister(®s.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); + XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; } diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 1eb9d75ed..37a53c751 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -12,6 +12,8 @@ #include +#include 
"third_party/xxhash/xxhash.h" + #include "xenia/gpu/register_file.h" #include "xenia/gpu/spirv_shader_translator.h" #include "xenia/gpu/vulkan/render_cache.h" @@ -59,11 +61,12 @@ class PipelineCache { private: // Creates or retrieves an existing pipeline for the currently configured // state. - VkPipeline GetPipeline(const RenderState* render_state); + VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key); // Gets a geometry shader used to emulate the given primitive type. // Returns nullptr if the primitive doesn't need to be emulated. - VkShaderModule GetGeometryShader(PrimitiveType primitive_type); + VkShaderModule GetGeometryShader(PrimitiveType primitive_type, + bool is_line_mode); // Sets required dynamic state on the command buffer. // Only state that has changed since the last call will be set unless @@ -89,6 +92,13 @@ class PipelineCache { // TODO(benvanik): geometry shader cache. + // Hash state used to incrementally produce pipeline hashes during update. + // By the time the full update pass has run the hash will represent the + // current state in a way that can uniquely identify the produced VkPipeline. + XXH64_state_t hash_state_; + // All previously generated pipelines mapped by hash. + std::unordered_map cached_pipelines_; + // Previously used pipeline. This matches our current state settings // and allows us to quickly(ish) reuse the pipeline if no registers have // changed. 
@@ -101,7 +111,6 @@ class PipelineCache { kError, }; - UpdateStatus UpdateRenderTargets(); UpdateStatus UpdateState(VulkanShader* vertex_shader, VulkanShader* pixel_shader, PrimitiveType primitive_type); @@ -137,18 +146,13 @@ class PipelineCache { } update_render_targets_regs_; struct UpdateShaderStagesRegisters { - PrimitiveType prim_type; + PrimitiveType primitive_type; uint32_t pa_su_sc_mode_cntl; - uint32_t sq_program_cntl; - uint32_t sq_context_misc; VulkanShader* vertex_shader; VulkanShader* pixel_shader; UpdateShaderStagesRegisters() { Reset(); } - void Reset() { - sq_program_cntl = 0; - vertex_shader = pixel_shader = nullptr; - } + void Reset() { std::memset(this, 0, sizeof(*this)); } } update_shader_stages_regs_; VkPipelineShaderStageCreateInfo update_shader_stages_info_[3]; uint32_t update_shader_stages_stage_count_ = 0; @@ -250,6 +254,9 @@ class PipelineCache { float rb_blend_rgba[4]; + uint32_t sq_program_cntl; + uint32_t sq_context_misc; + SetDynamicStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } } set_dynamic_state_registers_; From 769c58a9b2c45bbef6f33f03a9c5275a6a3eb183 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 20 Feb 2016 16:24:42 -0800 Subject: [PATCH 022/145] Adding `xb genspirv` to do glsl->.h. 
--- src/xenia/ui/vulkan/premake5.lua | 3 + .../immediate_frag.h} | 4 +- .../immediate_frag.spv} | Bin .../ui/vulkan/shaders/bin/immediate_frag.txt | 94 +++++++++++++++++ .../immediate_vert.h} | 4 +- .../immediate_vert.spv} | Bin .../ui/vulkan/shaders/bin/immediate_vert.txt | 96 +++++++++++++++++ src/xenia/ui/vulkan/shaders/build.bat | 2 - src/xenia/ui/vulkan/shaders/immediate.frag | 3 + src/xenia/ui/vulkan/shaders/immediate.vert | 3 + .../ui/vulkan/vulkan_immediate_drawer.cc | 14 +-- xenia-build | 98 ++++++++++++++++++ 12 files changed, 310 insertions(+), 11 deletions(-) rename src/xenia/ui/vulkan/shaders/{immediate.frag.h => bin/immediate_frag.h} (99%) rename src/xenia/ui/vulkan/shaders/{immediate.frag.spv => bin/immediate_frag.spv} (100%) create mode 100644 src/xenia/ui/vulkan/shaders/bin/immediate_frag.txt rename src/xenia/ui/vulkan/shaders/{immediate.vert.h => bin/immediate_vert.h} (99%) rename src/xenia/ui/vulkan/shaders/{immediate.vert.spv => bin/immediate_vert.spv} (100%) create mode 100644 src/xenia/ui/vulkan/shaders/bin/immediate_vert.txt delete mode 100644 src/xenia/ui/vulkan/shaders/build.bat diff --git a/src/xenia/ui/vulkan/premake5.lua b/src/xenia/ui/vulkan/premake5.lua index 2144ca30a..71824e5ec 100644 --- a/src/xenia/ui/vulkan/premake5.lua +++ b/src/xenia/ui/vulkan/premake5.lua @@ -18,6 +18,9 @@ project("xenia-ui-vulkan") project_root.."/third_party/vulkan/", }) local_platform_files() + files({ + "shaders/bin/*.h", + }) removefiles({"*_demo.cc"}) group("demos") diff --git a/src/xenia/ui/vulkan/shaders/immediate.frag.h b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.h similarity index 99% rename from src/xenia/ui/vulkan/shaders/immediate.frag.h rename to src/xenia/ui/vulkan/shaders/bin/immediate_frag.h index e1efd613a..4cdf8593b 100644 --- a/src/xenia/ui/vulkan/shaders/immediate.frag.h +++ b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.h @@ -1,4 +1,6 @@ -const uint8_t immediate_frag_spv[] = { +// generated from `xb genspirv` +// source: 
immediate.frag +const uint8_t immediate_frag[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, 0x35, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, diff --git a/src/xenia/ui/vulkan/shaders/immediate.frag.spv b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.spv similarity index 100% rename from src/xenia/ui/vulkan/shaders/immediate.frag.spv rename to src/xenia/ui/vulkan/shaders/bin/immediate_frag.spv diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_frag.txt b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.txt new file mode 100644 index 000000000..c4b6ea61f --- /dev/null +++ b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.txt @@ -0,0 +1,94 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 53 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %4 "main" %9 %11 %30 + OpExecutionMode %4 OriginLowerLeft + OpSource GLSL 450 + OpName %4 "main" + OpName %9 "out_color" + OpName %11 "vtx_color" + OpName %16 "PushConstants" + OpMemberName %16 0 "projection_matrix" + OpMemberName %16 1 "restrict_texture_samples" + OpName %18 "push_constants" + OpName %30 "vtx_uv" + OpName %42 "tex_color" + OpName %46 "texture_sampler" + OpDecorate %9 Location 0 + OpDecorate %11 Location 1 + OpMemberDecorate %16 0 ColMajor + OpMemberDecorate %16 0 Offset 0 + OpMemberDecorate %16 0 MatrixStride 16 + OpMemberDecorate %16 1 Offset 64 + OpDecorate %16 Block + OpDecorate %18 DescriptorSet 0 + OpDecorate %30 Location 0 + OpDecorate %46 DescriptorSet 0 + OpDecorate %46 Binding 0 + %2 = OpTypeVoid + %3 = OpTypeFunction %2 + %6 = OpTypeFloat 32 + %7 = OpTypeVector %6 4 + %8 = OpTypePointer Output %7 + %9 = OpVariable %8 Output + %10 = OpTypePointer Input %7 + %11 = OpVariable %10 Input + %13 = OpTypeBool + %14 = OpTypeMatrix %7 4 + %15 = OpTypeInt 32 1 + %16 = 
OpTypeStruct %14 %15 + %17 = OpTypePointer PushConstant %16 + %18 = OpVariable %17 PushConstant + %19 = OpConstant %15 1 + %20 = OpTypePointer PushConstant %15 + %23 = OpConstant %15 0 + %28 = OpTypeVector %6 2 + %29 = OpTypePointer Input %28 + %30 = OpVariable %29 Input + %31 = OpTypeInt 32 0 + %32 = OpConstant %31 0 + %33 = OpTypePointer Input %6 + %36 = OpConstant %6 1 + %41 = OpTypePointer Function %7 + %43 = OpTypeImage %6 2D 0 0 0 1 Unknown + %44 = OpTypeSampledImage %43 + %45 = OpTypePointer UniformConstant %44 + %46 = OpVariable %45 UniformConstant + %4 = OpFunction %2 None %3 + %5 = OpLabel + %42 = OpVariable %41 Function + %12 = OpLoad %7 %11 + OpStore %9 %12 + %21 = OpAccessChain %20 %18 %19 + %22 = OpLoad %15 %21 + %24 = OpIEqual %13 %22 %23 + %25 = OpLogicalNot %13 %24 + OpSelectionMerge %27 None + OpBranchConditional %25 %26 %27 + %26 = OpLabel + %34 = OpAccessChain %33 %30 %32 + %35 = OpLoad %6 %34 + %37 = OpFOrdLessThanEqual %13 %35 %36 + OpBranch %27 + %27 = OpLabel + %38 = OpPhi %13 %24 %5 %37 %26 + OpSelectionMerge %40 None + OpBranchConditional %38 %39 %40 + %39 = OpLabel + %47 = OpLoad %44 %46 + %48 = OpLoad %28 %30 + %49 = OpImageSampleImplicitLod %7 %47 %48 + OpStore %42 %49 + %50 = OpLoad %7 %42 + %51 = OpLoad %7 %9 + %52 = OpFMul %7 %51 %50 + OpStore %9 %52 + OpBranch %40 + %40 = OpLabel + OpReturn + OpFunctionEnd diff --git a/src/xenia/ui/vulkan/shaders/immediate.vert.h b/src/xenia/ui/vulkan/shaders/bin/immediate_vert.h similarity index 99% rename from src/xenia/ui/vulkan/shaders/immediate.vert.h rename to src/xenia/ui/vulkan/shaders/bin/immediate_vert.h index b454eb260..3d2c0687e 100644 --- a/src/xenia/ui/vulkan/shaders/immediate.vert.h +++ b/src/xenia/ui/vulkan/shaders/bin/immediate_vert.h @@ -1,4 +1,6 @@ -const uint8_t immediate_vert_spv[] = { +// generated from `xb genspirv` +// source: immediate.vert +const uint8_t immediate_vert[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, 0x30, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, diff --git a/src/xenia/ui/vulkan/shaders/immediate.vert.spv b/src/xenia/ui/vulkan/shaders/bin/immediate_vert.spv similarity index 100% rename from src/xenia/ui/vulkan/shaders/immediate.vert.spv rename to src/xenia/ui/vulkan/shaders/bin/immediate_vert.spv diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_vert.txt b/src/xenia/ui/vulkan/shaders/bin/immediate_vert.txt new file mode 100644 index 000000000..a8e36189e --- /dev/null +++ b/src/xenia/ui/vulkan/shaders/bin/immediate_vert.txt @@ -0,0 +1,96 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 48 +; Schema: 0 + OpCapability Shader + OpCapability ClipDistance + OpCapability CullDistance + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %4 "main" %13 %25 %41 %42 %44 %46 + OpSource GLSL 450 + OpName %4 "main" + OpName %11 "gl_PerVertex" + OpMemberName %11 0 "gl_Position" + OpMemberName %11 1 "gl_PointSize" + OpMemberName %11 2 "gl_ClipDistance" + OpMemberName %11 3 "gl_CullDistance" + OpName %13 "" + OpName %17 "PushConstants" + OpMemberName %17 0 "projection_matrix" + OpMemberName %17 1 "restrict_texture_samples" + OpName %19 "push_constants" + OpName %25 "in_pos" + OpName %41 "vtx_uv" + OpName %42 "in_uv" + OpName %44 "vtx_color" + OpName %46 "in_color" + OpMemberDecorate %11 0 BuiltIn Position + OpMemberDecorate %11 1 BuiltIn PointSize + OpMemberDecorate %11 2 BuiltIn ClipDistance + OpMemberDecorate %11 3 BuiltIn CullDistance + OpDecorate %11 Block + OpMemberDecorate %17 0 ColMajor + OpMemberDecorate %17 0 Offset 0 + OpMemberDecorate %17 0 MatrixStride 16 + OpMemberDecorate %17 1 Offset 64 + OpDecorate %17 Block + OpDecorate %19 DescriptorSet 0 + OpDecorate %25 Location 0 + OpDecorate %41 Location 0 + OpDecorate %42 Location 1 + OpDecorate %44 Location 1 + OpDecorate %46 Location 2 + %2 = OpTypeVoid + %3 = 
OpTypeFunction %2 + %6 = OpTypeFloat 32 + %7 = OpTypeVector %6 4 + %8 = OpTypeInt 32 0 + %9 = OpConstant %8 1 + %10 = OpTypeArray %6 %9 + %11 = OpTypeStruct %7 %6 %10 %10 + %12 = OpTypePointer Output %11 + %13 = OpVariable %12 Output + %14 = OpTypeInt 32 1 + %15 = OpConstant %14 0 + %16 = OpTypeMatrix %7 4 + %17 = OpTypeStruct %16 %14 + %18 = OpTypePointer PushConstant %17 + %19 = OpVariable %18 PushConstant + %20 = OpTypePointer PushConstant %16 + %23 = OpTypeVector %6 2 + %24 = OpTypePointer Input %23 + %25 = OpVariable %24 Input + %27 = OpConstant %6 0 + %28 = OpConstant %6 1 + %33 = OpTypePointer Output %7 + %35 = OpTypePointer Output %6 + %40 = OpTypePointer Output %23 + %41 = OpVariable %40 Output + %42 = OpVariable %24 Input + %44 = OpVariable %33 Output + %45 = OpTypePointer Input %7 + %46 = OpVariable %45 Input + %4 = OpFunction %2 None %3 + %5 = OpLabel + %21 = OpAccessChain %20 %19 %15 + %22 = OpLoad %16 %21 + %26 = OpLoad %23 %25 + %29 = OpCompositeExtract %6 %26 0 + %30 = OpCompositeExtract %6 %26 1 + %31 = OpCompositeConstruct %7 %29 %30 %27 %28 + %32 = OpMatrixTimesVector %7 %22 %31 + %34 = OpAccessChain %33 %13 %15 + OpStore %34 %32 + %36 = OpAccessChain %35 %13 %15 %9 + %37 = OpLoad %6 %36 + %38 = OpFNegate %6 %37 + %39 = OpAccessChain %35 %13 %15 %9 + OpStore %39 %38 + %43 = OpLoad %23 %42 + OpStore %41 %43 + %47 = OpLoad %7 %46 + OpStore %44 %47 + OpReturn + OpFunctionEnd diff --git a/src/xenia/ui/vulkan/shaders/build.bat b/src/xenia/ui/vulkan/shaders/build.bat deleted file mode 100644 index c3e0322b0..000000000 --- a/src/xenia/ui/vulkan/shaders/build.bat +++ /dev/null @@ -1,2 +0,0 @@ -glslangValidator -V immediate.vert -o immediate.vert.spv -glslangValidator -V immediate.frag -o immediate.frag.spv diff --git a/src/xenia/ui/vulkan/shaders/immediate.frag b/src/xenia/ui/vulkan/shaders/immediate.frag index b5fcdda35..c1ebb265e 100644 --- a/src/xenia/ui/vulkan/shaders/immediate.frag +++ b/src/xenia/ui/vulkan/shaders/immediate.frag @@ -1,3 +1,6 @@ +// 
NOTE: This file is compiled and embedded into the exe. +// Use `xenia-build genspirv` and check in any changes under bin/. + #version 450 core precision highp float; diff --git a/src/xenia/ui/vulkan/shaders/immediate.vert b/src/xenia/ui/vulkan/shaders/immediate.vert index 732553dcf..025c6ae4a 100644 --- a/src/xenia/ui/vulkan/shaders/immediate.vert +++ b/src/xenia/ui/vulkan/shaders/immediate.vert @@ -1,3 +1,6 @@ +// NOTE: This file is compiled and embedded into the exe. +// Use `xenia-build genspirv` and check in any changes under bin/. + #version 450 core precision highp float; diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index 463e7ece0..aa9c84c72 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -20,8 +20,9 @@ namespace xe { namespace ui { namespace vulkan { -#include "xenia/ui/vulkan/shaders/immediate.frag.h" -#include "xenia/ui/vulkan/shaders/immediate.vert.h" +// Generated with `xenia-build genspirv`. 
+#include "xenia/ui/vulkan/shaders/bin/immediate_frag.h" +#include "xenia/ui/vulkan/shaders/bin/immediate_vert.h" constexpr uint32_t kCircularBufferCapacity = 2 * 1024 * 1024; @@ -380,9 +381,8 @@ VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext* graphics_context) vertex_shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; vertex_shader_info.pNext = nullptr; vertex_shader_info.flags = 0; - vertex_shader_info.codeSize = sizeof(immediate_vert_spv); - vertex_shader_info.pCode = - reinterpret_cast(immediate_vert_spv); + vertex_shader_info.codeSize = sizeof(immediate_vert); + vertex_shader_info.pCode = reinterpret_cast(immediate_vert); VkShaderModule vertex_shader; err = vkCreateShaderModule(*device, &vertex_shader_info, nullptr, &vertex_shader); @@ -391,9 +391,9 @@ VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext* graphics_context) fragment_shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; fragment_shader_info.pNext = nullptr; fragment_shader_info.flags = 0; - fragment_shader_info.codeSize = sizeof(immediate_frag_spv); + fragment_shader_info.codeSize = sizeof(immediate_frag); fragment_shader_info.pCode = - reinterpret_cast(immediate_frag_spv); + reinterpret_cast(immediate_frag); VkShaderModule fragment_shader; err = vkCreateShaderModule(*device, &fragment_shader_info, nullptr, &fragment_shader); diff --git a/xenia-build b/xenia-build index 1986b3299..4587374c4 100755 --- a/xenia-build +++ b/xenia-build @@ -396,6 +396,7 @@ def discover_commands(subparsers): 'pull': PullCommand(subparsers), 'premake': PremakeCommand(subparsers), 'build': BuildCommand(subparsers), + 'genspirv': GenSpirvCommand(subparsers), 'gentests': GenTestsCommand(subparsers), 'test': TestCommand(subparsers), 'gputest': GpuTestCommand(subparsers), @@ -623,6 +624,103 @@ class BuildCommand(BaseBuildCommand): return result +class GenSpirvCommand(Command): + """'genspirv' command.""" + + def __init__(self, subparsers, *args, **kwargs): + super(GenSpirvCommand, 
self).__init__( + subparsers, + name='genspirv', + help_short='Generates SPIR-V binaries and header files.', + help_long=''' + Generates the .spv/.h binaries under src/xenia/*/vulkan/shaders/bin/). + Run after modifying any .vert/.geom/.frag files. + ''', + *args, **kwargs) + + def execute(self, args, pass_args, cwd): + print('Generating SPIR-V binaries...') + print('') + + # TODO(benvanik): actually find vulkan SDK. Env var? etc? + vulkan_sdk_path = 'C:\\VulkanSDK\\1.0.3.1' + vulkan_bin_path = os.path.join(vulkan_sdk_path, 'bin') + glslang = os.path.join(vulkan_bin_path, 'glslangValidator') + spirv_dis = os.path.join(vulkan_bin_path, 'spirv-dis') + spirv_remap = os.path.join(vulkan_bin_path, 'spirv-remap') + + # Ensure we have the tools. + if not os.path.exists(vulkan_sdk_path): + print('ERROR: could not find the Vulkan SDK') + return 1 + elif not has_bin(glslang): + print('ERROR: could not find glslangValidator') + return 1 + elif not has_bin(spirv_dis): + print('ERROR: could not find spirv-dis') + return 1 + elif not has_bin(spirv_remap): + print('ERROR: could not find spirv-remap') + return 1 + + src_files = [os.path.join(root, name) + for root, dirs, files in os.walk('src') + for name in files + if (name.endswith('.vert') or name.endswith('.geom') or + name.endswith('.frag'))] + + any_errors = False + for src_file in src_files: + print('- %s' % (src_file)) + src_name = os.path.splitext(os.path.basename(src_file))[0] + identifier = os.path.basename(src_file).replace('.', '_') + + bin_path = os.path.join(os.path.dirname(src_file), 'bin') + spv_file = os.path.join(bin_path, identifier) + '.spv' + txt_file = os.path.join(bin_path, identifier) + '.txt' + h_file = os.path.join(bin_path, identifier) + '.h' + + # GLSL source -> .spv binary + shell_call([ + glslang, + '-V', src_file, + '-o', spv_file, + ]) + + # Disassemble binary into human-readable text. + shell_call([ + spirv_dis, + '-o', txt_file, + spv_file, + ]) + + # TODO(benvanik): remap? 
+ + # bin2c so we get a header file we can compile in. + with open(h_file, 'wb') as out_file: + out_file.write('// generated from `xb genspirv`\n') + out_file.write('// source: %s\n' % os.path.basename(src_file)) + out_file.write('const uint8_t %s[] = {' % (identifier)) + with open(spv_file, 'rb') as in_file: + index = 0 + c = in_file.read(1) + while c != '': + if index % 12 == 0: + out_file.write('\n ') + else: + out_file.write(' ') + index += 1 + out_file.write('0x%02X,' % ord(c)) + c = in_file.read(1) + out_file.write('\n};\n') + + if any_errors: + print('ERROR: failed to build one or more SPIR-V files.') + return 1 + + return 0 + + class TestCommand(BaseBuildCommand): """'test' command.""" From b5a0c4715ba69a85538678b745987e3832ab87f7 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 20 Feb 2016 16:35:21 -0800 Subject: [PATCH 023/145] Porting GL4 geometry shaders. Likely not working. --- src/xenia/gpu/vulkan/pipeline_cache.cc | 62 ++- src/xenia/gpu/vulkan/pipeline_cache.h | 8 +- src/xenia/gpu/vulkan/premake5.lua | 3 + .../vulkan/shaders/bin/line_quad_list_geom.h | 200 ++++++++ .../shaders/bin/line_quad_list_geom.spv | Bin 0 -> 2344 bytes .../shaders/bin/line_quad_list_geom.txt | 147 ++++++ .../gpu/vulkan/shaders/bin/point_list_geom.h | 206 ++++++++ .../vulkan/shaders/bin/point_list_geom.spv | Bin 0 -> 2420 bytes .../vulkan/shaders/bin/point_list_geom.txt | 153 ++++++ .../gpu/vulkan/shaders/bin/quad_list_geom.h | 178 +++++++ .../gpu/vulkan/shaders/bin/quad_list_geom.spv | Bin 0 -> 2084 bytes .../gpu/vulkan/shaders/bin/quad_list_geom.txt | 136 ++++++ .../gpu/vulkan/shaders/bin/rect_list_geom.h | 438 ++++++++++++++++++ .../gpu/vulkan/shaders/bin/rect_list_geom.spv | Bin 0 -> 5204 bytes .../gpu/vulkan/shaders/bin/rect_list_geom.txt | 328 +++++++++++++ .../gpu/vulkan/shaders/line_quad_list.geom | 49 ++ src/xenia/gpu/vulkan/shaders/point_list.geom | 48 ++ src/xenia/gpu/vulkan/shaders/quad_list.geom | 37 ++ src/xenia/gpu/vulkan/shaders/rect_list.geom | 101 ++++ 19 
files changed, 2079 insertions(+), 15 deletions(-) create mode 100644 src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h create mode 100644 src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.spv create mode 100644 src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.txt create mode 100644 src/xenia/gpu/vulkan/shaders/bin/point_list_geom.h create mode 100644 src/xenia/gpu/vulkan/shaders/bin/point_list_geom.spv create mode 100644 src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt create mode 100644 src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h create mode 100644 src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.spv create mode 100644 src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt create mode 100644 src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h create mode 100644 src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.spv create mode 100644 src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt create mode 100644 src/xenia/gpu/vulkan/shaders/line_quad_list.geom create mode 100644 src/xenia/gpu/vulkan/shaders/point_list.geom create mode 100644 src/xenia/gpu/vulkan/shaders/quad_list.geom create mode 100644 src/xenia/gpu/vulkan/shaders/rect_list.geom diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index af4c218eb..e86220f61 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -23,6 +23,12 @@ namespace vulkan { using xe::ui::vulkan::CheckResult; +// Generated with `xenia-build genspirv`. 
+#include "xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h" +#include "xenia/gpu/vulkan/shaders/bin/point_list_geom.h" +#include "xenia/gpu/vulkan/shaders/bin/quad_list_geom.h" +#include "xenia/gpu/vulkan/shaders/bin/rect_list_geom.h" + PipelineCache::PipelineCache( RegisterFile* register_file, ui::vulkan::VulkanDevice* device, VkDescriptorSetLayout uniform_descriptor_set_layout, @@ -75,6 +81,36 @@ PipelineCache::PipelineCache( err = vkCreatePipelineLayout(*device, &pipeline_layout_info, nullptr, &pipeline_layout_); CheckResult(err, "vkCreatePipelineLayout"); + + // Initialize our shared geometry shaders. + // These will be used as needed to emulate primitive types Vulkan doesn't + // support. + VkShaderModuleCreateInfo shader_module_info; + shader_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + shader_module_info.pNext = nullptr; + shader_module_info.flags = 0; + shader_module_info.codeSize = + static_cast(sizeof(line_quad_list_geom)); + shader_module_info.pCode = + reinterpret_cast(line_quad_list_geom); + err = vkCreateShaderModule(device_, &shader_module_info, nullptr, + &geometry_shaders_.line_quad_list); + CheckResult(err, "vkCreateShaderModule"); + shader_module_info.codeSize = static_cast(sizeof(point_list_geom)); + shader_module_info.pCode = reinterpret_cast(point_list_geom); + err = vkCreateShaderModule(device_, &shader_module_info, nullptr, + &geometry_shaders_.point_list); + CheckResult(err, "vkCreateShaderModule"); + shader_module_info.codeSize = static_cast(sizeof(quad_list_geom)); + shader_module_info.pCode = reinterpret_cast(quad_list_geom); + err = vkCreateShaderModule(device_, &shader_module_info, nullptr, + &geometry_shaders_.quad_list); + CheckResult(err, "vkCreateShaderModule"); + shader_module_info.codeSize = static_cast(sizeof(rect_list_geom)); + shader_module_info.pCode = reinterpret_cast(rect_list_geom); + err = vkCreateShaderModule(device_, &shader_module_info, nullptr, + &geometry_shaders_.rect_list); + 
CheckResult(err, "vkCreateShaderModule"); } PipelineCache::~PipelineCache() { @@ -84,6 +120,12 @@ PipelineCache::~PipelineCache() { } cached_pipelines_.clear(); + // Destroy geometry shaders. + vkDestroyShaderModule(device_, geometry_shaders_.line_quad_list, nullptr); + vkDestroyShaderModule(device_, geometry_shaders_.point_list, nullptr); + vkDestroyShaderModule(device_, geometry_shaders_.quad_list, nullptr); + vkDestroyShaderModule(device_, geometry_shaders_.rect_list, nullptr); + vkDestroyPipelineLayout(device_, pipeline_layout_, nullptr); vkDestroyPipelineCache(device_, pipeline_cache_, nullptr); @@ -264,6 +306,7 @@ VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type, bool is_line_mode) { switch (primitive_type) { case PrimitiveType::kLineList: + case PrimitiveType::kLineLoop: case PrimitiveType::kLineStrip: case PrimitiveType::kTriangleList: case PrimitiveType::kTriangleFan: @@ -271,27 +314,18 @@ VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type, // Supported directly - no need to emulate. return nullptr; case PrimitiveType::kPointList: - // TODO(benvanik): point list geometry shader. - return nullptr; + return geometry_shaders_.point_list; case PrimitiveType::kUnknown0x07: assert_always("Unknown geometry type"); return nullptr; case PrimitiveType::kRectangleList: - // TODO(benvanik): rectangle list geometry shader. - return nullptr; - case PrimitiveType::kLineLoop: - // TODO(benvanik): line loop geometry shader. - return nullptr; + return geometry_shaders_.rect_list; case PrimitiveType::kQuadList: - // TODO(benvanik): quad list geometry shader. - if (is_line_mode) { - // - } else { - // - } - return nullptr; + return is_line_mode ? geometry_shaders_.line_quad_list + : geometry_shaders_.quad_list; case PrimitiveType::kQuadStrip: // TODO(benvanik): quad strip geometry shader. 
+ assert_always("Quad strips not implemented"); return nullptr; default: assert_unhandled_case(primitive_type); diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 37a53c751..7d35fc496 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -90,7 +90,13 @@ class PipelineCache { // constants. VkPipelineLayout pipeline_layout_ = nullptr; - // TODO(benvanik): geometry shader cache. + // Shared geometry shaders. + struct { + VkShaderModule line_quad_list; + VkShaderModule point_list; + VkShaderModule quad_list; + VkShaderModule rect_list; + } geometry_shaders_; // Hash state used to incrementally produce pipeline hashes during update. // By the time the full update pass has run the hash will represent the diff --git a/src/xenia/gpu/vulkan/premake5.lua b/src/xenia/gpu/vulkan/premake5.lua index 5a89101e2..ca6b1ae77 100644 --- a/src/xenia/gpu/vulkan/premake5.lua +++ b/src/xenia/gpu/vulkan/premake5.lua @@ -21,6 +21,9 @@ project("xenia-gpu-vulkan") project_root.."/third_party/gflags/src", }) local_platform_files() + files({ + "shaders/bin/*.h", + }) -- TODO(benvanik): kill this and move to the debugger UI. 
group("src") diff --git a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h new file mode 100644 index 000000000..cb3511e37 --- /dev/null +++ b/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h @@ -0,0 +1,200 @@ +// generated from `xb genspirv` +// source: line_quad_list.geom +const uint8_t line_quad_list_geom[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, + 0x4E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x36, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, + 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0B, 
0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, + 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, + 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, + 0x05, 0x00, 0x03, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, + 0x06, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, + 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x04, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x69, 0x6E, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x56, 0x65, 0x72, 0x74, + 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, 0x06, 0x00, 0x04, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, + 0x76, 0x74, 0x78, 0x00, 0x05, 0x00, 0x05, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, + 0x06, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x69, 0x6E, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 
0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0A, 
0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x1B, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x26, 
0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x27, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x15, 0x00, 0x00, 0x00, 0x2C, 
0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, + 0x2C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x2F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x37, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1B, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x39, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x3A, 
0x00, 0x00, 0x00, + 0x39, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x43, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x45, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x15, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, + 0x46, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x48, 0x00, 0x00, 0x00, 0x47, 
0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, + 0x49, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x4B, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0xFD, 0x00, 0x01, 0x00, + 0x38, 0x00, 0x01, 0x00, +}; diff --git a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.spv b/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.spv new file mode 100644 index 0000000000000000000000000000000000000000..c8ade8408f0e7a26a1eff170df2428d68b8686c6 GIT binary patch literal 2344 zcmZ{l>rPWq5QR6i1rg;U0$vb{ct;VnAc~*}T2wS9B>p9hHs&PQ8hSLu$MZ4tMf_vp z`u6FHJE*4_XV%Q@nOU>X!BX#Re+WJL9SN^P@r;IE{X#&W>gsA&FG&xEKIMl{-`?4t z@8s|2Yl{nlVJMVPF*Y0qA}R}iY-BC`$0a8v7bRCEBg!B?9_@0pWybT4O(n52vMZ93 ze$4kxLsWDs?NQNegNl9F+kMmAf7{&8n+JmO7Jh7E?M{|w?Uua#F&{qBpSALx>}!)e z$LOWl#$NVmGwbAy*1M+qmLebW=s8m4{M8db&*7f<|JRe)zk8PB{!}Y2w4%-zlv}Xe zY~&4QE=Ql-p)FxQ?7RIr-~Eyw2v@=-;lpR}@%Rk)leOZJu%$EXl*igWIN(Fu$L#oc z8|wNO8(&?sV4sulfO#j3Iq!Xu$7A2&KHgE<$3D)m7oKSeXX%B{y}<0nm^0pi&x~nm z=AID;xL{An>XWWYCRIZpPsGK0T-H}q#H7NU;@|-ZGfW8|mF7IY>F!=#gIHBL$E4{8 zx0NntPx!`#hoq|#YSIHdEa9Ohdq`_~j#2rUlQ~XElZTIaPfsx4Dn8kTpcqUA5=1)1)g^8KNmh6-Rm>hF>F57id4lm><=6iT4 fJLLc-$M>);8xEd@dw3WV|{;6LG@9P&f@5Odyty*8KM&sV#r+%)%-oSIC*0c|OeN+cqlYLZ> zH65yb|5uZ^=f5?Hzo{l|udk_WfS$eP+<-JG~ z@Gi8E9FuT$_qWUU6kJT&gZ@^=`QjWadi41|`c`n`#b(N!Bd0cfw!xjDKHjk%ZcpOp zi|+1{)w`+G&M`)*TH9c$k3{}|aB>}E=z 
z!0qJ_yxEJn4ud&MZPu>t+le{Pqj2wW)-kWokj?EZ=HU8>Y1j92D5XcwLO)UKmyT0| zz9%7l!~;83^d|O2@zXc_P8Yu#o6Ps00W+sLjM2y5zJ%MaHL8DAbZbz*3b#*vqPJ_v zzMZ{&S8QT4cwUE#8T&olSTX0O?*5AD>+b4no||y>*q2+#n;>yx)mI8;?%PE+<}5RF z_Ria5{=Y#waAT+7(~v!=yF31!4Bg%EHx;@w@$Z)U0UqZJ^nBD9&2_^?$!c#x;tM(V*2QB zuJe$%y8V9x+4mD-v|oVq5r15;i^$fXEo!`k+^lgPT}<-*efQhMd&dG~4o_g0k;UvK zuq(*(kSDN3~GyehEnLpxA-}*;L%)N@;5o_*F7yU1C{sWzZ{sk~OpBVrE literal 0 HcmV?d00001 diff --git a/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt new file mode 100644 index 000000000..0eecef563 --- /dev/null +++ b/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt @@ -0,0 +1,153 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 86 +; Schema: 0 + OpCapability Geometry + OpCapability GeometryPointSize + OpCapability ClipDistance + OpCapability GeometryStreams + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Geometry %4 "main" %16 %41 %76 %80 + OpExecutionMode %4 InputPoints + OpExecutionMode %4 Invocations 1 + OpExecutionMode %4 OutputTriangleStrip + OpExecutionMode %4 OutputVertices 4 + OpSource GLSL 450 + OpName %4 "main" + OpName %9 "pos" + OpName %13 "gl_PerVertex" + OpMemberName %13 0 "gl_Position" + OpMemberName %13 1 "gl_PointSize" + OpMemberName %13 2 "gl_ClipDistance" + OpName %16 "gl_in" + OpName %23 "psize" + OpName %29 "i" + OpName %39 "gl_PerVertex" + OpMemberName %39 0 "gl_Position" + OpMemberName %39 1 "gl_PointSize" + OpMemberName %39 2 "gl_ClipDistance" + OpName %41 "" + OpName %56 "indexable" + OpName %74 "VertexData" + OpMemberName %74 0 "o" + OpName %76 "out_vtx" + OpName %77 "VertexData" + OpMemberName %77 0 "o" + OpName %80 "in_vtx" + OpMemberDecorate %13 0 BuiltIn Position + OpMemberDecorate %13 1 BuiltIn PointSize + OpMemberDecorate %13 2 BuiltIn ClipDistance + OpDecorate %13 Block + OpMemberDecorate %39 0 BuiltIn Position + OpMemberDecorate %39 1 BuiltIn PointSize + 
OpMemberDecorate %39 2 BuiltIn ClipDistance + OpDecorate %39 Block + OpDecorate %39 Stream 0 + OpDecorate %41 Stream 0 + OpMemberDecorate %74 0 Location 1 + OpDecorate %74 Stream 0 + OpDecorate %76 Stream 0 + OpMemberDecorate %77 0 Location 1 + %2 = OpTypeVoid + %3 = OpTypeFunction %2 + %6 = OpTypeFloat 32 + %7 = OpTypeVector %6 4 + %8 = OpTypePointer Function %7 + %10 = OpTypeInt 32 0 + %11 = OpConstant %10 1 + %12 = OpTypeArray %6 %11 + %13 = OpTypeStruct %7 %6 %12 + %14 = OpTypeArray %13 %11 + %15 = OpTypePointer Input %14 + %16 = OpVariable %15 Input + %17 = OpTypeInt 32 1 + %18 = OpConstant %17 0 + %19 = OpTypePointer Input %7 + %22 = OpTypePointer Function %6 + %24 = OpConstant %17 1 + %25 = OpTypePointer Input %6 + %28 = OpTypePointer Function %17 + %36 = OpConstant %17 4 + %37 = OpTypeBool + %39 = OpTypeStruct %7 %6 %12 + %40 = OpTypePointer Output %39 + %41 = OpVariable %40 Output + %42 = OpTypeVector %6 2 + %45 = OpConstant %10 4 + %46 = OpTypeArray %42 %45 + %47 = OpConstant %6 -1 + %48 = OpConstant %6 1 + %49 = OpConstantComposite %42 %47 %48 + %50 = OpConstantComposite %42 %48 %48 + %51 = OpConstantComposite %42 %47 %47 + %52 = OpConstantComposite %42 %48 %47 + %53 = OpConstantComposite %46 %49 %50 %51 %52 + %55 = OpTypePointer Function %46 + %57 = OpTypePointer Function %42 + %70 = OpTypePointer Output %7 + %72 = OpConstant %10 16 + %73 = OpTypeArray %7 %72 + %74 = OpTypeStruct %73 + %75 = OpTypePointer Output %74 + %76 = OpVariable %75 Output + %77 = OpTypeStruct %73 + %78 = OpTypeArray %77 %11 + %79 = OpTypePointer Input %78 + %80 = OpVariable %79 Input + %81 = OpTypePointer Input %77 + %4 = OpFunction %2 None %3 + %5 = OpLabel + %9 = OpVariable %8 Function + %23 = OpVariable %22 Function + %29 = OpVariable %28 Function + %56 = OpVariable %55 Function + %20 = OpAccessChain %19 %16 %18 %18 + %21 = OpLoad %7 %20 + OpStore %9 %21 + %26 = OpAccessChain %25 %16 %18 %24 + %27 = OpLoad %6 %26 + OpStore %23 %27 + OpStore %29 %18 + OpBranch %30 + %30 = 
OpLabel + OpLoopMerge %32 %33 None + OpBranch %34 + %34 = OpLabel + %35 = OpLoad %17 %29 + %38 = OpSLessThan %37 %35 %36 + OpBranchConditional %38 %31 %32 + %31 = OpLabel + %43 = OpLoad %7 %9 + %44 = OpVectorShuffle %42 %43 %43 0 1 + %54 = OpLoad %17 %29 + OpStore %56 %53 + %58 = OpAccessChain %57 %56 %54 + %59 = OpLoad %42 %58 + %60 = OpLoad %6 %23 + %61 = OpVectorTimesScalar %42 %59 %60 + %62 = OpFAdd %42 %44 %61 + %63 = OpLoad %7 %9 + %64 = OpVectorShuffle %42 %63 %63 2 3 + %65 = OpCompositeExtract %6 %62 0 + %66 = OpCompositeExtract %6 %62 1 + %67 = OpCompositeExtract %6 %64 0 + %68 = OpCompositeExtract %6 %64 1 + %69 = OpCompositeConstruct %7 %65 %66 %67 %68 + %71 = OpAccessChain %70 %41 %18 + OpStore %71 %69 + %82 = OpAccessChain %81 %80 %18 + %83 = OpLoad %77 %82 + OpStore %76 %83 + OpEmitVertex + OpBranch %33 + %33 = OpLabel + %84 = OpLoad %17 %29 + %85 = OpIAdd %17 %84 %24 + OpStore %29 %85 + OpBranch %30 + %32 = OpLabel + OpEndPrimitive + OpReturn + OpFunctionEnd diff --git a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h new file mode 100644 index 000000000..7a27bde3a --- /dev/null +++ b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h @@ -0,0 +1,178 @@ +// generated from `xb genspirv` +// source: quad_list.geom +const uint8_t quad_list_geom[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, + 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x36, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 
0x00, + 0x26, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, + 0x3F, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x69, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x69, 0x6E, 0x70, 0x75, 0x74, 0x5F, 0x69, 0x6E, 0x64, 0x65, 0x78, 0x00, + 0x05, 0x00, 0x05, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x64, 0x65, + 0x78, 0x61, 0x62, 0x6C, 0x65, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, + 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x07, 0x00, 0x24, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, + 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, + 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x27, 0x00, 0x00, 
0x00, + 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, + 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, + 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x69, + 0x6E, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x39, 0x00, 0x00, 0x00, + 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, + 0x06, 0x00, 0x04, 0x00, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x3B, 0x00, 0x00, 0x00, + 0x6F, 0x75, 0x74, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x05, 0x00, 0x05, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x44, 0x61, + 0x74, 0x61, 0x00, 0x00, 0x06, 0x00, 0x04, 0x00, 0x3C, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, + 0x3F, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 
0x00, + 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x39, 0x00, 0x00, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x3B, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x07, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x1C, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 
0x00, + 0x16, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x05, 0x00, 0x24, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x35, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x37, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x03, 0x00, 0x39, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x39, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x3A, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 
0x00, + 0x3C, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, + 0x3D, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x3E, 0x00, 0x00, 0x00, + 0x3F, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x41, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 
0x00, + 0x1F, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x2D, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2F, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x36, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x41, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x3C, 0x00, 0x00, 0x00, + 0x43, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x3B, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0x0D, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 
0x00, + 0x45, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x0C, 0x00, 0x00, 0x00, 0xDB, 0x00, 0x01, 0x00, + 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, +}; diff --git a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.spv b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.spv new file mode 100644 index 0000000000000000000000000000000000000000..a57165e8afcbae1d14f3f1d47613a26fa566156b GIT binary patch literal 2084 zcmZ{l*-lhJ5QZCO0Yq73b3q)$9mEX~MHB?Z3ocAZyd|Sd%t>&Dz-WwDzLKxtW9WMr zH8Jt~dQOuGYM&HU_1DsMs_G1d&Z({tI(WLnv(P$&p_3;B`Kg>&a=rlX4Q2d8*x20K zoIPmn%+Af-n-^@R^O?8h?qH}R(-Wwi+<&B5&mX(`$cX4Wo^HS zBd5$G?$Opb$eLNB4%!uQdl2!g-rUMQn(EMIec0A^v%U4~pjoZIu5ouE_CUW9o>y8v zf88nWIeMq~|L)ZG?>mdJ=j4h*uqXHNZN24swOO_1V#I($1G1;is6p4>H(Nb>h<-Fi z%#&iAX;zOR;W=4S?|P$GIK+GhF?sT{b}=_-gTKa#ZBUD{GxMmsM@ct6^{f4!)T{P; zQjgm2Ntk);i)R9|Ht!Nkcd6T#q#KiZkYa|Y+PWus2j4|`gt#)i0-3jh-UXM$r*nG` zvM1>S$VF(FEQ4_K48tpsM?ZDHAN|JpI|?_aSjsbo?7d0%o3^&3pGLOM87eZ~@61|^ z(^tDSkHf85pHVVY;L;ej$y~OMRM!RM4#>Kua=X|>z8fi~Wh;hD*sRNWFA<~eti_CR zR_!t7y$UAo`QxOa-1L*3sTG3_(CJelh^;pR{Jo!nl5>pu_IuAbiO9wOAwcl0&SGTdEC@9_w^0}?k@-S11x+^e~4%mTe}e)isD z{vRMExUtU6-$Z%R{asADb9JZH9}%su^Z!YXV{miHzmw}Y`~>9D-Urt%U&ik4^+V?P zOa<=k$sDt1zwlA_8#I@($|?3Nva!}-P440orK+cy~$tD_h|#oQwUGr=07^_VW`pIGd-CSgJvr PSo^G<%QtfVh8Ceea|?_> literal 0 HcmV?d00001 diff --git a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt new file mode 100644 index 000000000..4a1ed2b02 --- /dev/null +++ b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt @@ -0,0 +1,136 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 70 +; Schema: 0 + OpCapability Geometry + OpCapability GeometryPointSize + OpCapability ClipDistance + OpCapability GeometryStreams + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Geometry %4 "main" %38 %42 %59 %63 + OpExecutionMode %4 InputLinesAdjacency + OpExecutionMode %4 
Invocations 1 + OpExecutionMode %4 OutputTriangleStrip + OpExecutionMode %4 OutputVertices 4 + OpSource GLSL 450 + OpName %4 "main" + OpName %8 "i" + OpName %19 "input_index" + OpName %29 "indexable" + OpName %36 "gl_PerVertex" + OpMemberName %36 0 "gl_Position" + OpMemberName %36 1 "gl_PointSize" + OpMemberName %36 2 "gl_ClipDistance" + OpName %38 "" + OpName %39 "gl_PerVertex" + OpMemberName %39 0 "gl_Position" + OpMemberName %39 1 "gl_PointSize" + OpMemberName %39 2 "gl_ClipDistance" + OpName %42 "gl_in" + OpName %57 "VertexData" + OpMemberName %57 0 "o" + OpName %59 "out_vtx" + OpName %60 "VertexData" + OpMemberName %60 0 "o" + OpName %63 "in_vtx" + OpMemberDecorate %36 0 BuiltIn Position + OpMemberDecorate %36 1 BuiltIn PointSize + OpMemberDecorate %36 2 BuiltIn ClipDistance + OpDecorate %36 Block + OpDecorate %36 Stream 0 + OpDecorate %38 Stream 0 + OpMemberDecorate %39 0 BuiltIn Position + OpMemberDecorate %39 1 BuiltIn PointSize + OpMemberDecorate %39 2 BuiltIn ClipDistance + OpDecorate %39 Block + OpMemberDecorate %57 0 Location 1 + OpDecorate %57 Stream 0 + OpDecorate %59 Stream 0 + OpMemberDecorate %60 0 Location 1 + %2 = OpTypeVoid + %3 = OpTypeFunction %2 + %6 = OpTypeInt 32 1 + %7 = OpTypePointer Function %6 + %9 = OpConstant %6 0 + %16 = OpConstant %6 4 + %17 = OpTypeBool + %20 = OpTypeInt 32 0 + %21 = OpConstant %20 4 + %22 = OpTypeArray %6 %21 + %23 = OpConstant %6 1 + %24 = OpConstant %6 3 + %25 = OpConstant %6 2 + %26 = OpConstantComposite %22 %9 %23 %24 %25 + %28 = OpTypePointer Function %22 + %32 = OpTypeFloat 32 + %33 = OpTypeVector %32 4 + %34 = OpConstant %20 1 + %35 = OpTypeArray %32 %34 + %36 = OpTypeStruct %33 %32 %35 + %37 = OpTypePointer Output %36 + %38 = OpVariable %37 Output + %39 = OpTypeStruct %33 %32 %35 + %40 = OpTypeArray %39 %21 + %41 = OpTypePointer Input %40 + %42 = OpVariable %41 Input + %44 = OpTypePointer Input %33 + %47 = OpTypePointer Output %33 + %50 = OpTypePointer Input %32 + %53 = OpTypePointer Output %32 + %55 = 
OpConstant %20 16 + %56 = OpTypeArray %33 %55 + %57 = OpTypeStruct %56 + %58 = OpTypePointer Output %57 + %59 = OpVariable %58 Output + %60 = OpTypeStruct %56 + %61 = OpTypeArray %60 %21 + %62 = OpTypePointer Input %61 + %63 = OpVariable %62 Input + %65 = OpTypePointer Input %60 + %4 = OpFunction %2 None %3 + %5 = OpLabel + %8 = OpVariable %7 Function + %19 = OpVariable %7 Function + %29 = OpVariable %28 Function + OpStore %8 %9 + OpBranch %10 + %10 = OpLabel + OpLoopMerge %12 %13 None + OpBranch %14 + %14 = OpLabel + %15 = OpLoad %6 %8 + %18 = OpSLessThan %17 %15 %16 + OpBranchConditional %18 %11 %12 + %11 = OpLabel + %27 = OpLoad %6 %8 + OpStore %29 %26 + %30 = OpAccessChain %7 %29 %27 + %31 = OpLoad %6 %30 + OpStore %19 %31 + %43 = OpLoad %6 %19 + %45 = OpAccessChain %44 %42 %43 %9 + %46 = OpLoad %33 %45 + %48 = OpAccessChain %47 %38 %9 + OpStore %48 %46 + %49 = OpLoad %6 %19 + %51 = OpAccessChain %50 %42 %49 %23 + %52 = OpLoad %32 %51 + %54 = OpAccessChain %53 %38 %23 + OpStore %54 %52 + %64 = OpLoad %6 %19 + %66 = OpAccessChain %65 %63 %64 + %67 = OpLoad %60 %66 + OpStore %59 %67 + OpEmitVertex + OpBranch %13 + %13 = OpLabel + %68 = OpLoad %6 %8 + %69 = OpIAdd %6 %68 %23 + OpStore %8 %69 + OpBranch %10 + %12 = OpLabel + OpEndPrimitive + OpReturn + OpFunctionEnd diff --git a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h new file mode 100644 index 000000000..511aeb2d0 --- /dev/null +++ b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h @@ -0,0 +1,438 @@ +// generated from `xb genspirv` +// source: rect_list.geom +const uint8_t rect_list_geom[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, + 0xCC, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x36, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 
0x00, + 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x6C, 0x65, 0x66, 0x74, 0x5F, 0x61, 0x6C, 0x69, 0x67, 0x6E, 0x65, 0x64, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, + 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, + 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, + 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x69, + 0x6E, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x20, 0x00, 0x00, 
0x00, + 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, + 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, + 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, + 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, + 0x05, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x05, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x56, 0x65, 0x72, 0x74, + 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, 0x06, 0x00, 0x04, 0x00, + 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, + 0x76, 0x74, 0x78, 0x00, 0x05, 0x00, 0x05, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, + 0x06, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x35, 0x00, 0x00, 0x00, + 0x69, 0x6E, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, + 0x66, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, + 0xB4, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 
0x00, + 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x2F, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 
0x00, + 0x3B, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x2D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, 0x2F, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x03, 0x00, 0x32, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x04, 0x00, 0x33, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 
0x00, + 0x01, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x36, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x65, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x65, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x65, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x07, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 
0x00, + 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x2C, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, + 0x39, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 
0x00, + 0x3E, 0x00, 0x03, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x43, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x45, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x46, 0x00, 0x00, 0x00, + 0x45, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, + 0x49, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x4B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 
0x00, + 0x3E, 0x00, 0x03, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, + 0x4C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x4E, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x4E, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x51, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x52, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x53, 0x00, 0x00, 0x00, + 0x52, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x54, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x55, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x56, 0x00, 0x00, 0x00, + 0x55, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, + 0x57, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, + 0x57, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 
0x00, + 0x58, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, + 0x5B, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x5D, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, + 0x5E, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x60, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x61, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x63, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x63, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x66, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0x67, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x67, 0x00, 0x00, 0x00, + 0xF6, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x6B, 0x00, 0x00, 
0x00, + 0xF8, 0x00, 0x02, 0x00, 0x6B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, + 0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x00, + 0x6C, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, + 0x6E, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, + 0x66, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x71, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, + 0x7F, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, + 0x72, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x74, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, + 0x75, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x77, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, + 0x66, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x79, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, 0x79, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, + 0x77, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 
0x00, + 0x26, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x7C, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0x6A, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x6A, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, + 0x66, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x7E, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x66, 0x00, 0x00, 0x00, 0x7E, 0x00, 0x00, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0x67, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x69, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x7F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x82, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x83, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x84, 0x00, 0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x85, 0x00, 0x00, 0x00, + 0x84, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, + 0x86, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00, + 0x86, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 
0x00, + 0x87, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x8A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x00, 0x8B, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x8D, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x8D, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x36, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x8F, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, + 0x90, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x92, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x92, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x93, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, + 0x93, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 
0x00, + 0x95, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x95, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x97, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x97, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x9A, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x9A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x9C, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x9D, 0x00, 0x00, 0x00, 0x9C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x36, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x9F, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xA0, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, + 0xA0, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 
0x00, + 0xA2, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xA2, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0xA3, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, + 0xA3, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0xA5, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xA5, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x32, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0xA8, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xA9, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xAC, 0x00, 0x00, 0x00, + 0xA9, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xAE, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, + 0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xAF, 0x00, 0x00, 0x00, + 0xAC, 0x00, 0x00, 0x00, 0xAE, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 
0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB0, 0x00, 0x00, 0x00, + 0xAF, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0xB1, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0xB3, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB3, 0x00, 0x00, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB4, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xB5, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0xB5, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, + 0xB7, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0xB9, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0xB9, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0xBA, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00, + 0x6D, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 0xBB, 0x00, 0x00, 0x00, + 0xB6, 0x00, 0x00, 0x00, 0xB7, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0xB6, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0xBC, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0xBD, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xBF, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 
0x00, + 0xC0, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xC2, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0xC2, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00, + 0xBF, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0xC5, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xC7, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00, + 0xC7, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, + 0xC9, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0xBC, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xC9, 0x00, 0x00, 0x00, + 0xC8, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xB8, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0xCA, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, 0xCB, 0x00, 0x00, 0x00, + 0xCA, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0xB4, 0x00, 0x00, 0x00, 0xCB, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0xB5, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB7, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, + 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, +}; diff --git a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.spv b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.spv new file mode 100644 index 0000000000000000000000000000000000000000..8074f71f5d7169723c5b2ea2533ccd7f0d20bc8a GIT 
binary patch literal 5204 zcmZ{m2a{Dr5QVSqvgU|lhNmcIlvObY6vcoMbHY^?e7fu^JQX8g&N=6tvzT*^zd-+o zsmdz9@4kDgrdGUE@0`=!(=(@M?tS#EIB}(_T7iEnSHD!w%nKOCU-FAjnYgRoDb!@F_m74X!=MS`p^{*1gK`-)ABVOF`u7&2BSHZ(Z5GkajDJ#Tn$4wSq;_E%N&hGri# zxZuRW1)ae~`uiHc*x1OzcBefujCZ9vU!AqTJ=~eqzHHFE#M}e1gNE7{Om8pj3=E$$ zSno%j_-oOw+v@WFtgGK=^tvs4|F^EO<*(aQ_m(hw7*p-U*i|`wpfli}duyNZkvgZh zK8KNuIm&;6{!Xg&RVH{83c&pnJ~Y&G0^^7SyE)L*|#>w3P&&UIhO z-w13U++&Ne+@pKg9H09$<{sppP1o03O$4uiInVq_O-!COwgH=CeETMzJ=n(`F*VwQ z$&Aj`H=b2IV|~u`Bj&pL+>6{Z*JrNhZLW28VU(wryMo<=e&@=4SF$g^n_}mXaJx#cwPc z_5+t11r3QI7r+W_5;i;4Ly>|y= z#-ia6uyxFFPjYjjVFp+Y?kDGPC|G|q$kp&$aTbT+%lhS69FE^ubQ}RLb;!-hS29e!8k z=VI1!t~hf?z|IZ7py7V^CxS1;Jk#{fxViRv0cL!0!xuKb)at;komv-xb3d$SJhd(c z8_zjj0=Az1)LR5D>s<=BUhahP)VmC9JoPRITT{RHViMyO*c8mY*ay!-pSkwhzS}Ry z?BPnVdssrR?fq4lKF>a9sjoas`=0NQ{t0mFE@sRfT>@U-9bJmwSnlK1VDFK8c0IW{ z))~im4VF6PeZ01b!`h-@dix4>)JPY`7YlGcWu|rJnzX( zn6YTM8C+_Rn-dMUfK#W`a4UXe(Qq5M)F3w}8g2*YzD2_w`1D7Eywq?f+_iJ3a-Z(P zXDk}-2A3M-=0w9i;M6HK+>75>G~5R+HOS40hWo*4a6i%T09b!C$aBAPpB}{L`kq61 z77xLVMaRS7Qit4}=y(L2I;DbAo*rhN z_vtCjSTsBhE;Y!_iH2vusZ(lr7QeA*cn(}@ked??&x1#w#S8fKM}xf7@FG0@D>b}? z-&oG;Ny|1G$z{~g@=xf8}y|9i0U)cXN!P5qwTB*q`HDVVzKgJ+@7 sTzhTb?U!Tr@DtcQ Date: Sat, 20 Feb 2016 18:44:37 -0600 Subject: [PATCH 024/145] (WIP) SPIR-V Shader Translator --- src/xenia/gpu/shader_translator.cc | 40 +- src/xenia/gpu/shader_translator.h | 3 + src/xenia/gpu/spirv_shader_translator.cc | 909 +++++++++++++++++++++-- src/xenia/gpu/spirv_shader_translator.h | 24 + 4 files changed, 918 insertions(+), 58 deletions(-) diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index f117619cd..a89be80f5 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -14,6 +14,7 @@ #include #include +#include "xenia/base/logging.h" #include "xenia/base/math.h" namespace xe { @@ -353,7 +354,7 @@ bool ShaderTranslator::TranslateBlocks() { // This is what freedreno does. 
uint32_t max_cf_dword_index = static_cast(ucode_dword_count_); std::set label_addresses; - for (uint32_t i = 0; i < max_cf_dword_index; i += 3) { + for (uint32_t i = 0, cf_index = 0; i < max_cf_dword_index; i += 3) { ControlFlowInstruction cf_a; ControlFlowInstruction cf_b; UnpackControlFlowInstructions(ucode_dwords_ + i, &cf_a, &cf_b); @@ -367,6 +368,11 @@ bool ShaderTranslator::TranslateBlocks() { } AddControlFlowTargetLabel(cf_a, &label_addresses); AddControlFlowTargetLabel(cf_b, &label_addresses); + + PreProcessControlFlowInstruction(cf_index); + ++cf_index; + PreProcessControlFlowInstruction(cf_index); + ++cf_index; } // Translate all instructions. @@ -666,11 +672,11 @@ void ShaderTranslator::TranslateExecInstructions( static_cast(ucode_dwords_[instr_offset * 3] & 0x1F); if (fetch_opcode == FetchOpcode::kVertexFetch) { auto& op = *reinterpret_cast( - ucode_dwords_ + instr_offset * 3); + ucode_dwords_ + instr_offset * 3); TranslateVertexFetchInstruction(op); } else { auto& op = *reinterpret_cast( - ucode_dwords_ + instr_offset * 3); + ucode_dwords_ + instr_offset * 3); TranslateTextureFetchInstruction(op); } } else { @@ -1114,9 +1120,15 @@ void ShaderTranslator::ParseAluVectorInstruction( i.result.storage_target = InstructionStorageTarget::kPointSize; break; default: - assert_true(dest_num < 16); - i.result.storage_target = InstructionStorageTarget::kInterpolant; - i.result.storage_index = dest_num; + if (dest_num < 16) { + i.result.storage_target = InstructionStorageTarget::kInterpolant; + i.result.storage_index = dest_num; + } else { + // Unimplemented. 
+ // assert_always(); + i.result.storage_target = InstructionStorageTarget::kNone; + i.result.storage_index = 0; + } break; } } else if (is_pixel_shader()) { @@ -1236,9 +1248,19 @@ void ShaderTranslator::ParseAluScalarInstruction( i.result.storage_target = InstructionStorageTarget::kPointSize; break; default: - assert_true(dest_num < 16); - i.result.storage_target = InstructionStorageTarget::kInterpolant; - i.result.storage_index = dest_num; + if (dest_num < 16) { + i.result.storage_target = InstructionStorageTarget::kInterpolant; + i.result.storage_index = dest_num; + } else { + // Unimplemented. + // assert_always(); + XELOGE( + "ShaderTranslator::ParseAluScalarInstruction: Unsupported write " + "to export %d", + dest_num); + i.result.storage_target = InstructionStorageTarget::kNone; + i.result.storage_index = 0; + } break; } } else if (is_pixel_shader()) { diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index 21bae4a53..d1b27a997 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -78,6 +78,9 @@ class ShaderTranslator { shader->host_disassembly_ = std::move(value); } + // Pre-process a control-flow instruction before anything else. + virtual void PreProcessControlFlowInstruction(uint32_t cf_index) {} + // Handles translation for control flow label addresses. // This is triggered once for each label required (due to control flow // operations) before any of the instructions within the target exec. 
diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 52848cedd..2ea0ff89c 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -15,6 +15,7 @@ namespace xe { namespace gpu { +using namespace ucode; using spv::GLSLstd450; using spv::Id; @@ -62,10 +63,155 @@ void SpirvShaderTranslator::StartTranslation() { b.addExecutionMode(mainFn, spv::ExecutionMode::ExecutionModeXfb); } - auto float_1_0 = b.makeFloatConstant(2.0f); - auto acos = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, b.makeFloatType(32), - GLSLstd450::kAcos, {float_1_0}); + bool_type_ = b.makeBoolType(); + float_type_ = b.makeFloatType(32); + Id uint_type = b.makeUintType(32); + vec2_float_type_ = b.makeVectorType(float_type_, 2); + vec3_float_type_ = b.makeVectorType(float_type_, 3); + vec4_float_type_ = b.makeVectorType(float_type_, 4); + vec4_uint_type_ = b.makeVectorType(uint_type, 4); + vec4_bool_type_ = b.makeVectorType(bool_type_, 4); + + vec4_float_one_ = b.makeCompositeConstant( + vec4_float_type_, + std::vector({b.makeFloatConstant(1.f), b.makeFloatConstant(1.f), + b.makeFloatConstant(1.f), b.makeFloatConstant(1.f)})); + vec4_float_zero_ = b.makeCompositeConstant( + vec4_float_type_, + std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f), + b.makeFloatConstant(0.f), b.makeFloatConstant(0.f)})); + + registers_type_ = + b.makeArrayType(vec4_float_type_, b.makeUintConstant(64), 0); + registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassPrivate, + registers_type_, "r"); + + aL_ = b.createVariable(spv::StorageClass::StorageClassPrivate, + vec4_uint_type_, "aL"); + + p0_ = b.createVariable(spv::StorageClass::StorageClassPrivate, bool_type_, + "p0"); + ps_ = b.createVariable(spv::StorageClass::StorageClassPrivate, float_type_, + "ps"); + pv_ = b.createVariable(spv::StorageClass::StorageClassPrivate, + vec4_float_type_, "pv"); + a0_ = 
b.createVariable(spv::StorageClass::StorageClassPrivate, + b.makeUintType(32), "a0"); + + // Uniform constants. + Id float_consts_type = + b.makeArrayType(vec4_float_type_, b.makeUintConstant(256), 1); + Id loop_consts_type = + b.makeArrayType(b.makeUintType(32), b.makeUintConstant(32), 1); + Id bool_consts_type = + b.makeArrayType(b.makeUintType(32), b.makeUintConstant(8), 1); + + Id consts_struct_type = b.makeStructType( + {float_consts_type, loop_consts_type, bool_consts_type}, "consts_type"); + b.addDecoration(consts_struct_type, spv::Decoration::DecorationBlock); + + // Constants member decorations + b.addMemberDecoration(consts_struct_type, 0, + spv::Decoration::DecorationOffset, 0); + b.addMemberDecoration(consts_struct_type, 0, + spv::Decoration::DecorationArrayStride, + 4 * sizeof(float)); + + b.addMemberDecoration(consts_struct_type, 1, + spv::Decoration::DecorationOffset, + 256 * 4 * sizeof(float)); + b.addMemberDecoration(consts_struct_type, 1, + spv::Decoration::DecorationArrayStride, + sizeof(uint32_t)); + + b.addMemberDecoration(consts_struct_type, 2, + spv::Decoration::DecorationOffset, + 256 * 4 * sizeof(float) + 32 * sizeof(uint32_t)); + b.addMemberDecoration(consts_struct_type, 2, + spv::Decoration::DecorationArrayStride, + sizeof(uint32_t)); + + consts_ = b.createVariable(spv::StorageClass::StorageClassUniform, + consts_struct_type, "consts"); + + b.addDecoration(consts_, spv::Decoration::DecorationDescriptorSet, 0); + if (is_vertex_shader()) { + b.addDecoration(consts_, spv::Decoration::DecorationBinding, 0); + } else if (is_pixel_shader()) { + b.addDecoration(consts_, spv::Decoration::DecorationBinding, 1); + } + + // Interpolators + Id interpolators_type = + b.makeArrayType(vec4_float_type_, b.makeUintConstant(16), 0); + if (is_vertex_shader()) { + // Vertex inputs/outputs + for (const auto& binding : vertex_bindings()) { + for (const auto& attrib : binding.attributes) { + Id attrib_type = 0; + switch 
(attrib.fetch_instr.attributes.data_format) { + case VertexFormat::k_32: + case VertexFormat::k_32_FLOAT: + attrib_type = float_type_; + break; + case VertexFormat::k_16_16: + case VertexFormat::k_32_32: + case VertexFormat::k_16_16_FLOAT: + case VertexFormat::k_32_32_FLOAT: + attrib_type = vec2_float_type_; + break; + case VertexFormat::k_10_11_11: + case VertexFormat::k_11_11_10: + case VertexFormat::k_32_32_32_FLOAT: + attrib_type = vec3_float_type_; + break; + case VertexFormat::k_8_8_8_8: + case VertexFormat::k_2_10_10_10: + case VertexFormat::k_16_16_16_16: + case VertexFormat::k_32_32_32_32: + case VertexFormat::k_16_16_16_16_FLOAT: + case VertexFormat::k_32_32_32_32_FLOAT: + attrib_type = vec4_float_type_; + break; + default: + assert_always(); + } + + auto attrib_var = b.createVariable( + spv::StorageClass::StorageClassInput, attrib_type, + xe::format_string("vf%d_%d", binding.fetch_constant, + attrib.fetch_instr.attributes.offset) + .c_str()); + b.addDecoration(attrib_var, spv::Decoration::DecorationLocation, + attrib.attrib_index); + + vertex_binding_map_[binding.fetch_constant][attrib.fetch_instr + .attributes.offset] = + attrib_var; + } + } + + interpolators_ = b.createVariable(spv::StorageClass::StorageClassOutput, + interpolators_type, "interpolators"); + b.addDecoration(interpolators_, spv::Decoration::DecorationNoPerspective); + b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); + + pos_ = b.createVariable(spv::StorageClass::StorageClassOutput, + vec4_float_type_, "gl_Position"); + b.addDecoration(pos_, spv::Decoration::DecorationBuiltIn, + spv::BuiltIn::BuiltInPosition); + } else { + // Pixel inputs/outputs + interpolators_ = b.createVariable(spv::StorageClass::StorageClassInput, + interpolators_type, "interpolators"); + b.addDecoration(interpolators_, spv::Decoration::DecorationNoPerspective); + b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); + + // Copy interpolators to r[0..16] + 
b.createNoResultOp(spv::Op::OpCopyMemorySized, + {registers_ptr_, interpolators_, + b.makeUintConstant(16 * 4 * sizeof(float))}); + } } std::vector SpirvShaderTranslator::CompleteTranslation() { @@ -99,79 +245,232 @@ void SpirvShaderTranslator::PostTranslation(Shader* shader) { set_host_disassembly(shader, disasm->to_string()); } +void SpirvShaderTranslator::PreProcessControlFlowInstruction( + uint32_t cf_index) { + auto& b = *builder_; + + cf_blocks_[cf_index] = &b.makeNewBlock(); +} + void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) { auto& b = *builder_; EmitUnimplementedTranslationError(); } +void SpirvShaderTranslator::ProcessControlFlowInstructionBegin( + uint32_t cf_index) { + auto& b = *builder_; + + if (cf_index == 0) { + // Kind of cheaty, but emit a branch to the first block. + b.createBranch(cf_blocks_[cf_index]); + } +} + +void SpirvShaderTranslator::ProcessControlFlowInstructionEnd( + uint32_t cf_index) { + auto& b = *builder_; +} + void SpirvShaderTranslator::ProcessControlFlowNopInstruction() { auto& b = *builder_; - EmitUnimplementedTranslationError(); + b.createNoResultOp(spv::Op::OpNop); } void SpirvShaderTranslator::ProcessExecInstructionBegin( const ParsedExecInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + // Head has the logic to check if the body should execute. + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + auto body = head; + switch (instr.type) { + case ParsedExecInstruction::Type::kUnconditional: { + // No need to do anything. + } break; + case ParsedExecInstruction::Type::kConditional: { + // Based off of bool_consts + std::vector offsets; + offsets.push_back(b.makeUintConstant(2)); // bool_consts + offsets.push_back(b.makeUintConstant(instr.bool_constant_index / 32)); + auto v = b.createAccessChain(spv::StorageClass::StorageClassUniform, + consts_, offsets); + v = b.createLoad(v); + + // Bitfield extract the bool constant. 
+ v = b.createTriOp(spv::Op::OpBitFieldUExtract, b.makeUintType(32), v, + b.makeUintConstant(instr.bool_constant_index % 32), + b.makeUintConstant(1)); + + // Conditional branch + assert_true(cf_blocks_.size() > instr.dword_index + 1); + body = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, v, + b.makeBoolConstant(instr.condition)); + b.createConditionalBranch(cond, body, cf_blocks_[instr.dword_index + 1]); + } break; + case ParsedExecInstruction::Type::kPredicated: { + // Branch based on p0. + assert_true(cf_blocks_.size() > instr.dword_index + 1); + body = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, p0_, + b.makeBoolConstant(instr.condition)); + b.createConditionalBranch(cond, body, cf_blocks_[instr.dword_index + 1]); + } break; + } + b.setBuildPoint(body); } void SpirvShaderTranslator::ProcessExecInstructionEnd( const ParsedExecInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + if (instr.is_end) { + b.makeReturn(false); + } else { + assert_true(cf_blocks_.size() > instr.dword_index + 1); + b.createBranch(cf_blocks_[instr.dword_index + 1]); + } } void SpirvShaderTranslator::ProcessLoopStartInstruction( const ParsedLoopStartInstruction& instr) { auto& b = *builder_; + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + + // TODO: Emit a spv LoopMerge + // (need to know the continue target and merge target beforehand though) + EmitUnimplementedTranslationError(); + + assert_true(cf_blocks_.size() > instr.dword_index + 1); + b.createBranch(cf_blocks_[instr.dword_index + 1]); } void SpirvShaderTranslator::ProcessLoopEndInstruction( const ParsedLoopEndInstruction& instr) { auto& b = *builder_; + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + EmitUnimplementedTranslationError(); + + assert_true(cf_blocks_.size() > instr.dword_index + 1); + b.createBranch(cf_blocks_[instr.dword_index + 1]); } void 
SpirvShaderTranslator::ProcessCallInstruction( const ParsedCallInstruction& instr) { auto& b = *builder_; + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + EmitUnimplementedTranslationError(); + + assert_true(cf_blocks_.size() > instr.dword_index + 1); + b.createBranch(cf_blocks_[instr.dword_index + 1]); } void SpirvShaderTranslator::ProcessReturnInstruction( const ParsedReturnInstruction& instr) { auto& b = *builder_; + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + EmitUnimplementedTranslationError(); + + assert_true(cf_blocks_.size() > instr.dword_index + 1); + b.createBranch(cf_blocks_[instr.dword_index + 1]); } +// CF jump void SpirvShaderTranslator::ProcessJumpInstruction( const ParsedJumpInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + switch (instr.type) { + case ParsedJumpInstruction::Type::kUnconditional: { + b.createBranch(cf_blocks_[instr.target_address]); + } break; + case ParsedJumpInstruction::Type::kConditional: { + // Based off of bool_consts + std::vector offsets; + offsets.push_back(b.makeUintConstant(2)); // bool_consts + offsets.push_back(b.makeUintConstant(instr.bool_constant_index / 32)); + auto v = b.createAccessChain(spv::StorageClass::StorageClassUniform, + consts_, offsets); + v = b.createLoad(v); + + // Bitfield extract the bool constant. 
+ v = b.createTriOp(spv::Op::OpBitFieldUExtract, b.makeUintType(32), v, + b.makeUintConstant(instr.bool_constant_index % 32), + b.makeUintConstant(1)); + + // Conditional branch + auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, v, + b.makeBoolConstant(instr.condition)); + b.createConditionalBranch(cond, cf_blocks_[instr.target_address], + cf_blocks_[instr.dword_index]); + } break; + case ParsedJumpInstruction::Type::kPredicated: { + assert_true(cf_blocks_.size() > instr.dword_index + 1); + auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, p0_, + b.makeBoolConstant(instr.condition)); + b.createConditionalBranch(cond, cf_blocks_[instr.target_address], + cf_blocks_[instr.dword_index]); + } break; + } } void SpirvShaderTranslator::ProcessAllocInstruction( const ParsedAllocInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + + switch (instr.type) { + case AllocType::kNone: { + // ? 
+ } break; + case AllocType::kVsPosition: { + assert_true(is_vertex_shader()); + } break; + // Also PS Colors + case AllocType::kVsInterpolators: { + } break; + default: + break; + } + + assert_true(cf_blocks_.size() > instr.dword_index + 1); + b.createBranch(cf_blocks_[instr.dword_index + 1]); } void SpirvShaderTranslator::ProcessVertexFetchInstruction( const ParsedVertexFetchInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + // Operand 0 is the index + // Operand 1 is the binding + // TODO: Indexed fetch + auto vertex_ptr = + vertex_binding_map_[instr.operands[1].storage_index][instr.attributes + .offset]; + assert_not_zero(vertex_ptr); + + auto vertex = b.createLoad(vertex_ptr); + StoreToResult(vertex, instr.result); } void SpirvShaderTranslator::ProcessTextureFetchInstruction( @@ -201,18 +500,327 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( const ParsedAluInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + // TODO: instr.is_predicated + + Id sources[3] = {0}; + Id dest = 0; + for (size_t i = 0; i < instr.operand_count; i++) { + sources[i] = LoadFromOperand(instr.operands[i]); + } + + switch (instr.vector_opcode) { + case AluVectorOpcode::kAdd: { + dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, sources[0], + sources[1]); + } break; + + case AluVectorOpcode::kCube: { + // TODO: + } break; + + case AluVectorOpcode::kFloor: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + spv::GLSLstd450::kFloor, {sources[0]}); + } break; + + case AluVectorOpcode::kFrc: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + spv::GLSLstd450::kFract, {sources[0]}); + } break; + + case AluVectorOpcode::kMad: { + dest = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, sources[0], + sources[1]); + dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, dest, sources[2]); + } break; + + case 
AluVectorOpcode::kMax: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + spv::GLSLstd450::kFMax, {sources[0], sources[1]}); + } break; + + case AluVectorOpcode::kMin: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + spv::GLSLstd450::kFMin, {sources[0], sources[1]}); + } break; + + case AluVectorOpcode::kMul: { + dest = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, sources[0], + sources[1]); + } break; + + case AluVectorOpcode::kSeq: { + // foreach(el) src0 == src1 ? 1.0 : 0.0 + auto c = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], + sources[1]); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, + vec4_float_one_, vec4_float_zero_); + } break; + + case AluVectorOpcode::kSge: { + // foreach(el) src0 >= src1 ? 1.0 : 0.0 + auto c = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_float_type_, + sources[0], sources[1]); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, + vec4_float_one_, vec4_float_zero_); + } break; + + case AluVectorOpcode::kSgt: { + // foreach(el) src0 > src1 ? 1.0 : 0.0 + auto c = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_float_type_, + sources[0], sources[1]); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, + vec4_float_one_, vec4_float_zero_); + } break; + + case AluVectorOpcode::kSne: { + // foreach(el) src0 != src1 ? 
1.0 : 0.0 + auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_float_type_, + sources[0], sources[1]); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, + vec4_float_one_, vec4_float_zero_); + } break; + + case AluVectorOpcode::kTrunc: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + GLSLstd450::kTrunc, {sources[0]}); + } break; + + default: + break; + } + + if (dest) { + b.createStore(dest, pv_); + StoreToResult(dest, instr.result); + } } void SpirvShaderTranslator::ProcessScalarAluInstruction( const ParsedAluInstruction& instr) { auto& b = *builder_; - spv::Id value_id = LoadFromOperand(instr.operands[0]); + // TODO: instr.is_predicated - StoreToResult(value_id, instr.result); + Id sources[3] = {0}; + Id dest = 0; + for (size_t i = 0, x = 0; i < instr.operand_count; i++) { + auto src = LoadFromOperand(instr.operands[i]); - EmitUnimplementedTranslationError(); + // Pull components out of the vector operands and use them as sources. + for (size_t j = 0; j < instr.operands[i].component_count; j++) { + uint32_t component = 0; + switch (instr.operands[i].components[j]) { + case SwizzleSource::kX: + component = 0; + break; + case SwizzleSource::kY: + component = 1; + break; + case SwizzleSource::kZ: + component = 2; + break; + case SwizzleSource::kW: + component = 3; + break; + case SwizzleSource::k0: + case SwizzleSource::k1: + // Don't believe this can happen. 
+ assert_always(); + break; + default: + assert_always(); + break; + } + + sources[x++] = b.createCompositeExtract(src, float_type_, component); + } + } + + switch (instr.scalar_opcode) { + case AluScalarOpcode::kAdds: + case AluScalarOpcode::kAddsc0: + case AluScalarOpcode::kAddsc1: { + // dest = src0 + src1 + dest = + b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], sources[1]); + } break; + + case AluScalarOpcode::kAddsPrev: { + // dest = src0 + ps + dest = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], ps_); + } break; + + case AluScalarOpcode::kCos: { + // dest = cos(src0) + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kCos, + {sources[0]}); + } break; + + case AluScalarOpcode::kMaxs: { + // dest = max(src0, src1) + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFMax, + {sources[0], sources[1]}); + } break; + + case AluScalarOpcode::kMins: { + // dest = min(src0, src1) + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFMin, + {sources[0], sources[1]}); + } break; + + case AluScalarOpcode::kMuls: + case AluScalarOpcode::kMulsc0: + case AluScalarOpcode::kMulsc1: { + // dest = src0 * src1 + dest = + b.createBinOp(spv::Op::OpFMul, float_type_, sources[0], sources[1]); + } break; + + case AluScalarOpcode::kMulsPrev: { + // dest = src0 * ps + dest = b.createBinOp(spv::Op::OpFMul, float_type_, sources[0], ps_); + } break; + + case AluScalarOpcode::kMulsPrev2: { + // TODO: Uh... see GLSL translator for impl. + } break; + + case AluScalarOpcode::kRcpc: { + // TODO: dest = src0 != 0.0 ? 1.0 / src0 : FLT_MAX; + } break; + + case AluScalarOpcode::kRcp: + case AluScalarOpcode::kRcpf: { + // dest = src0 != 0.0 ? 
1.0 / src0 : 0.0; + auto c = b.createBinOp(spv::Op::OpFOrdEqual, float_type_, sources[0], + b.makeFloatConstant(0.f)); + auto d = b.createBinOp(spv::Op::OpFDiv, float_type_, + b.makeFloatConstant(1.f), sources[0]); + dest = b.createBinOp(spv::Op::OpSelect, c, b.makeFloatConstant(0.f), d); + } break; + + case AluScalarOpcode::kRsq: { + // dest = src0 != 0.0 ? inversesqrt(src0) : 0.0; + auto c = b.createBinOp(spv::Op::OpFOrdEqual, float_type_, sources[0], + b.makeFloatConstant(0.f)); + auto d = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + spv::GLSLstd450::kInverseSqrt, {sources[0]}); + dest = b.createBinOp(spv::Op::OpSelect, c, b.makeFloatConstant(0.f), d); + } break; + + case AluScalarOpcode::kSeqs: { + // TODO: dest = src0 == 0.0 ? 1.0 : 0.0; + } break; + + case AluScalarOpcode::kSges: { + // TODO: dest = src0 >= 0.0 ? 1.0 : 0.0; + } break; + + case AluScalarOpcode::kSgts: { + // TODO: dest = src0 > 0.0 ? 1.0 : 0.0; + } break; + + case AluScalarOpcode::kSnes: { + // TODO: dest = src0 != 0.0 ? 1.0 : 0.0; + } break; + + case AluScalarOpcode::kSetpEq: { + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], + b.makeFloatConstant(0.f)); + // p0 = cond + b.createStore(cond, p0_); + + // dest = cond ? 0.f : 1.f; + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); + } break; + + case AluScalarOpcode::kSetpGe: { + auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, + sources[0], b.makeFloatConstant(0.f)); + // p0 = cond + b.createStore(cond, p0_); + + // dest = cond ? 0.f : 1.f; + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); + } break; + + case AluScalarOpcode::kSetpGt: { + auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, + sources[0], b.makeFloatConstant(0.f)); + // p0 = cond + b.createStore(cond, p0_); + + // dest = cond ? 
0.f : 1.f; + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); + } break; + + case AluScalarOpcode::kSetpInv: { + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], + b.makeFloatConstant(1.f)); + auto pred = + b.createTriOp(spv::Op::OpSelect, bool_type_, cond, + b.makeBoolConstant(true), b.makeBoolConstant(false)); + b.createStore(pred, p0_); + + // if (!cond) dest = src0 == 0.0 ? 1.0 : src0; + auto dst_cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, + sources[0], b.makeFloatConstant(0.f)); + auto dst_false = b.createTriOp(spv::Op::OpSelect, float_type_, dst_cond, + b.makeFloatConstant(1.f), sources[0]); + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(0.f), dst_false); + } break; + + case AluScalarOpcode::kSetpNe: { + auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0], + b.makeFloatConstant(0.f)); + + // p0 = cond + b.createStore(cond, p0_); + + // dest = cond ? 
0.f : 1.f; + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); + } break; + + case AluScalarOpcode::kSin: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kSin, + {sources[0]}); + } break; + + case AluScalarOpcode::kSubs: + case AluScalarOpcode::kSubsc0: + case AluScalarOpcode::kSubsc1: { + dest = + b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], sources[1]); + } break; + + case AluScalarOpcode::kSubsPrev: { + dest = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], ps_); + } break; + + default: + break; + } + + if (dest) { + b.createStore(dest, ps_); + StoreToResult(dest, instr.result); + } } Id SpirvShaderTranslator::CreateGlslStd450InstructionCall( @@ -223,50 +831,129 @@ Id SpirvShaderTranslator::CreateGlslStd450InstructionCall( args); } -spv::Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { +Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { auto& b = *builder_; - spv::Id current_type_id = b.makeFloatType(32); - spv::Id current_value_id = b.createUndefined(current_type_id); + Id storage_pointer = 0; + Id storage_type = vec4_float_type_; + spv::StorageClass storage_class; + Id storage_index = 0; // Storage index at lowest level + std::vector storage_offsets; // Offsets in nested arrays -> storage + + switch (op.storage_addressing_mode) { + case InstructionStorageAddressingMode::kStatic: { + storage_index = b.makeUintConstant(op.storage_index); + } break; + case InstructionStorageAddressingMode::kAddressAbsolute: { + // storage_index + a0 + storage_index = + b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), b.createLoad(a0_), + b.makeUintConstant(op.storage_index)); + } break; + case InstructionStorageAddressingMode::kAddressRelative: { + // TODO: Based on loop index + // storage_index + aL.x + storage_index = b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), + 
b.makeUintConstant(0), + b.makeUintConstant(op.storage_index)); + } break; + default: + assert_always(); + break; + } - // storage_addressing_mode switch (op.storage_source) { case InstructionStorageSource::kRegister: - // TODO(benvanik): op.storage_index + storage_pointer = registers_ptr_; + storage_class = spv::StorageClass::StorageClassPrivate; + storage_type = vec4_float_type_; + storage_offsets.push_back(storage_index); break; case InstructionStorageSource::kConstantFloat: - // TODO(benvanik): op.storage_index - break; - case InstructionStorageSource::kConstantInt: - // TODO(benvanik): op.storage_index - break; - case InstructionStorageSource::kConstantBool: - // TODO(benvanik): op.storage_index + storage_pointer = consts_; + storage_class = spv::StorageClass::StorageClassUniform; + storage_type = vec4_float_type_; + storage_offsets.push_back(b.makeUintConstant(0)); + storage_offsets.push_back(storage_index); break; case InstructionStorageSource::kVertexFetchConstant: - // TODO(benvanik): op.storage_index - break; case InstructionStorageSource::kTextureFetchConstant: - // TODO(benvanik): op.storage_index + // Should not reach this. 
+ assert_always(); + break; + default: + assert_always(); break; } + if (!storage_pointer) { + return b.createUndefined(vec4_float_type_); + } + + storage_pointer = + b.createAccessChain(storage_class, storage_pointer, storage_offsets); + auto storage_value = b.createLoad(storage_pointer); + assert_true(b.getTypeId(storage_value) == vec4_float_type_); + if (op.is_absolute_value) { - current_value_id = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationRelaxedPrecision, current_type_id, - GLSLstd450::kFAbs, {current_value_id}); + storage_value = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, storage_type, GLSLstd450::kFAbs, + {storage_value}); } if (op.is_negated) { - current_value_id = - b.createUnaryOp(spv::Op::OpFNegate, current_type_id, current_value_id); + storage_value = + b.createUnaryOp(spv::Op::OpFNegate, storage_type, storage_value); } // swizzle + if (!op.is_standard_swizzle()) { + std::vector operands; + operands.push_back(storage_value); + operands.push_back(b.makeCompositeConstant( + vec2_float_type_, + std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)}))); - return current_value_id; + // Components start from left and are duplicated rightwards + // e.g. count = 1, xxxx / count = 2, xyyy ... 
+ for (int i = 0; i < 4; i++) { + auto swiz = op.components[i]; + if (i > op.component_count - 1) { + swiz = op.components[op.component_count - 1]; + } + + uint32_t swiz_id = 0; + switch (swiz) { + case SwizzleSource::kX: + swiz_id = 0; + break; + case SwizzleSource::kY: + swiz_id = 1; + break; + case SwizzleSource::kZ: + swiz_id = 2; + break; + case SwizzleSource::kW: + swiz_id = 3; + break; + case SwizzleSource::k0: + swiz_id = 4; + break; + case SwizzleSource::k1: + swiz_id = 5; + break; + } + + operands.push_back(swiz_id); + } + + storage_value = + b.createOp(spv::Op::OpVectorShuffle, storage_type, operands); + } + + return storage_value; } -void SpirvShaderTranslator::StoreToResult(spv::Id source_value_id, +void SpirvShaderTranslator::StoreToResult(Id source_value_id, const InstructionResult& result) { auto& b = *builder_; @@ -275,25 +962,67 @@ void SpirvShaderTranslator::StoreToResult(spv::Id source_value_id, return; } - spv::Id storage_pointer = 0; - // storage_addressing_mode + Id storage_pointer = 0; + Id storage_type = vec4_float_type_; + spv::StorageClass storage_class; + Id storage_index = 0; // Storage index at lowest level + std::vector storage_offsets; // Offsets in nested arrays -> storage + + switch (result.storage_addressing_mode) { + case InstructionStorageAddressingMode::kStatic: { + storage_index = b.makeUintConstant(result.storage_index); + } break; + case InstructionStorageAddressingMode::kAddressAbsolute: { + // storage_index + a0 + storage_index = + b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), b.createLoad(a0_), + b.makeUintConstant(result.storage_index)); + } break; + case InstructionStorageAddressingMode::kAddressRelative: { + // storage_index + aL.x + // TODO + } break; + default: + assert_always(); + return; + } + + bool storage_array; switch (result.storage_target) { case InstructionStorageTarget::kRegister: - // TODO(benvanik): result.storage_index + storage_pointer = registers_ptr_; + storage_class = 
spv::StorageClass::StorageClassPrivate; + storage_type = vec4_float_type_; + storage_offsets.push_back(storage_index); + storage_array = true; break; case InstructionStorageTarget::kInterpolant: - // TODO(benvanik): result.storage_index + assert_true(is_vertex_shader()); + storage_pointer = interpolators_; + storage_class = spv::StorageClass::StorageClassOutput; + storage_type = vec4_float_type_; + storage_offsets.push_back(storage_index); + storage_array = true; break; case InstructionStorageTarget::kPosition: - // TODO(benvanik): result.storage_index + assert_true(is_vertex_shader()); + assert_not_zero(pos_); + storage_pointer = pos_; + storage_class = spv::StorageClass::StorageClassOutput; + storage_type = vec4_float_type_; + storage_offsets.push_back(0); + storage_array = false; break; case InstructionStorageTarget::kPointSize: + assert_true(is_vertex_shader()); // TODO(benvanik): result.storage_index break; case InstructionStorageTarget::kColorTarget: + assert_true(is_pixel_shader()); // TODO(benvanik): result.storage_index break; case InstructionStorageTarget::kDepth: + assert_true(is_pixel_shader()); // TODO(benvanik): result.storage_index break; case InstructionStorageTarget::kNone: @@ -301,25 +1030,107 @@ void SpirvShaderTranslator::StoreToResult(spv::Id source_value_id, break; } - spv::Id current_value_id = source_value_id; - spv::Id current_type_id = b.getTypeId(source_value_id); + if (!storage_pointer) { + // assert_always(); + return; + } + + if (storage_array) { + storage_pointer = + b.createAccessChain(storage_class, storage_pointer, storage_offsets); + } + auto storage_value = b.createLoad(storage_pointer); + + // Convert to the appropriate type, if needed. 
+ if (b.getTypeId(source_value_id) != storage_type) { + std::vector constituents; + auto n_el = b.getNumComponents(source_value_id); + auto n_dst = b.getNumTypeComponents(storage_type); + assert_true(n_el < n_dst); + + constituents.push_back(source_value_id); + for (int i = n_el; i < n_dst; i++) { + // Pad with zeroes. + constituents.push_back(b.makeFloatConstant(0.f)); + } + + source_value_id = b.createConstructor(spv::Decoration::DecorationInvariant, + constituents, storage_type); + } // Clamp the input value. if (result.is_clamped) { - // + source_value_id = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, b.getTypeId(source_value_id), + spv::GLSLstd450::kFClamp, + {b.makeFloatConstant(0.0), b.makeFloatConstant(1.0)}); + } + + // swizzle + // TODO: 0.0 and 1.0 swizzles + if (!result.is_standard_swizzle()) { + std::vector operands; + operands.push_back(source_value_id); + operands.push_back(b.makeCompositeConstant( + vec2_float_type_, + std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)}))); + + // Components start from left and are duplicated rightwards + // e.g. count = 1, xxxx / count = 2, xyyy ... + for (int i = 0; i < b.getNumTypeComponents(storage_type); i++) { + auto swiz = result.components[i]; + if (!result.write_mask[i]) { + // Undefined / don't care. 
+ operands.push_back(0); + continue; + } + + uint32_t swiz_id = 0; + switch (swiz) { + case SwizzleSource::kX: + operands.push_back(0); + break; + case SwizzleSource::kY: + operands.push_back(1); + break; + case SwizzleSource::kZ: + operands.push_back(2); + break; + case SwizzleSource::kW: + operands.push_back(3); + break; + case SwizzleSource::k0: + operands.push_back(4); + break; + case SwizzleSource::k1: + operands.push_back(5); + break; + } + } + + source_value_id = + b.createOp(spv::Op::OpVectorShuffle, storage_type, operands); } // write mask + if (!result.has_all_writes()) { + std::vector operands; + operands.push_back(source_value_id); + operands.push_back(storage_value); - // swizzle + for (int i = 0; i < b.getNumTypeComponents(storage_type); i++) { + operands.push_back( + result.write_mask[i] ? i : b.getNumComponents(source_value_id) + i); + } - // Convert to the appropriate type, if needed. - spv::Id desired_type_id = b.makeFloatType(32); - if (current_value_id != desired_type_id) { - EmitTranslationError("Type conversion on storage not yet implemented"); + source_value_id = + b.createOp(spv::Op::OpVectorShuffle, storage_type, operands); } // Perform store into the pointer. 
+ assert_true(b.getNumComponents(source_value_id) == + b.getNumTypeComponents(storage_type)); + b.createStore(source_value_id, storage_pointer); } } // namespace gpu diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 2b233103b..fbd3af8cd 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -32,7 +32,10 @@ class SpirvShaderTranslator : public ShaderTranslator { std::vector CompleteTranslation() override; void PostTranslation(Shader* shader) override; + void PreProcessControlFlowInstruction(uint32_t cf_index) override; void ProcessLabel(uint32_t cf_index) override; + void ProcessControlFlowInstructionBegin(uint32_t cf_index) override; + void ProcessControlFlowInstructionEnd(uint32_t cf_index) override; void ProcessControlFlowNopInstruction() override; void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override; void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override; @@ -75,6 +78,27 @@ class SpirvShaderTranslator : public ShaderTranslator { // TODO(benvanik): replace with something better, make reusable, etc. std::unique_ptr builder_; spv::Id glsl_std_450_instruction_set_ = 0; + + // Types + spv::Id float_type_ = 0, bool_type_ = 0; + spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0; + spv::Id vec4_uint_type_ = 0; + spv::Id vec4_bool_type_ = 0; + + // Constants + spv::Id vec4_float_zero_ = 0, vec4_float_one_ = 0; + + // Array of AMD registers + // These values are all pointers. 
+ spv::Id registers_ptr_ = 0, registers_type_ = 0; + spv::Id consts_ = 0, a0_ = 0, aL_ = 0, p0_ = 0; + spv::Id ps_ = 0, pv_ = 0; // IDs of previous results + spv::Id pos_ = 0; + spv::Id interpolators_ = 0; + + // Map of {binding -> {offset -> spv input}} + std::map> vertex_binding_map_; + std::map cf_blocks_; }; } // namespace gpu From 1d82d7bd9212be518499882891d18f4dad0e9ae8 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 20 Feb 2016 17:05:53 -0800 Subject: [PATCH 025/145] For now, all 512 constants live in the same block. --- src/xenia/gpu/spirv_shader_translator.cc | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 2ea0ff89c..d4ddf3d77 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -100,7 +100,7 @@ void SpirvShaderTranslator::StartTranslation() { // Uniform constants. Id float_consts_type = - b.makeArrayType(vec4_float_type_, b.makeUintConstant(256), 1); + b.makeArrayType(vec4_float_type_, b.makeUintConstant(512), 1); Id loop_consts_type = b.makeArrayType(b.makeUintType(32), b.makeUintConstant(32), 1); Id bool_consts_type = @@ -119,14 +119,14 @@ void SpirvShaderTranslator::StartTranslation() { b.addMemberDecoration(consts_struct_type, 1, spv::Decoration::DecorationOffset, - 256 * 4 * sizeof(float)); + 512 * 4 * sizeof(float)); b.addMemberDecoration(consts_struct_type, 1, spv::Decoration::DecorationArrayStride, sizeof(uint32_t)); b.addMemberDecoration(consts_struct_type, 2, spv::Decoration::DecorationOffset, - 256 * 4 * sizeof(float) + 32 * sizeof(uint32_t)); + 512 * 4 * sizeof(float) + 32 * sizeof(uint32_t)); b.addMemberDecoration(consts_struct_type, 2, spv::Decoration::DecorationArrayStride, sizeof(uint32_t)); @@ -840,22 +840,28 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { Id storage_index = 0; // Storage index at lowest level std::vector 
storage_offsets; // Offsets in nested arrays -> storage + // Out of the 512 constant registers pixel shaders get the last 256. + uint32_t storage_base = 0; + if (op.storage_source == InstructionStorageSource::kConstantFloat) { + storage_base = is_pixel_shader() ? 256 : 0; + } + switch (op.storage_addressing_mode) { case InstructionStorageAddressingMode::kStatic: { - storage_index = b.makeUintConstant(op.storage_index); + storage_index = b.makeUintConstant(storage_base + op.storage_index); } break; case InstructionStorageAddressingMode::kAddressAbsolute: { // storage_index + a0 storage_index = b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), b.createLoad(a0_), - b.makeUintConstant(op.storage_index)); + b.makeUintConstant(storage_base + op.storage_index)); } break; case InstructionStorageAddressingMode::kAddressRelative: { // TODO: Based on loop index // storage_index + aL.x - storage_index = b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), - b.makeUintConstant(0), - b.makeUintConstant(op.storage_index)); + storage_index = b.createBinOp( + spv::Op::OpIAdd, b.makeUintType(32), b.makeUintConstant(0), + b.makeUintConstant(storage_base + op.storage_index)); } break; default: assert_always(); @@ -1063,7 +1069,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, source_value_id = CreateGlslStd450InstructionCall( spv::Decoration::DecorationInvariant, b.getTypeId(source_value_id), spv::GLSLstd450::kFClamp, - {b.makeFloatConstant(0.0), b.makeFloatConstant(1.0)}); + {source_value_id, b.makeFloatConstant(0.0), b.makeFloatConstant(1.0)}); } // swizzle From 8777d22b72ca0df52648b3e5ff6ccaf9eb97cade Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sat, 20 Feb 2016 19:31:28 -0600 Subject: [PATCH 026/145] Add name information to constants Fix Rcp/Rsq OpSelect incorrect usage --- src/xenia/gpu/spirv_shader_translator.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index d4ddf3d77..943da9d45 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -116,6 +116,7 @@ void SpirvShaderTranslator::StartTranslation() { b.addMemberDecoration(consts_struct_type, 0, spv::Decoration::DecorationArrayStride, 4 * sizeof(float)); + b.addMemberName(consts_struct_type, 0, "float_consts"); b.addMemberDecoration(consts_struct_type, 1, spv::Decoration::DecorationOffset, @@ -123,6 +124,7 @@ void SpirvShaderTranslator::StartTranslation() { b.addMemberDecoration(consts_struct_type, 1, spv::Decoration::DecorationArrayStride, sizeof(uint32_t)); + b.addMemberName(consts_struct_type, 1, "loop_consts"); b.addMemberDecoration(consts_struct_type, 2, spv::Decoration::DecorationOffset, @@ -130,6 +132,7 @@ void SpirvShaderTranslator::StartTranslation() { b.addMemberDecoration(consts_struct_type, 2, spv::Decoration::DecorationArrayStride, sizeof(uint32_t)); + b.addMemberName(consts_struct_type, 2, "bool_consts"); consts_ = b.createVariable(spv::StorageClass::StorageClassUniform, consts_struct_type, "consts"); @@ -705,7 +708,8 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( b.makeFloatConstant(0.f)); auto d = b.createBinOp(spv::Op::OpFDiv, float_type_, b.makeFloatConstant(1.f), sources[0]); - dest = b.createBinOp(spv::Op::OpSelect, c, b.makeFloatConstant(0.f), d); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, + b.makeFloatConstant(0.f), d); } break; case AluScalarOpcode::kRsq: { @@ -715,7 +719,8 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto d = CreateGlslStd450InstructionCall( spv::Decoration::DecorationInvariant, 
vec4_float_type_, spv::GLSLstd450::kInverseSqrt, {sources[0]}); - dest = b.createBinOp(spv::Op::OpSelect, c, b.makeFloatConstant(0.f), d); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, + b.makeFloatConstant(0.f), d); } break; case AluScalarOpcode::kSeqs: { From 3130d4b38ed77d8e9920d2ae1e1a451538db1467 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 20 Feb 2016 21:02:28 -0600 Subject: [PATCH 027/145] Add the vulkan backend as a choice in xenia-app --- src/xenia/app/premake5.lua | 1 + src/xenia/app/xenia_main.cc | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/src/xenia/app/premake5.lua b/src/xenia/app/premake5.lua index 3cfef9a82..e6b98a3ec 100644 --- a/src/xenia/app/premake5.lua +++ b/src/xenia/app/premake5.lua @@ -19,6 +19,7 @@ project("xenia-app") "xenia-debug-ui", "xenia-gpu", "xenia-gpu-gl4", + "xenia-gpu-vulkan", "xenia-hid-nop", "xenia-kernel", "xenia-ui", diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index 0c95bc562..bc9b662c5 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -25,6 +25,7 @@ // Available graphics systems: #include "xenia/gpu/gl4/gl4_graphics_system.h" +#include "xenia/gpu/vulkan/vulkan_graphics_system.h" // Available input drivers: #include "xenia/hid/nop/nop_hid.h" @@ -69,6 +70,9 @@ std::unique_ptr CreateGraphicsSystem() { if (FLAGS_gpu.compare("gl4") == 0) { return std::unique_ptr( new xe::gpu::gl4::GL4GraphicsSystem()); + } else if (FLAGS_gpu.compare("vulkan") == 0) { + return std::unique_ptr( + new xe::gpu::vulkan::VulkanGraphicsSystem()); } else { // Create best available. std::unique_ptr best; From cd02cdfc70584d98e42e1f6bbb1580217c9e75d3 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 20 Feb 2016 19:19:29 -0800 Subject: [PATCH 028/145] Making memory API less error prone; fixes buffer/constant uploads. 
--- src/xenia/base/memory.cc | 90 +++++++++------------ src/xenia/base/memory.h | 21 ++--- src/xenia/gpu/command_processor.cc | 5 +- src/xenia/gpu/gl4/gl4_command_processor.cc | 2 +- src/xenia/gpu/gl4/texture_cache.cc | 12 +-- src/xenia/gpu/vulkan/buffer_cache.cc | 12 +-- src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc | 3 +- 7 files changed, 60 insertions(+), 85 deletions(-) diff --git a/src/xenia/base/memory.cc b/src/xenia/base/memory.cc index f83b01d72..223ebf379 100644 --- a/src/xenia/base/memory.cc +++ b/src/xenia/base/memory.cc @@ -22,109 +22,99 @@ void copy_128_aligned(void* dest, const void* src, size_t count) { std::memcpy(dest, src, count * 16); } -void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src, - size_t count) { +void copy_and_swap_16_aligned(void* dest, const void* src, size_t count) { return copy_and_swap_16_unaligned(dest, src, count); } -void copy_and_swap_16_unaligned(uint16_t* dest, const uint16_t* src, +void copy_and_swap_16_unaligned(void* dest_ptr, const void* src_ptr, size_t count) { + auto dest = reinterpret_cast(dest_ptr); + auto src = reinterpret_cast(src_ptr); size_t i; - __m128i input, output; - for (i = 0; i + 8 <= count; i += 8) { - input = _mm_loadu_si128(reinterpret_cast(&src[i])); - output = _mm_or_si128(_mm_slli_epi16(input, 8), _mm_srli_epi16(input, 8)); + __m128i input = _mm_loadu_si128(reinterpret_cast(&src[i])); + __m128i output = + _mm_or_si128(_mm_slli_epi16(input, 8), _mm_srli_epi16(input, 8)); _mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output); } - for (; i < count; ++i) { // handle residual elements dest[i] = byte_swap(src[i]); } } -void copy_and_swap_32_aligned(uint32_t* dest, const uint32_t* src, - size_t count) { +void copy_and_swap_32_aligned(void* dest, const void* src, size_t count) { return copy_and_swap_32_unaligned(dest, src, count); } -void copy_and_swap_32_unaligned(uint32_t* dest, const uint32_t* src, +void copy_and_swap_32_unaligned(void* dest_ptr, const void* src_ptr, size_t count) { 
- size_t i; - __m128i input, byte1, byte2, byte3, byte4, output; + auto dest = reinterpret_cast(dest_ptr); + auto src = reinterpret_cast(src_ptr); __m128i byte2mask = _mm_set1_epi32(0x00FF0000); __m128i byte3mask = _mm_set1_epi32(0x0000FF00); - + size_t i; for (i = 0; i + 4 <= count; i += 4) { - input = _mm_loadu_si128(reinterpret_cast(&src[i])); - - // Do the four shifts - byte1 = _mm_slli_epi32(input, 24); - byte2 = _mm_slli_epi32(input, 8); - byte3 = _mm_srli_epi32(input, 8); - byte4 = _mm_srli_epi32(input, 24); - - // Or bytes together - output = _mm_or_si128(byte1, byte4); + __m128i input = _mm_loadu_si128(reinterpret_cast(&src[i])); + // Do the four shifts. + __m128i byte1 = _mm_slli_epi32(input, 24); + __m128i byte2 = _mm_slli_epi32(input, 8); + __m128i byte3 = _mm_srli_epi32(input, 8); + __m128i byte4 = _mm_srli_epi32(input, 24); + // OR bytes together. + __m128i output = _mm_or_si128(byte1, byte4); byte2 = _mm_and_si128(byte2, byte2mask); output = _mm_or_si128(output, byte2); byte3 = _mm_and_si128(byte3, byte3mask); output = _mm_or_si128(output, byte3); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output); } - for (; i < count; ++i) { // handle residual elements dest[i] = byte_swap(src[i]); } } -void copy_and_swap_64_aligned(uint64_t* dest, const uint64_t* src, - size_t count) { +void copy_and_swap_64_aligned(void* dest, const void* src, size_t count) { return copy_and_swap_64_unaligned(dest, src, count); } -void copy_and_swap_64_unaligned(uint64_t* dest, const uint64_t* src, +void copy_and_swap_64_unaligned(void* dest_ptr, const void* src_ptr, size_t count) { - size_t i; - __m128i input, byte1, byte2, byte3, byte4, output; + auto dest = reinterpret_cast(dest_ptr); + auto src = reinterpret_cast(src_ptr); __m128i byte2mask = _mm_set1_epi32(0x00FF0000); __m128i byte3mask = _mm_set1_epi32(0x0000FF00); - + size_t i; for (i = 0; i + 2 <= count; i += 2) { - input = _mm_loadu_si128(reinterpret_cast(&src[i])); - - // Do the four shifts - byte1 = 
_mm_slli_epi32(input, 24); - byte2 = _mm_slli_epi32(input, 8); - byte3 = _mm_srli_epi32(input, 8); - byte4 = _mm_srli_epi32(input, 24); - - // Or bytes together - output = _mm_or_si128(byte1, byte4); + __m128i input = _mm_loadu_si128(reinterpret_cast(&src[i])); + // Do the four shifts. + __m128i byte1 = _mm_slli_epi32(input, 24); + __m128i byte2 = _mm_slli_epi32(input, 8); + __m128i byte3 = _mm_srli_epi32(input, 8); + __m128i byte4 = _mm_srli_epi32(input, 24); + // OR bytes together. + __m128i output = _mm_or_si128(byte1, byte4); byte2 = _mm_and_si128(byte2, byte2mask); output = _mm_or_si128(output, byte2); byte3 = _mm_and_si128(byte3, byte3mask); output = _mm_or_si128(output, byte3); - - // Reorder the two words + // Reorder the two words. output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1)); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output); } - for (; i < count; ++i) { // handle residual elements dest[i] = byte_swap(src[i]); } } -void copy_and_swap_16_in_32_aligned(uint32_t* dest, const uint32_t* src, +void copy_and_swap_16_in_32_aligned(void* dest_ptr, const void* src_ptr, size_t count) { + auto dest = reinterpret_cast(dest_ptr); + auto src = reinterpret_cast(src_ptr); size_t i; - __m128i input, output; for (i = 0; i + 4 <= count; i += 4) { - input = _mm_loadu_si128(reinterpret_cast(&src[i])); - output = _mm_or_si128(_mm_slli_epi32(input, 16), _mm_srli_epi32(input, 16)); + __m128i input = _mm_loadu_si128(reinterpret_cast(&src[i])); + __m128i output = + _mm_or_si128(_mm_slli_epi32(input, 16), _mm_srli_epi32(input, 16)); _mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output); } for (; i < count; ++i) { // handle residual elements diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h index 183843416..c35bfb1db 100644 --- a/src/xenia/base/memory.h +++ b/src/xenia/base/memory.h @@ -123,20 +123,13 @@ inline void* low_address(void* address) { void copy_128_aligned(void* dest, const void* src, size_t count); -void 
copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src, - size_t count); -void copy_and_swap_16_unaligned(uint16_t* dest, const uint16_t* src, - size_t count); -void copy_and_swap_32_aligned(uint32_t* dest, const uint32_t* src, - size_t count); -void copy_and_swap_32_unaligned(uint32_t* dest, const uint32_t* src, - size_t count); -void copy_and_swap_64_aligned(uint64_t* dest, const uint64_t* src, - size_t count); -void copy_and_swap_64_unaligned(uint64_t* dest, const uint64_t* src, - size_t count); -void copy_and_swap_16_in_32_aligned(uint32_t* dest, const uint32_t* src, - size_t count); +void copy_and_swap_16_aligned(void* dest, const void* src, size_t count); +void copy_and_swap_16_unaligned(void* dest, const void* src, size_t count); +void copy_and_swap_32_aligned(void* dest, const void* src, size_t count); +void copy_and_swap_32_unaligned(void* dest, const void* src, size_t count); +void copy_and_swap_64_aligned(void* dest, const void* src, size_t count); +void copy_and_swap_64_unaligned(void* dest, const void* src, size_t count); +void copy_and_swap_16_in_32_aligned(void* dest, const void* src, size_t count); template void copy_and_swap(T* dest, const T* src, size_t count) { diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index 545679d6b..14e381b6d 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -1019,9 +1019,8 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_EXT(RingBuffer* reader, 1, // max z }; assert_true(endianness == Endian::k8in16); - xe::copy_and_swap_16_aligned( - reinterpret_cast(memory_->TranslatePhysical(address)), extents, - xe::countof(extents)); + xe::copy_and_swap_16_aligned(memory_->TranslatePhysical(address), extents, + xe::countof(extents)); trace_writer_.WriteMemoryWrite(CpuToGpu(address), sizeof(extents)); return true; } diff --git a/src/xenia/gpu/gl4/gl4_command_processor.cc b/src/xenia/gpu/gl4/gl4_command_processor.cc index 2305b38b4..aaed5c0c6 
100644 --- a/src/xenia/gpu/gl4/gl4_command_processor.cc +++ b/src/xenia/gpu/gl4/gl4_command_processor.cc @@ -1410,7 +1410,7 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateVertexBuffers() { // as we copy and only if it differs from the previous value committing // it (and if it matches just discard and reuse). xe::copy_and_swap_32_aligned( - reinterpret_cast(allocation.host_ptr), + allocation.host_ptr, memory_->TranslatePhysical(fetch->address << 2), valid_range / 4); diff --git a/src/xenia/gpu/gl4/texture_cache.cc b/src/xenia/gpu/gl4/texture_cache.cc index 0e1132218..4a8917e71 100644 --- a/src/xenia/gpu/gl4/texture_cache.cc +++ b/src/xenia/gpu/gl4/texture_cache.cc @@ -662,19 +662,13 @@ void TextureSwap(Endian endianness, void* dest, const void* src, size_t length) { switch (endianness) { case Endian::k8in16: - xe::copy_and_swap_16_aligned(reinterpret_cast(dest), - reinterpret_cast(src), - length / 2); + xe::copy_and_swap_16_aligned(dest, src, length / 2); break; case Endian::k8in32: - xe::copy_and_swap_32_aligned(reinterpret_cast(dest), - reinterpret_cast(src), - length / 4); + xe::copy_and_swap_32_aligned(dest, src, length / 4); break; case Endian::k16in32: // Swap high and low 16 bits within a 32 bit word - xe::copy_and_swap_16_in_32_aligned(reinterpret_cast(dest), - reinterpret_cast(src), - length); + xe::copy_and_swap_16_in_32_aligned(dest, src, length); break; default: case Endian::kUnspecified: diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc index 32c2cef4b..1def6d26f 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -290,13 +290,13 @@ std::pair BufferCache::UploadIndexBuffer( if (format == IndexFormat::kInt16) { // Endian::k8in16, swap half-words. 
xe::copy_and_swap_16_aligned( - reinterpret_cast(transient_buffer_data_) + offset, - reinterpret_cast(source_ptr), source_length / 2); + reinterpret_cast(transient_buffer_data_) + offset, source_ptr, + source_length / 2); } else if (format == IndexFormat::kInt32) { // Endian::k8in32, swap words. xe::copy_and_swap_32_aligned( - reinterpret_cast(transient_buffer_data_) + offset, - reinterpret_cast(source_ptr), source_length / 4); + reinterpret_cast(transient_buffer_data_) + offset, source_ptr, + source_length / 4); } return {transient_index_buffer_, offset}; @@ -317,8 +317,8 @@ std::pair BufferCache::UploadVertexBuffer( // TODO(benvanik): memcpy then use compute shaders to swap? // Endian::k8in32, swap words. xe::copy_and_swap_32_aligned( - reinterpret_cast(transient_buffer_data_) + offset, - reinterpret_cast(source_ptr), source_length / 4); + reinterpret_cast(transient_buffer_data_) + offset, source_ptr, + source_length / 4); return {transient_vertex_buffer_, offset}; } diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc index 895e85455..e979cb62a 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc @@ -343,8 +343,7 @@ void VdSwap(lpvoid_t buffer_ptr, // ptr into primary ringbuffer lpunknown_t unk8, unknown_t unk9) { gpu::xenos::xe_gpu_texture_fetch_t fetch; xe::copy_and_swap_32_unaligned( - reinterpret_cast(&fetch), - reinterpret_cast(fetch_ptr.host_address()), 6); + &fetch, reinterpret_cast(fetch_ptr.host_address()), 6); auto color_format = gpu::ColorFormat(color_format_ptr.value()); auto color_space = *color_space_ptr; From 08a173e5ecc3c06f70d6a66d28a43d587eb22cf6 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sat, 20 Feb 2016 21:28:25 -0600 Subject: [PATCH 029/145] Scalar Sxxx --- src/xenia/gpu/spirv_shader_translator.cc | 40 +++++++++++++++--------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 943da9d45..c071cfa1f 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -724,19 +724,35 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } break; case AluScalarOpcode::kSeqs: { - // TODO: dest = src0 == 0.0 ? 1.0 : 0.0; + // dest = src0 == 0.0 ? 1.0 : 0.0; + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], + b.makeFloatConstant(0.f)); + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); } break; case AluScalarOpcode::kSges: { - // TODO: dest = src0 >= 0.0 ? 1.0 : 0.0; + // dest = src0 >= 0.0 ? 1.0 : 0.0; + auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, + sources[0], b.makeFloatConstant(0.f)); + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); } break; case AluScalarOpcode::kSgts: { - // TODO: dest = src0 > 0.0 ? 1.0 : 0.0; + // dest = src0 > 0.0 ? 1.0 : 0.0; + auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, + sources[0], b.makeFloatConstant(0.f)); + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); } break; case AluScalarOpcode::kSnes: { - // TODO: dest = src0 != 0.0 ? 1.0 : 0.0; + // dest = src0 != 0.0 ? 
1.0 : 0.0; + auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0], + b.makeFloatConstant(0.f)); + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); } break; case AluScalarOpcode::kSetpEq: { @@ -932,29 +948,26 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { swiz = op.components[op.component_count - 1]; } - uint32_t swiz_id = 0; switch (swiz) { case SwizzleSource::kX: - swiz_id = 0; + operands.push_back(0); break; case SwizzleSource::kY: - swiz_id = 1; + operands.push_back(1); break; case SwizzleSource::kZ: - swiz_id = 2; + operands.push_back(2); break; case SwizzleSource::kW: - swiz_id = 3; + operands.push_back(3); break; case SwizzleSource::k0: - swiz_id = 4; + operands.push_back(4); break; case SwizzleSource::k1: - swiz_id = 5; + operands.push_back(5); break; } - - operands.push_back(swiz_id); } storage_value = @@ -1096,7 +1109,6 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, continue; } - uint32_t swiz_id = 0; switch (swiz) { case SwizzleSource::kX: operands.push_back(0); From ca01bb231169732df929c7c0f782ac1da3352219 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 20 Feb 2016 21:53:22 -0600 Subject: [PATCH 030/145] ALU predicated discard --- src/xenia/gpu/spirv_shader_translator.cc | 39 ++++++++++++++++++++++-- src/xenia/gpu/spirv_shader_translator.h | 3 +- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index c071cfa1f..f3997266d 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -599,8 +599,20 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( } if (dest) { + // If predicated, discard the result from the instruction. 
+ Id pred_cond = 0; + Id pv_dest = dest; + if (instr.is_predicated) { + pred_cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.predicate_condition)); + + pv_dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, pred_cond, + dest, b.createLoad(pv_)); + } + b.createStore(dest, pv_); - StoreToResult(dest, instr.result); + StoreToResult(dest, instr.result, pred_cond); } } @@ -839,8 +851,20 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } if (dest) { + // If predicated, discard the result from the instruction. + Id pred_cond = 0; + Id ps_dest = dest; + if (instr.is_predicated) { + pred_cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.predicate_condition)); + + ps_dest = b.createTriOp(spv::Op::OpSelect, float_type_, pred_cond, dest, + b.createLoad(ps_)); + } + b.createStore(dest, ps_); - StoreToResult(dest, instr.result); + StoreToResult(dest, instr.result, pred_cond); } } @@ -978,7 +1002,8 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { } void SpirvShaderTranslator::StoreToResult(Id source_value_id, - const InstructionResult& result) { + const InstructionResult& result, + Id predicate_cond) { auto& b = *builder_; if (result.storage_target == InstructionStorageTarget::kNone) { @@ -1153,6 +1178,14 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, // Perform store into the pointer. assert_true(b.getNumComponents(source_value_id) == b.getNumTypeComponents(storage_type)); + + // Discard if predicate condition is false. 
+ if (predicate_cond) { + source_value_id = + b.createTriOp(spv::Op::OpSelect, storage_type, predicate_cond, + source_value_id, storage_value); + } + b.createStore(source_value_id, storage_pointer); } diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index fbd3af8cd..f56325fc6 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -71,7 +71,8 @@ class SpirvShaderTranslator : public ShaderTranslator { // Stores a value based on the specified result information. // The value will be transformed into the appropriate form for the result and // the proper components will be selected. - void StoreToResult(spv::Id source_value_id, const InstructionResult& result); + void StoreToResult(spv::Id source_value_id, const InstructionResult& result, + spv::Id predicate_cond = 0); xe::ui::spirv::SpirvDisassembler disassembler_; From 2629ae4a14447d4877713a1f87373da6136384f9 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 20 Feb 2016 21:55:32 -0600 Subject: [PATCH 031/145] Fix using incorrect result types for compares --- src/xenia/gpu/spirv_shader_translator.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index f3997266d..f8cf69af9 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -566,7 +566,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( case AluVectorOpcode::kSge: { // foreach(el) src0 >= src1 ? 1.0 : 0.0 - auto c = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_float_type_, + auto c = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_, sources[0], sources[1]); dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, vec4_float_one_, vec4_float_zero_); @@ -574,7 +574,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( case AluVectorOpcode::kSgt: { // foreach(el) src0 > src1 ? 
1.0 : 0.0 - auto c = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_float_type_, + auto c = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, sources[0], sources[1]); dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, vec4_float_one_, vec4_float_zero_); @@ -582,7 +582,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( case AluVectorOpcode::kSne: { // foreach(el) src0 != src1 ? 1.0 : 0.0 - auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_float_type_, + auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, sources[0], sources[1]); dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, vec4_float_one_, vec4_float_zero_); @@ -726,7 +726,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( case AluScalarOpcode::kRsq: { // dest = src0 != 0.0 ? inversesqrt(src0) : 0.0; - auto c = b.createBinOp(spv::Op::OpFOrdEqual, float_type_, sources[0], + auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], b.makeFloatConstant(0.f)); auto d = CreateGlslStd450InstructionCall( spv::Decoration::DecorationInvariant, vec4_float_type_, From ff373a83a4547d0bef6c22644ad4fc918d632f74 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 20 Feb 2016 20:44:11 -0800 Subject: [PATCH 032/145] Private->Function variables, fragment output, geometry shader tweaks. 
--- src/xenia/gpu/spirv_shader_translator.cc | 42 ++++++++++++------ src/xenia/gpu/spirv_shader_translator.h | 9 ++-- .../vulkan/shaders/bin/line_quad_list_geom.h | 4 +- .../shaders/bin/line_quad_list_geom.spv | Bin 2344 -> 0 bytes .../shaders/bin/line_quad_list_geom.txt | 4 +- .../gpu/vulkan/shaders/bin/point_list_geom.h | 4 +- .../vulkan/shaders/bin/point_list_geom.spv | Bin 2420 -> 0 bytes .../vulkan/shaders/bin/point_list_geom.txt | 4 +- .../gpu/vulkan/shaders/bin/quad_list_geom.h | 4 +- .../gpu/vulkan/shaders/bin/quad_list_geom.spv | Bin 2084 -> 0 bytes .../gpu/vulkan/shaders/bin/quad_list_geom.txt | 4 +- .../gpu/vulkan/shaders/bin/rect_list_geom.h | 4 +- .../gpu/vulkan/shaders/bin/rect_list_geom.spv | Bin 5204 -> 0 bytes .../gpu/vulkan/shaders/bin/rect_list_geom.txt | 4 +- .../gpu/vulkan/shaders/line_quad_list.geom | 6 +-- src/xenia/gpu/vulkan/shaders/point_list.geom | 6 +-- src/xenia/gpu/vulkan/shaders/quad_list.geom | 6 +-- src/xenia/gpu/vulkan/shaders/rect_list.geom | 6 +-- 18 files changed, 60 insertions(+), 47 deletions(-) delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.spv delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/point_list_geom.spv delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.spv delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.spv diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index f8cf69af9..f46d76bbd 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -83,19 +83,19 @@ void SpirvShaderTranslator::StartTranslation() { registers_type_ = b.makeArrayType(vec4_float_type_, b.makeUintConstant(64), 0); - registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassPrivate, + registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassFunction, registers_type_, "r"); - aL_ = b.createVariable(spv::StorageClass::StorageClassPrivate, + aL_ = 
b.createVariable(spv::StorageClass::StorageClassFunction, vec4_uint_type_, "aL"); - p0_ = b.createVariable(spv::StorageClass::StorageClassPrivate, bool_type_, + p0_ = b.createVariable(spv::StorageClass::StorageClassFunction, bool_type_, "p0"); - ps_ = b.createVariable(spv::StorageClass::StorageClassPrivate, float_type_, + ps_ = b.createVariable(spv::StorageClass::StorageClassFunction, float_type_, "ps"); - pv_ = b.createVariable(spv::StorageClass::StorageClassPrivate, + pv_ = b.createVariable(spv::StorageClass::StorageClassFunction, vec4_float_type_, "pv"); - a0_ = b.createVariable(spv::StorageClass::StorageClassPrivate, + a0_ = b.createVariable(spv::StorageClass::StorageClassFunction, b.makeUintType(32), "a0"); // Uniform constants. @@ -110,7 +110,7 @@ void SpirvShaderTranslator::StartTranslation() { {float_consts_type, loop_consts_type, bool_consts_type}, "consts_type"); b.addDecoration(consts_struct_type, spv::Decoration::DecorationBlock); - // Constants member decorations + // Constants member decorations. b.addMemberDecoration(consts_struct_type, 0, spv::Decoration::DecorationOffset, 0); b.addMemberDecoration(consts_struct_type, 0, @@ -144,11 +144,11 @@ void SpirvShaderTranslator::StartTranslation() { b.addDecoration(consts_, spv::Decoration::DecorationBinding, 1); } - // Interpolators + // Interpolators. Id interpolators_type = b.makeArrayType(vec4_float_type_, b.makeUintConstant(16), 0); if (is_vertex_shader()) { - // Vertex inputs/outputs + // Vertex inputs/outputs. for (const auto& binding : vertex_bindings()) { for (const auto& attrib : binding.attributes) { Id attrib_type = 0; @@ -204,13 +204,22 @@ void SpirvShaderTranslator::StartTranslation() { b.addDecoration(pos_, spv::Decoration::DecorationBuiltIn, spv::BuiltIn::BuiltInPosition); } else { - // Pixel inputs/outputs + // Pixel inputs from vertex shader. 
interpolators_ = b.createVariable(spv::StorageClass::StorageClassInput, interpolators_type, "interpolators"); b.addDecoration(interpolators_, spv::Decoration::DecorationNoPerspective); b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); - // Copy interpolators to r[0..16] + // Pixel fragment outputs (one per render target). + Id frag_outputs_type = + b.makeArrayType(vec4_float_type_, b.makeUintConstant(4), 0); + frag_outputs_ = b.createVariable(spv::StorageClass::StorageClassOutput, + frag_outputs_type, "o"); + b.addDecoration(frag_outputs_, spv::Decoration::DecorationLocation, 0); + + // TODO(benvanik): frag depth, etc. + + // Copy interpolators to r[0..16]. b.createNoResultOp(spv::Op::OpCopyMemorySized, {registers_ptr_, interpolators_, b.makeUintConstant(16 * 4 * sizeof(float))}); @@ -916,7 +925,7 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { switch (op.storage_source) { case InstructionStorageSource::kRegister: storage_pointer = registers_ptr_; - storage_class = spv::StorageClass::StorageClassPrivate; + storage_class = spv::StorageClass::StorageClassFunction; storage_type = vec4_float_type_; storage_offsets.push_back(storage_index); break; @@ -1040,7 +1049,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, switch (result.storage_target) { case InstructionStorageTarget::kRegister: storage_pointer = registers_ptr_; - storage_class = spv::StorageClass::StorageClassPrivate; + storage_class = spv::StorageClass::StorageClassFunction; storage_type = vec4_float_type_; storage_offsets.push_back(storage_index); storage_array = true; @@ -1068,7 +1077,12 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, break; case InstructionStorageTarget::kColorTarget: assert_true(is_pixel_shader()); - // TODO(benvanik): result.storage_index + assert_not_zero(frag_outputs_); + storage_pointer = frag_outputs_; + storage_class = spv::StorageClass::StorageClassOutput; + storage_type = vec4_float_type_; + 
storage_offsets.push_back(storage_index); + storage_array = true; break; case InstructionStorageTarget::kDepth: assert_true(is_pixel_shader()); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index f56325fc6..aabd6fec1 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -54,8 +54,6 @@ class SpirvShaderTranslator : public ShaderTranslator { void ProcessAluInstruction(const ParsedAluInstruction& instr) override; private: - void SetupPushConstants(); - void ProcessVectorAluInstruction(const ParsedAluInstruction& instr); void ProcessScalarAluInstruction(const ParsedAluInstruction& instr); @@ -80,22 +78,23 @@ class SpirvShaderTranslator : public ShaderTranslator { std::unique_ptr builder_; spv::Id glsl_std_450_instruction_set_ = 0; - // Types + // Types. spv::Id float_type_ = 0, bool_type_ = 0; spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0; spv::Id vec4_uint_type_ = 0; spv::Id vec4_bool_type_ = 0; - // Constants + // Constants. spv::Id vec4_float_zero_ = 0, vec4_float_one_ = 0; - // Array of AMD registers + // Array of AMD registers. // These values are all pointers. 
spv::Id registers_ptr_ = 0, registers_type_ = 0; spv::Id consts_ = 0, a0_ = 0, aL_ = 0, p0_ = 0; spv::Id ps_ = 0, pv_ = 0; // IDs of previous results spv::Id pos_ = 0; spv::Id interpolators_ = 0; + spv::Id frag_outputs_ = 0; // Map of {binding -> {offset -> spv input}} std::map> vertex_binding_map_; diff --git a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h index cb3511e37..af848e905 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h +++ b/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h @@ -62,12 +62,12 @@ const uint8_t line_quad_list_geom[] = { 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, diff --git a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.spv b/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.spv deleted file mode 100644 index c8ade8408f0e7a26a1eff170df2428d68b8686c6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2344 
zcmZ{l>rPWq5QR6i1rg;U0$vb{ct;VnAc~*}T2wS9B>p9hHs&PQ8hSLu$MZ4tMf_vp z`u6FHJE*4_XV%Q@nOU>X!BX#Re+WJL9SN^P@r;IE{X#&W>gsA&FG&xEKIMl{-`?4t z@8s|2Yl{nlVJMVPF*Y0qA}R}iY-BC`$0a8v7bRCEBg!B?9_@0pWybT4O(n52vMZ93 ze$4kxLsWDs?NQNegNl9F+kMmAf7{&8n+JmO7Jh7E?M{|w?Uua#F&{qBpSALx>}!)e z$LOWl#$NVmGwbAy*1M+qmLebW=s8m4{M8db&*7f<|JRe)zk8PB{!}Y2w4%-zlv}Xe zY~&4QE=Ql-p)FxQ?7RIr-~Eyw2v@=-;lpR}@%Rk)leOZJu%$EXl*igWIN(Fu$L#oc z8|wNO8(&?sV4sulfO#j3Iq!Xu$7A2&KHgE<$3D)m7oKSeXX%B{y}<0nm^0pi&x~nm z=AID;xL{An>XWWYCRIZpPsGK0T-H}q#H7NU;@|-ZGfW8|mF7IY>F!=#gIHBL$E4{8 zx0NntPx!`#hoq|#YSIHdEa9Ohdq`_~j#2rUlQ~XElZTIaPfsx4Dn8kTpcqUA5=1)1)g^8KNmh6-Rm>hF>F57id4lm><=6iT4 fJLLc-$M>);8xEd@dw3WV|{;6LG@9P&f@5Odyty*8KM&sV#r+%)%-oSIC*0c|OeN+cqlYLZ> zH65yb|5uZ^=f5?Hzo{l|udk_WfS$eP+<-JG~ z@Gi8E9FuT$_qWUU6kJT&gZ@^=`QjWadi41|`c`n`#b(N!Bd0cfw!xjDKHjk%ZcpOp zi|+1{)w`+G&M`)*TH9c$k3{}|aB>}E=z z!0qJ_yxEJn4ud&MZPu>t+le{Pqj2wW)-kWokj?EZ=HU8>Y1j92D5XcwLO)UKmyT0| zz9%7l!~;83^d|O2@zXc_P8Yu#o6Ps00W+sLjM2y5zJ%MaHL8DAbZbz*3b#*vqPJ_v zzMZ{&S8QT4cwUE#8T&olSTX0O?*5AD>+b4no||y>*q2+#n;>yx)mI8;?%PE+<}5RF z_Ria5{=Y#waAT+7(~v!=yF31!4Bg%EHx;@w@$Z)U0UqZJ^nBD9&2_^?$!c#x;tM(V*2QB zuJe$%y8V9x+4mD-v|oVq5r15;i^$fXEo!`k+^lgPT}<-*efQhMd&dG~4o_g0k;UvK zuq(*(kSDN3~GyehEnLpxA-}*;L%)N@;5o_*F7yU1C{sWzZ{sk~OpBVrE diff --git a/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt index 0eecef563..ea6523102 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt +++ b/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt @@ -46,10 +46,10 @@ OpDecorate %39 Block OpDecorate %39 Stream 0 OpDecorate %41 Stream 0 - OpMemberDecorate %74 0 Location 1 + OpMemberDecorate %74 0 Location 0 OpDecorate %74 Stream 0 OpDecorate %76 Stream 0 - OpMemberDecorate %77 0 Location 1 + OpMemberDecorate %77 0 Location 0 %2 = OpTypeVoid %3 = OpTypeFunction %2 %6 = OpTypeFloat 32 diff --git a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h index 7a27bde3a..f168ce835 
100644 --- a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h +++ b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h @@ -67,11 +67,11 @@ const uint8_t quad_list_geom[] = { 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x39, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x39, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, diff --git a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.spv b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.spv deleted file mode 100644 index a57165e8afcbae1d14f3f1d47613a26fa566156b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2084 zcmZ{l*-lhJ5QZCO0Yq73b3q)$9mEX~MHB?Z3ocAZyd|Sd%t>&Dz-WwDzLKxtW9WMr zH8Jt~dQOuGYM&HU_1DsMs_G1d&Z({tI(WLnv(P$&p_3;B`Kg>&a=rlX4Q2d8*x20K zoIPmn%+Af-n-^@R^O?8h?qH}R(-Wwi+<&B5&mX(`$cX4Wo^HS zBd5$G?$Opb$eLNB4%!uQdl2!g-rUMQn(EMIec0A^v%U4~pjoZIu5ouE_CUW9o>y8v zf88nWIeMq~|L)ZG?>mdJ=j4h*uqXHNZN24swOO_1V#I($1G1;is6p4>H(Nb>h<-Fi z%#&iAX;zOR;W=4S?|P$GIK+GhF?sT{b}=_-gTKa#ZBUD{GxMmsM@ct6^{f4!)T{P; zQjgm2Ntk);i)R9|Ht!Nkcd6T#q#KiZkYa|Y+PWus2j4|`gt#)i0-3jh-UXM$r*nG` zvM1>S$VF(FEQ4_K48tpsM?ZDHAN|JpI|?_aSjsbo?7d0%o3^&3pGLOM87eZ~@61|^ z(^tDSkHf85pHVVY;L;ej$y~OMRM!RM4#>Kua=X|>z8fi~Wh;hD*sRNWFA<~eti_CR 
zR_!t7y$UAo`QxOa-1L*3sTG3_(CJelh^;pR{Jo!nl5>pu_IuAbiO9wOAwcl0&SGTdEC@9_w^0}?k@-S11x+^e~4%mTe}e)isD z{vRMExUtU6-$Z%R{asADb9JZH9}%su^Z!YXV{miHzmw}Y`~>9D-Urt%U&ik4^+V?P zOa<=k$sDt1zwlA_8#I@($|?3Nva!}-P440orK+cy~$tD_h|#oQwUGr=07^_VW`pIGd-CSgJvr PSo^G<%QtfVh8Ceea|?_> diff --git a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt index 4a1ed2b02..ee4a83586 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt +++ b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt @@ -45,10 +45,10 @@ OpMemberDecorate %39 1 BuiltIn PointSize OpMemberDecorate %39 2 BuiltIn ClipDistance OpDecorate %39 Block - OpMemberDecorate %57 0 Location 1 + OpMemberDecorate %57 0 Location 0 OpDecorate %57 Stream 0 OpDecorate %59 Stream 0 - OpMemberDecorate %60 0 Location 1 + OpMemberDecorate %60 0 Location 0 %2 = OpTypeVoid %3 = OpTypeFunction %2 %6 = OpTypeInt 32 1 diff --git a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h index 511aeb2d0..b9598cfa9 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h +++ b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h @@ -66,12 +66,12 @@ const uint8_t rect_list_geom[] = { 0x20, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 
0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, diff --git a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.spv b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.spv deleted file mode 100644 index 8074f71f5d7169723c5b2ea2533ccd7f0d20bc8a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5204 zcmZ{m2a{Dr5QVSqvgU|lhNmcIlvObY6vcoMbHY^?e7fu^JQX8g&N=6tvzT*^zd-+o zsmdz9@4kDgrdGUE@0`=!(=(@M?tS#EIB}(_T7iEnSHD!w%nKOCU-FAjnYgRoDb!@F_m74X!=MS`p^{*1gK`-)ABVOF`u7&2BSHZ(Z5GkajDJ#Tn$4wSq;_E%N&hGri# zxZuRW1)ae~`uiHc*x1OzcBefujCZ9vU!AqTJ=~eqzHHFE#M}e1gNE7{Om8pj3=E$$ zSno%j_-oOw+v@WFtgGK=^tvs4|F^EO<*(aQ_m(hw7*p-U*i|`wpfli}duyNZkvgZh zK8KNuIm&;6{!Xg&RVH{83c&pnJ~Y&G0^^7SyE)L*|#>w3P&&UIhO z-w13U++&Ne+@pKg9H09$<{sppP1o03O$4uiInVq_O-!COwgH=CeETMzJ=n(`F*VwQ z$&Aj`H=b2IV|~u`Bj&pL+>6{Z*JrNhZLW28VU(wryMo<=e&@=4SF$g^n_}mXaJx#cwPc z_5+t11r3QI7r+W_5;i;4Ly>|y= z#-ia6uyxFFPjYjjVFp+Y?kDGPC|G|q$kp&$aTbT+%lhS69FE^ubQ}RLb;!-hS29e!8k z=VI1!t~hf?z|IZ7py7V^CxS1;Jk#{fxViRv0cL!0!xuKb)at;komv-xb3d$SJhd(c z8_zjj0=Az1)LR5D>s<=BUhahP)VmC9JoPRITT{RHViMyO*c8mY*ay!-pSkwhzS}Ry z?BPnVdssrR?fq4lKF>a9sjoas`=0NQ{t0mFE@sRfT>@U-9bJmwSnlK1VDFK8c0IW{ z))~im4VF6PeZ01b!`h-@dix4>)JPY`7YlGcWu|rJnzX( zn6YTM8C+_Rn-dMUfK#W`a4UXe(Qq5M)F3w}8g2*YzD2_w`1D7Eywq?f+_iJ3a-Z(P zXDk}-2A3M-=0w9i;M6HK+>75>G~5R+HOS40hWo*4a6i%T09b!C$aBAPpB}{L`kq61 z77xLVMaRS7Qit4}=y(L2I;DbAo*rhN z_vtCjSTsBhE;Y!_iH2vusZ(lr7QeA*cn(}@ked??&x1#w#S8fKM}xf7@FG0@D>b}? 
z-&oG;Ny|1G$z{~g@=xf8}y|9i0U)cXN!P5qwTB*q`HDVVzKgJ+@7 sTzhTb?U!Tr@DtcQ Date: Sat, 20 Feb 2016 23:45:01 -0600 Subject: [PATCH 033/145] Vector Cndxx --- src/xenia/gpu/spirv_shader_translator.cc | 25 +++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index f46d76bbd..f0167c58e 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -526,6 +526,30 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( sources[1]); } break; + case AluVectorOpcode::kCndEq: { + // dest = src0 == 0.0 ? src1 : src2; + auto c = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], + b.makeFloatConstant(0.f)); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1], + sources[2]); + } break; + + case AluVectorOpcode::kCndGe: { + // dest = src0 == 0.0 ? src1 : src2; + auto c = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_, + sources[0], b.makeFloatConstant(0.f)); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1], + sources[2]); + } break; + + case AluVectorOpcode::kCndGt: { + // dest = src0 == 0.0 ? src1 : src2; + auto c = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, + sources[0], b.makeFloatConstant(0.f)); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1], + sources[2]); + } break; + case AluVectorOpcode::kCube: { // TODO: } break; @@ -1130,7 +1154,6 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, } // swizzle - // TODO: 0.0 and 1.0 swizzles if (!result.is_standard_swizzle()) { std::vector operands; operands.push_back(source_value_id); From 71440a41371040e21580ea0a7e3af922b3b28b35 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 12:06:59 -0600 Subject: [PATCH 034/145] Add code to correct vertex format Move translated shader code to its own function Add push constants --- src/xenia/gpu/spirv_shader_translator.cc | 89 ++++++++++++++++++++---- src/xenia/gpu/spirv_shader_translator.h | 4 ++ 2 files changed, 78 insertions(+), 15 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index f0167c58e..f6f2ba229 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -47,21 +47,10 @@ void SpirvShaderTranslator::StartTranslation() { b.addCapability(spv::Capability::CapabilityDerivativeControl); } - // main() entry point. - auto mainFn = b.makeMain(); - if (is_vertex_shader()) { - b.addEntryPoint(spv::ExecutionModel::ExecutionModelVertex, mainFn, "main"); - } else { - b.addEntryPoint(spv::ExecutionModel::ExecutionModelFragment, mainFn, - "main"); - b.addExecutionMode(mainFn, spv::ExecutionModeOriginUpperLeft); - } - - // TODO(benvanik): transform feedback. - if (false) { - b.addCapability(spv::Capability::CapabilityTransformFeedback); - b.addExecutionMode(mainFn, spv::ExecutionMode::ExecutionModeXfb); - } + spv::Block* function_block = nullptr; + translated_main_ = b.makeFunctionEntry(spv::Decoration::DecorationInvariant, + b.makeVoidType(), "translated_main", + {}, {}, &function_block); bool_type_ = b.makeBoolType(); float_type_ = b.makeFloatType(32); @@ -144,6 +133,27 @@ void SpirvShaderTranslator::StartTranslation() { b.addDecoration(consts_, spv::Decoration::DecorationBinding, 1); } + // Push constants. 
+ Id push_constants_type = + b.makeStructType({vec4_float_type_, vec4_float_type_, vec4_float_type_}, + "push_consts_type"); + + b.addMemberDecoration(push_constants_type, 0, + spv::Decoration::DecorationOffset, 0); + b.addMemberName(push_constants_type, 0, "window_scale"); + + b.addMemberDecoration(push_constants_type, 1, + spv::Decoration::DecorationOffset, 4 * sizeof(float)); + b.addMemberName(push_constants_type, 1, "vtx_fmt"); + + b.addMemberDecoration(push_constants_type, 2, + spv::Decoration::DecorationOffset, + 2 * 4 * sizeof(float)); + b.addMemberName(push_constants_type, 2, "alpha_test"); + + push_consts_ = b.createVariable(spv::StorageClass::StorageClassPushConstant, + push_constants_type, "push_consts"); + // Interpolators. Id interpolators_type = b.makeArrayType(vec4_float_type_, b.makeUintConstant(16), 0); @@ -231,6 +241,55 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { b.makeReturn(false); + // main() entry point. + auto mainFn = b.makeMain(); + if (is_vertex_shader()) { + b.addEntryPoint(spv::ExecutionModel::ExecutionModelVertex, mainFn, "main"); + } else { + b.addEntryPoint(spv::ExecutionModel::ExecutionModelFragment, mainFn, + "main"); + b.addExecutionMode(mainFn, spv::ExecutionModeOriginUpperLeft); + } + + // TODO(benvanik): transform feedback. + if (false) { + b.addCapability(spv::Capability::CapabilityTransformFeedback); + b.addExecutionMode(mainFn, spv::ExecutionMode::ExecutionModeXfb); + } + + b.createFunctionCall(translated_main_, std::vector({})); + if (is_vertex_shader()) { + // gl_Position transform + auto vtx_fmt_ptr = b.createAccessChain( + spv::StorageClass::StorageClassPushConstant, push_consts_, + std::vector({b.makeUintConstant(1)})); + auto vtx_fmt = b.createLoad(vtx_fmt_ptr); + + auto p = b.createLoad(pos_); + auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, vtx_fmt, + b.makeFloatConstant(0.f)); + + // pos.w = vtx_fmt.w != 0.0 ? 
1.0 / pos.w : pos.w + auto c_w = b.createCompositeExtract(c, bool_type_, 3); + auto p_w = b.createCompositeExtract(p, float_type_, 3); + auto p_w_inv = b.createBinOp(spv::Op::OpFDiv, float_type_, + b.makeFloatConstant(1.f), p_w); + p_w = b.createTriOp(spv::Op::OpSelect, float_type_, c_w, p_w_inv, p_w); + + // pos.xyz = vtx_fmt.xyz != 0.0 ? pos.xyz / pos.w : pos.xyz + auto p_all_w = b.smearScalar(spv::Decoration::DecorationInvariant, p_w, + vec4_float_type_); + auto p_inv = b.createBinOp(spv::Op::OpFDiv, vec4_float_type_, p, p_all_w); + p = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, p_inv, p); + + // Reinsert w + p = b.createCompositeInsert(p_w, p, vec4_float_type_, 3); + + b.createStore(p, pos_); + } + + b.makeReturn(false); + std::vector spirv_words; b.dump(spirv_words); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index aabd6fec1..8138bbdc9 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -78,6 +78,9 @@ class SpirvShaderTranslator : public ShaderTranslator { std::unique_ptr builder_; spv::Id glsl_std_450_instruction_set_ = 0; + // Generated function + spv::Function* translated_main_ = 0; + // Types. spv::Id float_type_ = 0, bool_type_ = 0; spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0; @@ -93,6 +96,7 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id consts_ = 0, a0_ = 0, aL_ = 0, p0_ = 0; spv::Id ps_ = 0, pv_ = 0; // IDs of previous results spv::Id pos_ = 0; + spv::Id push_consts_ = 0; spv::Id interpolators_ = 0; spv::Id frag_outputs_ = 0; From b3faba50a323244a091d85779d46448889d4bf63 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 21 Feb 2016 10:38:58 -0800 Subject: [PATCH 035/145] Pushing constants. 
--- src/xenia/gpu/spirv_shader_translator.cc | 36 +++++----- src/xenia/gpu/spirv_shader_translator.h | 22 ++++++ src/xenia/gpu/vulkan/pipeline_cache.cc | 85 ++++++++++++++++++------ src/xenia/gpu/vulkan/pipeline_cache.h | 2 + 4 files changed, 111 insertions(+), 34 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index f6f2ba229..ec19f76c0 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -133,24 +133,30 @@ void SpirvShaderTranslator::StartTranslation() { b.addDecoration(consts_, spv::Decoration::DecorationBinding, 1); } - // Push constants. - Id push_constants_type = - b.makeStructType({vec4_float_type_, vec4_float_type_, vec4_float_type_}, - "push_consts_type"); - - b.addMemberDecoration(push_constants_type, 0, - spv::Decoration::DecorationOffset, 0); + // Push constants, represented by SpirvPushConstants. + Id push_constants_type = b.makeStructType( + {vec4_float_type_, vec4_float_type_, vec4_float_type_, uint_type}, + "push_consts_type"); + // float4 window_scale; + b.addMemberDecoration( + push_constants_type, 0, spv::Decoration::DecorationOffset, + static_cast(offsetof(SpirvPushConstants, window_scale))); b.addMemberName(push_constants_type, 0, "window_scale"); - - b.addMemberDecoration(push_constants_type, 1, - spv::Decoration::DecorationOffset, 4 * sizeof(float)); + // float4 vtx_fmt; + b.addMemberDecoration( + push_constants_type, 1, spv::Decoration::DecorationOffset, + static_cast(offsetof(SpirvPushConstants, vtx_fmt))); b.addMemberName(push_constants_type, 1, "vtx_fmt"); - - b.addMemberDecoration(push_constants_type, 2, - spv::Decoration::DecorationOffset, - 2 * 4 * sizeof(float)); + // float4 alpha_test; + b.addMemberDecoration( + push_constants_type, 2, spv::Decoration::DecorationOffset, + static_cast(offsetof(SpirvPushConstants, alpha_test))); b.addMemberName(push_constants_type, 2, "alpha_test"); - + // uint ps_param_gen; + 
b.addMemberDecoration( + push_constants_type, 3, spv::Decoration::DecorationOffset, + static_cast(offsetof(SpirvPushConstants, ps_param_gen))); + b.addMemberName(push_constants_type, 3, "ps_param_gen"); push_consts_ = b.createVariable(spv::StorageClass::StorageClassPushConstant, push_constants_type, "push_consts"); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 8138bbdc9..1ec006d50 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -22,6 +22,28 @@ namespace xe { namespace gpu { +// Push constants embedded within the command buffer. +// The total size of this struct must be <= 128b (as that's the commonly +// supported size). +struct SpirvPushConstants { + // Accessible to vertex shader only: + float window_scale[4]; // sx,sy, ?, ? + float vtx_fmt[4]; + + // Accessible to fragment shader only: + float alpha_test[4]; // alpha test enable, func, ref, ? + uint32_t ps_param_gen; +}; +static_assert(sizeof(SpirvPushConstants) <= 128, + "Push constants must fit <= 128b"); +constexpr uint32_t kSpirvPushConstantVertexRangeOffset = 0; +constexpr uint32_t kSpirvPushConstantVertexRangeSize = (sizeof(float) * 4) * 2; +constexpr uint32_t kSpirvPushConstantFragmentRangeOffset = + kSpirvPushConstantVertexRangeSize; +constexpr uint32_t kSpirvPushConstantFragmentRangeSize = + (sizeof(float) * 4) + sizeof(uint32_t); +constexpr uint32_t kSpirvPushConstantsSize = sizeof(SpirvPushConstants); + class SpirvShaderTranslator : public ShaderTranslator { public: SpirvShaderTranslator(); diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index e86220f61..a8938e407 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -59,13 +59,12 @@ PipelineCache::PipelineCache( // Push constants used for draw parameters. // We need to keep these under 128b across all stages. 
- VkPushConstantRange push_constant_ranges[2]; - push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + // TODO(benvanik): split between the stages? + VkPushConstantRange push_constant_ranges[1]; + push_constant_ranges[0].stageFlags = + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; push_constant_ranges[0].offset = 0; - push_constant_ranges[0].size = sizeof(float) * 16; - push_constant_ranges[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - push_constant_ranges[1].offset = sizeof(float) * 16; - push_constant_ranges[1].size = sizeof(int); + push_constant_ranges[0].size = kSpirvPushConstantsSize; // Shared pipeline layout. VkPipelineLayoutCreateInfo pipeline_layout_info; @@ -511,26 +510,74 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, // TODO(benvanik): push constants. - bool push_constants_dirty = full_update; + bool push_constants_dirty = full_update || viewport_state_dirty; push_constants_dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); push_constants_dirty |= SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); + push_constants_dirty |= + SetShadowRegister(®s.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL); + push_constants_dirty |= + SetShadowRegister(®s.rb_alpha_ref, XE_GPU_REG_RB_ALPHA_REF); + if (push_constants_dirty) { + xenos::xe_gpu_program_cntl_t program_cntl; + program_cntl.dword_0 = regs.sq_program_cntl; - xenos::xe_gpu_program_cntl_t program_cntl; - program_cntl.dword_0 = regs.sq_program_cntl; + // Normal vertex shaders only, for now. + // TODO(benvanik): transform feedback/memexport. + // https://github.com/freedreno/freedreno/blob/master/includes/a2xx.xml.h + // 0 = normal + // 2 = point size + assert_true(program_cntl.vs_export_mode == 0 || + program_cntl.vs_export_mode == 2); - // Populate a register in the pixel shader with frag coord. - int ps_param_gen = (regs.sq_context_misc >> 8) & 0xFF; - // draw_batcher_.set_ps_param_gen(program_cntl.param_gen ? 
ps_param_gen : -1); + SpirvPushConstants push_constants; - // Normal vertex shaders only, for now. - // TODO(benvanik): transform feedback/memexport. - // https://github.com/freedreno/freedreno/blob/master/includes/a2xx.xml.h - // 0 = normal - // 2 = point size - assert_true(program_cntl.vs_export_mode == 0 || - program_cntl.vs_export_mode == 2); + // Done in VS, no need to flush state. + if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) { + push_constants.window_scale[0] = 1.0f; + push_constants.window_scale[1] = 1.0f; + } else { + push_constants.window_scale[0] = 1.0f / 2560.0f; + push_constants.window_scale[1] = -1.0f / 2560.0f; + } + + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + // VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0. + // = false: multiply the X, Y coordinates by 1/W0. + // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. + // = false: multiply the Z coordinate by 1/W0. + // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to + // get 1/W0. + float vtx_xy_fmt = (regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f; + float vtx_z_fmt = (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f; + float vtx_w0_fmt = (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f; + push_constants.vtx_fmt[0] = vtx_xy_fmt; + push_constants.vtx_fmt[1] = vtx_xy_fmt; + push_constants.vtx_fmt[2] = vtx_z_fmt; + push_constants.vtx_fmt[3] = vtx_w0_fmt; + + // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE + // Deprecated in Vulkan, implemented in shader. + // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; + // ALPHATESTENABLE + push_constants.alpha_test[0] = + (regs.rb_colorcontrol & 0x8) != 0 ? 1.0f : 0.0f; + // ALPHAFUNC + push_constants.alpha_test[1] = + static_cast(regs.rb_colorcontrol & 0x7); + // ALPHAREF + push_constants.alpha_test[2] = regs.rb_alpha_ref; + + // Whether to populate a register in the pixel shader with frag coord. 
+ int ps_param_gen = (regs.sq_context_misc >> 8) & 0xFF; + push_constants.ps_param_gen = program_cntl.param_gen ? ps_param_gen : -1; + + vkCmdPushConstants( + command_buffer, pipeline_layout_, + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, + kSpirvPushConstantsSize, &push_constants); + } return true; } diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 7d35fc496..3e623f14e 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -262,6 +262,8 @@ class PipelineCache { uint32_t sq_program_cntl; uint32_t sq_context_misc; + uint32_t rb_colorcontrol; + float rb_alpha_ref; SetDynamicStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } From 32c4f3ce24b69324e040cf2d48bc863c46bfde0e Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 21 Feb 2016 10:46:47 -0800 Subject: [PATCH 036/145] Fixing pool shutdown. --- src/xenia/ui/vulkan/fenced_pools.cc | 1 + src/xenia/ui/vulkan/fenced_pools.h | 30 +++++++++++++++++------------ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/xenia/ui/vulkan/fenced_pools.cc b/src/xenia/ui/vulkan/fenced_pools.cc index 0737b98c4..dc79a9869 100644 --- a/src/xenia/ui/vulkan/fenced_pools.cc +++ b/src/xenia/ui/vulkan/fenced_pools.cc @@ -53,6 +53,7 @@ CommandBufferPool::CommandBufferPool(VkDevice device, } CommandBufferPool::~CommandBufferPool() { + FreeAllEntries(); vkDestroyCommandPool(device_, command_pool_, nullptr); command_pool_ = nullptr; } diff --git a/src/xenia/ui/vulkan/fenced_pools.h b/src/xenia/ui/vulkan/fenced_pools.h index 3bc7e30f6..a50f82d08 100644 --- a/src/xenia/ui/vulkan/fenced_pools.h +++ b/src/xenia/ui/vulkan/fenced_pools.h @@ -33,18 +33,9 @@ class BaseFencedPool { // TODO(benvanik): wait on fence until done. assert_null(pending_batch_list_head_); - // Run down free lists. 
- while (free_batch_list_head_) { - auto batch = free_batch_list_head_; - free_batch_list_head_ = batch->next; - delete batch; - } - while (free_entry_list_head_) { - auto entry = free_entry_list_head_; - free_entry_list_head_ = entry->next; - static_cast(this)->FreeEntry(entry->handle); - delete entry; - } + // Subclasses must call FreeAllEntries() to properly clean up things. + assert_null(free_batch_list_head_); + assert_null(free_entry_list_head_); } // True if one or more batches are still pending on the GPU. @@ -159,6 +150,21 @@ class BaseFencedPool { free_entry_list_head_ = entry; } + void FreeAllEntries() { + // Run down free lists. + while (free_batch_list_head_) { + auto batch = free_batch_list_head_; + free_batch_list_head_ = batch->next; + delete batch; + } + while (free_entry_list_head_) { + auto entry = free_entry_list_head_; + free_entry_list_head_ = entry->next; + static_cast(this)->FreeEntry(entry->handle); + delete entry; + } + } + VkDevice device_ = nullptr; private: From bd0fb81b22008b1eaa23daf796eebefabb4d6046 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 12:46:54 -0600 Subject: [PATCH 037/145] Apply window scaling --- src/xenia/gpu/spirv_shader_translator.cc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index ec19f76c0..51d989cfe 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -269,7 +269,11 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { auto vtx_fmt_ptr = b.createAccessChain( spv::StorageClass::StorageClassPushConstant, push_consts_, std::vector({b.makeUintConstant(1)})); + auto window_scale_ptr = b.createAccessChain( + spv::StorageClass::StorageClassPushConstant, push_consts_, + std::vector({b.makeUintConstant(0)})); auto vtx_fmt = b.createLoad(vtx_fmt_ptr); + auto window_scale = b.createLoad(window_scale_ptr); auto p = b.createLoad(pos_); auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, vtx_fmt, @@ -291,6 +295,16 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { // Reinsert w p = b.createCompositeInsert(p_w, p, vec4_float_type_, 3); + // Apply window scaling + // pos.xy *= window_scale.xy + auto p_scaled = b.createUnaryOp(spv::Op::OpCopyObject, vec4_float_type_, p); + p_scaled = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, p_scaled, + window_scale); + + std::vector operands({p, p_scaled}); + p = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, + {p, p_scaled, 4, 5, 2, 3}); + b.createStore(p, pos_); } From b83b9c28a13cf9f0864723a238111b3af9a25f7f Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 13:19:08 -0600 Subject: [PATCH 038/145] Fix push constants declaration --- src/xenia/gpu/spirv_shader_translator.cc | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 51d989cfe..b33cc7a75 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -137,6 +137,8 @@ void SpirvShaderTranslator::StartTranslation() { Id push_constants_type = b.makeStructType( {vec4_float_type_, vec4_float_type_, vec4_float_type_, uint_type}, "push_consts_type"); + b.addDecoration(push_constants_type, spv::Decoration::DecorationBlock); + // float4 window_scale; b.addMemberDecoration( push_constants_type, 0, spv::Decoration::DecorationOffset, @@ -297,11 +299,8 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { // Apply window scaling // pos.xy *= window_scale.xy - auto p_scaled = b.createUnaryOp(spv::Op::OpCopyObject, vec4_float_type_, p); - p_scaled = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, p_scaled, - window_scale); - - std::vector operands({p, p_scaled}); + auto p_scaled = + b.createBinOp(spv::Op::OpFMul, vec4_float_type_, p, window_scale); p = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, {p, p_scaled, 4, 5, 2, 3}); @@ -591,8 +590,6 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( const ParsedAluInstruction& instr) { auto& b = *builder_; - // TODO: instr.is_predicated - Id sources[3] = {0}; Id dest = 0; for (size_t i = 0; i < instr.operand_count; i++) { @@ -732,8 +729,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( const ParsedAluInstruction& instr) { auto& b = *builder_; - // TODO: instr.is_predicated - Id sources[3] = {0}; Id dest = 0; for (size_t i = 0, x = 0; i < instr.operand_count; i++) { From 8bf5eba098d1505deb81d45ef50b1af8906e0cc4 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 21 Feb 2016 12:44:16 -0800 Subject: [PATCH 039/145] 
Capture exe with renderdoc and pass --vulkan_renderdoc_capture_all. --- .../gpu/vulkan/vulkan_command_processor.cc | 10 + src/xenia/gpu/vulkan/vulkan_gpu_flags.cc | 3 + src/xenia/gpu/vulkan/vulkan_gpu_flags.h | 2 + src/xenia/ui/vulkan/vulkan_device.cc | 27 + src/xenia/ui/vulkan/vulkan_device.h | 9 + src/xenia/ui/vulkan/vulkan_instance.cc | 44 ++ src/xenia/ui/vulkan/vulkan_instance.h | 11 + third_party/renderdoc/renderdoc_app.h | 524 ++++++++++++++++++ 8 files changed, 630 insertions(+) create mode 100644 third_party/renderdoc/renderdoc_app.h diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 646b050fb..33483791a 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -163,6 +163,11 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, IndexBufferInfo* index_buffer_info) { auto& regs = *register_file_; + // TODO(benvanik): move to CP or to host (trace dump, etc). + if (FLAGS_vulkan_renderdoc_capture_all && device_->is_renderdoc_attached()) { + device_->BeginRenderDocFrameCapture(); + } + #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES @@ -313,6 +318,11 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, } vkDestroyFence(*device_, fence, nullptr); + // TODO(benvanik): move to CP or to host (trace dump, etc). 
+ if (FLAGS_vulkan_renderdoc_capture_all && device_->is_renderdoc_attached()) { + device_->EndRenderDocFrameCapture(); + } + return true; } diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc b/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc index 675e60476..1f018db54 100644 --- a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc +++ b/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc @@ -8,3 +8,6 @@ */ #include "xenia/gpu/vulkan/vulkan_gpu_flags.h" + +DEFINE_bool(vulkan_renderdoc_capture_all, false, + "Capture everything with RenderDoc."); diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h b/src/xenia/gpu/vulkan/vulkan_gpu_flags.h index b5a00c74a..ca83dfb7a 100644 --- a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h +++ b/src/xenia/gpu/vulkan/vulkan_gpu_flags.h @@ -14,4 +14,6 @@ #define FINE_GRAINED_DRAW_SCOPES 1 +DECLARE_bool(vulkan_renderdoc_capture_all); + #endif // XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ diff --git a/src/xenia/ui/vulkan/vulkan_device.cc b/src/xenia/ui/vulkan/vulkan_device.cc index ded29212d..42077ca82 100644 --- a/src/xenia/ui/vulkan/vulkan_device.cc +++ b/src/xenia/ui/vulkan/vulkan_device.cc @@ -15,12 +15,15 @@ #include #include +#include "third_party/renderdoc/renderdoc_app.h" + #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/profiling.h" #include "xenia/ui/vulkan/vulkan.h" #include "xenia/ui/vulkan/vulkan_immediate_drawer.h" +#include "xenia/ui/vulkan/vulkan_instance.h" #include "xenia/ui/vulkan/vulkan_util.h" #include "xenia/ui/window.h" @@ -212,6 +215,30 @@ void VulkanDevice::ReleaseQueue(VkQueue queue) { free_queues_.push_back(queue); } +bool VulkanDevice::is_renderdoc_attached() const { + return instance_->is_renderdoc_attached(); +} + +void VulkanDevice::BeginRenderDocFrameCapture() { + auto api = reinterpret_cast(instance_->renderdoc_api()); + if (!api) { + return; + } + assert_true(api->IsFrameCapturing() == 0); + + api->StartFrameCapture(nullptr, nullptr); +} + +void 
VulkanDevice::EndRenderDocFrameCapture() { + auto api = reinterpret_cast(instance_->renderdoc_api()); + if (!api) { + return; + } + assert_true(api->IsFrameCapturing() == 1); + + api->EndFrameCapture(nullptr, nullptr); +} + VkDeviceMemory VulkanDevice::AllocateMemory( const VkMemoryRequirements& requirements, VkFlags required_properties) { // Search memory types to find one matching our requirements and our diff --git a/src/xenia/ui/vulkan/vulkan_device.h b/src/xenia/ui/vulkan/vulkan_device.h index e9b12e3fc..fcb82acaf 100644 --- a/src/xenia/ui/vulkan/vulkan_device.h +++ b/src/xenia/ui/vulkan/vulkan_device.h @@ -75,6 +75,15 @@ class VulkanDevice { // This method is thread safe. void ReleaseQueue(VkQueue queue); + // True if RenderDoc is attached and available for use. + bool is_renderdoc_attached() const; + // Begins capturing the current frame in RenderDoc, if it is attached. + // Must be paired with EndRenderDocFrameCapture. Multiple frames cannot be + // captured at the same time. + void BeginRenderDocFrameCapture(); + // Ends a capture. + void EndRenderDocFrameCapture(); + // Allocates memory of the given size matching the required properties. VkDeviceMemory AllocateMemory( const VkMemoryRequirements& requirements, diff --git a/src/xenia/ui/vulkan/vulkan_instance.cc b/src/xenia/ui/vulkan/vulkan_instance.cc index 900bf66e1..5306fb29a 100644 --- a/src/xenia/ui/vulkan/vulkan_instance.cc +++ b/src/xenia/ui/vulkan/vulkan_instance.cc @@ -15,6 +15,8 @@ #include #include +#include "third_party/renderdoc/renderdoc_app.h" + #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" @@ -58,6 +60,9 @@ bool VulkanInstance::Initialize(Window* any_target_window) { auto version = Version::Parse(VK_API_VERSION); XELOGVK("Initializing Vulkan %s...", version.pretty_string.c_str()); + // Hook into renderdoc, if it's available. + EnableRenderDoc(); + // Get all of the global layers and extensions provided by the system. 
 if (!QueryGlobals()) { XELOGE("Failed to query instance globals"); @@ -81,6 +86,45 @@ bool VulkanInstance::Initialize(Window* any_target_window) { return true; } +bool VulkanInstance::EnableRenderDoc() { + // RenderDoc injects itself into our process, so we should be able to get it. + pRENDERDOC_GetAPI get_api = nullptr; +#if XE_PLATFORM_WIN32 + auto module_handle = GetModuleHandle(L"renderdoc.dll"); + if (!module_handle) { + XELOGI("RenderDoc support requested but it is not attached"); + return false; + } + get_api = reinterpret_cast( + GetProcAddress(module_handle, "RENDERDOC_GetAPI")); +#else +// TODO(benvanik): dlsym/etc - abstracted in base/. +#endif // XE_PLATFORM_WIN32 + if (!get_api) { + XELOGI("RenderDoc support requested but it is not attached"); + return false; + } + + // Request all API function pointers. + if (!get_api(eRENDERDOC_API_Version_1_0_1, + reinterpret_cast(&renderdoc_api_))) { + XELOGE("RenderDoc found but was unable to get API - version mismatch?"); + return false; + } + auto api = reinterpret_cast(renderdoc_api_); + + // Query version. + int major; + int minor; + int patch; + api->GetAPIVersion(&major, &minor, &patch); + XELOGI("RenderDoc attached; %d.%d.%d", major, minor, patch); + + is_renderdoc_attached_ = true; + + return true; +} + bool VulkanInstance::QueryGlobals() { // Scan global layers and accumulate properties. // We do this in a loop so that we can allocate the required amount of diff --git a/src/xenia/ui/vulkan/vulkan_instance.h b/src/xenia/ui/vulkan/vulkan_instance.h index c292f3020..88ad1bef4 100644 --- a/src/xenia/ui/vulkan/vulkan_instance.h +++ b/src/xenia/ui/vulkan/vulkan_instance.h @@ -58,7 +58,15 @@ class VulkanInstance { return available_devices_; } + // True if RenderDoc is attached and available for use. + bool is_renderdoc_attached() const { return is_renderdoc_attached_; } + // RenderDoc API handle, if attached. 
+ void* renderdoc_api() const { return renderdoc_api_; } + private: + // Attempts to enable RenderDoc via the API, if it is attached. + bool EnableRenderDoc(); + // Queries the system to find global extensions and layers. bool QueryGlobals(); @@ -86,6 +94,9 @@ class VulkanInstance { std::vector available_devices_; VkDebugReportCallbackEXT dbg_report_callback_ = nullptr; + + void* renderdoc_api_ = nullptr; + bool is_renderdoc_attached_ = false; }; } // namespace vulkan diff --git a/third_party/renderdoc/renderdoc_app.h b/third_party/renderdoc/renderdoc_app.h new file mode 100644 index 000000000..0e59b4d43 --- /dev/null +++ b/third_party/renderdoc/renderdoc_app.h @@ -0,0 +1,524 @@ +/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2015-2016 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ ******************************************************************************/ + +#pragma once + +#if !defined(RENDERDOC_NO_STDINT) +#include +#endif + +#if defined(WIN32) + #define RENDERDOC_CC __cdecl +#elif defined(__linux__) + #define RENDERDOC_CC +#else + #error "Unknown platform" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc capture options +// + +typedef enum +{ + // Allow the application to enable vsync + // + // Default - enabled + // + // 1 - The application can enable or disable vsync at will + // 0 - vsync is force disabled + eRENDERDOC_Option_AllowVSync = 0, + + // Allow the application to enable fullscreen + // + // Default - enabled + // + // 1 - The application can enable or disable fullscreen at will + // 0 - fullscreen is force disabled + eRENDERDOC_Option_AllowFullscreen = 1, + + // Record API debugging events and messages + // + // Default - disabled + // + // 1 - Enable built-in API debugging features and records the results into + // the capture logfile, which is matched up with events on replay + // 0 - no API debugging is forcibly enabled + eRENDERDOC_Option_DebugDeviceMode = 2, + + // Capture CPU callstacks for API events + // + // Default - disabled + // + // 1 - Enables capturing of callstacks + // 0 - no callstacks are captured + eRENDERDOC_Option_CaptureCallstacks = 3, + + // When capturing CPU callstacks, only capture them from drawcalls. + // This option does nothing without the above option being enabled + // + // Default - disabled + // + // 1 - Only captures callstacks for drawcall type API events. + // Ignored if CaptureCallstacks is disabled + // 0 - Callstacks, if enabled, are captured for every event. + eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4, + + // Specify a delay in seconds to wait for a debugger to attach, after + // creating or injecting into a process, before continuing to allow it to run. 
+ // + // 0 indicates no delay, and the process will run immediately after injection + // + // Default - 0 seconds + // + eRENDERDOC_Option_DelayForDebugger = 5, + + // Verify any writes to mapped buffers, by checking the memory after the + // bounds of the returned pointer to detect any modification. + // + // Default - disabled + // + // 1 - Verify any writes to mapped buffers + // 0 - No verification is performed, and overwriting bounds may cause + // crashes or corruption in RenderDoc + eRENDERDOC_Option_VerifyMapWrites = 6, + + // Hooks any system API calls that create child processes, and injects + // RenderDoc into them recursively with the same options. + // + // Default - disabled + // + // 1 - Hooks into spawned child processes + // 0 - Child processes are not hooked by RenderDoc + eRENDERDOC_Option_HookIntoChildren = 7, + + // By default RenderDoc only includes resources in the final logfile necessary + // for that frame, this allows you to override that behaviour. + // + // Default - disabled + // + // 1 - all live resources at the time of capture are included in the log + // and available for inspection + // 0 - only the resources referenced by the captured frame are included + eRENDERDOC_Option_RefAllResources = 8, + + // By default RenderDoc skips saving initial states for resources where the + // previous contents don't appear to be used, assuming that writes before + // reads indicate previous contents aren't used. + // + // Default - disabled + // + // 1 - initial contents at the start of each captured frame are saved, even if + // they are later overwritten or cleared before being used. + // 0 - unless a read is detected, initial contents will not be saved and will + // appear as black or empty data. + eRENDERDOC_Option_SaveAllInitials = 9, + + // In APIs that allow for the recording of command lists to be replayed later, + // RenderDoc may choose to not capture command lists before a frame capture is + // triggered, to reduce overheads. 
This means any command lists recorded once + // and replayed many times will not be available and may cause a failure to + // capture. + // + // Note this is only true for APIs where multithreading is difficult or + // discouraged. Newer APIs like Vulkan and D3D12 will ignore this option + // and always capture all command lists since the API is heavily oriented + // around it and the overheads have been reduced by API design. + // + // 1 - All command lists are captured from the start of the application + // 0 - Command lists are only captured if their recording begins during + // the period when a frame capture is in progress. + eRENDERDOC_Option_CaptureAllCmdLists = 10, + + // Mute API debugging output when the debug device mode option is enabled + // + // Default - enabled + // + // 1 - Mute any API debug messages from being displayed or passed through + // 0 - API debugging is displayed as normal + eRENDERDOC_Option_DebugOutputMute = 11, + +} RENDERDOC_CaptureOption; + +// Sets an option that controls how RenderDoc behaves on capture. 
+// +// Returns 1 if the option and value are valid +// Returns 0 if either is invalid and the option is unchanged +typedef int (RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val); +typedef int (RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val); + +// Gets the current value of an option as a uint32_t +// +// If the option is invalid, 0xffffffff is returned +typedef uint32_t (RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt); + +// Gets the current value of an option as a float +// +// If the option is invalid, -FLT_MAX is returned +typedef float (RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt); + +typedef enum +{ + // '0' - '9' matches ASCII values + eRENDERDOC_Key_0 = 0x30, + eRENDERDOC_Key_1 = 0x31, + eRENDERDOC_Key_2 = 0x32, + eRENDERDOC_Key_3 = 0x33, + eRENDERDOC_Key_4 = 0x34, + eRENDERDOC_Key_5 = 0x35, + eRENDERDOC_Key_6 = 0x36, + eRENDERDOC_Key_7 = 0x37, + eRENDERDOC_Key_8 = 0x38, + eRENDERDOC_Key_9 = 0x39, + + // 'A' - 'Z' matches ASCII values + eRENDERDOC_Key_A = 0x41, + eRENDERDOC_Key_B = 0x42, + eRENDERDOC_Key_C = 0x43, + eRENDERDOC_Key_D = 0x44, + eRENDERDOC_Key_E = 0x45, + eRENDERDOC_Key_F = 0x46, + eRENDERDOC_Key_G = 0x47, + eRENDERDOC_Key_H = 0x48, + eRENDERDOC_Key_I = 0x49, + eRENDERDOC_Key_J = 0x4A, + eRENDERDOC_Key_K = 0x4B, + eRENDERDOC_Key_L = 0x4C, + eRENDERDOC_Key_M = 0x4D, + eRENDERDOC_Key_N = 0x4E, + eRENDERDOC_Key_O = 0x4F, + eRENDERDOC_Key_P = 0x50, + eRENDERDOC_Key_Q = 0x51, + eRENDERDOC_Key_R = 0x52, + eRENDERDOC_Key_S = 0x53, + eRENDERDOC_Key_T = 0x54, + eRENDERDOC_Key_U = 0x55, + eRENDERDOC_Key_V = 0x56, + eRENDERDOC_Key_W = 0x57, + eRENDERDOC_Key_X = 0x58, + eRENDERDOC_Key_Y = 0x59, + eRENDERDOC_Key_Z = 0x5A, + + // leave the rest of the ASCII range free + // in case we want to use it later + eRENDERDOC_Key_NonPrintable = 0x100, + + eRENDERDOC_Key_Divide, + eRENDERDOC_Key_Multiply, + eRENDERDOC_Key_Subtract, + 
eRENDERDOC_Key_Plus, + + eRENDERDOC_Key_F1, + eRENDERDOC_Key_F2, + eRENDERDOC_Key_F3, + eRENDERDOC_Key_F4, + eRENDERDOC_Key_F5, + eRENDERDOC_Key_F6, + eRENDERDOC_Key_F7, + eRENDERDOC_Key_F8, + eRENDERDOC_Key_F9, + eRENDERDOC_Key_F10, + eRENDERDOC_Key_F11, + eRENDERDOC_Key_F12, + + eRENDERDOC_Key_Home, + eRENDERDOC_Key_End, + eRENDERDOC_Key_Insert, + eRENDERDOC_Key_Delete, + eRENDERDOC_Key_PageUp, + eRENDERDOC_Key_PageDn, + + eRENDERDOC_Key_Backspace, + eRENDERDOC_Key_Tab, + eRENDERDOC_Key_PrtScrn, + eRENDERDOC_Key_Pause, + + eRENDERDOC_Key_Max, +} RENDERDOC_InputButton; + +// Sets which key or keys can be used to toggle focus between multiple windows +// +// If keys is NULL or num is 0, toggle keys will be disabled +typedef void (RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num); + +// Sets which key or keys can be used to capture the next frame +// +// If keys is NULL or num is 0, captures keys will be disabled +typedef void (RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num); + +typedef enum +{ + // This single bit controls whether the overlay is enabled or disabled globally + eRENDERDOC_Overlay_Enabled = 0x1, + + // Show the average framerate over several seconds as well as min/max + eRENDERDOC_Overlay_FrameRate = 0x2, + + // Show the current frame number + eRENDERDOC_Overlay_FrameNumber = 0x4, + + // Show a list of recent captures, and how many captures have been made + eRENDERDOC_Overlay_CaptureList = 0x8, + + // Default values for the overlay mask + eRENDERDOC_Overlay_Default = + (eRENDERDOC_Overlay_Enabled| + eRENDERDOC_Overlay_FrameRate| + eRENDERDOC_Overlay_FrameNumber| + eRENDERDOC_Overlay_CaptureList), + + // Enable all bits + eRENDERDOC_Overlay_All = ~0U, + + // Disable all bits + eRENDERDOC_Overlay_None = 0, +} RENDERDOC_OverlayBits; + +// returns the overlay bits that have been set +typedef uint32_t (RENDERDOC_CC *pRENDERDOC_GetOverlayBits)(); +// sets the overlay bits with an and & or mask 
+typedef void (RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or); + +// this function will attempt to shut down RenderDoc. +// +// Note: that this will only work correctly if done immediately after +// the dll is loaded, before any API work happens. RenderDoc will remove its +// injected hooks and shut down. Behaviour is undefined if this is called +// after any API functions have been called. +typedef void (RENDERDOC_CC *pRENDERDOC_Shutdown)(); + +// This function will unload RenderDoc's crash handler. +// +// If you use your own crash handler and don't want RenderDoc's handler to +// intercede, you can call this function to unload it and any unhandled +// exceptions will pass to the next handler. +typedef void (RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)(); + +// Sets the logfile path template +// +// logfile is a UTF-8 string that gives a template for how captures will be named +// and where they will be saved. +// +// Any extension is stripped off the path, and captures are saved in the directory +// specified, and named with the filename and the frame number appended. If the +// directory does not exist it will be created, including any parent directories. +// +// If pathtemplate is NULL, the template will remain unchanged +// +// Example: +// +// SetLogFilePathTemplate("my_captures/example"); +// +// Capture #1 -> my_captures/example_frame123.rdc +// Capture #2 -> my_captures/example_frame456.rdc +typedef void (RENDERDOC_CC *pRENDERDOC_SetLogFilePathTemplate)(const char *pathtemplate); + +// returns the current logfile template, see SetLogFileTemplate above, as a UTF-8 string +typedef const char* (RENDERDOC_CC *pRENDERDOC_GetLogFilePathTemplate)(); + +// returns the number of captures that have been made +typedef uint32_t (RENDERDOC_CC *pRENDERDOC_GetNumCaptures)(); + +// This function returns the details of a capture, by index. New captures are added +// to the end of the list. 
+// +// logfile will be filled with the absolute path to the capture file, as a UTF-8 string +// pathlength will be written with the length in bytes of the logfile string +// timestamp will be written with the time of the capture, in seconds since the Unix epoch +// +// Any of the parameters can be NULL and they'll be skipped. +// +// The function will return 1 if the capture index is valid, or 0 if the index is invalid +// If the index is invalid, the values will be unchanged +// +// Note: when captures are deleted in the UI they will remain in this list, so the +// logfile path may not exist anymore. +typedef uint32_t (RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *logfile, uint32_t *pathlength, uint64_t *timestamp); + +// capture the next frame on whichever window and API is currently considered active +typedef void (RENDERDOC_CC *pRENDERDOC_TriggerCapture)(); + +// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise +typedef uint32_t (RENDERDOC_CC *pRENDERDOC_IsRemoteAccessConnected)(); + +// This function will launch the Replay UI associated with the RenderDoc library injected +// into the running application. +// +// if connectRemoteAccess is 1, the Replay UI will be launched with a command line parameter +// to connect to this application +// cmdline is the rest of the command line, as a UTF-8 string. E.g. a captures to open +// if cmdline is NULL, the command line will be empty. +// +// returns the PID of the replay UI if successful, 0 if not successful. +typedef uint32_t (RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectRemoteAccess, const char *cmdline); + +// RenderDoc can return a higher version than requested if it's backwards compatible, +// this function returns the actual version returned. If a parameter is NULL, it will be +// ignored and the others will be filled out. 
+typedef void (RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch); + +////////////////////////////////////////////////////////////////////////// +// Capturing functions +// + +// A device pointer is a pointer to the API's root handle. +// +// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc +typedef void* RENDERDOC_DevicePointer; + +// A window handle is the OS's native window handle +// +// This would be an HWND, GLXDrawable, etc +typedef void* RENDERDOC_WindowHandle; + +// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will +// respond to keypresses. Neither parameter can be NULL +typedef void (RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); + +// When choosing either a device pointer or a window handle to capture, you can pass NULL. +// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify +// any API rendering to a specific window, or a specific API instance rendering to any window, +// or in the simplest case of one window and one API, you can just pass NULL for both. +// +// In either case, if there are two or more possible matching (device,window) pairs it +// is undefined which one will be captured. +// +// Note: for headless rendering you can pass NULL for the window handle and either specify +// a device pointer or leave it NULL as above. + +// Immediately starts capturing API calls on the specified device pointer and window handle. +// +// If there is no matching thing to capture (e.g. no supported API has been initialised), +// this will do nothing. +// +// The results are undefined (including crashes) if two captures are started overlapping, +// even on separate devices and/or windows. 
+typedef void (RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); + +// Returns whether or not a frame capture is currently ongoing anywhere. +// +// This will return 1 if a capture is ongoing, and 0 if there is no capture running +typedef uint32_t (RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)(); + +// Ends capturing immediately. +// +// This will return 1 if the capture succeeded, and 0 if there was an error capturing. +typedef uint32_t (RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API versions +// + +// RenderDoc uses semantic versioning (http://semver.org/). +// +// MAJOR version is incremented when incompatible API changes happen. +// MINOR version is incremented when functionality is added in a backwards-compatible manner. +// PATCH version is incremented when backwards-compatible bug fixes happen. +// +// Note that this means the API returned can be higher than the one you might have requested. +// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned +// instead of 1.0.0. You can check this with the GetAPIVersion entry point +typedef enum +{ + eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00 + eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01 +} RENDERDOC_Version; + +// API version changelog: +// +// 1.0.0 - initial release +// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered +// by keypress or TriggerCapture, instead of Start/EndFrameCapture. 
+ +// eRENDERDOC_API_Version_1_0_1 +typedef struct +{ + pRENDERDOC_GetAPIVersion GetAPIVersion; + + pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32; + pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32; + + pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32; + pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32; + + pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys; + pRENDERDOC_SetCaptureKeys SetCaptureKeys; + + pRENDERDOC_GetOverlayBits GetOverlayBits; + pRENDERDOC_MaskOverlayBits MaskOverlayBits; + + pRENDERDOC_Shutdown Shutdown; + pRENDERDOC_UnloadCrashHandler UnloadCrashHandler; + + pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate; + pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate; + + pRENDERDOC_GetNumCaptures GetNumCaptures; + pRENDERDOC_GetCapture GetCapture; + + pRENDERDOC_TriggerCapture TriggerCapture; + + pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected; + pRENDERDOC_LaunchReplayUI LaunchReplayUI; + + pRENDERDOC_SetActiveWindow SetActiveWindow; + + pRENDERDOC_StartFrameCapture StartFrameCapture; + pRENDERDOC_IsFrameCapturing IsFrameCapturing; + pRENDERDOC_EndFrameCapture EndFrameCapture; +} RENDERDOC_API_1_0_1; + +typedef RENDERDOC_API_1_0_1 RENDERDOC_API_1_0_0; + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API entry point +// +// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available. +// +// The name is the same as the typedef - "RENDERDOC_GetAPI" +// +// This function is not thread safe, and should not be called on multiple threads at once. +// Ideally, call this once as early as possible in your application's startup, before doing +// any API work, since some configuration functionality etc has to be done also before +// initialising any APIs. +// +// Parameters: +// version is a single value from the RENDERDOC_Version above. 
+// +// outAPIPointers will be filled out with a pointer to the corresponding struct of function +// pointers. +// +// Returns: +// 1 - if the outAPIPointers has been filled with a pointer to the API struct requested +// 0 - if the requested version is not supported or the arguments are invalid. +// +typedef int (RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers); + +#ifdef __cplusplus +} // extern "C" +#endif From 06ba273492b9adc915d63be85d27303e298a1280 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 21 Feb 2016 14:26:16 -0800 Subject: [PATCH 040/145] Warn and be ok with symbol services failing. --- src/xenia/cpu/processor.cc | 9 +++++++-- src/xenia/cpu/stack_walker.h | 1 + src/xenia/cpu/stack_walker_win.cc | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/xenia/cpu/processor.cc b/src/xenia/cpu/processor.cc index eaa9165c8..19e9dfaeb 100644 --- a/src/xenia/cpu/processor.cc +++ b/src/xenia/cpu/processor.cc @@ -140,10 +140,15 @@ bool Processor::Setup() { frontend_ = std::move(frontend); // Stack walker is used when profiling, debugging, and dumping. + // Note that creation may fail, in which case we'll have to disable those + // features. stack_walker_ = StackWalker::Create(backend_->code_cache()); if (!stack_walker_) { - XELOGE("Unable to create stack walker"); - return false; + // TODO(benvanik): disable features. + if (FLAGS_debug) { + XELOGW("Disabling --debug due to lack of stack walker"); + FLAGS_debug = false; + } } // Open the trace data path, if requested. diff --git a/src/xenia/cpu/stack_walker.h b/src/xenia/cpu/stack_walker.h index 0d120d1e6..d9cabaf6a 100644 --- a/src/xenia/cpu/stack_walker.h +++ b/src/xenia/cpu/stack_walker.h @@ -58,6 +58,7 @@ struct StackFrame { class StackWalker { public: // Creates a stack walker. Only one should exist within a process. + // May fail if another process has mucked with ours (like RenderDoc). 
static std::unique_ptr Create(backend::CodeCache* code_cache); // Dumps all thread stacks to the log. diff --git a/src/xenia/cpu/stack_walker_win.cc b/src/xenia/cpu/stack_walker_win.cc index 50ec5707f..9ff6b220c 100644 --- a/src/xenia/cpu/stack_walker_win.cc +++ b/src/xenia/cpu/stack_walker_win.cc @@ -106,7 +106,7 @@ bool InitializeStackWalker() { options |= SYMOPT_FAIL_CRITICAL_ERRORS; sym_set_options_(options); if (!sym_initialize_(GetCurrentProcess(), nullptr, TRUE)) { - XELOGE("Unable to initialize symbol services"); + XELOGE("Unable to initialize symbol services - already in use?"); return false; } @@ -311,7 +311,7 @@ std::unique_ptr StackWalker::Create( backend::CodeCache* code_cache) { auto stack_walker = std::make_unique(code_cache); if (!stack_walker->Initialize()) { - XELOGE("Unable to initialize stack walker"); + XELOGE("Unable to initialize stack walker: debug/save states disabled"); return nullptr; } return std::unique_ptr(stack_walker.release()); From d57f974e2ec40ffefa3e3a949106de42da4d5a36 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 21 Feb 2016 14:43:59 -0800 Subject: [PATCH 041/145] Fix buffer alloc alignment and framebuffer comparison. 
--- src/xenia/gpu/vulkan/buffer_cache.cc | 14 ++++++++------ src/xenia/gpu/vulkan/render_cache.cc | 4 ++++ src/xenia/gpu/vulkan/vulkan_command_processor.cc | 10 +++++----- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc index 1def6d26f..359f90819 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -344,11 +344,12 @@ VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize alignment, if (transient_tail_offset_ >= transient_head_offset_) { // Tail follows head, so things are easy: // | H----T | - if (transient_tail_offset_ + length <= transient_capacity_) { + if (xe::round_up(transient_tail_offset_, alignment) + length <= + transient_capacity_) { // Allocation fits from tail to end of buffer, so grow. // | H----**T | - VkDeviceSize offset = transient_tail_offset_; - transient_tail_offset_ += length; + VkDeviceSize offset = xe::round_up(transient_tail_offset_, alignment); + transient_tail_offset_ = offset + length; return offset; } else if (length + kDeadZone <= transient_head_offset_) { // Can't fit at the end, but can fit if we wrap around. @@ -360,11 +361,12 @@ VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize alignment, } else { // Head follows tail, so we're reversed: // |----T H---| - if (transient_tail_offset_ + length + kDeadZone <= transient_head_offset_) { + if (xe::round_up(transient_tail_offset_, alignment) + length + kDeadZone <= + transient_head_offset_) { // Fits from tail to head. 
// |----***T H---| - VkDeviceSize offset = transient_tail_offset_; - transient_tail_offset_ += length; + VkDeviceSize offset = xe::round_up(transient_tail_offset_, alignment); + transient_tail_offset_ = offset + length; return offset; } } diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index a9595741f..5637d44eb 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -293,6 +293,10 @@ bool CachedFramebuffer::IsCompatible( const RenderConfiguration& desired_config) const { // We already know all render pass things line up, so let's verify dimensions, // edram offsets, etc. We need an exact match. + if (desired_config.surface_pitch_px != width || + desired_config.surface_height_px != height) { + return false; + } // TODO(benvanik): separate image views from images in tiles and store in fb? for (int i = 0; i < 4; ++i) { // Ensure that the attachment points to the same tile. diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 33483791a..e568df482 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -163,11 +163,6 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, IndexBufferInfo* index_buffer_info) { auto& regs = *register_file_; - // TODO(benvanik): move to CP or to host (trace dump, etc). - if (FLAGS_vulkan_renderdoc_capture_all && device_->is_renderdoc_attached()) { - device_->BeginRenderDocFrameCapture(); - } - #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES @@ -182,6 +177,11 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, return IssueCopy(); } + // TODO(benvanik): move to CP or to host (trace dump, etc). 
+ if (FLAGS_vulkan_renderdoc_capture_all && device_->is_renderdoc_attached()) { + device_->BeginRenderDocFrameCapture(); + } + // Shaders will have already been defined by previous loads. // We need them to do just about anything so validate here. auto vertex_shader = static_cast(active_vertex_shader()); From 24743bf676f272970de99d9ce13b88fff5927689 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 21 Feb 2016 15:20:35 -0800 Subject: [PATCH 042/145] Actually configure constant descriptor set. --- src/xenia/gpu/vulkan/buffer_cache.cc | 36 +++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc index 359f90819..7fd3c4768 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -25,6 +25,9 @@ using xe::ui::vulkan::CheckResult; // Space kept between tail and head when wrapping. constexpr VkDeviceSize kDeadZone = 4 * 1024; +constexpr VkDeviceSize kConstantRegisterUniformRange = + 512 * 4 * 4 + 8 * 4 + 32 * 4; + BufferCache::BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device, size_t capacity) : register_file_(register_file), @@ -174,6 +177,34 @@ BufferCache::BufferCache(RegisterFile* register_file, err = vkAllocateDescriptorSets(device_, &set_alloc_info, &transient_descriptor_set_); CheckResult(err, "vkAllocateDescriptorSets"); + + // Initialize descriptor set with our buffers. 
+ VkDescriptorBufferInfo buffer_info; + buffer_info.buffer = transient_uniform_buffer_; + buffer_info.offset = 0; + buffer_info.range = kConstantRegisterUniformRange; + VkWriteDescriptorSet descriptor_writes[2]; + auto& vertex_uniform_binding_write = descriptor_writes[0]; + vertex_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vertex_uniform_binding_write.pNext = nullptr; + vertex_uniform_binding_write.dstSet = transient_descriptor_set_; + vertex_uniform_binding_write.dstBinding = 0; + vertex_uniform_binding_write.dstArrayElement = 0; + vertex_uniform_binding_write.descriptorCount = 1; + vertex_uniform_binding_write.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + vertex_uniform_binding_write.pBufferInfo = &buffer_info; + auto& fragment_uniform_binding_write = descriptor_writes[1]; + fragment_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + fragment_uniform_binding_write.pNext = nullptr; + fragment_uniform_binding_write.dstSet = transient_descriptor_set_; + fragment_uniform_binding_write.dstBinding = 1; + fragment_uniform_binding_write.dstArrayElement = 0; + fragment_uniform_binding_write.descriptorCount = 1; + fragment_uniform_binding_write.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + fragment_uniform_binding_write.pBufferInfo = &buffer_info; + vkUpdateDescriptorSets(device_, 2, descriptor_writes, 0, nullptr); } BufferCache::~BufferCache() { @@ -197,9 +228,8 @@ std::pair BufferCache::UploadConstantRegisters( // uint bool[8]; // uint loop[32]; // }; - size_t total_size = xe::round_up( - static_cast((512 * 4 * 4) + (32 * 4) + (8 * 4)), - uniform_buffer_alignment_); + size_t total_size = + xe::round_up(kConstantRegisterUniformRange, uniform_buffer_alignment_); auto offset = AllocateTransientData(uniform_buffer_alignment_, total_size); if (offset == VK_WHOLE_SIZE) { // OOM. 
From ace895d4f3949bb3cce92c287bcb99ec74ede2ae Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 21 Feb 2016 15:44:09 -0800 Subject: [PATCH 043/145] Fixing 1/w0 correction. --- src/xenia/gpu/spirv_shader_translator.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index b33cc7a75..5684a24e3 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -279,14 +279,14 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { auto p = b.createLoad(pos_); auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, vtx_fmt, - b.makeFloatConstant(0.f)); + vec4_float_zero_); - // pos.w = vtx_fmt.w != 0.0 ? 1.0 / pos.w : pos.w + // pos.w = vtx_fmt.w == 0.0 ? 1.0 / pos.w : pos.w auto c_w = b.createCompositeExtract(c, bool_type_, 3); auto p_w = b.createCompositeExtract(p, float_type_, 3); auto p_w_inv = b.createBinOp(spv::Op::OpFDiv, float_type_, b.makeFloatConstant(1.f), p_w); - p_w = b.createTriOp(spv::Op::OpSelect, float_type_, c_w, p_w_inv, p_w); + p_w = b.createTriOp(spv::Op::OpSelect, float_type_, c_w, p_w, p_w_inv); // pos.xyz = vtx_fmt.xyz != 0.0 ? pos.xyz / pos.w : pos.xyz auto p_all_w = b.smearScalar(spv::Decoration::DecorationInvariant, p_w, From 1def5bece922d161283c3c15f4a98a1994201d73 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 21 Feb 2016 15:59:35 -0800 Subject: [PATCH 044/145] Fix viewport inversion. --- src/xenia/gpu/vulkan/pipeline_cache.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index a8938e407..542329af5 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -536,7 +536,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, // Done in VS, no need to flush state. 
if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) { push_constants.window_scale[0] = 1.0f; - push_constants.window_scale[1] = 1.0f; + push_constants.window_scale[1] = -1.0f; } else { push_constants.window_scale[0] = 1.0f / 2560.0f; push_constants.window_scale[1] = -1.0f / 2560.0f; From cda08ff82612654eace1375ce7d69b9b570fc3e1 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 21 Feb 2016 17:17:57 -0800 Subject: [PATCH 045/145] Setup descriptor set for textures. --- src/xenia/gpu/vulkan/texture_cache.cc | 39 +++++++++++-------- .../gpu/vulkan/vulkan_command_processor.cc | 9 +++-- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index ea051ca52..2a67f1727 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -34,10 +34,12 @@ TextureCache::TextureCache(RegisterFile* register_file, descriptor_pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; descriptor_pool_info.maxSets = 256; - VkDescriptorPoolSize pool_sizes[1]; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - pool_sizes[0].descriptorCount = 256; - descriptor_pool_info.poolSizeCount = 1; + VkDescriptorPoolSize pool_sizes[2]; + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_SAMPLER; + pool_sizes[0].descriptorCount = 32; + pool_sizes[1].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + pool_sizes[1].descriptorCount = 32; + descriptor_pool_info.poolSizeCount = 2; descriptor_pool_info.pPoolSizes = pool_sizes; auto err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr, &descriptor_pool_); @@ -45,24 +47,29 @@ TextureCache::TextureCache(RegisterFile* register_file, // Create the descriptor set layout used for rendering. // We always have the same number of samplers but only some are used. 
- VkDescriptorSetLayoutBinding texture_bindings[1]; - for (int i = 0; i < 1; ++i) { - auto& texture_binding = texture_bindings[i]; - texture_binding.binding = 0; - texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - texture_binding.descriptorCount = kMaxTextureSamplers; - texture_binding.stageFlags = - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; - texture_binding.pImmutableSamplers = nullptr; - } + VkDescriptorSetLayoutBinding bindings[2]; + auto& sampler_binding = bindings[0]; + sampler_binding.binding = 0; + sampler_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + sampler_binding.descriptorCount = kMaxTextureSamplers; + sampler_binding.stageFlags = + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + sampler_binding.pImmutableSamplers = nullptr; + auto& texture_binding = bindings[1]; + texture_binding.binding = 1; + texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + texture_binding.descriptorCount = kMaxTextureSamplers; + texture_binding.stageFlags = + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + texture_binding.pImmutableSamplers = nullptr; VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info; descriptor_set_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; descriptor_set_layout_info.pNext = nullptr; descriptor_set_layout_info.flags = 0; descriptor_set_layout_info.bindingCount = - static_cast(xe::countof(texture_bindings)); - descriptor_set_layout_info.pBindings = texture_bindings; + static_cast(xe::countof(bindings)); + descriptor_set_layout_info.pBindings = bindings; err = vkCreateDescriptorSetLayout(device_, &descriptor_set_layout_info, nullptr, &texture_descriptor_set_layout_); CheckResult(err, "vkCreateDescriptorSetLayout"); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index e568df482..723fa8d07 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ 
b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -484,7 +484,8 @@ bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, continue; } has_setup_sampler[texture_binding.fetch_constant] = true; - any_failed = PopulateSampler(command_buffer, texture_binding) || any_failed; + any_failed = + !PopulateSampler(command_buffer, texture_binding) || any_failed; } // Pixel shader texture sampler. @@ -493,7 +494,8 @@ bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, continue; } has_setup_sampler[texture_binding.fetch_constant] = true; - any_failed = PopulateSampler(command_buffer, texture_binding) || any_failed; + any_failed = + !PopulateSampler(command_buffer, texture_binding) || any_failed; } return !any_failed; @@ -508,7 +510,8 @@ bool VulkanCommandProcessor::PopulateSampler( auto group = reinterpret_cast(®s.values[r]); auto& fetch = group->texture_fetch; - // ? + // Disabled? + // TODO(benvanik): reset sampler. if (!fetch.type) { return true; } From 633746b5e4c635574408ca6210021b5c30483055 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 15:38:10 -0600 Subject: [PATCH 046/145] Actually preserve pv/ps if predicate fails --- src/xenia/gpu/spirv_shader_translator.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 5684a24e3..a36b8dfca 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -720,7 +720,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( dest, b.createLoad(pv_)); } - b.createStore(dest, pv_); + b.createStore(pv_dest, pv_); StoreToResult(dest, instr.result, pred_cond); } } @@ -970,7 +970,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( b.createLoad(ps_)); } - b.createStore(dest, ps_); + b.createStore(ps_dest, ps_); StoreToResult(dest, instr.result, pred_cond); } } @@ -1200,7 +1200,12 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, storage_pointer = b.createAccessChain(storage_class, storage_pointer, storage_offsets); } - auto storage_value = b.createLoad(storage_pointer); + + // Only load from storage if we need it later. + Id storage_value = 0; + if (!result.has_all_writes() || predicate_cond) { + b.createLoad(storage_pointer); + } // Convert to the appropriate type, if needed. if (b.getTypeId(source_value_id) != storage_type) { From e78537571f81a4010f712204d57ae9303e73973c Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 16:13:13 -0600 Subject: [PATCH 047/145] Vector kill ops --- src/xenia/gpu/spirv_shader_translator.cc | 77 ++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 5 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index a36b8dfca..fd97c6bf1 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -596,6 +596,10 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( sources[i] = LoadFromOperand(instr.operands[i]); } + Id pred_cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.predicate_condition)); + switch (instr.vector_opcode) { case AluVectorOpcode::kAdd: { dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, sources[0], @@ -630,6 +634,10 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( // TODO: } break; + case AluVectorOpcode::kDst: { + // TODO + } break; + case AluVectorOpcode::kFloor: { dest = CreateGlslStd450InstructionCall( spv::Decoration::DecorationInvariant, vec4_float_type_, @@ -642,6 +650,70 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( spv::GLSLstd450::kFract, {sources[0]}); } break; + case AluVectorOpcode::kKillEq: { + auto continue_block = &b.makeNewBlock(); + auto kill_block = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, + sources[0], sources[1]); + cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); + cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + b.createConditionalBranch(cond, kill_block, continue_block); + + b.setBuildPoint(kill_block); + b.createNoResultOp(spv::Op::OpKill); + + b.setBuildPoint(continue_block); + dest = vec4_float_zero_; + } break; + + case AluVectorOpcode::kKillGe: { + auto continue_block = &b.makeNewBlock(); + auto kill_block = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, + vec4_bool_type_, 
sources[0], sources[1]); + cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); + cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + b.createConditionalBranch(cond, kill_block, continue_block); + + b.setBuildPoint(kill_block); + b.createNoResultOp(spv::Op::OpKill); + + b.setBuildPoint(continue_block); + dest = vec4_float_zero_; + } break; + + case AluVectorOpcode::kKillGt: { + auto continue_block = &b.makeNewBlock(); + auto kill_block = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, + sources[0], sources[1]); + cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); + cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + b.createConditionalBranch(cond, kill_block, continue_block); + + b.setBuildPoint(kill_block); + b.createNoResultOp(spv::Op::OpKill); + + b.setBuildPoint(continue_block); + dest = vec4_float_zero_; + } break; + + case AluVectorOpcode::kKillNe: { + auto continue_block = &b.makeNewBlock(); + auto kill_block = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, + sources[0], sources[1]); + cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); + cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + b.createConditionalBranch(cond, kill_block, continue_block); + + b.setBuildPoint(kill_block); + b.createNoResultOp(spv::Op::OpKill); + + b.setBuildPoint(continue_block); + dest = vec4_float_zero_; + } break; + case AluVectorOpcode::kMad: { dest = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, sources[0], sources[1]); @@ -709,13 +781,8 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( if (dest) { // If predicated, discard the result from the instruction. 
- Id pred_cond = 0; Id pv_dest = dest; if (instr.is_predicated) { - pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); - pv_dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, pred_cond, dest, b.createLoad(pv_)); } From 9b805b929e403497fb23975aea0ebd2f10222ac9 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 21 Feb 2016 16:17:43 -0600 Subject: [PATCH 048/145] Scalar kill ops --- src/xenia/gpu/spirv_shader_translator.cc | 84 ++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index fd97c6bf1..5b146ce8d 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -831,6 +831,10 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } } + Id pred_cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.predicate_condition)); + switch (instr.scalar_opcode) { case AluScalarOpcode::kAdds: case AluScalarOpcode::kAddsc0: @@ -852,6 +856,81 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( {sources[0]}); } break; + case AluScalarOpcode::kKillsEq: { + auto continue_block = &b.makeNewBlock(); + auto kill_block = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], + b.makeFloatConstant(0.f)); + cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + b.createConditionalBranch(cond, kill_block, continue_block); + + b.setBuildPoint(kill_block); + b.createNoResultOp(spv::Op::OpKill); + + b.setBuildPoint(continue_block); + dest = b.makeFloatConstant(0.f); + } break; + + case AluScalarOpcode::kKillsGe: { + auto continue_block = &b.makeNewBlock(); + auto kill_block = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, + sources[0], b.makeFloatConstant(0.f)); + cond = 
b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + b.createConditionalBranch(cond, kill_block, continue_block); + + b.setBuildPoint(kill_block); + b.createNoResultOp(spv::Op::OpKill); + + b.setBuildPoint(continue_block); + dest = b.makeFloatConstant(0.f); + } break; + + case AluScalarOpcode::kKillsGt: { + auto continue_block = &b.makeNewBlock(); + auto kill_block = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, + sources[0], b.makeFloatConstant(0.f)); + cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + b.createConditionalBranch(cond, kill_block, continue_block); + + b.setBuildPoint(kill_block); + b.createNoResultOp(spv::Op::OpKill); + + b.setBuildPoint(continue_block); + dest = b.makeFloatConstant(0.f); + } break; + + case AluScalarOpcode::kKillsNe: { + auto continue_block = &b.makeNewBlock(); + auto kill_block = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0], + b.makeFloatConstant(0.f)); + cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + b.createConditionalBranch(cond, kill_block, continue_block); + + b.setBuildPoint(kill_block); + b.createNoResultOp(spv::Op::OpKill); + + b.setBuildPoint(continue_block); + dest = b.makeFloatConstant(0.f); + } break; + + case AluScalarOpcode::kKillsOne: { + auto continue_block = &b.makeNewBlock(); + auto kill_block = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], + b.makeFloatConstant(1.f)); + cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + b.createConditionalBranch(cond, kill_block, continue_block); + + b.setBuildPoint(kill_block); + b.createNoResultOp(spv::Op::OpKill); + + b.setBuildPoint(continue_block); + dest = b.makeFloatConstant(0.f); + } break; + case AluScalarOpcode::kMaxs: { // dest = max(src0, src1) dest = CreateGlslStd450InstructionCall( @@ -1026,13 +1105,8 @@ void 
SpirvShaderTranslator::ProcessScalarAluInstruction( if (dest) { // If predicated, discard the result from the instruction. - Id pred_cond = 0; Id ps_dest = dest; if (instr.is_predicated) { - pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); - ps_dest = b.createTriOp(spv::Op::OpSelect, float_type_, pred_cond, dest, b.createLoad(ps_)); } From d2e3b5533d27aa86a080d0e1a27c2e2c43e5bc5e Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 21 Feb 2016 16:29:12 -0600 Subject: [PATCH 049/145] Whoops --- src/xenia/gpu/spirv_shader_translator.cc | 60 ++++++++++++++++++------ 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 5b146ce8d..9d1253353 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -596,9 +596,12 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( sources[i] = LoadFromOperand(instr.operands[i]); } - Id pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); + Id pred_cond = 0; + if (instr.is_predicated) { + pred_cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.predicate_condition)); + } switch (instr.vector_opcode) { case AluVectorOpcode::kAdd: { @@ -656,7 +659,10 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto cond = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], sources[1]); cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + if (pred_cond) { + cond = + b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -672,7 +678,10 @@ void 
SpirvShaderTranslator::ProcessVectorAluInstruction( auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_, sources[0], sources[1]); cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + if (pred_cond) { + cond = + b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -688,7 +697,10 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, sources[0], sources[1]); cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + if (pred_cond) { + cond = + b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -704,7 +716,10 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, sources[0], sources[1]); cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + if (pred_cond) { + cond = + b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -831,9 +846,12 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } } - Id pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); + Id pred_cond = 0; + if (instr.is_predicated) { + pred_cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.predicate_condition)); + } switch (instr.scalar_opcode) { case AluScalarOpcode::kAdds: @@ -876,7 +894,10 @@ void 
SpirvShaderTranslator::ProcessScalarAluInstruction( auto kill_block = &b.makeNewBlock(); auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, sources[0], b.makeFloatConstant(0.f)); - cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + if (pred_cond) { + cond = + b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -891,7 +912,10 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto kill_block = &b.makeNewBlock(); auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, sources[0], b.makeFloatConstant(0.f)); - cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + if (pred_cond) { + cond = + b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -906,7 +930,10 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto kill_block = &b.makeNewBlock(); auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0], b.makeFloatConstant(0.f)); - cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + if (pred_cond) { + cond = + b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -921,7 +948,10 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto kill_block = &b.makeNewBlock(); auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], b.makeFloatConstant(1.f)); - cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + if (pred_cond) { + cond = + b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -1345,7 +1375,7 @@ void SpirvShaderTranslator::StoreToResult(Id 
source_value_id, // Only load from storage if we need it later. Id storage_value = 0; if (!result.has_all_writes() || predicate_cond) { - b.createLoad(storage_pointer); + storage_value = b.createLoad(storage_pointer); } // Convert to the appropriate type, if needed. From fa3ca4a5d7c806b3ea16e43e26a0c3e67aa16ff3 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 21 Feb 2016 16:30:57 -0600 Subject: [PATCH 050/145] Short-circuit if the store has no writes. --- src/xenia/gpu/spirv_shader_translator.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 9d1253353..bff854a87 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -1289,6 +1289,10 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, return; } + if (!result.has_any_writes()) { + return; + } + Id storage_pointer = 0; Id storage_type = vec4_float_type_; spv::StorageClass storage_class; From d217f7b3c34786642193717aa034cb37a6f2a0ee Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 16:39:54 -0600 Subject: [PATCH 051/145] MaxAs/SetpClr --- src/xenia/gpu/spirv_shader_translator.cc | 24 ++++++++++++++++++++++++ src/xenia/gpu/spirv_shader_translator.h | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index bff854a87..9fd4ba10d 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -54,6 +54,7 @@ void SpirvShaderTranslator::StartTranslation() { bool_type_ = b.makeBoolType(); float_type_ = b.makeFloatType(32); + int_type_ = b.makeIntType(32); Id uint_type = b.makeUintType(32); vec2_float_type_ = b.makeVectorType(float_type_, 2); vec3_float_type_ = b.makeVectorType(float_type_, 3); @@ -961,6 +962,24 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( dest = b.makeFloatConstant(0.f); } break; + case AluScalarOpcode::kMaxAs: { + // a0 = clamp(floor(src0 + 0.5), -256, 255) + auto addr = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], + b.makeFloatConstant(0.5f)); + addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, addr); + addr = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, int_type_, + spv::GLSLstd450::kSClamp, + {addr, b.makeIntConstant(-256), b.makeIntConstant(255)}); + b.createStore(addr, a0_); + + // dest = src0 >= src1 ? 
src0 : src1 + auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, + sources[0], sources[1]); + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, sources[0], + sources[1]); + } break; + case AluScalarOpcode::kMaxs: { // dest = max(src0, src1) dest = CreateGlslStd450InstructionCall( @@ -1050,6 +1069,11 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); } break; + case AluScalarOpcode::kSetpClr: { + b.createStore(b.makeBoolConstant(false), p0_); + dest = b.makeFloatConstant(FLT_MAX); + } break; + case AluScalarOpcode::kSetpEq: { auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], b.makeFloatConstant(0.f)); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 1ec006d50..31894a901 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -104,7 +104,7 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Function* translated_main_ = 0; // Types. - spv::Id float_type_ = 0, bool_type_ = 0; + spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0; spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0; spv::Id vec4_uint_type_ = 0; spv::Id vec4_bool_type_ = 0; From 3877afe90a68f922f8ffcb26fccb77029cdf8e93 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 16:42:58 -0600 Subject: [PATCH 052/145] MaxAsf --- src/xenia/gpu/spirv_shader_translator.cc | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 9fd4ba10d..bccd797d1 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -962,6 +962,21 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( dest = b.makeFloatConstant(0.f); } break; + case AluScalarOpcode::kMaxAsf: { + auto addr = + b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, sources[0]); + addr = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, int_type_, + spv::GLSLstd450::kSClamp, + {addr, b.makeIntConstant(-256), b.makeIntConstant(255)}); + b.createStore(addr, a0_); + + // dest = src0 >= src1 ? src0 : src1 + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, + spv::GLSLstd450::kFMax, {sources[0], sources[1]}); + } break; + case AluScalarOpcode::kMaxAs: { // a0 = clamp(floor(src0 + 0.5), -256, 255) auto addr = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], @@ -974,10 +989,9 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( b.createStore(addr, a0_); // dest = src0 >= src1 ? src0 : src1 - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, - sources[0], sources[1]); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, sources[0], - sources[1]); + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, + spv::GLSLstd450::kFMax, {sources[0], sources[1]}); } break; case AluScalarOpcode::kMaxs: { From 568845e81db9c1e13babb2896d6c0582c2bb774b Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 16:47:06 -0600 Subject: [PATCH 053/145] Dp4 --- src/xenia/gpu/spirv_shader_translator.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index bccd797d1..da1d88850 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -642,6 +642,10 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( // TODO } break; + case AluVectorOpcode::kDp4: { + dest = b.createBinOp(spv::Op::OpDot, float_type_, sources[0], sources[1]); + } break; + case AluVectorOpcode::kFloor: { dest = CreateGlslStd450InstructionCall( spv::Decoration::DecorationInvariant, vec4_float_type_, From 1d4190af02a7ed36ddde98e4647bf1fcfcc71c9a Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 21 Feb 2016 16:52:43 -0600 Subject: [PATCH 054/145] MaxA --- src/xenia/gpu/spirv_shader_translator.cc | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index da1d88850..1c829ddec 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -740,6 +740,24 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, dest, sources[2]); } break; + case AluVectorOpcode::kMaxA: { + // a0 = clamp(floor(src0.w + 0.5), -256, 255) + auto addr = b.createCompositeExtract(sources[0], float_type_, 3); + addr = b.createBinOp(spv::Op::OpFAdd, float_type_, addr, + b.makeFloatConstant(0.5f)); + addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, addr); + addr = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, int_type_, + spv::GLSLstd450::kSClamp, + {addr, b.makeIntConstant(-256), b.makeIntConstant(255)}); + b.createStore(addr, a0_); + + // dest = src0 >= src1 ? 
src0 : src1 + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + spv::GLSLstd450::kFMax, {sources[0], sources[1]}); + } break; + case AluVectorOpcode::kMax: { dest = CreateGlslStd450InstructionCall( spv::Decoration::DecorationInvariant, vec4_float_type_, From 13049912ee91a014b71cdf5b71162eb2b05260a9 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 21 Feb 2016 17:12:08 -0600 Subject: [PATCH 055/145] Vec Log --- src/xenia/gpu/spirv_shader_translator.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 1c829ddec..ed03d29e0 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -740,6 +740,9 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, dest, sources[2]); } break; + case AluVectorOpcode::kMax4: { + } break; + case AluVectorOpcode::kMaxA: { // a0 = clamp(floor(src0.w + 0.5), -256, 255) auto addr = b.createCompositeExtract(sources[0], float_type_, 3); @@ -984,6 +987,15 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( dest = b.makeFloatConstant(0.f); } break; + case AluScalarOpcode::kLogc: { + } break; + + case AluScalarOpcode::kLog: { + auto log = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, + spv::GLSLstd450::kLog2, {sources[0]}); + } break; + case AluScalarOpcode::kMaxAsf: { auto addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, sources[0]); From cbac9b2f4e2fb84704ec10be4253d248b4b22f1a Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 17:39:18 -0600 Subject: [PATCH 056/145] Use vec4 zero rather than float zero where needed --- src/xenia/gpu/spirv_shader_translator.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index ed03d29e0..644e766ef 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -613,7 +613,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( case AluVectorOpcode::kCndEq: { // dest = src0 == 0.0 ? src1 : src2; auto c = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], - b.makeFloatConstant(0.f)); + vec4_float_zero_); dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1], sources[2]); } break; @@ -621,7 +621,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( case AluVectorOpcode::kCndGe: { // dest = src0 == 0.0 ? src1 : src2; auto c = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_, - sources[0], b.makeFloatConstant(0.f)); + sources[0], vec4_float_zero_); dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1], sources[2]); } break; @@ -629,7 +629,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( case AluVectorOpcode::kCndGt: { // dest = src0 == 0.0 ? 
src1 : src2; auto c = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, - sources[0], b.makeFloatConstant(0.f)); + sources[0], vec4_float_zero_); dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1], sources[2]); } break; @@ -1184,6 +1184,12 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); } break; + case AluScalarOpcode::kSetpPop: { + } break; + + case AluScalarOpcode::kSetpRstr: { + } break; + case AluScalarOpcode::kSin: { dest = CreateGlslStd450InstructionCall( spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kSin, From 0680e451bc8a0c1e9ef1b65b5572d84ff871aa7d Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 21 Feb 2016 17:59:59 -0600 Subject: [PATCH 057/145] Exp2 --- src/xenia/gpu/spirv_shader_translator.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 644e766ef..4370d020e 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -900,6 +900,12 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( {sources[0]}); } break; + case AluScalarOpcode::kExp: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kExp2, + {sources[0]}); + } break; + case AluScalarOpcode::kKillsEq: { auto continue_block = &b.makeNewBlock(); auto kill_block = &b.makeNewBlock(); From 5c2b5123ac166b1e6eb960bc9ffc9ce605e91ec4 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 18:15:11 -0600 Subject: [PATCH 058/145] Floors/Frcs/Truncs --- src/xenia/gpu/spirv_shader_translator.cc | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 4370d020e..1f3140ed8 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -906,6 +906,18 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( {sources[0]}); } break; + case AluScalarOpcode::kFloors: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFloor, + {sources[0]}); + } break; + + case AluScalarOpcode::kFrcs: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFract, + {sources[0]}); + } break; + case AluScalarOpcode::kKillsEq: { auto continue_block = &b.makeNewBlock(); auto kill_block = &b.makeNewBlock(); @@ -1213,6 +1225,12 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( dest = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], ps_); } break; + case AluScalarOpcode::kTruncs: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kTrunc, + {sources[0]}); + } break; + default: break; } From 9030c87386efd215c8d4ed27ae4ae07c700b26c4 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 18:52:49 -0600 Subject: [PATCH 059/145] SetpPop/SetpRstr --- src/xenia/gpu/spirv_shader_translator.cc | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 1f3140ed8..b4e49a1ca 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -1203,9 +1203,22 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } break; case AluScalarOpcode::kSetpPop: { + auto src = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], + b.makeFloatConstant(1.f)); + auto c = b.createBinOp(spv::Op::OpFOrdLessThanEqual, bool_type_, src, + b.makeFloatConstant(0.f)); + b.createStore(c, p0_); + + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFMax, + {sources[0], b.makeFloatConstant(0.f)}); } break; case AluScalarOpcode::kSetpRstr: { + auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], + b.makeFloatConstant(0.f)); + b.createStore(c, p0_); + dest = sources[0]; } break; case AluScalarOpcode::kSin: { From 2785a94fea1f8d69ee4a376ed5693a5fbbae2475 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 19:10:24 -0600 Subject: [PATCH 060/145] SetpXXPush --- src/xenia/gpu/spirv_shader_translator.cc | 96 ++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index b4e49a1ca..fcf862fda 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -778,6 +778,102 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( sources[1]); } break; + case AluVectorOpcode::kSetpEqPush: { + auto c0 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], + vec4_float_zero_); + auto c1 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[1], + vec4_float_zero_); + auto c_and = + b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); + auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); + auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); + + // p0 + b.createStore(c_and_w, p0_); + + // dest + auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); + s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, + b.makeFloatConstant(1.f)); + auto s0 = b.smearScalar(spv::Decoration::DecorationInvariant, s0_x, + vec4_float_type_); + + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, + vec4_float_zero_, s0); + } break; + + case AluVectorOpcode::kSetpGePush: { + auto c0 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], + vec4_float_zero_); + auto c1 = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_, + sources[1], vec4_float_zero_); + auto c_and = + b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); + auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); + auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); + + // p0 + b.createStore(c_and_w, p0_); + + // dest + auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); + s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, 
s0_x, + b.makeFloatConstant(1.f)); + auto s0 = b.smearScalar(spv::Decoration::DecorationInvariant, s0_x, + vec4_float_type_); + + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, + vec4_float_zero_, s0); + } break; + + case AluVectorOpcode::kSetpGtPush: { + auto c0 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], + vec4_float_zero_); + auto c1 = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, + sources[1], vec4_float_zero_); + auto c_and = + b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); + auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); + auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); + + // p0 + b.createStore(c_and_w, p0_); + + // dest + auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); + s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, + b.makeFloatConstant(1.f)); + auto s0 = b.smearScalar(spv::Decoration::DecorationInvariant, s0_x, + vec4_float_type_); + + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, + vec4_float_zero_, s0); + } break; + + case AluVectorOpcode::kSetpNePush: { + auto c0 = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, + sources[0], vec4_float_zero_); + auto c1 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[1], + vec4_float_zero_); + auto c_and = + b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); + auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); + auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); + + // p0 + b.createStore(c_and_w, p0_); + + // dest + auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); + s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, + b.makeFloatConstant(1.f)); + auto s0 = b.smearScalar(spv::Decoration::DecorationInvariant, s0_x, + vec4_float_type_); + + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, + vec4_float_zero_, s0); + } break; + case AluVectorOpcode::kSeq: { // foreach(el) 
src0 == src1 ? 1.0 : 0.0 auto c = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], From 8a29330f8c40533404510a0168f9105fdb68eddd Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 21 Feb 2016 20:42:37 -0600 Subject: [PATCH 061/145] First-pass image sampling --- src/xenia/gpu/spirv_shader_translator.cc | 94 +++++++++++++++++++++++- src/xenia/gpu/spirv_shader_translator.h | 3 + 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index fcf862fda..8c2057242 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -163,6 +163,46 @@ void SpirvShaderTranslator::StartTranslation() { push_consts_ = b.createVariable(spv::StorageClass::StorageClassPushConstant, push_constants_type, "push_consts"); + // Texture bindings + Id img_t[] = { + b.makeImageType(float_type_, spv::Dim::Dim1D, false, false, false, 1, + spv::ImageFormat::ImageFormatUnknown), + b.makeImageType(float_type_, spv::Dim::Dim2D, false, false, false, 1, + spv::ImageFormat::ImageFormatUnknown), + b.makeImageType(float_type_, spv::Dim::Dim3D, false, false, false, 1, + spv::ImageFormat::ImageFormatUnknown), + b.makeImageType(float_type_, spv::Dim::DimCube, false, false, false, 1, + spv::ImageFormat::ImageFormatUnknown)}; + Id samplers_t = b.makeSamplerType(); + + Id img_a_t[] = {b.makeArrayType(img_t[0], b.makeUintConstant(32), 0), + b.makeArrayType(img_t[1], b.makeUintConstant(32), 0), + b.makeArrayType(img_t[2], b.makeUintConstant(32), 0), + b.makeArrayType(img_t[3], b.makeUintConstant(32), 0)}; + Id samplers_a = b.makeArrayType(samplers_t, b.makeUintConstant(32), 0); + + Id img_s[] = { + b.makeStructType({img_a_t[0]}, "img1D_type"), + b.makeStructType({img_a_t[1]}, "img2D_type"), + b.makeStructType({img_a_t[2]}, "img3D_type"), + b.makeStructType({img_a_t[3]}, "imgCube_type"), + }; + Id samplers_s = b.makeStructType({samplers_a}, "samplers_type"); + + for 
(int i = 0; i < 4; i++) { + img_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant, + img_s[i], + xe::format_string("images%dD", i + 1).c_str()); + b.addDecoration(img_[i], spv::Decoration::DecorationBlock); + b.addDecoration(img_[i], spv::Decoration::DecorationDescriptorSet, 2); + b.addDecoration(img_[i], spv::Decoration::DecorationBinding, i + 1); + } + samplers_ = b.createVariable(spv::StorageClass::StorageClassUniformConstant, + samplers_s, "samplers"); + b.addDecoration(samplers_, spv::Decoration::DecorationBlock); + b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 2); + b.addDecoration(samplers_, spv::Decoration::DecorationBinding, 0); + // Interpolators. Id interpolators_type = b.makeArrayType(vec4_float_type_, b.makeUintConstant(16), 0); @@ -552,6 +592,8 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( const ParsedVertexFetchInstruction& instr) { auto& b = *builder_; + // TODO: instr.is_predicated + // Operand 0 is the index // Operand 1 is the binding // TODO: Indexed fetch @@ -568,7 +610,57 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( const ParsedTextureFetchInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + // TODO: instr.is_predicated + // Operand 0 is the offset + // Operand 1 is the sampler index + Id dest = 0; + Id src = LoadFromOperand(instr.operands[0]); + assert_not_zero(src); + + uint32_t dim_idx = 0; + switch (instr.dimension) { + case TextureDimension::k1D: + dim_idx = 0; + break; + case TextureDimension::k2D: + dim_idx = 1; + break; + case TextureDimension::k3D: + dim_idx = 2; + break; + case TextureDimension::kCube: + dim_idx = 3; + break; + default: + assert_unhandled_case(instr.dimension); + } + + switch (instr.opcode) { + case FetchOpcode::kTextureFetch: { + auto image_index = b.makeUintConstant(instr.operands[1].storage_index); + auto image_ptr = + b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, + img_[dim_idx], 
std::vector({image_index})); + auto sampler_ptr = + b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, + samplers_, std::vector({image_index})); + auto image = b.createLoad(image_ptr); + auto sampler = b.createLoad(sampler_ptr); + + auto tex = b.createBinOp(spv::Op::OpSampledImage, b.getImageType(image), + image, sampler); + dest = b.createBinOp(spv::Op::OpImageSampleImplicitLod, vec4_float_type_, + tex, src); + } break; + default: + // TODO: the rest of these + break; + } + + if (dest) { + b.createStore(dest, pv_); + StoreToResult(dest, instr.result); + } } void SpirvShaderTranslator::ProcessAluInstruction( diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 31894a901..0d8b1e14c 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -108,6 +108,7 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0; spv::Id vec4_uint_type_ = 0; spv::Id vec4_bool_type_ = 0; + spv::Id sampled_image_type_ = 0; // Constants. spv::Id vec4_float_zero_ = 0, vec4_float_one_ = 0; @@ -121,6 +122,8 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id push_consts_ = 0; spv::Id interpolators_ = 0; spv::Id frag_outputs_ = 0; + spv::Id samplers_ = 0; + spv::Id img_[4] = {0}; // Images {1D, 2D, 3D, Cube} // Map of {binding -> {offset -> spv input}} std::map> vertex_binding_map_; From 62931f8c8ea468650c8fdc768573005950bdc626 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 21 Feb 2016 19:56:23 -0800 Subject: [PATCH 062/145] Adding texture setup and a dummy grid texture. 
--- src/xenia/gpu/vulkan/texture_cache.cc | 292 +++++++++++++++++- src/xenia/gpu/vulkan/texture_cache.h | 44 ++- .../gpu/vulkan/vulkan_command_processor.cc | 73 +---- .../gpu/vulkan/vulkan_command_processor.h | 2 - .../ui/vulkan/vulkan_immediate_drawer.cc | 3 +- 5 files changed, 335 insertions(+), 79 deletions(-) diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index 2a67f1727..4e93a46ca 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -14,6 +14,8 @@ #include "xenia/base/memory.h" #include "xenia/base/profiling.h" #include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/sampler_info.h" +#include "xenia/gpu/texture_info.h" #include "xenia/gpu/vulkan/vulkan_gpu_flags.h" namespace xe { @@ -25,8 +27,11 @@ using xe::ui::vulkan::CheckResult; constexpr uint32_t kMaxTextureSamplers = 32; TextureCache::TextureCache(RegisterFile* register_file, + TraceWriter* trace_writer, ui::vulkan::VulkanDevice* device) - : register_file_(register_file), device_(*device) { + : register_file_(register_file), + trace_writer_(trace_writer), + device_(device) { // Descriptor pool used for all of our cached descriptors. VkDescriptorPoolCreateInfo descriptor_pool_info; descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; @@ -41,13 +46,13 @@ TextureCache::TextureCache(RegisterFile* register_file, pool_sizes[1].descriptorCount = 32; descriptor_pool_info.poolSizeCount = 2; descriptor_pool_info.pPoolSizes = pool_sizes; - auto err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr, + auto err = vkCreateDescriptorPool(*device_, &descriptor_pool_info, nullptr, &descriptor_pool_); CheckResult(err, "vkCreateDescriptorPool"); // Create the descriptor set layout used for rendering. // We always have the same number of samplers but only some are used. 
- VkDescriptorSetLayoutBinding bindings[2]; + VkDescriptorSetLayoutBinding bindings[5]; auto& sampler_binding = bindings[0]; sampler_binding.binding = 0; sampler_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; @@ -55,13 +60,15 @@ TextureCache::TextureCache(RegisterFile* register_file, sampler_binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; sampler_binding.pImmutableSamplers = nullptr; - auto& texture_binding = bindings[1]; - texture_binding.binding = 1; - texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - texture_binding.descriptorCount = kMaxTextureSamplers; - texture_binding.stageFlags = - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; - texture_binding.pImmutableSamplers = nullptr; + for (int i = 0; i < 4; ++i) { + auto& texture_binding = bindings[1 + i]; + texture_binding.binding = 1 + i; + texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + texture_binding.descriptorCount = kMaxTextureSamplers; + texture_binding.stageFlags = + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + texture_binding.pImmutableSamplers = nullptr; + } VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info; descriptor_set_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; @@ -70,15 +77,274 @@ TextureCache::TextureCache(RegisterFile* register_file, descriptor_set_layout_info.bindingCount = static_cast(xe::countof(bindings)); descriptor_set_layout_info.pBindings = bindings; - err = vkCreateDescriptorSetLayout(device_, &descriptor_set_layout_info, + err = vkCreateDescriptorSetLayout(*device_, &descriptor_set_layout_info, nullptr, &texture_descriptor_set_layout_); CheckResult(err, "vkCreateDescriptorSetLayout"); + + SetupGridImages(); } TextureCache::~TextureCache() { - vkDestroyDescriptorSetLayout(device_, texture_descriptor_set_layout_, + vkDestroyImageView(*device_, grid_image_2d_view_, nullptr); + vkDestroyImage(*device_, grid_image_2d_, nullptr); + 
vkFreeMemory(*device_, grid_image_2d_memory_, nullptr); + + vkDestroyDescriptorSetLayout(*device_, texture_descriptor_set_layout_, nullptr); - vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr); + vkDestroyDescriptorPool(*device_, descriptor_pool_, nullptr); +} + +void TextureCache::SetupGridImages() { + VkImageCreateInfo image_info; + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_info.pNext = nullptr; + image_info.flags = 0; + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.format = VK_FORMAT_R8G8B8A8_UNORM; + image_info.extent = {8, 8, 1}; + image_info.mipLevels = 1; + image_info.arrayLayers = 1; + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = VK_IMAGE_TILING_LINEAR; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_info.queueFamilyIndexCount = 0; + image_info.pQueueFamilyIndices = nullptr; + image_info.initialLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + auto err = vkCreateImage(*device_, &image_info, nullptr, &grid_image_2d_); + CheckResult(err, "vkCreateImage"); + + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(*device_, grid_image_2d_, &memory_requirements); + grid_image_2d_memory_ = device_->AllocateMemory( + memory_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + err = vkBindImageMemory(*device_, grid_image_2d_, grid_image_2d_memory_, 0); + CheckResult(err, "vkBindImageMemory"); + + VkImageViewCreateInfo view_info; + view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_info.pNext = nullptr; + view_info.flags = 0; + view_info.image = grid_image_2d_; + view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_info.format = VK_FORMAT_R8G8B8A8_UNORM; + view_info.components = { + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, + VK_COMPONENT_SWIZZLE_A, + }; + view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + err = vkCreateImageView(*device_, &view_info, 
nullptr, &grid_image_2d_view_); + CheckResult(err, "vkCreateImageView"); + + VkImageSubresource subresource; + subresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subresource.mipLevel = 0; + subresource.arrayLayer = 0; + VkSubresourceLayout layout; + vkGetImageSubresourceLayout(*device_, grid_image_2d_, &subresource, &layout); + + void* gpu_data = nullptr; + err = vkMapMemory(*device_, grid_image_2d_memory_, 0, layout.size, 0, + &gpu_data); + CheckResult(err, "vkMapMemory"); + + uint32_t grid_pixels[8 * 8]; + for (int y = 0; y < 8; ++y) { + for (int x = 0; x < 8; ++x) { + grid_pixels[y * 8 + x] = + ((y % 2 == 0) ^ (x % 2 != 0)) ? 0xFFFFFFFF : 0xFF0000FF; + } + } + std::memcpy(gpu_data, grid_pixels, sizeof(grid_pixels)); + + vkUnmapMemory(*device_, grid_image_2d_memory_); +} + +VkDescriptorSet TextureCache::PrepareTextureSet( + VkCommandBuffer command_buffer, + const std::vector& vertex_bindings, + const std::vector& pixel_bindings) { + // Clear state. + auto update_set_info = &update_set_info_; + update_set_info->has_setup_fetch_mask = 0; + update_set_info->image_1d_write_count = 0; + update_set_info->image_2d_write_count = 0; + update_set_info->image_3d_write_count = 0; + update_set_info->image_cube_write_count = 0; + + std::memset(update_set_info, 0, sizeof(update_set_info_)); + + // Process vertex and pixel shader bindings. + // This does things lazily and de-dupes fetch constants reused in both + // shaders. + bool any_failed = false; + any_failed = + !SetupTextureBindings(update_set_info, vertex_bindings) || any_failed; + any_failed = + !SetupTextureBindings(update_set_info, pixel_bindings) || any_failed; + if (any_failed) { + XELOGW("Failed to setup one or more texture bindings"); + // TODO(benvanik): actually bail out here? + } + + // TODO(benvanik): reuse. 
+ VkDescriptorSet descriptor_set = nullptr; + VkDescriptorSetAllocateInfo set_alloc_info; + set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + set_alloc_info.pNext = nullptr; + set_alloc_info.descriptorPool = descriptor_pool_; + set_alloc_info.descriptorSetCount = 1; + set_alloc_info.pSetLayouts = &texture_descriptor_set_layout_; + auto err = + vkAllocateDescriptorSets(*device_, &set_alloc_info, &descriptor_set); + CheckResult(err, "vkAllocateDescriptorSets"); + + // Write all updated descriptors. + // TODO(benvanik): optimize? split into multiple sets? set per type? + VkWriteDescriptorSet descriptor_writes[4]; + std::memset(descriptor_writes, 0, sizeof(descriptor_writes)); + uint32_t descriptor_write_count = 0; + if (update_set_info->sampler_write_count) { + auto& sampler_write = descriptor_writes[descriptor_write_count++]; + sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + sampler_write.pNext = nullptr; + sampler_write.dstSet = descriptor_set; + sampler_write.dstBinding = 0; + sampler_write.dstArrayElement = 0; + sampler_write.descriptorCount = update_set_info->sampler_write_count; + sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + sampler_write.pImageInfo = update_set_info->sampler_infos; + } + if (update_set_info->image_1d_write_count) { + auto& image_write = descriptor_writes[descriptor_write_count++]; + image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + image_write.pNext = nullptr; + image_write.dstSet = descriptor_set; + image_write.dstBinding = 1; + image_write.dstArrayElement = 0; + image_write.descriptorCount = update_set_info->image_1d_write_count; + image_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + image_write.pImageInfo = update_set_info->image_1d_infos; + } + if (update_set_info->image_2d_write_count) { + auto& image_write = descriptor_writes[descriptor_write_count++]; + image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + image_write.pNext = nullptr; + 
image_write.dstSet = descriptor_set; + image_write.dstBinding = 2; + image_write.dstArrayElement = 0; + image_write.descriptorCount = update_set_info->image_2d_write_count; + image_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + image_write.pImageInfo = update_set_info->image_2d_infos; + } + if (update_set_info->image_3d_write_count) { + auto& image_write = descriptor_writes[descriptor_write_count++]; + image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + image_write.pNext = nullptr; + image_write.dstSet = descriptor_set; + image_write.dstBinding = 3; + image_write.dstArrayElement = 0; + image_write.descriptorCount = update_set_info->image_3d_write_count; + image_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + image_write.pImageInfo = update_set_info->image_3d_infos; + } + if (update_set_info->image_cube_write_count) { + auto& image_write = descriptor_writes[descriptor_write_count++]; + image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + image_write.pNext = nullptr; + image_write.dstSet = descriptor_set; + image_write.dstBinding = 4; + image_write.dstArrayElement = 0; + image_write.descriptorCount = update_set_info->image_cube_write_count; + image_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + image_write.pImageInfo = update_set_info->image_cube_infos; + } + if (descriptor_write_count) { + vkUpdateDescriptorSets(*device_, descriptor_write_count, descriptor_writes, + 0, nullptr); + } + + return descriptor_set; +} + +bool TextureCache::SetupTextureBindings( + UpdateSetInfo* update_set_info, + const std::vector& bindings) { + bool any_failed = false; + for (auto& binding : bindings) { + uint32_t fetch_bit = 1 << binding.fetch_constant; + if ((update_set_info->has_setup_fetch_mask & fetch_bit) == 0) { + // Needs setup. 
+ any_failed = !SetupTextureBinding(update_set_info, binding) || any_failed; + update_set_info->has_setup_fetch_mask |= fetch_bit; + } + } + return !any_failed; +} + +bool TextureCache::SetupTextureBinding(UpdateSetInfo* update_set_info, + const Shader::TextureBinding& binding) { + auto& regs = *register_file_; + int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6; + auto group = + reinterpret_cast(®s.values[r]); + auto& fetch = group->texture_fetch; + + // Disabled? + // TODO(benvanik): reset sampler. + if (!fetch.type) { + return true; + } + assert_true(fetch.type == 0x2); + + TextureInfo texture_info; + if (!TextureInfo::Prepare(fetch, &texture_info)) { + XELOGE("Unable to parse texture fetcher info"); + return false; // invalid texture used + } + SamplerInfo sampler_info; + if (!SamplerInfo::Prepare(fetch, binding.fetch_instr, &sampler_info)) { + XELOGE("Unable to parse sampler info"); + return false; // invalid texture used + } + + trace_writer_->WriteMemoryRead(texture_info.guest_address, + texture_info.input_length); + + // TODO(benvanik): reuse. 
+ VkSamplerCreateInfo sampler_create_info; + sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_create_info.pNext = nullptr; + sampler_create_info.flags = 0; + sampler_create_info.magFilter = VK_FILTER_NEAREST; + sampler_create_info.minFilter = VK_FILTER_NEAREST; + sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + sampler_create_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_create_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_create_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_create_info.mipLodBias = 0.0f; + sampler_create_info.anisotropyEnable = VK_FALSE; + sampler_create_info.maxAnisotropy = 1.0f; + sampler_create_info.compareEnable = VK_FALSE; + sampler_create_info.compareOp = VK_COMPARE_OP_ALWAYS; + sampler_create_info.minLod = 0.0f; + sampler_create_info.maxLod = 0.0f; + sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + sampler_create_info.unnormalizedCoordinates = VK_FALSE; + VkSampler sampler; + auto err = vkCreateSampler(*device_, &sampler_create_info, nullptr, &sampler); + CheckResult(err, "vkCreateSampler"); + + auto& sampler_write = + update_set_info->sampler_infos[update_set_info->sampler_write_count++]; + sampler_write.sampler = sampler; + + auto& image_write = + update_set_info->image_2d_infos[update_set_info->image_2d_write_count++]; + image_write.imageView = grid_image_2d_view_; + image_write.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + + return true; } void TextureCache::ClearCache() { diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h index 34ae6f114..9ba3f3577 100644 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -12,6 +12,7 @@ #include "xenia/gpu/register_file.h" #include "xenia/gpu/shader.h" +#include "xenia/gpu/trace_writer.h" #include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/vulkan.h" #include "xenia/ui/vulkan/vulkan_device.h" @@ -23,7 +24,8 @@ 
namespace vulkan { // class TextureCache { public: - TextureCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); + TextureCache(RegisterFile* register_file, TraceWriter* trace_writer, + ui::vulkan::VulkanDevice* device); ~TextureCache(); // Descriptor set layout containing all possible texture bindings. @@ -32,6 +34,13 @@ class TextureCache { return texture_descriptor_set_layout_; } + // Prepares a descriptor set containing the samplers and images for all + // bindings. The textures will be uploaded/converted/etc as needed. + VkDescriptorSet PrepareTextureSet( + VkCommandBuffer command_buffer, + const std::vector& vertex_bindings, + const std::vector& pixel_bindings); + // TODO(benvanik): UploadTexture. // TODO(benvanik): Resolve. // TODO(benvanik): ReadTexture. @@ -40,11 +49,42 @@ class TextureCache { void ClearCache(); private: + struct UpdateSetInfo; + + void SetupGridImages(); + + bool SetupTextureBindings( + UpdateSetInfo* update_set_info, + const std::vector& bindings); + bool SetupTextureBinding(UpdateSetInfo* update_set_info, + const Shader::TextureBinding& binding); + RegisterFile* register_file_ = nullptr; - VkDevice device_ = nullptr; + TraceWriter* trace_writer_ = nullptr; + ui::vulkan::VulkanDevice* device_ = nullptr; VkDescriptorPool descriptor_pool_ = nullptr; VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr; + + VkDeviceMemory grid_image_2d_memory_ = nullptr; + VkImage grid_image_2d_ = nullptr; + VkImageView grid_image_2d_view_ = nullptr; + + struct UpdateSetInfo { + // Bitmap of all 32 fetch constants and whether they have been setup yet. + // This prevents duplication across the vertex and pixel shader. 
+ uint32_t has_setup_fetch_mask; + uint32_t sampler_write_count = 0; + VkDescriptorImageInfo sampler_infos[32]; + uint32_t image_1d_write_count = 0; + VkDescriptorImageInfo image_1d_infos[32]; + uint32_t image_2d_write_count = 0; + VkDescriptorImageInfo image_2d_infos[32]; + uint32_t image_3d_write_count = 0; + VkDescriptorImageInfo image_3d_infos[32]; + uint32_t image_cube_write_count = 0; + VkDescriptorImageInfo image_cube_infos[32]; + } update_set_info_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 723fa8d07..f04ec1ad3 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -69,7 +69,8 @@ bool VulkanCommandProcessor::SetupContext() { // Initialize the state machine caches. buffer_cache_ = std::make_unique(register_file_, device_, kDefaultBufferCacheCapacity); - texture_cache_ = std::make_unique(register_file_, device_); + texture_cache_ = + std::make_unique(register_file_, &trace_writer_, device_); pipeline_cache_ = std::make_unique( register_file_, device_, buffer_cache_->constant_descriptor_set_layout(), texture_cache_->texture_descriptor_set_layout()); @@ -472,68 +473,18 @@ bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES - bool any_failed = false; - - // VS and PS samplers are shared, but may be used exclusively. - // We walk each and setup lazily. - bool has_setup_sampler[32] = {false}; - - // Vertex texture samplers. 
- for (auto& texture_binding : vertex_shader->texture_bindings()) { - if (has_setup_sampler[texture_binding.fetch_constant]) { - continue; - } - has_setup_sampler[texture_binding.fetch_constant] = true; - any_failed = - !PopulateSampler(command_buffer, texture_binding) || any_failed; + auto descriptor_set = texture_cache_->PrepareTextureSet( + command_buffer, vertex_shader->texture_bindings(), + pixel_shader->texture_bindings()); + if (!descriptor_set) { + // Unable to bind set. + return false; } - // Pixel shader texture sampler. - for (auto& texture_binding : pixel_shader->texture_bindings()) { - if (has_setup_sampler[texture_binding.fetch_constant]) { - continue; - } - has_setup_sampler[texture_binding.fetch_constant] = true; - any_failed = - !PopulateSampler(command_buffer, texture_binding) || any_failed; - } - - return !any_failed; -} - -bool VulkanCommandProcessor::PopulateSampler( - VkCommandBuffer command_buffer, - const Shader::TextureBinding& texture_binding) { - auto& regs = *register_file_; - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - texture_binding.fetch_constant * 6; - auto group = reinterpret_cast(®s.values[r]); - auto& fetch = group->texture_fetch; - - // Disabled? - // TODO(benvanik): reset sampler. - if (!fetch.type) { - return true; - } - assert_true(fetch.type == 0x2); - - TextureInfo texture_info; - if (!TextureInfo::Prepare(fetch, &texture_info)) { - XELOGE("Unable to parse texture fetcher info"); - return true; // invalid texture used - } - SamplerInfo sampler_info; - if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr, - &sampler_info)) { - XELOGE("Unable to parse sampler info"); - return true; // invalid texture used - } - - trace_writer_.WriteMemoryRead(texture_info.guest_address, - texture_info.input_length); - - // TODO(benvanik): texture cache lookup. - // TODO(benvanik): bind or return so PopulateSamplers can batch. + // Bind samplers/textures. 
+ vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_cache_->pipeline_layout(), 1, 1, + &descriptor_set, 0, nullptr); return true; } diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 179c31a73..43aec9edd 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -76,8 +76,6 @@ class VulkanCommandProcessor : public CommandProcessor { bool PopulateSamplers(VkCommandBuffer command_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader); - bool PopulateSampler(VkCommandBuffer command_buffer, - const Shader::TextureBinding& texture_binding); bool IssueCopy() override; xe::ui::vulkan::VulkanDevice* device_ = nullptr; diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index aa9c84c72..23dffd6c6 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -278,7 +278,8 @@ VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext* graphics_context) sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; sampler_info.mipLodBias = 0.0f; sampler_info.anisotropyEnable = VK_FALSE; - sampler_info.maxAnisotropy = 1; + sampler_info.maxAnisotropy = 1.0f; + sampler_info.compareEnable = VK_FALSE; sampler_info.compareOp = VK_COMPARE_OP_NEVER; sampler_info.minLod = 0.0f; sampler_info.maxLod = 0.0f; From 5e31c51a3dde601b6aa5f79351469984cea64668 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 21 Feb 2016 20:03:51 -0800 Subject: [PATCH 063/145] Textures are now in descriptor set 1. 
--- src/xenia/gpu/spirv_shader_translator.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 8c2057242..ea5c176c8 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -194,13 +194,13 @@ void SpirvShaderTranslator::StartTranslation() { img_s[i], xe::format_string("images%dD", i + 1).c_str()); b.addDecoration(img_[i], spv::Decoration::DecorationBlock); - b.addDecoration(img_[i], spv::Decoration::DecorationDescriptorSet, 2); + b.addDecoration(img_[i], spv::Decoration::DecorationDescriptorSet, 1); b.addDecoration(img_[i], spv::Decoration::DecorationBinding, i + 1); } samplers_ = b.createVariable(spv::StorageClass::StorageClassUniformConstant, samplers_s, "samplers"); b.addDecoration(samplers_, spv::Decoration::DecorationBlock); - b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 2); + b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 1); b.addDecoration(samplers_, spv::Decoration::DecorationBinding, 0); // Interpolators. From 06d5a286156c8b67611ca64855ff39670e2b4eaa Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 22:27:23 -0600 Subject: [PATCH 064/145] Fix texture fetches --- src/xenia/gpu/spirv_shader_translator.cc | 49 +++++++++++++++++------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index ea5c176c8..a9882bb4c 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -620,17 +620,33 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( uint32_t dim_idx = 0; switch (instr.dimension) { case TextureDimension::k1D: + src = b.createCompositeExtract(src, float_type_, 0); dim_idx = 0; break; - case TextureDimension::k2D: + case TextureDimension::k2D: { + auto s0 = b.createCompositeExtract(src, float_type_, 0); + auto s1 = b.createCompositeExtract(src, float_type_, 1); + src = b.createCompositeConstruct(vec2_float_type_, + std::vector({s0, s1})); dim_idx = 1; - break; - case TextureDimension::k3D: + } break; + case TextureDimension::k3D: { + auto s0 = b.createCompositeExtract(src, float_type_, 0); + auto s1 = b.createCompositeExtract(src, float_type_, 1); + auto s2 = b.createCompositeExtract(src, float_type_, 2); + src = b.createCompositeConstruct(vec3_float_type_, + std::vector({s0, s1, s2})); dim_idx = 2; - break; - case TextureDimension::kCube: + } break; + case TextureDimension::kCube: { + auto s0 = b.createCompositeExtract(src, float_type_, 0); + auto s1 = b.createCompositeExtract(src, float_type_, 1); + auto s2 = b.createCompositeExtract(src, float_type_, 2); + auto s3 = b.createCompositeExtract(src, float_type_, 3); + src = b.createCompositeConstruct(vec4_float_type_, + std::vector({s0, s1, s2, s3})); dim_idx = 3; - break; + } break; default: assert_unhandled_case(instr.dimension); } @@ -638,19 +654,24 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( switch (instr.opcode) { case FetchOpcode::kTextureFetch: { auto image_index = 
b.makeUintConstant(instr.operands[1].storage_index); - auto image_ptr = - b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, - img_[dim_idx], std::vector({image_index})); - auto sampler_ptr = - b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, - samplers_, std::vector({image_index})); + auto image_ptr = b.createAccessChain( + spv::StorageClass::StorageClassUniformConstant, img_[dim_idx], + std::vector({b.makeUintConstant(0), image_index})); + auto sampler_ptr = b.createAccessChain( + spv::StorageClass::StorageClassUniformConstant, samplers_, + std::vector({b.makeUintConstant(0), image_index})); auto image = b.createLoad(image_ptr); auto sampler = b.createLoad(sampler_ptr); auto tex = b.createBinOp(spv::Op::OpSampledImage, b.getImageType(image), image, sampler); - dest = b.createBinOp(spv::Op::OpImageSampleImplicitLod, vec4_float_type_, - tex, src); + + spv::Builder::TextureParameters params = {0}; + params.coords = src; + params.sampler = sampler; + dest = b.createTextureCall(spv::Decoration::DecorationInvariant, + vec4_float_type_, false, false, false, false, + false, params); } break; default: // TODO: the rest of these From 08a287d2ad20395ab71ae79df16709e92e65a3bc Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 21 Feb 2016 22:30:31 -0600 Subject: [PATCH 065/145] Whoops --- src/xenia/gpu/spirv_shader_translator.cc | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index a9882bb4c..bdd4c7e97 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -639,12 +639,6 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( dim_idx = 2; } break; case TextureDimension::kCube: { - auto s0 = b.createCompositeExtract(src, float_type_, 0); - auto s1 = b.createCompositeExtract(src, float_type_, 1); - auto s2 = b.createCompositeExtract(src, float_type_, 2); - auto s3 = b.createCompositeExtract(src, float_type_, 3); - src = b.createCompositeConstruct(vec4_float_type_, - std::vector({s0, s1, s2, s3})); dim_idx = 3; } break; default: From 6109e0b03aa604f2be5e3128402d98f8d46fdaff Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Thu, 25 Feb 2016 17:41:41 -0600 Subject: [PATCH 066/145] Fix incorrect images/samplers definitions ps_param_gen and fix interpolators being copied incorrectly --- src/xenia/gpu/spirv_shader_translator.cc | 112 ++++++++++++++--------- src/xenia/gpu/spirv_shader_translator.h | 1 - 2 files changed, 69 insertions(+), 44 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index bdd4c7e97..2cf137b43 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -164,6 +164,7 @@ void SpirvShaderTranslator::StartTranslation() { push_constants_type, "push_consts"); // Texture bindings + Id samplers_t = b.makeSamplerType(); Id img_t[] = { b.makeImageType(float_type_, spv::Dim::Dim1D, false, false, false, 1, spv::ImageFormat::ImageFormatUnknown), @@ -173,35 +174,24 @@ void SpirvShaderTranslator::StartTranslation() { spv::ImageFormat::ImageFormatUnknown), b.makeImageType(float_type_, spv::Dim::DimCube, false, false, false, 1, 
spv::ImageFormat::ImageFormatUnknown)}; - Id samplers_t = b.makeSamplerType(); + Id samplers_a = b.makeArrayType(samplers_t, b.makeUintConstant(32), 0); Id img_a_t[] = {b.makeArrayType(img_t[0], b.makeUintConstant(32), 0), b.makeArrayType(img_t[1], b.makeUintConstant(32), 0), b.makeArrayType(img_t[2], b.makeUintConstant(32), 0), b.makeArrayType(img_t[3], b.makeUintConstant(32), 0)}; - Id samplers_a = b.makeArrayType(samplers_t, b.makeUintConstant(32), 0); - - Id img_s[] = { - b.makeStructType({img_a_t[0]}, "img1D_type"), - b.makeStructType({img_a_t[1]}, "img2D_type"), - b.makeStructType({img_a_t[2]}, "img3D_type"), - b.makeStructType({img_a_t[3]}, "imgCube_type"), - }; - Id samplers_s = b.makeStructType({samplers_a}, "samplers_type"); + samplers_ = b.createVariable(spv::StorageClass::StorageClassUniform, + samplers_a, "samplers"); + b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 1); + b.addDecoration(samplers_, spv::Decoration::DecorationBinding, 0); for (int i = 0; i < 4; i++) { - img_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant, - img_s[i], - xe::format_string("images%dD", i + 1).c_str()); - b.addDecoration(img_[i], spv::Decoration::DecorationBlock); + img_[i] = + b.createVariable(spv::StorageClass::StorageClassUniform, img_a_t[i], + xe::format_string("images%dD", i + 1).c_str()); b.addDecoration(img_[i], spv::Decoration::DecorationDescriptorSet, 1); b.addDecoration(img_[i], spv::Decoration::DecorationBinding, i + 1); } - samplers_ = b.createVariable(spv::StorageClass::StorageClassUniformConstant, - samplers_s, "samplers"); - b.addDecoration(samplers_, spv::Decoration::DecorationBlock); - b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 1); - b.addDecoration(samplers_, spv::Decoration::DecorationBinding, 0); // Interpolators. 
Id interpolators_type = @@ -255,7 +245,6 @@ void SpirvShaderTranslator::StartTranslation() { interpolators_ = b.createVariable(spv::StorageClass::StorageClassOutput, interpolators_type, "interpolators"); - b.addDecoration(interpolators_, spv::Decoration::DecorationNoPerspective); b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); pos_ = b.createVariable(spv::StorageClass::StorageClassOutput, @@ -266,22 +255,68 @@ void SpirvShaderTranslator::StartTranslation() { // Pixel inputs from vertex shader. interpolators_ = b.createVariable(spv::StorageClass::StorageClassInput, interpolators_type, "interpolators"); - b.addDecoration(interpolators_, spv::Decoration::DecorationNoPerspective); b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); // Pixel fragment outputs (one per render target). Id frag_outputs_type = b.makeArrayType(vec4_float_type_, b.makeUintConstant(4), 0); frag_outputs_ = b.createVariable(spv::StorageClass::StorageClassOutput, - frag_outputs_type, "o"); + frag_outputs_type, "oC"); b.addDecoration(frag_outputs_, spv::Decoration::DecorationLocation, 0); // TODO(benvanik): frag depth, etc. // Copy interpolators to r[0..16]. - b.createNoResultOp(spv::Op::OpCopyMemorySized, - {registers_ptr_, interpolators_, - b.makeUintConstant(16 * 4 * sizeof(float))}); + // TODO: Need physical addressing in order to do this. 
+ // b.createNoResultOp(spv::Op::OpCopyMemorySized, + // {registers_ptr_, interpolators_, + // b.makeUintConstant(16 * 4 * sizeof(float))}); + for (int i = 0; i < 16; i++) { + // For now, copy interpolators register-by-register :/ + auto idx = b.makeUintConstant(i); + auto i_a = b.createAccessChain(spv::StorageClass::StorageClassInput, + interpolators_, std::vector({idx})); + auto r_a = b.createAccessChain(spv::StorageClass::StorageClassFunction, + registers_ptr_, std::vector({idx})); + b.createNoResultOp(spv::Op::OpCopyMemory, std::vector({r_a, i_a})); + } + + // Setup ps_param_gen + auto ps_param_gen_idx_ptr = b.createAccessChain( + spv::StorageClass::StorageClassPushConstant, push_consts_, + std::vector({b.makeUintConstant(3)})); + auto ps_param_gen_idx = b.createLoad(ps_param_gen_idx_ptr); + + auto frag_coord = b.createVariable(spv::StorageClass::StorageClassInput, + vec4_float_type_, "gl_FragCoord"); + b.addDecoration(frag_coord, spv::Decoration::DecorationBuiltIn, + spv::BuiltIn::BuiltInFragCoord); + + auto point_coord = b.createVariable(spv::StorageClass::StorageClassInput, + vec2_float_type_, "gl_PointCoord"); + b.addDecoration(point_coord, spv::Decoration::DecorationBuiltIn, + spv::BuiltIn::BuiltInPointCoord); + auto param = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, + {frag_coord, point_coord, 0, 1, 4, 5}); + /* + // TODO: gl_FrontFacing + auto param_x = b.createCompositeExtract(param, float_type_, 0); + auto param_x_inv = b.createBinOp(spv::Op::OpFMul, float_type_, param_x, + b.makeFloatConstant(-1.f)); + param_x = b.createCompositeInsert(param_x_inv, param, vec4_float_type_, 0); + */ + + auto cond = b.createBinOp(spv::Op::OpINotEqual, bool_type_, + ps_param_gen_idx, b.makeUintConstant(-1)); + spv::Builder::If ifb(cond, b); + + // Index is specified + auto reg_ptr = b.createAccessChain(spv::StorageClass::StorageClassFunction, + registers_ptr_, + std::vector({ps_param_gen_idx})); + b.createStore(param, reg_ptr); + + ifb.makeEndIf(); } } @@ 
-620,22 +655,12 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( uint32_t dim_idx = 0; switch (instr.dimension) { case TextureDimension::k1D: - src = b.createCompositeExtract(src, float_type_, 0); dim_idx = 0; break; case TextureDimension::k2D: { - auto s0 = b.createCompositeExtract(src, float_type_, 0); - auto s1 = b.createCompositeExtract(src, float_type_, 1); - src = b.createCompositeConstruct(vec2_float_type_, - std::vector({s0, s1})); dim_idx = 1; } break; case TextureDimension::k3D: { - auto s0 = b.createCompositeExtract(src, float_type_, 0); - auto s1 = b.createCompositeExtract(src, float_type_, 1); - auto s2 = b.createCompositeExtract(src, float_type_, 2); - src = b.createCompositeConstruct(vec3_float_type_, - std::vector({s0, s1, s2})); dim_idx = 2; } break; case TextureDimension::kCube: { @@ -648,21 +673,22 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( switch (instr.opcode) { case FetchOpcode::kTextureFetch: { auto image_index = b.makeUintConstant(instr.operands[1].storage_index); - auto image_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniformConstant, img_[dim_idx], - std::vector({b.makeUintConstant(0), image_index})); - auto sampler_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniformConstant, samplers_, - std::vector({b.makeUintConstant(0), image_index})); + auto image_ptr = + b.createAccessChain(spv::StorageClass::StorageClassUniform, + img_[dim_idx], std::vector({image_index})); + auto sampler_ptr = + b.createAccessChain(spv::StorageClass::StorageClassUniform, samplers_, + std::vector({image_index})); auto image = b.createLoad(image_ptr); auto sampler = b.createLoad(sampler_ptr); - auto tex = b.createBinOp(spv::Op::OpSampledImage, b.getImageType(image), + auto sampled_image_type = b.makeSampledImageType(b.getImageType(image)); + auto tex = b.createBinOp(spv::Op::OpSampledImage, sampled_image_type, image, sampler); spv::Builder::TextureParameters params = {0}; params.coords = src; - 
params.sampler = sampler; + params.sampler = tex; dest = b.createTextureCall(spv::Decoration::DecorationInvariant, vec4_float_type_, false, false, false, false, false, params); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 0d8b1e14c..ed4356322 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -108,7 +108,6 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0; spv::Id vec4_uint_type_ = 0; spv::Id vec4_bool_type_ = 0; - spv::Id sampled_image_type_ = 0; // Constants. spv::Id vec4_float_zero_ = 0, vec4_float_one_ = 0; From c648e545395a6bb4fac62c9f0a68af200a787b7b Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 27 Feb 2016 11:30:50 -0600 Subject: [PATCH 067/145] Short-circuit draw calls if the render target's pitch is 0 --- src/xenia/gpu/vulkan/vulkan_command_processor.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index f04ec1ad3..1bd05f16a 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -178,6 +178,11 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, return IssueCopy(); } + if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) { + // Doesn't actually draw. + return true; + } + // TODO(benvanik): move to CP or to host (trace dump, etc). if (FLAGS_vulkan_renderdoc_capture_all && device_->is_renderdoc_attached()) { device_->BeginRenderDocFrameCapture(); From 48cf270724aaa897a90ccdf62257c59d4a12f23b Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sat, 27 Feb 2016 16:21:37 -0600 Subject: [PATCH 068/145] Use spv::NoPrecision instead of DecorationInvariant Set samplers/images as uniform constants --- src/xenia/gpu/spirv_shader_translator.cc | 167 +++++++++++------------ 1 file changed, 78 insertions(+), 89 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 2cf137b43..a45294415 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -48,9 +48,9 @@ void SpirvShaderTranslator::StartTranslation() { } spv::Block* function_block = nullptr; - translated_main_ = b.makeFunctionEntry(spv::Decoration::DecorationInvariant, - b.makeVoidType(), "translated_main", - {}, {}, &function_block); + translated_main_ = + b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "translated_main", + {}, {}, &function_block); bool_type_ = b.makeBoolType(); float_type_ = b.makeFloatType(32); @@ -181,14 +181,14 @@ void SpirvShaderTranslator::StartTranslation() { b.makeArrayType(img_t[2], b.makeUintConstant(32), 0), b.makeArrayType(img_t[3], b.makeUintConstant(32), 0)}; - samplers_ = b.createVariable(spv::StorageClass::StorageClassUniform, + samplers_ = b.createVariable(spv::StorageClass::StorageClassUniformConstant, samplers_a, "samplers"); b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 1); b.addDecoration(samplers_, spv::Decoration::DecorationBinding, 0); for (int i = 0; i < 4; i++) { - img_[i] = - b.createVariable(spv::StorageClass::StorageClassUniform, img_a_t[i], - xe::format_string("images%dD", i + 1).c_str()); + img_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant, + img_a_t[i], + xe::format_string("images%dD", i + 1).c_str()); b.addDecoration(img_[i], spv::Decoration::DecorationDescriptorSet, 1); b.addDecoration(img_[i], spv::Decoration::DecorationBinding, i + 1); } @@ -264,6 +264,11 @@ void SpirvShaderTranslator::StartTranslation() { frag_outputs_type, "oC"); 
b.addDecoration(frag_outputs_, spv::Decoration::DecorationLocation, 0); + Id frag_depth = b.createVariable(spv::StorageClass::StorageClassOutput, + vec4_float_type_, "gl_FragDepth"); + b.addDecoration(frag_depth, spv::Decoration::DecorationBuiltIn, + spv::BuiltIn::BuiltInFragDepth); + // TODO(benvanik): frag depth, etc. // Copy interpolators to r[0..16]. @@ -365,8 +370,7 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { p_w = b.createTriOp(spv::Op::OpSelect, float_type_, c_w, p_w, p_w_inv); // pos.xyz = vtx_fmt.xyz != 0.0 ? pos.xyz / pos.w : pos.xyz - auto p_all_w = b.smearScalar(spv::Decoration::DecorationInvariant, p_w, - vec4_float_type_); + auto p_all_w = b.smearScalar(spv::NoPrecision, p_w, vec4_float_type_); auto p_inv = b.createBinOp(spv::Op::OpFDiv, vec4_float_type_, p, p_all_w); p = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, p_inv, p); @@ -654,9 +658,9 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( uint32_t dim_idx = 0; switch (instr.dimension) { - case TextureDimension::k1D: + case TextureDimension::k1D: { dim_idx = 0; - break; + } break; case TextureDimension::k2D: { dim_idx = 1; } break; @@ -674,13 +678,15 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( case FetchOpcode::kTextureFetch: { auto image_index = b.makeUintConstant(instr.operands[1].storage_index); auto image_ptr = - b.createAccessChain(spv::StorageClass::StorageClassUniform, + b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, img_[dim_idx], std::vector({image_index})); auto sampler_ptr = - b.createAccessChain(spv::StorageClass::StorageClassUniform, samplers_, - std::vector({image_index})); + b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, + samplers_, std::vector({image_index})); auto image = b.createLoad(image_ptr); auto sampler = b.createLoad(sampler_ptr); + assert(b.isImageType(b.getTypeId(image))); + assert(b.isSamplerType(b.getTypeId(sampler))); auto sampled_image_type = 
b.makeSampledImageType(b.getImageType(image)); auto tex = b.createBinOp(spv::Op::OpSampledImage, sampled_image_type, @@ -689,9 +695,8 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( spv::Builder::TextureParameters params = {0}; params.coords = src; params.sampler = tex; - dest = b.createTextureCall(spv::Decoration::DecorationInvariant, - vec4_float_type_, false, false, false, false, - false, params); + dest = b.createTextureCall(spv::NoPrecision, vec4_float_type_, false, + false, false, false, false, params); } break; default: // TODO: the rest of these @@ -780,15 +785,15 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( } break; case AluVectorOpcode::kFloor: { - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, vec4_float_type_, - spv::GLSLstd450::kFloor, {sources[0]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, + spv::GLSLstd450::kFloor, + {sources[0]}); } break; case AluVectorOpcode::kFrc: { - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, vec4_float_type_, - spv::GLSLstd450::kFract, {sources[0]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, + spv::GLSLstd450::kFract, + {sources[0]}); } break; case AluVectorOpcode::kKillEq: { @@ -883,27 +888,26 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( b.makeFloatConstant(0.5f)); addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, addr); addr = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, int_type_, - spv::GLSLstd450::kSClamp, + spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp, {addr, b.makeIntConstant(-256), b.makeIntConstant(255)}); b.createStore(addr, a0_); // dest = src0 >= src1 ? 
src0 : src1 - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, vec4_float_type_, - spv::GLSLstd450::kFMax, {sources[0], sources[1]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, + spv::GLSLstd450::kFMax, + {sources[0], sources[1]}); } break; case AluVectorOpcode::kMax: { - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, vec4_float_type_, - spv::GLSLstd450::kFMax, {sources[0], sources[1]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, + spv::GLSLstd450::kFMax, + {sources[0], sources[1]}); } break; case AluVectorOpcode::kMin: { - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, vec4_float_type_, - spv::GLSLstd450::kFMin, {sources[0], sources[1]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, + spv::GLSLstd450::kFMin, + {sources[0], sources[1]}); } break; case AluVectorOpcode::kMul: { @@ -928,8 +932,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, b.makeFloatConstant(1.f)); - auto s0 = b.smearScalar(spv::Decoration::DecorationInvariant, s0_x, - vec4_float_type_); + auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_); dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, vec4_float_zero_, s0); @@ -952,8 +955,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, b.makeFloatConstant(1.f)); - auto s0 = b.smearScalar(spv::Decoration::DecorationInvariant, s0_x, - vec4_float_type_); + auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_); dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, vec4_float_zero_, s0); @@ -976,8 +978,7 @@ void 
SpirvShaderTranslator::ProcessVectorAluInstruction( auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, b.makeFloatConstant(1.f)); - auto s0 = b.smearScalar(spv::Decoration::DecorationInvariant, s0_x, - vec4_float_type_); + auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_); dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, vec4_float_zero_, s0); @@ -1000,8 +1001,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, b.makeFloatConstant(1.f)); - auto s0 = b.smearScalar(spv::Decoration::DecorationInvariant, s0_x, - vec4_float_type_); + auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_); dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, vec4_float_zero_, s0); @@ -1040,9 +1040,8 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( } break; case AluVectorOpcode::kTrunc: { - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, vec4_float_type_, - GLSLstd450::kTrunc, {sources[0]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, + GLSLstd450::kTrunc, {sources[0]}); } break; default: @@ -1124,27 +1123,23 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( case AluScalarOpcode::kCos: { // dest = cos(src0) - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kCos, - {sources[0]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + GLSLstd450::kCos, {sources[0]}); } break; case AluScalarOpcode::kExp: { - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kExp2, - {sources[0]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + GLSLstd450::kExp2, {sources[0]}); } break; case 
AluScalarOpcode::kFloors: { - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFloor, - {sources[0]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + GLSLstd450::kFloor, {sources[0]}); } break; case AluScalarOpcode::kFrcs: { - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFract, - {sources[0]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + GLSLstd450::kFract, {sources[0]}); } break; case AluScalarOpcode::kKillsEq: { @@ -1239,23 +1234,21 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( case AluScalarOpcode::kLog: { auto log = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, float_type_, - spv::GLSLstd450::kLog2, {sources[0]}); + spv::NoPrecision, float_type_, spv::GLSLstd450::kLog2, {sources[0]}); } break; case AluScalarOpcode::kMaxAsf: { auto addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, sources[0]); addr = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, int_type_, - spv::GLSLstd450::kSClamp, + spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp, {addr, b.makeIntConstant(-256), b.makeIntConstant(255)}); b.createStore(addr, a0_); // dest = src0 >= src1 ? 
src0 : src1 - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, float_type_, - spv::GLSLstd450::kFMax, {sources[0], sources[1]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + spv::GLSLstd450::kFMax, + {sources[0], sources[1]}); } break; case AluScalarOpcode::kMaxAs: { @@ -1264,29 +1257,28 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( b.makeFloatConstant(0.5f)); addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, addr); addr = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, int_type_, - spv::GLSLstd450::kSClamp, + spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp, {addr, b.makeIntConstant(-256), b.makeIntConstant(255)}); b.createStore(addr, a0_); // dest = src0 >= src1 ? src0 : src1 - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, float_type_, - spv::GLSLstd450::kFMax, {sources[0], sources[1]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + spv::GLSLstd450::kFMax, + {sources[0], sources[1]}); } break; case AluScalarOpcode::kMaxs: { // dest = max(src0, src1) - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFMax, - {sources[0], sources[1]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + GLSLstd450::kFMax, + {sources[0], sources[1]}); } break; case AluScalarOpcode::kMins: { // dest = min(src0, src1) - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFMin, - {sources[0], sources[1]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + GLSLstd450::kFMin, + {sources[0], sources[1]}); } break; case AluScalarOpcode::kMuls: @@ -1326,8 +1318,8 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], b.makeFloatConstant(0.f)); auto d = CreateGlslStd450InstructionCall( - 
spv::Decoration::DecorationInvariant, vec4_float_type_, - spv::GLSLstd450::kInverseSqrt, {sources[0]}); + spv::NoPrecision, vec4_float_type_, spv::GLSLstd450::kInverseSqrt, + {sources[0]}); dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, b.makeFloatConstant(0.f), d); } break; @@ -1439,7 +1431,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( b.createStore(c, p0_); dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFMax, + spv::NoPrecision, float_type_, GLSLstd450::kFMax, {sources[0], b.makeFloatConstant(0.f)}); } break; @@ -1451,9 +1443,8 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } break; case AluScalarOpcode::kSin: { - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kSin, - {sources[0]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + GLSLstd450::kSin, {sources[0]}); } break; case AluScalarOpcode::kSubs: @@ -1468,9 +1459,8 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } break; case AluScalarOpcode::kTruncs: { - dest = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kTrunc, - {sources[0]}); + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + GLSLstd450::kTrunc, {sources[0]}); } break; default: @@ -1570,8 +1560,7 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { if (op.is_absolute_value) { storage_value = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, storage_type, GLSLstd450::kFAbs, - {storage_value}); + spv::NoPrecision, storage_type, GLSLstd450::kFAbs, {storage_value}); } if (op.is_negated) { storage_value = @@ -1739,14 +1728,14 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, constituents.push_back(b.makeFloatConstant(0.f)); } - source_value_id = b.createConstructor(spv::Decoration::DecorationInvariant, - constituents, 
storage_type); + source_value_id = + b.createConstructor(spv::NoPrecision, constituents, storage_type); } // Clamp the input value. if (result.is_clamped) { source_value_id = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, b.getTypeId(source_value_id), + spv::NoPrecision, b.getTypeId(source_value_id), spv::GLSLstd450::kFClamp, {source_value_id, b.makeFloatConstant(0.0), b.makeFloatConstant(1.0)}); } From 740c70f270c4654d3b4cf30dc55d888f45866209 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 1 Mar 2016 12:52:34 -0600 Subject: [PATCH 069/145] Scalar logc, fix log --- src/xenia/gpu/spirv_shader_translator.cc | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index a45294415..600e3fe56 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -729,6 +729,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( const ParsedAluInstruction& instr) { auto& b = *builder_; + // TODO: If we have identical operands, reuse previous one. 
Id sources[3] = {0}; Id dest = 0; for (size_t i = 0; i < instr.operand_count; i++) { @@ -899,12 +900,24 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( } break; case AluVectorOpcode::kMax: { + if (sources[0] == sources[1]) { + // mov dst, src + dest = sources[0]; + break; + } + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, spv::GLSLstd450::kFMax, {sources[0], sources[1]}); } break; case AluVectorOpcode::kMin: { + if (sources[0] == sources[1]) { + // mov dst, src + dest = sources[0]; + break; + } + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, spv::GLSLstd450::kFMin, {sources[0], sources[1]}); @@ -1065,6 +1078,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( const ParsedAluInstruction& instr) { auto& b = *builder_; + // TODO: If we have identical operands, reuse previous one. Id sources[3] = {0}; Id dest = 0; for (size_t i = 0, x = 0; i < instr.operand_count; i++) { @@ -1230,10 +1244,17 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } break; case AluScalarOpcode::kLogc: { + auto t = CreateGlslStd450InstructionCall( + spv::NoPrecision, float_type_, spv::GLSLstd450::kLog2, {sources[0]}); + + // FIXME: We don't check to see if t == -INF, we just check for INF + auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, t); + dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, + b.makeFloatConstant(-FLT_MAX), t); } break; case AluScalarOpcode::kLog: { - auto log = CreateGlslStd450InstructionCall( + dest = CreateGlslStd450InstructionCall( spv::NoPrecision, float_type_, spv::GLSLstd450::kLog2, {sources[0]}); } break; From 38094ac81955170816b776202299c95ad971fe57 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Wed, 2 Mar 2016 21:16:38 -0600 Subject: [PATCH 070/145] Updated local clang-format. 
--- src/xenia/gpu/shader_translator.cc | 4 ++-- src/xenia/gpu/spirv_shader_translator.cc | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index a89be80f5..6e8b69cea 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -672,11 +672,11 @@ void ShaderTranslator::TranslateExecInstructions( static_cast(ucode_dwords_[instr_offset * 3] & 0x1F); if (fetch_opcode == FetchOpcode::kVertexFetch) { auto& op = *reinterpret_cast( - ucode_dwords_ + instr_offset * 3); + ucode_dwords_ + instr_offset * 3); TranslateVertexFetchInstruction(op); } else { auto& op = *reinterpret_cast( - ucode_dwords_ + instr_offset * 3); + ucode_dwords_ + instr_offset * 3); TranslateTextureFetchInstruction(op); } } else { diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 600e3fe56..57af04e24 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -237,9 +237,8 @@ void SpirvShaderTranslator::StartTranslation() { b.addDecoration(attrib_var, spv::Decoration::DecorationLocation, attrib.attrib_index); - vertex_binding_map_[binding.fetch_constant][attrib.fetch_instr - .attributes.offset] = - attrib_var; + vertex_binding_map_[binding.fetch_constant] + [attrib.fetch_instr.attributes.offset] = attrib_var; } } @@ -636,9 +635,8 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( // Operand 0 is the index // Operand 1 is the binding // TODO: Indexed fetch - auto vertex_ptr = - vertex_binding_map_[instr.operands[1].storage_index][instr.attributes - .offset]; + auto vertex_ptr = vertex_binding_map_[instr.operands[1].storage_index] + [instr.attributes.offset]; assert_not_zero(vertex_ptr); auto vertex = b.createLoad(vertex_ptr); From 8ca9c6f6f4f6acfcf8b593290deaa858a069e6c7 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Thu, 3 Mar 2016 20:11:23 -0600 Subject: [PATCH 071/145] Fix spirv-tools incorrect includes --- third_party/spirv-tools.lua | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/third_party/spirv-tools.lua b/third_party/spirv-tools.lua index 4218ff08e..afa3cdef5 100644 --- a/third_party/spirv-tools.lua +++ b/third_party/spirv-tools.lua @@ -13,9 +13,9 @@ project("spirv-tools") "spirv-tools/include", }) files({ - "spirv-tools/external/include/headers/GLSL.std.450.h", - "spirv-tools/external/include/headers/OpenCL.std.h", - "spirv-tools/external/include/headers/spirv.h", + "spirv-tools/include/spirv/GLSL.std.450.h", + "spirv-tools/include/spirv/OpenCL.std.h", + "spirv-tools/include/spirv/spirv.h", "spirv-tools/include/spirv-tools/libspirv.h", "spirv-tools/source/assembly_grammar.cpp", "spirv-tools/source/assembly_grammar.h", From af7fc20c38f6e3f6cbbd013f575bcbecc320667a Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 5 Mar 2016 22:09:18 -0600 Subject: [PATCH 072/145] Beginnings of texture conversion/uploads --- src/xenia/gpu/vulkan/texture_cache.cc | 359 ++++++++++++++---- src/xenia/gpu/vulkan/texture_cache.h | 59 ++- .../gpu/vulkan/vulkan_command_processor.cc | 82 +++- .../gpu/vulkan/vulkan_command_processor.h | 8 +- 4 files changed, 405 insertions(+), 103 deletions(-) diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index 4e93a46ca..8a8e2e2f4 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -81,83 +81,304 @@ TextureCache::TextureCache(RegisterFile* register_file, nullptr, &texture_descriptor_set_layout_); CheckResult(err, "vkCreateDescriptorSetLayout"); - SetupGridImages(); + // Allocate memory for a staging buffer. 
+ VkBufferCreateInfo staging_buffer_info; + staging_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + staging_buffer_info.pNext = nullptr; + staging_buffer_info.flags = 0; + staging_buffer_info.size = 2048 * 2048 * 4; // 16MB buffer + staging_buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + staging_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + staging_buffer_info.queueFamilyIndexCount = 0; + staging_buffer_info.pQueueFamilyIndices = nullptr; + err = + vkCreateBuffer(*device_, &staging_buffer_info, nullptr, &staging_buffer_); + CheckResult(err, "vkCreateBuffer"); + + if (err == VK_SUCCESS) { + VkMemoryRequirements staging_buffer_reqs; + vkGetBufferMemoryRequirements(*device_, staging_buffer_, + &staging_buffer_reqs); + staging_buffer_mem_ = device_->AllocateMemory(staging_buffer_reqs); + assert_not_null(staging_buffer_mem_); + + err = vkBindBufferMemory(*device_, staging_buffer_, staging_buffer_mem_, 0); + CheckResult(err, "vkBindBufferMemory"); + + // Upload a grid into the staging buffer. + uint32_t* gpu_data = nullptr; + err = + vkMapMemory(*device_, staging_buffer_mem_, 0, staging_buffer_info.size, + 0, reinterpret_cast(&gpu_data)); + CheckResult(err, "vkMapMemory"); + + int width = 2048; + int height = 2048; + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + gpu_data[y * width + x] = + ((y % 32 < 16) ^ (x % 32 >= 16)) ? 
0xFF0000FF : 0xFFFFFFFF; + } + } + + vkUnmapMemory(*device_, staging_buffer_mem_); + } } TextureCache::~TextureCache() { - vkDestroyImageView(*device_, grid_image_2d_view_, nullptr); - vkDestroyImage(*device_, grid_image_2d_, nullptr); - vkFreeMemory(*device_, grid_image_2d_memory_, nullptr); - vkDestroyDescriptorSetLayout(*device_, texture_descriptor_set_layout_, nullptr); vkDestroyDescriptorPool(*device_, descriptor_pool_, nullptr); } -void TextureCache::SetupGridImages() { - VkImageCreateInfo image_info; +TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, + VkCommandBuffer command_buffer) { + // Run a tight loop to scan for an existing texture. + auto texture_hash = texture_info.hash(); + for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) { + if (it->second->texture_info == texture_info) { + return it->second.get(); + } + } + + // Though we didn't find an exact match, that doesn't mean we're out of the + // woods yet. This texture could either be a portion of another texture or + // vice versa. Check for overlap before uploading. + for (auto it = textures_.begin(); it != textures_.end(); ++it) { + } + + if (!command_buffer) { + // Texture not found and no command buffer was passed allowing us to upload + // a new one. + return nullptr; + } + + // Create a new texture and cache it. + auto texture = AllocateTexture(texture_info); + if (!texture) { + // Failed to allocate texture (out of memory?) + assert_always(); + return nullptr; + } + + if (!UploadTexture2D(command_buffer, texture, texture_info)) { + // TODO: Destroy the texture. 
+ assert_always(); + return nullptr; + } + + textures_[texture_hash] = std::unique_ptr(texture); + + return texture; +} + +TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) { + auto sampler_hash = sampler_info.hash(); + for (auto it = samplers_.find(sampler_hash); it != samplers_.end(); ++it) { + if (it->second->sampler_info == sampler_info) { + // Found a compatible sampler. + return it->second.get(); + } + } + + VkResult status = VK_SUCCESS; + + // Create a new sampler and cache it. + // TODO: Actually set the properties + VkSamplerCreateInfo sampler_create_info; + sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_create_info.pNext = nullptr; + sampler_create_info.flags = 0; + sampler_create_info.magFilter = VK_FILTER_NEAREST; + sampler_create_info.minFilter = VK_FILTER_NEAREST; + sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + sampler_create_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_create_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_create_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_create_info.mipLodBias = 0.0f; + sampler_create_info.anisotropyEnable = VK_FALSE; + sampler_create_info.maxAnisotropy = 1.0f; + sampler_create_info.compareEnable = VK_FALSE; + sampler_create_info.compareOp = VK_COMPARE_OP_ALWAYS; + sampler_create_info.minLod = 0.0f; + sampler_create_info.maxLod = 0.0f; + sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + sampler_create_info.unnormalizedCoordinates = VK_FALSE; + VkSampler vk_sampler; + status = + vkCreateSampler(*device_, &sampler_create_info, nullptr, &vk_sampler); + CheckResult(status, "vkCreateSampler"); + if (status != VK_SUCCESS) { + return nullptr; + } + + auto sampler = new Sampler(); + sampler->sampler = vk_sampler; + sampler->sampler_info = sampler_info; + samplers_[sampler_hash] = std::unique_ptr(sampler); + + return sampler; +} + +TextureCache::Texture* 
TextureCache::AllocateTexture(TextureInfo texture_info) { + // Create an image first. + VkImageCreateInfo image_info = {}; image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.pNext = nullptr; - image_info.flags = 0; - image_info.imageType = VK_IMAGE_TYPE_2D; + switch (texture_info.dimension) { + case Dimension::k1D: + image_info.imageType = VK_IMAGE_TYPE_1D; + break; + case Dimension::k2D: + image_info.imageType = VK_IMAGE_TYPE_2D; + break; + case Dimension::k3D: + image_info.imageType = VK_IMAGE_TYPE_3D; + break; + case Dimension::kCube: + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + break; + default: + assert_unhandled_case(texture_info.dimension); + return nullptr; + } + + // TODO: Format image_info.format = VK_FORMAT_R8G8B8A8_UNORM; - image_info.extent = {8, 8, 1}; + image_info.extent = {texture_info.width + 1, texture_info.height + 1, + texture_info.depth + 1}; image_info.mipLevels = 1; image_info.arrayLayers = 1; image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.tiling = VK_IMAGE_TILING_LINEAR; - image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_info.usage = + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.queueFamilyIndexCount = 0; image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - auto err = vkCreateImage(*device_, &image_info, nullptr, &grid_image_2d_); + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + VkImage image; + auto err = vkCreateImage(*device_, &image_info, nullptr, &image); CheckResult(err, "vkCreateImage"); - VkMemoryRequirements memory_requirements; - vkGetImageMemoryRequirements(*device_, grid_image_2d_, &memory_requirements); - grid_image_2d_memory_ = device_->AllocateMemory( - memory_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - err = 
vkBindImageMemory(*device_, grid_image_2d_, grid_image_2d_memory_, 0); + VkMemoryRequirements mem_requirements; + vkGetImageMemoryRequirements(*device_, image, &mem_requirements); + + // TODO: Use a circular buffer or something else to allocate this memory. + // The device has a limited amount (around 64) of memory allocations that we + // can make. + // Now that we have the size, back the image with GPU memory. + auto memory = device_->AllocateMemory(mem_requirements, 0); + err = vkBindImageMemory(*device_, image, memory, 0); CheckResult(err, "vkBindImageMemory"); + auto texture = new Texture(); + texture->format = image_info.format; + texture->image = image; + texture->memory_offset = 0; + texture->memory_size = mem_requirements.size; + texture->texture_info = texture_info; + texture->texture_memory = memory; + + // Create a default view, just for kicks. VkImageViewCreateInfo view_info; view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; view_info.pNext = nullptr; view_info.flags = 0; - view_info.image = grid_image_2d_; + view_info.image = image; view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - view_info.format = VK_FORMAT_R8G8B8A8_UNORM; + view_info.format = image_info.format; view_info.components = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, }; view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - err = vkCreateImageView(*device_, &view_info, nullptr, &grid_image_2d_view_); + VkImageView view; + err = vkCreateImageView(*device_, &view_info, nullptr, &view); CheckResult(err, "vkCreateImageView"); - - VkImageSubresource subresource; - subresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - subresource.mipLevel = 0; - subresource.arrayLayer = 0; - VkSubresourceLayout layout; - vkGetImageSubresourceLayout(*device_, grid_image_2d_, &subresource, &layout); - - void* gpu_data = nullptr; - err = vkMapMemory(*device_, grid_image_2d_memory_, 0, layout.size, 0, - &gpu_data); - CheckResult(err, 
"vkMapMemory"); - - uint32_t grid_pixels[8 * 8]; - for (int y = 0; y < 8; ++y) { - for (int x = 0; x < 8; ++x) { - grid_pixels[y * 8 + x] = - ((y % 2 == 0) ^ (x % 2 != 0)) ? 0xFFFFFFFF : 0xFF0000FF; - } + if (err == VK_SUCCESS) { + auto texture_view = std::make_unique(); + texture_view->texture = texture; + texture_view->view = view; + texture->views.push_back(std::move(texture_view)); } - std::memcpy(gpu_data, grid_pixels, sizeof(grid_pixels)); - vkUnmapMemory(*device_, grid_image_2d_memory_); + return texture; +} + +bool TextureCache::FreeTexture(Texture* texture) { + // TODO(DrChat) + return false; +} + +bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer, + Texture* dest, TextureInfo src) { + // TODO: We need to allocate memory to use as a staging buffer. We can then + // raw copy the texture from system memory into the staging buffer and use a + // shader to convert the texture into a format consumable by the host GPU. + + // Need to have unique memory for every upload for at least one frame. If we + // run out of memory, we need to flush all queued upload commands to the GPU. + + // TODO: Upload memory here. + + // Insert a memory barrier into the command buffer to ensure the upload has + // finished before we copy it into the destination texture. + VkBufferMemoryBarrier upload_barrier = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + NULL, + VK_ACCESS_HOST_WRITE_BIT, + VK_ACCESS_TRANSFER_READ_BIT, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + staging_buffer_, + 0, + 2048 * 2048 * 4, + }; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, + &upload_barrier, 0, nullptr); + + // Transition the texture into a transfer destination layout. 
+ VkImageMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = 0; + barrier.dstAccessMask = + VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_HOST_WRITE_BIT; + barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = dest->image; + barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); + + // For now, just transfer the grid we uploaded earlier into the texture. + VkBufferImageCopy copy_region; + copy_region.bufferOffset = 0; + copy_region.bufferRowLength = 0; + copy_region.bufferImageHeight = 0; + copy_region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + copy_region.imageOffset = {0, 0, 0}; + copy_region.imageExtent = {dest->texture_info.width + 1, + dest->texture_info.height + 1, + dest->texture_info.depth + 1}; + vkCmdCopyBufferToImage(command_buffer, staging_buffer_, dest->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_region); + + // Now transition the texture into a shader readonly source. + barrier.srcAccessMask = barrier.dstAccessMask; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.oldLayout = barrier.newLayout; + barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); + + return true; } VkDescriptorSet TextureCache::PrepareTextureSet( @@ -179,9 +400,11 @@ VkDescriptorSet TextureCache::PrepareTextureSet( // shaders. 
bool any_failed = false; any_failed = - !SetupTextureBindings(update_set_info, vertex_bindings) || any_failed; + !SetupTextureBindings(update_set_info, vertex_bindings, command_buffer) || + any_failed; any_failed = - !SetupTextureBindings(update_set_info, pixel_bindings) || any_failed; + !SetupTextureBindings(update_set_info, pixel_bindings, command_buffer) || + any_failed; if (any_failed) { XELOGW("Failed to setup one or more texture bindings"); // TODO(benvanik): actually bail out here? @@ -269,13 +492,16 @@ VkDescriptorSet TextureCache::PrepareTextureSet( bool TextureCache::SetupTextureBindings( UpdateSetInfo* update_set_info, - const std::vector& bindings) { + const std::vector& bindings, + VkCommandBuffer command_buffer) { bool any_failed = false; for (auto& binding : bindings) { uint32_t fetch_bit = 1 << binding.fetch_constant; if ((update_set_info->has_setup_fetch_mask & fetch_bit) == 0) { // Needs setup. - any_failed = !SetupTextureBinding(update_set_info, binding) || any_failed; + any_failed = + !SetupTextureBinding(update_set_info, binding, command_buffer) || + any_failed; update_set_info->has_setup_fetch_mask |= fetch_bit; } } @@ -283,7 +509,8 @@ bool TextureCache::SetupTextureBindings( } bool TextureCache::SetupTextureBinding(UpdateSetInfo* update_set_info, - const Shader::TextureBinding& binding) { + const Shader::TextureBinding& binding, + VkCommandBuffer command_buffer) { auto& regs = *register_file_; int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6; auto group = @@ -308,41 +535,21 @@ bool TextureCache::SetupTextureBinding(UpdateSetInfo* update_set_info, return false; // invalid texture used } + auto texture = Demand(texture_info, command_buffer); + auto sampler = Demand(sampler_info); + assert_true(texture != nullptr && sampler != nullptr); + trace_writer_->WriteMemoryRead(texture_info.guest_address, texture_info.input_length); - // TODO(benvanik): reuse. 
- VkSamplerCreateInfo sampler_create_info; - sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - sampler_create_info.pNext = nullptr; - sampler_create_info.flags = 0; - sampler_create_info.magFilter = VK_FILTER_NEAREST; - sampler_create_info.minFilter = VK_FILTER_NEAREST; - sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - sampler_create_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_create_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_create_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_create_info.mipLodBias = 0.0f; - sampler_create_info.anisotropyEnable = VK_FALSE; - sampler_create_info.maxAnisotropy = 1.0f; - sampler_create_info.compareEnable = VK_FALSE; - sampler_create_info.compareOp = VK_COMPARE_OP_ALWAYS; - sampler_create_info.minLod = 0.0f; - sampler_create_info.maxLod = 0.0f; - sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; - sampler_create_info.unnormalizedCoordinates = VK_FALSE; - VkSampler sampler; - auto err = vkCreateSampler(*device_, &sampler_create_info, nullptr, &sampler); - CheckResult(err, "vkCreateSampler"); - auto& sampler_write = update_set_info->sampler_infos[update_set_info->sampler_write_count++]; - sampler_write.sampler = sampler; + sampler_write.sampler = sampler->sampler; auto& image_write = update_set_info->image_2d_infos[update_set_info->image_2d_write_count++]; - image_write.imageView = grid_image_2d_view_; - image_write.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + image_write.imageView = texture->views[0]->view; + image_write.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; return true; } diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h index 9ba3f3577..896bb3155 100644 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -10,8 +10,12 @@ #ifndef XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ #define XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ +#include + #include 
"xenia/gpu/register_file.h" +#include "xenia/gpu/sampler_info.h" #include "xenia/gpu/shader.h" +#include "xenia/gpu/texture_info.h" #include "xenia/gpu/trace_writer.h" #include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/vulkan.h" @@ -50,14 +54,51 @@ class TextureCache { private: struct UpdateSetInfo; + struct TextureView; - void SetupGridImages(); + // This represents an uploaded Vulkan texture. + struct Texture { + TextureInfo texture_info; + VkDeviceMemory texture_memory; + VkDeviceSize memory_offset; + VkDeviceSize memory_size; + VkImage image; + VkFormat format; + std::vector> views; + }; - bool SetupTextureBindings( - UpdateSetInfo* update_set_info, - const std::vector& bindings); + struct TextureView { + Texture* texture; + VkImageView view; + }; + + // Cached Vulkan sampler. + struct Sampler { + SamplerInfo sampler_info; + VkSampler sampler; + }; + + // Demands a texture. If command_buffer is null and the texture hasn't been + // uploaded to graphics memory already, we will return null and bail. + Texture* Demand(const TextureInfo& texture_info, + VkCommandBuffer command_buffer = nullptr); + Sampler* Demand(const SamplerInfo& sampler_info); + + // Allocates a new texture and memory to back it on the GPU. + Texture* AllocateTexture(TextureInfo texture_info); + bool FreeTexture(Texture* texture); + + // Queues commands to upload a texture from system memory, applying any + // conversions necessary. 
+ bool UploadTexture2D(VkCommandBuffer command_buffer, Texture* dest, + TextureInfo src); + + bool SetupTextureBindings(UpdateSetInfo* update_set_info, + const std::vector& bindings, + VkCommandBuffer command_buffer = nullptr); bool SetupTextureBinding(UpdateSetInfo* update_set_info, - const Shader::TextureBinding& binding); + const Shader::TextureBinding& binding, + VkCommandBuffer command_buffer = nullptr); RegisterFile* register_file_ = nullptr; TraceWriter* trace_writer_ = nullptr; @@ -66,9 +107,11 @@ class TextureCache { VkDescriptorPool descriptor_pool_ = nullptr; VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr; - VkDeviceMemory grid_image_2d_memory_ = nullptr; - VkImage grid_image_2d_ = nullptr; - VkImageView grid_image_2d_view_ = nullptr; + // Temporary until we have circular buffers. + VkBuffer staging_buffer_ = nullptr; + VkDeviceMemory staging_buffer_mem_ = nullptr; + std::unordered_map> textures_; + std::unordered_map> samplers_; struct UpdateSetInfo { // Bitmap of all 32 fetch constants and whether they have been setup yet. diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 1bd05f16a..48c7d681d 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -217,6 +217,14 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, auto err = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info); CheckResult(err, "vkBeginCommandBuffer"); + // Upload and set descriptors for all textures. + // We do this outside of the render pass so the texture cache can upload and + // convert textures. + auto samplers = PopulateSamplers(command_buffer, vertex_shader, pixel_shader); + if (!samplers) { + return false; + } + // Begin the render pass. // This will setup our framebuffer and begin the pass in the command buffer. 
auto render_state = render_cache_->BeginRenderPass( @@ -253,11 +261,10 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, return false; } - // Upload and set descriptors for all textures. - if (!PopulateSamplers(command_buffer, vertex_shader, pixel_shader)) { - render_cache_->EndRenderPass(); - return false; - } + // Bind samplers/textures. + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_cache_->pipeline_layout(), 1, 1, &samplers, + 0, nullptr); // Actually issue the draw. if (!index_buffer_info) { @@ -471,9 +478,9 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( return true; } -bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader) { +VkDescriptorSet VulkanCommandProcessor::PopulateSamplers( + VkCommandBuffer command_buffer, VulkanShader* vertex_shader, + VulkanShader* pixel_shader) { #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES @@ -483,20 +490,63 @@ bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, pixel_shader->texture_bindings()); if (!descriptor_set) { // Unable to bind set. - return false; + return nullptr; } - // Bind samplers/textures. - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_cache_->pipeline_layout(), 1, 1, - &descriptor_set, 0, nullptr); - - return true; + return descriptor_set; } bool VulkanCommandProcessor::IssueCopy() { SCOPE_profile_cpu_f("gpu"); - // TODO(benvanik): resolve. + auto& regs = *register_file_; + + // This is used to resolve surfaces, taking them from EDRAM render targets + // to system memory. It can optionally clear color/depth surfaces, too. + // The command buffer has stuff for actually doing this by drawing, however + // we should be able to do it without that much easier. 
+ + uint32_t copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32; + // Render targets 0-3, 4 = depth + uint32_t copy_src_select = copy_control & 0x7; + bool color_clear_enabled = (copy_control >> 8) & 0x1; + bool depth_clear_enabled = (copy_control >> 9) & 0x1; + auto copy_command = static_cast((copy_control >> 20) & 0x3); + + uint32_t copy_dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32; + auto copy_dest_endian = static_cast(copy_dest_info & 0x7); + uint32_t copy_dest_array = (copy_dest_info >> 3) & 0x1; + assert_true(copy_dest_array == 0); + uint32_t copy_dest_slice = (copy_dest_info >> 4) & 0x7; + assert_true(copy_dest_slice == 0); + auto copy_dest_format = + static_cast((copy_dest_info >> 7) & 0x3F); + uint32_t copy_dest_number = (copy_dest_info >> 13) & 0x7; + // assert_true(copy_dest_number == 0); // ? + uint32_t copy_dest_bias = (copy_dest_info >> 16) & 0x3F; + // assert_true(copy_dest_bias == 0); + uint32_t copy_dest_swap = (copy_dest_info >> 25) & 0x1; + + uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32; + uint32_t copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32; + uint32_t copy_dest_height = (copy_dest_pitch >> 16) & 0x3FFF; + copy_dest_pitch &= 0x3FFF; + + // None of this is supported yet: + uint32_t copy_surface_slice = regs[XE_GPU_REG_RB_COPY_SURFACE_SLICE].u32; + assert_true(copy_surface_slice == 0); + uint32_t copy_func = regs[XE_GPU_REG_RB_COPY_FUNC].u32; + assert_true(copy_func == 0); + uint32_t copy_ref = regs[XE_GPU_REG_RB_COPY_REF].u32; + assert_true(copy_ref == 0); + uint32_t copy_mask = regs[XE_GPU_REG_RB_COPY_MASK].u32; + assert_true(copy_mask == 0); + + // RB_SURFACE_INFO + // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html + uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + uint32_t surface_pitch = surface_info & 0x3FFF; + auto surface_msaa = static_cast((surface_info >> 16) & 0x3); + return true; } diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h 
b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 43aec9edd..b45be07fb 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -49,6 +49,8 @@ class VulkanCommandProcessor : public CommandProcessor { void ClearCaches() override; + RenderCache* render_cache() { return render_cache_.get(); } + private: bool SetupContext() override; void ShutdownContext() override; @@ -73,9 +75,9 @@ class VulkanCommandProcessor : public CommandProcessor { IndexBufferInfo* index_buffer_info); bool PopulateVertexBuffers(VkCommandBuffer command_buffer, VulkanShader* vertex_shader); - bool PopulateSamplers(VkCommandBuffer command_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader); + VkDescriptorSet PopulateSamplers(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader); bool IssueCopy() override; xe::ui::vulkan::VulkanDevice* device_ = nullptr; From 4e2753970943649c747f2413b01216430da6eaa3 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 8 Mar 2016 17:57:04 -0600 Subject: [PATCH 073/145] Fix Vulkan texture drawing. 
--- src/xenia/gpu/spirv_shader_translator.cc | 88 +++--- src/xenia/gpu/spirv_shader_translator.h | 2 +- src/xenia/gpu/vulkan/texture_cache.cc | 347 ++++++++++++++--------- src/xenia/gpu/vulkan/texture_cache.h | 75 +++-- 4 files changed, 310 insertions(+), 202 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 57af04e24..3f991baa8 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -164,33 +164,37 @@ void SpirvShaderTranslator::StartTranslation() { push_constants_type, "push_consts"); // Texture bindings - Id samplers_t = b.makeSamplerType(); - Id img_t[] = { - b.makeImageType(float_type_, spv::Dim::Dim1D, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown), - b.makeImageType(float_type_, spv::Dim::Dim2D, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown), - b.makeImageType(float_type_, spv::Dim::Dim3D, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown), - b.makeImageType(float_type_, spv::Dim::DimCube, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown)}; + Id tex_t[] = {b.makeSampledImageType(b.makeImageType( + float_type_, spv::Dim::Dim1D, false, false, false, 1, + spv::ImageFormat::ImageFormatUnknown)), + b.makeSampledImageType(b.makeImageType( + float_type_, spv::Dim::Dim2D, false, false, false, 1, + spv::ImageFormat::ImageFormatUnknown)), + b.makeSampledImageType(b.makeImageType( + float_type_, spv::Dim::Dim3D, false, false, false, 1, + spv::ImageFormat::ImageFormatUnknown)), + b.makeSampledImageType(b.makeImageType( + float_type_, spv::Dim::DimCube, false, false, false, 1, + spv::ImageFormat::ImageFormatUnknown))}; - Id samplers_a = b.makeArrayType(samplers_t, b.makeUintConstant(32), 0); - Id img_a_t[] = {b.makeArrayType(img_t[0], b.makeUintConstant(32), 0), - b.makeArrayType(img_t[1], b.makeUintConstant(32), 0), - b.makeArrayType(img_t[2], b.makeUintConstant(32), 0), - b.makeArrayType(img_t[3], 
b.makeUintConstant(32), 0)}; + // Id samplers_a = b.makeArrayType(sampler_t, b.makeUintConstant(32), 0); + Id tex_a_t[] = {b.makeArrayType(tex_t[0], b.makeUintConstant(32), 0), + b.makeArrayType(tex_t[1], b.makeUintConstant(32), 0), + b.makeArrayType(tex_t[2], b.makeUintConstant(32), 0), + b.makeArrayType(tex_t[3], b.makeUintConstant(32), 0)}; - samplers_ = b.createVariable(spv::StorageClass::StorageClassUniformConstant, - samplers_a, "samplers"); - b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 1); - b.addDecoration(samplers_, spv::Decoration::DecorationBinding, 0); + // TODO(DrChat): See texture_cache.cc - do we need separate samplers here? + // samplers_ = + // b.createVariable(spv::StorageClass::StorageClassUniformConstant, + // samplers_a, "samplers"); + // b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 1); + // b.addDecoration(samplers_, spv::Decoration::DecorationBinding, 0); for (int i = 0; i < 4; i++) { - img_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant, - img_a_t[i], - xe::format_string("images%dD", i + 1).c_str()); - b.addDecoration(img_[i], spv::Decoration::DecorationDescriptorSet, 1); - b.addDecoration(img_[i], spv::Decoration::DecorationBinding, i + 1); + tex_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant, + tex_a_t[i], + xe::format_string("textures%dD", i + 1).c_str()); + b.addDecoration(tex_[i], spv::Decoration::DecorationDescriptorSet, 1); + b.addDecoration(tex_[i], spv::Decoration::DecorationBinding, i + 1); } // Interpolators. 
@@ -674,25 +678,15 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( switch (instr.opcode) { case FetchOpcode::kTextureFetch: { - auto image_index = b.makeUintConstant(instr.operands[1].storage_index); - auto image_ptr = + auto texture_index = b.makeUintConstant(instr.operands[1].storage_index); + auto texture_ptr = b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, - img_[dim_idx], std::vector({image_index})); - auto sampler_ptr = - b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, - samplers_, std::vector({image_index})); - auto image = b.createLoad(image_ptr); - auto sampler = b.createLoad(sampler_ptr); - assert(b.isImageType(b.getTypeId(image))); - assert(b.isSamplerType(b.getTypeId(sampler))); - - auto sampled_image_type = b.makeSampledImageType(b.getImageType(image)); - auto tex = b.createBinOp(spv::Op::OpSampledImage, sampled_image_type, - image, sampler); + tex_[dim_idx], std::vector({texture_index})); + auto texture = b.createLoad(texture_ptr); spv::Builder::TextureParameters params = {0}; params.coords = src; - params.sampler = tex; + params.sampler = texture; dest = b.createTextureCall(spv::NoPrecision, vec4_float_type_, false, false, false, false, false, params); } break; @@ -1741,10 +1735,18 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, auto n_dst = b.getNumTypeComponents(storage_type); assert_true(n_el < n_dst); - constituents.push_back(source_value_id); - for (int i = n_el; i < n_dst; i++) { - // Pad with zeroes. - constituents.push_back(b.makeFloatConstant(0.f)); + if (n_el == 1) { + // Smear scalar. + for (int i = 0; i < n_dst; i++) { + constituents.push_back(source_value_id); + } + } else { + // FIXME: This may not work as intended. + constituents.push_back(source_value_id); + for (int i = n_el; i < n_dst; i++) { + // Pad with zeroes. 
+ constituents.push_back(b.makeFloatConstant(0.f)); + } } source_value_id = diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index ed4356322..3327dccbd 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -122,7 +122,7 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id interpolators_ = 0; spv::Id frag_outputs_ = 0; spv::Id samplers_ = 0; - spv::Id img_[4] = {0}; // Images {1D, 2D, 3D, Cube} + spv::Id tex_[4] = {0}; // Images {1D, 2D, 3D, Cube} // Map of {binding -> {offset -> spv input}} std::map> vertex_binding_map_; diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index 8a8e2e2f4..5c6e42b8b 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -42,7 +42,7 @@ TextureCache::TextureCache(RegisterFile* register_file, VkDescriptorPoolSize pool_sizes[2]; pool_sizes[0].type = VK_DESCRIPTOR_TYPE_SAMPLER; pool_sizes[0].descriptorCount = 32; - pool_sizes[1].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + pool_sizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; pool_sizes[1].descriptorCount = 32; descriptor_pool_info.poolSizeCount = 2; descriptor_pool_info.pPoolSizes = pool_sizes; @@ -63,7 +63,7 @@ TextureCache::TextureCache(RegisterFile* register_file, for (int i = 0; i < 4; ++i) { auto& texture_binding = bindings[1 + i]; texture_binding.binding = 1 + i; - texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; texture_binding.descriptorCount = kMaxTextureSamplers; texture_binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; @@ -94,35 +94,37 @@ TextureCache::TextureCache(RegisterFile* register_file, err = vkCreateBuffer(*device_, &staging_buffer_info, nullptr, &staging_buffer_); CheckResult(err, "vkCreateBuffer"); - - if (err == VK_SUCCESS) { - 
VkMemoryRequirements staging_buffer_reqs; - vkGetBufferMemoryRequirements(*device_, staging_buffer_, - &staging_buffer_reqs); - staging_buffer_mem_ = device_->AllocateMemory(staging_buffer_reqs); - assert_not_null(staging_buffer_mem_); - - err = vkBindBufferMemory(*device_, staging_buffer_, staging_buffer_mem_, 0); - CheckResult(err, "vkBindBufferMemory"); - - // Upload a grid into the staging buffer. - uint32_t* gpu_data = nullptr; - err = - vkMapMemory(*device_, staging_buffer_mem_, 0, staging_buffer_info.size, - 0, reinterpret_cast(&gpu_data)); - CheckResult(err, "vkMapMemory"); - - int width = 2048; - int height = 2048; - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - gpu_data[y * width + x] = - ((y % 32 < 16) ^ (x % 32 >= 16)) ? 0xFF0000FF : 0xFFFFFFFF; - } - } - - vkUnmapMemory(*device_, staging_buffer_mem_); + if (err != VK_SUCCESS) { + // This isn't good. + assert_always(); + return; } + + VkMemoryRequirements staging_buffer_reqs; + vkGetBufferMemoryRequirements(*device_, staging_buffer_, + &staging_buffer_reqs); + staging_buffer_mem_ = device_->AllocateMemory(staging_buffer_reqs); + assert_not_null(staging_buffer_mem_); + + err = vkBindBufferMemory(*device_, staging_buffer_, staging_buffer_mem_, 0); + CheckResult(err, "vkBindBufferMemory"); + + // Upload a grid into the staging buffer. + uint32_t* gpu_data = nullptr; + err = vkMapMemory(*device_, staging_buffer_mem_, 0, staging_buffer_info.size, + 0, reinterpret_cast(&gpu_data)); + CheckResult(err, "vkMapMemory"); + + int width = 2048; + int height = 2048; + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + gpu_data[y * width + x] = + ((y % 32 < 16) ^ (x % 32 >= 16)) ? 
0xFF0000FF : 0xFFFFFFFF; + } + } + + vkUnmapMemory(*device_, staging_buffer_mem_); } TextureCache::~TextureCache() { @@ -131,9 +133,141 @@ TextureCache::~TextureCache() { vkDestroyDescriptorPool(*device_, descriptor_pool_, nullptr); } +TextureCache::Texture* TextureCache::AllocateTexture( + const TextureInfo& texture_info) { + // Create an image first. + VkImageCreateInfo image_info = {}; + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + switch (texture_info.dimension) { + case Dimension::k1D: + image_info.imageType = VK_IMAGE_TYPE_1D; + break; + case Dimension::k2D: + image_info.imageType = VK_IMAGE_TYPE_2D; + break; + case Dimension::k3D: + image_info.imageType = VK_IMAGE_TYPE_3D; + break; + case Dimension::kCube: + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + break; + default: + assert_unhandled_case(texture_info.dimension); + return nullptr; + } + + // TODO: Format + image_info.format = VK_FORMAT_R8G8B8A8_UNORM; + image_info.extent = {texture_info.width + 1, texture_info.height + 1, + texture_info.depth + 1}; + image_info.mipLevels = 1; + image_info.arrayLayers = 1; + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT; + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_info.queueFamilyIndexCount = 0; + image_info.pQueueFamilyIndices = nullptr; + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + VkImage image; + auto err = vkCreateImage(*device_, &image_info, nullptr, &image); + CheckResult(err, "vkCreateImage"); + + VkMemoryRequirements mem_requirements; + vkGetImageMemoryRequirements(*device_, image, &mem_requirements); + + // TODO: Use a circular buffer or something else to allocate this memory. + // The device has a limited amount (around 64) of memory allocations that we + // can make. 
+ // Now that we have the size, back the image with GPU memory. + auto memory = device_->AllocateMemory(mem_requirements, 0); + if (!memory) { + // Crap. + assert_always(); + vkDestroyImage(*device_, image, nullptr); + return nullptr; + } + + err = vkBindImageMemory(*device_, image, memory, 0); + CheckResult(err, "vkBindImageMemory"); + + auto texture = new Texture(); + texture->format = image_info.format; + texture->image = image; + texture->image_layout = image_info.initialLayout; + texture->image_memory = memory; + texture->memory_offset = 0; + texture->memory_size = mem_requirements.size; + texture->texture_info = texture_info; + + // Create a default view, just for kicks. + VkImageViewCreateInfo view_info; + view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_info.pNext = nullptr; + view_info.flags = 0; + view_info.image = image; + view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_info.format = image_info.format; + view_info.components = { + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, + VK_COMPONENT_SWIZZLE_A, + }; + view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + VkImageView view; + err = vkCreateImageView(*device_, &view_info, nullptr, &view); + CheckResult(err, "vkCreateImageView"); + if (err == VK_SUCCESS) { + auto texture_view = std::make_unique(); + texture_view->texture = texture; + texture_view->view = view; + texture->views.push_back(std::move(texture_view)); + } + + return texture; +} + +bool TextureCache::FreeTexture(Texture* texture) { + // TODO(DrChat) + return false; +} + +TextureCache::Texture* TextureCache::DemandResolveTexture( + const TextureInfo& texture_info, TextureFormat format, + uint32_t* out_offset_x, uint32_t* out_offset_y) { + // Check to see if we've already used a texture at this location. 
+ auto texture = LookupAddress( + texture_info.guest_address, texture_info.size_2d.block_width, + texture_info.size_2d.block_height, format, out_offset_x, out_offset_y); + if (texture) { + return texture; + } + + // Check resolve textures. + for (auto it = resolve_textures_.begin(); it != resolve_textures_.end(); + ++it) { + texture = (*it).get(); + if (texture_info.guest_address == texture->texture_info.guest_address && + texture_info.size_2d.logical_width == + texture->texture_info.size_2d.logical_width && + texture_info.size_2d.logical_height == + texture->texture_info.size_2d.logical_height) { + // Exact match. + return texture; + } + } + + // No texture at this location. Make a new one. + texture = AllocateTexture(texture_info); + resolve_textures_.push_back(std::unique_ptr(texture)); + return texture; +} + TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, VkCommandBuffer command_buffer) { - // Run a tight loop to scan for an existing texture. + // Run a tight loop to scan for an exact match existing texture. auto texture_hash = texture_info.hash(); for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) { if (it->second->texture_info == texture_info) { @@ -141,15 +275,25 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, } } - // Though we didn't find an exact match, that doesn't mean we're out of the - // woods yet. This texture could either be a portion of another texture or - // vice versa. Check for overlap before uploading. - for (auto it = textures_.begin(); it != textures_.end(); ++it) { + // Check resolve textures. 
+ for (auto it = resolve_textures_.begin(); it != resolve_textures_.end(); + ++it) { + auto texture = (*it).get(); + if (texture_info.guest_address == texture->texture_info.guest_address && + texture_info.size_2d.logical_width == + texture->texture_info.size_2d.logical_width && + texture_info.size_2d.logical_height == + texture->texture_info.size_2d.logical_height) { + // Exact match. + // TODO: Lazy match + texture->texture_info = texture_info; + textures_[texture_hash] = std::move(*it); + } } if (!command_buffer) { - // Texture not found and no command buffer was passed allowing us to upload - // a new one. + // Texture not found and no command buffer was passed, preventing us from + // uploading a new one. return nullptr; } @@ -167,6 +311,12 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, return nullptr; } + // Though we didn't find an exact match, that doesn't mean we're out of the + // woods yet. This texture could either be a portion of another texture or + // vice versa. Copy any overlapping textures into this texture. + for (auto it = textures_.begin(); it != textures_.end(); ++it) { + } + textures_[texture_hash] = std::unique_ptr(texture); return texture; @@ -199,7 +349,7 @@ TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) { sampler_create_info.anisotropyEnable = VK_FALSE; sampler_create_info.maxAnisotropy = 1.0f; sampler_create_info.compareEnable = VK_FALSE; - sampler_create_info.compareOp = VK_COMPARE_OP_ALWAYS; + sampler_create_info.compareOp = VK_COMPARE_OP_NEVER; sampler_create_info.minLod = 0.0f; sampler_create_info.maxLod = 0.0f; sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; @@ -220,95 +370,21 @@ TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) { return sampler; } -TextureCache::Texture* TextureCache::AllocateTexture(TextureInfo texture_info) { - // Create an image first. 
- VkImageCreateInfo image_info = {}; - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - switch (texture_info.dimension) { - case Dimension::k1D: - image_info.imageType = VK_IMAGE_TYPE_1D; - break; - case Dimension::k2D: - image_info.imageType = VK_IMAGE_TYPE_2D; - break; - case Dimension::k3D: - image_info.imageType = VK_IMAGE_TYPE_3D; - break; - case Dimension::kCube: - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; - break; - default: - assert_unhandled_case(texture_info.dimension); - return nullptr; +TextureCache::Texture* TextureCache::LookupAddress( + uint32_t guest_address, uint32_t width, uint32_t height, + TextureFormat format, uint32_t* offset_x, uint32_t* offset_y) { + for (auto it = textures_.begin(); it != textures_.end(); ++it) { + const auto& texture_info = it->second->texture_info; + if (texture_info.guest_address == guest_address && + texture_info.dimension == Dimension::k2D && + texture_info.size_2d.input_width == width && + texture_info.size_2d.input_height == height) { + return it->second.get(); + } } - // TODO: Format - image_info.format = VK_FORMAT_R8G8B8A8_UNORM; - image_info.extent = {texture_info.width + 1, texture_info.height + 1, - texture_info.depth + 1}; - image_info.mipLevels = 1; - image_info.arrayLayers = 1; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - image_info.usage = - VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - VkImage image; - auto err = vkCreateImage(*device_, &image_info, nullptr, &image); - CheckResult(err, "vkCreateImage"); - - VkMemoryRequirements mem_requirements; - vkGetImageMemoryRequirements(*device_, image, &mem_requirements); - - // TODO: Use a circular buffer or something else to allocate this 
memory. - // The device has a limited amount (around 64) of memory allocations that we - // can make. - // Now that we have the size, back the image with GPU memory. - auto memory = device_->AllocateMemory(mem_requirements, 0); - err = vkBindImageMemory(*device_, image, memory, 0); - CheckResult(err, "vkBindImageMemory"); - - auto texture = new Texture(); - texture->format = image_info.format; - texture->image = image; - texture->memory_offset = 0; - texture->memory_size = mem_requirements.size; - texture->texture_info = texture_info; - texture->texture_memory = memory; - - // Create a default view, just for kicks. - VkImageViewCreateInfo view_info; - view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - view_info.pNext = nullptr; - view_info.flags = 0; - view_info.image = image; - view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - view_info.format = image_info.format; - view_info.components = { - VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, - VK_COMPONENT_SWIZZLE_A, - }; - view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - VkImageView view; - err = vkCreateImageView(*device_, &view_info, nullptr, &view); - CheckResult(err, "vkCreateImageView"); - if (err == VK_SUCCESS) { - auto texture_view = std::make_unique(); - texture_view->texture = texture; - texture_view->view = view; - texture->views.push_back(std::move(texture_view)); - } - - return texture; -} - -bool TextureCache::FreeTexture(Texture* texture) { - // TODO(DrChat) - return false; + // TODO: Try to match at an offset. + return nullptr; } bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer, @@ -359,8 +435,8 @@ bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer, // For now, just transfer the grid we uploaded earlier into the texture. 
VkBufferImageCopy copy_region; copy_region.bufferOffset = 0; - copy_region.bufferRowLength = 0; - copy_region.bufferImageHeight = 0; + copy_region.bufferRowLength = 2048; + copy_region.bufferImageHeight = 2048; copy_region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; copy_region.imageOffset = {0, 0, 0}; copy_region.imageExtent = {dest->texture_info.width + 1, @@ -378,6 +454,7 @@ bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); + dest->image_layout = barrier.newLayout; return true; } @@ -427,6 +504,8 @@ VkDescriptorSet TextureCache::PrepareTextureSet( VkWriteDescriptorSet descriptor_writes[4]; std::memset(descriptor_writes, 0, sizeof(descriptor_writes)); uint32_t descriptor_write_count = 0; + /* + // TODO(DrChat): Do we really need to separate samplers and images here? if (update_set_info->sampler_write_count) { auto& sampler_write = descriptor_writes[descriptor_write_count++]; sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -438,6 +517,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet( sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; sampler_write.pImageInfo = update_set_info->sampler_infos; } + */ if (update_set_info->image_1d_write_count) { auto& image_write = descriptor_writes[descriptor_write_count++]; image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -446,7 +526,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet( image_write.dstBinding = 1; image_write.dstArrayElement = 0; image_write.descriptorCount = update_set_info->image_1d_write_count; - image_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; image_write.pImageInfo = update_set_info->image_1d_infos; } if (update_set_info->image_2d_write_count) { @@ -457,7 +537,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet( image_write.dstBinding = 2; image_write.dstArrayElement = 0; 
image_write.descriptorCount = update_set_info->image_2d_write_count; - image_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; image_write.pImageInfo = update_set_info->image_2d_infos; } if (update_set_info->image_3d_write_count) { @@ -468,7 +548,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet( image_write.dstBinding = 3; image_write.dstArrayElement = 0; image_write.descriptorCount = update_set_info->image_3d_write_count; - image_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; image_write.pImageInfo = update_set_info->image_3d_infos; } if (update_set_info->image_cube_write_count) { @@ -479,7 +559,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet( image_write.dstBinding = 4; image_write.dstArrayElement = 0; image_write.descriptorCount = update_set_info->image_cube_write_count; - image_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; image_write.pImageInfo = update_set_info->image_cube_infos; } if (descriptor_write_count) { @@ -542,14 +622,11 @@ bool TextureCache::SetupTextureBinding(UpdateSetInfo* update_set_info, trace_writer_->WriteMemoryRead(texture_info.guest_address, texture_info.input_length); - auto& sampler_write = - update_set_info->sampler_infos[update_set_info->sampler_write_count++]; - sampler_write.sampler = sampler->sampler; - auto& image_write = update_set_info->image_2d_infos[update_set_info->image_2d_write_count++]; image_write.imageView = texture->views[0]->view; image_write.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + image_write.sampler = sampler->sampler; return true; } diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h index 896bb3155..6264a4a98 100644 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -28,6 
+28,30 @@ namespace vulkan { // class TextureCache { public: + struct TextureView; + + // This represents an uploaded Vulkan texture. + struct Texture { + TextureInfo texture_info; + std::vector> views; + + // True if we know all info about this texture, false otherwise. + // (e.g. we resolve to system memory and may not know the full details about + // this texture) + bool full_texture; + VkFormat format; + VkImage image; + VkImageLayout image_layout; + VkDeviceMemory image_memory; + VkDeviceSize memory_offset; + VkDeviceSize memory_size; + }; + + struct TextureView { + Texture* texture; + VkImageView view; + }; + TextureCache(RegisterFile* register_file, TraceWriter* trace_writer, ui::vulkan::VulkanDevice* device); ~TextureCache(); @@ -49,28 +73,24 @@ class TextureCache { // TODO(benvanik): Resolve. // TODO(benvanik): ReadTexture. + // Demands a texture for the purpose of resolving from EDRAM. This either + // creates a new texture or returns a previously created texture. texture_info + // is not required to be completely filled out, just guest_address and size. + // + // It's possible that this may return an image that is larger than the + // requested size (e.g. resolving into a bigger texture) or an image that + // must have an offset applied. If so, the caller must handle this. + // At the very least, it's guaranteed that the image will be large enough to + // hold the requested size. + Texture* DemandResolveTexture(const TextureInfo& texture_info, + TextureFormat format, uint32_t* out_offset_x, + uint32_t* out_offset_y); + // Clears all cached content. void ClearCache(); private: struct UpdateSetInfo; - struct TextureView; - - // This represents an uploaded Vulkan texture. 
- struct Texture { - TextureInfo texture_info; - VkDeviceMemory texture_memory; - VkDeviceSize memory_offset; - VkDeviceSize memory_size; - VkImage image; - VkFormat format; - std::vector> views; - }; - - struct TextureView { - Texture* texture; - VkImageView view; - }; // Cached Vulkan sampler. struct Sampler { @@ -78,18 +98,28 @@ class TextureCache { VkSampler sampler; }; + // Allocates a new texture and memory to back it on the GPU. + Texture* AllocateTexture(const TextureInfo& texture_info); + bool FreeTexture(Texture* texture); + // Demands a texture. If command_buffer is null and the texture hasn't been // uploaded to graphics memory already, we will return null and bail. Texture* Demand(const TextureInfo& texture_info, VkCommandBuffer command_buffer = nullptr); Sampler* Demand(const SamplerInfo& sampler_info); - // Allocates a new texture and memory to back it on the GPU. - Texture* AllocateTexture(TextureInfo texture_info); - bool FreeTexture(Texture* texture); + // Looks for a texture either containing or matching these parameters. + // Caller is responsible for checking if the texture returned is an exact + // match or just contains the texture given by the parameters. + // If offset_x and offset_y are not null, this may return a texture that + // contains this image at an offset. + Texture* LookupAddress(uint32_t guest_address, uint32_t width, + uint32_t height, TextureFormat format, + uint32_t* offset_x, uint32_t* offset_y); // Queues commands to upload a texture from system memory, applying any - // conversions necessary. + // conversions necessary. This may flush the command buffer to the GPU if we + // run out of staging memory. 
bool UploadTexture2D(VkCommandBuffer command_buffer, Texture* dest, TextureInfo src); @@ -112,13 +142,12 @@ class TextureCache { VkDeviceMemory staging_buffer_mem_ = nullptr; std::unordered_map> textures_; std::unordered_map> samplers_; + std::vector> resolve_textures_; struct UpdateSetInfo { // Bitmap of all 32 fetch constants and whether they have been setup yet. // This prevents duplication across the vertex and pixel shader. uint32_t has_setup_fetch_mask; - uint32_t sampler_write_count = 0; - VkDescriptorImageInfo sampler_infos[32]; uint32_t image_1d_write_count = 0; VkDescriptorImageInfo image_1d_infos[32]; uint32_t image_2d_write_count = 0; From 86cb40f0c69dc78deb03453dbd1e9bee76448f71 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 8 Mar 2016 18:02:03 -0600 Subject: [PATCH 074/145] Change how the render cache renders into EDRAM. Rendering directly into the EDRAM buffer is bad because we don't know how the GPU lays out memory when it draws. Instead, we create temporary render targets and copy EDRAM contents to/from those temporary RTs before and after each draw. --- src/xenia/gpu/vulkan/render_cache.cc | 347 +++++++++++++++++++-------- src/xenia/gpu/vulkan/render_cache.h | 50 +++- 2 files changed, 295 insertions(+), 102 deletions(-) diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 5637d44eb..a2b496330 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -71,34 +71,6 @@ VkFormat DepthRenderTargetFormatToVkFormat(DepthRenderTargetFormat format) { } } -// Cached view into the EDRAM memory. -// The image is aliased to a region of the edram_memory_ based on the tile -// parameters. -// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible -// formats? -class CachedTileView { - public: - // Key identifying the view in the cache. - TileViewKey key; - // Image mapped into EDRAM. - VkImage image = nullptr; - // Simple view on the image matching the format. 
- VkImageView image_view = nullptr; - - CachedTileView(VkDevice device, VkDeviceMemory edram_memory, - TileViewKey view_key); - ~CachedTileView(); - - bool IsEqual(const TileViewKey& other_key) const { - auto a = reinterpret_cast(&key); - auto b = reinterpret_cast(&other_key); - return *a == *b; - } - - private: - VkDevice device_ = nullptr; -}; - // Cached framebuffer referencing tile attachments. // Each framebuffer is specific to a render pass. Ugh. class CachedFramebuffer { @@ -151,9 +123,11 @@ class CachedRenderPass { VkDevice device_ = nullptr; }; -CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory, +CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device, + VkCommandBuffer command_buffer, + VkDeviceMemory edram_memory, TileViewKey view_key) - : device_(device), key(std::move(view_key)) { + : device_(*device), key(std::move(view_key)) { // Map format to Vulkan. VkFormat vulkan_format = VK_FORMAT_UNDEFINED; uint32_t bpp = 4; @@ -191,8 +165,8 @@ CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory, image_info.extent.depth = 1; image_info.mipLevels = 1; image_info.arrayLayers = 1; - // TODO(benvanik): native MSAA support? - image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.samples = + static_cast(VK_SAMPLE_COUNT_1_BIT); image_info.tiling = VK_IMAGE_TILING_OPTIMAL; image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | @@ -203,19 +177,17 @@ CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory, image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.queueFamilyIndexCount = 0; image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; auto err = vkCreateImage(device_, &image_info, nullptr, &image); CheckResult(err, "vkCreateImage"); - // Verify our assumptions about memory layout are correct. 
- VkDeviceSize edram_offset = key.tile_offset * 5120; VkMemoryRequirements memory_requirements; - vkGetImageMemoryRequirements(device, image, &memory_requirements); - assert_true(edram_offset + memory_requirements.size <= kEdramBufferCapacity); - assert_true(edram_offset % memory_requirements.alignment == 0); + vkGetImageMemoryRequirements(*device, image, &memory_requirements); - // Bind to the region of EDRAM we occupy. - err = vkBindImageMemory(device_, image, edram_memory, edram_offset); + // Bind to a newly allocated chunk. + // TODO: Alias from a really big buffer? + memory = device->AllocateMemory(memory_requirements, 0); + err = vkBindImageMemory(device_, image, memory, 0); CheckResult(err, "vkBindImageMemory"); // Create the image view we'll use to attach it to a framebuffer. @@ -242,11 +214,34 @@ CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory, CheckResult(err, "vkCreateImageView"); // TODO(benvanik): transition to general layout? + VkImageMemoryBarrier image_barrier; + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.pNext = nullptr; + image_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + image_barrier.dstAccessMask = + key.color_or_depth ? 
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT + : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + image_barrier.dstAccessMask |= + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.image = image; + image_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + image_barrier.subresourceRange.baseMipLevel = 0; + image_barrier.subresourceRange.levelCount = 1; + image_barrier.subresourceRange.baseArrayLayer = 0; + image_barrier.subresourceRange.layerCount = 1; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); } CachedTileView::~CachedTileView() { vkDestroyImageView(device_, image_view, nullptr); vkDestroyImage(device_, image, nullptr); + vkFreeMemory(device_, memory, nullptr); } CachedFramebuffer::CachedFramebuffer( @@ -423,9 +418,10 @@ bool CachedRenderPass::IsCompatible( RenderCache::RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device) - : register_file_(register_file), device_(*device) { + : register_file_(register_file), device_(device) { + VkResult status = VK_SUCCESS; + // Create the buffer we'll bind to our memory. - // We do this first so we can get the right memory type. 
VkBufferCreateInfo buffer_info; buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; buffer_info.pNext = nullptr; @@ -436,55 +432,42 @@ RenderCache::RenderCache(RegisterFile* register_file, buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; buffer_info.queueFamilyIndexCount = 0; buffer_info.pQueueFamilyIndices = nullptr; - auto err = vkCreateBuffer(*device, &buffer_info, nullptr, &edram_buffer_); - CheckResult(err, "vkCreateBuffer"); + status = vkCreateBuffer(*device, &buffer_info, nullptr, &edram_buffer_); + CheckResult(status, "vkCreateBuffer"); // Query requirements for the buffer. // It should be 1:1. VkMemoryRequirements buffer_requirements; - vkGetBufferMemoryRequirements(device_, edram_buffer_, &buffer_requirements); + vkGetBufferMemoryRequirements(*device_, edram_buffer_, &buffer_requirements); assert_true(buffer_requirements.size == kEdramBufferCapacity); - // Create a dummy image so we can see what memory bits it requires. - // They should overlap with the buffer requirements but are likely more - // strict. 
- VkImageCreateInfo test_image_info; - test_image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - test_image_info.pNext = nullptr; - test_image_info.flags = 0; - test_image_info.imageType = VK_IMAGE_TYPE_2D; - test_image_info.format = VK_FORMAT_R8G8B8A8_UINT; - test_image_info.extent.width = 128; - test_image_info.extent.height = 128; - test_image_info.extent.depth = 1; - test_image_info.mipLevels = 1; - test_image_info.arrayLayers = 1; - test_image_info.samples = VK_SAMPLE_COUNT_1_BIT; - test_image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - test_image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - test_image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - test_image_info.queueFamilyIndexCount = 0; - test_image_info.pQueueFamilyIndices = nullptr; - test_image_info.initialLayout = VK_IMAGE_LAYOUT_GENERAL; - VkImage test_image = nullptr; - err = vkCreateImage(device_, &test_image_info, nullptr, &test_image); - CheckResult(err, "vkCreateImage"); - VkMemoryRequirements image_requirements; - vkGetImageMemoryRequirements(device_, test_image, &image_requirements); - vkDestroyImage(device_, test_image, nullptr); - assert_true((image_requirements.memoryTypeBits & - buffer_requirements.memoryTypeBits) != 0); - // Allocate EDRAM memory. - VkMemoryRequirements memory_requirements; - memory_requirements.size = buffer_requirements.size; - memory_requirements.alignment = buffer_requirements.alignment; - memory_requirements.memoryTypeBits = image_requirements.memoryTypeBits; // TODO(benvanik): do we need it host visible? - edram_memory_ = device->AllocateMemory(memory_requirements, 0); + edram_memory_ = device->AllocateMemory(buffer_requirements); + assert_not_null(edram_memory_); // Bind buffer to map our entire memory. 
- vkBindBufferMemory(device_, edram_buffer_, edram_memory_, 0); + status = vkBindBufferMemory(*device_, edram_buffer_, edram_memory_, 0); + CheckResult(status, "vkBindBufferMemory"); + + if (status == VK_SUCCESS) { + status = vkBindBufferMemory(*device_, edram_buffer_, edram_memory_, 0); + CheckResult(status, "vkBindBufferMemory"); + + // Upload a grid into the EDRAM buffer. + uint32_t* gpu_data = nullptr; + status = vkMapMemory(*device_, edram_memory_, 0, buffer_requirements.size, + 0, reinterpret_cast(&gpu_data)); + CheckResult(status, "vkMapMemory"); + + if (status == VK_SUCCESS) { + for (int i = 0; i < kEdramBufferCapacity / 4; i++) { + gpu_data[i] = (i % 8) >= 4 ? 0xFF0000FF : 0xFFFFFFFF; + } + + vkUnmapMemory(*device_, edram_memory_); + } + } } RenderCache::~RenderCache() { @@ -503,8 +486,8 @@ RenderCache::~RenderCache() { cached_tile_views_.clear(); // Release underlying EDRAM memory. - vkDestroyBuffer(device_, edram_buffer_, nullptr); - vkFreeMemory(device_, edram_memory_, nullptr); + vkDestroyBuffer(*device_, edram_buffer_, nullptr); + vkFreeMemory(*device_, edram_memory_, nullptr); } const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, @@ -542,13 +525,74 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, } // Lookup or generate a new render pass and framebuffer for the new state. 
- if (!ConfigureRenderPass(config, &render_pass, &framebuffer)) { + if (!ConfigureRenderPass(command_buffer, config, &render_pass, + &framebuffer)) { return nullptr; } current_state_.render_pass = render_pass; current_state_.render_pass_handle = render_pass->handle; current_state_.framebuffer = framebuffer; current_state_.framebuffer_handle = framebuffer->handle; + + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = edram_buffer_; + barrier.offset = 0; + barrier.size = 0; + + // Copy EDRAM buffer into render targets with tight packing. + VkBufferImageCopy region; + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageOffset = {0, 0, 0}; + region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + for (int i = 0; i < 4; i++) { + auto target = current_state_.framebuffer->color_attachments[i]; + if (!target) { + continue; + } + + region.bufferOffset = target->key.tile_offset * 5120; + + // Wait for any potential copies to finish. 
+ barrier.offset = region.bufferOffset; + barrier.size = + target->key.tile_width * 80 * target->key.tile_height * 16 * 4; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, + &barrier, 0, nullptr); + + region.imageExtent = {target->key.tile_width * 80u, + target->key.tile_height * 16u, 1}; + vkCmdCopyBufferToImage(command_buffer, edram_buffer_, target->image, + VK_IMAGE_LAYOUT_GENERAL, 1, ®ion); + } + + // Depth + auto depth_target = current_state_.framebuffer->depth_stencil_attachment; + if (depth_target) { + region.imageSubresource = { + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1}; + region.bufferOffset = depth_target->key.tile_offset * 5120; + + // Wait for any potential copies to finish. + barrier.offset = region.bufferOffset; + barrier.size = depth_target->key.tile_width * 80 * + depth_target->key.tile_height * 16 * 4; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, + &barrier, 0, nullptr); + + region.imageExtent = {depth_target->key.tile_width * 80u, + depth_target->key.tile_height * 16u, 1}; + vkCmdCopyBufferToImage(command_buffer, edram_buffer_, depth_target->image, + VK_IMAGE_LAYOUT_GENERAL, 1, ®ion); + } } if (!render_pass) { return nullptr; @@ -593,6 +637,7 @@ bool RenderCache::ParseConfiguration(RenderConfiguration* config) { // RB_SURFACE_INFO // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html config->surface_pitch_px = regs.rb_surface_info & 0x3FFF; + // config->surface_height_px = (regs.rb_surface_info >> 18) & 0x3FFF; config->surface_msaa = static_cast((regs.rb_surface_info >> 16) & 0x3); @@ -643,7 +688,8 @@ bool RenderCache::ParseConfiguration(RenderConfiguration* config) { return true; } -bool RenderCache::ConfigureRenderPass(RenderConfiguration* config, +bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer, + RenderConfiguration* config, 
CachedRenderPass** out_render_pass, CachedFramebuffer** out_framebuffer) { *out_render_pass = nullptr; @@ -662,7 +708,7 @@ bool RenderCache::ConfigureRenderPass(RenderConfiguration* config, // If no render pass was found in the cache create a new one. if (!render_pass) { - render_pass = new CachedRenderPass(device_, *config); + render_pass = new CachedRenderPass(*device_, *config); cached_render_passes_.push_back(render_pass); } @@ -688,7 +734,8 @@ bool RenderCache::ConfigureRenderPass(RenderConfiguration* config, color_key.tile_height = config->surface_height_px / 16; color_key.color_or_depth = 1; color_key.edram_format = static_cast(config->color[i].format); - target_color_attachments[i] = GetTileView(color_key); + target_color_attachments[i] = + FindOrCreateTileView(command_buffer, color_key); if (!target_color_attachments) { XELOGE("Failed to get tile view for color attachment"); return false; @@ -702,14 +749,15 @@ bool RenderCache::ConfigureRenderPass(RenderConfiguration* config, depth_stencil_key.color_or_depth = 0; depth_stencil_key.edram_format = static_cast(config->depth_stencil.format); - auto target_depth_stencil_attachment = GetTileView(depth_stencil_key); + auto target_depth_stencil_attachment = + FindOrCreateTileView(command_buffer, depth_stencil_key); if (!target_depth_stencil_attachment) { XELOGE("Failed to get tile view for depth/stencil attachment"); return false; } framebuffer = new CachedFramebuffer( - device_, render_pass->handle, config->surface_pitch_px, + *device_, render_pass->handle, config->surface_pitch_px, config->surface_height_px, target_color_attachments, target_depth_stencil_attachment); render_pass->cached_framebuffers.push_back(framebuffer); @@ -720,7 +768,22 @@ bool RenderCache::ConfigureRenderPass(RenderConfiguration* config, return true; } -CachedTileView* RenderCache::GetTileView(const TileViewKey& view_key) { +CachedTileView* RenderCache::FindOrCreateTileView( + VkCommandBuffer command_buffer, const TileViewKey& view_key) { + 
auto tile_view = FindTileView(view_key); + if (tile_view) { + return tile_view; + } + + // Create a new tile and add to the cache. + tile_view = + new CachedTileView(device_, command_buffer, edram_memory_, view_key); + cached_tile_views_.push_back(tile_view); + + return tile_view; +} + +CachedTileView* RenderCache::FindTileView(const TileViewKey& view_key) const { // Check the cache. // TODO(benvanik): better lookup. for (auto tile_view : cached_tile_views_) { @@ -729,25 +792,115 @@ CachedTileView* RenderCache::GetTileView(const TileViewKey& view_key) { } } - // Create a new tile and add to the cache. - auto tile_view = new CachedTileView(device_, edram_memory_, view_key); - cached_tile_views_.push_back(tile_view); - return tile_view; + return nullptr; } void RenderCache::EndRenderPass() { assert_not_null(current_command_buffer_); - auto command_buffer = current_command_buffer_; - current_command_buffer_ = nullptr; // End the render pass. - vkCmdEndRenderPass(command_buffer); + vkCmdEndRenderPass(current_command_buffer_); + + // Copy all render targets back into our EDRAM buffer. + // Don't bother waiting on this command to complete, as next render pass may + // reuse previous framebuffer attachments. If they need this, they will wait. + // TODO: Should we bother re-tiling the images on copy back? 
+ VkBufferImageCopy region; + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageOffset = {0, 0, 0}; + region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + for (int i = 0; i < 4; i++) { + auto target = current_state_.framebuffer->color_attachments[i]; + if (!target) { + continue; + } + + region.bufferOffset = target->key.tile_offset * 5120; + region.imageExtent = {target->key.tile_width * 80u, + target->key.tile_height * 16u, 1}; + vkCmdCopyImageToBuffer(current_command_buffer_, target->image, + VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, ®ion); + } + + // Depth/stencil + auto depth_target = current_state_.framebuffer->depth_stencil_attachment; + if (depth_target) { + region.imageSubresource = { + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1}; + region.bufferOffset = depth_target->key.tile_offset * 5120; + region.imageExtent = {depth_target->key.tile_width * 80u, + depth_target->key.tile_height * 16u, 1}; + vkCmdCopyImageToBuffer(current_command_buffer_, depth_target->image, + VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, ®ion); + } + + current_command_buffer_ = nullptr; } void RenderCache::ClearCache() { // TODO(benvanik): caching. } +void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, + uint32_t edram_base, VkImage image, + VkImageLayout image_layout, + bool color_or_depth, int32_t offset_x, + int32_t offset_y, uint32_t width, + uint32_t height) { + // Transition the texture into a transfer destination layout. 
+ VkImageMemoryBarrier image_barrier; + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.pNext = nullptr; + image_barrier.srcAccessMask = 0; + image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + image_barrier.oldLayout = image_layout; + image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.image = image; + image_barrier.subresourceRange = {0, 0, 1, 0, 1}; + image_barrier.subresourceRange.aspectMask = + color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + + VkBufferMemoryBarrier buffer_barrier; + buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + buffer_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + buffer_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + buffer_barrier.buffer = edram_buffer_; + buffer_barrier.offset = edram_base * 5120; + buffer_barrier.size = width * height * 4; // TODO: Calculate this accurately. + + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, + &buffer_barrier, 1, &image_barrier); + + // Issue the copy command. + VkBufferImageCopy region; + region.bufferImageHeight = 0; + region.bufferOffset = edram_base * 5120; + region.bufferRowLength = 0; + region.imageExtent = {width, height, 1}; + region.imageOffset = {offset_x, offset_y, 0}; + region.imageSubresource = {0, 0, 0, 1}; + region.imageSubresource.aspectMask = + color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + vkCmdCopyBufferToImage(command_buffer, edram_buffer_, image, image_layout, 1, + ®ion); + + // Transition the image back into its previous layout. 
+ image_barrier.srcAccessMask = image_barrier.dstAccessMask; + image_barrier.dstAccessMask = 0; + std::swap(image_barrier.oldLayout, image_barrier.newLayout); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); +} + bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { uint32_t value = register_file_->values[register_name].u32; if (*dest == value) { diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index 4a1574e9b..13397bf1b 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -41,6 +41,35 @@ struct TileViewKey { }; static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed"); +// Cached view representing EDRAM memory. +// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible +// formats? +class CachedTileView { + public: + // Key identifying the view in the cache. + TileViewKey key; + // Image + VkImage image = nullptr; + // Simple view on the image matching the format. + VkImageView image_view = nullptr; + // Memory buffer + VkDeviceMemory memory = nullptr; + + CachedTileView(ui::vulkan::VulkanDevice* device, + VkCommandBuffer command_buffer, VkDeviceMemory edram_memory, + TileViewKey view_key); + ~CachedTileView(); + + bool IsEqual(const TileViewKey& other_key) const { + auto a = reinterpret_cast(&key); + auto b = reinterpret_cast(&other_key); + return *a == *b; + } + + private: + VkDevice device_ = nullptr; +}; + // Parsed render configuration from the current render state. struct RenderConfiguration { // Render mode (color+depth, depth-only, etc). @@ -230,22 +259,33 @@ class RenderCache { // Clears all cached content. void ClearCache(); + // Queues commands to copy EDRAM contents into an image. 
+ void RawCopyToImage(VkCommandBuffer command_buffer, uint32_t edram_base, + VkImage image, VkImageLayout image_layout, + bool color_or_depth, int32_t offset_x, int32_t offset_y, + uint32_t width, uint32_t height); + private: // Parses the current state into a configuration object. bool ParseConfiguration(RenderConfiguration* config); + // Finds a tile view. Returns nullptr if none found matching the key. + CachedTileView* FindTileView(const TileViewKey& view_key) const; + + // Gets or creates a tile view with the given parameters. + CachedTileView* FindOrCreateTileView(VkCommandBuffer command_buffer, + const TileViewKey& view_key); + // Gets or creates a render pass and frame buffer for the given configuration. // This attempts to reuse as much as possible across render passes and // framebuffers. - bool ConfigureRenderPass(RenderConfiguration* config, + bool ConfigureRenderPass(VkCommandBuffer command_buffer, + RenderConfiguration* config, CachedRenderPass** out_render_pass, CachedFramebuffer** out_framebuffer); - // Gets or creates a tile view with the given parameters. - CachedTileView* GetTileView(const TileViewKey& view_key); - RegisterFile* register_file_ = nullptr; - VkDevice device_ = nullptr; + ui::vulkan::VulkanDevice* device_ = nullptr; // Entire 10MiB of EDRAM, aliased to hell by various VkImages. VkDeviceMemory edram_memory_ = nullptr; From f839a1293f449a83ac6aadfa8ecaf82d9a36da0f Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Thu, 10 Mar 2016 12:59:48 -0600 Subject: [PATCH 075/145] Add a specialized copy command buffer to the vulkan swap chain --- src/xenia/ui/vulkan/vulkan_swap_chain.cc | 71 +++++++++++++++++------- src/xenia/ui/vulkan/vulkan_swap_chain.h | 6 ++ 2 files changed, 56 insertions(+), 21 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.cc b/src/xenia/ui/vulkan/vulkan_swap_chain.cc index 15d2795fd..ad383f32f 100644 --- a/src/xenia/ui/vulkan/vulkan_swap_chain.cc +++ b/src/xenia/ui/vulkan/vulkan_swap_chain.cc @@ -187,6 +187,10 @@ bool VulkanSwapChain::Initialize(VkSurfaceKHR surface) { vkAllocateCommandBuffers(*device_, &cmd_buffer_info, &render_cmd_buffer_); CheckResult(err, "vkCreateCommandBuffer"); + // Create another command buffer that handles image copies. + err = vkAllocateCommandBuffers(*device_, &cmd_buffer_info, ©_cmd_buffer_); + CheckResult(err, "vkCreateCommandBuffer"); + // Create the render pass used to draw to the swap chain. // The actual framebuffer attached will depend on which image we are drawing // into. @@ -194,7 +198,7 @@ bool VulkanSwapChain::Initialize(VkSurfaceKHR surface) { color_attachment.flags = 0; color_attachment.format = surface_format_; color_attachment.samples = VK_SAMPLE_COUNT_1_BIT; - color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; // CLEAR; color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; @@ -388,6 +392,7 @@ bool VulkanSwapChain::Begin() { // Reset all command buffers. vkResetCommandBuffer(render_cmd_buffer_, 0); + vkResetCommandBuffer(copy_cmd_buffer_, 0); auto& current_buffer = buffers_[current_buffer_index_]; // Build the command buffer that will execute all queued rendering buffers. 
@@ -399,14 +404,18 @@ bool VulkanSwapChain::Begin() { err = vkBeginCommandBuffer(render_cmd_buffer_, &begin_info); CheckResult(err, "vkBeginCommandBuffer"); - // Transition the image to a format we can render to. + // Start recording the copy command buffer as well. + err = vkBeginCommandBuffer(copy_cmd_buffer_, &begin_info); + CheckResult(err, "vkBeginCommandBuffer"); + + // Transition the image to a format we can copy to. VkImageMemoryBarrier pre_image_memory_barrier; pre_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; pre_image_memory_barrier.pNext = nullptr; pre_image_memory_barrier.srcAccessMask = 0; - pre_image_memory_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + pre_image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; pre_image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - pre_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + pre_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; pre_image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; pre_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; pre_image_memory_barrier.image = current_buffer.image; @@ -416,23 +425,37 @@ bool VulkanSwapChain::Begin() { pre_image_memory_barrier.subresourceRange.levelCount = 1; pre_image_memory_barrier.subresourceRange.baseArrayLayer = 0; pre_image_memory_barrier.subresourceRange.layerCount = 1; + vkCmdPipelineBarrier(copy_cmd_buffer_, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &pre_image_memory_barrier); + + // First: Issue a command to clear the render target. 
+ VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + VkClearColorValue clear_color; + clear_color.float32[0] = 238 / 255.0f; + clear_color.float32[1] = 238 / 255.0f; + clear_color.float32[2] = 238 / 255.0f; + clear_color.float32[3] = 1.0f; + if (FLAGS_vulkan_random_clear_color) { + clear_color.float32[0] = + rand() / static_cast(RAND_MAX); // NOLINT(runtime/threadsafe_fn) + clear_color.float32[1] = 1.0f; + clear_color.float32[2] = 0.0f; + } + vkCmdClearColorImage(copy_cmd_buffer_, current_buffer.image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, + &clear_range); + + // Transition the image to a color attachment target for drawing. + pre_image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + pre_image_memory_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + pre_image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + pre_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; vkCmdPipelineBarrier(render_cmd_buffer_, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &pre_image_memory_barrier); // Begin render pass. 
- VkClearValue color_clear_value; - color_clear_value.color.float32[0] = 238 / 255.0f; - color_clear_value.color.float32[1] = 238 / 255.0f; - color_clear_value.color.float32[2] = 238 / 255.0f; - color_clear_value.color.float32[3] = 1.0f; - if (FLAGS_vulkan_random_clear_color) { - color_clear_value.color.float32[0] = - rand() / static_cast(RAND_MAX); // NOLINT(runtime/threadsafe_fn) - color_clear_value.color.float32[1] = 1.0f; - color_clear_value.color.float32[2] = 0.0f; - } - VkClearValue clear_values[] = {color_clear_value}; VkRenderPassBeginInfo render_pass_begin_info; render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; render_pass_begin_info.pNext = nullptr; @@ -442,9 +465,8 @@ bool VulkanSwapChain::Begin() { render_pass_begin_info.renderArea.offset.y = 0; render_pass_begin_info.renderArea.extent.width = surface_width_; render_pass_begin_info.renderArea.extent.height = surface_height_; - render_pass_begin_info.clearValueCount = - static_cast(xe::countof(clear_values)); - render_pass_begin_info.pClearValues = clear_values; + render_pass_begin_info.clearValueCount = 0; + render_pass_begin_info.pClearValues = nullptr; vkCmdBeginRenderPass(render_cmd_buffer_, &render_pass_begin_info, VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS); @@ -458,6 +480,7 @@ bool VulkanSwapChain::End() { vkCmdEndRenderPass(render_cmd_buffer_); // Transition the image to a format the presentation engine can source from. + // FIXME: Do we need more synchronization here between the copy buffer? 
VkImageMemoryBarrier post_image_memory_barrier; post_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; post_image_memory_barrier.pNext = nullptr; @@ -483,14 +506,20 @@ bool VulkanSwapChain::End() { auto err = vkEndCommandBuffer(render_cmd_buffer_); CheckResult(err, "vkEndCommandBuffer"); + err = vkEndCommandBuffer(copy_cmd_buffer_); + CheckResult(err, "vkEndCommandBuffer"); + + VkCommandBuffer command_buffers[] = {copy_cmd_buffer_, render_cmd_buffer_}; + // Submit rendering. VkSubmitInfo render_submit_info; render_submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; render_submit_info.pNext = nullptr; render_submit_info.waitSemaphoreCount = 0; render_submit_info.pWaitSemaphores = nullptr; - render_submit_info.commandBufferCount = 1; - render_submit_info.pCommandBuffers = &render_cmd_buffer_; + render_submit_info.commandBufferCount = + static_cast(xe::countof(command_buffers)); + render_submit_info.pCommandBuffers = command_buffers; render_submit_info.signalSemaphoreCount = 0; render_submit_info.pSignalSemaphores = nullptr; { diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.h b/src/xenia/ui/vulkan/vulkan_swap_chain.h index 1d1f578c3..773a52053 100644 --- a/src/xenia/ui/vulkan/vulkan_swap_chain.h +++ b/src/xenia/ui/vulkan/vulkan_swap_chain.h @@ -35,11 +35,16 @@ class VulkanSwapChain { uint32_t surface_width() const { return surface_width_; } uint32_t surface_height() const { return surface_height_; } + VkImage surface_image() const { + return buffers_[current_buffer_index_].image; + } // Render pass used for compositing. VkRenderPass render_pass() const { return render_pass_; } // Render command buffer, active inside the render pass from Begin to End. VkCommandBuffer render_cmd_buffer() const { return render_cmd_buffer_; } + // Copy commands, ran before the render command buffer. + VkCommandBuffer copy_cmd_buffer() const { return copy_cmd_buffer_; } // Initializes the swap chain with the given WSI surface. 
bool Initialize(VkSurfaceKHR surface); @@ -74,6 +79,7 @@ class VulkanSwapChain { uint32_t surface_height_ = 0; VkFormat surface_format_ = VK_FORMAT_UNDEFINED; VkCommandPool cmd_pool_ = nullptr; + VkCommandBuffer copy_cmd_buffer_ = nullptr; VkCommandBuffer render_cmd_buffer_ = nullptr; VkRenderPass render_pass_ = nullptr; VkSemaphore image_available_semaphore_ = nullptr; From f903a559b372755a5d0dbc1f66c95281020f5f34 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Thu, 10 Mar 2016 13:01:39 -0600 Subject: [PATCH 076/145] Blit Vulkan CP output to the main window's swap chain --- .../gpu/vulkan/vulkan_graphics_system.cc | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc index 74ec57849..27b2ff073 100644 --- a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc +++ b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc @@ -19,14 +19,14 @@ #include "xenia/gpu/vulkan/vulkan_command_processor.h" #include "xenia/gpu/vulkan/vulkan_gpu_flags.h" #include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/vulkan/vulkan_swap_chain.h" #include "xenia/ui/window.h" namespace xe { namespace gpu { namespace vulkan { -VulkanGraphicsSystem::VulkanGraphicsSystem() = default; - +VulkanGraphicsSystem::VulkanGraphicsSystem() {} VulkanGraphicsSystem::~VulkanGraphicsSystem() = default; X_STATUS VulkanGraphicsSystem::Setup(cpu::Processor* processor, @@ -74,12 +74,25 @@ void VulkanGraphicsSystem::Swap(xe::ui::UIEvent* e) { return; } - // Blit the frontbuffer. 
- // display_context_->blitter()->BlitTexture2D( - // static_cast(swap_state.front_buffer_texture), - // Rect2D(0, 0, swap_state.width, swap_state.height), - // Rect2D(0, 0, target_window_->width(), target_window_->height()), - // GL_LINEAR, false); + auto swap_chain = display_context_->swap_chain(); + auto copy_cmd_buffer = swap_chain->copy_cmd_buffer(); + + VkImageBlit region; + region.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + region.srcOffsets[0] = {0, 0, 0}; + region.srcOffsets[1] = {static_cast(swap_state.width), + static_cast(swap_state.height), 1}; + + region.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + region.dstOffsets[0] = {0, 0, 0}; + region.dstOffsets[1] = {static_cast(swap_chain->surface_width()), + static_cast(swap_chain->surface_height()), + 1}; + vkCmdBlitImage(copy_cmd_buffer, + reinterpret_cast(swap_state.front_buffer_texture), + VK_IMAGE_LAYOUT_GENERAL, swap_chain->surface_image(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion, + VK_FILTER_LINEAR); } } // namespace vulkan From 635d095b8777bef5ded36a7a92cd2835b31ce9e2 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Thu, 10 Mar 2016 20:39:46 -0600 Subject: [PATCH 077/145] RenderCache: Track color target / depth target usage, refactor RawCopyToImage --- src/xenia/gpu/vulkan/render_cache.cc | 131 +++++++++++++++------------ src/xenia/gpu/vulkan/render_cache.h | 7 +- 2 files changed, 79 insertions(+), 59 deletions(-) diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index a2b496330..727fa59e2 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -508,6 +508,7 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); dirty |= SetShadowRegister(®s.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); + dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); dirty |= SetShadowRegister(®s.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); dirty |= SetShadowRegister(®s.pa_sc_window_scissor_tl, XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); @@ -529,6 +530,12 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, &framebuffer)) { return nullptr; } + + for (int i = 0; i < 4; i++) { + config->color[i].used = pixel_shader->writes_color_target(i); + } + config->depth_stencil.used = !!(regs.rb_depthcontrol & (0x4 | 0x2)); + current_state_.render_pass = render_pass; current_state_.render_pass_handle = render_pass->handle; current_state_.framebuffer = framebuffer; @@ -550,10 +557,33 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, region.bufferRowLength = 0; region.bufferImageHeight = 0; region.imageOffset = {0, 0, 0}; + + // Depth + auto depth_target = current_state_.framebuffer->depth_stencil_attachment; + if (depth_target && current_state_.config.depth_stencil.used) { + region.imageSubresource = { + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1}; + 
region.bufferOffset = depth_target->key.tile_offset * 5120; + + // Wait for any potential copies to finish. + barrier.offset = region.bufferOffset; + barrier.size = depth_target->key.tile_width * 80 * + depth_target->key.tile_height * 16 * 4; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, + &barrier, 0, nullptr); + + region.imageExtent = {depth_target->key.tile_width * 80u, + depth_target->key.tile_height * 16u, 1}; + vkCmdCopyBufferToImage(command_buffer, edram_buffer_, depth_target->image, + VK_IMAGE_LAYOUT_GENERAL, 1, ®ion); + } + + // Color region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; for (int i = 0; i < 4; i++) { auto target = current_state_.framebuffer->color_attachments[i]; - if (!target) { + if (!target || !current_state_.config.color[i].used) { continue; } @@ -572,27 +602,6 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, vkCmdCopyBufferToImage(command_buffer, edram_buffer_, target->image, VK_IMAGE_LAYOUT_GENERAL, 1, ®ion); } - - // Depth - auto depth_target = current_state_.framebuffer->depth_stencil_attachment; - if (depth_target) { - region.imageSubresource = { - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1}; - region.bufferOffset = depth_target->key.tile_offset * 5120; - - // Wait for any potential copies to finish. 
- barrier.offset = region.bufferOffset; - barrier.size = depth_target->key.tile_width * 80 * - depth_target->key.tile_height * 16 * 4; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, - &barrier, 0, nullptr); - - region.imageExtent = {depth_target->key.tile_width * 80u, - depth_target->key.tile_height * 16u, 1}; - vkCmdCopyBufferToImage(command_buffer, edram_buffer_, depth_target->image, - VK_IMAGE_LAYOUT_GENERAL, 1, ®ion); - } } if (!render_pass) { return nullptr; @@ -809,10 +818,23 @@ void RenderCache::EndRenderPass() { region.bufferRowLength = 0; region.bufferImageHeight = 0; region.imageOffset = {0, 0, 0}; + // Depth/stencil + auto depth_target = current_state_.framebuffer->depth_stencil_attachment; + if (depth_target && current_state_.config.depth_stencil.used) { + region.imageSubresource = { + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1}; + region.bufferOffset = depth_target->key.tile_offset * 5120; + region.imageExtent = {depth_target->key.tile_width * 80u, + depth_target->key.tile_height * 16u, 1}; + vkCmdCopyImageToBuffer(current_command_buffer_, depth_target->image, + VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, ®ion); + } + + // Color region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; for (int i = 0; i < 4; i++) { auto target = current_state_.framebuffer->color_attachments[i]; - if (!target) { + if (!target || !current_state_.config.color[i].used) { continue; } @@ -823,18 +845,6 @@ void RenderCache::EndRenderPass() { VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, ®ion); } - // Depth/stencil - auto depth_target = current_state_.framebuffer->depth_stencil_attachment; - if (depth_target) { - region.imageSubresource = { - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1}; - region.bufferOffset = depth_target->key.tile_offset * 5120; - region.imageExtent = {depth_target->key.tile_width * 80u, - depth_target->key.tile_height * 16u, 1}; - 
vkCmdCopyImageToBuffer(current_command_buffer_, depth_target->image, - VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, ®ion); - } - current_command_buffer_ = nullptr; } @@ -845,24 +855,27 @@ void RenderCache::ClearCache() { void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, uint32_t edram_base, VkImage image, VkImageLayout image_layout, - bool color_or_depth, int32_t offset_x, - int32_t offset_y, uint32_t width, - uint32_t height) { + bool color_or_depth, VkOffset3D offset, + VkExtent3D extents) { // Transition the texture into a transfer destination layout. VkImageMemoryBarrier image_barrier; image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_barrier.pNext = nullptr; - image_barrier.srcAccessMask = 0; - image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_barrier.oldLayout = image_layout; - image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.image = image; - image_barrier.subresourceRange = {0, 0, 1, 0, 1}; - image_barrier.subresourceRange.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + if (image_layout != VK_IMAGE_LAYOUT_GENERAL && + image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + image_barrier.srcAccessMask = 0; + image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + image_barrier.oldLayout = image_layout; + image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + image_barrier.image = image; + image_barrier.subresourceRange = {0, 0, 1, 0, 1}; + image_barrier.subresourceRange.aspectMask = + color_or_depth + ? 
VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + } VkBufferMemoryBarrier buffer_barrier; buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -872,7 +885,8 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; buffer_barrier.buffer = edram_buffer_; buffer_barrier.offset = edram_base * 5120; - buffer_barrier.size = width * height * 4; // TODO: Calculate this accurately. + // TODO: Calculate this accurately (need texel size) + buffer_barrier.size = extents.width * extents.height * 4; vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, @@ -880,11 +894,11 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, // Issue the copy command. VkBufferImageCopy region; - region.bufferImageHeight = 0; region.bufferOffset = edram_base * 5120; + region.bufferImageHeight = 0; region.bufferRowLength = 0; - region.imageExtent = {width, height, 1}; - region.imageOffset = {offset_x, offset_y, 0}; + region.imageOffset = offset; + region.imageExtent = extents; region.imageSubresource = {0, 0, 0, 1}; region.imageSubresource.aspectMask = color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT @@ -893,12 +907,15 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, ®ion); // Transition the image back into its previous layout. 
- image_barrier.srcAccessMask = image_barrier.dstAccessMask; - image_barrier.dstAccessMask = 0; - std::swap(image_barrier.oldLayout, image_barrier.newLayout); - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, - nullptr, 1, &image_barrier); + if (image_layout != VK_IMAGE_LAYOUT_GENERAL && + image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + image_barrier.srcAccessMask = image_barrier.dstAccessMask; + image_barrier.dstAccessMask = 0; + std::swap(image_barrier.oldLayout, image_barrier.newLayout); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); + } } bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index 13397bf1b..d1aad23de 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -82,11 +82,13 @@ struct RenderConfiguration { MsaaSamples surface_msaa; // Color attachments for the 4 render targets. struct { + bool used; uint32_t edram_base; ColorRenderTargetFormat format; } color[4]; // Depth/stencil attachment. struct { + bool used; uint32_t edram_base; DepthRenderTargetFormat format; } depth_stencil; @@ -262,8 +264,8 @@ class RenderCache { // Queues commands to copy EDRAM contents into an image. void RawCopyToImage(VkCommandBuffer command_buffer, uint32_t edram_base, VkImage image, VkImageLayout image_layout, - bool color_or_depth, int32_t offset_x, int32_t offset_y, - uint32_t width, uint32_t height); + bool color_or_depth, VkOffset3D offset, + VkExtent3D extents); private: // Parses the current state into a configuration object. 
@@ -309,6 +311,7 @@ class RenderCache { uint32_t rb_color1_info; uint32_t rb_color2_info; uint32_t rb_color3_info; + uint32_t rb_depthcontrol; uint32_t rb_depth_info; uint32_t pa_sc_window_scissor_tl; uint32_t pa_sc_window_scissor_br; From 5ba04b9e55d76eb82bbadc8fee5e4056743b3e53 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 12 Mar 2016 11:49:59 -0600 Subject: [PATCH 078/145] RenderCache::ClearEDRAMColor/ClearEDRAMDepthStencil --- src/xenia/gpu/vulkan/render_cache.cc | 84 +++++++++++++++++++++++++++- src/xenia/gpu/vulkan/render_cache.h | 11 ++++ 2 files changed, 94 insertions(+), 1 deletion(-) diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 727fa59e2..379b3893f 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -531,6 +531,7 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, return nullptr; } + // Speculatively see if targets are actually used so we can skip copies for (int i = 0; i < 4; i++) { config->color[i].used = pixel_shader->writes_color_target(i); } @@ -646,7 +647,6 @@ bool RenderCache::ParseConfiguration(RenderConfiguration* config) { // RB_SURFACE_INFO // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html config->surface_pitch_px = regs.rb_surface_info & 0x3FFF; - // config->surface_height_px = (regs.rb_surface_info >> 18) & 0x3FFF; config->surface_msaa = static_cast((regs.rb_surface_info >> 16) & 0x3); @@ -814,6 +814,11 @@ void RenderCache::EndRenderPass() { // Don't bother waiting on this command to complete, as next render pass may // reuse previous framebuffer attachments. If they need this, they will wait. // TODO: Should we bother re-tiling the images on copy back? + // + // FIXME: There's a case where we may have a really big render target (as we + // can't get the correct height atm) and we may end up overwriting the valid + // contents of another render target by mistake! 
Need to reorder copy commands + // to avoid this. VkBufferImageCopy region; region.bufferRowLength = 0; region.bufferImageHeight = 0; @@ -918,6 +923,83 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, } } +void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer, + uint32_t edram_base, + ColorRenderTargetFormat format, + uint32_t pitch, uint32_t height, + float* color) { + // Grab a tile view (as we need to clear an image first) + TileViewKey key; + key.color_or_depth = 1; + key.edram_format = static_cast(format); + key.tile_offset = edram_base; + key.tile_width = pitch / 80; + key.tile_height = height / 16; + auto tile_view = FindOrCreateTileView(command_buffer, key); + assert_not_null(tile_view); + + VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + VkClearColorValue clear_value; + std::memcpy(clear_value.float32, color, sizeof(float) * 4); + + // Issue a clear command + vkCmdClearColorImage(command_buffer, tile_view->image, + VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &range); + + // Copy image back into EDRAM buffer + VkBufferImageCopy copy_range; + copy_range.bufferOffset = edram_base * 5120; + copy_range.bufferImageHeight = 0; + copy_range.bufferRowLength = 0; + copy_range.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + copy_range.imageExtent = {key.tile_width * 80u, key.tile_height * 16u, 1u}; + copy_range.imageOffset = {0, 0, 0}; + vkCmdCopyImageToBuffer(command_buffer, tile_view->image, + VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, + ©_range); +} + +void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, + uint32_t edram_base, + DepthRenderTargetFormat format, + uint32_t pitch, uint32_t height, + float depth, uint32_t stencil) { + // Grab a tile view (as we need to clear an image first) + TileViewKey key; + key.color_or_depth = 0; + key.edram_format = static_cast(format); + key.tile_offset = edram_base; + key.tile_width = pitch / 80; + key.tile_height = height / 16; + auto 
tile_view = FindOrCreateTileView(command_buffer, key); + assert_not_null(tile_view); + + VkImageSubresourceRange range = { + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1, + }; + VkClearDepthStencilValue clear_value; + clear_value.depth = depth; + clear_value.stencil = stencil; + + // Issue a clear command + vkCmdClearDepthStencilImage(command_buffer, tile_view->image, + VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &range); + + // Copy image back into EDRAM buffer + VkBufferImageCopy copy_range; + copy_range.bufferOffset = edram_base * 5120; + copy_range.bufferImageHeight = 0; + copy_range.bufferRowLength = 0; + copy_range.imageSubresource = { + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1, + }; + copy_range.imageExtent = {key.tile_width * 80u, key.tile_height * 16u, 1u}; + copy_range.imageOffset = {0, 0, 0}; + vkCmdCopyImageToBuffer(command_buffer, tile_view->image, + VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, + ©_range); +} + bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { uint32_t value = register_file_->values[register_name].u32; if (*dest == value) { diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index d1aad23de..4de9d0e72 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -12,6 +12,7 @@ #include "xenia/gpu/register_file.h" #include "xenia/gpu/shader.h" +#include "xenia/gpu/texture_info.h" #include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/vulkan.h" @@ -267,6 +268,16 @@ class RenderCache { bool color_or_depth, VkOffset3D offset, VkExtent3D extents); + // Queues commands to clear EDRAM contents with a solid color + void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base, + ColorRenderTargetFormat format, uint32_t pitch, + uint32_t height, float* color); + // Queues commands to clear EDRAM contents with depth/stencil values. 
+ void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, + uint32_t edram_base, + DepthRenderTargetFormat format, uint32_t pitch, + uint32_t height, float depth, uint32_t stencil); + private: // Parses the current state into a configuration object. bool ParseConfiguration(RenderConfiguration* config); From 822d61c3d96b35faa6e94f0c78139aeea97c43aa Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 12 Mar 2016 22:03:11 -0600 Subject: [PATCH 079/145] Fix a few stale data usage bugs in the pipeline cache. Hook up part of depth/stencil tests/writes --- src/xenia/gpu/vulkan/pipeline_cache.cc | 51 ++++++++++++++++++++------ 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index 542329af5..b69aa0243 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -291,8 +291,8 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state, pipeline_info.basePipelineHandle = nullptr; pipeline_info.basePipelineIndex = 0; VkPipeline pipeline = nullptr; - auto err = vkCreateGraphicsPipelines(device_, nullptr, 1, &pipeline_info, - nullptr, &pipeline); + auto err = vkCreateGraphicsPipelines(device_, pipeline_cache_, 1, + &pipeline_info, nullptr, &pipeline); CheckResult(err, "vkCreateGraphicsPipelines"); // Add to cache with the hash key for reuse. @@ -338,6 +338,8 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, bool window_offset_dirty = SetShadowRegister(®s.pa_sc_window_offset, XE_GPU_REG_PA_SC_WINDOW_OFFSET); + window_offset_dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); // Window parameters. 
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h @@ -660,13 +662,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( dirty |= regs.vertex_shader != vertex_shader; dirty |= regs.pixel_shader != pixel_shader; dirty |= regs.primitive_type != primitive_type; + regs.vertex_shader = vertex_shader; + regs.pixel_shader = pixel_shader; + regs.primitive_type = primitive_type; XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; } - regs.vertex_shader = vertex_shader; - regs.pixel_shader = pixel_shader; - regs.primitive_type = primitive_type; update_shader_stages_stage_count_ = 0; @@ -723,11 +725,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( bool dirty = false; dirty |= vertex_shader != regs.vertex_shader; + regs.vertex_shader = vertex_shader; XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; } - regs.vertex_shader = vertex_shader; state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; state_info.pNext = nullptr; @@ -843,11 +845,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateInputAssemblyState( XE_GPU_REG_PA_SU_SC_MODE_CNTL); dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); + regs.primitive_type = primitive_type; XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; } - regs.primitive_type = primitive_type; state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; @@ -1038,11 +1040,38 @@ PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { state_info.pNext = nullptr; state_info.flags = 0; - state_info.depthTestEnable = VK_FALSE; - state_info.depthWriteEnable = VK_FALSE; - state_info.depthCompareOp = VK_COMPARE_OP_ALWAYS; + static const VkCompareOp compare_func_map[] = { + /* 0 */ VK_COMPARE_OP_NEVER, + /* 1 */ VK_COMPARE_OP_LESS, + 
/* 2 */ VK_COMPARE_OP_EQUAL, + /* 3 */ VK_COMPARE_OP_LESS_OR_EQUAL, + /* 4 */ VK_COMPARE_OP_GREATER, + /* 5 */ VK_COMPARE_OP_NOT_EQUAL, + /* 6 */ VK_COMPARE_OP_GREATER_OR_EQUAL, + /* 7 */ VK_COMPARE_OP_ALWAYS, + }; + static const VkStencilOp stencil_op_map[] = { + /* 0 */ VK_STENCIL_OP_KEEP, + /* 1 */ VK_STENCIL_OP_ZERO, + /* 2 */ VK_STENCIL_OP_REPLACE, + /* 3 */ VK_STENCIL_OP_INCREMENT_AND_WRAP, + /* 4 */ VK_STENCIL_OP_DECREMENT_AND_WRAP, + /* 5 */ VK_STENCIL_OP_INVERT, + /* 6 */ VK_STENCIL_OP_INCREMENT_AND_CLAMP, + /* 7 */ VK_STENCIL_OP_DECREMENT_AND_CLAMP, + }; + + // Depth state + // TODO: EARLY_Z_ENABLE (needs to be enabled in shaders) + state_info.depthWriteEnable = !!(regs.rb_depthcontrol & 0x4); + state_info.depthTestEnable = !!(regs.rb_depthcontrol & 0x2); + state_info.stencilTestEnable = !!(regs.rb_depthcontrol & 0x1); + + state_info.depthCompareOp = + compare_func_map[(regs.rb_depthcontrol & 0x70) >> 4]; state_info.depthBoundsTestEnable = VK_FALSE; - state_info.stencilTestEnable = VK_FALSE; + + // Stencil state state_info.front.failOp = VK_STENCIL_OP_KEEP; state_info.front.passOp = VK_STENCIL_OP_KEEP; state_info.front.depthFailOp = VK_STENCIL_OP_KEEP; From 245102e9e5fbf1bd8f68d51a8901f2866d25794e Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sat, 12 Mar 2016 22:04:32 -0600 Subject: [PATCH 080/145] RenderCache::BlitToImage --- src/xenia/gpu/vulkan/render_cache.cc | 124 +++++++++++++++++++++++++-- src/xenia/gpu/vulkan/render_cache.h | 7 ++ 2 files changed, 125 insertions(+), 6 deletions(-) diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 379b3893f..5047bff21 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -508,12 +508,12 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); dirty |= SetShadowRegister(®s.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); - dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); dirty |= SetShadowRegister(®s.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); dirty |= SetShadowRegister(®s.pa_sc_window_scissor_tl, XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br, XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); + regs.rb_depthcontrol = register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32; if (!dirty && current_state_.render_pass) { // No registers have changed so we can reuse the previous render pass - // just begin with what we had. @@ -880,6 +880,10 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, color_or_depth ? 
VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); } VkBufferMemoryBarrier buffer_barrier; @@ -895,7 +899,7 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, - &buffer_barrier, 1, &image_barrier); + &buffer_barrier, 0, nullptr); // Issue the copy command. VkBufferImageCopy region; @@ -923,6 +927,114 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, } } +void RenderCache::BlitToImage(VkCommandBuffer command_buffer, + uint32_t edram_base, uint32_t pitch, + uint32_t height, VkImage image, + VkImageLayout image_layout, bool color_or_depth, + uint32_t format, VkFilter filter, + VkOffset3D offset, VkExtent3D extents) { + // Grab a tile view that represents the source image. + TileViewKey key; + key.color_or_depth = color_or_depth ? 1 : 0; + key.edram_format = format; + key.tile_offset = edram_base; + key.tile_width = xe::round_up(pitch, 80) / 80; + key.tile_height = xe::round_up(height, 16) / 16; + auto tile_view = FindOrCreateTileView(command_buffer, key); + assert_not_null(tile_view); + + // Issue a memory barrier before we update this tile view. 
+ VkBufferMemoryBarrier buffer_barrier; + buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + buffer_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + buffer_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + buffer_barrier.buffer = edram_buffer_; + buffer_barrier.offset = edram_base * 5120; + // TODO: Calculate this accurately (need texel size) + buffer_barrier.size = extents.width * extents.height * 4; + + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, + &buffer_barrier, 0, nullptr); + + // Update the tile view with current EDRAM contents. + VkBufferImageCopy buffer_copy; + buffer_copy.bufferOffset = edram_base * 5120; + buffer_copy.bufferImageHeight = 0; + buffer_copy.bufferRowLength = 0; + buffer_copy.imageSubresource = {0, 0, 0, 1}; + buffer_copy.imageSubresource.aspectMask = + color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + buffer_copy.imageExtent = {key.tile_width * 80u, key.tile_height * 16u, 1u}; + buffer_copy.imageOffset = {0, 0, 0}; + vkCmdCopyBufferToImage(command_buffer, edram_buffer_, tile_view->image, + VK_IMAGE_LAYOUT_GENERAL, 1, &buffer_copy); + + // Transition the image into a transfer destination layout, if needed. 
+ // TODO: Util function for this + VkImageMemoryBarrier image_barrier; + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.pNext = nullptr; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + if (image_layout != VK_IMAGE_LAYOUT_GENERAL && + image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + image_barrier.srcAccessMask = 0; + image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + image_barrier.oldLayout = image_layout; + image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + image_barrier.image = image; + image_barrier.subresourceRange = {0, 0, 1, 0, 1}; + image_barrier.subresourceRange.aspectMask = + color_or_depth + ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); + } + + // If we overflow we'll lose the device here. + assert_true(extents.width <= key.tile_width * 80u); + assert_true(extents.height <= key.tile_height * 16u); + + // Now issue the blit to the destination. + VkImageBlit image_blit; + image_blit.srcSubresource = {0, 0, 0, 1}; + image_blit.srcSubresource.aspectMask = + color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + image_blit.srcOffsets[0] = {0, 0, 0}; + image_blit.srcOffsets[1] = {int32_t(extents.width), int32_t(extents.height), + int32_t(extents.depth)}; + + image_blit.dstSubresource = {0, 0, 0, 1}; + image_blit.dstSubresource.aspectMask = + color_or_depth ? 
VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + image_blit.dstOffsets[0] = offset; + image_blit.dstOffsets[1] = {offset.x + int32_t(extents.width), + offset.y + int32_t(extents.height), + offset.z + int32_t(extents.depth)}; + vkCmdBlitImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL, + image, image_layout, 1, &image_blit, filter); + + // Transition the image back into its previous layout. + if (image_layout != VK_IMAGE_LAYOUT_GENERAL && + image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + image_barrier.srcAccessMask = image_barrier.dstAccessMask; + image_barrier.dstAccessMask = 0; + std::swap(image_barrier.oldLayout, image_barrier.newLayout); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); + } +} + void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base, ColorRenderTargetFormat format, @@ -933,8 +1045,8 @@ void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer, key.color_or_depth = 1; key.edram_format = static_cast(format); key.tile_offset = edram_base; - key.tile_width = pitch / 80; - key.tile_height = height / 16; + key.tile_width = xe::round_up(pitch, 80) / 80; + key.tile_height = xe::round_up(height, 16) / 16; auto tile_view = FindOrCreateTileView(command_buffer, key); assert_not_null(tile_view); @@ -969,8 +1081,8 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, key.color_or_depth = 0; key.edram_format = static_cast(format); key.tile_offset = edram_base; - key.tile_width = pitch / 80; - key.tile_height = height / 16; + key.tile_width = xe::round_up(pitch, 80) / 80; + key.tile_height = xe::round_up(height, 16) / 16; auto tile_view = FindOrCreateTileView(command_buffer, key); assert_not_null(tile_view); diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index 4de9d0e72..97816c365 100644 --- 
a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -268,6 +268,13 @@ class RenderCache { bool color_or_depth, VkOffset3D offset, VkExtent3D extents); + // Queues commands to blit EDRAM contents into an image. + void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base, + uint32_t pitch, uint32_t height, VkImage image, + VkImageLayout image_layout, bool color_or_depth, + uint32_t format, VkFilter filter, VkOffset3D offset, + VkExtent3D extents); + // Queues commands to clear EDRAM contents with a solid color void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base, ColorRenderTargetFormat format, uint32_t pitch, From 54f89825d978968250abdde12817f7e522fc618d Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 15 Mar 2016 00:30:39 -0500 Subject: [PATCH 081/145] SPIR-V Dp2Add/Dp3 --- src/xenia/gpu/spirv_shader_translator.cc | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 3f991baa8..28158ed20 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -773,6 +773,26 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( // TODO } break; + case AluVectorOpcode::kDp2Add: { + auto src0_xy = b.createOp(spv::Op::OpVectorShuffle, vec2_float_type_, + {sources[0], sources[0], 0, 1}); + auto src1_xy = b.createOp(spv::Op::OpVectorShuffle, vec2_float_type_, + {sources[1], sources[1], 0, 1}); + auto src2_x = b.createCompositeExtract(sources[2], float_type_, 0); + auto dot = b.createBinOp(spv::Op::OpDot, float_type_, src0_xy, src1_xy); + dest = b.createBinOp(spv::Op::OpFAdd, float_type_, dot, src2_x); + dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_); + } break; + + case AluVectorOpcode::kDp3: { + auto src0_xyz = b.createOp(spv::Op::OpVectorShuffle, vec3_float_type_, + {sources[0], sources[0], 0, 1, 2}); + auto src1_xyz = 
b.createOp(spv::Op::OpVectorShuffle, vec3_float_type_, + {sources[1], sources[1], 0, 1, 2}); + auto dot = b.createBinOp(spv::Op::OpDot, float_type_, src0_xyz, src1_xyz); + dest = b.smearScalar(spv::NoPrecision, dot, vec4_float_type_); + } break; + case AluVectorOpcode::kDp4: { dest = b.createBinOp(spv::Op::OpDot, float_type_, sources[0], sources[1]); } break; @@ -1050,9 +1070,11 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( } break; default: + assert_unhandled_case(instr.vector_opcode); break; } + assert_not_zero(dest); if (dest) { // If predicated, discard the result from the instruction. Id pv_dest = dest; @@ -1477,9 +1499,11 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } break; default: + assert_unhandled_case(instr.scalar_opcode); break; } + assert_not_zero(dest); if (dest) { // If predicated, discard the result from the instruction. Id ps_dest = dest; From 1831e7a936b0acd866a2771d0bbf7ddc27cfdcb4 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Wed, 16 Mar 2016 14:45:40 -0500 Subject: [PATCH 082/145] Pipeline stencil state --- src/xenia/gpu/vulkan/pipeline_cache.cc | 32 ++++++++++++++++++-------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index b69aa0243..ca7c37b46 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -1068,18 +1068,32 @@ PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { state_info.stencilTestEnable = !!(regs.rb_depthcontrol & 0x1); state_info.depthCompareOp = - compare_func_map[(regs.rb_depthcontrol & 0x70) >> 4]; + compare_func_map[(regs.rb_depthcontrol >> 4) & 0x7]; state_info.depthBoundsTestEnable = VK_FALSE; + uint32_t stencil_ref = (regs.rb_stencilrefmask & 0x000000FF); + uint32_t stencil_read_mask = (regs.rb_stencilrefmask & 0x0000FF00) >> 8; + // Stencil state - state_info.front.failOp = VK_STENCIL_OP_KEEP; - state_info.front.passOp = 
VK_STENCIL_OP_KEEP; - state_info.front.depthFailOp = VK_STENCIL_OP_KEEP; - state_info.front.compareOp = VK_COMPARE_OP_ALWAYS; - state_info.back.failOp = VK_STENCIL_OP_KEEP; - state_info.back.passOp = VK_STENCIL_OP_KEEP; - state_info.back.depthFailOp = VK_STENCIL_OP_KEEP; - state_info.back.compareOp = VK_COMPARE_OP_ALWAYS; + state_info.front.compareOp = + compare_func_map[(regs.rb_depthcontrol >> 8) & 0x7]; + state_info.front.failOp = stencil_op_map[(regs.rb_depthcontrol >> 11) & 0x7]; + state_info.front.passOp = stencil_op_map[(regs.rb_depthcontrol >> 14) & 0x7]; + state_info.front.depthFailOp = + stencil_op_map[(regs.rb_depthcontrol >> 17) & 0x7]; + + // BACKFACE_ENABLE + if (!!(regs.rb_depthcontrol & 0x80)) { + state_info.back.compareOp = + compare_func_map[(regs.rb_depthcontrol >> 20) & 0x7]; + state_info.back.failOp = stencil_op_map[(regs.rb_depthcontrol >> 23) & 0x7]; + state_info.back.passOp = stencil_op_map[(regs.rb_depthcontrol >> 26) & 0x7]; + state_info.back.depthFailOp = + stencil_op_map[(regs.rb_depthcontrol >> 29) & 0x7]; + } else { + // Back state is identical to front state. + std::memcpy(&state_info.back, &state_info.front, sizeof(VkStencilOpState)); + } // Ignored; set dynamically. state_info.minDepthBounds = 0; From 0e3c113375bb5ba88a863e1127cfa17190b8f195 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Thu, 17 Mar 2016 21:55:16 -0500 Subject: [PATCH 083/145] Physical write watches -> access watches (read and/or write watching) --- src/xenia/cpu/mmio_handler.cc | 157 ++++++++++++++++++----------- src/xenia/cpu/mmio_handler.h | 36 ++++--- src/xenia/gpu/gl4/texture_cache.cc | 55 +++++++--- src/xenia/gpu/gl4/texture_cache.h | 8 +- src/xenia/memory.cc | 25 +++-- src/xenia/memory.h | 11 +- 6 files changed, 189 insertions(+), 103 deletions(-) diff --git a/src/xenia/cpu/mmio_handler.cc b/src/xenia/cpu/mmio_handler.cc index e5412d8e7..3edd9703e 100644 --- a/src/xenia/cpu/mmio_handler.cc +++ b/src/xenia/cpu/mmio_handler.cc @@ -87,13 +87,12 @@ bool MMIOHandler::CheckStore(uint32_t virtual_address, uint32_t value) { return false; } -uintptr_t MMIOHandler::AddPhysicalWriteWatch(uint32_t guest_address, - size_t length, - WriteWatchCallback callback, - void* callback_context, - void* callback_data) { - uint32_t base_address = guest_address; - assert_true(base_address < 0x1FFFFFFF); +uintptr_t MMIOHandler::AddPhysicalAccessWatch(uint32_t guest_address, + size_t length, WatchType type, + AccessWatchCallback callback, + void* callback_context, + void* callback_data) { + uint32_t base_address = guest_address & 0x1FFFFFFF; // Can only protect sizes matching system page size. // This means we need to round up, which will cause spurious access @@ -103,32 +102,45 @@ uintptr_t MMIOHandler::AddPhysicalWriteWatch(uint32_t guest_address, xe::memory::page_size()); base_address = base_address - (base_address % xe::memory::page_size()); + auto lock = global_critical_region_.Acquire(); + // Add to table. The slot reservation may evict a previous watch, which // could include our target, so we do it first. 
- auto entry = new WriteWatchEntry(); + auto entry = new AccessWatchEntry(); entry->address = base_address; entry->length = uint32_t(length); entry->callback = callback; entry->callback_context = callback_context; entry->callback_data = callback_data; - global_critical_region_.mutex().lock(); - write_watches_.push_back(entry); - global_critical_region_.mutex().unlock(); + access_watches_.push_back(entry); - // Make the desired range read only under all address spaces. + auto page_access = memory::PageAccess::kNoAccess; + switch (type) { + case kWatchWrite: + page_access = memory::PageAccess::kReadOnly; + break; + case kWatchReadWrite: + page_access = memory::PageAccess::kNoAccess; + break; + default: + assert_unhandled_case(type); + break; + } + + // Protect the range under all address spaces memory::Protect(physical_membase_ + entry->address, entry->length, - xe::memory::PageAccess::kReadOnly, nullptr); + page_access, nullptr); memory::Protect(virtual_membase_ + 0xA0000000 + entry->address, entry->length, - xe::memory::PageAccess::kReadOnly, nullptr); + page_access, nullptr); memory::Protect(virtual_membase_ + 0xC0000000 + entry->address, entry->length, - xe::memory::PageAccess::kReadOnly, nullptr); + page_access, nullptr); memory::Protect(virtual_membase_ + 0xE0000000 + entry->address, entry->length, - xe::memory::PageAccess::kReadOnly, nullptr); + page_access, nullptr); return reinterpret_cast(entry); } -void MMIOHandler::ClearWriteWatch(WriteWatchEntry* entry) { +void MMIOHandler::ClearAccessWatch(AccessWatchEntry* entry) { memory::Protect(physical_membase_ + entry->address, entry->length, xe::memory::PageAccess::kReadWrite, nullptr); memory::Protect(virtual_membase_ + 0xA0000000 + entry->address, entry->length, @@ -139,19 +151,20 @@ void MMIOHandler::ClearWriteWatch(WriteWatchEntry* entry) { xe::memory::PageAccess::kReadWrite, nullptr); } -void MMIOHandler::CancelWriteWatch(uintptr_t watch_handle) { - auto entry = reinterpret_cast(watch_handle); +void 
MMIOHandler::CancelAccessWatch(uintptr_t watch_handle) { + auto entry = reinterpret_cast(watch_handle); + auto lock = global_critical_region_.Acquire(); // Allow access to the range again. - ClearWriteWatch(entry); + ClearAccessWatch(entry); // Remove from table. - global_critical_region_.mutex().lock(); - auto it = std::find(write_watches_.begin(), write_watches_.end(), entry); - if (it != write_watches_.end()) { - write_watches_.erase(it); + auto it = std::find(access_watches_.begin(), access_watches_.end(), entry); + assert_false(it == access_watches_.end()); + + if (it != access_watches_.end()) { + access_watches_.erase(it); } - global_critical_region_.mutex().unlock(); delete entry; } @@ -159,18 +172,19 @@ void MMIOHandler::CancelWriteWatch(uintptr_t watch_handle) { void MMIOHandler::InvalidateRange(uint32_t physical_address, size_t length) { auto lock = global_critical_region_.Acquire(); - for (auto it = write_watches_.begin(); it != write_watches_.end();) { + for (auto it = access_watches_.begin(); it != access_watches_.end();) { auto entry = *it; if ((entry->address <= physical_address && entry->address + entry->length > physical_address) || (entry->address >= physical_address && entry->address < physical_address + length)) { // This watch lies within the range. End it. - ClearWriteWatch(entry); + ClearAccessWatch(entry); entry->callback(entry->callback_context, entry->callback_data, entry->address); - it = write_watches_.erase(it); + it = access_watches_.erase(it); + delete entry; continue; } @@ -178,50 +192,49 @@ void MMIOHandler::InvalidateRange(uint32_t physical_address, size_t length) { } } -bool MMIOHandler::CheckWriteWatch(uint64_t fault_address) { - uint32_t physical_address = uint32_t(fault_address); - if (physical_address > 0x1FFFFFFF) { - physical_address &= 0x1FFFFFFF; - } - std::list pending_invalidates; - global_critical_region_.mutex().lock(); - // Now that we hold the lock, recheck and see if the pages are still - // protected. 
- memory::PageAccess cur_access; - size_t page_length = memory::page_size(); - memory::QueryProtect((void*)fault_address, page_length, cur_access); - if (cur_access != memory::PageAccess::kReadOnly && - cur_access != memory::PageAccess::kNoAccess) { - // Another thread has cleared this write watch. Abort. - global_critical_region_.mutex().unlock(); - return true; +bool MMIOHandler::IsRangeWatched(uint32_t physical_address, size_t length) { + auto lock = global_critical_region_.Acquire(); + + for (auto it = access_watches_.begin(); it != access_watches_.end(); ++it) { + auto entry = *it; + if ((entry->address <= physical_address && + entry->address + entry->length > physical_address) || + (entry->address >= physical_address && + entry->address < physical_address + length)) { + // This watch lies within the range. + return true; + } } - for (auto it = write_watches_.begin(); it != write_watches_.end();) { + return false; +} + +bool MMIOHandler::CheckAccessWatch(uint32_t physical_address) { + auto lock = global_critical_region_.Acquire(); + + bool hit = false; + for (auto it = access_watches_.begin(); it != access_watches_.end();) { auto entry = *it; if (entry->address <= physical_address && entry->address + entry->length > physical_address) { - // Hit! Remove the writewatch. - pending_invalidates.push_back(entry); + // Hit! Remove the watch. + hit = true; + ClearAccessWatch(entry); + entry->callback(entry->callback_context, entry->callback_data, + physical_address); - ClearWriteWatch(entry); - it = write_watches_.erase(it); + it = access_watches_.erase(it); + delete entry; continue; } ++it; } - global_critical_region_.mutex().unlock(); - if (pending_invalidates.empty()) { + + if (!hit) { // Rethrow access violation - range was not being watched. 
return false; } - while (!pending_invalidates.empty()) { - auto entry = pending_invalidates.back(); - pending_invalidates.pop_back(); - entry->callback(entry->callback_context, entry->callback_data, - physical_address); - delete entry; - } + // Range was watched, so lets eat this access violation. return true; } @@ -414,9 +427,33 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) { } } if (!range) { + auto fault_address = reinterpret_cast(ex->fault_address()); + uint32_t guest_address = 0; + if (fault_address >= virtual_membase_ && + fault_address < physical_membase_) { + // Faulting on a virtual address. + guest_address = static_cast(ex->fault_address()) & 0x1FFFFFFF; + } else { + // Faulting on a physical address. + guest_address = static_cast(ex->fault_address()); + } + + // HACK: Recheck if the pages are still protected (race condition - another + // thread clears the writewatch we just hit) + // Do this under the lock so we don't introduce another race condition. + auto lock = global_critical_region_.Acquire(); + memory::PageAccess cur_access; + size_t page_length = memory::page_size(); + memory::QueryProtect((void*)fault_address, page_length, cur_access); + if (cur_access != memory::PageAccess::kReadOnly && + cur_access != memory::PageAccess::kNoAccess) { + // Another thread has cleared this write watch. Abort. + return true; + } + // Access is not found within any range, so fail and let the caller handle // it (likely by aborting). 
- return CheckWriteWatch(ex->fault_address()); + return CheckAccessWatch(guest_address); } auto rip = ex->pc(); diff --git a/src/xenia/cpu/mmio_handler.h b/src/xenia/cpu/mmio_handler.h index 70d89ac02..bb8cd665f 100644 --- a/src/xenia/cpu/mmio_handler.h +++ b/src/xenia/cpu/mmio_handler.h @@ -28,9 +28,8 @@ typedef uint32_t (*MMIOReadCallback)(void* ppc_context, void* callback_context, uint32_t addr); typedef void (*MMIOWriteCallback)(void* ppc_context, void* callback_context, uint32_t addr, uint32_t value); - -typedef void (*WriteWatchCallback)(void* context_ptr, void* data_ptr, - uint32_t address); +typedef void (*AccessWatchCallback)(void* context_ptr, void* data_ptr, + uint32_t address); struct MMIORange { uint32_t address; @@ -46,6 +45,12 @@ class MMIOHandler { public: virtual ~MMIOHandler(); + enum WatchType { + kWatchInvalid = 0, + kWatchWrite = 1, + kWatchReadWrite = 2, + }; + static std::unique_ptr Install(uint8_t* virtual_membase, uint8_t* physical_membase, uint8_t* membase_end); @@ -59,17 +64,24 @@ class MMIOHandler { bool CheckLoad(uint32_t virtual_address, uint32_t* out_value); bool CheckStore(uint32_t virtual_address, uint32_t value); - uintptr_t AddPhysicalWriteWatch(uint32_t guest_address, size_t length, - WriteWatchCallback callback, - void* callback_context, void* callback_data); - void CancelWriteWatch(uintptr_t watch_handle); + // Memory watches: These are one-shot alarms that fire a callback (in the + // context of the thread that caused the callback) when a memory range is + // either written to or read from, depending on the watch type. These fire as + // soon as a read/write happens, and only fire once. + // These watches may be spuriously fired if memory is accessed nearby. 
+ uintptr_t AddPhysicalAccessWatch(uint32_t guest_address, size_t length, + WatchType type, AccessWatchCallback callback, + void* callback_context, void* callback_data); + void CancelAccessWatch(uintptr_t watch_handle); void InvalidateRange(uint32_t physical_address, size_t length); + bool IsRangeWatched(uint32_t physical_address, size_t length); protected: - struct WriteWatchEntry { + struct AccessWatchEntry { uint32_t address; uint32_t length; - WriteWatchCallback callback; + WatchType type; + AccessWatchCallback callback; void* callback_context; void* callback_data; }; @@ -83,8 +95,8 @@ class MMIOHandler { static bool ExceptionCallbackThunk(Exception* ex, void* data); bool ExceptionCallback(Exception* ex); - void ClearWriteWatch(WriteWatchEntry* entry); - bool CheckWriteWatch(uint64_t fault_address); + void ClearAccessWatch(AccessWatchEntry* entry); + bool CheckAccessWatch(uint32_t guest_address); uint8_t* virtual_membase_; uint8_t* physical_membase_; @@ -94,7 +106,7 @@ class MMIOHandler { xe::global_critical_region global_critical_region_; // TODO(benvanik): data structure magic. - std::list write_watches_; + std::list access_watches_; static MMIOHandler* global_handler_; }; diff --git a/src/xenia/gpu/gl4/texture_cache.cc b/src/xenia/gpu/gl4/texture_cache.cc index 4a8917e71..72e1c9639 100644 --- a/src/xenia/gpu/gl4/texture_cache.cc +++ b/src/xenia/gpu/gl4/texture_cache.cc @@ -427,7 +427,7 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture( // Not found, create. auto entry = std::make_unique(); entry->texture_info = texture_info; - entry->write_watch_handle = 0; + entry->access_watch_handle = 0; entry->pending_invalidation = false; entry->handle = 0; @@ -442,6 +442,7 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture( // Found! Acquire the handle and remove the readbuffer entry. 
read_buffer_textures_.erase(it); entry->handle = read_buffer_entry->handle; + entry->access_watch_handle = read_buffer_entry->access_watch_handle; delete read_buffer_entry; // TODO(benvanik): set more texture properties? swizzle/etc? auto entry_ptr = entry.get(); @@ -495,14 +496,15 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture( // Add a write watch. If any data in the given range is touched we'll get a // callback and evict the texture. We could reuse the storage, though the // driver is likely in a better position to pool that kind of stuff. - entry->write_watch_handle = memory_->AddPhysicalWriteWatch( + entry->access_watch_handle = memory_->AddPhysicalAccessWatch( texture_info.guest_address, texture_info.input_length, + cpu::MMIOHandler::kWatchWrite, [](void* context_ptr, void* data_ptr, uint32_t address) { auto self = reinterpret_cast(context_ptr); auto touched_entry = reinterpret_cast(data_ptr); // Clear watch handle first so we don't redundantly // remove. - touched_entry->write_watch_handle = 0; + touched_entry->access_watch_handle = 0; touched_entry->pending_invalidation = true; // Add to pending list so Scavenge will clean it up. self->invalidated_textures_mutex_.lock(); @@ -574,14 +576,27 @@ GLuint TextureCache::ConvertTexture(Blitter* blitter, uint32_t guest_address, dest_rect, GL_LINEAR, swap_channels); } - // HACK: remove texture from write watch list so readback won't kill us. - // Not needed now, as readback is disabled. - /* - if (texture_entry->write_watch_handle) { - memory_->CancelWriteWatch(texture_entry->write_watch_handle); - texture_entry->write_watch_handle = 0; + // Setup a read/write access watch. If the game tries to touch the memory + // we were supposed to populate with this texture, then we'll actually + // populate it. 
+ if (texture_entry->access_watch_handle) { + memory_->CancelAccessWatch(texture_entry->access_watch_handle); + texture_entry->access_watch_handle = 0; } - //*/ + + texture_entry->access_watch_handle = memory_->AddPhysicalAccessWatch( + guest_address, texture_entry->texture_info.input_length, + cpu::MMIOHandler::kWatchReadWrite, + [](void* context, void* data, uint32_t address) { + auto touched_entry = reinterpret_cast(data); + touched_entry->access_watch_handle = 0; + + // This happens. RDR resolves to a texture then upsizes it, BF1943 + // writes to a resolved texture. + // TODO (for Vulkan): Copy this texture back into system memory. + // assert_always(); + }, + nullptr, texture_entry); return texture_entry->handle; } @@ -618,6 +633,20 @@ GLuint TextureCache::ConvertTexture(Blitter* blitter, uint32_t guest_address, entry->block_height = block_height; entry->format = format; + entry->access_watch_handle = memory_->AddPhysicalAccessWatch( + guest_address, block_height * block_width * 4, + cpu::MMIOHandler::kWatchReadWrite, + [](void* context, void* data, uint32_t address) { + auto entry = reinterpret_cast(data); + entry->access_watch_handle = 0; + + // This happens. RDR resolves to a texture then upsizes it, BF1943 + // writes to a resolved texture. + // TODO (for Vulkan): Copy this texture back into system memory. 
+ // assert_always(); + }, + nullptr, entry.get()); + glCreateTextures(GL_TEXTURE_2D, 1, &entry->handle); glTextureParameteri(entry->handle, GL_TEXTURE_BASE_LEVEL, 0); glTextureParameteri(entry->handle, GL_TEXTURE_MAX_LEVEL, 1); @@ -636,9 +665,9 @@ GLuint TextureCache::ConvertTexture(Blitter* blitter, uint32_t guest_address, } void TextureCache::EvictTexture(TextureEntry* entry) { - if (entry->write_watch_handle) { - memory_->CancelWriteWatch(entry->write_watch_handle); - entry->write_watch_handle = 0; + if (entry->access_watch_handle) { + memory_->CancelAccessWatch(entry->access_watch_handle); + entry->access_watch_handle = 0; } for (auto& view : entry->views) { diff --git a/src/xenia/gpu/gl4/texture_cache.h b/src/xenia/gpu/gl4/texture_cache.h index d214dac53..d55aa37a1 100644 --- a/src/xenia/gpu/gl4/texture_cache.h +++ b/src/xenia/gpu/gl4/texture_cache.h @@ -44,7 +44,7 @@ class TextureCache { }; struct TextureEntry { TextureInfo texture_info; - uintptr_t write_watch_handle; + uintptr_t access_watch_handle; GLuint handle; bool pending_invalidation; std::vector> views; @@ -74,8 +74,12 @@ class TextureCache { TextureFormat format, bool swap_channels, GLuint src_texture, Rect2D src_rect, Rect2D dest_rect); + TextureEntry* LookupAddress(uint32_t guest_address, uint32_t width, + uint32_t height, TextureFormat format); + private: struct ReadBufferTexture { + uintptr_t access_watch_handle; uint32_t guest_address; uint32_t logical_width; uint32_t logical_height; @@ -90,8 +94,6 @@ class TextureCache { void EvictSampler(SamplerEntry* entry); TextureEntry* LookupOrInsertTexture(const TextureInfo& texture_info, uint64_t opt_hash = 0); - TextureEntry* LookupAddress(uint32_t guest_address, uint32_t width, - uint32_t height, TextureFormat format); void EvictTexture(TextureEntry* entry); bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info); diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index d7507df23..5dcf5bfa8 100644 --- a/src/xenia/memory.cc +++ 
b/src/xenia/memory.cc @@ -376,17 +376,19 @@ cpu::MMIORange* Memory::LookupVirtualMappedRange(uint32_t virtual_address) { return mmio_handler_->LookupRange(virtual_address); } -uintptr_t Memory::AddPhysicalWriteWatch(uint32_t physical_address, - uint32_t length, - cpu::WriteWatchCallback callback, - void* callback_context, - void* callback_data) { - return mmio_handler_->AddPhysicalWriteWatch( - physical_address, length, callback, callback_context, callback_data); +uintptr_t Memory::AddPhysicalAccessWatch(uint32_t physical_address, + uint32_t length, + cpu::MMIOHandler::WatchType type, + cpu::AccessWatchCallback callback, + void* callback_context, + void* callback_data) { + return mmio_handler_->AddPhysicalAccessWatch(physical_address, length, type, + callback, callback_context, + callback_data); } -void Memory::CancelWriteWatch(uintptr_t watch_handle) { - mmio_handler_->CancelWriteWatch(watch_handle); +void Memory::CancelAccessWatch(uintptr_t watch_handle) { + mmio_handler_->CancelAccessWatch(watch_handle); } uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment, @@ -453,6 +455,7 @@ bool Memory::Save(ByteStream* stream) { } bool Memory::Restore(ByteStream* stream) { + XELOGD("Restoring memory..."); heaps_.v00000000.Restore(stream); heaps_.v40000000.Restore(stream); heaps_.v80000000.Restore(stream); @@ -577,6 +580,8 @@ bool BaseHeap::Save(ByteStream* stream) { } bool BaseHeap::Restore(ByteStream* stream) { + XELOGD("Heap %.8X-%.8X", heap_base_, heap_base_ + heap_size_); + for (size_t i = 0; i < page_table_.size(); i++) { auto& page = page_table_[i]; page.qword = stream->Read(); @@ -897,7 +902,7 @@ bool BaseHeap::Release(uint32_t base_address, uint32_t* out_region_size) { auto base_page_entry = page_table_[base_page_number]; if (base_page_entry.base_address != base_page_number) { XELOGE("BaseHeap::Release failed because address is not a region start"); - // return false; + return false; } if (out_region_size) { diff --git a/src/xenia/memory.h 
b/src/xenia/memory.h index 6a0fc9c5d..e27976de2 100644 --- a/src/xenia/memory.h +++ b/src/xenia/memory.h @@ -303,12 +303,13 @@ class Memory { // // This has a significant performance penalty for writes in in the range or // nearby (sharing 64KiB pages). - uintptr_t AddPhysicalWriteWatch(uint32_t physical_address, uint32_t length, - cpu::WriteWatchCallback callback, - void* callback_context, void* callback_data); + uintptr_t AddPhysicalAccessWatch(uint32_t physical_address, uint32_t length, + cpu::MMIOHandler::WatchType type, + cpu::AccessWatchCallback callback, + void* callback_context, void* callback_data); - // Cancels a write watch requested with AddPhysicalWriteWatch. - void CancelWriteWatch(uintptr_t watch_handle); + // Cancels a write watch requested with AddPhysicalAccessWatch. + void CancelAccessWatch(uintptr_t watch_handle); // Allocates virtual memory from the 'system' heap. // System memory is kept separate from game memory but is still accessible From 2512a6360eec4770874e994a0292b980bbf9c61a Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Thu, 17 Mar 2016 21:55:47 -0500 Subject: [PATCH 084/145] Pass the physical frontbuffer address into the CP --- src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc index e979cb62a..208473cf2 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_video.cc @@ -366,7 +366,7 @@ void VdSwap(lpvoid_t buffer_ptr, // ptr into primary ringbuffer auto dwords = buffer_ptr.as_array(); dwords[0] = xenos::MakePacketType3(); dwords[1] = 'SWAP'; - dwords[2] = *frontbuffer_ptr; + dwords[2] = (*frontbuffer_ptr) & 0x1FFFFFFF; // Set by VdCallGraphicsNotificationRoutines. dwords[3] = last_frontbuffer_width_; From 38b94dd9e2ae355971ae2881ff065ffca0ed6b2b Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Thu, 17 Mar 2016 21:58:23 -0500 Subject: [PATCH 085/145] Add in Xenos events --- src/xenia/gpu/xenos.h | 45 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 65c1f0bad..32c33cae8 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -49,6 +49,7 @@ enum class PrimitiveType : uint32_t { kLineLoop = 0x0C, kQuadList = 0x0D, kQuadStrip = 0x0E, + kUnknown0x11 = 0x11, }; enum class Dimension : uint32_t { @@ -382,7 +383,7 @@ XEPACKEDUNION(xe_gpu_vertex_fetch_t, { uint32_t type : 2; uint32_t address : 30; uint32_t endian : 2; - uint32_t size : 24; + uint32_t size : 24; // size in words uint32_t unk1 : 6; }); XEPACKEDSTRUCTANONYMOUS({ @@ -486,6 +487,46 @@ XEPACKEDUNION(xe_gpu_fetch_group_t, { }); }); +enum Event { + SAMPLE_STREAMOUTSTATS1 = (1 << 0), + SAMPLE_STREAMOUTSTATS2 = (2 << 0), + SAMPLE_STREAMOUTSTATS3 = (3 << 0), + CACHE_FLUSH_TS = (4 << 0), + CACHE_FLUSH = (6 << 0), + CS_PARTIAL_FLUSH = (7 << 0), + VGT_STREAMOUT_RESET = (10 << 0), + END_OF_PIPE_INCR_DE = (11 << 0), + END_OF_PIPE_IB_END = (12 << 0), + RST_PIX_CNT = (13 << 0), + VS_PARTIAL_FLUSH = (15 << 0), + PS_PARTIAL_FLUSH = (16 << 0), + CACHE_FLUSH_AND_INV_TS_EVENT = (20 << 0), + ZPASS_DONE = (21 << 0), + CACHE_FLUSH_AND_INV_EVENT = (22 << 0), + PERFCOUNTER_START = (23 << 0), + PERFCOUNTER_STOP = (24 << 0), + PIPELINESTAT_START = (25 << 0), + PIPELINESTAT_STOP = (26 << 0), + PERFCOUNTER_SAMPLE = (27 << 0), + SAMPLE_PIPELINESTAT = (30 << 0), + SAMPLE_STREAMOUTSTATS = (32 << 0), + RESET_VTX_CNT = (33 << 0), + VGT_FLUSH = (36 << 0), + BOTTOM_OF_PIPE_TS = (40 << 0), + DB_CACHE_FLUSH_AND_INV = (42 << 0), + FLUSH_AND_INV_DB_DATA_TS = (43 << 0), + FLUSH_AND_INV_DB_META = (44 << 0), + FLUSH_AND_INV_CB_DATA_TS = (45 << 0), + FLUSH_AND_INV_CB_META = (46 << 0), + CS_DONE = (47 << 0), + PS_DONE = (48 << 0), + FLUSH_AND_INV_CB_PIXEL_DATA = (49 << 0), + THREAD_TRACE_START = (51 << 0), + 
THREAD_TRACE_STOP = (52 << 0), + THREAD_TRACE_FLUSH = (54 << 0), + THREAD_TRACE_FINISH = (55 << 0), +}; + // Opcodes (IT_OPCODE) for Type-3 commands in the ringbuffer. // https://github.com/freedreno/amd-gpu/blob/master/include/api/gsl_pm4types.h // Not sure if all of these are used. @@ -501,7 +542,7 @@ enum Type3Opcode { PM4_WAIT_FOR_IDLE = 0x26, // wait for the IDLE state of the engine PM4_WAIT_REG_MEM = 0x3c, // wait until a register or memory location is a specific value PM4_WAIT_REG_EQ = 0x52, // wait until a register location is equal to a specific value - PM4_WAT_REG_GTE = 0x53, // wait until a register location is >= a specific value + PM4_WAIT_REG_GTE = 0x53, // wait until a register location is >= a specific value PM4_WAIT_UNTIL_READ = 0x5c, // wait until a read completes PM4_WAIT_IB_PFD_COMPLETE = 0x5d, // wait until all base/size writes from an IB_PFD packet have completed From 7b962e59a4f3ef0d41e89d1a6cf216f1e61d86ea Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 20 Mar 2016 14:21:55 -0500 Subject: [PATCH 086/145] SPIR-V Dst Fix a few bugs in the translator --- src/xenia/gpu/spirv_shader_translator.cc | 73 +++++++++++++++++------- 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 28158ed20..f7a1660fb 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -164,6 +164,7 @@ void SpirvShaderTranslator::StartTranslation() { push_constants_type, "push_consts"); // Texture bindings + Id sampler_t = b.makeSamplerType(); Id tex_t[] = {b.makeSampledImageType(b.makeImageType( float_type_, spv::Dim::Dim1D, false, false, false, 1, spv::ImageFormat::ImageFormatUnknown)), @@ -177,18 +178,17 @@ void SpirvShaderTranslator::StartTranslation() { float_type_, spv::Dim::DimCube, false, false, false, 1, spv::ImageFormat::ImageFormatUnknown))}; - // Id samplers_a = b.makeArrayType(sampler_t, b.makeUintConstant(32), 0); 
+ Id samplers_a = b.makeArrayType(sampler_t, b.makeUintConstant(32), 0); Id tex_a_t[] = {b.makeArrayType(tex_t[0], b.makeUintConstant(32), 0), b.makeArrayType(tex_t[1], b.makeUintConstant(32), 0), b.makeArrayType(tex_t[2], b.makeUintConstant(32), 0), b.makeArrayType(tex_t[3], b.makeUintConstant(32), 0)}; // TODO(DrChat): See texture_cache.cc - do we need separate samplers here? - // samplers_ = - // b.createVariable(spv::StorageClass::StorageClassUniformConstant, - // samplers_a, "samplers"); - // b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 1); - // b.addDecoration(samplers_, spv::Decoration::DecorationBinding, 0); + samplers_ = b.createVariable(spv::StorageClass::StorageClassUniformConstant, + samplers_a, "samplers"); + b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 1); + b.addDecoration(samplers_, spv::Decoration::DecorationBinding, 0); for (int i = 0; i < 4; i++) { tex_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant, tex_a_t[i], @@ -481,16 +481,17 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( // Conditional branch assert_true(cf_blocks_.size() > instr.dword_index + 1); body = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, v, - b.makeBoolConstant(instr.condition)); + auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, + b.makeUintConstant(uint32_t(instr.condition))); b.createConditionalBranch(cond, body, cf_blocks_[instr.dword_index + 1]); } break; case ParsedExecInstruction::Type::kPredicated: { // Branch based on p0. 
assert_true(cf_blocks_.size() > instr.dword_index + 1); body = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, p0_, - b.makeBoolConstant(instr.condition)); + auto cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.condition)); b.createConditionalBranch(cond, body, cf_blocks_[instr.dword_index + 1]); } break; } @@ -545,6 +546,8 @@ void SpirvShaderTranslator::ProcessCallInstruction( auto head = cf_blocks_[instr.dword_index]; b.setBuildPoint(head); + // Unused instruction(?) + assert_always(); EmitUnimplementedTranslationError(); assert_true(cf_blocks_.size() > instr.dword_index + 1); @@ -558,6 +561,8 @@ void SpirvShaderTranslator::ProcessReturnInstruction( auto head = cf_blocks_[instr.dword_index]; b.setBuildPoint(head); + // Unused instruction(?) + assert_always(); EmitUnimplementedTranslationError(); assert_true(cf_blocks_.size() > instr.dword_index + 1); @@ -576,6 +581,8 @@ void SpirvShaderTranslator::ProcessJumpInstruction( b.createBranch(cf_blocks_[instr.target_address]); } break; case ParsedJumpInstruction::Type::kConditional: { + assert_true(cf_blocks_.size() > instr.dword_index + 1); + // Based off of bool_consts std::vector offsets; offsets.push_back(b.makeUintConstant(2)); // bool_consts @@ -590,17 +597,19 @@ void SpirvShaderTranslator::ProcessJumpInstruction( b.makeUintConstant(1)); // Conditional branch - auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, v, - b.makeBoolConstant(instr.condition)); + auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, + b.makeUintConstant(uint32_t(instr.condition))); b.createConditionalBranch(cond, cf_blocks_[instr.target_address], - cf_blocks_[instr.dword_index]); + cf_blocks_[instr.dword_index + 1]); } break; case ParsedJumpInstruction::Type::kPredicated: { assert_true(cf_blocks_.size() > instr.dword_index + 1); - auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, p0_, - 
b.makeBoolConstant(instr.condition)); + + auto cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.condition)); b.createConditionalBranch(cond, cf_blocks_[instr.target_address], - cf_blocks_[instr.dword_index]); + cf_blocks_[instr.dword_index + 1]); } break; } } @@ -770,7 +779,15 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( } break; case AluVectorOpcode::kDst: { - // TODO + auto src0_y = b.createCompositeExtract(sources[0], float_type_, 1); + auto src1_y = b.createCompositeExtract(sources[1], float_type_, 1); + auto dst_y = b.createBinOp(spv::Op::OpFMul, float_type_, src0_y, src1_y); + + auto src0_z = b.createCompositeExtract(sources[0], float_type_, 3); + auto src1_w = b.createCompositeExtract(sources[0], float_type_, 4); + dest = b.createCompositeConstruct( + vec4_float_type_, + std::vector({b.makeFloatConstant(1.f), dst_y, src0_z, src1_w})); } break; case AluVectorOpcode::kDp2Add: { @@ -1175,7 +1192,10 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto kill_block = &b.makeNewBlock(); auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], b.makeFloatConstant(0.f)); - cond = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + if (pred_cond) { + cond = + b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); + } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -1348,6 +1368,12 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( b.makeFloatConstant(0.f), d); } break; + case AluScalarOpcode::kRsqc: { + } break; + + case AluScalarOpcode::kRsqf: { + } break; + case AluScalarOpcode::kRsq: { // dest = src0 != 0.0 ? 
inversesqrt(src0) : 0.0; auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], @@ -1430,12 +1456,10 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } break; case AluScalarOpcode::kSetpInv: { + // p0 = src0 == 1.0 auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], b.makeFloatConstant(1.f)); - auto pred = - b.createTriOp(spv::Op::OpSelect, bool_type_, cond, - b.makeBoolConstant(true), b.makeBoolConstant(false)); - b.createStore(pred, p0_); + b.createStore(cond, p0_); // if (!cond) dest = src0 == 0.0 ? 1.0 : src0; auto dst_cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, @@ -1482,6 +1506,11 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( GLSLstd450::kSin, {sources[0]}); } break; + case AluScalarOpcode::kSqrt: { + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + GLSLstd450::kSqrt, {sources[0]}); + } break; + case AluScalarOpcode::kSubs: case AluScalarOpcode::kSubsc0: case AluScalarOpcode::kSubsc1: { From e72e283e79c664ec55055b4ba04f712038a4569e Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Wed, 23 Mar 2016 16:19:18 -0500 Subject: [PATCH 087/145] Primitive type makes rasterization state dirty too! 
--- src/xenia/gpu/vulkan/pipeline_cache.cc | 8 ++++++++ src/xenia/gpu/vulkan/pipeline_cache.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index ca7c37b46..ee1174a72 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -936,6 +936,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( auto& state_info = update_rasterization_state_info_; bool dirty = false; + dirty |= regs.primitive_type != primitive_type; dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, XE_GPU_REG_PA_SU_SC_MODE_CNTL); dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_tl, @@ -944,6 +945,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); + regs.primitive_type = primitive_type; XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; @@ -983,6 +985,10 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( case 2: state_info.cullMode = VK_CULL_MODE_BACK_BIT; break; + case 3: + // Cull both sides? 
+ assert_always(); + break; } if (regs.pa_su_sc_mode_cntl & 0x4) { state_info.frontFace = VK_FRONT_FACE_CLOCKWISE; @@ -1013,6 +1019,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateMultisampleState() { state_info.pNext = nullptr; state_info.flags = 0; + // PA_SC_AA_CONFIG MSAA_NUM_SAMPLES + // PA_SU_SC_MODE_CNTL MSAA_ENABLE state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; state_info.sampleShadingEnable = VK_FALSE; state_info.minSampleShading = 0; diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 3e623f14e..b33c030ed 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -205,11 +205,11 @@ class PipelineCache { VkPipelineViewportStateCreateInfo update_viewport_state_info_; struct UpdateRasterizationStateRegisters { + PrimitiveType primitive_type; uint32_t pa_su_sc_mode_cntl; uint32_t pa_sc_screen_scissor_tl; uint32_t pa_sc_screen_scissor_br; uint32_t multi_prim_ib_reset_index; - PrimitiveType prim_type; UpdateRasterizationStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } From 181b2af5a4c0ffa2124e97d6a6e512705ec76fc7 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Fri, 25 Mar 2016 13:45:44 -0500 Subject: [PATCH 088/145] Vulkan Circular Buffer --- src/xenia/ui/vulkan/circular_buffer.cc | 258 +++++++++++++++++++++++++ src/xenia/ui/vulkan/circular_buffer.h | 85 ++++++++ 2 files changed, 343 insertions(+) create mode 100644 src/xenia/ui/vulkan/circular_buffer.cc create mode 100644 src/xenia/ui/vulkan/circular_buffer.h diff --git a/src/xenia/ui/vulkan/circular_buffer.cc b/src/xenia/ui/vulkan/circular_buffer.cc new file mode 100644 index 000000000..4cc22366f --- /dev/null +++ b/src/xenia/ui/vulkan/circular_buffer.cc @@ -0,0 +1,258 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" + +#include "xenia/ui/vulkan/circular_buffer.h" + +namespace xe { +namespace ui { +namespace vulkan { + +CircularBuffer::CircularBuffer(VulkanDevice* device) : device_(device) {} +CircularBuffer::~CircularBuffer() { Shutdown(); } + +bool CircularBuffer::Initialize(VkDeviceSize capacity, VkBufferUsageFlags usage, + VkDeviceSize alignment) { + VkResult status = VK_SUCCESS; + capacity = xe::round_up(capacity, alignment); + + // Create our internal buffer. 
+ VkBufferCreateInfo buffer_info; + buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_info.pNext = nullptr; + buffer_info.flags = 0; + buffer_info.size = capacity; + buffer_info.usage = usage; + buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_info.queueFamilyIndexCount = 0; + buffer_info.pQueueFamilyIndices = nullptr; + status = vkCreateBuffer(*device_, &buffer_info, nullptr, &gpu_buffer_); + CheckResult(status, "vkCreateBuffer"); + if (status != VK_SUCCESS) { + return false; + } + + VkMemoryRequirements reqs; + vkGetBufferMemoryRequirements(*device_, gpu_buffer_, &reqs); + + // Allocate memory from the device to back the buffer. + assert_true(reqs.size == capacity); + reqs.alignment = std::max(alignment, reqs.alignment); + gpu_memory_ = device_->AllocateMemory(reqs); + if (!gpu_memory_) { + XELOGE("CircularBuffer::Initialize - Failed to allocate memory!"); + Shutdown(); + return false; + } + + alignment_ = reqs.alignment; + capacity_ = reqs.size; + gpu_base_ = 0; + + // Bind the buffer to its backing memory. + status = vkBindBufferMemory(*device_, gpu_buffer_, gpu_memory_, gpu_base_); + CheckResult(status, "vkBindBufferMemory"); + if (status != VK_SUCCESS) { + XELOGE("CircularBuffer::Initialize - Failed to bind memory!"); + Shutdown(); + return false; + } + + // Map the memory so we can access it. 
+ status = vkMapMemory(*device_, gpu_memory_, gpu_base_, capacity_, 0, + reinterpret_cast(&host_base_)); + CheckResult(status, "vkMapMemory"); + if (status != VK_SUCCESS) { + XELOGE("CircularBuffer::Initialize - Failed to map memory!"); + Shutdown(); + return false; + } + + return true; +} + +void CircularBuffer::Shutdown() { + Clear(); + if (host_base_) { + vkUnmapMemory(*device_, gpu_memory_); + host_base_ = nullptr; + } + if (gpu_buffer_) { + vkDestroyBuffer(*device_, gpu_buffer_, nullptr); + gpu_buffer_ = nullptr; + } + if (gpu_memory_) { + vkFreeMemory(*device_, gpu_memory_, nullptr); + gpu_memory_ = nullptr; + } +} + +bool CircularBuffer::CanAcquire(VkDeviceSize length) { + // Make sure the length is aligned. + length = xe::round_up(length, alignment_); + if (allocations_.empty()) { + // Read head has caught up to write head (entire buffer available for write) + assert(read_head_ == write_head_); + return capacity_ > length; + } else if (write_head_ < read_head_) { + // Write head wrapped around and is behind read head. + // | write |---- read ----| + return (read_head_ - write_head_) > length; + } else { + // Read head behind write head. + // 1. Check if there's enough room from write -> capacity + // | |---- read ----| write | + if ((capacity_ - write_head_) > length) { + return true; + } + + // 2. Check if there's enough room from 0 -> read + // | write |---- read ----| | + if ((read_head_) > length) { + return true; + } + } + + return false; +} + +CircularBuffer::Allocation* CircularBuffer::Acquire( + VkDeviceSize length, std::shared_ptr fence) { + if (!CanAcquire(length)) { + return nullptr; + } + + VkDeviceSize aligned_length = xe::round_up(length, alignment_); + if (allocations_.empty()) { + // Entire buffer available. 
+ assert(read_head_ == write_head_); + assert(capacity_ > aligned_length); + + read_head_ = 0; + write_head_ = length; + + auto alloc = new Allocation(); + alloc->host_ptr = host_base_ + 0; + alloc->gpu_memory = gpu_memory_; + alloc->offset = gpu_base_ + 0; + alloc->length = length; + alloc->aligned_length = aligned_length; + alloc->fence = fence; + allocations_.push_back(alloc); + return alloc; + } else if (write_head_ < read_head_) { + // Write head behind read head. + assert_true(read_head_ - write_head_ >= aligned_length); + + auto alloc = new Allocation(); + alloc->host_ptr = host_base_ + write_head_; + alloc->gpu_memory = gpu_memory_; + alloc->offset = gpu_base_ + write_head_; + alloc->length = length; + alloc->aligned_length = aligned_length; + alloc->fence = fence; + write_head_ += aligned_length; + allocations_.push_back(alloc); + + return alloc; + } else { + // Write head after read head + if (capacity_ - write_head_ >= aligned_length) { + // Free space from write -> capacity + auto alloc = new Allocation(); + alloc->host_ptr = host_base_ + write_head_; + alloc->gpu_memory = gpu_memory_; + alloc->offset = gpu_base_ + write_head_; + alloc->length = length; + alloc->aligned_length = aligned_length; + alloc->fence = fence; + write_head_ += aligned_length; + allocations_.push_back(alloc); + + return alloc; + } else if ((read_head_ - 0) > aligned_length) { + // Free space from begin -> read + auto alloc = new Allocation(); + alloc->host_ptr = host_base_ + write_head_; + alloc->gpu_memory = gpu_memory_; + alloc->offset = gpu_base_ + 0; + alloc->length = length; + alloc->aligned_length = aligned_length; + alloc->fence = fence; + write_head_ = aligned_length; + allocations_.push_back(alloc); + + return alloc; + } + } + + return nullptr; +} + +void CircularBuffer::Discard(Allocation* allocation) { + // TODO: Revert write_head_ (only if this is the last alloc though) + // Or maybe just disallow discards. 
+ for (auto it = allocations_.begin(); it != allocations_.end(); ++it) { + if (*it == allocation) { + allocations_.erase(it); + break; + } + } + + delete allocation; +} + +void CircularBuffer::Flush(Allocation* allocation) { + VkMappedMemoryRange range; + range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + range.pNext = nullptr; + range.memory = gpu_memory_; + range.offset = gpu_base_ + allocation->offset; + range.size = allocation->length; + vkFlushMappedMemoryRanges(*device_, 1, &range); +} + +void CircularBuffer::Clear() { + for (auto it = allocations_.begin(); it != allocations_.end();) { + delete *it; + it = allocations_.erase(it); + } + + write_head_ = read_head_ = 0; +} + +void CircularBuffer::Scavenge() { + for (auto it = allocations_.begin(); it != allocations_.end();) { + if ((*it)->fence->status() != VK_SUCCESS) { + // Don't bother freeing following allocations to ensure proper ordering. + break; + } + + read_head_ = (read_head_ + (*it)->aligned_length) % capacity_; + delete *it; + it = allocations_.erase(it); + } + + if (allocations_.empty()) { + // Reset R/W heads. + read_head_ = write_head_ = 0; + } else { + // FIXME: Haven't verified this works correctly when actually rotating :P + assert_always(); + } +} + +} // namespace vulkan +} // namespace ui +} // namespace xe \ No newline at end of file diff --git a/src/xenia/ui/vulkan/circular_buffer.h b/src/xenia/ui/vulkan/circular_buffer.h new file mode 100644 index 000000000..2c036c685 --- /dev/null +++ b/src/xenia/ui/vulkan/circular_buffer.h @@ -0,0 +1,85 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_ +#define XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_ + +#include + +#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_device.h" + +namespace xe { +namespace ui { +namespace vulkan { + +// A circular buffer, intended to hold (fairly) temporary memory that will be +// released when a fence is signaled. Best used when allocations are taken +// in-order with command buffer submission. +// +// Allocations loop around the buffer in circles (but are not fragmented at the +// ends of the buffer), where trailing older allocations are freed after use. +class CircularBuffer { + public: + CircularBuffer(VulkanDevice* device); + ~CircularBuffer(); + + struct Allocation { + void* host_ptr; + VkDeviceMemory gpu_memory; + VkDeviceSize offset; + VkDeviceSize length; + VkDeviceSize aligned_length; + + // Allocation usage fence. This allocation will be deleted when the fence + // becomes signaled. + std::shared_ptr fence; + }; + + bool Initialize(VkDeviceSize capacity, VkBufferUsageFlags usage, + VkDeviceSize alignment = 256); + void Shutdown(); + + VkDeviceSize capacity() const { return capacity_; } + VkBuffer gpu_buffer() const { return gpu_buffer_; } + VkDeviceMemory gpu_memory() const { return gpu_memory_; } + uint8_t* host_base() const { return host_base_; } + + bool CanAcquire(VkDeviceSize length); + Allocation* Acquire(VkDeviceSize length, std::shared_ptr fence); + void Discard(Allocation* allocation); + void Flush(Allocation* allocation); + + // Clears all allocations, regardless of whether they've been consumed or not. + void Clear(); + + // Frees any allocations whose fences have been signaled. 
+ void Scavenge(); + + private: + VkDeviceSize capacity_ = 0; + VkDeviceSize alignment_ = 0; + VkDeviceSize write_head_ = 0; + VkDeviceSize read_head_ = 0; + + VulkanDevice* device_; + VkBuffer gpu_buffer_ = nullptr; + VkDeviceMemory gpu_memory_ = nullptr; + VkDeviceSize gpu_base_ = 0; + uint8_t* host_base_ = nullptr; + + std::unordered_map allocation_cache_; + std::vector allocations_; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_GL_CIRCULAR_BUFFER_H_ From 0e41774e36539baae4a76cc4b0c0d3d4efcf3eb8 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 25 Mar 2016 16:31:12 -0500 Subject: [PATCH 089/145] RenderCache::dirty() - used to tell if we need to begin a new pass Round all pixel pitch/heights up before dividing. --- src/xenia/gpu/vulkan/render_cache.cc | 87 +++++++++++++++++----------- src/xenia/gpu/vulkan/render_cache.h | 16 ++++- 2 files changed, 66 insertions(+), 37 deletions(-) diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 5047bff21..334a1215f 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -39,7 +39,7 @@ VkFormat ColorRenderTargetFormatToVkFormat(ColorRenderTargetFormat format) { case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown: // WARNING: this is wrong, most likely - no float form in vulkan? XELOGW("Unsupported EDRAM format k_2_10_10_10_FLOAT used"); - return VK_FORMAT_A2R10G10B10_SSCALED_PACK32; + return VK_FORMAT_A2R10G10B10_UNORM_PACK32; case ColorRenderTargetFormat::k_16_16: return VK_FORMAT_R16G16_UNORM; case ColorRenderTargetFormat::k_16_16_16_16: @@ -451,10 +451,7 @@ RenderCache::RenderCache(RegisterFile* register_file, CheckResult(status, "vkBindBufferMemory"); if (status == VK_SUCCESS) { - status = vkBindBufferMemory(*device_, edram_buffer_, edram_memory_, 0); - CheckResult(status, "vkBindBufferMemory"); - - // Upload a grid into the EDRAM buffer. 
+ // For debugging, upload a grid into the EDRAM buffer. uint32_t* gpu_data = nullptr; status = vkMapMemory(*device_, edram_memory_, 0, buffer_requirements.size, 0, reinterpret_cast(&gpu_data)); @@ -490,6 +487,25 @@ RenderCache::~RenderCache() { vkFreeMemory(*device_, edram_memory_, nullptr); } +bool RenderCache::dirty() const { + auto& regs = *register_file_; + auto& cur_regs = shadow_registers_; + + bool dirty = false; + dirty |= cur_regs.rb_modecontrol != regs[XE_GPU_REG_RB_MODECONTROL].u32; + dirty |= cur_regs.rb_surface_info != regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + dirty |= cur_regs.rb_color_info != regs[XE_GPU_REG_RB_COLOR_INFO].u32; + dirty |= cur_regs.rb_color1_info != regs[XE_GPU_REG_RB_COLOR1_INFO].u32; + dirty |= cur_regs.rb_color2_info != regs[XE_GPU_REG_RB_COLOR2_INFO].u32; + dirty |= cur_regs.rb_color3_info != regs[XE_GPU_REG_RB_COLOR3_INFO].u32; + dirty |= cur_regs.rb_depth_info != regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + dirty |= cur_regs.pa_sc_window_scissor_tl != + regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; + dirty |= cur_regs.pa_sc_window_scissor_br != + regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; + return dirty; +} + const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader) { @@ -739,8 +755,8 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer, for (int i = 0; i < 4; ++i) { TileViewKey color_key; color_key.tile_offset = config->color[i].edram_base; - color_key.tile_width = config->surface_pitch_px / 80; - color_key.tile_height = config->surface_height_px / 16; + color_key.tile_width = xe::round_up(config->surface_pitch_px, 80) / 80; + color_key.tile_height = xe::round_up(config->surface_height_px, 16) / 16; color_key.color_or_depth = 1; color_key.edram_format = static_cast(config->color[i].format); target_color_attachments[i] = @@ -753,8 +769,10 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer, TileViewKey 
depth_stencil_key; depth_stencil_key.tile_offset = config->depth_stencil.edram_base; - depth_stencil_key.tile_width = config->surface_pitch_px / 80; - depth_stencil_key.tile_height = config->surface_height_px / 16; + depth_stencil_key.tile_width = + xe::round_up(config->surface_pitch_px, 80) / 80; + depth_stencil_key.tile_height = + xe::round_up(config->surface_height_px, 16) / 16; depth_stencil_key.color_or_depth = 0; depth_stencil_key.edram_format = static_cast(config->depth_stencil.format); @@ -960,6 +978,7 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer, &buffer_barrier, 0, nullptr); // Update the tile view with current EDRAM contents. + // TODO: Heuristics to determine if this copy is avoidable. VkBufferImageCopy buffer_copy; buffer_copy.bufferOffset = edram_base * 5120; buffer_copy.bufferImageHeight = 0; @@ -980,29 +999,26 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer, image_barrier.pNext = nullptr; image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - if (image_layout != VK_IMAGE_LAYOUT_GENERAL && - image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { - image_barrier.srcAccessMask = 0; - image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_barrier.oldLayout = image_layout; - image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - image_barrier.image = image; - image_barrier.subresourceRange = {0, 0, 1, 0, 1}; - image_barrier.subresourceRange.aspectMask = - color_or_depth - ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + image_barrier.srcAccessMask = 0; + image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + image_barrier.oldLayout = image_layout; + image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + image_barrier.image = image; + image_barrier.subresourceRange = {0, 0, 1, 0, 1}; + image_barrier.subresourceRange.aspectMask = + color_or_depth ? 
VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, - nullptr, 1, &image_barrier); - } + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); // If we overflow we'll lose the device here. assert_true(extents.width <= key.tile_width * 80u); assert_true(extents.height <= key.tile_height * 16u); // Now issue the blit to the destination. + // TODO: Resolve to destination if necessary. VkImageBlit image_blit; image_blit.srcSubresource = {0, 0, 0, 1}; image_blit.srcSubresource.aspectMask = @@ -1024,15 +1040,12 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer, image, image_layout, 1, &image_blit, filter); // Transition the image back into its previous layout. - if (image_layout != VK_IMAGE_LAYOUT_GENERAL && - image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { - image_barrier.srcAccessMask = image_barrier.dstAccessMask; - image_barrier.dstAccessMask = 0; - std::swap(image_barrier.oldLayout, image_barrier.newLayout); - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, - nullptr, 1, &image_barrier); - } + image_barrier.srcAccessMask = image_barrier.dstAccessMask; + image_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + std::swap(image_barrier.oldLayout, image_barrier.newLayout); + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); } void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer, @@ -1040,6 +1053,9 @@ void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer, ColorRenderTargetFormat format, uint32_t pitch, uint32_t height, float* color) { + // TODO: For formats <= 4 bpp, we can 
directly fill the EDRAM buffer. Just + // need to detect this and calculate a value. + // Grab a tile view (as we need to clear an image first) TileViewKey key; key.color_or_depth = 1; @@ -1076,6 +1092,9 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, DepthRenderTargetFormat format, uint32_t pitch, uint32_t height, float depth, uint32_t stencil) { + // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just + // need to detect this and calculate a value. + // Grab a tile view (as we need to clear an image first) TileViewKey key; key.color_or_depth = 0; diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index 97816c365..2e8d1c5fe 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -37,8 +37,10 @@ struct TileViewKey { uint16_t tile_height; // 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat. uint16_t color_or_depth : 1; + // Surface MSAA samples + // uint16_t msaa_samples : 2; // Either ColorRenderTargetFormat or DepthRenderTargetFormat. - uint16_t edram_format : 15; + uint16_t edram_format : 15; // 13; }; static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed"); @@ -249,6 +251,10 @@ class RenderCache { RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); ~RenderCache(); + // Call this to determine if you should start a new render pass or continue + // with an already open pass. + bool dirty() const; + // Begins a render pass targeting the state-specified framebuffer formats. // The command buffer will be transitioned into the render pass phase. const RenderState* BeginRenderPass(VkCommandBuffer command_buffer, @@ -263,23 +269,27 @@ class RenderCache { void ClearCache(); // Queues commands to copy EDRAM contents into an image. + // The command buffer must not be inside of a render pass when calling this. 
void RawCopyToImage(VkCommandBuffer command_buffer, uint32_t edram_base, VkImage image, VkImageLayout image_layout, bool color_or_depth, VkOffset3D offset, VkExtent3D extents); // Queues commands to blit EDRAM contents into an image. + // The command buffer must not be inside of a render pass when calling this. void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base, uint32_t pitch, uint32_t height, VkImage image, VkImageLayout image_layout, bool color_or_depth, uint32_t format, VkFilter filter, VkOffset3D offset, VkExtent3D extents); - // Queues commands to clear EDRAM contents with a solid color + // Queues commands to clear EDRAM contents with a solid color. + // The command buffer must not be inside of a render pass when calling this. void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base, ColorRenderTargetFormat format, uint32_t pitch, uint32_t height, float* color); // Queues commands to clear EDRAM contents with depth/stencil values. + // The command buffer must not be inside of a render pass when calling this. void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, uint32_t edram_base, DepthRenderTargetFormat format, uint32_t pitch, @@ -307,7 +317,7 @@ class RenderCache { RegisterFile* register_file_ = nullptr; ui::vulkan::VulkanDevice* device_ = nullptr; - // Entire 10MiB of EDRAM, aliased to hell by various VkImages. + // Entire 10MiB of EDRAM. VkDeviceMemory edram_memory_ = nullptr; // Buffer overlayed 1:1 with edram_memory_ to allow raw access. VkBuffer edram_buffer_ = nullptr; From b2457d7e724645678f40721682c4135f96697aec Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 25 Mar 2016 16:32:29 -0500 Subject: [PATCH 090/145] Basic texture uploads/address lookups/etc Freeing of descriptor sets when the GPU is finished with them. 
--- src/xenia/gpu/vulkan/texture_cache.cc | 403 +++++++++++++++++++------- src/xenia/gpu/vulkan/texture_cache.h | 84 ++++-- 2 files changed, 363 insertions(+), 124 deletions(-) diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index 5c6e42b8b..500d6ac25 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -26,19 +26,26 @@ using xe::ui::vulkan::CheckResult; constexpr uint32_t kMaxTextureSamplers = 32; -TextureCache::TextureCache(RegisterFile* register_file, +struct TextureConfig { + TextureFormat guest_format; + VkFormat host_format; +}; + +TextureCache::TextureCache(Memory* memory, RegisterFile* register_file, TraceWriter* trace_writer, ui::vulkan::VulkanDevice* device) - : register_file_(register_file), + : memory_(memory), + register_file_(register_file), trace_writer_(trace_writer), - device_(device) { + device_(device), + staging_buffer_(device) { // Descriptor pool used for all of our cached descriptors. VkDescriptorPoolCreateInfo descriptor_pool_info; descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; descriptor_pool_info.pNext = nullptr; descriptor_pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - descriptor_pool_info.maxSets = 256; + descriptor_pool_info.maxSets = 4096; VkDescriptorPoolSize pool_sizes[2]; pool_sizes[0].type = VK_DESCRIPTOR_TYPE_SAMPLER; pool_sizes[0].descriptorCount = 32; @@ -81,50 +88,21 @@ TextureCache::TextureCache(RegisterFile* register_file, nullptr, &texture_descriptor_set_layout_); CheckResult(err, "vkCreateDescriptorSetLayout"); - // Allocate memory for a staging buffer. 
- VkBufferCreateInfo staging_buffer_info; - staging_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - staging_buffer_info.pNext = nullptr; - staging_buffer_info.flags = 0; - staging_buffer_info.size = 2048 * 2048 * 4; // 16MB buffer - staging_buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - staging_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - staging_buffer_info.queueFamilyIndexCount = 0; - staging_buffer_info.pQueueFamilyIndices = nullptr; - err = - vkCreateBuffer(*device_, &staging_buffer_info, nullptr, &staging_buffer_); - CheckResult(err, "vkCreateBuffer"); - if (err != VK_SUCCESS) { - // This isn't good. + int width = 4096; + int height = 4096; + if (!staging_buffer_.Initialize(width * height * 4, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) { assert_always(); - return; } - VkMemoryRequirements staging_buffer_reqs; - vkGetBufferMemoryRequirements(*device_, staging_buffer_, - &staging_buffer_reqs); - staging_buffer_mem_ = device_->AllocateMemory(staging_buffer_reqs); - assert_not_null(staging_buffer_mem_); - - err = vkBindBufferMemory(*device_, staging_buffer_, staging_buffer_mem_, 0); - CheckResult(err, "vkBindBufferMemory"); - // Upload a grid into the staging buffer. - uint32_t* gpu_data = nullptr; - err = vkMapMemory(*device_, staging_buffer_mem_, 0, staging_buffer_info.size, - 0, reinterpret_cast(&gpu_data)); - CheckResult(err, "vkMapMemory"); - - int width = 2048; - int height = 2048; + auto gpu_data = reinterpret_cast(staging_buffer_.host_base()); for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { gpu_data[y * width + x] = ((y % 32 < 16) ^ (x % 32 >= 16)) ? 
0xFF0000FF : 0xFFFFFFFF; } } - - vkUnmapMemory(*device_, staging_buffer_mem_); } TextureCache::~TextureCache() { @@ -223,6 +201,10 @@ TextureCache::Texture* TextureCache::AllocateTexture( auto texture_view = std::make_unique(); texture_view->texture = texture; texture_view->view = view; + texture_view->swiz_x = 0; + texture_view->swiz_y = 1; + texture_view->swiz_z = 2; + texture_view->swiz_w = 3; texture->views.push_back(std::move(texture_view)); } @@ -245,28 +227,16 @@ TextureCache::Texture* TextureCache::DemandResolveTexture( return texture; } - // Check resolve textures. - for (auto it = resolve_textures_.begin(); it != resolve_textures_.end(); - ++it) { - texture = (*it).get(); - if (texture_info.guest_address == texture->texture_info.guest_address && - texture_info.size_2d.logical_width == - texture->texture_info.size_2d.logical_width && - texture_info.size_2d.logical_height == - texture->texture_info.size_2d.logical_height) { - // Exact match. - return texture; - } - } - // No texture at this location. Make a new one. texture = AllocateTexture(texture_info); + texture->is_full_texture = false; resolve_textures_.push_back(std::unique_ptr(texture)); return texture; } -TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, - VkCommandBuffer command_buffer) { +TextureCache::Texture* TextureCache::Demand( + const TextureInfo& texture_info, VkCommandBuffer command_buffer, + std::shared_ptr completion_fence) { // Run a tight loop to scan for an exact match existing texture. auto texture_hash = texture_info.hash(); for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) { @@ -285,9 +255,13 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, texture_info.size_2d.logical_height == texture->texture_info.size_2d.logical_height) { // Exact match. - // TODO: Lazy match + // TODO: Lazy match (at an offset) + // Upgrade this texture to a full texture. 
+ texture->is_full_texture = true; texture->texture_info = texture_info; textures_[texture_hash] = std::move(*it); + it = resolve_textures_.erase(it); + return textures_[texture_hash].get(); } } @@ -305,7 +279,21 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, return nullptr; } - if (!UploadTexture2D(command_buffer, texture, texture_info)) { + bool uploaded = false; + switch (texture_info.dimension) { + case Dimension::k2D: { + uploaded = UploadTexture2D(command_buffer, completion_fence, texture, + texture_info); + } break; + default: + assert_unhandled_case(texture_info.dimension); + break; + } + + // Okay. Now that the texture is uploaded from system memory, put a writewatch + // on it to tell us if it's been modified from the guest. + + if (!uploaded) { // TODO: Destroy the texture. assert_always(); return nullptr; @@ -314,6 +302,7 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, // Though we didn't find an exact match, that doesn't mean we're out of the // woods yet. This texture could either be a portion of another texture or // vice versa. Copy any overlapping textures into this texture. 
+ // TODO: Byte count -> pixel count (on x and y axes) for (auto it = textures_.begin(); it != textures_.end(); ++it) { } @@ -322,6 +311,67 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, return texture; } +TextureCache::TextureView* TextureCache::DemandView(Texture* texture, + uint16_t swizzle) { + for (auto it = texture->views.begin(); it != texture->views.end(); ++it) { + if ((*it)->swizzle == swizzle) { + return (*it).get(); + } + } + + VkImageViewCreateInfo view_info; + view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_info.pNext = nullptr; + view_info.flags = 0; + view_info.image = texture->image; + view_info.format = texture->format; + + switch (texture->texture_info.dimension) { + case Dimension::k1D: + view_info.viewType = VK_IMAGE_VIEW_TYPE_1D; + break; + case Dimension::k2D: + view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + break; + case Dimension::k3D: + view_info.viewType = VK_IMAGE_VIEW_TYPE_3D; + break; + case Dimension::kCube: + view_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE; + break; + default: + assert_always(); + } + + VkComponentSwizzle swiz_component_map[] = { + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, + VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, + VK_COMPONENT_SWIZZLE_IDENTITY, + }; + + view_info.components = { + swiz_component_map[(swizzle >> 0) & 0x7], + swiz_component_map[(swizzle >> 3) & 0x7], + swiz_component_map[(swizzle >> 6) & 0x7], + swiz_component_map[(swizzle >> 9) & 0x7], + }; + view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + VkImageView view; + auto status = vkCreateImageView(*device_, &view_info, nullptr, &view); + CheckResult(status, "vkCreateImageView"); + if (status == VK_SUCCESS) { + auto texture_view = new TextureView(); + texture_view->texture = texture; + texture_view->view = view; + texture_view->swizzle = swizzle; + texture->views.push_back(std::unique_ptr(texture_view)); + return 
texture_view; + } + + return nullptr; +} + TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) { auto sampler_hash = sampler_info.hash(); for (auto it = samplers_.find(sampler_hash); it != samplers_.end(); ++it) { @@ -339,12 +389,28 @@ TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) { sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; sampler_create_info.pNext = nullptr; sampler_create_info.flags = 0; - sampler_create_info.magFilter = VK_FILTER_NEAREST; sampler_create_info.minFilter = VK_FILTER_NEAREST; + sampler_create_info.magFilter = VK_FILTER_NEAREST; sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - sampler_create_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_create_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_create_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + + // FIXME: Both halfway / mirror clamp to border aren't mapped properly. + VkSamplerAddressMode address_mode_map[] = { + /* kRepeat */ VK_SAMPLER_ADDRESS_MODE_REPEAT, + /* kMirroredRepeat */ VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, + /* kClampToEdge */ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + /* kMirrorClampToEdge */ VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, + /* kClampToHalfway */ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + /* kMirrorClampToHalfway */ VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, + /* kClampToBorder */ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + /* kMirrorClampToBorder */ VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, + }; + sampler_create_info.addressModeU = + address_mode_map[static_cast(sampler_info.clamp_u)]; + sampler_create_info.addressModeV = + address_mode_map[static_cast(sampler_info.clamp_v)]; + sampler_create_info.addressModeW = + address_mode_map[static_cast(sampler_info.clamp_w)]; + sampler_create_info.mipLodBias = 0.0f; sampler_create_info.anisotropyEnable = VK_FALSE; sampler_create_info.maxAnisotropy = 1.0f; @@ -375,6 +441,22 @@ 
TextureCache::Texture* TextureCache::LookupAddress( TextureFormat format, uint32_t* offset_x, uint32_t* offset_y) { for (auto it = textures_.begin(); it != textures_.end(); ++it) { const auto& texture_info = it->second->texture_info; + if (guest_address >= texture_info.guest_address && + guest_address < + texture_info.guest_address + texture_info.input_length && + offset_x && offset_y) { + auto offset_bytes = guest_address - texture_info.guest_address; + + if (texture_info.dimension == Dimension::k2D) { + *offset_y = offset_bytes / texture_info.size_2d.input_pitch; + if (offset_bytes % texture_info.size_2d.input_pitch != 0) { + // TODO: offset_x + } + } + + return it->second.get(); + } + if (texture_info.guest_address == guest_address && texture_info.dimension == Dimension::k2D && texture_info.size_2d.input_width == width && @@ -383,20 +465,86 @@ TextureCache::Texture* TextureCache::LookupAddress( } } - // TODO: Try to match at an offset. + // Check resolve textures + for (auto it = resolve_textures_.begin(); it != resolve_textures_.end(); + ++it) { + const auto& texture_info = (*it)->texture_info; + if (guest_address >= texture_info.guest_address && + guest_address < + texture_info.guest_address + texture_info.input_length && + offset_x && offset_y) { + auto offset_bytes = guest_address - texture_info.guest_address; + + if (texture_info.dimension == Dimension::k2D) { + *offset_y = offset_bytes / texture_info.size_2d.input_pitch; + if (offset_bytes % texture_info.size_2d.input_pitch != 0) { + // TODO: offset_x + } + } + + return (*it).get(); + } + + if (texture_info.guest_address == guest_address && + texture_info.dimension == Dimension::k2D && + texture_info.size_2d.input_width == width && + texture_info.size_2d.input_height == height) { + return (*it).get(); + } + } + return nullptr; } -bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer, - Texture* dest, TextureInfo src) { - // TODO: We need to allocate memory to use as a staging buffer. 
We can then - // raw copy the texture from system memory into the staging buffer and use a - // shader to convert the texture into a format consumable by the host GPU. +void TextureSwap(Endian endianness, void* dest, const void* src, + size_t length) { + switch (endianness) { + case Endian::k8in16: + xe::copy_and_swap_16_aligned(dest, src, length / 2); + break; + case Endian::k8in32: + xe::copy_and_swap_32_aligned(dest, src, length / 4); + break; + case Endian::k16in32: // Swap high and low 16 bits within a 32 bit word + xe::copy_and_swap_16_in_32_aligned(dest, src, length); + break; + default: + case Endian::kUnspecified: + std::memcpy(dest, src, length); + break; + } +} - // Need to have unique memory for every upload for at least one frame. If we - // run out of memory, we need to flush all queued upload commands to the GPU. +bool TextureCache::UploadTexture2D( + VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, Texture* dest, + TextureInfo src) { + SCOPE_profile_cpu_f("gpu"); + assert_true(src.dimension == Dimension::k2D); - // TODO: Upload memory here. + if (!staging_buffer_.CanAcquire(src.input_length)) { + // Need to have unique memory for every upload for at least one frame. If we + // run out of memory, we need to flush all queued upload commands to the + // GPU. + // TODO: Actually flush commands. + assert_always(); + } + + // Grab some temporary memory for staging. + auto alloc = staging_buffer_.Acquire(src.input_length, completion_fence); + assert_not_null(alloc); + + // TODO: Support these cases. + // assert_false(src.is_tiled); + // assert_false(src.is_compressed()); + + // Upload texture into GPU memory. + // TODO: If the GPU supports it, we can submit a compute batch to convert the + // texture and copy it to its destination. Otherwise, fallback to conversion + // on the CPU. 
+ auto guest_ptr = memory_->TranslatePhysical(src.guest_address); + TextureSwap(src.endianness, alloc->host_ptr, guest_ptr, src.input_length); + staging_buffer_.Flush(alloc); // Insert a memory barrier into the command buffer to ensure the upload has // finished before we copy it into the destination texture. @@ -407,9 +555,9 @@ bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer, VK_ACCESS_TRANSFER_READ_BIT, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - staging_buffer_, - 0, - 2048 * 2048 * 4, + staging_buffer_.gpu_buffer(), + alloc->offset, + alloc->aligned_length, }; vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, @@ -432,18 +580,24 @@ bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); + assert_true(src.size_2d.input_width >= + dest->texture_info.size_2d.output_width); + assert_true(src.size_2d.input_height >= + dest->texture_info.size_2d.output_height); + // For now, just transfer the grid we uploaded earlier into the texture. 
VkBufferImageCopy copy_region; - copy_region.bufferOffset = 0; - copy_region.bufferRowLength = 2048; - copy_region.bufferImageHeight = 2048; + copy_region.bufferOffset = alloc->offset; + copy_region.bufferRowLength = src.size_2d.input_width; + copy_region.bufferImageHeight = src.size_2d.input_height; copy_region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; copy_region.imageOffset = {0, 0, 0}; - copy_region.imageExtent = {dest->texture_info.width + 1, - dest->texture_info.height + 1, + copy_region.imageExtent = {dest->texture_info.size_2d.output_width + 1, + dest->texture_info.size_2d.output_height + 1, dest->texture_info.depth + 1}; - vkCmdCopyBufferToImage(command_buffer, staging_buffer_, dest->image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_region); + vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(), + dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, + ©_region); // Now transition the texture into a shader readonly source. barrier.srcAccessMask = barrier.dstAccessMask; @@ -460,6 +614,7 @@ bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer, VkDescriptorSet TextureCache::PrepareTextureSet( VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, const std::vector& vertex_bindings, const std::vector& pixel_bindings) { // Clear state. @@ -476,12 +631,12 @@ VkDescriptorSet TextureCache::PrepareTextureSet( // This does things lazily and de-dupes fetch constants reused in both // shaders. 
bool any_failed = false; - any_failed = - !SetupTextureBindings(update_set_info, vertex_bindings, command_buffer) || - any_failed; - any_failed = - !SetupTextureBindings(update_set_info, pixel_bindings, command_buffer) || - any_failed; + any_failed = !SetupTextureBindings(command_buffer, completion_fence, + update_set_info, vertex_bindings) || + any_failed; + any_failed = !SetupTextureBindings(command_buffer, completion_fence, + update_set_info, pixel_bindings) || + any_failed; if (any_failed) { XELOGW("Failed to setup one or more texture bindings"); // TODO(benvanik): actually bail out here? @@ -518,6 +673,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet( sampler_write.pImageInfo = update_set_info->sampler_infos; } */ + // FIXME: These are not be lined up properly with tf binding points!!!!! if (update_set_info->image_1d_write_count) { auto& image_write = descriptor_writes[descriptor_write_count++]; image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -567,30 +723,33 @@ VkDescriptorSet TextureCache::PrepareTextureSet( 0, nullptr); } + in_flight_sets_.push_back({descriptor_set, completion_fence}); return descriptor_set; } bool TextureCache::SetupTextureBindings( + VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, UpdateSetInfo* update_set_info, - const std::vector& bindings, - VkCommandBuffer command_buffer) { + const std::vector& bindings) { bool any_failed = false; for (auto& binding : bindings) { uint32_t fetch_bit = 1 << binding.fetch_constant; if ((update_set_info->has_setup_fetch_mask & fetch_bit) == 0) { // Needs setup. 
- any_failed = - !SetupTextureBinding(update_set_info, binding, command_buffer) || - any_failed; + any_failed = !SetupTextureBinding(command_buffer, completion_fence, + update_set_info, binding) || + any_failed; update_set_info->has_setup_fetch_mask |= fetch_bit; } } return !any_failed; } -bool TextureCache::SetupTextureBinding(UpdateSetInfo* update_set_info, - const Shader::TextureBinding& binding, - VkCommandBuffer command_buffer) { +bool TextureCache::SetupTextureBinding( + VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, + UpdateSetInfo* update_set_info, const Shader::TextureBinding& binding) { auto& regs = *register_file_; int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6; auto group = @@ -615,18 +774,48 @@ bool TextureCache::SetupTextureBinding(UpdateSetInfo* update_set_info, return false; // invalid texture used } - auto texture = Demand(texture_info, command_buffer); + auto texture = Demand(texture_info, command_buffer, completion_fence); auto sampler = Demand(sampler_info); assert_true(texture != nullptr && sampler != nullptr); + if (texture == nullptr || sampler == nullptr) { + return false; + } + + uint16_t swizzle = static_cast(fetch.swizzle); + auto view = DemandView(texture, swizzle); trace_writer_->WriteMemoryRead(texture_info.guest_address, texture_info.input_length); - auto& image_write = - update_set_info->image_2d_infos[update_set_info->image_2d_write_count++]; - image_write.imageView = texture->views[0]->view; - image_write.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - image_write.sampler = sampler->sampler; + VkDescriptorImageInfo* image_write = nullptr; + switch (texture_info.dimension) { + case Dimension::k1D: + image_write = + &update_set_info + ->image_1d_infos[update_set_info->image_1d_write_count++]; + break; + case Dimension::k2D: + image_write = + &update_set_info + ->image_2d_infos[update_set_info->image_2d_write_count++]; + break; + case Dimension::k3D: + image_write = + 
&update_set_info + ->image_3d_infos[update_set_info->image_3d_write_count++]; + break; + case Dimension::kCube: + image_write = + &update_set_info + ->image_cube_infos[update_set_info->image_cube_write_count++]; + break; + default: + assert_unhandled_case(texture_info.dimension); + return false; + } + image_write->imageView = view->view; + image_write->imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + image_write->sampler = sampler->sampler; return true; } @@ -635,6 +824,22 @@ void TextureCache::ClearCache() { // TODO(benvanik): caching. } +void TextureCache::Scavenge() { + // Free unused descriptor sets + for (auto it = in_flight_sets_.begin(); it != in_flight_sets_.end();) { + if (vkGetFenceStatus(*device_, *it->second) == VK_SUCCESS) { + // We can free this one. + vkFreeDescriptorSets(*device_, descriptor_pool_, 1, &it->first); + it = in_flight_sets_.erase(it); + continue; + } + + ++it; + } + + staging_buffer_.Scavenge(); +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h index 6264a4a98..dfc993763 100644 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -17,7 +17,9 @@ #include "xenia/gpu/shader.h" #include "xenia/gpu/texture_info.h" #include "xenia/gpu/trace_writer.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" #include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/circular_buffer.h" #include "xenia/ui/vulkan/vulkan.h" #include "xenia/ui/vulkan/vulkan_device.h" @@ -38,22 +40,38 @@ class TextureCache { // True if we know all info about this texture, false otherwise. // (e.g. 
we resolve to system memory and may not know the full details about // this texture) - bool full_texture; + bool is_full_texture; VkFormat format; VkImage image; VkImageLayout image_layout; VkDeviceMemory image_memory; VkDeviceSize memory_offset; VkDeviceSize memory_size; + + uintptr_t access_watch_handle; + bool pending_invalidation; }; struct TextureView { Texture* texture; VkImageView view; + + union { + struct { + // FIXME: This only applies on little-endian platforms! + uint16_t swiz_x : 3; + uint16_t swiz_y : 3; + uint16_t swiz_z : 3; + uint16_t swiz_w : 3; + uint16_t : 4; + }; + + uint16_t swizzle; + }; }; - TextureCache(RegisterFile* register_file, TraceWriter* trace_writer, - ui::vulkan::VulkanDevice* device); + TextureCache(Memory* memory, RegisterFile* register_file, + TraceWriter* trace_writer, ui::vulkan::VulkanDevice* device); ~TextureCache(); // Descriptor set layout containing all possible texture bindings. @@ -64,8 +82,11 @@ class TextureCache { // Prepares a descriptor set containing the samplers and images for all // bindings. The textures will be uploaded/converted/etc as needed. + // Requires a fence to be provided that will be signaled when finished + // using the returned descriptor set. VkDescriptorSet PrepareTextureSet( - VkCommandBuffer command_buffer, + VkCommandBuffer setup_command_buffer, + std::shared_ptr completion_fence, const std::vector& vertex_bindings, const std::vector& pixel_bindings); @@ -73,6 +94,16 @@ class TextureCache { // TODO(benvanik): Resolve. // TODO(benvanik): ReadTexture. + // Looks for a texture either containing or matching these parameters. + // Caller is responsible for checking if the texture returned is an exact + // match or just contains the texture given by the parameters. + // If offset_x and offset_y are not null, this may return a texture that + // contains this address at an offset. 
+ Texture* LookupAddress(uint32_t guest_address, uint32_t width, + uint32_t height, TextureFormat format, + uint32_t* offset_x = nullptr, + uint32_t* offset_y = nullptr); + // Demands a texture for the purpose of resolving from EDRAM. This either // creates a new texture or returns a previously created texture. texture_info // is not required to be completely filled out, just guest_address and size. @@ -89,6 +120,9 @@ class TextureCache { // Clears all cached content. void ClearCache(); + // Frees any unused resources + void Scavenge(); + private: struct UpdateSetInfo; @@ -104,31 +138,30 @@ class TextureCache { // Demands a texture. If command_buffer is null and the texture hasn't been // uploaded to graphics memory already, we will return null and bail. - Texture* Demand(const TextureInfo& texture_info, - VkCommandBuffer command_buffer = nullptr); + Texture* Demand( + const TextureInfo& texture_info, VkCommandBuffer command_buffer = nullptr, + std::shared_ptr completion_fence = nullptr); + TextureView* DemandView(Texture* texture, uint16_t swizzle); Sampler* Demand(const SamplerInfo& sampler_info); - // Looks for a texture either containing or matching these parameters. - // Caller is responsible for checking if the texture returned is an exact - // match or just contains the texture given by the parameters. - // If offset_x and offset_y are not null, this may return a texture that - // contains this image at an offset. - Texture* LookupAddress(uint32_t guest_address, uint32_t width, - uint32_t height, TextureFormat format, - uint32_t* offset_x, uint32_t* offset_y); - // Queues commands to upload a texture from system memory, applying any // conversions necessary. This may flush the command buffer to the GPU if we // run out of staging memory. 
- bool UploadTexture2D(VkCommandBuffer command_buffer, Texture* dest, - TextureInfo src); + bool UploadTexture2D(VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, + Texture* dest, TextureInfo src); - bool SetupTextureBindings(UpdateSetInfo* update_set_info, - const std::vector& bindings, - VkCommandBuffer command_buffer = nullptr); - bool SetupTextureBinding(UpdateSetInfo* update_set_info, - const Shader::TextureBinding& binding, - VkCommandBuffer command_buffer = nullptr); + bool SetupTextureBindings( + VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, + UpdateSetInfo* update_set_info, + const std::vector& bindings); + bool SetupTextureBinding(VkCommandBuffer command_buffer, + std::shared_ptr completion_fence, + UpdateSetInfo* update_set_info, + const Shader::TextureBinding& binding); + + Memory* memory_ = nullptr; RegisterFile* register_file_ = nullptr; TraceWriter* trace_writer_ = nullptr; @@ -136,10 +169,11 @@ class TextureCache { VkDescriptorPool descriptor_pool_ = nullptr; VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr; + std::vector>> + in_flight_sets_; // Temporary until we have circular buffers. - VkBuffer staging_buffer_ = nullptr; - VkDeviceMemory staging_buffer_mem_ = nullptr; + ui::vulkan::CircularBuffer staging_buffer_; std::unordered_map> textures_; std::unordered_map> samplers_; std::vector> resolve_textures_; From 1e1da1eb6c78a70f18d188228a79a99fe3f9072f Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 25 Mar 2016 16:34:14 -0500 Subject: [PATCH 091/145] PipelineCache::ConfigurePipeline - Inform the caller if the pipeline is dirty or they can reuse the previously bound pipeline. Make SetDynamicState public. 
--- src/xenia/gpu/vulkan/pipeline_cache.cc | 31 ++++++++---------------- src/xenia/gpu/vulkan/pipeline_cache.h | 33 +++++++++++++------------- 2 files changed, 27 insertions(+), 37 deletions(-) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index ee1174a72..efcaf5b46 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -183,11 +183,12 @@ VulkanShader* PipelineCache::LoadShader(ShaderType shader_type, return shader; } -bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, - const RenderState* render_state, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader, - PrimitiveType primitive_type) { +PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline( + VkCommandBuffer command_buffer, const RenderState* render_state, + VulkanShader* vertex_shader, VulkanShader* pixel_shader, + PrimitiveType primitive_type, VkPipeline* pipeline_out) { + assert_not_null(pipeline_out); + // Perform a pass over all registers and state updating our cached structures. // This will tell us if anything has changed that requires us to either build // a new pipeline or use an existing one. @@ -208,7 +209,7 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, // Error updating state - bail out. // We are in an indeterminate state, so reset things for the next attempt. current_pipeline_ = nullptr; - return false; + return update_status; } if (!pipeline) { // Should have a hash key produced by the UpdateState pass. @@ -217,24 +218,12 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, current_pipeline_ = pipeline; if (!pipeline) { // Unable to create pipeline. - return false; + return UpdateStatus::kError; } } - // Bind the pipeline. - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - - // Issue all changed dynamic state information commands. 
- // TODO(benvanik): dynamic state is kept in the command buffer, so if we - // have issued it before (regardless of pipeline) we don't need to do it now. - // TODO(benvanik): track whether we have issued on the given command buffer. - bool full_dynamic_state = true; - if (!SetDynamicState(command_buffer, full_dynamic_state)) { - // Failed to update state. - return false; - } - - return true; + *pipeline_out = pipeline; + return update_status; } void PipelineCache::ClearCache() { diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index b33c030ed..66b2e87ef 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -32,6 +32,12 @@ namespace vulkan { // including shaders, various blend/etc options, and input configuration. class PipelineCache { public: + enum class UpdateStatus { + kCompatible, + kMismatch, + kError, + }; + PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device, VkDescriptorSetLayout uniform_descriptor_set_layout, VkDescriptorSetLayout texture_descriptor_set_layout); @@ -46,11 +52,17 @@ class PipelineCache { // otherwise a new one may be created. Any state that can be set dynamically // in the command buffer is issued at this time. // Returns whether the pipeline could be successfully created. - bool ConfigurePipeline(VkCommandBuffer command_buffer, - const RenderState* render_state, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader, - PrimitiveType primitive_type); + UpdateStatus ConfigurePipeline(VkCommandBuffer command_buffer, + const RenderState* render_state, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader, + PrimitiveType primitive_type, + VkPipeline* pipeline_out); + + // Sets required dynamic state on the command buffer. + // Only state that has changed since the last call will be set unless + // full_update is true. 
+ bool SetDynamicState(VkCommandBuffer command_buffer, bool full_update); // Pipeline layout shared by all pipelines. VkPipelineLayout pipeline_layout() const { return pipeline_layout_; } @@ -68,11 +80,6 @@ class PipelineCache { VkShaderModule GetGeometryShader(PrimitiveType primitive_type, bool is_line_mode); - // Sets required dynamic state on the command buffer. - // Only state that has changed since the last call will be set unless - // full_update is true. - bool SetDynamicState(VkCommandBuffer command_buffer, bool full_update); - RegisterFile* register_file_ = nullptr; VkDevice device_ = nullptr; @@ -111,12 +118,6 @@ class PipelineCache { VkPipeline current_pipeline_ = nullptr; private: - enum class UpdateStatus { - kCompatible, - kMismatch, - kError, - }; - UpdateStatus UpdateState(VulkanShader* vertex_shader, VulkanShader* pixel_shader, PrimitiveType primitive_type); From f75e5fec2463fffc54cd71d02652ac59291f07fb Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 25 Mar 2016 16:35:34 -0500 Subject: [PATCH 092/145] CP: Use a single command buffer for every frame, reuse render passes/pipelines if not dirty Hook up resolves and swaps --- .../gpu/vulkan/vulkan_command_processor.cc | 488 ++++++++++++++---- .../gpu/vulkan/vulkan_command_processor.h | 10 + 2 files changed, 407 insertions(+), 91 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 48c7d681d..1d559d896 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -69,8 +69,8 @@ bool VulkanCommandProcessor::SetupContext() { // Initialize the state machine caches. 
buffer_cache_ = std::make_unique(register_file_, device_, kDefaultBufferCacheCapacity); - texture_cache_ = - std::make_unique(register_file_, &trace_writer_, device_); + texture_cache_ = std::make_unique(memory_, register_file_, + &trace_writer_, device_); pipeline_cache_ = std::make_unique( register_file_, device_, buffer_cache_->constant_descriptor_set_layout(), texture_cache_->texture_descriptor_set_layout()); @@ -134,21 +134,127 @@ void VulkanCommandProcessor::ReturnFromWait() { void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) { - // Ensure we issue any pending draws. - // draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent); + SCOPE_profile_cpu_f("gpu"); - // Need to finish to be sure the other context sees the right data. - // TODO(benvanik): prevent this? fences? - // glFinish(); + // Queue up current command buffers. + // TODO(benvanik): bigger batches. + if (current_command_buffer_) { + if (current_render_state_) { + render_cache_->EndRenderPass(); + current_render_state_ = nullptr; + } - if (context_->WasLost()) { - // We've lost the context due to a TDR. - // TODO: Dump the current commands to a tracefile. - assert_always(); + auto status = vkEndCommandBuffer(current_command_buffer_); + CheckResult(status, "vkEndCommandBuffer"); + status = vkEndCommandBuffer(current_setup_buffer_); + CheckResult(status, "vkEndCommandBuffer"); + command_buffer_pool_->EndBatch(*current_batch_fence_); + + // TODO(benvanik): move to CP or to host (trace dump, etc). + // This only needs to surround a vkQueueSubmit. + static uint32_t frame = 0; + if (device_->is_renderdoc_attached() && + (FLAGS_vulkan_renderdoc_capture_all || + trace_state_ == TraceState::kSingleFrame)) { + if (queue_mutex_) { + queue_mutex_->lock(); + } + + device_->BeginRenderDocFrameCapture(); + + if (queue_mutex_) { + queue_mutex_->unlock(); + } + } + + // TODO(DrChat): If setup buffer is empty, don't bother queueing it up. 
+ VkCommandBuffer command_buffers[] = { + current_setup_buffer_, current_command_buffer_, + }; + + VkSubmitInfo submit_info; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = nullptr; + submit_info.waitSemaphoreCount = 0; + submit_info.pWaitSemaphores = nullptr; + submit_info.commandBufferCount = 2; + submit_info.pCommandBuffers = command_buffers; + submit_info.signalSemaphoreCount = 0; + submit_info.pSignalSemaphores = nullptr; + if (queue_mutex_) { + queue_mutex_->lock(); + } + status = vkQueueSubmit(queue_, 1, &submit_info, *current_batch_fence_); + if (queue_mutex_) { + queue_mutex_->unlock(); + } + CheckResult(status, "vkQueueSubmit"); + + VkFence fences[] = {*current_batch_fence_}; + status = vkWaitForFences(*device_, 1, fences, true, -1); + CheckResult(status, "vkWaitForFences"); + + if (device_->is_renderdoc_attached() && + (FLAGS_vulkan_renderdoc_capture_all || + trace_state_ == TraceState::kSingleFrame)) { + if (queue_mutex_) { + queue_mutex_->lock(); + } + + device_->EndRenderDocFrameCapture(); + + // HACK(DrChat): Used b/c I disabled trace saving code in the CP. + // Remove later. + if (!trace_writer_.is_open()) { + trace_state_ = TraceState::kDisabled; + } + + if (queue_mutex_) { + queue_mutex_->unlock(); + } + } + + // Scavenging. + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + while (command_buffer_pool_->has_pending()) { + command_buffer_pool_->Scavenge(); + xe::threading::MaybeYield(); + } + + texture_cache_->Scavenge(); + current_batch_fence_ = nullptr; + + // TODO: Remove this when we stop waiting on the queue. + buffer_cache_->ClearCache(); + } + + if (!frontbuffer_ptr) { + if (!last_copy_base_) { + // Nothing to draw. + return; + } + + // Trace viewer does this. 
+ frontbuffer_ptr = last_copy_base_; + } + + auto texture = texture_cache_->LookupAddress( + frontbuffer_ptr, xe::round_up(frontbuffer_width, 32), + xe::round_up(frontbuffer_height, 32), TextureFormat::k_8_8_8_8); + // There shouldn't be a case where the texture is null. + assert_not_null(texture); + + if (texture) { + std::lock_guard lock(swap_state_.mutex); + swap_state_.width = frontbuffer_width; + swap_state_.height = frontbuffer_height; + swap_state_.back_buffer_texture = + reinterpret_cast(texture->image); } // Remove any dead textures, etc. - // texture_cache_.Scavenge(); + texture_cache_->Scavenge(); } Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type, @@ -183,13 +289,8 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, return true; } - // TODO(benvanik): move to CP or to host (trace dump, etc). - if (FLAGS_vulkan_renderdoc_capture_all && device_->is_renderdoc_attached()) { - device_->BeginRenderDocFrameCapture(); - } - // Shaders will have already been defined by previous loads. - // We need the to do just about anything so validate here. + // We need them to do just about anything so validate here. auto vertex_shader = static_cast(active_vertex_shader()); auto pixel_shader = static_cast(active_pixel_shader()); if (!vertex_shader || !vertex_shader->is_valid()) { @@ -206,42 +307,73 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, return true; } - // TODO(benvanik): bigger batches. 
- command_buffer_pool_->BeginBatch(); - VkCommandBuffer command_buffer = command_buffer_pool_->AcquireEntry(); - VkCommandBufferBeginInfo command_buffer_begin_info; - command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - command_buffer_begin_info.pNext = nullptr; - command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - command_buffer_begin_info.pInheritanceInfo = nullptr; - auto err = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info); - CheckResult(err, "vkBeginCommandBuffer"); + bool started_command_buffer = false; + if (!current_command_buffer_) { + // TODO(benvanik): bigger batches. + // TODO(DrChat): Decouple setup buffer from current batch. + command_buffer_pool_->BeginBatch(); + current_command_buffer_ = command_buffer_pool_->AcquireEntry(); + current_setup_buffer_ = command_buffer_pool_->AcquireEntry(); + current_batch_fence_.reset(new ui::vulkan::Fence(*device_)); + + VkCommandBufferBeginInfo command_buffer_begin_info; + command_buffer_begin_info.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + command_buffer_begin_info.pNext = nullptr; + command_buffer_begin_info.flags = + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + command_buffer_begin_info.pInheritanceInfo = nullptr; + auto status = vkBeginCommandBuffer(current_command_buffer_, + &command_buffer_begin_info); + CheckResult(status, "vkBeginCommandBuffer"); + + status = + vkBeginCommandBuffer(current_setup_buffer_, &command_buffer_begin_info); + CheckResult(status, "vkBeginCommandBuffer"); + + started_command_buffer = true; + } + auto command_buffer = current_command_buffer_; // Upload and set descriptors for all textures. // We do this outside of the render pass so the texture cache can upload and // convert textures. - auto samplers = PopulateSamplers(command_buffer, vertex_shader, pixel_shader); + // Setup buffer may be flushed to GPU if the texture cache needs it. 
+ auto samplers = + PopulateSamplers(current_setup_buffer_, vertex_shader, pixel_shader); if (!samplers) { return false; } // Begin the render pass. // This will setup our framebuffer and begin the pass in the command buffer. - auto render_state = render_cache_->BeginRenderPass( - command_buffer, vertex_shader, pixel_shader); - if (!render_state) { - return false; + // This reuses a previous render pass if one is already open. + if (render_cache_->dirty() || !current_render_state_) { + if (current_render_state_) { + render_cache_->EndRenderPass(); + current_render_state_ = nullptr; + } + + current_render_state_ = render_cache_->BeginRenderPass( + command_buffer, vertex_shader, pixel_shader); + if (!current_render_state_) { + return false; + } } // Configure the pipeline for drawing. // This encodes all render state (blend, depth, etc), our shader stages, // and our vertex input layout. - if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_state, - vertex_shader, pixel_shader, - primitive_type)) { - render_cache_->EndRenderPass(); - return false; + VkPipeline pipeline = nullptr; + auto pipeline_status = pipeline_cache_->ConfigurePipeline( + command_buffer, current_render_state_, vertex_shader, pixel_shader, + primitive_type, &pipeline); + if (pipeline_status == PipelineCache::UpdateStatus::kMismatch || + started_command_buffer) { + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline); } + pipeline_cache_->SetDynamicState(command_buffer, started_command_buffer); // Pass registers to the shaders. if (!PopulateConstants(command_buffer, vertex_shader, pixel_shader)) { @@ -285,57 +417,6 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, vertex_offset, first_instance); } - // End the rendering pass. - render_cache_->EndRenderPass(); - - // TODO(benvanik): bigger batches. 
- err = vkEndCommandBuffer(command_buffer); - CheckResult(err, "vkEndCommandBuffer"); - VkFence fence; - VkFenceCreateInfo fence_info; - fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fence_info.pNext = nullptr; - fence_info.flags = 0; - vkCreateFence(*device_, &fence_info, nullptr, &fence); - command_buffer_pool_->EndBatch(fence); - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = nullptr; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = nullptr; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - submit_info.signalSemaphoreCount = 0; - submit_info.pSignalSemaphores = nullptr; - if (queue_mutex_) { - queue_mutex_->lock(); - } - err = vkQueueSubmit(queue_, 1, &submit_info, fence); - if (queue_mutex_) { - queue_mutex_->unlock(); - } - CheckResult(err, "vkQueueSubmit"); - if (queue_mutex_) { - queue_mutex_->lock(); - } - err = vkQueueWaitIdle(queue_); - CheckResult(err, "vkQueueWaitIdle"); - err = vkDeviceWaitIdle(*device_); - CheckResult(err, "vkDeviceWaitIdle"); - if (queue_mutex_) { - queue_mutex_->unlock(); - } - while (command_buffer_pool_->has_pending()) { - command_buffer_pool_->Scavenge(); - xe::threading::MaybeYield(); - } - vkDestroyFence(*device_, fence, nullptr); - - // TODO(benvanik): move to CP or to host (trace dump, etc). - if (FLAGS_vulkan_renderdoc_capture_all && device_->is_renderdoc_attached()) { - device_->EndRenderDocFrameCapture(); - } - return true; } @@ -486,7 +567,7 @@ VkDescriptorSet VulkanCommandProcessor::PopulateSamplers( #endif // FINE_GRAINED_DRAW_SCOPES auto descriptor_set = texture_cache_->PrepareTextureSet( - command_buffer, vertex_shader->texture_bindings(), + command_buffer, current_batch_fence_, vertex_shader->texture_bindings(), pixel_shader->texture_bindings()); if (!descriptor_set) { // Unable to bind set. 
@@ -519,7 +600,7 @@ bool VulkanCommandProcessor::IssueCopy() { uint32_t copy_dest_slice = (copy_dest_info >> 4) & 0x7; assert_true(copy_dest_slice == 0); auto copy_dest_format = - static_cast((copy_dest_info >> 7) & 0x3F); + static_cast((copy_dest_info >> 7) & 0x3F); uint32_t copy_dest_number = (copy_dest_info >> 13) & 0x7; // assert_true(copy_dest_number == 0); // ? uint32_t copy_dest_bias = (copy_dest_info >> 16) & 0x3F; @@ -541,12 +622,237 @@ bool VulkanCommandProcessor::IssueCopy() { uint32_t copy_mask = regs[XE_GPU_REG_RB_COPY_MASK].u32; assert_true(copy_mask == 0); + // Supported in GL4, not supported here yet. + assert_zero(copy_dest_swap); + // RB_SURFACE_INFO // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; uint32_t surface_pitch = surface_info & 0x3FFF; auto surface_msaa = static_cast((surface_info >> 16) & 0x3); + // TODO(benvanik): any way to scissor this? a200 has: + // REG_A2XX_RB_COPY_DEST_OFFSET = A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) | + // A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff); + // but I can't seem to find something similar. + uint32_t dest_logical_width = copy_dest_pitch; + uint32_t dest_logical_height = copy_dest_height; + uint32_t dest_block_width = xe::round_up(dest_logical_width, 32); + uint32_t dest_block_height = xe::round_up(dest_logical_height, 32); + + uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; + int16_t window_offset_x = window_offset & 0x7FFF; + int16_t window_offset_y = (window_offset >> 16) & 0x7FFF; + // Sign-extension + if (window_offset_x & 0x4000) { + window_offset_x |= 0x8000; + } + if (window_offset_y & 0x4000) { + window_offset_y |= 0x8000; + } + + // Adjust the copy base offset to point to the beginning of the texture, so + // we don't run into hiccups down the road (e.g. resolving the last part going + // backwards). 
+ int32_t dest_offset = window_offset_y * copy_dest_pitch * 4; + dest_offset += window_offset_x * 32 * 4; + copy_dest_base += dest_offset; + + // HACK: vertices to use are always in vf0. + int copy_vertex_fetch_slot = 0; + int r = + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (copy_vertex_fetch_slot / 3) * 6; + const auto group = reinterpret_cast(®s.values[r]); + const xe_gpu_vertex_fetch_t* fetch = nullptr; + switch (copy_vertex_fetch_slot % 3) { + case 0: + fetch = &group->vertex_fetch_0; + break; + case 1: + fetch = &group->vertex_fetch_1; + break; + case 2: + fetch = &group->vertex_fetch_2; + break; + } + assert_true(fetch->type == 3); + assert_true(fetch->endian == 2); + assert_true(fetch->size == 6); + const uint8_t* vertex_addr = memory_->TranslatePhysical(fetch->address << 2); + trace_writer_.WriteMemoryRead(fetch->address << 2, fetch->size * 4); + int32_t dest_min_x = int32_t((std::min( + std::min( + GpuSwap(xe::load(vertex_addr + 0), Endian(fetch->endian)), + GpuSwap(xe::load(vertex_addr + 8), Endian(fetch->endian))), + GpuSwap(xe::load(vertex_addr + 16), Endian(fetch->endian))))); + int32_t dest_max_x = int32_t((std::max( + std::max( + GpuSwap(xe::load(vertex_addr + 0), Endian(fetch->endian)), + GpuSwap(xe::load(vertex_addr + 8), Endian(fetch->endian))), + GpuSwap(xe::load(vertex_addr + 16), Endian(fetch->endian))))); + int32_t dest_min_y = int32_t((std::min( + std::min( + GpuSwap(xe::load(vertex_addr + 4), Endian(fetch->endian)), + GpuSwap(xe::load(vertex_addr + 12), Endian(fetch->endian))), + GpuSwap(xe::load(vertex_addr + 20), Endian(fetch->endian))))); + int32_t dest_max_y = int32_t((std::max( + std::max( + GpuSwap(xe::load(vertex_addr + 4), Endian(fetch->endian)), + GpuSwap(xe::load(vertex_addr + 12), Endian(fetch->endian))), + GpuSwap(xe::load(vertex_addr + 20), Endian(fetch->endian))))); + + uint32_t color_edram_base = 0; + uint32_t depth_edram_base = 0; + ColorRenderTargetFormat color_format; + DepthRenderTargetFormat depth_format; + if 
(copy_src_select <= 3) { + // Source from a color target. + uint32_t color_info[4] = { + regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32, + regs[XE_GPU_REG_RB_COLOR2_INFO].u32, + regs[XE_GPU_REG_RB_COLOR3_INFO].u32, + }; + color_edram_base = color_info[copy_src_select] & 0xFFF; + + color_format = static_cast( + (color_info[copy_src_select] >> 16) & 0xF); + } + + if (copy_src_select > 3 || depth_clear_enabled) { + // Source from a depth target. + uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + depth_edram_base = depth_info & 0xFFF; + + depth_format = + static_cast((depth_info >> 16) & 0x1); + } + + // Demand a resolve texture from the texture cache. + TextureInfo tex_info = {}; + tex_info.guest_address = copy_dest_base; + tex_info.width = dest_logical_width - 1; + tex_info.height = dest_logical_height - 1; + tex_info.dimension = gpu::Dimension::k2D; + tex_info.input_length = copy_dest_pitch * copy_dest_height * 4; + tex_info.size_2d.logical_width = dest_logical_width; + tex_info.size_2d.logical_height = dest_logical_height; + tex_info.size_2d.block_width = dest_block_width; + tex_info.size_2d.block_height = dest_block_height; + tex_info.size_2d.input_width = dest_block_width; + tex_info.size_2d.input_height = dest_block_height; + tex_info.size_2d.input_pitch = copy_dest_pitch * 4; + auto texture = texture_cache_->DemandResolveTexture( + tex_info, ColorFormatToTextureFormat(copy_dest_format), nullptr, nullptr); + if (texture->image_layout == VK_IMAGE_LAYOUT_UNDEFINED) { + // Transition the image to a general layout. 
+ VkImageMemoryBarrier image_barrier; + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.pNext = nullptr; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.srcAccessMask = 0; + image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + image_barrier.image = texture->image; + image_barrier.subresourceRange = {0, 0, 1, 0, 1}; + image_barrier.subresourceRange.aspectMask = + copy_src_select <= 3 + ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + texture->image_layout = VK_IMAGE_LAYOUT_GENERAL; + } + + // For debugging purposes only (trace viewer) + last_copy_base_ = texture->texture_info.guest_address; + + if (!current_command_buffer_) { + command_buffer_pool_->BeginBatch(); + current_command_buffer_ = command_buffer_pool_->AcquireEntry(); + current_setup_buffer_ = command_buffer_pool_->AcquireEntry(); + current_batch_fence_.reset(new ui::vulkan::Fence(*device_)); + + VkCommandBufferBeginInfo command_buffer_begin_info; + command_buffer_begin_info.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + command_buffer_begin_info.pNext = nullptr; + command_buffer_begin_info.flags = + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + command_buffer_begin_info.pInheritanceInfo = nullptr; + auto status = vkBeginCommandBuffer(current_command_buffer_, + &command_buffer_begin_info); + CheckResult(status, "vkBeginCommandBuffer"); + + status = + vkBeginCommandBuffer(current_setup_buffer_, &command_buffer_begin_info); + CheckResult(status, "vkBeginCommandBuffer"); + } else if (current_render_state_) { + render_cache_->EndRenderPass(); + current_render_state_ = nullptr; + } + auto command_buffer = current_command_buffer_; + + VkOffset3D resolve_offset = {dest_min_x, dest_min_y, 0}; + VkExtent3D resolve_extent = 
{uint32_t(dest_max_x - dest_min_x), + uint32_t(dest_max_y - dest_min_y), 1}; + + // Ask the render cache to copy to the resolve texture. + auto edram_base = copy_src_select <= 3 ? color_edram_base : depth_edram_base; + uint32_t src_format = copy_src_select <= 3 + ? static_cast(color_format) + : static_cast(depth_format); + switch (copy_command) { + case CopyCommand::kRaw: + render_cache_->RawCopyToImage(command_buffer, edram_base, texture->image, + texture->image_layout, copy_src_select <= 3, + resolve_offset, resolve_extent); + break; + case CopyCommand::kConvert: + render_cache_->BlitToImage( + command_buffer, edram_base, surface_pitch, resolve_extent.height, + texture->image, texture->image_layout, copy_src_select <= 3, + src_format, VK_FILTER_LINEAR, resolve_offset, resolve_extent); + break; + + case CopyCommand::kConstantOne: + case CopyCommand::kNull: + assert_always(); + break; + } + + // Perform any requested clears. + uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; + uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; + uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32; + assert_true(copy_color_clear == copy_color_clear_low); + + if (color_clear_enabled) { + // If color clear is enabled, we can only clear a selected color target! + assert_true(copy_src_select <= 3); + + // TODO(benvanik): verify color order. + float color[] = {((copy_color_clear >> 0) & 0xFF) / 255.0f, + ((copy_color_clear >> 8) & 0xFF) / 255.0f, + ((copy_color_clear >> 16) & 0xFF) / 255.0f, + ((copy_color_clear >> 24) & 0xFF) / 255.0f}; + + // TODO(DrChat): Do we know the surface height at this point? 
+ render_cache_->ClearEDRAMColor(command_buffer, color_edram_base, + color_format, surface_pitch, + resolve_extent.height, color); + } + + if (depth_clear_enabled) { + float depth = + (copy_depth_clear & 0xFFFFFF00) / static_cast(0xFFFFFF00); + uint8_t stencil = copy_depth_clear & 0xFF; + + // TODO(DrChat): Do we know the surface height at this point? + render_cache_->ClearEDRAMDepthStencil( + command_buffer, depth_edram_base, depth_format, surface_pitch, + resolve_extent.height, depth, stencil); + } + return true; } diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index b45be07fb..c87c515c0 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -34,12 +34,14 @@ #include "xenia/ui/vulkan/fenced_pools.h" #include "xenia/ui/vulkan/vulkan_context.h" #include "xenia/ui/vulkan/vulkan_device.h" +#include "xenia/ui/vulkan/vulkan_util.h" namespace xe { namespace gpu { namespace vulkan { class VulkanGraphicsSystem; +class TextureCache; class VulkanCommandProcessor : public CommandProcessor { public: @@ -90,12 +92,20 @@ class VulkanCommandProcessor : public CommandProcessor { VkQueue queue_ = nullptr; std::mutex* queue_mutex_ = nullptr; + // Last copy base address, for debugging only. + uint32_t last_copy_base_ = 0; + std::unique_ptr buffer_cache_; std::unique_ptr pipeline_cache_; std::unique_ptr render_cache_; std::unique_ptr texture_cache_; std::unique_ptr command_buffer_pool_; + + const RenderState* current_render_state_ = nullptr; + VkCommandBuffer current_command_buffer_ = nullptr; + VkCommandBuffer current_setup_buffer_ = nullptr; + std::shared_ptr current_batch_fence_; }; } // namespace vulkan From 2bb40c122db784ce4fcedf47bdf13cd4cc7ef32f Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Fri, 25 Mar 2016 16:36:21 -0500 Subject: [PATCH 093/145] Vulkan util Fence class --- src/xenia/ui/vulkan/vulkan_util.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h index fcf9e4f8f..ca93c4c2d 100644 --- a/src/xenia/ui/vulkan/vulkan_util.h +++ b/src/xenia/ui/vulkan/vulkan_util.h @@ -25,6 +25,32 @@ namespace xe { namespace ui { namespace vulkan { +class Fence { + public: + Fence(VkDevice device) : device_(device) { + VkFenceCreateInfo fence_info; + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_info.pNext = nullptr; + fence_info.flags = 0; + vkCreateFence(device, &fence_info, nullptr, &fence_); + } + ~Fence() { + vkDestroyFence(device_, fence_, nullptr); + fence_ = nullptr; + } + + VkResult status() const { + return vkGetFenceStatus(device_, fence_); + } + + VkFence fence() const { return fence_; } + operator VkFence() const { return fence_; } + + private: + VkDevice device_; + VkFence fence_ = nullptr; +}; + struct Version { uint32_t major; uint32_t minor; From a5a31cf12371cdcae9456fe4b04e370509439708 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 25 Mar 2016 16:37:24 -0500 Subject: [PATCH 094/145] VulkanShader::Prepare - return false if vkCreateShaderModule failed. 
--- src/xenia/gpu/vulkan/vulkan_shader.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_shader.cc b/src/xenia/gpu/vulkan/vulkan_shader.cc index b3c72abf3..c18341a71 100644 --- a/src/xenia/gpu/vulkan/vulkan_shader.cc +++ b/src/xenia/gpu/vulkan/vulkan_shader.cc @@ -44,11 +44,11 @@ bool VulkanShader::Prepare() { shader_info.codeSize = translated_binary_.size(); shader_info.pCode = reinterpret_cast(translated_binary_.data()); - auto err = + auto status = vkCreateShaderModule(device_, &shader_info, nullptr, &shader_module_); - CheckResult(err, "vkCreateShaderModule"); + CheckResult(status, "vkCreateShaderModule"); - return true; + return status == VK_SUCCESS; } } // namespace vulkan From d7599c817f4453652206ab799d9eefc1260d0679 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 25 Mar 2016 16:44:25 -0500 Subject: [PATCH 095/145] Formatting. --- src/xenia/ui/vulkan/vulkan_util.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h index ca93c4c2d..f5475edd8 100644 --- a/src/xenia/ui/vulkan/vulkan_util.h +++ b/src/xenia/ui/vulkan/vulkan_util.h @@ -39,9 +39,7 @@ class Fence { fence_ = nullptr; } - VkResult status() const { - return vkGetFenceStatus(device_, fence_); - } + VkResult status() const { return vkGetFenceStatus(device_, fence_); } VkFence fence() const { return fence_; } operator VkFence() const { return fence_; } From 0e44cda961d76d5cb004ccd1ccff59e9850d4386 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Fri, 25 Mar 2016 16:49:41 -0500 Subject: [PATCH 096/145] Update the rectangle list shader --- .../gpu/vulkan/shaders/bin/rect_list_geom.h | 587 +++++++++--------- .../gpu/vulkan/shaders/bin/rect_list_geom.txt | 429 +++++++------ src/xenia/gpu/vulkan/shaders/rect_list.geom | 31 +- 3 files changed, 513 insertions(+), 534 deletions(-) diff --git a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h index b9598cfa9..730f9f12e 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h +++ b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h @@ -2,7 +2,7 @@ // source: rect_list.geom const uint8_t rect_list_geom[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, - 0xCC, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0xCA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, @@ -10,8 +10,8 @@ const uint8_t rect_list_geom[] = { 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, @@ -40,17 +40,13 @@ const uint8_t rect_list_geom[] = { 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 
0x00, 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x56, 0x65, 0x72, 0x74, - 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, 0x06, 0x00, 0x04, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, - 0x76, 0x74, 0x78, 0x00, 0x05, 0x00, 0x05, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, - 0x06, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x35, 0x00, 0x00, 0x00, - 0x69, 0x6E, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, - 0x66, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, - 0xB4, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x05, 0x00, 0x07, 0x00, 0x30, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, + 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, + 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x69, 0x6E, 0x5F, 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, + 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, @@ -65,12 +61,10 @@ const uint8_t rect_list_geom[] = { 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 
0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x33, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, @@ -107,25 +101,23 @@ const uint8_t rect_list_geom[] = { 0x03, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x2D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x03, 0x00, 0x32, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x33, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x36, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x20, 
0x00, 0x04, 0x00, 0x65, 0x00, 0x00, 0x00, + 0x2D, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x2F, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x63, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x65, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x65, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, + 0x63, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x63, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, @@ -139,7 +131,7 @@ const uint8_t rect_list_geom[] = { 0x1C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 
- 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, @@ -153,286 +145,283 @@ const uint8_t rect_list_geom[] = { 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x38, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, - 0x39, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, + 0x37, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x39, 0x00, 0x00, 0x00, 
0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x39, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, - 0x3C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, + 0x3A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x3F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x43, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x41, 
0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x41, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x46, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, + 0x43, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x44, 0x00, 0x00, 0x00, + 0x43, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x45, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, + 0x45, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x46, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 
0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x49, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x4B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x49, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, - 0x4C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x4E, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x4E, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00, + 0x4A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x4C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x4C, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x4E, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x4E, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 
0x00, 0x23, 0x00, 0x00, 0x00, - 0x51, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x4F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x52, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x53, 0x00, 0x00, 0x00, - 0x52, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x54, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x51, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x52, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x55, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x56, 0x00, 0x00, 0x00, - 0x55, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, - 0x57, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, - 0x57, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x58, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x53, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x54, 0x00, 0x00, 0x00, + 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 
0x34, 0x00, 0x00, 0x00, + 0x55, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, + 0x55, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x56, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, - 0x5B, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x5D, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, + 0x59, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x5B, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, - 0x5E, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x60, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00, + 0x5C, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x5E, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x5D, 
0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x61, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x5F, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x63, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x64, 0x00, 0x00, 0x00, 0x63, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x66, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0x67, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x67, 0x00, 0x00, 0x00, - 0xF6, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x6B, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x6B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, - 0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x00, - 0x6C, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, - 0x6E, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, + 0x62, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0x65, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x65, 0x00, 0x00, 0x00, + 0xF6, 0x00, 0x04, 0x00, 0x67, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x69, 0x00, 
0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x69, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, + 0x6A, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, + 0x6C, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x66, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x6F, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x6E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x70, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x73, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x72, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x74, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, + 0x74, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x76, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x79, 0x00, 0x00, 0x00, + 0x75, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, 0x30, 0x00, 
0x00, 0x00, + 0x6D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x7A, 0x00, 0x00, 0x00, + 0x79, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, - 0x66, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x71, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, - 0x7F, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, - 0x72, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x74, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, - 0x75, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x77, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, - 0x66, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x79, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, 0x79, 0x00, 0x00, 0x00, - 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, - 0x77, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x7C, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, 0xF9, 0x00, 
0x02, 0x00, - 0x6A, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x6A, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, - 0x66, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x7E, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x66, 0x00, 0x00, 0x00, 0x7E, 0x00, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0x67, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x69, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x7F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x80, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x81, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x82, 0x00, 0x00, 0x00, - 0x81, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x83, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x84, 0x00, 0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x85, 0x00, 0x00, 0x00, - 0x84, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, - 0x86, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00, - 0x86, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x87, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 
0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x8A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x00, 0x8B, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x8D, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x8D, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x8F, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, - 0x90, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x92, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x92, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x93, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, - 0x93, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x95, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x95, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x96, 0x00, 
0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x97, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x97, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x9A, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x9A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x9C, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x9D, 0x00, 0x00, 0x00, 0x9C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x9F, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xA0, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, - 0xA0, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, - 0xA2, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0xA2, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0xA3, 0x00, 
0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, - 0xA3, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0xA5, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0xA5, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x32, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0xA8, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xA9, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, - 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xAC, 0x00, 0x00, 0x00, - 0xA9, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xAE, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, - 0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xAF, 0x00, 0x00, 0x00, - 0xAC, 0x00, 0x00, 0x00, 0xAE, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB0, 0x00, 0x00, 0x00, - 0xAF, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, - 0xB1, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 
0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0xB2, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0xB3, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB3, 0x00, 0x00, 0x00, - 0xB2, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xB5, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0xB5, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, - 0xB7, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0xB9, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0xB9, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, - 0xBA, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00, - 0x6D, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 0xBB, 0x00, 0x00, 0x00, - 0xB6, 0x00, 0x00, 0x00, 0xB7, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0xB6, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, - 0xBC, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0xBD, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xBF, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0xC0, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xC2, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0xC2, 0x00, 
0x00, 0x00, - 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00, - 0xBF, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0xC5, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xC7, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00, - 0xC7, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, - 0xC9, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0xBC, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xC9, 0x00, 0x00, 0x00, - 0xC8, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xB8, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0xCA, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, 0xCB, 0x00, 0x00, 0x00, - 0xCA, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0xB4, 0x00, 0x00, 0x00, 0xCB, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0xB5, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB7, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00, + 0x7B, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0x65, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x67, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, - 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x7D, 0x00, 0x00, 0x00, + 0x41, 0x00, 
0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x7E, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00, + 0x7E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x80, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x81, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x83, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x83, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x86, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x87, 0x00, 0x00, 0x00, 0x86, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x88, 0x00, 0x00, 0x00, + 0x87, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x89, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x8A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 
0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x8B, 0x00, 0x00, 0x00, + 0x8A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x8C, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x8D, 0x00, 0x00, 0x00, + 0x8C, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x8D, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x90, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x93, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x93, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x95, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0xDB, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x96, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x97, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 
0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x98, 0x00, 0x00, 0x00, + 0x97, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x99, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x9A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x9B, 0x00, 0x00, 0x00, + 0x9A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x9C, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x00, + 0x9C, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x9D, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0xA0, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0xA0, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0xA2, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0xA3, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0xA3, 0x00, 0x00, 0x00, 0xA2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x34, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0xA5, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x30, 0x00, 
0x00, 0x00, 0xA5, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, + 0xA6, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0xA8, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xA9, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, + 0xA9, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0xAB, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xAC, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, + 0xAC, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0xAE, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xAE, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0xAF, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, + 0xAF, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0xB1, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xB1, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0xB3, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0xB3, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, 0xB5, 0x00, 0x00, 0x00, + 0xB6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0xB7, 0x00, 
0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB7, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0xB9, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, + 0xFA, 0x00, 0x04, 0x00, 0xB9, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, + 0xB5, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB4, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0xBB, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0xBC, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, 0x00, 0xBC, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0xBF, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0xBE, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xC0, 0x00, 0x00, 0x00, 0xBF, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC2, 0x00, 0x00, 0x00, + 0xBD, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00, + 0xC4, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xC6, 0x00, 0x00, 0x00, 0xC2, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0xC7, 0x00, 0x00, 0x00, + 0x30, 0x00, 
0x00, 0x00, 0xBA, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0xC7, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0xB6, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB6, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, + 0xC9, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xB2, 0x00, 0x00, 0x00, 0xC9, 0x00, 0x00, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0xB3, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0xB5, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, }; diff --git a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt index b047926f5..94fb6a700 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt +++ b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.0 ; Generator: Khronos Glslang Reference Front End; 1 -; Bound: 204 +; Bound: 202 ; Schema: 0 OpCapability Geometry OpCapability GeometryPointSize @@ -9,7 +9,7 @@ OpCapability GeometryStreams %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 - OpEntryPoint Geometry %4 "main" %18 %34 %49 %53 + OpEntryPoint Geometry %4 "main" %18 %34 %48 %51 OpExecutionMode %4 Triangles OpExecutionMode %4 Invocations 1 OpExecutionMode %4 OutputTriangleStrip @@ -27,14 +27,10 @@ OpMemberName %32 1 "gl_PointSize" OpMemberName %32 2 "gl_ClipDistance" OpName %34 "" - OpName %47 "VertexData" - OpMemberName %47 0 "o" - OpName %49 "out_vtx" - OpName %50 "VertexData" - OpMemberName %50 0 "o" - OpName %53 "in_vtx" - OpName %102 "i" - OpName %180 "i" + OpName %48 "out_interpolators" + OpName %51 "in_interpolators" + OpName %100 "i" + OpName %178 "i" OpMemberDecorate %14 0 BuiltIn Position 
OpMemberDecorate %14 1 BuiltIn PointSize OpMemberDecorate %14 2 BuiltIn ClipDistance @@ -45,10 +41,9 @@ OpDecorate %32 Block OpDecorate %32 Stream 0 OpDecorate %34 Stream 0 - OpMemberDecorate %47 0 Location 0 - OpDecorate %47 Stream 0 - OpDecorate %49 Stream 0 - OpMemberDecorate %50 0 Location 0 + OpDecorate %48 Location 0 + OpDecorate %48 Stream 0 + OpDecorate %51 Location 0 %2 = OpTypeVoid %3 = OpTypeFunction %2 %6 = OpTypeBool @@ -77,21 +72,19 @@ %43 = OpTypePointer Output %9 %45 = OpConstant %11 16 %46 = OpTypeArray %10 %45 - %47 = OpTypeStruct %46 - %48 = OpTypePointer Output %47 - %49 = OpVariable %48 Output - %50 = OpTypeStruct %46 - %51 = OpTypeArray %50 %15 - %52 = OpTypePointer Input %51 - %53 = OpVariable %52 Input - %54 = OpTypePointer Input %50 - %101 = OpTypePointer Function %19 - %109 = OpConstant %19 16 + %47 = OpTypePointer Output %46 + %48 = OpVariable %47 Output + %49 = OpTypeArray %46 %15 + %50 = OpTypePointer Input %49 + %51 = OpVariable %50 Input + %52 = OpTypePointer Input %46 + %99 = OpTypePointer Function %19 + %107 = OpConstant %19 16 %4 = OpFunction %2 None %3 %5 = OpLabel %8 = OpVariable %7 Function - %102 = OpVariable %101 Function - %180 = OpVariable %101 Function + %100 = OpVariable %99 Function + %178 = OpVariable %99 Function %23 = OpAccessChain %22 %18 %20 %20 %21 %24 = OpLoad %9 %23 %26 = OpAccessChain %22 %18 %25 %20 %21 @@ -100,7 +93,7 @@ OpStore %8 %28 %29 = OpLoad %6 %8 OpSelectionMerge %31 None - OpBranchConditional %29 %30 %127 + OpBranchConditional %29 %30 %125 %30 = OpLabel %36 = OpAccessChain %35 %18 %20 %20 %37 = OpLoad %10 %36 @@ -110,216 +103,216 @@ %42 = OpLoad %9 %41 %44 = OpAccessChain %43 %34 %40 OpStore %44 %42 - %55 = OpAccessChain %54 %53 %20 - %56 = OpLoad %50 %55 - OpStore %49 %56 + %53 = OpAccessChain %52 %51 %20 + %54 = OpLoad %46 %53 + OpStore %48 %54 OpEmitVertex - %57 = OpAccessChain %35 %18 %40 %20 - %58 = OpLoad %10 %57 - %59 = OpAccessChain %38 %34 %20 - OpStore %59 %58 - %60 = OpAccessChain %22 %18 
%40 %40 - %61 = OpLoad %9 %60 - %62 = OpAccessChain %43 %34 %40 - OpStore %62 %61 - %63 = OpAccessChain %54 %53 %40 - %64 = OpLoad %50 %63 - OpStore %49 %64 + %55 = OpAccessChain %35 %18 %40 %20 + %56 = OpLoad %10 %55 + %57 = OpAccessChain %38 %34 %20 + OpStore %57 %56 + %58 = OpAccessChain %22 %18 %40 %40 + %59 = OpLoad %9 %58 + %60 = OpAccessChain %43 %34 %40 + OpStore %60 %59 + %61 = OpAccessChain %52 %51 %40 + %62 = OpLoad %46 %61 + OpStore %48 %62 OpEmitVertex - %65 = OpAccessChain %35 %18 %25 %20 - %66 = OpLoad %10 %65 - %67 = OpAccessChain %38 %34 %20 - OpStore %67 %66 - %68 = OpAccessChain %22 %18 %25 %40 - %69 = OpLoad %9 %68 - %70 = OpAccessChain %43 %34 %40 - OpStore %70 %69 - %71 = OpAccessChain %54 %53 %25 - %72 = OpLoad %50 %71 - OpStore %49 %72 + %63 = OpAccessChain %35 %18 %25 %20 + %64 = OpLoad %10 %63 + %65 = OpAccessChain %38 %34 %20 + OpStore %65 %64 + %66 = OpAccessChain %22 %18 %25 %40 + %67 = OpLoad %9 %66 + %68 = OpAccessChain %43 %34 %40 + OpStore %68 %67 + %69 = OpAccessChain %52 %51 %25 + %70 = OpLoad %46 %69 + OpStore %48 %70 OpEmitVertex OpEndPrimitive - %73 = OpAccessChain %35 %18 %25 %20 - %74 = OpLoad %10 %73 - %75 = OpAccessChain %38 %34 %20 - OpStore %75 %74 - %76 = OpAccessChain %22 %18 %25 %40 - %77 = OpLoad %9 %76 - %78 = OpAccessChain %43 %34 %40 - OpStore %78 %77 - %79 = OpAccessChain %54 %53 %25 - %80 = OpLoad %50 %79 - OpStore %49 %80 + %71 = OpAccessChain %35 %18 %25 %20 + %72 = OpLoad %10 %71 + %73 = OpAccessChain %38 %34 %20 + OpStore %73 %72 + %74 = OpAccessChain %22 %18 %25 %40 + %75 = OpLoad %9 %74 + %76 = OpAccessChain %43 %34 %40 + OpStore %76 %75 + %77 = OpAccessChain %52 %51 %25 + %78 = OpLoad %46 %77 + OpStore %48 %78 OpEmitVertex - %81 = OpAccessChain %35 %18 %40 %20 - %82 = OpLoad %10 %81 - %83 = OpAccessChain %38 %34 %20 - OpStore %83 %82 - %84 = OpAccessChain %22 %18 %40 %40 - %85 = OpLoad %9 %84 - %86 = OpAccessChain %43 %34 %40 - OpStore %86 %85 - %87 = OpAccessChain %54 %53 %40 - %88 = OpLoad %50 %87 - 
OpStore %49 %88 + %79 = OpAccessChain %35 %18 %40 %20 + %80 = OpLoad %10 %79 + %81 = OpAccessChain %38 %34 %20 + OpStore %81 %80 + %82 = OpAccessChain %22 %18 %40 %40 + %83 = OpLoad %9 %82 + %84 = OpAccessChain %43 %34 %40 + OpStore %84 %83 + %85 = OpAccessChain %52 %51 %40 + %86 = OpLoad %46 %85 + OpStore %48 %86 OpEmitVertex - %89 = OpAccessChain %35 %18 %40 %20 + %87 = OpAccessChain %35 %18 %40 %20 + %88 = OpLoad %10 %87 + %89 = OpAccessChain %35 %18 %25 %20 %90 = OpLoad %10 %89 - %91 = OpAccessChain %35 %18 %25 %20 - %92 = OpLoad %10 %91 - %93 = OpFAdd %10 %90 %92 - %94 = OpAccessChain %35 %18 %20 %20 - %95 = OpLoad %10 %94 - %96 = OpFSub %10 %93 %95 - %97 = OpAccessChain %38 %34 %20 - OpStore %97 %96 - %98 = OpAccessChain %22 %18 %25 %40 - %99 = OpLoad %9 %98 - %100 = OpAccessChain %43 %34 %40 - OpStore %100 %99 - OpStore %102 %20 - OpBranch %103 - %103 = OpLabel - OpLoopMerge %105 %106 None - OpBranch %107 - %107 = OpLabel - %108 = OpLoad %19 %102 - %110 = OpSLessThan %6 %108 %109 - OpBranchConditional %110 %104 %105 - %104 = OpLabel - %111 = OpLoad %19 %102 - %112 = OpLoad %19 %102 - %113 = OpAccessChain %35 %53 %20 %20 %112 - %114 = OpLoad %10 %113 - %115 = OpFNegate %10 %114 - %116 = OpLoad %19 %102 - %117 = OpAccessChain %35 %53 %40 %20 %116 - %118 = OpLoad %10 %117 - %119 = OpFAdd %10 %115 %118 - %120 = OpLoad %19 %102 - %121 = OpAccessChain %35 %53 %25 %20 %120 - %122 = OpLoad %10 %121 - %123 = OpFAdd %10 %119 %122 - %124 = OpAccessChain %38 %49 %20 %111 - OpStore %124 %123 - OpBranch %106 - %106 = OpLabel - %125 = OpLoad %19 %102 - %126 = OpIAdd %19 %125 %40 - OpStore %102 %126 - OpBranch %103 + %91 = OpFAdd %10 %88 %90 + %92 = OpAccessChain %35 %18 %20 %20 + %93 = OpLoad %10 %92 + %94 = OpFSub %10 %91 %93 + %95 = OpAccessChain %38 %34 %20 + OpStore %95 %94 + %96 = OpAccessChain %22 %18 %25 %40 + %97 = OpLoad %9 %96 + %98 = OpAccessChain %43 %34 %40 + OpStore %98 %97 + OpStore %100 %20 + OpBranch %101 + %101 = OpLabel + OpLoopMerge %103 %104 None + 
OpBranch %105 %105 = OpLabel + %106 = OpLoad %19 %100 + %108 = OpSLessThan %6 %106 %107 + OpBranchConditional %108 %102 %103 + %102 = OpLabel + %109 = OpLoad %19 %100 + %110 = OpLoad %19 %100 + %111 = OpAccessChain %35 %51 %20 %110 + %112 = OpLoad %10 %111 + %113 = OpFNegate %10 %112 + %114 = OpLoad %19 %100 + %115 = OpAccessChain %35 %51 %40 %114 + %116 = OpLoad %10 %115 + %117 = OpFAdd %10 %113 %116 + %118 = OpLoad %19 %100 + %119 = OpAccessChain %35 %51 %25 %118 + %120 = OpLoad %10 %119 + %121 = OpFAdd %10 %117 %120 + %122 = OpAccessChain %38 %48 %109 + OpStore %122 %121 + OpBranch %104 + %104 = OpLabel + %123 = OpLoad %19 %100 + %124 = OpIAdd %19 %123 %40 + OpStore %100 %124 + OpBranch %101 + %103 = OpLabel OpEmitVertex OpEndPrimitive OpBranch %31 - %127 = OpLabel - %128 = OpAccessChain %35 %18 %20 %20 - %129 = OpLoad %10 %128 - %130 = OpAccessChain %38 %34 %20 - OpStore %130 %129 - %131 = OpAccessChain %22 %18 %20 %40 - %132 = OpLoad %9 %131 - %133 = OpAccessChain %43 %34 %40 - OpStore %133 %132 - %134 = OpAccessChain %54 %53 %20 - %135 = OpLoad %50 %134 - OpStore %49 %135 + %125 = OpLabel + %126 = OpAccessChain %35 %18 %20 %20 + %127 = OpLoad %10 %126 + %128 = OpAccessChain %38 %34 %20 + OpStore %128 %127 + %129 = OpAccessChain %22 %18 %20 %40 + %130 = OpLoad %9 %129 + %131 = OpAccessChain %43 %34 %40 + OpStore %131 %130 + %132 = OpAccessChain %52 %51 %20 + %133 = OpLoad %46 %132 + OpStore %48 %133 OpEmitVertex - %136 = OpAccessChain %35 %18 %40 %20 - %137 = OpLoad %10 %136 - %138 = OpAccessChain %38 %34 %20 - OpStore %138 %137 - %139 = OpAccessChain %22 %18 %40 %40 - %140 = OpLoad %9 %139 - %141 = OpAccessChain %43 %34 %40 - OpStore %141 %140 - %142 = OpAccessChain %54 %53 %40 - %143 = OpLoad %50 %142 - OpStore %49 %143 + %134 = OpAccessChain %35 %18 %40 %20 + %135 = OpLoad %10 %134 + %136 = OpAccessChain %38 %34 %20 + OpStore %136 %135 + %137 = OpAccessChain %22 %18 %40 %40 + %138 = OpLoad %9 %137 + %139 = OpAccessChain %43 %34 %40 + OpStore %139 %138 + 
%140 = OpAccessChain %52 %51 %40 + %141 = OpLoad %46 %140 + OpStore %48 %141 OpEmitVertex - %144 = OpAccessChain %35 %18 %25 %20 - %145 = OpLoad %10 %144 - %146 = OpAccessChain %38 %34 %20 - OpStore %146 %145 - %147 = OpAccessChain %22 %18 %25 %40 - %148 = OpLoad %9 %147 - %149 = OpAccessChain %43 %34 %40 - OpStore %149 %148 - %150 = OpAccessChain %54 %53 %25 - %151 = OpLoad %50 %150 - OpStore %49 %151 + %142 = OpAccessChain %35 %18 %25 %20 + %143 = OpLoad %10 %142 + %144 = OpAccessChain %38 %34 %20 + OpStore %144 %143 + %145 = OpAccessChain %22 %18 %25 %40 + %146 = OpLoad %9 %145 + %147 = OpAccessChain %43 %34 %40 + OpStore %147 %146 + %148 = OpAccessChain %52 %51 %25 + %149 = OpLoad %46 %148 + OpStore %48 %149 OpEmitVertex OpEndPrimitive - %152 = OpAccessChain %35 %18 %20 %20 - %153 = OpLoad %10 %152 - %154 = OpAccessChain %38 %34 %20 - OpStore %154 %153 - %155 = OpAccessChain %22 %18 %20 %40 - %156 = OpLoad %9 %155 - %157 = OpAccessChain %43 %34 %40 - OpStore %157 %156 - %158 = OpAccessChain %54 %53 %20 - %159 = OpLoad %50 %158 - OpStore %49 %159 + %150 = OpAccessChain %35 %18 %20 %20 + %151 = OpLoad %10 %150 + %152 = OpAccessChain %38 %34 %20 + OpStore %152 %151 + %153 = OpAccessChain %22 %18 %20 %40 + %154 = OpLoad %9 %153 + %155 = OpAccessChain %43 %34 %40 + OpStore %155 %154 + %156 = OpAccessChain %52 %51 %20 + %157 = OpLoad %46 %156 + OpStore %48 %157 OpEmitVertex - %160 = OpAccessChain %35 %18 %25 %20 - %161 = OpLoad %10 %160 - %162 = OpAccessChain %38 %34 %20 - OpStore %162 %161 - %163 = OpAccessChain %22 %18 %25 %40 - %164 = OpLoad %9 %163 - %165 = OpAccessChain %43 %34 %40 - OpStore %165 %164 - %166 = OpAccessChain %54 %53 %25 - %167 = OpLoad %50 %166 - OpStore %49 %167 + %158 = OpAccessChain %35 %18 %25 %20 + %159 = OpLoad %10 %158 + %160 = OpAccessChain %38 %34 %20 + OpStore %160 %159 + %161 = OpAccessChain %22 %18 %25 %40 + %162 = OpLoad %9 %161 + %163 = OpAccessChain %43 %34 %40 + OpStore %163 %162 + %164 = OpAccessChain %52 %51 %25 + %165 = OpLoad 
%46 %164 + OpStore %48 %165 OpEmitVertex - %168 = OpAccessChain %35 %18 %20 %20 + %166 = OpAccessChain %35 %18 %20 %20 + %167 = OpLoad %10 %166 + %168 = OpAccessChain %35 %18 %25 %20 %169 = OpLoad %10 %168 - %170 = OpAccessChain %35 %18 %25 %20 - %171 = OpLoad %10 %170 - %172 = OpFAdd %10 %169 %171 - %173 = OpAccessChain %35 %18 %40 %20 - %174 = OpLoad %10 %173 - %175 = OpFSub %10 %172 %174 - %176 = OpAccessChain %38 %34 %20 - OpStore %176 %175 - %177 = OpAccessChain %22 %18 %25 %40 - %178 = OpLoad %9 %177 - %179 = OpAccessChain %43 %34 %40 - OpStore %179 %178 - OpStore %180 %20 - OpBranch %181 - %181 = OpLabel - OpLoopMerge %183 %184 None - OpBranch %185 - %185 = OpLabel - %186 = OpLoad %19 %180 - %187 = OpSLessThan %6 %186 %109 - OpBranchConditional %187 %182 %183 - %182 = OpLabel - %188 = OpLoad %19 %180 - %189 = OpLoad %19 %180 - %190 = OpAccessChain %35 %53 %20 %20 %189 - %191 = OpLoad %10 %190 - %192 = OpLoad %19 %180 - %193 = OpAccessChain %35 %53 %40 %20 %192 - %194 = OpLoad %10 %193 - %195 = OpFNegate %10 %194 - %196 = OpFAdd %10 %191 %195 - %197 = OpLoad %19 %180 - %198 = OpAccessChain %35 %53 %25 %20 %197 - %199 = OpLoad %10 %198 - %200 = OpFAdd %10 %196 %199 - %201 = OpAccessChain %38 %49 %20 %188 - OpStore %201 %200 - OpBranch %184 - %184 = OpLabel - %202 = OpLoad %19 %180 - %203 = OpIAdd %19 %202 %40 - OpStore %180 %203 - OpBranch %181 + %170 = OpFAdd %10 %167 %169 + %171 = OpAccessChain %35 %18 %40 %20 + %172 = OpLoad %10 %171 + %173 = OpFSub %10 %170 %172 + %174 = OpAccessChain %38 %34 %20 + OpStore %174 %173 + %175 = OpAccessChain %22 %18 %25 %40 + %176 = OpLoad %9 %175 + %177 = OpAccessChain %43 %34 %40 + OpStore %177 %176 + OpStore %178 %20 + OpBranch %179 + %179 = OpLabel + OpLoopMerge %181 %182 None + OpBranch %183 %183 = OpLabel + %184 = OpLoad %19 %178 + %185 = OpSLessThan %6 %184 %107 + OpBranchConditional %185 %180 %181 + %180 = OpLabel + %186 = OpLoad %19 %178 + %187 = OpLoad %19 %178 + %188 = OpAccessChain %35 %51 %20 %187 + %189 = OpLoad 
%10 %188 + %190 = OpLoad %19 %178 + %191 = OpAccessChain %35 %51 %40 %190 + %192 = OpLoad %10 %191 + %193 = OpFNegate %10 %192 + %194 = OpFAdd %10 %189 %193 + %195 = OpLoad %19 %178 + %196 = OpAccessChain %35 %51 %25 %195 + %197 = OpLoad %10 %196 + %198 = OpFAdd %10 %194 %197 + %199 = OpAccessChain %38 %48 %186 + OpStore %199 %198 + OpBranch %182 + %182 = OpLabel + %200 = OpLoad %19 %178 + %201 = OpIAdd %19 %200 %40 + OpStore %178 %201 + OpBranch %179 + %181 = OpLabel OpEmitVertex OpEndPrimitive OpBranch %31 diff --git a/src/xenia/gpu/vulkan/shaders/rect_list.geom b/src/xenia/gpu/vulkan/shaders/rect_list.geom index d796919d3..6c7e24c7e 100644 --- a/src/xenia/gpu/vulkan/shaders/rect_list.geom +++ b/src/xenia/gpu/vulkan/shaders/rect_list.geom @@ -16,11 +16,8 @@ out gl_PerVertex { float gl_ClipDistance[]; }; -struct VertexData { - vec4 o[16]; -}; -layout(location = 0) in VertexData in_vtx[]; -layout(location = 0) out VertexData out_vtx; +layout(location = 0) in vec4 in_interpolators[][16]; +layout(location = 0) out vec4 out_interpolators[16]; layout(triangles) in; layout(triangle_strip, max_vertices = 6) out; @@ -35,30 +32,30 @@ void main() { // 2 ----- [3] gl_Position = gl_in[0].gl_Position; gl_PointSize = gl_in[0].gl_PointSize; - out_vtx = in_vtx[0]; + out_interpolators = in_interpolators[0]; EmitVertex(); gl_Position = gl_in[1].gl_Position; gl_PointSize = gl_in[1].gl_PointSize; - out_vtx = in_vtx[1]; + out_interpolators = in_interpolators[1]; EmitVertex(); gl_Position = gl_in[2].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; - out_vtx = in_vtx[2]; + out_interpolators = in_interpolators[2]; EmitVertex(); EndPrimitive(); gl_Position = gl_in[2].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; - out_vtx = in_vtx[2]; + out_interpolators = in_interpolators[2]; EmitVertex(); gl_Position = gl_in[1].gl_Position; gl_PointSize = gl_in[1].gl_PointSize; - out_vtx = in_vtx[1]; + out_interpolators = in_interpolators[1]; EmitVertex(); gl_Position = (gl_in[1].gl_Position + 
gl_in[2].gl_Position) - gl_in[0].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; for (int i = 0; i < 16; ++i) { - out_vtx.o[i] = -in_vtx[0].o[i] + in_vtx[1].o[i] + in_vtx[2].o[i]; + out_interpolators[i] = -in_interpolators[0][i] + in_interpolators[1][i] + in_interpolators[2][i]; } EmitVertex(); EndPrimitive(); @@ -70,30 +67,30 @@ void main() { // [3] ----- 2 gl_Position = gl_in[0].gl_Position; gl_PointSize = gl_in[0].gl_PointSize; - out_vtx = in_vtx[0]; + out_interpolators = in_interpolators[0]; EmitVertex(); gl_Position = gl_in[1].gl_Position; gl_PointSize = gl_in[1].gl_PointSize; - out_vtx = in_vtx[1]; + out_interpolators = in_interpolators[1]; EmitVertex(); gl_Position = gl_in[2].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; - out_vtx = in_vtx[2]; + out_interpolators = in_interpolators[2]; EmitVertex(); EndPrimitive(); gl_Position = gl_in[0].gl_Position; gl_PointSize = gl_in[0].gl_PointSize; - out_vtx = in_vtx[0]; + out_interpolators = in_interpolators[0]; EmitVertex(); gl_Position = gl_in[2].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; - out_vtx = in_vtx[2]; + out_interpolators = in_interpolators[2]; EmitVertex(); gl_Position = (gl_in[0].gl_Position + gl_in[2].gl_Position) - gl_in[1].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; for (int i = 0; i < 16; ++i) { - out_vtx.o[i] = in_vtx[0].o[i] + -in_vtx[1].o[i] + in_vtx[2].o[i]; + out_interpolators[i] = in_interpolators[0][i] + -in_interpolators[1][i] + in_interpolators[2][i]; } EmitVertex(); EndPrimitive(); From 692d666d57c0eed47237d56e085062431370b8b8 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 25 Mar 2016 16:50:06 -0500 Subject: [PATCH 097/145] Wipe the buffer cache in ClearCache for now. 
--- src/xenia/gpu/vulkan/buffer_cache.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc index 7fd3c4768..90b7c487e 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -432,6 +432,8 @@ void BufferCache::InvalidateCache() { void BufferCache::ClearCache() { // TODO(benvanik): caching. + // Temporary clear. + transient_tail_offset_ = transient_head_offset_; } } // namespace vulkan From fc1bd0f3793f61fc264271889a634f8cc5d884a1 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 25 Mar 2016 17:29:39 -0500 Subject: [PATCH 098/145] Fix texture uploads --- src/xenia/gpu/vulkan/texture_cache.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index 500d6ac25..686c5314d 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -588,12 +588,12 @@ bool TextureCache::UploadTexture2D( // For now, just transfer the grid we uploaded earlier into the texture. VkBufferImageCopy copy_region; copy_region.bufferOffset = alloc->offset; - copy_region.bufferRowLength = src.size_2d.input_width; - copy_region.bufferImageHeight = src.size_2d.input_height; + copy_region.bufferRowLength = src.width + 1; + copy_region.bufferImageHeight = src.height + 1; copy_region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; copy_region.imageOffset = {0, 0, 0}; - copy_region.imageExtent = {dest->texture_info.size_2d.output_width + 1, - dest->texture_info.size_2d.output_height + 1, + copy_region.imageExtent = {dest->texture_info.width + 1, + dest->texture_info.height + 1, dest->texture_info.depth + 1}; vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(), dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, From 44cffab389377638c619309dfe30b15c07b24521 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Fri, 25 Mar 2016 18:23:45 -0500 Subject: [PATCH 099/145] SPIR-V Max4 --- src/xenia/gpu/spirv_shader_translator.cc | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index f7a1660fb..2749aad01 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -701,6 +701,7 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( } break; default: // TODO: the rest of these + assert_always(); break; } @@ -909,6 +910,23 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( } break; case AluVectorOpcode::kMax4: { + auto src0_x = b.createCompositeExtract(sources[0], float_type_, 0); + auto src0_y = b.createCompositeExtract(sources[0], float_type_, 1); + auto src0_z = b.createCompositeExtract(sources[0], float_type_, 2); + auto src0_w = b.createCompositeExtract(sources[0], float_type_, 3); + + auto max_xy = CreateGlslStd450InstructionCall( + spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax, + {src0_x, src0_y}); + auto max_zw = CreateGlslStd450InstructionCall( + spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax, + {src0_z, src0_w}); + auto max_xyzw = CreateGlslStd450InstructionCall( + spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax, + {max_xy, max_zw}); + + // FIXME: Docs say this only updates pv.x? + dest = b.smearScalar(spv::NoPrecision, max_xyzw, vec4_float_type_); } break; case AluVectorOpcode::kMaxA: { From 1ea72c5e068ee40fb945bd471048be175a4c1338 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Fri, 1 Apr 2016 21:49:58 -0500 Subject: [PATCH 100/145] FencedPool::CancelBatch --- src/xenia/ui/vulkan/fenced_pools.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/xenia/ui/vulkan/fenced_pools.h b/src/xenia/ui/vulkan/fenced_pools.h index a50f82d08..3a7bb01c4 100644 --- a/src/xenia/ui/vulkan/fenced_pools.h +++ b/src/xenia/ui/vulkan/fenced_pools.h @@ -88,6 +88,24 @@ class BaseFencedPool { open_batch_ = batch; } + // Cancels an open batch, and releases all entries acquired within. + void CancelBatch() { + assert_not_null(open_batch_); + + auto batch = open_batch_; + open_batch_ = nullptr; + + // Relink the batch back into the free batch list. + batch->next = free_batch_list_head_; + free_batch_list_head_ = batch; + + // Relink entries back into free entries list. + batch->entry_list_tail->next = free_entry_list_head_; + free_entry_list_head_ = batch->entry_list_head; + batch->entry_list_head = nullptr; + batch->entry_list_tail = nullptr; + } + // Attempts to acquire an entry from the pool in the current batch. // If none are available a new one will be allocated. HANDLE AcquireEntry() { From 2eca3ce9e6e5b14c8765935e73e3e1b1cecb43aa Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Fri, 1 Apr 2016 21:51:17 -0500 Subject: [PATCH 101/145] Texture uploads/basic formats Fixed swizzle one/zero mismatch Sampler setup Remove samplers from the descriptor set layout --- src/xenia/gpu/spirv_shader_translator.cc | 118 +++++-- src/xenia/gpu/spirv_shader_translator.h | 5 +- src/xenia/gpu/vulkan/texture_cache.cc | 424 ++++++++++++++++++----- src/xenia/gpu/vulkan/texture_cache.h | 20 +- 4 files changed, 437 insertions(+), 130 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 2749aad01..ef242f0bd 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -55,11 +55,11 @@ void SpirvShaderTranslator::StartTranslation() { bool_type_ = b.makeBoolType(); float_type_ = b.makeFloatType(32); int_type_ = b.makeIntType(32); - Id uint_type = b.makeUintType(32); + uint_type_ = b.makeUintType(32); vec2_float_type_ = b.makeVectorType(float_type_, 2); vec3_float_type_ = b.makeVectorType(float_type_, 3); vec4_float_type_ = b.makeVectorType(float_type_, 4); - vec4_uint_type_ = b.makeVectorType(uint_type, 4); + vec4_uint_type_ = b.makeVectorType(uint_type_, 4); vec4_bool_type_ = b.makeVectorType(bool_type_, 4); vec4_float_one_ = b.makeCompositeConstant( @@ -136,7 +136,7 @@ void SpirvShaderTranslator::StartTranslation() { // Push constants, represented by SpirvPushConstants. 
Id push_constants_type = b.makeStructType( - {vec4_float_type_, vec4_float_type_, vec4_float_type_, uint_type}, + {vec4_float_type_, vec4_float_type_, vec4_float_type_, uint_type_}, "push_consts_type"); b.addDecoration(push_constants_type, spv::Decoration::DecorationBlock); @@ -164,7 +164,6 @@ void SpirvShaderTranslator::StartTranslation() { push_constants_type, "push_consts"); // Texture bindings - Id sampler_t = b.makeSamplerType(); Id tex_t[] = {b.makeSampledImageType(b.makeImageType( float_type_, spv::Dim::Dim1D, false, false, false, 1, spv::ImageFormat::ImageFormatUnknown)), @@ -178,23 +177,17 @@ void SpirvShaderTranslator::StartTranslation() { float_type_, spv::Dim::DimCube, false, false, false, 1, spv::ImageFormat::ImageFormatUnknown))}; - Id samplers_a = b.makeArrayType(sampler_t, b.makeUintConstant(32), 0); Id tex_a_t[] = {b.makeArrayType(tex_t[0], b.makeUintConstant(32), 0), b.makeArrayType(tex_t[1], b.makeUintConstant(32), 0), b.makeArrayType(tex_t[2], b.makeUintConstant(32), 0), b.makeArrayType(tex_t[3], b.makeUintConstant(32), 0)}; - // TODO(DrChat): See texture_cache.cc - do we need separate samplers here? - samplers_ = b.createVariable(spv::StorageClass::StorageClassUniformConstant, - samplers_a, "samplers"); - b.addDecoration(samplers_, spv::Decoration::DecorationDescriptorSet, 1); - b.addDecoration(samplers_, spv::Decoration::DecorationBinding, 0); for (int i = 0; i < 4; i++) { tex_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant, tex_a_t[i], xe::format_string("textures%dD", i + 1).c_str()); b.addDecoration(tex_[i], spv::Decoration::DecorationDescriptorSet, 1); - b.addDecoration(tex_[i], spv::Decoration::DecorationBinding, i + 1); + b.addDecoration(tex_[i], spv::Decoration::DecorationBinding, i); } // Interpolators. 
@@ -254,6 +247,20 @@ void SpirvShaderTranslator::StartTranslation() { vec4_float_type_, "gl_Position"); b.addDecoration(pos_, spv::Decoration::DecorationBuiltIn, spv::BuiltIn::BuiltInPosition); + + vertex_id_ = b.createVariable(spv::StorageClass::StorageClassInput, + int_type_, "gl_VertexId"); + b.addDecoration(vertex_id_, spv::Decoration::DecorationBuiltIn, + spv::BuiltIn::BuiltInVertexId); + + auto vertex_id = b.createLoad(vertex_id_); + auto r0_ptr = b.createAccessChain(spv::StorageClass::StorageClassFunction, + registers_ptr_, + std::vector({b.makeUintConstant(0)})); + auto r0 = b.createLoad(r0_ptr); + r0 = b.createCompositeInsert(vertex_id, r0, vec4_float_type_, + std::vector({0})); + b.createStore(r0, r0_ptr); } else { // Pixel inputs from vertex shader. interpolators_ = b.createVariable(spv::StorageClass::StorageClassInput, @@ -267,9 +274,9 @@ void SpirvShaderTranslator::StartTranslation() { frag_outputs_type, "oC"); b.addDecoration(frag_outputs_, spv::Decoration::DecorationLocation, 0); - Id frag_depth = b.createVariable(spv::StorageClass::StorageClassOutput, - vec4_float_type_, "gl_FragDepth"); - b.addDecoration(frag_depth, spv::Decoration::DecorationBuiltIn, + frag_depth_ = b.createVariable(spv::StorageClass::StorageClassOutput, + float_type_, "gl_FragDepth"); + b.addDecoration(frag_depth_, spv::Decoration::DecorationBuiltIn, spv::BuiltIn::BuiltInFragDepth); // TODO(benvanik): frag depth, etc. @@ -388,6 +395,25 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { {p, p_scaled, 4, 5, 2, 3}); b.createStore(p, pos_); + } else { + // Alpha test + auto alpha_test_x = b.createCompositeExtract( + push_consts_, float_type_, std::vector{2, 0}); + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, alpha_test_x, b.makeFloatConstant(1.f)); + + spv::Builder::If alpha_if(cond, b); + + // TODO(DrChat): Apply alpha test. 
+ // if (alpha_func == 0) passes = false; + // if (alpha_func == 1 && oC[0].a < alpha_ref) passes = true; + // if (alpha_func == 2 && oC[0].a == alpha_ref) passes = true; + // if (alpha_func == 3 && oC[0].a <= alpha_ref) passes = true; + // if (alpha_func == 4 && oC[0].a > alpha_ref) passes = true; + // if (alpha_func == 5 && oC[0].a != alpha_ref) passes = true; + // if (alpha_func == 6 && oC[0].a >= alpha_ref) passes = true; + // if (alpha_func == 7) passes = true; + + alpha_if.makeEndIf(); } b.makeReturn(false); @@ -592,9 +618,9 @@ void SpirvShaderTranslator::ProcessJumpInstruction( v = b.createLoad(v); // Bitfield extract the bool constant. - v = b.createTriOp(spv::Op::OpBitFieldUExtract, b.makeUintType(32), v, - b.makeUintConstant(instr.bool_constant_index % 32), - b.makeUintConstant(1)); + v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v, + b.makeIntConstant(instr.bool_constant_index % 32), + b.makeIntConstant(1)); // Conditional branch auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, @@ -642,17 +668,57 @@ void SpirvShaderTranslator::ProcessAllocInstruction( void SpirvShaderTranslator::ProcessVertexFetchInstruction( const ParsedVertexFetchInstruction& instr) { auto& b = *builder_; + assert_true(is_vertex_shader()); + assert_not_zero(vertex_id_); // TODO: instr.is_predicated // Operand 0 is the index // Operand 1 is the binding // TODO: Indexed fetch + auto vertex_id = LoadFromOperand(instr.operands[0]); + vertex_id = b.createCompositeExtract(vertex_id, float_type_, 0); + vertex_id = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, vertex_id); + auto shader_vertex_id = b.createLoad(vertex_id_); + auto cond = + b.createBinOp(spv::Op::OpIEqual, bool_type_, vertex_id, shader_vertex_id); + + // Skip loading if it's an indexed fetch. 
auto vertex_ptr = vertex_binding_map_[instr.operands[1].storage_index] [instr.attributes.offset]; assert_not_zero(vertex_ptr); - auto vertex = b.createLoad(vertex_ptr); + + auto vertex_components = b.getNumComponents(vertex); + Id alt_vertex = 0; + switch (vertex_components) { + case 1: + alt_vertex = b.makeFloatConstant(0.f); + break; + case 2: + alt_vertex = b.makeCompositeConstant( + vec2_float_type_, std::vector({b.makeFloatConstant(0.f), + b.makeFloatConstant(1.f)})); + break; + case 3: + alt_vertex = b.makeCompositeConstant( + vec3_float_type_, + std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f), + b.makeFloatConstant(1.f)})); + break; + case 4: + alt_vertex = b.makeCompositeConstant( + vec4_float_type_, + std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f), + b.makeFloatConstant(0.f), + b.makeFloatConstant(1.f)})); + break; + default: + assert_unhandled_case(vertex_components); + } + + vertex = b.createTriOp(spv::Op::OpSelect, b.getTypeId(vertex), cond, vertex, + alt_vertex); StoreToResult(vertex, instr.result); } @@ -1594,15 +1660,15 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { case InstructionStorageAddressingMode::kAddressAbsolute: { // storage_index + a0 storage_index = - b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), b.createLoad(a0_), + b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_), b.makeUintConstant(storage_base + op.storage_index)); } break; case InstructionStorageAddressingMode::kAddressRelative: { // TODO: Based on loop index // storage_index + aL.x - storage_index = b.createBinOp( - spv::Op::OpIAdd, b.makeUintType(32), b.makeUintConstant(0), - b.makeUintConstant(storage_base + op.storage_index)); + storage_index = + b.createBinOp(spv::Op::OpIAdd, uint_type_, b.makeUintConstant(0), + b.makeUintConstant(storage_base + op.storage_index)); } break; default: assert_always(); @@ -1723,7 +1789,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, case 
InstructionStorageAddressingMode::kAddressAbsolute: { // storage_index + a0 storage_index = - b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), b.createLoad(a0_), + b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_), b.makeUintConstant(result.storage_index)); } break; case InstructionStorageAddressingMode::kAddressRelative: { @@ -1776,7 +1842,11 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, break; case InstructionStorageTarget::kDepth: assert_true(is_pixel_shader()); - // TODO(benvanik): result.storage_index + storage_pointer = frag_depth_; + storage_class = spv::StorageClass::StorageClassOutput; + storage_type = float_type_; + storage_offsets.push_back(0); + storage_array = false; break; case InstructionStorageTarget::kNone: assert_unhandled_case(result.storage_target); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 3327dccbd..1d5dea31b 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -104,7 +104,7 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Function* translated_main_ = 0; // Types. 
- spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0; + spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0, uint_type_ = 0; spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0; spv::Id vec4_uint_type_ = 0; spv::Id vec4_bool_type_ = 0; @@ -120,7 +120,8 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id pos_ = 0; spv::Id push_consts_ = 0; spv::Id interpolators_ = 0; - spv::Id frag_outputs_ = 0; + spv::Id vertex_id_ = 0; + spv::Id frag_outputs_ = 0, frag_depth_ = 0; spv::Id samplers_ = 0; spv::Id tex_[4] = {0}; // Images {1D, 2D, 3D, Cube} diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index 686c5314d..0deddf36d 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -31,6 +31,81 @@ struct TextureConfig { VkFormat host_format; }; +static const TextureConfig texture_configs[64] = { + {TextureFormat::k_1_REVERSE, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_1, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_8, VK_FORMAT_R8_UNORM}, + {TextureFormat::k_1_5_5_5, VK_FORMAT_R5G5B5A1_UNORM_PACK16}, + {TextureFormat::k_5_6_5, VK_FORMAT_R5G6B5_UNORM_PACK16}, + {TextureFormat::k_6_5_5, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_8_8_8_8, VK_FORMAT_R8G8B8A8_UNORM}, + {TextureFormat::k_2_10_10_10, VK_FORMAT_A2R10G10B10_UNORM_PACK32}, + {TextureFormat::k_8_A, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_8_B, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_8_8, VK_FORMAT_R8G8_UNORM}, + {TextureFormat::k_Cr_Y1_Cb_Y0, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_Y1_Cr_Y0_Cb, VK_FORMAT_UNDEFINED}, + {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_8_8_8_8_A, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_4_4_4_4, VK_FORMAT_R4G4B4A4_UNORM_PACK16}, + {TextureFormat::k_10_11_11, VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? + {TextureFormat::k_11_11_10, VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? + {TextureFormat::k_DXT1, VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // ? 
+ {TextureFormat::k_DXT2_3, VK_FORMAT_BC3_SRGB_BLOCK}, // ? + {TextureFormat::k_DXT4_5, VK_FORMAT_BC5_UNORM_BLOCK}, // ? + {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_24_8, VK_FORMAT_D24_UNORM_S8_UINT}, + {TextureFormat::k_24_8_FLOAT, VK_FORMAT_D24_UNORM_S8_UINT}, // ? + {TextureFormat::k_16, VK_FORMAT_R16_UNORM}, + {TextureFormat::k_16_16, VK_FORMAT_R16G16_UNORM}, + {TextureFormat::k_16_16_16_16, VK_FORMAT_R16G16B16A16_UNORM}, + {TextureFormat::k_16_EXPAND, VK_FORMAT_R16_UNORM}, // ? + {TextureFormat::k_16_16_EXPAND, VK_FORMAT_R16G16_UNORM}, // ? + {TextureFormat::k_16_16_16_16_EXPAND, VK_FORMAT_R16G16B16A16_UNORM}, // ? + {TextureFormat::k_16_FLOAT, VK_FORMAT_R16_SFLOAT}, + {TextureFormat::k_16_16_FLOAT, VK_FORMAT_R16G16_SFLOAT}, + {TextureFormat::k_16_16_16_16_FLOAT, VK_FORMAT_R16G16B16A16_SFLOAT}, + {TextureFormat::k_32, VK_FORMAT_R32_SINT}, + {TextureFormat::k_32_32, VK_FORMAT_R32G32_SINT}, + {TextureFormat::k_32_32_32_32, VK_FORMAT_R32G32B32A32_SINT}, + {TextureFormat::k_32_FLOAT, VK_FORMAT_R32_SFLOAT}, + {TextureFormat::k_32_32_FLOAT, VK_FORMAT_R32G32_SFLOAT}, + {TextureFormat::k_32_32_32_32_FLOAT, VK_FORMAT_R32G32B32A32_SFLOAT}, + {TextureFormat::k_32_AS_8, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_32_AS_8_8, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_16_MPEG, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_16_16_MPEG, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_8_INTERLACED, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_32_AS_8_INTERLACED, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_32_AS_8_8_INTERLACED, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_16_INTERLACED, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_16_MPEG_INTERLACED, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_16_16_MPEG_INTERLACED, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_DXN, VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RG_RGTC2 */}, + {TextureFormat::k_8_8_8_8_AS_16_16_16_16, VK_FORMAT_R8G8B8A8_UNORM}, + {TextureFormat::k_DXT1_AS_16_16_16_16, + VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGB_S3TC_DXT1_EXT 
*/}, + {TextureFormat::k_DXT2_3_AS_16_16_16_16, + VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGBA_S3TC_DXT3_EXT */}, + {TextureFormat::k_DXT4_5_AS_16_16_16_16, + VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGBA_S3TC_DXT5_EXT */}, + {TextureFormat::k_2_10_10_10_AS_16_16_16_16, + VK_FORMAT_A2R10G10B10_UNORM_PACK32}, + {TextureFormat::k_10_11_11_AS_16_16_16_16, + VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? + {TextureFormat::k_11_11_10_AS_16_16_16_16, + VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? + {TextureFormat::k_32_32_32_FLOAT, VK_FORMAT_R32G32B32_SFLOAT}, + {TextureFormat::k_DXT3A, + VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGBA_S3TC_DXT3_EXT */}, + {TextureFormat::k_DXT5A, + VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGBA_S3TC_DXT5_EXT */}, + {TextureFormat::k_CTX1, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_DXT3A_AS_1_1_1_1, VK_FORMAT_UNDEFINED}, + {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED}, + {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED}, +}; + TextureCache::TextureCache(Memory* memory, RegisterFile* register_file, TraceWriter* trace_writer, ui::vulkan::VulkanDevice* device) @@ -46,12 +121,10 @@ TextureCache::TextureCache(Memory* memory, RegisterFile* register_file, descriptor_pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; descriptor_pool_info.maxSets = 4096; - VkDescriptorPoolSize pool_sizes[2]; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_SAMPLER; - pool_sizes[0].descriptorCount = 32; - pool_sizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - pool_sizes[1].descriptorCount = 32; - descriptor_pool_info.poolSizeCount = 2; + VkDescriptorPoolSize pool_sizes[1]; + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + pool_sizes[0].descriptorCount = 4096; + descriptor_pool_info.poolSizeCount = 1; descriptor_pool_info.pPoolSizes = pool_sizes; auto err = vkCreateDescriptorPool(*device_, &descriptor_pool_info, nullptr, &descriptor_pool_); @@ -59,17 +132,10 @@ TextureCache::TextureCache(Memory* memory, RegisterFile* register_file, // Create the descriptor 
set layout used for rendering. // We always have the same number of samplers but only some are used. - VkDescriptorSetLayoutBinding bindings[5]; - auto& sampler_binding = bindings[0]; - sampler_binding.binding = 0; - sampler_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - sampler_binding.descriptorCount = kMaxTextureSamplers; - sampler_binding.stageFlags = - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; - sampler_binding.pImmutableSamplers = nullptr; + VkDescriptorSetLayoutBinding bindings[4]; for (int i = 0; i < 4; ++i) { - auto& texture_binding = bindings[1 + i]; - texture_binding.binding = 1 + i; + auto& texture_binding = bindings[i]; + texture_binding.binding = i; texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; texture_binding.descriptorCount = kMaxTextureSamplers; texture_binding.stageFlags = @@ -103,6 +169,10 @@ TextureCache::TextureCache(Memory* memory, RegisterFile* register_file, ((y % 32 < 16) ^ (x % 32 >= 16)) ? 0xFF0000FF : 0xFFFFFFFF; } } + + invalidated_textures_sets_[0].reserve(64); + invalidated_textures_sets_[1].reserve(64); + invalidated_textures_ = &invalidated_textures_sets_[0]; } TextureCache::~TextureCache() { @@ -135,8 +205,27 @@ TextureCache::Texture* TextureCache::AllocateTexture( return nullptr; } - // TODO: Format - image_info.format = VK_FORMAT_R8G8B8A8_UNORM; + VkFormat format = VK_FORMAT_UNDEFINED; + if (texture_info.format_info) { + auto& config = texture_configs[int(texture_info.format_info->format)]; + format = config.host_format != VK_FORMAT_UNDEFINED + ? 
config.host_format + : VK_FORMAT_R8G8B8A8_UNORM; + } else { + format = VK_FORMAT_R8G8B8A8_UNORM; + } + + VkFormatProperties props; + uint32_t required_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT | + VK_FORMAT_FEATURE_BLIT_SRC_BIT; + vkGetPhysicalDeviceFormatProperties(*device_, format, &props); + if ((props.optimalTilingFeatures & required_flags) != required_flags) { + // Texture needs conversion on upload to a native format. + // assert_always(); + } + + image_info.format = format; image_info.extent = {texture_info.width + 1, texture_info.height + 1, texture_info.depth + 1}; image_info.mipLevels = 1; @@ -212,17 +301,24 @@ TextureCache::Texture* TextureCache::AllocateTexture( } bool TextureCache::FreeTexture(Texture* texture) { - // TODO(DrChat) - return false; + for (auto it = texture->views.begin(); it != texture->views.end();) { + vkDestroyImageView(*device_, (*it)->view, nullptr); + it = texture->views.erase(it); + } + + vkDestroyImage(*device_, texture->image, nullptr); + vkFreeMemory(*device_, texture->image_memory, nullptr); + delete texture; + return true; } TextureCache::Texture* TextureCache::DemandResolveTexture( const TextureInfo& texture_info, TextureFormat format, - uint32_t* out_offset_x, uint32_t* out_offset_y) { + VkOffset2D* out_offset) { // Check to see if we've already used a texture at this location. auto texture = LookupAddress( texture_info.guest_address, texture_info.size_2d.block_width, - texture_info.size_2d.block_height, format, out_offset_x, out_offset_y); + texture_info.size_2d.block_height, format, out_offset); if (texture) { return texture; } @@ -230,7 +326,7 @@ TextureCache::Texture* TextureCache::DemandResolveTexture( // No texture at this location. Make a new one. 
texture = AllocateTexture(texture_info); texture->is_full_texture = false; - resolve_textures_.push_back(std::unique_ptr(texture)); + resolve_textures_.push_back(texture); return texture; } @@ -241,14 +337,14 @@ TextureCache::Texture* TextureCache::Demand( auto texture_hash = texture_info.hash(); for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) { if (it->second->texture_info == texture_info) { - return it->second.get(); + return it->second; } } // Check resolve textures. for (auto it = resolve_textures_.begin(); it != resolve_textures_.end(); ++it) { - auto texture = (*it).get(); + auto texture = (*it); if (texture_info.guest_address == texture->texture_info.guest_address && texture_info.size_2d.logical_width == texture->texture_info.size_2d.logical_width && @@ -259,9 +355,9 @@ TextureCache::Texture* TextureCache::Demand( // Upgrade this texture to a full texture. texture->is_full_texture = true; texture->texture_info = texture_info; - textures_[texture_hash] = std::move(*it); + textures_[texture_hash] = *it; it = resolve_textures_.erase(it); - return textures_[texture_hash].get(); + return textures_[texture_hash]; } } @@ -290,15 +386,34 @@ TextureCache::Texture* TextureCache::Demand( break; } - // Okay. Now that the texture is uploaded from system memory, put a writewatch - // on it to tell us if it's been modified from the guest. - if (!uploaded) { // TODO: Destroy the texture. assert_always(); return nullptr; } + // Copy in overlapping resolve textures. + /* + for (auto it = resolve_textures_.begin(); it != resolve_textures_.end(); + ++it) { + auto texture = (*it); + if (texture_info.guest_address == texture->texture_info.guest_address && + texture_info.size_2d.logical_width == + texture->texture_info.size_2d.logical_width && + texture_info.size_2d.logical_height == + texture->texture_info.size_2d.logical_height) { + // Exact match. + // TODO: Lazy match (at an offset) + // Upgrade this texture to a full texture. 
+ texture->is_full_texture = true; + texture->texture_info = texture_info; + textures_[texture_hash] = *it; + it = resolve_textures_.erase(it); + return textures_[texture_hash]; + } + } + */ + // Though we didn't find an exact match, that doesn't mean we're out of the // woods yet. This texture could either be a portion of another texture or // vice versa. Copy any overlapping textures into this texture. @@ -306,8 +421,26 @@ TextureCache::Texture* TextureCache::Demand( for (auto it = textures_.begin(); it != textures_.end(); ++it) { } - textures_[texture_hash] = std::unique_ptr(texture); + // Okay. Now that the texture is uploaded from system memory, put a writewatch + // on it to tell us if it's been modified from the guest. + texture->access_watch_handle = memory_->AddPhysicalAccessWatch( + texture_info.guest_address, texture_info.input_length, + cpu::MMIOHandler::kWatchWrite, + [](void* context_ptr, void* data_ptr, uint32_t address) { + auto self = reinterpret_cast(context_ptr); + auto touched_texture = reinterpret_cast(data_ptr); + // Clear watch handle first so we don't redundantly + // remove. + touched_texture->access_watch_handle = 0; + touched_texture->pending_invalidation = true; + // Add to pending list so Scavenge will clean it up. 
+ self->invalidated_textures_mutex_.lock(); + self->invalidated_textures_->push_back(touched_texture); + self->invalidated_textures_mutex_.unlock(); + }, + this, texture); + textures_[texture_hash] = texture; return texture; } @@ -346,7 +479,7 @@ TextureCache::TextureView* TextureCache::DemandView(Texture* texture, VkComponentSwizzle swiz_component_map[] = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, - VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, + VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_IDENTITY, }; @@ -373,11 +506,15 @@ TextureCache::TextureView* TextureCache::DemandView(Texture* texture, } TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + auto sampler_hash = sampler_info.hash(); for (auto it = samplers_.find(sampler_hash); it != samplers_.end(); ++it) { if (it->second->sampler_info == sampler_info) { // Found a compatible sampler. - return it->second.get(); + return it->second; } } @@ -389,10 +526,55 @@ TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) { sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; sampler_create_info.pNext = nullptr; sampler_create_info.flags = 0; - sampler_create_info.minFilter = VK_FILTER_NEAREST; - sampler_create_info.magFilter = VK_FILTER_NEAREST; sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + // Texture level filtering. + VkSamplerMipmapMode mip_filter; + switch (sampler_info.mip_filter) { + case TextureFilter::kBaseMap: + // TODO(DrChat): ? 
+ mip_filter = VK_SAMPLER_MIPMAP_MODE_NEAREST; + break; + case TextureFilter::kPoint: + mip_filter = VK_SAMPLER_MIPMAP_MODE_NEAREST; + break; + case TextureFilter::kLinear: + mip_filter = VK_SAMPLER_MIPMAP_MODE_LINEAR; + break; + default: + assert_unhandled_case(sampler_info.mip_filter); + return nullptr; + } + + VkFilter min_filter; + switch (sampler_info.min_filter) { + case TextureFilter::kPoint: + min_filter = VK_FILTER_NEAREST; + break; + case TextureFilter::kLinear: + min_filter = VK_FILTER_LINEAR; + break; + default: + assert_unhandled_case(sampler_info.min_filter); + return nullptr; + } + VkFilter mag_filter; + switch (sampler_info.mag_filter) { + case TextureFilter::kPoint: + mag_filter = VK_FILTER_NEAREST; + break; + case TextureFilter::kLinear: + mag_filter = VK_FILTER_LINEAR; + break; + default: + assert_unhandled_case(mag_filter); + return nullptr; + } + + sampler_create_info.minFilter = min_filter; + sampler_create_info.magFilter = mag_filter; + sampler_create_info.mipmapMode = mip_filter; + // FIXME: Both halfway / mirror clamp to border aren't mapped properly. 
VkSamplerAddressMode address_mode_map[] = { /* kRepeat */ VK_SAMPLER_ADDRESS_MODE_REPEAT, @@ -431,37 +613,46 @@ TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) { auto sampler = new Sampler(); sampler->sampler = vk_sampler; sampler->sampler_info = sampler_info; - samplers_[sampler_hash] = std::unique_ptr(sampler); + samplers_[sampler_hash] = sampler; return sampler; } -TextureCache::Texture* TextureCache::LookupAddress( - uint32_t guest_address, uint32_t width, uint32_t height, - TextureFormat format, uint32_t* offset_x, uint32_t* offset_y) { +TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address, + uint32_t width, + uint32_t height, + TextureFormat format, + VkOffset2D* out_offset) { for (auto it = textures_.begin(); it != textures_.end(); ++it) { const auto& texture_info = it->second->texture_info; if (guest_address >= texture_info.guest_address && guest_address < texture_info.guest_address + texture_info.input_length && - offset_x && offset_y) { + texture_info.size_2d.input_width >= width && + texture_info.size_2d.input_height >= height && out_offset) { auto offset_bytes = guest_address - texture_info.guest_address; if (texture_info.dimension == Dimension::k2D) { - *offset_y = offset_bytes / texture_info.size_2d.input_pitch; + out_offset->x = 0; + out_offset->y = offset_bytes / texture_info.size_2d.input_pitch; if (offset_bytes % texture_info.size_2d.input_pitch != 0) { // TODO: offset_x } } - return it->second.get(); + return it->second; } if (texture_info.guest_address == guest_address && texture_info.dimension == Dimension::k2D && texture_info.size_2d.input_width == width && texture_info.size_2d.input_height == height) { - return it->second.get(); + if (out_offset) { + out_offset->x = 0; + out_offset->y = 0; + } + + return it->second; } } @@ -469,27 +660,16 @@ TextureCache::Texture* TextureCache::LookupAddress( for (auto it = resolve_textures_.begin(); it != resolve_textures_.end(); ++it) { const auto& 
texture_info = (*it)->texture_info; - if (guest_address >= texture_info.guest_address && - guest_address < - texture_info.guest_address + texture_info.input_length && - offset_x && offset_y) { - auto offset_bytes = guest_address - texture_info.guest_address; - - if (texture_info.dimension == Dimension::k2D) { - *offset_y = offset_bytes / texture_info.size_2d.input_pitch; - if (offset_bytes % texture_info.size_2d.input_pitch != 0) { - // TODO: offset_x - } - } - - return (*it).get(); - } - if (texture_info.guest_address == guest_address && texture_info.dimension == Dimension::k2D && texture_info.size_2d.input_width == width && texture_info.size_2d.input_height == height) { - return (*it).get(); + if (out_offset) { + out_offset->x = 0; + out_offset->y = 0; + } + + return (*it); } } @@ -531,19 +711,74 @@ bool TextureCache::UploadTexture2D( } // Grab some temporary memory for staging. - auto alloc = staging_buffer_.Acquire(src.input_length, completion_fence); + size_t unpack_length = src.output_length; + auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence); assert_not_null(alloc); - // TODO: Support these cases. - // assert_false(src.is_tiled); + // TODO: Support compression. // assert_false(src.is_compressed()); // Upload texture into GPU memory. // TODO: If the GPU supports it, we can submit a compute batch to convert the // texture and copy it to its destination. Otherwise, fallback to conversion // on the CPU. - auto guest_ptr = memory_->TranslatePhysical(src.guest_address); - TextureSwap(src.endianness, alloc->host_ptr, guest_ptr, src.input_length); + void* host_address = memory_->TranslatePhysical(src.guest_address); + if (!src.is_tiled) { + if (src.size_2d.input_pitch == src.size_2d.output_pitch) { + // Fast path copy entire image. + TextureSwap(src.endianness, alloc->host_ptr, host_address, unpack_length); + } else { + // Slow path copy row-by-row because strides differ. 
+ // UNPACK_ROW_LENGTH only works for uncompressed images, and likely does + // this exact thing under the covers, so we just always do it here. + const uint8_t* src_mem = reinterpret_cast(host_address); + uint8_t* dest = reinterpret_cast(alloc->host_ptr); + uint32_t pitch = + std::min(src.size_2d.input_pitch, src.size_2d.output_pitch); + for (uint32_t y = 0; + y < std::min(src.size_2d.block_height, src.size_2d.logical_height); + y++) { + TextureSwap(src.endianness, dest, src_mem, pitch); + src_mem += src.size_2d.input_pitch; + dest += src.size_2d.output_pitch; + } + } + } else { + // Untile image. + // We could do this in a shader to speed things up, as this is pretty slow. + + // TODO(benvanik): optimize this inner loop (or work by tiles). + const uint8_t* src_mem = reinterpret_cast(host_address); + uint8_t* dest = reinterpret_cast(alloc->host_ptr); + uint32_t bytes_per_block = src.format_info->block_width * + src.format_info->block_height * + src.format_info->bits_per_pixel / 8; + + // Tiled textures can be packed; get the offset into the packed texture. 
+ uint32_t offset_x; + uint32_t offset_y; + TextureInfo::GetPackedTileOffset(src, &offset_x, &offset_y); + + auto bpp = (bytes_per_block >> 2) + + ((bytes_per_block >> 1) >> (bytes_per_block >> 2)); + for (uint32_t y = 0, output_base_offset = 0; + y < std::min(src.size_2d.block_height, src.size_2d.logical_height); + y++, output_base_offset += src.size_2d.output_pitch) { + auto input_base_offset = TextureInfo::TiledOffset2DOuter( + offset_y + y, + (src.size_2d.input_width / src.format_info->block_width), bpp); + for (uint32_t x = 0, output_offset = output_base_offset; + x < src.size_2d.block_width; x++, output_offset += bytes_per_block) { + auto input_offset = + TextureInfo::TiledOffset2DInner(offset_x + x, offset_y + y, bpp, + input_base_offset) >> + bpp; + TextureSwap(src.endianness, dest + output_offset, + src_mem + input_offset * bytes_per_block, bytes_per_block); + } + } + } + staging_buffer_.Flush(alloc); // Insert a memory barrier into the command buffer to ensure the upload has @@ -580,21 +815,15 @@ bool TextureCache::UploadTexture2D( VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); - assert_true(src.size_2d.input_width >= - dest->texture_info.size_2d.output_width); - assert_true(src.size_2d.input_height >= - dest->texture_info.size_2d.output_height); - - // For now, just transfer the grid we uploaded earlier into the texture. + // Now move the converted texture into the destination. 
VkBufferImageCopy copy_region; copy_region.bufferOffset = alloc->offset; - copy_region.bufferRowLength = src.width + 1; - copy_region.bufferImageHeight = src.height + 1; + copy_region.bufferRowLength = src.size_2d.output_width; + copy_region.bufferImageHeight = src.size_2d.output_height; copy_region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; copy_region.imageOffset = {0, 0, 0}; - copy_region.imageExtent = {dest->texture_info.width + 1, - dest->texture_info.height + 1, - dest->texture_info.depth + 1}; + copy_region.imageExtent = {src.size_2d.output_width, + src.size_2d.output_height, 1}; vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(), dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_region); @@ -659,27 +888,13 @@ VkDescriptorSet TextureCache::PrepareTextureSet( VkWriteDescriptorSet descriptor_writes[4]; std::memset(descriptor_writes, 0, sizeof(descriptor_writes)); uint32_t descriptor_write_count = 0; - /* - // TODO(DrChat): Do we really need to separate samplers and images here? - if (update_set_info->sampler_write_count) { - auto& sampler_write = descriptor_writes[descriptor_write_count++]; - sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - sampler_write.pNext = nullptr; - sampler_write.dstSet = descriptor_set; - sampler_write.dstBinding = 0; - sampler_write.dstArrayElement = 0; - sampler_write.descriptorCount = update_set_info->sampler_write_count; - sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - sampler_write.pImageInfo = update_set_info->sampler_infos; - } - */ // FIXME: These are not be lined up properly with tf binding points!!!!! 
if (update_set_info->image_1d_write_count) { auto& image_write = descriptor_writes[descriptor_write_count++]; image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; image_write.pNext = nullptr; image_write.dstSet = descriptor_set; - image_write.dstBinding = 1; + image_write.dstBinding = 0; image_write.dstArrayElement = 0; image_write.descriptorCount = update_set_info->image_1d_write_count; image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; @@ -690,7 +905,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet( image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; image_write.pNext = nullptr; image_write.dstSet = descriptor_set; - image_write.dstBinding = 2; + image_write.dstBinding = 1; image_write.dstArrayElement = 0; image_write.descriptorCount = update_set_info->image_2d_write_count; image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; @@ -701,7 +916,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet( image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; image_write.pNext = nullptr; image_write.dstSet = descriptor_set; - image_write.dstBinding = 3; + image_write.dstBinding = 2; image_write.dstArrayElement = 0; image_write.descriptorCount = update_set_info->image_3d_write_count; image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; @@ -712,7 +927,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet( image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; image_write.pNext = nullptr; image_write.dstSet = descriptor_set; - image_write.dstBinding = 4; + image_write.dstBinding = 3; image_write.dstArrayElement = 0; image_write.descriptorCount = update_set_info->image_cube_write_count; image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; @@ -814,7 +1029,7 @@ bool TextureCache::SetupTextureBinding( return false; } image_write->imageView = view->view; - image_write->imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + image_write->imageLayout = texture->image_layout; 
image_write->sampler = sampler->sampler; return true; @@ -838,6 +1053,25 @@ void TextureCache::Scavenge() { } staging_buffer_.Scavenge(); + + // Clean up any invalidated textures. + invalidated_textures_mutex_.lock(); + std::vector& invalidated_textures = *invalidated_textures_; + if (invalidated_textures_ == &invalidated_textures_sets_[0]) { + invalidated_textures_ = &invalidated_textures_sets_[1]; + } else { + invalidated_textures_ = &invalidated_textures_sets_[0]; + } + invalidated_textures_mutex_.unlock(); + if (invalidated_textures.empty()) { + return; + } + + for (auto& texture : invalidated_textures) { + textures_.erase(texture->texture_info.hash()); + FreeTexture(texture); + } + invalidated_textures.clear(); } } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h index dfc993763..b564fcc48 100644 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -101,12 +101,12 @@ class TextureCache { // contains this address at an offset. Texture* LookupAddress(uint32_t guest_address, uint32_t width, uint32_t height, TextureFormat format, - uint32_t* offset_x = nullptr, - uint32_t* offset_y = nullptr); + VkOffset2D* out_offset = nullptr); // Demands a texture for the purpose of resolving from EDRAM. This either // creates a new texture or returns a previously created texture. texture_info - // is not required to be completely filled out, just guest_address and size. + // is not required to be completely filled out, just guest_address and all + // sizes. // // It's possible that this may return an image that is larger than the // requested size (e.g. resolving into a bigger texture) or an image that @@ -114,8 +114,7 @@ class TextureCache { // At the very least, it's guaranteed that the image will be large enough to // hold the requested size. 
Texture* DemandResolveTexture(const TextureInfo& texture_info, - TextureFormat format, uint32_t* out_offset_x, - uint32_t* out_offset_y); + TextureFormat format, VkOffset2D* out_offset); // Clears all cached content. void ClearCache(); @@ -172,11 +171,14 @@ class TextureCache { std::vector>> in_flight_sets_; - // Temporary until we have circular buffers. ui::vulkan::CircularBuffer staging_buffer_; - std::unordered_map> textures_; - std::unordered_map> samplers_; - std::vector> resolve_textures_; + std::unordered_map textures_; + std::unordered_map samplers_; + std::vector resolve_textures_; + + std::mutex invalidated_textures_mutex_; + std::vector* invalidated_textures_; + std::vector invalidated_textures_sets_[2]; struct UpdateSetInfo { // Bitmap of all 32 fetch constants and whether they have been setup yet. From 50f72b4e42364af54e287dba01de37999e4a9fc1 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 1 Apr 2016 21:52:39 -0500 Subject: [PATCH 102/145] Enable native MSAA Copy back EDRAM buffers in order by base offset. 
--- src/xenia/gpu/vulkan/pipeline_cache.cc | 94 ++++-- src/xenia/gpu/vulkan/pipeline_cache.h | 5 + src/xenia/gpu/vulkan/render_cache.cc | 284 +++++++++++------- src/xenia/gpu/vulkan/render_cache.h | 28 +- .../gpu/vulkan/vulkan_command_processor.cc | 88 +++--- .../gpu/vulkan/vulkan_command_processor.h | 1 + 6 files changed, 333 insertions(+), 167 deletions(-) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index efcaf5b46..19db3cd4f 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -187,6 +187,10 @@ PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline( VkCommandBuffer command_buffer, const RenderState* render_state, VulkanShader* vertex_shader, VulkanShader* pixel_shader, PrimitiveType primitive_type, VkPipeline* pipeline_out) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + assert_not_null(pipeline_out); // Perform a pass over all registers and state updating our cached structures. @@ -323,6 +327,10 @@ VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type, bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, bool full_update) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + auto& regs = set_dynamic_state_registers_; bool window_offset_dirty = SetShadowRegister(®s.pa_sc_window_offset, @@ -393,20 +401,25 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, auto surface_msaa = static_cast((regs.rb_surface_info >> 16) & 0x3); // TODO(benvanik): ?? + // FIXME: Some games depend on these for proper clears (e.g. only clearing + // half the size they actually want with 4x MSAA), but others don't. + // Figure out how these games are expecting clears to be done. 
float window_width_scalar = 1; float window_height_scalar = 1; switch (surface_msaa) { case MsaaSamples::k1X: break; case MsaaSamples::k2X: - window_width_scalar = 2; + // ?? + window_width_scalar = window_height_scalar = 1.41421356f; break; case MsaaSamples::k4X: - window_width_scalar = 2; - window_height_scalar = 2; + window_width_scalar = window_height_scalar = 2; break; } + // window_width_scalar = window_height_scalar = 1; + // Whether each of the viewport settings are enabled. // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf bool vport_xscale_enable = (regs.pa_cl_vte_cntl & (1 << 0)) > 0; @@ -434,6 +447,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, float voy = vport_yoffset_enable ? regs.pa_cl_vport_yoffset : 0; float vsx = vport_xscale_enable ? regs.pa_cl_vport_xscale : 1; float vsy = vport_yscale_enable ? regs.pa_cl_vport_yscale : 1; + window_width_scalar = window_height_scalar = 1; float vpw = 2 * window_width_scalar * vsx; float vph = -2 * window_height_scalar * vsy; @@ -481,25 +495,25 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, vkCmdSetBlendConstants(command_buffer, regs.rb_blend_rgba); } - // VK_DYNAMIC_STATE_LINE_WIDTH - vkCmdSetLineWidth(command_buffer, 1.0f); + if (full_update) { + // VK_DYNAMIC_STATE_LINE_WIDTH + vkCmdSetLineWidth(command_buffer, 1.0f); - // VK_DYNAMIC_STATE_DEPTH_BIAS - vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f); + // VK_DYNAMIC_STATE_DEPTH_BIAS + vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f); - // VK_DYNAMIC_STATE_DEPTH_BOUNDS - vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f); + // VK_DYNAMIC_STATE_DEPTH_BOUNDS + vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f); - // VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK - vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); + // VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK + vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); - // VK_DYNAMIC_STATE_STENCIL_REFERENCE - 
vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); + // VK_DYNAMIC_STATE_STENCIL_REFERENCE + vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); - // VK_DYNAMIC_STATE_STENCIL_WRITE_MASK - vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); - - // TODO(benvanik): push constants. + // VK_DYNAMIC_STATE_STENCIL_WRITE_MASK + vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); + } bool push_constants_dirty = full_update || viewport_state_dirty; push_constants_dirty |= @@ -530,7 +544,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, push_constants.window_scale[1] = -1.0f; } else { push_constants.window_scale[0] = 1.0f / 2560.0f; - push_constants.window_scale[1] = -1.0f / 2560.0f; + push_constants.window_scale[1] = 1.0f / 2560.0f; } // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf @@ -756,7 +770,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( : VK_FORMAT_A2R10G10B10_UNORM_PACK32; break; case VertexFormat::k_10_11_11: - assert_always("unsupported?"); + // assert_always("unsupported?"); vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; break; case VertexFormat::k_11_11_10: @@ -934,6 +948,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); + dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); regs.primitive_type = primitive_type; XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { @@ -947,7 +962,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( // TODO(benvanik): right setting? state_info.depthClampEnable = VK_FALSE; - // TODO(benvanik): use in depth-only mode? + // Discard rasterizer output in depth-only mode. + // TODO(DrChat): Figure out how to make this work properly. 
+ /* + auto enable_mode = static_cast(regs.rb_modecontrol & 0x7); + state_info.rasterizerDiscardEnable = + enable_mode == xenos::ModeControl::kColorDepth ? VK_FALSE : VK_TRUE; + //*/ state_info.rasterizerDiscardEnable = VK_FALSE; bool poly_mode = ((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0; @@ -1004,20 +1025,49 @@ PipelineCache::UpdateStatus PipelineCache::UpdateMultisampleState() { auto& regs = update_multisample_state_regs_; auto& state_info = update_multisample_state_info_; + bool dirty = false; + dirty |= SetShadowRegister(®s.pa_sc_aa_config, XE_GPU_REG_PA_SC_AA_CONFIG); + dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + XXH64_update(&hash_state_, ®s, sizeof(regs)); + if (!dirty) { + return UpdateStatus::kCompatible; + } + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; state_info.pNext = nullptr; state_info.flags = 0; // PA_SC_AA_CONFIG MSAA_NUM_SAMPLES // PA_SU_SC_MODE_CNTL MSAA_ENABLE - state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + // state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + //* + auto msaa_num_samples = + static_cast((regs.rb_surface_info >> 16) & 0x3); + switch (msaa_num_samples) { + case MsaaSamples::k1X: + state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + break; + case MsaaSamples::k2X: + state_info.rasterizationSamples = VK_SAMPLE_COUNT_2_BIT; + break; + case MsaaSamples::k4X: + state_info.rasterizationSamples = VK_SAMPLE_COUNT_4_BIT; + break; + default: + assert_unhandled_case(msaa_num_samples); + break; + } + //*/ + state_info.sampleShadingEnable = VK_FALSE; state_info.minSampleShading = 0; state_info.pSampleMask = nullptr; state_info.alphaToCoverageEnable = VK_FALSE; state_info.alphaToOneEnable = VK_FALSE; - return UpdateStatus::kCompatible; + return UpdateStatus::kMismatch; } PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { diff --git 
a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 66b2e87ef..f240b9c0d 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -211,6 +211,7 @@ class PipelineCache { uint32_t pa_sc_screen_scissor_tl; uint32_t pa_sc_screen_scissor_br; uint32_t multi_prim_ib_reset_index; + uint32_t rb_modecontrol; UpdateRasterizationStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } @@ -218,6 +219,10 @@ class PipelineCache { VkPipelineRasterizationStateCreateInfo update_rasterization_state_info_; struct UpdateMultisampleStateeRegisters { + uint32_t pa_sc_aa_config; + uint32_t pa_su_sc_mode_cntl; + uint32_t rb_surface_info; + UpdateMultisampleStateeRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } } update_multisample_state_regs_; diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 334a1215f..7e0528866 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -165,8 +165,23 @@ CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device, image_info.extent.depth = 1; image_info.mipLevels = 1; image_info.arrayLayers = 1; - image_info.samples = - static_cast(VK_SAMPLE_COUNT_1_BIT); + // image_info.samples = VK_SAMPLE_COUNT_1_BIT; + //* + auto msaa_samples = static_cast(key.msaa_samples); + switch (msaa_samples) { + case MsaaSamples::k1X: + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + break; + case MsaaSamples::k2X: + image_info.samples = VK_SAMPLE_COUNT_2_BIT; + break; + case MsaaSamples::k4X: + image_info.samples = VK_SAMPLE_COUNT_4_BIT; + break; + default: + assert_unhandled_case(msaa_samples); + } + //*/ image_info.tiling = VK_IMAGE_TILING_OPTIMAL; image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | @@ -322,13 +337,29 @@ CachedRenderPass::CachedRenderPass(VkDevice device, : device_(device) { std::memcpy(&config, &desired_config, 
sizeof(config)); + VkSampleCountFlagBits sample_count; + switch (desired_config.surface_msaa) { + case MsaaSamples::k1X: + sample_count = VK_SAMPLE_COUNT_1_BIT; + break; + case MsaaSamples::k2X: + sample_count = VK_SAMPLE_COUNT_2_BIT; + break; + case MsaaSamples::k4X: + sample_count = VK_SAMPLE_COUNT_4_BIT; + break; + default: + assert_unhandled_case(desired_config.surface_msaa); + break; + } + // Initialize all attachments to default unused. // As we set layout(location=RT) in shaders we must always provide 4. VkAttachmentDescription attachments[5]; for (int i = 0; i < 4; ++i) { attachments[i].flags = 0; attachments[i].format = VK_FORMAT_UNDEFINED; - attachments[i].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[i].samples = sample_count; attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; @@ -339,7 +370,7 @@ CachedRenderPass::CachedRenderPass(VkDevice device, auto& depth_stencil_attachment = attachments[4]; depth_stencil_attachment.flags = 0; depth_stencil_attachment.format = VK_FORMAT_UNDEFINED; - depth_stencil_attachment.samples = VK_SAMPLE_COUNT_1_BIT; + depth_stencil_attachment.samples = sample_count; depth_stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; depth_stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; depth_stencil_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; @@ -404,6 +435,10 @@ CachedRenderPass::~CachedRenderPass() { bool CachedRenderPass::IsCompatible( const RenderConfiguration& desired_config) const { + if (config.surface_msaa != desired_config.surface_msaa) { + return false; + } + for (int i = 0; i < 4; ++i) { // TODO(benvanik): allow compatible vulkan formats. 
if (config.color[i].format != desired_config.color[i].format) { @@ -503,12 +538,18 @@ bool RenderCache::dirty() const { regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; dirty |= cur_regs.pa_sc_window_scissor_br != regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; + dirty |= (cur_regs.rb_depthcontrol & (0x4 | 0x2)) != + (regs[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2)); return dirty; } const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + assert_null(current_command_buffer_); current_command_buffer_ = command_buffer; @@ -520,6 +561,7 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, bool dirty = false; dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); dirty |= SetShadowRegister(®s.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); @@ -529,7 +571,11 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br, XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); - regs.rb_depthcontrol = register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32; + dirty |= + (regs.rb_depthcontrol & (0x4 | 0x2)) != + (register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2)); + regs.rb_depthcontrol = + register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2); if (!dirty && current_state_.render_pass) { // No registers have changed so we can reuse the previous render pass - // just begin with what we had. 
@@ -549,7 +595,10 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, // Speculatively see if targets are actually used so we can skip copies for (int i = 0; i < 4; i++) { - config->color[i].used = pixel_shader->writes_color_target(i); + uint32_t color_mask = (regs.rb_color_mask >> (i * 4)) & 0xF; + config->color[i].used = + config->mode_control == xenos::ModeControl::kColorDepth && + color_mask != 0; } config->depth_stencil.used = !!(regs.rb_depthcontrol & (0x4 | 0x2)); @@ -558,66 +607,20 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, current_state_.framebuffer = framebuffer; current_state_.framebuffer_handle = framebuffer->handle; - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = edram_buffer_; - barrier.offset = 0; - barrier.size = 0; - - // Copy EDRAM buffer into render targets with tight packing. - VkBufferImageCopy region; - region.bufferRowLength = 0; - region.bufferImageHeight = 0; - region.imageOffset = {0, 0, 0}; - // Depth auto depth_target = current_state_.framebuffer->depth_stencil_attachment; if (depth_target && current_state_.config.depth_stencil.used) { - region.imageSubresource = { - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1}; - region.bufferOffset = depth_target->key.tile_offset * 5120; - - // Wait for any potential copies to finish. 
- barrier.offset = region.bufferOffset; - barrier.size = depth_target->key.tile_width * 80 * - depth_target->key.tile_height * 16 * 4; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, - &barrier, 0, nullptr); - - region.imageExtent = {depth_target->key.tile_width * 80u, - depth_target->key.tile_height * 16u, 1}; - vkCmdCopyBufferToImage(command_buffer, edram_buffer_, depth_target->image, - VK_IMAGE_LAYOUT_GENERAL, 1, ®ion); + UpdateTileView(command_buffer, depth_target, true); } // Color - region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; for (int i = 0; i < 4; i++) { auto target = current_state_.framebuffer->color_attachments[i]; if (!target || !current_state_.config.color[i].used) { continue; } - region.bufferOffset = target->key.tile_offset * 5120; - - // Wait for any potential copies to finish. - barrier.offset = region.bufferOffset; - barrier.size = - target->key.tile_width * 80 * target->key.tile_height * 16 * 4; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, - &barrier, 0, nullptr); - - region.imageExtent = {target->key.tile_width * 80u, - target->key.tile_height * 16u, 1}; - vkCmdCopyBufferToImage(command_buffer, edram_buffer_, target->image, - VK_IMAGE_LAYOUT_GENERAL, 1, ®ion); + UpdateTileView(command_buffer, target, true); } } if (!render_pass) { @@ -758,6 +761,7 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer, color_key.tile_width = xe::round_up(config->surface_pitch_px, 80) / 80; color_key.tile_height = xe::round_up(config->surface_height_px, 16) / 16; color_key.color_or_depth = 1; + color_key.msaa_samples = static_cast(config->surface_msaa); color_key.edram_format = static_cast(config->color[i].format); target_color_attachments[i] = FindOrCreateTileView(command_buffer, color_key); @@ -774,6 +778,8 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer 
command_buffer, depth_stencil_key.tile_height = xe::round_up(config->surface_height_px, 16) / 16; depth_stencil_key.color_or_depth = 0; + depth_stencil_key.msaa_samples = + static_cast(config->surface_msaa); depth_stencil_key.edram_format = static_cast(config->depth_stencil.format); auto target_depth_stencil_attachment = @@ -810,6 +816,51 @@ CachedTileView* RenderCache::FindOrCreateTileView( return tile_view; } +void RenderCache::UpdateTileView(VkCommandBuffer command_buffer, + CachedTileView* view, bool load, + bool insert_barrier) { + if (insert_barrier) { + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + if (load) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + } else { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + } + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = edram_buffer_; + barrier.offset = view->key.tile_offset * 5120; + barrier.size = view->key.tile_width * 80 * view->key.tile_height * 16 * 4; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, + &barrier, 0, nullptr); + } + + VkBufferImageCopy region; + region.bufferOffset = view->key.tile_offset * 5120; + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource = {0, 0, 0, 1}; + region.imageSubresource.aspectMask = + view->key.color_or_depth + ? 
VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + region.imageOffset = {0, 0, 0}; + region.imageExtent = {view->key.tile_width * 80u, view->key.tile_height * 16u, + 1}; + if (load) { + vkCmdCopyBufferToImage(command_buffer, edram_buffer_, view->image, + VK_IMAGE_LAYOUT_GENERAL, 1, ®ion); + } else { + vkCmdCopyImageToBuffer(command_buffer, view->image, VK_IMAGE_LAYOUT_GENERAL, + edram_buffer_, 1, ®ion); + } +} + CachedTileView* RenderCache::FindTileView(const TileViewKey& view_key) const { // Check the cache. // TODO(benvanik): better lookup. @@ -837,35 +888,31 @@ void RenderCache::EndRenderPass() { // can't get the correct height atm) and we may end up overwriting the valid // contents of another render target by mistake! Need to reorder copy commands // to avoid this. - VkBufferImageCopy region; - region.bufferRowLength = 0; - region.bufferImageHeight = 0; - region.imageOffset = {0, 0, 0}; - // Depth/stencil + + std::vector cached_views; + + // Depth auto depth_target = current_state_.framebuffer->depth_stencil_attachment; if (depth_target && current_state_.config.depth_stencil.used) { - region.imageSubresource = { - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1}; - region.bufferOffset = depth_target->key.tile_offset * 5120; - region.imageExtent = {depth_target->key.tile_width * 80u, - depth_target->key.tile_height * 16u, 1}; - vkCmdCopyImageToBuffer(current_command_buffer_, depth_target->image, - VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, ®ion); + cached_views.push_back(depth_target); } // Color - region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; for (int i = 0; i < 4; i++) { auto target = current_state_.framebuffer->color_attachments[i]; if (!target || !current_state_.config.color[i].used) { continue; } - region.bufferOffset = target->key.tile_offset * 5120; - region.imageExtent = {target->key.tile_width * 80u, - target->key.tile_height * 16u, 1}; - 
vkCmdCopyImageToBuffer(current_command_buffer_, target->image, - VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, ®ion); + cached_views.push_back(target); + } + + std::sort( + cached_views.begin(), cached_views.end(), + [](CachedTileView const* a, CachedTileView const* b) { return *a < *b; }); + + for (auto view : cached_views) { + UpdateTileView(current_command_buffer_, view, false, false); } current_command_buffer_ = nullptr; @@ -920,6 +967,7 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, &buffer_barrier, 0, nullptr); // Issue the copy command. + // TODO(DrChat): Stencil copies. VkBufferImageCopy region; region.bufferOffset = edram_base * 5120; region.bufferImageHeight = 0; @@ -928,8 +976,7 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, region.imageExtent = extents; region.imageSubresource = {0, 0, 0, 1}; region.imageSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; vkCmdCopyBufferToImage(command_buffer, edram_buffer_, image, image_layout, 1, ®ion); @@ -947,13 +994,15 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, void RenderCache::BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base, uint32_t pitch, - uint32_t height, VkImage image, - VkImageLayout image_layout, bool color_or_depth, - uint32_t format, VkFilter filter, - VkOffset3D offset, VkExtent3D extents) { + uint32_t height, MsaaSamples num_samples, + VkImage image, VkImageLayout image_layout, + bool color_or_depth, uint32_t format, + VkFilter filter, VkOffset3D offset, + VkExtent3D extents) { // Grab a tile view that represents the source image. TileViewKey key; key.color_or_depth = color_or_depth ? 
1 : 0; + key.msaa_samples = static_cast(num_samples); key.edram_format = format; key.tile_offset = edram_base; key.tile_width = xe::round_up(pitch, 80) / 80; @@ -979,14 +1028,14 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer, // Update the tile view with current EDRAM contents. // TODO: Heuristics to determine if this copy is avoidable. + // TODO(DrChat): Stencil copies. VkBufferImageCopy buffer_copy; buffer_copy.bufferOffset = edram_base * 5120; buffer_copy.bufferImageHeight = 0; buffer_copy.bufferRowLength = 0; buffer_copy.imageSubresource = {0, 0, 0, 1}; buffer_copy.imageSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; buffer_copy.imageExtent = {key.tile_width * 80u, key.tile_height * 16u, 1u}; buffer_copy.imageOffset = {0, 0, 0}; vkCmdCopyBufferToImage(command_buffer, edram_buffer_, tile_view->image, @@ -1018,26 +1067,48 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer, assert_true(extents.height <= key.tile_height * 16u); // Now issue the blit to the destination. - // TODO: Resolve to destination if necessary. - VkImageBlit image_blit; - image_blit.srcSubresource = {0, 0, 0, 1}; - image_blit.srcSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - image_blit.srcOffsets[0] = {0, 0, 0}; - image_blit.srcOffsets[1] = {int32_t(extents.width), int32_t(extents.height), - int32_t(extents.depth)}; + if (num_samples == MsaaSamples::k1X) { + VkImageBlit image_blit; + image_blit.srcSubresource = {0, 0, 0, 1}; + image_blit.srcSubresource.aspectMask = + color_or_depth + ? 
VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + image_blit.srcOffsets[0] = {0, 0, 0}; + image_blit.srcOffsets[1] = {int32_t(extents.width), int32_t(extents.height), + int32_t(extents.depth)}; - image_blit.dstSubresource = {0, 0, 0, 1}; - image_blit.dstSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - image_blit.dstOffsets[0] = offset; - image_blit.dstOffsets[1] = {offset.x + int32_t(extents.width), - offset.y + int32_t(extents.height), - offset.z + int32_t(extents.depth)}; - vkCmdBlitImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL, - image, image_layout, 1, &image_blit, filter); + image_blit.dstSubresource = {0, 0, 0, 1}; + image_blit.dstSubresource.aspectMask = + color_or_depth + ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + image_blit.dstOffsets[0] = offset; + image_blit.dstOffsets[1] = {offset.x + int32_t(extents.width), + offset.y + int32_t(extents.height), + offset.z + int32_t(extents.depth)}; + vkCmdBlitImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL, + image, image_layout, 1, &image_blit, filter); + } else { + VkImageResolve image_resolve; + image_resolve.srcSubresource = {0, 0, 0, 1}; + image_resolve.srcSubresource.aspectMask = + color_or_depth + ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + image_resolve.srcOffset = {0, 0, 0}; + + image_resolve.dstSubresource = {0, 0, 0, 1}; + image_resolve.dstSubresource.aspectMask = + color_or_depth + ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + image_resolve.dstOffset = offset; + + image_resolve.extent = extents; + vkCmdResolveImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL, + image, image_layout, 1, &image_resolve); + } // Transition the image back into its previous layout. 
image_barrier.srcAccessMask = image_barrier.dstAccessMask; @@ -1052,13 +1123,14 @@ void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base, ColorRenderTargetFormat format, uint32_t pitch, uint32_t height, - float* color) { + MsaaSamples num_samples, float* color) { // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just // need to detect this and calculate a value. // Grab a tile view (as we need to clear an image first) TileViewKey key; key.color_or_depth = 1; + key.msaa_samples = static_cast(num_samples); key.edram_format = static_cast(format); key.tile_offset = edram_base; key.tile_width = xe::round_up(pitch, 80) / 80; @@ -1091,13 +1163,15 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, uint32_t edram_base, DepthRenderTargetFormat format, uint32_t pitch, uint32_t height, - float depth, uint32_t stencil) { + MsaaSamples num_samples, float depth, + uint32_t stencil) { // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just // need to detect this and calculate a value. // Grab a tile view (as we need to clear an image first) TileViewKey key; key.color_or_depth = 0; + key.msaa_samples = static_cast(num_samples); key.edram_format = static_cast(format); key.tile_offset = edram_base; key.tile_width = xe::round_up(pitch, 80) / 80; @@ -1117,12 +1191,13 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &range); // Copy image back into EDRAM buffer + // TODO(DrChat): Stencil copies. 
VkBufferImageCopy copy_range; copy_range.bufferOffset = edram_base * 5120; copy_range.bufferImageHeight = 0; copy_range.bufferRowLength = 0; copy_range.imageSubresource = { - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1, + VK_IMAGE_ASPECT_DEPTH_BIT, 0, 0, 1, }; copy_range.imageExtent = {key.tile_width * 80u, key.tile_height * 16u, 1u}; copy_range.imageOffset = {0, 0, 0}; @@ -1131,6 +1206,11 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, ©_range); } +void RenderCache::FillEDRAM(VkCommandBuffer command_buffer, uint32_t value) { + vkCmdFillBuffer(command_buffer, edram_buffer_, 0, kEdramBufferCapacity, + value); +} + bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { uint32_t value = register_file_->values[register_name].u32; if (*dest == value) { diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index 2e8d1c5fe..86edac7bc 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -38,9 +38,9 @@ struct TileViewKey { // 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat. uint16_t color_or_depth : 1; // Surface MSAA samples - // uint16_t msaa_samples : 2; + uint16_t msaa_samples : 2; // Either ColorRenderTargetFormat or DepthRenderTargetFormat. - uint16_t edram_format : 15; // 13; + uint16_t edram_format : 13; }; static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed"); @@ -69,6 +69,10 @@ class CachedTileView { return *a == *b; } + bool operator<(const CachedTileView& other) const { + return key.tile_offset < other.key.tile_offset; + } + private: VkDevice device_ = nullptr; }; @@ -278,22 +282,26 @@ class RenderCache { // Queues commands to blit EDRAM contents into an image. // The command buffer must not be inside of a render pass when calling this. 
void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base, - uint32_t pitch, uint32_t height, VkImage image, - VkImageLayout image_layout, bool color_or_depth, - uint32_t format, VkFilter filter, VkOffset3D offset, - VkExtent3D extents); + uint32_t pitch, uint32_t height, MsaaSamples num_samples, + VkImage image, VkImageLayout image_layout, + bool color_or_depth, uint32_t format, VkFilter filter, + VkOffset3D offset, VkExtent3D extents); // Queues commands to clear EDRAM contents with a solid color. // The command buffer must not be inside of a render pass when calling this. void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base, ColorRenderTargetFormat format, uint32_t pitch, - uint32_t height, float* color); + uint32_t height, MsaaSamples num_samples, float* color); // Queues commands to clear EDRAM contents with depth/stencil values. // The command buffer must not be inside of a render pass when calling this. void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, uint32_t edram_base, DepthRenderTargetFormat format, uint32_t pitch, - uint32_t height, float depth, uint32_t stencil); + uint32_t height, MsaaSamples num_samples, + float depth, uint32_t stencil); + // Queues commands to fill EDRAM contents with a constant value. + // The command buffer must not be inside of a render pass when calling this. + void FillEDRAM(VkCommandBuffer command_buffer, uint32_t value); private: // Parses the current state into a configuration object. @@ -306,6 +314,9 @@ class RenderCache { CachedTileView* FindOrCreateTileView(VkCommandBuffer command_buffer, const TileViewKey& view_key); + void UpdateTileView(VkCommandBuffer command_buffer, CachedTileView* view, + bool load, bool insert_barrier = true); + // Gets or creates a render pass and frame buffer for the given configuration. // This attempts to reuse as much as possible across render passes and // framebuffers. 
@@ -335,6 +346,7 @@ class RenderCache { struct ShadowRegisters { uint32_t rb_modecontrol; uint32_t rb_surface_info; + uint32_t rb_color_mask; uint32_t rb_color_info; uint32_t rb_color1_info; uint32_t rb_color2_info; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 1d559d896..fd604733b 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -152,19 +152,8 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, // TODO(benvanik): move to CP or to host (trace dump, etc). // This only needs to surround a vkQueueSubmit. - static uint32_t frame = 0; - if (device_->is_renderdoc_attached() && - (FLAGS_vulkan_renderdoc_capture_all || - trace_state_ == TraceState::kSingleFrame)) { - if (queue_mutex_) { - queue_mutex_->lock(); - } - - device_->BeginRenderDocFrameCapture(); - - if (queue_mutex_) { - queue_mutex_->unlock(); - } + if (queue_mutex_) { + queue_mutex_->lock(); } // TODO(DrChat): If setup buffer is empty, don't bother queueing it up. @@ -182,45 +171,37 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, submit_info.signalSemaphoreCount = 0; submit_info.pSignalSemaphores = nullptr; if (queue_mutex_) { - queue_mutex_->lock(); + // queue_mutex_->lock(); } status = vkQueueSubmit(queue_, 1, &submit_info, *current_batch_fence_); if (queue_mutex_) { - queue_mutex_->unlock(); + // queue_mutex_->unlock(); } CheckResult(status, "vkQueueSubmit"); + // TODO(DrChat): Disable this completely. 
VkFence fences[] = {*current_batch_fence_}; status = vkWaitForFences(*device_, 1, fences, true, -1); CheckResult(status, "vkWaitForFences"); - if (device_->is_renderdoc_attached() && - (FLAGS_vulkan_renderdoc_capture_all || - trace_state_ == TraceState::kSingleFrame)) { - if (queue_mutex_) { - queue_mutex_->lock(); - } - + if (device_->is_renderdoc_attached() && capturing_) { device_->EndRenderDocFrameCapture(); + capturing_ = false; // HACK(DrChat): Used b/c I disabled trace saving code in the CP. // Remove later. if (!trace_writer_.is_open()) { trace_state_ = TraceState::kDisabled; } - - if (queue_mutex_) { - queue_mutex_->unlock(); - } + } + if (queue_mutex_) { + queue_mutex_->unlock(); } // Scavenging. current_command_buffer_ = nullptr; current_setup_buffer_ = nullptr; - while (command_buffer_pool_->has_pending()) { - command_buffer_pool_->Scavenge(); - xe::threading::MaybeYield(); - } + command_buffer_pool_->Scavenge(); texture_cache_->Scavenge(); current_batch_fence_ = nullptr; @@ -331,6 +312,22 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, vkBeginCommandBuffer(current_setup_buffer_, &command_buffer_begin_info); CheckResult(status, "vkBeginCommandBuffer"); + static uint32_t frame = 0; + if (device_->is_renderdoc_attached() && !capturing_ && + (FLAGS_vulkan_renderdoc_capture_all || + trace_state_ == TraceState::kSingleFrame)) { + if (queue_mutex_) { + queue_mutex_->lock(); + } + + capturing_ = true; + device_->BeginRenderDocFrameCapture(); + + if (queue_mutex_) { + queue_mutex_->unlock(); + } + } + started_command_buffer = true; } auto command_buffer = current_command_buffer_; @@ -357,6 +354,10 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, current_render_state_ = render_cache_->BeginRenderPass( command_buffer, vertex_shader, pixel_shader); if (!current_render_state_) { + command_buffer_pool_->CancelBatch(); + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + current_batch_fence_ = 
nullptr; return false; } } @@ -378,18 +379,30 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, // Pass registers to the shaders. if (!PopulateConstants(command_buffer, vertex_shader, pixel_shader)) { render_cache_->EndRenderPass(); + command_buffer_pool_->CancelBatch(); + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + current_batch_fence_ = nullptr; return false; } // Upload and bind index buffer data (if we have any). if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) { render_cache_->EndRenderPass(); + command_buffer_pool_->CancelBatch(); + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + current_batch_fence_ = nullptr; return false; } // Upload and bind all vertex buffer data. if (!PopulateVertexBuffers(command_buffer, vertex_shader)) { render_cache_->EndRenderPass(); + command_buffer_pool_->CancelBatch(); + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + current_batch_fence_ = nullptr; return false; } @@ -423,6 +436,10 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + // Upload the constants the shaders require. // These are optional, and if none are defined 0 will be returned. auto constant_offsets = buffer_cache_->UploadConstantRegisters( @@ -742,7 +759,7 @@ bool VulkanCommandProcessor::IssueCopy() { tex_info.size_2d.input_height = dest_block_height; tex_info.size_2d.input_pitch = copy_dest_pitch * 4; auto texture = texture_cache_->DemandResolveTexture( - tex_info, ColorFormatToTextureFormat(copy_dest_format), nullptr, nullptr); + tex_info, ColorFormatToTextureFormat(copy_dest_format), nullptr); if (texture->image_layout == VK_IMAGE_LAYOUT_UNDEFINED) { // Transition the image to a general layout. 
VkImageMemoryBarrier image_barrier; @@ -810,8 +827,9 @@ bool VulkanCommandProcessor::IssueCopy() { case CopyCommand::kConvert: render_cache_->BlitToImage( command_buffer, edram_base, surface_pitch, resolve_extent.height, - texture->image, texture->image_layout, copy_src_select <= 3, - src_format, VK_FILTER_LINEAR, resolve_offset, resolve_extent); + surface_msaa, texture->image, texture->image_layout, + copy_src_select <= 3, src_format, VK_FILTER_LINEAR, resolve_offset, + resolve_extent); break; case CopyCommand::kConstantOne: @@ -839,7 +857,7 @@ bool VulkanCommandProcessor::IssueCopy() { // TODO(DrChat): Do we know the surface height at this point? render_cache_->ClearEDRAMColor(command_buffer, color_edram_base, color_format, surface_pitch, - resolve_extent.height, color); + resolve_extent.height, surface_msaa, color); } if (depth_clear_enabled) { @@ -850,7 +868,7 @@ bool VulkanCommandProcessor::IssueCopy() { // TODO(DrChat): Do we know the surface height at this point? render_cache_->ClearEDRAMDepthStencil( command_buffer, depth_edram_base, depth_format, surface_pitch, - resolve_extent.height, depth, stencil); + resolve_extent.height, surface_msaa, depth, stencil); } return true; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index c87c515c0..287e4f65e 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -94,6 +94,7 @@ class VulkanCommandProcessor : public CommandProcessor { // Last copy base address, for debugging only. uint32_t last_copy_base_ = 0; + bool capturing_ = false; std::unique_ptr buffer_cache_; std::unique_ptr pipeline_cache_; From f9a634ad25c1b05679a94d87885cda8beb2c31b8 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Fri, 1 Apr 2016 21:53:46 -0500 Subject: [PATCH 103/145] CircularBuffer remove Discard functionality and allow rotation --- src/xenia/ui/vulkan/circular_buffer.cc | 25 +++++++------------------ src/xenia/ui/vulkan/circular_buffer.h | 4 +++- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/src/xenia/ui/vulkan/circular_buffer.cc b/src/xenia/ui/vulkan/circular_buffer.cc index 4cc22366f..110cd6c36 100644 --- a/src/xenia/ui/vulkan/circular_buffer.cc +++ b/src/xenia/ui/vulkan/circular_buffer.cc @@ -139,7 +139,6 @@ CircularBuffer::Allocation* CircularBuffer::Acquire( assert(read_head_ == write_head_); assert(capacity_ > aligned_length); - read_head_ = 0; write_head_ = length; auto alloc = new Allocation(); @@ -200,19 +199,6 @@ CircularBuffer::Allocation* CircularBuffer::Acquire( return nullptr; } -void CircularBuffer::Discard(Allocation* allocation) { - // TODO: Revert write_head_ (only if this is the last alloc though) - // Or maybe just disallow discards. - for (auto it = allocations_.begin(); it != allocations_.end(); ++it) { - if (*it == allocation) { - allocations_.erase(it); - break; - } - } - - delete allocation; -} - void CircularBuffer::Flush(Allocation* allocation) { VkMappedMemoryRange range; range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; @@ -239,7 +225,13 @@ void CircularBuffer::Scavenge() { break; } - read_head_ = (read_head_ + (*it)->aligned_length) % capacity_; + if (capacity_ - read_head_ < (*it)->aligned_length) { + // This allocation is stored at the beginning of the buffer. + read_head_ = (*it)->aligned_length; + } else { + read_head_ += (*it)->aligned_length; + } + delete *it; it = allocations_.erase(it); } @@ -247,9 +239,6 @@ void CircularBuffer::Scavenge() { if (allocations_.empty()) { // Reset R/W heads. 
read_head_ = write_head_ = 0; - } else { - // FIXME: Haven't verified this works correctly when actually rotating :P - assert_always(); } } diff --git a/src/xenia/ui/vulkan/circular_buffer.h b/src/xenia/ui/vulkan/circular_buffer.h index 2c036c685..6f0ec2f82 100644 --- a/src/xenia/ui/vulkan/circular_buffer.h +++ b/src/xenia/ui/vulkan/circular_buffer.h @@ -52,8 +52,10 @@ class CircularBuffer { uint8_t* host_base() const { return host_base_; } bool CanAcquire(VkDeviceSize length); + + // Acquires space to hold memory. This allocation is only freed when the fence + // reaches the signaled state. Allocation* Acquire(VkDeviceSize length, std::shared_ptr fence); - void Discard(Allocation* allocation); void Flush(Allocation* allocation); // Clears all allocations, regardless of whether they've been consumed or not. From 3726064af5ffee371bbda7144d197d3eabcb44fd Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 1 Apr 2016 22:03:29 -0500 Subject: [PATCH 104/145] Can't use CmdCopyBufferToImage or vice versa for depth and stencil. 
--- src/xenia/gpu/vulkan/render_cache.cc | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 7e0528866..3df5e4c9e 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -339,18 +339,18 @@ CachedRenderPass::CachedRenderPass(VkDevice device, VkSampleCountFlagBits sample_count; switch (desired_config.surface_msaa) { - case MsaaSamples::k1X: - sample_count = VK_SAMPLE_COUNT_1_BIT; - break; - case MsaaSamples::k2X: - sample_count = VK_SAMPLE_COUNT_2_BIT; - break; - case MsaaSamples::k4X: - sample_count = VK_SAMPLE_COUNT_4_BIT; - break; - default: - assert_unhandled_case(desired_config.surface_msaa); - break; + case MsaaSamples::k1X: + sample_count = VK_SAMPLE_COUNT_1_BIT; + break; + case MsaaSamples::k2X: + sample_count = VK_SAMPLE_COUNT_2_BIT; + break; + case MsaaSamples::k4X: + sample_count = VK_SAMPLE_COUNT_4_BIT; + break; + default: + assert_unhandled_case(desired_config.surface_msaa); + break; } // Initialize all attachments to default unused. @@ -840,15 +840,15 @@ void RenderCache::UpdateTileView(VkCommandBuffer command_buffer, &barrier, 0, nullptr); } + // TODO(DrChat): Stencil copies. VkBufferImageCopy region; region.bufferOffset = view->key.tile_offset * 5120; region.bufferRowLength = 0; region.bufferImageHeight = 0; region.imageSubresource = {0, 0, 0, 1}; - region.imageSubresource.aspectMask = - view->key.color_or_depth - ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + region.imageSubresource.aspectMask = view->key.color_or_depth + ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT; region.imageOffset = {0, 0, 0}; region.imageExtent = {view->key.tile_width * 80u, view->key.tile_height * 16u, 1}; From a1c9540063ec315646c94c0d4cc80142e2e8c319 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sat, 9 Apr 2016 18:35:00 -0500 Subject: [PATCH 105/145] SPIR-V Validator util class --- src/xenia/ui/spirv/spirv_validator.cc | 80 +++++++++++++++++++++++++++ src/xenia/ui/spirv/spirv_validator.h | 66 ++++++++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 src/xenia/ui/spirv/spirv_validator.cc create mode 100644 src/xenia/ui/spirv/spirv_validator.h diff --git a/src/xenia/ui/spirv/spirv_validator.cc b/src/xenia/ui/spirv/spirv_validator.cc new file mode 100644 index 000000000..734688eb6 --- /dev/null +++ b/src/xenia/ui/spirv/spirv_validator.cc @@ -0,0 +1,80 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/ui/spirv/spirv_validator.h" + +#include "third_party/spirv-tools/include/spirv-tools/libspirv.h" +#include "xenia/base/logging.h" + +namespace xe { +namespace ui { +namespace spirv { + +SpirvValidator::Result::Result(spv_text text, spv_diagnostic diagnostic) + : text_(text), diagnostic_(diagnostic) {} + +SpirvValidator::Result::~Result() { + if (text_) { + spvTextDestroy(text_); + } + if (diagnostic_) { + spvDiagnosticDestroy(diagnostic_); + } +} + +bool SpirvValidator::Result::has_error() const { return !!diagnostic_; } + +size_t SpirvValidator::Result::error_word_index() const { + return diagnostic_ ? diagnostic_->position.index : 0; +} + +const char* SpirvValidator::Result::error_string() const { + return diagnostic_ ? diagnostic_->error : ""; +} + +const char* SpirvValidator::Result::text() const { + return text_ ? text_->str : ""; +} + +std::string SpirvValidator::Result::to_string() const { + return text_ ? 
std::string(text_->str, text_->length) : ""; +} + +void SpirvValidator::Result::AppendText(StringBuffer* target_buffer) const { + if (text_) { + target_buffer->AppendBytes(reinterpret_cast(text_->str), + text_->length); + } +} + +SpirvValidator::SpirvValidator() : spv_context_(spvContextCreate()) {} +SpirvValidator::~SpirvValidator() { spvContextDestroy(spv_context_); } + +std::unique_ptr SpirvValidator::Validate( + const uint32_t* words, size_t word_count) { + spv_text text = nullptr; + spv_diagnostic diagnostic = nullptr; + spv_const_binary_t binary = {words, word_count}; + auto result_code = + spvValidate(spv_context_, &binary, SPV_VALIDATE_ALL, &diagnostic); + std::unique_ptr result(new Result(text, diagnostic)); + if (result_code) { + XELOGE("Failed to validate spv: %d", result_code); + if (result->has_error()) { + return result; + } else { + return nullptr; + } + } + return result; +} + +} // namespace spirv +} // namespace ui +} // namespace xe \ No newline at end of file diff --git a/src/xenia/ui/spirv/spirv_validator.h b/src/xenia/ui/spirv/spirv_validator.h new file mode 100644 index 000000000..890843f27 --- /dev/null +++ b/src/xenia/ui/spirv/spirv_validator.h @@ -0,0 +1,66 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_ +#define XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_ + +#include +#include + +#include "xenia/base/string_buffer.h" +#include "xenia/ui/spirv/spirv_util.h" + +namespace xe { +namespace ui { +namespace spirv { + +class SpirvValidator { + public: + class Result { + public: + Result(spv_text text, spv_diagnostic diagnostic); + ~Result(); + + // True if the result has an error associated with it. + bool has_error() const; + // Index of the error in the provided binary word data. + size_t error_word_index() const; + // Human-readable description of the error. + const char* error_string() const; + + // Disassembled source text. + // Returned pointer lifetime is tied to this Result instance. + const char* text() const; + // Converts the disassembled source text to a string. + std::string to_string() const; + // Appends the disassembled source text to the given buffer. + void AppendText(StringBuffer* target_buffer) const; + + private: + spv_text text_ = nullptr; + spv_diagnostic diagnostic_ = nullptr; + }; + + SpirvValidator(); + ~SpirvValidator(); + + // Validates the given SPIRV binary. + // The return will be nullptr if validation fails due to a library error. + // The return may have an error set on it if the SPIRV binary is malformed. + std::unique_ptr Validate(const uint32_t* words, size_t word_count); + + private: + spv_context spv_context_ = nullptr; +}; + +} // namespace spirv +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_ From b7f2c93d73bbbfd5af1fd7713da6d7e9a845b0d6 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 9 Apr 2016 21:03:44 -0500 Subject: [PATCH 106/145] SPIR-V: Batch predicated instructions together into a single block. Add Post-Translation validation. Fix a couple of type-related typos. 
--- src/xenia/gpu/shader_translator.cc | 19 +- src/xenia/gpu/shader_translator.h | 6 +- src/xenia/gpu/spirv_shader_translator.cc | 285 +++++++++++++++-------- src/xenia/gpu/spirv_shader_translator.h | 12 +- 4 files changed, 210 insertions(+), 112 deletions(-) diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 6e8b69cea..1097dbc55 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -986,16 +986,19 @@ void ShaderTranslator::TranslateAluInstruction(const AluInstruction& op) { return; } + ParsedAluInstruction instr; if (op.has_vector_op()) { const auto& opcode_info = alu_vector_opcode_infos_[static_cast(op.vector_opcode())]; - ParseAluVectorInstruction(op, opcode_info); + ParseAluVectorInstruction(op, opcode_info, instr); + ProcessAluInstruction(instr); } if (op.has_scalar_op()) { const auto& opcode_info = alu_scalar_opcode_infos_[static_cast(op.scalar_opcode())]; - ParseAluScalarInstruction(op, opcode_info); + ParseAluScalarInstruction(op, opcode_info, instr); + ProcessAluInstruction(instr); } } @@ -1088,8 +1091,8 @@ void ParseAluInstructionOperandSpecial(const AluInstruction& op, } void ShaderTranslator::ParseAluVectorInstruction( - const AluInstruction& op, const AluOpcodeInfo& opcode_info) { - ParsedAluInstruction i; + const AluInstruction& op, const AluOpcodeInfo& opcode_info, + ParsedAluInstruction& i) { i.dword_index = 0; i.type = ParsedAluInstruction::Type::kVector; i.vector_opcode = op.vector_opcode(); @@ -1203,13 +1206,11 @@ void ShaderTranslator::ParseAluVectorInstruction( } i.Disassemble(&ucode_disasm_buffer_); - - ProcessAluInstruction(i); } void ShaderTranslator::ParseAluScalarInstruction( - const AluInstruction& op, const AluOpcodeInfo& opcode_info) { - ParsedAluInstruction i; + const AluInstruction& op, const AluOpcodeInfo& opcode_info, + ParsedAluInstruction& i) { i.dword_index = 0; i.type = ParsedAluInstruction::Type::kScalar; i.scalar_opcode = op.scalar_opcode(); @@ 
-1319,8 +1320,6 @@ void ShaderTranslator::ParseAluScalarInstruction( } i.Disassemble(&ucode_disasm_buffer_); - - ProcessAluInstruction(i); } } // namespace gpu diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index d1b27a997..7dc173dc5 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -173,9 +173,11 @@ class ShaderTranslator { void TranslateAluInstruction(const ucode::AluInstruction& op); void ParseAluVectorInstruction(const ucode::AluInstruction& op, - const AluOpcodeInfo& opcode_info); + const AluOpcodeInfo& opcode_info, + ParsedAluInstruction& instr); void ParseAluScalarInstruction(const ucode::AluInstruction& op, - const AluOpcodeInfo& opcode_info); + const AluOpcodeInfo& opcode_info, + ParsedAluInstruction& instr); // Input shader metadata and microcode. ShaderType shader_type_; diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index ef242f0bd..0b0ab0626 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * + * Copyright 2016 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -85,16 +85,14 @@ void SpirvShaderTranslator::StartTranslation() { "ps"); pv_ = b.createVariable(spv::StorageClass::StorageClassFunction, vec4_float_type_, "pv"); - a0_ = b.createVariable(spv::StorageClass::StorageClassFunction, - b.makeUintType(32), "a0"); + a0_ = b.createVariable(spv::StorageClass::StorageClassFunction, int_type_, + "a0"); // Uniform constants. 
Id float_consts_type = b.makeArrayType(vec4_float_type_, b.makeUintConstant(512), 1); - Id loop_consts_type = - b.makeArrayType(b.makeUintType(32), b.makeUintConstant(32), 1); - Id bool_consts_type = - b.makeArrayType(b.makeUintType(32), b.makeUintConstant(8), 1); + Id loop_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(32), 1); + Id bool_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(8), 1); Id consts_struct_type = b.makeStructType( {float_consts_type, loop_consts_type, bool_consts_type}, "consts_type"); @@ -242,6 +240,13 @@ void SpirvShaderTranslator::StartTranslation() { interpolators_ = b.createVariable(spv::StorageClass::StorageClassOutput, interpolators_type, "interpolators"); b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); + for (uint32_t i = 0; i < 16; i++) { + // Zero interpolators. + auto ptr = b.createAccessChain(spv::StorageClass::StorageClassOutput, + interpolators_, + std::vector({b.makeUintConstant(i)})); + b.createStore(vec4_float_zero_, ptr); + } pos_ = b.createVariable(spv::StorageClass::StorageClassOutput, vec4_float_type_, "gl_Position"); @@ -338,6 +343,9 @@ void SpirvShaderTranslator::StartTranslation() { std::vector SpirvShaderTranslator::CompleteTranslation() { auto& b = *builder_; + assert_false(open_predicated_block_); + auto block = &b.makeNewBlock(); + b.createBranch(block); b.makeReturn(false); // main() entry point. 
@@ -397,9 +405,10 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { b.createStore(p, pos_); } else { // Alpha test - auto alpha_test_x = b.createCompositeExtract( - push_consts_, float_type_, std::vector{2, 0}); - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, alpha_test_x, b.makeFloatConstant(1.f)); + auto alpha_test_x = b.createCompositeExtract(push_consts_, float_type_, + std::vector{2, 0}); + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, alpha_test_x, + b.makeFloatConstant(1.f)); spv::Builder::If alpha_if(cond, b); @@ -433,15 +442,25 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { } void SpirvShaderTranslator::PostTranslation(Shader* shader) { + // Validation. + // TODO(DrChat): Only do this if a flag is set (this is pretty slow). + auto validation = validator_.Validate( + reinterpret_cast(shader->translated_binary().data()), + shader->translated_binary().size() / 4); + if (validation->has_error()) { + XELOGE("SPIR-V Shader Validation failed! Error: %s", + validation->error_string()); + } + // TODO(benvanik): only if needed? could be slowish. 
auto disasm = disassembler_.Disassemble( reinterpret_cast(shader->translated_binary().data()), shader->translated_binary().size() / 4); if (disasm->has_error()) { XELOGE("Failed to disassemble SPIRV - invalid?"); - return; + } else { + set_host_disassembly(shader, disasm->to_string()); } - set_host_disassembly(shader, disasm->to_string()); } void SpirvShaderTranslator::PreProcessControlFlowInstruction( @@ -475,13 +494,18 @@ void SpirvShaderTranslator::ProcessControlFlowInstructionEnd( void SpirvShaderTranslator::ProcessControlFlowNopInstruction() { auto& b = *builder_; - b.createNoResultOp(spv::Op::OpNop); + // b.createNoResultOp(spv::Op::OpNop); } void SpirvShaderTranslator::ProcessExecInstructionBegin( const ParsedExecInstruction& instr) { auto& b = *builder_; + assert_false(open_predicated_block_); + open_predicated_block_ = false; + predicated_block_cond_ = false; + predicated_block_end_ = nullptr; + // Head has the logic to check if the body should execute. auto head = cf_blocks_[instr.dword_index]; b.setBuildPoint(head); @@ -500,7 +524,7 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( v = b.createLoad(v); // Bitfield extract the bool constant. 
- v = b.createTriOp(spv::Op::OpBitFieldUExtract, b.makeUintType(32), v, + v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v, b.makeUintConstant(instr.bool_constant_index % 32), b.makeUintConstant(1)); @@ -519,6 +543,7 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), b.makeBoolConstant(instr.condition)); b.createConditionalBranch(cond, body, cf_blocks_[instr.dword_index + 1]); + } break; } b.setBuildPoint(body); @@ -528,6 +553,14 @@ void SpirvShaderTranslator::ProcessExecInstructionEnd( const ParsedExecInstruction& instr) { auto& b = *builder_; + if (open_predicated_block_) { + b.createBranch(predicated_block_end_); + b.setBuildPoint(predicated_block_end_); + open_predicated_block_ = false; + predicated_block_cond_ = false; + predicated_block_end_ = nullptr; + } + if (instr.is_end) { b.makeReturn(false); } else { @@ -671,7 +704,30 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( assert_true(is_vertex_shader()); assert_not_zero(vertex_id_); - // TODO: instr.is_predicated + // Close the open predicated block if this instr isn't predicated or the + // conditions do not match. 
+ if (open_predicated_block_ && + (!instr.is_predicated || + instr.predicate_condition != predicated_block_cond_)) { + b.createBranch(predicated_block_end_); + b.setBuildPoint(predicated_block_end_); + open_predicated_block_ = false; + predicated_block_cond_ = false; + predicated_block_end_ = nullptr; + } + + if (!open_predicated_block_ && instr.is_predicated) { + Id pred_cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.predicate_condition)); + auto block = &b.makeNewBlock(); + open_predicated_block_ = true; + predicated_block_cond_ = instr.predicate_condition; + predicated_block_end_ = &b.makeNewBlock(); + + b.createConditionalBranch(pred_cond, block, predicated_block_end_); + b.setBuildPoint(block); + } // Operand 0 is the index // Operand 1 is the binding @@ -726,7 +782,31 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( const ParsedTextureFetchInstruction& instr) { auto& b = *builder_; - // TODO: instr.is_predicated + // Close the open predicated block if this instr isn't predicated or the + // conditions do not match. 
+ if (open_predicated_block_ && + (!instr.is_predicated || + instr.predicate_condition != predicated_block_cond_)) { + b.createBranch(predicated_block_end_); + b.setBuildPoint(predicated_block_end_); + open_predicated_block_ = false; + predicated_block_cond_ = false; + predicated_block_end_ = nullptr; + } + + if (!open_predicated_block_ && instr.is_predicated) { + Id pred_cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.predicate_condition)); + auto block = &b.makeNewBlock(); + open_predicated_block_ = true; + predicated_block_cond_ = instr.predicate_condition; + predicated_block_end_ = &b.makeNewBlock(); + + b.createConditionalBranch(pred_cond, block, predicated_block_end_); + b.setBuildPoint(block); + } + // Operand 0 is the offset // Operand 1 is the sampler index Id dest = 0; @@ -804,13 +884,32 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( sources[i] = LoadFromOperand(instr.operands[i]); } - Id pred_cond = 0; - if (instr.is_predicated) { - pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); + // Close the open predicated block if this instr isn't predicated or the + // conditions do not match. 
+ if (open_predicated_block_ && + (!instr.is_predicated || + instr.predicate_condition != predicated_block_cond_)) { + b.createBranch(predicated_block_end_); + b.setBuildPoint(predicated_block_end_); + open_predicated_block_ = false; + predicated_block_cond_ = false; + predicated_block_end_ = nullptr; } + if (!open_predicated_block_ && instr.is_predicated) { + Id pred_cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.predicate_condition)); + auto block = &b.makeNewBlock(); + open_predicated_block_ = true; + predicated_block_cond_ = instr.predicate_condition; + predicated_block_end_ = &b.makeNewBlock(); + + b.createConditionalBranch(pred_cond, block, predicated_block_end_); + b.setBuildPoint(block); + } + + bool close_predicated_block = false; switch (instr.vector_opcode) { case AluVectorOpcode::kAdd: { dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, sources[0], @@ -863,8 +962,8 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto src1_xy = b.createOp(spv::Op::OpVectorShuffle, vec2_float_type_, {sources[1], sources[1], 0, 1}); auto src2_x = b.createCompositeExtract(sources[2], float_type_, 0); - auto dot = b.createBinOp(spv::Op::OpDot, float_type_, src0_xy, src1_xy); - dest = b.createBinOp(spv::Op::OpFAdd, float_type_, dot, src2_x); + dest = b.createBinOp(spv::Op::OpDot, float_type_, src0_xy, src1_xy); + dest = b.createBinOp(spv::Op::OpFAdd, float_type_, dest, src2_x); dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_); } break; @@ -873,12 +972,13 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( {sources[0], sources[0], 0, 1, 2}); auto src1_xyz = b.createOp(spv::Op::OpVectorShuffle, vec3_float_type_, {sources[1], sources[1], 0, 1, 2}); - auto dot = b.createBinOp(spv::Op::OpDot, float_type_, src0_xyz, src1_xyz); - dest = b.smearScalar(spv::NoPrecision, dot, vec4_float_type_); + dest = b.createBinOp(spv::Op::OpDot, float_type_, src0_xyz, src1_xyz); + dest = 
b.smearScalar(spv::NoPrecision, dest, vec4_float_type_); } break; case AluVectorOpcode::kDp4: { dest = b.createBinOp(spv::Op::OpDot, float_type_, sources[0], sources[1]); + dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_); } break; case AluVectorOpcode::kFloor: { @@ -899,10 +999,6 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto cond = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], sources[1]); cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - if (pred_cond) { - cond = - b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); - } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -918,10 +1014,6 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_, sources[0], sources[1]); cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - if (pred_cond) { - cond = - b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); - } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -937,10 +1029,6 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, sources[0], sources[1]); cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - if (pred_cond) { - cond = - b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); - } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -956,10 +1044,6 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, sources[0], sources[1]); cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - if (pred_cond) { - cond = - b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); - } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -1053,6 +1137,7 @@ void 
SpirvShaderTranslator::ProcessVectorAluInstruction( // p0 b.createStore(c_and_w, p0_); + close_predicated_block = true; // dest auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); @@ -1076,6 +1161,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( // p0 b.createStore(c_and_w, p0_); + close_predicated_block = true; // dest auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); @@ -1099,6 +1185,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( // p0 b.createStore(c_and_w, p0_); + close_predicated_block = true; // dest auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); @@ -1122,6 +1209,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( // p0 b.createStore(c_and_w, p0_); + close_predicated_block = true; // dest auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); @@ -1177,15 +1265,16 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( assert_not_zero(dest); if (dest) { - // If predicated, discard the result from the instruction. - Id pv_dest = dest; - if (instr.is_predicated) { - pv_dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, pred_cond, - dest, b.createLoad(pv_)); - } + b.createStore(dest, pv_); + StoreToResult(dest, instr.result); + } - b.createStore(pv_dest, pv_); - StoreToResult(dest, instr.result, pred_cond); + if (close_predicated_block && open_predicated_block_) { + b.createBranch(predicated_block_end_); + b.setBuildPoint(predicated_block_end_); + open_predicated_block_ = false; + predicated_block_cond_ = false; + predicated_block_end_ = nullptr; } } @@ -1229,13 +1318,32 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } } - Id pred_cond = 0; - if (instr.is_predicated) { - pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); + // Close the open predicated block if this instr isn't predicated or the + // conditions do not match. 
+ if (open_predicated_block_ && + (!instr.is_predicated || + instr.predicate_condition != predicated_block_cond_)) { + b.createBranch(predicated_block_end_); + b.setBuildPoint(predicated_block_end_); + open_predicated_block_ = false; + predicated_block_cond_ = false; + predicated_block_end_ = nullptr; } + if (!open_predicated_block_ && instr.is_predicated) { + Id pred_cond = + b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), + b.makeBoolConstant(instr.predicate_condition)); + auto block = &b.makeNewBlock(); + open_predicated_block_ = true; + predicated_block_cond_ = instr.predicate_condition; + predicated_block_end_ = &b.makeNewBlock(); + + b.createConditionalBranch(pred_cond, block, predicated_block_end_); + b.setBuildPoint(block); + } + + bool close_predicated_block = false; switch (instr.scalar_opcode) { case AluScalarOpcode::kAdds: case AluScalarOpcode::kAddsc0: @@ -1276,10 +1384,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto kill_block = &b.makeNewBlock(); auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], b.makeFloatConstant(0.f)); - if (pred_cond) { - cond = - b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); - } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -1294,10 +1398,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto kill_block = &b.makeNewBlock(); auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, sources[0], b.makeFloatConstant(0.f)); - if (pred_cond) { - cond = - b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); - } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -1312,10 +1412,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto kill_block = &b.makeNewBlock(); auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, sources[0], b.makeFloatConstant(0.f)); - if (pred_cond) { - cond = - 
b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); - } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -1330,10 +1426,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto kill_block = &b.makeNewBlock(); auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0], b.makeFloatConstant(0.f)); - if (pred_cond) { - cond = - b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); - } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -1348,10 +1440,6 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto kill_block = &b.makeNewBlock(); auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], b.makeFloatConstant(1.f)); - if (pred_cond) { - cond = - b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, cond, pred_cond); - } b.createConditionalBranch(cond, kill_block, continue_block); b.setBuildPoint(kill_block); @@ -1448,7 +1536,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( b.makeFloatConstant(0.f)); auto d = b.createBinOp(spv::Op::OpFDiv, float_type_, b.makeFloatConstant(1.f), sources[0]); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, + dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, b.makeFloatConstant(0.f), d); } break; @@ -1462,10 +1550,10 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( // dest = src0 != 0.0 ? 
inversesqrt(src0) : 0.0; auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], b.makeFloatConstant(0.f)); - auto d = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec4_float_type_, spv::GLSLstd450::kInverseSqrt, - {sources[0]}); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, + auto d = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + spv::GLSLstd450::kInverseSqrt, + {sources[0]}); + dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, b.makeFloatConstant(0.f), d); } break; @@ -1503,6 +1591,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( case AluScalarOpcode::kSetpClr: { b.createStore(b.makeBoolConstant(false), p0_); + close_predicated_block = true; dest = b.makeFloatConstant(FLT_MAX); } break; @@ -1511,6 +1600,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( b.makeFloatConstant(0.f)); // p0 = cond b.createStore(cond, p0_); + close_predicated_block = true; // dest = cond ? 0.f : 1.f; dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, @@ -1522,6 +1612,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( sources[0], b.makeFloatConstant(0.f)); // p0 = cond b.createStore(cond, p0_); + close_predicated_block = true; // dest = cond ? 0.f : 1.f; dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, @@ -1533,6 +1624,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( sources[0], b.makeFloatConstant(0.f)); // p0 = cond b.createStore(cond, p0_); + close_predicated_block = true; // dest = cond ? 0.f : 1.f; dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, @@ -1544,6 +1636,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], b.makeFloatConstant(1.f)); b.createStore(cond, p0_); + close_predicated_block = true; // if (!cond) dest = src0 == 0.0 ? 
1.0 : src0; auto dst_cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, @@ -1560,6 +1653,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( // p0 = cond b.createStore(cond, p0_); + close_predicated_block = true; // dest = cond ? 0.f : 1.f; dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, @@ -1572,6 +1666,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto c = b.createBinOp(spv::Op::OpFOrdLessThanEqual, bool_type_, src, b.makeFloatConstant(0.f)); b.createStore(c, p0_); + close_predicated_block = true; dest = CreateGlslStd450InstructionCall( spv::NoPrecision, float_type_, GLSLstd450::kFMax, @@ -1582,6 +1677,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], b.makeFloatConstant(0.f)); b.createStore(c, p0_); + close_predicated_block = true; dest = sources[0]; } break; @@ -1618,15 +1714,16 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( assert_not_zero(dest); if (dest) { - // If predicated, discard the result from the instruction. 
- Id ps_dest = dest; - if (instr.is_predicated) { - ps_dest = b.createTriOp(spv::Op::OpSelect, float_type_, pred_cond, dest, - b.createLoad(ps_)); - } + b.createStore(dest, ps_); + StoreToResult(dest, instr.result); + } - b.createStore(ps_dest, ps_); - StoreToResult(dest, instr.result, pred_cond); + if (close_predicated_block && open_predicated_block_) { + b.createBranch(predicated_block_end_); + b.setBuildPoint(predicated_block_end_); + open_predicated_block_ = false; + predicated_block_cond_ = false; + predicated_block_end_ = nullptr; } } @@ -1763,8 +1860,7 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { } void SpirvShaderTranslator::StoreToResult(Id source_value_id, - const InstructionResult& result, - Id predicate_cond) { + const InstructionResult& result) { auto& b = *builder_; if (result.storage_target == InstructionStorageTarget::kNone) { @@ -1865,7 +1961,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, // Only load from storage if we need it later. Id storage_value = 0; - if (!result.has_all_writes() || predicate_cond) { + if (!result.has_all_writes()) { storage_value = b.createLoad(storage_pointer); } @@ -1965,13 +2061,8 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, assert_true(b.getNumComponents(source_value_id) == b.getNumTypeComponents(storage_type)); - // Discard if predicate condition is false. 
- if (predicate_cond) { - source_value_id = - b.createTriOp(spv::Op::OpSelect, storage_type, predicate_cond, - source_value_id, storage_value); - } - + assert_true(b.getTypeId(source_value_id) == + b.getDerefTypeId(storage_pointer)); b.createStore(source_value_id, storage_pointer); } diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 1d5dea31b..f30d3ab44 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * + * Copyright 2016 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -18,6 +18,7 @@ #include "third_party/spirv/GLSL.std.450.hpp11" #include "xenia/gpu/shader_translator.h" #include "xenia/ui/spirv/spirv_disassembler.h" +#include "xenia/ui/spirv/spirv_validator.h" namespace xe { namespace gpu { @@ -91,10 +92,15 @@ class SpirvShaderTranslator : public ShaderTranslator { // Stores a value based on the specified result information. // The value will be transformed into the appropriate form for the result and // the proper components will be selected. 
- void StoreToResult(spv::Id source_value_id, const InstructionResult& result, - spv::Id predicate_cond = 0); + void StoreToResult(spv::Id source_value_id, const InstructionResult& result); xe::ui::spirv::SpirvDisassembler disassembler_; + xe::ui::spirv::SpirvValidator validator_; + + // True if there's an open predicated block + bool open_predicated_block_ = false; + bool predicated_block_cond_ = false; + spv::Block* predicated_block_end_ = nullptr; // TODO(benvanik): replace with something better, make reusable, etc. std::unique_ptr builder_; From 4811ebc2ceb1c9f77282d00ef74052eeac787603 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 9 Apr 2016 21:27:32 -0500 Subject: [PATCH 107/145] BufferCache: Use a CircularBuffer as the transient buffer. --- src/xenia/gpu/vulkan/buffer_cache.cc | 201 ++++++------------------- src/xenia/gpu/vulkan/buffer_cache.h | 41 +++-- src/xenia/ui/vulkan/circular_buffer.cc | 3 +- src/xenia/ui/vulkan/circular_buffer.h | 1 + 4 files changed, 67 insertions(+), 179 deletions(-) diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc index 90b7c487e..4ae98c864 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -30,90 +30,14 @@ constexpr VkDeviceSize kConstantRegisterUniformRange = BufferCache::BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device, size_t capacity) - : register_file_(register_file), - device_(*device), - transient_capacity_(capacity) { - // Uniform buffer. 
- VkBufferCreateInfo uniform_buffer_info; - uniform_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - uniform_buffer_info.pNext = nullptr; - uniform_buffer_info.flags = 0; - uniform_buffer_info.size = transient_capacity_; - uniform_buffer_info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - uniform_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - uniform_buffer_info.queueFamilyIndexCount = 0; - uniform_buffer_info.pQueueFamilyIndices = nullptr; - auto err = vkCreateBuffer(device_, &uniform_buffer_info, nullptr, - &transient_uniform_buffer_); - CheckResult(err, "vkCreateBuffer"); - - // Index buffer. - VkBufferCreateInfo index_buffer_info; - index_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - index_buffer_info.pNext = nullptr; - index_buffer_info.flags = 0; - index_buffer_info.size = transient_capacity_; - index_buffer_info.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT; - index_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - index_buffer_info.queueFamilyIndexCount = 0; - index_buffer_info.pQueueFamilyIndices = nullptr; - err = vkCreateBuffer(device_, &index_buffer_info, nullptr, - &transient_index_buffer_); - CheckResult(err, "vkCreateBuffer"); - - // Vertex buffer. - VkBufferCreateInfo vertex_buffer_info; - vertex_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - vertex_buffer_info.pNext = nullptr; - vertex_buffer_info.flags = 0; - vertex_buffer_info.size = transient_capacity_; - vertex_buffer_info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - vertex_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - vertex_buffer_info.queueFamilyIndexCount = 0; - vertex_buffer_info.pQueueFamilyIndices = nullptr; - err = vkCreateBuffer(*device, &vertex_buffer_info, nullptr, - &transient_vertex_buffer_); - CheckResult(err, "vkCreateBuffer"); - - // Allocate the underlying buffer we use for all storage. - // We query all types and take the max alignment. 
- VkMemoryRequirements uniform_buffer_requirements; - VkMemoryRequirements index_buffer_requirements; - VkMemoryRequirements vertex_buffer_requirements; - vkGetBufferMemoryRequirements(device_, transient_uniform_buffer_, - &uniform_buffer_requirements); - vkGetBufferMemoryRequirements(device_, transient_index_buffer_, - &index_buffer_requirements); - vkGetBufferMemoryRequirements(device_, transient_vertex_buffer_, - &vertex_buffer_requirements); - uniform_buffer_alignment_ = uniform_buffer_requirements.alignment; - index_buffer_alignment_ = index_buffer_requirements.alignment; - vertex_buffer_alignment_ = vertex_buffer_requirements.alignment; - VkMemoryRequirements buffer_requirements; - buffer_requirements.size = transient_capacity_; - buffer_requirements.alignment = - std::max(uniform_buffer_requirements.alignment, - std::max(index_buffer_requirements.alignment, - vertex_buffer_requirements.alignment)); - buffer_requirements.memoryTypeBits = - uniform_buffer_requirements.memoryTypeBits | - index_buffer_requirements.memoryTypeBits | - vertex_buffer_requirements.memoryTypeBits; - transient_buffer_memory_ = device->AllocateMemory( - buffer_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - - // Alias all buffers to our memory. - vkBindBufferMemory(device_, transient_uniform_buffer_, - transient_buffer_memory_, 0); - vkBindBufferMemory(device_, transient_index_buffer_, transient_buffer_memory_, - 0); - vkBindBufferMemory(device_, transient_vertex_buffer_, - transient_buffer_memory_, 0); - - // Map memory and keep it mapped while we use it. 
- err = vkMapMemory(device_, transient_buffer_memory_, 0, VK_WHOLE_SIZE, 0, - &transient_buffer_data_); - CheckResult(err, "vkMapMemory"); + : register_file_(register_file), device_(*device) { + transient_buffer_ = std::make_unique(device); + if (!transient_buffer_->Initialize(capacity, + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT)) { + assert_always(); + } // Descriptor pool used for all of our cached descriptors. // In the steady state we don't allocate anything, so these are all manually @@ -129,8 +53,8 @@ BufferCache::BufferCache(RegisterFile* register_file, pool_sizes[0].descriptorCount = 2; descriptor_pool_info.poolSizeCount = 1; descriptor_pool_info.pPoolSizes = pool_sizes; - err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr, - &descriptor_pool_); + auto err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr, + &descriptor_pool_); CheckResult(err, "vkCreateDescriptorPool"); // Create the descriptor set layout used for our uniform buffer. @@ -180,7 +104,7 @@ BufferCache::BufferCache(RegisterFile* register_file, // Initialize descriptor set with our buffers. 
VkDescriptorBufferInfo buffer_info; - buffer_info.buffer = transient_uniform_buffer_; + buffer_info.buffer = transient_buffer_->gpu_buffer(); buffer_info.offset = 0; buffer_info.range = kConstantRegisterUniformRange; VkWriteDescriptorSet descriptor_writes[2]; @@ -212,25 +136,20 @@ BufferCache::~BufferCache() { &transient_descriptor_set_); vkDestroyDescriptorSetLayout(device_, descriptor_set_layout_, nullptr); vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr); - vkUnmapMemory(device_, transient_buffer_memory_); - vkFreeMemory(device_, transient_buffer_memory_, nullptr); - vkDestroyBuffer(device_, transient_uniform_buffer_, nullptr); - vkDestroyBuffer(device_, transient_index_buffer_, nullptr); - vkDestroyBuffer(device_, transient_vertex_buffer_, nullptr); + transient_buffer_->Shutdown(); } std::pair BufferCache::UploadConstantRegisters( const Shader::ConstantRegisterMap& vertex_constant_register_map, - const Shader::ConstantRegisterMap& pixel_constant_register_map) { + const Shader::ConstantRegisterMap& pixel_constant_register_map, + std::shared_ptr fence) { // Fat struct, including all registers: // struct { // vec4 float[512]; // uint bool[8]; // uint loop[32]; // }; - size_t total_size = - xe::round_up(kConstantRegisterUniformRange, uniform_buffer_alignment_); - auto offset = AllocateTransientData(uniform_buffer_alignment_, total_size); + auto offset = AllocateTransientData(kConstantRegisterUniformRange, fence); if (offset == VK_WHOLE_SIZE) { // OOM. return {VK_WHOLE_SIZE, VK_WHOLE_SIZE}; @@ -238,8 +157,7 @@ std::pair BufferCache::UploadConstantRegisters( // Copy over all the registers. 
const auto& values = register_file_->values; - uint8_t* dest_ptr = - reinterpret_cast(transient_buffer_data_) + offset; + uint8_t* dest_ptr = transient_buffer_->host_base() + offset; std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32, (512 * 4 * 4)); dest_ptr += 512 * 4 * 4; @@ -258,8 +176,8 @@ std::pair BufferCache::UploadConstantRegisters( // constant indexing. #if 0 // Allocate space in the buffer for our data. - auto offset = AllocateTransientData(uniform_buffer_alignment_, - constant_register_map.packed_byte_length); + auto offset = + AllocateTransientData(constant_register_map.packed_byte_length, fence); if (offset == VK_WHOLE_SIZE) { // OOM. return VK_WHOLE_SIZE; @@ -304,11 +222,12 @@ std::pair BufferCache::UploadConstantRegisters( } std::pair BufferCache::UploadIndexBuffer( - const void* source_ptr, size_t source_length, IndexFormat format) { + const void* source_ptr, size_t source_length, IndexFormat format, + std::shared_ptr fence) { // TODO(benvanik): check cache. // Allocate space in the buffer for our data. - auto offset = AllocateTransientData(index_buffer_alignment_, source_length); + auto offset = AllocateTransientData(source_length, fence); if (offset == VK_WHOLE_SIZE) { // OOM. return {nullptr, VK_WHOLE_SIZE}; @@ -319,25 +238,24 @@ std::pair BufferCache::UploadIndexBuffer( // TODO(benvanik): memcpy then use compute shaders to swap? if (format == IndexFormat::kInt16) { // Endian::k8in16, swap half-words. - xe::copy_and_swap_16_aligned( - reinterpret_cast(transient_buffer_data_) + offset, source_ptr, - source_length / 2); + xe::copy_and_swap_16_aligned(transient_buffer_->host_base() + offset, + source_ptr, source_length / 2); } else if (format == IndexFormat::kInt32) { // Endian::k8in32, swap words. 
- xe::copy_and_swap_32_aligned( - reinterpret_cast(transient_buffer_data_) + offset, source_ptr, - source_length / 4); + xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset, + source_ptr, source_length / 4); } - return {transient_index_buffer_, offset}; + return {transient_buffer_->gpu_buffer(), offset}; } std::pair BufferCache::UploadVertexBuffer( - const void* source_ptr, size_t source_length) { + const void* source_ptr, size_t source_length, + std::shared_ptr fence) { // TODO(benvanik): check cache. // Allocate space in the buffer for our data. - auto offset = AllocateTransientData(vertex_buffer_alignment_, source_length); + auto offset = AllocateTransientData(source_length, fence); if (offset == VK_WHOLE_SIZE) { // OOM. return {nullptr, VK_WHOLE_SIZE}; @@ -346,59 +264,34 @@ std::pair BufferCache::UploadVertexBuffer( // Copy data into the buffer. // TODO(benvanik): memcpy then use compute shaders to swap? // Endian::k8in32, swap words. - xe::copy_and_swap_32_aligned( - reinterpret_cast(transient_buffer_data_) + offset, source_ptr, - source_length / 4); + xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset, + source_ptr, source_length / 4); - return {transient_vertex_buffer_, offset}; + return {transient_buffer_->gpu_buffer(), offset}; } -VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize alignment, - VkDeviceSize length) { +VkDeviceSize BufferCache::AllocateTransientData( + VkDeviceSize length, std::shared_ptr fence) { // Try fast path (if we have space). - VkDeviceSize offset = TryAllocateTransientData(alignment, length); + VkDeviceSize offset = TryAllocateTransientData(length, fence); if (offset != VK_WHOLE_SIZE) { return offset; } // Ran out of easy allocations. // Try consuming fences before we panic. - assert_always("Reclamation not yet implemented"); + transient_buffer_->Scavenge(); // Try again. It may still fail if we didn't get enough space back. 
- return TryAllocateTransientData(alignment, length); + offset = TryAllocateTransientData(length, fence); + return offset; } -VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize alignment, - VkDeviceSize length) { - if (transient_tail_offset_ >= transient_head_offset_) { - // Tail follows head, so things are easy: - // | H----T | - if (xe::round_up(transient_tail_offset_, alignment) + length <= - transient_capacity_) { - // Allocation fits from tail to end of buffer, so grow. - // | H----**T | - VkDeviceSize offset = xe::round_up(transient_tail_offset_, alignment); - transient_tail_offset_ = offset + length; - return offset; - } else if (length + kDeadZone <= transient_head_offset_) { - // Can't fit at the end, but can fit if we wrap around. - // |**T H----....| - VkDeviceSize offset = 0; - transient_tail_offset_ = length; - return offset; - } - } else { - // Head follows tail, so we're reversed: - // |----T H---| - if (xe::round_up(transient_tail_offset_, alignment) + length + kDeadZone <= - transient_head_offset_) { - // Fits from tail to head. - // |----***T H---| - VkDeviceSize offset = xe::round_up(transient_tail_offset_, alignment); - transient_tail_offset_ = offset + length; - return offset; - } +VkDeviceSize BufferCache::TryAllocateTransientData( + VkDeviceSize length, std::shared_ptr fence) { + auto alloc = transient_buffer_->Acquire(length, fence); + if (alloc) { + return alloc->offset; } // No more space. 
@@ -420,9 +313,9 @@ void BufferCache::Flush(VkCommandBuffer command_buffer) { VkMappedMemoryRange dirty_range; dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; dirty_range.pNext = nullptr; - dirty_range.memory = transient_buffer_memory_; + dirty_range.memory = transient_buffer_->gpu_memory(); dirty_range.offset = 0; - dirty_range.size = transient_capacity_; + dirty_range.size = transient_buffer_->capacity(); vkFlushMappedMemoryRanges(device_, 1, &dirty_range); } @@ -432,10 +325,10 @@ void BufferCache::InvalidateCache() { void BufferCache::ClearCache() { // TODO(benvanik): caching. - // Temporary clear. - transient_tail_offset_ = transient_head_offset_; } +void BufferCache::Scavenge() { transient_buffer_->Scavenge(); } + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h index 1c7330e52..ee09585b5 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.h +++ b/src/xenia/gpu/vulkan/buffer_cache.h @@ -13,6 +13,7 @@ #include "xenia/gpu/register_file.h" #include "xenia/gpu/shader.h" #include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/circular_buffer.h" #include "xenia/ui/vulkan/vulkan.h" #include "xenia/ui/vulkan/vulkan_device.h" @@ -50,22 +51,24 @@ class BufferCache { // The returned offsets may alias. std::pair UploadConstantRegisters( const Shader::ConstantRegisterMap& vertex_constant_register_map, - const Shader::ConstantRegisterMap& pixel_constant_register_map); + const Shader::ConstantRegisterMap& pixel_constant_register_map, + std::shared_ptr fence); // Uploads index buffer data from guest memory, possibly eliding with // recently uploaded data or cached copies. // Returns a buffer and offset that can be used with vkCmdBindIndexBuffer. // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). 
- std::pair UploadIndexBuffer(const void* source_ptr, - size_t source_length, - IndexFormat format); + std::pair UploadIndexBuffer( + const void* source_ptr, size_t source_length, IndexFormat format, + std::shared_ptr fence); // Uploads vertex buffer data from guest memory, possibly eliding with // recently uploaded data or cached copies. // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers. // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). - std::pair UploadVertexBuffer(const void* source_ptr, - size_t source_length); + std::pair UploadVertexBuffer( + const void* source_ptr, size_t source_length, + std::shared_ptr fence); // Flushes all pending data to the GPU. // Until this is called the GPU is not guaranteed to see any data. @@ -81,36 +84,26 @@ class BufferCache { // Clears all cached content and prevents future elision with pending data. void ClearCache(); + // Wipes all data no longer needed. + void Scavenge(); + private: // Allocates a block of memory in the transient buffer. // When memory is not available fences are checked and space is reclaimed. // Returns VK_WHOLE_SIZE if requested amount of memory is not available. - VkDeviceSize AllocateTransientData(VkDeviceSize alignment, - VkDeviceSize length); + VkDeviceSize AllocateTransientData(VkDeviceSize length, + std::shared_ptr fence); // Tries to allocate a block of memory in the transient buffer. // Returns VK_WHOLE_SIZE if requested amount of memory is not available. - VkDeviceSize TryAllocateTransientData(VkDeviceSize alignment, - VkDeviceSize length); + VkDeviceSize TryAllocateTransientData( + VkDeviceSize length, std::shared_ptr fence); RegisterFile* register_file_ = nullptr; VkDevice device_ = nullptr; // Staging ringbuffer we cycle through fast. Used for data we don't // plan on keeping past the current frame. 
- size_t transient_capacity_ = 0; - VkBuffer transient_uniform_buffer_ = nullptr; - VkBuffer transient_index_buffer_ = nullptr; - VkBuffer transient_vertex_buffer_ = nullptr; - VkDeviceMemory transient_buffer_memory_ = nullptr; - void* transient_buffer_data_ = nullptr; - VkDeviceSize transient_head_offset_ = 0; - VkDeviceSize transient_tail_offset_ = 0; - - // Required alignments for our various types. - // All allocations must start at the appropriate alignment. - VkDeviceSize uniform_buffer_alignment_ = 0; - VkDeviceSize index_buffer_alignment_ = 0; - VkDeviceSize vertex_buffer_alignment_ = 0; + std::unique_ptr transient_buffer_ = nullptr; VkDescriptorPool descriptor_pool_ = nullptr; VkDescriptorSetLayout descriptor_set_layout_ = nullptr; diff --git a/src/xenia/ui/vulkan/circular_buffer.cc b/src/xenia/ui/vulkan/circular_buffer.cc index 110cd6c36..43d868120 100644 --- a/src/xenia/ui/vulkan/circular_buffer.cc +++ b/src/xenia/ui/vulkan/circular_buffer.cc @@ -134,12 +134,13 @@ CircularBuffer::Allocation* CircularBuffer::Acquire( } VkDeviceSize aligned_length = xe::round_up(length, alignment_); + assert_true(write_head_ % alignment_ == 0); if (allocations_.empty()) { // Entire buffer available. 
assert(read_head_ == write_head_); assert(capacity_ > aligned_length); - write_head_ = length; + write_head_ = aligned_length; auto alloc = new Allocation(); alloc->host_ptr = host_base_ + 0; diff --git a/src/xenia/ui/vulkan/circular_buffer.h b/src/xenia/ui/vulkan/circular_buffer.h index 6f0ec2f82..6e4331ab9 100644 --- a/src/xenia/ui/vulkan/circular_buffer.h +++ b/src/xenia/ui/vulkan/circular_buffer.h @@ -46,6 +46,7 @@ class CircularBuffer { VkDeviceSize alignment = 256); void Shutdown(); + VkDeviceSize alignment() const { return alignment_; } VkDeviceSize capacity() const { return capacity_; } VkBuffer gpu_buffer() const { return gpu_buffer_; } VkDeviceMemory gpu_memory() const { return gpu_memory_; } From 2bd603bf182b9ab41de291cc76ad6c530982a7af Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 9 Apr 2016 21:40:18 -0500 Subject: [PATCH 108/145] CircularBuffer: use std::list for allocations instead of a vector. --- src/xenia/ui/vulkan/circular_buffer.cc | 6 +++--- src/xenia/ui/vulkan/circular_buffer.h | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/xenia/ui/vulkan/circular_buffer.cc b/src/xenia/ui/vulkan/circular_buffer.cc index 43d868120..404f7a503 100644 --- a/src/xenia/ui/vulkan/circular_buffer.cc +++ b/src/xenia/ui/vulkan/circular_buffer.cc @@ -211,10 +211,10 @@ void CircularBuffer::Flush(Allocation* allocation) { } void CircularBuffer::Clear() { - for (auto it = allocations_.begin(); it != allocations_.end();) { - delete *it; - it = allocations_.erase(it); + for (auto alloc : allocations_) { + delete alloc; } + allocations_.clear(); write_head_ = read_head_ = 0; } diff --git a/src/xenia/ui/vulkan/circular_buffer.h b/src/xenia/ui/vulkan/circular_buffer.h index 6e4331ab9..54aa916fd 100644 --- a/src/xenia/ui/vulkan/circular_buffer.h +++ b/src/xenia/ui/vulkan/circular_buffer.h @@ -10,7 +10,7 @@ #ifndef XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_ #define XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_ -#include +#include #include 
"xenia/ui/vulkan/vulkan.h" #include "xenia/ui/vulkan/vulkan_device.h" @@ -77,8 +77,7 @@ class CircularBuffer { VkDeviceSize gpu_base_ = 0; uint8_t* host_base_ = nullptr; - std::unordered_map allocation_cache_; - std::vector allocations_; + std::list allocations_; }; } // namespace vulkan From 9b2e2a7275c3cefa0d002bc86deaac5d7f858299 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Wed, 13 Apr 2016 23:17:03 -0500 Subject: [PATCH 109/145] SPIR-V: Hack in OpSelectionMerge as hints to NVidia's shader compiler (TODO: Make a Shader Compiler) --- src/xenia/gpu/shader_translator.cc | 4 +- src/xenia/gpu/shader_translator.h | 3 +- src/xenia/gpu/spirv_shader_translator.cc | 113 +++++++++++++++-------- src/xenia/gpu/spirv_shader_translator.h | 10 +- 4 files changed, 88 insertions(+), 42 deletions(-) diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 1097dbc55..f6bfbdd65 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -369,9 +369,9 @@ bool ShaderTranslator::TranslateBlocks() { AddControlFlowTargetLabel(cf_a, &label_addresses); AddControlFlowTargetLabel(cf_b, &label_addresses); - PreProcessControlFlowInstruction(cf_index); + PreProcessControlFlowInstruction(cf_index, cf_a); ++cf_index; - PreProcessControlFlowInstruction(cf_index); + PreProcessControlFlowInstruction(cf_index, cf_b); ++cf_index; } diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index 7dc173dc5..5df53bc0a 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -79,7 +79,8 @@ class ShaderTranslator { } // Pre-process a control-flow instruction before anything else. - virtual void PreProcessControlFlowInstruction(uint32_t cf_index) {} + virtual void PreProcessControlFlowInstruction( + uint32_t cf_index, const ucode::ControlFlowInstruction& instr) {} // Handles translation for control flow label addresses. 
// This is triggered once for each label required (due to control flow diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 0b0ab0626..855df73f7 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -259,6 +259,7 @@ void SpirvShaderTranslator::StartTranslation() { spv::BuiltIn::BuiltInVertexId); auto vertex_id = b.createLoad(vertex_id_); + vertex_id = b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, vertex_id); auto r0_ptr = b.createAccessChain(spv::StorageClass::StorageClassFunction, registers_ptr_, std::vector({b.makeUintConstant(0)})); @@ -464,16 +465,33 @@ void SpirvShaderTranslator::PostTranslation(Shader* shader) { } void SpirvShaderTranslator::PreProcessControlFlowInstruction( - uint32_t cf_index) { + uint32_t cf_index, const ControlFlowInstruction& instr) { auto& b = *builder_; - cf_blocks_[cf_index] = &b.makeNewBlock(); + if (cf_blocks_.find(cf_index) == cf_blocks_.end()) { + CFBlock block; + block.block = &b.makeNewBlock(); + cf_blocks_[cf_index] = block; + } else { + cf_blocks_[cf_index].block = &b.makeNewBlock(); + } + + if (instr.opcode() == ControlFlowOpcode::kCondJmp) { + auto cf_block = cf_blocks_.find(instr.cond_jmp.address()); + if (cf_block == cf_blocks_.end()) { + CFBlock block; + block.prev_dominates = false; + cf_blocks_[instr.cond_jmp.address()] = block; + } else { + cf_block->second.prev_dominates = false; + } + } else if (instr.opcode() == ControlFlowOpcode::kLoopStart) { + // TODO + } } void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) { auto& b = *builder_; - - EmitUnimplementedTranslationError(); } void SpirvShaderTranslator::ProcessControlFlowInstructionBegin( @@ -482,7 +500,7 @@ void SpirvShaderTranslator::ProcessControlFlowInstructionBegin( if (cf_index == 0) { // Kind of cheaty, but emit a branch to the first block. 
- b.createBranch(cf_blocks_[cf_index]); + b.createBranch(cf_blocks_[cf_index].block); } } @@ -507,7 +525,7 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( predicated_block_end_ = nullptr; // Head has the logic to check if the body should execute. - auto head = cf_blocks_[instr.dword_index]; + auto head = cf_blocks_[instr.dword_index].block; b.setBuildPoint(head); auto body = head; switch (instr.type) { @@ -516,6 +534,7 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( } break; case ParsedExecInstruction::Type::kConditional: { // Based off of bool_consts + // FIXME: Nvidia compiler is complaining about this. std::vector offsets; offsets.push_back(b.makeUintConstant(2)); // bool_consts offsets.push_back(b.makeUintConstant(instr.bool_constant_index / 32)); @@ -532,8 +551,14 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( assert_true(cf_blocks_.size() > instr.dword_index + 1); body = &b.makeNewBlock(); auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, - b.makeUintConstant(uint32_t(instr.condition))); - b.createConditionalBranch(cond, body, cf_blocks_[instr.dword_index + 1]); + b.makeUintConstant(instr.condition ? 1 : 0)); + + auto next_block = cf_blocks_[instr.dword_index + 1]; + if (next_block.prev_dominates) { + b.createNoResultOp(spv::Op::OpSelectionMerge, + {next_block.block->getId(), 0}); + } + b.createConditionalBranch(cond, body, next_block.block); } break; case ParsedExecInstruction::Type::kPredicated: { // Branch based on p0. 
@@ -542,7 +567,13 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), b.makeBoolConstant(instr.condition)); - b.createConditionalBranch(cond, body, cf_blocks_[instr.dword_index + 1]); + + auto next_block = cf_blocks_[instr.dword_index + 1]; + if (next_block.prev_dominates) { + b.createNoResultOp(spv::Op::OpSelectionMerge, + {next_block.block->getId(), 0}); + } + b.createConditionalBranch(cond, body, next_block.block); } break; } @@ -565,7 +596,7 @@ void SpirvShaderTranslator::ProcessExecInstructionEnd( b.makeReturn(false); } else { assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1]); + b.createBranch(cf_blocks_[instr.dword_index + 1].block); } } @@ -573,7 +604,7 @@ void SpirvShaderTranslator::ProcessLoopStartInstruction( const ParsedLoopStartInstruction& instr) { auto& b = *builder_; - auto head = cf_blocks_[instr.dword_index]; + auto head = cf_blocks_[instr.dword_index].block; b.setBuildPoint(head); // TODO: Emit a spv LoopMerge @@ -582,27 +613,27 @@ void SpirvShaderTranslator::ProcessLoopStartInstruction( EmitUnimplementedTranslationError(); assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1]); + b.createBranch(cf_blocks_[instr.dword_index + 1].block); } void SpirvShaderTranslator::ProcessLoopEndInstruction( const ParsedLoopEndInstruction& instr) { auto& b = *builder_; - auto head = cf_blocks_[instr.dword_index]; + auto head = cf_blocks_[instr.dword_index].block; b.setBuildPoint(head); EmitUnimplementedTranslationError(); assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1]); + b.createBranch(cf_blocks_[instr.dword_index + 1].block); } void SpirvShaderTranslator::ProcessCallInstruction( const ParsedCallInstruction& instr) { auto& b = *builder_; - auto head = cf_blocks_[instr.dword_index]; + auto head = 
cf_blocks_[instr.dword_index].block; b.setBuildPoint(head); // Unused instruction(?) @@ -610,14 +641,14 @@ void SpirvShaderTranslator::ProcessCallInstruction( EmitUnimplementedTranslationError(); assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1]); + b.createBranch(cf_blocks_[instr.dword_index + 1].block); } void SpirvShaderTranslator::ProcessReturnInstruction( const ParsedReturnInstruction& instr) { auto& b = *builder_; - auto head = cf_blocks_[instr.dword_index]; + auto head = cf_blocks_[instr.dword_index].block; b.setBuildPoint(head); // Unused instruction(?) @@ -625,7 +656,7 @@ void SpirvShaderTranslator::ProcessReturnInstruction( EmitUnimplementedTranslationError(); assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1]); + b.createBranch(cf_blocks_[instr.dword_index + 1].block); } // CF jump @@ -633,11 +664,11 @@ void SpirvShaderTranslator::ProcessJumpInstruction( const ParsedJumpInstruction& instr) { auto& b = *builder_; - auto head = cf_blocks_[instr.dword_index]; + auto head = cf_blocks_[instr.dword_index].block; b.setBuildPoint(head); switch (instr.type) { case ParsedJumpInstruction::Type::kUnconditional: { - b.createBranch(cf_blocks_[instr.target_address]); + b.createBranch(cf_blocks_[instr.target_address].block); } break; case ParsedJumpInstruction::Type::kConditional: { assert_true(cf_blocks_.size() > instr.dword_index + 1); @@ -652,14 +683,14 @@ void SpirvShaderTranslator::ProcessJumpInstruction( // Bitfield extract the bool constant. 
v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v, - b.makeIntConstant(instr.bool_constant_index % 32), - b.makeIntConstant(1)); + b.makeUintConstant(instr.bool_constant_index % 32), + b.makeUintConstant(1)); // Conditional branch auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, - b.makeUintConstant(uint32_t(instr.condition))); - b.createConditionalBranch(cond, cf_blocks_[instr.target_address], - cf_blocks_[instr.dword_index + 1]); + b.makeUintConstant(instr.condition ? 1 : 0)); + b.createConditionalBranch(cond, cf_blocks_[instr.target_address].block, + cf_blocks_[instr.dword_index + 1].block); } break; case ParsedJumpInstruction::Type::kPredicated: { assert_true(cf_blocks_.size() > instr.dword_index + 1); @@ -667,8 +698,8 @@ void SpirvShaderTranslator::ProcessJumpInstruction( auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), b.makeBoolConstant(instr.condition)); - b.createConditionalBranch(cond, cf_blocks_[instr.target_address], - cf_blocks_[instr.dword_index + 1]); + b.createConditionalBranch(cond, cf_blocks_[instr.target_address].block, + cf_blocks_[instr.dword_index + 1].block); } break; } } @@ -677,7 +708,7 @@ void SpirvShaderTranslator::ProcessAllocInstruction( const ParsedAllocInstruction& instr) { auto& b = *builder_; - auto head = cf_blocks_[instr.dword_index]; + auto head = cf_blocks_[instr.dword_index].block; b.setBuildPoint(head); switch (instr.type) { @@ -695,7 +726,7 @@ void SpirvShaderTranslator::ProcessAllocInstruction( } assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1]); + b.createBranch(cf_blocks_[instr.dword_index + 1].block); } void SpirvShaderTranslator::ProcessVertexFetchInstruction( @@ -725,6 +756,8 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( predicated_block_cond_ = instr.predicate_condition; predicated_block_end_ = &b.makeNewBlock(); + b.createNoResultOp(spv::Op::OpSelectionMerge, + 
{predicated_block_end_->getId(), 0}); b.createConditionalBranch(pred_cond, block, predicated_block_end_); b.setBuildPoint(block); } @@ -803,6 +836,8 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( predicated_block_cond_ = instr.predicate_condition; predicated_block_end_ = &b.makeNewBlock(); + b.createNoResultOp(spv::Op::OpSelectionMerge, + {predicated_block_end_->getId(), 0}); b.createConditionalBranch(pred_cond, block, predicated_block_end_); b.setBuildPoint(block); } @@ -905,6 +940,8 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( predicated_block_cond_ = instr.predicate_condition; predicated_block_end_ = &b.makeNewBlock(); + b.createNoResultOp(spv::Op::OpSelectionMerge, + {predicated_block_end_->getId(), 0}); b.createConditionalBranch(pred_cond, block, predicated_block_end_); b.setBuildPoint(block); } @@ -1339,6 +1376,8 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( predicated_block_cond_ = instr.predicate_condition; predicated_block_end_ = &b.makeNewBlock(); + b.createNoResultOp(spv::Op::OpSelectionMerge, + {predicated_block_end_->getId(), 0}); b.createConditionalBranch(pred_cond, block, predicated_block_end_); b.setBuildPoint(block); } @@ -1965,6 +2004,14 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, storage_value = b.createLoad(storage_pointer); } + // Clamp the input value. + if (result.is_clamped) { + source_value_id = CreateGlslStd450InstructionCall( + spv::NoPrecision, b.getTypeId(source_value_id), + spv::GLSLstd450::kFClamp, + {source_value_id, b.makeFloatConstant(0.0), b.makeFloatConstant(1.0)}); + } + // Convert to the appropriate type, if needed. if (b.getTypeId(source_value_id) != storage_type) { std::vector constituents; @@ -1990,14 +2037,6 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, b.createConstructor(spv::NoPrecision, constituents, storage_type); } - // Clamp the input value. 
- if (result.is_clamped) { - source_value_id = CreateGlslStd450InstructionCall( - spv::NoPrecision, b.getTypeId(source_value_id), - spv::GLSLstd450::kFClamp, - {source_value_id, b.makeFloatConstant(0.0), b.makeFloatConstant(1.0)}); - } - // swizzle if (!result.is_standard_swizzle()) { std::vector operands; diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index f30d3ab44..39d3899c1 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -55,7 +55,8 @@ class SpirvShaderTranslator : public ShaderTranslator { std::vector CompleteTranslation() override; void PostTranslation(Shader* shader) override; - void PreProcessControlFlowInstruction(uint32_t cf_index) override; + void PreProcessControlFlowInstruction( + uint32_t cf_index, const ucode::ControlFlowInstruction& instr) override; void ProcessLabel(uint32_t cf_index) override; void ProcessControlFlowInstructionBegin(uint32_t cf_index) override; void ProcessControlFlowInstructionEnd(uint32_t cf_index) override; @@ -133,7 +134,12 @@ class SpirvShaderTranslator : public ShaderTranslator { // Map of {binding -> {offset -> spv input}} std::map> vertex_binding_map_; - std::map cf_blocks_; + + struct CFBlock { + spv::Block* block = nullptr; + bool prev_dominates = true; + }; + std::map cf_blocks_; }; } // namespace gpu From 6101b70641436c4e9e068434caeca6a7026f8dc0 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 29 Apr 2016 13:09:39 -0500 Subject: [PATCH 110/145] Fix the Vulkan immediate drawer not drawing lines. 
--- src/xenia/ui/vulkan/vulkan_immediate_drawer.cc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index 23dffd6c6..a68b44c5f 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -538,7 +538,7 @@ VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext* graphics_context) pipeline_info.renderPass = context_->swap_chain()->render_pass(); pipeline_info.subpass = 0; pipeline_info.basePipelineHandle = nullptr; - pipeline_info.basePipelineIndex = 0; + pipeline_info.basePipelineIndex = -1; err = vkCreateGraphicsPipelines(*device, nullptr, 1, &pipeline_info, nullptr, &triangle_pipeline_); CheckResult(err, "vkCreateGraphicsPipelines"); @@ -547,7 +547,7 @@ VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext* graphics_context) pipeline_info.flags = VK_PIPELINE_CREATE_DERIVATIVE_BIT; input_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; pipeline_info.basePipelineHandle = triangle_pipeline_; - pipeline_info.basePipelineIndex = 0; + pipeline_info.basePipelineIndex = -1; err = vkCreateGraphicsPipelines(*device, nullptr, 1, &pipeline_info, nullptr, &line_pipeline_); CheckResult(err, "vkCreateGraphicsPipelines"); @@ -672,9 +672,6 @@ void VulkanImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { auto swap_chain = context_->swap_chain(); - if (draw.primitive_type != ImmediatePrimitiveType::kTriangles) { - return; - } switch (draw.primitive_type) { case ImmediatePrimitiveType::kLines: vkCmdBindPipeline(current_cmd_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS, From cbccc785cc45eda064eb6e0c7c3beb5ed85e58ee Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 1 May 2016 10:15:33 -0500 Subject: [PATCH 111/145] TraceViewer: Build a tree of all command buffers and display that instead of a flat list. 
--- src/xenia/gpu/trace_player.cc | 24 ++++--- src/xenia/gpu/trace_player.h | 4 +- src/xenia/gpu/trace_reader.cc | 22 ++++++ src/xenia/gpu/trace_reader.h | 42 ++++++++++++ src/xenia/gpu/trace_viewer.cc | 124 ++++++++++++++++++++++++---------- src/xenia/gpu/trace_viewer.h | 2 + 6 files changed, 171 insertions(+), 47 deletions(-) diff --git a/src/xenia/gpu/trace_player.cc b/src/xenia/gpu/trace_player.cc index 54c199736..b79b49df2 100644 --- a/src/xenia/gpu/trace_player.cc +++ b/src/xenia/gpu/trace_player.cc @@ -51,7 +51,7 @@ void TracePlayer::SeekFrame(int target_frame) { assert_true(frame->start_ptr <= frame->end_ptr); PlayTrace(frame->start_ptr, frame->end_ptr - frame->start_ptr, - TracePlaybackMode::kBreakOnSwap); + TracePlaybackMode::kBreakOnSwap, false); } void TracePlayer::SeekCommand(int target_command) { @@ -71,11 +71,11 @@ void TracePlayer::SeekCommand(int target_command) { const auto& previous_command = frame->commands[previous_command_index]; PlayTrace(previous_command.end_ptr, command.end_ptr - previous_command.end_ptr, - TracePlaybackMode::kBreakOnSwap); + TracePlaybackMode::kBreakOnSwap, false); } else { // Full playback from frame start. 
PlayTrace(frame->start_ptr, command.end_ptr - frame->start_ptr, - TracePlaybackMode::kBreakOnSwap); + TracePlaybackMode::kBreakOnSwap, true); } } @@ -84,19 +84,25 @@ void TracePlayer::WaitOnPlayback() { } void TracePlayer::PlayTrace(const uint8_t* trace_data, size_t trace_size, - TracePlaybackMode playback_mode) { - graphics_system_->command_processor()->CallInThread( - [this, trace_data, trace_size, playback_mode]() { - PlayTraceOnThread(trace_data, trace_size, playback_mode); - }); + TracePlaybackMode playback_mode, + bool clear_caches) { + playing_trace_ = true; + graphics_system_->command_processor()->CallInThread([=]() { + PlayTraceOnThread(trace_data, trace_size, playback_mode, clear_caches); + }); } void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data, size_t trace_size, - TracePlaybackMode playback_mode) { + TracePlaybackMode playback_mode, + bool clear_caches) { auto memory = graphics_system_->memory(); auto command_processor = graphics_system_->command_processor(); + if (clear_caches) { + command_processor->ClearCaches(); + } + command_processor->set_swap_mode(SwapMode::kIgnored); playback_percent_ = 0; auto trace_end = trace_data + trace_size; diff --git a/src/xenia/gpu/trace_player.h b/src/xenia/gpu/trace_player.h index d3926d460..0c3c6571a 100644 --- a/src/xenia/gpu/trace_player.h +++ b/src/xenia/gpu/trace_player.h @@ -50,9 +50,9 @@ class TracePlayer : public TraceReader { private: void PlayTrace(const uint8_t* trace_data, size_t trace_size, - TracePlaybackMode playback_mode); + TracePlaybackMode playback_mode, bool clear_caches); void PlayTraceOnThread(const uint8_t* trace_data, size_t trace_size, - TracePlaybackMode playback_mode); + TracePlaybackMode playback_mode, bool clear_caches); xe::ui::Loop* loop_; GraphicsSystem* graphics_system_; diff --git a/src/xenia/gpu/trace_reader.cc b/src/xenia/gpu/trace_reader.cc index fb58c436b..16980c28c 100644 --- a/src/xenia/gpu/trace_reader.cc +++ b/src/xenia/gpu/trace_reader.cc @@ -75,6 +75,10 @@ void 
TraceReader::ParseTrace() { const uint8_t* packet_start_ptr = nullptr; const uint8_t* last_ptr = trace_ptr; bool pending_break = false; + auto current_command_buffer = new CommandBuffer(); + current_frame.command_tree = + std::unique_ptr(current_command_buffer); + while (trace_ptr < trace_data_ + trace_size_) { ++current_frame.command_count; auto type = static_cast(xe::load(trace_ptr)); @@ -94,11 +98,24 @@ void TraceReader::ParseTrace() { auto cmd = reinterpret_cast(trace_ptr); trace_ptr += sizeof(*cmd) + cmd->count * 4; + + // Traverse down a level. + auto sub_command_buffer = new CommandBuffer(); + sub_command_buffer->parent = current_command_buffer; + current_command_buffer->commands.push_back( + CommandBuffer::Command(sub_command_buffer)); + current_command_buffer = sub_command_buffer; break; } case TraceCommandType::kIndirectBufferEnd: { auto cmd = reinterpret_cast(trace_ptr); trace_ptr += sizeof(*cmd); + + // Go back up a level. If parent is null, this frame started in an + // indirect buffer. 
+ if (current_command_buffer->parent) { + current_command_buffer = current_command_buffer->parent; + } break; } case TraceCommandType::kPacketStart: { @@ -125,6 +142,8 @@ void TraceReader::ParseTrace() { command.end_ptr = trace_ptr; current_frame.commands.push_back(std::move(command)); last_ptr = trace_ptr; + current_command_buffer->commands.push_back(CommandBuffer::Command( + uint32_t(current_frame.commands.size() - 1))); break; } case PacketCategory::kSwap: @@ -136,6 +155,9 @@ void TraceReader::ParseTrace() { if (pending_break) { current_frame.end_ptr = trace_ptr; frames_.push_back(std::move(current_frame)); + current_command_buffer = new CommandBuffer(); + current_frame.command_tree = + std::unique_ptr(current_command_buffer); current_frame.start_ptr = trace_ptr; current_frame.end_ptr = nullptr; current_frame.command_count = 0; diff --git a/src/xenia/gpu/trace_reader.h b/src/xenia/gpu/trace_reader.h index 5445bd1f9..b3245da46 100644 --- a/src/xenia/gpu/trace_reader.h +++ b/src/xenia/gpu/trace_reader.h @@ -11,6 +11,7 @@ #define XENIA_GPU_TRACE_READER_H_ #include +#include #include "xenia/base/mapped_memory.h" #include "xenia/gpu/trace_protocol.h" @@ -51,6 +52,42 @@ namespace gpu { class TraceReader { public: + struct CommandBuffer { + struct Command { + enum class Type { + kCommand, + kBuffer, + }; + + Command() {} + Command(Command&& other) { + type = other.type; + command_id = other.command_id; + command_subtree = std::move(other.command_subtree); + } + Command(CommandBuffer* buf) { + type = Type::kBuffer; + command_subtree = std::unique_ptr(buf); + } + Command(uint32_t id) { + type = Type::kCommand; + command_id = id; + } + ~Command() = default; + + Type type; + uint32_t command_id = -1; + std::unique_ptr command_subtree = nullptr; + }; + + CommandBuffer() {} + ~CommandBuffer() {} + + // Parent command buffer, if one exists. 
+ CommandBuffer* parent = nullptr; + std::vector commands; + }; + struct Frame { struct Command { enum class Type { @@ -74,7 +111,12 @@ class TraceReader { const uint8_t* start_ptr = nullptr; const uint8_t* end_ptr = nullptr; int command_count = 0; + + // Flat list of all commands in this frame. std::vector commands; + + // Tree of all command buffers + std::unique_ptr command_tree; }; TraceReader() = default; diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index 7ce20c7ca..8079631f5 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -390,6 +390,66 @@ void TraceViewer::DrawPacketDisassemblerUI() { ImGui::End(); } +int TraceViewer::RecursiveDrawCommandBufferUI( + const TraceReader::Frame* frame, TraceReader::CommandBuffer* buffer) { + int selected_id = -1; + int column_width = int(ImGui::GetContentRegionMax().x); + + for (size_t i = 0; i < buffer->commands.size(); i++) { + switch (buffer->commands[i].type) { + case TraceReader::CommandBuffer::Command::Type::kBuffer: { + auto subtree = buffer->commands[i].command_subtree.get(); + if (!subtree->commands.size()) { + continue; + } + + ImGui::PushID(int(i)); + if (ImGui::TreeNode((void*)0, "Indirect Buffer %d", i)) { + ImGui::Indent(); + auto id = RecursiveDrawCommandBufferUI( + frame, buffer->commands[i].command_subtree.get()); + ImGui::Unindent(); + ImGui::TreePop(); + + if (id != -1) { + selected_id = id; + } + } + ImGui::PopID(); + } break; + + case TraceReader::CommandBuffer::Command::Type::kCommand: { + uint32_t command_id = buffer->commands[i].command_id; + + const auto& command = frame->commands[command_id]; + bool is_selected = command_id == player_->current_command_index(); + const char* label; + switch (command.type) { + case TraceReader::Frame::Command::Type::kDraw: + label = "Draw"; + break; + case TraceReader::Frame::Command::Type::kSwap: + label = "Swap"; + break; + } + + ImGui::PushID(command_id); + if (ImGui::Selectable(label, &is_selected)) { + 
selected_id = command_id; + } + ImGui::SameLine(column_width - 60.0f); + ImGui::Text("%d", command_id); + ImGui::PopID(); + // if (did_seek && target_command == i) { + // ImGui::SetScrollPosHere(); + // } + } break; + } + } + + return selected_id; +} + void TraceViewer::DrawCommandListUI() { ImGui::SetNextWindowPos(ImVec2(5, 70), ImGuiSetCond_FirstUseEver); if (!ImGui::Begin("Command List", nullptr, ImVec2(200, 640))) { @@ -473,31 +533,12 @@ void TraceViewer::DrawCommandListUI() { ImGui::SetScrollPosHere(); } - for (int i = 0; i < int(frame->commands.size()); ++i) { - ImGui::PushID(i); - is_selected = i == player_->current_command_index(); - const auto& command = frame->commands[i]; - const char* label; - switch (command.type) { - case TraceReader::Frame::Command::Type::kDraw: - label = "Draw"; - break; - case TraceReader::Frame::Command::Type::kSwap: - label = "Swap"; - break; - } - if (ImGui::Selectable(label, &is_selected)) { - if (!player_->is_playing_trace()) { - player_->SeekCommand(i); - } - } - ImGui::SameLine(column_width - 60.0f); - ImGui::Text("%d", i); - ImGui::PopID(); - if (did_seek && target_command == i) { - ImGui::SetScrollPosHere(); - } + auto id = RecursiveDrawCommandBufferUI(frame, frame->command_tree.get()); + if (id != -1 && id != player_->current_command_index() && + !player_->is_playing_trace()) { + player_->SeekCommand(id); } + ImGui::EndChild(); ImGui::End(); } @@ -639,8 +680,8 @@ void TraceViewer::DrawTextureInfo( ImGui::Columns(2); ImVec2 button_size(256, 256); - if (ImGui::ImageButton(ImTextureID(texture | ui::ImGuiDrawer::kIgnoreAlpha), - button_size, ImVec2(0, 0), ImVec2(1, 1))) { + if (ImGui::ImageButton(ImTextureID(texture), button_size, ImVec2(0, 0), + ImVec2(1, 1))) { // show viewer } ImGui::NextColumn(); @@ -1108,11 +1149,14 @@ void TraceViewer::DrawStateUI() { ((window_scissor_br >> 16) & 0x7FFF) - ((window_scissor_tl >> 16) & 0x7FFF)); uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + uint32_t surface_actual = 
(surface_info >> 18) & 0x3FFF; uint32_t surface_pitch = surface_info & 0x3FFF; auto surface_msaa = (surface_info >> 16) & 0x3; static const char* kMsaaNames[] = { "1X", "2X", "4X", }; + ImGui::BulletText("Surface Pitch - Actual: %d - %d", surface_pitch, + surface_actual); ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]); uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; bool vport_xscale_enable = (vte_control & (1 << 0)) > 0; @@ -1124,6 +1168,9 @@ void TraceViewer::DrawStateUI() { assert_true(vport_xscale_enable == vport_yscale_enable == vport_zscale_enable == vport_xoffset_enable == vport_yoffset_enable == vport_zoffset_enable); + if (!vport_xscale_enable) { + ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored); + } ImGui::BulletText( "Viewport Offset: %f, %f, %f", vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0, @@ -1134,6 +1181,10 @@ void TraceViewer::DrawStateUI() { vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1, vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1, vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1); + if (!vport_xscale_enable) { + ImGui::PopStyleColor(); + } + ImGui::BulletText("Vertex Format: %s, %s, %s, %s", ((vte_control >> 8) & 0x1) ? "x/w0" : "x", ((vte_control >> 8) & 0x1) ? "y/w0" : "y", @@ -1318,7 +1369,7 @@ void TraceViewer::DrawStateUI() { if (write_mask) { auto color_target = GetColorRenderTarget(surface_pitch, surface_msaa, color_base, color_format); - tex = ImTextureID(color_target | ui::ImGuiDrawer::kIgnoreAlpha); + tex = ImTextureID(color_target); if (ImGui::ImageButton(tex, button_size, ImVec2(0, 0), ImVec2(1, 1))) { // show viewer @@ -1330,10 +1381,9 @@ void TraceViewer::DrawStateUI() { } if (ImGui::IsItemHovered()) { ImGui::BeginTooltip(); - ImGui::Text( - "Color Target %d (%s), base %.4X, pitch %d, msaa %d, format %d", - i, write_mask ? 
"enabled" : "disabled", color_base, surface_pitch, - surface_msaa, color_format); + ImGui::Text("Color Target %d (%s), base %.4X, pitch %d, format %d", i, + write_mask ? "enabled" : "disabled", color_base, + surface_pitch, color_format); if (tex) { ImVec2 rel_pos; @@ -1407,17 +1457,19 @@ void TraceViewer::DrawStateUI() { auto button_pos = ImGui::GetCursorScreenPos(); ImVec2 button_size(256, 256); - ImGui::ImageButton( - ImTextureID(depth_target | ui::ImGuiDrawer::kIgnoreAlpha), - button_size, ImVec2(0, 0), ImVec2(1, 1)); + ImGui::ImageButton(ImTextureID(depth_target), button_size, ImVec2(0, 0), + ImVec2(1, 1)); if (ImGui::IsItemHovered()) { ImGui::BeginTooltip(); + ImGui::Text("Depth Target: base %.4X, pitch %d, format %d", depth_base, + surface_pitch, depth_format); + ImVec2 rel_pos; rel_pos.x = ImGui::GetMousePos().x - button_pos.x; rel_pos.y = ImGui::GetMousePos().y - button_pos.y; - ZoomedImage(ImTextureID(depth_target | ui::ImGuiDrawer::kIgnoreAlpha), - rel_pos, button_size, 32.f, ImVec2(256, 256)); + ZoomedImage(ImTextureID(depth_target), rel_pos, button_size, 32.f, + ImVec2(256, 256)); ImGui::EndTooltip(); } diff --git a/src/xenia/gpu/trace_viewer.h b/src/xenia/gpu/trace_viewer.h index 6f7c900fc..7e82ad831 100644 --- a/src/xenia/gpu/trace_viewer.h +++ b/src/xenia/gpu/trace_viewer.h @@ -80,6 +80,8 @@ class TraceViewer { void DrawUI(); void DrawControllerUI(); void DrawPacketDisassemblerUI(); + int RecursiveDrawCommandBufferUI(const TraceReader::Frame* frame, + TraceReader::CommandBuffer* buffer); void DrawCommandListUI(); void DrawStateUI(); From aa038fbf23cb26e411b38252231e0f920c46e56d Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 1 May 2016 15:48:31 -0500 Subject: [PATCH 112/145] Skip the wrapping packet end after parsing IB end (to avoid false draws appearing) --- src/xenia/gpu/trace_reader.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/xenia/gpu/trace_reader.cc b/src/xenia/gpu/trace_reader.cc index 16980c28c..6bedfb9b4 100644 --- a/src/xenia/gpu/trace_reader.cc +++ b/src/xenia/gpu/trace_reader.cc @@ -111,6 +111,11 @@ void TraceReader::ParseTrace() { auto cmd = reinterpret_cast(trace_ptr); trace_ptr += sizeof(*cmd); + // IB packet is wrapped in a kPacketStart/kPacketEnd. Skip the end. + auto end_cmd = reinterpret_cast(trace_ptr); + assert_true(end_cmd->type == TraceCommandType::kPacketEnd); + trace_ptr += sizeof(*cmd); + // Go back up a level. If parent is null, this frame started in an // indirect buffer. if (current_command_buffer->parent) { From d18c99aab6517e560dc2b8f2a022ddea7abf1a35 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 3 May 2016 14:05:34 -0500 Subject: [PATCH 113/145] RenderCache: Account for MSAA when calculating tile sizes. Add a new flag to enable native MSAA (this does not work properly at the moment) --- src/xenia/gpu/vulkan/render_cache.cc | 283 +++++++++++++---------- src/xenia/gpu/vulkan/render_cache.h | 25 +- src/xenia/gpu/vulkan/vulkan_gpu_flags.cc | 3 + src/xenia/gpu/vulkan/vulkan_gpu_flags.h | 2 + 4 files changed, 186 insertions(+), 127 deletions(-) diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 3df5e4c9e..7d73951b5 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -149,7 +149,8 @@ CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device, vulkan_format = DepthRenderTargetFormatToVkFormat(edram_format); } assert_true(vulkan_format != VK_FORMAT_UNDEFINED); - assert_true(bpp == 4); + // FIXME(DrChat): Was this check necessary? + // assert_true(bpp == 4); // Create the image with the desired properties. 
VkImageCreateInfo image_info; @@ -165,23 +166,25 @@ CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device, image_info.extent.depth = 1; image_info.mipLevels = 1; image_info.arrayLayers = 1; - // image_info.samples = VK_SAMPLE_COUNT_1_BIT; - //* - auto msaa_samples = static_cast(key.msaa_samples); - switch (msaa_samples) { - case MsaaSamples::k1X: - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - break; - case MsaaSamples::k2X: - image_info.samples = VK_SAMPLE_COUNT_2_BIT; - break; - case MsaaSamples::k4X: - image_info.samples = VK_SAMPLE_COUNT_4_BIT; - break; - default: - assert_unhandled_case(msaa_samples); + if (FLAGS_vulkan_native_msaa) { + auto msaa_samples = static_cast(key.msaa_samples); + switch (msaa_samples) { + case MsaaSamples::k1X: + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + break; + case MsaaSamples::k2X: + image_info.samples = VK_SAMPLE_COUNT_2_BIT; + break; + case MsaaSamples::k4X: + image_info.samples = VK_SAMPLE_COUNT_4_BIT; + break; + default: + assert_unhandled_case(msaa_samples); + } + } else { + image_info.samples = VK_SAMPLE_COUNT_1_BIT; } - //*/ + sample_count = image_info.samples; image_info.tiling = VK_IMAGE_TILING_OPTIMAL; image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | @@ -243,7 +246,10 @@ CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device, image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_barrier.image = image; - image_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + image_barrier.subresourceRange.aspectMask = + key.color_or_depth + ? 
VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; image_barrier.subresourceRange.baseMipLevel = 0; image_barrier.subresourceRange.levelCount = 1; image_barrier.subresourceRange.baseArrayLayer = 0; @@ -338,19 +344,23 @@ CachedRenderPass::CachedRenderPass(VkDevice device, std::memcpy(&config, &desired_config, sizeof(config)); VkSampleCountFlagBits sample_count; - switch (desired_config.surface_msaa) { - case MsaaSamples::k1X: - sample_count = VK_SAMPLE_COUNT_1_BIT; - break; - case MsaaSamples::k2X: - sample_count = VK_SAMPLE_COUNT_2_BIT; - break; - case MsaaSamples::k4X: - sample_count = VK_SAMPLE_COUNT_4_BIT; - break; - default: - assert_unhandled_case(desired_config.surface_msaa); - break; + if (FLAGS_vulkan_native_msaa) { + switch (desired_config.surface_msaa) { + case MsaaSamples::k1X: + sample_count = VK_SAMPLE_COUNT_1_BIT; + break; + case MsaaSamples::k2X: + sample_count = VK_SAMPLE_COUNT_2_BIT; + break; + case MsaaSamples::k4X: + sample_count = VK_SAMPLE_COUNT_4_BIT; + break; + default: + assert_unhandled_case(desired_config.surface_msaa); + break; + } + } else { + sample_count = VK_SAMPLE_COUNT_1_BIT; } // Initialize all attachments to default unused. 
@@ -538,7 +548,7 @@ bool RenderCache::dirty() const { regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; dirty |= cur_regs.pa_sc_window_scissor_br != regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; - dirty |= (cur_regs.rb_depthcontrol & (0x4 | 0x2)) != + dirty |= (cur_regs.rb_depthcontrol & (0x4 | 0x2)) < (regs[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2)); return dirty; } @@ -561,7 +571,6 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, bool dirty = false; dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); - dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); dirty |= SetShadowRegister(®s.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); @@ -572,7 +581,7 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br, XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); dirty |= - (regs.rb_depthcontrol & (0x4 | 0x2)) != + (regs.rb_depthcontrol & (0x4 | 0x2)) < (register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2)); regs.rb_depthcontrol = register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2); @@ -593,14 +602,8 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, return nullptr; } - // Speculatively see if targets are actually used so we can skip copies - for (int i = 0; i < 4; i++) { - uint32_t color_mask = (regs.rb_color_mask >> (i * 4)) & 0xF; - config->color[i].used = - config->mode_control == xenos::ModeControl::kColorDepth && - color_mask != 0; - } - config->depth_stencil.used = !!(regs.rb_depthcontrol & (0x4 | 0x2)); + // Initial state update. 
+ UpdateState(); current_state_.render_pass = render_pass; current_state_.render_pass_handle = render_pass->handle; @@ -610,7 +613,7 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, // Depth auto depth_target = current_state_.framebuffer->depth_stencil_attachment; if (depth_target && current_state_.config.depth_stencil.used) { - UpdateTileView(command_buffer, depth_target, true); + // UpdateTileView(command_buffer, depth_target, true); } // Color @@ -620,7 +623,7 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, continue; } - UpdateTileView(command_buffer, target, true); + // UpdateTileView(command_buffer, target, true); } } if (!render_pass) { @@ -693,12 +696,23 @@ bool RenderCache::ParseConfiguration(RenderConfiguration* config) { case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8; break; + case ColorRenderTargetFormat::k_2_10_10_10_unknown: + config->color[i].format = ColorRenderTargetFormat::k_2_10_10_10; + break; + case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown: + config->color[i].format = ColorRenderTargetFormat::k_2_10_10_10_FLOAT; + break; } + + // Make sure all unknown bits are unset. + // RDR sets bit 0x00400000 + // assert_zero(color_info[i] & ~0x000F0FFF); } } else { for (int i = 0; i < 4; ++i) { config->color[i].edram_base = 0; config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8; + config->color[i].used = false; } } @@ -708,9 +722,13 @@ bool RenderCache::ParseConfiguration(RenderConfiguration* config) { config->depth_stencil.edram_base = regs.rb_depth_info & 0xFFF; config->depth_stencil.format = static_cast((regs.rb_depth_info >> 16) & 0x1); + + // Make sure all unknown bits are unset. 
+ // assert_zero(regs.rb_depth_info & ~0x00010FFF); } else { config->depth_stencil.edram_base = 0; config->depth_stencil.format = DepthRenderTargetFormat::kD24S8; + config->depth_stencil.used = false; } return true; @@ -753,15 +771,22 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer, // If no framebuffer was found in the cache create a new one. if (!framebuffer) { + uint32_t tile_width = config->surface_msaa == MsaaSamples::k4X ? 40 : 80; + uint32_t tile_height = config->surface_msaa != MsaaSamples::k1X ? 8 : 16; + CachedTileView* target_color_attachments[4] = {nullptr, nullptr, nullptr, nullptr}; for (int i = 0; i < 4; ++i) { TileViewKey color_key; color_key.tile_offset = config->color[i].edram_base; - color_key.tile_width = xe::round_up(config->surface_pitch_px, 80) / 80; - color_key.tile_height = xe::round_up(config->surface_height_px, 16) / 16; + color_key.tile_width = + xe::round_up(config->surface_pitch_px, tile_width) / tile_width; + color_key.tile_height = std::min( + 2560 / tile_height, 160u); // xe::round_up(config->surface_height_px, + // tile_height) / tile_height; color_key.color_or_depth = 1; - color_key.msaa_samples = static_cast(config->surface_msaa); + color_key.msaa_samples = + 0; // static_cast(config->surface_msaa); color_key.edram_format = static_cast(config->color[i].format); target_color_attachments[i] = FindOrCreateTileView(command_buffer, color_key); @@ -774,12 +799,13 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer, TileViewKey depth_stencil_key; depth_stencil_key.tile_offset = config->depth_stencil.edram_base; depth_stencil_key.tile_width = - xe::round_up(config->surface_pitch_px, 80) / 80; - depth_stencil_key.tile_height = - xe::round_up(config->surface_height_px, 16) / 16; + xe::round_up(config->surface_pitch_px, tile_width) / tile_width; + depth_stencil_key.tile_height = std::min( + 2560 / tile_height, 160u); // xe::round_up(config->surface_height_px, + // tile_height) / tile_height; 
depth_stencil_key.color_or_depth = 0; depth_stencil_key.msaa_samples = - static_cast(config->surface_msaa); + 0; // static_cast(config->surface_msaa); depth_stencil_key.edram_format = static_cast(config->depth_stencil.format); auto target_depth_stencil_attachment = @@ -819,6 +845,11 @@ CachedTileView* RenderCache::FindOrCreateTileView( void RenderCache::UpdateTileView(VkCommandBuffer command_buffer, CachedTileView* view, bool load, bool insert_barrier) { + uint32_t tile_width = + view->key.msaa_samples == uint16_t(MsaaSamples::k4X) ? 40 : 80; + uint32_t tile_height = + view->key.msaa_samples != uint16_t(MsaaSamples::k1X) ? 8 : 16; + if (insert_barrier) { VkBufferMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -834,7 +865,10 @@ void RenderCache::UpdateTileView(VkCommandBuffer command_buffer, barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.buffer = edram_buffer_; barrier.offset = view->key.tile_offset * 5120; - barrier.size = view->key.tile_width * 80 * view->key.tile_height * 16 * 4; + barrier.size = view->key.tile_width * tile_width * view->key.tile_height * + tile_height * view->key.color_or_depth + ? 4 + : 1; vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, &barrier, 0, nullptr); @@ -850,8 +884,8 @@ void RenderCache::UpdateTileView(VkCommandBuffer command_buffer, ? 
VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; region.imageOffset = {0, 0, 0}; - region.imageExtent = {view->key.tile_width * 80u, view->key.tile_height * 16u, - 1}; + region.imageExtent = {view->key.tile_width * tile_width, + view->key.tile_height * tile_height, 1}; if (load) { vkCmdCopyBufferToImage(command_buffer, edram_buffer_, view->image, VK_IMAGE_LAYOUT_GENERAL, 1, ®ion); @@ -912,12 +946,27 @@ void RenderCache::EndRenderPass() { [](CachedTileView const* a, CachedTileView const* b) { return *a < *b; }); for (auto view : cached_views) { - UpdateTileView(current_command_buffer_, view, false, false); + // UpdateTileView(current_command_buffer_, view, false, false); } current_command_buffer_ = nullptr; } +void RenderCache::UpdateState() { + // Keep track of whether color attachments were used or not in this pass. + uint32_t rb_color_mask = register_file_->values[XE_GPU_REG_RB_COLOR_MASK].u32; + uint32_t rb_depthcontrol = + register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32; + for (int i = 0; i < 4; i++) { + uint32_t color_mask = (rb_color_mask >> (i * 4)) & 0xF; + current_state_.config.color[i].used |= + current_state_.config.mode_control == xenos::ModeControl::kColorDepth && + color_mask != 0; + } + + current_state_.config.depth_stencil.used |= !!(rb_depthcontrol & (0x4 | 0x2)); +} + void RenderCache::ClearCache() { // TODO(benvanik): caching. } @@ -999,47 +1048,39 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer, bool color_or_depth, uint32_t format, VkFilter filter, VkOffset3D offset, VkExtent3D extents) { + if (color_or_depth) { + // Adjust similar formats for easier matching. 
+ switch (static_cast(format)) { + case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + format = uint32_t(ColorRenderTargetFormat::k_8_8_8_8); + break; + case ColorRenderTargetFormat::k_2_10_10_10_unknown: + format = uint32_t(ColorRenderTargetFormat::k_2_10_10_10); + break; + case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown: + format = uint32_t(ColorRenderTargetFormat::k_2_10_10_10_FLOAT); + break; + } + } + + uint32_t tile_width = num_samples == MsaaSamples::k4X ? 40 : 80; + uint32_t tile_height = num_samples != MsaaSamples::k1X ? 8 : 16; + // Grab a tile view that represents the source image. TileViewKey key; key.color_or_depth = color_or_depth ? 1 : 0; - key.msaa_samples = static_cast(num_samples); + key.msaa_samples = 0; // static_cast(num_samples); key.edram_format = format; key.tile_offset = edram_base; - key.tile_width = xe::round_up(pitch, 80) / 80; - key.tile_height = xe::round_up(height, 16) / 16; + key.tile_width = xe::round_up(pitch, tile_width) / tile_width; + key.tile_height = + std::min(2560 / tile_height, + 160u); // xe::round_up(height, tile_height) / tile_height; auto tile_view = FindOrCreateTileView(command_buffer, key); assert_not_null(tile_view); - // Issue a memory barrier before we update this tile view. 
- VkBufferMemoryBarrier buffer_barrier; - buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - buffer_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - buffer_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - buffer_barrier.buffer = edram_buffer_; - buffer_barrier.offset = edram_base * 5120; - // TODO: Calculate this accurately (need texel size) - buffer_barrier.size = extents.width * extents.height * 4; - - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, - &buffer_barrier, 0, nullptr); - - // Update the tile view with current EDRAM contents. - // TODO: Heuristics to determine if this copy is avoidable. - // TODO(DrChat): Stencil copies. - VkBufferImageCopy buffer_copy; - buffer_copy.bufferOffset = edram_base * 5120; - buffer_copy.bufferImageHeight = 0; - buffer_copy.bufferRowLength = 0; - buffer_copy.imageSubresource = {0, 0, 0, 1}; - buffer_copy.imageSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - buffer_copy.imageExtent = {key.tile_width * 80u, key.tile_height * 16u, 1u}; - buffer_copy.imageOffset = {0, 0, 0}; - vkCmdCopyBufferToImage(command_buffer, edram_buffer_, tile_view->image, - VK_IMAGE_LAYOUT_GENERAL, 1, &buffer_copy); + // Update the view with the latest contents. + // UpdateTileView(command_buffer, tile_view, true, true); // Transition the image into a transfer destination layout, if needed. // TODO: Util function for this @@ -1063,11 +1104,11 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer, nullptr, 1, &image_barrier); // If we overflow we'll lose the device here. 
- assert_true(extents.width <= key.tile_width * 80u); - assert_true(extents.height <= key.tile_height * 16u); + assert_true(extents.width <= key.tile_width * tile_width); + assert_true(extents.height <= key.tile_height * tile_height); // Now issue the blit to the destination. - if (num_samples == MsaaSamples::k1X) { + if (tile_view->sample_count == VK_SAMPLE_COUNT_1_BIT) { VkImageBlit image_blit; image_blit.srcSubresource = {0, 0, 0, 1}; image_blit.srcSubresource.aspectMask = @@ -1127,14 +1168,32 @@ void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer, // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just // need to detect this and calculate a value. + // Adjust similar formats for easier matching. + switch (format) { + case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + format = ColorRenderTargetFormat::k_8_8_8_8; + break; + case ColorRenderTargetFormat::k_2_10_10_10_unknown: + format = ColorRenderTargetFormat::k_2_10_10_10; + break; + case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown: + format = ColorRenderTargetFormat::k_2_10_10_10_FLOAT; + break; + } + + uint32_t tile_width = num_samples == MsaaSamples::k4X ? 40 : 80; + uint32_t tile_height = num_samples != MsaaSamples::k1X ? 
8 : 16; + // Grab a tile view (as we need to clear an image first) TileViewKey key; key.color_or_depth = 1; - key.msaa_samples = static_cast(num_samples); + key.msaa_samples = 0; // static_cast(num_samples); key.edram_format = static_cast(format); key.tile_offset = edram_base; - key.tile_width = xe::round_up(pitch, 80) / 80; - key.tile_height = xe::round_up(height, 16) / 16; + key.tile_width = xe::round_up(pitch, tile_width) / tile_width; + key.tile_height = + std::min(2560 / tile_height, + 160u); // xe::round_up(height, tile_height) / tile_height; auto tile_view = FindOrCreateTileView(command_buffer, key); assert_not_null(tile_view); @@ -1147,16 +1206,7 @@ void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer, VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &range); // Copy image back into EDRAM buffer - VkBufferImageCopy copy_range; - copy_range.bufferOffset = edram_base * 5120; - copy_range.bufferImageHeight = 0; - copy_range.bufferRowLength = 0; - copy_range.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; - copy_range.imageExtent = {key.tile_width * 80u, key.tile_height * 16u, 1u}; - copy_range.imageOffset = {0, 0, 0}; - vkCmdCopyImageToBuffer(command_buffer, tile_view->image, - VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, - ©_range); + // UpdateTileView(command_buffer, tile_view, false, false); } void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, @@ -1168,14 +1218,19 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just // need to detect this and calculate a value. + uint32_t tile_width = num_samples == MsaaSamples::k4X ? 40 : 80; + uint32_t tile_height = num_samples != MsaaSamples::k1X ? 
8 : 16; + // Grab a tile view (as we need to clear an image first) TileViewKey key; key.color_or_depth = 0; - key.msaa_samples = static_cast(num_samples); + key.msaa_samples = 0; // static_cast(num_samples); key.edram_format = static_cast(format); key.tile_offset = edram_base; - key.tile_width = xe::round_up(pitch, 80) / 80; - key.tile_height = xe::round_up(height, 16) / 16; + key.tile_width = xe::round_up(pitch, tile_width) / tile_width; + key.tile_height = + std::min(2560 / tile_height, + 160u); // xe::round_up(height, tile_height) / tile_height; auto tile_view = FindOrCreateTileView(command_buffer, key); assert_not_null(tile_view); @@ -1191,19 +1246,7 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &range); // Copy image back into EDRAM buffer - // TODO(DrChat): Stencil copies. - VkBufferImageCopy copy_range; - copy_range.bufferOffset = edram_base * 5120; - copy_range.bufferImageHeight = 0; - copy_range.bufferRowLength = 0; - copy_range.imageSubresource = { - VK_IMAGE_ASPECT_DEPTH_BIT, 0, 0, 1, - }; - copy_range.imageExtent = {key.tile_width * 80u, key.tile_height * 16u, 1u}; - copy_range.imageOffset = {0, 0, 0}; - vkCmdCopyImageToBuffer(command_buffer, tile_view->image, - VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, - ©_range); + // UpdateTileView(command_buffer, tile_view, false, false); } void RenderCache::FillEDRAM(VkCommandBuffer command_buffer, uint32_t value) { diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index 86edac7bc..4eeca42bf 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -57,6 +57,8 @@ class CachedTileView { VkImageView image_view = nullptr; // Memory buffer VkDeviceMemory memory = nullptr; + // Image sample count + VkSampleCountFlagBits sample_count = VK_SAMPLE_COUNT_1_BIT; CachedTileView(ui::vulkan::VulkanDevice* device, VkCommandBuffer command_buffer, VkDeviceMemory edram_memory, @@ -81,9 
+83,9 @@ class CachedTileView { struct RenderConfiguration { // Render mode (color+depth, depth-only, etc). xenos::ModeControl mode_control; - // Target surface pitch, in pixels. + // Target surface pitch multiplied by MSAA, in pixels. uint32_t surface_pitch_px; - // ESTIMATED target surface height, in pixels. + // ESTIMATED target surface height multiplied by MSAA, in pixels. uint32_t surface_height_px; // Surface MSAA setting. MsaaSamples surface_msaa; @@ -111,6 +113,9 @@ struct RenderState { // Target framebuffer bound to the render pass. CachedFramebuffer* framebuffer = nullptr; VkFramebuffer framebuffer_handle = nullptr; + + bool color_attachment_written[4] = {false}; + bool depth_attachment_written = false; }; // Manages the virtualized EDRAM and the render target cache. @@ -135,9 +140,13 @@ struct RenderState { // 320px by rounding up to the next tile. // // MSAA and other settings will modify the exact pixel sizes, like 4X makes -// each tile effectively 40x8px, but they are still all 5120b. As we try to -// emulate this we adjust our viewport when rendering to stretch pixels as -// needed. +// each tile effectively 40x8px / 2X makes each tile 80x8px, but they are still +// all 5120b. As we try to emulate this we adjust our viewport when rendering to +// stretch pixels as needed. +// +// It appears that games also take advantage of MSAA stretching tiles when doing +// clears. Games will clear a view with 1/2X pitch/height and 4X MSAA and then +// later draw to that view with 1X pitch/height and 1X MSAA. // // The good news is that games cannot read EDRAM directly but must use a copy // operation to get the data out. That gives us a chance to do whatever we @@ -269,6 +278,9 @@ class RenderCache { // The command buffer will be transitioned out of the render pass phase. void EndRenderPass(); + // Updates current render state. Call this every draw with an open render pass + void UpdateState(); + // Clears all cached content. 
void ClearCache(); @@ -346,13 +358,12 @@ class RenderCache { struct ShadowRegisters { uint32_t rb_modecontrol; uint32_t rb_surface_info; - uint32_t rb_color_mask; uint32_t rb_color_info; uint32_t rb_color1_info; uint32_t rb_color2_info; uint32_t rb_color3_info; - uint32_t rb_depthcontrol; uint32_t rb_depth_info; + uint32_t rb_depthcontrol; uint32_t pa_sc_window_scissor_tl; uint32_t pa_sc_window_scissor_br; diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc b/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc index 1f018db54..52bc10c84 100644 --- a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc +++ b/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc @@ -11,3 +11,6 @@ DEFINE_bool(vulkan_renderdoc_capture_all, false, "Capture everything with RenderDoc."); +DEFINE_bool(vulkan_native_msaa, true, "Use native MSAA"); +DEFINE_bool(vulkan_dump_disasm, false, + "Dump shader disassembly. NVIDIA only supported."); diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h b/src/xenia/gpu/vulkan/vulkan_gpu_flags.h index ca83dfb7a..169e797c8 100644 --- a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h +++ b/src/xenia/gpu/vulkan/vulkan_gpu_flags.h @@ -15,5 +15,7 @@ #define FINE_GRAINED_DRAW_SCOPES 1 DECLARE_bool(vulkan_renderdoc_capture_all); +DECLARE_bool(vulkan_native_msaa); +DECLARE_bool(vulkan_dump_disasm); #endif // XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ From 8e8df2e778e59d20350889ea46c1163f0f499921 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 3 May 2016 14:07:20 -0500 Subject: [PATCH 114/145] PipelineCache: Support shader disasm dumps for nvidia cards. Fix MSAA 2X multiplier. 
--- src/xenia/gpu/vulkan/pipeline_cache.cc | 169 ++++++++++++++++++++----- src/xenia/gpu/vulkan/pipeline_cache.h | 3 + 2 files changed, 138 insertions(+), 34 deletions(-) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index 19db3cd4f..70054f5e2 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -17,6 +17,9 @@ #include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/vulkan/vulkan_gpu_flags.h" +#include +#include + namespace xe { namespace gpu { namespace vulkan { @@ -169,9 +172,9 @@ VulkanShader* PipelineCache::LoadShader(ShaderType shader_type, } if (shader->is_valid()) { - XELOGGPU("Generated %s shader at 0x%.8X (%db):\n%s", + XELOGGPU("Generated %s shader at 0x%.8X (%db) - hash %.16" PRIX64 ":\n%s\n", shader_type == ShaderType::kVertex ? "vertex" : "pixel", - guest_address, dword_count * 4, + guest_address, dword_count * 4, shader->ucode_data_hash(), shader->ucode_disassembly().c_str()); } @@ -288,12 +291,105 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state, &pipeline_info, nullptr, &pipeline); CheckResult(err, "vkCreateGraphicsPipelines"); + // Dump shader disassembly. + if (FLAGS_vulkan_dump_disasm) { + DumpShaderDisasmNV(pipeline_info); + } + // Add to cache with the hash key for reuse. cached_pipelines_.insert({hash_key, pipeline}); return pipeline; } +void PipelineCache::DumpShaderDisasmNV( + const VkGraphicsPipelineCreateInfo& pipeline_info) { + // !! HACK !!: This only works on NVidia drivers. Dumps shader disasm. + // This code is super ugly. Update this when NVidia includes an official + // way to dump shader disassembly. 
+ + VkPipelineCacheCreateInfo pipeline_cache_info; + VkPipelineCache dummy_pipeline_cache; + pipeline_cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; + pipeline_cache_info.pNext = nullptr; + pipeline_cache_info.flags = 0; + pipeline_cache_info.initialDataSize = 0; + pipeline_cache_info.pInitialData = nullptr; + auto err = vkCreatePipelineCache(device_, &pipeline_cache_info, nullptr, + &dummy_pipeline_cache); + CheckResult(err, "vkCreatePipelineCache"); + + // Create a pipeline on the dummy cache and dump it. + VkPipeline dummy_pipeline; + err = vkCreateGraphicsPipelines(device_, dummy_pipeline_cache, 1, + &pipeline_info, nullptr, &dummy_pipeline); + + std::vector pipeline_data; + size_t data_size = 0; + err = vkGetPipelineCacheData(device_, dummy_pipeline_cache, &data_size, + nullptr); + if (err == VK_SUCCESS) { + pipeline_data.resize(data_size); + vkGetPipelineCacheData(device_, dummy_pipeline_cache, &data_size, + pipeline_data.data()); + + // Scan the data for the disassembly. + std::string disasm_vp, disasm_fp; + + const char* disasm_start_vp = nullptr; + const char* disasm_start_fp = nullptr; + size_t search_offset = 0; + const char* search_start = + reinterpret_cast(pipeline_data.data()); + while (true) { + auto p = reinterpret_cast( + memchr(pipeline_data.data() + search_offset, '!', + pipeline_data.size() - search_offset)); + if (!p) { + break; + } + if (!strncmp(p, "!!NV", 4)) { + if (!strncmp(p + 4, "vp", 2)) { + disasm_start_vp = p; + } else if (!strncmp(p + 4, "fp", 2)) { + disasm_start_fp = p; + } + + if (disasm_start_fp && disasm_start_vp) { + // Found all we needed. + break; + } + } + search_offset = p - search_start; + ++search_offset; + } + if (disasm_start_vp) { + disasm_vp = std::string(disasm_start_vp); + + // For some reason there's question marks all over the code. 
+ disasm_vp.erase(std::remove(disasm_vp.begin(), disasm_vp.end(), '?'), + disasm_vp.end()); + } else { + disasm_vp = std::string("Shader disassembly not available."); + } + + if (disasm_start_fp) { + disasm_fp = std::string(disasm_start_fp); + + // For some reason there's question marks all over the code. + disasm_fp.erase(std::remove(disasm_fp.begin(), disasm_fp.end(), '?'), + disasm_fp.end()); + } else { + disasm_fp = std::string("Shader disassembly not available."); + } + + XELOGI("%s\n=====================================\n%s", disasm_vp.c_str(), + disasm_fp.c_str()); + } + + vkDestroyPipelineCache(device_, dummy_pipeline_cache, nullptr); +} + VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type, bool is_line_mode) { switch (primitive_type) { @@ -396,22 +492,18 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_zscale, XE_GPU_REG_PA_CL_VPORT_ZSCALE); if (viewport_state_dirty) { - // HACK: no clue where to get these values. // RB_SURFACE_INFO auto surface_msaa = static_cast((regs.rb_surface_info >> 16) & 0x3); - // TODO(benvanik): ?? - // FIXME: Some games depend on these for proper clears (e.g. only clearing - // half the size they actually want with 4x MSAA), but others don't. - // Figure out how these games are expecting clears to be done. + + // Apply a multiplier to emulate MSAA. float window_width_scalar = 1; float window_height_scalar = 1; switch (surface_msaa) { case MsaaSamples::k1X: break; case MsaaSamples::k2X: - // ?? 
- window_width_scalar = window_height_scalar = 1.41421356f; + window_height_scalar = 2; break; case MsaaSamples::k4X: window_width_scalar = window_height_scalar = 2; @@ -770,11 +862,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( : VK_FORMAT_A2R10G10B10_UNORM_PACK32; break; case VertexFormat::k_10_11_11: - // assert_always("unsupported?"); + assert_true(is_signed); vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; break; case VertexFormat::k_11_11_10: - assert_true(is_signed); + // Converted in-shader. + // TODO(DrChat) + // vertex_attrib_descr.format = VK_FORMAT_R32_UINT; vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; break; case VertexFormat::k_16_16: @@ -946,6 +1040,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL); dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_br, XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); + dirty |= SetShadowRegister(®s.pa_sc_viz_query, XE_GPU_REG_PA_SC_VIZ_QUERY); dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); @@ -964,12 +1059,14 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( // Discard rasterizer output in depth-only mode. // TODO(DrChat): Figure out how to make this work properly. - /* auto enable_mode = static_cast(regs.rb_modecontrol & 0x7); state_info.rasterizerDiscardEnable = enable_mode == xenos::ModeControl::kColorDepth ? 
VK_FALSE : VK_TRUE; - //*/ - state_info.rasterizerDiscardEnable = VK_FALSE; + + // KILL_PIX_POST_EARLY_Z + if (regs.pa_sc_viz_query & 0x80) { + state_info.rasterizerDiscardEnable = VK_TRUE; + } bool poly_mode = ((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0; if (poly_mode) { @@ -1039,27 +1136,31 @@ PipelineCache::UpdateStatus PipelineCache::UpdateMultisampleState() { state_info.pNext = nullptr; state_info.flags = 0; - // PA_SC_AA_CONFIG MSAA_NUM_SAMPLES - // PA_SU_SC_MODE_CNTL MSAA_ENABLE - // state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - //* - auto msaa_num_samples = - static_cast((regs.rb_surface_info >> 16) & 0x3); - switch (msaa_num_samples) { - case MsaaSamples::k1X: - state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - break; - case MsaaSamples::k2X: - state_info.rasterizationSamples = VK_SAMPLE_COUNT_2_BIT; - break; - case MsaaSamples::k4X: - state_info.rasterizationSamples = VK_SAMPLE_COUNT_4_BIT; - break; - default: - assert_unhandled_case(msaa_num_samples); - break; + // PA_SC_AA_CONFIG MSAA_NUM_SAMPLES (0x7) + // PA_SC_AA_MASK (0xFFFF) + // PA_SU_SC_MODE_CNTL MSAA_ENABLE (0x10000) + // If set, all samples will be sampled at set locations. Otherwise, they're + // all sampled from the pixel center. 
+ if (FLAGS_vulkan_native_msaa) { + auto msaa_num_samples = + static_cast((regs.rb_surface_info >> 16) & 0x3); + switch (msaa_num_samples) { + case MsaaSamples::k1X: + state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + break; + case MsaaSamples::k2X: + state_info.rasterizationSamples = VK_SAMPLE_COUNT_2_BIT; + break; + case MsaaSamples::k4X: + state_info.rasterizationSamples = VK_SAMPLE_COUNT_4_BIT; + break; + default: + assert_unhandled_case(msaa_num_samples); + break; + } + } else { + state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; } - //*/ state_info.sampleShadingEnable = VK_FALSE; state_info.minSampleShading = 0; diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index f240b9c0d..e5645f638 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -75,6 +75,8 @@ class PipelineCache { // state. VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key); + void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info); + // Gets a geometry shader used to emulate the given primitive type. // Returns nullptr if the primitive doesn't need to be emulated. VkShaderModule GetGeometryShader(PrimitiveType primitive_type, @@ -210,6 +212,7 @@ class PipelineCache { uint32_t pa_su_sc_mode_cntl; uint32_t pa_sc_screen_scissor_tl; uint32_t pa_sc_screen_scissor_br; + uint32_t pa_sc_viz_query; uint32_t multi_prim_ib_reset_index; uint32_t rb_modecontrol; From f2af28c3228857912a1b8da5db7cca6029dc1d3f Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 3 May 2016 14:10:15 -0500 Subject: [PATCH 115/145] TextureCache: Fix up some synchronization flaws (deleting in-use textures/etc) Fix texture binding IDs not matching fetch instruction IDs. Fix some bad texture format matching. 
Add access watches --- src/xenia/gpu/vulkan/texture_cache.cc | 362 ++++++++++++++++---------- src/xenia/gpu/vulkan/texture_cache.h | 23 +- 2 files changed, 245 insertions(+), 140 deletions(-) diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index 0deddf36d..ee82cb74a 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -50,9 +50,9 @@ static const TextureConfig texture_configs[64] = { {TextureFormat::k_4_4_4_4, VK_FORMAT_R4G4B4A4_UNORM_PACK16}, {TextureFormat::k_10_11_11, VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? {TextureFormat::k_11_11_10, VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? - {TextureFormat::k_DXT1, VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // ? - {TextureFormat::k_DXT2_3, VK_FORMAT_BC3_SRGB_BLOCK}, // ? - {TextureFormat::k_DXT4_5, VK_FORMAT_BC5_UNORM_BLOCK}, // ? + {TextureFormat::k_DXT1, VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, + {TextureFormat::k_DXT2_3, VK_FORMAT_BC2_SRGB_BLOCK}, + {TextureFormat::k_DXT4_5, VK_FORMAT_BC3_SRGB_BLOCK}, {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED}, {TextureFormat::k_24_8, VK_FORMAT_D24_UNORM_S8_UINT}, {TextureFormat::k_24_8_FLOAT, VK_FORMAT_D24_UNORM_S8_UINT}, // ? @@ -81,14 +81,13 @@ static const TextureConfig texture_configs[64] = { {TextureFormat::k_16_INTERLACED, VK_FORMAT_UNDEFINED}, {TextureFormat::k_16_MPEG_INTERLACED, VK_FORMAT_UNDEFINED}, {TextureFormat::k_16_16_MPEG_INTERLACED, VK_FORMAT_UNDEFINED}, - {TextureFormat::k_DXN, VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RG_RGTC2 */}, + + // http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf + {TextureFormat::k_DXN, VK_FORMAT_BC5_UNORM_BLOCK}, // ? 
{TextureFormat::k_8_8_8_8_AS_16_16_16_16, VK_FORMAT_R8G8B8A8_UNORM}, - {TextureFormat::k_DXT1_AS_16_16_16_16, - VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGB_S3TC_DXT1_EXT */}, - {TextureFormat::k_DXT2_3_AS_16_16_16_16, - VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGBA_S3TC_DXT3_EXT */}, - {TextureFormat::k_DXT4_5_AS_16_16_16_16, - VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGBA_S3TC_DXT5_EXT */}, + {TextureFormat::k_DXT1_AS_16_16_16_16, VK_FORMAT_BC1_RGB_SRGB_BLOCK}, + {TextureFormat::k_DXT2_3_AS_16_16_16_16, VK_FORMAT_BC2_SRGB_BLOCK}, + {TextureFormat::k_DXT4_5_AS_16_16_16_16, VK_FORMAT_BC3_SRGB_BLOCK}, {TextureFormat::k_2_10_10_10_AS_16_16_16_16, VK_FORMAT_A2R10G10B10_UNORM_PACK32}, {TextureFormat::k_10_11_11_AS_16_16_16_16, @@ -96,10 +95,8 @@ static const TextureConfig texture_configs[64] = { {TextureFormat::k_11_11_10_AS_16_16_16_16, VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? {TextureFormat::k_32_32_32_FLOAT, VK_FORMAT_R32G32B32_SFLOAT}, - {TextureFormat::k_DXT3A, - VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGBA_S3TC_DXT3_EXT */}, - {TextureFormat::k_DXT5A, - VK_FORMAT_UNDEFINED /* GL_COMPRESSED_RGBA_S3TC_DXT5_EXT */}, + {TextureFormat::k_DXT3A, VK_FORMAT_UNDEFINED}, + {TextureFormat::k_DXT5A, VK_FORMAT_UNDEFINED}, {TextureFormat::k_CTX1, VK_FORMAT_UNDEFINED}, {TextureFormat::k_DXT3A_AS_1_1_1_1, VK_FORMAT_UNDEFINED}, {TextureFormat::kUnknown, VK_FORMAT_UNDEFINED}, @@ -120,10 +117,10 @@ TextureCache::TextureCache(Memory* memory, RegisterFile* register_file, descriptor_pool_info.pNext = nullptr; descriptor_pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - descriptor_pool_info.maxSets = 4096; + descriptor_pool_info.maxSets = 8192; VkDescriptorPoolSize pool_sizes[1]; pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - pool_sizes[0].descriptorCount = 4096; + pool_sizes[0].descriptorCount = 8192; descriptor_pool_info.poolSizeCount = 1; descriptor_pool_info.pPoolSizes = pool_sizes; auto err = vkCreateDescriptorPool(*device_, &descriptor_pool_info, nullptr, 
@@ -301,11 +298,21 @@ TextureCache::Texture* TextureCache::AllocateTexture( } bool TextureCache::FreeTexture(Texture* texture) { + if (texture->in_flight_fence->status() != VK_SUCCESS) { + // Texture still in flight. + return false; + } + for (auto it = texture->views.begin(); it != texture->views.end();) { vkDestroyImageView(*device_, (*it)->view, nullptr); it = texture->views.erase(it); } + if (texture->access_watch_handle) { + memory_->CancelAccessWatch(texture->access_watch_handle); + texture->access_watch_handle = 0; + } + vkDestroyImage(*device_, texture->image, nullptr); vkFreeMemory(*device_, texture->image_memory, nullptr); delete texture; @@ -326,6 +333,25 @@ TextureCache::Texture* TextureCache::DemandResolveTexture( // No texture at this location. Make a new one. texture = AllocateTexture(texture_info); texture->is_full_texture = false; + + // Setup an access watch. If this texture is touched, it is destroyed. + texture->access_watch_handle = memory_->AddPhysicalAccessWatch( + texture_info.guest_address, texture_info.input_length, + cpu::MMIOHandler::kWatchWrite, + [](void* context_ptr, void* data_ptr, uint32_t address) { + auto self = reinterpret_cast(context_ptr); + auto touched_texture = reinterpret_cast(data_ptr); + // Clear watch handle first so we don't redundantly + // remove. + touched_texture->access_watch_handle = 0; + touched_texture->pending_invalidation = true; + // Add to pending list so Scavenge will clean it up. 
+ self->invalidated_resolve_textures_mutex_.lock(); + self->invalidated_resolve_textures_.push_back(touched_texture); + self->invalidated_resolve_textures_mutex_.unlock(); + }, + this, texture); + resolve_textures_.push_back(texture); return texture; } @@ -337,6 +363,12 @@ TextureCache::Texture* TextureCache::Demand( auto texture_hash = texture_info.hash(); for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) { if (it->second->texture_info == texture_info) { + if (it->second->pending_invalidation) { + // This texture has been invalidated! + Scavenge(); + break; + } + return it->second; } } @@ -355,6 +387,25 @@ TextureCache::Texture* TextureCache::Demand( // Upgrade this texture to a full texture. texture->is_full_texture = true; texture->texture_info = texture_info; + + memory_->CancelAccessWatch(texture->access_watch_handle); + texture->access_watch_handle = memory_->AddPhysicalAccessWatch( + texture_info.guest_address, texture_info.input_length, + cpu::MMIOHandler::kWatchWrite, + [](void* context_ptr, void* data_ptr, uint32_t address) { + auto self = reinterpret_cast(context_ptr); + auto touched_texture = reinterpret_cast(data_ptr); + // Clear watch handle first so we don't redundantly + // remove. + touched_texture->access_watch_handle = 0; + touched_texture->pending_invalidation = true; + // Add to pending list so Scavenge will clean it up. + self->invalidated_textures_mutex_.lock(); + self->invalidated_textures_->push_back(touched_texture); + self->invalidated_textures_mutex_.unlock(); + }, + this, texture); + textures_[texture_hash] = *it; it = resolve_textures_.erase(it); return textures_[texture_hash]; @@ -367,6 +418,11 @@ TextureCache::Texture* TextureCache::Demand( return nullptr; } + if (texture_info.dimension != Dimension::k2D) { + // Abort. + return nullptr; + } + // Create a new texture and cache it. 
auto texture = AllocateTexture(texture_info); if (!texture) { @@ -388,31 +444,25 @@ TextureCache::Texture* TextureCache::Demand( if (!uploaded) { // TODO: Destroy the texture. - assert_always(); + FreeTexture(texture); return nullptr; } // Copy in overlapping resolve textures. - /* - for (auto it = resolve_textures_.begin(); it != resolve_textures_.end(); - ++it) { - auto texture = (*it); - if (texture_info.guest_address == texture->texture_info.guest_address && - texture_info.size_2d.logical_width == - texture->texture_info.size_2d.logical_width && - texture_info.size_2d.logical_height == - texture->texture_info.size_2d.logical_height) { - // Exact match. - // TODO: Lazy match (at an offset) - // Upgrade this texture to a full texture. - texture->is_full_texture = true; - texture->texture_info = texture_info; - textures_[texture_hash] = *it; - it = resolve_textures_.erase(it); - return textures_[texture_hash]; + // FIXME: RDR appears to take textures from small chunks of a resolve texture? + if (texture_info.dimension == Dimension::k2D) { + for (auto it = resolve_textures_.begin(); it != resolve_textures_.end(); + ++it) { + auto texture = (*it); + if (texture_info.guest_address >= texture->texture_info.guest_address && + texture_info.guest_address < texture->texture_info.guest_address + + texture->texture_info.input_length) { + // Lazy matched a resolve texture. Copy it in and destroy it. + // Future resolves will just copy directly into this texture. + // assert_always(); + } } } - */ // Though we didn't find an exact match, that doesn't mean we're out of the // woods yet. 
This texture could either be a portion of another texture or @@ -594,8 +644,36 @@ TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) { address_mode_map[static_cast(sampler_info.clamp_w)]; sampler_create_info.mipLodBias = 0.0f; - sampler_create_info.anisotropyEnable = VK_FALSE; - sampler_create_info.maxAnisotropy = 1.0f; + + float aniso = 0.f; + switch (sampler_info.aniso_filter) { + case AnisoFilter::kDisabled: + aniso = 1.0f; + break; + case AnisoFilter::kMax_1_1: + aniso = 1.0f; + break; + case AnisoFilter::kMax_2_1: + aniso = 2.0f; + break; + case AnisoFilter::kMax_4_1: + aniso = 4.0f; + break; + case AnisoFilter::kMax_8_1: + aniso = 8.0f; + break; + case AnisoFilter::kMax_16_1: + aniso = 16.0f; + break; + default: + assert_unhandled_case(aniso); + return nullptr; + } + + sampler_create_info.anisotropyEnable = + sampler_info.aniso_filter != AnisoFilter::kDisabled ? VK_TRUE : VK_FALSE; + sampler_create_info.maxAnisotropy = aniso; + sampler_create_info.compareEnable = VK_FALSE; sampler_create_info.compareOp = VK_COMPARE_OP_NEVER; sampler_create_info.minLod = 0.0f; @@ -758,7 +836,6 @@ bool TextureCache::UploadTexture2D( uint32_t offset_x; uint32_t offset_y; TextureInfo::GetPackedTileOffset(src, &offset_x, &offset_y); - auto bpp = (bytes_per_block >> 2) + ((bytes_per_block >> 1) >> (bytes_per_block >> 2)); for (uint32_t y = 0, output_base_offset = 0; @@ -783,6 +860,7 @@ bool TextureCache::UploadTexture2D( // Insert a memory barrier into the command buffer to ensure the upload has // finished before we copy it into the destination texture. + /* VkBufferMemoryBarrier upload_barrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, NULL, @@ -797,6 +875,7 @@ bool TextureCache::UploadTexture2D( vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, &upload_barrier, 0, nullptr); + //*/ // Transition the texture into a transfer destination layout. 
VkImageMemoryBarrier barrier; @@ -805,7 +884,7 @@ bool TextureCache::UploadTexture2D( barrier.srcAccessMask = 0; barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_HOST_WRITE_BIT; - barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrier.oldLayout = dest->image_layout; barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -849,10 +928,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet( // Clear state. auto update_set_info = &update_set_info_; update_set_info->has_setup_fetch_mask = 0; - update_set_info->image_1d_write_count = 0; - update_set_info->image_2d_write_count = 0; - update_set_info->image_3d_write_count = 0; - update_set_info->image_cube_write_count = 0; + update_set_info->image_write_count = 0; std::memset(update_set_info, 0, sizeof(update_set_info_)); @@ -885,60 +961,75 @@ VkDescriptorSet TextureCache::PrepareTextureSet( // Write all updated descriptors. // TODO(benvanik): optimize? split into multiple sets? set per type? - VkWriteDescriptorSet descriptor_writes[4]; - std::memset(descriptor_writes, 0, sizeof(descriptor_writes)); - uint32_t descriptor_write_count = 0; - // FIXME: These are not be lined up properly with tf binding points!!!!! 
- if (update_set_info->image_1d_write_count) { - auto& image_write = descriptor_writes[descriptor_write_count++]; - image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - image_write.pNext = nullptr; - image_write.dstSet = descriptor_set; - image_write.dstBinding = 0; - image_write.dstArrayElement = 0; - image_write.descriptorCount = update_set_info->image_1d_write_count; - image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - image_write.pImageInfo = update_set_info->image_1d_infos; - } - if (update_set_info->image_2d_write_count) { - auto& image_write = descriptor_writes[descriptor_write_count++]; - image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - image_write.pNext = nullptr; - image_write.dstSet = descriptor_set; - image_write.dstBinding = 1; - image_write.dstArrayElement = 0; - image_write.descriptorCount = update_set_info->image_2d_write_count; - image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - image_write.pImageInfo = update_set_info->image_2d_infos; - } - if (update_set_info->image_3d_write_count) { - auto& image_write = descriptor_writes[descriptor_write_count++]; - image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - image_write.pNext = nullptr; - image_write.dstSet = descriptor_set; - image_write.dstBinding = 2; - image_write.dstArrayElement = 0; - image_write.descriptorCount = update_set_info->image_3d_write_count; - image_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - image_write.pImageInfo = update_set_info->image_3d_infos; - } - if (update_set_info->image_cube_write_count) { - auto& image_write = descriptor_writes[descriptor_write_count++]; - image_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - image_write.pNext = nullptr; - image_write.dstSet = descriptor_set; - image_write.dstBinding = 3; - image_write.dstArrayElement = 0; - image_write.descriptorCount = update_set_info->image_cube_write_count; - image_write.descriptorType = 
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - image_write.pImageInfo = update_set_info->image_cube_infos; - } - if (descriptor_write_count) { - vkUpdateDescriptorSets(*device_, descriptor_write_count, descriptor_writes, - 0, nullptr); + // First: Reorganize and pool image update infos. + struct DescriptorInfo { + Dimension dimension; + uint32_t tf_binding_base; + std::vector infos; + }; + + std::vector descriptor_update_infos; + for (uint32_t i = 0; i < update_set_info->image_write_count; i++) { + auto& image_info = update_set_info->image_infos[i]; + if (descriptor_update_infos.size() > 0) { + // Check last write to see if we can pool more into it. + DescriptorInfo& last_write = + descriptor_update_infos[descriptor_update_infos.size() - 1]; + if (last_write.dimension == image_info.dimension && + last_write.tf_binding_base + last_write.infos.size() == + image_info.tf_binding) { + // Compatible! Pool into it. + last_write.infos.push_back(image_info.info); + continue; + } + } + + // Push a new descriptor write entry. + DescriptorInfo desc_info; + desc_info.dimension = image_info.dimension; + desc_info.tf_binding_base = image_info.tf_binding; + desc_info.infos.push_back(image_info.info); + descriptor_update_infos.push_back(desc_info); } - in_flight_sets_.push_back({descriptor_set, completion_fence}); + // Finalize the writes so they're consumable by Vulkan. 
+ std::vector descriptor_writes; + descriptor_writes.resize(descriptor_update_infos.size()); + for (size_t i = 0; i < descriptor_update_infos.size(); i++) { + auto& update_info = descriptor_update_infos[i]; + auto& write_info = descriptor_writes[i]; + std::memset(&write_info, 0, sizeof(VkWriteDescriptorSet)); + + write_info.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_info.dstSet = descriptor_set; + + switch (update_info.dimension) { + case Dimension::k1D: + write_info.dstBinding = 0; + break; + case Dimension::k2D: + write_info.dstBinding = 1; + break; + case Dimension::k3D: + write_info.dstBinding = 2; + break; + case Dimension::kCube: + write_info.dstBinding = 3; + break; + } + + write_info.dstArrayElement = update_info.tf_binding_base; + write_info.descriptorCount = uint32_t(update_info.infos.size()); + write_info.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + write_info.pImageInfo = update_info.infos.data(); + } + + if (descriptor_writes.size() > 0) { + vkUpdateDescriptorSets(*device_, uint32_t(descriptor_writes.size()), + descriptor_writes.data(), 0, nullptr); + } + + in_flight_sets_[descriptor_set] = completion_fence; return descriptor_set; } @@ -991,7 +1082,7 @@ bool TextureCache::SetupTextureBinding( auto texture = Demand(texture_info, command_buffer, completion_fence); auto sampler = Demand(sampler_info); - assert_true(texture != nullptr && sampler != nullptr); + // assert_true(texture != nullptr && sampler != nullptr); if (texture == nullptr || sampler == nullptr) { return false; } @@ -1002,35 +1093,14 @@ bool TextureCache::SetupTextureBinding( trace_writer_->WriteMemoryRead(texture_info.guest_address, texture_info.input_length); - VkDescriptorImageInfo* image_write = nullptr; - switch (texture_info.dimension) { - case Dimension::k1D: - image_write = - &update_set_info - ->image_1d_infos[update_set_info->image_1d_write_count++]; - break; - case Dimension::k2D: - image_write = - &update_set_info - 
->image_2d_infos[update_set_info->image_2d_write_count++]; - break; - case Dimension::k3D: - image_write = - &update_set_info - ->image_3d_infos[update_set_info->image_3d_write_count++]; - break; - case Dimension::kCube: - image_write = - &update_set_info - ->image_cube_infos[update_set_info->image_cube_write_count++]; - break; - default: - assert_unhandled_case(texture_info.dimension); - return false; - } - image_write->imageView = view->view; - image_write->imageLayout = texture->image_layout; - image_write->sampler = sampler->sampler; + auto image_write = + &update_set_info->image_infos[update_set_info->image_write_count++]; + image_write->dimension = texture_info.dimension; + image_write->tf_binding = binding.fetch_constant; + image_write->info.imageView = view->view; + image_write->info.imageLayout = texture->image_layout; + image_write->info.sampler = sampler->sampler; + texture->in_flight_fence = completion_fence; return true; } @@ -1054,6 +1124,18 @@ void TextureCache::Scavenge() { staging_buffer_.Scavenge(); + // Kill all pending delete textures. + if (!pending_delete_textures_.empty()) { + for (auto it = pending_delete_textures_.begin(); + it != pending_delete_textures_.end();) { + if (!FreeTexture(*it)) { + break; + } + + it = pending_delete_textures_.erase(it); + } + } + // Clean up any invalidated textures. invalidated_textures_mutex_.lock(); std::vector& invalidated_textures = *invalidated_textures_; @@ -1063,15 +1145,33 @@ void TextureCache::Scavenge() { invalidated_textures_ = &invalidated_textures_sets_[0]; } invalidated_textures_mutex_.unlock(); - if (invalidated_textures.empty()) { - return; + if (!invalidated_textures.empty()) { + for (auto it = invalidated_textures.begin(); + it != invalidated_textures.end(); ++it) { + if (!FreeTexture(*it)) { + // Texture wasn't deleted because it's still in use. 
+ pending_delete_textures_.push_back(*it); + } + + textures_.erase((*it)->texture_info.hash()); + } + + invalidated_textures.clear(); } - for (auto& texture : invalidated_textures) { - textures_.erase(texture->texture_info.hash()); - FreeTexture(texture); + invalidated_resolve_textures_mutex_.lock(); + if (!invalidated_resolve_textures_.empty()) { + for (auto it = invalidated_resolve_textures_.begin(); + it != invalidated_resolve_textures_.end(); ++it) { + if (!FreeTexture(*it)) { + // Texture wasn't deleted because it's still in use. + pending_delete_textures_.push_back(*it); + } + } + + invalidated_resolve_textures_.clear(); } - invalidated_textures.clear(); + invalidated_resolve_textures_mutex_.unlock(); } } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h index b564fcc48..a78be6ed6 100644 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -50,6 +50,9 @@ class TextureCache { uintptr_t access_watch_handle; bool pending_invalidation; + + // Pointer to the latest usage fence. + std::shared_ptr in_flight_fence; }; struct TextureView { @@ -168,30 +171,32 @@ class TextureCache { VkDescriptorPool descriptor_pool_ = nullptr; VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr; - std::vector>> + std::unordered_map> in_flight_sets_; ui::vulkan::CircularBuffer staging_buffer_; std::unordered_map textures_; std::unordered_map samplers_; std::vector resolve_textures_; + std::vector pending_delete_textures_; std::mutex invalidated_textures_mutex_; std::vector* invalidated_textures_; std::vector invalidated_textures_sets_[2]; + std::mutex invalidated_resolve_textures_mutex_; + std::vector invalidated_resolve_textures_; + struct UpdateSetInfo { // Bitmap of all 32 fetch constants and whether they have been setup yet. // This prevents duplication across the vertex and pixel shader. 
uint32_t has_setup_fetch_mask; - uint32_t image_1d_write_count = 0; - VkDescriptorImageInfo image_1d_infos[32]; - uint32_t image_2d_write_count = 0; - VkDescriptorImageInfo image_2d_infos[32]; - uint32_t image_3d_write_count = 0; - VkDescriptorImageInfo image_3d_infos[32]; - uint32_t image_cube_write_count = 0; - VkDescriptorImageInfo image_cube_infos[32]; + uint32_t image_write_count = 0; + struct ImageSetInfo { + Dimension dimension; + uint32_t tf_binding; + VkDescriptorImageInfo info; + } image_infos[32]; } update_set_info_; }; From 7c5042add71ae6cc672ce6b5557ee3ef8636730c Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 3 May 2016 14:12:05 -0500 Subject: [PATCH 116/145] Vulkan CP: Add in separate swap-chain images Some other changes I can't remember --- .../gpu/vulkan/vulkan_command_processor.cc | 330 +++++++++++++----- .../gpu/vulkan/vulkan_command_processor.h | 14 +- .../gpu/vulkan/vulkan_graphics_system.cc | 22 +- 3 files changed, 278 insertions(+), 88 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index fd604733b..011c5b878 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -29,7 +29,7 @@ namespace vulkan { using namespace xe::gpu::xenos; using xe::ui::vulkan::CheckResult; -constexpr size_t kDefaultBufferCacheCapacity = 256 * 1024 * 1024; +constexpr size_t kDefaultBufferCacheCapacity = 128 * 1024 * 1024; VulkanCommandProcessor::VulkanCommandProcessor( VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state) @@ -82,6 +82,11 @@ bool VulkanCommandProcessor::SetupContext() { void VulkanCommandProcessor::ShutdownContext() { // TODO(benvanik): wait until idle. + if (swap_state_.front_buffer_texture) { + // Free swap chain images. 
+ DestroySwapImages(); + } + buffer_cache_.reset(); pipeline_cache_.reset(); render_cache_.reset(); @@ -131,59 +136,214 @@ void VulkanCommandProcessor::ReturnFromWait() { CommandProcessor::ReturnFromWait(); } +void VulkanCommandProcessor::CreateSwapImages(VkCommandBuffer setup_buffer, + VkExtent2D extents) { + VkImageCreateInfo image_info; + std::memset(&image_info, 0, sizeof(VkImageCreateInfo)); + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.format = VK_FORMAT_R8G8B8A8_UNORM; + image_info.extent = {extents.width, extents.height, 1}; + image_info.mipLevels = 1; + image_info.arrayLayers = 1; + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_info.usage = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_info.queueFamilyIndexCount = 0; + image_info.pQueueFamilyIndices = nullptr; + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkImage image_fb, image_bb; + auto status = vkCreateImage(*device_, &image_info, nullptr, &image_fb); + CheckResult(status, "vkCreateImage"); + + status = vkCreateImage(*device_, &image_info, nullptr, &image_bb); + CheckResult(status, "vkCreateImage"); + + // Bind memory to images. 
+ VkMemoryRequirements mem_requirements; + vkGetImageMemoryRequirements(*device_, image_fb, &mem_requirements); + fb_memory = device_->AllocateMemory(mem_requirements, 0); + assert_not_null(fb_memory); + + status = vkBindImageMemory(*device_, image_fb, fb_memory, 0); + CheckResult(status, "vkBindImageMemory"); + + vkGetImageMemoryRequirements(*device_, image_fb, &mem_requirements); + bb_memory = device_->AllocateMemory(mem_requirements, 0); + assert_not_null(bb_memory); + + status = vkBindImageMemory(*device_, image_bb, bb_memory, 0); + CheckResult(status, "vkBindImageMemory"); + + std::lock_guard lock(swap_state_.mutex); + swap_state_.front_buffer_texture = reinterpret_cast(image_fb); + swap_state_.back_buffer_texture = reinterpret_cast(image_bb); + + // Transition both images to general layout. + VkImageMemoryBarrier barrier; + std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); + barrier.srcAccessMask = 0; + barrier.dstAccessMask = 0; + barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image_fb; + barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + + vkCmdPipelineBarrier(setup_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); + + barrier.image = image_bb; + + vkCmdPipelineBarrier(setup_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); +} + +void VulkanCommandProcessor::DestroySwapImages() { + std::lock_guard lock(swap_state_.mutex); + vkDestroyImage(*device_, + reinterpret_cast(swap_state_.front_buffer_texture), + nullptr); + vkDestroyImage(*device_, + reinterpret_cast(swap_state_.back_buffer_texture), + nullptr); + vkFreeMemory(*device_, fb_memory, nullptr); + vkFreeMemory(*device_, bb_memory, nullptr); + + 
swap_state_.front_buffer_texture = 0; + swap_state_.back_buffer_texture = 0; + fb_memory = nullptr; + bb_memory = nullptr; +} + void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) { SCOPE_profile_cpu_f("gpu"); + // Build a final command buffer that copies the game's frontbuffer texture + // into our backbuffer texture. + VkCommandBuffer copy_commands = nullptr; + bool opened_batch; + if (command_buffer_pool_->has_open_batch()) { + copy_commands = command_buffer_pool_->AcquireEntry(); + opened_batch = false; + } else { + command_buffer_pool_->BeginBatch(); + copy_commands = command_buffer_pool_->AcquireEntry(); + current_batch_fence_.reset(new ui::vulkan::Fence(*device_)); + opened_batch = true; + } + + VkCommandBufferBeginInfo begin_info; + std::memset(&begin_info, 0, sizeof(begin_info)); + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + auto status = vkBeginCommandBuffer(copy_commands, &begin_info); + CheckResult(status, "vkBeginCommandBuffer"); + + if (!frontbuffer_ptr) { + // Trace viewer does this. + frontbuffer_ptr = last_copy_base_; + } + + if (!swap_state_.back_buffer_texture) { + CreateSwapImages(copy_commands, {frontbuffer_width, frontbuffer_height}); + } + auto swap_bb = reinterpret_cast(swap_state_.back_buffer_texture); + + // Issue the commands to copy the game's frontbuffer to our backbuffer. + auto texture = texture_cache_->LookupAddress( + frontbuffer_ptr, xe::round_up(frontbuffer_width, 32), + xe::round_up(frontbuffer_height, 32), TextureFormat::k_8_8_8_8); + if (texture) { + texture->in_flight_fence = current_batch_fence_; + + // Insert a barrier so the GPU finishes writing to the image. 
+ VkImageMemoryBarrier barrier; + std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); + barrier.srcAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.oldLayout = texture->image_layout; + barrier.newLayout = texture->image_layout; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = texture->image; + barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + + vkCmdPipelineBarrier(copy_commands, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); + + // Now issue a blit command. + VkImageBlit blit; + std::memset(&blit, 0, sizeof(VkImageBlit)); + blit.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + blit.srcOffsets[0] = {0, 0, 0}; + blit.srcOffsets[1] = {int32_t(frontbuffer_width), + int32_t(frontbuffer_height), 1}; + blit.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + blit.dstOffsets[0] = {0, 0, 0}; + blit.dstOffsets[1] = {int32_t(frontbuffer_width), + int32_t(frontbuffer_height), 1}; + + vkCmdBlitImage(copy_commands, texture->image, texture->image_layout, + swap_bb, VK_IMAGE_LAYOUT_GENERAL, 1, &blit, + VK_FILTER_LINEAR); + + std::lock_guard lock(swap_state_.mutex); + swap_state_.width = frontbuffer_width; + swap_state_.height = frontbuffer_height; + } + + status = vkEndCommandBuffer(copy_commands); + CheckResult(status, "vkEndCommandBuffer"); + // Queue up current command buffers. // TODO(benvanik): bigger batches. 
+ std::vector submit_buffers; if (current_command_buffer_) { if (current_render_state_) { render_cache_->EndRenderPass(); current_render_state_ = nullptr; } - auto status = vkEndCommandBuffer(current_command_buffer_); + status = vkEndCommandBuffer(current_command_buffer_); CheckResult(status, "vkEndCommandBuffer"); status = vkEndCommandBuffer(current_setup_buffer_); CheckResult(status, "vkEndCommandBuffer"); - command_buffer_pool_->EndBatch(*current_batch_fence_); + // TODO(DrChat): If the setup buffer is empty, don't bother queueing it up. + submit_buffers.push_back(current_setup_buffer_); + submit_buffers.push_back(current_command_buffer_); + + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + } + + submit_buffers.push_back(copy_commands); + if (!submit_buffers.empty()) { // TODO(benvanik): move to CP or to host (trace dump, etc). // This only needs to surround a vkQueueSubmit. if (queue_mutex_) { queue_mutex_->lock(); } - // TODO(DrChat): If setup buffer is empty, don't bother queueing it up. - VkCommandBuffer command_buffers[] = { - current_setup_buffer_, current_command_buffer_, - }; - VkSubmitInfo submit_info; + std::memset(&submit_info, 0, sizeof(VkSubmitInfo)); submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = nullptr; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = nullptr; - submit_info.commandBufferCount = 2; - submit_info.pCommandBuffers = command_buffers; - submit_info.signalSemaphoreCount = 0; - submit_info.pSignalSemaphores = nullptr; - if (queue_mutex_) { - // queue_mutex_->lock(); - } + submit_info.commandBufferCount = uint32_t(submit_buffers.size()); + submit_info.pCommandBuffers = submit_buffers.data(); status = vkQueueSubmit(queue_, 1, &submit_info, *current_batch_fence_); - if (queue_mutex_) { - // queue_mutex_->unlock(); - } CheckResult(status, "vkQueueSubmit"); - // TODO(DrChat): Disable this completely. 
- VkFence fences[] = {*current_batch_fence_}; - status = vkWaitForFences(*device_, 1, fences, true, -1); - CheckResult(status, "vkWaitForFences"); - if (device_->is_renderdoc_attached() && capturing_) { device_->EndRenderDocFrameCapture(); capturing_ = false; @@ -197,45 +357,28 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, if (queue_mutex_) { queue_mutex_->unlock(); } + } - // Scavenging. - current_command_buffer_ = nullptr; - current_setup_buffer_ = nullptr; + command_buffer_pool_->EndBatch(current_batch_fence_); + + // TODO(DrChat): Remove this. + VkFence fences[] = { *current_batch_fence_ }; + vkWaitForFences(*device_, 1, fences, true, -1); + + // Scavenging. + { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_i( + "gpu", + "xe::gpu::vulkan::VulkanCommandProcessor::PerformSwap Scavenging"); +#endif // FINE_GRAINED_DRAW_SCOPES command_buffer_pool_->Scavenge(); texture_cache_->Scavenge(); - current_batch_fence_ = nullptr; - - // TODO: Remove this when we stop waiting on the queue. - buffer_cache_->ClearCache(); + buffer_cache_->Scavenge(); } - if (!frontbuffer_ptr) { - if (!last_copy_base_) { - // Nothing to draw. - return; - } - - // Trace viewer does this. - frontbuffer_ptr = last_copy_base_; - } - - auto texture = texture_cache_->LookupAddress( - frontbuffer_ptr, xe::round_up(frontbuffer_width, 32), - xe::round_up(frontbuffer_height, 32), TextureFormat::k_8_8_8_8); - // There shouldn't be a case where the texture is null. - assert_not_null(texture); - - if (texture) { - std::lock_guard lock(swap_state_.mutex); - swap_state_.width = frontbuffer_width; - swap_state_.height = frontbuffer_height; - swap_state_.back_buffer_texture = - reinterpret_cast(texture->image); - } - - // Remove any dead textures, etc. 
- texture_cache_->Scavenge(); + current_batch_fence_ = nullptr; } Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type, @@ -331,16 +474,7 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, started_command_buffer = true; } auto command_buffer = current_command_buffer_; - - // Upload and set descriptors for all textures. - // We do this outside of the render pass so the texture cache can upload and - // convert textures. - // Setup buffer may be flushed to GPU if the texture cache needs it. - auto samplers = - PopulateSamplers(current_setup_buffer_, vertex_shader, pixel_shader); - if (!samplers) { - return false; - } + auto setup_buffer = current_setup_buffer_; // Begin the render pass. // This will setup our framebuffer and begin the pass in the command buffer. @@ -362,6 +496,9 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, } } + // Update the render cache's tracking state. + render_cache_->UpdateState(); + // Configure the pipeline for drawing. // This encodes all render state (blend, depth, etc), our shader stages, // and our vertex input layout. @@ -373,6 +510,13 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, started_command_buffer) { vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } else if (pipeline_status == PipelineCache::UpdateStatus::kError) { + render_cache_->EndRenderPass(); + command_buffer_pool_->CancelBatch(); + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + current_batch_fence_ = nullptr; + return false; } pipeline_cache_->SetDynamicState(command_buffer, started_command_buffer); @@ -407,9 +551,17 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, } // Bind samplers/textures. - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_cache_->pipeline_layout(), 1, 1, &samplers, - 0, nullptr); + // Uploads all textures that need it. 
+ // Setup buffer may be flushed to GPU if the texture cache needs it. + if (!PopulateSamplers(command_buffer, setup_buffer, vertex_shader, + pixel_shader)) { + render_cache_->EndRenderPass(); + command_buffer_pool_->CancelBatch(); + current_command_buffer_ = nullptr; + current_setup_buffer_ = nullptr; + current_batch_fence_ = nullptr; + return false; + } // Actually issue the draw. if (!index_buffer_info) { @@ -444,7 +596,7 @@ bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer, // These are optional, and if none are defined 0 will be returned. auto constant_offsets = buffer_cache_->UploadConstantRegisters( vertex_shader->constant_register_map(), - pixel_shader->constant_register_map()); + pixel_shader->constant_register_map(), current_batch_fence_); if (constant_offsets.first == VK_WHOLE_SIZE || constant_offsets.second == VK_WHOLE_SIZE) { // Shader wants constants but we couldn't upload them. @@ -497,8 +649,8 @@ bool VulkanCommandProcessor::PopulateIndexBuffer( size_t source_length = info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t) : sizeof(uint16_t)); - auto buffer_ref = - buffer_cache_->UploadIndexBuffer(source_ptr, source_length, info.format); + auto buffer_ref = buffer_cache_->UploadIndexBuffer( + source_ptr, source_length, info.format, current_batch_fence_); if (buffer_ref.second == VK_WHOLE_SIZE) { // Failed to upload buffer. return false; @@ -523,6 +675,11 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( #endif // FINE_GRAINED_DRAW_SCOPES auto& vertex_bindings = vertex_shader->vertex_bindings(); + if (vertex_bindings.empty()) { + // No bindings. 
+ return true; + } + assert_true(vertex_bindings.size() <= 32); VkBuffer all_buffers[32]; VkDeviceSize all_buffer_offsets[32]; @@ -556,8 +713,8 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( const void* source_ptr = memory_->TranslatePhysical(fetch->address << 2); size_t source_length = valid_range; - auto buffer_ref = - buffer_cache_->UploadVertexBuffer(source_ptr, source_length); + auto buffer_ref = buffer_cache_->UploadVertexBuffer( + source_ptr, source_length, current_batch_fence_); if (buffer_ref.second == VK_WHOLE_SIZE) { // Failed to upload buffer. return false; @@ -576,9 +733,9 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( return true; } -VkDescriptorSet VulkanCommandProcessor::PopulateSamplers( - VkCommandBuffer command_buffer, VulkanShader* vertex_shader, - VulkanShader* pixel_shader) { +bool VulkanCommandProcessor::PopulateSamplers( + VkCommandBuffer command_buffer, VkCommandBuffer setup_buffer, + VulkanShader* vertex_shader, VulkanShader* pixel_shader) { #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES @@ -588,10 +745,14 @@ VkDescriptorSet VulkanCommandProcessor::PopulateSamplers( pixel_shader->texture_bindings()); if (!descriptor_set) { // Unable to bind set. - return nullptr; + return false; } - return descriptor_set; + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_cache_->pipeline_layout(), 1, 1, + &descriptor_set, 0, nullptr); + + return true; } bool VulkanCommandProcessor::IssueCopy() { @@ -760,6 +921,9 @@ bool VulkanCommandProcessor::IssueCopy() { tex_info.size_2d.input_pitch = copy_dest_pitch * 4; auto texture = texture_cache_->DemandResolveTexture( tex_info, ColorFormatToTextureFormat(copy_dest_format), nullptr); + assert_not_null(texture); + texture->in_flight_fence = current_batch_fence_; + if (texture->image_layout == VK_IMAGE_LAYOUT_UNDEFINED) { // Transition the image to a general layout. 
VkImageMemoryBarrier image_barrier; @@ -820,10 +984,12 @@ bool VulkanCommandProcessor::IssueCopy() { : static_cast(depth_format); switch (copy_command) { case CopyCommand::kRaw: + /* render_cache_->RawCopyToImage(command_buffer, edram_base, texture->image, texture->image_layout, copy_src_select <= 3, resolve_offset, resolve_extent); break; + */ case CopyCommand::kConvert: render_cache_->BlitToImage( command_buffer, edram_base, surface_pitch, resolve_extent.height, diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 287e4f65e..4a7788e09 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -61,6 +61,9 @@ class VulkanCommandProcessor : public CommandProcessor { void PrepareForWait() override; void ReturnFromWait() override; + void CreateSwapImages(VkCommandBuffer setup_buffer, VkExtent2D extents); + void DestroySwapImages(); + void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) override; @@ -77,13 +80,18 @@ class VulkanCommandProcessor : public CommandProcessor { IndexBufferInfo* index_buffer_info); bool PopulateVertexBuffers(VkCommandBuffer command_buffer, VulkanShader* vertex_shader); - VkDescriptorSet PopulateSamplers(VkCommandBuffer command_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader); + bool PopulateSamplers(VkCommandBuffer command_buffer, + VkCommandBuffer setup_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader); bool IssueCopy() override; xe::ui::vulkan::VulkanDevice* device_ = nullptr; + // front buffer / back buffer memory + VkDeviceMemory fb_memory = nullptr; + VkDeviceMemory bb_memory = nullptr; + // TODO(benvanik): abstract behind context? // Queue used to submit work. This may be a dedicated queue for the command // processor and no locking will be required for use. 
If a dedicated queue diff --git a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc index 27b2ff073..08c6120d7 100644 --- a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc +++ b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc @@ -76,6 +76,23 @@ void VulkanGraphicsSystem::Swap(xe::ui::UIEvent* e) { auto swap_chain = display_context_->swap_chain(); auto copy_cmd_buffer = swap_chain->copy_cmd_buffer(); + auto front_buffer = + reinterpret_cast(swap_state.front_buffer_texture); + + VkImageMemoryBarrier barrier; + std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = front_buffer; + barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + vkCmdPipelineBarrier(copy_cmd_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); VkImageBlit region; region.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; @@ -88,9 +105,8 @@ void VulkanGraphicsSystem::Swap(xe::ui::UIEvent* e) { region.dstOffsets[1] = {static_cast(swap_chain->surface_width()), static_cast(swap_chain->surface_height()), 1}; - vkCmdBlitImage(copy_cmd_buffer, - reinterpret_cast(swap_state.front_buffer_texture), - VK_IMAGE_LAYOUT_GENERAL, swap_chain->surface_image(), + vkCmdBlitImage(copy_cmd_buffer, front_buffer, VK_IMAGE_LAYOUT_GENERAL, + swap_chain->surface_image(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion, VK_FILTER_LINEAR); } From 79f1193130a3db33843ff546390333bf67e75b8c Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Thu, 5 May 2016 23:42:36 -0500 Subject: [PATCH 117/145] Vulkan CP: Fix calculating an invalid copy destination base address when sizeof(texel) != 4 --- src/xenia/gpu/texture_info.h | 60 +++++++++++++++++++ .../gpu/vulkan/vulkan_command_processor.cc | 15 +++-- 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/src/xenia/gpu/texture_info.h b/src/xenia/gpu/texture_info.h index 500f22bb3..0cb2ed2ba 100644 --- a/src/xenia/gpu/texture_info.h +++ b/src/xenia/gpu/texture_info.h @@ -88,6 +88,66 @@ enum class TextureFormat : uint32_t { kUnknown = 0xFFFFFFFFu, }; +inline size_t GetTexelSize(TextureFormat format) { + switch (format) { + case TextureFormat::k_1_5_5_5: + return 2; + break; + case TextureFormat::k_2_10_10_10: + return 4; + break; + case TextureFormat::k_4_4_4_4: + return 2; + break; + case TextureFormat::k_5_6_5: + return 2; + break; + case TextureFormat::k_8: + return 1; + break; + case TextureFormat::k_8_8: + return 2; + break; + case TextureFormat::k_8_8_8_8: + return 4; + break; + case TextureFormat::k_16: + return 4; + break; + case TextureFormat::k_16_FLOAT: + return 4; + break; + case TextureFormat::k_16_16: + return 4; + break; + case TextureFormat::k_16_16_FLOAT: + return 4; + break; + case TextureFormat::k_16_16_16_16: + return 8; + break; + case TextureFormat::k_16_16_16_16_FLOAT: + return 8; + break; + case TextureFormat::k_32_FLOAT: + return 4; + break; + case TextureFormat::k_32_32_FLOAT: + return 8; + break; + case TextureFormat::k_32_32_32_32_FLOAT: + return 16; + break; + case TextureFormat::k_10_11_11: + case TextureFormat::k_11_11_10: + return 4; + break; + default: + assert_unhandled_case(format); + return 0; + } +} + inline TextureFormat ColorFormatToTextureFormat(ColorFormat color_format) { return static_cast(color_format); } diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 011c5b878..17f83f82c 100644 --- 
a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -362,7 +362,7 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, command_buffer_pool_->EndBatch(current_batch_fence_); // TODO(DrChat): Remove this. - VkFence fences[] = { *current_batch_fence_ }; + VkFence fences[] = {*current_batch_fence_}; vkWaitForFences(*device_, 1, fences, true, -1); // Scavenging. @@ -733,9 +733,10 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( return true; } -bool VulkanCommandProcessor::PopulateSamplers( - VkCommandBuffer command_buffer, VkCommandBuffer setup_buffer, - VulkanShader* vertex_shader, VulkanShader* pixel_shader) { +bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, + VkCommandBuffer setup_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader) { #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES @@ -829,11 +830,13 @@ bool VulkanCommandProcessor::IssueCopy() { window_offset_y |= 0x8000; } + size_t read_size = GetTexelSize(ColorFormatToTextureFormat(copy_dest_format)); + // Adjust the copy base offset to point to the beginning of the texture, so // we don't run into hiccups down the road (e.g. resolving the last part going // backwards). - int32_t dest_offset = window_offset_y * copy_dest_pitch * 4; - dest_offset += window_offset_x * 32 * 4; + int32_t dest_offset = window_offset_y * copy_dest_pitch * int(read_size); + dest_offset += window_offset_x * 32 * int(read_size); copy_dest_base += dest_offset; // HACK: vertices to use are always in vf0. From c06a7cdf81bd3bfb54e842541277610f893ef17c Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sat, 7 May 2016 19:17:56 -0500 Subject: [PATCH 118/145] BaseFencedPool::has_open_batch (and other uncommitted changes) --- src/xenia/ui/vulkan/fenced_pools.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/xenia/ui/vulkan/fenced_pools.h b/src/xenia/ui/vulkan/fenced_pools.h index 3a7bb01c4..a481edf10 100644 --- a/src/xenia/ui/vulkan/fenced_pools.h +++ b/src/xenia/ui/vulkan/fenced_pools.h @@ -14,6 +14,7 @@ #include "xenia/base/assert.h" #include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_util.h" namespace xe { namespace ui { @@ -40,13 +41,15 @@ class BaseFencedPool { // True if one or more batches are still pending on the GPU. bool has_pending() const { return pending_batch_list_head_ != nullptr; } + // True if a batch is open. + bool has_open_batch() const { return open_batch_ != nullptr; } // Checks all pending batches for completion and scavenges their entries. // This should be called as frequently as reasonable. void Scavenge() { while (pending_batch_list_head_) { auto batch = pending_batch_list_head_; - if (vkGetFenceStatus(device_, batch->fence) == VK_SUCCESS) { + if (vkGetFenceStatus(device_, *batch->fence) == VK_SUCCESS) { // Batch has completed. Reclaim. pending_batch_list_head_ = batch->next; if (batch == pending_batch_list_tail_) { @@ -132,7 +135,7 @@ class BaseFencedPool { // Ends the current batch using the given fence to indicate when the batch // has completed execution on the GPU. - void EndBatch(VkFence fence) { + void EndBatch(std::shared_ptr fence) { assert_not_null(open_batch_); // Close and see if we have anything. @@ -194,7 +197,7 @@ class BaseFencedPool { Batch* next; Entry* entry_list_head; Entry* entry_list_tail; - VkFence fence; + std::shared_ptr fence; }; Batch* free_batch_list_head_ = nullptr; From 2bb52ef86b8580f5819c81d09a1083f48bc3bc91 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 15 May 2016 12:01:38 -0500 Subject: [PATCH 119/145] SPIR-V: WIP shader compiler / optimizations / alpha test implementation --- src/xenia/gpu/premake5.lua | 2 + src/xenia/gpu/shader.h | 11 ++ src/xenia/gpu/spirv/compiler.cc | 36 +++++ src/xenia/gpu/spirv/compiler.h | 41 ++++++ src/xenia/gpu/spirv/compiler_pass.h | 37 +++++ .../passes/control_flow_analysis_pass.cpp | 30 ++++ .../spirv/passes/control_flow_analysis_pass.h | 34 +++++ .../control_flow_simplification_pass.cc | 48 ++++++ .../passes/control_flow_simplification_pass.h | 34 +++++ src/xenia/gpu/spirv_shader_translator.cc | 137 ++++++++++++++---- src/xenia/gpu/spirv_shader_translator.h | 2 + 11 files changed, 383 insertions(+), 29 deletions(-) create mode 100644 src/xenia/gpu/spirv/compiler.cc create mode 100644 src/xenia/gpu/spirv/compiler.h create mode 100644 src/xenia/gpu/spirv/compiler_pass.h create mode 100644 src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp create mode 100644 src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h create mode 100644 src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc create mode 100644 src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h diff --git a/src/xenia/gpu/premake5.lua b/src/xenia/gpu/premake5.lua index 1f6a1eea6..1c7870edc 100644 --- a/src/xenia/gpu/premake5.lua +++ b/src/xenia/gpu/premake5.lua @@ -22,6 +22,8 @@ project("xenia-gpu") project_root.."/third_party/gflags/src", }) local_platform_files() + local_platform_files("spirv") + local_platform_files("spirv/passes") group("src") project("xenia-gpu-shader-compiler") diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 476369e53..95abe4dfa 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -99,6 +99,17 @@ struct InstructionResult { bool has_all_writes() const { return write_mask[0] && write_mask[1] && write_mask[2] && write_mask[3]; } + // Returns number of components written + uint32_t num_writes() const { + uint32_t total = 0; + for 
(int i = 0; i < 4; i++) { + if (write_mask[i]) { + total++; + } + } + + return total; + } // Returns true if any non-constant components are written. bool stores_non_constants() const { for (int i = 0; i < 4; ++i) { diff --git a/src/xenia/gpu/spirv/compiler.cc b/src/xenia/gpu/spirv/compiler.cc new file mode 100644 index 000000000..d31b36996 --- /dev/null +++ b/src/xenia/gpu/spirv/compiler.cc @@ -0,0 +1,36 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/spirv/compiler.h" + +namespace xe { +namespace gpu { +namespace spirv { + +Compiler::Compiler() {} + +void Compiler::AddPass(std::unique_ptr pass) { + compiler_passes_.push_back(std::move(pass)); +} + +bool Compiler::Compile(spv::Module* module) { + for (auto& pass : compiler_passes_) { + if (!pass->Run(module)) { + return false; + } + } + + return true; +} + +void Compiler::Reset() { compiler_passes_.clear(); } + +} // namespace spirv +} // namespace gpu +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/spirv/compiler.h b/src/xenia/gpu/spirv/compiler.h new file mode 100644 index 000000000..fd27969ee --- /dev/null +++ b/src/xenia/gpu/spirv/compiler.h @@ -0,0 +1,41 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SPIRV_COMPILER_H_ +#define XENIA_GPU_SPIRV_COMPILER_H_ + +#include "xenia/base/arena.h" +#include "xenia/gpu/spirv/compiler_pass.h" + +#include "third_party/glslang-spirv/SpvBuilder.h" +#include "third_party/spirv/GLSL.std.450.hpp11" + +namespace xe { +namespace gpu { +namespace spirv { + +// SPIR-V Compiler. Designed to optimize SPIR-V code before feeding it into the +// drivers. +class Compiler { + public: + Compiler(); + + void AddPass(std::unique_ptr pass); + void Reset(); + bool Compile(spv::Module* module); + + private: + std::vector> compiler_passes_; +}; + +} // namespace spirv +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_SPIRV_COMPILER_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv/compiler_pass.h b/src/xenia/gpu/spirv/compiler_pass.h new file mode 100644 index 000000000..0d81aeeee --- /dev/null +++ b/src/xenia/gpu/spirv/compiler_pass.h @@ -0,0 +1,37 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SPIRV_COMPILER_PASS_H_ +#define XENIA_GPU_SPIRV_COMPILER_PASS_H_ + +#include "xenia/base/arena.h" + +#include "third_party/glslang-spirv/SpvBuilder.h" +#include "third_party/spirv/GLSL.std.450.hpp11" + +namespace xe { +namespace gpu { +namespace spirv { + +class CompilerPass { + public: + CompilerPass() = default; + virtual ~CompilerPass() {} + + virtual bool Run(spv::Module* module) = 0; + + private: + xe::Arena ir_arena_; +}; + +} // namespace spirv +} // namespace gpu +} // namespace xe + +#endif \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp new file mode 100644 index 000000000..4d719f769 --- /dev/null +++ b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp @@ -0,0 +1,30 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h" + +namespace xe { +namespace gpu { +namespace spirv { + +ControlFlowAnalysisPass::ControlFlowAnalysisPass() {} + +bool ControlFlowAnalysisPass::Run(spv::Module* module) { + for (auto function : module->getFunctions()) { + // For each OpBranchConditional, see if we can find a point where control + // flow converges and then append an OpSelectionMerge. 
+ // Potential problems: while loops constructed from branch instructions + } + + return true; +} + +} // namespace spirv +} // namespace gpu +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h new file mode 100644 index 000000000..6b279e251 --- /dev/null +++ b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h @@ -0,0 +1,34 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_ +#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_ + +#include "xenia/gpu/spirv/compiler_pass.h" + +namespace xe { +namespace gpu { +namespace spirv { + +// Control-flow analysis pass. Runs through control-flow and adds merge opcodes +// where necessary. 
+class ControlFlowAnalysisPass : public CompilerPass { + public: + ControlFlowAnalysisPass(); + + bool Run(spv::Module* module) override; + + private: +}; + +} // namespace spirv +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc new file mode 100644 index 000000000..7b01aa5aa --- /dev/null +++ b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc @@ -0,0 +1,48 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h" + +namespace xe { +namespace gpu { +namespace spirv { + +ControlFlowSimplificationPass::ControlFlowSimplificationPass() {} + +bool ControlFlowSimplificationPass::Run(spv::Module* module) { + for (auto function : module->getFunctions()) { + // Walk through the blocks in the function and merge any blocks which are + // unconditionally dominated. + for (auto it = function->getBlocks().end() - 1; + it != function->getBlocks().begin() - 1;) { + auto block = *it; + if (!block->isUnreachable() && block->getPredecessors().size() == 1) { + auto prev_block = block->getPredecessors()[0]; + auto last_instr = + prev_block->getInstruction(prev_block->getInstructionCount() - 1); + if (last_instr->getOpCode() == spv::Op::OpBranch) { + if (prev_block->getSuccessors().size() == 1 && + prev_block->getSuccessors()[0] == block) { + // We're dominated by this block. Merge into it. 
+ prev_block->merge(block); + block->setUnreachable(); + } + } + } + + --it; + } + } + + return true; +} + +} // namespace spirv +} // namespace gpu +} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h new file mode 100644 index 000000000..f851d24f1 --- /dev/null +++ b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h @@ -0,0 +1,34 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2016 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ +#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ + +#include "xenia/gpu/spirv/compiler_pass.h" + +namespace xe { +namespace gpu { +namespace spirv { + +// Control-flow simplification pass. Combines adjacent blocks and marks +// any unreachable blocks. 
+class ControlFlowSimplificationPass : public CompilerPass { + public: + ControlFlowSimplificationPass(); + + bool Run(spv::Module* module) override; + + private: +}; + +} // namespace spirv +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 855df73f7..86bddcd80 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -12,16 +12,24 @@ #include #include "xenia/base/logging.h" +#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h" +#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h" namespace xe { namespace gpu { using namespace ucode; +constexpr int kMaxInterpolators = 16; +constexpr int kMaxTemporaryRegisters = 64; + using spv::GLSLstd450; using spv::Id; using spv::Op; -SpirvShaderTranslator::SpirvShaderTranslator() = default; +SpirvShaderTranslator::SpirvShaderTranslator() { + compiler_.AddPass(std::make_unique()); + compiler_.AddPass(std::make_unique()); +} SpirvShaderTranslator::~SpirvShaderTranslator() = default; @@ -331,11 +339,19 @@ void SpirvShaderTranslator::StartTranslation() { ps_param_gen_idx, b.makeUintConstant(-1)); spv::Builder::If ifb(cond, b); - // Index is specified - auto reg_ptr = b.createAccessChain(spv::StorageClass::StorageClassFunction, - registers_ptr_, - std::vector({ps_param_gen_idx})); - b.createStore(param, reg_ptr); + // FYI: We do this instead of r[ps_param_gen_idx] because that causes + // nvidia to move all registers into local memory (slow!) 
+ for (uint32_t i = 0; i < kMaxInterpolators; i++) { + auto reg_ptr = b.createAccessChain( + spv::StorageClass::StorageClassFunction, registers_ptr_, + std::vector({b.makeUintConstant(i)})); + + auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, ps_param_gen_idx, + b.makeUintConstant(i)); + auto reg = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, cond, param, + b.createLoad(reg_ptr)); + b.createStore(reg, reg_ptr); + } ifb.makeEndIf(); } @@ -406,28 +422,64 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { b.createStore(p, pos_); } else { // Alpha test - auto alpha_test_x = b.createCompositeExtract(push_consts_, float_type_, - std::vector{2, 0}); - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, alpha_test_x, - b.makeFloatConstant(1.f)); + auto alpha_test_enabled = b.createCompositeExtract( + push_consts_, float_type_, std::vector{2, 0}); + auto alpha_test_func = b.createCompositeExtract( + push_consts_, float_type_, std::vector{2, 1}); + auto alpha_test_ref = b.createCompositeExtract(push_consts_, float_type_, + std::vector{2, 2}); + alpha_test_func = + b.createUnaryOp(spv::Op::OpConvertFToU, uint_type_, alpha_test_func); + auto oC0_alpha = b.createCompositeExtract(frag_outputs_, float_type_, + std::vector({0, 3})); + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, + alpha_test_enabled, b.makeFloatConstant(1.f)); spv::Builder::If alpha_if(cond, b); - // TODO(DrChat): Apply alpha test. 
+ std::vector switch_segments; + b.makeSwitch(alpha_test_func, 8, std::vector({0, 1, 2, 3, 4, 5, 6, 7}), + std::vector({0, 1, 2, 3, 4, 5, 6, 7}), 7, + switch_segments); + + const static spv::Op alpha_op_map[] = { + spv::Op::OpNop, + spv::Op::OpFOrdGreaterThanEqual, + spv::Op::OpFOrdNotEqual, + spv::Op::OpFOrdGreaterThan, + spv::Op::OpFOrdLessThanEqual, + spv::Op::OpFOrdEqual, + spv::Op::OpFOrdLessThan, + spv::Op::OpNop, + }; + // if (alpha_func == 0) passes = false; - // if (alpha_func == 1 && oC[0].a < alpha_ref) passes = true; - // if (alpha_func == 2 && oC[0].a == alpha_ref) passes = true; - // if (alpha_func == 3 && oC[0].a <= alpha_ref) passes = true; - // if (alpha_func == 4 && oC[0].a > alpha_ref) passes = true; - // if (alpha_func == 5 && oC[0].a != alpha_ref) passes = true; - // if (alpha_func == 6 && oC[0].a >= alpha_ref) passes = true; + b.nextSwitchSegment(switch_segments, 0); + b.makeDiscard(); + b.addSwitchBreak(); + + for (int i = 1; i < 7; i++) { + b.nextSwitchSegment(switch_segments, i); + auto cond = + b.createBinOp(alpha_op_map[i], bool_type_, oC0_alpha, alpha_test_ref); + spv::Builder::If discard_if(cond, b); + b.makeDiscard(); + discard_if.makeEndIf(); + b.addSwitchBreak(); + } + // if (alpha_func == 7) passes = true; + b.nextSwitchSegment(switch_segments, 7); + b.endSwitch(switch_segments); alpha_if.makeEndIf(); } b.makeReturn(false); + // Compile the spv IR + compiler_.Compile(b.getModule()); + std::vector spirv_words; b.dump(spirv_words); @@ -555,8 +607,7 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( auto next_block = cf_blocks_[instr.dword_index + 1]; if (next_block.prev_dominates) { - b.createNoResultOp(spv::Op::OpSelectionMerge, - {next_block.block->getId(), 0}); + b.createSelectionMerge(next_block.block, spv::SelectionControlMaskNone); } b.createConditionalBranch(cond, body, next_block.block); } break; @@ -570,8 +621,7 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( auto next_block = cf_blocks_[instr.dword_index 
+ 1]; if (next_block.prev_dominates) { - b.createNoResultOp(spv::Op::OpSelectionMerge, - {next_block.block->getId(), 0}); + b.createSelectionMerge(next_block.block, spv::SelectionControlMaskNone); } b.createConditionalBranch(cond, body, next_block.block); @@ -756,8 +806,8 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( predicated_block_cond_ = instr.predicate_condition; predicated_block_end_ = &b.makeNewBlock(); - b.createNoResultOp(spv::Op::OpSelectionMerge, - {predicated_block_end_->getId(), 0}); + b.createSelectionMerge(predicated_block_end_, + spv::SelectionControlMaskNone); b.createConditionalBranch(pred_cond, block, predicated_block_end_); b.setBuildPoint(block); } @@ -771,6 +821,7 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( auto shader_vertex_id = b.createLoad(vertex_id_); auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, vertex_id, shader_vertex_id); + cond = b.smearScalar(spv::NoPrecision, cond, vec4_bool_type_); // Skip loading if it's an indexed fetch. auto vertex_ptr = vertex_binding_map_[instr.operands[1].storage_index] @@ -778,6 +829,30 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( assert_not_zero(vertex_ptr); auto vertex = b.createLoad(vertex_ptr); + switch (instr.attributes.data_format) { + case VertexFormat::k_8_8_8_8: + case VertexFormat::k_16_16: + case VertexFormat::k_16_16_16_16: + case VertexFormat::k_16_16_16_16_FLOAT: + case VertexFormat::k_32: + case VertexFormat::k_32_32: + case VertexFormat::k_32_32_32_32: + case VertexFormat::k_32_FLOAT: + case VertexFormat::k_32_32_FLOAT: + case VertexFormat::k_32_32_32_FLOAT: + case VertexFormat::k_32_32_32_32_FLOAT: + // These are handled, for now. + break; + + case VertexFormat::k_10_11_11: { + // No conversion needed. Natively supported. + } break; + + case VertexFormat::k_11_11_10: { + // This needs to be converted. 
+ } break; + } + auto vertex_components = b.getNumComponents(vertex); Id alt_vertex = 0; switch (vertex_components) { @@ -836,8 +911,8 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( predicated_block_cond_ = instr.predicate_condition; predicated_block_end_ = &b.makeNewBlock(); - b.createNoResultOp(spv::Op::OpSelectionMerge, - {predicated_block_end_->getId(), 0}); + b.createSelectionMerge(predicated_block_end_, + spv::SelectionControlMaskNone); b.createConditionalBranch(pred_cond, block, predicated_block_end_); b.setBuildPoint(block); } @@ -940,8 +1015,8 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( predicated_block_cond_ = instr.predicate_condition; predicated_block_end_ = &b.makeNewBlock(); - b.createNoResultOp(spv::Op::OpSelectionMerge, - {predicated_block_end_->getId(), 0}); + b.createSelectionMerge(predicated_block_end_, + spv::SelectionControlMaskNone); b.createConditionalBranch(pred_cond, block, predicated_block_end_); b.setBuildPoint(block); } @@ -1170,6 +1245,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto c_and = b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); + c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); // p0 @@ -1194,6 +1270,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto c_and = b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); + c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); // p0 @@ -1218,6 +1295,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto c_and = b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); + c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); 
auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); // p0 @@ -1242,6 +1320,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto c_and = b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); + c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); // p0 @@ -1376,8 +1455,8 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( predicated_block_cond_ = instr.predicate_condition; predicated_block_end_ = &b.makeNewBlock(); - b.createNoResultOp(spv::Op::OpSelectionMerge, - {predicated_block_end_->getId(), 0}); + b.createSelectionMerge(predicated_block_end_, + spv::SelectionControlMaskNone); b.createConditionalBranch(pred_cond, block, predicated_block_end_); b.setBuildPoint(block); } diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 39d3899c1..b6a761a24 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -17,6 +17,7 @@ #include "third_party/glslang-spirv/SpvBuilder.h" #include "third_party/spirv/GLSL.std.450.hpp11" #include "xenia/gpu/shader_translator.h" +#include "xenia/gpu/spirv/compiler.h" #include "xenia/ui/spirv/spirv_disassembler.h" #include "xenia/ui/spirv/spirv_validator.h" @@ -97,6 +98,7 @@ class SpirvShaderTranslator : public ShaderTranslator { xe::ui::spirv::SpirvDisassembler disassembler_; xe::ui::spirv::SpirvValidator validator_; + xe::gpu::spirv::Compiler compiler_; // True if there's an open predicated block bool open_predicated_block_ = false; From b9a40d1a00f8f5c6c6ea1cfa98ed1c0c9acdba6c Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 15 May 2016 12:08:29 -0500 Subject: [PATCH 120/145] Use Vulkan as the default graphics backend. 
--- src/xenia/app/xenia_main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index bc9b662c5..80ed35551 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -78,7 +78,7 @@ std::unique_ptr CreateGraphicsSystem() { std::unique_ptr best; best = std::unique_ptr( - new xe::gpu::gl4::GL4GraphicsSystem()); + new xe::gpu::vulkan::VulkanGraphicsSystem()); if (best) { return best; } From 44284a780c80d0009e0b31c92cde0c57d38b76c2 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 15 May 2016 14:27:44 -0500 Subject: [PATCH 121/145] SPIR-V: Misc. IR utility functions --- third_party/glslang-spirv/SpvBuilder.cpp | 6 ++ third_party/glslang-spirv/SpvBuilder.h | 4 +- third_party/glslang-spirv/spvIR.h | 77 +++++++++++++++++++++++- 3 files changed, 85 insertions(+), 2 deletions(-) diff --git a/third_party/glslang-spirv/SpvBuilder.cpp b/third_party/glslang-spirv/SpvBuilder.cpp index 0a2fa2139..13a6c946a 100644 --- a/third_party/glslang-spirv/SpvBuilder.cpp +++ b/third_party/glslang-spirv/SpvBuilder.cpp @@ -1166,6 +1166,7 @@ void Builder::createMemoryBarrier(unsigned executionScope, unsigned memorySemant // An opcode that has one operands, a result id, and a type Id Builder::createUnaryOp(Op opCode, Id typeId, Id operand) { + assert(operand != 0); Instruction* op = new Instruction(getUniqueId(), typeId, opCode); op->addIdOperand(operand); buildPoint->addInstruction(std::unique_ptr(op)); @@ -1175,6 +1176,8 @@ Id Builder::createUnaryOp(Op opCode, Id typeId, Id operand) Id Builder::createBinOp(Op opCode, Id typeId, Id left, Id right) { + assert(left != 0); + assert(right != 0); Instruction* op = new Instruction(getUniqueId(), typeId, opCode); op->addIdOperand(left); op->addIdOperand(right); @@ -1185,6 +1188,9 @@ Id Builder::createBinOp(Op opCode, Id typeId, Id left, Id right) Id Builder::createTriOp(Op opCode, Id typeId, Id op1, Id op2, Id op3) { + assert(op1 != 0); + assert(op2 != 0); 
+ assert(op3 != 0); Instruction* op = new Instruction(getUniqueId(), typeId, opCode); op->addIdOperand(op1); op->addIdOperand(op2); diff --git a/third_party/glslang-spirv/SpvBuilder.h b/third_party/glslang-spirv/SpvBuilder.h index d6dc61218..7eae4fe91 100644 --- a/third_party/glslang-spirv/SpvBuilder.h +++ b/third_party/glslang-spirv/SpvBuilder.h @@ -93,6 +93,8 @@ public: return id; } + Module* getModule() { return &module; } + // For creating new types (will return old type if the requested one was already made). Id makeVoidType(); Id makeBoolType(); @@ -517,6 +519,7 @@ public: void createBranch(Block* block); void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock); void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control); + void createSelectionMerge(Block* mergeBlock, unsigned int control); protected: Id makeIntConstant(Id typeId, unsigned value, bool specConstant); @@ -527,7 +530,6 @@ public: void transferAccessChainSwizzle(bool dynamic); void simplifyAccessChainSwizzle(); void createAndSetNoPredecessorBlock(const char*); - void createSelectionMerge(Block* mergeBlock, unsigned int control); void dumpInstructions(std::vector&, const std::vector >&) const; SourceLanguage source; diff --git a/third_party/glslang-spirv/spvIR.h b/third_party/glslang-spirv/spvIR.h index 98f4971b4..63e460ebb 100644 --- a/third_party/glslang-spirv/spvIR.h +++ b/third_party/glslang-spirv/spvIR.h @@ -180,6 +180,11 @@ public: void addInstruction(std::unique_ptr inst); void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);} void addLocalVariable(std::unique_ptr inst) { localVariables.push_back(std::move(inst)); } + void insertInstruction(size_t pos, std::unique_ptr inst); + + size_t getInstructionCount() { return instructions.size(); } + Instruction* getInstruction(size_t i) { return instructions[i].get(); } + void removeInstruction(size_t i) { instructions.erase(instructions.begin() + i); } const 
std::vector& getPredecessors() const { return predecessors; } const std::vector& getSuccessors() const { return successors; } void setUnreachable() { unreachable = true; } @@ -200,6 +205,10 @@ public: bool isTerminated() const { + if (instructions.size() == 0) { + return false; + } + switch (instructions.back()->getOpCode()) { case OpBranch: case OpBranchConditional: @@ -215,6 +224,7 @@ public: void dump(std::vector& out) const { + // OpLabel instructions[0]->dump(out); for (int i = 0; i < (int)localVariables.size(); ++i) localVariables[i]->dump(out); @@ -222,7 +232,51 @@ public: instructions[i]->dump(out); } -protected: + // Moves all instructions from a target block into this block, and removes + // the target block from our list of successors. + // This function assumes this block unconditionally branches to the target + // block directly. + void merge(Block* target_block) { + if (isTerminated()) { + instructions.erase(instructions.end() - 1); + } + + // Find the target block in our successors first. + for (auto it = successors.begin(); it != successors.end(); ++it) { + if (*it == target_block) { + it = successors.erase(it); + break; + } + } + + // Add target block's successors to our successors. + successors.insert(successors.end(), target_block->successors.begin(), + target_block->successors.end()); + + // For each successor, replace the target block in their predecessors with + // us. + for (auto block : successors) { + std::replace(block->predecessors.begin(), block->predecessors.end(), + target_block, this); + } + + // Move instructions from target block into this block. 
+ for (auto it = target_block->instructions.begin(); + it != target_block->instructions.end();) { + if ((*it)->getOpCode() == spv::Op::OpLabel) { + ++it; + continue; + } + + instructions.push_back(std::move(*it)); + it = target_block->instructions.erase(it); + } + + target_block->predecessors.clear(); + target_block->successors.clear(); + } + + protected: Block(const Block&); Block& operator=(Block&); @@ -275,6 +329,17 @@ public: Module& getParent() const { return parent; } Block* getEntryBlock() const { return blocks.front(); } Block* getLastBlock() const { return blocks.back(); } + Block* findBlockById(Id id) + { + for (auto block : blocks) { + if (block->getId() == id) { + return block; + } + } + + return nullptr; + } + std::vector& getBlocks() { return blocks; } void addLocalVariable(std::unique_ptr inst); Id getReturnType() const { return functionInstruction.getTypeId(); } void dump(std::vector& out) const @@ -315,6 +380,8 @@ public: } void addFunction(Function *fun) { functions.push_back(fun); } + const std::vector& getFunctions() const { return functions; } + std::vector& getFunctions() { return functions; } void mapInstruction(Instruction *instruction) { @@ -398,6 +465,14 @@ __inline void Block::addInstruction(std::unique_ptr inst) parent.getParent().mapInstruction(raw_instruction); } +__inline void Block::insertInstruction(size_t pos, std::unique_ptr inst) { + Instruction* raw_instruction = inst.get(); + instructions.insert(instructions.begin() + pos, std::move(inst)); + raw_instruction->setBlock(this); + if (raw_instruction->getResultId()) + parent.getParent().mapInstruction(raw_instruction); +} + }; // end spv namespace #endif // spvIR_H From b025790207ff15757d745a07a7ffa0a130fcb937 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 17 May 2016 05:58:52 -0500 Subject: [PATCH 122/145] Disable Vulkan native MSAA by default for now. 
--- src/xenia/gpu/vulkan/vulkan_gpu_flags.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc b/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc index 52bc10c84..fd2fe7789 100644 --- a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc +++ b/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc @@ -11,6 +11,6 @@ DEFINE_bool(vulkan_renderdoc_capture_all, false, "Capture everything with RenderDoc."); -DEFINE_bool(vulkan_native_msaa, true, "Use native MSAA"); +DEFINE_bool(vulkan_native_msaa, false, "Use native MSAA"); DEFINE_bool(vulkan_dump_disasm, false, "Dump shader disassembly. NVIDIA only supported."); From 1faf5a813aa5be69227c1103da48b9811d57fd3c Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 22 May 2016 19:57:05 -0500 Subject: [PATCH 123/145] Fix ALU scalar swizzles (Possibly) --- src/xenia/gpu/shader_translator.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index f6bfbdd65..79381d909 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -1047,9 +1047,8 @@ void ParseAluInstructionOperand(const AluInstruction& op, int i, uint32_t a = swizzle & 0x3; out_op->components[0] = GetSwizzleFromComponentIndex(a); } else if (swizzle_component_count == 2) { - swizzle >>= 4; - uint32_t a = ((swizzle >> 2) + 3) & 0x3; - uint32_t b = (swizzle + 2) & 0x3; + uint32_t a = ((swizzle >> 6) + 3) & 0x3; + uint32_t b = ((swizzle >> 0) + 0) & 0x3; out_op->components[0] = GetSwizzleFromComponentIndex(a); out_op->components[1] = GetSwizzleFromComponentIndex(b); } else { @@ -1129,6 +1128,10 @@ void ShaderTranslator::ParseAluVectorInstruction( } else { // Unimplemented. 
// assert_always(); + XELOGE( + "ShaderTranslator::ParseAluVectorInstruction: Unsupported write " + "to export %d", + dest_num); i.result.storage_target = InstructionStorageTarget::kNone; i.result.storage_index = 0; } From d94ff6eb2510a9826a741ca92de7b0992211ad80 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 22 May 2016 19:58:50 -0500 Subject: [PATCH 124/145] Shaders: Track the register count from the program control register (if available) --- src/xenia/gpu/shader.h | 4 ++++ src/xenia/gpu/shader_translator.cc | 14 ++++++++++++++ src/xenia/gpu/shader_translator.h | 7 +++++++ 3 files changed, 25 insertions(+) diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 95abe4dfa..7e0cd3ab2 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -558,6 +558,9 @@ class Shader { // True if the shader was translated and prepared without error. bool is_valid() const { return is_valid_; } + // True if the shader has already been translated. + bool is_translated() const { return is_translated_; } + // Errors that occurred during translation. 
const std::vector& errors() const { return errors_; } @@ -602,6 +605,7 @@ class Shader { bool writes_color_targets_[4] = {false, false, false, false}; bool is_valid_ = false; + bool is_translated_ = false; std::vector errors_; std::string ucode_disassembly_; diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 79381d909..5bb9ba016 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -51,6 +51,7 @@ void ShaderTranslator::Reset() { ucode_disasm_buffer_.Reset(); ucode_disasm_line_number_ = 0; previous_ucode_disasm_scan_offset_ = 0; + register_count_ = 64; total_attrib_count_ = 0; vertex_bindings_.clear(); texture_bindings_.clear(); @@ -95,9 +96,21 @@ bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) { return true; } +bool ShaderTranslator::Translate(Shader* shader, + xenos::xe_gpu_program_cntl_t cntl) { + Reset(); + register_count_ = shader->type() == ShaderType::kVertex ? cntl.vs_regs + 1 + : cntl.ps_regs + 1; + + return TranslateInternal(shader); +} + bool ShaderTranslator::Translate(Shader* shader) { Reset(); + return TranslateInternal(shader); +} +bool ShaderTranslator::TranslateInternal(Shader* shader) { shader_type_ = shader->type(); ucode_dwords_ = shader->ucode_dwords(); ucode_dword_count_ = shader->ucode_dword_count(); @@ -155,6 +168,7 @@ bool ShaderTranslator::Translate(Shader* shader) { } shader->is_valid_ = true; + shader->is_translated_ = true; for (const auto& error : shader->errors_) { if (error.is_fatal) { shader->is_valid_ = false; diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index 5df53bc0a..9801cb2d6 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -30,6 +30,7 @@ class ShaderTranslator { // DEPRECATED(benvanik): remove this when shader cache is removed. 
bool GatherAllBindingInformation(Shader* shader); + bool Translate(Shader* shader, xenos::xe_gpu_program_cntl_t cntl); bool Translate(Shader* shader); protected: @@ -38,6 +39,8 @@ class ShaderTranslator { // Resets translator state before beginning translation. virtual void Reset(); + // Register count. + uint32_t register_count() const { return register_count_; } // True if the current shader is a vertex shader. bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; } // True if the current shader is a pixel shader. @@ -132,6 +135,8 @@ class ShaderTranslator { int src_swizzle_component_count; }; + bool TranslateInternal(Shader* shader); + void MarkUcodeInstruction(uint32_t dword_offset); void AppendUcodeDisasm(char c); void AppendUcodeDisasm(const char* value); @@ -184,6 +189,8 @@ class ShaderTranslator { ShaderType shader_type_; const uint32_t* ucode_dwords_; size_t ucode_dword_count_; + xenos::xe_gpu_program_cntl_t program_cntl_; + uint32_t register_count_; // Accumulated translation errors. std::vector errors_; From d1b4d61b52f94ab831cda6fb8fccfdf4af6fd8f0 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 22 May 2016 20:01:42 -0500 Subject: [PATCH 125/145] SPIR-V: Use the register count from the program control register Workaround for broken OpBitFieldUExtract on NVIDIA drivers kRcpc/kRcpf/kRsqc/kRsqf Fix broken ps_ usage --- src/xenia/gpu/spirv_shader_translator.cc | 83 +++++++++++++++++++----- 1 file changed, 66 insertions(+), 17 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 86bddcd80..229951c8e 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -19,8 +19,8 @@ namespace xe { namespace gpu { using namespace ucode; -constexpr int kMaxInterpolators = 16; -constexpr int kMaxTemporaryRegisters = 64; +constexpr uint32_t kMaxInterpolators = 16; +constexpr uint32_t kMaxTemporaryRegisters = 64; using spv::GLSLstd450; using spv::Id; @@ -47,6 +47,7 @@ void SpirvShaderTranslator::StartTranslation() { spv::MemoryModel::MemoryModelGLSL450); b.addCapability(spv::Capability::CapabilityShader); b.addCapability(spv::Capability::CapabilityGenericPointer); + if (is_vertex_shader()) { b.addCapability(spv::Capability::CapabilityClipDistance); b.addCapability(spv::Capability::CapabilityCullDistance); @@ -79,8 +80,8 @@ void SpirvShaderTranslator::StartTranslation() { std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f), b.makeFloatConstant(0.f), b.makeFloatConstant(0.f)})); - registers_type_ = - b.makeArrayType(vec4_float_type_, b.makeUintConstant(64), 0); + registers_type_ = b.makeArrayType(vec4_float_type_, + b.makeUintConstant(register_count()), 0); registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassFunction, registers_type_, "r"); @@ -197,8 +198,8 @@ void SpirvShaderTranslator::StartTranslation() { } // Interpolators. 
- Id interpolators_type = - b.makeArrayType(vec4_float_type_, b.makeUintConstant(16), 0); + Id interpolators_type = b.makeArrayType( + vec4_float_type_, b.makeUintConstant(kMaxInterpolators), 0); if (is_vertex_shader()) { // Vertex inputs/outputs. for (const auto& binding : vertex_bindings()) { @@ -248,7 +249,8 @@ void SpirvShaderTranslator::StartTranslation() { interpolators_ = b.createVariable(spv::StorageClass::StorageClassOutput, interpolators_type, "interpolators"); b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); - for (uint32_t i = 0; i < 16; i++) { + for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators); + i++) { // Zero interpolators. auto ptr = b.createAccessChain(spv::StorageClass::StorageClassOutput, interpolators_, @@ -300,7 +302,8 @@ void SpirvShaderTranslator::StartTranslation() { // b.createNoResultOp(spv::Op::OpCopyMemorySized, // {registers_ptr_, interpolators_, // b.makeUintConstant(16 * 4 * sizeof(float))}); - for (int i = 0; i < 16; i++) { + for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators); + i++) { // For now, copy interpolators register-by-register :/ auto idx = b.makeUintConstant(i); auto i_a = b.createAccessChain(spv::StorageClass::StorageClassInput, @@ -341,7 +344,8 @@ void SpirvShaderTranslator::StartTranslation() { // FYI: We do this instead of r[ps_param_gen_idx] because that causes // nvidia to move all registers into local memory (slow!) - for (uint32_t i = 0; i < kMaxInterpolators; i++) { + for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators); + i++) { auto reg_ptr = b.createAccessChain( spv::StorageClass::StorageClassFunction, registers_ptr_, std::vector({b.makeUintConstant(i)})); @@ -586,7 +590,6 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( } break; case ParsedExecInstruction::Type::kConditional: { // Based off of bool_consts - // FIXME: Nvidia compiler is complaining about this. 
std::vector offsets; offsets.push_back(b.makeUintConstant(2)); // bool_consts offsets.push_back(b.makeUintConstant(instr.bool_constant_index / 32)); @@ -595,15 +598,25 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( v = b.createLoad(v); // Bitfield extract the bool constant. + // FIXME: NVidia's compiler seems to be broken on this instruction? + /* v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v, b.makeUintConstant(instr.bool_constant_index % 32), b.makeUintConstant(1)); + auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, + b.makeUintConstant(instr.condition ? 1 : 0)); + */ + v = b.createBinOp( + spv::Op::OpBitwiseAnd, uint_type_, v, + b.makeUintConstant(1 << (instr.bool_constant_index % 32))); + auto cond = b.createBinOp( + instr.condition ? spv::Op::OpINotEqual : spv::Op::OpIEqual, + bool_type_, v, b.makeUintConstant(0)); + // Conditional branch assert_true(cf_blocks_.size() > instr.dword_index + 1); body = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, - b.makeUintConstant(instr.condition ? 1 : 0)); auto next_block = cf_blocks_[instr.dword_index + 1]; if (next_block.prev_dominates) { @@ -731,6 +744,8 @@ void SpirvShaderTranslator::ProcessJumpInstruction( consts_, offsets); v = b.createLoad(v); + // FIXME: NVidia's compiler seems to be broken on this instruction? + /* // Bitfield extract the bool constant. v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v, b.makeUintConstant(instr.bool_constant_index % 32), @@ -739,6 +754,14 @@ void SpirvShaderTranslator::ProcessJumpInstruction( // Conditional branch auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, b.makeUintConstant(instr.condition ? 1 : 0)); + */ + v = b.createBinOp( + spv::Op::OpBitwiseAnd, uint_type_, v, + b.makeUintConstant(1 << (instr.bool_constant_index % 32))); + auto cond = b.createBinOp( + instr.condition ? 
spv::Op::OpINotEqual : spv::Op::OpIEqual, + bool_type_, v, b.makeUintConstant(0)); + b.createConditionalBranch(cond, cf_blocks_[instr.target_address].block, cf_blocks_[instr.dword_index + 1].block); } break; @@ -1473,7 +1496,8 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( case AluScalarOpcode::kAddsPrev: { // dest = src0 + ps - dest = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], ps_); + dest = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], + b.createLoad(ps_)); } break; case AluScalarOpcode::kCos: { @@ -1636,7 +1660,8 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( case AluScalarOpcode::kMulsPrev: { // dest = src0 * ps - dest = b.createBinOp(spv::Op::OpFMul, float_type_, sources[0], ps_); + dest = b.createBinOp(spv::Op::OpFMul, float_type_, sources[0], + b.createLoad(ps_)); } break; case AluScalarOpcode::kMulsPrev2: { @@ -1644,11 +1669,22 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } break; case AluScalarOpcode::kRcpc: { - // TODO: dest = src0 != 0.0 ? 1.0 / src0 : FLT_MAX; + dest = b.createBinOp(spv::Op::OpFDiv, float_type_, + b.makeFloatConstant(1.f), sources[0]); + dest = CreateGlslStd450InstructionCall( + spv::NoPrecision, float_type_, spv::GLSLstd450::kFClamp, + {dest, b.makeFloatConstant(-FLT_MAX), b.makeFloatConstant(FLT_MAX)}); } break; - case AluScalarOpcode::kRcp: case AluScalarOpcode::kRcpf: { + dest = b.createBinOp(spv::Op::OpFDiv, float_type_, + b.makeFloatConstant(1.f), sources[0]); + auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, dest); + dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, + b.makeFloatConstant(0.f), dest); + } break; + + case AluScalarOpcode::kRcp: { // dest = src0 != 0.0 ? 
1.0 / src0 : 0.0; auto c = b.createBinOp(spv::Op::OpFOrdEqual, float_type_, sources[0], b.makeFloatConstant(0.f)); @@ -1659,9 +1695,21 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } break; case AluScalarOpcode::kRsqc: { + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + spv::GLSLstd450::kInverseSqrt, + {sources[0]}); + dest = CreateGlslStd450InstructionCall( + spv::NoPrecision, float_type_, spv::GLSLstd450::kFClamp, + {dest, b.makeFloatConstant(-FLT_MAX), b.makeFloatConstant(FLT_MAX)}); } break; case AluScalarOpcode::kRsqf: { + dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, + spv::GLSLstd450::kInverseSqrt, + {sources[0]}); + auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, dest); + dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, + b.makeFloatConstant(0.f), dest); } break; case AluScalarOpcode::kRsq: { @@ -1817,7 +1865,8 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( } break; case AluScalarOpcode::kSubsPrev: { - dest = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], ps_); + dest = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], + b.createLoad(ps_)); } break; case AluScalarOpcode::kTruncs: { From bd27835a3df64cd1a3f7b3b1937c33bdadbee086 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sun, 22 May 2016 20:03:13 -0500 Subject: [PATCH 126/145] Pipeline Cache: Translate shaders when program cntl register is available --- src/xenia/gpu/vulkan/pipeline_cache.cc | 96 +++++++++++++++----------- src/xenia/gpu/vulkan/pipeline_cache.h | 2 + 2 files changed, 59 insertions(+), 39 deletions(-) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index 70054f5e2..b790b7cc1 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -157,32 +157,6 @@ VulkanShader* PipelineCache::LoadShader(ShaderType shader_type, host_address, dword_count); shader_map_.insert({data_hash, shader}); - // Perform translation. - // If this fails the shader will be marked as invalid and ignored later. - if (!shader_translator_.Translate(shader)) { - XELOGE("Shader translation failed; marking shader as ignored"); - return shader; - } - - // Prepare the shader for use (creates our VkShaderModule). - // It could still fail at this point. - if (!shader->Prepare()) { - XELOGE("Shader preparation failed; marking shader as ignored"); - return shader; - } - - if (shader->is_valid()) { - XELOGGPU("Generated %s shader at 0x%.8X (%db) - hash %.16" PRIX64 ":\n%s\n", - shader_type == ShaderType::kVertex ? "vertex" : "pixel", - guest_address, dword_count * 4, shader->ucode_data_hash(), - shader->ucode_disassembly().c_str()); - } - - // Dump shader files if desired. - if (!FLAGS_dump_shaders.empty()) { - shader->Dump(FLAGS_dump_shaders, "vk"); - } - return shader; } @@ -302,6 +276,37 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state, return pipeline; } +bool PipelineCache::TranslateShader(VulkanShader* shader, + xenos::xe_gpu_program_cntl_t cntl) { + // Perform translation. + // If this fails the shader will be marked as invalid and ignored later. 
+ if (!shader_translator_.Translate(shader, cntl)) { + XELOGE("Shader translation failed; marking shader as ignored"); + return false; + } + + // Prepare the shader for use (creates our VkShaderModule). + // It could still fail at this point. + if (!shader->Prepare()) { + XELOGE("Shader preparation failed; marking shader as ignored"); + return false; + } + + if (shader->is_valid()) { + XELOGGPU("Generated %s shader (%db) - hash %.16" PRIX64 ":\n%s\n", + shader->type() == ShaderType::kVertex ? "vertex" : "pixel", + shader->ucode_dword_count() * 4, shader->ucode_data_hash(), + shader->ucode_disassembly().c_str()); + } + + // Dump shader files if desired. + if (!FLAGS_dump_shaders.empty()) { + shader->Dump(FLAGS_dump_shaders, "vk"); + } + + return shader->is_valid(); +} + void PipelineCache::DumpShaderDisasmNV( const VkGraphicsPipelineCreateInfo& pipeline_info) { // !! HACK !!: This only works on NVidia drivers. Dumps shader disasm. @@ -510,8 +515,6 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, break; } - // window_width_scalar = window_height_scalar = 1; - // Whether each of the viewport settings are enabled. // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf bool vport_xscale_enable = (regs.pa_cl_vte_cntl & (1 << 0)) > 0; @@ -525,10 +528,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, vport_yoffset_enable == vport_zoffset_enable); VkViewport viewport_rect; - viewport_rect.x = 0; - viewport_rect.y = 0; - viewport_rect.width = 100; - viewport_rect.height = 100; + std::memset(&viewport_rect, 0, sizeof(VkViewport)); viewport_rect.minDepth = 0; viewport_rect.maxDepth = 1; @@ -655,7 +655,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, push_constants.vtx_fmt[3] = vtx_w0_fmt; // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE - // Deprecated in Vulkan, implemented in shader. + // Emulated in shader. 
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; // ALPHATESTENABLE push_constants.alpha_test[0] = @@ -754,6 +754,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( bool dirty = false; dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); dirty |= regs.vertex_shader != vertex_shader; dirty |= regs.pixel_shader != pixel_shader; dirty |= regs.primitive_type != primitive_type; @@ -765,6 +766,21 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( return UpdateStatus::kCompatible; } + xenos::xe_gpu_program_cntl_t sq_program_cntl; + sq_program_cntl.dword_0 = regs.sq_program_cntl; + + if (!vertex_shader->is_translated() && + !TranslateShader(vertex_shader, sq_program_cntl)) { + XELOGE("Failed to translate the vertex shader!"); + return UpdateStatus::kError; + } + + if (!pixel_shader->is_translated() && + !TranslateShader(pixel_shader, sq_program_cntl)) { + XELOGE("Failed to translate the pixel shader!"); + return UpdateStatus::kError; + } + update_shader_stages_stage_count_ = 0; auto& vertex_pipeline_stage = @@ -868,6 +884,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( case VertexFormat::k_11_11_10: // Converted in-shader. // TODO(DrChat) + assert_always(); // vertex_attrib_descr.format = VK_FORMAT_R32_UINT; vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; break; @@ -901,19 +918,19 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( is_signed ? 
VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32_UINT; break; case VertexFormat::k_32_FLOAT: - assert_true(is_signed); + // assert_true(is_signed); vertex_attrib_descr.format = VK_FORMAT_R32_SFLOAT; break; case VertexFormat::k_32_32_FLOAT: - assert_true(is_signed); + // assert_true(is_signed); vertex_attrib_descr.format = VK_FORMAT_R32G32_SFLOAT; break; case VertexFormat::k_32_32_32_FLOAT: - assert_true(is_signed); + // assert_true(is_signed); vertex_attrib_descr.format = VK_FORMAT_R32G32B32_SFLOAT; break; case VertexFormat::k_32_32_32_32_FLOAT: - assert_true(is_signed); + // assert_true(is_signed); vertex_attrib_descr.format = VK_FORMAT_R32G32B32A32_SFLOAT; break; default: @@ -1060,8 +1077,9 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( // Discard rasterizer output in depth-only mode. // TODO(DrChat): Figure out how to make this work properly. auto enable_mode = static_cast(regs.rb_modecontrol & 0x7); - state_info.rasterizerDiscardEnable = - enable_mode == xenos::ModeControl::kColorDepth ? VK_FALSE : VK_TRUE; + state_info.rasterizerDiscardEnable = VK_FALSE; + // state_info.rasterizerDiscardEnable = + // enable_mode == xenos::ModeControl::kColorDepth ? VK_FALSE : VK_TRUE; // KILL_PIX_POST_EARLY_Z if (regs.pa_sc_viz_query & 0x80) { diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index e5645f638..c2335028f 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -75,6 +75,7 @@ class PipelineCache { // state. VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key); + bool TranslateShader(VulkanShader* shader, xenos::xe_gpu_program_cntl_t cntl); void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info); // Gets a geometry shader used to emulate the given primitive type. 
@@ -157,6 +158,7 @@ class PipelineCache { struct UpdateShaderStagesRegisters { PrimitiveType primitive_type; uint32_t pa_su_sc_mode_cntl; + uint32_t sq_program_cntl; VulkanShader* vertex_shader; VulkanShader* pixel_shader; From 7004f836657773c1a34a2d9adff03992225459d3 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 22 May 2016 20:05:47 -0500 Subject: [PATCH 127/145] CP: Don't check for shader validity here Fix a lousy typo in PrepareTextureSet --- src/xenia/gpu/vulkan/vulkan_command_processor.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 17f83f82c..d29c6e8bf 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -95,7 +95,7 @@ void VulkanCommandProcessor::ShutdownContext() { // Free all pools. This must come after all of our caches clean up. command_buffer_pool_.reset(); - // Release queue, if were using an acquired one. + // Release queue, if we were using an acquired one. if (!queue_mutex_) { device_->ReleaseQueue(queue_); queue_ = nullptr; @@ -185,6 +185,7 @@ void VulkanCommandProcessor::CreateSwapImages(VkCommandBuffer setup_buffer, // Transition both images to general layout. VkImageMemoryBarrier barrier; std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.srcAccessMask = 0; barrier.dstAccessMask = 0; barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -268,6 +269,7 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, // Insert a barrier so the GPU finishes writing to the image. 
VkImageMemoryBarrier barrier; std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; @@ -315,10 +317,10 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, current_render_state_ = nullptr; } - status = vkEndCommandBuffer(current_command_buffer_); - CheckResult(status, "vkEndCommandBuffer"); status = vkEndCommandBuffer(current_setup_buffer_); CheckResult(status, "vkEndCommandBuffer"); + status = vkEndCommandBuffer(current_command_buffer_); + CheckResult(status, "vkEndCommandBuffer"); // TODO(DrChat): If the setup buffer is empty, don't bother queueing it up. submit_buffers.push_back(current_setup_buffer_); @@ -417,7 +419,7 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, // We need them to do just about anything so validate here. auto vertex_shader = static_cast(active_vertex_shader()); auto pixel_shader = static_cast(active_pixel_shader()); - if (!vertex_shader || !vertex_shader->is_valid()) { + if (!vertex_shader) { // Always need a vertex shader. return true; } @@ -426,7 +428,7 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, // Use a dummy pixel shader when required. // TODO(benvanik): dummy pixel shader. assert_not_null(pixel_shader); - } else if (!pixel_shader || !pixel_shader->is_valid()) { + } else if (!pixel_shader) { // Need a pixel shader in normal color mode. return true; } @@ -742,7 +744,7 @@ bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, #endif // FINE_GRAINED_DRAW_SCOPES auto descriptor_set = texture_cache_->PrepareTextureSet( - command_buffer, current_batch_fence_, vertex_shader->texture_bindings(), + setup_buffer, current_batch_fence_, vertex_shader->texture_bindings(), pixel_shader->texture_bindings()); if (!descriptor_set) { // Unable to bind set. 
From c6e905db2fb1736a55fca6996246d49ce4820c12 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 22 May 2016 22:14:45 -0500 Subject: [PATCH 128/145] Fix a memory leak in fenced pools. --- src/xenia/ui/vulkan/fenced_pools.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xenia/ui/vulkan/fenced_pools.h b/src/xenia/ui/vulkan/fenced_pools.h index a481edf10..d62ad7452 100644 --- a/src/xenia/ui/vulkan/fenced_pools.h +++ b/src/xenia/ui/vulkan/fenced_pools.h @@ -158,6 +158,7 @@ class BaseFencedPool { } if (pending_batch_list_tail_) { pending_batch_list_tail_->next = batch; + pending_batch_list_tail_ = batch; } else { pending_batch_list_tail_ = batch; } From 6e21d882501fef826d0f96fb993b449e150d7221 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Mon, 23 May 2016 09:58:46 -0500 Subject: [PATCH 129/145] Fixup circular buffers for full rotation --- src/xenia/ui/vulkan/circular_buffer.cc | 42 ++++++-------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/src/xenia/ui/vulkan/circular_buffer.cc b/src/xenia/ui/vulkan/circular_buffer.cc index 404f7a503..883e4d98d 100644 --- a/src/xenia/ui/vulkan/circular_buffer.cc +++ b/src/xenia/ui/vulkan/circular_buffer.cc @@ -103,23 +103,22 @@ bool CircularBuffer::CanAcquire(VkDeviceSize length) { length = xe::round_up(length, alignment_); if (allocations_.empty()) { // Read head has caught up to write head (entire buffer available for write) - assert(read_head_ == write_head_); - return capacity_ > length; + return capacity_ >= length; } else if (write_head_ < read_head_) { // Write head wrapped around and is behind read head. // | write |---- read ----| - return (read_head_ - write_head_) > length; - } else { + return (read_head_ - write_head_) >= length; + } else if (write_head_ > read_head_) { // Read head behind write head. // 1. 
Check if there's enough room from write -> capacity // | |---- read ----| write | - if ((capacity_ - write_head_) > length) { + if ((capacity_ - write_head_) >= length) { return true; } // 2. Check if there's enough room from 0 -> read // | write |---- read ----| | - if ((read_head_) > length) { + if ((read_head_ - 0) >= length) { return true; } } @@ -129,29 +128,13 @@ bool CircularBuffer::CanAcquire(VkDeviceSize length) { CircularBuffer::Allocation* CircularBuffer::Acquire( VkDeviceSize length, std::shared_ptr fence) { - if (!CanAcquire(length)) { + VkDeviceSize aligned_length = xe::round_up(length, alignment_); + if (!CanAcquire(aligned_length)) { return nullptr; } - VkDeviceSize aligned_length = xe::round_up(length, alignment_); assert_true(write_head_ % alignment_ == 0); - if (allocations_.empty()) { - // Entire buffer available. - assert(read_head_ == write_head_); - assert(capacity_ > aligned_length); - - write_head_ = aligned_length; - - auto alloc = new Allocation(); - alloc->host_ptr = host_base_ + 0; - alloc->gpu_memory = gpu_memory_; - alloc->offset = gpu_base_ + 0; - alloc->length = length; - alloc->aligned_length = aligned_length; - alloc->fence = fence; - allocations_.push_back(alloc); - return alloc; - } else if (write_head_ < read_head_) { + if (write_head_ < read_head_) { // Write head behind read head. 
assert_true(read_head_ - write_head_ >= aligned_length); @@ -167,7 +150,7 @@ CircularBuffer::Allocation* CircularBuffer::Acquire( return alloc; } else { - // Write head after read head + // Write head equal to/after read head if (capacity_ - write_head_ >= aligned_length) { // Free space from write -> capacity auto alloc = new Allocation(); @@ -181,7 +164,7 @@ CircularBuffer::Allocation* CircularBuffer::Acquire( allocations_.push_back(alloc); return alloc; - } else if ((read_head_ - 0) > aligned_length) { + } else if ((read_head_ - 0) >= aligned_length) { // Free space from begin -> read auto alloc = new Allocation(); alloc->host_ptr = host_base_ + write_head_; @@ -236,11 +219,6 @@ void CircularBuffer::Scavenge() { delete *it; it = allocations_.erase(it); } - - if (allocations_.empty()) { - // Reset R/W heads. - read_head_ = write_head_ = 0; - } } } // namespace vulkan From 729152a58ba116fc7669275f8486ca9f2a2ee81d Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Mon, 23 May 2016 09:59:37 -0500 Subject: [PATCH 130/145] VK: Enable independentBlend feature --- src/xenia/ui/vulkan/vulkan_device.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xenia/ui/vulkan/vulkan_device.cc b/src/xenia/ui/vulkan/vulkan_device.cc index 42077ca82..0b3a6c2ff 100644 --- a/src/xenia/ui/vulkan/vulkan_device.cc +++ b/src/xenia/ui/vulkan/vulkan_device.cc @@ -95,6 +95,7 @@ bool VulkanDevice::Initialize(DeviceInfo device_info) { ENABLE_AND_EXPECT(depthClamp); ENABLE_AND_EXPECT(alphaToOne); ENABLE_AND_EXPECT(multiViewport); + ENABLE_AND_EXPECT(independentBlend); // TODO(benvanik): add other features. if (any_features_missing) { XELOGE( From 5f764730ae8d6dcc2012ac38313a7c3edb2b6a0c Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Mon, 23 May 2016 13:16:13 -0500 Subject: [PATCH 131/145] Vulkan CP: Override frame traces if renderdoc is attached --- src/xenia/gpu/command_processor.h | 6 ++--- .../gpu/vulkan/vulkan_command_processor.cc | 23 ++++++++++++------- .../gpu/vulkan/vulkan_command_processor.h | 2 ++ 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index f2fbb6c54..c2784480b 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -84,9 +84,9 @@ class CommandProcessor { swap_request_handler_ = fn; } - void RequestFrameTrace(const std::wstring& root_path); - void BeginTracing(const std::wstring& root_path); - void EndTracing(); + virtual void RequestFrameTrace(const std::wstring& root_path); + virtual void BeginTracing(const std::wstring& root_path); + virtual void EndTracing(); void InitializeRingBuffer(uint32_t ptr, uint32_t page_count); void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index d29c6e8bf..a213ea6e5 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -37,9 +37,22 @@ VulkanCommandProcessor::VulkanCommandProcessor( VulkanCommandProcessor::~VulkanCommandProcessor() = default; +void VulkanCommandProcessor::RequestFrameTrace(const std::wstring& root_path) { + // Override traces if renderdoc is attached. 
+ if (device_->is_renderdoc_attached()) { + trace_requested_ = true; + return; + } + + return CommandProcessor::RequestFrameTrace(root_path); +} + void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); + auto status = vkQueueWaitIdle(queue_); + CheckResult(status, "vkQueueWaitIdle"); + buffer_cache_->ClearCache(); pipeline_cache_->ClearCache(); render_cache_->ClearCache(); @@ -349,12 +362,6 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, if (device_->is_renderdoc_attached() && capturing_) { device_->EndRenderDocFrameCapture(); capturing_ = false; - - // HACK(DrChat): Used b/c I disabled trace saving code in the CP. - // Remove later. - if (!trace_writer_.is_open()) { - trace_state_ = TraceState::kDisabled; - } } if (queue_mutex_) { queue_mutex_->unlock(); @@ -459,13 +466,13 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, static uint32_t frame = 0; if (device_->is_renderdoc_attached() && !capturing_ && - (FLAGS_vulkan_renderdoc_capture_all || - trace_state_ == TraceState::kSingleFrame)) { + (FLAGS_vulkan_renderdoc_capture_all || trace_requested_)) { if (queue_mutex_) { queue_mutex_->lock(); } capturing_ = true; + trace_requested_ = false; device_->BeginRenderDocFrameCapture(); if (queue_mutex_) { diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 4a7788e09..f58e2319b 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -49,6 +49,7 @@ class VulkanCommandProcessor : public CommandProcessor { kernel::KernelState* kernel_state); ~VulkanCommandProcessor() override; + virtual void RequestFrameTrace(const std::wstring& root_path) override; void ClearCaches() override; RenderCache* render_cache() { return render_cache_.get(); } @@ -103,6 +104,7 @@ class VulkanCommandProcessor : public CommandProcessor { // Last copy base address, for debugging only. 
uint32_t last_copy_base_ = 0; bool capturing_ = false; + bool trace_requested_ = false; std::unique_ptr buffer_cache_; std::unique_ptr pipeline_cache_; From ad83a1994dd5e0e37c6166761db1f21cb2cee7ab Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 24 May 2016 12:53:25 -0500 Subject: [PATCH 132/145] VK: Remove alphaToOne feature requirement --- src/xenia/ui/vulkan/vulkan_device.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/xenia/ui/vulkan/vulkan_device.cc b/src/xenia/ui/vulkan/vulkan_device.cc index 0b3a6c2ff..7b1dc7f8d 100644 --- a/src/xenia/ui/vulkan/vulkan_device.cc +++ b/src/xenia/ui/vulkan/vulkan_device.cc @@ -93,7 +93,6 @@ bool VulkanDevice::Initialize(DeviceInfo device_info) { } ENABLE_AND_EXPECT(geometryShader); ENABLE_AND_EXPECT(depthClamp); - ENABLE_AND_EXPECT(alphaToOne); ENABLE_AND_EXPECT(multiViewport); ENABLE_AND_EXPECT(independentBlend); // TODO(benvanik): add other features. From 9d1e66ab16ac4cff94027c549fb57e3ebb706dc3 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Tue, 24 May 2016 21:58:02 -0500 Subject: [PATCH 133/145] Don't write to color targets in depth-only mode. 
--- src/xenia/gpu/vulkan/pipeline_cache.cc | 16 ++++++---------- src/xenia/gpu/vulkan/pipeline_cache.h | 2 +- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index b790b7cc1..eecad03d9 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -388,7 +388,7 @@ void PipelineCache::DumpShaderDisasmNV( disasm_fp = std::string("Shader disassembly not available."); } - XELOGI("%s\n=====================================\n%s", disasm_vp.c_str(), + XELOGI("%s\n=====================================\n%s\n", disasm_vp.c_str(), disasm_fp.c_str()); } @@ -1060,7 +1060,6 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( dirty |= SetShadowRegister(®s.pa_sc_viz_query, XE_GPU_REG_PA_SC_VIZ_QUERY); dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); - dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); regs.primitive_type = primitive_type; XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { @@ -1074,13 +1073,6 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( // TODO(benvanik): right setting? state_info.depthClampEnable = VK_FALSE; - // Discard rasterizer output in depth-only mode. - // TODO(DrChat): Figure out how to make this work properly. - auto enable_mode = static_cast(regs.rb_modecontrol & 0x7); - state_info.rasterizerDiscardEnable = VK_FALSE; - // state_info.rasterizerDiscardEnable = - // enable_mode == xenos::ModeControl::kColorDepth ? 
VK_FALSE : VK_TRUE; - // KILL_PIX_POST_EARLY_Z if (regs.pa_sc_viz_query & 0x80) { state_info.rasterizerDiscardEnable = VK_TRUE; @@ -1298,6 +1290,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { SetShadowRegister(®s.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); dirty |= SetShadowRegister(®s.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); + dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; @@ -1310,6 +1303,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { state_info.logicOpEnable = VK_FALSE; state_info.logicOp = VK_LOGIC_OP_NO_OP; + auto enable_mode = static_cast(regs.rb_modecontrol & 0x7); + static const VkBlendFactor kBlendFactorMap[] = { /* 0 */ VK_BLEND_FACTOR_ZERO, /* 1 */ VK_BLEND_FACTOR_ONE, @@ -1362,7 +1357,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { // A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE // Lines up with VkColorComponentFlagBits, where R=bit 1, G=bit 2, etc.. uint32_t write_mask = (regs.rb_color_mask >> (i * 4)) & 0xF; - attachment_state.colorWriteMask = write_mask; + attachment_state.colorWriteMask = + enable_mode == xenos::ModeControl::kColorDepth ? 
write_mask : 0; } state_info.attachmentCount = 4; diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index c2335028f..49144f50f 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -216,7 +216,6 @@ class PipelineCache { uint32_t pa_sc_screen_scissor_br; uint32_t pa_sc_viz_query; uint32_t multi_prim_ib_reset_index; - uint32_t rb_modecontrol; UpdateRasterizationStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } @@ -246,6 +245,7 @@ class PipelineCache { uint32_t rb_colorcontrol; uint32_t rb_color_mask; uint32_t rb_blendcontrol[4]; + uint32_t rb_modecontrol; UpdateColorBlendStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } From a187a4931a966ffbb80a23c63ff39c43f15264b3 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Wed, 25 May 2016 13:14:03 -0500 Subject: [PATCH 134/145] Whoops - fix setting the wrong host base for some graphics allocations. --- src/xenia/ui/vulkan/circular_buffer.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xenia/ui/vulkan/circular_buffer.cc b/src/xenia/ui/vulkan/circular_buffer.cc index 883e4d98d..94d2996ce 100644 --- a/src/xenia/ui/vulkan/circular_buffer.cc +++ b/src/xenia/ui/vulkan/circular_buffer.cc @@ -103,6 +103,7 @@ bool CircularBuffer::CanAcquire(VkDeviceSize length) { length = xe::round_up(length, alignment_); if (allocations_.empty()) { // Read head has caught up to write head (entire buffer available for write) + assert_true(read_head_ == write_head_); return capacity_ >= length; } else if (write_head_ < read_head_) { // Write head wrapped around and is behind read head. 
@@ -167,7 +168,7 @@ CircularBuffer::Allocation* CircularBuffer::Acquire( } else if ((read_head_ - 0) >= aligned_length) { // Free space from begin -> read auto alloc = new Allocation(); - alloc->host_ptr = host_base_ + write_head_; + alloc->host_ptr = host_base_ + 0; alloc->gpu_memory = gpu_memory_; alloc->offset = gpu_base_ + 0; alloc->length = length; From 008167fa6670d0a4851ca8e11483342446d63bcd Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Wed, 25 May 2016 13:49:36 -0500 Subject: [PATCH 135/145] VK: Enable full rotation / Set render_state to nullptr on failure / Fix format info in texture resolves --- src/xenia/gpu/vulkan/vulkan_command_processor.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index a213ea6e5..9c8e268a5 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -370,10 +370,6 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, command_buffer_pool_->EndBatch(current_batch_fence_); - // TODO(DrChat): Remove this. - VkFence fences[] = {*current_batch_fence_}; - vkWaitForFences(*device_, 1, fences, true, -1); - // Scavenging. 
{ #if FINE_GRAINED_DRAW_SCOPES @@ -525,6 +521,7 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, current_command_buffer_ = nullptr; current_setup_buffer_ = nullptr; current_batch_fence_ = nullptr; + current_render_state_ = nullptr; return false; } pipeline_cache_->SetDynamicState(command_buffer, started_command_buffer); @@ -536,6 +533,7 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, current_command_buffer_ = nullptr; current_setup_buffer_ = nullptr; current_batch_fence_ = nullptr; + current_render_state_ = nullptr; return false; } @@ -546,6 +544,7 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, current_command_buffer_ = nullptr; current_setup_buffer_ = nullptr; current_batch_fence_ = nullptr; + current_render_state_ = nullptr; return false; } @@ -556,6 +555,7 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, current_command_buffer_ = nullptr; current_setup_buffer_ = nullptr; current_batch_fence_ = nullptr; + current_render_state_ = nullptr; return false; } @@ -569,6 +569,7 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, current_command_buffer_ = nullptr; current_setup_buffer_ = nullptr; current_batch_fence_ = nullptr; + current_render_state_ = nullptr; return false; } @@ -924,6 +925,8 @@ bool VulkanCommandProcessor::IssueCopy() { tex_info.height = dest_logical_height - 1; tex_info.dimension = gpu::Dimension::k2D; tex_info.input_length = copy_dest_pitch * copy_dest_height * 4; + tex_info.format_info = + FormatInfo::Get(uint32_t(ColorFormatToTextureFormat(copy_dest_format))); tex_info.size_2d.logical_width = dest_logical_width; tex_info.size_2d.logical_height = dest_logical_height; tex_info.size_2d.block_width = dest_block_width; From 861141721992b5c54b1a6ce5510020066cc83e29 Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Wed, 25 May 2016 17:45:38 -0500 Subject: [PATCH 136/145] Initialize rasterizerDiscardEnable to VK_FALSE --- src/xenia/gpu/vulkan/pipeline_cache.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index eecad03d9..e80cb4675 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -1072,6 +1072,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( // TODO(benvanik): right setting? state_info.depthClampEnable = VK_FALSE; + state_info.rasterizerDiscardEnable = VK_FALSE; // KILL_PIX_POST_EARLY_Z if (regs.pa_sc_viz_query & 0x80) { From c85756981b381308b745e849a40325d27738eb32 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Wed, 25 May 2016 19:49:56 -0500 Subject: [PATCH 137/145] TextureCache: Fix a few null pointer bugs Ordering of in-flight descriptor sets Change staging buffer size Free all samplers on exit --- src/xenia/gpu/vulkan/texture_cache.cc | 82 ++++++++++++++------------- src/xenia/gpu/vulkan/texture_cache.h | 4 +- 2 files changed, 45 insertions(+), 41 deletions(-) diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index ee82cb74a..0108f6100 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -25,6 +25,7 @@ namespace vulkan { using xe::ui::vulkan::CheckResult; constexpr uint32_t kMaxTextureSamplers = 32; +constexpr VkDeviceSize kStagingBufferSize = 64 * 1024 * 1024; struct TextureConfig { TextureFormat guest_format; @@ -85,9 +86,9 @@ static const TextureConfig texture_configs[64] = { // http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf {TextureFormat::k_DXN, VK_FORMAT_BC5_UNORM_BLOCK}, // ?
{TextureFormat::k_8_8_8_8_AS_16_16_16_16, VK_FORMAT_R8G8B8A8_UNORM}, - {TextureFormat::k_DXT1_AS_16_16_16_16, VK_FORMAT_BC1_RGB_SRGB_BLOCK}, - {TextureFormat::k_DXT2_3_AS_16_16_16_16, VK_FORMAT_BC2_SRGB_BLOCK}, - {TextureFormat::k_DXT4_5_AS_16_16_16_16, VK_FORMAT_BC3_SRGB_BLOCK}, + {TextureFormat::k_DXT1_AS_16_16_16_16, VK_FORMAT_BC1_RGB_UNORM_BLOCK}, + {TextureFormat::k_DXT2_3_AS_16_16_16_16, VK_FORMAT_BC2_UNORM_BLOCK}, + {TextureFormat::k_DXT4_5_AS_16_16_16_16, VK_FORMAT_BC3_UNORM_BLOCK}, {TextureFormat::k_2_10_10_10_AS_16_16_16_16, VK_FORMAT_A2R10G10B10_UNORM_PACK32}, {TextureFormat::k_10_11_11_AS_16_16_16_16, @@ -151,28 +152,23 @@ TextureCache::TextureCache(Memory* memory, RegisterFile* register_file, nullptr, &texture_descriptor_set_layout_); CheckResult(err, "vkCreateDescriptorSetLayout"); - int width = 4096; - int height = 4096; - if (!staging_buffer_.Initialize(width * height * 4, + if (!staging_buffer_.Initialize(kStagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) { assert_always(); } - // Upload a grid into the staging buffer. - auto gpu_data = reinterpret_cast(staging_buffer_.host_base()); - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - gpu_data[y * width + x] = - ((y % 32 < 16) ^ (x % 32 >= 16)) ? 
0xFF0000FF : 0xFFFFFFFF; - } - } - invalidated_textures_sets_[0].reserve(64); invalidated_textures_sets_[1].reserve(64); invalidated_textures_ = &invalidated_textures_sets_[0]; } TextureCache::~TextureCache() { + for (auto it = samplers_.begin(); it != samplers_.end(); ++it) { + vkDestroySampler(*device_, it->second->sampler, nullptr); + delete it->second; + } + samplers_.clear(); + vkDestroyDescriptorSetLayout(*device_, texture_descriptor_set_layout_, nullptr); vkDestroyDescriptorPool(*device_, descriptor_pool_, nullptr); @@ -202,15 +198,11 @@ TextureCache::Texture* TextureCache::AllocateTexture( return nullptr; } - VkFormat format = VK_FORMAT_UNDEFINED; - if (texture_info.format_info) { - auto& config = texture_configs[int(texture_info.format_info->format)]; - format = config.host_format != VK_FORMAT_UNDEFINED - ? config.host_format - : VK_FORMAT_R8G8B8A8_UNORM; - } else { - format = VK_FORMAT_R8G8B8A8_UNORM; - } + assert_not_null(texture_info.format_info); + auto& config = texture_configs[int(texture_info.format_info->format)]; + VkFormat format = config.host_format != VK_FORMAT_UNDEFINED + ? config.host_format + : VK_FORMAT_R8G8B8A8_UNORM; VkFormatProperties props; uint32_t required_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | @@ -298,7 +290,8 @@ TextureCache::Texture* TextureCache::AllocateTexture( } bool TextureCache::FreeTexture(Texture* texture) { - if (texture->in_flight_fence->status() != VK_SUCCESS) { + if (texture->in_flight_fence && + texture->in_flight_fence->status() != VK_SUCCESS) { // Texture still in flight. 
return false; } @@ -388,7 +381,10 @@ TextureCache::Texture* TextureCache::Demand( texture->is_full_texture = true; texture->texture_info = texture_info; - memory_->CancelAccessWatch(texture->access_watch_handle); + if (texture->access_watch_handle) { + memory_->CancelAccessWatch(texture->access_watch_handle); + } + texture->access_watch_handle = memory_->AddPhysicalAccessWatch( texture_info.guest_address, texture_info.input_length, cpu::MMIOHandler::kWatchWrite, @@ -443,7 +439,6 @@ TextureCache::Texture* TextureCache::Demand( } if (!uploaded) { - // TODO: Destroy the texture. FreeTexture(texture); return nullptr; } @@ -777,7 +772,10 @@ bool TextureCache::UploadTexture2D( VkCommandBuffer command_buffer, std::shared_ptr completion_fence, Texture* dest, TextureInfo src) { +#if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + assert_true(src.dimension == Dimension::k2D); if (!staging_buffer_.CanAcquire(src.input_length)) { @@ -959,6 +957,10 @@ VkDescriptorSet TextureCache::PrepareTextureSet( vkAllocateDescriptorSets(*device_, &set_alloc_info, &descriptor_set); CheckResult(err, "vkAllocateDescriptorSets"); + if (err != VK_SUCCESS) { + return nullptr; + } + // Write all updated descriptors. // TODO(benvanik): optimize? split into multiple sets? set per type? // First: Reorganize and pool image update infos. 
@@ -1029,7 +1031,7 @@ VkDescriptorSet TextureCache::PrepareTextureSet( descriptor_writes.data(), 0, nullptr); } - in_flight_sets_[descriptor_set] = completion_fence; + in_flight_sets_.push_back({descriptor_set, completion_fence}); return descriptor_set; } @@ -1056,6 +1058,10 @@ bool TextureCache::SetupTextureBinding( VkCommandBuffer command_buffer, std::shared_ptr completion_fence, UpdateSetInfo* update_set_info, const Shader::TextureBinding& binding) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + auto& regs = *register_file_; int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6; auto group = @@ -1106,7 +1112,7 @@ bool TextureCache::SetupTextureBinding( } void TextureCache::ClearCache() { - // TODO(benvanik): caching. + // TODO(DrChat): Nuke everything. } void TextureCache::Scavenge() { @@ -1119,7 +1125,9 @@ void TextureCache::Scavenge() { continue; } - ++it; + // We've encountered an item that hasn't been used yet, so any items + // afterwards are guaranteed to be unused. + break; } staging_buffer_.Scavenge(); @@ -1148,25 +1156,21 @@ void TextureCache::Scavenge() { if (!invalidated_textures.empty()) { for (auto it = invalidated_textures.begin(); it != invalidated_textures.end(); ++it) { - if (!FreeTexture(*it)) { - // Texture wasn't deleted because it's still in use. - pending_delete_textures_.push_back(*it); - } - + pending_delete_textures_.push_back(*it); textures_.erase((*it)->texture_info.hash()); } invalidated_textures.clear(); } + // Invalidated resolve textures. invalidated_resolve_textures_mutex_.lock(); if (!invalidated_resolve_textures_.empty()) { for (auto it = invalidated_resolve_textures_.begin(); it != invalidated_resolve_textures_.end(); ++it) { - if (!FreeTexture(*it)) { - // Texture wasn't deleted because it's still in use. 
- pending_delete_textures_.push_back(*it); - } + pending_delete_textures_.push_back(*it); + resolve_textures_.erase( + std::find(resolve_textures_.begin(), resolve_textures_.end(), *it)); } invalidated_resolve_textures_.clear(); diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h index a78be6ed6..8f47f33df 100644 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -171,14 +171,14 @@ class TextureCache { VkDescriptorPool descriptor_pool_ = nullptr; VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr; - std::unordered_map> + std::list>> in_flight_sets_; ui::vulkan::CircularBuffer staging_buffer_; std::unordered_map textures_; std::unordered_map samplers_; std::vector resolve_textures_; - std::vector pending_delete_textures_; + std::list pending_delete_textures_; std::mutex invalidated_textures_mutex_; std::vector* invalidated_textures_; From f8d9472872027a62417f81ea359d030744b66618 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Thu, 26 May 2016 14:46:18 -0500 Subject: [PATCH 138/145] TextureCache: Fix trying to erase a resolve texture that isn't in the resolve textures list. --- src/xenia/gpu/vulkan/texture_cache.cc | 30 ++++++--------------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index 0108f6100..a6f6dab17 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -791,9 +791,6 @@ bool TextureCache::UploadTexture2D( auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence); assert_not_null(alloc); - // TODO: Support compression. - // assert_false(src.is_compressed()); - // Upload texture into GPU memory. // TODO: If the GPU supports it, we can submit a compute batch to convert the // texture and copy it to its destination. 
Otherwise, fallback to conversion @@ -856,25 +853,6 @@ bool TextureCache::UploadTexture2D( staging_buffer_.Flush(alloc); - // Insert a memory barrier into the command buffer to ensure the upload has - // finished before we copy it into the destination texture. - /* - VkBufferMemoryBarrier upload_barrier = { - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - NULL, - VK_ACCESS_HOST_WRITE_BIT, - VK_ACCESS_TRANSFER_READ_BIT, - VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, - staging_buffer_.gpu_buffer(), - alloc->offset, - alloc->aligned_length, - }; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, - &upload_barrier, 0, nullptr); - //*/ - // Transition the texture into a transfer destination layout. VkImageMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -1169,8 +1147,12 @@ void TextureCache::Scavenge() { for (auto it = invalidated_resolve_textures_.begin(); it != invalidated_resolve_textures_.end(); ++it) { pending_delete_textures_.push_back(*it); - resolve_textures_.erase( - std::find(resolve_textures_.begin(), resolve_textures_.end(), *it)); + + auto tex = + std::find(resolve_textures_.begin(), resolve_textures_.end(), *it); + if (tex != resolve_textures_.end()) { + resolve_textures_.erase(tex); + } } invalidated_resolve_textures_.clear(); From fd37112db84c7b639d06d56f9cfccad4044236df Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Sat, 28 May 2016 10:25:18 -0500 Subject: [PATCH 139/145] VK Immediate Drawer: Properly transition texture layouts Support wrapping of textures not created here --- .../ui/vulkan/vulkan_immediate_drawer.cc | 97 +++++++++++++++++-- src/xenia/ui/vulkan/vulkan_immediate_drawer.h | 4 + 2 files changed, 92 insertions(+), 9 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index a68b44c5f..49b0cbc4d 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -136,6 +136,46 @@ class LightweightCircularBuffer { class VulkanImmediateTexture : public ImmediateTexture { public: + VulkanImmediateTexture(VulkanDevice* device, VkDescriptorPool descriptor_pool, + VkDescriptorSetLayout descriptor_set_layout, + VkImageView image_view, VkSampler sampler, + uint32_t width, uint32_t height) + : ImmediateTexture(width, height), + device_(*device), + descriptor_pool_(descriptor_pool), + image_view_(image_view), + sampler_(sampler) { + handle = reinterpret_cast(this); + + // Create descriptor set used just for this texture. + // It never changes, so we can reuse it and not worry with updates. + VkDescriptorSetAllocateInfo set_alloc_info; + set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + set_alloc_info.pNext = nullptr; + set_alloc_info.descriptorPool = descriptor_pool_; + set_alloc_info.descriptorSetCount = 1; + set_alloc_info.pSetLayouts = &descriptor_set_layout; + auto err = + vkAllocateDescriptorSets(device_, &set_alloc_info, &descriptor_set_); + CheckResult(err, "vkAllocateDescriptorSets"); + + // Initialize descriptor with our texture. 
+ VkDescriptorImageInfo texture_info; + texture_info.sampler = sampler_; + texture_info.imageView = image_view_; + texture_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + VkWriteDescriptorSet descriptor_write; + descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_write.pNext = nullptr; + descriptor_write.dstSet = descriptor_set_; + descriptor_write.dstBinding = 0; + descriptor_write.dstArrayElement = 0; + descriptor_write.descriptorCount = 1; + descriptor_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + descriptor_write.pImageInfo = &texture_info; + vkUpdateDescriptorSets(device_, 1, &descriptor_write, 0, nullptr); + } + VulkanImmediateTexture(VulkanDevice* device, VkDescriptorPool descriptor_pool, VkDescriptorSetLayout descriptor_set_layout, VkSampler sampler, uint32_t width, uint32_t height) @@ -161,7 +201,7 @@ class VulkanImmediateTexture : public ImmediateTexture { image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.queueFamilyIndexCount = 0; image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; auto err = vkCreateImage(device_, &image_info, nullptr, &image_); CheckResult(err, "vkCreateImage"); @@ -221,9 +261,12 @@ class VulkanImmediateTexture : public ImmediateTexture { ~VulkanImmediateTexture() override { vkFreeDescriptorSets(device_, descriptor_pool_, 1, &descriptor_set_); - vkDestroyImageView(device_, image_view_, nullptr); - vkDestroyImage(device_, image_, nullptr); - vkFreeMemory(device_, device_memory_, nullptr); + + if (device_memory_) { + vkDestroyImageView(device_, image_view_, nullptr); + vkDestroyImage(device_, image_, nullptr); + vkFreeMemory(device_, device_memory_, nullptr); + } } void Upload(const uint8_t* src_data) { @@ -238,25 +281,49 @@ class VulkanImmediateTexture : public ImmediateTexture { vkGetImageSubresourceLayout(device_, image_, &subresource, &layout); // Map 
memory for upload. - void* gpu_data = nullptr; - auto err = - vkMapMemory(device_, device_memory_, 0, layout.size, 0, &gpu_data); + uint8_t* gpu_data = nullptr; + auto err = vkMapMemory(device_, device_memory_, 0, layout.size, 0, + reinterpret_cast(&gpu_data)); CheckResult(err, "vkMapMemory"); // Copy the entire texture, hoping its layout matches what we expect. - std::memcpy(gpu_data, src_data, layout.size); + std::memcpy(gpu_data + layout.offset, src_data, layout.size); vkUnmapMemory(device_, device_memory_); } + // Queues a command to transition this texture to a new layout. This assumes + // the command buffer WILL be queued and executed by the device. + void TransitionLayout(VkCommandBuffer command_buffer, + VkImageLayout new_layout) { + VkImageMemoryBarrier image_barrier; + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.pNext = nullptr; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.srcAccessMask = 0; + image_barrier.dstAccessMask = 0; + image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + image_barrier.newLayout = new_layout; + image_barrier.image = image_; + image_barrier.subresourceRange = {0, 0, 1, 0, 1}; + image_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + image_layout_ = new_layout; + + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); + } + VkDescriptorSet descriptor_set() const { return descriptor_set_; } + VkImageLayout layout() const { return image_layout_; } private: VkDevice device_ = nullptr; VkDescriptorPool descriptor_pool_ = nullptr; VkSampler sampler_ = nullptr; // Not owned. 
VkImage image_ = nullptr; - VkImageLayout image_layout_ = VK_IMAGE_LAYOUT_UNDEFINED; + VkImageLayout image_layout_ = VK_IMAGE_LAYOUT_PREINITIALIZED; VkDeviceMemory device_memory_ = nullptr; VkImageView image_view_ = nullptr; VkDescriptorSet descriptor_set_ = nullptr; @@ -604,6 +671,14 @@ std::unique_ptr VulkanImmediateDrawer::CreateTexture( return std::unique_ptr(texture.release()); } +std::unique_ptr VulkanImmediateDrawer::WrapTexture( + VkImageView image_view, VkSampler sampler, uint32_t width, + uint32_t height) { + return std::make_unique( + context_->device(), descriptor_pool_, texture_set_layout_, image_view, + sampler, width, height); +} + void VulkanImmediateDrawer::UpdateTexture(ImmediateTexture* texture, const uint8_t* data) { static_cast(texture)->Upload(data); @@ -686,6 +761,10 @@ void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { // Setup texture binding. auto texture = reinterpret_cast(draw.texture_handle); if (texture) { + if (texture->layout() != VK_IMAGE_LAYOUT_GENERAL) { + texture->TransitionLayout(current_cmd_buffer_, VK_IMAGE_LAYOUT_GENERAL); + } + auto texture_set = texture->descriptor_set(); vkCmdBindDescriptorSets(current_cmd_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout_, diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h index d14a6eb7c..1db47f0d8 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h @@ -32,6 +32,10 @@ class VulkanImmediateDrawer : public ImmediateDrawer { ImmediateTextureFilter filter, bool repeat, const uint8_t* data) override; + std::unique_ptr WrapTexture(VkImageView image_view, + VkSampler sampler, + uint32_t width, + uint32_t height); void UpdateTexture(ImmediateTexture* texture, const uint8_t* data) override; void Begin(int render_target_width, int render_target_height) override; From 6dab81d0cd18b9caf1dde266fc5e8b82793595bb Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Fri, 3 Jun 2016 13:10:57 -0500 Subject: [PATCH 140/145] Find the Vulkan SDK from %VULKAN_SDK% environmental variable. --- xenia-build | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xenia-build b/xenia-build index 4587374c4..98330b6a5 100755 --- a/xenia-build +++ b/xenia-build @@ -642,8 +642,7 @@ class GenSpirvCommand(Command): print('Generating SPIR-V binaries...') print('') - # TODO(benvanik): actually find vulkan SDK. Env var? etc? - vulkan_sdk_path = 'C:\\VulkanSDK\\1.0.3.1' + vulkan_sdk_path = os.environ['VULKAN_SDK'] vulkan_bin_path = os.path.join(vulkan_sdk_path, 'bin') glslang = os.path.join(vulkan_bin_path, 'glslangValidator') spirv_dis = os.path.join(vulkan_bin_path, 'spirv-dis') From 2a924d2b05b769e52d5fa15df6b0091702b99251 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Fri, 3 Jun 2016 20:00:28 -0500 Subject: [PATCH 141/145] Pass vertex buffer endianness into the BufferCache IssueCopy: Actually issue the pipeline barrier to transition the image --- src/xenia/gpu/vulkan/buffer_cache.cc | 14 ++--- src/xenia/gpu/vulkan/buffer_cache.h | 2 +- .../gpu/vulkan/vulkan_command_processor.cc | 53 ++++++++++--------- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc index 4ae98c864..02bd88a83 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -22,9 +22,6 @@ namespace vulkan { using xe::ui::vulkan::CheckResult; -// Space kept between tail and head when wrapping. -constexpr VkDeviceSize kDeadZone = 4 * 1024; - constexpr VkDeviceSize kConstantRegisterUniformRange = 512 * 4 * 4 + 8 * 4 + 32 * 4; @@ -250,7 +247,7 @@ std::pair BufferCache::UploadIndexBuffer( } std::pair BufferCache::UploadVertexBuffer( - const void* source_ptr, size_t source_length, + const void* source_ptr, size_t source_length, Endian endian, std::shared_ptr fence) { // TODO(benvanik): check cache. 
@@ -263,9 +260,12 @@ std::pair BufferCache::UploadVertexBuffer( // Copy data into the buffer. // TODO(benvanik): memcpy then use compute shaders to swap? - // Endian::k8in32, swap words. - xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset, - source_ptr, source_length / 4); + assert_true(endian == Endian::k8in32); + if (endian == Endian::k8in32) { + // Endian::k8in32, swap words. + xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset, + source_ptr, source_length / 4); + } return {transient_buffer_->gpu_buffer(), offset}; } diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h index ee09585b5..8695fc36d 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.h +++ b/src/xenia/gpu/vulkan/buffer_cache.h @@ -67,7 +67,7 @@ class BufferCache { // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers. // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). std::pair UploadVertexBuffer( - const void* source_ptr, size_t source_length, + const void* source_ptr, size_t source_length, Endian endian, std::shared_ptr fence); // Flushes all pending data to the GPU. diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 9c8e268a5..f31b28142 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -29,7 +29,7 @@ namespace vulkan { using namespace xe::gpu::xenos; using xe::ui::vulkan::CheckResult; -constexpr size_t kDefaultBufferCacheCapacity = 128 * 1024 * 1024; +constexpr size_t kDefaultBufferCacheCapacity = 256 * 1024 * 1024; VulkanCommandProcessor::VulkanCommandProcessor( VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state) @@ -501,9 +501,6 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, } } - // Update the render cache's tracking state. - render_cache_->UpdateState(); - // Configure the pipeline for drawing. 
// This encodes all render state (blend, depth, etc), our shader stages, // and our vertex input layout. @@ -711,7 +708,6 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( fetch = &group->vertex_fetch_2; break; } - assert_true(fetch->endian == 2); // TODO(benvanik): compute based on indices or vertex count. // THIS CAN BE MASSIVELY INCORRECT (too large). @@ -724,7 +720,8 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( memory_->TranslatePhysical(fetch->address << 2); size_t source_length = valid_range; auto buffer_ref = buffer_cache_->UploadVertexBuffer( - source_ptr, source_length, current_batch_fence_); + source_ptr, source_length, static_cast(fetch->endian), + current_batch_fence_); if (buffer_ref.second == VK_WHOLE_SIZE) { // Failed to upload buffer. return false; @@ -939,26 +936,6 @@ bool VulkanCommandProcessor::IssueCopy() { assert_not_null(texture); texture->in_flight_fence = current_batch_fence_; - if (texture->image_layout == VK_IMAGE_LAYOUT_UNDEFINED) { - // Transition the image to a general layout. - VkImageMemoryBarrier image_barrier; - image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - image_barrier.pNext = nullptr; - image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.srcAccessMask = 0; - image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - image_barrier.image = texture->image; - image_barrier.subresourceRange = {0, 0, 1, 0, 1}; - image_barrier.subresourceRange.aspectMask = - copy_src_select <= 3 - ? 
VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - texture->image_layout = VK_IMAGE_LAYOUT_GENERAL; - } - // For debugging purposes only (trace viewer) last_copy_base_ = texture->texture_info.guest_address; @@ -988,6 +965,30 @@ bool VulkanCommandProcessor::IssueCopy() { } auto command_buffer = current_command_buffer_; + if (texture->image_layout == VK_IMAGE_LAYOUT_UNDEFINED) { + // Transition the image to a general layout. + VkImageMemoryBarrier image_barrier; + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.pNext = nullptr; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.srcAccessMask = 0; + image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + image_barrier.image = texture->image; + image_barrier.subresourceRange = {0, 0, 1, 0, 1}; + image_barrier.subresourceRange.aspectMask = + copy_src_select <= 3 + ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + texture->image_layout = VK_IMAGE_LAYOUT_GENERAL; + + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); + } + VkOffset3D resolve_offset = {dest_min_x, dest_min_y, 0}; VkExtent3D resolve_extent = {uint32_t(dest_max_x - dest_min_x), uint32_t(dest_max_y - dest_min_y), 1}; From 254acf2a67d1beb46872eb6fd881691d73ac611d Mon Sep 17 00:00:00 2001 From: "Dr. 
Chat" Date: Fri, 3 Jun 2016 20:01:49 -0500 Subject: [PATCH 142/145] RenderCache: Hardcode surface height to 2560 Fix a couple of other things --- src/xenia/gpu/vulkan/render_cache.cc | 108 ++++++++++++++------------- src/xenia/gpu/vulkan/render_cache.h | 4 - 2 files changed, 58 insertions(+), 54 deletions(-) diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 7d73951b5..f3d3288a7 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -309,8 +309,15 @@ bool CachedFramebuffer::IsCompatible( const RenderConfiguration& desired_config) const { // We already know all render pass things line up, so let's verify dimensions, // edram offsets, etc. We need an exact match. - if (desired_config.surface_pitch_px != width || - desired_config.surface_height_px != height) { + uint32_t surface_pitch_px = desired_config.surface_msaa != MsaaSamples::k4X + ? desired_config.surface_pitch_px + : desired_config.surface_pitch_px * 2; + uint32_t surface_height_px = desired_config.surface_msaa == MsaaSamples::k1X + ? desired_config.surface_height_px + : desired_config.surface_height_px * 2; + surface_pitch_px = std::min(surface_pitch_px, 2560u); + surface_height_px = std::min(surface_height_px, 2560u); + if (surface_pitch_px != width || surface_height_px != height) { return false; } // TODO(benvanik): separate image views from images in tiles and store in fb? 
@@ -445,7 +452,8 @@ CachedRenderPass::~CachedRenderPass() { bool CachedRenderPass::IsCompatible( const RenderConfiguration& desired_config) const { - if (config.surface_msaa != desired_config.surface_msaa) { + if (config.surface_msaa != desired_config.surface_msaa && + FLAGS_vulkan_native_msaa) { return false; } @@ -548,8 +556,6 @@ bool RenderCache::dirty() const { regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; dirty |= cur_regs.pa_sc_window_scissor_br != regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; - dirty |= (cur_regs.rb_depthcontrol & (0x4 | 0x2)) < - (regs[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2)); return dirty; } @@ -580,11 +586,6 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br, XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); - dirty |= - (regs.rb_depthcontrol & (0x4 | 0x2)) < - (register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2)); - regs.rb_depthcontrol = - register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2); if (!dirty && current_state_.render_pass) { // No registers have changed so we can reuse the previous render pass - // just begin with what we had. @@ -602,18 +603,17 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, return nullptr; } - // Initial state update. - UpdateState(); - current_state_.render_pass = render_pass; current_state_.render_pass_handle = render_pass->handle; current_state_.framebuffer = framebuffer; current_state_.framebuffer_handle = framebuffer->handle; + // TODO(DrChat): Determine if we actually need an EDRAM buffer. 
+ /* // Depth auto depth_target = current_state_.framebuffer->depth_stencil_attachment; if (depth_target && current_state_.config.depth_stencil.used) { - // UpdateTileView(command_buffer, depth_target, true); + UpdateTileView(command_buffer, depth_target, true); } // Color @@ -623,8 +623,9 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, continue; } - // UpdateTileView(command_buffer, target, true); + UpdateTileView(command_buffer, target, true); } + */ } if (!render_pass) { return nullptr; @@ -647,6 +648,15 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, render_pass_begin_info.renderArea.extent.width = config->surface_pitch_px; render_pass_begin_info.renderArea.extent.height = config->surface_height_px; + if (config->surface_msaa == MsaaSamples::k2X) { + render_pass_begin_info.renderArea.extent.height = + std::min(config->surface_height_px * 2, 2560u); + } else if (config->surface_msaa == MsaaSamples::k4X) { + render_pass_begin_info.renderArea.extent.width *= 2; + render_pass_begin_info.renderArea.extent.height = + std::min(config->surface_height_px * 2, 2560u); + } + // Configure clear color, if clearing. // TODO(benvanik): enable clearing here during resolve? render_pass_begin_info.clearValueCount = 0; @@ -677,9 +687,15 @@ bool RenderCache::ParseConfiguration(RenderConfiguration* config) { // Guess the height from the scissor height. // It's wildly inaccurate, but I've never seen it be bigger than the // EDRAM tiling. + /* uint32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF; uint32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y; config->surface_height_px = std::min(2560u, xe::round_up(ws_h, 16)); + */ + + // TODO(DrChat): Find an accurate way to get the surface height. Until we do, + // we're going to hardcode it to 2560, as that's the absolute maximum. + config->surface_height_px = 2560; // Color attachment configuration. 
if (config->mode_control == ModeControl::kColorDepth) { @@ -781,9 +797,9 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer, color_key.tile_offset = config->color[i].edram_base; color_key.tile_width = xe::round_up(config->surface_pitch_px, tile_width) / tile_width; - color_key.tile_height = std::min( - 2560 / tile_height, 160u); // xe::round_up(config->surface_height_px, - // tile_height) / tile_height; + // color_key.tile_height = + // xe::round_up(config->surface_height_px, tile_height) / tile_height; + color_key.tile_height = 160; color_key.color_or_depth = 1; color_key.msaa_samples = 0; // static_cast(config->surface_msaa); @@ -800,9 +816,9 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer, depth_stencil_key.tile_offset = config->depth_stencil.edram_base; depth_stencil_key.tile_width = xe::round_up(config->surface_pitch_px, tile_width) / tile_width; - depth_stencil_key.tile_height = std::min( - 2560 / tile_height, 160u); // xe::round_up(config->surface_height_px, - // tile_height) / tile_height; + // depth_stencil_key.tile_height = + // xe::round_up(config->surface_height_px, tile_height) / tile_height; + depth_stencil_key.tile_height = 160; depth_stencil_key.color_or_depth = 0; depth_stencil_key.msaa_samples = 0; // static_cast(config->surface_msaa); @@ -815,10 +831,17 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer, return false; } + uint32_t surface_pitch_px = config->surface_msaa != MsaaSamples::k4X + ? config->surface_pitch_px + : config->surface_pitch_px * 2; + uint32_t surface_height_px = config->surface_msaa == MsaaSamples::k1X + ? 
config->surface_height_px + : config->surface_height_px * 2; + surface_pitch_px = std::min(surface_pitch_px, 2560u); + surface_height_px = std::min(surface_height_px, 2560u); framebuffer = new CachedFramebuffer( - *device_, render_pass->handle, config->surface_pitch_px, - config->surface_height_px, target_color_attachments, - target_depth_stencil_attachment); + *device_, render_pass->handle, surface_pitch_px, surface_height_px, + target_color_attachments, target_depth_stencil_attachment); render_pass->cached_framebuffers.push_back(framebuffer); } @@ -923,6 +946,8 @@ void RenderCache::EndRenderPass() { // contents of another render target by mistake! Need to reorder copy commands // to avoid this. + // TODO(DrChat): Determine if we actually need an EDRAM buffer. + /* std::vector cached_views; // Depth @@ -946,27 +971,13 @@ void RenderCache::EndRenderPass() { [](CachedTileView const* a, CachedTileView const* b) { return *a < *b; }); for (auto view : cached_views) { - // UpdateTileView(current_command_buffer_, view, false, false); + UpdateTileView(current_command_buffer_, view, false, false); } + */ current_command_buffer_ = nullptr; } -void RenderCache::UpdateState() { - // Keep track of whether color attachments were used or not in this pass. - uint32_t rb_color_mask = register_file_->values[XE_GPU_REG_RB_COLOR_MASK].u32; - uint32_t rb_depthcontrol = - register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32; - for (int i = 0; i < 4; i++) { - uint32_t color_mask = (rb_color_mask >> (i * 4)) & 0xF; - current_state_.config.color[i].used |= - current_state_.config.mode_control == xenos::ModeControl::kColorDepth && - color_mask != 0; - } - - current_state_.config.depth_stencil.used |= !!(rb_depthcontrol & (0x4 | 0x2)); -} - void RenderCache::ClearCache() { // TODO(benvanik): caching. 
} @@ -1073,9 +1084,8 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer, key.edram_format = format; key.tile_offset = edram_base; key.tile_width = xe::round_up(pitch, tile_width) / tile_width; - key.tile_height = - std::min(2560 / tile_height, - 160u); // xe::round_up(height, tile_height) / tile_height; + // key.tile_height = xe::round_up(height, tile_height) / tile_height; + key.tile_height = 160; auto tile_view = FindOrCreateTileView(command_buffer, key); assert_not_null(tile_view); @@ -1115,7 +1125,7 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer, color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - image_blit.srcOffsets[0] = {0, 0, 0}; + image_blit.srcOffsets[0] = {0, 0, offset.z}; image_blit.srcOffsets[1] = {int32_t(extents.width), int32_t(extents.height), int32_t(extents.depth)}; @@ -1191,9 +1201,8 @@ void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer, key.edram_format = static_cast(format); key.tile_offset = edram_base; key.tile_width = xe::round_up(pitch, tile_width) / tile_width; - key.tile_height = - std::min(2560 / tile_height, - 160u); // xe::round_up(height, tile_height) / tile_height; + // key.tile_height = xe::round_up(height, tile_height) / tile_height; + key.tile_height = 160; auto tile_view = FindOrCreateTileView(command_buffer, key); assert_not_null(tile_view); @@ -1228,9 +1237,8 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, key.edram_format = static_cast(format); key.tile_offset = edram_base; key.tile_width = xe::round_up(pitch, tile_width) / tile_width; - key.tile_height = - std::min(2560 / tile_height, - 160u); // xe::round_up(height, tile_height) / tile_height; + // key.tile_height = xe::round_up(height, tile_height) / tile_height; + key.tile_height = 160; auto tile_view = FindOrCreateTileView(command_buffer, key); assert_not_null(tile_view); diff --git a/src/xenia/gpu/vulkan/render_cache.h 
b/src/xenia/gpu/vulkan/render_cache.h index 4eeca42bf..c9f0adf98 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -278,9 +278,6 @@ class RenderCache { // The command buffer will be transitioned out of the render pass phase. void EndRenderPass(); - // Updates current render state. Call this every draw with an open render pass - void UpdateState(); - // Clears all cached content. void ClearCache(); @@ -363,7 +360,6 @@ class RenderCache { uint32_t rb_color2_info; uint32_t rb_color3_info; uint32_t rb_depth_info; - uint32_t rb_depthcontrol; uint32_t pa_sc_window_scissor_tl; uint32_t pa_sc_window_scissor_br; From 6f5f3534c4e27878f3df432cc861025305e7160e Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 4 Jun 2016 15:17:31 -0500 Subject: [PATCH 143/145] Rebuild shaders with the latest compiler --- .../vulkan/shaders/bin/line_quad_list_geom.h | 361 ++++---- .../shaders/bin/line_quad_list_geom.txt | 133 ++- .../gpu/vulkan/shaders/bin/point_list_geom.h | 118 ++- .../vulkan/shaders/bin/point_list_geom.txt | 5 - .../gpu/vulkan/shaders/bin/quad_list_geom.h | 321 ++++--- .../gpu/vulkan/shaders/bin/quad_list_geom.txt | 49 +- .../gpu/vulkan/shaders/bin/rect_list_geom.h | 792 +++++++++--------- .../gpu/vulkan/shaders/bin/rect_list_geom.txt | 4 - .../gpu/vulkan/shaders/line_quad_list.geom | 17 +- src/xenia/gpu/vulkan/shaders/quad_list.geom | 9 +- .../ui/vulkan/shaders/bin/immediate_frag.h | 161 ++-- .../ui/vulkan/shaders/bin/immediate_frag.spv | Bin 1464 -> 1448 bytes .../ui/vulkan/shaders/bin/immediate_frag.txt | 3 +- .../ui/vulkan/shaders/bin/immediate_vert.h | 151 ++-- .../ui/vulkan/shaders/bin/immediate_vert.spv | Bin 1608 -> 1592 bytes .../ui/vulkan/shaders/bin/immediate_vert.txt | 1 - 16 files changed, 1024 insertions(+), 1101 deletions(-) diff --git a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h index af848e905..780d04ea7 100644 --- 
a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h +++ b/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h @@ -2,199 +2,186 @@ // source: line_quad_list.geom const uint8_t line_quad_list_geom[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, - 0x4E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x4C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, + 0x2E, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 
0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, + 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, + 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x07, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, + 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, - 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, - 0x0B, 0x00, 0x00, 
0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, + 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, - 0x05, 0x00, 0x03, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, - 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, - 0x06, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, - 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x69, 0x6E, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x56, 0x65, 0x72, 0x74, - 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, 0x06, 0x00, 0x04, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, - 0x76, 0x74, 0x78, 0x00, 0x05, 0x00, 0x05, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, - 0x06, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x69, 0x6E, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 
0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 
0x00, 0x3B, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x0E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x1B, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x27, 0x00, 0x00, 
0x00, 0x01, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x27, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x0E, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x29, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x15, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 
0x00, 0x07, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, - 0x2C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x37, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1B, 0x00, 0x00, 0x00, - 0x38, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x1A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x39, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x1A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x3A, 0x00, 0x00, 0x00, - 0x39, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x00, 
0x00, 0x28, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x3C, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x43, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x29, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x15, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x69, + 0x6E, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x6F, 0x75, 0x74, 0x5F, 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, + 0x61, 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x69, 0x6E, 0x74, 0x65, 0x72, + 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 
0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x04, 
0x00, 0x0E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 
0x00, 0x0E, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, - 0x46, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x19, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x49, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x4B, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0xFD, 0x00, 0x01, 0x00, - 0x38, 0x00, 0x01, 0x00, 
+ 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1B, 0x00, 0x00, 0x00, + 0x2D, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x2F, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 
0x07, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x3A, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x3F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x41, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, + 
0x26, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x45, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x46, 0x00, 0x00, 0x00, + 0x45, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x1B, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x49, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x4A, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00, + 0x4A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x4B, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, + 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, }; diff --git a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.txt index 613a25522..f5e91eba1 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.txt +++ b/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.txt @@ -1,15 +1,14 @@ ; SPIR-V ; Version: 1.0 ; Generator: Khronos Glslang Reference Front End; 1 -; Bound: 78 +; Bound: 76 ; Schema: 0 OpCapability Geometry OpCapability 
GeometryPointSize OpCapability ClipDistance - OpCapability GeometryStreams %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 - OpEntryPoint Geometry %4 "main" %13 %20 %36 %40 + OpEntryPoint Geometry %4 "main" %13 %20 %35 %38 OpExecutionMode %4 InputLinesAdjacency OpExecutionMode %4 Invocations 1 OpExecutionMode %4 OutputLineStrip @@ -26,26 +25,18 @@ OpMemberName %16 1 "gl_PointSize" OpMemberName %16 2 "gl_ClipDistance" OpName %20 "gl_in" - OpName %34 "VertexData" - OpMemberName %34 0 "o" - OpName %36 "out_vtx" - OpName %37 "VertexData" - OpMemberName %37 0 "o" - OpName %40 "in_vtx" + OpName %35 "out_interpolators" + OpName %38 "in_interpolators" OpMemberDecorate %11 0 BuiltIn Position OpMemberDecorate %11 1 BuiltIn PointSize OpMemberDecorate %11 2 BuiltIn ClipDistance OpDecorate %11 Block - OpDecorate %11 Stream 0 - OpDecorate %13 Stream 0 OpMemberDecorate %16 0 BuiltIn Position OpMemberDecorate %16 1 BuiltIn PointSize OpMemberDecorate %16 2 BuiltIn ClipDistance OpDecorate %16 Block - OpMemberDecorate %34 0 Location 0 - OpDecorate %34 Stream 0 - OpDecorate %36 Stream 0 - OpMemberDecorate %37 0 Location 0 + OpDecorate %35 Location 0 + OpDecorate %38 Location 0 %2 = OpTypeVoid %3 = OpTypeFunction %2 %6 = OpTypeFloat 32 @@ -70,16 +61,14 @@ %30 = OpTypePointer Output %6 %32 = OpConstant %8 16 %33 = OpTypeArray %7 %32 - %34 = OpTypeStruct %33 - %35 = OpTypePointer Output %34 - %36 = OpVariable %35 Output - %37 = OpTypeStruct %33 - %38 = OpTypeArray %37 %17 - %39 = OpTypePointer Input %38 - %40 = OpVariable %39 Input - %41 = OpTypePointer Input %37 - %52 = OpConstant %14 2 - %61 = OpConstant %14 3 + %34 = OpTypePointer Output %33 + %35 = OpVariable %34 Output + %36 = OpTypeArray %33 %17 + %37 = OpTypePointer Input %36 + %38 = OpVariable %37 Input + %39 = OpTypePointer Input %33 + %50 = OpConstant %14 2 + %59 = OpConstant %14 3 %4 = OpFunction %2 None %3 %5 = OpLabel %22 = OpAccessChain %21 %20 %15 %15 @@ -90,57 +79,57 @@ %29 = OpLoad %6 %28 %31 = 
OpAccessChain %30 %13 %26 OpStore %31 %29 - %42 = OpAccessChain %41 %40 %15 - %43 = OpLoad %37 %42 - OpStore %36 %43 + %40 = OpAccessChain %39 %38 %15 + %41 = OpLoad %33 %40 + OpStore %35 %41 OpEmitVertex - %44 = OpAccessChain %21 %20 %26 %15 - %45 = OpLoad %7 %44 - %46 = OpAccessChain %24 %13 %15 - OpStore %46 %45 - %47 = OpAccessChain %27 %20 %26 %26 - %48 = OpLoad %6 %47 - %49 = OpAccessChain %30 %13 %26 - OpStore %49 %48 - %50 = OpAccessChain %41 %40 %26 - %51 = OpLoad %37 %50 - OpStore %36 %51 + %42 = OpAccessChain %21 %20 %26 %15 + %43 = OpLoad %7 %42 + %44 = OpAccessChain %24 %13 %15 + OpStore %44 %43 + %45 = OpAccessChain %27 %20 %26 %26 + %46 = OpLoad %6 %45 + %47 = OpAccessChain %30 %13 %26 + OpStore %47 %46 + %48 = OpAccessChain %39 %38 %26 + %49 = OpLoad %33 %48 + OpStore %35 %49 OpEmitVertex - %53 = OpAccessChain %21 %20 %52 %15 - %54 = OpLoad %7 %53 - %55 = OpAccessChain %24 %13 %15 - OpStore %55 %54 - %56 = OpAccessChain %27 %20 %52 %26 - %57 = OpLoad %6 %56 - %58 = OpAccessChain %30 %13 %26 - OpStore %58 %57 - %59 = OpAccessChain %41 %40 %52 - %60 = OpLoad %37 %59 - OpStore %36 %60 + %51 = OpAccessChain %21 %20 %50 %15 + %52 = OpLoad %7 %51 + %53 = OpAccessChain %24 %13 %15 + OpStore %53 %52 + %54 = OpAccessChain %27 %20 %50 %26 + %55 = OpLoad %6 %54 + %56 = OpAccessChain %30 %13 %26 + OpStore %56 %55 + %57 = OpAccessChain %39 %38 %50 + %58 = OpLoad %33 %57 + OpStore %35 %58 OpEmitVertex - %62 = OpAccessChain %21 %20 %61 %15 - %63 = OpLoad %7 %62 - %64 = OpAccessChain %24 %13 %15 - OpStore %64 %63 - %65 = OpAccessChain %27 %20 %61 %26 - %66 = OpLoad %6 %65 - %67 = OpAccessChain %30 %13 %26 - OpStore %67 %66 - %68 = OpAccessChain %41 %40 %61 - %69 = OpLoad %37 %68 - OpStore %36 %69 + %60 = OpAccessChain %21 %20 %59 %15 + %61 = OpLoad %7 %60 + %62 = OpAccessChain %24 %13 %15 + OpStore %62 %61 + %63 = OpAccessChain %27 %20 %59 %26 + %64 = OpLoad %6 %63 + %65 = OpAccessChain %30 %13 %26 + OpStore %65 %64 + %66 = OpAccessChain %39 %38 %59 + %67 = OpLoad 
%33 %66 + OpStore %35 %67 OpEmitVertex - %70 = OpAccessChain %21 %20 %15 %15 - %71 = OpLoad %7 %70 - %72 = OpAccessChain %24 %13 %15 - OpStore %72 %71 - %73 = OpAccessChain %27 %20 %15 %26 - %74 = OpLoad %6 %73 - %75 = OpAccessChain %30 %13 %26 - OpStore %75 %74 - %76 = OpAccessChain %41 %40 %15 - %77 = OpLoad %37 %76 - OpStore %36 %77 + %68 = OpAccessChain %21 %20 %15 %15 + %69 = OpLoad %7 %68 + %70 = OpAccessChain %24 %13 %15 + OpStore %70 %69 + %71 = OpAccessChain %27 %20 %15 %26 + %72 = OpLoad %6 %71 + %73 = OpAccessChain %30 %13 %26 + OpStore %73 %72 + %74 = OpAccessChain %39 %38 %15 + %75 = OpLoad %33 %74 + OpStore %35 %75 OpEmitVertex OpEndPrimitive OpReturn diff --git a/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.h index a8e13aaa5..17ff81059 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.h +++ b/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.h @@ -4,73 +4,67 @@ const uint8_t point_list_geom[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, - 0x50, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 
0x10, 0x00, 0x04, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x70, 0x6F, 0x73, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, - 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, - 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, - 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x69, - 0x6E, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x70, 0x73, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, + 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, + 0x2E, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, + 0x4C, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, + 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x70, 0x6F, 0x73, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x0D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x07, 0x00, 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x07, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, - 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x38, 0x00, 0x00, 0x00, - 0x69, 0x6E, 0x64, 0x65, 0x78, 0x61, 0x62, 0x6C, 0x65, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x4A, 0x00, 0x00, 0x00, 0x56, 0x65, 0x72, 0x74, - 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, 0x06, 0x00, 0x04, 0x00, - 0x4A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x4C, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, - 0x76, 0x74, 0x78, 0x00, 0x05, 0x00, 0x05, 0x00, 0x4D, 0x00, 0x00, 0x00, + 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x05, 
0x00, 0x04, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x70, 0x73, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x03, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x06, 0x00, 0x27, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x06, 0x00, 0x27, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, + 0x06, 0x00, 0x07, 0x00, 0x27, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, + 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x64, 0x65, 0x78, 0x61, 0x62, 0x6C, + 0x65, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x4A, 0x00, 0x00, 0x00, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, - 0x06, 0x00, 0x04, 0x00, 0x4D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x50, 0x00, 0x00, 0x00, - 0x69, 0x6E, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 
0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x4A, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x4C, 0x00, 0x00, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x04, 0x00, 0x4A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x4C, 0x00, 0x00, 0x00, + 0x6F, 0x75, 0x74, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x05, 0x00, 0x05, 0x00, + 0x4D, 0x00, 0x00, 0x00, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x44, 0x61, + 0x74, 0x61, 0x00, 0x00, 0x06, 0x00, 0x04, 0x00, 0x4D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, + 0x50, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x27, 
0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x4A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x4D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, diff --git a/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt index ea6523102..0ac3b7351 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt +++ b/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt @@ -6,7 +6,6 @@ OpCapability Geometry OpCapability GeometryPointSize OpCapability ClipDistance - OpCapability GeometryStreams %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 OpEntryPoint Geometry %4 "main" %16 %41 %76 %80 @@ -44,11 +43,7 @@ OpMemberDecorate %39 1 BuiltIn PointSize OpMemberDecorate %39 2 BuiltIn ClipDistance OpDecorate %39 Block - OpDecorate %39 Stream 0 - OpDecorate %41 Stream 0 OpMemberDecorate %74 0 Location 0 - OpDecorate %74 Stream 0 - OpDecorate %76 Stream 0 OpMemberDecorate %77 0 Location 0 %2 = OpTypeVoid %3 = OpTypeFunction %2 diff --git a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h index f168ce835..fcb305592 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h +++ b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h @@ -2,177 +2,164 @@ // source: quad_list.geom const uint8_t quad_list_geom[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, - 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 
0x00, 0x00, 0x00, - 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, - 0x3F, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x69, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x69, 0x6E, 0x70, 0x75, 0x74, 0x5F, 0x69, 0x6E, 0x64, 0x65, 0x78, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x64, 0x65, - 0x78, 0x61, 0x62, 0x6C, 0x65, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, + 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, + 0x2E, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x3A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x10, 
0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, + 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x70, 0x75, 0x74, 0x5F, 0x69, 0x6E, + 0x64, 0x65, 0x78, 0x00, 0x05, 0x00, 0x05, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x69, 0x6E, 0x64, 0x65, 0x78, 0x61, 0x62, 0x6C, 0x65, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x06, 0x00, 0x24, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x06, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, + 0x06, 0x00, 0x07, 0x00, 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, + 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x27, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 
0x5F, 0x50, + 0x27, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x07, 0x00, 0x24, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x07, 0x00, 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, - 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, - 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, - 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, - 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x69, - 0x6E, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x39, 0x00, 0x00, 0x00, - 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, - 0x06, 0x00, 0x04, 0x00, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x3B, 0x00, 0x00, 0x00, - 0x6F, 0x75, 0x74, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x05, 0x00, 0x05, 0x00, - 0x3C, 0x00, 0x00, 0x00, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x44, 0x61, - 0x74, 0x61, 0x00, 0x00, 0x06, 0x00, 0x04, 0x00, 0x3C, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x3F, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 
0x00, - 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x39, 0x00, 0x00, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x3B, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 
0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x07, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x1A, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x05, 0x00, 0x24, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x04, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, + 0x3A, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, 0x69, 0x6E, 0x74, 0x65, + 0x72, 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x07, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x69, + 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, 
0x73, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x3A, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x3D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x02, 0x00, 0x11, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 
0x00, + 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x07, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, 0x24, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x29, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x35, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x37, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, - 0x38, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x03, 0x00, 0x39, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 
0x00, - 0x20, 0x00, 0x04, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x39, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x3A, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, - 0x3C, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, - 0x3D, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x3E, 0x00, 0x00, 0x00, - 0x3F, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x41, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x0E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 
0x00, - 0x1D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x2D, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x33, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x36, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x41, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x3C, 0x00, 0x00, 0x00, - 0x43, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x3B, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0x0D, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 
0x00, - 0x44, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x0C, 0x00, 0x00, 0x00, 0xDB, 0x00, 0x01, 0x00, - 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x2F, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x35, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x04, 0x00, 0x38, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x39, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x39, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x04, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3C, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 
0x00, 0x3B, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xF6, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0xB1, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 
0x00, 0x41, 0x00, 0x06, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x2D, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2F, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x36, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0xDB, 0x00, 0x01, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, }; diff --git 
a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt index ee4a83586..5cbb850f5 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt +++ b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt @@ -1,15 +1,14 @@ ; SPIR-V ; Version: 1.0 ; Generator: Khronos Glslang Reference Front End; 1 -; Bound: 70 +; Bound: 68 ; Schema: 0 OpCapability Geometry OpCapability GeometryPointSize OpCapability ClipDistance - OpCapability GeometryStreams %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 - OpEntryPoint Geometry %4 "main" %38 %42 %59 %63 + OpEntryPoint Geometry %4 "main" %38 %42 %58 %61 OpExecutionMode %4 InputLinesAdjacency OpExecutionMode %4 Invocations 1 OpExecutionMode %4 OutputTriangleStrip @@ -29,26 +28,18 @@ OpMemberName %39 1 "gl_PointSize" OpMemberName %39 2 "gl_ClipDistance" OpName %42 "gl_in" - OpName %57 "VertexData" - OpMemberName %57 0 "o" - OpName %59 "out_vtx" - OpName %60 "VertexData" - OpMemberName %60 0 "o" - OpName %63 "in_vtx" + OpName %58 "out_interpolators" + OpName %61 "in_interpolators" OpMemberDecorate %36 0 BuiltIn Position OpMemberDecorate %36 1 BuiltIn PointSize OpMemberDecorate %36 2 BuiltIn ClipDistance OpDecorate %36 Block - OpDecorate %36 Stream 0 - OpDecorate %38 Stream 0 OpMemberDecorate %39 0 BuiltIn Position OpMemberDecorate %39 1 BuiltIn PointSize OpMemberDecorate %39 2 BuiltIn ClipDistance OpDecorate %39 Block - OpMemberDecorate %57 0 Location 0 - OpDecorate %57 Stream 0 - OpDecorate %59 Stream 0 - OpMemberDecorate %60 0 Location 0 + OpDecorate %58 Location 0 + OpDecorate %61 Location 0 %2 = OpTypeVoid %3 = OpTypeFunction %2 %6 = OpTypeInt 32 1 @@ -81,14 +72,12 @@ %53 = OpTypePointer Output %32 %55 = OpConstant %20 16 %56 = OpTypeArray %33 %55 - %57 = OpTypeStruct %56 - %58 = OpTypePointer Output %57 - %59 = OpVariable %58 Output - %60 = OpTypeStruct %56 - %61 = OpTypeArray %60 %21 - %62 = OpTypePointer Input %61 - %63 = OpVariable %62 Input - %65 = 
OpTypePointer Input %60 + %57 = OpTypePointer Output %56 + %58 = OpVariable %57 Output + %59 = OpTypeArray %56 %21 + %60 = OpTypePointer Input %59 + %61 = OpVariable %60 Input + %63 = OpTypePointer Input %56 %4 = OpFunction %2 None %3 %5 = OpLabel %8 = OpVariable %7 Function @@ -119,16 +108,16 @@ %52 = OpLoad %32 %51 %54 = OpAccessChain %53 %38 %23 OpStore %54 %52 - %64 = OpLoad %6 %19 - %66 = OpAccessChain %65 %63 %64 - %67 = OpLoad %60 %66 - OpStore %59 %67 + %62 = OpLoad %6 %19 + %64 = OpAccessChain %63 %61 %62 + %65 = OpLoad %56 %64 + OpStore %58 %65 OpEmitVertex OpBranch %13 %13 = OpLabel - %68 = OpLoad %6 %8 - %69 = OpIAdd %6 %68 %23 - OpStore %8 %69 + %66 = OpLoad %6 %8 + %67 = OpIAdd %6 %66 %23 + OpStore %8 %67 OpBranch %10 %12 = OpLabel OpEndPrimitive diff --git a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h index 730f9f12e..88f0f7a25 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h +++ b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h @@ -4,424 +4,420 @@ const uint8_t rect_list_geom[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, 0xCA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x33, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 
0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x6C, 0x65, 0x66, 0x74, 0x5F, 0x61, 0x6C, 0x69, 0x67, 0x6E, 0x65, 0x64, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, - 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, - 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, - 0x0E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, - 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x69, - 0x6E, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, - 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, - 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, - 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, - 0x05, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x07, 0x00, 
0x30, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, - 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, - 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, 0x33, 0x00, 0x00, 0x00, - 0x69, 0x6E, 0x5F, 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, - 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, - 0x64, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, - 0xB2, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x0E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x33, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x02, 0x00, 
0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, + 0x2E, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, + 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x6C, 0x65, 0x66, 0x74, 0x5F, 0x61, 0x6C, 0x69, + 0x67, 0x6E, 0x65, 0x64, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, + 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, + 0x0E, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x07, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, + 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, + 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x07, 0x00, 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, + 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x6F, 0x75, 0x74, 0x5F, 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, + 0x61, 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x69, 0x6E, 0x74, 0x65, 0x72, + 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x03, 0x00, 0x64, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x03, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 
0x48, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x30, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 
0x3B, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 
0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x2D, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x63, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x63, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x63, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 
0x3B, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x2F, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x63, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x63, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x63, 0x00, 0x00, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, + 0x16, 0x00, 0x00, 0x00, 
0x17, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x07, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 
0x2B, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x2C, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, - 0x37, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x39, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x39, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, - 0x3A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x3C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, - 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x3F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 
0x41, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x41, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x43, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x44, 0x00, 0x00, 0x00, - 0x43, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x46, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x49, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00, - 0x4A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x4C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x4C, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, - 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x2E, 0x00, 0x00, 0x00, 
0x4E, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x4E, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x4F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x50, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x51, 0x00, 0x00, 0x00, - 0x50, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x52, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x53, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x54, 0x00, 0x00, 0x00, - 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x55, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, - 0x55, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x56, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, - 0x59, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x5B, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 
0x23, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0xB4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0xF7, 0x00, 0x03, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFA, 0x00, 0x04, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x7D, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00, - 0x5C, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x5E, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x5F, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x62, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x64, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0x65, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x65, 0x00, 0x00, 0x00, - 0xF6, 0x00, 0x04, 0x00, 0x67, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x69, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 
0x69, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, - 0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, - 0x6A, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, - 0x6C, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x66, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x00, - 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x6F, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x6E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x70, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, - 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x73, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x72, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x74, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, - 0x74, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x76, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, - 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x79, 0x00, 0x00, 0x00, - 0x75, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x6D, 0x00, 0x00, 0x00, 
0x3E, 0x00, 0x03, 0x00, 0x7A, 0x00, 0x00, 0x00, - 0x79, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, - 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00, - 0x7B, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x64, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0x65, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x67, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x7D, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x7E, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00, - 0x7E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x80, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x80, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x81, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x27, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, - 0x81, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x83, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x83, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 
0x34, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x2C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x86, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x37, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x87, 0x00, 0x00, 0x00, 0x86, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x88, 0x00, 0x00, 0x00, - 0x87, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x89, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x39, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x3A, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x8A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x8B, 
0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x8B, 0x00, 0x00, 0x00, - 0x8A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x8C, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x8D, 0x00, 0x00, 0x00, - 0x8C, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x8D, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x3C, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x90, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 
0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x93, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x93, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x95, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x46, 0x00, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x96, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x97, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x98, 0x00, 0x00, 0x00, - 0x97, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x99, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x49, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 
0x00, 0x16, 0x00, 0x00, 0x00, + 0x4A, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x9A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x9B, 0x00, 0x00, 0x00, - 0x9A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x9C, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x00, - 0x9C, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x9D, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0xA0, 0x00, 0x00, 0x00, + 0x4B, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x4C, 0x00, 0x00, 0x00, + 0x4B, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x4D, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x4E, 0x00, 0x00, 0x00, + 0x4D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x4E, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 
0x3E, 0x00, 0x03, 0x00, - 0xA0, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x16, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0xA2, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0xA3, 0x00, 0x00, 0x00, + 0x51, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0xA3, 0x00, 0x00, 0x00, 0xA2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x34, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0xA5, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x30, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, - 0xA6, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0xA8, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x54, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x56, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 
0x57, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, + 0x57, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x59, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xA9, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, - 0xA9, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0xAB, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x5A, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, + 0x5A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x5C, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xAC, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, - 0xAC, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, - 0xAE, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0xAE, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0xAF, 0x00, 0x00, 0x00, + 0x5D, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, + 0x5D, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x5F, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x5F, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 
0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, - 0xAF, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0xB1, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0xB1, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0xB3, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0xB3, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, 0xB5, 0x00, 0x00, 0x00, - 0xB6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0xB7, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB7, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, - 0xB2, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0xB9, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, - 0xFA, 0x00, 0x04, 0x00, 0xB9, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0xB5, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00, - 0xB2, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, - 0xBB, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0xBC, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, 0x00, 0xBC, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00, - 0xB2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, - 0xBF, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0xBE, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xC0, 0x00, 0x00, 0x00, 0xBF, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, - 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC2, 
0x00, 0x00, 0x00, - 0xBD, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00, - 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00, - 0xC4, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xC6, 0x00, 0x00, 0x00, 0xC2, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0xC7, 0x00, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0xC7, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0xB6, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB6, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, - 0xB2, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, - 0xC9, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0xB2, 0x00, 0x00, 0x00, 0xC9, 0x00, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0xB3, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0xB5, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, + 0x60, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x62, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x62, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x64, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0x65, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x65, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, 0x67, 0x00, 0x00, 0x00, + 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0x69, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x69, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x6A, 
0x00, 0x00, 0x00, + 0x64, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x6C, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, + 0xFA, 0x00, 0x04, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, + 0x67, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x66, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x6E, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, + 0x7F, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, + 0x70, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x72, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, + 0x71, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, + 0x77, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x79, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x7A, 0x00, 0x00, 0x00, 0x79, 0x00, 0x00, 0x00, 0xF9, 
0x00, 0x02, 0x00, + 0x68, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x7C, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x64, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0x65, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x67, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, + 0x7D, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x7E, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x7F, 0x00, 0x00, 0x00, 0x7E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x7F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x82, 0x00, 0x00, 0x00, 0x81, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x83, 0x00, 0x00, 0x00, + 0x82, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x84, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, + 0x84, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x85, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x86, 0x00, 0x00, 0x00, 0x12, 
0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00, 0x86, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x88, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x8B, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x8B, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x8D, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x8D, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, + 0x8E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x90, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x90, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, 0x00, + 0x91, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x93, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x93, 0x00, 0x00, 0x00, 0x92, 
0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x97, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x98, 0x00, 0x00, 0x00, 0x97, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x9A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x9B, 0x00, 0x00, 0x00, 0x9A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x34, 0x00, 0x00, 0x00, 0x9C, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x9D, 0x00, 0x00, 0x00, 0x9C, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, + 0x9E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0xA0, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xA0, 0x00, 0x00, 0x00, 0x9F, 
0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0xA2, 0x00, 0x00, 0x00, + 0xA1, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0xA3, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xA3, 0x00, 0x00, 0x00, 0xA2, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, 0x00, + 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0xA6, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xA7, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xA9, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, + 0xA7, 0x00, 0x00, 0x00, 0xA9, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xAC, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, + 0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, + 0xAA, 0x00, 0x00, 0x00, 0xAC, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0xAE, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xAE, 0x00, 0x00, 0x00, + 0xAD, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 
0x00, 0x00, 0x00, + 0xAF, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0xB0, 0x00, 0x00, 0x00, 0xAF, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB1, 0x00, 0x00, 0x00, + 0xB0, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB2, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xB3, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0xB3, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, + 0xB5, 0x00, 0x00, 0x00, 0xB6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0xB7, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0xB7, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0xB8, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0xB9, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, + 0x6B, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 0xB9, 0x00, 0x00, 0x00, + 0xB4, 0x00, 0x00, 0x00, 0xB5, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0xB4, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0xBA, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xBC, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, 0x00, + 0xBC, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0xBE, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0xBF, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0xBF, 0x00, 0x00, 0x00, + 0x7F, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC1, 
0x00, 0x00, 0x00, + 0xC0, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xC2, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, + 0xB2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0xC4, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0xC3, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xC5, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, 0xC2, 0x00, 0x00, 0x00, + 0xC5, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, + 0xC7, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0xC7, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, + 0xF9, 0x00, 0x02, 0x00, 0xB6, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0xB6, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0xC8, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, + 0x13, 0x00, 0x00, 0x00, 0xC9, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB2, 0x00, 0x00, 0x00, + 0xC9, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xB3, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0xB5, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, + 0xDB, 0x00, 0x01, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, + 0x38, 0x00, 0x01, 0x00, }; diff --git a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt index 94fb6a700..0d74da2bf 100644 --- a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt +++ b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt @@ -6,7 +6,6 @@ OpCapability Geometry OpCapability GeometryPointSize OpCapability ClipDistance - OpCapability GeometryStreams %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 
OpEntryPoint Geometry %4 "main" %18 %34 %48 %51 @@ -39,10 +38,7 @@ OpMemberDecorate %32 1 BuiltIn PointSize OpMemberDecorate %32 2 BuiltIn ClipDistance OpDecorate %32 Block - OpDecorate %32 Stream 0 - OpDecorate %34 Stream 0 OpDecorate %48 Location 0 - OpDecorate %48 Stream 0 OpDecorate %51 Location 0 %2 = OpTypeVoid %3 = OpTypeFunction %2 diff --git a/src/xenia/gpu/vulkan/shaders/line_quad_list.geom b/src/xenia/gpu/vulkan/shaders/line_quad_list.geom index d514bf456..5da954705 100644 --- a/src/xenia/gpu/vulkan/shaders/line_quad_list.geom +++ b/src/xenia/gpu/vulkan/shaders/line_quad_list.geom @@ -16,34 +16,31 @@ out gl_PerVertex { float gl_ClipDistance[]; }; -struct VertexData { - vec4 o[16]; -}; -layout(location = 0) in VertexData in_vtx[]; -layout(location = 0) out VertexData out_vtx; +layout(location = 0) in vec4 in_interpolators[][16]; +layout(location = 0) out vec4 out_interpolators[16]; layout(lines_adjacency) in; layout(line_strip, max_vertices = 5) out; void main() { gl_Position = gl_in[0].gl_Position; gl_PointSize = gl_in[0].gl_PointSize; - out_vtx = in_vtx[0]; + out_interpolators = in_interpolators[0]; EmitVertex(); gl_Position = gl_in[1].gl_Position; gl_PointSize = gl_in[1].gl_PointSize; - out_vtx = in_vtx[1]; + out_interpolators = in_interpolators[1]; EmitVertex(); gl_Position = gl_in[2].gl_Position; gl_PointSize = gl_in[2].gl_PointSize; - out_vtx = in_vtx[2]; + out_interpolators = in_interpolators[2]; EmitVertex(); gl_Position = gl_in[3].gl_Position; gl_PointSize = gl_in[3].gl_PointSize; - out_vtx = in_vtx[3]; + out_interpolators = in_interpolators[3]; EmitVertex(); gl_Position = gl_in[0].gl_Position; gl_PointSize = gl_in[0].gl_PointSize; - out_vtx = in_vtx[0]; + out_interpolators = in_interpolators[0]; EmitVertex(); EndPrimitive(); } diff --git a/src/xenia/gpu/vulkan/shaders/quad_list.geom b/src/xenia/gpu/vulkan/shaders/quad_list.geom index e10acc71f..f5223f4c6 100644 --- a/src/xenia/gpu/vulkan/shaders/quad_list.geom +++ 
b/src/xenia/gpu/vulkan/shaders/quad_list.geom @@ -16,11 +16,8 @@ out gl_PerVertex { float gl_ClipDistance[]; }; -struct VertexData { - vec4 o[16]; -}; -layout(location = 0) in VertexData in_vtx[]; -layout(location = 0) out VertexData out_vtx; +layout(location = 0) in vec4 in_interpolators[][16]; +layout(location = 0) out vec4 out_interpolators[16]; layout(lines_adjacency) in; layout(triangle_strip, max_vertices = 4) out; @@ -30,7 +27,7 @@ void main() { int input_index = order[i]; gl_Position = gl_in[input_index].gl_Position; gl_PointSize = gl_in[input_index].gl_PointSize; - out_vtx = in_vtx[input_index]; + out_interpolators = in_interpolators[input_index]; EmitVertex(); } EndPrimitive(); diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_frag.h b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.h index 4cdf8593b..eb606b3e4 100644 --- a/src/xenia/ui/vulkan/shaders/bin/immediate_frag.h +++ b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.h @@ -9,7 +9,7 @@ const uint8_t immediate_frag[] = { 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x08, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, @@ -41,86 +41,85 @@ const uint8_t immediate_frag[] = { 0x48, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 
0x00, + 0x1E, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x04, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 
0x00, - 0x20, 0x00, 0x04, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x02, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 
0x00, + 0x3B, 0x00, 0x04, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x1F, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, - 0x20, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x03, 0x00, 0x2C, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x2D, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0xAA, 0x00, 0x05, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x04, 
0x00, - 0x0D, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0xF7, 0x00, 0x03, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xFA, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0xBC, 0x00, 0x05, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x1B, 0x00, 0x00, 0x00, - 0xF5, 0x00, 0x07, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x1A, 0x00, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2C, 0x00, 0x00, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x85, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0x28, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 
0x00, - 0x28, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x20, 0x00, 0x04, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x09, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1B, 0x00, 0x03, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2C, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x2D, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x05, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 
0x00, + 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xBC, 0x00, 0x05, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x1B, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x1B, 0x00, 0x00, 0x00, 0xF5, 0x00, 0x07, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0xF7, 0x00, 0x03, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFA, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x85, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x33, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, + 0x28, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x28, 0x00, 0x00, 0x00, + 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, }; diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_frag.spv b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.spv index 9e0e6bec70231a6e265cb77110969afa0bd4cfd1..37ecbe8c03070b1f0b02497e80db407624d4e870 100644 GIT binary patch delta 17 
ZcmdnNy@Go}1|$2%%yP!f9~e(C0{}UR26zAf delta 27 hcmZ3%y@Pu~1|!GD%yLFfAqEBpB_L+le3S7!GXP{@2BrW2 diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_frag.txt b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.txt index c4b6ea61f..137476ace 100644 --- a/src/xenia/ui/vulkan/shaders/bin/immediate_frag.txt +++ b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.txt @@ -7,7 +7,7 @@ %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 OpEntryPoint Fragment %4 "main" %9 %11 %30 - OpExecutionMode %4 OriginLowerLeft + OpExecutionMode %4 OriginUpperLeft OpSource GLSL 450 OpName %4 "main" OpName %9 "out_color" @@ -26,7 +26,6 @@ OpMemberDecorate %16 0 MatrixStride 16 OpMemberDecorate %16 1 Offset 64 OpDecorate %16 Block - OpDecorate %18 DescriptorSet 0 OpDecorate %30 Location 0 OpDecorate %46 DescriptorSet 0 OpDecorate %46 Binding 0 diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_vert.h b/src/xenia/ui/vulkan/shaders/bin/immediate_vert.h index 3d2c0687e..e56457712 100644 --- a/src/xenia/ui/vulkan/shaders/bin/immediate_vert.h +++ b/src/xenia/ui/vulkan/shaders/bin/immediate_vert.h @@ -58,81 +58,80 @@ const uint8_t immediate_vert[] = { 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x29, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2C, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 
0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x06, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x0E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x04, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x20, 0x00, 0x04, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x2C, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x2D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 
0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x2C, 0x00, 0x00, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 
0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x06, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x3F, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x03, 0x00, 
0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2D, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x2D, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, + 0x1C, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x7F, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x41, 0x00, 
0x06, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, + 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, }; diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_vert.spv b/src/xenia/ui/vulkan/shaders/bin/immediate_vert.spv index 581d87bc672ab5bfbf858cce7ecea9047249de28..b75eb10f8eb089a1a0438107b53e19c45120f153 100644 GIT binary patch delta 12 TcmX@Xvx8^D2Bysim^zpNBY*_^ delta 22 ccmdnNbAo5X1}08n1_lNtAZFOSlBu5=07JzEDF6Tf diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_vert.txt b/src/xenia/ui/vulkan/shaders/bin/immediate_vert.txt index a8e36189e..b72350eea 100644 --- a/src/xenia/ui/vulkan/shaders/bin/immediate_vert.txt +++ b/src/xenia/ui/vulkan/shaders/bin/immediate_vert.txt @@ -36,7 +36,6 @@ OpMemberDecorate %17 0 MatrixStride 16 OpMemberDecorate %17 1 Offset 64 OpDecorate %17 Block - OpDecorate %19 DescriptorSet 0 OpDecorate %25 Location 0 OpDecorate %41 Location 0 OpDecorate %42 Location 1 From b01903ccc7e6337d55ef5899abc5b979c79bfb10 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 5 Jun 2016 12:55:19 -0500 Subject: [PATCH 144/145] Set basePipelineIndex to -1 instead of 0 to indicate invalid. 
--- src/xenia/gpu/vulkan/pipeline_cache.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index e80cb4675..cce9e0d7f 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -259,7 +259,7 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state, pipeline_info.renderPass = render_state->render_pass_handle; pipeline_info.subpass = 0; pipeline_info.basePipelineHandle = nullptr; - pipeline_info.basePipelineIndex = 0; + pipeline_info.basePipelineIndex = -1; VkPipeline pipeline = nullptr; auto err = vkCreateGraphicsPipelines(device_, pipeline_cache_, 1, &pipeline_info, nullptr, &pipeline); From 323b993bbcfa6fa37fd6356eeee04a000f35cbf2 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 11 Jun 2016 19:13:45 -0500 Subject: [PATCH 145/145] Revert the default graphics backend to GL4 (in preparation of merge to master) --- src/xenia/app/xenia_main.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index 80ed35551..3bfd11cb4 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -35,7 +35,7 @@ #endif // XE_PLATFORM_WIN32 DEFINE_string(apu, "any", "Audio system. Use: [any, nop, xaudio2]"); -DEFINE_string(gpu, "any", "Graphics system. Use: [any, gl4]"); +DEFINE_string(gpu, "any", "Graphics system. Use: [any, gl4, vulkan]"); DEFINE_string(hid, "any", "Input system. Use: [any, nop, winkey, xinput]"); DEFINE_string(target, "", "Specifies the target .xex or .iso to execute."); @@ -78,7 +78,7 @@ std::unique_ptr CreateGraphicsSystem() { std::unique_ptr best; best = std::unique_ptr( - new xe::gpu::vulkan::VulkanGraphicsSystem()); + new xe::gpu::gl4::GL4GraphicsSystem()); if (best) { return best; }