Merge branch 'master' into vulkan

This commit is contained in:
Triang3l 2020-11-15 14:06:15 +03:00
commit 8febf02a39
35 changed files with 737 additions and 670 deletions

3
.gitmodules vendored
View File

@ -55,6 +55,9 @@
[submodule "third_party/DirectXShaderCompiler"] [submodule "third_party/DirectXShaderCompiler"]
path = third_party/DirectXShaderCompiler path = third_party/DirectXShaderCompiler
url = https://github.com/microsoft/DirectXShaderCompiler.git url = https://github.com/microsoft/DirectXShaderCompiler.git
[submodule "third_party/premake-cmake"]
path = third_party/premake-cmake
url = https://github.com/Enhex/premake-cmake.git
[submodule "third_party/glslang"] [submodule "third_party/glslang"]
path = third_party/glslang path = third_party/glslang
url = https://github.com/KhronosGroup/glslang.git url = https://github.com/KhronosGroup/glslang.git

View File

@ -28,9 +28,9 @@ addons:
jobs: jobs:
include: include:
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 LINT=true - env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 LINT=true
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 BUILD=true CONFIG=Debug - env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 BUILD=true CONFIG=Debug
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 BUILD=true CONFIG=Release - env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 BUILD=true CONFIG=Release
git: git:
# We handle submodules ourselves in xenia-build setup. # We handle submodules ourselves in xenia-build setup.
@ -40,8 +40,10 @@ before_script:
- export LIBVULKAN_VERSION=1.1.70 - export LIBVULKAN_VERSION=1.1.70
- export CXX=$CXX_COMPILER - export CXX=$CXX_COMPILER
- export CC=$C_COMPILER - export CC=$C_COMPILER
- export AR=$AR_COMPILER
# Dump useful info. # Dump useful info.
- $CXX --version - $CXX --version
- $AR_COMPILER --version
- python3 --version - python3 --version
- clang-format-9 --version - clang-format-9 --version
- clang-format-9 -style=file -dump-config - clang-format-9 -style=file -dump-config

View File

@ -91,12 +91,14 @@ Linux support is extremely experimental and presently incomplete.
The build script uses LLVM/Clang 9. GCC while it should work in theory, is not easily The build script uses LLVM/Clang 9. GCC while it should work in theory, is not easily
interchangeable right now. interchangeable right now.
[CodeLite](https://codelite.org) is the supported IDE and `xb devenv` will generate a workspace and attempt to open it. Your distribution's version may be out of date so check their website. * Normal building via `xb build` uses Make.
Normal building via `xb build` uses Make. * [CodeLite](https://codelite.org) is supported. `xb devenv` will generate a workspace and attempt to open it. Your distribution's version may be out of date so check their website.
* Experimental CMake generation is available to facilitate use of other IDEs such as [CLion](https://www.jetbrains.com/clion/). If `clion` is available inside `$PATH`, `xb devenv` will start it. Otherwise `build/CMakeLists.txt` needs to be generated by invoking `xb premake --devenv=cmake` manually.
Clang-9 or newer should be available from system repositories on all up to date distributions. Clang-9 or newer should be available from system repositories on all up to date distributions.
You will also need some development libraries. To get them on an Ubuntu system: You will also need some development libraries. To get them on an Ubuntu system:
```
```bash
sudo apt-get install libgtk-3-dev libpthread-stubs0-dev liblz4-dev libx11-dev libvulkan-dev libsdl2-dev libiberty-dev libunwind-dev libc++-dev libc++abi-dev sudo apt-get install libgtk-3-dev libpthread-stubs0-dev liblz4-dev libx11-dev libvulkan-dev libsdl2-dev libiberty-dev libunwind-dev libc++-dev libc++abi-dev
``` ```

View File

@ -1,5 +1,6 @@
include("tools/build") include("tools/build")
require("third_party/premake-export-compile-commands/export-compile-commands") require("third_party/premake-export-compile-commands/export-compile-commands")
require("third_party/premake-cmake/cmake")
location(build_root) location(build_root)
targetdir(build_bin) targetdir(build_bin)
@ -24,6 +25,9 @@ defines({
"UNICODE", "UNICODE",
}) })
cppdialect("C++17")
symbols("On")
-- TODO(DrChat): Find a way to disable this on other architectures. -- TODO(DrChat): Find a way to disable this on other architectures.
if ARCH ~= "ppc64" then if ARCH ~= "ppc64" then
filter("architecture:x86_64") filter("architecture:x86_64")
@ -44,30 +48,29 @@ filter("kind:StaticLib")
filter("configurations:Checked") filter("configurations:Checked")
runtime("Debug") runtime("Debug")
optimize("Off")
defines({ defines({
"DEBUG", "DEBUG",
}) })
runtime("Debug")
filter({"configurations:Checked", "platforms:Windows"}) filter({"configurations:Checked", "platforms:Windows"})
buildoptions({ buildoptions({
"/RTCsu", -- Full Run-Time Checks. "/RTCsu", -- Full Run-Time Checks.
}) })
filter({"configurations:Checked", "platforms:Linux"})
defines({
"_GLIBCXX_DEBUG", -- libstdc++ debug mode
})
filter("configurations:Debug") filter("configurations:Debug")
runtime("Debug") runtime("Release")
optimize("Off")
defines({ defines({
"DEBUG", "DEBUG",
"_NO_DEBUG_HEAP=1", "_NO_DEBUG_HEAP=1",
}) })
runtime("Release")
filter({"configurations:Debug", "platforms:Windows"})
linkoptions({
"/NODEFAULTLIB:MSVCRTD",
})
filter({"configurations:Debug", "platforms:Linux"}) filter({"configurations:Debug", "platforms:Linux"})
buildoptions({ defines({
"-g", "_GLIBCXX_DEBUG", -- make dbg symbols work on some distros
}) })
filter("configurations:Release") filter("configurations:Release")
@ -76,26 +79,18 @@ filter("configurations:Release")
"NDEBUG", "NDEBUG",
"_NO_DEBUG_HEAP=1", "_NO_DEBUG_HEAP=1",
}) })
optimize("speed") optimize("Speed")
inlining("Auto") inlining("Auto")
floatingpoint("Fast") floatingpoint("Fast")
flags({ flags({
"LinkTimeOptimization", "LinkTimeOptimization",
}) })
runtime("Release")
filter({"configurations:Release", "platforms:Windows"})
linkoptions({
"/NODEFAULTLIB:MSVCRTD",
})
filter("platforms:Linux") filter("platforms:Linux")
system("linux") system("linux")
toolset("clang") toolset("clang")
cppdialect("C++17")
buildoptions({ buildoptions({
-- "-mlzcnt", -- (don't) Assume lzcnt is supported. -- "-mlzcnt", -- (don't) Assume lzcnt is supported.
"`pkg-config --cflags gtk+-x11-3.0`", ({os.outputof("pkg-config --cflags gtk+-x11-3.0")})[1],
"-fno-lto", -- Premake doesn't support LTO on clang
}) })
links({ links({
"stdc++fs", "stdc++fs",
@ -105,14 +100,13 @@ filter("platforms:Linux")
"rt", "rt",
}) })
linkoptions({ linkoptions({
"`pkg-config --libs gtk+-3.0`", ({os.outputof("pkg-config --libs gtk+-3.0")})[1],
}) })
filter({"platforms:Linux", "kind:*App"}) filter({"platforms:Linux", "kind:*App"})
linkgroups("On") linkgroups("On")
filter({"platforms:Linux", "language:C++", "toolset:gcc"}) filter({"platforms:Linux", "language:C++", "toolset:gcc"})
cppdialect("C++17")
links({ links({
}) })
disablewarnings({ disablewarnings({
@ -147,9 +141,7 @@ filter({"platforms:Linux", "language:C++", "toolset:clang", "files:*.cc or *.cpp
filter("platforms:Windows") filter("platforms:Windows")
system("windows") system("windows")
toolset("msc") toolset("msc")
cppdialect("C++17")
buildoptions({ buildoptions({
"/MP", -- Multiprocessor compilation.
"/utf-8", -- 'build correctly on systems with non-Latin codepages'. "/utf-8", -- 'build correctly on systems with non-Latin codepages'.
-- Mark warnings as severe -- Mark warnings as severe
"/w14839", -- non-standard use of class 'type' as an argument to a variadic function "/w14839", -- non-standard use of class 'type' as an argument to a variadic function
@ -163,10 +155,10 @@ filter("platforms:Windows")
"/wd4189", -- 'local variable is initialized but not referenced'. "/wd4189", -- 'local variable is initialized but not referenced'.
}) })
flags({ flags({
"MultiProcessorCompile", -- Multiprocessor compilation.
"NoMinimalRebuild", -- Required for /MP above. "NoMinimalRebuild", -- Required for /MP above.
}) })
symbols("On")
defines({ defines({
"_CRT_NONSTDC_NO_DEPRECATE", "_CRT_NONSTDC_NO_DEPRECATE",
"_CRT_SECURE_NO_WARNINGS", "_CRT_SECURE_NO_WARNINGS",

View File

@ -8,17 +8,6 @@ project("xenia-app")
targetname("xenia") targetname("xenia")
language("C++") language("C++")
links({ links({
"aes_128",
"capstone",
"fmt",
"dxbc",
"discord-rpc",
"glslang-spirv",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xenia-app-discord", "xenia-app-discord",
"xenia-apu", "xenia-apu",
"xenia-apu-nop", "xenia-apu-nop",
@ -39,6 +28,19 @@ project("xenia-app")
"xenia-ui", "xenia-ui",
"xenia-ui-vulkan", "xenia-ui-vulkan",
"xenia-vfs", "xenia-vfs",
})
links({
"aes_128",
"capstone",
"fmt",
"dxbc",
"discord-rpc",
"glslang-spirv",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xxhash", "xxhash",
}) })
defines({ defines({

View File

@ -9,21 +9,51 @@
#include "xenia/base/debugging.h" #include "xenia/base/debugging.h"
#include <signal.h> #include <csignal>
#include <cstdarg> #include <cstdarg>
#include <fstream>
#include <iostream>
#include <mutex>
#include <sstream>
#include "xenia/base/string_buffer.h" #include "xenia/base/string_buffer.h"
namespace xe { namespace xe {
namespace debugging { namespace debugging {
bool IsDebuggerAttached() { return false; } bool IsDebuggerAttached() {
void Break() { raise(SIGTRAP); } std::ifstream proc_status_stream("/proc/self/status");
if (!proc_status_stream.is_open()) {
return false;
}
std::string line;
while (std::getline(proc_status_stream, line)) {
std::istringstream line_stream(line);
std::string key;
line_stream >> key;
if (key == "TracerPid:") {
uint32_t tracer_pid;
line_stream >> tracer_pid;
return tracer_pid != 0;
}
}
return false;
}
void Break() {
static std::once_flag flag;
std::call_once(flag, []() {
// Install handler for sigtrap only once
std::signal(SIGTRAP, [](int) {
// Forward signal to default handler after being caught
std::signal(SIGTRAP, SIG_DFL);
});
});
std::raise(SIGTRAP);
}
namespace internal { namespace internal {
void DebugPrint(const char* s) { void DebugPrint(const char* s) { std::clog << s << std::endl; }
// TODO: proper implementation.
}
} // namespace internal } // namespace internal
} // namespace debugging } // namespace debugging

View File

@ -30,7 +30,7 @@ ExportResolver::Table::Table(const std::string_view module_name,
} }
std::sort( std::sort(
exports_by_name_.begin(), exports_by_name_.end(), exports_by_name_.begin(), exports_by_name_.end(),
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) <= 0; }); [](Export* a, Export* b) { return std::strcmp(a->name, b->name) < 0; });
} }
ExportResolver::ExportResolver() = default; ExportResolver::ExportResolver() = default;
@ -51,7 +51,7 @@ void ExportResolver::RegisterTable(
} }
std::sort( std::sort(
all_exports_by_name_.begin(), all_exports_by_name_.end(), all_exports_by_name_.begin(), all_exports_by_name_.end(),
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) <= 0; }); [](Export* a, Export* b) { return std::strcmp(a->name, b->name) < 0; });
} }
Export* ExportResolver::GetExportByOrdinal(const std::string_view module_name, Export* ExportResolver::GetExportByOrdinal(const std::string_view module_name,

View File

@ -387,7 +387,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
sampler_count_vertex); sampler_count_vertex);
return nullptr; return nullptr;
} }
root_signatures_bindful_.insert({index, root_signature}); root_signatures_bindful_.emplace(index, root_signature);
return root_signature; return root_signature;
} }
@ -745,12 +745,11 @@ void D3D12CommandProcessor::SetSamplePositions(
current_sample_positions_ = sample_positions; current_sample_positions_ = sample_positions;
} }
void D3D12CommandProcessor::SetComputePipelineState( void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) {
ID3D12PipelineState* pipeline_state) { if (current_external_pipeline_ != pipeline) {
if (current_external_pipeline_state_ != pipeline_state) { deferred_command_list_.D3DSetPipelineState(pipeline);
deferred_command_list_.D3DSetPipelineState(pipeline_state); current_external_pipeline_ = pipeline;
current_external_pipeline_state_ = pipeline_state; current_cached_pipeline_ = nullptr;
current_cached_pipeline_state_ = nullptr;
} }
} }
@ -773,8 +772,16 @@ std::string D3D12CommandProcessor::GetWindowTitleText() const {
} }
// Currently scaling is only supported with ROV. // Currently scaling is only supported with ROV.
if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) { if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) {
return "Direct3D 12 - 2x"; return "Direct3D 12 - ROV 2x";
} }
// Rasterizer-ordered views are a feature very rarely used as of 2020 and
// that faces adoption complications (outside of Direct3D - on Vulkan - at
// least), but crucial to Xenia - raise awareness of its usage.
// https://github.com/KhronosGroup/Vulkan-Ecosystem/issues/27#issuecomment-455712319
// "In Xenia's title bar "D3D12 ROV" can be seen, which was a surprise, as I
// wasn't aware that Xenia D3D12 backend was using Raster Order Views
// feature" - oscarbg in that issue.
return "Direct3D 12 - ROV";
} }
return "Direct3D 12"; return "Direct3D 12";
} }
@ -1196,7 +1203,7 @@ bool D3D12CommandProcessor::SetupContext() {
*this, *register_file_, bindless_resources_used_, edram_rov_used_, *this, *register_file_, bindless_resources_used_, edram_rov_used_,
texture_cache_->IsResolutionScale2X() ? 2 : 1); texture_cache_->IsResolutionScale2X() ? 2 : 1);
if (!pipeline_cache_->Initialize()) { if (!pipeline_cache_->Initialize()) {
XELOGE("Failed to initialize the graphics pipeline state cache"); XELOGE("Failed to initialize the graphics pipeline cache");
return false; return false;
} }
@ -1526,8 +1533,7 @@ void D3D12CommandProcessor::ShutdownContext() {
// Shut down binding - bindless descriptors may be owned by subsystems like // Shut down binding - bindless descriptors may be owned by subsystems like
// the texture cache. // the texture cache.
// Root signatured are used by pipeline states, thus freed after the pipeline // Root signatures are used by pipelines, thus freed after the pipelines.
// states.
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_); ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_);
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_); ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_);
for (auto it : root_signatures_bindful_) { for (auto it : root_signatures_bindful_) {
@ -1878,7 +1884,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
xenos::VertexShaderExportMode::kMultipass || xenos::VertexShaderExportMode::kMultipass ||
(primitive_two_faced && pa_su_sc_mode_cntl.cull_front && (primitive_two_faced && pa_su_sc_mode_cntl.cull_front &&
pa_su_sc_mode_cntl.cull_back))) { pa_su_sc_mode_cntl.cull_back))) {
// All faces are culled - can't be expressed in the pipeline state. // All faces are culled - can't be expressed in the pipeline.
return true; return true;
} }
@ -1954,7 +1960,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
line_loop_closing_index = 0; line_loop_closing_index = 0;
} }
// Update the textures - this may bind pipeline state objects. // Update the textures - this may bind pipelines.
uint32_t used_texture_mask = uint32_t used_texture_mask =
vertex_shader->GetUsedTextureMask() | vertex_shader->GetUsedTextureMask() |
(pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0); (pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
@ -1972,21 +1978,21 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
early_z = true; early_z = true;
} }
// Create the pipeline state object if needed and bind it. // Create the pipeline if needed and bind it.
void* pipeline_state_handle; void* pipeline_handle;
ID3D12RootSignature* root_signature; ID3D12RootSignature* root_signature;
if (!pipeline_cache_->ConfigurePipeline( if (!pipeline_cache_->ConfigurePipeline(
vertex_shader, pixel_shader, primitive_type_converted, vertex_shader, pixel_shader, primitive_type_converted,
indexed ? index_buffer_info->format : xenos::IndexFormat::kInt16, indexed ? index_buffer_info->format : xenos::IndexFormat::kInt16,
early_z, pipeline_render_targets, &pipeline_state_handle, early_z, pipeline_render_targets, &pipeline_handle,
&root_signature)) { &root_signature)) {
return false; return false;
} }
if (current_cached_pipeline_state_ != pipeline_state_handle) { if (current_cached_pipeline_ != pipeline_handle) {
deferred_command_list_.SetPipelineStateHandle( deferred_command_list_.SetPipelineStateHandle(
reinterpret_cast<void*>(pipeline_state_handle)); reinterpret_cast<void*>(pipeline_handle));
current_cached_pipeline_state_ = pipeline_state_handle; current_cached_pipeline_ = pipeline_handle;
current_external_pipeline_state_ = nullptr; current_external_pipeline_ = nullptr;
} }
// Update viewport, scissor, blend factor and stencil reference. // Update viewport, scissor, blend factor and stencil reference.
@ -2519,8 +2525,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
submission_open_ = true; submission_open_ = true;
// Start a new deferred command list - will submit it to the real one in the // Start a new deferred command list - will submit it to the real one in the
// end of the submission (when async pipeline state object creation requests // end of the submission (when async pipeline creation requests are
// are fulfilled). // fulfilled).
deferred_command_list_.Reset(); deferred_command_list_.Reset();
// Reset cached state of the command list. // Reset cached state of the command list.
@ -2529,8 +2535,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
ff_blend_factor_update_needed_ = true; ff_blend_factor_update_needed_ = true;
ff_stencil_ref_update_needed_ = true; ff_stencil_ref_update_needed_ = true;
current_sample_positions_ = xenos::MsaaSamples::k1X; current_sample_positions_ = xenos::MsaaSamples::k1X;
current_cached_pipeline_state_ = nullptr; current_cached_pipeline_ = nullptr;
current_external_pipeline_state_ = nullptr; current_external_pipeline_ = nullptr;
current_graphics_root_signature_ = nullptr; current_graphics_root_signature_ = nullptr;
current_graphics_root_up_to_date_ = 0; current_graphics_root_up_to_date_ = 0;
if (bindless_resources_used_) { if (bindless_resources_used_) {
@ -2726,7 +2732,7 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
} }
bool D3D12CommandProcessor::CanEndSubmissionImmediately() const { bool D3D12CommandProcessor::CanEndSubmissionImmediately() const {
return !submission_open_ || !pipeline_cache_->IsCreatingPipelineStates(); return !submission_open_ || !pipeline_cache_->IsCreatingPipelines();
} }
void D3D12CommandProcessor::ClearCommandAllocatorCache() { void D3D12CommandProcessor::ClearCommandAllocatorCache() {
@ -3890,8 +3896,8 @@ bool D3D12CommandProcessor::UpdateBindings(
sampler_parameters, sampler_parameters,
provider.OffsetSamplerDescriptor( provider.OffsetSamplerDescriptor(
sampler_bindless_heap_cpu_start_, sampler_index)); sampler_bindless_heap_cpu_start_, sampler_index));
texture_cache_bindless_sampler_map_.insert( texture_cache_bindless_sampler_map_.emplace(
{sampler_parameters.value, sampler_index}); sampler_parameters.value, sampler_index);
} }
current_sampler_bindless_indices_vertex_[j] = sampler_index; current_sampler_bindless_indices_vertex_[j] = sampler_index;
} }
@ -3922,8 +3928,8 @@ bool D3D12CommandProcessor::UpdateBindings(
sampler_parameters, sampler_parameters,
provider.OffsetSamplerDescriptor( provider.OffsetSamplerDescriptor(
sampler_bindless_heap_cpu_start_, sampler_index)); sampler_bindless_heap_cpu_start_, sampler_index));
texture_cache_bindless_sampler_map_.insert( texture_cache_bindless_sampler_map_.emplace(
{sampler_parameters.value, sampler_index}); sampler_parameters.value, sampler_index);
} }
current_sampler_bindless_indices_pixel_[j] = sampler_index; current_sampler_bindless_indices_pixel_[j] = sampler_index;
} }

View File

@ -186,19 +186,17 @@ class D3D12CommandProcessor : public CommandProcessor {
// render targets or copying to depth render targets. // render targets or copying to depth render targets.
void SetSamplePositions(xenos::MsaaSamples sample_positions); void SetSamplePositions(xenos::MsaaSamples sample_positions);
// Returns a pipeline state object with deferred creation by its handle. May // Returns a pipeline with deferred creation by its handle. May return nullptr
// return nullptr if failed to create the pipeline state object. // if failed to create the pipeline.
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle( ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
void* handle) const { return pipeline_cache_->GetD3D12PipelineByHandle(handle);
return pipeline_cache_->GetD3D12PipelineStateByHandle(handle);
} }
// Sets the current pipeline state to a compute one. This is for cache // Sets the current pipeline to a compute one. This is for cache invalidation
// invalidation primarily. A submission must be open. // primarily. A submission must be open.
void SetComputePipelineState(ID3D12PipelineState* pipeline_state); void SetComputePipeline(ID3D12PipelineState* pipeline);
// For the pipeline state cache to call when binding layout UIDs may be // For the pipeline cache to call when binding layout UIDs may be reused.
// reused.
void NotifyShaderBindingsLayoutUIDsInvalidated(); void NotifyShaderBindingsLayoutUIDsInvalidated();
// Returns the text to display in the GPU backend name in the window title. // Returns the text to display in the GPU backend name in the window title.
@ -323,8 +321,8 @@ class D3D12CommandProcessor : public CommandProcessor {
bool EndSubmission(bool is_swap); bool EndSubmission(bool is_swap);
// Checks if ending a submission right now would not cause potentially more // Checks if ending a submission right now would not cause potentially more
// delay than it would reduce by making the GPU start working earlier - such // delay than it would reduce by making the GPU start working earlier - such
// as when there are unfinished graphics pipeline state creation requests that // as when there are unfinished graphics pipeline creation requests that would
// would need to be fulfilled before actually submitting the command list. // need to be fulfilled before actually submitting the command list.
bool CanEndSubmissionImmediately() const; bool CanEndSubmissionImmediately() const;
bool AwaitAllQueueOperationsCompletion() { bool AwaitAllQueueOperationsCompletion() {
CheckSubmissionFence(submission_current_); CheckSubmissionFence(submission_current_);
@ -503,7 +501,7 @@ class D3D12CommandProcessor : public CommandProcessor {
static constexpr uint32_t kSwapTextureWidth = 1280; static constexpr uint32_t kSwapTextureWidth = 1280;
static constexpr uint32_t kSwapTextureHeight = 720; static constexpr uint32_t kSwapTextureHeight = 720;
inline std::pair<uint32_t, uint32_t> GetSwapTextureSize() const { std::pair<uint32_t, uint32_t> GetSwapTextureSize() const {
if (texture_cache_->IsResolutionScale2X()) { if (texture_cache_->IsResolutionScale2X()) {
return std::make_pair(kSwapTextureWidth * 2, kSwapTextureHeight * 2); return std::make_pair(kSwapTextureWidth * 2, kSwapTextureHeight * 2);
} }
@ -548,13 +546,12 @@ class D3D12CommandProcessor : public CommandProcessor {
// Current SSAA sample positions (to be updated by the render target cache). // Current SSAA sample positions (to be updated by the render target cache).
xenos::MsaaSamples current_sample_positions_; xenos::MsaaSamples current_sample_positions_;
// Currently bound pipeline state, either a graphics pipeline state object // Currently bound pipeline, either a graphics pipeline from the pipeline
// from the pipeline state cache (with potentially deferred creation - // cache (with potentially deferred creation - current_external_pipeline_ is
// current_external_pipeline_state_ is nullptr in this case) or a non-Xenos // nullptr in this case) or a non-Xenos graphics or compute pipeline
// graphics or compute pipeline state object (current_cached_pipeline_state_ // (current_cached_pipeline_ is nullptr in this case).
// is nullptr in this case). void* current_cached_pipeline_;
void* current_cached_pipeline_state_; ID3D12PipelineState* current_external_pipeline_;
ID3D12PipelineState* current_external_pipeline_state_;
// Currently bound graphics root signature. // Currently bound graphics root signature.
ID3D12RootSignature* current_graphics_root_signature_; ID3D12RootSignature* current_graphics_root_signature_;

View File

@ -157,7 +157,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
stretch_pipeline_desc.SampleDesc.Count = 1; stretch_pipeline_desc.SampleDesc.Count = 1;
if (FAILED(device->CreateGraphicsPipelineState( if (FAILED(device->CreateGraphicsPipelineState(
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_pipeline_)))) { &stretch_pipeline_desc, IID_PPV_ARGS(&stretch_pipeline_)))) {
XELOGE("Failed to create the front buffer stretch pipeline state"); XELOGE("Failed to create the front buffer stretch pipeline");
stretch_gamma_root_signature_->Release(); stretch_gamma_root_signature_->Release();
stretch_gamma_root_signature_ = nullptr; stretch_gamma_root_signature_ = nullptr;
stretch_root_signature_->Release(); stretch_root_signature_->Release();
@ -170,8 +170,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
if (FAILED(device->CreateGraphicsPipelineState( if (FAILED(device->CreateGraphicsPipelineState(
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_gamma_pipeline_)))) { &stretch_pipeline_desc, IID_PPV_ARGS(&stretch_gamma_pipeline_)))) {
XELOGE( XELOGE(
"Failed to create the gamma-correcting front buffer stretch " "Failed to create the gamma-correcting front buffer stretch pipeline");
"pipeline state");
stretch_pipeline_->Release(); stretch_pipeline_->Release();
stretch_pipeline_ = nullptr; stretch_pipeline_ = nullptr;
stretch_gamma_root_signature_->Release(); stretch_gamma_root_signature_->Release();

View File

@ -85,7 +85,7 @@ class D3D12Shader : public Shader {
return sampler_bindings_.data(); return sampler_bindings_.data();
} }
// For owning subsystems like the pipeline state cache, accessors for unique // For owning subsystems like the pipeline cache, accessors for unique
// identifiers (used instead of hashes to make sure collisions can't happen) // identifiers (used instead of hashes to make sure collisions can't happen)
// of binding layouts used by the shader, for invalidation if a shader with an // of binding layouts used by the shader, for invalidation if a shader with an
// incompatible layout was bound. // incompatible layout was bound.

View File

@ -48,7 +48,7 @@ class D3D12SharedMemory : public SharedMemory {
// UseForReading or UseForWriting. // UseForReading or UseForWriting.
// Makes the buffer usable for vertices, indices and texture untiling. // Makes the buffer usable for vertices, indices and texture untiling.
inline void UseForReading() { void UseForReading() {
// Vertex fetch is also allowed in pixel shaders. // Vertex fetch is also allowed in pixel shaders.
CommitUAVWritesAndTransitionBuffer( CommitUAVWritesAndTransitionBuffer(
D3D12_RESOURCE_STATE_INDEX_BUFFER | D3D12_RESOURCE_STATE_INDEX_BUFFER |
@ -56,18 +56,18 @@ class D3D12SharedMemory : public SharedMemory {
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
} }
// Makes the buffer usable for texture tiling after a resolve. // Makes the buffer usable for texture tiling after a resolve.
inline void UseForWriting() { void UseForWriting() {
CommitUAVWritesAndTransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); CommitUAVWritesAndTransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
} }
// Makes the buffer usable as a source for copy commands. // Makes the buffer usable as a source for copy commands.
inline void UseAsCopySource() { void UseAsCopySource() {
CommitUAVWritesAndTransitionBuffer(D3D12_RESOURCE_STATE_COPY_SOURCE); CommitUAVWritesAndTransitionBuffer(D3D12_RESOURCE_STATE_COPY_SOURCE);
} }
// Must be called when doing draws/dispatches modifying data within the shared // Must be called when doing draws/dispatches modifying data within the shared
// memory buffer as a UAV, to make sure that when UseForWriting is called the // memory buffer as a UAV, to make sure that when UseForWriting is called the
// next time, a UAV barrier will be done, and subsequent overlapping UAV // next time, a UAV barrier will be done, and subsequent overlapping UAV
// writes and reads are ordered. // writes and reads are ordered.
inline void MarkUAVWritesCommitNeeded() { void MarkUAVWritesCommitNeeded() {
if (buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { if (buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
buffer_uav_writes_commit_needed_ = true; buffer_uav_writes_commit_needed_ = true;
} }

View File

@ -209,8 +209,7 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
} }
} break; } break;
case Command::kSetPipelineStateHandle: { case Command::kSetPipelineStateHandle: {
current_pipeline_state = current_pipeline_state = command_processor_.GetD3D12PipelineByHandle(
command_processor_.GetD3D12PipelineStateByHandle(
*reinterpret_cast<void* const*>(stream)); *reinterpret_cast<void* const*>(stream));
if (current_pipeline_state) { if (current_pipeline_state) {
command_list->SetPipelineState(current_pipeline_state); command_list->SetPipelineState(current_pipeline_state);

View File

@ -33,7 +33,7 @@ class DeferredCommandList {
void Execute(ID3D12GraphicsCommandList* command_list, void Execute(ID3D12GraphicsCommandList* command_list,
ID3D12GraphicsCommandList1* command_list_1); ID3D12GraphicsCommandList1* command_list_1);
inline void D3DClearUnorderedAccessViewUint( void D3DClearUnorderedAccessViewUint(
D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle_in_current_heap, D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle_in_current_heap,
D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle, ID3D12Resource* resource, D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle, ID3D12Resource* resource,
const UINT values[4], UINT num_rects, const D3D12_RECT* rects) { const UINT values[4], UINT num_rects, const D3D12_RECT* rects) {
@ -51,7 +51,7 @@ class DeferredCommandList {
} }
} }
inline void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset, void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset,
ID3D12Resource* src_buffer, UINT64 src_offset, ID3D12Resource* src_buffer, UINT64 src_offset,
UINT64 num_bytes) { UINT64 num_bytes) {
auto& args = *reinterpret_cast<D3DCopyBufferRegionArguments*>(WriteCommand( auto& args = *reinterpret_cast<D3DCopyBufferRegionArguments*>(WriteCommand(
@ -63,7 +63,7 @@ class DeferredCommandList {
args.num_bytes = num_bytes; args.num_bytes = num_bytes;
} }
inline void D3DCopyResource(ID3D12Resource* dst_resource, void D3DCopyResource(ID3D12Resource* dst_resource,
ID3D12Resource* src_resource) { ID3D12Resource* src_resource) {
auto& args = *reinterpret_cast<D3DCopyResourceArguments*>(WriteCommand( auto& args = *reinterpret_cast<D3DCopyResourceArguments*>(WriteCommand(
Command::kD3DCopyResource, sizeof(D3DCopyResourceArguments))); Command::kD3DCopyResource, sizeof(D3DCopyResourceArguments)));
@ -71,7 +71,7 @@ class DeferredCommandList {
args.src_resource = src_resource; args.src_resource = src_resource;
} }
inline void CopyTexture(const D3D12_TEXTURE_COPY_LOCATION& dst, void CopyTexture(const D3D12_TEXTURE_COPY_LOCATION& dst,
const D3D12_TEXTURE_COPY_LOCATION& src) { const D3D12_TEXTURE_COPY_LOCATION& src) {
auto& args = *reinterpret_cast<CopyTextureArguments*>( auto& args = *reinterpret_cast<CopyTextureArguments*>(
WriteCommand(Command::kCopyTexture, sizeof(CopyTextureArguments))); WriteCommand(Command::kCopyTexture, sizeof(CopyTextureArguments)));
@ -79,8 +79,8 @@ class DeferredCommandList {
std::memcpy(&args.src, &src, sizeof(D3D12_TEXTURE_COPY_LOCATION)); std::memcpy(&args.src, &src, sizeof(D3D12_TEXTURE_COPY_LOCATION));
} }
inline void CopyTextureRegion(const D3D12_TEXTURE_COPY_LOCATION& dst, void CopyTextureRegion(const D3D12_TEXTURE_COPY_LOCATION& dst, UINT dst_x,
UINT dst_x, UINT dst_y, UINT dst_z, UINT dst_y, UINT dst_z,
const D3D12_TEXTURE_COPY_LOCATION& src, const D3D12_TEXTURE_COPY_LOCATION& src,
const D3D12_BOX& src_box) { const D3D12_BOX& src_box) {
auto& args = *reinterpret_cast<CopyTextureRegionArguments*>(WriteCommand( auto& args = *reinterpret_cast<CopyTextureRegionArguments*>(WriteCommand(
@ -93,7 +93,7 @@ class DeferredCommandList {
args.src_box = src_box; args.src_box = src_box;
} }
inline void D3DDispatch(UINT thread_group_count_x, UINT thread_group_count_y, void D3DDispatch(UINT thread_group_count_x, UINT thread_group_count_y,
UINT thread_group_count_z) { UINT thread_group_count_z) {
auto& args = *reinterpret_cast<D3DDispatchArguments*>( auto& args = *reinterpret_cast<D3DDispatchArguments*>(
WriteCommand(Command::kD3DDispatch, sizeof(D3DDispatchArguments))); WriteCommand(Command::kD3DDispatch, sizeof(D3DDispatchArguments)));
@ -102,9 +102,8 @@ class DeferredCommandList {
args.thread_group_count_z = thread_group_count_z; args.thread_group_count_z = thread_group_count_z;
} }
inline void D3DDrawIndexedInstanced(UINT index_count_per_instance, void D3DDrawIndexedInstanced(UINT index_count_per_instance,
UINT instance_count, UINT instance_count, UINT start_index_location,
UINT start_index_location,
INT base_vertex_location, INT base_vertex_location,
UINT start_instance_location) { UINT start_instance_location) {
auto& args = *reinterpret_cast<D3DDrawIndexedInstancedArguments*>( auto& args = *reinterpret_cast<D3DDrawIndexedInstancedArguments*>(
@ -117,8 +116,8 @@ class DeferredCommandList {
args.start_instance_location = start_instance_location; args.start_instance_location = start_instance_location;
} }
inline void D3DDrawInstanced(UINT vertex_count_per_instance, void D3DDrawInstanced(UINT vertex_count_per_instance, UINT instance_count,
UINT instance_count, UINT start_vertex_location, UINT start_vertex_location,
UINT start_instance_location) { UINT start_instance_location) {
auto& args = *reinterpret_cast<D3DDrawInstancedArguments*>(WriteCommand( auto& args = *reinterpret_cast<D3DDrawInstancedArguments*>(WriteCommand(
Command::kD3DDrawInstanced, sizeof(D3DDrawInstancedArguments))); Command::kD3DDrawInstanced, sizeof(D3DDrawInstancedArguments)));
@ -128,7 +127,7 @@ class DeferredCommandList {
args.start_instance_location = start_instance_location; args.start_instance_location = start_instance_location;
} }
inline void D3DIASetIndexBuffer(const D3D12_INDEX_BUFFER_VIEW* view) { void D3DIASetIndexBuffer(const D3D12_INDEX_BUFFER_VIEW* view) {
auto& args = *reinterpret_cast<D3D12_INDEX_BUFFER_VIEW*>(WriteCommand( auto& args = *reinterpret_cast<D3D12_INDEX_BUFFER_VIEW*>(WriteCommand(
Command::kD3DIASetIndexBuffer, sizeof(D3D12_INDEX_BUFFER_VIEW))); Command::kD3DIASetIndexBuffer, sizeof(D3D12_INDEX_BUFFER_VIEW)));
if (view != nullptr) { if (view != nullptr) {
@ -142,14 +141,13 @@ class DeferredCommandList {
} }
} }
inline void D3DIASetPrimitiveTopology( void D3DIASetPrimitiveTopology(D3D12_PRIMITIVE_TOPOLOGY primitive_topology) {
D3D12_PRIMITIVE_TOPOLOGY primitive_topology) {
auto& arg = *reinterpret_cast<D3D12_PRIMITIVE_TOPOLOGY*>(WriteCommand( auto& arg = *reinterpret_cast<D3D12_PRIMITIVE_TOPOLOGY*>(WriteCommand(
Command::kD3DIASetPrimitiveTopology, sizeof(D3D12_PRIMITIVE_TOPOLOGY))); Command::kD3DIASetPrimitiveTopology, sizeof(D3D12_PRIMITIVE_TOPOLOGY)));
arg = primitive_topology; arg = primitive_topology;
} }
inline void D3DOMSetBlendFactor(const FLOAT blend_factor[4]) { void D3DOMSetBlendFactor(const FLOAT blend_factor[4]) {
auto args = reinterpret_cast<FLOAT*>( auto args = reinterpret_cast<FLOAT*>(
WriteCommand(Command::kD3DOMSetBlendFactor, 4 * sizeof(FLOAT))); WriteCommand(Command::kD3DOMSetBlendFactor, 4 * sizeof(FLOAT)));
args[0] = blend_factor[0]; args[0] = blend_factor[0];
@ -158,7 +156,7 @@ class DeferredCommandList {
args[3] = blend_factor[3]; args[3] = blend_factor[3];
} }
inline void D3DOMSetRenderTargets( void D3DOMSetRenderTargets(
UINT num_render_target_descriptors, UINT num_render_target_descriptors,
const D3D12_CPU_DESCRIPTOR_HANDLE* render_target_descriptors, const D3D12_CPU_DESCRIPTOR_HANDLE* render_target_descriptors,
BOOL rts_single_handle_to_descriptor_range, BOOL rts_single_handle_to_descriptor_range,
@ -185,13 +183,13 @@ class DeferredCommandList {
} }
} }
inline void D3DOMSetStencilRef(UINT stencil_ref) { void D3DOMSetStencilRef(UINT stencil_ref) {
auto& arg = *reinterpret_cast<UINT*>( auto& arg = *reinterpret_cast<UINT*>(
WriteCommand(Command::kD3DOMSetStencilRef, sizeof(UINT))); WriteCommand(Command::kD3DOMSetStencilRef, sizeof(UINT)));
arg = stencil_ref; arg = stencil_ref;
} }
inline void D3DResourceBarrier(UINT num_barriers, void D3DResourceBarrier(UINT num_barriers,
const D3D12_RESOURCE_BARRIER* barriers) { const D3D12_RESOURCE_BARRIER* barriers) {
if (num_barriers == 0) { if (num_barriers == 0) {
return; return;
@ -207,21 +205,22 @@ class DeferredCommandList {
num_barriers * sizeof(D3D12_RESOURCE_BARRIER)); num_barriers * sizeof(D3D12_RESOURCE_BARRIER));
} }
inline void RSSetScissorRect(const D3D12_RECT& rect) { void RSSetScissorRect(const D3D12_RECT& rect) {
auto& arg = *reinterpret_cast<D3D12_RECT*>( auto& arg = *reinterpret_cast<D3D12_RECT*>(
WriteCommand(Command::kRSSetScissorRect, sizeof(D3D12_RECT))); WriteCommand(Command::kRSSetScissorRect, sizeof(D3D12_RECT)));
arg = rect; arg = rect;
} }
inline void RSSetViewport(const D3D12_VIEWPORT& viewport) { void RSSetViewport(const D3D12_VIEWPORT& viewport) {
auto& arg = *reinterpret_cast<D3D12_VIEWPORT*>( auto& arg = *reinterpret_cast<D3D12_VIEWPORT*>(
WriteCommand(Command::kRSSetViewport, sizeof(D3D12_VIEWPORT))); WriteCommand(Command::kRSSetViewport, sizeof(D3D12_VIEWPORT)));
arg = viewport; arg = viewport;
} }
inline void D3DSetComputeRoot32BitConstants( void D3DSetComputeRoot32BitConstants(UINT root_parameter_index,
UINT root_parameter_index, UINT num_32bit_values_to_set, UINT num_32bit_values_to_set,
const void* src_data, UINT dest_offset_in_32bit_values) { const void* src_data,
UINT dest_offset_in_32bit_values) {
if (num_32bit_values_to_set == 0) { if (num_32bit_values_to_set == 0) {
return; return;
} }
@ -235,9 +234,10 @@ class DeferredCommandList {
std::memcpy(args + 1, src_data, num_32bit_values_to_set * sizeof(uint32_t)); std::memcpy(args + 1, src_data, num_32bit_values_to_set * sizeof(uint32_t));
} }
inline void D3DSetGraphicsRoot32BitConstants( void D3DSetGraphicsRoot32BitConstants(UINT root_parameter_index,
UINT root_parameter_index, UINT num_32bit_values_to_set, UINT num_32bit_values_to_set,
const void* src_data, UINT dest_offset_in_32bit_values) { const void* src_data,
UINT dest_offset_in_32bit_values) {
if (num_32bit_values_to_set == 0) { if (num_32bit_values_to_set == 0) {
return; return;
} }
@ -251,7 +251,7 @@ class DeferredCommandList {
std::memcpy(args + 1, src_data, num_32bit_values_to_set * sizeof(uint32_t)); std::memcpy(args + 1, src_data, num_32bit_values_to_set * sizeof(uint32_t));
} }
inline void D3DSetComputeRootConstantBufferView( void D3DSetComputeRootConstantBufferView(
UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS buffer_location) { UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS buffer_location) {
auto& args = *reinterpret_cast<SetRootConstantBufferViewArguments*>( auto& args = *reinterpret_cast<SetRootConstantBufferViewArguments*>(
WriteCommand(Command::kD3DSetComputeRootConstantBufferView, WriteCommand(Command::kD3DSetComputeRootConstantBufferView,
@ -260,7 +260,7 @@ class DeferredCommandList {
args.buffer_location = buffer_location; args.buffer_location = buffer_location;
} }
inline void D3DSetGraphicsRootConstantBufferView( void D3DSetGraphicsRootConstantBufferView(
UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS buffer_location) { UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS buffer_location) {
auto& args = *reinterpret_cast<SetRootConstantBufferViewArguments*>( auto& args = *reinterpret_cast<SetRootConstantBufferViewArguments*>(
WriteCommand(Command::kD3DSetGraphicsRootConstantBufferView, WriteCommand(Command::kD3DSetGraphicsRootConstantBufferView,
@ -269,7 +269,7 @@ class DeferredCommandList {
args.buffer_location = buffer_location; args.buffer_location = buffer_location;
} }
inline void D3DSetComputeRootDescriptorTable( void D3DSetComputeRootDescriptorTable(
UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) {
auto& args = *reinterpret_cast<SetRootDescriptorTableArguments*>( auto& args = *reinterpret_cast<SetRootDescriptorTableArguments*>(
WriteCommand(Command::kD3DSetComputeRootDescriptorTable, WriteCommand(Command::kD3DSetComputeRootDescriptorTable,
@ -278,7 +278,7 @@ class DeferredCommandList {
args.base_descriptor.ptr = base_descriptor.ptr; args.base_descriptor.ptr = base_descriptor.ptr;
} }
inline void D3DSetGraphicsRootDescriptorTable( void D3DSetGraphicsRootDescriptorTable(
UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) {
auto& args = *reinterpret_cast<SetRootDescriptorTableArguments*>( auto& args = *reinterpret_cast<SetRootDescriptorTableArguments*>(
WriteCommand(Command::kD3DSetGraphicsRootDescriptorTable, WriteCommand(Command::kD3DSetGraphicsRootDescriptorTable,
@ -287,20 +287,19 @@ class DeferredCommandList {
args.base_descriptor.ptr = base_descriptor.ptr; args.base_descriptor.ptr = base_descriptor.ptr;
} }
inline void D3DSetComputeRootSignature(ID3D12RootSignature* root_signature) { void D3DSetComputeRootSignature(ID3D12RootSignature* root_signature) {
auto& arg = *reinterpret_cast<ID3D12RootSignature**>(WriteCommand( auto& arg = *reinterpret_cast<ID3D12RootSignature**>(WriteCommand(
Command::kD3DSetComputeRootSignature, sizeof(ID3D12RootSignature*))); Command::kD3DSetComputeRootSignature, sizeof(ID3D12RootSignature*)));
arg = root_signature; arg = root_signature;
} }
inline void D3DSetGraphicsRootSignature(ID3D12RootSignature* root_signature) { void D3DSetGraphicsRootSignature(ID3D12RootSignature* root_signature) {
auto& arg = *reinterpret_cast<ID3D12RootSignature**>(WriteCommand( auto& arg = *reinterpret_cast<ID3D12RootSignature**>(WriteCommand(
Command::kD3DSetGraphicsRootSignature, sizeof(ID3D12RootSignature*))); Command::kD3DSetGraphicsRootSignature, sizeof(ID3D12RootSignature*)));
arg = root_signature; arg = root_signature;
} }
inline void SetDescriptorHeaps( void SetDescriptorHeaps(ID3D12DescriptorHeap* cbv_srv_uav_descriptor_heap,
ID3D12DescriptorHeap* cbv_srv_uav_descriptor_heap,
ID3D12DescriptorHeap* sampler_descriptor_heap) { ID3D12DescriptorHeap* sampler_descriptor_heap) {
auto& args = *reinterpret_cast<SetDescriptorHeapsArguments*>(WriteCommand( auto& args = *reinterpret_cast<SetDescriptorHeapsArguments*>(WriteCommand(
Command::kSetDescriptorHeaps, sizeof(SetDescriptorHeapsArguments))); Command::kSetDescriptorHeaps, sizeof(SetDescriptorHeapsArguments)));
@ -308,20 +307,19 @@ class DeferredCommandList {
args.sampler_descriptor_heap = sampler_descriptor_heap; args.sampler_descriptor_heap = sampler_descriptor_heap;
} }
inline void D3DSetPipelineState(ID3D12PipelineState* pipeline_state) { void D3DSetPipelineState(ID3D12PipelineState* pipeline_state) {
auto& arg = *reinterpret_cast<ID3D12PipelineState**>(WriteCommand( auto& arg = *reinterpret_cast<ID3D12PipelineState**>(WriteCommand(
Command::kD3DSetPipelineState, sizeof(ID3D12PipelineState*))); Command::kD3DSetPipelineState, sizeof(ID3D12PipelineState*)));
arg = pipeline_state; arg = pipeline_state;
} }
inline void SetPipelineStateHandle(void* pipeline_state_handle) { void SetPipelineStateHandle(void* pipeline_state_handle) {
auto& arg = *reinterpret_cast<void**>( auto& arg = *reinterpret_cast<void**>(
WriteCommand(Command::kSetPipelineStateHandle, sizeof(void*))); WriteCommand(Command::kSetPipelineStateHandle, sizeof(void*)));
arg = pipeline_state_handle; arg = pipeline_state_handle;
} }
inline void D3DSetSamplePositions( void D3DSetSamplePositions(UINT num_samples_per_pixel, UINT num_pixels,
UINT num_samples_per_pixel, UINT num_pixels,
const D3D12_SAMPLE_POSITION* sample_positions) { const D3D12_SAMPLE_POSITION* sample_positions) {
auto& args = *reinterpret_cast<D3DSetSamplePositionsArguments*>( auto& args = *reinterpret_cast<D3DSetSamplePositionsArguments*>(
WriteCommand(Command::kD3DSetSamplePositions, WriteCommand(Command::kD3DSetSamplePositions,

View File

@ -43,10 +43,10 @@ DEFINE_bool(
"D3D12"); "D3D12");
DEFINE_int32( DEFINE_int32(
d3d12_pipeline_creation_threads, -1, d3d12_pipeline_creation_threads, -1,
"Number of threads used for graphics pipeline state object creation. -1 to " "Number of threads used for graphics pipeline creation. -1 to calculate "
"calculate automatically (75% of logical CPU cores), a positive number to " "automatically (75% of logical CPU cores), a positive number to specify "
"specify the number of threads explicitly (up to the number of logical CPU " "the number of threads explicitly (up to the number of logical CPU cores), "
"cores), 0 to disable multithreaded pipeline state object creation.", "0 to disable multithreaded pipeline creation.",
"D3D12"); "D3D12");
DEFINE_bool(d3d12_tessellation_wireframe, false, DEFINE_bool(d3d12_tessellation_wireframe, false,
"Display tessellated surfaces as wireframe for debugging.", "Display tessellated surfaces as wireframe for debugging.",
@ -125,8 +125,8 @@ bool PipelineCache::Initialize() {
logical_processor_count = 6; logical_processor_count = 6;
} }
// Initialize creation thread synchronization data even if not using creation // Initialize creation thread synchronization data even if not using creation
// threads because they may be used anyway to create pipeline state objects // threads because they may be used anyway to create pipelines from the
// from the storage. // storage.
creation_threads_busy_ = 0; creation_threads_busy_ = 0;
creation_completion_event_ = creation_completion_event_ =
xe::threading::Event::CreateManualResetEvent(true); xe::threading::Event::CreateManualResetEvent(true);
@ -145,7 +145,7 @@ bool PipelineCache::Initialize() {
for (size_t i = 0; i < creation_thread_count; ++i) { for (size_t i = 0; i < creation_thread_count; ++i) {
std::unique_ptr<xe::threading::Thread> creation_thread = std::unique_ptr<xe::threading::Thread> creation_thread =
xe::threading::Thread::Create({}, [this, i]() { CreationThread(i); }); xe::threading::Thread::Create({}, [this, i]() { CreationThread(i); });
creation_thread->set_name("D3D12 Pipeline States"); creation_thread->set_name("D3D12 Pipelines");
creation_threads_.push_back(std::move(creation_thread)); creation_threads_.push_back(std::move(creation_thread));
} }
} }
@ -184,13 +184,12 @@ void PipelineCache::ClearCache(bool shutting_down) {
} }
ShutdownShaderStorage(); ShutdownShaderStorage();
// Remove references to the current pipeline state object. // Remove references to the current pipeline.
current_pipeline_state_ = nullptr; current_pipeline_ = nullptr;
if (!creation_threads_.empty()) { if (!creation_threads_.empty()) {
// Empty the pipeline state object creation queue and make sure there are no // Empty the pipeline creation queue and make sure there are no threads
// threads currently creating pipeline state objects because pipeline states // currently creating pipelines because pipelines are going to be deleted.
// are going to be deleted.
bool await_creation_completion_event = false; bool await_creation_completion_event = false;
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
@ -207,13 +206,13 @@ void PipelineCache::ClearCache(bool shutting_down) {
} }
} }
// Destroy all pipeline state objects. // Destroy all pipelines.
for (auto it : pipeline_states_) { for (auto it : pipelines_) {
it.second->state->Release(); it.second->state->Release();
delete it.second; delete it.second;
} }
pipeline_states_.clear(); pipelines_.clear();
COUNT_profile_set("gpu/pipeline_cache/pipeline_states", 0); COUNT_profile_set("gpu/pipeline_cache/pipelines", 0);
// Destroy all shaders. // Destroy all shaders.
command_processor_.NotifyShaderBindingsLayoutUIDsInvalidated(); command_processor_.NotifyShaderBindingsLayoutUIDsInvalidated();
@ -401,7 +400,7 @@ void PipelineCache::InitializeShaderStorage(
D3D12Shader* shader = D3D12Shader* shader =
new D3D12Shader(shader_header.type, ucode_data_hash, new D3D12Shader(shader_header.type, ucode_data_hash,
ucode_dwords.data(), shader_header.ucode_dword_count); ucode_dwords.data(), shader_header.ucode_dword_count);
shaders_.insert({ucode_data_hash, shader}); shaders_.emplace(ucode_data_hash, shader);
// Create new threads if the currently existing threads can't keep up with // Create new threads if the currently existing threads can't keep up with
// file reading, but not more than the number of logical processors minus // file reading, but not more than the number of logical processors minus
// one. // one.
@ -459,72 +458,66 @@ void PipelineCache::InitializeShaderStorage(
} }
// 'DXRO' or 'DXRT'. // 'DXRO' or 'DXRT'.
const uint32_t pipeline_state_storage_magic_api = const uint32_t pipeline_storage_magic_api =
edram_rov_used_ ? 0x4F525844 : 0x54525844; edram_rov_used_ ? 0x4F525844 : 0x54525844;
// Initialize the pipeline state storage stream. // Initialize the pipeline storage stream.
uint64_t pipeline_state_storage_initialization_start_ = uint64_t pipeline_storage_initialization_start_ =
xe::Clock::QueryHostTickCount(); xe::Clock::QueryHostTickCount();
auto pipeline_state_storage_file_path = auto pipeline_storage_file_path =
shader_storage_shareable_root / shader_storage_shareable_root /
fmt::format("{:08X}.{}.d3d12.xpso", title_id, fmt::format("{:08X}.{}.d3d12.xpso", title_id,
edram_rov_used_ ? "rov" : "rtv"); edram_rov_used_ ? "rov" : "rtv");
pipeline_state_storage_file_ = pipeline_storage_file_ =
xe::filesystem::OpenFile(pipeline_state_storage_file_path, "a+b"); xe::filesystem::OpenFile(pipeline_storage_file_path, "a+b");
if (!pipeline_state_storage_file_) { if (!pipeline_storage_file_) {
XELOGE( XELOGE(
"Failed to open the Direct3D 12 pipeline state description storage " "Failed to open the Direct3D 12 pipeline description storage file for "
"file for writing, persistent shader storage will be disabled: {}", "writing, persistent shader storage will be disabled: {}",
xe::path_to_utf8(pipeline_state_storage_file_path)); xe::path_to_utf8(pipeline_storage_file_path));
fclose(shader_storage_file_); fclose(shader_storage_file_);
shader_storage_file_ = nullptr; shader_storage_file_ = nullptr;
return; return;
} }
pipeline_state_storage_file_flush_needed_ = false; pipeline_storage_file_flush_needed_ = false;
// 'XEPS'. // 'XEPS'.
const uint32_t pipeline_state_storage_magic = 0x53504558; const uint32_t pipeline_storage_magic = 0x53504558;
struct { struct {
uint32_t magic; uint32_t magic;
uint32_t magic_api; uint32_t magic_api;
uint32_t version_swapped; uint32_t version_swapped;
} pipeline_state_storage_file_header; } pipeline_storage_file_header;
if (fread(&pipeline_state_storage_file_header, if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
sizeof(pipeline_state_storage_file_header), 1, 1, pipeline_storage_file_) &&
pipeline_state_storage_file_) && pipeline_storage_file_header.magic == pipeline_storage_magic &&
pipeline_state_storage_file_header.magic == pipeline_storage_file_header.magic_api == pipeline_storage_magic_api &&
pipeline_state_storage_magic && xe::byte_swap(pipeline_storage_file_header.version_swapped) ==
pipeline_state_storage_file_header.magic_api ==
pipeline_state_storage_magic_api &&
xe::byte_swap(pipeline_state_storage_file_header.version_swapped) ==
PipelineDescription::kVersion) { PipelineDescription::kVersion) {
uint64_t pipeline_state_storage_valid_bytes = uint64_t pipeline_storage_valid_bytes =
sizeof(pipeline_state_storage_file_header); sizeof(pipeline_storage_file_header);
// Enqueue pipeline state descriptions written by previous Xenia executions // Enqueue pipeline descriptions written by previous Xenia executions until
// until the end of the file or until a corrupted one is detected. // the end of the file or until a corrupted one is detected.
xe::filesystem::Seek(pipeline_state_storage_file_, 0, SEEK_END); xe::filesystem::Seek(pipeline_storage_file_, 0, SEEK_END);
int64_t pipeline_state_storage_told_end = int64_t pipeline_storage_told_end =
xe::filesystem::Tell(pipeline_state_storage_file_); xe::filesystem::Tell(pipeline_storage_file_);
size_t pipeline_state_storage_told_count = size_t pipeline_storage_told_count = size_t(
size_t(pipeline_state_storage_told_end >= pipeline_storage_told_end >= int64_t(pipeline_storage_valid_bytes)
int64_t(pipeline_state_storage_valid_bytes) ? (uint64_t(pipeline_storage_told_end) -
? (uint64_t(pipeline_state_storage_told_end) - pipeline_storage_valid_bytes) /
pipeline_state_storage_valid_bytes) /
sizeof(PipelineStoredDescription) sizeof(PipelineStoredDescription)
: 0); : 0);
if (pipeline_state_storage_told_count && if (pipeline_storage_told_count &&
xe::filesystem::Seek(pipeline_state_storage_file_, xe::filesystem::Seek(pipeline_storage_file_,
int64_t(pipeline_state_storage_valid_bytes), int64_t(pipeline_storage_valid_bytes), SEEK_SET)) {
SEEK_SET)) {
std::vector<PipelineStoredDescription> pipeline_stored_descriptions; std::vector<PipelineStoredDescription> pipeline_stored_descriptions;
pipeline_stored_descriptions.resize(pipeline_state_storage_told_count); pipeline_stored_descriptions.resize(pipeline_storage_told_count);
pipeline_stored_descriptions.resize(fread( pipeline_stored_descriptions.resize(
pipeline_stored_descriptions.data(), fread(pipeline_stored_descriptions.data(),
sizeof(PipelineStoredDescription), pipeline_state_storage_told_count, sizeof(PipelineStoredDescription), pipeline_storage_told_count,
pipeline_state_storage_file_)); pipeline_storage_file_));
if (!pipeline_stored_descriptions.empty()) { if (!pipeline_stored_descriptions.empty()) {
// Launch additional creation threads to use all cores to create // Launch additional creation threads to use all cores to create
// pipeline state objects faster. Will also be using the main thread, so // pipelines faster. Will also be using the main thread, so minus 1.
// minus 1.
size_t creation_thread_original_count = creation_threads_.size(); size_t creation_thread_original_count = creation_threads_.size();
size_t creation_thread_needed_count = size_t creation_thread_needed_count =
std::max(std::min(pipeline_stored_descriptions.size(), std::max(std::min(pipeline_stored_descriptions.size(),
@ -538,10 +531,10 @@ void PipelineCache::InitializeShaderStorage(
{}, [this, creation_thread_index]() { {}, [this, creation_thread_index]() {
CreationThread(creation_thread_index); CreationThread(creation_thread_index);
}); });
creation_thread->set_name("D3D12 Pipeline States Additional"); creation_thread->set_name("D3D12 Pipelines");
creation_threads_.push_back(std::move(creation_thread)); creation_threads_.push_back(std::move(creation_thread));
} }
size_t pipeline_states_created = 0; size_t pipelines_created = 0;
for (const PipelineStoredDescription& pipeline_stored_description : for (const PipelineStoredDescription& pipeline_stored_description :
pipeline_stored_descriptions) { pipeline_stored_descriptions) {
const PipelineDescription& pipeline_description = const PipelineDescription& pipeline_description =
@ -553,23 +546,21 @@ void PipelineCache::InitializeShaderStorage(
0) != pipeline_stored_description.description_hash) { 0) != pipeline_stored_description.description_hash) {
break; break;
} }
pipeline_state_storage_valid_bytes += pipeline_storage_valid_bytes += sizeof(PipelineStoredDescription);
sizeof(PipelineStoredDescription); // Skip already known pipelines - those have already been enqueued.
// Skip already known pipeline states - those have already been auto found_range = pipelines_.equal_range(
// enqueued.
auto found_range = pipeline_states_.equal_range(
pipeline_stored_description.description_hash); pipeline_stored_description.description_hash);
bool pipeline_state_found = false; bool pipeline_found = false;
for (auto it = found_range.first; it != found_range.second; ++it) { for (auto it = found_range.first; it != found_range.second; ++it) {
PipelineState* found_pipeline_state = it->second; Pipeline* found_pipeline = it->second;
if (!std::memcmp(&found_pipeline_state->description.description, if (!std::memcmp(&found_pipeline->description.description,
&pipeline_description, &pipeline_description,
sizeof(pipeline_description))) { sizeof(pipeline_description))) {
pipeline_state_found = true; pipeline_found = true;
break; break;
} }
} }
if (pipeline_state_found) { if (pipeline_found) {
continue; continue;
} }
@ -606,36 +597,33 @@ void PipelineCache::InitializeShaderStorage(
std::memcpy(&pipeline_runtime_description.description, std::memcpy(&pipeline_runtime_description.description,
&pipeline_description, sizeof(pipeline_description)); &pipeline_description, sizeof(pipeline_description));
PipelineState* new_pipeline_state = new PipelineState; Pipeline* new_pipeline = new Pipeline;
new_pipeline_state->state = nullptr; new_pipeline->state = nullptr;
std::memcpy(&new_pipeline_state->description, std::memcpy(&new_pipeline->description, &pipeline_runtime_description,
&pipeline_runtime_description,
sizeof(pipeline_runtime_description)); sizeof(pipeline_runtime_description));
pipeline_states_.insert( pipelines_.emplace(pipeline_stored_description.description_hash,
std::make_pair(pipeline_stored_description.description_hash, new_pipeline);
new_pipeline_state)); COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
COUNT_profile_set("gpu/pipeline_cache/pipeline_states",
pipeline_states_.size());
if (!creation_threads_.empty()) { if (!creation_threads_.empty()) {
// Submit the pipeline for creation to any available thread. // Submit the pipeline for creation to any available thread.
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_queue_.push_back(new_pipeline_state); creation_queue_.push_back(new_pipeline);
} }
creation_request_cond_.notify_one(); creation_request_cond_.notify_one();
} else { } else {
new_pipeline_state->state = new_pipeline->state =
CreateD3D12PipelineState(pipeline_runtime_description); CreateD3D12Pipeline(pipeline_runtime_description);
} }
++pipeline_states_created; ++pipelines_created;
} }
CreateQueuedPipelineStatesOnProcessorThread(); CreateQueuedPipelinesOnProcessorThread();
if (creation_threads_.size() > creation_thread_original_count) { if (creation_threads_.size() > creation_thread_original_count) {
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_threads_shutdown_from_ = creation_thread_original_count; creation_threads_shutdown_from_ = creation_thread_original_count;
// Assuming the queue is empty because of // Assuming the queue is empty because of
// CreateQueuedPipelineStatesOnProcessorThread. // CreateQueuedPipelinesOnProcessorThread.
} }
creation_request_cond_.notify_all(); creation_request_cond_.notify_all();
while (creation_threads_.size() > creation_thread_original_count) { while (creation_threads_.size() > creation_thread_original_count) {
@ -663,26 +651,23 @@ void PipelineCache::InitializeShaderStorage(
} }
} }
XELOGGPU( XELOGGPU(
"Created {} graphics pipeline state objects from the storage in {} " "Created {} graphics pipelines from the storage in {} milliseconds",
"milliseconds", pipelines_created,
pipeline_states_created,
(xe::Clock::QueryHostTickCount() - (xe::Clock::QueryHostTickCount() -
pipeline_state_storage_initialization_start_) * pipeline_storage_initialization_start_) *
1000 / xe::Clock::QueryHostTickFrequency()); 1000 / xe::Clock::QueryHostTickFrequency());
} }
} }
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, xe::filesystem::TruncateStdioFile(pipeline_storage_file_,
pipeline_state_storage_valid_bytes); pipeline_storage_valid_bytes);
} else { } else {
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, 0); xe::filesystem::TruncateStdioFile(pipeline_storage_file_, 0);
pipeline_state_storage_file_header.magic = pipeline_state_storage_magic; pipeline_storage_file_header.magic = pipeline_storage_magic;
pipeline_state_storage_file_header.magic_api = pipeline_storage_file_header.magic_api = pipeline_storage_magic_api;
pipeline_state_storage_magic_api; pipeline_storage_file_header.version_swapped =
pipeline_state_storage_file_header.version_swapped =
xe::byte_swap(PipelineDescription::kVersion); xe::byte_swap(PipelineDescription::kVersion);
fwrite(&pipeline_state_storage_file_header, fwrite(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
sizeof(pipeline_state_storage_file_header), 1, 1, pipeline_storage_file_);
pipeline_state_storage_file_);
} }
shader_storage_root_ = storage_root; shader_storage_root_ = storage_root;
@ -690,7 +675,7 @@ void PipelineCache::InitializeShaderStorage(
// Start the storage writing thread. // Start the storage writing thread.
storage_write_flush_shaders_ = false; storage_write_flush_shaders_ = false;
storage_write_flush_pipeline_states_ = false; storage_write_flush_pipelines_ = false;
storage_write_thread_shutdown_ = false; storage_write_thread_shutdown_ = false;
storage_write_thread_ = storage_write_thread_ =
xe::threading::Thread::Create({}, [this]() { StorageWriteThread(); }); xe::threading::Thread::Create({}, [this]() { StorageWriteThread(); });
@ -707,12 +692,12 @@ void PipelineCache::ShutdownShaderStorage() {
storage_write_thread_.reset(); storage_write_thread_.reset();
} }
storage_write_shader_queue_.clear(); storage_write_shader_queue_.clear();
storage_write_pipeline_state_queue_.clear(); storage_write_pipeline_queue_.clear();
if (pipeline_state_storage_file_) { if (pipeline_storage_file_) {
fclose(pipeline_state_storage_file_); fclose(pipeline_storage_file_);
pipeline_state_storage_file_ = nullptr; pipeline_storage_file_ = nullptr;
pipeline_state_storage_file_flush_needed_ = false; pipeline_storage_file_flush_needed_ = false;
} }
if (shader_storage_file_) { if (shader_storage_file_) {
@ -727,30 +712,29 @@ void PipelineCache::ShutdownShaderStorage() {
void PipelineCache::EndSubmission() { void PipelineCache::EndSubmission() {
if (shader_storage_file_flush_needed_ || if (shader_storage_file_flush_needed_ ||
pipeline_state_storage_file_flush_needed_) { pipeline_storage_file_flush_needed_) {
{ {
std::lock_guard<std::mutex> lock(storage_write_request_lock_); std::lock_guard<std::mutex> lock(storage_write_request_lock_);
if (shader_storage_file_flush_needed_) { if (shader_storage_file_flush_needed_) {
storage_write_flush_shaders_ = true; storage_write_flush_shaders_ = true;
} }
if (pipeline_state_storage_file_flush_needed_) { if (pipeline_storage_file_flush_needed_) {
storage_write_flush_pipeline_states_ = true; storage_write_flush_pipelines_ = true;
} }
} }
storage_write_request_cond_.notify_one(); storage_write_request_cond_.notify_one();
shader_storage_file_flush_needed_ = false; shader_storage_file_flush_needed_ = false;
pipeline_state_storage_file_flush_needed_ = false; pipeline_storage_file_flush_needed_ = false;
} }
if (!creation_threads_.empty()) { if (!creation_threads_.empty()) {
CreateQueuedPipelineStatesOnProcessorThread(); CreateQueuedPipelinesOnProcessorThread();
// Await creation of all queued pipeline state objects. // Await creation of all queued pipelines.
bool await_creation_completion_event; bool await_creation_completion_event;
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
// Assuming the creation queue is already empty (because the processor // Assuming the creation queue is already empty (because the processor
// thread also worked on creating the leftover pipeline state objects), so // thread also worked on creating the leftover pipelines), so only check
// only check if there are threads with pipeline state objects currently // if there are threads with pipelines currently being created.
// being created.
await_creation_completion_event = creation_threads_busy_ != 0; await_creation_completion_event = creation_threads_busy_ != 0;
if (await_creation_completion_event) { if (await_creation_completion_event) {
creation_completion_event_->Reset(); creation_completion_event_->Reset();
@ -764,7 +748,7 @@ void PipelineCache::EndSubmission() {
} }
} }
bool PipelineCache::IsCreatingPipelineStates() { bool PipelineCache::IsCreatingPipelines() {
if (creation_threads_.empty()) { if (creation_threads_.empty()) {
return false; return false;
} }
@ -789,7 +773,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
// again. // again.
D3D12Shader* shader = D3D12Shader* shader =
new D3D12Shader(shader_type, data_hash, host_address, dword_count); new D3D12Shader(shader_type, data_hash, host_address, dword_count);
shaders_.insert({data_hash, shader}); shaders_.emplace(data_hash, shader);
return shader; return shader;
} }
@ -797,11 +781,11 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid() Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid()
const { const {
// If the values this functions returns are changed, INVALIDATE THE SHADER // If the values this functions returns are changed, INVALIDATE THE SHADER
// STORAGE (increase kVersion for BOTH shaders and pipeline states)! The // STORAGE (increase kVersion for BOTH shaders and pipelines)! The exception
// exception is when the function originally returned "unsupported", but // is when the function originally returned "unsupported", but started to
// started to return a valid value (in this case the shader wouldn't be cached // return a valid value (in this case the shader wouldn't be cached in the
// in the first place). Otherwise games will not be able to locate shaders for // first place). Otherwise games will not be able to locate shaders for draws
// draws for which the host vertex shader type has changed! // for which the host vertex shader type has changed!
const auto& regs = register_file_; const auto& regs = register_file_;
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>(); auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
if (!xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode, if (!xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode,
@ -928,13 +912,12 @@ bool PipelineCache::ConfigurePipeline(
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format, xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
bool early_z, bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5], const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_state_handle_out, void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) {
ID3D12RootSignature** root_signature_out) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
assert_not_null(pipeline_state_handle_out); assert_not_null(pipeline_handle_out);
assert_not_null(root_signature_out); assert_not_null(root_signature_out);
PipelineRuntimeDescription runtime_description; PipelineRuntimeDescription runtime_description;
@ -945,24 +928,24 @@ bool PipelineCache::ConfigurePipeline(
} }
PipelineDescription& description = runtime_description.description; PipelineDescription& description = runtime_description.description;
if (current_pipeline_state_ != nullptr && if (current_pipeline_ != nullptr &&
!std::memcmp(&current_pipeline_state_->description.description, !std::memcmp(&current_pipeline_->description.description, &description,
&description, sizeof(description))) { sizeof(description))) {
*pipeline_state_handle_out = current_pipeline_state_; *pipeline_handle_out = current_pipeline_;
*root_signature_out = runtime_description.root_signature; *root_signature_out = runtime_description.root_signature;
return true; return true;
} }
// Find an existing pipeline state object in the cache. // Find an existing pipeline in the cache.
uint64_t hash = XXH64(&description, sizeof(description), 0); uint64_t hash = XXH64(&description, sizeof(description), 0);
auto found_range = pipeline_states_.equal_range(hash); auto found_range = pipelines_.equal_range(hash);
for (auto it = found_range.first; it != found_range.second; ++it) { for (auto it = found_range.first; it != found_range.second; ++it) {
PipelineState* found_pipeline_state = it->second; Pipeline* found_pipeline = it->second;
if (!std::memcmp(&found_pipeline_state->description.description, if (!std::memcmp(&found_pipeline->description.description, &description,
&description, sizeof(description))) { sizeof(description))) {
current_pipeline_state_ = found_pipeline_state; current_pipeline_ = found_pipeline;
*pipeline_state_handle_out = found_pipeline_state; *pipeline_handle_out = found_pipeline;
*root_signature_out = found_pipeline_state->description.root_signature; *root_signature_out = found_pipeline->description.root_signature;
return true; return true;
} }
} }
@ -973,33 +956,32 @@ bool PipelineCache::ConfigurePipeline(
return false; return false;
} }
PipelineState* new_pipeline_state = new PipelineState; Pipeline* new_pipeline = new Pipeline;
new_pipeline_state->state = nullptr; new_pipeline->state = nullptr;
std::memcpy(&new_pipeline_state->description, &runtime_description, std::memcpy(&new_pipeline->description, &runtime_description,
sizeof(runtime_description)); sizeof(runtime_description));
pipeline_states_.insert(std::make_pair(hash, new_pipeline_state)); pipelines_.emplace(hash, new_pipeline);
COUNT_profile_set("gpu/pipeline_cache/pipeline_states", COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
pipeline_states_.size());
if (!creation_threads_.empty()) { if (!creation_threads_.empty()) {
// Submit the pipeline state object for creation to any available thread. // Submit the pipeline for creation to any available thread.
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_queue_.push_back(new_pipeline_state); creation_queue_.push_back(new_pipeline);
} }
creation_request_cond_.notify_one(); creation_request_cond_.notify_one();
} else { } else {
new_pipeline_state->state = CreateD3D12PipelineState(runtime_description); new_pipeline->state = CreateD3D12Pipeline(runtime_description);
} }
if (pipeline_state_storage_file_) { if (pipeline_storage_file_) {
assert_not_null(storage_write_thread_); assert_not_null(storage_write_thread_);
pipeline_state_storage_file_flush_needed_ = true; pipeline_storage_file_flush_needed_ = true;
{ {
std::lock_guard<std::mutex> lock(storage_write_request_lock_); std::lock_guard<std::mutex> lock(storage_write_request_lock_);
storage_write_pipeline_state_queue_.emplace_back(); storage_write_pipeline_queue_.emplace_back();
PipelineStoredDescription& stored_description = PipelineStoredDescription& stored_description =
storage_write_pipeline_state_queue_.back(); storage_write_pipeline_queue_.back();
stored_description.description_hash = hash; stored_description.description_hash = hash;
std::memcpy(&stored_description.description, &description, std::memcpy(&stored_description.description, &description,
sizeof(description)); sizeof(description));
@ -1007,8 +989,8 @@ bool PipelineCache::ConfigurePipeline(
storage_write_request_cond_.notify_all(); storage_write_request_cond_.notify_all();
} }
current_pipeline_state_ = new_pipeline_state; current_pipeline_ = new_pipeline;
*pipeline_state_handle_out = new_pipeline_state; *pipeline_handle_out = new_pipeline;
*root_signature_out = runtime_description.root_signature; *root_signature_out = runtime_description.root_signature;
return true; return true;
} }
@ -1135,8 +1117,8 @@ bool PipelineCache::TranslateShader(
std::memcpy( std::memcpy(
texture_binding_layouts_.data() + new_uid.vector_span_offset, texture_binding_layouts_.data() + new_uid.vector_span_offset,
texture_bindings, texture_binding_layout_bytes); texture_bindings, texture_binding_layout_bytes);
texture_binding_layout_map_.insert( texture_binding_layout_map_.emplace(texture_binding_layout_hash,
{texture_binding_layout_hash, new_uid}); new_uid);
} }
} }
if (bindless_sampler_count) { if (bindless_sampler_count) {
@ -1178,8 +1160,8 @@ bool PipelineCache::TranslateShader(
vector_bindless_sampler_layout[i] = vector_bindless_sampler_layout[i] =
sampler_bindings[i].bindless_descriptor_index; sampler_bindings[i].bindless_descriptor_index;
} }
bindless_sampler_layout_map_.insert( bindless_sampler_layout_map_.emplace(bindless_sampler_layout_hash,
{bindless_sampler_layout_hash, new_uid}); new_uid);
} }
} }
} }
@ -1507,8 +1489,7 @@ bool PipelineCache::GetCurrentStateDescription(
/* 16 */ PipelineBlendFactor::kSrcAlphaSat, /* 16 */ PipelineBlendFactor::kSrcAlphaSat,
}; };
// Like kBlendFactorMap, but with color modes changed to alpha. Some // Like kBlendFactorMap, but with color modes changed to alpha. Some
// pipeline state objects aren't created in Prey because a color mode is // pipelines aren't created in Prey because a color mode is used for alpha.
// used for alpha.
static const PipelineBlendFactor kBlendFactorAlphaMap[32] = { static const PipelineBlendFactor kBlendFactorAlphaMap[32] = {
/* 0 */ PipelineBlendFactor::kZero, /* 0 */ PipelineBlendFactor::kZero,
/* 1 */ PipelineBlendFactor::kOne, /* 1 */ PipelineBlendFactor::kOne,
@ -1568,18 +1549,16 @@ bool PipelineCache::GetCurrentStateDescription(
return true; return true;
} }
ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState( ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
const PipelineRuntimeDescription& runtime_description) { const PipelineRuntimeDescription& runtime_description) {
const PipelineDescription& description = runtime_description.description; const PipelineDescription& description = runtime_description.description;
if (runtime_description.pixel_shader != nullptr) { if (runtime_description.pixel_shader != nullptr) {
XELOGGPU( XELOGGPU("Creating graphics pipeline with VS {:016X}, PS {:016X}",
"Creating graphics pipeline state with VS {:016X}"
", PS {:016X}",
runtime_description.vertex_shader->ucode_data_hash(), runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash()); runtime_description.pixel_shader->ucode_data_hash());
} else { } else {
XELOGGPU("Creating graphics pipeline state with VS {:016X}", XELOGGPU("Creating graphics pipeline with VS {:016X}",
runtime_description.vertex_shader->ucode_data_hash()); runtime_description.vertex_shader->ucode_data_hash());
} }
@ -1892,20 +1871,18 @@ ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState(
} }
} }
// Create the pipeline state object. // Create the D3D12 pipeline state object.
auto device = auto device =
command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();
ID3D12PipelineState* state; ID3D12PipelineState* state;
if (FAILED(device->CreateGraphicsPipelineState(&state_desc, if (FAILED(device->CreateGraphicsPipelineState(&state_desc,
IID_PPV_ARGS(&state)))) { IID_PPV_ARGS(&state)))) {
if (runtime_description.pixel_shader != nullptr) { if (runtime_description.pixel_shader != nullptr) {
XELOGE( XELOGE("Failed to create graphics pipeline with VS {:016X}, PS {:016X}",
"Failed to create graphics pipeline state with VS {:016X}"
", PS {:016X}",
runtime_description.vertex_shader->ucode_data_hash(), runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash()); runtime_description.pixel_shader->ucode_data_hash());
} else { } else {
XELOGE("Failed to create graphics pipeline state with VS {:016X}", XELOGE("Failed to create graphics pipeline with VS {:016X}",
runtime_description.vertex_shader->ucode_data_hash()); runtime_description.vertex_shader->ucode_data_hash());
} }
return nullptr; return nullptr;
@ -1932,7 +1909,7 @@ void PipelineCache::StorageWriteThread() {
ucode_guest_endian.reserve(0xFFFF); ucode_guest_endian.reserve(0xFFFF);
bool flush_shaders = false; bool flush_shaders = false;
bool flush_pipeline_states = false; bool flush_pipelines = false;
while (true) { while (true) {
if (flush_shaders) { if (flush_shaders) {
@ -1940,15 +1917,15 @@ void PipelineCache::StorageWriteThread() {
assert_not_null(shader_storage_file_); assert_not_null(shader_storage_file_);
fflush(shader_storage_file_); fflush(shader_storage_file_);
} }
if (flush_pipeline_states) { if (flush_pipelines) {
flush_pipeline_states = false; flush_pipelines = false;
assert_not_null(pipeline_state_storage_file_); assert_not_null(pipeline_storage_file_);
fflush(pipeline_state_storage_file_); fflush(pipeline_storage_file_);
} }
std::pair<const Shader*, reg::SQ_PROGRAM_CNTL> shader_pair = {}; std::pair<const Shader*, reg::SQ_PROGRAM_CNTL> shader_pair = {};
PipelineStoredDescription pipeline_description; PipelineStoredDescription pipeline_description;
bool write_pipeline_state = false; bool write_pipeline = false;
{ {
std::unique_lock<std::mutex> lock(storage_write_request_lock_); std::unique_lock<std::mutex> lock(storage_write_request_lock_);
if (storage_write_thread_shutdown_) { if (storage_write_thread_shutdown_) {
@ -1961,17 +1938,17 @@ void PipelineCache::StorageWriteThread() {
storage_write_flush_shaders_ = false; storage_write_flush_shaders_ = false;
flush_shaders = true; flush_shaders = true;
} }
if (!storage_write_pipeline_state_queue_.empty()) { if (!storage_write_pipeline_queue_.empty()) {
std::memcpy(&pipeline_description, std::memcpy(&pipeline_description,
&storage_write_pipeline_state_queue_.front(), &storage_write_pipeline_queue_.front(),
sizeof(pipeline_description)); sizeof(pipeline_description));
storage_write_pipeline_state_queue_.pop_front(); storage_write_pipeline_queue_.pop_front();
write_pipeline_state = true; write_pipeline = true;
} else if (storage_write_flush_pipeline_states_) { } else if (storage_write_flush_pipelines_) {
storage_write_flush_pipeline_states_ = false; storage_write_flush_pipelines_ = false;
flush_pipeline_states = true; flush_pipelines = true;
} }
if (!shader_pair.first && !write_pipeline_state) { if (!shader_pair.first && !write_pipeline) {
storage_write_request_cond_.wait(lock); storage_write_request_cond_.wait(lock);
continue; continue;
} }
@ -1998,27 +1975,26 @@ void PipelineCache::StorageWriteThread() {
} }
} }
if (write_pipeline_state) { if (write_pipeline) {
assert_not_null(pipeline_state_storage_file_); assert_not_null(pipeline_storage_file_);
fwrite(&pipeline_description, sizeof(pipeline_description), 1, fwrite(&pipeline_description, sizeof(pipeline_description), 1,
pipeline_state_storage_file_); pipeline_storage_file_);
} }
} }
} }
void PipelineCache::CreationThread(size_t thread_index) { void PipelineCache::CreationThread(size_t thread_index) {
while (true) { while (true) {
PipelineState* pipeline_state_to_create = nullptr; Pipeline* pipeline_to_create = nullptr;
// Check if need to shut down or set the completion event and dequeue the // Check if need to shut down or set the completion event and dequeue the
// pipeline state if there is any. // pipeline if there is any.
{ {
std::unique_lock<std::mutex> lock(creation_request_lock_); std::unique_lock<std::mutex> lock(creation_request_lock_);
if (thread_index >= creation_threads_shutdown_from_ || if (thread_index >= creation_threads_shutdown_from_ ||
creation_queue_.empty()) { creation_queue_.empty()) {
if (creation_completion_set_event_ && creation_threads_busy_ == 0) { if (creation_completion_set_event_ && creation_threads_busy_ == 0) {
// Last pipeline state object in the queue created - signal the event // Last pipeline in the queue created - signal the event if requested.
// if requested.
creation_completion_set_event_ = false; creation_completion_set_event_ = false;
creation_completion_event_->Set(); creation_completion_event_->Set();
} }
@ -2028,23 +2004,22 @@ void PipelineCache::CreationThread(size_t thread_index) {
creation_request_cond_.wait(lock); creation_request_cond_.wait(lock);
continue; continue;
} }
// Take the pipeline state from the queue and increment the busy thread // Take the pipeline from the queue and increment the busy thread count
// count until the pipeline state object is created - other threads must // until the pipeline is created - other threads must be able to dequeue
// be able to dequeue requests, but can't set the completion event until // requests, but can't set the completion event until the pipelines are
// the pipeline state objects are fully created (rather than just started // fully created (rather than just started creating).
// creating). pipeline_to_create = creation_queue_.front();
pipeline_state_to_create = creation_queue_.front();
creation_queue_.pop_front(); creation_queue_.pop_front();
++creation_threads_busy_; ++creation_threads_busy_;
} }
// Create the D3D12 pipeline state object. // Create the D3D12 pipeline state object.
pipeline_state_to_create->state = pipeline_to_create->state =
CreateD3D12PipelineState(pipeline_state_to_create->description); CreateD3D12Pipeline(pipeline_to_create->description);
// Pipeline state object created - the thread is not busy anymore, safe to // Pipeline created - the thread is not busy anymore, safe to set the
// set the completion event if needed (at the next iteration, or in some // completion event if needed (at the next iteration, or in some other
// other thread). // thread).
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
--creation_threads_busy_; --creation_threads_busy_;
@ -2052,20 +2027,20 @@ void PipelineCache::CreationThread(size_t thread_index) {
} }
} }
void PipelineCache::CreateQueuedPipelineStatesOnProcessorThread() { void PipelineCache::CreateQueuedPipelinesOnProcessorThread() {
assert_false(creation_threads_.empty()); assert_false(creation_threads_.empty());
while (true) { while (true) {
PipelineState* pipeline_state_to_create; Pipeline* pipeline_to_create;
{ {
std::lock_guard<std::mutex> lock(creation_request_lock_); std::lock_guard<std::mutex> lock(creation_request_lock_);
if (creation_queue_.empty()) { if (creation_queue_.empty()) {
break; break;
} }
pipeline_state_to_create = creation_queue_.front(); pipeline_to_create = creation_queue_.front();
creation_queue_.pop_front(); creation_queue_.pop_front();
} }
pipeline_state_to_create->state = pipeline_to_create->state =
CreateD3D12PipelineState(pipeline_state_to_create->description); CreateD3D12Pipeline(pipeline_to_create->description);
} }
} }

View File

@ -55,7 +55,7 @@ class PipelineCache {
void ShutdownShaderStorage(); void ShutdownShaderStorage();
void EndSubmission(); void EndSubmission();
bool IsCreatingPipelineStates(); bool IsCreatingPipelines();
D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address,
const uint32_t* host_address, uint32_t dword_count); const uint32_t* host_address, uint32_t dword_count);
@ -74,14 +74,12 @@ class PipelineCache {
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format, xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
bool early_z, bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5], const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_state_handle_out, void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
ID3D12RootSignature** root_signature_out);
// Returns a pipeline state object with deferred creation by its handle. May // Returns a pipeline with deferred creation by its handle. May return nullptr
// return nullptr if failed to create the pipeline state object. // if failed to create the pipeline.
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle( ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
void* handle) const { return reinterpret_cast<const Pipeline*>(handle)->state;
return reinterpret_cast<const PipelineState*>(handle)->state;
} }
private: private:
@ -238,7 +236,7 @@ class PipelineCache {
const RenderTargetCache::PipelineRenderTarget render_targets[5], const RenderTargetCache::PipelineRenderTarget render_targets[5],
PipelineRuntimeDescription& runtime_description_out); PipelineRuntimeDescription& runtime_description_out);
ID3D12PipelineState* CreateD3D12PipelineState( ID3D12PipelineState* CreateD3D12Pipeline(
const PipelineRuntimeDescription& runtime_description); const PipelineRuntimeDescription& runtime_description);
D3D12CommandProcessor& command_processor_; D3D12CommandProcessor& command_processor_;
@ -286,21 +284,20 @@ class PipelineCache {
// Xenos pixel shader provided. // Xenos pixel shader provided.
std::vector<uint8_t> depth_only_pixel_shader_; std::vector<uint8_t> depth_only_pixel_shader_;
struct PipelineState { struct Pipeline {
// nullptr if creation has failed. // nullptr if creation has failed.
ID3D12PipelineState* state; ID3D12PipelineState* state;
PipelineRuntimeDescription description; PipelineRuntimeDescription description;
}; };
// All previously generated pipeline state objects identified by hash and the // All previously generated pipelines identified by hash and the description.
// description. std::unordered_multimap<uint64_t, Pipeline*,
std::unordered_multimap<uint64_t, PipelineState*,
xe::hash::IdentityHasher<uint64_t>> xe::hash::IdentityHasher<uint64_t>>
pipeline_states_; pipelines_;
// Previously used pipeline state object. This matches our current state // Previously used pipeline. This matches our current state settings and
// settings and allows us to quickly(ish) reuse the pipeline state if no // allows us to quickly(ish) reuse the pipeline if no registers have been
// registers have changed. // changed.
PipelineState* current_pipeline_state_ = nullptr; Pipeline* current_pipeline_ = nullptr;
// Currently open shader storage path. // Currently open shader storage path.
std::filesystem::path shader_storage_root_; std::filesystem::path shader_storage_root_;
@ -310,10 +307,9 @@ class PipelineCache {
FILE* shader_storage_file_ = nullptr; FILE* shader_storage_file_ = nullptr;
bool shader_storage_file_flush_needed_ = false; bool shader_storage_file_flush_needed_ = false;
// Pipeline state storage output stream, for preload in the next emulator // Pipeline storage output stream, for preload in the next emulator runs.
// runs. FILE* pipeline_storage_file_ = nullptr;
FILE* pipeline_state_storage_file_ = nullptr; bool pipeline_storage_file_flush_needed_ = false;
bool pipeline_state_storage_file_flush_needed_ = false;
// Thread for asynchronous writing to the storage streams. // Thread for asynchronous writing to the storage streams.
void StorageWriteThread(); void StorageWriteThread();
@ -323,28 +319,27 @@ class PipelineCache {
// thread is notified about its change via storage_write_request_cond_. // thread is notified about its change via storage_write_request_cond_.
std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>> std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>>
storage_write_shader_queue_; storage_write_shader_queue_;
std::deque<PipelineStoredDescription> storage_write_pipeline_state_queue_; std::deque<PipelineStoredDescription> storage_write_pipeline_queue_;
bool storage_write_flush_shaders_ = false; bool storage_write_flush_shaders_ = false;
bool storage_write_flush_pipeline_states_ = false; bool storage_write_flush_pipelines_ = false;
bool storage_write_thread_shutdown_ = false; bool storage_write_thread_shutdown_ = false;
std::unique_ptr<xe::threading::Thread> storage_write_thread_; std::unique_ptr<xe::threading::Thread> storage_write_thread_;
// Pipeline state object creation threads. // Pipeline creation threads.
void CreationThread(size_t thread_index); void CreationThread(size_t thread_index);
void CreateQueuedPipelineStatesOnProcessorThread(); void CreateQueuedPipelinesOnProcessorThread();
std::mutex creation_request_lock_; std::mutex creation_request_lock_;
std::condition_variable creation_request_cond_; std::condition_variable creation_request_cond_;
// Protected with creation_request_lock_, notify_one creation_request_cond_ // Protected with creation_request_lock_, notify_one creation_request_cond_
// when set. // when set.
std::deque<PipelineState*> creation_queue_; std::deque<Pipeline*> creation_queue_;
// Number of threads that are currently creating a pipeline state object - // Number of threads that are currently creating a pipeline - incremented when
// incremented when a pipeline state object is dequeued (the completion event // a pipeline is dequeued (the completion event can't be triggered before this
// can't be triggered before this is zero). Protected with // is zero). Protected with creation_request_lock_.
// creation_request_lock_.
size_t creation_threads_busy_ = 0; size_t creation_threads_busy_ = 0;
// Manual-reset event set when the last queued pipeline state object is // Manual-reset event set when the last queued pipeline is created and there
// created and there are no more pipeline state objects to create. This is // are no more pipelines to create. This is triggered by the thread creating
// triggered by the thread creating the last pipeline state object. // the last pipeline.
std::unique_ptr<xe::threading::Event> creation_completion_event_; std::unique_ptr<xe::threading::Event> creation_completion_event_;
// Whether setting the event on completion is queued. Protected with // Whether setting the event on completion is queued. Protected with
// creation_request_lock_, notify_one creation_request_cond_ when set. // creation_request_lock_, notify_one creation_request_cond_ when set.

View File

@ -25,15 +25,6 @@ project("xenia-gpu-d3d12-trace-viewer")
kind("WindowedApp") kind("WindowedApp")
language("C++") language("C++")
links({ links({
"aes_128",
"capstone",
"dxbc",
"fmt",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xenia-apu", "xenia-apu",
"xenia-apu-nop", "xenia-apu-nop",
"xenia-base", "xenia-base",
@ -48,6 +39,17 @@ project("xenia-gpu-d3d12-trace-viewer")
"xenia-ui", "xenia-ui",
"xenia-ui-d3d12", "xenia-ui-d3d12",
"xenia-vfs", "xenia-vfs",
})
links({
"aes_128",
"capstone",
"dxbc",
"fmt",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xxhash", "xxhash",
}) })
files({ files({
@ -70,15 +72,6 @@ project("xenia-gpu-d3d12-trace-dump")
kind("ConsoleApp") kind("ConsoleApp")
language("C++") language("C++")
links({ links({
"aes_128",
"capstone",
"dxbc",
"fmt",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xenia-apu", "xenia-apu",
"xenia-apu-nop", "xenia-apu-nop",
"xenia-base", "xenia-base",
@ -93,6 +86,17 @@ project("xenia-gpu-d3d12-trace-dump")
"xenia-ui", "xenia-ui",
"xenia-ui-d3d12", "xenia-ui-d3d12",
"xenia-vfs", "xenia-vfs",
})
links({
"aes_128",
"capstone",
"dxbc",
"fmt",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xxhash", "xxhash",
}) })
files({ files({

View File

@ -454,8 +454,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
// again and again and exit. // again and again and exit.
if (!conversion_needed || converted_index_count == 0) { if (!conversion_needed || converted_index_count == 0) {
converted_indices.gpu_address = 0; converted_indices.gpu_address = 0;
converted_indices_cache_.insert( converted_indices_cache_.emplace(converted_indices.key.value,
std::make_pair(converted_indices.key.value, converted_indices)); converted_indices);
memory_regions_used_ |= memory_regions_used_bits; memory_regions_used_ |= memory_regions_used_bits;
return converted_index_count == 0 ? ConversionResult::kPrimitiveEmpty return converted_index_count == 0 ? ConversionResult::kPrimitiveEmpty
: ConversionResult::kConversionNotNeeded; : ConversionResult::kConversionNotNeeded;
@ -670,8 +670,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
// Cache and return the indices. // Cache and return the indices.
converted_indices.gpu_address = gpu_address; converted_indices.gpu_address = gpu_address;
converted_indices_cache_.insert( converted_indices_cache_.emplace(converted_indices.key.value,
std::make_pair(converted_indices.key.value, converted_indices)); converted_indices);
memory_regions_used_ |= memory_regions_used_bits; memory_regions_used_ |= memory_regions_used_bits;
gpu_address_out = gpu_address; gpu_address_out = gpu_address;
index_count_out = converted_index_count; index_count_out = converted_index_count;

View File

@ -277,19 +277,18 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
return false; return false;
} }
// Create the EDRAM load/store pipeline state objects. // Create the EDRAM load/store pipelines.
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) { for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
const EdramLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i]; const EdramLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i];
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState( edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.load_shader, mode_info.load_shader_size, device, mode_info.load_shader, mode_info.load_shader_size,
edram_load_store_root_signature_); edram_load_store_root_signature_);
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState( edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.store_shader, mode_info.store_shader_size, device, mode_info.store_shader, mode_info.store_shader_size,
edram_load_store_root_signature_); edram_load_store_root_signature_);
if (edram_load_pipelines_[i] == nullptr || if (edram_load_pipelines_[i] == nullptr ||
edram_store_pipelines_[i] == nullptr) { edram_store_pipelines_[i] == nullptr) {
XELOGE( XELOGE("Failed to create the EDRAM load/store pipelines for mode {}",
"Failed to create the EDRAM load/store pipeline states for mode {}",
i); i);
Shutdown(); Shutdown();
return false; return false;
@ -299,7 +298,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
} }
} }
// Create the resolve root signatures and pipeline state objects. // Create the resolve root signatures and pipelines.
D3D12_ROOT_PARAMETER resolve_root_parameters[3]; D3D12_ROOT_PARAMETER resolve_root_parameters[3];
// Copying root signature. // Copying root signature.
@ -369,7 +368,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
return false; return false;
} }
// Copying pipeline state objects. // Copying pipelines.
uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1; uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1;
for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount); for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount);
++i) { ++i) {
@ -381,63 +380,61 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
continue; continue;
} }
const auto& resolve_copy_shader = resolve_copy_shaders_[i]; const auto& resolve_copy_shader = resolve_copy_shaders_[i];
ID3D12PipelineState* resolve_copy_pipeline_state = ID3D12PipelineState* resolve_copy_pipeline =
ui::d3d12::util::CreateComputePipelineState( ui::d3d12::util::CreateComputePipeline(
device, resolve_copy_shader.first, resolve_copy_shader.second, device, resolve_copy_shader.first, resolve_copy_shader.second,
resolve_copy_root_signature_); resolve_copy_root_signature_);
if (resolve_copy_pipeline_state == nullptr) { if (resolve_copy_pipeline == nullptr) {
XELOGE("Failed to create {} resolve copy pipeline state", XELOGE("Failed to create {} resolve copy pipeline",
resolve_copy_shader_info.debug_name); resolve_copy_shader_info.debug_name);
} }
resolve_copy_pipeline_state->SetName(reinterpret_cast<LPCWSTR>( resolve_copy_pipeline->SetName(reinterpret_cast<LPCWSTR>(
xe::to_utf16(resolve_copy_shader_info.debug_name).c_str())); xe::to_utf16(resolve_copy_shader_info.debug_name).c_str()));
resolve_copy_pipeline_states_[i] = resolve_copy_pipeline_state; resolve_copy_pipelines_[i] = resolve_copy_pipeline;
} }
// Clearing pipeline state objects. // Clearing pipelines.
resolve_clear_32bpp_pipeline_state_ = resolve_clear_32bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
ui::d3d12::util::CreateComputePipelineState(
device, device,
resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs
: resolve_clear_32bpp_cs, : resolve_clear_32bpp_cs,
resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs) resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs)
: sizeof(resolve_clear_32bpp_cs), : sizeof(resolve_clear_32bpp_cs),
resolve_clear_root_signature_); resolve_clear_root_signature_);
if (resolve_clear_32bpp_pipeline_state_ == nullptr) { if (resolve_clear_32bpp_pipeline_ == nullptr) {
XELOGE("Failed to create the 32bpp resolve clear pipeline state"); XELOGE("Failed to create the 32bpp resolve clear pipeline");
Shutdown(); Shutdown();
return false; return false;
} }
resolve_clear_32bpp_pipeline_state_->SetName(L"Resolve Clear 32bpp"); resolve_clear_32bpp_pipeline_->SetName(L"Resolve Clear 32bpp");
resolve_clear_64bpp_pipeline_state_ = resolve_clear_64bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
ui::d3d12::util::CreateComputePipelineState(
device, device,
resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs
: resolve_clear_64bpp_cs, : resolve_clear_64bpp_cs,
resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs) resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs)
: sizeof(resolve_clear_64bpp_cs), : sizeof(resolve_clear_64bpp_cs),
resolve_clear_root_signature_); resolve_clear_root_signature_);
if (resolve_clear_64bpp_pipeline_state_ == nullptr) { if (resolve_clear_64bpp_pipeline_ == nullptr) {
XELOGE("Failed to create the 64bpp resolve clear pipeline state"); XELOGE("Failed to create the 64bpp resolve clear pipeline");
Shutdown(); Shutdown();
return false; return false;
} }
resolve_clear_64bpp_pipeline_state_->SetName(L"Resolve Clear 64bpp"); resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp");
if (!edram_rov_used_) {
  // The float depth clearing pipeline is only created for the non-ROV path,
  // where 2x resolution scaling is not expected to be enabled.
  assert_false(resolution_scale_2x_);
  resolve_clear_depth_24_32_pipeline_ =
      ui::d3d12::util::CreateComputePipeline(
          device, resolve_clear_depth_24_32_cs,
          sizeof(resolve_clear_depth_24_32_cs),
          resolve_clear_root_signature_);
  if (resolve_clear_depth_24_32_pipeline_ == nullptr) {
    XELOGE(
        "Failed to create the 24-bit and 32-bit depth resolve clear pipeline "
        "state");
    Shutdown();
    return false;
  }
  // Name the pipeline that was just created - not the 64bpp one, which
  // already has its own debug name and would otherwise get it overwritten.
  resolve_clear_depth_24_32_pipeline_->SetName(
      L"Resolve Clear 24-bit & 32-bit Depth");
}
@ -451,12 +448,12 @@ void RenderTargetCache::Shutdown() {
edram_snapshot_restore_pool_.reset(); edram_snapshot_restore_pool_.reset();
ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_); ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_state_); ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_state_); ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_state_); ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_root_signature_); ui::d3d12::util::ReleaseAndNull(resolve_clear_root_signature_);
for (size_t i = 0; i < xe::countof(resolve_copy_pipeline_states_); ++i) { for (size_t i = 0; i < xe::countof(resolve_copy_pipelines_); ++i) {
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipeline_states_[i]); ui::d3d12::util::ReleaseAndNull(resolve_copy_pipelines_[i]);
} }
ui::d3d12::util::ReleaseAndNull(resolve_copy_root_signature_); ui::d3d12::util::ReleaseAndNull(resolve_copy_root_signature_);
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) { for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
@ -1209,8 +1206,8 @@ bool RenderTargetCache::Resolve(const Memory& memory,
0, sizeof(copy_shader_constants) / sizeof(uint32_t), 0, sizeof(copy_shader_constants) / sizeof(uint32_t),
&copy_shader_constants, 0); &copy_shader_constants, 0);
} }
command_processor_.SetComputePipelineState( command_processor_.SetComputePipeline(
resolve_copy_pipeline_states_[size_t(copy_shader)]); resolve_copy_pipelines_[size_t(copy_shader)]);
command_processor_.SubmitBarriers(); command_processor_.SubmitBarriers();
command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1); command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1);
@ -1279,9 +1276,9 @@ bool RenderTargetCache::Resolve(const Memory& memory,
command_list.D3DSetComputeRoot32BitConstants( command_list.D3DSetComputeRoot32BitConstants(
0, sizeof(depth_clear_constants) / sizeof(uint32_t), 0, sizeof(depth_clear_constants) / sizeof(uint32_t),
&depth_clear_constants, 0); &depth_clear_constants, 0);
command_processor_.SetComputePipelineState( command_processor_.SetComputePipeline(
clear_float32_depth ? resolve_clear_depth_24_32_pipeline_state_ clear_float32_depth ? resolve_clear_depth_24_32_pipeline_
: resolve_clear_32bpp_pipeline_state_); : resolve_clear_32bpp_pipeline_);
command_processor_.SubmitBarriers(); command_processor_.SubmitBarriers();
command_list.D3DDispatch(clear_group_count.first, command_list.D3DDispatch(clear_group_count.first,
clear_group_count.second, 1); clear_group_count.second, 1);
@ -1301,10 +1298,10 @@ bool RenderTargetCache::Resolve(const Memory& memory,
0, sizeof(color_clear_constants) / sizeof(uint32_t), 0, sizeof(color_clear_constants) / sizeof(uint32_t),
&color_clear_constants, 0); &color_clear_constants, 0);
} }
command_processor_.SetComputePipelineState( command_processor_.SetComputePipeline(
resolve_info.color_edram_info.format_is_64bpp resolve_info.color_edram_info.format_is_64bpp
? resolve_clear_64bpp_pipeline_state_ ? resolve_clear_64bpp_pipeline_
: resolve_clear_32bpp_pipeline_state_); : resolve_clear_32bpp_pipeline_);
command_processor_.SubmitBarriers(); command_processor_.SubmitBarriers();
command_list.D3DDispatch(clear_group_count.first, command_list.D3DDispatch(clear_group_count.first,
clear_group_count.second, 1); clear_group_count.second, 1);
@ -1816,7 +1813,7 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
render_target->footprints, nullptr, nullptr, render_target->footprints, nullptr, nullptr,
&copy_buffer_size); &copy_buffer_size);
render_target->copy_buffer_size = uint32_t(copy_buffer_size); render_target->copy_buffer_size = uint32_t(copy_buffer_size);
render_targets_.insert(std::make_pair(key.value, render_target)); render_targets_.emplace(key.value, render_target);
COUNT_profile_set("gpu/render_target_cache/render_targets", COUNT_profile_set("gpu/render_target_cache/render_targets",
render_targets_.size()); render_targets_.size());
#if 0 #if 0
@ -2015,8 +2012,7 @@ void RenderTargetCache::StoreRenderTargetsToEdram() {
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth, EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format); render_target->key.format);
command_processor_.SetComputePipelineState( command_processor_.SetComputePipeline(edram_store_pipelines_[size_t(mode)]);
edram_store_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples. // 1 group per 80x16 samples.
command_list.D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1); command_list.D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1);
@ -2140,8 +2136,7 @@ void RenderTargetCache::LoadRenderTargetsFromEdram(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0); 0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth, EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format); render_target->key.format);
command_processor_.SetComputePipelineState( command_processor_.SetComputePipeline(edram_load_pipelines_[size_t(mode)]);
edram_load_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples. // 1 group per 80x16 samples.
command_list.D3DDispatch(render_target->key.width_ss_div_80, edram_rows, 1); command_list.D3DDispatch(render_target->key.width_ss_div_80, edram_rows, 1);

View File

@ -237,14 +237,13 @@ class D3D12CommandProcessor;
// get each of the 4 host pixels for each sample. // get each of the 4 host pixels for each sample.
class RenderTargetCache { class RenderTargetCache {
public: public:
// Direct3D 12 debug layer gives errors that contradict each other when you
// use null RTV descriptors - if you set a valid format in RTVFormats in the
// pipeline state, it says that null descriptors can only be used if the
// format in the pipeline state is DXGI_FORMAT_UNKNOWN, however, if
// DXGI_FORMAT_UNKNOWN is set, it complains that the format in the pipeline
// state doesn't match the RTV format. So we have to make render target
// bindings consecutive and remap the output indices in pixel shaders.
struct PipelineRenderTarget { struct PipelineRenderTarget {
uint32_t guest_render_target; uint32_t guest_render_target;
DXGI_FORMAT format; DXGI_FORMAT format;
@ -304,8 +303,7 @@ class RenderTargetCache {
// performance difference, but with EDRAM loads/stores less conversion should // performance difference, but with EDRAM loads/stores less conversion should
// be performed by the shaders if D24S8 is emulated as D24_UNORM_S8_UINT, and // be performed by the shaders if D24S8 is emulated as D24_UNORM_S8_UINT, and
// it's probably more accurate. // it's probably more accurate.
static inline DXGI_FORMAT GetDepthDXGIFormat( static DXGI_FORMAT GetDepthDXGIFormat(xenos::DepthRenderTargetFormat format) {
xenos::DepthRenderTargetFormat format) {
return format == xenos::DepthRenderTargetFormat::kD24FS8 return format == xenos::DepthRenderTargetFormat::kD24FS8
? DXGI_FORMAT_D32_FLOAT_S8X24_UINT ? DXGI_FORMAT_D32_FLOAT_S8X24_UINT
: DXGI_FORMAT_D24_UNORM_S8_UINT; : DXGI_FORMAT_D24_UNORM_S8_UINT;
@ -537,7 +535,7 @@ class RenderTargetCache {
// 16: - EDRAM pitch in tiles. // 16: - EDRAM pitch in tiles.
uint32_t base_samples_2x_depth_pitch; uint32_t base_samples_2x_depth_pitch;
}; };
// EDRAM pipeline states for the RTV/DSV path. // EDRAM pipelines for the RTV/DSV path.
static const EdramLoadStoreModeInfo static const EdramLoadStoreModeInfo
edram_load_store_mode_info_[size_t(EdramLoadStoreMode::kCount)]; edram_load_store_mode_info_[size_t(EdramLoadStoreMode::kCount)];
ID3D12PipelineState* ID3D12PipelineState*
@ -546,20 +544,20 @@ class RenderTargetCache {
ID3D12PipelineState* ID3D12PipelineState*
edram_store_pipelines_[size_t(EdramLoadStoreMode::kCount)] = {}; edram_store_pipelines_[size_t(EdramLoadStoreMode::kCount)] = {};
// Resolve root signatures and pipeline state objects. // Resolve root signatures and pipelines.
ID3D12RootSignature* resolve_copy_root_signature_ = nullptr; ID3D12RootSignature* resolve_copy_root_signature_ = nullptr;
static const std::pair<const uint8_t*, size_t> static const std::pair<const uint8_t*, size_t>
resolve_copy_shaders_[size_t(draw_util::ResolveCopyShaderIndex::kCount)]; resolve_copy_shaders_[size_t(draw_util::ResolveCopyShaderIndex::kCount)];
ID3D12PipelineState* resolve_copy_pipeline_states_[size_t( ID3D12PipelineState* resolve_copy_pipelines_[size_t(
draw_util::ResolveCopyShaderIndex::kCount)] = {}; draw_util::ResolveCopyShaderIndex::kCount)] = {};
ID3D12RootSignature* resolve_clear_root_signature_ = nullptr; ID3D12RootSignature* resolve_clear_root_signature_ = nullptr;
// Clearing 32bpp color, depth with ROV, or unorm depth without ROV. // Clearing 32bpp color, depth with ROV, or unorm depth without ROV.
ID3D12PipelineState* resolve_clear_32bpp_pipeline_state_ = nullptr; ID3D12PipelineState* resolve_clear_32bpp_pipeline_ = nullptr;
// Clearing 64bpp color. // Clearing 64bpp color.
ID3D12PipelineState* resolve_clear_64bpp_pipeline_state_ = nullptr; ID3D12PipelineState* resolve_clear_64bpp_pipeline_ = nullptr;
// Clearing float depth without ROV, both the float24 and the host float32 // Clearing float depth without ROV, both the float24 and the host float32
// versions. // versions.
ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_state_ = nullptr; ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_ = nullptr;
// FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on // FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on
// Nvidia Maxwell 1st generation and older. // Nvidia Maxwell 1st generation and older.

View File

@ -918,27 +918,24 @@ bool TextureCache::Initialize(bool edram_rov_used) {
return false; return false;
} }
// Create the loading pipeline state objects. // Create the loading pipelines.
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) { for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
const LoadModeInfo& mode_info = load_mode_info_[i]; const LoadModeInfo& mode_info = load_mode_info_[i];
load_pipeline_states_[i] = ui::d3d12::util::CreateComputePipelineState( load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.shader, mode_info.shader_size, load_root_signature_); device, mode_info.shader, mode_info.shader_size, load_root_signature_);
if (load_pipeline_states_[i] == nullptr) { if (load_pipelines_[i] == nullptr) {
XELOGE( XELOGE("Failed to create the texture loading pipeline for mode {}", i);
"Failed to create the texture loading pipeline state object for mode "
"{}",
i);
Shutdown(); Shutdown();
return false; return false;
} }
if (IsResolutionScale2X() && mode_info.shader_2x != nullptr) { if (IsResolutionScale2X() && mode_info.shader_2x != nullptr) {
load_pipeline_states_2x_[i] = ui::d3d12::util::CreateComputePipelineState( load_pipelines_2x_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.shader_2x, mode_info.shader_2x_size, device, mode_info.shader_2x, mode_info.shader_2x_size,
load_root_signature_); load_root_signature_);
if (load_pipeline_states_2x_[i] == nullptr) { if (load_pipelines_2x_[i] == nullptr) {
XELOGE( XELOGE(
"Failed to create the 2x-scaled texture loading pipeline state " "Failed to create the 2x-scaled texture loading pipeline for mode "
"for mode {}", "{}",
i); i);
Shutdown(); Shutdown();
return false; return false;
@ -1024,8 +1021,8 @@ void TextureCache::Shutdown() {
ui::d3d12::util::ReleaseAndNull(null_srv_descriptor_heap_); ui::d3d12::util::ReleaseAndNull(null_srv_descriptor_heap_);
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) { for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_2x_[i]); ui::d3d12::util::ReleaseAndNull(load_pipelines_2x_[i]);
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_[i]); ui::d3d12::util::ReleaseAndNull(load_pipelines_[i]);
} }
ui::d3d12::util::ReleaseAndNull(load_root_signature_); ui::d3d12::util::ReleaseAndNull(load_root_signature_);
@ -1892,7 +1889,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
if (IsResolutionScale2X() && key.tiled) { if (IsResolutionScale2X() && key.tiled) {
LoadMode load_mode = GetLoadMode(key); LoadMode load_mode = GetLoadMode(key);
if (load_mode != LoadMode::kUnknown && if (load_mode != LoadMode::kUnknown &&
load_pipeline_states_2x_[uint32_t(load_mode)] != nullptr) { load_pipelines_2x_[uint32_t(load_mode)] != nullptr) {
uint32_t base_size = 0, mip_size = 0; uint32_t base_size = 0, mip_size = 0;
texture_util::GetTextureTotalSize( texture_util::GetTextureTotalSize(
key.dimension, key.width, key.height, key.depth, key.format, key.dimension, key.width, key.height, key.depth, key.format,
@ -2047,7 +2044,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
} }
texture->base_watch_handle = nullptr; texture->base_watch_handle = nullptr;
texture->mip_watch_handle = nullptr; texture->mip_watch_handle = nullptr;
textures_.insert(std::make_pair(map_key, texture)); textures_.emplace(map_key, texture);
COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
textures_total_size_ += texture->resource_size; textures_total_size_ += texture->resource_size;
COUNT_profile_set("gpu/texture_cache/total_size_mb", COUNT_profile_set("gpu/texture_cache/total_size_mb",
@ -2079,10 +2076,10 @@ bool TextureCache::LoadTextureData(Texture* texture) {
return false; return false;
} }
bool scaled_resolve = texture->key.scaled_resolve ? true : false; bool scaled_resolve = texture->key.scaled_resolve ? true : false;
ID3D12PipelineState* pipeline_state = ID3D12PipelineState* pipeline = scaled_resolve
scaled_resolve ? load_pipeline_states_2x_[uint32_t(load_mode)] ? load_pipelines_2x_[uint32_t(load_mode)]
: load_pipeline_states_[uint32_t(load_mode)]; : load_pipelines_[uint32_t(load_mode)];
if (pipeline_state == nullptr) { if (pipeline == nullptr) {
return false; return false;
} }
const LoadModeInfo& load_mode_info = load_mode_info_[uint32_t(load_mode)]; const LoadModeInfo& load_mode_info = load_mode_info_[uint32_t(load_mode)];
@ -2296,7 +2293,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
load_mode_info.srv_bpe_log2); load_mode_info.srv_bpe_log2);
} }
} }
command_processor_.SetComputePipelineState(pipeline_state); command_processor_.SetComputePipeline(pipeline);
command_list.D3DSetComputeRootSignature(load_root_signature_); command_list.D3DSetComputeRootSignature(load_root_signature_);
command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second); command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second);
@ -2597,7 +2594,7 @@ uint32_t TextureCache::FindOrCreateTextureDescriptor(Texture& texture,
} }
device->CreateShaderResourceView( device->CreateShaderResourceView(
texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index)); texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index));
texture.srv_descriptors.insert({descriptor_key, descriptor_index}); texture.srv_descriptors.emplace(descriptor_key, descriptor_index);
return descriptor_index; return descriptor_index;
} }

View File

@ -106,18 +106,18 @@ class TextureCache {
// Keys differ when either the 64-bit map key or the bucket key differs.
bool operator!=(const TextureKey& key) const {
  return !(GetMapKey() == key.GetMapKey() && bucket_key == key.bucket_key);
}
inline uint64_t GetMapKey() const { uint64_t GetMapKey() const {
return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32); return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32);
} }
inline void SetMapKey(uint64_t key) { void SetMapKey(uint64_t key) {
map_key[0] = uint32_t(key); map_key[0] = uint32_t(key);
map_key[1] = uint32_t(key >> 32); map_key[1] = uint32_t(key >> 32);
} }
inline bool IsInvalid() const { bool IsInvalid() const {
// Zero base and zero width is enough for a binding to be invalid. // Zero base and zero width is enough for a binding to be invalid.
return map_key[0] == 0; return map_key[0] == 0;
} }
inline void MakeInvalid() { void MakeInvalid() {
// Reset all for a stable hash. // Reset all for a stable hash.
SetMapKey(0); SetMapKey(0);
bucket_key = 0; bucket_key = 0;
@ -222,9 +222,7 @@ class TextureCache {
void MarkRangeAsResolved(uint32_t start_unscaled, uint32_t length_unscaled); void MarkRangeAsResolved(uint32_t start_unscaled, uint32_t length_unscaled);
// Reports 2x resolution scaling as enabled when the scaled resolve buffer
// exists (is non-null).
bool IsResolutionScale2X() const {
  return scaled_resolve_buffer_ ? true : false;
}
// Returns the scaled resolve buffer pointer as stored; it is null when
// IsResolutionScale2X() is false.
ID3D12Resource* GetScaledResolveBuffer() const {
  ID3D12Resource* const buffer = scaled_resolve_buffer_;
  return buffer;
}
@ -233,7 +231,7 @@ class TextureCache {
uint32_t length_unscaled); uint32_t length_unscaled);
void UseScaledResolveBufferForReading(); void UseScaledResolveBufferForReading();
void UseScaledResolveBufferForWriting(); void UseScaledResolveBufferForWriting();
inline void MarkScaledResolveBufferUAVWritesCommitNeeded() { void MarkScaledResolveBufferUAVWritesCommitNeeded() {
if (scaled_resolve_buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { if (scaled_resolve_buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
scaled_resolve_buffer_uav_writes_commit_needed_ = true; scaled_resolve_buffer_uav_writes_commit_needed_ = true;
} }
@ -432,7 +430,7 @@ class TextureCache {
// Whether the signed version of the texture has a different representation on // Whether the signed version of the texture has a different representation on
// the host than its unsigned version (for example, if it's a fixed-point // the host than its unsigned version (for example, if it's a fixed-point
// texture emulated with a larger host pixel format). // texture emulated with a larger host pixel format).
static inline bool IsSignedVersionSeparate(xenos::TextureFormat format) { static bool IsSignedVersionSeparate(xenos::TextureFormat format) {
const HostFormat& host_format = host_formats_[uint32_t(format)]; const HostFormat& host_format = host_formats_[uint32_t(format)];
return host_format.load_mode_snorm != LoadMode::kUnknown && return host_format.load_mode_snorm != LoadMode::kUnknown &&
host_format.load_mode_snorm != host_format.load_mode; host_format.load_mode_snorm != host_format.load_mode;
@ -441,26 +439,24 @@ class TextureCache {
// of block-compressed textures with 4x4-aligned dimensions on PC). // of block-compressed textures with 4x4-aligned dimensions on PC).
static bool IsDecompressionNeeded(xenos::TextureFormat format, uint32_t width, static bool IsDecompressionNeeded(xenos::TextureFormat format, uint32_t width,
uint32_t height); uint32_t height);
// Picks the DXGI format for the host resource of a guest texture: the
// uncompressed format when IsDecompressionNeeded says the texture has to be
// decompressed, the regular resource format otherwise.
static DXGI_FORMAT GetDXGIResourceFormat(xenos::TextureFormat format,
                                         uint32_t width, uint32_t height) {
  const HostFormat& host_format = host_formats_[uint32_t(format)];
  if (IsDecompressionNeeded(format, width, height)) {
    return host_format.dxgi_format_uncompressed;
  }
  return host_format.dxgi_format_resource;
}
// Convenience overload taking the format, the width and the height from a
// texture key.
static DXGI_FORMAT GetDXGIResourceFormat(TextureKey key) {
  return GetDXGIResourceFormat(key.format, key.width, key.height);
}
// Picks the unsigned normalized DXGI format for a guest texture: the
// uncompressed format when IsDecompressionNeeded says the texture has to be
// decompressed, the unorm format of the host format otherwise.
static DXGI_FORMAT GetDXGIUnormFormat(xenos::TextureFormat format,
                                      uint32_t width, uint32_t height) {
  const HostFormat& host_format = host_formats_[uint32_t(format)];
  if (IsDecompressionNeeded(format, width, height)) {
    return host_format.dxgi_format_uncompressed;
  }
  return host_format.dxgi_format_unorm;
}
// Convenience overload taking the format, the width and the height from a
// texture key.
static DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) {
  return GetDXGIUnormFormat(key.format, key.width, key.height);
}
@ -550,9 +546,9 @@ class TextureCache {
static const LoadModeInfo load_mode_info_[]; static const LoadModeInfo load_mode_info_[];
ID3D12RootSignature* load_root_signature_ = nullptr; ID3D12RootSignature* load_root_signature_ = nullptr;
ID3D12PipelineState* load_pipeline_states_[size_t(LoadMode::kCount)] = {}; ID3D12PipelineState* load_pipelines_[size_t(LoadMode::kCount)] = {};
// Load pipeline state objects for 2x-scaled resolved targets. // Load pipelines for 2x-scaled resolved targets.
ID3D12PipelineState* load_pipeline_states_2x_[size_t(LoadMode::kCount)] = {}; ID3D12PipelineState* load_pipelines_2x_[size_t(LoadMode::kCount)] = {};
std::unordered_multimap<uint64_t, Texture*> textures_; std::unordered_multimap<uint64_t, Texture*> textures_;
uint64_t textures_total_size_ = 0; uint64_t textures_total_size_ = 0;

View File

@ -40,11 +40,11 @@ project("xenia-hid-demo")
filter("platforms:Linux") filter("platforms:Linux")
links({ links({
"SDL2",
"vulkan",
"X11", "X11",
"xcb", "xcb",
"X11-xcb", "X11-xcb",
"vulkan",
"SDL2",
}) })
filter("platforms:Windows") filter("platforms:Windows")

View File

@ -542,19 +542,13 @@ dword_result_t NetDll_XNetDnsRelease(dword_t caller, pointer_t<XNDNS> dns) {
} }
DECLARE_XAM_EXPORT1(NetDll_XNetDnsRelease, kNetworking, kStub); DECLARE_XAM_EXPORT1(NetDll_XNetDnsRelease, kNetworking, kStub);
SHIM_CALL NetDll_XNetQosServiceLookup_shim(PPCContext* ppc_context, dword_result_t NetDll_XNetQosServiceLookup(dword_t caller, dword_t zero,
KernelState* kernel_state) { dword_t event_handle,
uint32_t caller = SHIM_GET_ARG_32(0); lpdword_t out_ptr) {
uint32_t zero = SHIM_GET_ARG_32(1);
uint32_t event_handle = SHIM_GET_ARG_32(2);
uint32_t out_ptr = SHIM_GET_ARG_32(3);
XELOGD("NetDll_XNetQosServiceLookup({}, {}, {:08X}, {:08X})", caller, zero,
event_handle, out_ptr);
// Non-zero is error. // Non-zero is error.
SHIM_SET_RETURN_32(1); return 1;
} }
DECLARE_XAM_EXPORT1(NetDll_XNetQosServiceLookup, kNetworking, kStub);
dword_result_t NetDll_XNetQosListen(dword_t caller, lpvoid_t id, lpvoid_t data, dword_result_t NetDll_XNetQosListen(dword_t caller, lpvoid_t id, lpvoid_t data,
dword_t data_size, dword_t r7, dword_t data_size, dword_t r7,
@ -965,9 +959,7 @@ dword_result_t NetDll___WSAFDIsSet(dword_t socket_handle,
DECLARE_XAM_EXPORT1(NetDll___WSAFDIsSet, kNetworking, kImplemented); DECLARE_XAM_EXPORT1(NetDll___WSAFDIsSet, kNetworking, kImplemented);
void RegisterNetExports(xe::cpu::ExportResolver* export_resolver, void RegisterNetExports(xe::cpu::ExportResolver* export_resolver,
KernelState* kernel_state) { KernelState* kernel_state) {}
SHIM_SET_MAPPING("xam.xex", NetDll_XNetQosServiceLookup, state);
}
} // namespace xam } // namespace xam
} // namespace kernel } // namespace kernel

View File

@ -222,13 +222,23 @@ void KeSetCurrentStackPointers(lpvoid_t stack_ptr,
} }
DECLARE_XBOXKRNL_EXPORT1(KeSetCurrentStackPointers, kThreading, kImplemented); DECLARE_XBOXKRNL_EXPORT1(KeSetCurrentStackPointers, kThreading, kImplemented);
// Sets the affinity of a guest thread.
//
// The Xbox 360, according to disassembly of KeSetAffinityThread, unlike
// Windows NT, stores the previous affinity via the pointer provided as an
// argument, not in the return value - the return value is used for the
// result.
//
// Returns X_STATUS_INVALID_PARAMETER for a zero affinity mask, and
// X_STATUS_SUCCESS otherwise (including when no thread object could be
// obtained for thread_ptr, in which case nothing is changed).
dword_result_t KeSetAffinityThread(lpvoid_t thread_ptr, dword_t affinity,
                                   lpdword_t previous_affinity_ptr) {
  if (!affinity) {
    return X_STATUS_INVALID_PARAMETER;
  }
  auto thread = XObject::GetNativeObject<XThread>(kernel_state(), thread_ptr);
  if (!thread) {
    return X_STATUS_SUCCESS;
  }
  if (previous_affinity_ptr) {
    // Reported as a single-bit mask of the CPU the thread is currently on.
    *previous_affinity_ptr = uint32_t(1) << thread->active_cpu();
  }
  thread->SetAffinity(affinity);
  return X_STATUS_SUCCESS;
}
DECLARE_XBOXKRNL_EXPORT1(KeSetAffinityThread, kThreading, kImplemented);

View File

@ -156,11 +156,17 @@ void XThread::set_name(const std::string_view name) {
} }
} }
uint8_t next_cpu = 0; static uint8_t next_cpu = 0;
uint8_t GetFakeCpuNumber(uint8_t proc_mask) { static uint8_t GetFakeCpuNumber(uint8_t proc_mask) {
// NOTE: proc_mask is logical processors, not physical processors or cores.
if (!proc_mask) { if (!proc_mask) {
next_cpu = (next_cpu + 1) % 6; next_cpu = (next_cpu + 1) % 6;
return next_cpu; // is this reasonable? return next_cpu; // is this reasonable?
// TODO(Triang3l): Does the following apply here?
// https://docs.microsoft.com/en-us/windows/win32/dxtecharts/coding-for-multiple-cores
// "On Xbox 360, you must explicitly assign software threads to a particular
// hardware thread by using XSetThreadProcessor. Otherwise, all child
// threads will stay on the same hardware thread as the parent."
} }
assert_false(proc_mask & 0xC0); assert_false(proc_mask & 0xC0);
@ -205,6 +211,7 @@ void XThread::InitializeGuestObject() {
// 0xA88 = APC // 0xA88 = APC
// 0x18 = timer // 0x18 = timer
xe::store_and_swap<uint32_t>(p + 0x09C, 0xFDFFD7FF); xe::store_and_swap<uint32_t>(p + 0x09C, 0xFDFFD7FF);
// current_cpu is expected to be initialized externally via SetActiveCpu.
xe::store_and_swap<uint32_t>(p + 0x0D0, stack_base_); xe::store_and_swap<uint32_t>(p + 0x0D0, stack_base_);
xe::store_and_swap<uint64_t>(p + 0x130, Clock::QueryGuestSystemTime()); xe::store_and_swap<uint64_t>(p + 0x130, Clock::QueryGuestSystemTime());
xe::store_and_swap<uint32_t>(p + 0x144, guest_object() + 0x144); xe::store_and_swap<uint32_t>(p + 0x144, guest_object() + 0x144);
@ -346,6 +353,12 @@ X_STATUS XThread::Create() {
// Exports use this to get the kernel. // Exports use this to get the kernel.
thread_state_->context()->kernel_state = kernel_state_; thread_state_->context()->kernel_state = kernel_state_;
uint8_t cpu_index = GetFakeCpuNumber(
static_cast<uint8_t>(creation_params_.creation_flags >> 24));
// Initialize the KTHREAD object.
InitializeGuestObject();
X_KPCR* pcr = memory()->TranslateVirtual<X_KPCR*>(pcr_address_); X_KPCR* pcr = memory()->TranslateVirtual<X_KPCR*>(pcr_address_);
pcr->tls_ptr = tls_static_address_; pcr->tls_ptr = tls_static_address_;
@ -355,14 +368,11 @@ X_STATUS XThread::Create() {
pcr->stack_base_ptr = stack_base_; pcr->stack_base_ptr = stack_base_;
pcr->stack_end_ptr = stack_limit_; pcr->stack_end_ptr = stack_limit_;
uint8_t proc_mask =
static_cast<uint8_t>(creation_params_.creation_flags >> 24);
pcr->current_cpu = GetFakeCpuNumber(proc_mask); // Current CPU(?)
pcr->dpc_active = 0; // DPC active bool? pcr->dpc_active = 0; // DPC active bool?
// Initialize the KTHREAD object. // Assign the thread to the logical processor, and also set up the current CPU
InitializeGuestObject(); // in KPCR and KTHREAD.
SetActiveCpu(cpu_index);
// Always retain when starting - the thread owns itself until exited. // Always retain when starting - the thread owns itself until exited.
RetainHandle(); RetainHandle();
@ -415,10 +425,6 @@ X_STATUS XThread::Create() {
return X_STATUS_NO_MEMORY; return X_STATUS_NO_MEMORY;
} }
if (!cvars::ignore_thread_affinities) {
thread_->set_affinity_mask(proc_mask);
}
// Set the thread name based on host ID (for easier debugging). // Set the thread name based on host ID (for easier debugging).
if (thread_name_.empty()) { if (thread_name_.empty()) {
set_name(fmt::format("XThread{:04X}", thread_->system_id())); set_name(fmt::format("XThread{:04X}", thread_->system_id()));
@ -700,37 +706,36 @@ void XThread::SetPriority(int32_t increment) {
} }
void XThread::SetAffinity(uint32_t affinity) { void XThread::SetAffinity(uint32_t affinity) {
// Affinity mask, as in SetThreadAffinityMask. SetActiveCpu(GetFakeCpuNumber(affinity));
// Xbox thread IDs: }
// 0 - core 0, thread 0 - user
// 1 - core 0, thread 1 - user uint8_t XThread::active_cpu() const {
// 2 - core 1, thread 0 - sometimes xcontent const X_KPCR& pcr = *memory()->TranslateVirtual<const X_KPCR*>(pcr_address_);
// 3 - core 1, thread 1 - user return pcr.current_cpu;
// 4 - core 2, thread 0 - xaudio }
// 5 - core 2, thread 1 - user
// TODO(benvanik): implement better thread distribution. void XThread::SetActiveCpu(uint8_t cpu_index) {
// NOTE: these are logical processors, not physical processors or cores. // May be called during thread creation - don't skip if current == new.
assert_true(cpu_index < 6);
X_KPCR& pcr = *memory()->TranslateVirtual<X_KPCR*>(pcr_address_);
pcr.current_cpu = cpu_index;
if (is_guest_thread()) {
X_KTHREAD& thread_object =
*memory()->TranslateVirtual<X_KTHREAD*>(guest_object());
thread_object.current_cpu = cpu_index;
}
if (xe::threading::logical_processor_count() < 6) { if (xe::threading::logical_processor_count() < 6) {
XELOGW("Too few processors - scheduling will be wonky"); XELOGW("Too few processors - scheduling will be wonky");
} }
SetActiveCpu(GetFakeCpuNumber(affinity));
affinity_ = affinity;
if (!cvars::ignore_thread_affinities) { if (!cvars::ignore_thread_affinities) {
thread_->set_affinity_mask(affinity); thread_->set_affinity_mask(uint64_t(1) << cpu_index);
} }
} }
uint32_t XThread::active_cpu() const {
uint8_t* pcr = memory()->TranslateVirtual(pcr_address_);
return xe::load_and_swap<uint8_t>(pcr + 0x10C);
}
void XThread::SetActiveCpu(uint32_t cpu_index) {
assert_true(cpu_index < 6);
uint8_t* pcr = memory()->TranslateVirtual(pcr_address_);
xe::store_and_swap<uint8_t>(pcr + 0x10C, cpu_index);
}
bool XThread::GetTLSValue(uint32_t slot, uint32_t* value_out) { bool XThread::GetTLSValue(uint32_t slot, uint32_t* value_out) {
if (slot * 4 > tls_total_size_) { if (slot * 4 > tls_total_size_) {
return false; return false;

View File

@ -88,7 +88,8 @@ struct X_KTHREAD {
char unk_10[0xAC]; // 0x10 char unk_10[0xAC]; // 0x10
uint8_t suspend_count; // 0xBC uint8_t suspend_count; // 0xBC
uint8_t unk_BD; // 0xBD uint8_t unk_BD; // 0xBD
uint16_t unk_BE; // 0xBE uint8_t unk_BE; // 0xBE
uint8_t current_cpu; // 0xBF
char unk_C0[0x70]; // 0xC0 char unk_C0[0x70]; // 0xC0
xe::be<uint64_t> create_time; // 0x130 xe::be<uint64_t> create_time; // 0x130
xe::be<uint64_t> exit_time; // 0x138 xe::be<uint64_t> exit_time; // 0x138
@ -165,10 +166,17 @@ class XThread : public XObject, public cpu::Thread {
int32_t priority() const { return priority_; } int32_t priority() const { return priority_; }
int32_t QueryPriority(); int32_t QueryPriority();
void SetPriority(int32_t increment); void SetPriority(int32_t increment);
uint32_t affinity() const { return affinity_; }
// Xbox thread IDs:
// 0 - core 0, thread 0 - user
// 1 - core 0, thread 1 - user
// 2 - core 1, thread 0 - sometimes xcontent
// 3 - core 1, thread 1 - user
// 4 - core 2, thread 0 - xaudio
// 5 - core 2, thread 1 - user
void SetAffinity(uint32_t affinity); void SetAffinity(uint32_t affinity);
uint32_t active_cpu() const; uint8_t active_cpu() const;
void SetActiveCpu(uint32_t cpu_index); void SetActiveCpu(uint8_t cpu_index);
bool GetTLSValue(uint32_t slot, uint32_t* value_out); bool GetTLSValue(uint32_t slot, uint32_t* value_out);
bool SetTLSValue(uint32_t slot, uint32_t value); bool SetTLSValue(uint32_t slot, uint32_t value);
@ -220,7 +228,6 @@ class XThread : public XObject, public cpu::Thread {
bool running_ = false; bool running_ = false;
int32_t priority_ = 0; int32_t priority_ = 0;
uint32_t affinity_ = 0;
xe::global_critical_region global_critical_region_; xe::global_critical_region global_critical_region_;
std::atomic<uint32_t> irql_ = {0}; std::atomic<uint32_t> irql_ = {0};

View File

@ -118,15 +118,15 @@ bool D3D12ImmediateDrawer::Initialize() {
return false; return false;
} }
// Create the pipeline states. // Create the pipelines.
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_desc = {}; D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_desc = {};
pipeline_state_desc.pRootSignature = root_signature_; pipeline_desc.pRootSignature = root_signature_;
pipeline_state_desc.VS.pShaderBytecode = immediate_vs; pipeline_desc.VS.pShaderBytecode = immediate_vs;
pipeline_state_desc.VS.BytecodeLength = sizeof(immediate_vs); pipeline_desc.VS.BytecodeLength = sizeof(immediate_vs);
pipeline_state_desc.PS.pShaderBytecode = immediate_ps; pipeline_desc.PS.pShaderBytecode = immediate_ps;
pipeline_state_desc.PS.BytecodeLength = sizeof(immediate_ps); pipeline_desc.PS.BytecodeLength = sizeof(immediate_ps);
D3D12_RENDER_TARGET_BLEND_DESC& pipeline_blend_desc = D3D12_RENDER_TARGET_BLEND_DESC& pipeline_blend_desc =
pipeline_state_desc.BlendState.RenderTarget[0]; pipeline_desc.BlendState.RenderTarget[0];
pipeline_blend_desc.BlendEnable = TRUE; pipeline_blend_desc.BlendEnable = TRUE;
pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA; pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA;
pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA; pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
@ -138,11 +138,11 @@ bool D3D12ImmediateDrawer::Initialize() {
pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED | pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED |
D3D12_COLOR_WRITE_ENABLE_GREEN | D3D12_COLOR_WRITE_ENABLE_GREEN |
D3D12_COLOR_WRITE_ENABLE_BLUE; D3D12_COLOR_WRITE_ENABLE_BLUE;
pipeline_state_desc.SampleMask = UINT_MAX; pipeline_desc.SampleMask = UINT_MAX;
pipeline_state_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; pipeline_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
pipeline_state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; pipeline_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
pipeline_state_desc.RasterizerState.FrontCounterClockwise = FALSE; pipeline_desc.RasterizerState.FrontCounterClockwise = FALSE;
pipeline_state_desc.RasterizerState.DepthClipEnable = TRUE; pipeline_desc.RasterizerState.DepthClipEnable = TRUE;
D3D12_INPUT_ELEMENT_DESC pipeline_input_elements[3] = {}; D3D12_INPUT_ELEMENT_DESC pipeline_input_elements[3] = {};
pipeline_input_elements[0].SemanticName = "POSITION"; pipeline_input_elements[0].SemanticName = "POSITION";
pipeline_input_elements[0].Format = DXGI_FORMAT_R32G32_FLOAT; pipeline_input_elements[0].Format = DXGI_FORMAT_R32G32_FLOAT;
@ -154,26 +154,24 @@ bool D3D12ImmediateDrawer::Initialize() {
pipeline_input_elements[2].Format = DXGI_FORMAT_R8G8B8A8_UNORM; pipeline_input_elements[2].Format = DXGI_FORMAT_R8G8B8A8_UNORM;
pipeline_input_elements[2].AlignedByteOffset = pipeline_input_elements[2].AlignedByteOffset =
offsetof(ImmediateVertex, color); offsetof(ImmediateVertex, color);
pipeline_state_desc.InputLayout.pInputElementDescs = pipeline_input_elements; pipeline_desc.InputLayout.pInputElementDescs = pipeline_input_elements;
pipeline_state_desc.InputLayout.NumElements = pipeline_desc.InputLayout.NumElements =
UINT(xe::countof(pipeline_input_elements)); UINT(xe::countof(pipeline_input_elements));
pipeline_state_desc.PrimitiveTopologyType = pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; pipeline_desc.NumRenderTargets = 1;
pipeline_state_desc.NumRenderTargets = 1; pipeline_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat;
pipeline_state_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat; pipeline_desc.SampleDesc.Count = 1;
pipeline_state_desc.SampleDesc.Count = 1;
if (FAILED(device->CreateGraphicsPipelineState( if (FAILED(device->CreateGraphicsPipelineState(
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_triangle_)))) { &pipeline_desc, IID_PPV_ARGS(&pipeline_triangle_)))) {
XELOGE( XELOGE(
"Failed to create the Direct3D 12 immediate drawer triangle pipeline " "Failed to create the Direct3D 12 immediate drawer triangle pipeline "
"state"); "state");
Shutdown(); Shutdown();
return false; return false;
} }
pipeline_state_desc.PrimitiveTopologyType = pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
if (FAILED(device->CreateGraphicsPipelineState( if (FAILED(device->CreateGraphicsPipelineState(
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_line_)))) { &pipeline_desc, IID_PPV_ARGS(&pipeline_line_)))) {
XELOGE( XELOGE(
"Failed to create the Direct3D 12 immediate drawer line pipeline " "Failed to create the Direct3D 12 immediate drawer line pipeline "
"state"); "state");
@ -267,8 +265,8 @@ void D3D12ImmediateDrawer::Shutdown() {
util::ReleaseAndNull(sampler_heap_); util::ReleaseAndNull(sampler_heap_);
util::ReleaseAndNull(pipeline_state_line_); util::ReleaseAndNull(pipeline_line_);
util::ReleaseAndNull(pipeline_state_triangle_); util::ReleaseAndNull(pipeline_triangle_);
util::ReleaseAndNull(root_signature_); util::ReleaseAndNull(root_signature_);
} }
@ -611,17 +609,17 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
uint32_t(sampler_index))); uint32_t(sampler_index)));
} }
// Set the primitive type and the pipeline state for it. // Set the primitive type and the pipeline for it.
D3D_PRIMITIVE_TOPOLOGY primitive_topology; D3D_PRIMITIVE_TOPOLOGY primitive_topology;
ID3D12PipelineState* pipeline_state; ID3D12PipelineState* pipeline;
switch (draw.primitive_type) { switch (draw.primitive_type) {
case ImmediatePrimitiveType::kLines: case ImmediatePrimitiveType::kLines:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
pipeline_state = pipeline_state_line_; pipeline = pipeline_line_;
break; break;
case ImmediatePrimitiveType::kTriangles: case ImmediatePrimitiveType::kTriangles:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
pipeline_state = pipeline_state_triangle_; pipeline = pipeline_triangle_;
break; break;
default: default:
assert_unhandled_case(draw.primitive_type); assert_unhandled_case(draw.primitive_type);
@ -630,7 +628,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
if (current_primitive_topology_ != primitive_topology) { if (current_primitive_topology_ != primitive_topology) {
current_primitive_topology_ = primitive_topology; current_primitive_topology_ = primitive_topology;
current_command_list_->IASetPrimitiveTopology(primitive_topology); current_command_list_->IASetPrimitiveTopology(primitive_topology);
current_command_list_->SetPipelineState(pipeline_state); current_command_list_->SetPipelineState(pipeline);
} }
// Draw. // Draw.

View File

@ -105,8 +105,8 @@ class D3D12ImmediateDrawer : public ImmediateDrawer {
kCount kCount
}; };
ID3D12PipelineState* pipeline_state_triangle_ = nullptr; ID3D12PipelineState* pipeline_triangle_ = nullptr;
ID3D12PipelineState* pipeline_state_line_ = nullptr; ID3D12PipelineState* pipeline_line_ = nullptr;
ID3D12DescriptorHeap* sampler_heap_ = nullptr; ID3D12DescriptorHeap* sampler_heap_ = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_; D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_;

View File

@ -46,22 +46,22 @@ class D3D12Provider : public GraphicsProvider {
uint32_t GetRTVDescriptorSize() const { return descriptor_size_rtv_; } uint32_t GetRTVDescriptorSize() const { return descriptor_size_rtv_; }
uint32_t GetDSVDescriptorSize() const { return descriptor_size_dsv_; } uint32_t GetDSVDescriptorSize() const { return descriptor_size_dsv_; }
template <typename T> template <typename T>
inline T OffsetViewDescriptor(T start, uint32_t index) const { T OffsetViewDescriptor(T start, uint32_t index) const {
start.ptr += index * descriptor_size_view_; start.ptr += index * descriptor_size_view_;
return start; return start;
} }
template <typename T> template <typename T>
inline T OffsetSamplerDescriptor(T start, uint32_t index) const { T OffsetSamplerDescriptor(T start, uint32_t index) const {
start.ptr += index * descriptor_size_sampler_; start.ptr += index * descriptor_size_sampler_;
return start; return start;
} }
template <typename T> template <typename T>
inline T OffsetRTVDescriptor(T start, uint32_t index) const { T OffsetRTVDescriptor(T start, uint32_t index) const {
start.ptr += index * descriptor_size_rtv_; start.ptr += index * descriptor_size_rtv_;
return start; return start;
} }
template <typename T> template <typename T>
inline T OffsetDSVDescriptor(T start, uint32_t index) const { T OffsetDSVDescriptor(T start, uint32_t index) const {
start.ptr += index * descriptor_size_dsv_; start.ptr += index * descriptor_size_dsv_;
return start; return start;
} }
@ -91,31 +91,29 @@ class D3D12Provider : public GraphicsProvider {
} }
// Proxies for Direct3D 12 functions since they are loaded dynamically. // Proxies for Direct3D 12 functions since they are loaded dynamically.
inline HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc,
D3D_ROOT_SIGNATURE_VERSION version, D3D_ROOT_SIGNATURE_VERSION version,
ID3DBlob** blob_out, ID3DBlob** blob_out,
ID3DBlob** error_blob_out) const { ID3DBlob** error_blob_out) const {
return pfn_d3d12_serialize_root_signature_(desc, version, blob_out, return pfn_d3d12_serialize_root_signature_(desc, version, blob_out,
error_blob_out); error_blob_out);
} }
inline HRESULT Disassemble(const void* src_data, size_t src_data_size, HRESULT Disassemble(const void* src_data, size_t src_data_size, UINT flags,
UINT flags, const char* comments, const char* comments, ID3DBlob** disassembly_out) const {
ID3DBlob** disassembly_out) const {
if (!pfn_d3d_disassemble_) { if (!pfn_d3d_disassemble_) {
return E_NOINTERFACE; return E_NOINTERFACE;
} }
return pfn_d3d_disassemble_(src_data, src_data_size, flags, comments, return pfn_d3d_disassemble_(src_data, src_data_size, flags, comments,
disassembly_out); disassembly_out);
} }
inline HRESULT DxbcConverterCreateInstance(const CLSID& rclsid, HRESULT DxbcConverterCreateInstance(const CLSID& rclsid, const IID& riid,
const IID& riid,
void** ppv) const { void** ppv) const {
if (!pfn_dxilconv_dxc_create_instance_) { if (!pfn_dxilconv_dxc_create_instance_) {
return E_NOINTERFACE; return E_NOINTERFACE;
} }
return pfn_dxilconv_dxc_create_instance_(rclsid, riid, ppv); return pfn_dxilconv_dxc_create_instance_(rclsid, riid, ppv);
} }
inline HRESULT DxcCreateInstance(const CLSID& rclsid, const IID& riid, HRESULT DxcCreateInstance(const CLSID& rclsid, const IID& riid,
void** ppv) const { void** ppv) const {
if (!pfn_dxcompiler_dxc_create_instance_) { if (!pfn_dxcompiler_dxc_create_instance_) {
return E_NOINTERFACE; return E_NOINTERFACE;

View File

@ -47,7 +47,7 @@ ID3D12RootSignature* CreateRootSignature(
return root_signature; return root_signature;
} }
ID3D12PipelineState* CreateComputePipelineState( ID3D12PipelineState* CreateComputePipeline(
ID3D12Device* device, const void* shader, size_t shader_size, ID3D12Device* device, const void* shader, size_t shader_size,
ID3D12RootSignature* root_signature) { ID3D12RootSignature* root_signature) {
D3D12_COMPUTE_PIPELINE_STATE_DESC desc; D3D12_COMPUTE_PIPELINE_STATE_DESC desc;

View File

@ -27,7 +27,7 @@ extern const D3D12_HEAP_PROPERTIES kHeapPropertiesUpload;
extern const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback; extern const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback;
template <typename T> template <typename T>
inline bool ReleaseAndNull(T& object) { bool ReleaseAndNull(T& object) {
if (object != nullptr) { if (object != nullptr) {
object->Release(); object->Release();
object = nullptr; object = nullptr;
@ -39,8 +39,9 @@ inline bool ReleaseAndNull(T& object) {
ID3D12RootSignature* CreateRootSignature(const D3D12Provider& provider, ID3D12RootSignature* CreateRootSignature(const D3D12Provider& provider,
const D3D12_ROOT_SIGNATURE_DESC& desc); const D3D12_ROOT_SIGNATURE_DESC& desc);
ID3D12PipelineState* CreateComputePipelineState( ID3D12PipelineState* CreateComputePipeline(ID3D12Device* device,
ID3D12Device* device, const void* shader, size_t shader_size, const void* shader,
size_t shader_size,
ID3D12RootSignature* root_signature); ID3D12RootSignature* root_signature);
constexpr DXGI_FORMAT GetUintPow2DXGIFormat(uint32_t element_size_bytes_log2) { constexpr DXGI_FORMAT GetUintPow2DXGIFormat(uint32_t element_size_bytes_log2) {

View File

@ -18,7 +18,7 @@ project("SDL2")
"SDL2/include", "SDL2/include",
}) })
buildoptions({ buildoptions({
"/wd4828", -- illegal characters in file "/wd4828", -- illegal characters in file https://bugzilla.libsdl.org/show_bug.cgi?id=5333
}) })
files({ files({
-- 1:1 from SDL.vcxproj file -- 1:1 from SDL.vcxproj file

1
third_party/premake-cmake vendored Submodule

@ -0,0 +1 @@
Subproject commit 26fbbb9962aefcb1c24aff1e7952033ce1361190

View File

@ -88,6 +88,16 @@ def main():
sys.exit(return_code) sys.exit(return_code)
def print_box(msg):
    """Print an important message framed inside a box.

    The frame is two characters wider than ``msg`` so that the message is
    padded by one space on each side. The right and bottom edges use
    double-line box-drawing characters. A blank line follows the box.
    """
    inner_width = len(msg) + 2
    top_edge = '┌' + '─' * inner_width + '╖'
    body_row = '│' + ' {} '.format(msg) + '║'
    bottom_edge = '╘' + '═' * inner_width + '╝'
    # The trailing empty element reproduces the newline that terminates the
    # bottom edge; print() then adds its own, leaving one blank line.
    print('\n'.join((top_edge, body_row, bottom_edge, '')))
def import_vs_environment(): def import_vs_environment():
"""Finds the installed Visual Studio version and imports """Finds the installed Visual Studio version and imports
interesting environment variables into os.environ. interesting environment variables into os.environ.
@ -153,6 +163,7 @@ def import_subprocess_environment(args):
os.environ[var.upper()] = setting os.environ[var.upper()] = setting
break break
def has_bin(binary): def has_bin(binary):
"""Checks whether the given binary is present. """Checks whether the given binary is present.
@ -372,9 +383,9 @@ def run_platform_premake(cc='clang', devenv=None):
if 'VSVERSION' in os.environ: if 'VSVERSION' in os.environ:
vs_version = os.environ['VSVERSION'] vs_version = os.environ['VSVERSION']
return run_premake('windows', 'vs' + vs_version) return run_premake('windows', devenv or ('vs' + vs_version))
else: else:
return run_premake('linux', devenv == 'codelite' and devenv or 'gmake2', cc) return run_premake('linux', devenv or 'gmake2', cc)
def run_premake_export_commands(): def run_premake_export_commands():
@ -408,6 +419,43 @@ def get_build_bin_path(args):
return os.path.join(self_path, 'build', 'bin', platform.capitalize(), args['config'].capitalize()) return os.path.join(self_path, 'build', 'bin', platform.capitalize(), args['config'].capitalize())
def create_clion_workspace():
    """Create basic CLion workspace files inside ``.idea`` for the first start.

    The ``.idea`` directory is created relative to the current working
    directory. Nothing is written when it already exists, so an existing
    workspace is never overwritten.

    Returns:
        True if the workspace files were generated, False if ``.idea`` was
        already present.
    """
    if os.path.exists('.idea'):
        # Not the first start - keep the existing workspace untouched.
        return False
    print('Generating CLion workspace files...')
    # Might become easier in the future: https://youtrack.jetbrains.com/issue/CPP-7911

    # Set the location of the CMakeLists.txt
    os.mkdir('.idea')
    # Both files declare encoding="UTF-8" in their XML prolog, so write them
    # as UTF-8 explicitly instead of relying on the locale default.
    with open(os.path.join('.idea', 'misc.xml'), 'w', encoding='utf-8') as f:
        f.write("""<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$/build">
<contentRoot DIR="$PROJECT_DIR$" />
</component>
</project>
""")

    # Set available configurations
    # TODO Find a way to trigger a cmake reload
    with open(os.path.join('.idea', 'workspace.xml'), 'w',
              encoding='utf-8') as f:
        f.write("""<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeSettings">
<configurations>
<configuration PROFILE_NAME="Checked" CONFIG_NAME="Checked" />
<configuration PROFILE_NAME="Debug" CONFIG_NAME="Debug" />
<configuration PROFILE_NAME="Release" CONFIG_NAME="Release" />
</configurations>
</component>
</project>""")

    return True
def discover_commands(subparsers): def discover_commands(subparsers):
"""Looks for all commands and returns a dictionary of them. """Looks for all commands and returns a dictionary of them.
In the future commands could be discovered on disk. In the future commands could be discovered on disk.
@ -1444,8 +1492,13 @@ class DevenvCommand(Command):
def execute(self, args, pass_args, cwd): def execute(self, args, pass_args, cwd):
devenv = None devenv = None
show_reload_prompt = False
if sys.platform == 'win32': if sys.platform == 'win32':
print('Launching Visual Studio...') print('Launching Visual Studio...')
elif has_bin('clion') or has_bin('clion.sh'):
print('Launching CLion...')
show_reload_prompt = create_clion_workspace()
devenv = 'cmake'
else: else:
print('Launching CodeLite...') print('Launching CodeLite...')
devenv = 'codelite' devenv = 'codelite'
@ -1456,11 +1509,23 @@ class DevenvCommand(Command):
print('') print('')
print('- launching devenv...') print('- launching devenv...')
if show_reload_prompt:
print_box('Please run "File ⇒ ↺ Reload CMake Project" from inside the IDE!')
if sys.platform == 'win32': if sys.platform == 'win32':
shell_call([ shell_call([
'devenv', 'devenv',
'build\\xenia.sln', 'build\\xenia.sln',
]) ])
elif has_bin('clion'):
shell_call([
'clion',
'.',
])
elif has_bin('clion.sh'):
shell_call([
'clion.sh',
'.',
])
else: else:
shell_call([ shell_call([
'codelite', 'codelite',