Merge branch 'master' into vulkan

This commit is contained in:
Triang3l 2020-11-15 14:06:15 +03:00
commit 8febf02a39
35 changed files with 737 additions and 670 deletions

3
.gitmodules vendored
View File

@ -55,6 +55,9 @@
[submodule "third_party/DirectXShaderCompiler"]
path = third_party/DirectXShaderCompiler
url = https://github.com/microsoft/DirectXShaderCompiler.git
[submodule "third_party/premake-cmake"]
path = third_party/premake-cmake
url = https://github.com/Enhex/premake-cmake.git
[submodule "third_party/glslang"]
path = third_party/glslang
url = https://github.com/KhronosGroup/glslang.git

View File

@ -28,9 +28,9 @@ addons:
jobs:
include:
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 LINT=true
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 BUILD=true CONFIG=Debug
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 BUILD=true CONFIG=Release
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 LINT=true
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 BUILD=true CONFIG=Debug
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 BUILD=true CONFIG=Release
git:
# We handle submodules ourselves in xenia-build setup.
@ -40,8 +40,10 @@ before_script:
- export LIBVULKAN_VERSION=1.1.70
- export CXX=$CXX_COMPILER
- export CC=$C_COMPILER
- export AR=$AR_COMPILER
# Dump useful info.
- $CXX --version
- $AR_COMPILER --version
- python3 --version
- clang-format-9 --version
- clang-format-9 -style=file -dump-config

View File

@ -91,12 +91,14 @@ Linux support is extremely experimental and presently incomplete.
The build script uses LLVM/Clang 9. GCC, while it should work in theory, is not easily
interchangeable right now.
[CodeLite](https://codelite.org) is the supported IDE and `xb devenv` will generate a workspace and attempt to open it. Your distribution's version may be out of date so check their website.
Normal building via `xb build` uses Make.
* Normal building via `xb build` uses Make.
* [CodeLite](https://codelite.org) is supported. `xb devenv` will generate a workspace and attempt to open it. Your distribution's version may be out of date, so check the CodeLite website.
* Experimental CMake generation is available to facilitate use of other IDEs such as [CLion](https://www.jetbrains.com/clion/). If `clion` is available inside `$PATH`, `xb devenv` will start it. Otherwise `build/CMakeLists.txt` needs to be generated by invoking `xb premake --devenv=cmake` manually.
Clang-9 or newer should be available from system repositories on all up to date distributions.
You will also need some development libraries. To get them on an Ubuntu system:
```
```bash
sudo apt-get install libgtk-3-dev libpthread-stubs0-dev liblz4-dev libx11-dev libvulkan-dev libsdl2-dev libiberty-dev libunwind-dev libc++-dev libc++abi-dev
```

View File

@ -1,5 +1,6 @@
include("tools/build")
require("third_party/premake-export-compile-commands/export-compile-commands")
require("third_party/premake-cmake/cmake")
location(build_root)
targetdir(build_bin)
@ -24,6 +25,9 @@ defines({
"UNICODE",
})
cppdialect("C++17")
symbols("On")
-- TODO(DrChat): Find a way to disable this on other architectures.
if ARCH ~= "ppc64" then
filter("architecture:x86_64")
@ -44,30 +48,29 @@ filter("kind:StaticLib")
filter("configurations:Checked")
runtime("Debug")
optimize("Off")
defines({
"DEBUG",
})
runtime("Debug")
filter({"configurations:Checked", "platforms:Windows"})
buildoptions({
"/RTCsu", -- Full Run-Time Checks.
"/RTCsu", -- Full Run-Time Checks.
})
filter({"configurations:Checked", "platforms:Linux"})
defines({
"_GLIBCXX_DEBUG", -- libstdc++ debug mode
})
filter("configurations:Debug")
runtime("Debug")
runtime("Release")
optimize("Off")
defines({
"DEBUG",
"_NO_DEBUG_HEAP=1",
})
runtime("Release")
filter({"configurations:Debug", "platforms:Windows"})
linkoptions({
"/NODEFAULTLIB:MSVCRTD",
})
filter({"configurations:Debug", "platforms:Linux"})
buildoptions({
"-g",
defines({
"_GLIBCXX_DEBUG", -- make dbg symbols work on some distros
})
filter("configurations:Release")
@ -76,26 +79,18 @@ filter("configurations:Release")
"NDEBUG",
"_NO_DEBUG_HEAP=1",
})
optimize("speed")
optimize("Speed")
inlining("Auto")
floatingpoint("Fast")
flags({
"LinkTimeOptimization",
})
runtime("Release")
filter({"configurations:Release", "platforms:Windows"})
linkoptions({
"/NODEFAULTLIB:MSVCRTD",
})
filter("platforms:Linux")
system("linux")
toolset("clang")
cppdialect("C++17")
buildoptions({
-- "-mlzcnt", -- (don't) Assume lzcnt is supported.
"`pkg-config --cflags gtk+-x11-3.0`",
"-fno-lto", -- Premake doesn't support LTO on clang
({os.outputof("pkg-config --cflags gtk+-x11-3.0")})[1],
})
links({
"stdc++fs",
@ -105,14 +100,13 @@ filter("platforms:Linux")
"rt",
})
linkoptions({
"`pkg-config --libs gtk+-3.0`",
({os.outputof("pkg-config --libs gtk+-3.0")})[1],
})
filter({"platforms:Linux", "kind:*App"})
linkgroups("On")
filter({"platforms:Linux", "language:C++", "toolset:gcc"})
cppdialect("C++17")
links({
})
disablewarnings({
@ -147,13 +141,11 @@ filter({"platforms:Linux", "language:C++", "toolset:clang", "files:*.cc or *.cpp
filter("platforms:Windows")
system("windows")
toolset("msc")
cppdialect("C++17")
buildoptions({
"/MP", -- Multiprocessor compilation.
"/utf-8", -- 'build correctly on systems with non-Latin codepages'.
-- Mark warnings as severe
"/w14839", -- non-standard use of class 'type' as an argument to a variadic function
"/w14840", -- non-portable use of class 'type' as an argument to a variadic function
"/w14839", -- non-standard use of class 'type' as an argument to a variadic function
"/w14840", -- non-portable use of class 'type' as an argument to a variadic function
-- Disable warnings
"/wd4100", -- Unreferenced parameters are ok.
"/wd4201", -- Nameless struct/unions are ok.
@ -163,10 +155,10 @@ filter("platforms:Windows")
"/wd4189", -- 'local variable is initialized but not referenced'.
})
flags({
"NoMinimalRebuild", -- Required for /MP above.
"MultiProcessorCompile", -- Multiprocessor compilation.
"NoMinimalRebuild", -- Required for /MP above.
})
symbols("On")
defines({
"_CRT_NONSTDC_NO_DEPRECATE",
"_CRT_SECURE_NO_WARNINGS",

View File

@ -8,17 +8,6 @@ project("xenia-app")
targetname("xenia")
language("C++")
links({
"aes_128",
"capstone",
"fmt",
"dxbc",
"discord-rpc",
"glslang-spirv",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xenia-app-discord",
"xenia-apu",
"xenia-apu-nop",
@ -39,6 +28,19 @@ project("xenia-app")
"xenia-ui",
"xenia-ui-vulkan",
"xenia-vfs",
})
links({
"aes_128",
"capstone",
"fmt",
"dxbc",
"discord-rpc",
"glslang-spirv",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xxhash",
})
defines({

View File

@ -9,21 +9,51 @@
#include "xenia/base/debugging.h"
#include <signal.h>
#include <csignal>
#include <cstdarg>
#include <fstream>
#include <iostream>
#include <mutex>
#include <sstream>
#include "xenia/base/string_buffer.h"
namespace xe {
namespace debugging {
bool IsDebuggerAttached() { return false; }
void Break() { raise(SIGTRAP); }
// Reports whether the "TracerPid:" entry of /proc/self/status holds a non-zero
// process ID. Returns false when the file cannot be opened or when no such
// entry is found.
bool IsDebuggerAttached() {
  std::ifstream status_file("/proc/self/status");
  if (!status_file.is_open()) {
    return false;
  }
  for (std::string entry; std::getline(status_file, entry);) {
    std::istringstream fields(entry);
    std::string field_name;
    fields >> field_name;
    if (field_name != "TracerPid:") {
      // Not the entry of interest - keep scanning.
      continue;
    }
    // The first matching entry decides the result.
    uint32_t pid;
    fields >> pid;
    return pid != 0;
  }
  return false;
}
// Raises SIGTRAP on the calling thread, presumably so that an attached
// debugger stops at this point. On the first call a SIGTRAP handler is
// registered before the signal is raised.
void Break() {
static std::once_flag flag;
// std::call_once runs the registration below a single time, regardless of
// how many times Break() is called.
std::call_once(flag, []() {
// Install handler for sigtrap only once
std::signal(SIGTRAP, [](int) {
// Forward signal to default handler after being caught
// NOTE(review): this call only restores the default disposition for
// SIGTRAP; it does not re-raise the signal. Because the handler is
// registered once and resets itself here, it looks like a later SIGTRAP
// that is actually delivered would get the default action - confirm this
// is intended.
std::signal(SIGTRAP, SIG_DFL);
});
});
std::raise(SIGTRAP);
}
namespace internal {
void DebugPrint(const char* s) {
// TODO: proper implementation.
}
// Writes the message to std::clog followed by a newline; std::endl also
// flushes the stream.
void DebugPrint(const char* s) { std::clog << s << std::endl; }
} // namespace internal
} // namespace debugging

View File

@ -30,7 +30,7 @@ ExportResolver::Table::Table(const std::string_view module_name,
}
std::sort(
exports_by_name_.begin(), exports_by_name_.end(),
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) <= 0; });
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) < 0; });
}
ExportResolver::ExportResolver() = default;
@ -51,7 +51,7 @@ void ExportResolver::RegisterTable(
}
std::sort(
all_exports_by_name_.begin(), all_exports_by_name_.end(),
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) <= 0; });
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) < 0; });
}
Export* ExportResolver::GetExportByOrdinal(const std::string_view module_name,

View File

@ -387,7 +387,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
sampler_count_vertex);
return nullptr;
}
root_signatures_bindful_.insert({index, root_signature});
root_signatures_bindful_.emplace(index, root_signature);
return root_signature;
}
@ -745,12 +745,11 @@ void D3D12CommandProcessor::SetSamplePositions(
current_sample_positions_ = sample_positions;
}
void D3D12CommandProcessor::SetComputePipelineState(
ID3D12PipelineState* pipeline_state) {
if (current_external_pipeline_state_ != pipeline_state) {
deferred_command_list_.D3DSetPipelineState(pipeline_state);
current_external_pipeline_state_ = pipeline_state;
current_cached_pipeline_state_ = nullptr;
// Records a bind of the given pipeline in the deferred command list unless it
// is already the tracked external pipeline, then updates the tracking:
// the external pipeline becomes `pipeline` and the cached pipeline is reset
// to nullptr.
void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) {
  if (current_external_pipeline_ == pipeline) {
    // Same pipeline as last time - nothing to record.
    return;
  }
  deferred_command_list_.D3DSetPipelineState(pipeline);
  current_external_pipeline_ = pipeline;
  current_cached_pipeline_ = nullptr;
}
@ -773,8 +772,16 @@ std::string D3D12CommandProcessor::GetWindowTitleText() const {
}
// Currently scaling is only supported with ROV.
if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) {
return "Direct3D 12 - 2x";
return "Direct3D 12 - ROV 2x";
}
// Rasterizer-ordered views are a feature very rarely used as of 2020 and
// that faces adoption complications (outside of Direct3D - on Vulkan - at
// least), but crucial to Xenia - raise awareness of its usage.
// https://github.com/KhronosGroup/Vulkan-Ecosystem/issues/27#issuecomment-455712319
// "In Xenia's title bar "D3D12 ROV" can be seen, which was a surprise, as I
// wasn't aware that Xenia D3D12 backend was using Raster Order Views
// feature" - oscarbg in that issue.
return "Direct3D 12 - ROV";
}
return "Direct3D 12";
}
@ -1196,7 +1203,7 @@ bool D3D12CommandProcessor::SetupContext() {
*this, *register_file_, bindless_resources_used_, edram_rov_used_,
texture_cache_->IsResolutionScale2X() ? 2 : 1);
if (!pipeline_cache_->Initialize()) {
XELOGE("Failed to initialize the graphics pipeline state cache");
XELOGE("Failed to initialize the graphics pipeline cache");
return false;
}
@ -1526,8 +1533,7 @@ void D3D12CommandProcessor::ShutdownContext() {
// Shut down binding - bindless descriptors may be owned by subsystems like
// the texture cache.
// Root signatured are used by pipeline states, thus freed after the pipeline
// states.
// Root signatures are used by pipelines, thus freed after the pipelines.
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_);
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_);
for (auto it : root_signatures_bindful_) {
@ -1878,7 +1884,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
xenos::VertexShaderExportMode::kMultipass ||
(primitive_two_faced && pa_su_sc_mode_cntl.cull_front &&
pa_su_sc_mode_cntl.cull_back))) {
// All faces are culled - can't be expressed in the pipeline state.
// All faces are culled - can't be expressed in the pipeline.
return true;
}
@ -1954,7 +1960,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
line_loop_closing_index = 0;
}
// Update the textures - this may bind pipeline state objects.
// Update the textures - this may bind pipelines.
uint32_t used_texture_mask =
vertex_shader->GetUsedTextureMask() |
(pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
@ -1972,21 +1978,21 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
early_z = true;
}
// Create the pipeline state object if needed and bind it.
void* pipeline_state_handle;
// Create the pipeline if needed and bind it.
void* pipeline_handle;
ID3D12RootSignature* root_signature;
if (!pipeline_cache_->ConfigurePipeline(
vertex_shader, pixel_shader, primitive_type_converted,
indexed ? index_buffer_info->format : xenos::IndexFormat::kInt16,
early_z, pipeline_render_targets, &pipeline_state_handle,
early_z, pipeline_render_targets, &pipeline_handle,
&root_signature)) {
return false;
}
if (current_cached_pipeline_state_ != pipeline_state_handle) {
if (current_cached_pipeline_ != pipeline_handle) {
deferred_command_list_.SetPipelineStateHandle(
reinterpret_cast<void*>(pipeline_state_handle));
current_cached_pipeline_state_ = pipeline_state_handle;
current_external_pipeline_state_ = nullptr;
reinterpret_cast<void*>(pipeline_handle));
current_cached_pipeline_ = pipeline_handle;
current_external_pipeline_ = nullptr;
}
// Update viewport, scissor, blend factor and stencil reference.
@ -2519,8 +2525,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
submission_open_ = true;
// Start a new deferred command list - will submit it to the real one in the
// end of the submission (when async pipeline state object creation requests
// are fulfilled).
// end of the submission (when async pipeline creation requests are
// fulfilled).
deferred_command_list_.Reset();
// Reset cached state of the command list.
@ -2529,8 +2535,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
ff_blend_factor_update_needed_ = true;
ff_stencil_ref_update_needed_ = true;
current_sample_positions_ = xenos::MsaaSamples::k1X;
current_cached_pipeline_state_ = nullptr;
current_external_pipeline_state_ = nullptr;
current_cached_pipeline_ = nullptr;
current_external_pipeline_ = nullptr;
current_graphics_root_signature_ = nullptr;
current_graphics_root_up_to_date_ = 0;
if (bindless_resources_used_) {
@ -2726,7 +2732,7 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
}
bool D3D12CommandProcessor::CanEndSubmissionImmediately() const {
return !submission_open_ || !pipeline_cache_->IsCreatingPipelineStates();
return !submission_open_ || !pipeline_cache_->IsCreatingPipelines();
}
void D3D12CommandProcessor::ClearCommandAllocatorCache() {
@ -3890,8 +3896,8 @@ bool D3D12CommandProcessor::UpdateBindings(
sampler_parameters,
provider.OffsetSamplerDescriptor(
sampler_bindless_heap_cpu_start_, sampler_index));
texture_cache_bindless_sampler_map_.insert(
{sampler_parameters.value, sampler_index});
texture_cache_bindless_sampler_map_.emplace(
sampler_parameters.value, sampler_index);
}
current_sampler_bindless_indices_vertex_[j] = sampler_index;
}
@ -3922,8 +3928,8 @@ bool D3D12CommandProcessor::UpdateBindings(
sampler_parameters,
provider.OffsetSamplerDescriptor(
sampler_bindless_heap_cpu_start_, sampler_index));
texture_cache_bindless_sampler_map_.insert(
{sampler_parameters.value, sampler_index});
texture_cache_bindless_sampler_map_.emplace(
sampler_parameters.value, sampler_index);
}
current_sampler_bindless_indices_pixel_[j] = sampler_index;
}

View File

@ -186,19 +186,17 @@ class D3D12CommandProcessor : public CommandProcessor {
// render targets or copying to depth render targets.
void SetSamplePositions(xenos::MsaaSamples sample_positions);
// Returns a pipeline state object with deferred creation by its handle. May
// return nullptr if failed to create the pipeline state object.
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
void* handle) const {
return pipeline_cache_->GetD3D12PipelineStateByHandle(handle);
// Resolves a pipeline handle through the pipeline cache. Creation of the
// pipeline may be deferred, and nullptr may be returned if the pipeline failed
// to be created.
ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
  ID3D12PipelineState* pipeline =
      pipeline_cache_->GetD3D12PipelineByHandle(handle);
  return pipeline;
}
// Sets the current pipeline state to a compute one. This is for cache
// invalidation primarily. A submission must be open.
void SetComputePipelineState(ID3D12PipelineState* pipeline_state);
// Sets the current pipeline to a compute one. This is for cache invalidation
// primarily. A submission must be open.
void SetComputePipeline(ID3D12PipelineState* pipeline);
// For the pipeline state cache to call when binding layout UIDs may be
// reused.
// For the pipeline cache to call when binding layout UIDs may be reused.
void NotifyShaderBindingsLayoutUIDsInvalidated();
// Returns the text to display in the GPU backend name in the window title.
@ -323,8 +321,8 @@ class D3D12CommandProcessor : public CommandProcessor {
bool EndSubmission(bool is_swap);
// Checks if ending a submission right now would not cause potentially more
// delay than it would reduce by making the GPU start working earlier - such
// as when there are unfinished graphics pipeline state creation requests that
// would need to be fulfilled before actually submitting the command list.
// as when there are unfinished graphics pipeline creation requests that would
// need to be fulfilled before actually submitting the command list.
bool CanEndSubmissionImmediately() const;
bool AwaitAllQueueOperationsCompletion() {
CheckSubmissionFence(submission_current_);
@ -503,7 +501,7 @@ class D3D12CommandProcessor : public CommandProcessor {
static constexpr uint32_t kSwapTextureWidth = 1280;
static constexpr uint32_t kSwapTextureHeight = 720;
inline std::pair<uint32_t, uint32_t> GetSwapTextureSize() const {
std::pair<uint32_t, uint32_t> GetSwapTextureSize() const {
if (texture_cache_->IsResolutionScale2X()) {
return std::make_pair(kSwapTextureWidth * 2, kSwapTextureHeight * 2);
}
@ -548,13 +546,12 @@ class D3D12CommandProcessor : public CommandProcessor {
// Current SSAA sample positions (to be updated by the render target cache).
xenos::MsaaSamples current_sample_positions_;
// Currently bound pipeline state, either a graphics pipeline state object
// from the pipeline state cache (with potentially deferred creation -
// current_external_pipeline_state_ is nullptr in this case) or a non-Xenos
// graphics or compute pipeline state object (current_cached_pipeline_state_
// is nullptr in this case).
void* current_cached_pipeline_state_;
ID3D12PipelineState* current_external_pipeline_state_;
// Currently bound pipeline, either a graphics pipeline from the pipeline
// cache (with potentially deferred creation - current_external_pipeline_ is
// nullptr in this case) or a non-Xenos graphics or compute pipeline
// (current_cached_pipeline_ is nullptr in this case).
void* current_cached_pipeline_;
ID3D12PipelineState* current_external_pipeline_;
// Currently bound graphics root signature.
ID3D12RootSignature* current_graphics_root_signature_;

View File

@ -157,7 +157,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
stretch_pipeline_desc.SampleDesc.Count = 1;
if (FAILED(device->CreateGraphicsPipelineState(
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_pipeline_)))) {
XELOGE("Failed to create the front buffer stretch pipeline state");
XELOGE("Failed to create the front buffer stretch pipeline");
stretch_gamma_root_signature_->Release();
stretch_gamma_root_signature_ = nullptr;
stretch_root_signature_->Release();
@ -170,8 +170,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
if (FAILED(device->CreateGraphicsPipelineState(
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_gamma_pipeline_)))) {
XELOGE(
"Failed to create the gamma-correcting front buffer stretch "
"pipeline state");
"Failed to create the gamma-correcting front buffer stretch pipeline");
stretch_pipeline_->Release();
stretch_pipeline_ = nullptr;
stretch_gamma_root_signature_->Release();

View File

@ -85,7 +85,7 @@ class D3D12Shader : public Shader {
return sampler_bindings_.data();
}
// For owning subsystems like the pipeline state cache, accessors for unique
// For owning subsystems like the pipeline cache, accessors for unique
// identifiers (used instead of hashes to make sure collisions can't happen)
// of binding layouts used by the shader, for invalidation if a shader with an
// incompatible layout was bound.

View File

@ -48,7 +48,7 @@ class D3D12SharedMemory : public SharedMemory {
// UseForReading or UseForWriting.
// Makes the buffer usable for vertices, indices and texture untiling.
inline void UseForReading() {
void UseForReading() {
// Vertex fetch is also allowed in pixel shaders.
CommitUAVWritesAndTransitionBuffer(
D3D12_RESOURCE_STATE_INDEX_BUFFER |
@ -56,18 +56,18 @@ class D3D12SharedMemory : public SharedMemory {
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
// Makes the buffer usable for texture tiling after a resolve.
inline void UseForWriting() {
void UseForWriting() {
CommitUAVWritesAndTransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
}
// Makes the buffer usable as a source for copy commands.
inline void UseAsCopySource() {
void UseAsCopySource() {
CommitUAVWritesAndTransitionBuffer(D3D12_RESOURCE_STATE_COPY_SOURCE);
}
// Must be called when doing draws/dispatches modifying data within the shared
// memory buffer as a UAV, to make sure that when UseForWriting is called the
// next time, a UAV barrier will be done, and subsequent overlapping UAV
// writes and reads are ordered.
inline void MarkUAVWritesCommitNeeded() {
void MarkUAVWritesCommitNeeded() {
if (buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
buffer_uav_writes_commit_needed_ = true;
}

View File

@ -209,9 +209,8 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
}
} break;
case Command::kSetPipelineStateHandle: {
current_pipeline_state =
command_processor_.GetD3D12PipelineStateByHandle(
*reinterpret_cast<void* const*>(stream));
current_pipeline_state = command_processor_.GetD3D12PipelineByHandle(
*reinterpret_cast<void* const*>(stream));
if (current_pipeline_state) {
command_list->SetPipelineState(current_pipeline_state);
}

View File

@ -33,7 +33,7 @@ class DeferredCommandList {
void Execute(ID3D12GraphicsCommandList* command_list,
ID3D12GraphicsCommandList1* command_list_1);
inline void D3DClearUnorderedAccessViewUint(
void D3DClearUnorderedAccessViewUint(
D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle_in_current_heap,
D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle, ID3D12Resource* resource,
const UINT values[4], UINT num_rects, const D3D12_RECT* rects) {
@ -51,9 +51,9 @@ class DeferredCommandList {
}
}
inline void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset,
ID3D12Resource* src_buffer, UINT64 src_offset,
UINT64 num_bytes) {
void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset,
ID3D12Resource* src_buffer, UINT64 src_offset,
UINT64 num_bytes) {
auto& args = *reinterpret_cast<D3DCopyBufferRegionArguments*>(WriteCommand(
Command::kD3DCopyBufferRegion, sizeof(D3DCopyBufferRegionArguments)));
args.dst_buffer = dst_buffer;
@ -63,26 +63,26 @@ class DeferredCommandList {
args.num_bytes = num_bytes;
}
inline void D3DCopyResource(ID3D12Resource* dst_resource,
ID3D12Resource* src_resource) {
// Writes a kD3DCopyResource command and stores the destination and source
// resource pointers in its argument structure.
void D3DCopyResource(ID3D12Resource* dst_resource,
                     ID3D12Resource* src_resource) {
  auto* copy_args = reinterpret_cast<D3DCopyResourceArguments*>(WriteCommand(
      Command::kD3DCopyResource, sizeof(D3DCopyResourceArguments)));
  copy_args->dst_resource = dst_resource;
  copy_args->src_resource = src_resource;
}
inline void CopyTexture(const D3D12_TEXTURE_COPY_LOCATION& dst,
const D3D12_TEXTURE_COPY_LOCATION& src) {
// Writes a kCopyTexture command and copies both texture copy locations, byte
// for byte, into its argument structure.
void CopyTexture(const D3D12_TEXTURE_COPY_LOCATION& dst,
                 const D3D12_TEXTURE_COPY_LOCATION& src) {
  auto* copy_args = reinterpret_cast<CopyTextureArguments*>(
      WriteCommand(Command::kCopyTexture, sizeof(CopyTextureArguments)));
  std::memcpy(&copy_args->dst, &dst, sizeof(D3D12_TEXTURE_COPY_LOCATION));
  std::memcpy(&copy_args->src, &src, sizeof(D3D12_TEXTURE_COPY_LOCATION));
}
inline void CopyTextureRegion(const D3D12_TEXTURE_COPY_LOCATION& dst,
UINT dst_x, UINT dst_y, UINT dst_z,
const D3D12_TEXTURE_COPY_LOCATION& src,
const D3D12_BOX& src_box) {
void CopyTextureRegion(const D3D12_TEXTURE_COPY_LOCATION& dst, UINT dst_x,
UINT dst_y, UINT dst_z,
const D3D12_TEXTURE_COPY_LOCATION& src,
const D3D12_BOX& src_box) {
auto& args = *reinterpret_cast<CopyTextureRegionArguments*>(WriteCommand(
Command::kCopyTextureRegion, sizeof(CopyTextureRegionArguments)));
std::memcpy(&args.dst, &dst, sizeof(D3D12_TEXTURE_COPY_LOCATION));
@ -93,8 +93,8 @@ class DeferredCommandList {
args.src_box = src_box;
}
inline void D3DDispatch(UINT thread_group_count_x, UINT thread_group_count_y,
UINT thread_group_count_z) {
void D3DDispatch(UINT thread_group_count_x, UINT thread_group_count_y,
UINT thread_group_count_z) {
auto& args = *reinterpret_cast<D3DDispatchArguments*>(
WriteCommand(Command::kD3DDispatch, sizeof(D3DDispatchArguments)));
args.thread_group_count_x = thread_group_count_x;
@ -102,11 +102,10 @@ class DeferredCommandList {
args.thread_group_count_z = thread_group_count_z;
}
inline void D3DDrawIndexedInstanced(UINT index_count_per_instance,
UINT instance_count,
UINT start_index_location,
INT base_vertex_location,
UINT start_instance_location) {
void D3DDrawIndexedInstanced(UINT index_count_per_instance,
UINT instance_count, UINT start_index_location,
INT base_vertex_location,
UINT start_instance_location) {
auto& args = *reinterpret_cast<D3DDrawIndexedInstancedArguments*>(
WriteCommand(Command::kD3DDrawIndexedInstanced,
sizeof(D3DDrawIndexedInstancedArguments)));
@ -117,9 +116,9 @@ class DeferredCommandList {
args.start_instance_location = start_instance_location;
}
inline void D3DDrawInstanced(UINT vertex_count_per_instance,
UINT instance_count, UINT start_vertex_location,
UINT start_instance_location) {
void D3DDrawInstanced(UINT vertex_count_per_instance, UINT instance_count,
UINT start_vertex_location,
UINT start_instance_location) {
auto& args = *reinterpret_cast<D3DDrawInstancedArguments*>(WriteCommand(
Command::kD3DDrawInstanced, sizeof(D3DDrawInstancedArguments)));
args.vertex_count_per_instance = vertex_count_per_instance;
@ -128,7 +127,7 @@ class DeferredCommandList {
args.start_instance_location = start_instance_location;
}
inline void D3DIASetIndexBuffer(const D3D12_INDEX_BUFFER_VIEW* view) {
void D3DIASetIndexBuffer(const D3D12_INDEX_BUFFER_VIEW* view) {
auto& args = *reinterpret_cast<D3D12_INDEX_BUFFER_VIEW*>(WriteCommand(
Command::kD3DIASetIndexBuffer, sizeof(D3D12_INDEX_BUFFER_VIEW)));
if (view != nullptr) {
@ -142,14 +141,13 @@ class DeferredCommandList {
}
}
inline void D3DIASetPrimitiveTopology(
D3D12_PRIMITIVE_TOPOLOGY primitive_topology) {
// Writes a kD3DIASetPrimitiveTopology command whose payload is the topology
// value itself.
void D3DIASetPrimitiveTopology(D3D12_PRIMITIVE_TOPOLOGY primitive_topology) {
  auto* topology_slot = reinterpret_cast<D3D12_PRIMITIVE_TOPOLOGY*>(
      WriteCommand(Command::kD3DIASetPrimitiveTopology,
                   sizeof(D3D12_PRIMITIVE_TOPOLOGY)));
  *topology_slot = primitive_topology;
}
inline void D3DOMSetBlendFactor(const FLOAT blend_factor[4]) {
void D3DOMSetBlendFactor(const FLOAT blend_factor[4]) {
auto args = reinterpret_cast<FLOAT*>(
WriteCommand(Command::kD3DOMSetBlendFactor, 4 * sizeof(FLOAT)));
args[0] = blend_factor[0];
@ -158,7 +156,7 @@ class DeferredCommandList {
args[3] = blend_factor[3];
}
inline void D3DOMSetRenderTargets(
void D3DOMSetRenderTargets(
UINT num_render_target_descriptors,
const D3D12_CPU_DESCRIPTOR_HANDLE* render_target_descriptors,
BOOL rts_single_handle_to_descriptor_range,
@ -185,14 +183,14 @@ class DeferredCommandList {
}
}
inline void D3DOMSetStencilRef(UINT stencil_ref) {
// Writes a kD3DOMSetStencilRef command whose payload is the stencil reference
// value.
void D3DOMSetStencilRef(UINT stencil_ref) {
  auto* ref_slot = reinterpret_cast<UINT*>(
      WriteCommand(Command::kD3DOMSetStencilRef, sizeof(UINT)));
  *ref_slot = stencil_ref;
}
inline void D3DResourceBarrier(UINT num_barriers,
const D3D12_RESOURCE_BARRIER* barriers) {
void D3DResourceBarrier(UINT num_barriers,
const D3D12_RESOURCE_BARRIER* barriers) {
if (num_barriers == 0) {
return;
}
@ -207,21 +205,22 @@ class DeferredCommandList {
num_barriers * sizeof(D3D12_RESOURCE_BARRIER));
}
inline void RSSetScissorRect(const D3D12_RECT& rect) {
// Writes a kRSSetScissorRect command whose payload is a copy of the rectangle.
void RSSetScissorRect(const D3D12_RECT& rect) {
  auto* rect_slot = reinterpret_cast<D3D12_RECT*>(
      WriteCommand(Command::kRSSetScissorRect, sizeof(D3D12_RECT)));
  *rect_slot = rect;
}
inline void RSSetViewport(const D3D12_VIEWPORT& viewport) {
// Writes a kRSSetViewport command whose payload is a copy of the viewport.
void RSSetViewport(const D3D12_VIEWPORT& viewport) {
  auto* viewport_slot = reinterpret_cast<D3D12_VIEWPORT*>(
      WriteCommand(Command::kRSSetViewport, sizeof(D3D12_VIEWPORT)));
  *viewport_slot = viewport;
}
inline void D3DSetComputeRoot32BitConstants(
UINT root_parameter_index, UINT num_32bit_values_to_set,
const void* src_data, UINT dest_offset_in_32bit_values) {
void D3DSetComputeRoot32BitConstants(UINT root_parameter_index,
UINT num_32bit_values_to_set,
const void* src_data,
UINT dest_offset_in_32bit_values) {
if (num_32bit_values_to_set == 0) {
return;
}
@ -235,9 +234,10 @@ class DeferredCommandList {
std::memcpy(args + 1, src_data, num_32bit_values_to_set * sizeof(uint32_t));
}
inline void D3DSetGraphicsRoot32BitConstants(
UINT root_parameter_index, UINT num_32bit_values_to_set,
const void* src_data, UINT dest_offset_in_32bit_values) {
void D3DSetGraphicsRoot32BitConstants(UINT root_parameter_index,
UINT num_32bit_values_to_set,
const void* src_data,
UINT dest_offset_in_32bit_values) {
if (num_32bit_values_to_set == 0) {
return;
}
@ -251,7 +251,7 @@ class DeferredCommandList {
std::memcpy(args + 1, src_data, num_32bit_values_to_set * sizeof(uint32_t));
}
inline void D3DSetComputeRootConstantBufferView(
void D3DSetComputeRootConstantBufferView(
UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS buffer_location) {
auto& args = *reinterpret_cast<SetRootConstantBufferViewArguments*>(
WriteCommand(Command::kD3DSetComputeRootConstantBufferView,
@ -260,7 +260,7 @@ class DeferredCommandList {
args.buffer_location = buffer_location;
}
inline void D3DSetGraphicsRootConstantBufferView(
void D3DSetGraphicsRootConstantBufferView(
UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS buffer_location) {
auto& args = *reinterpret_cast<SetRootConstantBufferViewArguments*>(
WriteCommand(Command::kD3DSetGraphicsRootConstantBufferView,
@ -269,7 +269,7 @@ class DeferredCommandList {
args.buffer_location = buffer_location;
}
inline void D3DSetComputeRootDescriptorTable(
void D3DSetComputeRootDescriptorTable(
UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) {
auto& args = *reinterpret_cast<SetRootDescriptorTableArguments*>(
WriteCommand(Command::kD3DSetComputeRootDescriptorTable,
@ -278,7 +278,7 @@ class DeferredCommandList {
args.base_descriptor.ptr = base_descriptor.ptr;
}
inline void D3DSetGraphicsRootDescriptorTable(
void D3DSetGraphicsRootDescriptorTable(
UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) {
auto& args = *reinterpret_cast<SetRootDescriptorTableArguments*>(
WriteCommand(Command::kD3DSetGraphicsRootDescriptorTable,
@ -287,42 +287,40 @@ class DeferredCommandList {
args.base_descriptor.ptr = base_descriptor.ptr;
}
inline void D3DSetComputeRootSignature(ID3D12RootSignature* root_signature) {
// Writes a kD3DSetComputeRootSignature command whose payload is the root
// signature pointer.
void D3DSetComputeRootSignature(ID3D12RootSignature* root_signature) {
  auto* signature_slot = reinterpret_cast<ID3D12RootSignature**>(WriteCommand(
      Command::kD3DSetComputeRootSignature, sizeof(ID3D12RootSignature*)));
  *signature_slot = root_signature;
}
inline void D3DSetGraphicsRootSignature(ID3D12RootSignature* root_signature) {
// Writes a kD3DSetGraphicsRootSignature command whose payload is the root
// signature pointer.
void D3DSetGraphicsRootSignature(ID3D12RootSignature* root_signature) {
  auto* signature_slot = reinterpret_cast<ID3D12RootSignature**>(WriteCommand(
      Command::kD3DSetGraphicsRootSignature, sizeof(ID3D12RootSignature*)));
  *signature_slot = root_signature;
}
inline void SetDescriptorHeaps(
ID3D12DescriptorHeap* cbv_srv_uav_descriptor_heap,
ID3D12DescriptorHeap* sampler_descriptor_heap) {
// Writes a kSetDescriptorHeaps command and stores both descriptor heap
// pointers in its argument structure.
void SetDescriptorHeaps(ID3D12DescriptorHeap* cbv_srv_uav_descriptor_heap,
                        ID3D12DescriptorHeap* sampler_descriptor_heap) {
  auto* heap_args = reinterpret_cast<SetDescriptorHeapsArguments*>(WriteCommand(
      Command::kSetDescriptorHeaps, sizeof(SetDescriptorHeapsArguments)));
  heap_args->cbv_srv_uav_descriptor_heap = cbv_srv_uav_descriptor_heap;
  heap_args->sampler_descriptor_heap = sampler_descriptor_heap;
}
// Records a kD3DSetPipelineState command whose payload is the given
// ID3D12PipelineState pointer.
// NOTE(review): only the raw pointer is stored and no reference is taken in
// this function, so the pipeline state object presumably has to outlive the
// recorded command - confirm.
inline void D3DSetPipelineState(ID3D12PipelineState* pipeline_state) {
void D3DSetPipelineState(ID3D12PipelineState* pipeline_state) {
// WriteCommand is asked for sizeof(ID3D12PipelineState*) bytes of payload, and
// its result is treated as the storage for that single pointer.
auto& arg = *reinterpret_cast<ID3D12PipelineState**>(WriteCommand(
Command::kD3DSetPipelineState, sizeof(ID3D12PipelineState*)));
arg = pipeline_state;
}
// Records a kSetPipelineStateHandle command whose payload is an opaque
// pipeline handle rather than an ID3D12PipelineState pointer.
// NOTE(review): presumably this is the handle written by
// PipelineCache::ConfigurePipeline, resolved to the actual state object later
// so that pipelines still being created can be referenced - confirm against
// the code that consumes the command.
inline void SetPipelineStateHandle(void* pipeline_state_handle) {
void SetPipelineStateHandle(void* pipeline_state_handle) {
// The handle is stored verbatim; it is not dereferenced in this function.
auto& arg = *reinterpret_cast<void**>(
WriteCommand(Command::kSetPipelineStateHandle, sizeof(void*)));
arg = pipeline_state_handle;
}
inline void D3DSetSamplePositions(
UINT num_samples_per_pixel, UINT num_pixels,
const D3D12_SAMPLE_POSITION* sample_positions) {
void D3DSetSamplePositions(UINT num_samples_per_pixel, UINT num_pixels,
const D3D12_SAMPLE_POSITION* sample_positions) {
auto& args = *reinterpret_cast<D3DSetSamplePositionsArguments*>(
WriteCommand(Command::kD3DSetSamplePositions,
sizeof(D3DSetSamplePositionsArguments)));

View File

@ -43,10 +43,10 @@ DEFINE_bool(
"D3D12");
DEFINE_int32(
d3d12_pipeline_creation_threads, -1,
"Number of threads used for graphics pipeline state object creation. -1 to "
"calculate automatically (75% of logical CPU cores), a positive number to "
"specify the number of threads explicitly (up to the number of logical CPU "
"cores), 0 to disable multithreaded pipeline state object creation.",
"Number of threads used for graphics pipeline creation. -1 to calculate "
"automatically (75% of logical CPU cores), a positive number to specify "
"the number of threads explicitly (up to the number of logical CPU cores), "
"0 to disable multithreaded pipeline creation.",
"D3D12");
DEFINE_bool(d3d12_tessellation_wireframe, false,
"Display tessellated surfaces as wireframe for debugging.",
@ -125,8 +125,8 @@ bool PipelineCache::Initialize() {
logical_processor_count = 6;
}
// Initialize creation thread synchronization data even if not using creation
// threads because they may be used anyway to create pipeline state objects
// from the storage.
// threads because they may be used anyway to create pipelines from the
// storage.
creation_threads_busy_ = 0;
creation_completion_event_ =
xe::threading::Event::CreateManualResetEvent(true);
@ -145,7 +145,7 @@ bool PipelineCache::Initialize() {
for (size_t i = 0; i < creation_thread_count; ++i) {
std::unique_ptr<xe::threading::Thread> creation_thread =
xe::threading::Thread::Create({}, [this, i]() { CreationThread(i); });
creation_thread->set_name("D3D12 Pipeline States");
creation_thread->set_name("D3D12 Pipelines");
creation_threads_.push_back(std::move(creation_thread));
}
}
@ -184,13 +184,12 @@ void PipelineCache::ClearCache(bool shutting_down) {
}
ShutdownShaderStorage();
// Remove references to the current pipeline state object.
current_pipeline_state_ = nullptr;
// Remove references to the current pipeline.
current_pipeline_ = nullptr;
if (!creation_threads_.empty()) {
// Empty the pipeline state object creation queue and make sure there are no
// threads currently creating pipeline state objects because pipeline states
// are going to be deleted.
// Empty the pipeline creation queue and make sure there are no threads
// currently creating pipelines because pipelines are going to be deleted.
bool await_creation_completion_event = false;
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
@ -207,13 +206,13 @@ void PipelineCache::ClearCache(bool shutting_down) {
}
}
// Destroy all pipeline state objects.
for (auto it : pipeline_states_) {
// Destroy all pipelines.
for (auto it : pipelines_) {
it.second->state->Release();
delete it.second;
}
pipeline_states_.clear();
COUNT_profile_set("gpu/pipeline_cache/pipeline_states", 0);
pipelines_.clear();
COUNT_profile_set("gpu/pipeline_cache/pipelines", 0);
// Destroy all shaders.
command_processor_.NotifyShaderBindingsLayoutUIDsInvalidated();
@ -401,7 +400,7 @@ void PipelineCache::InitializeShaderStorage(
D3D12Shader* shader =
new D3D12Shader(shader_header.type, ucode_data_hash,
ucode_dwords.data(), shader_header.ucode_dword_count);
shaders_.insert({ucode_data_hash, shader});
shaders_.emplace(ucode_data_hash, shader);
// Create new threads if the currently existing threads can't keep up with
// file reading, but not more than the number of logical processors minus
// one.
@ -459,72 +458,66 @@ void PipelineCache::InitializeShaderStorage(
}
// 'DXRO' or 'DXRT'.
const uint32_t pipeline_state_storage_magic_api =
const uint32_t pipeline_storage_magic_api =
edram_rov_used_ ? 0x4F525844 : 0x54525844;
// Initialize the pipeline state storage stream.
uint64_t pipeline_state_storage_initialization_start_ =
// Initialize the pipeline storage stream.
uint64_t pipeline_storage_initialization_start_ =
xe::Clock::QueryHostTickCount();
auto pipeline_state_storage_file_path =
auto pipeline_storage_file_path =
shader_storage_shareable_root /
fmt::format("{:08X}.{}.d3d12.xpso", title_id,
edram_rov_used_ ? "rov" : "rtv");
pipeline_state_storage_file_ =
xe::filesystem::OpenFile(pipeline_state_storage_file_path, "a+b");
if (!pipeline_state_storage_file_) {
pipeline_storage_file_ =
xe::filesystem::OpenFile(pipeline_storage_file_path, "a+b");
if (!pipeline_storage_file_) {
XELOGE(
"Failed to open the Direct3D 12 pipeline state description storage "
"file for writing, persistent shader storage will be disabled: {}",
xe::path_to_utf8(pipeline_state_storage_file_path));
"Failed to open the Direct3D 12 pipeline description storage file for "
"writing, persistent shader storage will be disabled: {}",
xe::path_to_utf8(pipeline_storage_file_path));
fclose(shader_storage_file_);
shader_storage_file_ = nullptr;
return;
}
pipeline_state_storage_file_flush_needed_ = false;
pipeline_storage_file_flush_needed_ = false;
// 'XEPS'.
const uint32_t pipeline_state_storage_magic = 0x53504558;
const uint32_t pipeline_storage_magic = 0x53504558;
struct {
uint32_t magic;
uint32_t magic_api;
uint32_t version_swapped;
} pipeline_state_storage_file_header;
if (fread(&pipeline_state_storage_file_header,
sizeof(pipeline_state_storage_file_header), 1,
pipeline_state_storage_file_) &&
pipeline_state_storage_file_header.magic ==
pipeline_state_storage_magic &&
pipeline_state_storage_file_header.magic_api ==
pipeline_state_storage_magic_api &&
xe::byte_swap(pipeline_state_storage_file_header.version_swapped) ==
} pipeline_storage_file_header;
if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
1, pipeline_storage_file_) &&
pipeline_storage_file_header.magic == pipeline_storage_magic &&
pipeline_storage_file_header.magic_api == pipeline_storage_magic_api &&
xe::byte_swap(pipeline_storage_file_header.version_swapped) ==
PipelineDescription::kVersion) {
uint64_t pipeline_state_storage_valid_bytes =
sizeof(pipeline_state_storage_file_header);
// Enqueue pipeline state descriptions written by previous Xenia executions
// until the end of the file or until a corrupted one is detected.
xe::filesystem::Seek(pipeline_state_storage_file_, 0, SEEK_END);
int64_t pipeline_state_storage_told_end =
xe::filesystem::Tell(pipeline_state_storage_file_);
size_t pipeline_state_storage_told_count =
size_t(pipeline_state_storage_told_end >=
int64_t(pipeline_state_storage_valid_bytes)
? (uint64_t(pipeline_state_storage_told_end) -
pipeline_state_storage_valid_bytes) /
sizeof(PipelineStoredDescription)
: 0);
if (pipeline_state_storage_told_count &&
xe::filesystem::Seek(pipeline_state_storage_file_,
int64_t(pipeline_state_storage_valid_bytes),
SEEK_SET)) {
uint64_t pipeline_storage_valid_bytes =
sizeof(pipeline_storage_file_header);
// Enqueue pipeline descriptions written by previous Xenia executions until
// the end of the file or until a corrupted one is detected.
xe::filesystem::Seek(pipeline_storage_file_, 0, SEEK_END);
int64_t pipeline_storage_told_end =
xe::filesystem::Tell(pipeline_storage_file_);
size_t pipeline_storage_told_count = size_t(
pipeline_storage_told_end >= int64_t(pipeline_storage_valid_bytes)
? (uint64_t(pipeline_storage_told_end) -
pipeline_storage_valid_bytes) /
sizeof(PipelineStoredDescription)
: 0);
if (pipeline_storage_told_count &&
xe::filesystem::Seek(pipeline_storage_file_,
int64_t(pipeline_storage_valid_bytes), SEEK_SET)) {
std::vector<PipelineStoredDescription> pipeline_stored_descriptions;
pipeline_stored_descriptions.resize(pipeline_state_storage_told_count);
pipeline_stored_descriptions.resize(fread(
pipeline_stored_descriptions.data(),
sizeof(PipelineStoredDescription), pipeline_state_storage_told_count,
pipeline_state_storage_file_));
pipeline_stored_descriptions.resize(pipeline_storage_told_count);
pipeline_stored_descriptions.resize(
fread(pipeline_stored_descriptions.data(),
sizeof(PipelineStoredDescription), pipeline_storage_told_count,
pipeline_storage_file_));
if (!pipeline_stored_descriptions.empty()) {
// Launch additional creation threads to use all cores to create
// pipeline state objects faster. Will also be using the main thread, so
// minus 1.
// pipelines faster. Will also be using the main thread, so minus 1.
size_t creation_thread_original_count = creation_threads_.size();
size_t creation_thread_needed_count =
std::max(std::min(pipeline_stored_descriptions.size(),
@ -538,10 +531,10 @@ void PipelineCache::InitializeShaderStorage(
{}, [this, creation_thread_index]() {
CreationThread(creation_thread_index);
});
creation_thread->set_name("D3D12 Pipeline States Additional");
creation_thread->set_name("D3D12 Pipelines");
creation_threads_.push_back(std::move(creation_thread));
}
size_t pipeline_states_created = 0;
size_t pipelines_created = 0;
for (const PipelineStoredDescription& pipeline_stored_description :
pipeline_stored_descriptions) {
const PipelineDescription& pipeline_description =
@ -553,23 +546,21 @@ void PipelineCache::InitializeShaderStorage(
0) != pipeline_stored_description.description_hash) {
break;
}
pipeline_state_storage_valid_bytes +=
sizeof(PipelineStoredDescription);
// Skip already known pipeline states - those have already been
// enqueued.
auto found_range = pipeline_states_.equal_range(
pipeline_storage_valid_bytes += sizeof(PipelineStoredDescription);
// Skip already known pipelines - those have already been enqueued.
auto found_range = pipelines_.equal_range(
pipeline_stored_description.description_hash);
bool pipeline_state_found = false;
bool pipeline_found = false;
for (auto it = found_range.first; it != found_range.second; ++it) {
PipelineState* found_pipeline_state = it->second;
if (!std::memcmp(&found_pipeline_state->description.description,
Pipeline* found_pipeline = it->second;
if (!std::memcmp(&found_pipeline->description.description,
&pipeline_description,
sizeof(pipeline_description))) {
pipeline_state_found = true;
pipeline_found = true;
break;
}
}
if (pipeline_state_found) {
if (pipeline_found) {
continue;
}
@ -606,36 +597,33 @@ void PipelineCache::InitializeShaderStorage(
std::memcpy(&pipeline_runtime_description.description,
&pipeline_description, sizeof(pipeline_description));
PipelineState* new_pipeline_state = new PipelineState;
new_pipeline_state->state = nullptr;
std::memcpy(&new_pipeline_state->description,
&pipeline_runtime_description,
Pipeline* new_pipeline = new Pipeline;
new_pipeline->state = nullptr;
std::memcpy(&new_pipeline->description, &pipeline_runtime_description,
sizeof(pipeline_runtime_description));
pipeline_states_.insert(
std::make_pair(pipeline_stored_description.description_hash,
new_pipeline_state));
COUNT_profile_set("gpu/pipeline_cache/pipeline_states",
pipeline_states_.size());
pipelines_.emplace(pipeline_stored_description.description_hash,
new_pipeline);
COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
if (!creation_threads_.empty()) {
// Submit the pipeline for creation to any available thread.
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_queue_.push_back(new_pipeline_state);
creation_queue_.push_back(new_pipeline);
}
creation_request_cond_.notify_one();
} else {
new_pipeline_state->state =
CreateD3D12PipelineState(pipeline_runtime_description);
new_pipeline->state =
CreateD3D12Pipeline(pipeline_runtime_description);
}
++pipeline_states_created;
++pipelines_created;
}
CreateQueuedPipelineStatesOnProcessorThread();
CreateQueuedPipelinesOnProcessorThread();
if (creation_threads_.size() > creation_thread_original_count) {
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_threads_shutdown_from_ = creation_thread_original_count;
// Assuming the queue is empty because of
// CreateQueuedPipelineStatesOnProcessorThread.
// CreateQueuedPipelinesOnProcessorThread.
}
creation_request_cond_.notify_all();
while (creation_threads_.size() > creation_thread_original_count) {
@ -663,26 +651,23 @@ void PipelineCache::InitializeShaderStorage(
}
}
XELOGGPU(
"Created {} graphics pipeline state objects from the storage in {} "
"milliseconds",
pipeline_states_created,
"Created {} graphics pipelines from the storage in {} milliseconds",
pipelines_created,
(xe::Clock::QueryHostTickCount() -
pipeline_state_storage_initialization_start_) *
pipeline_storage_initialization_start_) *
1000 / xe::Clock::QueryHostTickFrequency());
}
}
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_,
pipeline_state_storage_valid_bytes);
xe::filesystem::TruncateStdioFile(pipeline_storage_file_,
pipeline_storage_valid_bytes);
} else {
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, 0);
pipeline_state_storage_file_header.magic = pipeline_state_storage_magic;
pipeline_state_storage_file_header.magic_api =
pipeline_state_storage_magic_api;
pipeline_state_storage_file_header.version_swapped =
xe::filesystem::TruncateStdioFile(pipeline_storage_file_, 0);
pipeline_storage_file_header.magic = pipeline_storage_magic;
pipeline_storage_file_header.magic_api = pipeline_storage_magic_api;
pipeline_storage_file_header.version_swapped =
xe::byte_swap(PipelineDescription::kVersion);
fwrite(&pipeline_state_storage_file_header,
sizeof(pipeline_state_storage_file_header), 1,
pipeline_state_storage_file_);
fwrite(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
1, pipeline_storage_file_);
}
shader_storage_root_ = storage_root;
@ -690,7 +675,7 @@ void PipelineCache::InitializeShaderStorage(
// Start the storage writing thread.
storage_write_flush_shaders_ = false;
storage_write_flush_pipeline_states_ = false;
storage_write_flush_pipelines_ = false;
storage_write_thread_shutdown_ = false;
storage_write_thread_ =
xe::threading::Thread::Create({}, [this]() { StorageWriteThread(); });
@ -707,12 +692,12 @@ void PipelineCache::ShutdownShaderStorage() {
storage_write_thread_.reset();
}
storage_write_shader_queue_.clear();
storage_write_pipeline_state_queue_.clear();
storage_write_pipeline_queue_.clear();
if (pipeline_state_storage_file_) {
fclose(pipeline_state_storage_file_);
pipeline_state_storage_file_ = nullptr;
pipeline_state_storage_file_flush_needed_ = false;
if (pipeline_storage_file_) {
fclose(pipeline_storage_file_);
pipeline_storage_file_ = nullptr;
pipeline_storage_file_flush_needed_ = false;
}
if (shader_storage_file_) {
@ -727,30 +712,29 @@ void PipelineCache::ShutdownShaderStorage() {
void PipelineCache::EndSubmission() {
if (shader_storage_file_flush_needed_ ||
pipeline_state_storage_file_flush_needed_) {
pipeline_storage_file_flush_needed_) {
{
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
if (shader_storage_file_flush_needed_) {
storage_write_flush_shaders_ = true;
}
if (pipeline_state_storage_file_flush_needed_) {
storage_write_flush_pipeline_states_ = true;
if (pipeline_storage_file_flush_needed_) {
storage_write_flush_pipelines_ = true;
}
}
storage_write_request_cond_.notify_one();
shader_storage_file_flush_needed_ = false;
pipeline_state_storage_file_flush_needed_ = false;
pipeline_storage_file_flush_needed_ = false;
}
if (!creation_threads_.empty()) {
CreateQueuedPipelineStatesOnProcessorThread();
// Await creation of all queued pipeline state objects.
CreateQueuedPipelinesOnProcessorThread();
// Await creation of all queued pipelines.
bool await_creation_completion_event;
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
// Assuming the creation queue is already empty (because the processor
// thread also worked on creating the leftover pipeline state objects), so
// only check if there are threads with pipeline state objects currently
// being created.
// thread also worked on creating the leftover pipelines), so only check
// if there are threads with pipelines currently being created.
await_creation_completion_event = creation_threads_busy_ != 0;
if (await_creation_completion_event) {
creation_completion_event_->Reset();
@ -764,7 +748,7 @@ void PipelineCache::EndSubmission() {
}
}
bool PipelineCache::IsCreatingPipelineStates() {
bool PipelineCache::IsCreatingPipelines() {
if (creation_threads_.empty()) {
return false;
}
@ -789,7 +773,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
// again.
D3D12Shader* shader =
new D3D12Shader(shader_type, data_hash, host_address, dword_count);
shaders_.insert({data_hash, shader});
shaders_.emplace(data_hash, shader);
return shader;
}
@ -797,11 +781,11 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid()
const {
// If the values this function returns are changed, INVALIDATE THE SHADER
// STORAGE (increase kVersion for BOTH shaders and pipeline states)! The
// exception is when the function originally returned "unsupported", but
// started to return a valid value (in this case the shader wouldn't be cached
// in the first place). Otherwise games will not be able to locate shaders for
// draws for which the host vertex shader type has changed!
// STORAGE (increase kVersion for BOTH shaders and pipelines)! The exception
// is when the function originally returned "unsupported", but started to
// return a valid value (in this case the shader wouldn't be cached in the
// first place). Otherwise games will not be able to locate shaders for draws
// for which the host vertex shader type has changed!
const auto& regs = register_file_;
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
if (!xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode,
@ -928,13 +912,12 @@ bool PipelineCache::ConfigurePipeline(
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_state_handle_out,
ID3D12RootSignature** root_signature_out) {
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
assert_not_null(pipeline_state_handle_out);
assert_not_null(pipeline_handle_out);
assert_not_null(root_signature_out);
PipelineRuntimeDescription runtime_description;
@ -945,24 +928,24 @@ bool PipelineCache::ConfigurePipeline(
}
PipelineDescription& description = runtime_description.description;
if (current_pipeline_state_ != nullptr &&
!std::memcmp(&current_pipeline_state_->description.description,
&description, sizeof(description))) {
*pipeline_state_handle_out = current_pipeline_state_;
if (current_pipeline_ != nullptr &&
!std::memcmp(&current_pipeline_->description.description, &description,
sizeof(description))) {
*pipeline_handle_out = current_pipeline_;
*root_signature_out = runtime_description.root_signature;
return true;
}
// Find an existing pipeline state object in the cache.
// Find an existing pipeline in the cache.
uint64_t hash = XXH64(&description, sizeof(description), 0);
auto found_range = pipeline_states_.equal_range(hash);
auto found_range = pipelines_.equal_range(hash);
for (auto it = found_range.first; it != found_range.second; ++it) {
PipelineState* found_pipeline_state = it->second;
if (!std::memcmp(&found_pipeline_state->description.description,
&description, sizeof(description))) {
current_pipeline_state_ = found_pipeline_state;
*pipeline_state_handle_out = found_pipeline_state;
*root_signature_out = found_pipeline_state->description.root_signature;
Pipeline* found_pipeline = it->second;
if (!std::memcmp(&found_pipeline->description.description, &description,
sizeof(description))) {
current_pipeline_ = found_pipeline;
*pipeline_handle_out = found_pipeline;
*root_signature_out = found_pipeline->description.root_signature;
return true;
}
}
@ -973,33 +956,32 @@ bool PipelineCache::ConfigurePipeline(
return false;
}
PipelineState* new_pipeline_state = new PipelineState;
new_pipeline_state->state = nullptr;
std::memcpy(&new_pipeline_state->description, &runtime_description,
Pipeline* new_pipeline = new Pipeline;
new_pipeline->state = nullptr;
std::memcpy(&new_pipeline->description, &runtime_description,
sizeof(runtime_description));
pipeline_states_.insert(std::make_pair(hash, new_pipeline_state));
COUNT_profile_set("gpu/pipeline_cache/pipeline_states",
pipeline_states_.size());
pipelines_.emplace(hash, new_pipeline);
COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
if (!creation_threads_.empty()) {
// Submit the pipeline state object for creation to any available thread.
// Submit the pipeline for creation to any available thread.
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_queue_.push_back(new_pipeline_state);
creation_queue_.push_back(new_pipeline);
}
creation_request_cond_.notify_one();
} else {
new_pipeline_state->state = CreateD3D12PipelineState(runtime_description);
new_pipeline->state = CreateD3D12Pipeline(runtime_description);
}
if (pipeline_state_storage_file_) {
if (pipeline_storage_file_) {
assert_not_null(storage_write_thread_);
pipeline_state_storage_file_flush_needed_ = true;
pipeline_storage_file_flush_needed_ = true;
{
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
storage_write_pipeline_state_queue_.emplace_back();
storage_write_pipeline_queue_.emplace_back();
PipelineStoredDescription& stored_description =
storage_write_pipeline_state_queue_.back();
storage_write_pipeline_queue_.back();
stored_description.description_hash = hash;
std::memcpy(&stored_description.description, &description,
sizeof(description));
@ -1007,8 +989,8 @@ bool PipelineCache::ConfigurePipeline(
storage_write_request_cond_.notify_all();
}
current_pipeline_state_ = new_pipeline_state;
*pipeline_state_handle_out = new_pipeline_state;
current_pipeline_ = new_pipeline;
*pipeline_handle_out = new_pipeline;
*root_signature_out = runtime_description.root_signature;
return true;
}
@ -1135,8 +1117,8 @@ bool PipelineCache::TranslateShader(
std::memcpy(
texture_binding_layouts_.data() + new_uid.vector_span_offset,
texture_bindings, texture_binding_layout_bytes);
texture_binding_layout_map_.insert(
{texture_binding_layout_hash, new_uid});
texture_binding_layout_map_.emplace(texture_binding_layout_hash,
new_uid);
}
}
if (bindless_sampler_count) {
@ -1178,8 +1160,8 @@ bool PipelineCache::TranslateShader(
vector_bindless_sampler_layout[i] =
sampler_bindings[i].bindless_descriptor_index;
}
bindless_sampler_layout_map_.insert(
{bindless_sampler_layout_hash, new_uid});
bindless_sampler_layout_map_.emplace(bindless_sampler_layout_hash,
new_uid);
}
}
}
@ -1507,8 +1489,7 @@ bool PipelineCache::GetCurrentStateDescription(
/* 16 */ PipelineBlendFactor::kSrcAlphaSat,
};
// Like kBlendFactorMap, but with color modes changed to alpha. Some
// pipeline state objects aren't created in Prey because a color mode is
// used for alpha.
// pipelines aren't created in Prey because a color mode is used for alpha.
static const PipelineBlendFactor kBlendFactorAlphaMap[32] = {
/* 0 */ PipelineBlendFactor::kZero,
/* 1 */ PipelineBlendFactor::kOne,
@ -1568,18 +1549,16 @@ bool PipelineCache::GetCurrentStateDescription(
return true;
}
ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState(
ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
const PipelineRuntimeDescription& runtime_description) {
const PipelineDescription& description = runtime_description.description;
if (runtime_description.pixel_shader != nullptr) {
XELOGGPU(
"Creating graphics pipeline state with VS {:016X}"
", PS {:016X}",
runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash());
XELOGGPU("Creating graphics pipeline with VS {:016X}, PS {:016X}",
runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash());
} else {
XELOGGPU("Creating graphics pipeline state with VS {:016X}",
XELOGGPU("Creating graphics pipeline with VS {:016X}",
runtime_description.vertex_shader->ucode_data_hash());
}
@ -1892,20 +1871,18 @@ ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState(
}
}
// Create the pipeline state object.
// Create the D3D12 pipeline state object.
auto device =
command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();
ID3D12PipelineState* state;
if (FAILED(device->CreateGraphicsPipelineState(&state_desc,
IID_PPV_ARGS(&state)))) {
if (runtime_description.pixel_shader != nullptr) {
XELOGE(
"Failed to create graphics pipeline state with VS {:016X}"
", PS {:016X}",
runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash());
XELOGE("Failed to create graphics pipeline with VS {:016X}, PS {:016X}",
runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash());
} else {
XELOGE("Failed to create graphics pipeline state with VS {:016X}",
XELOGE("Failed to create graphics pipeline with VS {:016X}",
runtime_description.vertex_shader->ucode_data_hash());
}
return nullptr;
@ -1932,7 +1909,7 @@ void PipelineCache::StorageWriteThread() {
ucode_guest_endian.reserve(0xFFFF);
bool flush_shaders = false;
bool flush_pipeline_states = false;
bool flush_pipelines = false;
while (true) {
if (flush_shaders) {
@ -1940,15 +1917,15 @@ void PipelineCache::StorageWriteThread() {
assert_not_null(shader_storage_file_);
fflush(shader_storage_file_);
}
if (flush_pipeline_states) {
flush_pipeline_states = false;
assert_not_null(pipeline_state_storage_file_);
fflush(pipeline_state_storage_file_);
if (flush_pipelines) {
flush_pipelines = false;
assert_not_null(pipeline_storage_file_);
fflush(pipeline_storage_file_);
}
std::pair<const Shader*, reg::SQ_PROGRAM_CNTL> shader_pair = {};
PipelineStoredDescription pipeline_description;
bool write_pipeline_state = false;
bool write_pipeline = false;
{
std::unique_lock<std::mutex> lock(storage_write_request_lock_);
if (storage_write_thread_shutdown_) {
@ -1961,17 +1938,17 @@ void PipelineCache::StorageWriteThread() {
storage_write_flush_shaders_ = false;
flush_shaders = true;
}
if (!storage_write_pipeline_state_queue_.empty()) {
if (!storage_write_pipeline_queue_.empty()) {
std::memcpy(&pipeline_description,
&storage_write_pipeline_state_queue_.front(),
&storage_write_pipeline_queue_.front(),
sizeof(pipeline_description));
storage_write_pipeline_state_queue_.pop_front();
write_pipeline_state = true;
} else if (storage_write_flush_pipeline_states_) {
storage_write_flush_pipeline_states_ = false;
flush_pipeline_states = true;
storage_write_pipeline_queue_.pop_front();
write_pipeline = true;
} else if (storage_write_flush_pipelines_) {
storage_write_flush_pipelines_ = false;
flush_pipelines = true;
}
if (!shader_pair.first && !write_pipeline_state) {
if (!shader_pair.first && !write_pipeline) {
storage_write_request_cond_.wait(lock);
continue;
}
@ -1998,27 +1975,26 @@ void PipelineCache::StorageWriteThread() {
}
}
if (write_pipeline_state) {
assert_not_null(pipeline_state_storage_file_);
if (write_pipeline) {
assert_not_null(pipeline_storage_file_);
fwrite(&pipeline_description, sizeof(pipeline_description), 1,
pipeline_state_storage_file_);
pipeline_storage_file_);
}
}
}
void PipelineCache::CreationThread(size_t thread_index) {
while (true) {
PipelineState* pipeline_state_to_create = nullptr;
Pipeline* pipeline_to_create = nullptr;
// Check if need to shut down or set the completion event and dequeue the
// pipeline state if there is any.
// pipeline if there is any.
{
std::unique_lock<std::mutex> lock(creation_request_lock_);
if (thread_index >= creation_threads_shutdown_from_ ||
creation_queue_.empty()) {
if (creation_completion_set_event_ && creation_threads_busy_ == 0) {
// Last pipeline state object in the queue created - signal the event
// if requested.
// Last pipeline in the queue created - signal the event if requested.
creation_completion_set_event_ = false;
creation_completion_event_->Set();
}
@ -2028,23 +2004,22 @@ void PipelineCache::CreationThread(size_t thread_index) {
creation_request_cond_.wait(lock);
continue;
}
// Take the pipeline state from the queue and increment the busy thread
// count until the pipeline state object is created - other threads must
// be able to dequeue requests, but can't set the completion event until
// the pipeline state objects are fully created (rather than just started
// creating).
pipeline_state_to_create = creation_queue_.front();
// Take the pipeline from the queue and increment the busy thread count
// until the pipeline is created - other threads must be able to dequeue
// requests, but can't set the completion event until the pipelines are
// fully created (rather than just started creating).
pipeline_to_create = creation_queue_.front();
creation_queue_.pop_front();
++creation_threads_busy_;
}
// Create the D3D12 pipeline state object.
pipeline_state_to_create->state =
CreateD3D12PipelineState(pipeline_state_to_create->description);
pipeline_to_create->state =
CreateD3D12Pipeline(pipeline_to_create->description);
// Pipeline state object created - the thread is not busy anymore, safe to
// set the completion event if needed (at the next iteration, or in some
// other thread).
// Pipeline created - the thread is not busy anymore, safe to set the
// completion event if needed (at the next iteration, or in some other
// thread).
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
--creation_threads_busy_;
@ -2052,20 +2027,20 @@ void PipelineCache::CreationThread(size_t thread_index) {
}
}
// Drains creation_queue_ on the calling thread: entries are popped one at a
// time and the D3D12 pipeline state object for each is created here, until the
// queue is observed empty. Asserts that creation threads exist.
// Unlike CreationThread, this function does not touch creation_threads_busy_
// or the completion event.
void PipelineCache::CreateQueuedPipelineStatesOnProcessorThread() {
void PipelineCache::CreateQueuedPipelinesOnProcessorThread() {
assert_false(creation_threads_.empty());
while (true) {
PipelineState* pipeline_state_to_create;
Pipeline* pipeline_to_create;
{
// The lock is held only while dequeuing, so creation threads can keep taking
// other entries while this thread is creating a pipeline.
std::lock_guard<std::mutex> lock(creation_request_lock_);
if (creation_queue_.empty()) {
break;
}
pipeline_state_to_create = creation_queue_.front();
pipeline_to_create = creation_queue_.front();
creation_queue_.pop_front();
}
// Creation happens outside the lock; the result stored in `state` may be
// nullptr if creation has failed.
pipeline_state_to_create->state =
CreateD3D12PipelineState(pipeline_state_to_create->description);
pipeline_to_create->state =
CreateD3D12Pipeline(pipeline_to_create->description);
}
}

View File

@ -55,7 +55,7 @@ class PipelineCache {
void ShutdownShaderStorage();
void EndSubmission();
bool IsCreatingPipelineStates();
bool IsCreatingPipelines();
D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address,
const uint32_t* host_address, uint32_t dword_count);
@ -74,14 +74,12 @@ class PipelineCache {
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_state_handle_out,
ID3D12RootSignature** root_signature_out);
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
// Returns a pipeline state object with deferred creation by its handle. May
// return nullptr if failed to create the pipeline state object.
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
void* handle) const {
return reinterpret_cast<const PipelineState*>(handle)->state;
// Returns the D3D12 pipeline state object of a pipeline with deferred creation
// by its handle (the value ConfigurePipeline writes to pipeline_handle_out).
// May return nullptr if creation of the pipeline has failed; the state is also
// nullptr from the moment the pipeline is queued until a creation thread has
// finished creating it. The handle itself is dereferenced without a null
// check.
ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
return reinterpret_cast<const Pipeline*>(handle)->state;
}
private:
@ -238,7 +236,7 @@ class PipelineCache {
const RenderTargetCache::PipelineRenderTarget render_targets[5],
PipelineRuntimeDescription& runtime_description_out);
ID3D12PipelineState* CreateD3D12PipelineState(
ID3D12PipelineState* CreateD3D12Pipeline(
const PipelineRuntimeDescription& runtime_description);
D3D12CommandProcessor& command_processor_;
@ -286,21 +284,20 @@ class PipelineCache {
// Xenos pixel shader provided.
std::vector<uint8_t> depth_only_pixel_shader_;
struct PipelineState {
struct Pipeline {
// nullptr if creation has failed.
ID3D12PipelineState* state;
PipelineRuntimeDescription description;
};
// All previously generated pipeline state objects identified by hash and the
// description.
std::unordered_multimap<uint64_t, PipelineState*,
// All previously generated pipelines identified by hash and the description.
std::unordered_multimap<uint64_t, Pipeline*,
xe::hash::IdentityHasher<uint64_t>>
pipeline_states_;
pipelines_;
// Previously used pipeline state object. This matches our current state
// settings and allows us to quickly(ish) reuse the pipeline state if no
// registers have changed.
PipelineState* current_pipeline_state_ = nullptr;
// Previously used pipeline. This matches our current state settings and
// allows us to quickly(ish) reuse the pipeline if no registers have been
// changed.
Pipeline* current_pipeline_ = nullptr;
// Currently open shader storage path.
std::filesystem::path shader_storage_root_;
@ -310,10 +307,9 @@ class PipelineCache {
FILE* shader_storage_file_ = nullptr;
bool shader_storage_file_flush_needed_ = false;
// Pipeline state storage output stream, for preload in the next emulator
// runs.
FILE* pipeline_state_storage_file_ = nullptr;
bool pipeline_state_storage_file_flush_needed_ = false;
// Pipeline storage output stream, for preload in the next emulator runs.
FILE* pipeline_storage_file_ = nullptr;
bool pipeline_storage_file_flush_needed_ = false;
// Thread for asynchronous writing to the storage streams.
void StorageWriteThread();
@ -323,28 +319,27 @@ class PipelineCache {
// thread is notified about its change via storage_write_request_cond_.
std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>>
storage_write_shader_queue_;
std::deque<PipelineStoredDescription> storage_write_pipeline_state_queue_;
std::deque<PipelineStoredDescription> storage_write_pipeline_queue_;
bool storage_write_flush_shaders_ = false;
bool storage_write_flush_pipeline_states_ = false;
bool storage_write_flush_pipelines_ = false;
bool storage_write_thread_shutdown_ = false;
std::unique_ptr<xe::threading::Thread> storage_write_thread_;
// Pipeline state object creation threads.
// Pipeline creation threads.
void CreationThread(size_t thread_index);
void CreateQueuedPipelineStatesOnProcessorThread();
void CreateQueuedPipelinesOnProcessorThread();
std::mutex creation_request_lock_;
std::condition_variable creation_request_cond_;
// Protected with creation_request_lock_, notify_one creation_request_cond_
// when set.
std::deque<PipelineState*> creation_queue_;
// Number of threads that are currently creating a pipeline state object -
// incremented when a pipeline state object is dequeued (the completion event
// can't be triggered before this is zero). Protected with
// creation_request_lock_.
std::deque<Pipeline*> creation_queue_;
// Number of threads that are currently creating a pipeline - incremented when
// a pipeline is dequeued (the completion event can't be triggered before this
// is zero). Protected with creation_request_lock_.
size_t creation_threads_busy_ = 0;
// Manual-reset event set when the last queued pipeline state object is
// created and there are no more pipeline state objects to create. This is
// triggered by the thread creating the last pipeline state object.
// Manual-reset event set when the last queued pipeline is created and there
// are no more pipelines to create. This is triggered by the thread creating
// the last pipeline.
std::unique_ptr<xe::threading::Event> creation_completion_event_;
// Whether setting the event on completion is queued. Protected with
// creation_request_lock_, notify_one creation_request_cond_ when set.

View File

@ -25,15 +25,6 @@ project("xenia-gpu-d3d12-trace-viewer")
kind("WindowedApp")
language("C++")
links({
"aes_128",
"capstone",
"dxbc",
"fmt",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xenia-apu",
"xenia-apu-nop",
"xenia-base",
@ -48,6 +39,17 @@ project("xenia-gpu-d3d12-trace-viewer")
"xenia-ui",
"xenia-ui-d3d12",
"xenia-vfs",
})
links({
"aes_128",
"capstone",
"dxbc",
"fmt",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xxhash",
})
files({
@ -70,15 +72,6 @@ project("xenia-gpu-d3d12-trace-dump")
kind("ConsoleApp")
language("C++")
links({
"aes_128",
"capstone",
"dxbc",
"fmt",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xenia-apu",
"xenia-apu-nop",
"xenia-base",
@ -93,6 +86,17 @@ project("xenia-gpu-d3d12-trace-dump")
"xenia-ui",
"xenia-ui-d3d12",
"xenia-vfs",
})
links({
"aes_128",
"capstone",
"dxbc",
"fmt",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xxhash",
})
files({
@ -107,4 +111,4 @@ project("xenia-gpu-d3d12-trace-dump")
"2>&1",
"1>scratch/stdout-trace-dump.txt",
})
end
end

View File

@ -454,8 +454,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
// again and again and exit.
if (!conversion_needed || converted_index_count == 0) {
converted_indices.gpu_address = 0;
converted_indices_cache_.insert(
std::make_pair(converted_indices.key.value, converted_indices));
converted_indices_cache_.emplace(converted_indices.key.value,
converted_indices);
memory_regions_used_ |= memory_regions_used_bits;
return converted_index_count == 0 ? ConversionResult::kPrimitiveEmpty
: ConversionResult::kConversionNotNeeded;
@ -670,8 +670,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
// Cache and return the indices.
converted_indices.gpu_address = gpu_address;
converted_indices_cache_.insert(
std::make_pair(converted_indices.key.value, converted_indices));
converted_indices_cache_.emplace(converted_indices.key.value,
converted_indices);
memory_regions_used_ |= memory_regions_used_bits;
gpu_address_out = gpu_address;
index_count_out = converted_index_count;

View File

@ -277,20 +277,19 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
return false;
}
// Create the EDRAM load/store pipeline state objects.
// Create the EDRAM load/store pipelines.
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
const EdramLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i];
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState(
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.load_shader, mode_info.load_shader_size,
edram_load_store_root_signature_);
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState(
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.store_shader, mode_info.store_shader_size,
edram_load_store_root_signature_);
if (edram_load_pipelines_[i] == nullptr ||
edram_store_pipelines_[i] == nullptr) {
XELOGE(
"Failed to create the EDRAM load/store pipeline states for mode {}",
i);
XELOGE("Failed to create the EDRAM load/store pipelines for mode {}",
i);
Shutdown();
return false;
}
@ -299,7 +298,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
}
}
// Create the resolve root signatures and pipeline state objects.
// Create the resolve root signatures and pipelines.
D3D12_ROOT_PARAMETER resolve_root_parameters[3];
// Copying root signature.
@ -369,7 +368,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
return false;
}
// Copying pipeline state objects.
// Copying pipelines.
uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1;
for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount);
++i) {
@ -381,63 +380,61 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
continue;
}
const auto& resolve_copy_shader = resolve_copy_shaders_[i];
ID3D12PipelineState* resolve_copy_pipeline_state =
ui::d3d12::util::CreateComputePipelineState(
ID3D12PipelineState* resolve_copy_pipeline =
ui::d3d12::util::CreateComputePipeline(
device, resolve_copy_shader.first, resolve_copy_shader.second,
resolve_copy_root_signature_);
if (resolve_copy_pipeline_state == nullptr) {
XELOGE("Failed to create {} resolve copy pipeline state",
if (resolve_copy_pipeline == nullptr) {
XELOGE("Failed to create {} resolve copy pipeline",
resolve_copy_shader_info.debug_name);
// Abort initialization like the other pipeline creation failures in this
// function do - continuing would dereference the null pipeline in the
// SetName call that follows.
Shutdown();
return false;
}
resolve_copy_pipeline_state->SetName(reinterpret_cast<LPCWSTR>(
resolve_copy_pipeline->SetName(reinterpret_cast<LPCWSTR>(
xe::to_utf16(resolve_copy_shader_info.debug_name).c_str()));
resolve_copy_pipeline_states_[i] = resolve_copy_pipeline_state;
resolve_copy_pipelines_[i] = resolve_copy_pipeline;
}
// Clearing pipeline state objects.
resolve_clear_32bpp_pipeline_state_ =
ui::d3d12::util::CreateComputePipelineState(
device,
resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs
: resolve_clear_32bpp_cs,
resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs)
: sizeof(resolve_clear_32bpp_cs),
resolve_clear_root_signature_);
if (resolve_clear_32bpp_pipeline_state_ == nullptr) {
XELOGE("Failed to create the 32bpp resolve clear pipeline state");
// Clearing pipelines.
resolve_clear_32bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
device,
resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs
: resolve_clear_32bpp_cs,
resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs)
: sizeof(resolve_clear_32bpp_cs),
resolve_clear_root_signature_);
if (resolve_clear_32bpp_pipeline_ == nullptr) {
XELOGE("Failed to create the 32bpp resolve clear pipeline");
Shutdown();
return false;
}
resolve_clear_32bpp_pipeline_state_->SetName(L"Resolve Clear 32bpp");
resolve_clear_64bpp_pipeline_state_ =
ui::d3d12::util::CreateComputePipelineState(
device,
resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs
: resolve_clear_64bpp_cs,
resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs)
: sizeof(resolve_clear_64bpp_cs),
resolve_clear_root_signature_);
if (resolve_clear_64bpp_pipeline_state_ == nullptr) {
XELOGE("Failed to create the 64bpp resolve clear pipeline state");
resolve_clear_32bpp_pipeline_->SetName(L"Resolve Clear 32bpp");
resolve_clear_64bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
device,
resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs
: resolve_clear_64bpp_cs,
resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs)
: sizeof(resolve_clear_64bpp_cs),
resolve_clear_root_signature_);
if (resolve_clear_64bpp_pipeline_ == nullptr) {
XELOGE("Failed to create the 64bpp resolve clear pipeline");
Shutdown();
return false;
}
resolve_clear_64bpp_pipeline_state_->SetName(L"Resolve Clear 64bpp");
resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp");
if (!edram_rov_used_) {
assert_false(resolution_scale_2x_);
resolve_clear_depth_24_32_pipeline_state_ =
ui::d3d12::util::CreateComputePipelineState(
resolve_clear_depth_24_32_pipeline_ =
ui::d3d12::util::CreateComputePipeline(
device, resolve_clear_depth_24_32_cs,
sizeof(resolve_clear_depth_24_32_cs),
resolve_clear_root_signature_);
if (resolve_clear_depth_24_32_pipeline_state_ == nullptr) {
if (resolve_clear_depth_24_32_pipeline_ == nullptr) {
XELOGE(
"Failed to create the 24-bit and 32-bit depth resolve clear pipeline "
"state");
Shutdown();
return false;
}
resolve_clear_64bpp_pipeline_state_->SetName(
resolve_clear_64bpp_pipeline_->SetName(
L"Resolve Clear 24-bit & 32-bit Depth");
}
@ -451,12 +448,12 @@ void RenderTargetCache::Shutdown() {
edram_snapshot_restore_pool_.reset();
ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_state_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_state_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_state_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_root_signature_);
for (size_t i = 0; i < xe::countof(resolve_copy_pipeline_states_); ++i) {
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipeline_states_[i]);
for (size_t i = 0; i < xe::countof(resolve_copy_pipelines_); ++i) {
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipelines_[i]);
}
ui::d3d12::util::ReleaseAndNull(resolve_copy_root_signature_);
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
@ -1209,8 +1206,8 @@ bool RenderTargetCache::Resolve(const Memory& memory,
0, sizeof(copy_shader_constants) / sizeof(uint32_t),
&copy_shader_constants, 0);
}
command_processor_.SetComputePipelineState(
resolve_copy_pipeline_states_[size_t(copy_shader)]);
command_processor_.SetComputePipeline(
resolve_copy_pipelines_[size_t(copy_shader)]);
command_processor_.SubmitBarriers();
command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1);
@ -1279,9 +1276,9 @@ bool RenderTargetCache::Resolve(const Memory& memory,
command_list.D3DSetComputeRoot32BitConstants(
0, sizeof(depth_clear_constants) / sizeof(uint32_t),
&depth_clear_constants, 0);
command_processor_.SetComputePipelineState(
clear_float32_depth ? resolve_clear_depth_24_32_pipeline_state_
: resolve_clear_32bpp_pipeline_state_);
command_processor_.SetComputePipeline(
clear_float32_depth ? resolve_clear_depth_24_32_pipeline_
: resolve_clear_32bpp_pipeline_);
command_processor_.SubmitBarriers();
command_list.D3DDispatch(clear_group_count.first,
clear_group_count.second, 1);
@ -1301,10 +1298,10 @@ bool RenderTargetCache::Resolve(const Memory& memory,
0, sizeof(color_clear_constants) / sizeof(uint32_t),
&color_clear_constants, 0);
}
command_processor_.SetComputePipelineState(
command_processor_.SetComputePipeline(
resolve_info.color_edram_info.format_is_64bpp
? resolve_clear_64bpp_pipeline_state_
: resolve_clear_32bpp_pipeline_state_);
? resolve_clear_64bpp_pipeline_
: resolve_clear_32bpp_pipeline_);
command_processor_.SubmitBarriers();
command_list.D3DDispatch(clear_group_count.first,
clear_group_count.second, 1);
@ -1816,7 +1813,7 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
render_target->footprints, nullptr, nullptr,
&copy_buffer_size);
render_target->copy_buffer_size = uint32_t(copy_buffer_size);
render_targets_.insert(std::make_pair(key.value, render_target));
render_targets_.emplace(key.value, render_target);
COUNT_profile_set("gpu/render_target_cache/render_targets",
render_targets_.size());
#if 0
@ -2015,8 +2012,7 @@ void RenderTargetCache::StoreRenderTargetsToEdram() {
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format);
command_processor_.SetComputePipelineState(
edram_store_pipelines_[size_t(mode)]);
command_processor_.SetComputePipeline(edram_store_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples.
command_list.D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1);
@ -2140,8 +2136,7 @@ void RenderTargetCache::LoadRenderTargetsFromEdram(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format);
command_processor_.SetComputePipelineState(
edram_load_pipelines_[size_t(mode)]);
command_processor_.SetComputePipeline(edram_load_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples.
command_list.D3DDispatch(render_target->key.width_ss_div_80, edram_rows, 1);

View File

@ -237,14 +237,13 @@ class D3D12CommandProcessor;
// get each of the 4 host pixels for each sample.
class RenderTargetCache {
public:
// Direct3D 12 debug layer does some kaschenit-style trolling by giving errors
// that contradict each other when you use null RTV descriptors - if you set
// a valid format in RTVFormats in the pipeline state, it says that null
// descriptors can only be used if the format in the pipeline state is
// DXGI_FORMAT_UNKNOWN, however, if DXGI_FORMAT_UNKNOWN is set, it complains
// that the format in the pipeline doesn't match the RTV format. So we have to
// make render target bindings consecutive and remap the output indices in
// pixel shaders.
// Direct3D 12 debug layer is giving errors that contradict each other when
// you use null RTV descriptors - if you set a valid format in RTVFormats in
// the pipeline state, it says that null descriptors can only be used if the
// format in the pipeline state is DXGI_FORMAT_UNKNOWN, however, if
// DXGI_FORMAT_UNKNOWN is set, it complains that the format in the pipeline
// state doesn't match the RTV format. So we have to make render target
// bindings consecutive and remap the output indices in pixel shaders.
struct PipelineRenderTarget {
uint32_t guest_render_target;
DXGI_FORMAT format;
@ -304,8 +303,7 @@ class RenderTargetCache {
// performance difference, but with EDRAM loads/stores less conversion should
// be performed by the shaders if D24S8 is emulated as D24_UNORM_S8_UINT, and
// it's probably more accurate.
static inline DXGI_FORMAT GetDepthDXGIFormat(
xenos::DepthRenderTargetFormat format) {
static DXGI_FORMAT GetDepthDXGIFormat(xenos::DepthRenderTargetFormat format) {
return format == xenos::DepthRenderTargetFormat::kD24FS8
? DXGI_FORMAT_D32_FLOAT_S8X24_UINT
: DXGI_FORMAT_D24_UNORM_S8_UINT;
@ -537,7 +535,7 @@ class RenderTargetCache {
// 16: - EDRAM pitch in tiles.
uint32_t base_samples_2x_depth_pitch;
};
// EDRAM pipeline states for the RTV/DSV path.
// EDRAM pipelines for the RTV/DSV path.
static const EdramLoadStoreModeInfo
edram_load_store_mode_info_[size_t(EdramLoadStoreMode::kCount)];
ID3D12PipelineState*
@ -546,20 +544,20 @@ class RenderTargetCache {
ID3D12PipelineState*
edram_store_pipelines_[size_t(EdramLoadStoreMode::kCount)] = {};
// Resolve root signatures and pipeline state objects.
// Resolve root signatures and pipelines.
ID3D12RootSignature* resolve_copy_root_signature_ = nullptr;
static const std::pair<const uint8_t*, size_t>
resolve_copy_shaders_[size_t(draw_util::ResolveCopyShaderIndex::kCount)];
ID3D12PipelineState* resolve_copy_pipeline_states_[size_t(
ID3D12PipelineState* resolve_copy_pipelines_[size_t(
draw_util::ResolveCopyShaderIndex::kCount)] = {};
ID3D12RootSignature* resolve_clear_root_signature_ = nullptr;
// Clearing 32bpp color, depth with ROV, or unorm depth without ROV.
ID3D12PipelineState* resolve_clear_32bpp_pipeline_state_ = nullptr;
ID3D12PipelineState* resolve_clear_32bpp_pipeline_ = nullptr;
// Clearing 64bpp color.
ID3D12PipelineState* resolve_clear_64bpp_pipeline_state_ = nullptr;
ID3D12PipelineState* resolve_clear_64bpp_pipeline_ = nullptr;
// Clearing float depth without ROV, both the float24 and the host float32
// versions.
ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_state_ = nullptr;
ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_ = nullptr;
// FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on
// Nvidia Maxwell 1st generation and older.

View File

@ -918,27 +918,24 @@ bool TextureCache::Initialize(bool edram_rov_used) {
return false;
}
// Create the loading pipeline state objects.
// Create the loading pipelines.
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
const LoadModeInfo& mode_info = load_mode_info_[i];
load_pipeline_states_[i] = ui::d3d12::util::CreateComputePipelineState(
load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.shader, mode_info.shader_size, load_root_signature_);
if (load_pipeline_states_[i] == nullptr) {
XELOGE(
"Failed to create the texture loading pipeline state object for mode "
"{}",
i);
if (load_pipelines_[i] == nullptr) {
XELOGE("Failed to create the texture loading pipeline for mode {}", i);
Shutdown();
return false;
}
if (IsResolutionScale2X() && mode_info.shader_2x != nullptr) {
load_pipeline_states_2x_[i] = ui::d3d12::util::CreateComputePipelineState(
load_pipelines_2x_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.shader_2x, mode_info.shader_2x_size,
load_root_signature_);
if (load_pipeline_states_2x_[i] == nullptr) {
if (load_pipelines_2x_[i] == nullptr) {
XELOGE(
"Failed to create the 2x-scaled texture loading pipeline state "
"for mode {}",
"Failed to create the 2x-scaled texture loading pipeline for mode "
"{}",
i);
Shutdown();
return false;
@ -1024,8 +1021,8 @@ void TextureCache::Shutdown() {
ui::d3d12::util::ReleaseAndNull(null_srv_descriptor_heap_);
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_2x_[i]);
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_[i]);
ui::d3d12::util::ReleaseAndNull(load_pipelines_2x_[i]);
ui::d3d12::util::ReleaseAndNull(load_pipelines_[i]);
}
ui::d3d12::util::ReleaseAndNull(load_root_signature_);
@ -1892,7 +1889,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
if (IsResolutionScale2X() && key.tiled) {
LoadMode load_mode = GetLoadMode(key);
if (load_mode != LoadMode::kUnknown &&
load_pipeline_states_2x_[uint32_t(load_mode)] != nullptr) {
load_pipelines_2x_[uint32_t(load_mode)] != nullptr) {
uint32_t base_size = 0, mip_size = 0;
texture_util::GetTextureTotalSize(
key.dimension, key.width, key.height, key.depth, key.format,
@ -2047,7 +2044,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
}
texture->base_watch_handle = nullptr;
texture->mip_watch_handle = nullptr;
textures_.insert(std::make_pair(map_key, texture));
textures_.emplace(map_key, texture);
COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
textures_total_size_ += texture->resource_size;
COUNT_profile_set("gpu/texture_cache/total_size_mb",
@ -2079,10 +2076,10 @@ bool TextureCache::LoadTextureData(Texture* texture) {
return false;
}
bool scaled_resolve = texture->key.scaled_resolve ? true : false;
ID3D12PipelineState* pipeline_state =
scaled_resolve ? load_pipeline_states_2x_[uint32_t(load_mode)]
: load_pipeline_states_[uint32_t(load_mode)];
if (pipeline_state == nullptr) {
ID3D12PipelineState* pipeline = scaled_resolve
? load_pipelines_2x_[uint32_t(load_mode)]
: load_pipelines_[uint32_t(load_mode)];
if (pipeline == nullptr) {
return false;
}
const LoadModeInfo& load_mode_info = load_mode_info_[uint32_t(load_mode)];
@ -2296,7 +2293,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
load_mode_info.srv_bpe_log2);
}
}
command_processor_.SetComputePipelineState(pipeline_state);
command_processor_.SetComputePipeline(pipeline);
command_list.D3DSetComputeRootSignature(load_root_signature_);
command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second);
@ -2597,7 +2594,7 @@ uint32_t TextureCache::FindOrCreateTextureDescriptor(Texture& texture,
}
device->CreateShaderResourceView(
texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index));
texture.srv_descriptors.insert({descriptor_key, descriptor_index});
texture.srv_descriptors.emplace(descriptor_key, descriptor_index);
return descriptor_index;
}

View File

@ -106,18 +106,18 @@ class TextureCache {
bool operator!=(const TextureKey& key) const {
return GetMapKey() != key.GetMapKey() || bucket_key != key.bucket_key;
}
inline uint64_t GetMapKey() const {
uint64_t GetMapKey() const {
return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32);
}
inline void SetMapKey(uint64_t key) {
void SetMapKey(uint64_t key) {
map_key[0] = uint32_t(key);
map_key[1] = uint32_t(key >> 32);
}
inline bool IsInvalid() const {
bool IsInvalid() const {
// Zero base and zero width is enough for a binding to be invalid.
return map_key[0] == 0;
}
inline void MakeInvalid() {
void MakeInvalid() {
// Reset all for a stable hash.
SetMapKey(0);
bucket_key = 0;
@ -222,9 +222,7 @@ class TextureCache {
void MarkRangeAsResolved(uint32_t start_unscaled, uint32_t length_unscaled);
inline bool IsResolutionScale2X() const {
return scaled_resolve_buffer_ != nullptr;
}
bool IsResolutionScale2X() const { return scaled_resolve_buffer_ != nullptr; }
ID3D12Resource* GetScaledResolveBuffer() const {
return scaled_resolve_buffer_;
}
@ -233,7 +231,7 @@ class TextureCache {
uint32_t length_unscaled);
void UseScaledResolveBufferForReading();
void UseScaledResolveBufferForWriting();
inline void MarkScaledResolveBufferUAVWritesCommitNeeded() {
void MarkScaledResolveBufferUAVWritesCommitNeeded() {
if (scaled_resolve_buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
scaled_resolve_buffer_uav_writes_commit_needed_ = true;
}
@ -432,7 +430,7 @@ class TextureCache {
// Whether the signed version of the texture has a different representation on
// the host than its unsigned version (for example, if it's a fixed-point
// texture emulated with a larger host pixel format).
static inline bool IsSignedVersionSeparate(xenos::TextureFormat format) {
static bool IsSignedVersionSeparate(xenos::TextureFormat format) {
const HostFormat& host_format = host_formats_[uint32_t(format)];
return host_format.load_mode_snorm != LoadMode::kUnknown &&
host_format.load_mode_snorm != host_format.load_mode;
@ -441,26 +439,24 @@ class TextureCache {
// of block-compressed textures with 4x4-aligned dimensions on PC).
static bool IsDecompressionNeeded(xenos::TextureFormat format, uint32_t width,
uint32_t height);
static inline DXGI_FORMAT GetDXGIResourceFormat(xenos::TextureFormat format,
uint32_t width,
uint32_t height) {
static DXGI_FORMAT GetDXGIResourceFormat(xenos::TextureFormat format,
uint32_t width, uint32_t height) {
const HostFormat& host_format = host_formats_[uint32_t(format)];
return IsDecompressionNeeded(format, width, height)
? host_format.dxgi_format_uncompressed
: host_format.dxgi_format_resource;
}
static inline DXGI_FORMAT GetDXGIResourceFormat(TextureKey key) {
static DXGI_FORMAT GetDXGIResourceFormat(TextureKey key) {
return GetDXGIResourceFormat(key.format, key.width, key.height);
}
static inline DXGI_FORMAT GetDXGIUnormFormat(xenos::TextureFormat format,
uint32_t width,
uint32_t height) {
static DXGI_FORMAT GetDXGIUnormFormat(xenos::TextureFormat format,
uint32_t width, uint32_t height) {
const HostFormat& host_format = host_formats_[uint32_t(format)];
return IsDecompressionNeeded(format, width, height)
? host_format.dxgi_format_uncompressed
: host_format.dxgi_format_unorm;
}
static inline DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) {
static DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) {
return GetDXGIUnormFormat(key.format, key.width, key.height);
}
@ -550,9 +546,9 @@ class TextureCache {
static const LoadModeInfo load_mode_info_[];
ID3D12RootSignature* load_root_signature_ = nullptr;
ID3D12PipelineState* load_pipeline_states_[size_t(LoadMode::kCount)] = {};
// Load pipeline state objects for 2x-scaled resolved targets.
ID3D12PipelineState* load_pipeline_states_2x_[size_t(LoadMode::kCount)] = {};
ID3D12PipelineState* load_pipelines_[size_t(LoadMode::kCount)] = {};
// Load pipelines for 2x-scaled resolved targets.
ID3D12PipelineState* load_pipelines_2x_[size_t(LoadMode::kCount)] = {};
std::unordered_multimap<uint64_t, Texture*> textures_;
uint64_t textures_total_size_ = 0;

View File

@ -40,11 +40,11 @@ project("xenia-hid-demo")
filter("platforms:Linux")
links({
"SDL2",
"vulkan",
"X11",
"xcb",
"X11-xcb",
"vulkan",
"SDL2",
})
filter("platforms:Windows")

View File

@ -542,19 +542,13 @@ dword_result_t NetDll_XNetDnsRelease(dword_t caller, pointer_t<XNDNS> dns) {
}
DECLARE_XAM_EXPORT1(NetDll_XNetDnsRelease, kNetworking, kStub);
SHIM_CALL NetDll_XNetQosServiceLookup_shim(PPCContext* ppc_context,
KernelState* kernel_state) {
uint32_t caller = SHIM_GET_ARG_32(0);
uint32_t zero = SHIM_GET_ARG_32(1);
uint32_t event_handle = SHIM_GET_ARG_32(2);
uint32_t out_ptr = SHIM_GET_ARG_32(3);
XELOGD("NetDll_XNetQosServiceLookup({}, {}, {:08X}, {:08X})", caller, zero,
event_handle, out_ptr);
dword_result_t NetDll_XNetQosServiceLookup(dword_t caller, dword_t zero,
dword_t event_handle,
lpdword_t out_ptr) {
// Non-zero is error.
SHIM_SET_RETURN_32(1);
return 1;
}
DECLARE_XAM_EXPORT1(NetDll_XNetQosServiceLookup, kNetworking, kStub);
dword_result_t NetDll_XNetQosListen(dword_t caller, lpvoid_t id, lpvoid_t data,
dword_t data_size, dword_t r7,
@ -965,9 +959,7 @@ dword_result_t NetDll___WSAFDIsSet(dword_t socket_handle,
DECLARE_XAM_EXPORT1(NetDll___WSAFDIsSet, kNetworking, kImplemented);
void RegisterNetExports(xe::cpu::ExportResolver* export_resolver,
KernelState* kernel_state) {
SHIM_SET_MAPPING("xam.xex", NetDll_XNetQosServiceLookup, state);
}
KernelState* kernel_state) {}
} // namespace xam
} // namespace kernel

View File

@ -222,13 +222,23 @@ void KeSetCurrentStackPointers(lpvoid_t stack_ptr,
}
DECLARE_XBOXKRNL_EXPORT1(KeSetCurrentStackPointers, kThreading, kImplemented);
dword_result_t KeSetAffinityThread(lpvoid_t thread_ptr, dword_t affinity) {
dword_result_t KeSetAffinityThread(lpvoid_t thread_ptr, dword_t affinity,
lpdword_t previous_affinity_ptr) {
// The Xbox 360, according to disassembly of KeSetAffinityThread, unlike
// Windows NT, stores the previous affinity via the pointer provided as an
// argument, not in the return value - the return value is used for the
// result.
if (!affinity) {
return X_STATUS_INVALID_PARAMETER;
}
auto thread = XObject::GetNativeObject<XThread>(kernel_state(), thread_ptr);
if (thread) {
if (previous_affinity_ptr) {
*previous_affinity_ptr = uint32_t(1) << thread->active_cpu();
}
thread->SetAffinity(affinity);
}
return (uint32_t)affinity;
return X_STATUS_SUCCESS;
}
DECLARE_XBOXKRNL_EXPORT1(KeSetAffinityThread, kThreading, kImplemented);

View File

@ -156,11 +156,17 @@ void XThread::set_name(const std::string_view name) {
}
}
uint8_t next_cpu = 0;
uint8_t GetFakeCpuNumber(uint8_t proc_mask) {
static uint8_t next_cpu = 0;
static uint8_t GetFakeCpuNumber(uint8_t proc_mask) {
// NOTE: proc_mask is logical processors, not physical processors or cores.
if (!proc_mask) {
next_cpu = (next_cpu + 1) % 6;
return next_cpu; // is this reasonable?
// TODO(Triang3l): Does the following apply here?
// https://docs.microsoft.com/en-us/windows/win32/dxtecharts/coding-for-multiple-cores
// "On Xbox 360, you must explicitly assign software threads to a particular
// hardware thread by using XSetThreadProcessor. Otherwise, all child
// threads will stay on the same hardware thread as the parent."
}
assert_false(proc_mask & 0xC0);
@ -205,6 +211,7 @@ void XThread::InitializeGuestObject() {
// 0xA88 = APC
// 0x18 = timer
xe::store_and_swap<uint32_t>(p + 0x09C, 0xFDFFD7FF);
// current_cpu is expected to be initialized externally via SetActiveCpu.
xe::store_and_swap<uint32_t>(p + 0x0D0, stack_base_);
xe::store_and_swap<uint64_t>(p + 0x130, Clock::QueryGuestSystemTime());
xe::store_and_swap<uint32_t>(p + 0x144, guest_object() + 0x144);
@ -346,6 +353,12 @@ X_STATUS XThread::Create() {
// Exports use this to get the kernel.
thread_state_->context()->kernel_state = kernel_state_;
uint8_t cpu_index = GetFakeCpuNumber(
static_cast<uint8_t>(creation_params_.creation_flags >> 24));
// Initialize the KTHREAD object.
InitializeGuestObject();
X_KPCR* pcr = memory()->TranslateVirtual<X_KPCR*>(pcr_address_);
pcr->tls_ptr = tls_static_address_;
@ -355,14 +368,11 @@ X_STATUS XThread::Create() {
pcr->stack_base_ptr = stack_base_;
pcr->stack_end_ptr = stack_limit_;
uint8_t proc_mask =
static_cast<uint8_t>(creation_params_.creation_flags >> 24);
pcr->dpc_active = 0; // DPC active bool?
pcr->current_cpu = GetFakeCpuNumber(proc_mask); // Current CPU(?)
pcr->dpc_active = 0; // DPC active bool?
// Initialize the KTHREAD object.
InitializeGuestObject();
// Assign the thread to the logical processor, and also set up the current CPU
// in KPCR and KTHREAD.
SetActiveCpu(cpu_index);
// Always retain when starting - the thread owns itself until exited.
RetainHandle();
@ -415,10 +425,6 @@ X_STATUS XThread::Create() {
return X_STATUS_NO_MEMORY;
}
if (!cvars::ignore_thread_affinities) {
thread_->set_affinity_mask(proc_mask);
}
// Set the thread name based on host ID (for easier debugging).
if (thread_name_.empty()) {
set_name(fmt::format("XThread{:04X}", thread_->system_id()));
@ -700,37 +706,36 @@ void XThread::SetPriority(int32_t increment) {
}
void XThread::SetAffinity(uint32_t affinity) {
// Affinity mask, as in SetThreadAffinityMask.
// Xbox thread IDs:
// 0 - core 0, thread 0 - user
// 1 - core 0, thread 1 - user
// 2 - core 1, thread 0 - sometimes xcontent
// 3 - core 1, thread 1 - user
// 4 - core 2, thread 0 - xaudio
// 5 - core 2, thread 1 - user
// TODO(benvanik): implement better thread distribution.
// NOTE: these are logical processors, not physical processors or cores.
SetActiveCpu(GetFakeCpuNumber(affinity));
}
uint8_t XThread::active_cpu() const {
const X_KPCR& pcr = *memory()->TranslateVirtual<const X_KPCR*>(pcr_address_);
return pcr.current_cpu;
}
void XThread::SetActiveCpu(uint8_t cpu_index) {
// May be called during thread creation - don't skip if current == new.
assert_true(cpu_index < 6);
X_KPCR& pcr = *memory()->TranslateVirtual<X_KPCR*>(pcr_address_);
pcr.current_cpu = cpu_index;
if (is_guest_thread()) {
X_KTHREAD& thread_object =
*memory()->TranslateVirtual<X_KTHREAD*>(guest_object());
thread_object.current_cpu = cpu_index;
}
if (xe::threading::logical_processor_count() < 6) {
XELOGW("Too few processors - scheduling will be wonky");
}
SetActiveCpu(GetFakeCpuNumber(affinity));
affinity_ = affinity;
if (!cvars::ignore_thread_affinities) {
thread_->set_affinity_mask(affinity);
thread_->set_affinity_mask(uint64_t(1) << cpu_index);
}
}
uint32_t XThread::active_cpu() const {
uint8_t* pcr = memory()->TranslateVirtual(pcr_address_);
return xe::load_and_swap<uint8_t>(pcr + 0x10C);
}
void XThread::SetActiveCpu(uint32_t cpu_index) {
assert_true(cpu_index < 6);
uint8_t* pcr = memory()->TranslateVirtual(pcr_address_);
xe::store_and_swap<uint8_t>(pcr + 0x10C, cpu_index);
}
bool XThread::GetTLSValue(uint32_t slot, uint32_t* value_out) {
if (slot * 4 > tls_total_size_) {
return false;

View File

@ -88,7 +88,8 @@ struct X_KTHREAD {
char unk_10[0xAC]; // 0x10
uint8_t suspend_count; // 0xBC
uint8_t unk_BD; // 0xBD
uint16_t unk_BE; // 0xBE
uint8_t unk_BE; // 0xBE
uint8_t current_cpu; // 0xBF
char unk_C0[0x70]; // 0xC0
xe::be<uint64_t> create_time; // 0x130
xe::be<uint64_t> exit_time; // 0x138
@ -165,10 +166,17 @@ class XThread : public XObject, public cpu::Thread {
int32_t priority() const { return priority_; }
int32_t QueryPriority();
void SetPriority(int32_t increment);
uint32_t affinity() const { return affinity_; }
// Xbox thread IDs:
// 0 - core 0, thread 0 - user
// 1 - core 0, thread 1 - user
// 2 - core 1, thread 0 - sometimes xcontent
// 3 - core 1, thread 1 - user
// 4 - core 2, thread 0 - xaudio
// 5 - core 2, thread 1 - user
void SetAffinity(uint32_t affinity);
uint32_t active_cpu() const;
void SetActiveCpu(uint32_t cpu_index);
uint8_t active_cpu() const;
void SetActiveCpu(uint8_t cpu_index);
bool GetTLSValue(uint32_t slot, uint32_t* value_out);
bool SetTLSValue(uint32_t slot, uint32_t value);
@ -220,7 +228,6 @@ class XThread : public XObject, public cpu::Thread {
bool running_ = false;
int32_t priority_ = 0;
uint32_t affinity_ = 0;
xe::global_critical_region global_critical_region_;
std::atomic<uint32_t> irql_ = {0};

View File

@ -118,15 +118,15 @@ bool D3D12ImmediateDrawer::Initialize() {
return false;
}
// Create the pipeline states.
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_desc = {};
pipeline_state_desc.pRootSignature = root_signature_;
pipeline_state_desc.VS.pShaderBytecode = immediate_vs;
pipeline_state_desc.VS.BytecodeLength = sizeof(immediate_vs);
pipeline_state_desc.PS.pShaderBytecode = immediate_ps;
pipeline_state_desc.PS.BytecodeLength = sizeof(immediate_ps);
// Create the pipelines.
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_desc = {};
pipeline_desc.pRootSignature = root_signature_;
pipeline_desc.VS.pShaderBytecode = immediate_vs;
pipeline_desc.VS.BytecodeLength = sizeof(immediate_vs);
pipeline_desc.PS.pShaderBytecode = immediate_ps;
pipeline_desc.PS.BytecodeLength = sizeof(immediate_ps);
D3D12_RENDER_TARGET_BLEND_DESC& pipeline_blend_desc =
pipeline_state_desc.BlendState.RenderTarget[0];
pipeline_desc.BlendState.RenderTarget[0];
pipeline_blend_desc.BlendEnable = TRUE;
pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA;
pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
@ -138,11 +138,11 @@ bool D3D12ImmediateDrawer::Initialize() {
pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED |
D3D12_COLOR_WRITE_ENABLE_GREEN |
D3D12_COLOR_WRITE_ENABLE_BLUE;
pipeline_state_desc.SampleMask = UINT_MAX;
pipeline_state_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
pipeline_state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
pipeline_state_desc.RasterizerState.FrontCounterClockwise = FALSE;
pipeline_state_desc.RasterizerState.DepthClipEnable = TRUE;
pipeline_desc.SampleMask = UINT_MAX;
pipeline_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
pipeline_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
pipeline_desc.RasterizerState.FrontCounterClockwise = FALSE;
pipeline_desc.RasterizerState.DepthClipEnable = TRUE;
D3D12_INPUT_ELEMENT_DESC pipeline_input_elements[3] = {};
pipeline_input_elements[0].SemanticName = "POSITION";
pipeline_input_elements[0].Format = DXGI_FORMAT_R32G32_FLOAT;
@ -154,26 +154,24 @@ bool D3D12ImmediateDrawer::Initialize() {
pipeline_input_elements[2].Format = DXGI_FORMAT_R8G8B8A8_UNORM;
pipeline_input_elements[2].AlignedByteOffset =
offsetof(ImmediateVertex, color);
pipeline_state_desc.InputLayout.pInputElementDescs = pipeline_input_elements;
pipeline_state_desc.InputLayout.NumElements =
pipeline_desc.InputLayout.pInputElementDescs = pipeline_input_elements;
pipeline_desc.InputLayout.NumElements =
UINT(xe::countof(pipeline_input_elements));
pipeline_state_desc.PrimitiveTopologyType =
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
pipeline_state_desc.NumRenderTargets = 1;
pipeline_state_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat;
pipeline_state_desc.SampleDesc.Count = 1;
pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
pipeline_desc.NumRenderTargets = 1;
pipeline_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat;
pipeline_desc.SampleDesc.Count = 1;
if (FAILED(device->CreateGraphicsPipelineState(
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_triangle_)))) {
&pipeline_desc, IID_PPV_ARGS(&pipeline_triangle_)))) {
XELOGE(
"Failed to create the Direct3D 12 immediate drawer triangle pipeline "
"state");
Shutdown();
return false;
}
pipeline_state_desc.PrimitiveTopologyType =
D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
if (FAILED(device->CreateGraphicsPipelineState(
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_line_)))) {
&pipeline_desc, IID_PPV_ARGS(&pipeline_line_)))) {
XELOGE(
"Failed to create the Direct3D 12 immediate drawer line pipeline "
"state");
@ -267,8 +265,8 @@ void D3D12ImmediateDrawer::Shutdown() {
util::ReleaseAndNull(sampler_heap_);
util::ReleaseAndNull(pipeline_state_line_);
util::ReleaseAndNull(pipeline_state_triangle_);
util::ReleaseAndNull(pipeline_line_);
util::ReleaseAndNull(pipeline_triangle_);
util::ReleaseAndNull(root_signature_);
}
@ -611,17 +609,17 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
uint32_t(sampler_index)));
}
// Set the primitive type and the pipeline state for it.
// Set the primitive type and the pipeline for it.
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
ID3D12PipelineState* pipeline_state;
ID3D12PipelineState* pipeline;
switch (draw.primitive_type) {
case ImmediatePrimitiveType::kLines:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
pipeline_state = pipeline_state_line_;
pipeline = pipeline_line_;
break;
case ImmediatePrimitiveType::kTriangles:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
pipeline_state = pipeline_state_triangle_;
pipeline = pipeline_triangle_;
break;
default:
assert_unhandled_case(draw.primitive_type);
@ -630,7 +628,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
if (current_primitive_topology_ != primitive_topology) {
current_primitive_topology_ = primitive_topology;
current_command_list_->IASetPrimitiveTopology(primitive_topology);
current_command_list_->SetPipelineState(pipeline_state);
current_command_list_->SetPipelineState(pipeline);
}
// Draw.

View File

@ -105,8 +105,8 @@ class D3D12ImmediateDrawer : public ImmediateDrawer {
kCount
};
ID3D12PipelineState* pipeline_state_triangle_ = nullptr;
ID3D12PipelineState* pipeline_state_line_ = nullptr;
ID3D12PipelineState* pipeline_triangle_ = nullptr;
ID3D12PipelineState* pipeline_line_ = nullptr;
ID3D12DescriptorHeap* sampler_heap_ = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_;

View File

@ -46,22 +46,22 @@ class D3D12Provider : public GraphicsProvider {
uint32_t GetRTVDescriptorSize() const { return descriptor_size_rtv_; }
uint32_t GetDSVDescriptorSize() const { return descriptor_size_dsv_; }
template <typename T>
inline T OffsetViewDescriptor(T start, uint32_t index) const {
T OffsetViewDescriptor(T start, uint32_t index) const {
start.ptr += index * descriptor_size_view_;
return start;
}
template <typename T>
inline T OffsetSamplerDescriptor(T start, uint32_t index) const {
T OffsetSamplerDescriptor(T start, uint32_t index) const {
start.ptr += index * descriptor_size_sampler_;
return start;
}
template <typename T>
inline T OffsetRTVDescriptor(T start, uint32_t index) const {
T OffsetRTVDescriptor(T start, uint32_t index) const {
start.ptr += index * descriptor_size_rtv_;
return start;
}
template <typename T>
inline T OffsetDSVDescriptor(T start, uint32_t index) const {
T OffsetDSVDescriptor(T start, uint32_t index) const {
start.ptr += index * descriptor_size_dsv_;
return start;
}
@ -91,32 +91,30 @@ class D3D12Provider : public GraphicsProvider {
}
// Proxies for Direct3D 12 functions since they are loaded dynamically.
inline HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc,
D3D_ROOT_SIGNATURE_VERSION version,
ID3DBlob** blob_out,
ID3DBlob** error_blob_out) const {
HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc,
D3D_ROOT_SIGNATURE_VERSION version,
ID3DBlob** blob_out,
ID3DBlob** error_blob_out) const {
return pfn_d3d12_serialize_root_signature_(desc, version, blob_out,
error_blob_out);
}
inline HRESULT Disassemble(const void* src_data, size_t src_data_size,
UINT flags, const char* comments,
ID3DBlob** disassembly_out) const {
HRESULT Disassemble(const void* src_data, size_t src_data_size, UINT flags,
const char* comments, ID3DBlob** disassembly_out) const {
if (!pfn_d3d_disassemble_) {
return E_NOINTERFACE;
}
return pfn_d3d_disassemble_(src_data, src_data_size, flags, comments,
disassembly_out);
}
inline HRESULT DxbcConverterCreateInstance(const CLSID& rclsid,
const IID& riid,
void** ppv) const {
HRESULT DxbcConverterCreateInstance(const CLSID& rclsid, const IID& riid,
void** ppv) const {
if (!pfn_dxilconv_dxc_create_instance_) {
return E_NOINTERFACE;
}
return pfn_dxilconv_dxc_create_instance_(rclsid, riid, ppv);
}
inline HRESULT DxcCreateInstance(const CLSID& rclsid, const IID& riid,
void** ppv) const {
HRESULT DxcCreateInstance(const CLSID& rclsid, const IID& riid,
void** ppv) const {
if (!pfn_dxcompiler_dxc_create_instance_) {
return E_NOINTERFACE;
}

View File

@ -47,7 +47,7 @@ ID3D12RootSignature* CreateRootSignature(
return root_signature;
}
ID3D12PipelineState* CreateComputePipelineState(
ID3D12PipelineState* CreateComputePipeline(
ID3D12Device* device, const void* shader, size_t shader_size,
ID3D12RootSignature* root_signature) {
D3D12_COMPUTE_PIPELINE_STATE_DESC desc;

View File

@ -27,7 +27,7 @@ extern const D3D12_HEAP_PROPERTIES kHeapPropertiesUpload;
extern const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback;
template <typename T>
inline bool ReleaseAndNull(T& object) {
bool ReleaseAndNull(T& object) {
if (object != nullptr) {
object->Release();
object = nullptr;
@ -39,9 +39,10 @@ inline bool ReleaseAndNull(T& object) {
ID3D12RootSignature* CreateRootSignature(const D3D12Provider& provider,
const D3D12_ROOT_SIGNATURE_DESC& desc);
ID3D12PipelineState* CreateComputePipelineState(
ID3D12Device* device, const void* shader, size_t shader_size,
ID3D12RootSignature* root_signature);
ID3D12PipelineState* CreateComputePipeline(ID3D12Device* device,
const void* shader,
size_t shader_size,
ID3D12RootSignature* root_signature);
constexpr DXGI_FORMAT GetUintPow2DXGIFormat(uint32_t element_size_bytes_log2) {
switch (element_size_bytes_log2) {

View File

@ -18,7 +18,7 @@ project("SDL2")
"SDL2/include",
})
buildoptions({
"/wd4828", -- illegal characters in file
"/wd4828", -- illegal characters in file https://bugzilla.libsdl.org/show_bug.cgi?id=5333
})
files({
-- 1:1 from SDL.vcxproj file

1
third_party/premake-cmake vendored Submodule

@ -0,0 +1 @@
Subproject commit 26fbbb9962aefcb1c24aff1e7952033ce1361190

View File

@ -88,6 +88,16 @@ def main():
sys.exit(return_code)
def print_box(msg):
    """Print an important message framed by a box-drawing border.

    The box is exactly two characters wider than ``msg`` so the text gets one
    space of padding on each side. A blank line follows the box.
    """
    inner_width = len(msg) + 2
    top_edge = '┌' + '─' * inner_width + '╖'
    text_row = '│ ' + msg + ' ║'
    bottom_edge = '╘' + '═' * inner_width + '╝'
    # The trailing '\n' plus print's own newline leaves an empty line after
    # the box, separating it from subsequent output.
    print('\n'.join((top_edge, text_row, bottom_edge)) + '\n')
def import_vs_environment():
"""Finds the installed Visual Studio version and imports
interesting environment variables into os.environ.
@ -153,6 +163,7 @@ def import_subprocess_environment(args):
os.environ[var.upper()] = setting
break
def has_bin(binary):
"""Checks whether the given binary is present.
@ -372,9 +383,9 @@ def run_platform_premake(cc='clang', devenv=None):
if 'VSVERSION' in os.environ:
vs_version = os.environ['VSVERSION']
return run_premake('windows', 'vs' + vs_version)
return run_premake('windows', devenv or ('vs' + vs_version))
else:
return run_premake('linux', devenv == 'codelite' and devenv or 'gmake2', cc)
return run_premake('linux', devenv or 'gmake2', cc)
def run_premake_export_commands():
@ -408,6 +419,43 @@ def get_build_bin_path(args):
return os.path.join(self_path, 'build', 'bin', platform.capitalize(), args['config'].capitalize())
def create_clion_workspace():
    """Seed the .idea directory with minimal CLion settings on first start.

    Writes ``misc.xml`` (points CLion at the CMakeLists.txt under ``build``)
    and ``workspace.xml`` (declares the Checked/Debug/Release CMake
    profiles) into ``.idea`` relative to the current working directory.

    Returns:
        True if the workspace files were generated, False if ``.idea``
        already existed and nothing was touched.
    """
    idea_dir = '.idea'
    if os.path.exists(idea_dir):
        # Not a first start - leave the user's existing workspace alone.
        return False

    print('Generating CLion workspace files...')
    # Might become easier in the future: https://youtrack.jetbrains.com/issue/CPP-7911

    # Location of the CMakeLists.txt.
    misc_xml = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<project version="4">\n'
        '<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$/build">\n'
        '<contentRoot DIR="$PROJECT_DIR$" />\n'
        '</component>\n'
        '</project>\n'
    )
    # Available configurations.
    # TODO Find a way to trigger a cmake reload
    workspace_xml = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<project version="4">\n'
        '<component name="CMakeSettings">\n'
        '<configurations>\n'
        '<configuration PROFILE_NAME="Checked" CONFIG_NAME="Checked" />\n'
        '<configuration PROFILE_NAME="Debug" CONFIG_NAME="Debug" />\n'
        '<configuration PROFILE_NAME="Release" CONFIG_NAME="Release" />\n'
        '</configurations>\n'
        '</component>\n'
        '</project>'
    )

    os.mkdir(idea_dir)
    for file_name, contents in (('misc.xml', misc_xml),
                                ('workspace.xml', workspace_xml)):
        with open(os.path.join(idea_dir, file_name), 'w') as f:
            f.write(contents)
    return True
def discover_commands(subparsers):
"""Looks for all commands and returns a dictionary of them.
In the future commands could be discovered on disk.
@ -1444,8 +1492,13 @@ class DevenvCommand(Command):
def execute(self, args, pass_args, cwd):
devenv = None
show_reload_prompt = False
if sys.platform == 'win32':
print('Launching Visual Studio...')
elif has_bin('clion') or has_bin('clion.sh'):
print('Launching CLion...')
show_reload_prompt = create_clion_workspace()
devenv = 'cmake'
else:
print('Launching CodeLite...')
devenv = 'codelite'
@ -1456,11 +1509,23 @@ class DevenvCommand(Command):
print('')
print('- launching devenv...')
if show_reload_prompt:
print_box('Please run "File ⇒ ↺ Reload CMake Project" from inside the IDE!')
if sys.platform == 'win32':
shell_call([
'devenv',
'build\\xenia.sln',
])
elif has_bin('clion'):
shell_call([
'clion',
'.',
])
elif has_bin('clion.sh'):
shell_call([
'clion.sh',
'.',
])
else:
shell_call([
'codelite',