Merge branch 'master' of https://github.com/xenia-project/xenia into canary_new
This commit is contained in:
commit
7abe6312be
|
@ -0,0 +1,10 @@
|
|||
# Ignore HighResolutionTimer custom event
|
||||
handle SIG34 nostop noprint
|
||||
# Ignore PosixTimer custom event
|
||||
handle SIG35 nostop noprint
|
||||
# Ignore PosixThread exit event
|
||||
handle SIG32 nostop noprint
|
||||
# Ignore PosixThread suspend event
|
||||
handle SIG36 nostop noprint
|
||||
# Ignore PosixThread user callback event
|
||||
handle SIG37 nostop noprint
|
|
@ -64,3 +64,6 @@
|
|||
[submodule "third_party/DirectXShaderCompiler"]
|
||||
path = third_party/DirectXShaderCompiler
|
||||
url = https://github.com/microsoft/DirectXShaderCompiler.git
|
||||
[submodule "third_party/premake-cmake"]
|
||||
path = third_party/premake-cmake
|
||||
url = https://github.com/Enhex/premake-cmake.git
|
||||
|
|
|
@ -28,9 +28,9 @@ addons:
|
|||
|
||||
jobs:
|
||||
include:
|
||||
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 LINT=true
|
||||
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 BUILD=true CONFIG=Debug
|
||||
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 BUILD=true CONFIG=Release
|
||||
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 LINT=true
|
||||
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 BUILD=true CONFIG=Debug
|
||||
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 BUILD=true CONFIG=Release
|
||||
|
||||
git:
|
||||
# We handle submodules ourselves in xenia-build setup.
|
||||
|
@ -40,8 +40,10 @@ before_script:
|
|||
- export LIBVULKAN_VERSION=1.1.70
|
||||
- export CXX=$CXX_COMPILER
|
||||
- export CC=$C_COMPILER
|
||||
- export AR=$AR_COMPILER
|
||||
# Dump useful info.
|
||||
- $CXX --version
|
||||
- $AR_COMPILER --version
|
||||
- python3 --version
|
||||
- clang-format-9 --version
|
||||
- clang-format-9 -style=file -dump-config
|
||||
|
|
|
@ -91,12 +91,14 @@ Linux support is extremely experimental and presently incomplete.
|
|||
The build script uses LLVM/Clang 9. GCC, while it should work in theory, is not easily
|
||||
interchangeable right now.
|
||||
|
||||
[CodeLite](https://codelite.org) is the supported IDE and `xb devenv` will generate a workspace and attempt to open it. Your distribution's version may be out of date so check their website.
|
||||
Normal building via `xb build` uses Make.
|
||||
* Normal building via `xb build` uses Make.
|
||||
* [CodeLite](https://codelite.org) is supported. `xb devenv` will generate a workspace and attempt to open it. Your distribution's version may be out of date, so check the CodeLite website.
|
||||
* Experimental CMake generation is available to facilitate use of other IDEs such as [CLion](https://www.jetbrains.com/clion/). If `clion` is available inside `$PATH`, `xb devenv` will start it. Otherwise `build/CMakeLists.txt` needs to be generated by invoking `xb premake --devenv=cmake` manually.
|
||||
|
||||
Clang-9 or newer should be available from system repositories on all up-to-date distributions.
|
||||
You will also need some development libraries. To get them on an Ubuntu system:
|
||||
```
|
||||
|
||||
```bash
|
||||
sudo apt-get install libgtk-3-dev libpthread-stubs0-dev liblz4-dev libx11-dev libvulkan-dev libsdl2-dev libiberty-dev libunwind-dev libc++-dev libc++abi-dev
|
||||
```
|
||||
|
||||
|
|
50
premake5.lua
50
premake5.lua
|
@ -1,5 +1,6 @@
|
|||
include("tools/build")
|
||||
require("third_party/premake-export-compile-commands/export-compile-commands")
|
||||
require("third_party/premake-cmake/cmake")
|
||||
|
||||
location(build_root)
|
||||
targetdir(build_bin)
|
||||
|
@ -24,6 +25,9 @@ defines({
|
|||
"UNICODE",
|
||||
})
|
||||
|
||||
cppdialect("C++17")
|
||||
symbols("On")
|
||||
|
||||
-- TODO(DrChat): Find a way to disable this on other architectures.
|
||||
if ARCH ~= "ppc64" then
|
||||
filter("architecture:x86_64")
|
||||
|
@ -44,30 +48,29 @@ filter("kind:StaticLib")
|
|||
|
||||
filter("configurations:Checked")
|
||||
runtime("Debug")
|
||||
optimize("Off")
|
||||
defines({
|
||||
"DEBUG",
|
||||
})
|
||||
runtime("Debug")
|
||||
filter({"configurations:Checked", "platforms:Windows"})
|
||||
buildoptions({
|
||||
"/RTCsu", -- Full Run-Time Checks.
|
||||
"/RTCsu", -- Full Run-Time Checks.
|
||||
})
|
||||
filter({"configurations:Checked", "platforms:Linux"})
|
||||
defines({
|
||||
"_GLIBCXX_DEBUG", -- libstdc++ debug mode
|
||||
})
|
||||
|
||||
filter("configurations:Debug")
|
||||
runtime("Debug")
|
||||
runtime("Release")
|
||||
optimize("Off")
|
||||
defines({
|
||||
"DEBUG",
|
||||
"_NO_DEBUG_HEAP=1",
|
||||
})
|
||||
runtime("Release")
|
||||
filter({"configurations:Debug", "platforms:Windows"})
|
||||
linkoptions({
|
||||
"/NODEFAULTLIB:MSVCRTD",
|
||||
})
|
||||
|
||||
filter({"configurations:Debug", "platforms:Linux"})
|
||||
buildoptions({
|
||||
"-g",
|
||||
defines({
|
||||
"_GLIBCXX_DEBUG", -- make dbg symbols work on some distros
|
||||
})
|
||||
|
||||
filter("configurations:Release")
|
||||
|
@ -76,26 +79,18 @@ filter("configurations:Release")
|
|||
"NDEBUG",
|
||||
"_NO_DEBUG_HEAP=1",
|
||||
})
|
||||
optimize("speed")
|
||||
optimize("Speed")
|
||||
inlining("Auto")
|
||||
floatingpoint("Fast")
|
||||
flags({
|
||||
"LinkTimeOptimization",
|
||||
})
|
||||
runtime("Release")
|
||||
filter({"configurations:Release", "platforms:Windows"})
|
||||
linkoptions({
|
||||
"/NODEFAULTLIB:MSVCRTD",
|
||||
})
|
||||
|
||||
filter("platforms:Linux")
|
||||
system("linux")
|
||||
toolset("clang")
|
||||
cppdialect("C++17")
|
||||
buildoptions({
|
||||
-- "-mlzcnt", -- (don't) Assume lzcnt is supported.
|
||||
"`pkg-config --cflags gtk+-x11-3.0`",
|
||||
"-fno-lto", -- Premake doesn't support LTO on clang
|
||||
({os.outputof("pkg-config --cflags gtk+-x11-3.0")})[1],
|
||||
})
|
||||
links({
|
||||
"stdc++fs",
|
||||
|
@ -105,14 +100,13 @@ filter("platforms:Linux")
|
|||
"rt",
|
||||
})
|
||||
linkoptions({
|
||||
"`pkg-config --libs gtk+-3.0`",
|
||||
({os.outputof("pkg-config --libs gtk+-3.0")})[1],
|
||||
})
|
||||
|
||||
filter({"platforms:Linux", "kind:*App"})
|
||||
linkgroups("On")
|
||||
|
||||
filter({"platforms:Linux", "language:C++", "toolset:gcc"})
|
||||
cppdialect("C++17")
|
||||
links({
|
||||
})
|
||||
disablewarnings({
|
||||
|
@ -147,13 +141,11 @@ filter({"platforms:Linux", "language:C++", "toolset:clang", "files:*.cc or *.cpp
|
|||
filter("platforms:Windows")
|
||||
system("windows")
|
||||
toolset("msc")
|
||||
cppdialect("C++17")
|
||||
buildoptions({
|
||||
"/MP", -- Multiprocessor compilation.
|
||||
"/utf-8", -- 'build correctly on systems with non-Latin codepages'.
|
||||
-- Mark warnings as severe
|
||||
"/w14839", -- non-standard use of class 'type' as an argument to a variadic function
|
||||
"/w14840", -- non-portable use of class 'type' as an argument to a variadic function
|
||||
"/w14839", -- non-standard use of class 'type' as an argument to a variadic function
|
||||
"/w14840", -- non-portable use of class 'type' as an argument to a variadic function
|
||||
-- Disable warnings
|
||||
"/wd4100", -- Unreferenced parameters are ok.
|
||||
"/wd4201", -- Nameless struct/unions are ok.
|
||||
|
@ -163,10 +155,10 @@ filter("platforms:Windows")
|
|||
"/wd4189", -- 'local variable is initialized but not referenced'.
|
||||
})
|
||||
flags({
|
||||
"NoMinimalRebuild", -- Required for /MP above.
|
||||
"MultiProcessorCompile", -- Multiprocessor compilation.
|
||||
"NoMinimalRebuild", -- Required for /MP above.
|
||||
})
|
||||
|
||||
symbols("On")
|
||||
defines({
|
||||
"_CRT_NONSTDC_NO_DEPRECATE",
|
||||
"_CRT_SECURE_NO_WARNINGS",
|
||||
|
|
|
@ -71,8 +71,8 @@ std::unique_ptr<EmulatorWindow> EmulatorWindow::Create(Emulator* emulator) {
|
|||
std::unique_ptr<EmulatorWindow> emulator_window(new EmulatorWindow(emulator));
|
||||
|
||||
emulator_window->loop()->PostSynchronous([&emulator_window]() {
|
||||
xe::threading::set_name("Win32 Loop");
|
||||
xe::Profiler::ThreadEnter("Win32 Loop");
|
||||
xe::threading::set_name("Windowing Loop");
|
||||
xe::Profiler::ThreadEnter("Windowing Loop");
|
||||
|
||||
if (!emulator_window->Initialize()) {
|
||||
xe::FatalError("Failed to initialize main window");
|
||||
|
|
|
@ -8,19 +8,6 @@ project("xenia-app")
|
|||
targetname("xenia_canary")
|
||||
language("C++")
|
||||
links({
|
||||
"aes_128",
|
||||
"capstone",
|
||||
"fmt",
|
||||
"dxbc",
|
||||
"discord-rpc",
|
||||
"glslang-spirv",
|
||||
"imgui",
|
||||
"libavcodec",
|
||||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"spirv-tools",
|
||||
"volk",
|
||||
"xenia-app-discord",
|
||||
"xenia-apu",
|
||||
"xenia-apu-nop",
|
||||
|
@ -43,6 +30,21 @@ project("xenia-app")
|
|||
"xenia-ui-vulkan",
|
||||
"xenia-patcher",
|
||||
"xenia-vfs",
|
||||
})
|
||||
links({
|
||||
"aes_128",
|
||||
"capstone",
|
||||
"fmt",
|
||||
"dxbc",
|
||||
"discord-rpc",
|
||||
"glslang-spirv",
|
||||
"imgui",
|
||||
"libavcodec",
|
||||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"spirv-tools",
|
||||
"volk",
|
||||
"xxhash",
|
||||
})
|
||||
defines({
|
||||
|
|
|
@ -302,6 +302,7 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
|
|||
|
||||
// No available data.
|
||||
if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
|
||||
data->output_buffer_valid = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -144,7 +144,7 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) {
|
|||
WorkerThreadMain();
|
||||
return 0;
|
||||
}));
|
||||
worker_thread_->set_name("XMA Decoder Worker");
|
||||
worker_thread_->set_name("XMA Decoder");
|
||||
worker_thread_->set_can_debugger_suspend(true);
|
||||
worker_thread_->Create();
|
||||
|
||||
|
|
|
@ -9,21 +9,51 @@
|
|||
|
||||
#include "xenia/base/debugging.h"
|
||||
|
||||
#include <signal.h>
|
||||
#include <csignal>
|
||||
#include <cstdarg>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
|
||||
#include "xenia/base/string_buffer.h"
|
||||
|
||||
namespace xe {
|
||||
namespace debugging {
|
||||
|
||||
bool IsDebuggerAttached() { return false; }
|
||||
void Break() { raise(SIGTRAP); }
|
||||
// Reports whether a tracer is attached to this process by scanning
// /proc/self/status for the "TracerPid:" entry. Returns true only when that
// entry is present with a non-zero pid; returns false when the file cannot be
// opened or the entry is missing.
bool IsDebuggerAttached() {
  std::ifstream status_file("/proc/self/status");
  if (!status_file.is_open()) return false;

  for (std::string entry; std::getline(status_file, entry);) {
    std::istringstream fields(entry);
    std::string label;
    fields >> label;
    if (label != "TracerPid:") continue;

    // The first matching entry decides the answer.
    uint32_t tracer_pid;
    fields >> tracer_pid;
    return tracer_pid != 0;
  }
  return false;
}
|
||||
|
||||
// Raises SIGTRAP on the calling thread. On the first call only, a SIGTRAP
// handler is installed beforehand; that handler does nothing except switch the
// signal's disposition back to SIG_DFL.
void Break() {
  static std::once_flag handler_installed;
  std::call_once(handler_installed, [] {
    // Install handler for sigtrap only once
    auto restore_default = [](int) {
      // Forward signal to default handler after being caught
      std::signal(SIGTRAP, SIG_DFL);
    };
    std::signal(SIGTRAP, restore_default);
  });
  std::raise(SIGTRAP);
}
|
||||
|
||||
namespace internal {
|
||||
void DebugPrint(const char* s) {
|
||||
// TODO: proper implementation.
|
||||
}
|
||||
// Writes `s` followed by a newline to std::clog and flushes the stream.
// A null `s` is ignored: streaming a null C string is undefined behaviour, and
// libstdc++ reacts by setting badbit, which would silently discard every later
// debug message.
void DebugPrint(const char* s) {
  if (!s) {
    return;
  }
  std::clog << s << std::endl;
}
|
||||
} // namespace internal
|
||||
|
||||
} // namespace debugging
|
||||
|
|
|
@ -93,7 +93,7 @@ class Logger {
|
|||
|
||||
write_thread_ =
|
||||
xe::threading::Thread::Create({}, [this]() { WriteThread(); });
|
||||
write_thread_->set_name("xe::FileLogSink Writer");
|
||||
write_thread_->set_name("Logging Writer");
|
||||
}
|
||||
|
||||
~Logger() {
|
||||
|
|
|
@ -76,14 +76,12 @@
|
|||
#endif // XE_PLATFORM_MAC
|
||||
|
||||
#if XE_COMPILER_MSVC
|
||||
#define XEPACKEDSTRUCT(name, value) \
|
||||
__pragma(pack(push, 1)) struct name##_s value __pragma(pack(pop)); \
|
||||
typedef struct name##_s name;
|
||||
#define XEPACKEDSTRUCT(name, value) \
|
||||
__pragma(pack(push, 1)) struct name value __pragma(pack(pop));
|
||||
#define XEPACKEDSTRUCTANONYMOUS(value) \
|
||||
__pragma(pack(push, 1)) struct value __pragma(pack(pop));
|
||||
#define XEPACKEDUNION(name, value) \
|
||||
__pragma(pack(push, 1)) union name##_s value __pragma(pack(pop)); \
|
||||
typedef union name##_s name;
|
||||
#define XEPACKEDUNION(name, value) \
|
||||
__pragma(pack(push, 1)) union name value __pragma(pack(pop));
|
||||
#else
|
||||
#define XEPACKEDSTRUCT(name, value) struct __attribute__((packed)) name value;
|
||||
#define XEPACKEDSTRUCTANONYMOUS(value) struct __attribute__((packed)) value;
|
||||
|
|
|
@ -10,11 +10,15 @@
|
|||
#ifndef XENIA_BASE_STRING_UTIL_H_
|
||||
#define XENIA_BASE_STRING_UTIL_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <charconv>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
#include "third_party/fmt/include/fmt/format.h"
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/memory.h"
|
||||
#include "xenia/base/platform.h"
|
||||
#include "xenia/base/string.h"
|
||||
#include "xenia/base/vec128.h"
|
||||
|
@ -30,6 +34,40 @@
|
|||
namespace xe {
|
||||
namespace string_util {
|
||||
|
||||
// Copies as much of `source` as fits into `dest`, a buffer holding
// `dest_buffer_count` chars, and always writes a terminating NUL after the
// copied characters. Returns the number of characters copied (terminator not
// counted). A zero-sized buffer is left untouched and yields 0.
inline size_t copy_truncating(char* dest, const std::string_view source,
                              size_t dest_buffer_count) {
  if (dest_buffer_count == 0) {
    return 0;
  }
  // One slot is reserved for the terminator.
  const size_t room = dest_buffer_count - size_t(1);
  const size_t length = room < source.size() ? room : source.size();
  std::memcpy(dest, source.data(), length);
  dest[length] = '\0';
  return length;
}
|
||||
|
||||
// UTF-16 counterpart of the char overload: copies as many code units of
// `source` as fit into `dest` (capacity `dest_buffer_count` units), then
// writes a terminating u'\0'. Returns the number of code units copied
// (terminator not counted). A zero-sized buffer is left untouched and yields 0.
inline size_t copy_truncating(char16_t* dest, const std::u16string_view source,
                              size_t dest_buffer_count) {
  if (dest_buffer_count == 0) {
    return 0;
  }
  // One slot is reserved for the terminator.
  const size_t room = dest_buffer_count - size_t(1);
  const size_t units = room < source.size() ? room : source.size();
  std::memcpy(dest, source.data(), units * sizeof(char16_t));
  dest[units] = u'\0';
  return units;
}
|
||||
|
||||
// Like copy_truncating for char16_t, except the code units are transferred
// with xe::copy_and_swap instead of memcpy (presumably byte-swapping each
// unit - confirm against xenia/base/memory.h). Always writes a terminating
// u'\0'. Returns the number of code units copied (terminator not counted).
// A zero-sized buffer is left untouched and yields 0.
inline size_t copy_and_swap_truncating(char16_t* dest,
                                       const std::u16string_view source,
                                       size_t dest_buffer_count) {
  if (dest_buffer_count == 0) {
    return 0;
  }
  // One slot is reserved for the terminator.
  const size_t room = dest_buffer_count - size_t(1);
  const size_t units = room < source.size() ? room : source.size();
  xe::copy_and_swap(dest, source.data(), units);
  dest[units] = u'\0';
  return units;
}
|
||||
|
||||
inline std::string to_hex_string(uint32_t value) {
|
||||
return fmt::format("{:08X}", value);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,967 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2018 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "xenia/base/threading.h"
|
||||
|
||||
#include "third_party/catch/include/catch.hpp"
|
||||
|
||||
namespace xe {
|
||||
namespace base {
|
||||
namespace test {
|
||||
using namespace threading;
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
// Exercises threading::Fence: signalling with nobody waiting, signal/wait
// pairs in several orders, and one Signal() releasing five threads that are
// all blocked in Wait().
TEST_CASE("Fence") {
  std::unique_ptr<threading::Fence> pFence;

  // Signal without wait
  pFence = std::make_unique<threading::Fence>();
  pFence->Signal();

  // Signal once and wait
  pFence = std::make_unique<threading::Fence>();
  pFence->Signal();
  pFence->Wait();

  // Signal twice and wait
  pFence = std::make_unique<threading::Fence>();
  pFence->Signal();
  pFence->Signal();
  pFence->Wait();

  // Signal and wait two times
  pFence = std::make_unique<threading::Fence>();
  pFence->Signal();
  pFence->Wait();
  pFence->Signal();
  pFence->Wait();

  // Test to synchronize multiple threads
  std::atomic<int> started(0);
  std::atomic<int> finished(0);
  pFence = std::make_unique<threading::Fence>();
  auto func = [&pFence, &started, &finished] {
    started.fetch_add(1);
    pFence->Wait();
    finished.fetch_add(1);
  };

  auto threads = std::array<std::thread, 5>({
      std::thread(func),
      std::thread(func),
      std::thread(func),
      std::thread(func),
      std::thread(func),
  });
  // Compare like with like: the counters are int, size() is size_t.
  const int thread_count = static_cast<int>(threads.size());

  // Give every thread time to reach Wait() before checking the counters.
  Sleep(100ms);
  REQUIRE(started.load() == thread_count);
  REQUIRE(finished.load() == 0);

  pFence->Signal();

  for (auto& t : threads) t.join();
  REQUIRE(finished.load() == thread_count);
}
|
||||
|
||||
// logical_processor_count() must match std::thread::hardware_concurrency() on
// each of three consecutive calls.
TEST_CASE("Get number of logical processors") {
  const auto expected = std::thread::hardware_concurrency();
  for (int attempt = 0; attempt < 3; ++attempt) {
    REQUIRE(logical_processor_count() == expected);
  }
}
|
||||
|
||||
// Smoke test: the call only has to complete; nothing is asserted.
TEST_CASE("Enable process to set thread affinity") {
  EnableAffinityConfiguration();
}
|
||||
|
||||
// Smoke test: MaybeYield() only has to return; nothing is asserted.
TEST_CASE("Yield Current Thread", "MaybeYield") {
  MaybeYield();
}
|
||||
|
||||
// Smoke test: SyncMemory() only has to return; nothing is asserted.
TEST_CASE("Sync with Memory Barrier", "SyncMemory") {
  SyncMemory();
}
|
||||
|
||||
// Sleep() must block for at least the requested duration, measured with the
// steady clock.
TEST_CASE("Sleep Current Thread", "Sleep") {
  using Clock = std::chrono::steady_clock;

  auto requested = 50ms;
  const auto begin = Clock::now();
  Sleep(requested);
  const auto elapsed = Clock::now() - begin;

  REQUIRE(elapsed >= requested);
}
|
||||
|
||||
// AlertableSleep() with nothing alerting the thread must block for at least
// the requested duration and report kSuccess.
TEST_CASE("Sleep Current Thread in Alertable State", "Sleep") {
  using Clock = std::chrono::steady_clock;

  auto requested = 50ms;
  const auto begin = Clock::now();
  auto outcome = threading::AlertableSleep(requested);
  const auto elapsed = Clock::now() - begin;

  REQUIRE(elapsed >= requested);
  REQUIRE(outcome == threading::SleepResult::kSuccess);

  // TODO(bwrsandman): Test a Thread to return kAlerted.
  // Need callback to call extended I/O function (ReadFileEx or WriteFileEx)
}
|
||||
|
||||
// Covers the TLS handle API: allocate/free (including double free and the
// invalid handle), storing and reading back a value on the owning thread, and
// checking that a second thread reads zero from the same slot.
TEST_CASE("TlsHandle") {
  // Allocate, then free: only the first free of a live handle succeeds.
  auto tls = threading::AllocateTlsHandle();
  REQUIRE(threading::FreeTlsHandle(tls));
  REQUIRE(!threading::FreeTlsHandle(tls));
  REQUIRE(!threading::FreeTlsHandle(threading::kInvalidTlsHandle));

  // A fresh slot reads zero until something is stored in it.
  tls = threading::AllocateTlsHandle();
  REQUIRE(threading::GetTlsValue(tls) == 0);
  uint32_t payload = 0xDEADBEEF;
  threading::SetTlsValue(tls, reinterpret_cast<uintptr_t>(&payload));
  auto stored = threading::GetTlsValue(tls);
  REQUIRE(threading::GetTlsValue(tls) != 0);
  auto round_tripped = *reinterpret_cast<uint32_t*>(stored);
  REQUIRE(round_tripped == payload);

  // The value stored above must not be visible from another thread.
  uintptr_t seen_by_other_thread = 0;
  auto worker = Thread::Create({}, [&seen_by_other_thread, &tls] {
    seen_by_other_thread = threading::GetTlsValue(tls);
  });

  auto join_status = Wait(worker.get(), false, 50ms);
  REQUIRE(join_status == WaitResult::kSuccess);
  REQUIRE(seen_by_other_thread == 0);

  // Cleanup
  REQUIRE(threading::FreeTlsHandle(tls));
}
|
||||
|
||||
// Checks that repeating HighResolutionTimers invoke their callback about once
// per interval, first for a single timer and then for two timers running at
// the same time. The callback count may differ from elapsed / interval by at
// most one.
TEST_CASE("HighResolutionTimer") {
  // The wait time is 500ms with an interval of 50ms
  // Smaller values are not as precise and fail the test
  const auto wait_time = 500ms;

  // Time the actual sleep duration
  {
    const auto interval = 50ms;
    // Explicitly zeroed: before C++20 a default-constructed std::atomic holds
    // an indeterminate value, which would make the count checks meaningless.
    std::atomic<uint64_t> counter(0);
    auto start = std::chrono::steady_clock::now();
    auto cb = [&counter] { ++counter; };
    auto pTimer = HighResolutionTimer::CreateRepeating(interval, cb);
    Sleep(wait_time);
    pTimer.reset();
    auto duration = std::chrono::steady_clock::now() - start;

    // Should have run as many times as wait_time / timer_interval plus or
    // minus 1 due to imprecision of Sleep
    REQUIRE(duration.count() >= wait_time.count());
    auto ratio = static_cast<uint64_t>(duration / interval);
    REQUIRE(counter >= ratio - 1);
    REQUIRE(counter <= ratio + 1);
  }

  // Test concurrent timers
  {
    const auto interval1 = 100ms;
    const auto interval2 = 200ms;
    // Zero-initialized for the same reason as above.
    std::atomic<uint64_t> counter1(0);
    std::atomic<uint64_t> counter2(0);
    auto start = std::chrono::steady_clock::now();
    auto cb1 = [&counter1] { ++counter1; };
    auto cb2 = [&counter2] { ++counter2; };
    auto pTimer1 = HighResolutionTimer::CreateRepeating(interval1, cb1);
    auto pTimer2 = HighResolutionTimer::CreateRepeating(interval2, cb2);
    Sleep(wait_time);
    pTimer1.reset();
    pTimer2.reset();
    auto duration = std::chrono::steady_clock::now() - start;

    // Should have run as many times as wait_time / timer_interval plus or
    // minus 1 due to imprecision of Sleep
    REQUIRE(duration.count() >= wait_time.count());
    auto ratio1 = static_cast<uint64_t>(duration / interval1);
    auto ratio2 = static_cast<uint64_t>(duration / interval2);
    REQUIRE(counter1 >= ratio1 - 1);
    REQUIRE(counter1 <= ratio1 + 1);
    REQUIRE(counter2 >= ratio2 - 1);
    REQUIRE(counter2 <= ratio2 + 1);
  }

  // TODO(bwrsandman): Check on which thread callbacks are executed when
  // spawned from differing threads
}
|
||||
|
||||
// Waits on a mixed set of handles (mutant, semaphore, event, thread): WaitAny
// must succeed on index 0, and WaitAll must then succeed on the whole set.
TEST_CASE("Wait on Multiple Handles", "Wait") {
  auto owned_mutant = Mutant::Create(true);
  auto full_semaphore = Semaphore::Create(10, 10);
  auto manual_event = Event::CreateManualResetEvent(false);
  auto worker =
      Thread::Create({}, [&owned_mutant, &full_semaphore, &manual_event] {
        manual_event->Set();
        Wait(owned_mutant.get(), false, 25ms);
        full_semaphore->Release(1, nullptr);
        Wait(owned_mutant.get(), false, 25ms);
        owned_mutant->Release();
      });

  std::vector<WaitHandle*> handles = {
      owned_mutant.get(),
      full_semaphore.get(),
      manual_event.get(),
      worker.get(),
  };

  auto first_signalled = WaitAny(handles, false, 100ms);
  REQUIRE(first_signalled.first == WaitResult::kSuccess);
  REQUIRE(first_signalled.second == 0);

  auto everything = WaitAll(handles, false, 100ms);
  REQUIRE(everything == WaitResult::kSuccess);
}
|
||||
|
||||
// SignalAndWait: a worker thread waits on a mutant and then sets an event.
// The event wait first times out; after SignalAndWait(mutant, event) the
// event wait succeeds and the worker thread finishes.
TEST_CASE("Signal and Wait") {
  auto owned_mutant = Mutant::Create(true);
  auto auto_event = Event::CreateAutoResetEvent(false);
  auto worker = Thread::Create({}, [&owned_mutant, &auto_event] {
    Wait(owned_mutant.get(), false);
    auto_event->Set();
  });

  WaitResult status = Wait(auto_event.get(), false, 50ms);
  REQUIRE(status == WaitResult::kTimeout);

  status = SignalAndWait(owned_mutant.get(), auto_event.get(), false, 50ms);
  REQUIRE(status == WaitResult::kSuccess);

  status = Wait(worker.get(), false, 50ms);
  REQUIRE(status == WaitResult::kSuccess);
}
|
||||
|
||||
// An auto-reset event: waiting times out while unset, succeeds once after
// Set(), and times out again because the successful wait consumed the signal.
TEST_CASE("Wait on Event", "Event") {
  auto auto_event = Event::CreateAutoResetEvent(false);

  // Unset event
  WaitResult status = Wait(auto_event.get(), false, 50ms);
  REQUIRE(status == WaitResult::kTimeout);

  // Set event
  auto_event->Set();
  status = Wait(auto_event.get(), false, 50ms);
  REQUIRE(status == WaitResult::kSuccess);

  // Signal was consumed by the previous wait
  status = Wait(auto_event.get(), false, 50ms);
  REQUIRE(status == WaitResult::kTimeout);
}
|
||||
|
||||
// Reset() on an auto-reset event: it clears a pending Set(), is harmless on
// an already-unset event, and does not stop a later Set() from signalling.
TEST_CASE("Reset Event", "Event") {
  auto auto_event = Event::CreateAutoResetEvent(false);

  // Set followed by Reset leaves the event unsignalled
  auto_event->Set();
  auto_event->Reset();
  WaitResult status = Wait(auto_event.get(), false, 50ms);
  REQUIRE(status == WaitResult::kTimeout);

  // Resetting an unset event keeps it unsignalled
  auto_event->Reset();
  status = Wait(auto_event.get(), false, 50ms);
  REQUIRE(status == WaitResult::kTimeout);

  // A reset event can be set again
  auto_event->Set();
  status = Wait(auto_event.get(), false, 50ms);
  REQUIRE(status == WaitResult::kSuccess);
}
|
||||
|
||||
// Four threads wait on different combinations of three auto-reset events and
// one manual-reset event (index 3) using WaitAll / WaitAny. Each thread whose
// wait succeeds records its id in `order`; the events are then set one by one
// and only the first recorded id is asserted.
TEST_CASE("Wait on Multiple Events", "Event") {
  auto events = std::array<std::unique_ptr<Event>, 4>{
      Event::CreateAutoResetEvent(false),
      Event::CreateAutoResetEvent(false),
      Event::CreateAutoResetEvent(false),
      Event::CreateManualResetEvent(false),
  };

  // Zero-filled and larger than the four ids written, so order.data() is
  // always a NUL-terminated string for INFO below.
  std::array<char, 8> order = {0};
  std::atomic_uint index(0);
  auto sign_in = [&order, &index](uint32_t id) {
    // std::memory_order_relaxed is spelled the same way in C++11 through
    // C++20; the nested std::memory_order::memory_order_relaxed spelling no
    // longer compiles once memory_order becomes a scoped enum in C++20.
    auto i = index.fetch_add(1, std::memory_order_relaxed);
    order[i] = static_cast<char>('0' + id);
  };

  auto threads = std::array<std::thread, 4>{
      std::thread([&events, &sign_in] {
        auto res = WaitAll({events[1].get(), events[3].get()}, false, 100ms);
        if (res == WaitResult::kSuccess) {
          sign_in(1);
        }
      }),
      std::thread([&events, &sign_in] {
        auto res = WaitAny({events[0].get(), events[2].get()}, false, 100ms);
        if (res.first == WaitResult::kSuccess) {
          sign_in(2);
        }
      }),
      std::thread([&events, &sign_in] {
        auto res = WaitAll({events[0].get(), events[2].get(), events[3].get()},
                           false, 100ms);
        if (res == WaitResult::kSuccess) {
          sign_in(3);
        }
      }),
      std::thread([&events, &sign_in] {
        auto res = WaitAny({events[1].get(), events[3].get()}, false, 100ms);
        if (res.first == WaitResult::kSuccess) {
          sign_in(4);
        }
      }),
  };

  Sleep(10ms);
  events[3]->Set();  // Signals thread id=4 and stays on for 1 and 3
  Sleep(10ms);
  events[1]->Set();  // Signals thread id=1
  Sleep(10ms);
  events[0]->Set();  // Signals thread id=2
  Sleep(10ms);
  events[2]->Set();  // Partial signals thread id=3
  events[0]->Set();  // Signals thread id=3

  for (auto& t : threads) {
    t.join();
  }

  INFO(order.data());
  REQUIRE(order[0] == '4');
  // TODO(bwrsandman): Order is not always maintained on linux
  // REQUIRE(order[1] == '1');
  // REQUIRE(order[2] == '2');
  // REQUIRE(order[3] == '3');
}
|
||||
|
||||
// Covers Semaphore: waiting on an empty semaphore, the previous count reported
// by Release(), rejected releases (over the maximum, zero, negative), draining
// a full semaphore, contention between threads, and construction with
// arguments the documentation calls invalid.
TEST_CASE("Wait on Semaphore", "Semaphore") {
  WaitResult status;
  std::unique_ptr<Semaphore> semaphore;
  int prior = 0;

  // Wait on semaphore with no room
  semaphore = Semaphore::Create(0, 5);
  status = Wait(semaphore.get(), false, 10ms);
  REQUIRE(status == WaitResult::kTimeout);

  // Add room in semaphore
  REQUIRE(semaphore->Release(2, &prior));
  REQUIRE(prior == 0);
  REQUIRE(semaphore->Release(1, &prior));
  REQUIRE(prior == 2);
  status = Wait(semaphore.get(), false, 10ms);
  REQUIRE(status == WaitResult::kSuccess);
  REQUIRE(semaphore->Release(1, &prior));
  REQUIRE(prior == 2);

  // Set semaphore over maximum_count: every release must fail and leave
  // `prior` at its sentinel value.
  semaphore = Semaphore::Create(5, 5);
  prior = -1;
  for (int amount : {1, 10}) {
    REQUIRE_FALSE(semaphore->Release(amount, &prior));
    REQUIRE(prior == -1);
  }
  semaphore = Semaphore::Create(0, 5);
  for (int amount : {10, 10}) {
    REQUIRE_FALSE(semaphore->Release(amount, &prior));
    REQUIRE(prior == -1);
  }

  // Test invalid Release parameters
  for (int amount : {0, -1}) {
    REQUIRE_FALSE(semaphore->Release(amount, &prior));
    REQUIRE(prior == -1);
  }

  // Wait on fully available semaphore: five acquisitions succeed, the sixth
  // times out.
  semaphore = Semaphore::Create(5, 5);
  for (int slot = 0; slot < 5; ++slot) {
    status = Wait(semaphore.get(), false, 10ms);
    REQUIRE(status == WaitResult::kSuccess);
  }
  status = Wait(semaphore.get(), false, 10ms);
  REQUIRE(status == WaitResult::kTimeout);

  // Semaphore between threads
  semaphore = Semaphore::Create(5, 5);
  Sleep(10ms);
  // Occupy the semaphore with 5 threads
  auto hold_briefly = [&semaphore] {
    auto acquired = Wait(semaphore.get(), false, 100ms);
    Sleep(500ms);
    if (acquired == WaitResult::kSuccess) {
      semaphore->Release(1, nullptr);
    }
  };
  std::array<std::thread, 5> holders;
  for (auto& holder : holders) {
    holder = std::thread(hold_briefly);
  }
  // Give threads time to acquire semaphore
  Sleep(10ms);
  // Attempt to acquire full semaphore with current (6th) thread
  status = Wait(semaphore.get(), false, 20ms);
  REQUIRE(status == WaitResult::kTimeout);
  // Give threads time to release semaphore
  for (auto& holder : holders) {
    holder.join();
  }
  status = Wait(semaphore.get(), false, 10ms);
  REQUIRE(status == WaitResult::kSuccess);
  semaphore->Release(1, &prior);
  REQUIRE(prior == 4);

  // Test invalid construction parameters
  // These are invalid according to documentation
  // TODO(bwrsandman): Many of these invalid invocations succeed
  semaphore = Semaphore::Create(-1, 5);
  // REQUIRE(semaphore.get() == nullptr);
  semaphore = Semaphore::Create(10, 5);
  // REQUIRE(semaphore.get() == nullptr);
  semaphore = Semaphore::Create(0, 0);
  // REQUIRE(semaphore.get() == nullptr);
  semaphore = Semaphore::Create(0, -1);
  // REQUIRE(semaphore.get() == nullptr);
}
|
||||
|
||||
// WaitAll / WaitAny over two semaphores. After each wait, Release() is used
// to check the count each semaphore was left with: the previous count is
// written on success, and the -1 sentinel is kept when the release fails.
TEST_CASE("Wait on Multiple Semaphores", "Semaphore") {
  WaitResult all_status;
  std::pair<WaitResult, size_t> any_status;
  int prior;
  std::unique_ptr<Semaphore> first, second;

  // Test Wait all which should fail
  first = Semaphore::Create(0, 5);
  second = Semaphore::Create(5, 5);
  all_status = WaitAll({first.get(), second.get()}, false, 10ms);
  REQUIRE(all_status == WaitResult::kTimeout);
  prior = -1;
  REQUIRE(first->Release(1, &prior));
  REQUIRE(prior == 0);
  prior = -1;
  REQUIRE_FALSE(second->Release(1, &prior));
  REQUIRE(prior == -1);

  // Test Wait all again which should succeed
  first = Semaphore::Create(1, 5);
  second = Semaphore::Create(5, 5);
  all_status = WaitAll({first.get(), second.get()}, false, 10ms);
  REQUIRE(all_status == WaitResult::kSuccess);
  prior = -1;
  REQUIRE(first->Release(1, &prior));
  REQUIRE(prior == 0);
  prior = -1;
  REQUIRE(second->Release(1, &prior));
  REQUIRE(prior == 4);

  // Test Wait Any which should fail
  first = Semaphore::Create(0, 5);
  second = Semaphore::Create(0, 5);
  any_status = WaitAny({first.get(), second.get()}, false, 10ms);
  REQUIRE(any_status.first == WaitResult::kTimeout);
  REQUIRE(any_status.second == 0);
  prior = -1;
  REQUIRE(first->Release(1, &prior));
  REQUIRE(prior == 0);
  prior = -1;
  REQUIRE(second->Release(1, &prior));
  REQUIRE(prior == 0);

  // Test Wait Any which should succeed
  first = Semaphore::Create(0, 5);
  second = Semaphore::Create(5, 5);
  any_status = WaitAny({first.get(), second.get()}, false, 10ms);
  REQUIRE(any_status.first == WaitResult::kSuccess);
  REQUIRE(any_status.second == 1);
  prior = -1;
  REQUIRE(first->Release(1, &prior));
  REQUIRE(prior == 0);
  prior = -1;
  REQUIRE(second->Release(1, &prior));
  REQUIRE(prior == 4);
}
|
||||
|
||||
// Covers Release() and Wait() on a single Mutant: releasing with and without
// initial ownership, acquiring through Wait(), acquiring the same mutant
// several times in a row, and waiting on a mutant that another thread created
// as owned and holds for a while.
TEST_CASE("Wait on Mutant", "Mutant") {
|
||||
WaitResult result;
|
||||
std::unique_ptr<Mutant> mut;
|
||||
|
||||
// Release on initially owned mutant
|
||||
mut = Mutant::Create(true);
|
||||
REQUIRE(mut->Release());
|
||||
REQUIRE_FALSE(mut->Release());
|
||||
|
||||
// Release on initially not-owned mutant
|
||||
mut = Mutant::Create(false);
|
||||
REQUIRE_FALSE(mut->Release());
|
||||
|
||||
// Wait on initially owned mutant
|
||||
mut = Mutant::Create(true);
|
||||
result = Wait(mut.get(), false, 1ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
// Created as owned and then acquired once more through Wait(), so exactly
|
||||
// two releases are expected to succeed before the third one fails.
|
||||
REQUIRE(mut->Release());
|
||||
REQUIRE(mut->Release());
|
||||
REQUIRE_FALSE(mut->Release());
|
||||
|
||||
// Wait on initially not owned mutant
|
||||
mut = Mutant::Create(false);
|
||||
result = Wait(mut.get(), false, 1ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
REQUIRE(mut->Release());
|
||||
REQUIRE_FALSE(mut->Release());
|
||||
|
||||
// Multiple waits (or locks)
|
||||
mut = Mutant::Create(false);
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
result = Wait(mut.get(), false, 1ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
}
|
||||
// Ten successful waits must be matched by ten successful releases; the
|
||||
// eleventh release is required to fail.
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
REQUIRE(mut->Release());
|
||||
}
|
||||
REQUIRE_FALSE(mut->Release());
|
||||
|
||||
// Test mutants on other threads
|
||||
// thread1 replaces mut with a mutant it creates as owned, keeps it for
|
||||
// 100ms and then releases it.
|
||||
// NOTE(review): mut is reassigned on thread1 while this thread reads it;
|
||||
// the ordering appears to rely only on the Sleep() calls - confirm this is
|
||||
// intended.
|
||||
auto thread1 = std::thread([&mut] {
|
||||
Sleep(5ms);
|
||||
mut = Mutant::Create(true);
|
||||
Sleep(100ms);
|
||||
mut->Release();
|
||||
});
|
||||
Sleep(10ms);
|
||||
// This thread is not the one that created the mutant as owned, so both
|
||||
// releasing it and waiting on it (within 50ms) are expected to fail.
|
||||
REQUIRE_FALSE(mut->Release());
|
||||
Sleep(10ms);
|
||||
result = Wait(mut.get(), false, 50ms);
|
||||
REQUIRE(result == WaitResult::kTimeout);
|
||||
thread1.join();
|
||||
// thread1 has released the mutant and finished, so it can be acquired now.
|
||||
result = Wait(mut.get(), false, 1ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
REQUIRE(mut->Release());
|
||||
}
|
||||
|
||||
// Exercises WaitAll and WaitAny on two mutants that are created on helper
// threads with different initial-ownership combinations. After each wait,
// Release() on both mutants is used to check which of them the waiting (test)
// thread ended up owning.
// NOTE(review): mut0/mut1 are assigned on the helper threads and read here
// after a Sleep(10ms) with no other synchronization visible - confirm.
TEST_CASE("Wait on Multiple Mutants", "Mutant") {
|
||||
WaitResult all_result;
|
||||
std::pair<WaitResult, size_t> any_result;
|
||||
std::unique_ptr<Mutant> mut0, mut1;
|
||||
|
||||
// Test which should fail for WaitAll and WaitAny
|
||||
// thread0 creates both mutants as owned and holds them for 50ms.
|
||||
auto thread0 = std::thread([&mut0, &mut1] {
|
||||
mut0 = Mutant::Create(true);
|
||||
mut1 = Mutant::Create(true);
|
||||
Sleep(50ms);
|
||||
mut0->Release();
|
||||
mut1->Release();
|
||||
});
|
||||
Sleep(10ms);
|
||||
all_result = WaitAll({mut0.get(), mut1.get()}, false, 10ms);
|
||||
REQUIRE(all_result == WaitResult::kTimeout);
|
||||
// Neither wait acquired anything, so every release from this thread fails.
|
||||
REQUIRE_FALSE(mut0->Release());
|
||||
REQUIRE_FALSE(mut1->Release());
|
||||
any_result = WaitAny({mut0.get(), mut1.get()}, false, 10ms);
|
||||
REQUIRE(any_result.first == WaitResult::kTimeout);
|
||||
REQUIRE(any_result.second == 0);
|
||||
REQUIRE_FALSE(mut0->Release());
|
||||
REQUIRE_FALSE(mut1->Release());
|
||||
thread0.join();
|
||||
|
||||
// Test which should fail for WaitAll but not WaitAny
|
||||
// thread1 creates only mut0 as owned; mut1 is created not owned.
|
||||
auto thread1 = std::thread([&mut0, &mut1] {
|
||||
mut0 = Mutant::Create(true);
|
||||
mut1 = Mutant::Create(false);
|
||||
Sleep(50ms);
|
||||
mut0->Release();
|
||||
});
|
||||
Sleep(10ms);
|
||||
all_result = WaitAll({mut0.get(), mut1.get()}, false, 10ms);
|
||||
REQUIRE(all_result == WaitResult::kTimeout);
|
||||
// The timed-out WaitAll is required not to have acquired mut1 on its own.
|
||||
REQUIRE_FALSE(mut0->Release());
|
||||
REQUIRE_FALSE(mut1->Release());
|
||||
any_result = WaitAny({mut0.get(), mut1.get()}, false, 10ms);
|
||||
REQUIRE(any_result.first == WaitResult::kSuccess);
|
||||
// Index 1 (mut1) is the one WaitAny acquired, so only its release succeeds.
|
||||
REQUIRE(any_result.second == 1);
|
||||
REQUIRE_FALSE(mut0->Release());
|
||||
REQUIRE(mut1->Release());
|
||||
thread1.join();
|
||||
|
||||
// Test which should pass for WaitAll and WaitAny
|
||||
// thread2 creates both mutants as not owned and never touches them again.
|
||||
auto thread2 = std::thread([&mut0, &mut1] {
|
||||
mut0 = Mutant::Create(false);
|
||||
mut1 = Mutant::Create(false);
|
||||
Sleep(50ms);
|
||||
});
|
||||
Sleep(10ms);
|
||||
all_result = WaitAll({mut0.get(), mut1.get()}, false, 10ms);
|
||||
REQUIRE(all_result == WaitResult::kSuccess);
|
||||
REQUIRE(mut0->Release());
|
||||
REQUIRE(mut1->Release());
|
||||
any_result = WaitAny({mut0.get(), mut1.get()}, false, 10ms);
|
||||
REQUIRE(any_result.first == WaitResult::kSuccess);
|
||||
// With both mutants free, WaitAny is expected to report index 0 and to
|
||||
// acquire only mut0.
|
||||
REQUIRE(any_result.second == 0);
|
||||
REQUIRE(mut0->Release());
|
||||
REQUIRE_FALSE(mut1->Release());
|
||||
thread2.join();
|
||||
}
|
||||
|
||||
// Covers waiting on a single Timer: one-shot signaling through SetOnce() for
// a manual-reset timer (stays signaled after a successful wait) and for a
// synchronization timer (resets after a successful wait), repeating signaling
// through SetRepeating(), stopping it with Cancel(), and replacing a repeating
// schedule with a SetOnce() call.
TEST_CASE("Wait on Timer", "Timer") {
|
||||
WaitResult result;
|
||||
std::unique_ptr<Timer> timer;
|
||||
|
||||
// Test Manual Reset
|
||||
timer = Timer::CreateManualResetTimer();
|
||||
result = Wait(timer.get(), false, 1ms);
|
||||
REQUIRE(result == WaitResult::kTimeout);
|
||||
REQUIRE(timer->SetOnce(1ms));  // Signals it
|
||||
result = Wait(timer.get(), false, 2ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
result = Wait(timer.get(), false, 1ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);  // Did not reset
|
||||
|
||||
// Test Synchronization
|
||||
timer = Timer::CreateSynchronizationTimer();
|
||||
result = Wait(timer.get(), false, 1ms);
|
||||
REQUIRE(result == WaitResult::kTimeout);
|
||||
REQUIRE(timer->SetOnce(1ms));  // Signals it
|
||||
result = Wait(timer.get(), false, 2ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
result = Wait(timer.get(), false, 1ms);
|
||||
REQUIRE(result == WaitResult::kTimeout);  // Did reset
|
||||
|
||||
// TODO(bwrsandman): This test unexpectedly fails under windows
|
||||
// Test long due time
|
||||
// timer = Timer::CreateSynchronizationTimer();
|
||||
// REQUIRE(timer->SetOnce(10s));
|
||||
// result = Wait(timer.get(), false, 10ms);  // Still signals under windows
|
||||
// REQUIRE(result == WaitResult::kTimeout);
|
||||
|
||||
// Test Repeating
|
||||
// With the long-due-time section above disabled, this reuses the
|
||||
// synchronization timer created earlier in this test.
|
||||
REQUIRE(timer->SetRepeating(1ms, 10ms));
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
result = Wait(timer.get(), false, 20ms);
|
||||
// Records the iteration index so a failing REQUIRE below reports it.
|
||||
INFO(i);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
}
|
||||
MaybeYield();
|
||||
Sleep(10ms);  // Skip a few events
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
result = Wait(timer.get(), false, 20ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
}
|
||||
// Cancel it
|
||||
timer->Cancel();
|
||||
result = Wait(timer.get(), false, 20ms);
|
||||
REQUIRE(result == WaitResult::kTimeout);
|
||||
MaybeYield();
|
||||
Sleep(10ms);  // Skip a few events
|
||||
// Waits a second time to check the cancelled timer stays unsignaled.
|
||||
result = Wait(timer.get(), false, 20ms);
|
||||
REQUIRE(result == WaitResult::kTimeout);
|
||||
// Cancel with SetOnce
|
||||
REQUIRE(timer->SetRepeating(1ms, 10ms));
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
result = Wait(timer.get(), false, 20ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
}
|
||||
REQUIRE(timer->SetOnce(1ms));
|
||||
result = Wait(timer.get(), false, 20ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);  // Signal from Set Once
|
||||
result = Wait(timer.get(), false, 20ms);
|
||||
REQUIRE(result == WaitResult::kTimeout);  // No more signals from repeating
|
||||
}
|
||||
|
||||
// Exercises WaitAll and WaitAny over two timers: timer0 is a synchronization
// timer and timer1 is a manual-reset timer. The steps go from no timer
// signaled, to only timer1 signaled, to both signaled, and finally check that
// timer0 no longer satisfies a wait after it has been waited on.
TEST_CASE("Wait on Multiple Timers", "Timer") {
|
||||
WaitResult all_result;
|
||||
std::pair<WaitResult, size_t> any_result;
|
||||
|
||||
auto timer0 = Timer::CreateSynchronizationTimer();
|
||||
auto timer1 = Timer::CreateManualResetTimer();
|
||||
|
||||
// None signaled
|
||||
all_result = WaitAll({timer0.get(), timer1.get()}, false, 1ms);
|
||||
REQUIRE(all_result == WaitResult::kTimeout);
|
||||
any_result = WaitAny({timer0.get(), timer1.get()}, false, 1ms);
|
||||
REQUIRE(any_result.first == WaitResult::kTimeout);
|
||||
REQUIRE(any_result.second == 0);
|
||||
|
||||
// Some signaled
|
||||
REQUIRE(timer1->SetOnce(1ms));
|
||||
all_result = WaitAll({timer0.get(), timer1.get()}, false, 100ms);
|
||||
REQUIRE(all_result == WaitResult::kTimeout);
|
||||
any_result = WaitAny({timer0.get(), timer1.get()}, false, 100ms);
|
||||
REQUIRE(any_result.first == WaitResult::kSuccess);
|
||||
// Only timer1 (index 1) has been set at this point.
|
||||
REQUIRE(any_result.second == 1);
|
||||
|
||||
// All signaled
|
||||
REQUIRE(timer0->SetOnce(1ms));
|
||||
all_result = WaitAll({timer0.get(), timer1.get()}, false, 100ms);
|
||||
REQUIRE(all_result == WaitResult::kSuccess);
|
||||
// timer0 is set again before the WaitAny; timer1 is not set again anywhere
|
||||
// below and is still expected to satisfy waits.
|
||||
REQUIRE(timer0->SetOnce(1ms));
|
||||
Sleep(1ms);
|
||||
any_result = WaitAny({timer0.get(), timer1.get()}, false, 100ms);
|
||||
REQUIRE(any_result.first == WaitResult::kSuccess);
|
||||
REQUIRE(any_result.second == 0);
|
||||
|
||||
// Check that timer0 reset
|
||||
any_result = WaitAny({timer0.get(), timer1.get()}, false, 100ms);
|
||||
REQUIRE(any_result.first == WaitResult::kSuccess);
|
||||
// The wait is now satisfied by index 1 (timer1) instead of timer0.
|
||||
REQUIRE(any_result.second == 1);
|
||||
}
|
||||
|
||||
// Placeholder test case: it currently asserts only REQUIRE(true) and does not
// exercise any timer callback behavior yet (see the TODO below).
TEST_CASE("Create and Trigger Timer Callbacks", "Timer") {
|
||||
// TODO(bwrsandman): Check which thread performs callback and timing of
|
||||
// callback
|
||||
REQUIRE(true);
|
||||
}
|
||||
|
||||
// Checks the current-thread id helpers: current_thread_system_id() returns a
// positive value, current_thread_id() initially equals it, and
// set_current_thread_id() can override and later restore the reported id.
TEST_CASE("Set and Test Current Thread ID", "Thread") {
|
||||
// System ID
|
||||
auto system_id = current_thread_system_id();
|
||||
REQUIRE(system_id > 0);
|
||||
|
||||
// Thread ID
|
||||
auto thread_id = current_thread_id();
|
||||
REQUIRE(thread_id == system_id);
|
||||
|
||||
// Set a new thread id
|
||||
const uint32_t new_thread_id = 0xDEADBEEF;
|
||||
set_current_thread_id(new_thread_id);
|
||||
REQUIRE(current_thread_id() == new_thread_id);
|
||||
|
||||
// Set back original thread id of system
|
||||
// Passing the maximum uint32_t value is expected to make
|
||||
// current_thread_id() report the system id again (checked right below).
|
||||
set_current_thread_id(std::numeric_limits<uint32_t>::max());
|
||||
REQUIRE(current_thread_id() == system_id);
|
||||
|
||||
// TODO(bwrsandman): Test on Thread object
|
||||
}
|
||||
|
||||
// Checks that a current Thread object is available and that set_name() can be
// called without throwing, then restores the name the thread had before.
// NOTE(review): the new name is never read back, so this only verifies that
// set_name() does not throw - confirm whether a readback check is wanted.
TEST_CASE("Set and Test Current Thread Name", "Thread") {
|
||||
auto current_thread = Thread::GetCurrentThread();
|
||||
REQUIRE(current_thread);
|
||||
// Saved so the original name can be put back at the end of the test.
|
||||
auto old_thread_name = current_thread->name();
|
||||
|
||||
std::string new_thread_name = "Threading Test";
|
||||
REQUIRE_NOTHROW(set_name(new_thread_name));
|
||||
|
||||
// Restore the old catch.hpp thread name
|
||||
REQUIRE_NOTHROW(set_name(old_thread_name));
|
||||
}
|
||||
|
||||
// Covers the basic Thread lifecycle: creating a thread with default
// parameters, naming it, ending a looping thread with Terminate(), ending a
// thread from inside with Thread::Exit(), waiting on the current thread (which
// must time out), and creating a thread with an explicit stack size.
TEST_CASE("Create and Run Thread", "Thread") {
|
||||
std::unique_ptr<Thread> thread;
|
||||
WaitResult result;
|
||||
Thread::CreationParameters params = {};
|
||||
// Default thread body: sleeps 20ms and returns, so the 50ms waits below are
|
||||
// expected to observe the thread finishing.
|
||||
auto func = [] { Sleep(20ms); };
|
||||
|
||||
// Create most basic case of thread
|
||||
thread = Thread::Create(params, func);
|
||||
REQUIRE(thread->native_handle() != nullptr);
|
||||
REQUIRE_NOTHROW(thread->affinity_mask());
|
||||
REQUIRE(thread->name().empty());
|
||||
result = Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
|
||||
// Add thread name
|
||||
std::string new_name = "Test thread name";
|
||||
thread = Thread::Create(params, func);
|
||||
auto name = thread->name();
|
||||
// Records the initial name so a failing REQUIRE below reports it.
|
||||
INFO(name.c_str());
|
||||
REQUIRE(name.empty());
|
||||
thread->set_name(new_name);
|
||||
REQUIRE(thread->name() == new_name);
|
||||
result = Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
|
||||
// Use Terminate to end an infinitely looping thread
|
||||
thread = Thread::Create(params, [] {
|
||||
while (true) {
|
||||
Sleep(1ms);
|
||||
}
|
||||
});
|
||||
// The loop never returns on its own, so the first wait must time out and
|
||||
// only the wait after Terminate() may succeed.
|
||||
result = Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == WaitResult::kTimeout);
|
||||
thread->Terminate(-1);
|
||||
result = Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
|
||||
// Call Exit from inside an infinitely looping thread
|
||||
thread = Thread::Create(params, [] {
|
||||
while (true) {
|
||||
Thread::Exit(-1);
|
||||
}
|
||||
});
|
||||
result = Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
|
||||
// Call timeout wait on self
|
||||
result = Wait(Thread::GetCurrentThread(), false, 50ms);
|
||||
REQUIRE(result == WaitResult::kTimeout);
|
||||
|
||||
// Repeat the Exit() scenario with an explicit 16 KiB stack size request and
|
||||
// additionally check that creation returned a thread object.
|
||||
params.stack_size = 16 * 1024;
|
||||
thread = Thread::Create(params, [] {
|
||||
while (true) {
|
||||
Thread::Exit(-1);
|
||||
}
|
||||
});
|
||||
REQUIRE(thread != nullptr);
|
||||
result = Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
|
||||
// TODO(bwrsandman): Test with different priorities
|
||||
// TODO(bwrsandman): Test setting and getting thread affinity
|
||||
}
|
||||
|
||||
// Covers Thread::Suspend() and Thread::Resume(): a thread created suspended,
// a thread suspended after creation, nested suspends that need a matching
// number of resumes, and the suspend counts reported through the optional
// out-parameters.
TEST_CASE("Test Suspending Thread", "Thread") {
|
||||
std::unique_ptr<Thread> thread;
|
||||
WaitResult result;
|
||||
Thread::CreationParameters params = {};
|
||||
// Thread body: sleeps 20ms and returns, so a 50ms wait succeeds only when
|
||||
// the thread is actually allowed to run.
|
||||
auto func = [] { Sleep(20ms); };
|
||||
|
||||
// Create initially suspended
|
||||
params.create_suspended = true;
|
||||
thread = threading::Thread::Create(params, func);
|
||||
result = threading::Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == threading::WaitResult::kTimeout);
|
||||
thread->Resume();
|
||||
result = threading::Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == threading::WaitResult::kSuccess);
|
||||
// The remaining scenarios create running threads.
|
||||
params.create_suspended = false;
|
||||
|
||||
// Create and then suspend
|
||||
thread = threading::Thread::Create(params, func);
|
||||
thread->Suspend();
|
||||
result = threading::Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == threading::WaitResult::kTimeout);
|
||||
thread->Resume();
|
||||
result = threading::Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == threading::WaitResult::kSuccess);
|
||||
|
||||
// Test recursive suspend
|
||||
thread = threading::Thread::Create(params, func);
|
||||
thread->Suspend();
|
||||
thread->Suspend();
|
||||
result = threading::Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == threading::WaitResult::kTimeout);
|
||||
// After two suspends a single resume must not let the thread finish.
|
||||
thread->Resume();
|
||||
result = threading::Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == threading::WaitResult::kTimeout);
|
||||
thread->Resume();
|
||||
result = threading::Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == threading::WaitResult::kSuccess);
|
||||
|
||||
// Test suspend count
|
||||
// Both Suspend() and Resume() are expected to report the suspend count as
|
||||
// it was before the call: 0, 1, 2 for the suspends, then 3, 2, 1 for the
|
||||
// resumes.
|
||||
uint32_t suspend_count = 0;
|
||||
thread = threading::Thread::Create(params, func);
|
||||
thread->Suspend(&suspend_count);
|
||||
REQUIRE(suspend_count == 0);
|
||||
thread->Suspend(&suspend_count);
|
||||
REQUIRE(suspend_count == 1);
|
||||
thread->Suspend(&suspend_count);
|
||||
REQUIRE(suspend_count == 2);
|
||||
thread->Resume(&suspend_count);
|
||||
REQUIRE(suspend_count == 3);
|
||||
thread->Resume(&suspend_count);
|
||||
REQUIRE(suspend_count == 2);
|
||||
thread->Resume(&suspend_count);
|
||||
REQUIRE(suspend_count == 1);
|
||||
// One more suspend/resume pair starting from a count of zero.
|
||||
thread->Suspend(&suspend_count);
|
||||
REQUIRE(suspend_count == 0);
|
||||
thread->Resume(&suspend_count);
|
||||
REQUIRE(suspend_count == 1);
|
||||
result = threading::Wait(thread.get(), false, 50ms);
|
||||
REQUIRE(result == threading::WaitResult::kSuccess);
|
||||
}
|
||||
|
||||
// Covers Thread::QueueUserCallback() against a thread that sleeps
// non-alertably (callback must not run), a thread that sleeps alertably
// (callback must run before the thread body finishes), and a queued callback
// that calls Thread::Exit() (the rest of the thread body must not run).
//
// The shared atomic counter `order` is used as a sequence number: each
// participant stores the counter value it observed (fetch-add returns the
// value before the increment) into is_modified or has_finished, and -1 means
// "never ran".
// NOTE(review): is_modified and has_finished are plain ints written on the
// created thread and read here after Wait() - presumably a successful Wait()
// provides the needed ordering; confirm.
TEST_CASE("Test Thread QueueUserCallback", "Thread") {
|
||||
std::unique_ptr<Thread> thread;
|
||||
WaitResult result;
|
||||
Thread::CreationParameters params = {};
|
||||
std::atomic_int order;
|
||||
int is_modified;
|
||||
int has_finished;
|
||||
// Callback queued onto the thread under test; records its position in the
|
||||
// sequence into is_modified.
|
||||
auto callback = [&is_modified, &order] {
|
||||
is_modified = std::atomic_fetch_add_explicit(
|
||||
&order, 1, std::memory_order::memory_order_relaxed);
|
||||
};
|
||||
|
||||
// Without alertable
|
||||
order = 0;
|
||||
is_modified = -1;
|
||||
has_finished = -1;
|
||||
thread = Thread::Create(params, [&has_finished, &order] {
|
||||
// Not using Alertable so callback is not registered
|
||||
Sleep(90ms);
|
||||
has_finished = std::atomic_fetch_add_explicit(
|
||||
&order, 1, std::memory_order::memory_order_relaxed);
|
||||
});
|
||||
result = Wait(thread.get(), true, 50ms);
|
||||
REQUIRE(result == WaitResult::kTimeout);
|
||||
REQUIRE(is_modified == -1);
|
||||
thread->QueueUserCallback(callback);
|
||||
result = Wait(thread.get(), true, 100ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
// The callback never ran, and the thread body was first in the sequence.
|
||||
REQUIRE(is_modified == -1);
|
||||
REQUIRE(has_finished == 0);
|
||||
|
||||
// With alertable
|
||||
order = 0;
|
||||
is_modified = -1;
|
||||
has_finished = -1;
|
||||
thread = Thread::Create(params, [&has_finished, &order] {
|
||||
// Using Alertable so callback is registered
|
||||
AlertableSleep(90ms);
|
||||
has_finished = std::atomic_fetch_add_explicit(
|
||||
&order, 1, std::memory_order::memory_order_relaxed);
|
||||
});
|
||||
result = Wait(thread.get(), true, 50ms);
|
||||
REQUIRE(result == WaitResult::kTimeout);
|
||||
REQUIRE(is_modified == -1);
|
||||
thread->QueueUserCallback(callback);
|
||||
result = Wait(thread.get(), true, 100ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
// The callback ran first (0) and the thread body finished after it (1).
|
||||
REQUIRE(is_modified == 0);
|
||||
REQUIRE(has_finished == 1);
|
||||
|
||||
// Test Exit command with QueueUserCallback
|
||||
order = 0;
|
||||
is_modified = -1;
|
||||
has_finished = -1;
|
||||
thread = Thread::Create(params, [&is_modified, &has_finished, &order] {
|
||||
// Here is_modified is written by the thread body itself, before it sleeps.
|
||||
is_modified = std::atomic_fetch_add_explicit(
|
||||
&order, 1, std::memory_order::memory_order_relaxed);
|
||||
// Using Alertable so callback is registered
|
||||
AlertableSleep(200ms);
|
||||
has_finished = std::atomic_fetch_add_explicit(
|
||||
&order, 1, std::memory_order::memory_order_relaxed);
|
||||
});
|
||||
result = Wait(thread.get(), true, 100ms);
|
||||
REQUIRE(result == WaitResult::kTimeout);
|
||||
thread->QueueUserCallback([] { Thread::Exit(0); });
|
||||
result = Wait(thread.get(), true, 500ms);
|
||||
REQUIRE(result == WaitResult::kSuccess);
|
||||
// The body ran up to the sleep, but the code after the sleep never ran
|
||||
// because the queued callback exited the thread.
|
||||
REQUIRE(is_modified == 0);
|
||||
REQUIRE(has_finished == -1);
|
||||
|
||||
// TODO(bwrsandman): Test alertable wait returning kUserCallback by using IO
|
||||
// callbacks.
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace base
|
||||
} // namespace xe
|
|
@ -24,29 +24,56 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
|
||||
namespace xe {
|
||||
namespace threading {
|
||||
|
||||
// This is more like an Event with self-reset when returning from Wait()
|
||||
class Fence {
|
||||
public:
|
||||
Fence() : signaled_(false) {}
|
||||
Fence() : signal_state_(0) {}
|
||||
|
||||
void Signal() {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
signaled_.store(true);
|
||||
signal_state_ |= SIGMASK_;
|
||||
cond_.notify_all();
|
||||
}
|
||||
|
||||
// Wait for the Fence to be signaled. Clears the signal on return.
|
||||
void Wait() {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
while (!signaled_.load()) {
|
||||
assert_true((signal_state_ & ~SIGMASK_) < (SIGMASK_ - 1) &&
|
||||
"Too many threads?");
|
||||
|
||||
// keep local copy to minimize loads
|
||||
auto signal_state = ++signal_state_;
|
||||
for (; !(signal_state & SIGMASK_); signal_state = signal_state_) {
|
||||
cond_.wait(lock);
|
||||
}
|
||||
signaled_.store(false);
|
||||
|
||||
// We can't just clear the signal as other threads may not have read it yet
|
||||
assert_true((signal_state & ~SIGMASK_) > 0); // wait_count > 0
|
||||
if (signal_state == (1 | SIGMASK_)) { // wait_count == 1
|
||||
// Last one out turn off the lights
|
||||
signal_state_ = 0;
|
||||
} else {
|
||||
// Oops, another thread is still waiting, set the new count and keep the
|
||||
// signal.
|
||||
signal_state_ = --signal_state;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
using state_t_ = uint_fast32_t;
|
||||
static constexpr state_t_ SIGMASK_ = state_t_(1)
|
||||
<< (sizeof(state_t_) * 8 - 1);
|
||||
|
||||
std::mutex mutex_;
|
||||
std::condition_variable cond_;
|
||||
std::atomic<bool> signaled_;
|
||||
// Use the highest bit (sign bit) as the signal flag and the rest to count
|
||||
// waiting threads.
|
||||
volatile state_t_ signal_state_;
|
||||
};
|
||||
|
||||
// Returns the total number of logical processors in the host system.
|
||||
|
@ -308,12 +335,12 @@ class Timer : public WaitHandle {
|
|||
std::chrono::milliseconds period,
|
||||
std::function<void()> opt_callback = nullptr) = 0;
|
||||
template <typename Rep, typename Period>
|
||||
void SetRepeating(std::chrono::nanoseconds due_time,
|
||||
bool SetRepeating(std::chrono::nanoseconds due_time,
|
||||
std::chrono::duration<Rep, Period> period,
|
||||
std::function<void()> opt_callback = nullptr) {
|
||||
SetRepeating(due_time,
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(period),
|
||||
std::move(opt_callback));
|
||||
return SetRepeating(
|
||||
due_time, std::chrono::duration_cast<std::chrono::milliseconds>(period),
|
||||
std::move(opt_callback));
|
||||
}
|
||||
|
||||
// Stops the timer before it can be set to the signaled state and cancels
|
||||
|
@ -391,7 +418,7 @@ class Thread : public WaitHandle {
|
|||
|
||||
// Decrements a thread's suspend count. When the suspend count is decremented
|
||||
// to zero, the execution of the thread is resumed.
|
||||
virtual bool Resume(uint32_t* out_new_suspend_count = nullptr) = 0;
|
||||
virtual bool Resume(uint32_t* out_previous_suspend_count = nullptr) = 0;
|
||||
|
||||
// Suspends the specified thread.
|
||||
virtual bool Suspend(uint32_t* out_previous_suspend_count = nullptr) = 0;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -388,16 +388,16 @@ class Win32Thread : public Win32Handle<Thread> {
|
|||
QueueUserAPC(DispatchApc, handle_, reinterpret_cast<ULONG_PTR>(apc_data));
|
||||
}
|
||||
|
||||
bool Resume(uint32_t* out_new_suspend_count = nullptr) override {
|
||||
if (out_new_suspend_count) {
|
||||
*out_new_suspend_count = 0;
|
||||
bool Resume(uint32_t* out_previous_suspend_count = nullptr) override {
|
||||
if (out_previous_suspend_count) {
|
||||
*out_previous_suspend_count = 0;
|
||||
}
|
||||
DWORD result = ResumeThread(handle_);
|
||||
if (result == UINT_MAX) {
|
||||
return false;
|
||||
}
|
||||
if (out_new_suspend_count) {
|
||||
*out_new_suspend_count = result;
|
||||
if (out_previous_suspend_count) {
|
||||
*out_previous_suspend_count = result;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ ExportResolver::Table::Table(const std::string_view module_name,
|
|||
}
|
||||
std::sort(
|
||||
exports_by_name_.begin(), exports_by_name_.end(),
|
||||
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) <= 0; });
|
||||
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) < 0; });
|
||||
}
|
||||
|
||||
ExportResolver::ExportResolver() = default;
|
||||
|
@ -51,7 +51,7 @@ void ExportResolver::RegisterTable(
|
|||
}
|
||||
std::sort(
|
||||
all_exports_by_name_.begin(), all_exports_by_name_.end(),
|
||||
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) <= 0; });
|
||||
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) < 0; });
|
||||
}
|
||||
|
||||
Export* ExportResolver::GetExportByOrdinal(const std::string_view module_name,
|
||||
|
|
|
@ -73,7 +73,7 @@ bool CommandProcessor::Initialize(
|
|||
WorkerThreadMain();
|
||||
return 0;
|
||||
}));
|
||||
worker_thread_->set_name("GraphicsSystem Command Processor");
|
||||
worker_thread_->set_name("GPU Commands");
|
||||
worker_thread_->Create();
|
||||
|
||||
return true;
|
||||
|
@ -731,12 +731,20 @@ bool CommandProcessor::ExecutePacketType3(RingBuffer* reader, uint32_t packet) {
|
|||
} break;
|
||||
case PM4_CONTEXT_UPDATE: {
|
||||
assert_true(count == 1);
|
||||
uint64_t value = reader->ReadAndSwap<uint32_t>();
|
||||
uint32_t value = reader->ReadAndSwap<uint32_t>();
|
||||
XELOGGPU("GPU context update = {:08X}", value);
|
||||
assert_true(value == 0);
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
case PM4_WAIT_FOR_IDLE: {
|
||||
// This opcode is used by "Duke Nukem Forever" while going/being ingame
|
||||
assert_true(count == 1);
|
||||
uint32_t value = reader->ReadAndSwap<uint32_t>();
|
||||
XELOGGPU("GPU wait for idle = {:08X}", value);
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
XELOGGPU("Unimplemented GPU OPCODE: 0x{:02X}\t\tCOUNT: {}\n", opcode,
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||
#include "xenia/gpu/draw_util.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/d3d12/d3d12_util.h"
|
||||
|
@ -387,7 +388,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
|||
sampler_count_vertex);
|
||||
return nullptr;
|
||||
}
|
||||
root_signatures_bindful_.insert({index, root_signature});
|
||||
root_signatures_bindful_.emplace(index, root_signature);
|
||||
return root_signature;
|
||||
}
|
||||
|
||||
|
@ -745,12 +746,11 @@ void D3D12CommandProcessor::SetSamplePositions(
|
|||
current_sample_positions_ = sample_positions;
|
||||
}
|
||||
|
||||
void D3D12CommandProcessor::SetComputePipelineState(
|
||||
ID3D12PipelineState* pipeline_state) {
|
||||
if (current_external_pipeline_state_ != pipeline_state) {
|
||||
deferred_command_list_.D3DSetPipelineState(pipeline_state);
|
||||
current_external_pipeline_state_ = pipeline_state;
|
||||
current_cached_pipeline_state_ = nullptr;
|
||||
void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) {
|
||||
if (current_external_pipeline_ != pipeline) {
|
||||
deferred_command_list_.D3DSetPipelineState(pipeline);
|
||||
current_external_pipeline_ = pipeline;
|
||||
current_cached_pipeline_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -773,8 +773,16 @@ std::string D3D12CommandProcessor::GetWindowTitleText() const {
|
|||
}
|
||||
// Currently scaling is only supported with ROV.
|
||||
if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) {
|
||||
return "Direct3D 12 - 2x";
|
||||
return "Direct3D 12 - ROV 2x";
|
||||
}
|
||||
// Rasterizer-ordered views are a feature very rarely used as of 2020 and
|
||||
// that faces adoption complications (outside of Direct3D - on Vulkan - at
|
||||
// least), but crucial to Xenia - raise awareness of its usage.
|
||||
// https://github.com/KhronosGroup/Vulkan-Ecosystem/issues/27#issuecomment-455712319
|
||||
// "In Xenia's title bar "D3D12 ROV" can be seen, which was a surprise, as I
|
||||
// wasn't aware that Xenia D3D12 backend was using Raster Order Views
|
||||
// feature" - oscarbg in that issue.
|
||||
return "Direct3D 12 - ROV";
|
||||
}
|
||||
return "Direct3D 12";
|
||||
}
|
||||
|
@ -1196,7 +1204,7 @@ bool D3D12CommandProcessor::SetupContext() {
|
|||
*this, *register_file_, bindless_resources_used_, edram_rov_used_,
|
||||
texture_cache_->IsResolutionScale2X() ? 2 : 1);
|
||||
if (!pipeline_cache_->Initialize()) {
|
||||
XELOGE("Failed to initialize the graphics pipeline state cache");
|
||||
XELOGE("Failed to initialize the graphics pipeline cache");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1526,8 +1534,7 @@ void D3D12CommandProcessor::ShutdownContext() {
|
|||
// Shut down binding - bindless descriptors may be owned by subsystems like
|
||||
// the texture cache.
|
||||
|
||||
// Root signatured are used by pipeline states, thus freed after the pipeline
|
||||
// states.
|
||||
// Root signatures are used by pipelines, thus freed after the pipelines.
|
||||
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_);
|
||||
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_);
|
||||
for (auto it : root_signatures_bindful_) {
|
||||
|
@ -1878,7 +1885,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
xenos::VertexShaderExportMode::kMultipass ||
|
||||
(primitive_two_faced && pa_su_sc_mode_cntl.cull_front &&
|
||||
pa_su_sc_mode_cntl.cull_back))) {
|
||||
// All faces are culled - can't be expressed in the pipeline state.
|
||||
// All faces are culled - can't be expressed in the pipeline.
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1954,7 +1961,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
line_loop_closing_index = 0;
|
||||
}
|
||||
|
||||
// Update the textures - this may bind pipeline state objects.
|
||||
// Update the textures - this may bind pipelines.
|
||||
uint32_t used_texture_mask =
|
||||
vertex_shader->GetUsedTextureMask() |
|
||||
(pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
|
||||
|
@ -1972,21 +1979,21 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
early_z = true;
|
||||
}
|
||||
|
||||
// Create the pipeline state object if needed and bind it.
|
||||
void* pipeline_state_handle;
|
||||
// Create the pipeline if needed and bind it.
|
||||
void* pipeline_handle;
|
||||
ID3D12RootSignature* root_signature;
|
||||
if (!pipeline_cache_->ConfigurePipeline(
|
||||
vertex_shader, pixel_shader, primitive_type_converted,
|
||||
indexed ? index_buffer_info->format : xenos::IndexFormat::kInt16,
|
||||
early_z, pipeline_render_targets, &pipeline_state_handle,
|
||||
early_z, pipeline_render_targets, &pipeline_handle,
|
||||
&root_signature)) {
|
||||
return false;
|
||||
}
|
||||
if (current_cached_pipeline_state_ != pipeline_state_handle) {
|
||||
if (current_cached_pipeline_ != pipeline_handle) {
|
||||
deferred_command_list_.SetPipelineStateHandle(
|
||||
reinterpret_cast<void*>(pipeline_state_handle));
|
||||
current_cached_pipeline_state_ = pipeline_state_handle;
|
||||
current_external_pipeline_state_ = nullptr;
|
||||
reinterpret_cast<void*>(pipeline_handle));
|
||||
current_cached_pipeline_ = pipeline_handle;
|
||||
current_external_pipeline_ = nullptr;
|
||||
}
|
||||
|
||||
// Update viewport, scissor, blend factor and stencil reference.
|
||||
|
@ -2005,14 +2012,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
}
|
||||
// Must not call anything that can change the descriptor heap from now on!
|
||||
|
||||
// Ensure vertex and index buffers are resident and draw.
|
||||
// Ensure vertex buffers are resident.
|
||||
// TODO(Triang3l): Cache residency for ranges in a way similar to how texture
|
||||
// validity will be tracked.
|
||||
// validity is tracked.
|
||||
uint64_t vertex_buffers_resident[2] = {};
|
||||
for (const auto& vertex_binding : vertex_shader->vertex_bindings()) {
|
||||
for (const Shader::VertexBinding& vertex_binding :
|
||||
vertex_shader->vertex_bindings()) {
|
||||
uint32_t vfetch_index = vertex_binding.fetch_constant;
|
||||
if (vertex_buffers_resident[vfetch_index >> 6] &
|
||||
(1ull << (vfetch_index & 63))) {
|
||||
(uint64_t(1) << (vfetch_index & 63))) {
|
||||
continue;
|
||||
}
|
||||
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
|
||||
|
@ -2045,7 +2053,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
|
|||
vfetch_constant.address << 2, vfetch_constant.size << 2);
|
||||
return false;
|
||||
}
|
||||
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
|
||||
vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1)
|
||||
<< (vfetch_index & 63);
|
||||
}
|
||||
|
||||
// Gather memexport ranges and ensure the heaps for them are resident, and
|
||||
|
@ -2517,8 +2526,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
|
|||
submission_open_ = true;
|
||||
|
||||
// Start a new deferred command list - will submit it to the real one in the
|
||||
// end of the submission (when async pipeline state object creation requests
|
||||
// are fulfilled).
|
||||
// end of the submission (when async pipeline creation requests are
|
||||
// fulfilled).
|
||||
deferred_command_list_.Reset();
|
||||
|
||||
// Reset cached state of the command list.
|
||||
|
@ -2527,8 +2536,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
|
|||
ff_blend_factor_update_needed_ = true;
|
||||
ff_stencil_ref_update_needed_ = true;
|
||||
current_sample_positions_ = xenos::MsaaSamples::k1X;
|
||||
current_cached_pipeline_state_ = nullptr;
|
||||
current_external_pipeline_state_ = nullptr;
|
||||
current_cached_pipeline_ = nullptr;
|
||||
current_external_pipeline_ = nullptr;
|
||||
current_graphics_root_signature_ = nullptr;
|
||||
current_graphics_root_up_to_date_ = 0;
|
||||
if (bindless_resources_used_) {
|
||||
|
@ -2724,7 +2733,7 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
|
|||
}
|
||||
|
||||
bool D3D12CommandProcessor::CanEndSubmissionImmediately() const {
|
||||
return !submission_open_ || !pipeline_cache_->IsCreatingPipelineStates();
|
||||
return !submission_open_ || !pipeline_cache_->IsCreatingPipelines();
|
||||
}
|
||||
|
||||
void D3D12CommandProcessor::ClearCommandAllocatorCache() {
|
||||
|
@ -2745,12 +2754,12 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() {
|
|||
}
|
||||
|
||||
void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
|
||||
auto& regs = *register_file_;
|
||||
|
||||
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
const RegisterFile& regs = *register_file_;
|
||||
|
||||
// Window parameters.
|
||||
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
|
||||
// See r200UpdateWindow:
|
||||
|
@ -2838,34 +2847,20 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
|
|||
}
|
||||
|
||||
// Scissor.
|
||||
auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
|
||||
auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
|
||||
D3D12_RECT scissor;
|
||||
scissor.left = pa_sc_window_scissor_tl.tl_x;
|
||||
scissor.top = pa_sc_window_scissor_tl.tl_y;
|
||||
scissor.right = pa_sc_window_scissor_br.br_x;
|
||||
scissor.bottom = pa_sc_window_scissor_br.br_y;
|
||||
if (!pa_sc_window_scissor_tl.window_offset_disable) {
|
||||
scissor.left =
|
||||
std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0));
|
||||
scissor.top =
|
||||
std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0));
|
||||
scissor.right =
|
||||
std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0));
|
||||
scissor.bottom =
|
||||
std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0));
|
||||
}
|
||||
scissor.left *= pixel_size_x;
|
||||
scissor.top *= pixel_size_y;
|
||||
scissor.right *= pixel_size_x;
|
||||
scissor.bottom *= pixel_size_y;
|
||||
ff_scissor_update_needed_ |= ff_scissor_.left != scissor.left;
|
||||
ff_scissor_update_needed_ |= ff_scissor_.top != scissor.top;
|
||||
ff_scissor_update_needed_ |= ff_scissor_.right != scissor.right;
|
||||
ff_scissor_update_needed_ |= ff_scissor_.bottom != scissor.bottom;
|
||||
draw_util::Scissor scissor;
|
||||
draw_util::GetScissor(regs, scissor);
|
||||
D3D12_RECT scissor_rect;
|
||||
scissor_rect.left = LONG(scissor.left * pixel_size_x);
|
||||
scissor_rect.top = LONG(scissor.top * pixel_size_y);
|
||||
scissor_rect.right = LONG((scissor.left + scissor.width) * pixel_size_x);
|
||||
scissor_rect.bottom = LONG((scissor.top + scissor.height) * pixel_size_y);
|
||||
ff_scissor_update_needed_ |= ff_scissor_.left != scissor_rect.left;
|
||||
ff_scissor_update_needed_ |= ff_scissor_.top != scissor_rect.top;
|
||||
ff_scissor_update_needed_ |= ff_scissor_.right != scissor_rect.right;
|
||||
ff_scissor_update_needed_ |= ff_scissor_.bottom != scissor_rect.bottom;
|
||||
if (ff_scissor_update_needed_) {
|
||||
ff_scissor_ = scissor;
|
||||
deferred_command_list_.RSSetScissorRect(scissor);
|
||||
ff_scissor_ = scissor_rect;
|
||||
deferred_command_list_.RSSetScissorRect(scissor_rect);
|
||||
ff_scissor_update_needed_ = false;
|
||||
}
|
||||
|
||||
|
@ -2915,12 +2910,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
uint32_t line_loop_closing_index, xenos::Endian index_endian,
|
||||
uint32_t used_texture_mask, bool early_z, uint32_t color_mask,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
|
||||
auto& regs = *register_file_;
|
||||
|
||||
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
const RegisterFile& regs = *register_file_;
|
||||
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
|
||||
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
||||
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
|
||||
|
@ -3103,14 +3097,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index;
|
||||
system_constants_.line_loop_closing_index = line_loop_closing_index;
|
||||
|
||||
// Vertex index offset.
|
||||
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
|
||||
system_constants_.vertex_base_index = vgt_indx_offset;
|
||||
|
||||
// Index or tessellation edge factor buffer endianness.
|
||||
dirty |= system_constants_.vertex_index_endian != index_endian;
|
||||
system_constants_.vertex_index_endian = index_endian;
|
||||
|
||||
// Vertex index offset.
|
||||
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
|
||||
system_constants_.vertex_base_index = vgt_indx_offset;
|
||||
|
||||
// User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
|
||||
if (!pa_cl_clip_cntl.clip_disable) {
|
||||
for (uint32_t i = 0; i < 6; ++i) {
|
||||
|
@ -3574,7 +3568,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
float_constant_map_vertex.float_bitmap[i];
|
||||
// If no float constants at all, we can reuse any buffer for them, so not
|
||||
// invalidating.
|
||||
if (float_constant_map_vertex.float_count != 0) {
|
||||
if (float_constant_count_vertex) {
|
||||
cbuffer_binding_float_vertex_.up_to_date = false;
|
||||
}
|
||||
}
|
||||
|
@ -3589,7 +3583,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
float_constant_map_pixel.float_bitmap[i]) {
|
||||
current_float_constant_map_pixel_[i] =
|
||||
float_constant_map_pixel.float_bitmap[i];
|
||||
if (float_constant_map_pixel.float_count != 0) {
|
||||
if (float_constant_count_pixel) {
|
||||
cbuffer_binding_float_pixel_.up_to_date = false;
|
||||
}
|
||||
}
|
||||
|
@ -3889,8 +3883,8 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
sampler_parameters,
|
||||
provider.OffsetSamplerDescriptor(
|
||||
sampler_bindless_heap_cpu_start_, sampler_index));
|
||||
texture_cache_bindless_sampler_map_.insert(
|
||||
{sampler_parameters.value, sampler_index});
|
||||
texture_cache_bindless_sampler_map_.emplace(
|
||||
sampler_parameters.value, sampler_index);
|
||||
}
|
||||
current_sampler_bindless_indices_vertex_[j] = sampler_index;
|
||||
}
|
||||
|
@ -3921,8 +3915,8 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
sampler_parameters,
|
||||
provider.OffsetSamplerDescriptor(
|
||||
sampler_bindless_heap_cpu_start_, sampler_index));
|
||||
texture_cache_bindless_sampler_map_.insert(
|
||||
{sampler_parameters.value, sampler_index});
|
||||
texture_cache_bindless_sampler_map_.emplace(
|
||||
sampler_parameters.value, sampler_index);
|
||||
}
|
||||
current_sampler_bindless_indices_pixel_[j] = sampler_index;
|
||||
}
|
||||
|
|
|
@ -190,19 +190,17 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// render targets or copying to depth render targets.
|
||||
void SetSamplePositions(xenos::MsaaSamples sample_positions);
|
||||
|
||||
// Returns a pipeline state object with deferred creation by its handle. May
|
||||
// return nullptr if failed to create the pipeline state object.
|
||||
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
|
||||
void* handle) const {
|
||||
return pipeline_cache_->GetD3D12PipelineStateByHandle(handle);
|
||||
// Returns a pipeline with deferred creation by its handle. May return nullptr
|
||||
// if failed to create the pipeline.
|
||||
ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
|
||||
return pipeline_cache_->GetD3D12PipelineByHandle(handle);
|
||||
}
|
||||
|
||||
// Sets the current pipeline state to a compute one. This is for cache
|
||||
// invalidation primarily. A submission must be open.
|
||||
void SetComputePipelineState(ID3D12PipelineState* pipeline_state);
|
||||
// Sets the current pipeline to a compute one. This is for cache invalidation
|
||||
// primarily. A submission must be open.
|
||||
void SetComputePipeline(ID3D12PipelineState* pipeline);
|
||||
|
||||
// For the pipeline state cache to call when binding layout UIDs may be
|
||||
// reused.
|
||||
// For the pipeline cache to call when binding layout UIDs may be reused.
|
||||
void NotifyShaderBindingsLayoutUIDsInvalidated();
|
||||
|
||||
// Returns the text to display in the GPU backend name in the window title.
|
||||
|
@ -327,8 +325,8 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
bool EndSubmission(bool is_swap);
|
||||
// Checks if ending a submission right now would not cause potentially more
|
||||
// delay than it would reduce by making the GPU start working earlier - such
|
||||
// as when there are unfinished graphics pipeline state creation requests that
|
||||
// would need to be fulfilled before actually submitting the command list.
|
||||
// as when there are unfinished graphics pipeline creation requests that would
|
||||
// need to be fulfilled before actually submitting the command list.
|
||||
bool CanEndSubmissionImmediately() const;
|
||||
bool AwaitAllQueueOperationsCompletion() {
|
||||
CheckSubmissionFence(submission_current_);
|
||||
|
@ -512,7 +510,7 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
return cvars::internal_tile_height;
|
||||
}
|
||||
|
||||
inline std::pair<uint32_t, uint32_t> GetSwapTextureSize() const {
|
||||
std::pair<uint32_t, uint32_t> GetSwapTextureSize() const {
|
||||
if (texture_cache_->IsResolutionScale2X()) {
|
||||
return std::make_pair(kSwapTextureWidth() * 2, kSwapTextureHeight() * 2);
|
||||
}
|
||||
|
@ -557,13 +555,12 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// Current SSAA sample positions (to be updated by the render target cache).
|
||||
xenos::MsaaSamples current_sample_positions_;
|
||||
|
||||
// Currently bound pipeline state, either a graphics pipeline state object
|
||||
// from the pipeline state cache (with potentially deferred creation -
|
||||
// current_external_pipeline_state_ is nullptr in this case) or a non-Xenos
|
||||
// graphics or compute pipeline state object (current_cached_pipeline_state_
|
||||
// is nullptr in this case).
|
||||
void* current_cached_pipeline_state_;
|
||||
ID3D12PipelineState* current_external_pipeline_state_;
|
||||
// Currently bound pipeline, either a graphics pipeline from the pipeline
|
||||
// cache (with potentially deferred creation - current_external_pipeline_ is
|
||||
// nullptr in this case) or a non-Xenos graphics or compute pipeline
|
||||
// (current_cached_pipeline_ is nullptr in this case).
|
||||
void* current_cached_pipeline_;
|
||||
ID3D12PipelineState* current_external_pipeline_;
|
||||
|
||||
// Currently bound graphics root signature.
|
||||
ID3D12RootSignature* current_graphics_root_signature_;
|
||||
|
|
|
@ -157,7 +157,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
|
|||
stretch_pipeline_desc.SampleDesc.Count = 1;
|
||||
if (FAILED(device->CreateGraphicsPipelineState(
|
||||
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_pipeline_)))) {
|
||||
XELOGE("Failed to create the front buffer stretch pipeline state");
|
||||
XELOGE("Failed to create the front buffer stretch pipeline");
|
||||
stretch_gamma_root_signature_->Release();
|
||||
stretch_gamma_root_signature_ = nullptr;
|
||||
stretch_root_signature_->Release();
|
||||
|
@ -170,8 +170,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
|
|||
if (FAILED(device->CreateGraphicsPipelineState(
|
||||
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_gamma_pipeline_)))) {
|
||||
XELOGE(
|
||||
"Failed to create the gamma-correcting front buffer stretch "
|
||||
"pipeline state");
|
||||
"Failed to create the gamma-correcting front buffer stretch pipeline");
|
||||
stretch_pipeline_->Release();
|
||||
stretch_pipeline_ = nullptr;
|
||||
stretch_gamma_root_signature_->Release();
|
||||
|
|
|
@ -85,7 +85,7 @@ class D3D12Shader : public Shader {
|
|||
return sampler_bindings_.data();
|
||||
}
|
||||
|
||||
// For owning subsystems like the pipeline state cache, accessors for unique
|
||||
// For owning subsystems like the pipeline cache, accessors for unique
|
||||
// identifiers (used instead of hashes to make sure collisions can't happen)
|
||||
// of binding layouts used by the shader, for invalidation if a shader with an
|
||||
// incompatible layout was bound.
|
||||
|
|
|
@ -48,7 +48,7 @@ class D3D12SharedMemory : public SharedMemory {
|
|||
// UseForReading or UseForWriting.
|
||||
|
||||
// Makes the buffer usable for vertices, indices and texture untiling.
|
||||
inline void UseForReading() {
|
||||
void UseForReading() {
|
||||
// Vertex fetch is also allowed in pixel shaders.
|
||||
CommitUAVWritesAndTransitionBuffer(
|
||||
D3D12_RESOURCE_STATE_INDEX_BUFFER |
|
||||
|
@ -56,18 +56,18 @@ class D3D12SharedMemory : public SharedMemory {
|
|||
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||
}
|
||||
// Makes the buffer usable for texture tiling after a resolve.
|
||||
inline void UseForWriting() {
|
||||
void UseForWriting() {
|
||||
CommitUAVWritesAndTransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
}
|
||||
// Makes the buffer usable as a source for copy commands.
|
||||
inline void UseAsCopySource() {
|
||||
void UseAsCopySource() {
|
||||
CommitUAVWritesAndTransitionBuffer(D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
}
|
||||
// Must be called when doing draws/dispatches modifying data within the shared
|
||||
// memory buffer as a UAV, to make sure that when UseForWriting is called the
|
||||
// next time, a UAV barrier will be done, and subsequent overlapping UAV
|
||||
// writes and reads are ordered.
|
||||
inline void MarkUAVWritesCommitNeeded() {
|
||||
void MarkUAVWritesCommitNeeded() {
|
||||
if (buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
|
||||
buffer_uav_writes_commit_needed_ = true;
|
||||
}
|
||||
|
|
|
@ -209,9 +209,8 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
|
|||
}
|
||||
} break;
|
||||
case Command::kSetPipelineStateHandle: {
|
||||
current_pipeline_state =
|
||||
command_processor_.GetD3D12PipelineStateByHandle(
|
||||
*reinterpret_cast<void* const*>(stream));
|
||||
current_pipeline_state = command_processor_.GetD3D12PipelineByHandle(
|
||||
*reinterpret_cast<void* const*>(stream));
|
||||
if (current_pipeline_state) {
|
||||
command_list->SetPipelineState(current_pipeline_state);
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ class DeferredCommandList {
|
|||
void Execute(ID3D12GraphicsCommandList* command_list,
|
||||
ID3D12GraphicsCommandList1* command_list_1);
|
||||
|
||||
inline void D3DClearUnorderedAccessViewUint(
|
||||
void D3DClearUnorderedAccessViewUint(
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle_in_current_heap,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle, ID3D12Resource* resource,
|
||||
const UINT values[4], UINT num_rects, const D3D12_RECT* rects) {
|
||||
|
@ -51,9 +51,9 @@ class DeferredCommandList {
|
|||
}
|
||||
}
|
||||
|
||||
inline void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset,
|
||||
ID3D12Resource* src_buffer, UINT64 src_offset,
|
||||
UINT64 num_bytes) {
|
||||
void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset,
|
||||
ID3D12Resource* src_buffer, UINT64 src_offset,
|
||||
UINT64 num_bytes) {
|
||||
auto& args = *reinterpret_cast<D3DCopyBufferRegionArguments*>(WriteCommand(
|
||||
Command::kD3DCopyBufferRegion, sizeof(D3DCopyBufferRegionArguments)));
|
||||
args.dst_buffer = dst_buffer;
|
||||
|
@ -63,26 +63,26 @@ class DeferredCommandList {
|
|||
args.num_bytes = num_bytes;
|
||||
}
|
||||
|
||||
inline void D3DCopyResource(ID3D12Resource* dst_resource,
|
||||
ID3D12Resource* src_resource) {
|
||||
void D3DCopyResource(ID3D12Resource* dst_resource,
|
||||
ID3D12Resource* src_resource) {
|
||||
auto& args = *reinterpret_cast<D3DCopyResourceArguments*>(WriteCommand(
|
||||
Command::kD3DCopyResource, sizeof(D3DCopyResourceArguments)));
|
||||
args.dst_resource = dst_resource;
|
||||
args.src_resource = src_resource;
|
||||
}
|
||||
|
||||
inline void CopyTexture(const D3D12_TEXTURE_COPY_LOCATION& dst,
|
||||
const D3D12_TEXTURE_COPY_LOCATION& src) {
|
||||
void CopyTexture(const D3D12_TEXTURE_COPY_LOCATION& dst,
|
||||
const D3D12_TEXTURE_COPY_LOCATION& src) {
|
||||
auto& args = *reinterpret_cast<CopyTextureArguments*>(
|
||||
WriteCommand(Command::kCopyTexture, sizeof(CopyTextureArguments)));
|
||||
std::memcpy(&args.dst, &dst, sizeof(D3D12_TEXTURE_COPY_LOCATION));
|
||||
std::memcpy(&args.src, &src, sizeof(D3D12_TEXTURE_COPY_LOCATION));
|
||||
}
|
||||
|
||||
inline void CopyTextureRegion(const D3D12_TEXTURE_COPY_LOCATION& dst,
|
||||
UINT dst_x, UINT dst_y, UINT dst_z,
|
||||
const D3D12_TEXTURE_COPY_LOCATION& src,
|
||||
const D3D12_BOX& src_box) {
|
||||
void CopyTextureRegion(const D3D12_TEXTURE_COPY_LOCATION& dst, UINT dst_x,
|
||||
UINT dst_y, UINT dst_z,
|
||||
const D3D12_TEXTURE_COPY_LOCATION& src,
|
||||
const D3D12_BOX& src_box) {
|
||||
auto& args = *reinterpret_cast<CopyTextureRegionArguments*>(WriteCommand(
|
||||
Command::kCopyTextureRegion, sizeof(CopyTextureRegionArguments)));
|
||||
std::memcpy(&args.dst, &dst, sizeof(D3D12_TEXTURE_COPY_LOCATION));
|
||||
|
@ -93,8 +93,8 @@ class DeferredCommandList {
|
|||
args.src_box = src_box;
|
||||
}
|
||||
|
||||
inline void D3DDispatch(UINT thread_group_count_x, UINT thread_group_count_y,
|
||||
UINT thread_group_count_z) {
|
||||
void D3DDispatch(UINT thread_group_count_x, UINT thread_group_count_y,
|
||||
UINT thread_group_count_z) {
|
||||
auto& args = *reinterpret_cast<D3DDispatchArguments*>(
|
||||
WriteCommand(Command::kD3DDispatch, sizeof(D3DDispatchArguments)));
|
||||
args.thread_group_count_x = thread_group_count_x;
|
||||
|
@ -102,11 +102,10 @@ class DeferredCommandList {
|
|||
args.thread_group_count_z = thread_group_count_z;
|
||||
}
|
||||
|
||||
inline void D3DDrawIndexedInstanced(UINT index_count_per_instance,
|
||||
UINT instance_count,
|
||||
UINT start_index_location,
|
||||
INT base_vertex_location,
|
||||
UINT start_instance_location) {
|
||||
void D3DDrawIndexedInstanced(UINT index_count_per_instance,
|
||||
UINT instance_count, UINT start_index_location,
|
||||
INT base_vertex_location,
|
||||
UINT start_instance_location) {
|
||||
auto& args = *reinterpret_cast<D3DDrawIndexedInstancedArguments*>(
|
||||
WriteCommand(Command::kD3DDrawIndexedInstanced,
|
||||
sizeof(D3DDrawIndexedInstancedArguments)));
|
||||
|
@ -117,9 +116,9 @@ class DeferredCommandList {
|
|||
args.start_instance_location = start_instance_location;
|
||||
}
|
||||
|
||||
inline void D3DDrawInstanced(UINT vertex_count_per_instance,
|
||||
UINT instance_count, UINT start_vertex_location,
|
||||
UINT start_instance_location) {
|
||||
void D3DDrawInstanced(UINT vertex_count_per_instance, UINT instance_count,
|
||||
UINT start_vertex_location,
|
||||
UINT start_instance_location) {
|
||||
auto& args = *reinterpret_cast<D3DDrawInstancedArguments*>(WriteCommand(
|
||||
Command::kD3DDrawInstanced, sizeof(D3DDrawInstancedArguments)));
|
||||
args.vertex_count_per_instance = vertex_count_per_instance;
|
||||
|
@ -128,7 +127,7 @@ class DeferredCommandList {
|
|||
args.start_instance_location = start_instance_location;
|
||||
}
|
||||
|
||||
inline void D3DIASetIndexBuffer(const D3D12_INDEX_BUFFER_VIEW* view) {
|
||||
void D3DIASetIndexBuffer(const D3D12_INDEX_BUFFER_VIEW* view) {
|
||||
auto& args = *reinterpret_cast<D3D12_INDEX_BUFFER_VIEW*>(WriteCommand(
|
||||
Command::kD3DIASetIndexBuffer, sizeof(D3D12_INDEX_BUFFER_VIEW)));
|
||||
if (view != nullptr) {
|
||||
|
@ -142,14 +141,13 @@ class DeferredCommandList {
|
|||
}
|
||||
}
|
||||
|
||||
inline void D3DIASetPrimitiveTopology(
|
||||
D3D12_PRIMITIVE_TOPOLOGY primitive_topology) {
|
||||
void D3DIASetPrimitiveTopology(D3D12_PRIMITIVE_TOPOLOGY primitive_topology) {
|
||||
auto& arg = *reinterpret_cast<D3D12_PRIMITIVE_TOPOLOGY*>(WriteCommand(
|
||||
Command::kD3DIASetPrimitiveTopology, sizeof(D3D12_PRIMITIVE_TOPOLOGY)));
|
||||
arg = primitive_topology;
|
||||
}
|
||||
|
||||
inline void D3DOMSetBlendFactor(const FLOAT blend_factor[4]) {
|
||||
void D3DOMSetBlendFactor(const FLOAT blend_factor[4]) {
|
||||
auto args = reinterpret_cast<FLOAT*>(
|
||||
WriteCommand(Command::kD3DOMSetBlendFactor, 4 * sizeof(FLOAT)));
|
||||
args[0] = blend_factor[0];
|
||||
|
@ -158,7 +156,7 @@ class DeferredCommandList {
|
|||
args[3] = blend_factor[3];
|
||||
}
|
||||
|
||||
inline void D3DOMSetRenderTargets(
|
||||
void D3DOMSetRenderTargets(
|
||||
UINT num_render_target_descriptors,
|
||||
const D3D12_CPU_DESCRIPTOR_HANDLE* render_target_descriptors,
|
||||
BOOL rts_single_handle_to_descriptor_range,
|
||||
|
@ -185,14 +183,14 @@ class DeferredCommandList {
|
|||
}
|
||||
}
|
||||
|
||||
inline void D3DOMSetStencilRef(UINT stencil_ref) {
|
||||
void D3DOMSetStencilRef(UINT stencil_ref) {
|
||||
auto& arg = *reinterpret_cast<UINT*>(
|
||||
WriteCommand(Command::kD3DOMSetStencilRef, sizeof(UINT)));
|
||||
arg = stencil_ref;
|
||||
}
|
||||
|
||||
inline void D3DResourceBarrier(UINT num_barriers,
|
||||
const D3D12_RESOURCE_BARRIER* barriers) {
|
||||
void D3DResourceBarrier(UINT num_barriers,
|
||||
const D3D12_RESOURCE_BARRIER* barriers) {
|
||||
if (num_barriers == 0) {
|
||||
return;
|
||||
}
|
||||
|
@ -207,21 +205,22 @@ class DeferredCommandList {
|
|||
num_barriers * sizeof(D3D12_RESOURCE_BARRIER));
|
||||
}
|
||||
|
||||
inline void RSSetScissorRect(const D3D12_RECT& rect) {
|
||||
void RSSetScissorRect(const D3D12_RECT& rect) {
|
||||
auto& arg = *reinterpret_cast<D3D12_RECT*>(
|
||||
WriteCommand(Command::kRSSetScissorRect, sizeof(D3D12_RECT)));
|
||||
arg = rect;
|
||||
}
|
||||
|
||||
inline void RSSetViewport(const D3D12_VIEWPORT& viewport) {
|
||||
void RSSetViewport(const D3D12_VIEWPORT& viewport) {
|
||||
auto& arg = *reinterpret_cast<D3D12_VIEWPORT*>(
|
||||
WriteCommand(Command::kRSSetViewport, sizeof(D3D12_VIEWPORT)));
|
||||
arg = viewport;
|
||||
}
|
||||
|
||||
inline void D3DSetComputeRoot32BitConstants(
|
||||
UINT root_parameter_index, UINT num_32bit_values_to_set,
|
||||
const void* src_data, UINT dest_offset_in_32bit_values) {
|
||||
void D3DSetComputeRoot32BitConstants(UINT root_parameter_index,
|
||||
UINT num_32bit_values_to_set,
|
||||
const void* src_data,
|
||||
UINT dest_offset_in_32bit_values) {
|
||||
if (num_32bit_values_to_set == 0) {
|
||||
return;
|
||||
}
|
||||
|
@ -235,9 +234,10 @@ class DeferredCommandList {
|
|||
std::memcpy(args + 1, src_data, num_32bit_values_to_set * sizeof(uint32_t));
|
||||
}
|
||||
|
||||
inline void D3DSetGraphicsRoot32BitConstants(
|
||||
UINT root_parameter_index, UINT num_32bit_values_to_set,
|
||||
const void* src_data, UINT dest_offset_in_32bit_values) {
|
||||
void D3DSetGraphicsRoot32BitConstants(UINT root_parameter_index,
|
||||
UINT num_32bit_values_to_set,
|
||||
const void* src_data,
|
||||
UINT dest_offset_in_32bit_values) {
|
||||
if (num_32bit_values_to_set == 0) {
|
||||
return;
|
||||
}
|
||||
|
@ -251,7 +251,7 @@ class DeferredCommandList {
|
|||
std::memcpy(args + 1, src_data, num_32bit_values_to_set * sizeof(uint32_t));
|
||||
}
|
||||
|
||||
inline void D3DSetComputeRootConstantBufferView(
|
||||
void D3DSetComputeRootConstantBufferView(
|
||||
UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS buffer_location) {
|
||||
auto& args = *reinterpret_cast<SetRootConstantBufferViewArguments*>(
|
||||
WriteCommand(Command::kD3DSetComputeRootConstantBufferView,
|
||||
|
@ -260,7 +260,7 @@ class DeferredCommandList {
|
|||
args.buffer_location = buffer_location;
|
||||
}
|
||||
|
||||
inline void D3DSetGraphicsRootConstantBufferView(
|
||||
void D3DSetGraphicsRootConstantBufferView(
|
||||
UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS buffer_location) {
|
||||
auto& args = *reinterpret_cast<SetRootConstantBufferViewArguments*>(
|
||||
WriteCommand(Command::kD3DSetGraphicsRootConstantBufferView,
|
||||
|
@ -269,7 +269,7 @@ class DeferredCommandList {
|
|||
args.buffer_location = buffer_location;
|
||||
}
|
||||
|
||||
inline void D3DSetComputeRootDescriptorTable(
|
||||
void D3DSetComputeRootDescriptorTable(
|
||||
UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) {
|
||||
auto& args = *reinterpret_cast<SetRootDescriptorTableArguments*>(
|
||||
WriteCommand(Command::kD3DSetComputeRootDescriptorTable,
|
||||
|
@ -278,7 +278,7 @@ class DeferredCommandList {
|
|||
args.base_descriptor.ptr = base_descriptor.ptr;
|
||||
}
|
||||
|
||||
inline void D3DSetGraphicsRootDescriptorTable(
|
||||
void D3DSetGraphicsRootDescriptorTable(
|
||||
UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) {
|
||||
auto& args = *reinterpret_cast<SetRootDescriptorTableArguments*>(
|
||||
WriteCommand(Command::kD3DSetGraphicsRootDescriptorTable,
|
||||
|
@ -287,42 +287,40 @@ class DeferredCommandList {
|
|||
args.base_descriptor.ptr = base_descriptor.ptr;
|
||||
}
|
||||
|
||||
inline void D3DSetComputeRootSignature(ID3D12RootSignature* root_signature) {
|
||||
void D3DSetComputeRootSignature(ID3D12RootSignature* root_signature) {
|
||||
auto& arg = *reinterpret_cast<ID3D12RootSignature**>(WriteCommand(
|
||||
Command::kD3DSetComputeRootSignature, sizeof(ID3D12RootSignature*)));
|
||||
arg = root_signature;
|
||||
}
|
||||
|
||||
inline void D3DSetGraphicsRootSignature(ID3D12RootSignature* root_signature) {
|
||||
void D3DSetGraphicsRootSignature(ID3D12RootSignature* root_signature) {
|
||||
auto& arg = *reinterpret_cast<ID3D12RootSignature**>(WriteCommand(
|
||||
Command::kD3DSetGraphicsRootSignature, sizeof(ID3D12RootSignature*)));
|
||||
arg = root_signature;
|
||||
}
|
||||
|
||||
inline void SetDescriptorHeaps(
|
||||
ID3D12DescriptorHeap* cbv_srv_uav_descriptor_heap,
|
||||
ID3D12DescriptorHeap* sampler_descriptor_heap) {
|
||||
void SetDescriptorHeaps(ID3D12DescriptorHeap* cbv_srv_uav_descriptor_heap,
|
||||
ID3D12DescriptorHeap* sampler_descriptor_heap) {
|
||||
auto& args = *reinterpret_cast<SetDescriptorHeapsArguments*>(WriteCommand(
|
||||
Command::kSetDescriptorHeaps, sizeof(SetDescriptorHeapsArguments)));
|
||||
args.cbv_srv_uav_descriptor_heap = cbv_srv_uav_descriptor_heap;
|
||||
args.sampler_descriptor_heap = sampler_descriptor_heap;
|
||||
}
|
||||
|
||||
inline void D3DSetPipelineState(ID3D12PipelineState* pipeline_state) {
|
||||
void D3DSetPipelineState(ID3D12PipelineState* pipeline_state) {
|
||||
auto& arg = *reinterpret_cast<ID3D12PipelineState**>(WriteCommand(
|
||||
Command::kD3DSetPipelineState, sizeof(ID3D12PipelineState*)));
|
||||
arg = pipeline_state;
|
||||
}
|
||||
|
||||
inline void SetPipelineStateHandle(void* pipeline_state_handle) {
|
||||
void SetPipelineStateHandle(void* pipeline_state_handle) {
|
||||
auto& arg = *reinterpret_cast<void**>(
|
||||
WriteCommand(Command::kSetPipelineStateHandle, sizeof(void*)));
|
||||
arg = pipeline_state_handle;
|
||||
}
|
||||
|
||||
inline void D3DSetSamplePositions(
|
||||
UINT num_samples_per_pixel, UINT num_pixels,
|
||||
const D3D12_SAMPLE_POSITION* sample_positions) {
|
||||
void D3DSetSamplePositions(UINT num_samples_per_pixel, UINT num_pixels,
|
||||
const D3D12_SAMPLE_POSITION* sample_positions) {
|
||||
auto& args = *reinterpret_cast<D3DSetSamplePositionsArguments*>(
|
||||
WriteCommand(Command::kD3DSetSamplePositions,
|
||||
sizeof(D3DSetSamplePositionsArguments)));
|
||||
|
|
|
@ -43,10 +43,10 @@ DEFINE_bool(
|
|||
"D3D12");
|
||||
DEFINE_int32(
|
||||
d3d12_pipeline_creation_threads, -1,
|
||||
"Number of threads used for graphics pipeline state object creation. -1 to "
|
||||
"calculate automatically (75% of logical CPU cores), a positive number to "
|
||||
"specify the number of threads explicitly (up to the number of logical CPU "
|
||||
"cores), 0 to disable multithreaded pipeline state object creation.",
|
||||
"Number of threads used for graphics pipeline creation. -1 to calculate "
|
||||
"automatically (75% of logical CPU cores), a positive number to specify "
|
||||
"the number of threads explicitly (up to the number of logical CPU cores), "
|
||||
"0 to disable multithreaded pipeline creation.",
|
||||
"D3D12");
|
||||
DEFINE_bool(d3d12_tessellation_wireframe, false,
|
||||
"Display tessellated surfaces as wireframe for debugging.",
|
||||
|
@ -125,8 +125,8 @@ bool PipelineCache::Initialize() {
|
|||
logical_processor_count = 6;
|
||||
}
|
||||
// Initialize creation thread synchronization data even if not using creation
|
||||
// threads because they may be used anyway to create pipeline state objects
|
||||
// from the storage.
|
||||
// threads because they may be used anyway to create pipelines from the
|
||||
// storage.
|
||||
creation_threads_busy_ = 0;
|
||||
creation_completion_event_ =
|
||||
xe::threading::Event::CreateManualResetEvent(true);
|
||||
|
@ -145,7 +145,7 @@ bool PipelineCache::Initialize() {
|
|||
for (size_t i = 0; i < creation_thread_count; ++i) {
|
||||
std::unique_ptr<xe::threading::Thread> creation_thread =
|
||||
xe::threading::Thread::Create({}, [this, i]() { CreationThread(i); });
|
||||
creation_thread->set_name("D3D12 Pipeline States");
|
||||
creation_thread->set_name("D3D12 Pipelines");
|
||||
creation_threads_.push_back(std::move(creation_thread));
|
||||
}
|
||||
}
|
||||
|
@ -184,13 +184,12 @@ void PipelineCache::ClearCache(bool shutting_down) {
|
|||
}
|
||||
ShutdownShaderStorage();
|
||||
|
||||
// Remove references to the current pipeline state object.
|
||||
current_pipeline_state_ = nullptr;
|
||||
// Remove references to the current pipeline.
|
||||
current_pipeline_ = nullptr;
|
||||
|
||||
if (!creation_threads_.empty()) {
|
||||
// Empty the pipeline state object creation queue and make sure there are no
|
||||
// threads currently creating pipeline state objects because pipeline states
|
||||
// are going to be deleted.
|
||||
// Empty the pipeline creation queue and make sure there are no threads
|
||||
// currently creating pipelines because pipelines are going to be deleted.
|
||||
bool await_creation_completion_event = false;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
|
@ -207,13 +206,13 @@ void PipelineCache::ClearCache(bool shutting_down) {
|
|||
}
|
||||
}
|
||||
|
||||
// Destroy all pipeline state objects.
|
||||
for (auto it : pipeline_states_) {
|
||||
// Destroy all pipelines.
|
||||
for (auto it : pipelines_) {
|
||||
it.second->state->Release();
|
||||
delete it.second;
|
||||
}
|
||||
pipeline_states_.clear();
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipeline_states", 0);
|
||||
pipelines_.clear();
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipelines", 0);
|
||||
|
||||
// Destroy all shaders.
|
||||
command_processor_.NotifyShaderBindingsLayoutUIDsInvalidated();
|
||||
|
@ -223,10 +222,10 @@ void PipelineCache::ClearCache(bool shutting_down) {
|
|||
}
|
||||
texture_binding_layout_map_.clear();
|
||||
texture_binding_layouts_.clear();
|
||||
for (auto it : shader_map_) {
|
||||
for (auto it : shaders_) {
|
||||
delete it.second;
|
||||
}
|
||||
shader_map_.clear();
|
||||
shaders_.clear();
|
||||
|
||||
if (reinitialize_shader_storage) {
|
||||
InitializeShaderStorage(shader_storage_root, shader_storage_title_id,
|
||||
|
@ -374,8 +373,7 @@ void PipelineCache::InitializeShaderStorage(
|
|||
}
|
||||
size_t ucode_byte_count =
|
||||
shader_header.ucode_dword_count * sizeof(uint32_t);
|
||||
if (shader_map_.find(shader_header.ucode_data_hash) !=
|
||||
shader_map_.end()) {
|
||||
if (shaders_.find(shader_header.ucode_data_hash) != shaders_.end()) {
|
||||
// Already added - usually shaders aren't added without the intention of
|
||||
// translating them imminently, so don't do additional checks to
|
||||
// actually ensure that translation happens right now (they would cause
|
||||
|
@ -402,7 +400,7 @@ void PipelineCache::InitializeShaderStorage(
|
|||
D3D12Shader* shader =
|
||||
new D3D12Shader(shader_header.type, ucode_data_hash,
|
||||
ucode_dwords.data(), shader_header.ucode_dword_count);
|
||||
shader_map_.insert({ucode_data_hash, shader});
|
||||
shaders_.emplace(ucode_data_hash, shader);
|
||||
// Create new threads if the currently existing threads can't keep up with
|
||||
// file reading, but not more than the number of logical processors minus
|
||||
// one.
|
||||
|
@ -439,7 +437,7 @@ void PipelineCache::InitializeShaderStorage(
|
|||
}
|
||||
shader_translation_threads.clear();
|
||||
for (D3D12Shader* shader : shaders_failed_to_translate) {
|
||||
shader_map_.erase(shader->ucode_data_hash());
|
||||
shaders_.erase(shader->ucode_data_hash());
|
||||
delete shader;
|
||||
}
|
||||
}
|
||||
|
@ -460,72 +458,66 @@ void PipelineCache::InitializeShaderStorage(
|
|||
}
|
||||
|
||||
// 'DXRO' or 'DXRT'.
|
||||
const uint32_t pipeline_state_storage_magic_api =
|
||||
const uint32_t pipeline_storage_magic_api =
|
||||
edram_rov_used_ ? 0x4F525844 : 0x54525844;
|
||||
|
||||
// Initialize the pipeline state storage stream.
|
||||
uint64_t pipeline_state_storage_initialization_start_ =
|
||||
// Initialize the pipeline storage stream.
|
||||
uint64_t pipeline_storage_initialization_start_ =
|
||||
xe::Clock::QueryHostTickCount();
|
||||
auto pipeline_state_storage_file_path =
|
||||
auto pipeline_storage_file_path =
|
||||
shader_storage_shareable_root /
|
||||
fmt::format("{:08X}.{}.d3d12.xpso", title_id,
|
||||
edram_rov_used_ ? "rov" : "rtv");
|
||||
pipeline_state_storage_file_ =
|
||||
xe::filesystem::OpenFile(pipeline_state_storage_file_path, "a+b");
|
||||
if (!pipeline_state_storage_file_) {
|
||||
pipeline_storage_file_ =
|
||||
xe::filesystem::OpenFile(pipeline_storage_file_path, "a+b");
|
||||
if (!pipeline_storage_file_) {
|
||||
XELOGE(
|
||||
"Failed to open the Direct3D 12 pipeline state description storage "
|
||||
"file for writing, persistent shader storage will be disabled: {}",
|
||||
xe::path_to_utf8(pipeline_state_storage_file_path));
|
||||
"Failed to open the Direct3D 12 pipeline description storage file for "
|
||||
"writing, persistent shader storage will be disabled: {}",
|
||||
xe::path_to_utf8(pipeline_storage_file_path));
|
||||
fclose(shader_storage_file_);
|
||||
shader_storage_file_ = nullptr;
|
||||
return;
|
||||
}
|
||||
pipeline_state_storage_file_flush_needed_ = false;
|
||||
pipeline_storage_file_flush_needed_ = false;
|
||||
// 'XEPS'.
|
||||
const uint32_t pipeline_state_storage_magic = 0x53504558;
|
||||
const uint32_t pipeline_storage_magic = 0x53504558;
|
||||
struct {
|
||||
uint32_t magic;
|
||||
uint32_t magic_api;
|
||||
uint32_t version_swapped;
|
||||
} pipeline_state_storage_file_header;
|
||||
if (fread(&pipeline_state_storage_file_header,
|
||||
sizeof(pipeline_state_storage_file_header), 1,
|
||||
pipeline_state_storage_file_) &&
|
||||
pipeline_state_storage_file_header.magic ==
|
||||
pipeline_state_storage_magic &&
|
||||
pipeline_state_storage_file_header.magic_api ==
|
||||
pipeline_state_storage_magic_api &&
|
||||
xe::byte_swap(pipeline_state_storage_file_header.version_swapped) ==
|
||||
} pipeline_storage_file_header;
|
||||
if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
|
||||
1, pipeline_storage_file_) &&
|
||||
pipeline_storage_file_header.magic == pipeline_storage_magic &&
|
||||
pipeline_storage_file_header.magic_api == pipeline_storage_magic_api &&
|
||||
xe::byte_swap(pipeline_storage_file_header.version_swapped) ==
|
||||
PipelineDescription::kVersion) {
|
||||
uint64_t pipeline_state_storage_valid_bytes =
|
||||
sizeof(pipeline_state_storage_file_header);
|
||||
// Enqueue pipeline state descriptions written by previous Xenia executions
|
||||
// until the end of the file or until a corrupted one is detected.
|
||||
xe::filesystem::Seek(pipeline_state_storage_file_, 0, SEEK_END);
|
||||
int64_t pipeline_state_storage_told_end =
|
||||
xe::filesystem::Tell(pipeline_state_storage_file_);
|
||||
size_t pipeline_state_storage_told_count =
|
||||
size_t(pipeline_state_storage_told_end >=
|
||||
int64_t(pipeline_state_storage_valid_bytes)
|
||||
? (uint64_t(pipeline_state_storage_told_end) -
|
||||
pipeline_state_storage_valid_bytes) /
|
||||
sizeof(PipelineStoredDescription)
|
||||
: 0);
|
||||
if (pipeline_state_storage_told_count &&
|
||||
xe::filesystem::Seek(pipeline_state_storage_file_,
|
||||
int64_t(pipeline_state_storage_valid_bytes),
|
||||
SEEK_SET)) {
|
||||
uint64_t pipeline_storage_valid_bytes =
|
||||
sizeof(pipeline_storage_file_header);
|
||||
// Enqueue pipeline descriptions written by previous Xenia executions until
|
||||
// the end of the file or until a corrupted one is detected.
|
||||
xe::filesystem::Seek(pipeline_storage_file_, 0, SEEK_END);
|
||||
int64_t pipeline_storage_told_end =
|
||||
xe::filesystem::Tell(pipeline_storage_file_);
|
||||
size_t pipeline_storage_told_count = size_t(
|
||||
pipeline_storage_told_end >= int64_t(pipeline_storage_valid_bytes)
|
||||
? (uint64_t(pipeline_storage_told_end) -
|
||||
pipeline_storage_valid_bytes) /
|
||||
sizeof(PipelineStoredDescription)
|
||||
: 0);
|
||||
if (pipeline_storage_told_count &&
|
||||
xe::filesystem::Seek(pipeline_storage_file_,
|
||||
int64_t(pipeline_storage_valid_bytes), SEEK_SET)) {
|
||||
std::vector<PipelineStoredDescription> pipeline_stored_descriptions;
|
||||
pipeline_stored_descriptions.resize(pipeline_state_storage_told_count);
|
||||
pipeline_stored_descriptions.resize(fread(
|
||||
pipeline_stored_descriptions.data(),
|
||||
sizeof(PipelineStoredDescription), pipeline_state_storage_told_count,
|
||||
pipeline_state_storage_file_));
|
||||
pipeline_stored_descriptions.resize(pipeline_storage_told_count);
|
||||
pipeline_stored_descriptions.resize(
|
||||
fread(pipeline_stored_descriptions.data(),
|
||||
sizeof(PipelineStoredDescription), pipeline_storage_told_count,
|
||||
pipeline_storage_file_));
|
||||
if (!pipeline_stored_descriptions.empty()) {
|
||||
// Launch additional creation threads to use all cores to create
|
||||
// pipeline state objects faster. Will also be using the main thread, so
|
||||
// minus 1.
|
||||
// pipelines faster. Will also be using the main thread, so minus 1.
|
||||
size_t creation_thread_original_count = creation_threads_.size();
|
||||
size_t creation_thread_needed_count =
|
||||
std::max(std::min(pipeline_stored_descriptions.size(),
|
||||
|
@ -539,10 +531,10 @@ void PipelineCache::InitializeShaderStorage(
|
|||
{}, [this, creation_thread_index]() {
|
||||
CreationThread(creation_thread_index);
|
||||
});
|
||||
creation_thread->set_name("D3D12 Pipeline States Additional");
|
||||
creation_thread->set_name("D3D12 Pipelines");
|
||||
creation_threads_.push_back(std::move(creation_thread));
|
||||
}
|
||||
size_t pipeline_states_created = 0;
|
||||
size_t pipelines_created = 0;
|
||||
for (const PipelineStoredDescription& pipeline_stored_description :
|
||||
pipeline_stored_descriptions) {
|
||||
const PipelineDescription& pipeline_description =
|
||||
|
@ -554,30 +546,28 @@ void PipelineCache::InitializeShaderStorage(
|
|||
0) != pipeline_stored_description.description_hash) {
|
||||
break;
|
||||
}
|
||||
pipeline_state_storage_valid_bytes +=
|
||||
sizeof(PipelineStoredDescription);
|
||||
// Skip already known pipeline states - those have already been
|
||||
// enqueued.
|
||||
auto found_range = pipeline_states_.equal_range(
|
||||
pipeline_storage_valid_bytes += sizeof(PipelineStoredDescription);
|
||||
// Skip already known pipelines - those have already been enqueued.
|
||||
auto found_range = pipelines_.equal_range(
|
||||
pipeline_stored_description.description_hash);
|
||||
bool pipeline_state_found = false;
|
||||
bool pipeline_found = false;
|
||||
for (auto it = found_range.first; it != found_range.second; ++it) {
|
||||
PipelineState* found_pipeline_state = it->second;
|
||||
if (!std::memcmp(&found_pipeline_state->description.description,
|
||||
Pipeline* found_pipeline = it->second;
|
||||
if (!std::memcmp(&found_pipeline->description.description,
|
||||
&pipeline_description,
|
||||
sizeof(pipeline_description))) {
|
||||
pipeline_state_found = true;
|
||||
pipeline_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (pipeline_state_found) {
|
||||
if (pipeline_found) {
|
||||
continue;
|
||||
}
|
||||
|
||||
PipelineRuntimeDescription pipeline_runtime_description;
|
||||
auto vertex_shader_it =
|
||||
shader_map_.find(pipeline_description.vertex_shader_hash);
|
||||
if (vertex_shader_it == shader_map_.end()) {
|
||||
shaders_.find(pipeline_description.vertex_shader_hash);
|
||||
if (vertex_shader_it == shaders_.end()) {
|
||||
continue;
|
||||
}
|
||||
pipeline_runtime_description.vertex_shader = vertex_shader_it->second;
|
||||
|
@ -586,8 +576,8 @@ void PipelineCache::InitializeShaderStorage(
|
|||
}
|
||||
if (pipeline_description.pixel_shader_hash) {
|
||||
auto pixel_shader_it =
|
||||
shader_map_.find(pipeline_description.pixel_shader_hash);
|
||||
if (pixel_shader_it == shader_map_.end()) {
|
||||
shaders_.find(pipeline_description.pixel_shader_hash);
|
||||
if (pixel_shader_it == shaders_.end()) {
|
||||
continue;
|
||||
}
|
||||
pipeline_runtime_description.pixel_shader = pixel_shader_it->second;
|
||||
|
@ -607,36 +597,33 @@ void PipelineCache::InitializeShaderStorage(
|
|||
std::memcpy(&pipeline_runtime_description.description,
|
||||
&pipeline_description, sizeof(pipeline_description));
|
||||
|
||||
PipelineState* new_pipeline_state = new PipelineState;
|
||||
new_pipeline_state->state = nullptr;
|
||||
std::memcpy(&new_pipeline_state->description,
|
||||
&pipeline_runtime_description,
|
||||
Pipeline* new_pipeline = new Pipeline;
|
||||
new_pipeline->state = nullptr;
|
||||
std::memcpy(&new_pipeline->description, &pipeline_runtime_description,
|
||||
sizeof(pipeline_runtime_description));
|
||||
pipeline_states_.insert(
|
||||
std::make_pair(pipeline_stored_description.description_hash,
|
||||
new_pipeline_state));
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipeline_states",
|
||||
pipeline_states_.size());
|
||||
pipelines_.emplace(pipeline_stored_description.description_hash,
|
||||
new_pipeline);
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
|
||||
if (!creation_threads_.empty()) {
|
||||
// Submit the pipeline for creation to any available thread.
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
creation_queue_.push_back(new_pipeline_state);
|
||||
creation_queue_.push_back(new_pipeline);
|
||||
}
|
||||
creation_request_cond_.notify_one();
|
||||
} else {
|
||||
new_pipeline_state->state =
|
||||
CreateD3D12PipelineState(pipeline_runtime_description);
|
||||
new_pipeline->state =
|
||||
CreateD3D12Pipeline(pipeline_runtime_description);
|
||||
}
|
||||
++pipeline_states_created;
|
||||
++pipelines_created;
|
||||
}
|
||||
CreateQueuedPipelineStatesOnProcessorThread();
|
||||
CreateQueuedPipelinesOnProcessorThread();
|
||||
if (creation_threads_.size() > creation_thread_original_count) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
creation_threads_shutdown_from_ = creation_thread_original_count;
|
||||
// Assuming the queue is empty because of
|
||||
// CreateQueuedPipelineStatesOnProcessorThread.
|
||||
// CreateQueuedPipelinesOnProcessorThread.
|
||||
}
|
||||
creation_request_cond_.notify_all();
|
||||
while (creation_threads_.size() > creation_thread_original_count) {
|
||||
|
@ -664,26 +651,23 @@ void PipelineCache::InitializeShaderStorage(
|
|||
}
|
||||
}
|
||||
XELOGGPU(
|
||||
"Created {} graphics pipeline state objects from the storage in {} "
|
||||
"milliseconds",
|
||||
pipeline_states_created,
|
||||
"Created {} graphics pipelines from the storage in {} milliseconds",
|
||||
pipelines_created,
|
||||
(xe::Clock::QueryHostTickCount() -
|
||||
pipeline_state_storage_initialization_start_) *
|
||||
pipeline_storage_initialization_start_) *
|
||||
1000 / xe::Clock::QueryHostTickFrequency());
|
||||
}
|
||||
}
|
||||
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_,
|
||||
pipeline_state_storage_valid_bytes);
|
||||
xe::filesystem::TruncateStdioFile(pipeline_storage_file_,
|
||||
pipeline_storage_valid_bytes);
|
||||
} else {
|
||||
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, 0);
|
||||
pipeline_state_storage_file_header.magic = pipeline_state_storage_magic;
|
||||
pipeline_state_storage_file_header.magic_api =
|
||||
pipeline_state_storage_magic_api;
|
||||
pipeline_state_storage_file_header.version_swapped =
|
||||
xe::filesystem::TruncateStdioFile(pipeline_storage_file_, 0);
|
||||
pipeline_storage_file_header.magic = pipeline_storage_magic;
|
||||
pipeline_storage_file_header.magic_api = pipeline_storage_magic_api;
|
||||
pipeline_storage_file_header.version_swapped =
|
||||
xe::byte_swap(PipelineDescription::kVersion);
|
||||
fwrite(&pipeline_state_storage_file_header,
|
||||
sizeof(pipeline_state_storage_file_header), 1,
|
||||
pipeline_state_storage_file_);
|
||||
fwrite(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
|
||||
1, pipeline_storage_file_);
|
||||
}
|
||||
|
||||
shader_storage_root_ = storage_root;
|
||||
|
@ -691,7 +675,7 @@ void PipelineCache::InitializeShaderStorage(
|
|||
|
||||
// Start the storage writing thread.
|
||||
storage_write_flush_shaders_ = false;
|
||||
storage_write_flush_pipeline_states_ = false;
|
||||
storage_write_flush_pipelines_ = false;
|
||||
storage_write_thread_shutdown_ = false;
|
||||
storage_write_thread_ =
|
||||
xe::threading::Thread::Create({}, [this]() { StorageWriteThread(); });
|
||||
|
@ -708,12 +692,12 @@ void PipelineCache::ShutdownShaderStorage() {
|
|||
storage_write_thread_.reset();
|
||||
}
|
||||
storage_write_shader_queue_.clear();
|
||||
storage_write_pipeline_state_queue_.clear();
|
||||
storage_write_pipeline_queue_.clear();
|
||||
|
||||
if (pipeline_state_storage_file_) {
|
||||
fclose(pipeline_state_storage_file_);
|
||||
pipeline_state_storage_file_ = nullptr;
|
||||
pipeline_state_storage_file_flush_needed_ = false;
|
||||
if (pipeline_storage_file_) {
|
||||
fclose(pipeline_storage_file_);
|
||||
pipeline_storage_file_ = nullptr;
|
||||
pipeline_storage_file_flush_needed_ = false;
|
||||
}
|
||||
|
||||
if (shader_storage_file_) {
|
||||
|
@ -728,30 +712,29 @@ void PipelineCache::ShutdownShaderStorage() {
|
|||
|
||||
void PipelineCache::EndSubmission() {
|
||||
if (shader_storage_file_flush_needed_ ||
|
||||
pipeline_state_storage_file_flush_needed_) {
|
||||
pipeline_storage_file_flush_needed_) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
|
||||
if (shader_storage_file_flush_needed_) {
|
||||
storage_write_flush_shaders_ = true;
|
||||
}
|
||||
if (pipeline_state_storage_file_flush_needed_) {
|
||||
storage_write_flush_pipeline_states_ = true;
|
||||
if (pipeline_storage_file_flush_needed_) {
|
||||
storage_write_flush_pipelines_ = true;
|
||||
}
|
||||
}
|
||||
storage_write_request_cond_.notify_one();
|
||||
shader_storage_file_flush_needed_ = false;
|
||||
pipeline_state_storage_file_flush_needed_ = false;
|
||||
pipeline_storage_file_flush_needed_ = false;
|
||||
}
|
||||
if (!creation_threads_.empty()) {
|
||||
CreateQueuedPipelineStatesOnProcessorThread();
|
||||
// Await creation of all queued pipeline state objects.
|
||||
CreateQueuedPipelinesOnProcessorThread();
|
||||
// Await creation of all queued pipelines.
|
||||
bool await_creation_completion_event;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
// Assuming the creation queue is already empty (because the processor
|
||||
// thread also worked on creating the leftover pipeline state objects), so
|
||||
// only check if there are threads with pipeline state objects currently
|
||||
// being created.
|
||||
// thread also worked on creating the leftover pipelines), so only check
|
||||
// if there are threads with pipelines currently being created.
|
||||
await_creation_completion_event = creation_threads_busy_ != 0;
|
||||
if (await_creation_completion_event) {
|
||||
creation_completion_event_->Reset();
|
||||
|
@ -765,7 +748,7 @@ void PipelineCache::EndSubmission() {
|
|||
}
|
||||
}
|
||||
|
||||
bool PipelineCache::IsCreatingPipelineStates() {
|
||||
bool PipelineCache::IsCreatingPipelines() {
|
||||
if (creation_threads_.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -779,8 +762,8 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
|
|||
uint32_t dword_count) {
|
||||
// Hash the input memory and lookup the shader.
|
||||
uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
|
||||
auto it = shader_map_.find(data_hash);
|
||||
if (it != shader_map_.end()) {
|
||||
auto it = shaders_.find(data_hash);
|
||||
if (it != shaders_.end()) {
|
||||
// Shader has been previously loaded.
|
||||
return it->second;
|
||||
}
|
||||
|
@ -790,7 +773,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
|
|||
// again.
|
||||
D3D12Shader* shader =
|
||||
new D3D12Shader(shader_type, data_hash, host_address, dword_count);
|
||||
shader_map_.insert({data_hash, shader});
|
||||
shaders_.emplace(data_hash, shader);
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
@ -798,11 +781,11 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
|
|||
Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid()
|
||||
const {
|
||||
// If the values this function returns are changed, INVALIDATE THE SHADER
|
||||
// STORAGE (increase kVersion for BOTH shaders and pipeline states)! The
|
||||
// exception is when the function originally returned "unsupported", but
|
||||
// started to return a valid value (in this case the shader wouldn't be cached
|
||||
// in the first place). Otherwise games will not be able to locate shaders for
|
||||
// draws for which the host vertex shader type has changed!
|
||||
// STORAGE (increase kVersion for BOTH shaders and pipelines)! The exception
|
||||
// is when the function originally returned "unsupported", but started to
|
||||
// return a valid value (in this case the shader wouldn't be cached in the
|
||||
// first place). Otherwise games will not be able to locate shaders for draws
|
||||
// for which the host vertex shader type has changed!
|
||||
const auto& regs = register_file_;
|
||||
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
|
||||
if (!xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode,
|
||||
|
@ -929,13 +912,12 @@ bool PipelineCache::ConfigurePipeline(
|
|||
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
|
||||
bool early_z,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||
void** pipeline_state_handle_out,
|
||||
ID3D12RootSignature** root_signature_out) {
|
||||
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) {
|
||||
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
assert_not_null(pipeline_state_handle_out);
|
||||
assert_not_null(pipeline_handle_out);
|
||||
assert_not_null(root_signature_out);
|
||||
|
||||
PipelineRuntimeDescription runtime_description;
|
||||
|
@ -946,24 +928,24 @@ bool PipelineCache::ConfigurePipeline(
|
|||
}
|
||||
PipelineDescription& description = runtime_description.description;
|
||||
|
||||
if (current_pipeline_state_ != nullptr &&
|
||||
!std::memcmp(&current_pipeline_state_->description.description,
|
||||
&description, sizeof(description))) {
|
||||
*pipeline_state_handle_out = current_pipeline_state_;
|
||||
if (current_pipeline_ != nullptr &&
|
||||
!std::memcmp(&current_pipeline_->description.description, &description,
|
||||
sizeof(description))) {
|
||||
*pipeline_handle_out = current_pipeline_;
|
||||
*root_signature_out = runtime_description.root_signature;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Find an existing pipeline state object in the cache.
|
||||
// Find an existing pipeline in the cache.
|
||||
uint64_t hash = XXH64(&description, sizeof(description), 0);
|
||||
auto found_range = pipeline_states_.equal_range(hash);
|
||||
auto found_range = pipelines_.equal_range(hash);
|
||||
for (auto it = found_range.first; it != found_range.second; ++it) {
|
||||
PipelineState* found_pipeline_state = it->second;
|
||||
if (!std::memcmp(&found_pipeline_state->description.description,
|
||||
&description, sizeof(description))) {
|
||||
current_pipeline_state_ = found_pipeline_state;
|
||||
*pipeline_state_handle_out = found_pipeline_state;
|
||||
*root_signature_out = found_pipeline_state->description.root_signature;
|
||||
Pipeline* found_pipeline = it->second;
|
||||
if (!std::memcmp(&found_pipeline->description.description, &description,
|
||||
sizeof(description))) {
|
||||
current_pipeline_ = found_pipeline;
|
||||
*pipeline_handle_out = found_pipeline;
|
||||
*root_signature_out = found_pipeline->description.root_signature;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -974,33 +956,32 @@ bool PipelineCache::ConfigurePipeline(
|
|||
return false;
|
||||
}
|
||||
|
||||
PipelineState* new_pipeline_state = new PipelineState;
|
||||
new_pipeline_state->state = nullptr;
|
||||
std::memcpy(&new_pipeline_state->description, &runtime_description,
|
||||
Pipeline* new_pipeline = new Pipeline;
|
||||
new_pipeline->state = nullptr;
|
||||
std::memcpy(&new_pipeline->description, &runtime_description,
|
||||
sizeof(runtime_description));
|
||||
pipeline_states_.insert(std::make_pair(hash, new_pipeline_state));
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipeline_states",
|
||||
pipeline_states_.size());
|
||||
pipelines_.emplace(hash, new_pipeline);
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
|
||||
|
||||
if (!creation_threads_.empty()) {
|
||||
// Submit the pipeline state object for creation to any available thread.
|
||||
// Submit the pipeline for creation to any available thread.
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
creation_queue_.push_back(new_pipeline_state);
|
||||
creation_queue_.push_back(new_pipeline);
|
||||
}
|
||||
creation_request_cond_.notify_one();
|
||||
} else {
|
||||
new_pipeline_state->state = CreateD3D12PipelineState(runtime_description);
|
||||
new_pipeline->state = CreateD3D12Pipeline(runtime_description);
|
||||
}
|
||||
|
||||
if (pipeline_state_storage_file_) {
|
||||
if (pipeline_storage_file_) {
|
||||
assert_not_null(storage_write_thread_);
|
||||
pipeline_state_storage_file_flush_needed_ = true;
|
||||
pipeline_storage_file_flush_needed_ = true;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
|
||||
storage_write_pipeline_state_queue_.emplace_back();
|
||||
storage_write_pipeline_queue_.emplace_back();
|
||||
PipelineStoredDescription& stored_description =
|
||||
storage_write_pipeline_state_queue_.back();
|
||||
storage_write_pipeline_queue_.back();
|
||||
stored_description.description_hash = hash;
|
||||
std::memcpy(&stored_description.description, &description,
|
||||
sizeof(description));
|
||||
|
@ -1008,8 +989,8 @@ bool PipelineCache::ConfigurePipeline(
|
|||
storage_write_request_cond_.notify_all();
|
||||
}
|
||||
|
||||
current_pipeline_state_ = new_pipeline_state;
|
||||
*pipeline_state_handle_out = new_pipeline_state;
|
||||
current_pipeline_ = new_pipeline;
|
||||
*pipeline_handle_out = new_pipeline;
|
||||
*root_signature_out = runtime_description.root_signature;
|
||||
return true;
|
||||
}
|
||||
|
@ -1136,8 +1117,8 @@ bool PipelineCache::TranslateShader(
|
|||
std::memcpy(
|
||||
texture_binding_layouts_.data() + new_uid.vector_span_offset,
|
||||
texture_bindings, texture_binding_layout_bytes);
|
||||
texture_binding_layout_map_.insert(
|
||||
{texture_binding_layout_hash, new_uid});
|
||||
texture_binding_layout_map_.emplace(texture_binding_layout_hash,
|
||||
new_uid);
|
||||
}
|
||||
}
|
||||
if (bindless_sampler_count) {
|
||||
|
@ -1179,8 +1160,8 @@ bool PipelineCache::TranslateShader(
|
|||
vector_bindless_sampler_layout[i] =
|
||||
sampler_bindings[i].bindless_descriptor_index;
|
||||
}
|
||||
bindless_sampler_layout_map_.insert(
|
||||
{bindless_sampler_layout_hash, new_uid});
|
||||
bindless_sampler_layout_map_.emplace(bindless_sampler_layout_hash,
|
||||
new_uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1508,8 +1489,7 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
/* 16 */ PipelineBlendFactor::kSrcAlphaSat,
|
||||
};
|
||||
// Like kBlendFactorMap, but with color modes changed to alpha. Some
|
||||
// pipeline state objects aren't created in Prey because a color mode is
|
||||
// used for alpha.
|
||||
// pipelines aren't created in Prey because a color mode is used for alpha.
|
||||
static const PipelineBlendFactor kBlendFactorAlphaMap[32] = {
|
||||
/* 0 */ PipelineBlendFactor::kZero,
|
||||
/* 1 */ PipelineBlendFactor::kOne,
|
||||
|
@ -1569,18 +1549,16 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
return true;
|
||||
}
|
||||
|
||||
ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState(
|
||||
ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
|
||||
const PipelineRuntimeDescription& runtime_description) {
|
||||
const PipelineDescription& description = runtime_description.description;
|
||||
|
||||
if (runtime_description.pixel_shader != nullptr) {
|
||||
XELOGGPU(
|
||||
"Creating graphics pipeline state with VS {:016X}"
|
||||
", PS {:016X}",
|
||||
runtime_description.vertex_shader->ucode_data_hash(),
|
||||
runtime_description.pixel_shader->ucode_data_hash());
|
||||
XELOGGPU("Creating graphics pipeline with VS {:016X}, PS {:016X}",
|
||||
runtime_description.vertex_shader->ucode_data_hash(),
|
||||
runtime_description.pixel_shader->ucode_data_hash());
|
||||
} else {
|
||||
XELOGGPU("Creating graphics pipeline state with VS {:016X}",
|
||||
XELOGGPU("Creating graphics pipeline with VS {:016X}",
|
||||
runtime_description.vertex_shader->ucode_data_hash());
|
||||
}
|
||||
|
||||
|
@ -1893,20 +1871,18 @@ ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState(
|
|||
}
|
||||
}
|
||||
|
||||
// Create the pipeline state object.
|
||||
// Create the D3D12 pipeline state object.
|
||||
auto device =
|
||||
command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();
|
||||
ID3D12PipelineState* state;
|
||||
if (FAILED(device->CreateGraphicsPipelineState(&state_desc,
|
||||
IID_PPV_ARGS(&state)))) {
|
||||
if (runtime_description.pixel_shader != nullptr) {
|
||||
XELOGE(
|
||||
"Failed to create graphics pipeline state with VS {:016X}"
|
||||
", PS {:016X}",
|
||||
runtime_description.vertex_shader->ucode_data_hash(),
|
||||
runtime_description.pixel_shader->ucode_data_hash());
|
||||
XELOGE("Failed to create graphics pipeline with VS {:016X}, PS {:016X}",
|
||||
runtime_description.vertex_shader->ucode_data_hash(),
|
||||
runtime_description.pixel_shader->ucode_data_hash());
|
||||
} else {
|
||||
XELOGE("Failed to create graphics pipeline state with VS {:016X}",
|
||||
XELOGE("Failed to create graphics pipeline with VS {:016X}",
|
||||
runtime_description.vertex_shader->ucode_data_hash());
|
||||
}
|
||||
return nullptr;
|
||||
|
@ -1933,7 +1909,7 @@ void PipelineCache::StorageWriteThread() {
|
|||
ucode_guest_endian.reserve(0xFFFF);
|
||||
|
||||
bool flush_shaders = false;
|
||||
bool flush_pipeline_states = false;
|
||||
bool flush_pipelines = false;
|
||||
|
||||
while (true) {
|
||||
if (flush_shaders) {
|
||||
|
@ -1941,15 +1917,15 @@ void PipelineCache::StorageWriteThread() {
|
|||
assert_not_null(shader_storage_file_);
|
||||
fflush(shader_storage_file_);
|
||||
}
|
||||
if (flush_pipeline_states) {
|
||||
flush_pipeline_states = false;
|
||||
assert_not_null(pipeline_state_storage_file_);
|
||||
fflush(pipeline_state_storage_file_);
|
||||
if (flush_pipelines) {
|
||||
flush_pipelines = false;
|
||||
assert_not_null(pipeline_storage_file_);
|
||||
fflush(pipeline_storage_file_);
|
||||
}
|
||||
|
||||
std::pair<const Shader*, reg::SQ_PROGRAM_CNTL> shader_pair = {};
|
||||
PipelineStoredDescription pipeline_description;
|
||||
bool write_pipeline_state = false;
|
||||
bool write_pipeline = false;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(storage_write_request_lock_);
|
||||
if (storage_write_thread_shutdown_) {
|
||||
|
@ -1962,17 +1938,17 @@ void PipelineCache::StorageWriteThread() {
|
|||
storage_write_flush_shaders_ = false;
|
||||
flush_shaders = true;
|
||||
}
|
||||
if (!storage_write_pipeline_state_queue_.empty()) {
|
||||
if (!storage_write_pipeline_queue_.empty()) {
|
||||
std::memcpy(&pipeline_description,
|
||||
&storage_write_pipeline_state_queue_.front(),
|
||||
&storage_write_pipeline_queue_.front(),
|
||||
sizeof(pipeline_description));
|
||||
storage_write_pipeline_state_queue_.pop_front();
|
||||
write_pipeline_state = true;
|
||||
} else if (storage_write_flush_pipeline_states_) {
|
||||
storage_write_flush_pipeline_states_ = false;
|
||||
flush_pipeline_states = true;
|
||||
storage_write_pipeline_queue_.pop_front();
|
||||
write_pipeline = true;
|
||||
} else if (storage_write_flush_pipelines_) {
|
||||
storage_write_flush_pipelines_ = false;
|
||||
flush_pipelines = true;
|
||||
}
|
||||
if (!shader_pair.first && !write_pipeline_state) {
|
||||
if (!shader_pair.first && !write_pipeline) {
|
||||
storage_write_request_cond_.wait(lock);
|
||||
continue;
|
||||
}
|
||||
|
@ -1999,27 +1975,26 @@ void PipelineCache::StorageWriteThread() {
|
|||
}
|
||||
}
|
||||
|
||||
if (write_pipeline_state) {
|
||||
assert_not_null(pipeline_state_storage_file_);
|
||||
if (write_pipeline) {
|
||||
assert_not_null(pipeline_storage_file_);
|
||||
fwrite(&pipeline_description, sizeof(pipeline_description), 1,
|
||||
pipeline_state_storage_file_);
|
||||
pipeline_storage_file_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PipelineCache::CreationThread(size_t thread_index) {
|
||||
while (true) {
|
||||
PipelineState* pipeline_state_to_create = nullptr;
|
||||
Pipeline* pipeline_to_create = nullptr;
|
||||
|
||||
// Check if need to shut down or set the completion event and dequeue the
|
||||
// pipeline state if there is any.
|
||||
// pipeline if there is any.
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(creation_request_lock_);
|
||||
if (thread_index >= creation_threads_shutdown_from_ ||
|
||||
creation_queue_.empty()) {
|
||||
if (creation_completion_set_event_ && creation_threads_busy_ == 0) {
|
||||
// Last pipeline state object in the queue created - signal the event
|
||||
// if requested.
|
||||
// Last pipeline in the queue created - signal the event if requested.
|
||||
creation_completion_set_event_ = false;
|
||||
creation_completion_event_->Set();
|
||||
}
|
||||
|
@ -2029,23 +2004,22 @@ void PipelineCache::CreationThread(size_t thread_index) {
|
|||
creation_request_cond_.wait(lock);
|
||||
continue;
|
||||
}
|
||||
// Take the pipeline state from the queue and increment the busy thread
|
||||
// count until the pipeline state object is created - other threads must
|
||||
// be able to dequeue requests, but can't set the completion event until
|
||||
// the pipeline state objects are fully created (rather than just started
|
||||
// creating).
|
||||
pipeline_state_to_create = creation_queue_.front();
|
||||
// Take the pipeline from the queue and increment the busy thread count
|
||||
// until the pipeline is created - other threads must be able to dequeue
|
||||
// requests, but can't set the completion event until the pipelines are
|
||||
// fully created (rather than just started creating).
|
||||
pipeline_to_create = creation_queue_.front();
|
||||
creation_queue_.pop_front();
|
||||
++creation_threads_busy_;
|
||||
}
|
||||
|
||||
// Create the D3D12 pipeline state object.
|
||||
pipeline_state_to_create->state =
|
||||
CreateD3D12PipelineState(pipeline_state_to_create->description);
|
||||
pipeline_to_create->state =
|
||||
CreateD3D12Pipeline(pipeline_to_create->description);
|
||||
|
||||
// Pipeline state object created - the thread is not busy anymore, safe to
|
||||
// set the completion event if needed (at the next iteration, or in some
|
||||
// other thread).
|
||||
// Pipeline created - the thread is not busy anymore, safe to set the
|
||||
// completion event if needed (at the next iteration, or in some other
|
||||
// thread).
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
--creation_threads_busy_;
|
||||
|
@ -2053,20 +2027,20 @@ void PipelineCache::CreationThread(size_t thread_index) {
|
|||
}
|
||||
}
|
||||
|
||||
void PipelineCache::CreateQueuedPipelineStatesOnProcessorThread() {
|
||||
void PipelineCache::CreateQueuedPipelinesOnProcessorThread() {
|
||||
assert_false(creation_threads_.empty());
|
||||
while (true) {
|
||||
PipelineState* pipeline_state_to_create;
|
||||
Pipeline* pipeline_to_create;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(creation_request_lock_);
|
||||
if (creation_queue_.empty()) {
|
||||
break;
|
||||
}
|
||||
pipeline_state_to_create = creation_queue_.front();
|
||||
pipeline_to_create = creation_queue_.front();
|
||||
creation_queue_.pop_front();
|
||||
}
|
||||
pipeline_state_to_create->state =
|
||||
CreateD3D12PipelineState(pipeline_state_to_create->description);
|
||||
pipeline_to_create->state =
|
||||
CreateD3D12Pipeline(pipeline_to_create->description);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "xenia/gpu/dxbc_shader_translator.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -54,7 +55,7 @@ class PipelineCache {
|
|||
void ShutdownShaderStorage();
|
||||
|
||||
void EndSubmission();
|
||||
bool IsCreatingPipelineStates();
|
||||
bool IsCreatingPipelines();
|
||||
|
||||
D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address,
|
||||
const uint32_t* host_address, uint32_t dword_count);
|
||||
|
@ -73,14 +74,12 @@ class PipelineCache {
|
|||
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
|
||||
bool early_z,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||
void** pipeline_state_handle_out,
|
||||
ID3D12RootSignature** root_signature_out);
|
||||
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
|
||||
|
||||
// Returns a pipeline state object with deferred creation by its handle. May
|
||||
// return nullptr if failed to create the pipeline state object.
|
||||
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
|
||||
void* handle) const {
|
||||
return reinterpret_cast<const PipelineState*>(handle)->state;
|
||||
// Returns a pipeline with deferred creation by its handle. May return nullptr
|
||||
// if failed to create the pipeline.
|
||||
ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
|
||||
return reinterpret_cast<const Pipeline*>(handle)->state;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -237,7 +236,7 @@ class PipelineCache {
|
|||
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||
PipelineRuntimeDescription& runtime_description_out);
|
||||
|
||||
ID3D12PipelineState* CreateD3D12PipelineState(
|
||||
ID3D12PipelineState* CreateD3D12Pipeline(
|
||||
const PipelineRuntimeDescription& runtime_description);
|
||||
|
||||
D3D12CommandProcessor& command_processor_;
|
||||
|
@ -255,9 +254,9 @@ class PipelineCache {
|
|||
IDxcUtils* dxc_utils_ = nullptr;
|
||||
IDxcCompiler* dxc_compiler_ = nullptr;
|
||||
|
||||
// All loaded shaders mapped by their guest hash key.
|
||||
// Ucode hash -> shader.
|
||||
std::unordered_map<uint64_t, D3D12Shader*, xe::hash::IdentityHasher<uint64_t>>
|
||||
shader_map_;
|
||||
shaders_;
|
||||
|
||||
struct LayoutUID {
|
||||
size_t uid;
|
||||
|
@ -285,21 +284,20 @@ class PipelineCache {
|
|||
// Xenos pixel shader provided.
|
||||
std::vector<uint8_t> depth_only_pixel_shader_;
|
||||
|
||||
struct PipelineState {
|
||||
struct Pipeline {
|
||||
// nullptr if creation has failed.
|
||||
ID3D12PipelineState* state;
|
||||
PipelineRuntimeDescription description;
|
||||
};
|
||||
// All previously generated pipeline state objects identified by hash and the
|
||||
// description.
|
||||
std::unordered_multimap<uint64_t, PipelineState*,
|
||||
// All previously generated pipelines identified by hash and the description.
|
||||
std::unordered_multimap<uint64_t, Pipeline*,
|
||||
xe::hash::IdentityHasher<uint64_t>>
|
||||
pipeline_states_;
|
||||
pipelines_;
|
||||
|
||||
// Previously used pipeline state object. This matches our current state
|
||||
// settings and allows us to quickly(ish) reuse the pipeline state if no
|
||||
// registers have changed.
|
||||
PipelineState* current_pipeline_state_ = nullptr;
|
||||
// Previously used pipeline. This matches our current state settings and
|
||||
// allows us to quickly(ish) reuse the pipeline if no registers have been
|
||||
// changed.
|
||||
Pipeline* current_pipeline_ = nullptr;
|
||||
|
||||
// Currently open shader storage path.
|
||||
std::filesystem::path shader_storage_root_;
|
||||
|
@ -309,10 +307,9 @@ class PipelineCache {
|
|||
FILE* shader_storage_file_ = nullptr;
|
||||
bool shader_storage_file_flush_needed_ = false;
|
||||
|
||||
// Pipeline state storage output stream, for preload in the next emulator
|
||||
// runs.
|
||||
FILE* pipeline_state_storage_file_ = nullptr;
|
||||
bool pipeline_state_storage_file_flush_needed_ = false;
|
||||
// Pipeline storage output stream, for preload in the next emulator runs.
|
||||
FILE* pipeline_storage_file_ = nullptr;
|
||||
bool pipeline_storage_file_flush_needed_ = false;
|
||||
|
||||
// Thread for asynchronous writing to the storage streams.
|
||||
void StorageWriteThread();
|
||||
|
@ -322,28 +319,27 @@ class PipelineCache {
|
|||
// thread is notified about its change via storage_write_request_cond_.
|
||||
std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>>
|
||||
storage_write_shader_queue_;
|
||||
std::deque<PipelineStoredDescription> storage_write_pipeline_state_queue_;
|
||||
std::deque<PipelineStoredDescription> storage_write_pipeline_queue_;
|
||||
bool storage_write_flush_shaders_ = false;
|
||||
bool storage_write_flush_pipeline_states_ = false;
|
||||
bool storage_write_flush_pipelines_ = false;
|
||||
bool storage_write_thread_shutdown_ = false;
|
||||
std::unique_ptr<xe::threading::Thread> storage_write_thread_;
|
||||
|
||||
// Pipeline state object creation threads.
|
||||
// Pipeline creation threads.
|
||||
void CreationThread(size_t thread_index);
|
||||
void CreateQueuedPipelineStatesOnProcessorThread();
|
||||
void CreateQueuedPipelinesOnProcessorThread();
|
||||
std::mutex creation_request_lock_;
|
||||
std::condition_variable creation_request_cond_;
|
||||
// Protected with creation_request_lock_, notify_one creation_request_cond_
|
||||
// when set.
|
||||
std::deque<PipelineState*> creation_queue_;
|
||||
// Number of threads that are currently creating a pipeline state object -
|
||||
// incremented when a pipeline state object is dequeued (the completion event
|
||||
// can't be triggered before this is zero). Protected with
|
||||
// creation_request_lock_.
|
||||
std::deque<Pipeline*> creation_queue_;
|
||||
// Number of threads that are currently creating a pipeline - incremented when
|
||||
// a pipeline is dequeued (the completion event can't be triggered before this
|
||||
// is zero). Protected with creation_request_lock_.
|
||||
size_t creation_threads_busy_ = 0;
|
||||
// Manual-reset event set when the last queued pipeline state object is
|
||||
// created and there are no more pipeline state objects to create. This is
|
||||
// triggered by the thread creating the last pipeline state object.
|
||||
// Manual-reset event set when the last queued pipeline is created and there
|
||||
// are no more pipelines to create. This is triggered by the thread creating
|
||||
// the last pipeline.
|
||||
std::unique_ptr<xe::threading::Event> creation_completion_event_;
|
||||
// Whether setting the event on completion is queued. Protected with
|
||||
// creation_request_lock_, notify_one creation_request_cond_ when set.
|
||||
|
|
|
@ -25,15 +25,6 @@ project("xenia-gpu-d3d12-trace-viewer")
|
|||
kind("WindowedApp")
|
||||
language("C++")
|
||||
links({
|
||||
"aes_128",
|
||||
"capstone",
|
||||
"dxbc",
|
||||
"fmt",
|
||||
"imgui",
|
||||
"libavcodec",
|
||||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"xenia-apu",
|
||||
"xenia-apu-nop",
|
||||
"xenia-base",
|
||||
|
@ -49,6 +40,17 @@ project("xenia-gpu-d3d12-trace-viewer")
|
|||
"xenia-ui-d3d12",
|
||||
"xenia-vfs",
|
||||
"xenia-patcher",
|
||||
})
|
||||
links({
|
||||
"aes_128",
|
||||
"capstone",
|
||||
"dxbc",
|
||||
"fmt",
|
||||
"imgui",
|
||||
"libavcodec",
|
||||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"xxhash",
|
||||
})
|
||||
files({
|
||||
|
@ -71,15 +73,6 @@ project("xenia-gpu-d3d12-trace-dump")
|
|||
kind("ConsoleApp")
|
||||
language("C++")
|
||||
links({
|
||||
"aes_128",
|
||||
"capstone",
|
||||
"dxbc",
|
||||
"fmt",
|
||||
"imgui",
|
||||
"libavcodec",
|
||||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"xenia-apu",
|
||||
"xenia-apu-nop",
|
||||
"xenia-base",
|
||||
|
@ -95,6 +88,17 @@ project("xenia-gpu-d3d12-trace-dump")
|
|||
"xenia-ui-d3d12",
|
||||
"xenia-vfs",
|
||||
"xenia-patcher",
|
||||
})
|
||||
links({
|
||||
"aes_128",
|
||||
"capstone",
|
||||
"dxbc",
|
||||
"fmt",
|
||||
"imgui",
|
||||
"libavcodec",
|
||||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"xxhash",
|
||||
})
|
||||
files({
|
||||
|
@ -109,4 +113,4 @@ project("xenia-gpu-d3d12-trace-dump")
|
|||
"2>&1",
|
||||
"1>scratch/stdout-trace-dump.txt",
|
||||
})
|
||||
end
|
||||
end
|
||||
|
|
|
@ -454,8 +454,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
|||
// again and again and exit.
|
||||
if (!conversion_needed || converted_index_count == 0) {
|
||||
converted_indices.gpu_address = 0;
|
||||
converted_indices_cache_.insert(
|
||||
std::make_pair(converted_indices.key.value, converted_indices));
|
||||
converted_indices_cache_.emplace(converted_indices.key.value,
|
||||
converted_indices);
|
||||
memory_regions_used_ |= memory_regions_used_bits;
|
||||
return converted_index_count == 0 ? ConversionResult::kPrimitiveEmpty
|
||||
: ConversionResult::kConversionNotNeeded;
|
||||
|
@ -670,8 +670,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
|||
|
||||
// Cache and return the indices.
|
||||
converted_indices.gpu_address = gpu_address;
|
||||
converted_indices_cache_.insert(
|
||||
std::make_pair(converted_indices.key.value, converted_indices));
|
||||
converted_indices_cache_.emplace(converted_indices.key.value,
|
||||
converted_indices);
|
||||
memory_regions_used_ |= memory_regions_used_bits;
|
||||
gpu_address_out = gpu_address;
|
||||
index_count_out = converted_index_count;
|
||||
|
|
|
@ -277,20 +277,19 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Create the EDRAM load/store pipeline state objects.
|
||||
// Create the EDRAM load/store pipelines.
|
||||
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
|
||||
const EdramLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i];
|
||||
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState(
|
||||
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
|
||||
device, mode_info.load_shader, mode_info.load_shader_size,
|
||||
edram_load_store_root_signature_);
|
||||
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState(
|
||||
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
|
||||
device, mode_info.store_shader, mode_info.store_shader_size,
|
||||
edram_load_store_root_signature_);
|
||||
if (edram_load_pipelines_[i] == nullptr ||
|
||||
edram_store_pipelines_[i] == nullptr) {
|
||||
XELOGE(
|
||||
"Failed to create the EDRAM load/store pipeline states for mode {}",
|
||||
i);
|
||||
XELOGE("Failed to create the EDRAM load/store pipelines for mode {}",
|
||||
i);
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
|
@ -299,7 +298,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
|
|||
}
|
||||
}
|
||||
|
||||
// Create the resolve root signatures and pipeline state objects.
|
||||
// Create the resolve root signatures and pipelines.
|
||||
D3D12_ROOT_PARAMETER resolve_root_parameters[3];
|
||||
|
||||
// Copying root signature.
|
||||
|
@ -369,7 +368,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Copying pipeline state objects.
|
||||
// Copying pipelines.
|
||||
uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1;
|
||||
for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount);
|
||||
++i) {
|
||||
|
@ -381,63 +380,61 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
|
|||
continue;
|
||||
}
|
||||
const auto& resolve_copy_shader = resolve_copy_shaders_[i];
|
||||
ID3D12PipelineState* resolve_copy_pipeline_state =
|
||||
ui::d3d12::util::CreateComputePipelineState(
|
||||
ID3D12PipelineState* resolve_copy_pipeline =
|
||||
ui::d3d12::util::CreateComputePipeline(
|
||||
device, resolve_copy_shader.first, resolve_copy_shader.second,
|
||||
resolve_copy_root_signature_);
|
||||
if (resolve_copy_pipeline_state == nullptr) {
|
||||
XELOGE("Failed to create {} resolve copy pipeline state",
|
||||
if (resolve_copy_pipeline == nullptr) {
|
||||
XELOGE("Failed to create {} resolve copy pipeline",
|
||||
resolve_copy_shader_info.debug_name);
|
||||
}
|
||||
resolve_copy_pipeline_state->SetName(reinterpret_cast<LPCWSTR>(
|
||||
resolve_copy_pipeline->SetName(reinterpret_cast<LPCWSTR>(
|
||||
xe::to_utf16(resolve_copy_shader_info.debug_name).c_str()));
|
||||
resolve_copy_pipeline_states_[i] = resolve_copy_pipeline_state;
|
||||
resolve_copy_pipelines_[i] = resolve_copy_pipeline;
|
||||
}
|
||||
|
||||
// Clearing pipeline state objects.
|
||||
resolve_clear_32bpp_pipeline_state_ =
|
||||
ui::d3d12::util::CreateComputePipelineState(
|
||||
device,
|
||||
resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs
|
||||
: resolve_clear_32bpp_cs,
|
||||
resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs)
|
||||
: sizeof(resolve_clear_32bpp_cs),
|
||||
resolve_clear_root_signature_);
|
||||
if (resolve_clear_32bpp_pipeline_state_ == nullptr) {
|
||||
XELOGE("Failed to create the 32bpp resolve clear pipeline state");
|
||||
// Clearing pipelines.
|
||||
resolve_clear_32bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
|
||||
device,
|
||||
resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs
|
||||
: resolve_clear_32bpp_cs,
|
||||
resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs)
|
||||
: sizeof(resolve_clear_32bpp_cs),
|
||||
resolve_clear_root_signature_);
|
||||
if (resolve_clear_32bpp_pipeline_ == nullptr) {
|
||||
XELOGE("Failed to create the 32bpp resolve clear pipeline");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
resolve_clear_32bpp_pipeline_state_->SetName(L"Resolve Clear 32bpp");
|
||||
resolve_clear_64bpp_pipeline_state_ =
|
||||
ui::d3d12::util::CreateComputePipelineState(
|
||||
device,
|
||||
resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs
|
||||
: resolve_clear_64bpp_cs,
|
||||
resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs)
|
||||
: sizeof(resolve_clear_64bpp_cs),
|
||||
resolve_clear_root_signature_);
|
||||
if (resolve_clear_64bpp_pipeline_state_ == nullptr) {
|
||||
XELOGE("Failed to create the 64bpp resolve clear pipeline state");
|
||||
resolve_clear_32bpp_pipeline_->SetName(L"Resolve Clear 32bpp");
|
||||
resolve_clear_64bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
|
||||
device,
|
||||
resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs
|
||||
: resolve_clear_64bpp_cs,
|
||||
resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs)
|
||||
: sizeof(resolve_clear_64bpp_cs),
|
||||
resolve_clear_root_signature_);
|
||||
if (resolve_clear_64bpp_pipeline_ == nullptr) {
|
||||
XELOGE("Failed to create the 64bpp resolve clear pipeline");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
resolve_clear_64bpp_pipeline_state_->SetName(L"Resolve Clear 64bpp");
|
||||
resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp");
|
||||
if (!edram_rov_used_) {
|
||||
assert_false(resolution_scale_2x_);
|
||||
resolve_clear_depth_24_32_pipeline_state_ =
|
||||
ui::d3d12::util::CreateComputePipelineState(
|
||||
resolve_clear_depth_24_32_pipeline_ =
|
||||
ui::d3d12::util::CreateComputePipeline(
|
||||
device, resolve_clear_depth_24_32_cs,
|
||||
sizeof(resolve_clear_depth_24_32_cs),
|
||||
resolve_clear_root_signature_);
|
||||
if (resolve_clear_depth_24_32_pipeline_state_ == nullptr) {
|
||||
if (resolve_clear_depth_24_32_pipeline_ == nullptr) {
|
||||
XELOGE(
|
||||
"Failed to create the 24-bit and 32-bit depth resolve clear pipeline "
|
||||
"state");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
resolve_clear_64bpp_pipeline_state_->SetName(
|
||||
resolve_clear_64bpp_pipeline_->SetName(
|
||||
L"Resolve Clear 24-bit & 32-bit Depth");
|
||||
}
|
||||
|
||||
|
@ -451,12 +448,12 @@ void RenderTargetCache::Shutdown() {
|
|||
|
||||
edram_snapshot_restore_pool_.reset();
|
||||
ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_state_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_state_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_state_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_);
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_clear_root_signature_);
|
||||
for (size_t i = 0; i < xe::countof(resolve_copy_pipeline_states_); ++i) {
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipeline_states_[i]);
|
||||
for (size_t i = 0; i < xe::countof(resolve_copy_pipelines_); ++i) {
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipelines_[i]);
|
||||
}
|
||||
ui::d3d12::util::ReleaseAndNull(resolve_copy_root_signature_);
|
||||
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
|
||||
|
@ -1209,8 +1206,8 @@ bool RenderTargetCache::Resolve(const Memory& memory,
|
|||
0, sizeof(copy_shader_constants) / sizeof(uint32_t),
|
||||
&copy_shader_constants, 0);
|
||||
}
|
||||
command_processor_.SetComputePipelineState(
|
||||
resolve_copy_pipeline_states_[size_t(copy_shader)]);
|
||||
command_processor_.SetComputePipeline(
|
||||
resolve_copy_pipelines_[size_t(copy_shader)]);
|
||||
command_processor_.SubmitBarriers();
|
||||
command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1);
|
||||
|
||||
|
@ -1279,9 +1276,9 @@ bool RenderTargetCache::Resolve(const Memory& memory,
|
|||
command_list.D3DSetComputeRoot32BitConstants(
|
||||
0, sizeof(depth_clear_constants) / sizeof(uint32_t),
|
||||
&depth_clear_constants, 0);
|
||||
command_processor_.SetComputePipelineState(
|
||||
clear_float32_depth ? resolve_clear_depth_24_32_pipeline_state_
|
||||
: resolve_clear_32bpp_pipeline_state_);
|
||||
command_processor_.SetComputePipeline(
|
||||
clear_float32_depth ? resolve_clear_depth_24_32_pipeline_
|
||||
: resolve_clear_32bpp_pipeline_);
|
||||
command_processor_.SubmitBarriers();
|
||||
command_list.D3DDispatch(clear_group_count.first,
|
||||
clear_group_count.second, 1);
|
||||
|
@ -1301,10 +1298,10 @@ bool RenderTargetCache::Resolve(const Memory& memory,
|
|||
0, sizeof(color_clear_constants) / sizeof(uint32_t),
|
||||
&color_clear_constants, 0);
|
||||
}
|
||||
command_processor_.SetComputePipelineState(
|
||||
command_processor_.SetComputePipeline(
|
||||
resolve_info.color_edram_info.format_is_64bpp
|
||||
? resolve_clear_64bpp_pipeline_state_
|
||||
: resolve_clear_32bpp_pipeline_state_);
|
||||
? resolve_clear_64bpp_pipeline_
|
||||
: resolve_clear_32bpp_pipeline_);
|
||||
command_processor_.SubmitBarriers();
|
||||
command_list.D3DDispatch(clear_group_count.first,
|
||||
clear_group_count.second, 1);
|
||||
|
@ -1816,7 +1813,7 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
|||
render_target->footprints, nullptr, nullptr,
|
||||
&copy_buffer_size);
|
||||
render_target->copy_buffer_size = uint32_t(copy_buffer_size);
|
||||
render_targets_.insert(std::make_pair(key.value, render_target));
|
||||
render_targets_.emplace(key.value, render_target);
|
||||
COUNT_profile_set("gpu/render_target_cache/render_targets",
|
||||
render_targets_.size());
|
||||
#if 0
|
||||
|
@ -2015,8 +2012,7 @@ void RenderTargetCache::StoreRenderTargetsToEdram() {
|
|||
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
|
||||
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
|
||||
render_target->key.format);
|
||||
command_processor_.SetComputePipelineState(
|
||||
edram_store_pipelines_[size_t(mode)]);
|
||||
command_processor_.SetComputePipeline(edram_store_pipelines_[size_t(mode)]);
|
||||
// 1 group per 80x16 samples.
|
||||
command_list.D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1);
|
||||
|
||||
|
@ -2140,8 +2136,7 @@ void RenderTargetCache::LoadRenderTargetsFromEdram(
|
|||
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
|
||||
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
|
||||
render_target->key.format);
|
||||
command_processor_.SetComputePipelineState(
|
||||
edram_load_pipelines_[size_t(mode)]);
|
||||
command_processor_.SetComputePipeline(edram_load_pipelines_[size_t(mode)]);
|
||||
// 1 group per 80x16 samples.
|
||||
command_list.D3DDispatch(render_target->key.width_ss_div_80, edram_rows, 1);
|
||||
|
||||
|
|
|
@ -237,14 +237,13 @@ class D3D12CommandProcessor;
|
|||
// get each of the 4 host pixels for each sample.
|
||||
class RenderTargetCache {
|
||||
public:
|
||||
// Direct3D 12 debug layer does some kaschenit-style trolling by giving errors
|
||||
// that contradict each other when you use null RTV descriptors - if you set
|
||||
// a valid format in RTVFormats in the pipeline state, it says that null
|
||||
// descriptors can only be used if the format in the pipeline state is
|
||||
// DXGI_FORMAT_UNKNOWN, however, if DXGI_FORMAT_UNKNOWN is set, it complains
|
||||
// that the format in the pipeline doesn't match the RTV format. So we have to
|
||||
// make render target bindings consecutive and remap the output indices in
|
||||
// pixel shaders.
|
||||
// Direct3D 12 debug layer is giving errors that contradict each other when
|
||||
// you use null RTV descriptors - if you set a valid format in RTVFormats in
|
||||
// the pipeline state, it says that null descriptors can only be used if the
|
||||
// format in the pipeline state is DXGI_FORMAT_UNKNOWN, however, if
|
||||
// DXGI_FORMAT_UNKNOWN is set, it complains that the format in the pipeline
|
||||
// state doesn't match the RTV format. So we have to make render target
|
||||
// bindings consecutive and remap the output indices in pixel shaders.
|
||||
struct PipelineRenderTarget {
|
||||
uint32_t guest_render_target;
|
||||
DXGI_FORMAT format;
|
||||
|
@ -304,8 +303,7 @@ class RenderTargetCache {
|
|||
// performance difference, but with EDRAM loads/stores less conversion should
|
||||
// be performed by the shaders if D24S8 is emulated as D24_UNORM_S8_UINT, and
|
||||
// it's probably more accurate.
|
||||
static inline DXGI_FORMAT GetDepthDXGIFormat(
|
||||
xenos::DepthRenderTargetFormat format) {
|
||||
static DXGI_FORMAT GetDepthDXGIFormat(xenos::DepthRenderTargetFormat format) {
|
||||
return format == xenos::DepthRenderTargetFormat::kD24FS8
|
||||
? DXGI_FORMAT_D32_FLOAT_S8X24_UINT
|
||||
: DXGI_FORMAT_D24_UNORM_S8_UINT;
|
||||
|
@ -537,7 +535,7 @@ class RenderTargetCache {
|
|||
// 16: - EDRAM pitch in tiles.
|
||||
uint32_t base_samples_2x_depth_pitch;
|
||||
};
|
||||
// EDRAM pipeline states for the RTV/DSV path.
|
||||
// EDRAM pipelines for the RTV/DSV path.
|
||||
static const EdramLoadStoreModeInfo
|
||||
edram_load_store_mode_info_[size_t(EdramLoadStoreMode::kCount)];
|
||||
ID3D12PipelineState*
|
||||
|
@ -546,20 +544,20 @@ class RenderTargetCache {
|
|||
ID3D12PipelineState*
|
||||
edram_store_pipelines_[size_t(EdramLoadStoreMode::kCount)] = {};
|
||||
|
||||
// Resolve root signatures and pipeline state objects.
|
||||
// Resolve root signatures and pipelines.
|
||||
ID3D12RootSignature* resolve_copy_root_signature_ = nullptr;
|
||||
static const std::pair<const uint8_t*, size_t>
|
||||
resolve_copy_shaders_[size_t(draw_util::ResolveCopyShaderIndex::kCount)];
|
||||
ID3D12PipelineState* resolve_copy_pipeline_states_[size_t(
|
||||
ID3D12PipelineState* resolve_copy_pipelines_[size_t(
|
||||
draw_util::ResolveCopyShaderIndex::kCount)] = {};
|
||||
ID3D12RootSignature* resolve_clear_root_signature_ = nullptr;
|
||||
// Clearing 32bpp color, depth with ROV, or unorm depth without ROV.
|
||||
ID3D12PipelineState* resolve_clear_32bpp_pipeline_state_ = nullptr;
|
||||
ID3D12PipelineState* resolve_clear_32bpp_pipeline_ = nullptr;
|
||||
// Clearing 64bpp color.
|
||||
ID3D12PipelineState* resolve_clear_64bpp_pipeline_state_ = nullptr;
|
||||
ID3D12PipelineState* resolve_clear_64bpp_pipeline_ = nullptr;
|
||||
// Clearing float depth without ROV, both the float24 and the host float32
|
||||
// versions.
|
||||
ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_state_ = nullptr;
|
||||
ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_ = nullptr;
|
||||
|
||||
// FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on
|
||||
// Nvidia Maxwell 1st generation and older.
|
||||
|
|
|
@ -918,27 +918,24 @@ bool TextureCache::Initialize(bool edram_rov_used) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Create the loading pipeline state objects.
|
||||
// Create the loading pipelines.
|
||||
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
|
||||
const LoadModeInfo& mode_info = load_mode_info_[i];
|
||||
load_pipeline_states_[i] = ui::d3d12::util::CreateComputePipelineState(
|
||||
load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
|
||||
device, mode_info.shader, mode_info.shader_size, load_root_signature_);
|
||||
if (load_pipeline_states_[i] == nullptr) {
|
||||
XELOGE(
|
||||
"Failed to create the texture loading pipeline state object for mode "
|
||||
"{}",
|
||||
i);
|
||||
if (load_pipelines_[i] == nullptr) {
|
||||
XELOGE("Failed to create the texture loading pipeline for mode {}", i);
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
if (IsResolutionScale2X() && mode_info.shader_2x != nullptr) {
|
||||
load_pipeline_states_2x_[i] = ui::d3d12::util::CreateComputePipelineState(
|
||||
load_pipelines_2x_[i] = ui::d3d12::util::CreateComputePipeline(
|
||||
device, mode_info.shader_2x, mode_info.shader_2x_size,
|
||||
load_root_signature_);
|
||||
if (load_pipeline_states_2x_[i] == nullptr) {
|
||||
if (load_pipelines_2x_[i] == nullptr) {
|
||||
XELOGE(
|
||||
"Failed to create the 2x-scaled texture loading pipeline state "
|
||||
"for mode {}",
|
||||
"Failed to create the 2x-scaled texture loading pipeline for mode "
|
||||
"{}",
|
||||
i);
|
||||
Shutdown();
|
||||
return false;
|
||||
|
@ -1024,8 +1021,8 @@ void TextureCache::Shutdown() {
|
|||
ui::d3d12::util::ReleaseAndNull(null_srv_descriptor_heap_);
|
||||
|
||||
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
|
||||
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_2x_[i]);
|
||||
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_[i]);
|
||||
ui::d3d12::util::ReleaseAndNull(load_pipelines_2x_[i]);
|
||||
ui::d3d12::util::ReleaseAndNull(load_pipelines_[i]);
|
||||
}
|
||||
ui::d3d12::util::ReleaseAndNull(load_root_signature_);
|
||||
|
||||
|
@ -1892,7 +1889,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
|||
if (IsResolutionScale2X() && key.tiled) {
|
||||
LoadMode load_mode = GetLoadMode(key);
|
||||
if (load_mode != LoadMode::kUnknown &&
|
||||
load_pipeline_states_2x_[uint32_t(load_mode)] != nullptr) {
|
||||
load_pipelines_2x_[uint32_t(load_mode)] != nullptr) {
|
||||
uint32_t base_size = 0, mip_size = 0;
|
||||
texture_util::GetTextureTotalSize(
|
||||
key.dimension, key.width, key.height, key.depth, key.format,
|
||||
|
@ -2047,7 +2044,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
|||
}
|
||||
texture->base_watch_handle = nullptr;
|
||||
texture->mip_watch_handle = nullptr;
|
||||
textures_.insert(std::make_pair(map_key, texture));
|
||||
textures_.emplace(map_key, texture);
|
||||
COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
|
||||
textures_total_size_ += texture->resource_size;
|
||||
COUNT_profile_set("gpu/texture_cache/total_size_mb",
|
||||
|
@ -2079,10 +2076,10 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
|||
return false;
|
||||
}
|
||||
bool scaled_resolve = texture->key.scaled_resolve ? true : false;
|
||||
ID3D12PipelineState* pipeline_state =
|
||||
scaled_resolve ? load_pipeline_states_2x_[uint32_t(load_mode)]
|
||||
: load_pipeline_states_[uint32_t(load_mode)];
|
||||
if (pipeline_state == nullptr) {
|
||||
ID3D12PipelineState* pipeline = scaled_resolve
|
||||
? load_pipelines_2x_[uint32_t(load_mode)]
|
||||
: load_pipelines_[uint32_t(load_mode)];
|
||||
if (pipeline == nullptr) {
|
||||
return false;
|
||||
}
|
||||
const LoadModeInfo& load_mode_info = load_mode_info_[uint32_t(load_mode)];
|
||||
|
@ -2296,7 +2293,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
|||
load_mode_info.srv_bpe_log2);
|
||||
}
|
||||
}
|
||||
command_processor_.SetComputePipelineState(pipeline_state);
|
||||
command_processor_.SetComputePipeline(pipeline);
|
||||
command_list.D3DSetComputeRootSignature(load_root_signature_);
|
||||
command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second);
|
||||
|
||||
|
@ -2597,7 +2594,7 @@ uint32_t TextureCache::FindOrCreateTextureDescriptor(Texture& texture,
|
|||
}
|
||||
device->CreateShaderResourceView(
|
||||
texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index));
|
||||
texture.srv_descriptors.insert({descriptor_key, descriptor_index});
|
||||
texture.srv_descriptors.emplace(descriptor_key, descriptor_index);
|
||||
return descriptor_index;
|
||||
}
|
||||
|
||||
|
|
|
@ -106,18 +106,18 @@ class TextureCache {
|
|||
bool operator!=(const TextureKey& key) const {
|
||||
return GetMapKey() != key.GetMapKey() || bucket_key != key.bucket_key;
|
||||
}
|
||||
inline uint64_t GetMapKey() const {
|
||||
uint64_t GetMapKey() const {
|
||||
return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32);
|
||||
}
|
||||
inline void SetMapKey(uint64_t key) {
|
||||
void SetMapKey(uint64_t key) {
|
||||
map_key[0] = uint32_t(key);
|
||||
map_key[1] = uint32_t(key >> 32);
|
||||
}
|
||||
inline bool IsInvalid() const {
|
||||
bool IsInvalid() const {
|
||||
// Zero base and zero width is enough for a binding to be invalid.
|
||||
return map_key[0] == 0;
|
||||
}
|
||||
inline void MakeInvalid() {
|
||||
void MakeInvalid() {
|
||||
// Reset all for a stable hash.
|
||||
SetMapKey(0);
|
||||
bucket_key = 0;
|
||||
|
@ -222,9 +222,7 @@ class TextureCache {
|
|||
|
||||
void MarkRangeAsResolved(uint32_t start_unscaled, uint32_t length_unscaled);
|
||||
|
||||
inline bool IsResolutionScale2X() const {
|
||||
return scaled_resolve_buffer_ != nullptr;
|
||||
}
|
||||
bool IsResolutionScale2X() const { return scaled_resolve_buffer_ != nullptr; }
|
||||
ID3D12Resource* GetScaledResolveBuffer() const {
|
||||
return scaled_resolve_buffer_;
|
||||
}
|
||||
|
@ -233,7 +231,7 @@ class TextureCache {
|
|||
uint32_t length_unscaled);
|
||||
void UseScaledResolveBufferForReading();
|
||||
void UseScaledResolveBufferForWriting();
|
||||
inline void MarkScaledResolveBufferUAVWritesCommitNeeded() {
|
||||
void MarkScaledResolveBufferUAVWritesCommitNeeded() {
|
||||
if (scaled_resolve_buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
|
||||
scaled_resolve_buffer_uav_writes_commit_needed_ = true;
|
||||
}
|
||||
|
@ -432,7 +430,7 @@ class TextureCache {
|
|||
// Whether the signed version of the texture has a different representation on
|
||||
// the host than its unsigned version (for example, if it's a fixed-point
|
||||
// texture emulated with a larger host pixel format).
|
||||
static inline bool IsSignedVersionSeparate(xenos::TextureFormat format) {
|
||||
static bool IsSignedVersionSeparate(xenos::TextureFormat format) {
|
||||
const HostFormat& host_format = host_formats_[uint32_t(format)];
|
||||
return host_format.load_mode_snorm != LoadMode::kUnknown &&
|
||||
host_format.load_mode_snorm != host_format.load_mode;
|
||||
|
@ -441,26 +439,24 @@ class TextureCache {
|
|||
// of block-compressed textures with 4x4-aligned dimensions on PC).
|
||||
static bool IsDecompressionNeeded(xenos::TextureFormat format, uint32_t width,
|
||||
uint32_t height);
|
||||
static inline DXGI_FORMAT GetDXGIResourceFormat(xenos::TextureFormat format,
|
||||
uint32_t width,
|
||||
uint32_t height) {
|
||||
static DXGI_FORMAT GetDXGIResourceFormat(xenos::TextureFormat format,
|
||||
uint32_t width, uint32_t height) {
|
||||
const HostFormat& host_format = host_formats_[uint32_t(format)];
|
||||
return IsDecompressionNeeded(format, width, height)
|
||||
? host_format.dxgi_format_uncompressed
|
||||
: host_format.dxgi_format_resource;
|
||||
}
|
||||
static inline DXGI_FORMAT GetDXGIResourceFormat(TextureKey key) {
|
||||
static DXGI_FORMAT GetDXGIResourceFormat(TextureKey key) {
|
||||
return GetDXGIResourceFormat(key.format, key.width, key.height);
|
||||
}
|
||||
static inline DXGI_FORMAT GetDXGIUnormFormat(xenos::TextureFormat format,
|
||||
uint32_t width,
|
||||
uint32_t height) {
|
||||
static DXGI_FORMAT GetDXGIUnormFormat(xenos::TextureFormat format,
|
||||
uint32_t width, uint32_t height) {
|
||||
const HostFormat& host_format = host_formats_[uint32_t(format)];
|
||||
return IsDecompressionNeeded(format, width, height)
|
||||
? host_format.dxgi_format_uncompressed
|
||||
: host_format.dxgi_format_unorm;
|
||||
}
|
||||
static inline DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) {
|
||||
static DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) {
|
||||
return GetDXGIUnormFormat(key.format, key.width, key.height);
|
||||
}
|
||||
|
||||
|
@ -550,9 +546,9 @@ class TextureCache {
|
|||
|
||||
static const LoadModeInfo load_mode_info_[];
|
||||
ID3D12RootSignature* load_root_signature_ = nullptr;
|
||||
ID3D12PipelineState* load_pipeline_states_[size_t(LoadMode::kCount)] = {};
|
||||
// Load pipeline state objects for 2x-scaled resolved targets.
|
||||
ID3D12PipelineState* load_pipeline_states_2x_[size_t(LoadMode::kCount)] = {};
|
||||
ID3D12PipelineState* load_pipelines_[size_t(LoadMode::kCount)] = {};
|
||||
// Load pipelines for 2x-scaled resolved targets.
|
||||
ID3D12PipelineState* load_pipelines_2x_[size_t(LoadMode::kCount)] = {};
|
||||
|
||||
std::unordered_multimap<uint64_t, Texture*> textures_;
|
||||
uint64_t textures_total_size_ = 0;
|
||||
|
|
|
@ -111,6 +111,34 @@ int32_t FloatToD3D11Fixed16p8(float f32) {
|
|||
return result.s;
|
||||
}
|
||||
|
||||
// Computes the window scissor rectangle from the register file and stores it
// in scissor_out as left / top / width / height.
// The corners are read from PA_SC_WINDOW_SCISSOR_TL and
// PA_SC_WINDOW_SCISSOR_BR. Unless window_offset_disable is set in the TL
// register, the PA_SC_WINDOW_OFFSET x/y offsets are added to both corners.
void GetScissor(const RegisterFile& regs, Scissor& scissor_out) {
|
||||
// FIXME(Triang3l): Screen scissor isn't applied here, but it seems to be
|
||||
// unused on Xbox 360 Direct3D 9.
|
||||
auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
|
||||
auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
|
||||
uint32_t tl_x = pa_sc_window_scissor_tl.tl_x;
|
||||
uint32_t tl_y = pa_sc_window_scissor_tl.tl_y;
|
||||
uint32_t br_x = pa_sc_window_scissor_br.br_x;
|
||||
uint32_t br_y = pa_sc_window_scissor_br.br_y;
|
||||
// The offsets are added in int32_t and each sum is clamped to 0 before being
// converted back to uint32_t, so a sum below zero becomes 0 instead of a huge
// unsigned value.
if (!pa_sc_window_scissor_tl.window_offset_disable) {
|
||||
auto pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();
|
||||
tl_x = uint32_t(std::max(
|
||||
int32_t(tl_x) + pa_sc_window_offset.window_x_offset, int32_t(0)));
|
||||
tl_y = uint32_t(std::max(
|
||||
int32_t(tl_y) + pa_sc_window_offset.window_y_offset, int32_t(0)));
|
||||
br_x = uint32_t(std::max(
|
||||
int32_t(br_x) + pa_sc_window_offset.window_x_offset, int32_t(0)));
|
||||
br_y = uint32_t(std::max(
|
||||
int32_t(br_y) + pa_sc_window_offset.window_y_offset, int32_t(0)));
|
||||
}
|
||||
// Keep the bottom-right corner at or beyond the top-left one so the unsigned
// width / height subtractions below cannot underflow (an inverted rectangle
// becomes an empty one).
br_x = std::max(br_x, tl_x);
|
||||
br_y = std::max(br_y, tl_y);
|
||||
scissor_out.left = tl_x;
|
||||
scissor_out.top = tl_y;
|
||||
scissor_out.width = br_x - tl_x;
|
||||
scissor_out.height = br_y - tl_y;
|
||||
}
|
||||
|
||||
xenos::CopySampleSelect SanitizeCopySampleSelect(
|
||||
xenos::CopySampleSelect copy_sample_select, xenos::MsaaSamples msaa_samples,
|
||||
bool is_depth) {
|
||||
|
|
|
@ -33,6 +33,14 @@ namespace draw_util {
|
|||
// for use with the top-left rasterization rule later.
|
||||
int32_t FloatToD3D11Fixed16p8(float f32);
|
||||
|
||||
// Scissor rectangle stored as an origin (left, top) plus an extent
// (width, height), all unsigned. Filled by GetScissor.
// NOTE(review): units are presumably guest render target pixels - confirm.
struct Scissor {
|
||||
uint32_t left;
|
||||
uint32_t top;
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
};
|
||||
void GetScissor(const RegisterFile& regs, Scissor& scissor_out);
|
||||
|
||||
// To avoid passing values that the shader won't understand (even though
|
||||
// Direct3D 9 shouldn't pass them anyway).
|
||||
xenos::CopySampleSelect SanitizeCopySampleSelect(
|
||||
|
|
|
@ -68,32 +68,34 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
|
|||
break;
|
||||
case AluVectorOpcode::kMul:
|
||||
case AluVectorOpcode::kMad: {
|
||||
bool is_mad = instr.vector_opcode == AluVectorOpcode::kMad;
|
||||
if (is_mad) {
|
||||
DxbcOpMAd(per_component_dest, operands[0], operands[1], operands[2]);
|
||||
} else {
|
||||
DxbcOpMul(per_component_dest, operands[0], operands[1]);
|
||||
}
|
||||
// Shader Model 3: 0 or denormal * anything = 0.
|
||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
||||
uint32_t absolute_different =
|
||||
// Not using DXBC mad to prevent fused multiply-add (mul followed by add
|
||||
// may be optimized into non-fused mad by the driver in the identical
|
||||
// operands case also).
|
||||
DxbcOpMul(per_component_dest, operands[0], operands[1]);
|
||||
uint32_t multiplicands_different =
|
||||
used_result_components &
|
||||
~instr.vector_operands[0].GetAbsoluteIdenticalComponents(
|
||||
~instr.vector_operands[0].GetIdenticalComponents(
|
||||
instr.vector_operands[1]);
|
||||
if (absolute_different) {
|
||||
if (multiplicands_different) {
|
||||
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||
uint32_t is_zero_temp = PushSystemTemp();
|
||||
DxbcOpMin(DxbcDest::R(is_zero_temp, absolute_different),
|
||||
DxbcOpMin(DxbcDest::R(is_zero_temp, multiplicands_different),
|
||||
operands[0].Abs(), operands[1].Abs());
|
||||
// min isn't required to flush denormals, eq is.
|
||||
DxbcOpEq(DxbcDest::R(is_zero_temp, absolute_different),
|
||||
DxbcOpEq(DxbcDest::R(is_zero_temp, multiplicands_different),
|
||||
DxbcSrc::R(is_zero_temp), DxbcSrc::LF(0.0f));
|
||||
DxbcOpMovC(DxbcDest::R(system_temp_result_, absolute_different),
|
||||
DxbcSrc::R(is_zero_temp),
|
||||
is_mad ? operands[2] : DxbcSrc::LF(0.0f),
|
||||
// Not replacing true `0 + term` with movc of the term because +0 + -0
|
||||
// should result in +0, not -0.
|
||||
DxbcOpMovC(DxbcDest::R(system_temp_result_, multiplicands_different),
|
||||
DxbcSrc::R(is_zero_temp), DxbcSrc::LF(0.0f),
|
||||
DxbcSrc::R(system_temp_result_));
|
||||
// Release is_zero_temp.
|
||||
PopSystemTemp();
|
||||
}
|
||||
if (instr.vector_opcode == AluVectorOpcode::kMad) {
|
||||
DxbcOpAdd(per_component_dest, DxbcSrc::R(system_temp_result_),
|
||||
operands[2]);
|
||||
}
|
||||
} break;
|
||||
|
||||
case AluVectorOpcode::kMax:
|
||||
|
@ -179,69 +181,40 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
|
|||
component_count = 4;
|
||||
}
|
||||
result_swizzle = DxbcSrc::kXXXX;
|
||||
uint32_t absolute_different =
|
||||
uint32_t((1 << component_count) - 1) &
|
||||
~instr.vector_operands[0].GetAbsoluteIdenticalComponents(
|
||||
instr.vector_operands[1]);
|
||||
if (absolute_different) {
|
||||
// Shader Model 3: 0 or denormal * anything = 0.
|
||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
||||
// Add component products only if non-zero. For dp4, 16 scalar
|
||||
// operations in the worst case (as opposed to always 20 for
|
||||
// eq/movc/eq/movc/dp4 or min/eq/movc/movc/dp4 for preparing operands
|
||||
// for dp4).
|
||||
DxbcOpMul(DxbcDest::R(system_temp_result_, 0b0001),
|
||||
operands[0].SelectFromSwizzled(0),
|
||||
operands[1].SelectFromSwizzled(0));
|
||||
if (absolute_different & 0b0001) {
|
||||
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0010),
|
||||
operands[0].SelectFromSwizzled(0).Abs(),
|
||||
operands[1].SelectFromSwizzled(0).Abs());
|
||||
DxbcOpEq(DxbcDest::R(system_temp_result_, 0b0010),
|
||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY),
|
||||
uint32_t different = uint32_t((1 << component_count) - 1) &
|
||||
~instr.vector_operands[0].GetIdenticalComponents(
|
||||
instr.vector_operands[1]);
|
||||
for (uint32_t i = 0; i < component_count; ++i) {
|
||||
DxbcOpMul(DxbcDest::R(system_temp_result_, i ? 0b0010 : 0b0001),
|
||||
operands[0].SelectFromSwizzled(i),
|
||||
operands[1].SelectFromSwizzled(i));
|
||||
if ((different & (1 << i)) != 0) {
|
||||
// Shader Model 3: +-0 or denormal * anything = +0 (also not replacing
|
||||
// true `0 + term` with movc of the term because +0 + -0 should result
|
||||
// in +0, not -0).
|
||||
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0100),
|
||||
operands[0].SelectFromSwizzled(i).Abs(),
|
||||
operands[1].SelectFromSwizzled(i).Abs());
|
||||
DxbcOpEq(DxbcDest::R(system_temp_result_, 0b0100),
|
||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ),
|
||||
DxbcSrc::LF(0.0f));
|
||||
DxbcOpMovC(DxbcDest::R(system_temp_result_, 0b0001),
|
||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY),
|
||||
DxbcSrc::LF(0.0f),
|
||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX));
|
||||
}
|
||||
for (uint32_t i = 1; i < component_count; ++i) {
|
||||
bool component_different = (absolute_different & (1 << i)) != 0;
|
||||
DxbcOpMAd(DxbcDest::R(system_temp_result_,
|
||||
component_different ? 0b0010 : 0b0001),
|
||||
operands[0].SelectFromSwizzled(i),
|
||||
operands[1].SelectFromSwizzled(i),
|
||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX));
|
||||
if (component_different) {
|
||||
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0100),
|
||||
operands[0].SelectFromSwizzled(i).Abs(),
|
||||
operands[1].SelectFromSwizzled(i).Abs());
|
||||
DxbcOpEq(DxbcDest::R(system_temp_result_, 0b0100),
|
||||
DxbcOpMovC(DxbcDest::R(system_temp_result_, i ? 0b0010 : 0b0001),
|
||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ),
|
||||
DxbcSrc::LF(0.0f));
|
||||
DxbcOpMovC(DxbcDest::R(system_temp_result_, 0b0001),
|
||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ),
|
||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX),
|
||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY));
|
||||
}
|
||||
DxbcSrc::LF(0.0f),
|
||||
DxbcSrc::R(system_temp_result_,
|
||||
i ? DxbcSrc::kYYYY : DxbcSrc::kXXXX));
|
||||
}
|
||||
} else {
|
||||
if (component_count == 2) {
|
||||
DxbcOpDP2(DxbcDest::R(system_temp_result_, 0b0001), operands[0],
|
||||
operands[1]);
|
||||
} else if (component_count == 3) {
|
||||
DxbcOpDP3(DxbcDest::R(system_temp_result_, 0b0001), operands[0],
|
||||
operands[1]);
|
||||
} else {
|
||||
assert_true(component_count == 4);
|
||||
DxbcOpDP4(DxbcDest::R(system_temp_result_, 0b0001), operands[0],
|
||||
operands[1]);
|
||||
if (i) {
|
||||
// Not using DXBC dp# to avoid fused multiply-add, PC GPUs are scalar
|
||||
// as of 2020 anyway, and not using mad for the same reason (mul
|
||||
// followed by add may be optimized into non-fused mad by the driver
|
||||
// in the identical operands case also).
|
||||
DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001),
|
||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX),
|
||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY));
|
||||
}
|
||||
}
|
||||
if (component_count == 2) {
|
||||
// Add the third operand. Since floating-point addition isn't
|
||||
// associative, even though adding this in multiply-add for the first
|
||||
// component would be faster, it's safer to add here, in the end.
|
||||
DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001),
|
||||
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX),
|
||||
operands[2].SelectFromSwizzled(0));
|
||||
|
@ -592,14 +565,13 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
|
|||
DxbcOpMov(DxbcDest::R(system_temp_result_, 0b0001), DxbcSrc::LF(1.0f));
|
||||
}
|
||||
if (used_result_components & 0b0010) {
|
||||
// Shader Model 3: 0 or denormal * anything = 0.
|
||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
||||
DxbcOpMul(DxbcDest::R(system_temp_result_, 0b0010),
|
||||
operands[0].SelectFromSwizzled(1),
|
||||
operands[1].SelectFromSwizzled(1));
|
||||
if (!(instr.vector_operands[0].GetAbsoluteIdenticalComponents(
|
||||
if (!(instr.vector_operands[0].GetIdenticalComponents(
|
||||
instr.vector_operands[1]) &
|
||||
0b0010)) {
|
||||
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0100),
|
||||
operands[0].SelectFromSwizzled(1).Abs(),
|
||||
operands[1].SelectFromSwizzled(1).Abs());
|
||||
|
@ -700,8 +672,7 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
|
|||
DxbcOpMul(ps_dest, operand_0_a, operand_0_b);
|
||||
if (instr.scalar_operands[0].components[0] !=
|
||||
instr.scalar_operands[0].components[1]) {
|
||||
// Shader Model 3: 0 or denormal * anything = 0.
|
||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
||||
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||
uint32_t is_zero_temp = PushSystemTemp();
|
||||
DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(),
|
||||
operand_0_b.Abs());
|
||||
|
@ -714,58 +685,50 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
|
|||
PopSystemTemp();
|
||||
}
|
||||
break;
|
||||
case AluScalarOpcode::kMulsPrev: {
|
||||
// Shader Model 3: 0 or denormal * anything = 0.
|
||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
||||
uint32_t is_zero_temp = PushSystemTemp();
|
||||
DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(),
|
||||
ps_src.Abs());
|
||||
// min isn't required to flush denormals, eq is.
|
||||
DxbcOpEq(DxbcDest::R(is_zero_temp, 0b0001),
|
||||
DxbcSrc::R(is_zero_temp, DxbcSrc::kXXXX), DxbcSrc::LF(0.0f));
|
||||
DxbcOpMul(ps_dest, operand_0_a, ps_src);
|
||||
DxbcOpMovC(ps_dest, DxbcSrc::R(is_zero_temp, DxbcSrc::kXXXX),
|
||||
DxbcSrc::LF(0.0f), ps_src);
|
||||
// Release is_zero_temp.
|
||||
PopSystemTemp();
|
||||
} break;
|
||||
case AluScalarOpcode::kMulsPrev:
|
||||
case AluScalarOpcode::kMulsPrev2: {
|
||||
uint32_t test_temp = PushSystemTemp();
|
||||
// Check if need to select the src0.a * ps case.
|
||||
// ps != -FLT_MAX.
|
||||
DxbcOpNE(DxbcDest::R(test_temp, 0b0001), ps_src, DxbcSrc::LF(-FLT_MAX));
|
||||
// isfinite(ps), or |ps| <= FLT_MAX, or -|ps| >= -FLT_MAX, since -FLT_MAX
|
||||
// is already loaded to an SGPR, this is also false if it's NaN.
|
||||
DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -ps_src.Abs(),
|
||||
DxbcSrc::LF(-FLT_MAX));
|
||||
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
|
||||
// isfinite(src0.b).
|
||||
DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -operand_0_b.Abs(),
|
||||
DxbcSrc::LF(-FLT_MAX));
|
||||
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
|
||||
// src0.b > 0 (need !(src0.b <= 0), but src0.b has already been checked
|
||||
// for NaN).
|
||||
DxbcOpLT(DxbcDest::R(test_temp, 0b0010), DxbcSrc::LF(0.0f), operand_0_b);
|
||||
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
|
||||
DxbcOpIf(true, DxbcSrc::R(test_temp, DxbcSrc::kXXXX));
|
||||
// Shader Model 3: 0 or denormal * anything = 0.
|
||||
// ps is already known to be not NaN or Infinity, so multiplying it by 0
|
||||
// will result in 0. However, src0.a can be anything, so the result should
|
||||
// be zero if ps is zero.
|
||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
||||
DxbcOpEq(DxbcDest::R(test_temp, 0b0001), ps_src, DxbcSrc::LF(0.0f));
|
||||
if (instr.scalar_opcode == AluScalarOpcode::kMulsPrev2) {
|
||||
// Check if need to select the src0.a * ps case.
|
||||
// ps != -FLT_MAX.
|
||||
DxbcOpNE(DxbcDest::R(test_temp, 0b0001), ps_src, DxbcSrc::LF(-FLT_MAX));
|
||||
// isfinite(ps), or |ps| <= FLT_MAX, or -|ps| >= -FLT_MAX, since
|
||||
// -FLT_MAX is already loaded to an SGPR, this is also false if it's
|
||||
// NaN.
|
||||
DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -ps_src.Abs(),
|
||||
DxbcSrc::LF(-FLT_MAX));
|
||||
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
|
||||
// isfinite(src0.b).
|
||||
DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -operand_0_b.Abs(),
|
||||
DxbcSrc::LF(-FLT_MAX));
|
||||
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
|
||||
// src0.b > 0 (need !(src0.b <= 0), but src0.b has already been checked
|
||||
// for NaN).
|
||||
DxbcOpLT(DxbcDest::R(test_temp, 0b0010), DxbcSrc::LF(0.0f),
|
||||
operand_0_b);
|
||||
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
|
||||
DxbcOpIf(true, DxbcSrc::R(test_temp, DxbcSrc::kXXXX));
|
||||
}
|
||||
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||
DxbcOpMin(DxbcDest::R(test_temp, 0b0001), operand_0_a.Abs(),
|
||||
ps_src.Abs());
|
||||
// min isn't required to flush denormals, eq is.
|
||||
DxbcOpEq(DxbcDest::R(test_temp, 0b0001),
|
||||
DxbcSrc::R(test_temp, DxbcSrc::kXXXX), DxbcSrc::LF(0.0f));
|
||||
DxbcOpMul(ps_dest, operand_0_a, ps_src);
|
||||
DxbcOpMovC(ps_dest, DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
|
||||
DxbcSrc::LF(0.0f), ps_src);
|
||||
DxbcOpElse();
|
||||
DxbcOpMov(ps_dest, DxbcSrc::LF(-FLT_MAX));
|
||||
DxbcOpEndIf();
|
||||
if (instr.scalar_opcode == AluScalarOpcode::kMulsPrev2) {
|
||||
DxbcOpElse();
|
||||
DxbcOpMov(ps_dest, DxbcSrc::LF(-FLT_MAX));
|
||||
DxbcOpEndIf();
|
||||
}
|
||||
// Release test_temp.
|
||||
PopSystemTemp();
|
||||
} break;
|
||||
|
@ -1023,11 +986,10 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
|
|||
case AluScalarOpcode::kMulsc0:
|
||||
case AluScalarOpcode::kMulsc1:
|
||||
DxbcOpMul(ps_dest, operand_0_a, operand_1);
|
||||
if (!(instr.scalar_operands[0].GetAbsoluteIdenticalComponents(
|
||||
if (!(instr.scalar_operands[0].GetIdenticalComponents(
|
||||
instr.scalar_operands[1]) &
|
||||
0b0001)) {
|
||||
// Shader Model 3: 0 or denormal * anything = 0.
|
||||
// FIXME(Triang3l): Signed zero needs research and handling.
|
||||
// Shader Model 3: +-0 or denormal * anything = +0.
|
||||
uint32_t is_zero_temp = PushSystemTemp();
|
||||
DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(),
|
||||
operand_1.Abs());
|
||||
|
|
|
@ -99,8 +99,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0),
|
||||
DxbcSrc::LU(~uint32_t(3)));
|
||||
}
|
||||
// Add the word offset from the instruction, plus the offset of the first
|
||||
// needed word within the element.
|
||||
// Add the word offset from the instruction (signed), plus the offset of the
|
||||
// first needed word within the element.
|
||||
uint32_t first_word_index;
|
||||
xe::bit_scan_forward(needed_words, &first_word_index);
|
||||
int32_t first_word_buffer_offset =
|
||||
|
@ -1730,10 +1730,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
}
|
||||
uint32_t texture_binding_index_unsigned =
|
||||
FindOrAddTextureBinding(tfetch_index, srv_dimension, false);
|
||||
const TextureBinding& texture_binding_unsigned =
|
||||
texture_bindings_[texture_binding_index_unsigned];
|
||||
uint32_t texture_binding_index_signed =
|
||||
FindOrAddTextureBinding(tfetch_index, srv_dimension, true);
|
||||
const TextureBinding& texture_binding_unsigned =
|
||||
texture_bindings_[texture_binding_index_unsigned];
|
||||
const TextureBinding& texture_binding_signed =
|
||||
texture_bindings_[texture_binding_index_signed];
|
||||
DxbcSrc srv_unsigned(DxbcSrc::LF(0.0f)), srv_signed(DxbcSrc::LF(0.0f));
|
||||
|
|
|
@ -135,7 +135,7 @@ X_STATUS GraphicsSystem::Setup(cpu::Processor* processor,
|
|||
}));
|
||||
// As we run vblank interrupts the debugger must be able to suspend us.
|
||||
vsync_worker_thread_->set_can_debugger_suspend(true);
|
||||
vsync_worker_thread_->set_name("GraphicsSystem Vsync");
|
||||
vsync_worker_thread_->set_name("GPU VSync");
|
||||
vsync_worker_thread_->Create();
|
||||
|
||||
if (cvars::trace_gpu_stream) {
|
||||
|
|
|
@ -65,17 +65,17 @@ enum class InstructionStorageTarget {
|
|||
// disassembly (because oPts.x000 will be assembled, but oPts.x00_ has both
|
||||
// skipped components and zeros, which cannot be encoded, and therefore it will
|
||||
// not).
|
||||
constexpr uint32_t GetInstructionStorageTargetUsedComponents(
|
||||
constexpr uint32_t GetInstructionStorageTargetUsedComponentCount(
|
||||
InstructionStorageTarget target) {
|
||||
switch (target) {
|
||||
case InstructionStorageTarget::kNone:
|
||||
return 0b0000;
|
||||
return 0;
|
||||
case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex:
|
||||
return 0b0111;
|
||||
return 3;
|
||||
case InstructionStorageTarget::kDepth:
|
||||
return 0b0001;
|
||||
return 1;
|
||||
default:
|
||||
return 0b1111;
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -136,8 +136,9 @@ struct InstructionResult {
|
|||
// Returns the write mask containing only components actually present in the
|
||||
// target.
|
||||
uint32_t GetUsedWriteMask() const {
|
||||
return original_write_mask &
|
||||
GetInstructionStorageTargetUsedComponents(storage_target);
|
||||
uint32_t target_component_count =
|
||||
GetInstructionStorageTargetUsedComponentCount(storage_target);
|
||||
return original_write_mask & ((1 << target_component_count) - 1);
|
||||
}
|
||||
// True if the components are in their 'standard' swizzle arrangement (xyzw).
|
||||
bool IsStandardSwizzle() const {
|
||||
|
@ -161,6 +162,28 @@ struct InstructionResult {
|
|||
}
|
||||
return used_components;
|
||||
}
|
||||
// Returns which components of the used write mask are constant, and what
|
||||
// values they have.
|
||||
// Bit i of the return value is set when component i is in the used write mask
// and its swizzle source lies outside the kX..kW range. Bit i of
// constant_values_out is set only for those components whose source is k1.
// constant_values_out is always overwritten, even when no component is
// constant (it is then 0).
uint32_t GetUsedConstantComponents(uint32_t& constant_values_out) const {
|
||||
uint32_t constant_components = 0;
|
||||
uint32_t constant_values = 0;
|
||||
uint32_t used_write_mask = GetUsedWriteMask();
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
// Components outside the used write mask are ignored entirely.
if (!(used_write_mask & (1 << i))) {
|
||||
continue;
|
||||
}
|
||||
SwizzleSource component = components[i];
|
||||
// Sources in the kX..kW range are treated as non-constant and skipped.
if (component >= SwizzleSource::kX && component <= SwizzleSource::kW) {
|
||||
continue;
|
||||
}
|
||||
constant_components |= 1 << i;
|
||||
// Only k1 contributes a set value bit; any other constant source leaves the
// bit 0 (presumably k0 - confirm against the SwizzleSource enum).
if (component == SwizzleSource::k1) {
|
||||
constant_values |= 1 << i;
|
||||
}
|
||||
}
|
||||
constant_values_out = constant_values;
|
||||
return constant_components;
|
||||
}
|
||||
};
|
||||
|
||||
enum class InstructionStorageSource {
|
||||
|
@ -212,14 +235,18 @@ struct InstructionOperand {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Returns which components of two operands are identical, but may have
|
||||
// different signs (for simplicity of usage with GetComponent, treating the
|
||||
// rightmost component as replicated).
|
||||
uint32_t GetAbsoluteIdenticalComponents(
|
||||
const InstructionOperand& other) const {
|
||||
// Returns which components of two operands will always be bitwise equal
|
||||
// (disregarding component_count for simplicity of usage with GetComponent,
|
||||
// treating the rightmost component as replicated). This, strictly with all
|
||||
// conditions, must be used when emulating Shader Model 3 +-0 * x = +0
|
||||
// multiplication behavior with IEEE-compliant multiplication (because
|
||||
// -0 * |-0|, or -0 * +0, is -0, while the result must be +0).
|
||||
uint32_t GetIdenticalComponents(const InstructionOperand& other) const {
|
||||
if (storage_source != other.storage_source ||
|
||||
storage_index != other.storage_index ||
|
||||
storage_addressing_mode != other.storage_addressing_mode) {
|
||||
storage_addressing_mode != other.storage_addressing_mode ||
|
||||
is_negated != other.is_negated ||
|
||||
is_absolute_value != other.is_absolute_value) {
|
||||
return 0;
|
||||
}
|
||||
uint32_t identical_components = 0;
|
||||
|
@ -229,16 +256,6 @@ struct InstructionOperand {
|
|||
}
|
||||
return identical_components;
|
||||
}
|
||||
// Returns which components of two operands will always be bitwise equal, but
|
||||
// may have different signs (disregarding component_count for simplicity of
|
||||
// usage with GetComponent, treating the rightmost component as replicated).
|
||||
uint32_t GetIdenticalComponents(const InstructionOperand& other) const {
|
||||
if (is_negated != other.is_negated ||
|
||||
is_absolute_value != other.is_absolute_value) {
|
||||
return 0;
|
||||
}
|
||||
return GetAbsoluteIdenticalComponents(other);
|
||||
}
|
||||
};
|
||||
|
||||
struct ParsedExecInstruction {
|
||||
|
|
|
@ -25,6 +25,9 @@ namespace gpu {
|
|||
// system page size granularity.
|
||||
class SharedMemory {
|
||||
public:
|
||||
static constexpr uint32_t kBufferSizeLog2 = 29;
|
||||
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
|
||||
|
||||
virtual ~SharedMemory();
|
||||
// Call in the implementation-specific ClearCache.
|
||||
virtual void ClearCache();
|
||||
|
@ -98,9 +101,6 @@ class SharedMemory {
|
|||
// destructor.
|
||||
void ShutdownCommon();
|
||||
|
||||
static constexpr uint32_t kBufferSizeLog2 = 29;
|
||||
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
|
||||
|
||||
// Sparse allocations are 4 MB, so not too many of them are allocated, but
|
||||
// also not to waste too much memory for padding (with 16 MB there's too
|
||||
// much).
|
||||
|
|
|
@ -800,13 +800,26 @@ static_assert_size(TextureFetchInstruction, 12);
|
|||
// Both are valid only within the current ALU clause. They are not modified
|
||||
// when the instruction that would write them fails its predication check.
|
||||
// - Direct3D 9 rules (like in GCN v_*_legacy_f32 instructions) for
|
||||
// multiplication (0 or denormal * anything = 0) wherever it's present (mul,
|
||||
// mad, dp, etc.) and for NaN in min/max. It's very important to respect this
|
||||
// rule for multiplication, as games often rely on it in vector normalization
|
||||
// (rcp and mul), Infinity * 0 resulting in NaN breaks a lot of things in
|
||||
// games - causes white screen in Halo 3, white specular on characters in GTA
|
||||
// IV.
|
||||
// TODO(Triang3l): Investigate signed zero handling in multiplication.
|
||||
// multiplication (+-0 or denormal * anything = +0) wherever it's present
|
||||
// (mul, mad, dp, etc.) and for NaN in min/max. It's very important to respect
|
||||
// this rule for multiplication, as games often rely on it in vector
|
||||
// normalization (rcp and mul), Infinity * 0 resulting in NaN breaks a lot of
|
||||
// things in games - causes white screen in Halo 3, white specular on
|
||||
// characters in GTA IV. The result is always positive zero in this case, no
|
||||
// matter what the signs of the other operands are, according to R5xx
|
||||
// Acceleration section 8.7.5 "Legacy multiply behavior" and testing on
|
||||
// Adreno 200. This means that the following need to be taken into account
|
||||
// (according to 8.7.2 "ALU Non-Transcendental Floating Point"):
|
||||
// - +0 * -0 is -0 with IEEE conformance, however, with this legacy SM3
|
||||
// handling, it should result in +0.
|
||||
// - +0 + -0 is +0, so multiply-add should not be replaced with conditional
|
||||
// move of the third operand in case of zero multiplicands, because the term
|
||||
// may be -0, while the result should be +0 in this case.
|
||||
// http://developer.amd.com/wordpress/media/2013/10/R5xx_Acceleration_v1.5.pdf
|
||||
// Multiply-add also appears to be not fused (the SM3 behavior instruction on
|
||||
// GCN is called v_mad_legacy_f32, not v_fma_legacy_f32) - shader translators
|
||||
// should not use instructions that may be interpreted by the host GPU as
|
||||
// fused multiply-add.
|
||||
|
||||
enum class AluScalarOpcode : uint32_t {
|
||||
// Floating-Point Add
|
||||
|
|
|
@ -30,17 +30,6 @@ project("xenia-gpu-vulkan-trace-viewer")
|
|||
kind("WindowedApp")
|
||||
language("C++")
|
||||
links({
|
||||
"aes_128",
|
||||
"capstone",
|
||||
"fmt",
|
||||
"glslang-spirv",
|
||||
"imgui",
|
||||
"libavcodec",
|
||||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"spirv-tools",
|
||||
"volk",
|
||||
"xenia-apu",
|
||||
"xenia-apu-nop",
|
||||
"xenia-base",
|
||||
|
@ -57,6 +46,19 @@ project("xenia-gpu-vulkan-trace-viewer")
|
|||
"xenia-ui-vulkan",
|
||||
"xenia-vfs",
|
||||
"xenia-patcher",
|
||||
})
|
||||
links({
|
||||
"aes_128",
|
||||
"capstone",
|
||||
"fmt",
|
||||
"glslang-spirv",
|
||||
"imgui",
|
||||
"libavcodec",
|
||||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"spirv-tools",
|
||||
"volk",
|
||||
"xxhash",
|
||||
})
|
||||
defines({
|
||||
|
@ -98,17 +100,6 @@ project("xenia-gpu-vulkan-trace-dump")
|
|||
kind("ConsoleApp")
|
||||
language("C++")
|
||||
links({
|
||||
"aes_128",
|
||||
"capstone",
|
||||
"fmt",
|
||||
"glslang-spirv",
|
||||
"imgui",
|
||||
"libavcodec",
|
||||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"spirv-tools",
|
||||
"volk",
|
||||
"xenia-apu",
|
||||
"xenia-apu-nop",
|
||||
"xenia-base",
|
||||
|
@ -125,6 +116,19 @@ project("xenia-gpu-vulkan-trace-dump")
|
|||
"xenia-ui-vulkan",
|
||||
"xenia-vfs",
|
||||
"xenia-patcher",
|
||||
})
|
||||
links({
|
||||
"aes_128",
|
||||
"capstone",
|
||||
"fmt",
|
||||
"glslang-spirv",
|
||||
"imgui",
|
||||
"libavcodec",
|
||||
"libavutil",
|
||||
"mspack",
|
||||
"snappy",
|
||||
"spirv-tools",
|
||||
"volk",
|
||||
"xxhash",
|
||||
})
|
||||
defines({
|
||||
|
|
|
@ -41,11 +41,11 @@ project("xenia-hid-demo")
|
|||
|
||||
filter("platforms:Linux")
|
||||
links({
|
||||
"SDL2",
|
||||
"vulkan",
|
||||
"X11",
|
||||
"xcb",
|
||||
"X11-xcb",
|
||||
"vulkan",
|
||||
"SDL2",
|
||||
})
|
||||
|
||||
filter("platforms:Windows")
|
||||
|
|
|
@ -359,7 +359,7 @@ void KernelState::SetExecutableModule(object_ref<UserModule> module) {
|
|||
}
|
||||
return 0;
|
||||
}));
|
||||
dispatch_thread_->set_name("Kernel Dispatch Thread");
|
||||
dispatch_thread_->set_name("Kernel Dispatch");
|
||||
dispatch_thread_->Create();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
*/
|
||||
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/kernel/kernel_state.h"
|
||||
#include "xenia/kernel/util/shim_utils.h"
|
||||
#include "xenia/kernel/xam/xam_private.h"
|
||||
|
@ -235,7 +236,8 @@ dword_result_t XamContentCreateDeviceEnumerator(dword_t content_type,
|
|||
xe::store_and_swap(&dev->device_type, dummy_device_info_.device_type);
|
||||
xe::store_and_swap(&dev->total_bytes, dummy_device_info_.total_bytes);
|
||||
xe::store_and_swap(&dev->free_bytes, dummy_device_info_.free_bytes);
|
||||
xe::copy_and_swap(dev->name, dummy_device_info_.name, 28);
|
||||
xe::copy_and_swap(dev->name, dummy_device_info_.name,
|
||||
xe::countof(dev->name));
|
||||
}
|
||||
|
||||
*handle_out = e->handle();
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/base/string_util.h"
|
||||
#include "xenia/kernel/kernel_state.h"
|
||||
#include "xenia/kernel/user_module.h"
|
||||
#include "xenia/kernel/util/shim_utils.h"
|
||||
|
@ -77,15 +78,15 @@ static SYSTEMTIME xeGetLocalSystemTime(uint64_t filetime) {
|
|||
|
||||
void XamFormatDateString(dword_t unk, qword_t filetime, lpvoid_t output_buffer,
|
||||
dword_t output_count) {
|
||||
std::memset(output_buffer, 0, output_count * 2);
|
||||
std::memset(output_buffer, 0, output_count * sizeof(char16_t));
|
||||
|
||||
// TODO: implement this for other platforms
|
||||
#if XE_PLATFORM_WIN32
|
||||
auto st = xeGetLocalSystemTime(filetime);
|
||||
// TODO: format this depending on users locale?
|
||||
auto str = fmt::format(u"{:02d}/{:02d}/{}", st.wMonth, st.wDay, st.wYear);
|
||||
auto copy_length = std::min(size_t(output_count), str.size()) * 2;
|
||||
xe::copy_and_swap(output_buffer.as<char16_t*>(), str.c_str(), copy_length);
|
||||
xe::string_util::copy_and_swap_truncating(output_buffer.as<char16_t*>(), str,
|
||||
output_count);
|
||||
#else
|
||||
assert_always();
|
||||
#endif
|
||||
|
@ -94,15 +95,15 @@ DECLARE_XAM_EXPORT1(XamFormatDateString, kNone, kImplemented);
|
|||
|
||||
void XamFormatTimeString(dword_t unk, qword_t filetime, lpvoid_t output_buffer,
|
||||
dword_t output_count) {
|
||||
std::memset(output_buffer, 0, output_count * 2);
|
||||
std::memset(output_buffer, 0, output_count * sizeof(char16_t));
|
||||
|
||||
// TODO: implement this for other platforms
|
||||
#if XE_PLATFORM_WIN32
|
||||
auto st = xeGetLocalSystemTime(filetime);
|
||||
// TODO: format this depending on users locale?
|
||||
auto str = fmt::format(u"{:02d}:{:02d}", st.wHour, st.wMinute);
|
||||
auto copy_count = std::min(size_t(output_count), str.size());
|
||||
xe::copy_and_swap(output_buffer.as<char16_t*>(), str.c_str(), copy_count);
|
||||
xe::string_util::copy_and_swap_truncating(output_buffer.as<char16_t*>(), str,
|
||||
output_count);
|
||||
#else
|
||||
assert_always();
|
||||
#endif
|
||||
|
@ -124,9 +125,8 @@ dword_result_t keXamBuildResourceLocator(uint64_t module,
|
|||
path = fmt::format(u"section://{:X},{}#{}", (uint32_t)module, container,
|
||||
resource);
|
||||
}
|
||||
auto copy_count = std::min(size_t(buffer_count), path.size());
|
||||
xe::copy_and_swap(buffer_ptr.as<char16_t*>(), path.c_str(), copy_count);
|
||||
(buffer_ptr.as<char16_t*>())[copy_count] = 0;
|
||||
xe::string_util::copy_and_swap_truncating(buffer_ptr.as<char16_t*>(), path,
|
||||
buffer_count);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -984,8 +984,7 @@ dword_result_t NetDll___WSAFDIsSet(dword_t socket_handle,
|
|||
DECLARE_XAM_EXPORT1(NetDll___WSAFDIsSet, kNetworking, kImplemented);
|
||||
|
||||
void RegisterNetExports(xe::cpu::ExportResolver* export_resolver,
|
||||
KernelState* kernel_state) {
|
||||
}
|
||||
KernelState* kernel_state) {}
|
||||
|
||||
} // namespace xam
|
||||
} // namespace kernel
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include "third_party/imgui/imgui.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/string_util.h"
|
||||
#include "xenia/emulator.h"
|
||||
#include "xenia/kernel/kernel_flags.h"
|
||||
#include "xenia/kernel/kernel_state.h"
|
||||
|
@ -188,8 +189,8 @@ class KeyboardInputDialog : public xe::ui::ImGuiDialog {
|
|||
*out_text_ = default_text;
|
||||
}
|
||||
text_buffer_.resize(max_length);
|
||||
std::strncpy(text_buffer_.data(), default_text_.c_str(),
|
||||
std::min(text_buffer_.size() - 1, default_text_.size()));
|
||||
xe::string_util::copy_truncating(text_buffer_.data(), default_text_,
|
||||
text_buffer_.size());
|
||||
}
|
||||
|
||||
void OnDraw(ImGuiIO& io) override {
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
#include <cstring>
|
||||
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/string_util.h"
|
||||
#include "xenia/kernel/kernel_state.h"
|
||||
#include "xenia/kernel/util/shim_utils.h"
|
||||
#include "xenia/kernel/xam/xam_private.h"
|
||||
|
@ -168,7 +170,8 @@ X_HRESULT_result_t XamUserGetSigninInfo(dword_t user_index, dword_t flags,
|
|||
const auto& user_profile = kernel_state()->user_profile();
|
||||
info->xuid = user_profile->xuid();
|
||||
info->signin_state = user_profile->signin_state();
|
||||
std::strncpy(info->name, user_profile->name().data(), 15);
|
||||
xe::string_util::copy_truncating(info->name, user_profile->name(),
|
||||
xe::countof(info->name));
|
||||
return X_E_SUCCESS;
|
||||
}
|
||||
DECLARE_XAM_EXPORT1(XamUserGetSigninInfo, kUserProfiles, kImplemented);
|
||||
|
@ -187,10 +190,8 @@ dword_result_t XamUserGetName(dword_t user_index, lpstring_t buffer,
|
|||
const auto& user_name = user_profile->name();
|
||||
|
||||
// Real XAM will only copy a maximum of 15 characters out.
|
||||
size_t copy_length = std::min(
|
||||
{size_t(15), user_name.size(), static_cast<size_t>(buffer_len) - 1});
|
||||
std::memcpy(buffer, user_name.data(), copy_length);
|
||||
buffer[copy_length] = '\0';
|
||||
xe::string_util::copy_truncating(buffer, user_name,
|
||||
std::min(buffer_len.value(), uint32_t(15)));
|
||||
return X_ERROR_SUCCESS;
|
||||
}
|
||||
DECLARE_XAM_EXPORT1(XamUserGetName, kUserProfiles, kImplemented);
|
||||
|
|
|
@ -226,19 +226,21 @@ DECLARE_XBOXKRNL_EXPORT1(KeSetCurrentStackPointers, kThreading, kImplemented);
|
|||
|
||||
dword_result_t KeSetAffinityThread(lpvoid_t thread_ptr, dword_t affinity,
|
||||
lpdword_t previous_affinity_ptr) {
|
||||
uint32_t previous_affinity = 0;
|
||||
|
||||
// The Xbox 360, according to disassembly of KeSetAffinityThread, unlike
|
||||
// Windows NT, stores the previous affinity via the pointer provided as an
|
||||
// argument, not in the return value - the return value is used for the
|
||||
// result.
|
||||
if (!affinity) {
|
||||
return X_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
auto thread = XObject::GetNativeObject<XThread>(kernel_state(), thread_ptr);
|
||||
if (thread) {
|
||||
previous_affinity = thread->affinity();
|
||||
if (previous_affinity_ptr) {
|
||||
*previous_affinity_ptr = uint32_t(1) << thread->active_cpu();
|
||||
}
|
||||
thread->SetAffinity(affinity);
|
||||
}
|
||||
|
||||
if (previous_affinity_ptr) {
|
||||
*previous_affinity_ptr = previous_affinity;
|
||||
}
|
||||
|
||||
return (uint32_t)affinity;
|
||||
return X_STATUS_SUCCESS;
|
||||
}
|
||||
DECLARE_XBOXKRNL_EXPORT1(KeSetAffinityThread, kThreading, kImplemented);
|
||||
|
||||
|
|
|
@ -157,11 +157,17 @@ void XThread::set_name(const std::string_view name) {
|
|||
}
|
||||
}
|
||||
|
||||
uint8_t next_cpu = 0;
|
||||
uint8_t GetFakeCpuNumber(uint8_t proc_mask) {
|
||||
static uint8_t next_cpu = 0;
|
||||
static uint8_t GetFakeCpuNumber(uint8_t proc_mask) {
|
||||
// NOTE: proc_mask is logical processors, not physical processors or cores.
|
||||
if (!proc_mask) {
|
||||
next_cpu = (next_cpu + 1) % 6;
|
||||
return next_cpu; // is this reasonable?
|
||||
// TODO(Triang3l): Does the following apply here?
|
||||
// https://docs.microsoft.com/en-us/windows/win32/dxtecharts/coding-for-multiple-cores
|
||||
// "On Xbox 360, you must explicitly assign software threads to a particular
|
||||
// hardware thread by using XSetThreadProcessor. Otherwise, all child
|
||||
// threads will stay on the same hardware thread as the parent."
|
||||
}
|
||||
assert_false(proc_mask & 0xC0);
|
||||
|
||||
|
@ -206,6 +212,7 @@ void XThread::InitializeGuestObject() {
|
|||
// 0xA88 = APC
|
||||
// 0x18 = timer
|
||||
xe::store_and_swap<uint32_t>(p + 0x09C, 0xFDFFD7FF);
|
||||
// current_cpu is expected to be initialized externally via SetActiveCpu.
|
||||
xe::store_and_swap<uint32_t>(p + 0x0D0, stack_base_);
|
||||
xe::store_and_swap<uint64_t>(p + 0x130, Clock::QueryGuestSystemTime());
|
||||
xe::store_and_swap<uint32_t>(p + 0x144, guest_object() + 0x144);
|
||||
|
@ -347,6 +354,12 @@ X_STATUS XThread::Create() {
|
|||
// Exports use this to get the kernel.
|
||||
thread_state_->context()->kernel_state = kernel_state_;
|
||||
|
||||
uint8_t cpu_index = GetFakeCpuNumber(
|
||||
static_cast<uint8_t>(creation_params_.creation_flags >> 24));
|
||||
|
||||
// Initialize the KTHREAD object.
|
||||
InitializeGuestObject();
|
||||
|
||||
X_KPCR* pcr = memory()->TranslateVirtual<X_KPCR*>(pcr_address_);
|
||||
|
||||
pcr->tls_ptr = tls_static_address_;
|
||||
|
@ -356,14 +369,11 @@ X_STATUS XThread::Create() {
|
|||
pcr->stack_base_ptr = stack_base_;
|
||||
pcr->stack_end_ptr = stack_limit_;
|
||||
|
||||
uint8_t proc_mask =
|
||||
static_cast<uint8_t>(creation_params_.creation_flags >> 24);
|
||||
pcr->dpc_active = 0; // DPC active bool?
|
||||
|
||||
pcr->current_cpu = GetFakeCpuNumber(proc_mask); // Current CPU(?)
|
||||
pcr->dpc_active = 0; // DPC active bool?
|
||||
|
||||
// Initialize the KTHREAD object.
|
||||
InitializeGuestObject();
|
||||
// Assign the thread to the logical processor, and also set up the current CPU
|
||||
// in KPCR and KTHREAD.
|
||||
SetActiveCpu(cpu_index);
|
||||
|
||||
// Always retain when starting - the thread owns itself until exited.
|
||||
RetainHandle();
|
||||
|
@ -416,10 +426,6 @@ X_STATUS XThread::Create() {
|
|||
return X_STATUS_NO_MEMORY;
|
||||
}
|
||||
|
||||
if (!cvars::ignore_thread_affinities) {
|
||||
thread_->set_affinity_mask(proc_mask);
|
||||
}
|
||||
|
||||
// Set the thread name based on host ID (for easier debugging).
|
||||
if (thread_name_.empty()) {
|
||||
set_name(fmt::format("XThread{:04X}", thread_->system_id()));
|
||||
|
@ -712,37 +718,36 @@ void XThread::SetPriority(int32_t increment) {
|
|||
}
|
||||
|
||||
void XThread::SetAffinity(uint32_t affinity) {
|
||||
// Affinity mask, as in SetThreadAffinityMask.
|
||||
// Xbox thread IDs:
|
||||
// 0 - core 0, thread 0 - user
|
||||
// 1 - core 0, thread 1 - user
|
||||
// 2 - core 1, thread 0 - sometimes xcontent
|
||||
// 3 - core 1, thread 1 - user
|
||||
// 4 - core 2, thread 0 - xaudio
|
||||
// 5 - core 2, thread 1 - user
|
||||
// TODO(benvanik): implement better thread distribution.
|
||||
// NOTE: these are logical processors, not physical processors or cores.
|
||||
SetActiveCpu(GetFakeCpuNumber(affinity));
|
||||
}
|
||||
|
||||
uint8_t XThread::active_cpu() const {
|
||||
const X_KPCR& pcr = *memory()->TranslateVirtual<const X_KPCR*>(pcr_address_);
|
||||
return pcr.current_cpu;
|
||||
}
|
||||
|
||||
void XThread::SetActiveCpu(uint8_t cpu_index) {
|
||||
// May be called during thread creation - don't skip if current == new.
|
||||
|
||||
assert_true(cpu_index < 6);
|
||||
|
||||
X_KPCR& pcr = *memory()->TranslateVirtual<X_KPCR*>(pcr_address_);
|
||||
pcr.current_cpu = cpu_index;
|
||||
|
||||
if (is_guest_thread()) {
|
||||
X_KTHREAD& thread_object =
|
||||
*memory()->TranslateVirtual<X_KTHREAD*>(guest_object());
|
||||
thread_object.current_cpu = cpu_index;
|
||||
}
|
||||
|
||||
if (xe::threading::logical_processor_count() < 6) {
|
||||
XELOGW("Too few processors - scheduling will be wonky");
|
||||
}
|
||||
SetActiveCpu(GetFakeCpuNumber(affinity));
|
||||
affinity_ = affinity;
|
||||
if (!cvars::ignore_thread_affinities) {
|
||||
thread_->set_affinity_mask(affinity);
|
||||
thread_->set_affinity_mask(uint64_t(1) << cpu_index);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t XThread::active_cpu() const {
|
||||
uint8_t* pcr = memory()->TranslateVirtual(pcr_address_);
|
||||
return xe::load_and_swap<uint8_t>(pcr + 0x10C);
|
||||
}
|
||||
|
||||
void XThread::SetActiveCpu(uint32_t cpu_index) {
|
||||
assert_true(cpu_index < 6);
|
||||
uint8_t* pcr = memory()->TranslateVirtual(pcr_address_);
|
||||
xe::store_and_swap<uint8_t>(pcr + 0x10C, cpu_index);
|
||||
}
|
||||
|
||||
bool XThread::GetTLSValue(uint32_t slot, uint32_t* value_out) {
|
||||
if (slot * 4 > tls_total_size_) {
|
||||
return false;
|
||||
|
|
|
@ -88,7 +88,8 @@ struct X_KTHREAD {
|
|||
char unk_10[0xAC]; // 0x10
|
||||
uint8_t suspend_count; // 0xBC
|
||||
uint8_t unk_BD; // 0xBD
|
||||
uint16_t unk_BE; // 0xBE
|
||||
uint8_t unk_BE; // 0xBE
|
||||
uint8_t current_cpu; // 0xBF
|
||||
char unk_C0[0x70]; // 0xC0
|
||||
xe::be<uint64_t> create_time; // 0x130
|
||||
xe::be<uint64_t> exit_time; // 0x138
|
||||
|
@ -171,10 +172,17 @@ class XThread : public XObject, public cpu::Thread {
|
|||
int32_t priority() const { return priority_; }
|
||||
int32_t QueryPriority();
|
||||
void SetPriority(int32_t increment);
|
||||
uint32_t affinity() const { return affinity_; }
|
||||
|
||||
// Xbox thread IDs:
|
||||
// 0 - core 0, thread 0 - user
|
||||
// 1 - core 0, thread 1 - user
|
||||
// 2 - core 1, thread 0 - sometimes xcontent
|
||||
// 3 - core 1, thread 1 - user
|
||||
// 4 - core 2, thread 0 - xaudio
|
||||
// 5 - core 2, thread 1 - user
|
||||
void SetAffinity(uint32_t affinity);
|
||||
uint32_t active_cpu() const;
|
||||
void SetActiveCpu(uint32_t cpu_index);
|
||||
uint8_t active_cpu() const;
|
||||
void SetActiveCpu(uint8_t cpu_index);
|
||||
|
||||
bool GetTLSValue(uint32_t slot, uint32_t* value_out);
|
||||
bool SetTLSValue(uint32_t slot, uint32_t value);
|
||||
|
@ -226,7 +234,6 @@ class XThread : public XObject, public cpu::Thread {
|
|||
bool running_ = false;
|
||||
|
||||
int32_t priority_ = 0;
|
||||
uint32_t affinity_ = 0;
|
||||
|
||||
xe::global_critical_region global_critical_region_;
|
||||
std::atomic<uint32_t> irql_ = {0};
|
||||
|
|
|
@ -118,15 +118,15 @@ bool D3D12ImmediateDrawer::Initialize() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Create the pipeline states.
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_desc = {};
|
||||
pipeline_state_desc.pRootSignature = root_signature_;
|
||||
pipeline_state_desc.VS.pShaderBytecode = immediate_vs;
|
||||
pipeline_state_desc.VS.BytecodeLength = sizeof(immediate_vs);
|
||||
pipeline_state_desc.PS.pShaderBytecode = immediate_ps;
|
||||
pipeline_state_desc.PS.BytecodeLength = sizeof(immediate_ps);
|
||||
// Create the pipelines.
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_desc = {};
|
||||
pipeline_desc.pRootSignature = root_signature_;
|
||||
pipeline_desc.VS.pShaderBytecode = immediate_vs;
|
||||
pipeline_desc.VS.BytecodeLength = sizeof(immediate_vs);
|
||||
pipeline_desc.PS.pShaderBytecode = immediate_ps;
|
||||
pipeline_desc.PS.BytecodeLength = sizeof(immediate_ps);
|
||||
D3D12_RENDER_TARGET_BLEND_DESC& pipeline_blend_desc =
|
||||
pipeline_state_desc.BlendState.RenderTarget[0];
|
||||
pipeline_desc.BlendState.RenderTarget[0];
|
||||
pipeline_blend_desc.BlendEnable = TRUE;
|
||||
pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA;
|
||||
pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
|
||||
|
@ -138,11 +138,11 @@ bool D3D12ImmediateDrawer::Initialize() {
|
|||
pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED |
|
||||
D3D12_COLOR_WRITE_ENABLE_GREEN |
|
||||
D3D12_COLOR_WRITE_ENABLE_BLUE;
|
||||
pipeline_state_desc.SampleMask = UINT_MAX;
|
||||
pipeline_state_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
|
||||
pipeline_state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
pipeline_state_desc.RasterizerState.FrontCounterClockwise = FALSE;
|
||||
pipeline_state_desc.RasterizerState.DepthClipEnable = TRUE;
|
||||
pipeline_desc.SampleMask = UINT_MAX;
|
||||
pipeline_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
|
||||
pipeline_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
pipeline_desc.RasterizerState.FrontCounterClockwise = FALSE;
|
||||
pipeline_desc.RasterizerState.DepthClipEnable = TRUE;
|
||||
D3D12_INPUT_ELEMENT_DESC pipeline_input_elements[3] = {};
|
||||
pipeline_input_elements[0].SemanticName = "POSITION";
|
||||
pipeline_input_elements[0].Format = DXGI_FORMAT_R32G32_FLOAT;
|
||||
|
@ -154,26 +154,24 @@ bool D3D12ImmediateDrawer::Initialize() {
|
|||
pipeline_input_elements[2].Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
pipeline_input_elements[2].AlignedByteOffset =
|
||||
offsetof(ImmediateVertex, color);
|
||||
pipeline_state_desc.InputLayout.pInputElementDescs = pipeline_input_elements;
|
||||
pipeline_state_desc.InputLayout.NumElements =
|
||||
pipeline_desc.InputLayout.pInputElementDescs = pipeline_input_elements;
|
||||
pipeline_desc.InputLayout.NumElements =
|
||||
UINT(xe::countof(pipeline_input_elements));
|
||||
pipeline_state_desc.PrimitiveTopologyType =
|
||||
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
|
||||
pipeline_state_desc.NumRenderTargets = 1;
|
||||
pipeline_state_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat;
|
||||
pipeline_state_desc.SampleDesc.Count = 1;
|
||||
pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
|
||||
pipeline_desc.NumRenderTargets = 1;
|
||||
pipeline_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat;
|
||||
pipeline_desc.SampleDesc.Count = 1;
|
||||
if (FAILED(device->CreateGraphicsPipelineState(
|
||||
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_triangle_)))) {
|
||||
&pipeline_desc, IID_PPV_ARGS(&pipeline_triangle_)))) {
|
||||
XELOGE(
|
||||
"Failed to create the Direct3D 12 immediate drawer triangle pipeline "
|
||||
"state");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
pipeline_state_desc.PrimitiveTopologyType =
|
||||
D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
|
||||
pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
|
||||
if (FAILED(device->CreateGraphicsPipelineState(
|
||||
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_line_)))) {
|
||||
&pipeline_desc, IID_PPV_ARGS(&pipeline_line_)))) {
|
||||
XELOGE(
|
||||
"Failed to create the Direct3D 12 immediate drawer line pipeline "
|
||||
"state");
|
||||
|
@ -267,8 +265,8 @@ void D3D12ImmediateDrawer::Shutdown() {
|
|||
|
||||
util::ReleaseAndNull(sampler_heap_);
|
||||
|
||||
util::ReleaseAndNull(pipeline_state_line_);
|
||||
util::ReleaseAndNull(pipeline_state_triangle_);
|
||||
util::ReleaseAndNull(pipeline_line_);
|
||||
util::ReleaseAndNull(pipeline_triangle_);
|
||||
|
||||
util::ReleaseAndNull(root_signature_);
|
||||
}
|
||||
|
@ -611,17 +609,17 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
|
|||
uint32_t(sampler_index)));
|
||||
}
|
||||
|
||||
// Set the primitive type and the pipeline state for it.
|
||||
// Set the primitive type and the pipeline for it.
|
||||
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
||||
ID3D12PipelineState* pipeline_state;
|
||||
ID3D12PipelineState* pipeline;
|
||||
switch (draw.primitive_type) {
|
||||
case ImmediatePrimitiveType::kLines:
|
||||
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
|
||||
pipeline_state = pipeline_state_line_;
|
||||
pipeline = pipeline_line_;
|
||||
break;
|
||||
case ImmediatePrimitiveType::kTriangles:
|
||||
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
|
||||
pipeline_state = pipeline_state_triangle_;
|
||||
pipeline = pipeline_triangle_;
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(draw.primitive_type);
|
||||
|
@ -630,7 +628,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
|
|||
if (current_primitive_topology_ != primitive_topology) {
|
||||
current_primitive_topology_ = primitive_topology;
|
||||
current_command_list_->IASetPrimitiveTopology(primitive_topology);
|
||||
current_command_list_->SetPipelineState(pipeline_state);
|
||||
current_command_list_->SetPipelineState(pipeline);
|
||||
}
|
||||
|
||||
// Draw.
|
||||
|
|
|
@ -105,8 +105,8 @@ class D3D12ImmediateDrawer : public ImmediateDrawer {
|
|||
kCount
|
||||
};
|
||||
|
||||
ID3D12PipelineState* pipeline_state_triangle_ = nullptr;
|
||||
ID3D12PipelineState* pipeline_state_line_ = nullptr;
|
||||
ID3D12PipelineState* pipeline_triangle_ = nullptr;
|
||||
ID3D12PipelineState* pipeline_line_ = nullptr;
|
||||
|
||||
ID3D12DescriptorHeap* sampler_heap_ = nullptr;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_;
|
||||
|
|
|
@ -46,22 +46,22 @@ class D3D12Provider : public GraphicsProvider {
|
|||
uint32_t GetRTVDescriptorSize() const { return descriptor_size_rtv_; }
|
||||
uint32_t GetDSVDescriptorSize() const { return descriptor_size_dsv_; }
|
||||
template <typename T>
|
||||
inline T OffsetViewDescriptor(T start, uint32_t index) const {
|
||||
T OffsetViewDescriptor(T start, uint32_t index) const {
|
||||
start.ptr += index * descriptor_size_view_;
|
||||
return start;
|
||||
}
|
||||
template <typename T>
|
||||
inline T OffsetSamplerDescriptor(T start, uint32_t index) const {
|
||||
T OffsetSamplerDescriptor(T start, uint32_t index) const {
|
||||
start.ptr += index * descriptor_size_sampler_;
|
||||
return start;
|
||||
}
|
||||
template <typename T>
|
||||
inline T OffsetRTVDescriptor(T start, uint32_t index) const {
|
||||
T OffsetRTVDescriptor(T start, uint32_t index) const {
|
||||
start.ptr += index * descriptor_size_rtv_;
|
||||
return start;
|
||||
}
|
||||
template <typename T>
|
||||
inline T OffsetDSVDescriptor(T start, uint32_t index) const {
|
||||
T OffsetDSVDescriptor(T start, uint32_t index) const {
|
||||
start.ptr += index * descriptor_size_dsv_;
|
||||
return start;
|
||||
}
|
||||
|
@ -91,32 +91,30 @@ class D3D12Provider : public GraphicsProvider {
|
|||
}
|
||||
|
||||
// Proxies for Direct3D 12 functions since they are loaded dynamically.
|
||||
inline HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc,
|
||||
D3D_ROOT_SIGNATURE_VERSION version,
|
||||
ID3DBlob** blob_out,
|
||||
ID3DBlob** error_blob_out) const {
|
||||
HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc,
|
||||
D3D_ROOT_SIGNATURE_VERSION version,
|
||||
ID3DBlob** blob_out,
|
||||
ID3DBlob** error_blob_out) const {
|
||||
return pfn_d3d12_serialize_root_signature_(desc, version, blob_out,
|
||||
error_blob_out);
|
||||
}
|
||||
inline HRESULT Disassemble(const void* src_data, size_t src_data_size,
|
||||
UINT flags, const char* comments,
|
||||
ID3DBlob** disassembly_out) const {
|
||||
HRESULT Disassemble(const void* src_data, size_t src_data_size, UINT flags,
|
||||
const char* comments, ID3DBlob** disassembly_out) const {
|
||||
if (!pfn_d3d_disassemble_) {
|
||||
return E_NOINTERFACE;
|
||||
}
|
||||
return pfn_d3d_disassemble_(src_data, src_data_size, flags, comments,
|
||||
disassembly_out);
|
||||
}
|
||||
inline HRESULT DxbcConverterCreateInstance(const CLSID& rclsid,
|
||||
const IID& riid,
|
||||
void** ppv) const {
|
||||
HRESULT DxbcConverterCreateInstance(const CLSID& rclsid, const IID& riid,
|
||||
void** ppv) const {
|
||||
if (!pfn_dxilconv_dxc_create_instance_) {
|
||||
return E_NOINTERFACE;
|
||||
}
|
||||
return pfn_dxilconv_dxc_create_instance_(rclsid, riid, ppv);
|
||||
}
|
||||
inline HRESULT DxcCreateInstance(const CLSID& rclsid, const IID& riid,
|
||||
void** ppv) const {
|
||||
HRESULT DxcCreateInstance(const CLSID& rclsid, const IID& riid,
|
||||
void** ppv) const {
|
||||
if (!pfn_dxcompiler_dxc_create_instance_) {
|
||||
return E_NOINTERFACE;
|
||||
}
|
||||
|
|
|
@ -47,7 +47,7 @@ ID3D12RootSignature* CreateRootSignature(
|
|||
return root_signature;
|
||||
}
|
||||
|
||||
ID3D12PipelineState* CreateComputePipelineState(
|
||||
ID3D12PipelineState* CreateComputePipeline(
|
||||
ID3D12Device* device, const void* shader, size_t shader_size,
|
||||
ID3D12RootSignature* root_signature) {
|
||||
D3D12_COMPUTE_PIPELINE_STATE_DESC desc;
|
||||
|
|
|
@ -27,7 +27,7 @@ extern const D3D12_HEAP_PROPERTIES kHeapPropertiesUpload;
|
|||
extern const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback;
|
||||
|
||||
template <typename T>
|
||||
inline bool ReleaseAndNull(T& object) {
|
||||
bool ReleaseAndNull(T& object) {
|
||||
if (object != nullptr) {
|
||||
object->Release();
|
||||
object = nullptr;
|
||||
|
@ -39,9 +39,10 @@ inline bool ReleaseAndNull(T& object) {
|
|||
ID3D12RootSignature* CreateRootSignature(const D3D12Provider& provider,
|
||||
const D3D12_ROOT_SIGNATURE_DESC& desc);
|
||||
|
||||
ID3D12PipelineState* CreateComputePipelineState(
|
||||
ID3D12Device* device, const void* shader, size_t shader_size,
|
||||
ID3D12RootSignature* root_signature);
|
||||
ID3D12PipelineState* CreateComputePipeline(ID3D12Device* device,
|
||||
const void* shader,
|
||||
size_t shader_size,
|
||||
ID3D12RootSignature* root_signature);
|
||||
|
||||
constexpr DXGI_FORMAT GetUintPow2DXGIFormat(uint32_t element_size_bytes_log2) {
|
||||
switch (element_size_bytes_log2) {
|
||||
|
|
|
@ -71,7 +71,7 @@ void GraphicsUploadBufferPool::FlushWrites() {
|
|||
GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request(
|
||||
uint64_t submission_index, size_t size, size_t alignment,
|
||||
size_t& offset_out) {
|
||||
assert_not_zero(alignment);
|
||||
alignment = std::max(alignment, size_t(1));
|
||||
assert_true(xe::is_pow2(alignment));
|
||||
size = xe::align(size, alignment);
|
||||
assert_true(size <= page_size_);
|
||||
|
@ -126,7 +126,7 @@ GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request(
|
|||
GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::RequestPartial(
|
||||
uint64_t submission_index, size_t size, size_t alignment,
|
||||
size_t& offset_out, size_t& size_out) {
|
||||
assert_not_zero(alignment);
|
||||
alignment = std::max(alignment, size_t(1));
|
||||
assert_true(xe::is_pow2(alignment));
|
||||
size = xe::align(size, alignment);
|
||||
size = std::min(size, page_size_);
|
||||
|
|
|
@ -18,7 +18,7 @@ project("SDL2")
|
|||
"SDL2/include",
|
||||
})
|
||||
buildoptions({
|
||||
"/wd4828", -- illegal characters in file
|
||||
"/wd4828", -- illegal characters in file https://bugzilla.libsdl.org/show_bug.cgi?id=5333
|
||||
})
|
||||
files({
|
||||
-- 1:1 from SDL.vcxproj file
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 26fbbb9962aefcb1c24aff1e7952033ce1361190
|
|
@ -73,4 +73,4 @@ project("spirv-tools")
|
|||
buildoptions({
|
||||
"/wd4800", -- Forcing value to bool 'true' or 'false'
|
||||
"/wd4996", -- Call to 'std::equal' with parameters that may be unsafe
|
||||
})
|
||||
})
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env python3
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright 2015 Ben Vanik. All Rights Reserved.
|
||||
|
||||
|
@ -107,13 +107,14 @@ def has_bin(bin):
|
|||
return None
|
||||
|
||||
|
||||
def shell_call(command, throw_on_error=True, stdout_path=None):
|
||||
def shell_call(command, throw_on_error=True, stdout_path=None, stderr_path=None, shell=False):
|
||||
"""Executes a shell command.
|
||||
|
||||
Args:
|
||||
command: Command to execute, as a list of parameters.
|
||||
throw_on_error: Whether to throw an error or return the status code.
|
||||
stdout_path: File path to write stdout output to.
|
||||
stderr_path: File path to write stderr output to.
|
||||
|
||||
Returns:
|
||||
If throw_on_error is False the status code of the call will be returned.
|
||||
|
@ -121,17 +122,22 @@ def shell_call(command, throw_on_error=True, stdout_path=None):
|
|||
stdout_file = None
|
||||
if stdout_path:
|
||||
stdout_file = open(stdout_path, 'w')
|
||||
stderr_file = None
|
||||
if stderr_path:
|
||||
stderr_file = open(stderr_path, 'w')
|
||||
result = 0
|
||||
try:
|
||||
if throw_on_error:
|
||||
result = 1
|
||||
subprocess.check_call(command, shell=False, stdout=stdout_file)
|
||||
subprocess.check_call(command, shell=shell, stdout=stdout_file, stderr=stderr_file)
|
||||
result = 0
|
||||
else:
|
||||
result = subprocess.call(command, shell=False, stdout=stdout_file)
|
||||
result = subprocess.call(command, shell=shell, stdout=stdout_file, stderr=stderr_file)
|
||||
finally:
|
||||
if stdout_file:
|
||||
stdout_file.close()
|
||||
if stderr_file:
|
||||
stderr_file.close()
|
||||
return result
|
||||
|
||||
|
||||
|
@ -196,42 +202,5 @@ def import_subprocess_environment(args):
|
|||
os.environ[var.upper()] = setting
|
||||
break
|
||||
|
||||
def git_submodule_update():
|
||||
"""Runs a full recursive git submodule init and update.
|
||||
|
||||
Older versions of git do not support 'update --init --recursive'. We could
|
||||
check and run it on versions that do support it and speed things up a bit.
|
||||
"""
|
||||
if True:
|
||||
shell_call([
|
||||
'git',
|
||||
'submodule',
|
||||
'update',
|
||||
'--init',
|
||||
'--recursive',
|
||||
])
|
||||
else:
|
||||
shell_call([
|
||||
'git',
|
||||
'submodule',
|
||||
'init',
|
||||
])
|
||||
shell_call([
|
||||
'git',
|
||||
'submodule',
|
||||
'foreach',
|
||||
'--recursive',
|
||||
'git',
|
||||
'submodule',
|
||||
'init',
|
||||
])
|
||||
shell_call([
|
||||
'git',
|
||||
'submodule',
|
||||
'update',
|
||||
'--recursive',
|
||||
])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
180
xenia-build
180
xenia-build
|
@ -34,8 +34,11 @@ def main():
|
|||
|
||||
# Check git exists.
|
||||
if not has_bin('git'):
|
||||
print('ERROR: git must be installed and on PATH.')
|
||||
sys.exit(1)
|
||||
print('WARNING: Git should be installed and on PATH. Version info will be omitted from all binaries!')
|
||||
print('')
|
||||
elif not git_is_repository():
|
||||
print('WARNING: The source tree is unversioned. Version info will be omitted from all binaries!')
|
||||
print('')
|
||||
|
||||
# Check python version.
|
||||
if not sys.version_info[:2] >= (3, 6):
|
||||
|
@ -85,6 +88,16 @@ def main():
|
|||
sys.exit(return_code)
|
||||
|
||||
|
||||
def print_box(msg):
|
||||
"""Prints an important message inside a box
|
||||
"""
|
||||
print(
|
||||
'┌{0:─^{2}}╖\n'
|
||||
'│{1: ^{2}}║\n'
|
||||
'╘{0:═^{2}}╝\n'
|
||||
.format('', msg, len(msg) + 2))
|
||||
|
||||
|
||||
def import_vs_environment():
|
||||
"""Finds the installed Visual Studio version and imports
|
||||
interesting environment variables into os.environ.
|
||||
|
@ -150,6 +163,7 @@ def import_subprocess_environment(args):
|
|||
os.environ[var.upper()] = setting
|
||||
break
|
||||
|
||||
|
||||
def has_bin(binary):
|
||||
"""Checks whether the given binary is present.
|
||||
|
||||
|
@ -185,13 +199,14 @@ def get_bin(binary):
|
|||
return None
|
||||
|
||||
|
||||
def shell_call(command, throw_on_error=True, stdout_path=None, shell=False):
|
||||
def shell_call(command, throw_on_error=True, stdout_path=None, stderr_path=None, shell=False):
|
||||
"""Executes a shell command.
|
||||
|
||||
Args:
|
||||
command: Command to execute, as a list of parameters.
|
||||
throw_on_error: Whether to throw an error or return the status code.
|
||||
stdout_path: File path to write stdout output to.
|
||||
stderr_path: File path to write stderr output to.
|
||||
|
||||
Returns:
|
||||
If throw_on_error is False the status code of the call will be returned.
|
||||
|
@ -199,21 +214,49 @@ def shell_call(command, throw_on_error=True, stdout_path=None, shell=False):
|
|||
stdout_file = None
|
||||
if stdout_path:
|
||||
stdout_file = open(stdout_path, 'w')
|
||||
stderr_file = None
|
||||
if stderr_path:
|
||||
stderr_file = open(stderr_path, 'w')
|
||||
result = 0
|
||||
try:
|
||||
if throw_on_error:
|
||||
result = 1
|
||||
subprocess.check_call(command, shell=shell, stdout=stdout_file)
|
||||
subprocess.check_call(command, shell=shell, stdout=stdout_file, stderr=stderr_file)
|
||||
result = 0
|
||||
else:
|
||||
result = subprocess.call(command, shell=shell, stdout=stdout_file)
|
||||
result = subprocess.call(command, shell=shell, stdout=stdout_file, stderr=stderr_file)
|
||||
finally:
|
||||
if stdout_file:
|
||||
stdout_file.close()
|
||||
if stderr_file:
|
||||
stderr_file.close()
|
||||
return result
|
||||
|
||||
|
||||
def get_git_head_info():
|
||||
def generate_version_h():
    """Generates a build/version.h file that contains current git info.

    When the source tree is a git checkout, the branch name and commit
    hashes are taken from git_get_head_info(). Otherwise placeholder values
    marking a tarball build are written instead.

    The header is only rewritten when its contents would actually change, so
    repeated runs on an unchanged checkout do not bump the file's timestamp
    and trigger a rebuild of everything that includes it.
    """
    if git_is_repository():
        (branch_name, commit, commit_short) = git_get_head_info()
    else:
        # No git metadata available (e.g. building from a source archive).
        branch_name = 'tarball'
        commit = ':(-dont-do-this'
        commit_short = ':('

    contents = '''// Autogenerated by `xb premake`.
#ifndef GENERATED_VERSION_H_
#define GENERATED_VERSION_H_
#define XE_BUILD_BRANCH "%s"
#define XE_BUILD_COMMIT "%s"
#define XE_BUILD_COMMIT_SHORT "%s"
#define XE_BUILD_DATE __DATE__
#endif // GENERATED_VERSION_H_
''' % (branch_name, commit, commit_short)

    version_path = 'build/version.h'
    try:
        with open(version_path, 'r') as f:
            if f.read() == contents:
                # Already up to date; leave the timestamp alone.
                return
    except (IOError, OSError, ValueError):
        # Missing or unreadable header: fall through and (re)write it.
        pass
    with open(version_path, 'w') as f:
        f.write(contents)
|
||||
|
||||
|
||||
def git_get_head_info():
|
||||
"""Queries the current branch and commit checksum from git.
|
||||
|
||||
Returns:
|
||||
|
@ -247,58 +290,28 @@ def get_git_head_info():
|
|||
return branch_name, commit, commit_short
|
||||
|
||||
|
||||
def generate_version_h():
|
||||
"""Generates a build/version.h file that contains current git info.
|
||||
def git_is_repository():
|
||||
"""Checks if git is available and this source tree is versioned.
|
||||
"""
|
||||
(branch_name, commit, commit_short) = get_git_head_info()
|
||||
contents = '''// Autogenerated by `xb premake`.
|
||||
#ifndef GENERATED_VERSION_H_
|
||||
#define GENERATED_VERSION_H_
|
||||
#define XE_BUILD_BRANCH "%s"
|
||||
#define XE_BUILD_COMMIT "%s"
|
||||
#define XE_BUILD_COMMIT_SHORT "%s"
|
||||
#define XE_BUILD_DATE __DATE__
|
||||
#endif // GENERATED_VERSION_H_
|
||||
''' % (branch_name, commit, commit_short)
|
||||
with open('build/version.h', 'w') as f:
|
||||
f.write(contents)
|
||||
if not has_bin('git'):
|
||||
return False
|
||||
return shell_call([
|
||||
'git',
|
||||
'rev-parse',
|
||||
'--is-inside-work-tree',
|
||||
], throw_on_error=False, stdout_path=os.devnull, stderr_path=os.devnull) == 0
|
||||
|
||||
|
||||
def git_submodule_update():
|
||||
"""Runs a full recursive git submodule init and update.
|
||||
|
||||
Older versions of git do not support 'update --init --recursive'. We could
|
||||
check and run it on versions that do support it and speed things up a bit.
|
||||
"""
|
||||
if True:
|
||||
shell_call([
|
||||
'git',
|
||||
'submodule',
|
||||
'update',
|
||||
'--init',
|
||||
'--recursive',
|
||||
])
|
||||
else:
|
||||
shell_call([
|
||||
'git',
|
||||
'submodule',
|
||||
'init',
|
||||
])
|
||||
shell_call([
|
||||
'git',
|
||||
'submodule',
|
||||
'foreach',
|
||||
'--recursive',
|
||||
'git',
|
||||
'submodule',
|
||||
'init',
|
||||
])
|
||||
shell_call([
|
||||
'git',
|
||||
'submodule',
|
||||
'update',
|
||||
'--recursive',
|
||||
])
|
||||
shell_call([
|
||||
'git',
|
||||
'submodule',
|
||||
'update',
|
||||
'--init',
|
||||
'--recursive',
|
||||
])
|
||||
|
||||
|
||||
def get_clang_format_binary():
|
||||
|
@ -370,9 +383,9 @@ def run_platform_premake(cc='clang', devenv=None):
|
|||
if 'VSVERSION' in os.environ:
|
||||
vs_version = os.environ['VSVERSION']
|
||||
|
||||
return run_premake('windows', 'vs' + vs_version)
|
||||
return run_premake('windows', devenv or ('vs' + vs_version))
|
||||
else:
|
||||
return run_premake('linux', devenv == 'codelite' and devenv or 'gmake2', cc)
|
||||
return run_premake('linux', devenv or 'gmake2', cc)
|
||||
|
||||
|
||||
def run_premake_export_commands():
|
||||
|
@ -406,6 +419,43 @@ def get_build_bin_path(args):
|
|||
return os.path.join(self_path, 'build', 'bin', platform.capitalize(), args['config'].capitalize())
|
||||
|
||||
|
||||
def create_clion_workspace():
    """Creates some basic workspace information inside the .idea directory for first start.

    Two files are written below .idea in the current working directory:
    misc.xml, which points CLion at the CMakeLists.txt location under build,
    and workspace.xml, which lists the Checked, Debug and Release
    configurations. Both files declare UTF-8 in their XML prolog, so they are
    written with that encoding explicitly instead of the platform default.

    Returns:
      True if the workspace files were generated, False if a .idea directory
      already existed and nothing was touched.
    """
    if os.path.exists('.idea'):
        # No first start
        return False
    print('Generating CLion workspace files...')
    # Might become easier in the future: https://youtrack.jetbrains.com/issue/CPP-7911

    # Set the location of the CMakeLists.txt
    os.mkdir('.idea')
    with open(os.path.join('.idea', 'misc.xml'), 'w', encoding='utf-8') as f:
        f.write("""<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$/build">
<contentRoot DIR="$PROJECT_DIR$" />
</component>
</project>
""")

    # Set available configurations
    # TODO Find a way to trigger a cmake reload
    with open(os.path.join('.idea', 'workspace.xml'), 'w', encoding='utf-8') as f:
        f.write("""<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeSettings">
<configurations>
<configuration PROFILE_NAME="Checked" CONFIG_NAME="Checked" />
<configuration PROFILE_NAME="Debug" CONFIG_NAME="Debug" />
<configuration PROFILE_NAME="Release" CONFIG_NAME="Release" />
</configurations>
</component>
</project>""")

    return True
|
||||
|
||||
|
||||
def discover_commands(subparsers):
|
||||
"""Looks for all commands and returns a dictionary of them.
|
||||
In the future commands could be discovered on disk.
|
||||
|
@ -491,7 +541,10 @@ class SetupCommand(Command):
|
|||
|
||||
# Setup submodules.
|
||||
print('- git submodule init / update...')
|
||||
git_submodule_update()
|
||||
if git_is_repository():
|
||||
git_submodule_update()
|
||||
else:
|
||||
print('WARNING: Git not available or not a repository. Dependencies may be missing.')
|
||||
print('')
|
||||
|
||||
print('- running premake...')
|
||||
|
@ -1445,8 +1498,13 @@ class DevenvCommand(Command):
|
|||
|
||||
def execute(self, args, pass_args, cwd):
|
||||
devenv = None
|
||||
show_reload_prompt = False
|
||||
if sys.platform == 'win32':
|
||||
print('Launching Visual Studio...')
|
||||
elif has_bin('clion') or has_bin('clion.sh'):
|
||||
print('Launching CLion...')
|
||||
show_reload_prompt = create_clion_workspace()
|
||||
devenv = 'cmake'
|
||||
else:
|
||||
print('Launching CodeLite...')
|
||||
devenv = 'codelite'
|
||||
|
@ -1457,11 +1515,23 @@ class DevenvCommand(Command):
|
|||
print('')
|
||||
|
||||
print('- launching devenv...')
|
||||
if show_reload_prompt:
|
||||
print_box('Please run "File ⇒ ↺ Reload CMake Project" from inside the IDE!')
|
||||
if sys.platform == 'win32':
|
||||
shell_call([
|
||||
'devenv',
|
||||
'build\\xenia.sln',
|
||||
])
|
||||
elif has_bin('clion'):
|
||||
shell_call([
|
||||
'clion',
|
||||
'.',
|
||||
])
|
||||
elif has_bin('clion.sh'):
|
||||
shell_call([
|
||||
'clion.sh',
|
||||
'.',
|
||||
])
|
||||
else:
|
||||
shell_call([
|
||||
'codelite',
|
||||
|
|
Loading…
Reference in New Issue