Merge branch 'master' of https://github.com/xenia-project/xenia into canary_new

This commit is contained in:
Gliniak 2020-11-15 19:01:55 +01:00
commit 7abe6312be
64 changed files with 3001 additions and 1293 deletions

10
.gdbinit Normal file
View File

@ -0,0 +1,10 @@
# Ignore HighResolutionTimer custom event
handle SIG34 nostop noprint
# Ignore PosixTimer custom event
handle SIG35 nostop noprint
# Ignore PosixThread exit event
handle SIG32 nostop noprint
# Ignore PosixThread suspend event
handle SIG36 nostop noprint
# Ignore PosixThread user callback event
handle SIG37 nostop noprint

3
.gitmodules vendored
View File

@ -64,3 +64,6 @@
[submodule "third_party/DirectXShaderCompiler"]
path = third_party/DirectXShaderCompiler
url = https://github.com/microsoft/DirectXShaderCompiler.git
[submodule "third_party/premake-cmake"]
path = third_party/premake-cmake
url = https://github.com/Enhex/premake-cmake.git

View File

@ -28,9 +28,9 @@ addons:
jobs:
include:
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 LINT=true
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 BUILD=true CONFIG=Debug
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 BUILD=true CONFIG=Release
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 LINT=true
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 BUILD=true CONFIG=Debug
- env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 AR_COMPILER=llvm-ar-9 BUILD=true CONFIG=Release
git:
# We handle submodules ourselves in xenia-build setup.
@ -40,8 +40,10 @@ before_script:
- export LIBVULKAN_VERSION=1.1.70
- export CXX=$CXX_COMPILER
- export CC=$C_COMPILER
- export AR=$AR_COMPILER
# Dump useful info.
- $CXX --version
- $AR_COMPILER --version
- python3 --version
- clang-format-9 --version
- clang-format-9 -style=file -dump-config

View File

@ -91,12 +91,14 @@ Linux support is extremely experimental and presently incomplete.
The build script uses LLVM/Clang 9. GCC, while it should work in theory, is not easily
interchangeable right now.
[CodeLite](https://codelite.org) is the supported IDE and `xb devenv` will generate a workspace and attempt to open it. Your distribution's version may be out of date so check their website.
Normal building via `xb build` uses Make.
* Normal building via `xb build` uses Make.
* [CodeLite](https://codelite.org) is supported. `xb devenv` will generate a workspace and attempt to open it. Your distribution's version may be out of date so check their website.
* Experimental CMake generation is available to facilitate use of other IDEs such as [CLion](https://www.jetbrains.com/clion/). If `clion` is available inside `$PATH`, `xb devenv` will start it. Otherwise `build/CMakeLists.txt` needs to be generated by invoking `xb premake --devenv=cmake` manually.
Clang-9 or newer should be available from system repositories on all up to date distributions.
You will also need some development libraries. To get them on an Ubuntu system:
```
```bash
sudo apt-get install libgtk-3-dev libpthread-stubs0-dev liblz4-dev libx11-dev libvulkan-dev libsdl2-dev libiberty-dev libunwind-dev libc++-dev libc++abi-dev
```

View File

@ -1,5 +1,6 @@
include("tools/build")
require("third_party/premake-export-compile-commands/export-compile-commands")
require("third_party/premake-cmake/cmake")
location(build_root)
targetdir(build_bin)
@ -24,6 +25,9 @@ defines({
"UNICODE",
})
cppdialect("C++17")
symbols("On")
-- TODO(DrChat): Find a way to disable this on other architectures.
if ARCH ~= "ppc64" then
filter("architecture:x86_64")
@ -44,30 +48,29 @@ filter("kind:StaticLib")
filter("configurations:Checked")
runtime("Debug")
optimize("Off")
defines({
"DEBUG",
})
runtime("Debug")
filter({"configurations:Checked", "platforms:Windows"})
buildoptions({
"/RTCsu", -- Full Run-Time Checks.
"/RTCsu", -- Full Run-Time Checks.
})
filter({"configurations:Checked", "platforms:Linux"})
defines({
"_GLIBCXX_DEBUG", -- libstdc++ debug mode
})
filter("configurations:Debug")
runtime("Debug")
runtime("Release")
optimize("Off")
defines({
"DEBUG",
"_NO_DEBUG_HEAP=1",
})
runtime("Release")
filter({"configurations:Debug", "platforms:Windows"})
linkoptions({
"/NODEFAULTLIB:MSVCRTD",
})
filter({"configurations:Debug", "platforms:Linux"})
buildoptions({
"-g",
defines({
"_GLIBCXX_DEBUG", -- make dbg symbols work on some distros
})
filter("configurations:Release")
@ -76,26 +79,18 @@ filter("configurations:Release")
"NDEBUG",
"_NO_DEBUG_HEAP=1",
})
optimize("speed")
optimize("Speed")
inlining("Auto")
floatingpoint("Fast")
flags({
"LinkTimeOptimization",
})
runtime("Release")
filter({"configurations:Release", "platforms:Windows"})
linkoptions({
"/NODEFAULTLIB:MSVCRTD",
})
filter("platforms:Linux")
system("linux")
toolset("clang")
cppdialect("C++17")
buildoptions({
-- "-mlzcnt", -- (don't) Assume lzcnt is supported.
"`pkg-config --cflags gtk+-x11-3.0`",
"-fno-lto", -- Premake doesn't support LTO on clang
({os.outputof("pkg-config --cflags gtk+-x11-3.0")})[1],
})
links({
"stdc++fs",
@ -105,14 +100,13 @@ filter("platforms:Linux")
"rt",
})
linkoptions({
"`pkg-config --libs gtk+-3.0`",
({os.outputof("pkg-config --libs gtk+-3.0")})[1],
})
filter({"platforms:Linux", "kind:*App"})
linkgroups("On")
filter({"platforms:Linux", "language:C++", "toolset:gcc"})
cppdialect("C++17")
links({
})
disablewarnings({
@ -147,13 +141,11 @@ filter({"platforms:Linux", "language:C++", "toolset:clang", "files:*.cc or *.cpp
filter("platforms:Windows")
system("windows")
toolset("msc")
cppdialect("C++17")
buildoptions({
"/MP", -- Multiprocessor compilation.
"/utf-8", -- 'build correctly on systems with non-Latin codepages'.
-- Mark warnings as severe
"/w14839", -- non-standard use of class 'type' as an argument to a variadic function
"/w14840", -- non-portable use of class 'type' as an argument to a variadic function
"/w14839", -- non-standard use of class 'type' as an argument to a variadic function
"/w14840", -- non-portable use of class 'type' as an argument to a variadic function
-- Disable warnings
"/wd4100", -- Unreferenced parameters are ok.
"/wd4201", -- Nameless struct/unions are ok.
@ -163,10 +155,10 @@ filter("platforms:Windows")
"/wd4189", -- 'local variable is initialized but not referenced'.
})
flags({
"NoMinimalRebuild", -- Required for /MP above.
"MultiProcessorCompile", -- Multiprocessor compilation.
"NoMinimalRebuild", -- Required for /MP above.
})
symbols("On")
defines({
"_CRT_NONSTDC_NO_DEPRECATE",
"_CRT_SECURE_NO_WARNINGS",

View File

@ -71,8 +71,8 @@ std::unique_ptr<EmulatorWindow> EmulatorWindow::Create(Emulator* emulator) {
std::unique_ptr<EmulatorWindow> emulator_window(new EmulatorWindow(emulator));
emulator_window->loop()->PostSynchronous([&emulator_window]() {
xe::threading::set_name("Win32 Loop");
xe::Profiler::ThreadEnter("Win32 Loop");
xe::threading::set_name("Windowing Loop");
xe::Profiler::ThreadEnter("Windowing Loop");
if (!emulator_window->Initialize()) {
xe::FatalError("Failed to initialize main window");

View File

@ -8,19 +8,6 @@ project("xenia-app")
targetname("xenia_canary")
language("C++")
links({
"aes_128",
"capstone",
"fmt",
"dxbc",
"discord-rpc",
"glslang-spirv",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"spirv-tools",
"volk",
"xenia-app-discord",
"xenia-apu",
"xenia-apu-nop",
@ -43,6 +30,21 @@ project("xenia-app")
"xenia-ui-vulkan",
"xenia-patcher",
"xenia-vfs",
})
links({
"aes_128",
"capstone",
"fmt",
"dxbc",
"discord-rpc",
"glslang-spirv",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"spirv-tools",
"volk",
"xxhash",
})
defines({

View File

@ -302,6 +302,7 @@ void XmaContext::DecodePackets(XMA_CONTEXT_DATA* data) {
// No available data.
if (!data->input_buffer_0_valid && !data->input_buffer_1_valid) {
data->output_buffer_valid = 0;
return;
}

View File

@ -144,7 +144,7 @@ X_STATUS XmaDecoder::Setup(kernel::KernelState* kernel_state) {
WorkerThreadMain();
return 0;
}));
worker_thread_->set_name("XMA Decoder Worker");
worker_thread_->set_name("XMA Decoder");
worker_thread_->set_can_debugger_suspend(true);
worker_thread_->Create();

View File

@ -9,21 +9,51 @@
#include "xenia/base/debugging.h"
#include <signal.h>
#include <csignal>
#include <cstdarg>
#include <fstream>
#include <iostream>
#include <mutex>
#include <sstream>
#include "xenia/base/string_buffer.h"
namespace xe {
namespace debugging {
bool IsDebuggerAttached() { return false; }
void Break() { raise(SIGTRAP); }
// Detects an attached debugger by scanning /proc/self/status for the
// TracerPid field, which the kernel fills with the id of the tracing
// process (0 when nothing is ptrace-attached).
bool IsDebuggerAttached() {
  std::ifstream status_file("/proc/self/status");
  if (!status_file.is_open()) {
    return false;
  }
  for (std::string status_line; std::getline(status_file, status_line);) {
    std::istringstream field_stream(status_line);
    std::string field_name;
    field_stream >> field_name;
    if (field_name != "TracerPid:") {
      continue;
    }
    uint32_t tracer_pid;
    field_stream >> tracer_pid;
    return tracer_pid != 0;
  }
  // No TracerPid line found (or the file was empty).
  return false;
}
// Breaks into an attached debugger by raising SIGTRAP. On the first call a
// handler is installed (exactly once, via std::call_once) that swallows the
// trap and restores the default disposition, so a single Break() without a
// debugger attached does not immediately terminate the process.
void Break() {
static std::once_flag flag;
std::call_once(flag, []() {
// Install handler for sigtrap only once
std::signal(SIGTRAP, [](int) {
// Forward signal to default handler after being caught
std::signal(SIGTRAP, SIG_DFL);
});
});
std::raise(SIGTRAP);
}
namespace internal {
void DebugPrint(const char* s) {
// TODO: proper implementation.
}
// Writes the message to the standard log stream followed by a newline and
// an explicit flush (equivalent to streaming std::endl), so output is
// visible immediately.
void DebugPrint(const char* s) {
  std::clog << s << '\n';
  std::clog.flush();
}
} // namespace internal
} // namespace debugging

View File

@ -93,7 +93,7 @@ class Logger {
write_thread_ =
xe::threading::Thread::Create({}, [this]() { WriteThread(); });
write_thread_->set_name("xe::FileLogSink Writer");
write_thread_->set_name("Logging Writer");
}
~Logger() {

View File

@ -76,14 +76,12 @@
#endif // XE_PLATFORM_MAC
#if XE_COMPILER_MSVC
#define XEPACKEDSTRUCT(name, value) \
__pragma(pack(push, 1)) struct name##_s value __pragma(pack(pop)); \
typedef struct name##_s name;
#define XEPACKEDSTRUCT(name, value) \
__pragma(pack(push, 1)) struct name value __pragma(pack(pop));
#define XEPACKEDSTRUCTANONYMOUS(value) \
__pragma(pack(push, 1)) struct value __pragma(pack(pop));
#define XEPACKEDUNION(name, value) \
__pragma(pack(push, 1)) union name##_s value __pragma(pack(pop)); \
typedef union name##_s name;
#define XEPACKEDUNION(name, value) \
__pragma(pack(push, 1)) union name value __pragma(pack(pop));
#else
#define XEPACKEDSTRUCT(name, value) struct __attribute__((packed)) name value;
#define XEPACKEDSTRUCTANONYMOUS(value) struct __attribute__((packed)) value;

View File

@ -10,11 +10,15 @@
#ifndef XENIA_BASE_STRING_UTIL_H_
#define XENIA_BASE_STRING_UTIL_H_
#include <algorithm>
#include <charconv>
#include <cstddef>
#include <cstring>
#include <string>
#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/assert.h"
#include "xenia/base/memory.h"
#include "xenia/base/platform.h"
#include "xenia/base/string.h"
#include "xenia/base/vec128.h"
@ -30,6 +34,40 @@
namespace xe {
namespace string_util {
// Copies as much of |source| as fits into |dest|, whose total capacity
// (including the NUL terminator) is |dest_buffer_count| characters. The
// destination is always NUL-terminated when the buffer has any room.
// Returns the number of characters copied, excluding the terminator.
inline size_t copy_truncating(char* dest, const std::string_view source,
                              size_t dest_buffer_count) {
  if (dest_buffer_count == 0) {
    return 0;
  }
  const size_t capacity = dest_buffer_count - 1;
  const size_t count = source.size() < capacity ? source.size() : capacity;
  std::memcpy(dest, source.data(), count);
  dest[count] = '\0';
  return count;
}
// UTF-16 overload: copies up to |dest_buffer_count| - 1 code units from
// |source| into |dest| and always NUL-terminates when the buffer has any
// room. Returns the number of code units copied, excluding the terminator.
inline size_t copy_truncating(char16_t* dest, const std::u16string_view source,
                              size_t dest_buffer_count) {
  if (dest_buffer_count == 0) {
    return 0;
  }
  const size_t capacity = dest_buffer_count - 1;
  const size_t count = source.size() < capacity ? source.size() : capacity;
  // Byte count, not element count, for memcpy.
  std::memcpy(dest, source.data(), count * sizeof(char16_t));
  dest[count] = u'\0';
  return count;
}
// Like the UTF-16 copy_truncating, but byte-swaps each code unit while
// copying (via xe::copy_and_swap) — used for big-endian guest strings.
// Returns the number of code units copied, excluding the NUL terminator.
inline size_t copy_and_swap_truncating(char16_t* dest,
                                       const std::u16string_view source,
                                       size_t dest_buffer_count) {
  if (dest_buffer_count == 0) {
    return 0;
  }
  const size_t capacity = dest_buffer_count - 1;
  const size_t count = source.size() < capacity ? source.size() : capacity;
  xe::copy_and_swap(dest, source.data(), count);
  // Terminator is written unswapped; u'\0' is byte-order invariant anyway.
  dest[count] = u'\0';
  return count;
}
// Formats |value| as exactly eight uppercase hexadecimal digits, zero
// padded (e.g. 0x1A2B -> "00001A2B").
inline std::string to_hex_string(uint32_t value) {
return fmt::format("{:08X}", value);
}

View File

@ -0,0 +1,967 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <array>
#include "xenia/base/threading.h"
#include "third_party/catch/include/catch.hpp"
namespace xe {
namespace base {
namespace test {
using namespace threading;
using namespace std::chrono_literals;
// Exercises threading::Fence: signaling with no waiter, waiting after one
// or two signals, reuse across successive signal/wait cycles, and a single
// signal releasing several blocked threads at once.
// Fixes: removed the unused HighResolutionTimer local, corrected the stray
// "// namespace test" comment on the closing brace, and cast size() to int
// to avoid signed/unsigned comparison warnings.
TEST_CASE("Fence") {
  std::unique_ptr<threading::Fence> pFence;

  // Signal without wait.
  pFence = std::make_unique<threading::Fence>();
  pFence->Signal();

  // Signal once and wait.
  pFence = std::make_unique<threading::Fence>();
  pFence->Signal();
  pFence->Wait();

  // Signal twice and wait.
  pFence = std::make_unique<threading::Fence>();
  pFence->Signal();
  pFence->Signal();
  pFence->Wait();

  // Signal and wait two times.
  pFence = std::make_unique<threading::Fence>();
  pFence->Signal();
  pFence->Wait();
  pFence->Signal();
  pFence->Wait();

  // Test to synchronize multiple threads.
  std::atomic<int> started(0);
  std::atomic<int> finished(0);
  pFence = std::make_unique<threading::Fence>();
  auto func = [&pFence, &started, &finished] {
    started.fetch_add(1);
    pFence->Wait();
    finished.fetch_add(1);
  };
  auto threads = std::array<std::thread, 5>({
      std::thread(func),
      std::thread(func),
      std::thread(func),
      std::thread(func),
      std::thread(func),
  });
  Sleep(100ms);
  REQUIRE(started.load() == static_cast<int>(threads.size()));
  REQUIRE(finished.load() == 0);
  pFence->Signal();
  for (auto& t : threads) t.join();
  REQUIRE(finished.load() == static_cast<int>(threads.size()));
}
// logical_processor_count() must agree with the standard library's view of
// hardware concurrency and stay stable when queried repeatedly.
TEST_CASE("Get number of logical processors") {
  const auto expected = std::thread::hardware_concurrency();
  for (int query = 0; query < 3; ++query) {
    REQUIRE(logical_processor_count() == expected);
  }
}
// Smoke test: must not crash or throw.
TEST_CASE("Enable process to set thread affinity") {
EnableAffinityConfiguration();
}
// Smoke test: yielding the current thread is always legal.
TEST_CASE("Yield Current Thread", "MaybeYield") {
// Run to see if there are any errors
MaybeYield();
}
// Smoke test: issuing a full memory barrier is always legal.
TEST_CASE("Sync with Memory Barrier", "SyncMemory") {
// Run to see if there are any errors
SyncMemory();
}
// Sleep may overshoot the requested duration but must never return early.
TEST_CASE("Sleep Current Thread", "Sleep") {
auto wait_time = 50ms;
auto start = std::chrono::steady_clock::now();
Sleep(wait_time);
auto duration = std::chrono::steady_clock::now() - start;
REQUIRE(duration >= wait_time);
}
// AlertableSleep with no pending APC behaves like Sleep and reports
// kSuccess (it ran to completion without being alerted).
TEST_CASE("Sleep Current Thread in Alertable State", "Sleep") {
auto wait_time = 50ms;
auto start = std::chrono::steady_clock::now();
auto result = threading::AlertableSleep(wait_time);
auto duration = std::chrono::steady_clock::now() - start;
REQUIRE(duration >= wait_time);
REQUIRE(result == threading::SleepResult::kSuccess);
// TODO(bwrsandman): Test a Thread to return kAlerted.
// Need callback to call extended I/O function (ReadFileEx or WriteFileEx)
}
// Covers the TLS slot API: allocate/free, double-free rejection, storing a
// pointer-sized value, and per-thread isolation of slot contents.
TEST_CASE("TlsHandle") {
// Test Allocate
auto handle = threading::AllocateTlsHandle();
// Test Free
REQUIRE(threading::FreeTlsHandle(handle));
// Freeing twice (or freeing the invalid handle) must fail.
REQUIRE(!threading::FreeTlsHandle(handle));
REQUIRE(!threading::FreeTlsHandle(threading::kInvalidTlsHandle));
// Test setting values
handle = threading::AllocateTlsHandle();
// A fresh slot reads back as zero.
REQUIRE(threading::GetTlsValue(handle) == 0);
uint32_t value = 0xDEADBEEF;
// Store the address of a stack local; read it back through the slot.
threading::SetTlsValue(handle, reinterpret_cast<uintptr_t>(&value));
auto p_received_value = threading::GetTlsValue(handle);
REQUIRE(threading::GetTlsValue(handle) != 0);
auto received_value = *reinterpret_cast<uint32_t*>(p_received_value);
REQUIRE(received_value == value);
uintptr_t non_thread_local_value = 0;
// TLS is per-thread: a different thread must observe 0 in the same slot.
auto thread = Thread::Create({}, [&non_thread_local_value, &handle] {
non_thread_local_value = threading::GetTlsValue(handle);
});
auto result = Wait(thread.get(), false, 50ms);
REQUIRE(result == WaitResult::kSuccess);
REQUIRE(non_thread_local_value == 0);
// Cleanup
REQUIRE(threading::FreeTlsHandle(handle));
}
// Verifies that repeating high-resolution timers fire approximately
// wait_time / interval times, alone and with two timers running
// concurrently.
// Fix: the counters were default-constructed std::atomic<uint64_t>, whose
// value is indeterminate before C++20 — they must be zero-initialized or
// the REQUIRE bounds below are meaningless.
TEST_CASE("HighResolutionTimer") {
  // The wait time is 500ms with an interval of 50ms.
  // Smaller values are not as precise and fail the test.
  const auto wait_time = 500ms;

  // Time the actual sleep duration.
  {
    const auto interval = 50ms;
    std::atomic<uint64_t> counter(0);
    auto start = std::chrono::steady_clock::now();
    auto cb = [&counter] { ++counter; };
    auto pTimer = HighResolutionTimer::CreateRepeating(interval, cb);
    Sleep(wait_time);
    pTimer.reset();
    auto duration = std::chrono::steady_clock::now() - start;

    // Should have run as many times as wait_time / timer_interval plus or
    // minus 1 due to imprecision of Sleep.
    REQUIRE(duration.count() >= wait_time.count());
    auto ratio = static_cast<uint64_t>(duration / interval);
    REQUIRE(counter >= ratio - 1);
    REQUIRE(counter <= ratio + 1);
  }

  // Test concurrent timers.
  {
    const auto interval1 = 100ms;
    const auto interval2 = 200ms;
    std::atomic<uint64_t> counter1(0);
    std::atomic<uint64_t> counter2(0);
    auto start = std::chrono::steady_clock::now();
    auto cb1 = [&counter1] { ++counter1; };
    auto cb2 = [&counter2] { ++counter2; };
    auto pTimer1 = HighResolutionTimer::CreateRepeating(interval1, cb1);
    auto pTimer2 = HighResolutionTimer::CreateRepeating(interval2, cb2);
    Sleep(wait_time);
    pTimer1.reset();
    pTimer2.reset();
    auto duration = std::chrono::steady_clock::now() - start;

    // Should have run as many times as wait_time / timer_interval plus or
    // minus 1 due to imprecision of Sleep.
    REQUIRE(duration.count() >= wait_time.count());
    auto ratio1 = static_cast<uint64_t>(duration / interval1);
    auto ratio2 = static_cast<uint64_t>(duration / interval2);
    REQUIRE(counter1 >= ratio1 - 1);
    REQUIRE(counter1 <= ratio1 + 1);
    REQUIRE(counter2 >= ratio2 - 1);
    REQUIRE(counter2 <= ratio2 + 1);
  }

  // TODO(bwrsandman): Check on which thread callbacks are executed when
  // spawned from differing threads.
}
TEST_CASE("Wait on Multiple Handles", "Wait") {
auto mutant = Mutant::Create(true);
auto semaphore = Semaphore::Create(10, 10);
auto event_ = Event::CreateManualResetEvent(false);
auto thread = Thread::Create({}, [&mutant, &semaphore, &event_] {
event_->Set();
Wait(mutant.get(), false, 25ms);
semaphore->Release(1, nullptr);
Wait(mutant.get(), false, 25ms);
mutant->Release();
});
std::vector<WaitHandle*> handles = {
mutant.get(),
semaphore.get(),
event_.get(),
thread.get(),
};
auto any_result = WaitAny(handles, false, 100ms);
REQUIRE(any_result.first == WaitResult::kSuccess);
REQUIRE(any_result.second == 0);
auto all_result = WaitAll(handles, false, 100ms);
REQUIRE(all_result == WaitResult::kSuccess);
}
TEST_CASE("Signal and Wait") {
WaitResult result;
auto mutant = Mutant::Create(true);
auto event_ = Event::CreateAutoResetEvent(false);
auto thread = Thread::Create({}, [&mutant, &event_] {
Wait(mutant.get(), false);
event_->Set();
});
result = Wait(event_.get(), false, 50ms);
REQUIRE(result == WaitResult::kTimeout);
result = SignalAndWait(mutant.get(), event_.get(), false, 50ms);
REQUIRE(result == WaitResult::kSuccess);
result = Wait(thread.get(), false, 50ms);
REQUIRE(result == WaitResult::kSuccess);
}
// Auto-reset event semantics: waiting on an unset event times out, a set
// event satisfies exactly one wait, and that wait consumes the signal.
TEST_CASE("Wait on Event", "Event") {
auto evt = Event::CreateAutoResetEvent(false);
WaitResult result;
// Call wait on unset Event
result = Wait(evt.get(), false, 50ms);
REQUIRE(result == WaitResult::kTimeout);
// Call wait on set Event
evt->Set();
result = Wait(evt.get(), false, 50ms);
REQUIRE(result == WaitResult::kSuccess);
// Call wait on now consumed Event
result = Wait(evt.get(), false, 50ms);
REQUIRE(result == WaitResult::kTimeout);
}
TEST_CASE("Reset Event", "Event") {
auto evt = Event::CreateAutoResetEvent(false);
WaitResult result;
// Call wait on reset Event
evt->Set();
evt->Reset();
result = Wait(evt.get(), false, 50ms);
REQUIRE(result == WaitResult::kTimeout);
// Test resetting the unset event
evt->Reset();
result = Wait(evt.get(), false, 50ms);
REQUIRE(result == WaitResult::kTimeout);
// Test setting the reset event
evt->Set();
result = Wait(evt.get(), false, 50ms);
REQUIRE(result == WaitResult::kSuccess);
}
TEST_CASE("Wait on Multiple Events", "Event") {
auto events = std::array<std::unique_ptr<Event>, 4>{
Event::CreateAutoResetEvent(false),
Event::CreateAutoResetEvent(false),
Event::CreateAutoResetEvent(false),
Event::CreateManualResetEvent(false),
};
std::array<char, 8> order = {0};
std::atomic_uint index(0);
auto sign_in = [&order, &index](uint32_t id) {
auto i = index.fetch_add(1, std::memory_order::memory_order_relaxed);
order[i] = static_cast<char>('0' + id);
};
auto threads = std::array<std::thread, 4>{
std::thread([&events, &sign_in] {
auto res = WaitAll({events[1].get(), events[3].get()}, false, 100ms);
if (res == WaitResult::kSuccess) {
sign_in(1);
}
}),
std::thread([&events, &sign_in] {
auto res = WaitAny({events[0].get(), events[2].get()}, false, 100ms);
if (res.first == WaitResult::kSuccess) {
sign_in(2);
}
}),
std::thread([&events, &sign_in] {
auto res = WaitAll({events[0].get(), events[2].get(), events[3].get()},
false, 100ms);
if (res == WaitResult::kSuccess) {
sign_in(3);
}
}),
std::thread([&events, &sign_in] {
auto res = WaitAny({events[1].get(), events[3].get()}, false, 100ms);
if (res.first == WaitResult::kSuccess) {
sign_in(4);
}
}),
};
Sleep(10ms);
events[3]->Set(); // Signals thread id=4 and stays on for 1 and 3
Sleep(10ms);
events[1]->Set(); // Signals thread id=1
Sleep(10ms);
events[0]->Set(); // Signals thread id=2
Sleep(10ms);
events[2]->Set(); // Partial signals thread id=3
events[0]->Set(); // Signals thread id=3
for (auto& t : threads) {
t.join();
}
INFO(order.data());
REQUIRE(order[0] == '4');
// TODO(bwrsandman): Order is not always maintained on linux
// REQUIRE(order[1] == '1');
// REQUIRE(order[2] == '2');
// REQUIRE(order[3] == '3');
}
TEST_CASE("Wait on Semaphore", "Semaphore") {
WaitResult result;
std::unique_ptr<Semaphore> sem;
int previous_count = 0;
// Wait on semaphore with no room
sem = Semaphore::Create(0, 5);
result = Wait(sem.get(), false, 10ms);
REQUIRE(result == WaitResult::kTimeout);
// Add room in semaphore
REQUIRE(sem->Release(2, &previous_count));
REQUIRE(previous_count == 0);
REQUIRE(sem->Release(1, &previous_count));
REQUIRE(previous_count == 2);
result = Wait(sem.get(), false, 10ms);
REQUIRE(result == WaitResult::kSuccess);
REQUIRE(sem->Release(1, &previous_count));
REQUIRE(previous_count == 2);
// Set semaphore over maximum_count
sem = Semaphore::Create(5, 5);
previous_count = -1;
REQUIRE_FALSE(sem->Release(1, &previous_count));
REQUIRE(previous_count == -1);
REQUIRE_FALSE(sem->Release(10, &previous_count));
REQUIRE(previous_count == -1);
sem = Semaphore::Create(0, 5);
REQUIRE_FALSE(sem->Release(10, &previous_count));
REQUIRE(previous_count == -1);
REQUIRE_FALSE(sem->Release(10, &previous_count));
REQUIRE(previous_count == -1);
// Test invalid Release parameters
REQUIRE_FALSE(sem->Release(0, &previous_count));
REQUIRE(previous_count == -1);
REQUIRE_FALSE(sem->Release(-1, &previous_count));
REQUIRE(previous_count == -1);
// Wait on fully available semaphore
sem = Semaphore::Create(5, 5);
result = Wait(sem.get(), false, 10ms);
REQUIRE(result == WaitResult::kSuccess);
result = Wait(sem.get(), false, 10ms);
REQUIRE(result == WaitResult::kSuccess);
result = Wait(sem.get(), false, 10ms);
REQUIRE(result == WaitResult::kSuccess);
result = Wait(sem.get(), false, 10ms);
REQUIRE(result == WaitResult::kSuccess);
result = Wait(sem.get(), false, 10ms);
REQUIRE(result == WaitResult::kSuccess);
result = Wait(sem.get(), false, 10ms);
REQUIRE(result == WaitResult::kTimeout);
// Semaphore between threads
sem = Semaphore::Create(5, 5);
Sleep(10ms);
// Occupy the semaphore with 5 threads
auto func = [&sem] {
auto res = Wait(sem.get(), false, 100ms);
Sleep(500ms);
if (res == WaitResult::kSuccess) {
sem->Release(1, nullptr);
}
};
auto threads = std::array<std::thread, 5>{
std::thread(func), std::thread(func), std::thread(func),
std::thread(func), std::thread(func),
};
// Give threads time to acquire semaphore
Sleep(10ms);
// Attempt to acquire full semaphore with current (6th) thread
result = Wait(sem.get(), false, 20ms);
REQUIRE(result == WaitResult::kTimeout);
// Give threads time to release semaphore
for (auto& t : threads) {
t.join();
}
result = Wait(sem.get(), false, 10ms);
REQUIRE(result == WaitResult::kSuccess);
sem->Release(1, &previous_count);
REQUIRE(previous_count == 4);
// Test invalid construction parameters
// These are invalid according to documentation
// TODO(bwrsandman): Many of these invalid invocations succeed
sem = Semaphore::Create(-1, 5);
// REQUIRE(sem.get() == nullptr);
sem = Semaphore::Create(10, 5);
// REQUIRE(sem.get() == nullptr);
sem = Semaphore::Create(0, 0);
// REQUIRE(sem.get() == nullptr);
sem = Semaphore::Create(0, -1);
// REQUIRE(sem.get() == nullptr);
}
TEST_CASE("Wait on Multiple Semaphores", "Semaphore") {
WaitResult all_result;
std::pair<WaitResult, size_t> any_result;
int previous_count;
std::unique_ptr<Semaphore> sem0, sem1;
// Test Wait all which should fail
sem0 = Semaphore::Create(0, 5);
sem1 = Semaphore::Create(5, 5);
all_result = WaitAll({sem0.get(), sem1.get()}, false, 10ms);
REQUIRE(all_result == WaitResult::kTimeout);
previous_count = -1;
REQUIRE(sem0->Release(1, &previous_count));
REQUIRE(previous_count == 0);
previous_count = -1;
REQUIRE_FALSE(sem1->Release(1, &previous_count));
REQUIRE(previous_count == -1);
// Test Wait all again which should succeed
sem0 = Semaphore::Create(1, 5);
sem1 = Semaphore::Create(5, 5);
all_result = WaitAll({sem0.get(), sem1.get()}, false, 10ms);
REQUIRE(all_result == WaitResult::kSuccess);
previous_count = -1;
REQUIRE(sem0->Release(1, &previous_count));
REQUIRE(previous_count == 0);
previous_count = -1;
REQUIRE(sem1->Release(1, &previous_count));
REQUIRE(previous_count == 4);
// Test Wait Any which should fail
sem0 = Semaphore::Create(0, 5);
sem1 = Semaphore::Create(0, 5);
any_result = WaitAny({sem0.get(), sem1.get()}, false, 10ms);
REQUIRE(any_result.first == WaitResult::kTimeout);
REQUIRE(any_result.second == 0);
previous_count = -1;
REQUIRE(sem0->Release(1, &previous_count));
REQUIRE(previous_count == 0);
previous_count = -1;
REQUIRE(sem1->Release(1, &previous_count));
REQUIRE(previous_count == 0);
// Test Wait Any which should succeed
sem0 = Semaphore::Create(0, 5);
sem1 = Semaphore::Create(5, 5);
any_result = WaitAny({sem0.get(), sem1.get()}, false, 10ms);
REQUIRE(any_result.first == WaitResult::kSuccess);
REQUIRE(any_result.second == 1);
previous_count = -1;
REQUIRE(sem0->Release(1, &previous_count));
REQUIRE(previous_count == 0);
previous_count = -1;
REQUIRE(sem1->Release(1, &previous_count));
REQUIRE(previous_count == 4);
}
TEST_CASE("Wait on Mutant", "Mutant") {
WaitResult result;
std::unique_ptr<Mutant> mut;
// Release on initially owned mutant
mut = Mutant::Create(true);
REQUIRE(mut->Release());
REQUIRE_FALSE(mut->Release());
// Release on initially not-owned mutant
mut = Mutant::Create(false);
REQUIRE_FALSE(mut->Release());
// Wait on initially owned mutant
mut = Mutant::Create(true);
result = Wait(mut.get(), false, 1ms);
REQUIRE(result == WaitResult::kSuccess);
REQUIRE(mut->Release());
REQUIRE(mut->Release());
REQUIRE_FALSE(mut->Release());
// Wait on initially not owned mutant
mut = Mutant::Create(false);
result = Wait(mut.get(), false, 1ms);
REQUIRE(result == WaitResult::kSuccess);
REQUIRE(mut->Release());
REQUIRE_FALSE(mut->Release());
// Multiple waits (or locks)
mut = Mutant::Create(false);
for (int i = 0; i < 10; ++i) {
result = Wait(mut.get(), false, 1ms);
REQUIRE(result == WaitResult::kSuccess);
}
for (int i = 0; i < 10; ++i) {
REQUIRE(mut->Release());
}
REQUIRE_FALSE(mut->Release());
// Test mutants on other threads
auto thread1 = std::thread([&mut] {
Sleep(5ms);
mut = Mutant::Create(true);
Sleep(100ms);
mut->Release();
});
Sleep(10ms);
REQUIRE_FALSE(mut->Release());
Sleep(10ms);
result = Wait(mut.get(), false, 50ms);
REQUIRE(result == WaitResult::kTimeout);
thread1.join();
result = Wait(mut.get(), false, 1ms);
REQUIRE(result == WaitResult::kSuccess);
REQUIRE(mut->Release());
}
TEST_CASE("Wait on Multiple Mutants", "Mutant") {
WaitResult all_result;
std::pair<WaitResult, size_t> any_result;
std::unique_ptr<Mutant> mut0, mut1;
// Test which should fail for WaitAll and WaitAny
auto thread0 = std::thread([&mut0, &mut1] {
mut0 = Mutant::Create(true);
mut1 = Mutant::Create(true);
Sleep(50ms);
mut0->Release();
mut1->Release();
});
Sleep(10ms);
all_result = WaitAll({mut0.get(), mut1.get()}, false, 10ms);
REQUIRE(all_result == WaitResult::kTimeout);
REQUIRE_FALSE(mut0->Release());
REQUIRE_FALSE(mut1->Release());
any_result = WaitAny({mut0.get(), mut1.get()}, false, 10ms);
REQUIRE(any_result.first == WaitResult::kTimeout);
REQUIRE(any_result.second == 0);
REQUIRE_FALSE(mut0->Release());
REQUIRE_FALSE(mut1->Release());
thread0.join();
// Test which should fail for WaitAll but not WaitAny
auto thread1 = std::thread([&mut0, &mut1] {
mut0 = Mutant::Create(true);
mut1 = Mutant::Create(false);
Sleep(50ms);
mut0->Release();
});
Sleep(10ms);
all_result = WaitAll({mut0.get(), mut1.get()}, false, 10ms);
REQUIRE(all_result == WaitResult::kTimeout);
REQUIRE_FALSE(mut0->Release());
REQUIRE_FALSE(mut1->Release());
any_result = WaitAny({mut0.get(), mut1.get()}, false, 10ms);
REQUIRE(any_result.first == WaitResult::kSuccess);
REQUIRE(any_result.second == 1);
REQUIRE_FALSE(mut0->Release());
REQUIRE(mut1->Release());
thread1.join();
// Test which should pass for WaitAll and WaitAny
auto thread2 = std::thread([&mut0, &mut1] {
mut0 = Mutant::Create(false);
mut1 = Mutant::Create(false);
Sleep(50ms);
});
Sleep(10ms);
all_result = WaitAll({mut0.get(), mut1.get()}, false, 10ms);
REQUIRE(all_result == WaitResult::kSuccess);
REQUIRE(mut0->Release());
REQUIRE(mut1->Release());
any_result = WaitAny({mut0.get(), mut1.get()}, false, 10ms);
REQUIRE(any_result.first == WaitResult::kSuccess);
REQUIRE(any_result.second == 0);
REQUIRE(mut0->Release());
REQUIRE_FALSE(mut1->Release());
thread2.join();
}
TEST_CASE("Wait on Timer", "Timer") {
WaitResult result;
std::unique_ptr<Timer> timer;
// Test Manual Reset
timer = Timer::CreateManualResetTimer();
result = Wait(timer.get(), false, 1ms);
REQUIRE(result == WaitResult::kTimeout);
REQUIRE(timer->SetOnce(1ms)); // Signals it
result = Wait(timer.get(), false, 2ms);
REQUIRE(result == WaitResult::kSuccess);
result = Wait(timer.get(), false, 1ms);
REQUIRE(result == WaitResult::kSuccess); // Did not reset
// Test Synchronization
timer = Timer::CreateSynchronizationTimer();
result = Wait(timer.get(), false, 1ms);
REQUIRE(result == WaitResult::kTimeout);
REQUIRE(timer->SetOnce(1ms)); // Signals it
result = Wait(timer.get(), false, 2ms);
REQUIRE(result == WaitResult::kSuccess);
result = Wait(timer.get(), false, 1ms);
REQUIRE(result == WaitResult::kTimeout); // Did reset
// TODO(bwrsandman): This test unexpectedly fails under windows
// Test long due time
// timer = Timer::CreateSynchronizationTimer();
// REQUIRE(timer->SetOnce(10s));
// result = Wait(timer.get(), false, 10ms); // Still signals under windows
// REQUIRE(result == WaitResult::kTimeout);
// Test Repeating
REQUIRE(timer->SetRepeating(1ms, 10ms));
for (int i = 0; i < 10; ++i) {
result = Wait(timer.get(), false, 20ms);
INFO(i);
REQUIRE(result == WaitResult::kSuccess);
}
MaybeYield();
Sleep(10ms); // Skip a few events
for (int i = 0; i < 10; ++i) {
result = Wait(timer.get(), false, 20ms);
REQUIRE(result == WaitResult::kSuccess);
}
// Cancel it
timer->Cancel();
result = Wait(timer.get(), false, 20ms);
REQUIRE(result == WaitResult::kTimeout);
MaybeYield();
Sleep(10ms); // Skip a few events
result = Wait(timer.get(), false, 20ms);
REQUIRE(result == WaitResult::kTimeout);
// Cancel with SetOnce
REQUIRE(timer->SetRepeating(1ms, 10ms));
for (int i = 0; i < 10; ++i) {
result = Wait(timer.get(), false, 20ms);
REQUIRE(result == WaitResult::kSuccess);
}
REQUIRE(timer->SetOnce(1ms));
result = Wait(timer.get(), false, 20ms);
REQUIRE(result == WaitResult::kSuccess); // Signal from Set Once
result = Wait(timer.get(), false, 20ms);
REQUIRE(result == WaitResult::kTimeout); // No more signals from repeating
}
TEST_CASE("Wait on Multiple Timers", "Timer") {
  // One auto-resetting (synchronization) timer and one manual-reset timer,
  // exercised through WaitAll/WaitAny in every signal combination.
  auto sync_timer = Timer::CreateSynchronizationTimer();
  auto manual_timer = Timer::CreateManualResetTimer();
  WaitResult wait_all_result;
  std::pair<WaitResult, size_t> wait_any_result;
  // Neither timer signaled: both wait forms time out; WaitAny reports index 0.
  wait_all_result = WaitAll({sync_timer.get(), manual_timer.get()}, false, 1ms);
  REQUIRE(wait_all_result == WaitResult::kTimeout);
  wait_any_result = WaitAny({sync_timer.get(), manual_timer.get()}, false, 1ms);
  REQUIRE(wait_any_result.first == WaitResult::kTimeout);
  REQUIRE(wait_any_result.second == 0);
  // Only the manual-reset timer signaled: WaitAll still times out, WaitAny
  // succeeds with index 1.
  REQUIRE(manual_timer->SetOnce(1ms));
  wait_all_result =
      WaitAll({sync_timer.get(), manual_timer.get()}, false, 100ms);
  REQUIRE(wait_all_result == WaitResult::kTimeout);
  wait_any_result =
      WaitAny({sync_timer.get(), manual_timer.get()}, false, 100ms);
  REQUIRE(wait_any_result.first == WaitResult::kSuccess);
  REQUIRE(wait_any_result.second == 1);
  // Both timers signaled: WaitAll finally succeeds.
  REQUIRE(sync_timer->SetOnce(1ms));
  wait_all_result =
      WaitAll({sync_timer.get(), manual_timer.get()}, false, 100ms);
  REQUIRE(wait_all_result == WaitResult::kSuccess);
  REQUIRE(sync_timer->SetOnce(1ms));
  Sleep(1ms);
  wait_any_result =
      WaitAny({sync_timer.get(), manual_timer.get()}, false, 100ms);
  REQUIRE(wait_any_result.first == WaitResult::kSuccess);
  REQUIRE(wait_any_result.second == 0);
  // The synchronization timer auto-reset after the wait above, so only the
  // manual-reset timer (index 1) is still signaled.
  wait_any_result =
      WaitAny({sync_timer.get(), manual_timer.get()}, false, 100ms);
  REQUIRE(wait_any_result.first == WaitResult::kSuccess);
  REQUIRE(wait_any_result.second == 1);
}
TEST_CASE("Create and Trigger Timer Callbacks", "Timer") {
  // Placeholder: timer callback delivery is not yet verified here.
  // TODO(bwrsandman): Check which thread performs callback and timing of
  // callback
  REQUIRE(true);
}
TEST_CASE("Set and Test Current Thread ID", "Thread") {
  // The OS-assigned thread identifier must be a positive value.
  const auto os_thread_id = current_thread_system_id();
  REQUIRE(os_thread_id > 0);
  // Until overridden, the logical thread id mirrors the system id.
  const auto logical_id = current_thread_id();
  REQUIRE(logical_id == os_thread_id);
  // Override the logical id and confirm the override is visible.
  const uint32_t kOverrideId = 0xDEADBEEF;
  set_current_thread_id(kOverrideId);
  REQUIRE(current_thread_id() == kOverrideId);
  // Passing the maximum uint32_t value restores the system-provided id.
  set_current_thread_id(std::numeric_limits<uint32_t>::max());
  REQUIRE(current_thread_id() == os_thread_id);
  // TODO(bwrsandman): Test on Thread object
}
TEST_CASE("Set and Test Current Thread Name", "Thread") {
  // Renaming the current thread must not throw, and the previous name must be
  // restorable afterwards.
  auto self = Thread::GetCurrentThread();
  REQUIRE(self);
  auto saved_name = self->name();
  std::string temporary_name = "Threading Test";
  REQUIRE_NOTHROW(set_name(temporary_name));
  // Put the original (catch.hpp-assigned) name back.
  REQUIRE_NOTHROW(set_name(saved_name));
}
TEST_CASE("Create and Run Thread", "Thread") {
  Thread::CreationParameters params = {};
  auto sleep_body = [] { Sleep(20ms); };
  std::unique_ptr<Thread> worker;
  WaitResult wait_result;
  // Simplest case: default parameters and a short-lived body.
  worker = Thread::Create(params, sleep_body);
  REQUIRE(worker->native_handle() != nullptr);
  REQUIRE_NOTHROW(worker->affinity_mask());
  REQUIRE(worker->name().empty());
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kSuccess);
  // A freshly created thread has no name until one is assigned.
  std::string assigned_name = "Test thread name";
  worker = Thread::Create(params, sleep_body);
  auto initial_name = worker->name();
  INFO(initial_name.c_str());
  REQUIRE(initial_name.empty());
  worker->set_name(assigned_name);
  REQUIRE(worker->name() == assigned_name);
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kSuccess);
  // Terminate() forcibly ends a thread stuck in an infinite loop.
  worker = Thread::Create(params, [] {
    while (true) {
      Sleep(1ms);
    }
  });
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kTimeout);
  worker->Terminate(-1);
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kSuccess);
  // Thread::Exit() ends the calling thread from inside its own body.
  worker = Thread::Create(params, [] {
    while (true) {
      Thread::Exit(-1);
    }
  });
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kSuccess);
  // Waiting on the current thread cannot succeed; expect a timeout.
  wait_result = Wait(Thread::GetCurrentThread(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kTimeout);
  // A small explicit stack size must still produce a runnable thread.
  params.stack_size = 16 * 1024;
  worker = Thread::Create(params, [] {
    while (true) {
      Thread::Exit(-1);
    }
  });
  REQUIRE(worker != nullptr);
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kSuccess);
  // TODO(bwrsandman): Test with different priorities
  // TODO(bwrsandman): Test setting and getting thread affinity
}
TEST_CASE("Test Suspending Thread", "Thread") {
  Thread::CreationParameters params = {};
  auto sleep_body = [] { Sleep(20ms); };
  std::unique_ptr<Thread> worker;
  WaitResult wait_result;
  // A thread created suspended must not run until Resume() is called.
  params.create_suspended = true;
  worker = Thread::Create(params, sleep_body);
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kTimeout);
  worker->Resume();
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kSuccess);
  params.create_suspended = false;
  // Suspending an already-running thread parks it until resumed.
  worker = Thread::Create(params, sleep_body);
  worker->Suspend();
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kTimeout);
  worker->Resume();
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kSuccess);
  // Suspension nests: every Suspend() needs a matching Resume().
  worker = Thread::Create(params, sleep_body);
  worker->Suspend();
  worker->Suspend();
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kTimeout);
  worker->Resume();
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kTimeout);
  worker->Resume();
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kSuccess);
  // Suspend()/Resume() report the suspend count prior to the call.
  uint32_t previous_count = 0;
  worker = Thread::Create(params, sleep_body);
  worker->Suspend(&previous_count);
  REQUIRE(previous_count == 0);
  worker->Suspend(&previous_count);
  REQUIRE(previous_count == 1);
  worker->Suspend(&previous_count);
  REQUIRE(previous_count == 2);
  worker->Resume(&previous_count);
  REQUIRE(previous_count == 3);
  worker->Resume(&previous_count);
  REQUIRE(previous_count == 2);
  worker->Resume(&previous_count);
  REQUIRE(previous_count == 1);
  worker->Suspend(&previous_count);
  REQUIRE(previous_count == 0);
  worker->Resume(&previous_count);
  REQUIRE(previous_count == 1);
  wait_result = Wait(worker.get(), false, 50ms);
  REQUIRE(wait_result == WaitResult::kSuccess);
}
TEST_CASE("Test Thread QueueUserCallback", "Thread") {
  std::unique_ptr<Thread> thread;
  WaitResult result;
  Thread::CreationParameters params = {};
  // Shared sequence counter: records the order in which the queued callback
  // and the thread body reach their completion points.
  std::atomic_int order;
  // Sequence index observed by the queued callback; -1 means it never ran.
  int is_modified;
  // Sequence index observed at the end of the thread body; -1 means the body
  // never reached its end.
  int has_finished;
  auto callback = [&is_modified, &order] {
    is_modified = std::atomic_fetch_add_explicit(
        &order, 1, std::memory_order::memory_order_relaxed);
  };
  // Without alertable
  order = 0;
  is_modified = -1;
  has_finished = -1;
  thread = Thread::Create(params, [&has_finished, &order] {
    // Not using Alertable so callback is not registered
    Sleep(90ms);
    has_finished = std::atomic_fetch_add_explicit(
        &order, 1, std::memory_order::memory_order_relaxed);
  });
  result = Wait(thread.get(), true, 50ms);
  REQUIRE(result == WaitResult::kTimeout);
  REQUIRE(is_modified == -1);
  thread->QueueUserCallback(callback);
  result = Wait(thread.get(), true, 100ms);
  REQUIRE(result == WaitResult::kSuccess);
  // The plain Sleep() above is not alertable, so the queued callback never
  // ran (-1) and only the body incremented the counter (index 0).
  REQUIRE(is_modified == -1);
  REQUIRE(has_finished == 0);
  // With alertable
  order = 0;
  is_modified = -1;
  has_finished = -1;
  thread = Thread::Create(params, [&has_finished, &order] {
    // Using Alertable so callback is registered
    AlertableSleep(90ms);
    has_finished = std::atomic_fetch_add_explicit(
        &order, 1, std::memory_order::memory_order_relaxed);
  });
  result = Wait(thread.get(), true, 50ms);
  REQUIRE(result == WaitResult::kTimeout);
  REQUIRE(is_modified == -1);
  thread->QueueUserCallback(callback);
  result = Wait(thread.get(), true, 100ms);
  REQUIRE(result == WaitResult::kSuccess);
  // The alertable sleep delivered the callback first (index 0), then the body
  // finished (index 1).
  REQUIRE(is_modified == 0);
  REQUIRE(has_finished == 1);
  // Test Exit command with QueueUserCallback
  order = 0;
  is_modified = -1;
  has_finished = -1;
  thread = Thread::Create(params, [&is_modified, &has_finished, &order] {
    is_modified = std::atomic_fetch_add_explicit(
        &order, 1, std::memory_order::memory_order_relaxed);
    // Using Alertable so callback is registered
    AlertableSleep(200ms);
    has_finished = std::atomic_fetch_add_explicit(
        &order, 1, std::memory_order::memory_order_relaxed);
  });
  result = Wait(thread.get(), true, 100ms);
  REQUIRE(result == WaitResult::kTimeout);
  // Exiting from inside the queued callback terminates the thread before the
  // body can reach its final increment, so has_finished stays -1.
  thread->QueueUserCallback([] { Thread::Exit(0); });
  result = Wait(thread.get(), true, 500ms);
  REQUIRE(result == WaitResult::kSuccess);
  REQUIRE(is_modified == 0);
  REQUIRE(has_finished == -1);
  // TODO(bwrsandman): Test alertable wait returning kUserCallback by using IO
  // callbacks.
}
} // namespace test
} // namespace base
} // namespace xe

View File

@ -24,29 +24,56 @@
#include <utility>
#include <vector>
#include "xenia/base/assert.h"
namespace xe {
namespace threading {
// This is more like an Event with self-reset when returning from Wait()
class Fence {
public:
Fence() : signaled_(false) {}
Fence() : signal_state_(0) {}
void Signal() {
std::unique_lock<std::mutex> lock(mutex_);
signaled_.store(true);
signal_state_ |= SIGMASK_;
cond_.notify_all();
}
// Wait for the Fence to be signaled. Clears the signal on return.
void Wait() {
std::unique_lock<std::mutex> lock(mutex_);
while (!signaled_.load()) {
assert_true((signal_state_ & ~SIGMASK_) < (SIGMASK_ - 1) &&
"Too many threads?");
// keep local copy to minimize loads
auto signal_state = ++signal_state_;
for (; !(signal_state & SIGMASK_); signal_state = signal_state_) {
cond_.wait(lock);
}
signaled_.store(false);
// We can't just clear the signal as other threads may not have read it yet
assert_true((signal_state & ~SIGMASK_) > 0); // wait_count > 0
if (signal_state == (1 | SIGMASK_)) { // wait_count == 1
// Last one out turn off the lights
signal_state_ = 0;
} else {
// Oops, another thread is still waiting, set the new count and keep the
// signal.
signal_state_ = --signal_state;
}
}
private:
using state_t_ = uint_fast32_t;
static constexpr state_t_ SIGMASK_ = state_t_(1)
<< (sizeof(state_t_) * 8 - 1);
std::mutex mutex_;
std::condition_variable cond_;
std::atomic<bool> signaled_;
// Use the highest bit (sign bit) as the signal flag and the rest to count
// waiting threads.
volatile state_t_ signal_state_;
};
// Returns the total number of logical processors in the host system.
@ -308,12 +335,12 @@ class Timer : public WaitHandle {
std::chrono::milliseconds period,
std::function<void()> opt_callback = nullptr) = 0;
template <typename Rep, typename Period>
void SetRepeating(std::chrono::nanoseconds due_time,
bool SetRepeating(std::chrono::nanoseconds due_time,
std::chrono::duration<Rep, Period> period,
std::function<void()> opt_callback = nullptr) {
SetRepeating(due_time,
std::chrono::duration_cast<std::chrono::milliseconds>(period),
std::move(opt_callback));
return SetRepeating(
due_time, std::chrono::duration_cast<std::chrono::milliseconds>(period),
std::move(opt_callback));
}
// Stops the timer before it can be set to the signaled state and cancels
@ -391,7 +418,7 @@ class Thread : public WaitHandle {
// Decrements a thread's suspend count. When the suspend count is decremented
// to zero, the execution of the thread is resumed.
virtual bool Resume(uint32_t* out_new_suspend_count = nullptr) = 0;
virtual bool Resume(uint32_t* out_previous_suspend_count = nullptr) = 0;
// Suspends the specified thread.
virtual bool Suspend(uint32_t* out_previous_suspend_count = nullptr) = 0;

File diff suppressed because it is too large Load Diff

View File

@ -388,16 +388,16 @@ class Win32Thread : public Win32Handle<Thread> {
QueueUserAPC(DispatchApc, handle_, reinterpret_cast<ULONG_PTR>(apc_data));
}
bool Resume(uint32_t* out_new_suspend_count = nullptr) override {
if (out_new_suspend_count) {
*out_new_suspend_count = 0;
bool Resume(uint32_t* out_previous_suspend_count = nullptr) override {
if (out_previous_suspend_count) {
*out_previous_suspend_count = 0;
}
DWORD result = ResumeThread(handle_);
if (result == UINT_MAX) {
return false;
}
if (out_new_suspend_count) {
*out_new_suspend_count = result;
if (out_previous_suspend_count) {
*out_previous_suspend_count = result;
}
return true;
}

View File

@ -30,7 +30,7 @@ ExportResolver::Table::Table(const std::string_view module_name,
}
std::sort(
exports_by_name_.begin(), exports_by_name_.end(),
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) <= 0; });
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) < 0; });
}
ExportResolver::ExportResolver() = default;
@ -51,7 +51,7 @@ void ExportResolver::RegisterTable(
}
std::sort(
all_exports_by_name_.begin(), all_exports_by_name_.end(),
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) <= 0; });
[](Export* a, Export* b) { return std::strcmp(a->name, b->name) < 0; });
}
Export* ExportResolver::GetExportByOrdinal(const std::string_view module_name,

View File

@ -73,7 +73,7 @@ bool CommandProcessor::Initialize(
WorkerThreadMain();
return 0;
}));
worker_thread_->set_name("GraphicsSystem Command Processor");
worker_thread_->set_name("GPU Commands");
worker_thread_->Create();
return true;
@ -731,12 +731,20 @@ bool CommandProcessor::ExecutePacketType3(RingBuffer* reader, uint32_t packet) {
} break;
case PM4_CONTEXT_UPDATE: {
assert_true(count == 1);
uint64_t value = reader->ReadAndSwap<uint32_t>();
uint32_t value = reader->ReadAndSwap<uint32_t>();
XELOGGPU("GPU context update = {:08X}", value);
assert_true(value == 0);
result = true;
break;
}
case PM4_WAIT_FOR_IDLE: {
// This opcode is used by "Duke Nukem Forever" while going/being ingame
assert_true(count == 1);
uint32_t value = reader->ReadAndSwap<uint32_t>();
XELOGGPU("GPU wait for idle = {:08X}", value);
result = true;
break;
}
default:
XELOGGPU("Unimplemented GPU OPCODE: 0x{:02X}\t\tCOUNT: {}\n", opcode,

View File

@ -21,6 +21,7 @@
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/draw_util.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/d3d12/d3d12_util.h"
@ -387,7 +388,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
sampler_count_vertex);
return nullptr;
}
root_signatures_bindful_.insert({index, root_signature});
root_signatures_bindful_.emplace(index, root_signature);
return root_signature;
}
@ -745,12 +746,11 @@ void D3D12CommandProcessor::SetSamplePositions(
current_sample_positions_ = sample_positions;
}
void D3D12CommandProcessor::SetComputePipelineState(
ID3D12PipelineState* pipeline_state) {
if (current_external_pipeline_state_ != pipeline_state) {
deferred_command_list_.D3DSetPipelineState(pipeline_state);
current_external_pipeline_state_ = pipeline_state;
current_cached_pipeline_state_ = nullptr;
void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) {
if (current_external_pipeline_ != pipeline) {
deferred_command_list_.D3DSetPipelineState(pipeline);
current_external_pipeline_ = pipeline;
current_cached_pipeline_ = nullptr;
}
}
@ -773,8 +773,16 @@ std::string D3D12CommandProcessor::GetWindowTitleText() const {
}
// Currently scaling is only supported with ROV.
if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) {
return "Direct3D 12 - 2x";
return "Direct3D 12 - ROV 2x";
}
// Rasterizer-ordered views are a feature very rarely used as of 2020 and
// that faces adoption complications (outside of Direct3D - on Vulkan - at
// least), but crucial to Xenia - raise awareness of its usage.
// https://github.com/KhronosGroup/Vulkan-Ecosystem/issues/27#issuecomment-455712319
// "In Xenia's title bar "D3D12 ROV" can be seen, which was a surprise, as I
// wasn't aware that Xenia D3D12 backend was using Raster Order Views
// feature" - oscarbg in that issue.
return "Direct3D 12 - ROV";
}
return "Direct3D 12";
}
@ -1196,7 +1204,7 @@ bool D3D12CommandProcessor::SetupContext() {
*this, *register_file_, bindless_resources_used_, edram_rov_used_,
texture_cache_->IsResolutionScale2X() ? 2 : 1);
if (!pipeline_cache_->Initialize()) {
XELOGE("Failed to initialize the graphics pipeline state cache");
XELOGE("Failed to initialize the graphics pipeline cache");
return false;
}
@ -1526,8 +1534,7 @@ void D3D12CommandProcessor::ShutdownContext() {
// Shut down binding - bindless descriptors may be owned by subsystems like
// the texture cache.
// Root signatured are used by pipeline states, thus freed after the pipeline
// states.
// Root signatures are used by pipelines, thus freed after the pipelines.
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_);
ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_);
for (auto it : root_signatures_bindful_) {
@ -1878,7 +1885,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
xenos::VertexShaderExportMode::kMultipass ||
(primitive_two_faced && pa_su_sc_mode_cntl.cull_front &&
pa_su_sc_mode_cntl.cull_back))) {
// All faces are culled - can't be expressed in the pipeline state.
// All faces are culled - can't be expressed in the pipeline.
return true;
}
@ -1954,7 +1961,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
line_loop_closing_index = 0;
}
// Update the textures - this may bind pipeline state objects.
// Update the textures - this may bind pipelines.
uint32_t used_texture_mask =
vertex_shader->GetUsedTextureMask() |
(pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
@ -1972,21 +1979,21 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
early_z = true;
}
// Create the pipeline state object if needed and bind it.
void* pipeline_state_handle;
// Create the pipeline if needed and bind it.
void* pipeline_handle;
ID3D12RootSignature* root_signature;
if (!pipeline_cache_->ConfigurePipeline(
vertex_shader, pixel_shader, primitive_type_converted,
indexed ? index_buffer_info->format : xenos::IndexFormat::kInt16,
early_z, pipeline_render_targets, &pipeline_state_handle,
early_z, pipeline_render_targets, &pipeline_handle,
&root_signature)) {
return false;
}
if (current_cached_pipeline_state_ != pipeline_state_handle) {
if (current_cached_pipeline_ != pipeline_handle) {
deferred_command_list_.SetPipelineStateHandle(
reinterpret_cast<void*>(pipeline_state_handle));
current_cached_pipeline_state_ = pipeline_state_handle;
current_external_pipeline_state_ = nullptr;
reinterpret_cast<void*>(pipeline_handle));
current_cached_pipeline_ = pipeline_handle;
current_external_pipeline_ = nullptr;
}
// Update viewport, scissor, blend factor and stencil reference.
@ -2005,14 +2012,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
}
// Must not call anything that can change the descriptor heap from now on!
// Ensure vertex and index buffers are resident and draw.
// Ensure vertex buffers are resident.
// TODO(Triang3l): Cache residency for ranges in a way similar to how texture
// validity will be tracked.
// validity is tracked.
uint64_t vertex_buffers_resident[2] = {};
for (const auto& vertex_binding : vertex_shader->vertex_bindings()) {
for (const Shader::VertexBinding& vertex_binding :
vertex_shader->vertex_bindings()) {
uint32_t vfetch_index = vertex_binding.fetch_constant;
if (vertex_buffers_resident[vfetch_index >> 6] &
(1ull << (vfetch_index & 63))) {
(uint64_t(1) << (vfetch_index & 63))) {
continue;
}
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
@ -2045,7 +2053,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
vfetch_constant.address << 2, vfetch_constant.size << 2);
return false;
}
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1)
<< (vfetch_index & 63);
}
// Gather memexport ranges and ensure the heaps for them are resident, and
@ -2517,8 +2526,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
submission_open_ = true;
// Start a new deferred command list - will submit it to the real one in the
// end of the submission (when async pipeline state object creation requests
// are fulfilled).
// end of the submission (when async pipeline creation requests are
// fulfilled).
deferred_command_list_.Reset();
// Reset cached state of the command list.
@ -2527,8 +2536,8 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
ff_blend_factor_update_needed_ = true;
ff_stencil_ref_update_needed_ = true;
current_sample_positions_ = xenos::MsaaSamples::k1X;
current_cached_pipeline_state_ = nullptr;
current_external_pipeline_state_ = nullptr;
current_cached_pipeline_ = nullptr;
current_external_pipeline_ = nullptr;
current_graphics_root_signature_ = nullptr;
current_graphics_root_up_to_date_ = 0;
if (bindless_resources_used_) {
@ -2724,7 +2733,7 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
}
bool D3D12CommandProcessor::CanEndSubmissionImmediately() const {
return !submission_open_ || !pipeline_cache_->IsCreatingPipelineStates();
return !submission_open_ || !pipeline_cache_->IsCreatingPipelines();
}
void D3D12CommandProcessor::ClearCommandAllocatorCache() {
@ -2745,12 +2754,12 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() {
}
void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
auto& regs = *register_file_;
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
const RegisterFile& regs = *register_file_;
// Window parameters.
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
// See r200UpdateWindow:
@ -2838,34 +2847,20 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
}
// Scissor.
auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
D3D12_RECT scissor;
scissor.left = pa_sc_window_scissor_tl.tl_x;
scissor.top = pa_sc_window_scissor_tl.tl_y;
scissor.right = pa_sc_window_scissor_br.br_x;
scissor.bottom = pa_sc_window_scissor_br.br_y;
if (!pa_sc_window_scissor_tl.window_offset_disable) {
scissor.left =
std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0));
scissor.top =
std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0));
scissor.right =
std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0));
scissor.bottom =
std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0));
}
scissor.left *= pixel_size_x;
scissor.top *= pixel_size_y;
scissor.right *= pixel_size_x;
scissor.bottom *= pixel_size_y;
ff_scissor_update_needed_ |= ff_scissor_.left != scissor.left;
ff_scissor_update_needed_ |= ff_scissor_.top != scissor.top;
ff_scissor_update_needed_ |= ff_scissor_.right != scissor.right;
ff_scissor_update_needed_ |= ff_scissor_.bottom != scissor.bottom;
draw_util::Scissor scissor;
draw_util::GetScissor(regs, scissor);
D3D12_RECT scissor_rect;
scissor_rect.left = LONG(scissor.left * pixel_size_x);
scissor_rect.top = LONG(scissor.top * pixel_size_y);
scissor_rect.right = LONG((scissor.left + scissor.width) * pixel_size_x);
scissor_rect.bottom = LONG((scissor.top + scissor.height) * pixel_size_y);
ff_scissor_update_needed_ |= ff_scissor_.left != scissor_rect.left;
ff_scissor_update_needed_ |= ff_scissor_.top != scissor_rect.top;
ff_scissor_update_needed_ |= ff_scissor_.right != scissor_rect.right;
ff_scissor_update_needed_ |= ff_scissor_.bottom != scissor_rect.bottom;
if (ff_scissor_update_needed_) {
ff_scissor_ = scissor;
deferred_command_list_.RSSetScissorRect(scissor);
ff_scissor_ = scissor_rect;
deferred_command_list_.RSSetScissorRect(scissor_rect);
ff_scissor_update_needed_ = false;
}
@ -2915,12 +2910,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
uint32_t line_loop_closing_index, xenos::Endian index_endian,
uint32_t used_texture_mask, bool early_z, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
auto& regs = *register_file_;
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
const RegisterFile& regs = *register_file_;
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
@ -3103,14 +3097,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index;
system_constants_.line_loop_closing_index = line_loop_closing_index;
// Vertex index offset.
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
system_constants_.vertex_base_index = vgt_indx_offset;
// Index or tessellation edge factor buffer endianness.
dirty |= system_constants_.vertex_index_endian != index_endian;
system_constants_.vertex_index_endian = index_endian;
// Vertex index offset.
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
system_constants_.vertex_base_index = vgt_indx_offset;
// User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
if (!pa_cl_clip_cntl.clip_disable) {
for (uint32_t i = 0; i < 6; ++i) {
@ -3574,7 +3568,7 @@ bool D3D12CommandProcessor::UpdateBindings(
float_constant_map_vertex.float_bitmap[i];
// If no float constants at all, we can reuse any buffer for them, so not
// invalidating.
if (float_constant_map_vertex.float_count != 0) {
if (float_constant_count_vertex) {
cbuffer_binding_float_vertex_.up_to_date = false;
}
}
@ -3589,7 +3583,7 @@ bool D3D12CommandProcessor::UpdateBindings(
float_constant_map_pixel.float_bitmap[i]) {
current_float_constant_map_pixel_[i] =
float_constant_map_pixel.float_bitmap[i];
if (float_constant_map_pixel.float_count != 0) {
if (float_constant_count_pixel) {
cbuffer_binding_float_pixel_.up_to_date = false;
}
}
@ -3889,8 +3883,8 @@ bool D3D12CommandProcessor::UpdateBindings(
sampler_parameters,
provider.OffsetSamplerDescriptor(
sampler_bindless_heap_cpu_start_, sampler_index));
texture_cache_bindless_sampler_map_.insert(
{sampler_parameters.value, sampler_index});
texture_cache_bindless_sampler_map_.emplace(
sampler_parameters.value, sampler_index);
}
current_sampler_bindless_indices_vertex_[j] = sampler_index;
}
@ -3921,8 +3915,8 @@ bool D3D12CommandProcessor::UpdateBindings(
sampler_parameters,
provider.OffsetSamplerDescriptor(
sampler_bindless_heap_cpu_start_, sampler_index));
texture_cache_bindless_sampler_map_.insert(
{sampler_parameters.value, sampler_index});
texture_cache_bindless_sampler_map_.emplace(
sampler_parameters.value, sampler_index);
}
current_sampler_bindless_indices_pixel_[j] = sampler_index;
}

View File

@ -190,19 +190,17 @@ class D3D12CommandProcessor : public CommandProcessor {
// render targets or copying to depth render targets.
void SetSamplePositions(xenos::MsaaSamples sample_positions);
// Returns a pipeline state object with deferred creation by its handle. May
// return nullptr if failed to create the pipeline state object.
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
void* handle) const {
return pipeline_cache_->GetD3D12PipelineStateByHandle(handle);
// Returns a pipeline with deferred creation by its handle. May return nullptr
// if failed to create the pipeline.
ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
return pipeline_cache_->GetD3D12PipelineByHandle(handle);
}
// Sets the current pipeline state to a compute one. This is for cache
// invalidation primarily. A submission must be open.
void SetComputePipelineState(ID3D12PipelineState* pipeline_state);
// Sets the current pipeline to a compute one. This is for cache invalidation
// primarily. A submission must be open.
void SetComputePipeline(ID3D12PipelineState* pipeline);
// For the pipeline state cache to call when binding layout UIDs may be
// reused.
// For the pipeline cache to call when binding layout UIDs may be reused.
void NotifyShaderBindingsLayoutUIDsInvalidated();
// Returns the text to display in the GPU backend name in the window title.
@ -327,8 +325,8 @@ class D3D12CommandProcessor : public CommandProcessor {
bool EndSubmission(bool is_swap);
// Checks if ending a submission right now would not cause potentially more
// delay than it would reduce by making the GPU start working earlier - such
// as when there are unfinished graphics pipeline state creation requests that
// would need to be fulfilled before actually submitting the command list.
// as when there are unfinished graphics pipeline creation requests that would
// need to be fulfilled before actually submitting the command list.
bool CanEndSubmissionImmediately() const;
bool AwaitAllQueueOperationsCompletion() {
CheckSubmissionFence(submission_current_);
@ -512,7 +510,7 @@ class D3D12CommandProcessor : public CommandProcessor {
return cvars::internal_tile_height;
}
inline std::pair<uint32_t, uint32_t> GetSwapTextureSize() const {
std::pair<uint32_t, uint32_t> GetSwapTextureSize() const {
if (texture_cache_->IsResolutionScale2X()) {
return std::make_pair(kSwapTextureWidth() * 2, kSwapTextureHeight() * 2);
}
@ -557,13 +555,12 @@ class D3D12CommandProcessor : public CommandProcessor {
// Current SSAA sample positions (to be updated by the render target cache).
xenos::MsaaSamples current_sample_positions_;
// Currently bound pipeline state, either a graphics pipeline state object
// from the pipeline state cache (with potentially deferred creation -
// current_external_pipeline_state_ is nullptr in this case) or a non-Xenos
// graphics or compute pipeline state object (current_cached_pipeline_state_
// is nullptr in this case).
void* current_cached_pipeline_state_;
ID3D12PipelineState* current_external_pipeline_state_;
// Currently bound pipeline, either a graphics pipeline from the pipeline
// cache (with potentially deferred creation - current_external_pipeline_ is
// nullptr in this case) or a non-Xenos graphics or compute pipeline
// (current_cached_pipeline_ is nullptr in this case).
void* current_cached_pipeline_;
ID3D12PipelineState* current_external_pipeline_;
// Currently bound graphics root signature.
ID3D12RootSignature* current_graphics_root_signature_;

View File

@ -157,7 +157,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
stretch_pipeline_desc.SampleDesc.Count = 1;
if (FAILED(device->CreateGraphicsPipelineState(
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_pipeline_)))) {
XELOGE("Failed to create the front buffer stretch pipeline state");
XELOGE("Failed to create the front buffer stretch pipeline");
stretch_gamma_root_signature_->Release();
stretch_gamma_root_signature_ = nullptr;
stretch_root_signature_->Release();
@ -170,8 +170,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor,
if (FAILED(device->CreateGraphicsPipelineState(
&stretch_pipeline_desc, IID_PPV_ARGS(&stretch_gamma_pipeline_)))) {
XELOGE(
"Failed to create the gamma-correcting front buffer stretch "
"pipeline state");
"Failed to create the gamma-correcting front buffer stretch pipeline");
stretch_pipeline_->Release();
stretch_pipeline_ = nullptr;
stretch_gamma_root_signature_->Release();

View File

@ -85,7 +85,7 @@ class D3D12Shader : public Shader {
return sampler_bindings_.data();
}
// For owning subsystems like the pipeline state cache, accessors for unique
// For owning subsystems like the pipeline cache, accessors for unique
// identifiers (used instead of hashes to make sure collisions can't happen)
// of binding layouts used by the shader, for invalidation if a shader with an
// incompatible layout was bound.

View File

@ -48,7 +48,7 @@ class D3D12SharedMemory : public SharedMemory {
// UseForReading or UseForWriting.
// Makes the buffer usable for vertices, indices and texture untiling.
inline void UseForReading() {
void UseForReading() {
// Vertex fetch is also allowed in pixel shaders.
CommitUAVWritesAndTransitionBuffer(
D3D12_RESOURCE_STATE_INDEX_BUFFER |
@ -56,18 +56,18 @@ class D3D12SharedMemory : public SharedMemory {
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
// Makes the buffer usable for texture tiling after a resolve.
inline void UseForWriting() {
void UseForWriting() {
CommitUAVWritesAndTransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
}
// Makes the buffer usable as a source for copy commands.
inline void UseAsCopySource() {
void UseAsCopySource() {
CommitUAVWritesAndTransitionBuffer(D3D12_RESOURCE_STATE_COPY_SOURCE);
}
// Must be called when doing draws/dispatches modifying data within the shared
// memory buffer as a UAV, to make sure that when UseForWriting is called the
// next time, a UAV barrier will be done, and subsequent overlapping UAV
// writes and reads are ordered.
inline void MarkUAVWritesCommitNeeded() {
void MarkUAVWritesCommitNeeded() {
if (buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
buffer_uav_writes_commit_needed_ = true;
}

View File

@ -209,9 +209,8 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
}
} break;
case Command::kSetPipelineStateHandle: {
current_pipeline_state =
command_processor_.GetD3D12PipelineStateByHandle(
*reinterpret_cast<void* const*>(stream));
current_pipeline_state = command_processor_.GetD3D12PipelineByHandle(
*reinterpret_cast<void* const*>(stream));
if (current_pipeline_state) {
command_list->SetPipelineState(current_pipeline_state);
}

View File

@ -33,7 +33,7 @@ class DeferredCommandList {
void Execute(ID3D12GraphicsCommandList* command_list,
ID3D12GraphicsCommandList1* command_list_1);
inline void D3DClearUnorderedAccessViewUint(
void D3DClearUnorderedAccessViewUint(
D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle_in_current_heap,
D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle, ID3D12Resource* resource,
const UINT values[4], UINT num_rects, const D3D12_RECT* rects) {
@ -51,9 +51,9 @@ class DeferredCommandList {
}
}
inline void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset,
ID3D12Resource* src_buffer, UINT64 src_offset,
UINT64 num_bytes) {
void D3DCopyBufferRegion(ID3D12Resource* dst_buffer, UINT64 dst_offset,
ID3D12Resource* src_buffer, UINT64 src_offset,
UINT64 num_bytes) {
auto& args = *reinterpret_cast<D3DCopyBufferRegionArguments*>(WriteCommand(
Command::kD3DCopyBufferRegion, sizeof(D3DCopyBufferRegionArguments)));
args.dst_buffer = dst_buffer;
@ -63,26 +63,26 @@ class DeferredCommandList {
args.num_bytes = num_bytes;
}
inline void D3DCopyResource(ID3D12Resource* dst_resource,
ID3D12Resource* src_resource) {
void D3DCopyResource(ID3D12Resource* dst_resource,
ID3D12Resource* src_resource) {
auto& args = *reinterpret_cast<D3DCopyResourceArguments*>(WriteCommand(
Command::kD3DCopyResource, sizeof(D3DCopyResourceArguments)));
args.dst_resource = dst_resource;
args.src_resource = src_resource;
}
inline void CopyTexture(const D3D12_TEXTURE_COPY_LOCATION& dst,
const D3D12_TEXTURE_COPY_LOCATION& src) {
void CopyTexture(const D3D12_TEXTURE_COPY_LOCATION& dst,
const D3D12_TEXTURE_COPY_LOCATION& src) {
auto& args = *reinterpret_cast<CopyTextureArguments*>(
WriteCommand(Command::kCopyTexture, sizeof(CopyTextureArguments)));
std::memcpy(&args.dst, &dst, sizeof(D3D12_TEXTURE_COPY_LOCATION));
std::memcpy(&args.src, &src, sizeof(D3D12_TEXTURE_COPY_LOCATION));
}
inline void CopyTextureRegion(const D3D12_TEXTURE_COPY_LOCATION& dst,
UINT dst_x, UINT dst_y, UINT dst_z,
const D3D12_TEXTURE_COPY_LOCATION& src,
const D3D12_BOX& src_box) {
void CopyTextureRegion(const D3D12_TEXTURE_COPY_LOCATION& dst, UINT dst_x,
UINT dst_y, UINT dst_z,
const D3D12_TEXTURE_COPY_LOCATION& src,
const D3D12_BOX& src_box) {
auto& args = *reinterpret_cast<CopyTextureRegionArguments*>(WriteCommand(
Command::kCopyTextureRegion, sizeof(CopyTextureRegionArguments)));
std::memcpy(&args.dst, &dst, sizeof(D3D12_TEXTURE_COPY_LOCATION));
@ -93,8 +93,8 @@ class DeferredCommandList {
args.src_box = src_box;
}
inline void D3DDispatch(UINT thread_group_count_x, UINT thread_group_count_y,
UINT thread_group_count_z) {
void D3DDispatch(UINT thread_group_count_x, UINT thread_group_count_y,
UINT thread_group_count_z) {
auto& args = *reinterpret_cast<D3DDispatchArguments*>(
WriteCommand(Command::kD3DDispatch, sizeof(D3DDispatchArguments)));
args.thread_group_count_x = thread_group_count_x;
@ -102,11 +102,10 @@ class DeferredCommandList {
args.thread_group_count_z = thread_group_count_z;
}
inline void D3DDrawIndexedInstanced(UINT index_count_per_instance,
UINT instance_count,
UINT start_index_location,
INT base_vertex_location,
UINT start_instance_location) {
void D3DDrawIndexedInstanced(UINT index_count_per_instance,
UINT instance_count, UINT start_index_location,
INT base_vertex_location,
UINT start_instance_location) {
auto& args = *reinterpret_cast<D3DDrawIndexedInstancedArguments*>(
WriteCommand(Command::kD3DDrawIndexedInstanced,
sizeof(D3DDrawIndexedInstancedArguments)));
@ -117,9 +116,9 @@ class DeferredCommandList {
args.start_instance_location = start_instance_location;
}
inline void D3DDrawInstanced(UINT vertex_count_per_instance,
UINT instance_count, UINT start_vertex_location,
UINT start_instance_location) {
void D3DDrawInstanced(UINT vertex_count_per_instance, UINT instance_count,
UINT start_vertex_location,
UINT start_instance_location) {
auto& args = *reinterpret_cast<D3DDrawInstancedArguments*>(WriteCommand(
Command::kD3DDrawInstanced, sizeof(D3DDrawInstancedArguments)));
args.vertex_count_per_instance = vertex_count_per_instance;
@ -128,7 +127,7 @@ class DeferredCommandList {
args.start_instance_location = start_instance_location;
}
inline void D3DIASetIndexBuffer(const D3D12_INDEX_BUFFER_VIEW* view) {
void D3DIASetIndexBuffer(const D3D12_INDEX_BUFFER_VIEW* view) {
auto& args = *reinterpret_cast<D3D12_INDEX_BUFFER_VIEW*>(WriteCommand(
Command::kD3DIASetIndexBuffer, sizeof(D3D12_INDEX_BUFFER_VIEW)));
if (view != nullptr) {
@ -142,14 +141,13 @@ class DeferredCommandList {
}
}
inline void D3DIASetPrimitiveTopology(
D3D12_PRIMITIVE_TOPOLOGY primitive_topology) {
void D3DIASetPrimitiveTopology(D3D12_PRIMITIVE_TOPOLOGY primitive_topology) {
auto& arg = *reinterpret_cast<D3D12_PRIMITIVE_TOPOLOGY*>(WriteCommand(
Command::kD3DIASetPrimitiveTopology, sizeof(D3D12_PRIMITIVE_TOPOLOGY)));
arg = primitive_topology;
}
inline void D3DOMSetBlendFactor(const FLOAT blend_factor[4]) {
void D3DOMSetBlendFactor(const FLOAT blend_factor[4]) {
auto args = reinterpret_cast<FLOAT*>(
WriteCommand(Command::kD3DOMSetBlendFactor, 4 * sizeof(FLOAT)));
args[0] = blend_factor[0];
@ -158,7 +156,7 @@ class DeferredCommandList {
args[3] = blend_factor[3];
}
inline void D3DOMSetRenderTargets(
void D3DOMSetRenderTargets(
UINT num_render_target_descriptors,
const D3D12_CPU_DESCRIPTOR_HANDLE* render_target_descriptors,
BOOL rts_single_handle_to_descriptor_range,
@ -185,14 +183,14 @@ class DeferredCommandList {
}
}
inline void D3DOMSetStencilRef(UINT stencil_ref) {
void D3DOMSetStencilRef(UINT stencil_ref) {
auto& arg = *reinterpret_cast<UINT*>(
WriteCommand(Command::kD3DOMSetStencilRef, sizeof(UINT)));
arg = stencil_ref;
}
inline void D3DResourceBarrier(UINT num_barriers,
const D3D12_RESOURCE_BARRIER* barriers) {
void D3DResourceBarrier(UINT num_barriers,
const D3D12_RESOURCE_BARRIER* barriers) {
if (num_barriers == 0) {
return;
}
@ -207,21 +205,22 @@ class DeferredCommandList {
num_barriers * sizeof(D3D12_RESOURCE_BARRIER));
}
inline void RSSetScissorRect(const D3D12_RECT& rect) {
void RSSetScissorRect(const D3D12_RECT& rect) {
auto& arg = *reinterpret_cast<D3D12_RECT*>(
WriteCommand(Command::kRSSetScissorRect, sizeof(D3D12_RECT)));
arg = rect;
}
inline void RSSetViewport(const D3D12_VIEWPORT& viewport) {
void RSSetViewport(const D3D12_VIEWPORT& viewport) {
auto& arg = *reinterpret_cast<D3D12_VIEWPORT*>(
WriteCommand(Command::kRSSetViewport, sizeof(D3D12_VIEWPORT)));
arg = viewport;
}
inline void D3DSetComputeRoot32BitConstants(
UINT root_parameter_index, UINT num_32bit_values_to_set,
const void* src_data, UINT dest_offset_in_32bit_values) {
void D3DSetComputeRoot32BitConstants(UINT root_parameter_index,
UINT num_32bit_values_to_set,
const void* src_data,
UINT dest_offset_in_32bit_values) {
if (num_32bit_values_to_set == 0) {
return;
}
@ -235,9 +234,10 @@ class DeferredCommandList {
std::memcpy(args + 1, src_data, num_32bit_values_to_set * sizeof(uint32_t));
}
inline void D3DSetGraphicsRoot32BitConstants(
UINT root_parameter_index, UINT num_32bit_values_to_set,
const void* src_data, UINT dest_offset_in_32bit_values) {
void D3DSetGraphicsRoot32BitConstants(UINT root_parameter_index,
UINT num_32bit_values_to_set,
const void* src_data,
UINT dest_offset_in_32bit_values) {
if (num_32bit_values_to_set == 0) {
return;
}
@ -251,7 +251,7 @@ class DeferredCommandList {
std::memcpy(args + 1, src_data, num_32bit_values_to_set * sizeof(uint32_t));
}
inline void D3DSetComputeRootConstantBufferView(
void D3DSetComputeRootConstantBufferView(
UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS buffer_location) {
auto& args = *reinterpret_cast<SetRootConstantBufferViewArguments*>(
WriteCommand(Command::kD3DSetComputeRootConstantBufferView,
@ -260,7 +260,7 @@ class DeferredCommandList {
args.buffer_location = buffer_location;
}
inline void D3DSetGraphicsRootConstantBufferView(
void D3DSetGraphicsRootConstantBufferView(
UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS buffer_location) {
auto& args = *reinterpret_cast<SetRootConstantBufferViewArguments*>(
WriteCommand(Command::kD3DSetGraphicsRootConstantBufferView,
@ -269,7 +269,7 @@ class DeferredCommandList {
args.buffer_location = buffer_location;
}
inline void D3DSetComputeRootDescriptorTable(
void D3DSetComputeRootDescriptorTable(
UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) {
auto& args = *reinterpret_cast<SetRootDescriptorTableArguments*>(
WriteCommand(Command::kD3DSetComputeRootDescriptorTable,
@ -278,7 +278,7 @@ class DeferredCommandList {
args.base_descriptor.ptr = base_descriptor.ptr;
}
inline void D3DSetGraphicsRootDescriptorTable(
void D3DSetGraphicsRootDescriptorTable(
UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) {
auto& args = *reinterpret_cast<SetRootDescriptorTableArguments*>(
WriteCommand(Command::kD3DSetGraphicsRootDescriptorTable,
@ -287,42 +287,40 @@ class DeferredCommandList {
args.base_descriptor.ptr = base_descriptor.ptr;
}
inline void D3DSetComputeRootSignature(ID3D12RootSignature* root_signature) {
void D3DSetComputeRootSignature(ID3D12RootSignature* root_signature) {
auto& arg = *reinterpret_cast<ID3D12RootSignature**>(WriteCommand(
Command::kD3DSetComputeRootSignature, sizeof(ID3D12RootSignature*)));
arg = root_signature;
}
inline void D3DSetGraphicsRootSignature(ID3D12RootSignature* root_signature) {
void D3DSetGraphicsRootSignature(ID3D12RootSignature* root_signature) {
auto& arg = *reinterpret_cast<ID3D12RootSignature**>(WriteCommand(
Command::kD3DSetGraphicsRootSignature, sizeof(ID3D12RootSignature*)));
arg = root_signature;
}
inline void SetDescriptorHeaps(
ID3D12DescriptorHeap* cbv_srv_uav_descriptor_heap,
ID3D12DescriptorHeap* sampler_descriptor_heap) {
void SetDescriptorHeaps(ID3D12DescriptorHeap* cbv_srv_uav_descriptor_heap,
ID3D12DescriptorHeap* sampler_descriptor_heap) {
auto& args = *reinterpret_cast<SetDescriptorHeapsArguments*>(WriteCommand(
Command::kSetDescriptorHeaps, sizeof(SetDescriptorHeapsArguments)));
args.cbv_srv_uav_descriptor_heap = cbv_srv_uav_descriptor_heap;
args.sampler_descriptor_heap = sampler_descriptor_heap;
}
inline void D3DSetPipelineState(ID3D12PipelineState* pipeline_state) {
void D3DSetPipelineState(ID3D12PipelineState* pipeline_state) {
auto& arg = *reinterpret_cast<ID3D12PipelineState**>(WriteCommand(
Command::kD3DSetPipelineState, sizeof(ID3D12PipelineState*)));
arg = pipeline_state;
}
inline void SetPipelineStateHandle(void* pipeline_state_handle) {
void SetPipelineStateHandle(void* pipeline_state_handle) {
auto& arg = *reinterpret_cast<void**>(
WriteCommand(Command::kSetPipelineStateHandle, sizeof(void*)));
arg = pipeline_state_handle;
}
inline void D3DSetSamplePositions(
UINT num_samples_per_pixel, UINT num_pixels,
const D3D12_SAMPLE_POSITION* sample_positions) {
void D3DSetSamplePositions(UINT num_samples_per_pixel, UINT num_pixels,
const D3D12_SAMPLE_POSITION* sample_positions) {
auto& args = *reinterpret_cast<D3DSetSamplePositionsArguments*>(
WriteCommand(Command::kD3DSetSamplePositions,
sizeof(D3DSetSamplePositionsArguments)));

View File

@ -43,10 +43,10 @@ DEFINE_bool(
"D3D12");
DEFINE_int32(
d3d12_pipeline_creation_threads, -1,
"Number of threads used for graphics pipeline state object creation. -1 to "
"calculate automatically (75% of logical CPU cores), a positive number to "
"specify the number of threads explicitly (up to the number of logical CPU "
"cores), 0 to disable multithreaded pipeline state object creation.",
"Number of threads used for graphics pipeline creation. -1 to calculate "
"automatically (75% of logical CPU cores), a positive number to specify "
"the number of threads explicitly (up to the number of logical CPU cores), "
"0 to disable multithreaded pipeline creation.",
"D3D12");
DEFINE_bool(d3d12_tessellation_wireframe, false,
"Display tessellated surfaces as wireframe for debugging.",
@ -125,8 +125,8 @@ bool PipelineCache::Initialize() {
logical_processor_count = 6;
}
// Initialize creation thread synchronization data even if not using creation
// threads because they may be used anyway to create pipeline state objects
// from the storage.
// threads because they may be used anyway to create pipelines from the
// storage.
creation_threads_busy_ = 0;
creation_completion_event_ =
xe::threading::Event::CreateManualResetEvent(true);
@ -145,7 +145,7 @@ bool PipelineCache::Initialize() {
for (size_t i = 0; i < creation_thread_count; ++i) {
std::unique_ptr<xe::threading::Thread> creation_thread =
xe::threading::Thread::Create({}, [this, i]() { CreationThread(i); });
creation_thread->set_name("D3D12 Pipeline States");
creation_thread->set_name("D3D12 Pipelines");
creation_threads_.push_back(std::move(creation_thread));
}
}
@ -184,13 +184,12 @@ void PipelineCache::ClearCache(bool shutting_down) {
}
ShutdownShaderStorage();
// Remove references to the current pipeline state object.
current_pipeline_state_ = nullptr;
// Remove references to the current pipeline.
current_pipeline_ = nullptr;
if (!creation_threads_.empty()) {
// Empty the pipeline state object creation queue and make sure there are no
// threads currently creating pipeline state objects because pipeline states
// are going to be deleted.
// Empty the pipeline creation queue and make sure there are no threads
// currently creating pipelines because pipelines are going to be deleted.
bool await_creation_completion_event = false;
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
@ -207,13 +206,13 @@ void PipelineCache::ClearCache(bool shutting_down) {
}
}
// Destroy all pipeline state objects.
for (auto it : pipeline_states_) {
// Destroy all pipelines.
for (auto it : pipelines_) {
it.second->state->Release();
delete it.second;
}
pipeline_states_.clear();
COUNT_profile_set("gpu/pipeline_cache/pipeline_states", 0);
pipelines_.clear();
COUNT_profile_set("gpu/pipeline_cache/pipelines", 0);
// Destroy all shaders.
command_processor_.NotifyShaderBindingsLayoutUIDsInvalidated();
@ -223,10 +222,10 @@ void PipelineCache::ClearCache(bool shutting_down) {
}
texture_binding_layout_map_.clear();
texture_binding_layouts_.clear();
for (auto it : shader_map_) {
for (auto it : shaders_) {
delete it.second;
}
shader_map_.clear();
shaders_.clear();
if (reinitialize_shader_storage) {
InitializeShaderStorage(shader_storage_root, shader_storage_title_id,
@ -374,8 +373,7 @@ void PipelineCache::InitializeShaderStorage(
}
size_t ucode_byte_count =
shader_header.ucode_dword_count * sizeof(uint32_t);
if (shader_map_.find(shader_header.ucode_data_hash) !=
shader_map_.end()) {
if (shaders_.find(shader_header.ucode_data_hash) != shaders_.end()) {
// Already added - usually shaders aren't added without the intention of
// translating them imminently, so don't do additional checks to
// actually ensure that translation happens right now (they would cause
@ -402,7 +400,7 @@ void PipelineCache::InitializeShaderStorage(
D3D12Shader* shader =
new D3D12Shader(shader_header.type, ucode_data_hash,
ucode_dwords.data(), shader_header.ucode_dword_count);
shader_map_.insert({ucode_data_hash, shader});
shaders_.emplace(ucode_data_hash, shader);
// Create new threads if the currently existing threads can't keep up with
// file reading, but not more than the number of logical processors minus
// one.
@ -439,7 +437,7 @@ void PipelineCache::InitializeShaderStorage(
}
shader_translation_threads.clear();
for (D3D12Shader* shader : shaders_failed_to_translate) {
shader_map_.erase(shader->ucode_data_hash());
shaders_.erase(shader->ucode_data_hash());
delete shader;
}
}
@ -460,72 +458,66 @@ void PipelineCache::InitializeShaderStorage(
}
// 'DXRO' or 'DXRT'.
const uint32_t pipeline_state_storage_magic_api =
const uint32_t pipeline_storage_magic_api =
edram_rov_used_ ? 0x4F525844 : 0x54525844;
// Initialize the pipeline state storage stream.
uint64_t pipeline_state_storage_initialization_start_ =
// Initialize the pipeline storage stream.
uint64_t pipeline_storage_initialization_start_ =
xe::Clock::QueryHostTickCount();
auto pipeline_state_storage_file_path =
auto pipeline_storage_file_path =
shader_storage_shareable_root /
fmt::format("{:08X}.{}.d3d12.xpso", title_id,
edram_rov_used_ ? "rov" : "rtv");
pipeline_state_storage_file_ =
xe::filesystem::OpenFile(pipeline_state_storage_file_path, "a+b");
if (!pipeline_state_storage_file_) {
pipeline_storage_file_ =
xe::filesystem::OpenFile(pipeline_storage_file_path, "a+b");
if (!pipeline_storage_file_) {
XELOGE(
"Failed to open the Direct3D 12 pipeline state description storage "
"file for writing, persistent shader storage will be disabled: {}",
xe::path_to_utf8(pipeline_state_storage_file_path));
"Failed to open the Direct3D 12 pipeline description storage file for "
"writing, persistent shader storage will be disabled: {}",
xe::path_to_utf8(pipeline_storage_file_path));
fclose(shader_storage_file_);
shader_storage_file_ = nullptr;
return;
}
pipeline_state_storage_file_flush_needed_ = false;
pipeline_storage_file_flush_needed_ = false;
// 'XEPS'.
const uint32_t pipeline_state_storage_magic = 0x53504558;
const uint32_t pipeline_storage_magic = 0x53504558;
struct {
uint32_t magic;
uint32_t magic_api;
uint32_t version_swapped;
} pipeline_state_storage_file_header;
if (fread(&pipeline_state_storage_file_header,
sizeof(pipeline_state_storage_file_header), 1,
pipeline_state_storage_file_) &&
pipeline_state_storage_file_header.magic ==
pipeline_state_storage_magic &&
pipeline_state_storage_file_header.magic_api ==
pipeline_state_storage_magic_api &&
xe::byte_swap(pipeline_state_storage_file_header.version_swapped) ==
} pipeline_storage_file_header;
if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
1, pipeline_storage_file_) &&
pipeline_storage_file_header.magic == pipeline_storage_magic &&
pipeline_storage_file_header.magic_api == pipeline_storage_magic_api &&
xe::byte_swap(pipeline_storage_file_header.version_swapped) ==
PipelineDescription::kVersion) {
uint64_t pipeline_state_storage_valid_bytes =
sizeof(pipeline_state_storage_file_header);
// Enqueue pipeline state descriptions written by previous Xenia executions
// until the end of the file or until a corrupted one is detected.
xe::filesystem::Seek(pipeline_state_storage_file_, 0, SEEK_END);
int64_t pipeline_state_storage_told_end =
xe::filesystem::Tell(pipeline_state_storage_file_);
size_t pipeline_state_storage_told_count =
size_t(pipeline_state_storage_told_end >=
int64_t(pipeline_state_storage_valid_bytes)
? (uint64_t(pipeline_state_storage_told_end) -
pipeline_state_storage_valid_bytes) /
sizeof(PipelineStoredDescription)
: 0);
if (pipeline_state_storage_told_count &&
xe::filesystem::Seek(pipeline_state_storage_file_,
int64_t(pipeline_state_storage_valid_bytes),
SEEK_SET)) {
uint64_t pipeline_storage_valid_bytes =
sizeof(pipeline_storage_file_header);
// Enqueue pipeline descriptions written by previous Xenia executions until
// the end of the file or until a corrupted one is detected.
xe::filesystem::Seek(pipeline_storage_file_, 0, SEEK_END);
int64_t pipeline_storage_told_end =
xe::filesystem::Tell(pipeline_storage_file_);
size_t pipeline_storage_told_count = size_t(
pipeline_storage_told_end >= int64_t(pipeline_storage_valid_bytes)
? (uint64_t(pipeline_storage_told_end) -
pipeline_storage_valid_bytes) /
sizeof(PipelineStoredDescription)
: 0);
if (pipeline_storage_told_count &&
xe::filesystem::Seek(pipeline_storage_file_,
int64_t(pipeline_storage_valid_bytes), SEEK_SET)) {
std::vector<PipelineStoredDescription> pipeline_stored_descriptions;
pipeline_stored_descriptions.resize(pipeline_state_storage_told_count);
pipeline_stored_descriptions.resize(fread(
pipeline_stored_descriptions.data(),
sizeof(PipelineStoredDescription), pipeline_state_storage_told_count,
pipeline_state_storage_file_));
pipeline_stored_descriptions.resize(pipeline_storage_told_count);
pipeline_stored_descriptions.resize(
fread(pipeline_stored_descriptions.data(),
sizeof(PipelineStoredDescription), pipeline_storage_told_count,
pipeline_storage_file_));
if (!pipeline_stored_descriptions.empty()) {
// Launch additional creation threads to use all cores to create
// pipeline state objects faster. Will also be using the main thread, so
// minus 1.
// pipelines faster. Will also be using the main thread, so minus 1.
size_t creation_thread_original_count = creation_threads_.size();
size_t creation_thread_needed_count =
std::max(std::min(pipeline_stored_descriptions.size(),
@ -539,10 +531,10 @@ void PipelineCache::InitializeShaderStorage(
{}, [this, creation_thread_index]() {
CreationThread(creation_thread_index);
});
creation_thread->set_name("D3D12 Pipeline States Additional");
creation_thread->set_name("D3D12 Pipelines");
creation_threads_.push_back(std::move(creation_thread));
}
size_t pipeline_states_created = 0;
size_t pipelines_created = 0;
for (const PipelineStoredDescription& pipeline_stored_description :
pipeline_stored_descriptions) {
const PipelineDescription& pipeline_description =
@ -554,30 +546,28 @@ void PipelineCache::InitializeShaderStorage(
0) != pipeline_stored_description.description_hash) {
break;
}
pipeline_state_storage_valid_bytes +=
sizeof(PipelineStoredDescription);
// Skip already known pipeline states - those have already been
// enqueued.
auto found_range = pipeline_states_.equal_range(
pipeline_storage_valid_bytes += sizeof(PipelineStoredDescription);
// Skip already known pipelines - those have already been enqueued.
auto found_range = pipelines_.equal_range(
pipeline_stored_description.description_hash);
bool pipeline_state_found = false;
bool pipeline_found = false;
for (auto it = found_range.first; it != found_range.second; ++it) {
PipelineState* found_pipeline_state = it->second;
if (!std::memcmp(&found_pipeline_state->description.description,
Pipeline* found_pipeline = it->second;
if (!std::memcmp(&found_pipeline->description.description,
&pipeline_description,
sizeof(pipeline_description))) {
pipeline_state_found = true;
pipeline_found = true;
break;
}
}
if (pipeline_state_found) {
if (pipeline_found) {
continue;
}
PipelineRuntimeDescription pipeline_runtime_description;
auto vertex_shader_it =
shader_map_.find(pipeline_description.vertex_shader_hash);
if (vertex_shader_it == shader_map_.end()) {
shaders_.find(pipeline_description.vertex_shader_hash);
if (vertex_shader_it == shaders_.end()) {
continue;
}
pipeline_runtime_description.vertex_shader = vertex_shader_it->second;
@ -586,8 +576,8 @@ void PipelineCache::InitializeShaderStorage(
}
if (pipeline_description.pixel_shader_hash) {
auto pixel_shader_it =
shader_map_.find(pipeline_description.pixel_shader_hash);
if (pixel_shader_it == shader_map_.end()) {
shaders_.find(pipeline_description.pixel_shader_hash);
if (pixel_shader_it == shaders_.end()) {
continue;
}
pipeline_runtime_description.pixel_shader = pixel_shader_it->second;
@ -607,36 +597,33 @@ void PipelineCache::InitializeShaderStorage(
std::memcpy(&pipeline_runtime_description.description,
&pipeline_description, sizeof(pipeline_description));
PipelineState* new_pipeline_state = new PipelineState;
new_pipeline_state->state = nullptr;
std::memcpy(&new_pipeline_state->description,
&pipeline_runtime_description,
Pipeline* new_pipeline = new Pipeline;
new_pipeline->state = nullptr;
std::memcpy(&new_pipeline->description, &pipeline_runtime_description,
sizeof(pipeline_runtime_description));
pipeline_states_.insert(
std::make_pair(pipeline_stored_description.description_hash,
new_pipeline_state));
COUNT_profile_set("gpu/pipeline_cache/pipeline_states",
pipeline_states_.size());
pipelines_.emplace(pipeline_stored_description.description_hash,
new_pipeline);
COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
if (!creation_threads_.empty()) {
// Submit the pipeline for creation to any available thread.
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_queue_.push_back(new_pipeline_state);
creation_queue_.push_back(new_pipeline);
}
creation_request_cond_.notify_one();
} else {
new_pipeline_state->state =
CreateD3D12PipelineState(pipeline_runtime_description);
new_pipeline->state =
CreateD3D12Pipeline(pipeline_runtime_description);
}
++pipeline_states_created;
++pipelines_created;
}
CreateQueuedPipelineStatesOnProcessorThread();
CreateQueuedPipelinesOnProcessorThread();
if (creation_threads_.size() > creation_thread_original_count) {
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_threads_shutdown_from_ = creation_thread_original_count;
// Assuming the queue is empty because of
// CreateQueuedPipelineStatesOnProcessorThread.
// CreateQueuedPipelinesOnProcessorThread.
}
creation_request_cond_.notify_all();
while (creation_threads_.size() > creation_thread_original_count) {
@ -664,26 +651,23 @@ void PipelineCache::InitializeShaderStorage(
}
}
XELOGGPU(
"Created {} graphics pipeline state objects from the storage in {} "
"milliseconds",
pipeline_states_created,
"Created {} graphics pipelines from the storage in {} milliseconds",
pipelines_created,
(xe::Clock::QueryHostTickCount() -
pipeline_state_storage_initialization_start_) *
pipeline_storage_initialization_start_) *
1000 / xe::Clock::QueryHostTickFrequency());
}
}
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_,
pipeline_state_storage_valid_bytes);
xe::filesystem::TruncateStdioFile(pipeline_storage_file_,
pipeline_storage_valid_bytes);
} else {
xe::filesystem::TruncateStdioFile(pipeline_state_storage_file_, 0);
pipeline_state_storage_file_header.magic = pipeline_state_storage_magic;
pipeline_state_storage_file_header.magic_api =
pipeline_state_storage_magic_api;
pipeline_state_storage_file_header.version_swapped =
xe::filesystem::TruncateStdioFile(pipeline_storage_file_, 0);
pipeline_storage_file_header.magic = pipeline_storage_magic;
pipeline_storage_file_header.magic_api = pipeline_storage_magic_api;
pipeline_storage_file_header.version_swapped =
xe::byte_swap(PipelineDescription::kVersion);
fwrite(&pipeline_state_storage_file_header,
sizeof(pipeline_state_storage_file_header), 1,
pipeline_state_storage_file_);
fwrite(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header),
1, pipeline_storage_file_);
}
shader_storage_root_ = storage_root;
@ -691,7 +675,7 @@ void PipelineCache::InitializeShaderStorage(
// Start the storage writing thread.
storage_write_flush_shaders_ = false;
storage_write_flush_pipeline_states_ = false;
storage_write_flush_pipelines_ = false;
storage_write_thread_shutdown_ = false;
storage_write_thread_ =
xe::threading::Thread::Create({}, [this]() { StorageWriteThread(); });
@ -708,12 +692,12 @@ void PipelineCache::ShutdownShaderStorage() {
storage_write_thread_.reset();
}
storage_write_shader_queue_.clear();
storage_write_pipeline_state_queue_.clear();
storage_write_pipeline_queue_.clear();
if (pipeline_state_storage_file_) {
fclose(pipeline_state_storage_file_);
pipeline_state_storage_file_ = nullptr;
pipeline_state_storage_file_flush_needed_ = false;
if (pipeline_storage_file_) {
fclose(pipeline_storage_file_);
pipeline_storage_file_ = nullptr;
pipeline_storage_file_flush_needed_ = false;
}
if (shader_storage_file_) {
@ -728,30 +712,29 @@ void PipelineCache::ShutdownShaderStorage() {
void PipelineCache::EndSubmission() {
if (shader_storage_file_flush_needed_ ||
pipeline_state_storage_file_flush_needed_) {
pipeline_storage_file_flush_needed_) {
{
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
if (shader_storage_file_flush_needed_) {
storage_write_flush_shaders_ = true;
}
if (pipeline_state_storage_file_flush_needed_) {
storage_write_flush_pipeline_states_ = true;
if (pipeline_storage_file_flush_needed_) {
storage_write_flush_pipelines_ = true;
}
}
storage_write_request_cond_.notify_one();
shader_storage_file_flush_needed_ = false;
pipeline_state_storage_file_flush_needed_ = false;
pipeline_storage_file_flush_needed_ = false;
}
if (!creation_threads_.empty()) {
CreateQueuedPipelineStatesOnProcessorThread();
// Await creation of all queued pipeline state objects.
CreateQueuedPipelinesOnProcessorThread();
// Await creation of all queued pipelines.
bool await_creation_completion_event;
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
// Assuming the creation queue is already empty (because the processor
// thread also worked on creating the leftover pipeline state objects), so
// only check if there are threads with pipeline state objects currently
// being created.
// thread also worked on creating the leftover pipelines), so only check
// if there are threads with pipelines currently being created.
await_creation_completion_event = creation_threads_busy_ != 0;
if (await_creation_completion_event) {
creation_completion_event_->Reset();
@ -765,7 +748,7 @@ void PipelineCache::EndSubmission() {
}
}
bool PipelineCache::IsCreatingPipelineStates() {
bool PipelineCache::IsCreatingPipelines() {
if (creation_threads_.empty()) {
return false;
}
@ -779,8 +762,8 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
uint32_t dword_count) {
// Hash the input memory and lookup the shader.
uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
auto it = shader_map_.find(data_hash);
if (it != shader_map_.end()) {
auto it = shaders_.find(data_hash);
if (it != shaders_.end()) {
// Shader has been previously loaded.
return it->second;
}
@ -790,7 +773,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
// again.
D3D12Shader* shader =
new D3D12Shader(shader_type, data_hash, host_address, dword_count);
shader_map_.insert({data_hash, shader});
shaders_.emplace(data_hash, shader);
return shader;
}
@ -798,11 +781,11 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid()
const {
// If the values this functions returns are changed, INVALIDATE THE SHADER
// STORAGE (increase kVersion for BOTH shaders and pipeline states)! The
// exception is when the function originally returned "unsupported", but
// started to return a valid value (in this case the shader wouldn't be cached
// in the first place). Otherwise games will not be able to locate shaders for
// draws for which the host vertex shader type has changed!
// STORAGE (increase kVersion for BOTH shaders and pipelines)! The exception
// is when the function originally returned "unsupported", but started to
// return a valid value (in this case the shader wouldn't be cached in the
// first place). Otherwise games will not be able to locate shaders for draws
// for which the host vertex shader type has changed!
const auto& regs = register_file_;
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
if (!xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode,
@ -929,13 +912,12 @@ bool PipelineCache::ConfigurePipeline(
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_state_handle_out,
ID3D12RootSignature** root_signature_out) {
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
assert_not_null(pipeline_state_handle_out);
assert_not_null(pipeline_handle_out);
assert_not_null(root_signature_out);
PipelineRuntimeDescription runtime_description;
@ -946,24 +928,24 @@ bool PipelineCache::ConfigurePipeline(
}
PipelineDescription& description = runtime_description.description;
if (current_pipeline_state_ != nullptr &&
!std::memcmp(&current_pipeline_state_->description.description,
&description, sizeof(description))) {
*pipeline_state_handle_out = current_pipeline_state_;
if (current_pipeline_ != nullptr &&
!std::memcmp(&current_pipeline_->description.description, &description,
sizeof(description))) {
*pipeline_handle_out = current_pipeline_;
*root_signature_out = runtime_description.root_signature;
return true;
}
// Find an existing pipeline state object in the cache.
// Find an existing pipeline in the cache.
uint64_t hash = XXH64(&description, sizeof(description), 0);
auto found_range = pipeline_states_.equal_range(hash);
auto found_range = pipelines_.equal_range(hash);
for (auto it = found_range.first; it != found_range.second; ++it) {
PipelineState* found_pipeline_state = it->second;
if (!std::memcmp(&found_pipeline_state->description.description,
&description, sizeof(description))) {
current_pipeline_state_ = found_pipeline_state;
*pipeline_state_handle_out = found_pipeline_state;
*root_signature_out = found_pipeline_state->description.root_signature;
Pipeline* found_pipeline = it->second;
if (!std::memcmp(&found_pipeline->description.description, &description,
sizeof(description))) {
current_pipeline_ = found_pipeline;
*pipeline_handle_out = found_pipeline;
*root_signature_out = found_pipeline->description.root_signature;
return true;
}
}
@ -974,33 +956,32 @@ bool PipelineCache::ConfigurePipeline(
return false;
}
PipelineState* new_pipeline_state = new PipelineState;
new_pipeline_state->state = nullptr;
std::memcpy(&new_pipeline_state->description, &runtime_description,
Pipeline* new_pipeline = new Pipeline;
new_pipeline->state = nullptr;
std::memcpy(&new_pipeline->description, &runtime_description,
sizeof(runtime_description));
pipeline_states_.insert(std::make_pair(hash, new_pipeline_state));
COUNT_profile_set("gpu/pipeline_cache/pipeline_states",
pipeline_states_.size());
pipelines_.emplace(hash, new_pipeline);
COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size());
if (!creation_threads_.empty()) {
// Submit the pipeline state object for creation to any available thread.
// Submit the pipeline for creation to any available thread.
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
creation_queue_.push_back(new_pipeline_state);
creation_queue_.push_back(new_pipeline);
}
creation_request_cond_.notify_one();
} else {
new_pipeline_state->state = CreateD3D12PipelineState(runtime_description);
new_pipeline->state = CreateD3D12Pipeline(runtime_description);
}
if (pipeline_state_storage_file_) {
if (pipeline_storage_file_) {
assert_not_null(storage_write_thread_);
pipeline_state_storage_file_flush_needed_ = true;
pipeline_storage_file_flush_needed_ = true;
{
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
storage_write_pipeline_state_queue_.emplace_back();
storage_write_pipeline_queue_.emplace_back();
PipelineStoredDescription& stored_description =
storage_write_pipeline_state_queue_.back();
storage_write_pipeline_queue_.back();
stored_description.description_hash = hash;
std::memcpy(&stored_description.description, &description,
sizeof(description));
@ -1008,8 +989,8 @@ bool PipelineCache::ConfigurePipeline(
storage_write_request_cond_.notify_all();
}
current_pipeline_state_ = new_pipeline_state;
*pipeline_state_handle_out = new_pipeline_state;
current_pipeline_ = new_pipeline;
*pipeline_handle_out = new_pipeline;
*root_signature_out = runtime_description.root_signature;
return true;
}
@ -1136,8 +1117,8 @@ bool PipelineCache::TranslateShader(
std::memcpy(
texture_binding_layouts_.data() + new_uid.vector_span_offset,
texture_bindings, texture_binding_layout_bytes);
texture_binding_layout_map_.insert(
{texture_binding_layout_hash, new_uid});
texture_binding_layout_map_.emplace(texture_binding_layout_hash,
new_uid);
}
}
if (bindless_sampler_count) {
@ -1179,8 +1160,8 @@ bool PipelineCache::TranslateShader(
vector_bindless_sampler_layout[i] =
sampler_bindings[i].bindless_descriptor_index;
}
bindless_sampler_layout_map_.insert(
{bindless_sampler_layout_hash, new_uid});
bindless_sampler_layout_map_.emplace(bindless_sampler_layout_hash,
new_uid);
}
}
}
@ -1508,8 +1489,7 @@ bool PipelineCache::GetCurrentStateDescription(
/* 16 */ PipelineBlendFactor::kSrcAlphaSat,
};
// Like kBlendFactorMap, but with color modes changed to alpha. Some
// pipeline state objects aren't created in Prey because a color mode is
// used for alpha.
// pipelines aren't created in Prey because a color mode is used for alpha.
static const PipelineBlendFactor kBlendFactorAlphaMap[32] = {
/* 0 */ PipelineBlendFactor::kZero,
/* 1 */ PipelineBlendFactor::kOne,
@ -1569,18 +1549,16 @@ bool PipelineCache::GetCurrentStateDescription(
return true;
}
ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState(
ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
const PipelineRuntimeDescription& runtime_description) {
const PipelineDescription& description = runtime_description.description;
if (runtime_description.pixel_shader != nullptr) {
XELOGGPU(
"Creating graphics pipeline state with VS {:016X}"
", PS {:016X}",
runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash());
XELOGGPU("Creating graphics pipeline with VS {:016X}, PS {:016X}",
runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash());
} else {
XELOGGPU("Creating graphics pipeline state with VS {:016X}",
XELOGGPU("Creating graphics pipeline with VS {:016X}",
runtime_description.vertex_shader->ucode_data_hash());
}
@ -1893,20 +1871,18 @@ ID3D12PipelineState* PipelineCache::CreateD3D12PipelineState(
}
}
// Create the pipeline state object.
// Create the D3D12 pipeline state object.
auto device =
command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();
ID3D12PipelineState* state;
if (FAILED(device->CreateGraphicsPipelineState(&state_desc,
IID_PPV_ARGS(&state)))) {
if (runtime_description.pixel_shader != nullptr) {
XELOGE(
"Failed to create graphics pipeline state with VS {:016X}"
", PS {:016X}",
runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash());
XELOGE("Failed to create graphics pipeline with VS {:016X}, PS {:016X}",
runtime_description.vertex_shader->ucode_data_hash(),
runtime_description.pixel_shader->ucode_data_hash());
} else {
XELOGE("Failed to create graphics pipeline state with VS {:016X}",
XELOGE("Failed to create graphics pipeline with VS {:016X}",
runtime_description.vertex_shader->ucode_data_hash());
}
return nullptr;
@ -1933,7 +1909,7 @@ void PipelineCache::StorageWriteThread() {
ucode_guest_endian.reserve(0xFFFF);
bool flush_shaders = false;
bool flush_pipeline_states = false;
bool flush_pipelines = false;
while (true) {
if (flush_shaders) {
@ -1941,15 +1917,15 @@ void PipelineCache::StorageWriteThread() {
assert_not_null(shader_storage_file_);
fflush(shader_storage_file_);
}
if (flush_pipeline_states) {
flush_pipeline_states = false;
assert_not_null(pipeline_state_storage_file_);
fflush(pipeline_state_storage_file_);
if (flush_pipelines) {
flush_pipelines = false;
assert_not_null(pipeline_storage_file_);
fflush(pipeline_storage_file_);
}
std::pair<const Shader*, reg::SQ_PROGRAM_CNTL> shader_pair = {};
PipelineStoredDescription pipeline_description;
bool write_pipeline_state = false;
bool write_pipeline = false;
{
std::unique_lock<std::mutex> lock(storage_write_request_lock_);
if (storage_write_thread_shutdown_) {
@ -1962,17 +1938,17 @@ void PipelineCache::StorageWriteThread() {
storage_write_flush_shaders_ = false;
flush_shaders = true;
}
if (!storage_write_pipeline_state_queue_.empty()) {
if (!storage_write_pipeline_queue_.empty()) {
std::memcpy(&pipeline_description,
&storage_write_pipeline_state_queue_.front(),
&storage_write_pipeline_queue_.front(),
sizeof(pipeline_description));
storage_write_pipeline_state_queue_.pop_front();
write_pipeline_state = true;
} else if (storage_write_flush_pipeline_states_) {
storage_write_flush_pipeline_states_ = false;
flush_pipeline_states = true;
storage_write_pipeline_queue_.pop_front();
write_pipeline = true;
} else if (storage_write_flush_pipelines_) {
storage_write_flush_pipelines_ = false;
flush_pipelines = true;
}
if (!shader_pair.first && !write_pipeline_state) {
if (!shader_pair.first && !write_pipeline) {
storage_write_request_cond_.wait(lock);
continue;
}
@ -1999,27 +1975,26 @@ void PipelineCache::StorageWriteThread() {
}
}
if (write_pipeline_state) {
assert_not_null(pipeline_state_storage_file_);
if (write_pipeline) {
assert_not_null(pipeline_storage_file_);
fwrite(&pipeline_description, sizeof(pipeline_description), 1,
pipeline_state_storage_file_);
pipeline_storage_file_);
}
}
}
void PipelineCache::CreationThread(size_t thread_index) {
while (true) {
PipelineState* pipeline_state_to_create = nullptr;
Pipeline* pipeline_to_create = nullptr;
// Check if need to shut down or set the completion event and dequeue the
// pipeline state if there is any.
// pipeline if there is any.
{
std::unique_lock<std::mutex> lock(creation_request_lock_);
if (thread_index >= creation_threads_shutdown_from_ ||
creation_queue_.empty()) {
if (creation_completion_set_event_ && creation_threads_busy_ == 0) {
// Last pipeline state object in the queue created - signal the event
// if requested.
// Last pipeline in the queue created - signal the event if requested.
creation_completion_set_event_ = false;
creation_completion_event_->Set();
}
@ -2029,23 +2004,22 @@ void PipelineCache::CreationThread(size_t thread_index) {
creation_request_cond_.wait(lock);
continue;
}
// Take the pipeline state from the queue and increment the busy thread
// count until the pipeline state object is created - other threads must
// be able to dequeue requests, but can't set the completion event until
// the pipeline state objects are fully created (rather than just started
// creating).
pipeline_state_to_create = creation_queue_.front();
// Take the pipeline from the queue and increment the busy thread count
// until the pipeline is created - other threads must be able to dequeue
// requests, but can't set the completion event until the pipelines are
// fully created (rather than just started creating).
pipeline_to_create = creation_queue_.front();
creation_queue_.pop_front();
++creation_threads_busy_;
}
// Create the D3D12 pipeline state object.
pipeline_state_to_create->state =
CreateD3D12PipelineState(pipeline_state_to_create->description);
pipeline_to_create->state =
CreateD3D12Pipeline(pipeline_to_create->description);
// Pipeline state object created - the thread is not busy anymore, safe to
// set the completion event if needed (at the next iteration, or in some
// other thread).
// Pipeline created - the thread is not busy anymore, safe to set the
// completion event if needed (at the next iteration, or in some other
// thread).
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
--creation_threads_busy_;
@ -2053,20 +2027,20 @@ void PipelineCache::CreationThread(size_t thread_index) {
}
}
void PipelineCache::CreateQueuedPipelineStatesOnProcessorThread() {
void PipelineCache::CreateQueuedPipelinesOnProcessorThread() {
assert_false(creation_threads_.empty());
while (true) {
PipelineState* pipeline_state_to_create;
Pipeline* pipeline_to_create;
{
std::lock_guard<std::mutex> lock(creation_request_lock_);
if (creation_queue_.empty()) {
break;
}
pipeline_state_to_create = creation_queue_.front();
pipeline_to_create = creation_queue_.front();
creation_queue_.pop_front();
}
pipeline_state_to_create->state =
CreateD3D12PipelineState(pipeline_state_to_create->description);
pipeline_to_create->state =
CreateD3D12Pipeline(pipeline_to_create->description);
}
}

View File

@ -29,6 +29,7 @@
#include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/d3d12/d3d12_api.h"
namespace xe {
namespace gpu {
@ -54,7 +55,7 @@ class PipelineCache {
void ShutdownShaderStorage();
void EndSubmission();
bool IsCreatingPipelineStates();
bool IsCreatingPipelines();
D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address,
const uint32_t* host_address, uint32_t dword_count);
@ -73,14 +74,12 @@ class PipelineCache {
xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format,
bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_state_handle_out,
ID3D12RootSignature** root_signature_out);
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
// Returns a pipeline state object with deferred creation by its handle. May
// return nullptr if failed to create the pipeline state object.
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
void* handle) const {
return reinterpret_cast<const PipelineState*>(handle)->state;
// Returns a pipeline with deferred creation by its handle. May return nullptr
// if failed to create the pipeline.
ID3D12PipelineState* GetD3D12PipelineByHandle(void* handle) const {
return reinterpret_cast<const Pipeline*>(handle)->state;
}
private:
@ -237,7 +236,7 @@ class PipelineCache {
const RenderTargetCache::PipelineRenderTarget render_targets[5],
PipelineRuntimeDescription& runtime_description_out);
ID3D12PipelineState* CreateD3D12PipelineState(
ID3D12PipelineState* CreateD3D12Pipeline(
const PipelineRuntimeDescription& runtime_description);
D3D12CommandProcessor& command_processor_;
@ -255,9 +254,9 @@ class PipelineCache {
IDxcUtils* dxc_utils_ = nullptr;
IDxcCompiler* dxc_compiler_ = nullptr;
// All loaded shaders mapped by their guest hash key.
// Ucode hash -> shader.
std::unordered_map<uint64_t, D3D12Shader*, xe::hash::IdentityHasher<uint64_t>>
shader_map_;
shaders_;
struct LayoutUID {
size_t uid;
@ -285,21 +284,20 @@ class PipelineCache {
// Xenos pixel shader provided.
std::vector<uint8_t> depth_only_pixel_shader_;
struct PipelineState {
struct Pipeline {
// nullptr if creation has failed.
ID3D12PipelineState* state;
PipelineRuntimeDescription description;
};
// All previously generated pipeline state objects identified by hash and the
// description.
std::unordered_multimap<uint64_t, PipelineState*,
// All previously generated pipelines identified by hash and the description.
std::unordered_multimap<uint64_t, Pipeline*,
xe::hash::IdentityHasher<uint64_t>>
pipeline_states_;
pipelines_;
// Previously used pipeline state object. This matches our current state
// settings and allows us to quickly(ish) reuse the pipeline state if no
// registers have changed.
PipelineState* current_pipeline_state_ = nullptr;
// Previously used pipeline. This matches our current state settings and
// allows us to quickly(ish) reuse the pipeline if no registers have been
// changed.
Pipeline* current_pipeline_ = nullptr;
// Currently open shader storage path.
std::filesystem::path shader_storage_root_;
@ -309,10 +307,9 @@ class PipelineCache {
FILE* shader_storage_file_ = nullptr;
bool shader_storage_file_flush_needed_ = false;
// Pipeline state storage output stream, for preload in the next emulator
// runs.
FILE* pipeline_state_storage_file_ = nullptr;
bool pipeline_state_storage_file_flush_needed_ = false;
// Pipeline storage output stream, for preload in the next emulator runs.
FILE* pipeline_storage_file_ = nullptr;
bool pipeline_storage_file_flush_needed_ = false;
// Thread for asynchronous writing to the storage streams.
void StorageWriteThread();
@ -322,28 +319,27 @@ class PipelineCache {
// thread is notified about its change via storage_write_request_cond_.
std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>>
storage_write_shader_queue_;
std::deque<PipelineStoredDescription> storage_write_pipeline_state_queue_;
std::deque<PipelineStoredDescription> storage_write_pipeline_queue_;
bool storage_write_flush_shaders_ = false;
bool storage_write_flush_pipeline_states_ = false;
bool storage_write_flush_pipelines_ = false;
bool storage_write_thread_shutdown_ = false;
std::unique_ptr<xe::threading::Thread> storage_write_thread_;
// Pipeline state object creation threads.
// Pipeline creation threads.
void CreationThread(size_t thread_index);
void CreateQueuedPipelineStatesOnProcessorThread();
void CreateQueuedPipelinesOnProcessorThread();
std::mutex creation_request_lock_;
std::condition_variable creation_request_cond_;
// Protected with creation_request_lock_, notify_one creation_request_cond_
// when set.
std::deque<PipelineState*> creation_queue_;
// Number of threads that are currently creating a pipeline state object -
// incremented when a pipeline state object is dequeued (the completion event
// can't be triggered before this is zero). Protected with
// creation_request_lock_.
std::deque<Pipeline*> creation_queue_;
// Number of threads that are currently creating a pipeline - incremented when
// a pipeline is dequeued (the completion event can't be triggered before this
// is zero). Protected with creation_request_lock_.
size_t creation_threads_busy_ = 0;
// Manual-reset event set when the last queued pipeline state object is
// created and there are no more pipeline state objects to create. This is
// triggered by the thread creating the last pipeline state object.
// Manual-reset event set when the last queued pipeline is created and there
// are no more pipelines to create. This is triggered by the thread creating
// the last pipeline.
std::unique_ptr<xe::threading::Event> creation_completion_event_;
// Whether setting the event on completion is queued. Protected with
// creation_request_lock_, notify_one creation_request_cond_ when set.

View File

@ -25,15 +25,6 @@ project("xenia-gpu-d3d12-trace-viewer")
kind("WindowedApp")
language("C++")
links({
"aes_128",
"capstone",
"dxbc",
"fmt",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xenia-apu",
"xenia-apu-nop",
"xenia-base",
@ -49,6 +40,17 @@ project("xenia-gpu-d3d12-trace-viewer")
"xenia-ui-d3d12",
"xenia-vfs",
"xenia-patcher",
})
links({
"aes_128",
"capstone",
"dxbc",
"fmt",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xxhash",
})
files({
@ -71,15 +73,6 @@ project("xenia-gpu-d3d12-trace-dump")
kind("ConsoleApp")
language("C++")
links({
"aes_128",
"capstone",
"dxbc",
"fmt",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xenia-apu",
"xenia-apu-nop",
"xenia-base",
@ -95,6 +88,17 @@ project("xenia-gpu-d3d12-trace-dump")
"xenia-ui-d3d12",
"xenia-vfs",
"xenia-patcher",
})
links({
"aes_128",
"capstone",
"dxbc",
"fmt",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"xxhash",
})
files({
@ -109,4 +113,4 @@ project("xenia-gpu-d3d12-trace-dump")
"2>&1",
"1>scratch/stdout-trace-dump.txt",
})
end
end

View File

@ -454,8 +454,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
// again and again and exit.
if (!conversion_needed || converted_index_count == 0) {
converted_indices.gpu_address = 0;
converted_indices_cache_.insert(
std::make_pair(converted_indices.key.value, converted_indices));
converted_indices_cache_.emplace(converted_indices.key.value,
converted_indices);
memory_regions_used_ |= memory_regions_used_bits;
return converted_index_count == 0 ? ConversionResult::kPrimitiveEmpty
: ConversionResult::kConversionNotNeeded;
@ -670,8 +670,8 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
// Cache and return the indices.
converted_indices.gpu_address = gpu_address;
converted_indices_cache_.insert(
std::make_pair(converted_indices.key.value, converted_indices));
converted_indices_cache_.emplace(converted_indices.key.value,
converted_indices);
memory_regions_used_ |= memory_regions_used_bits;
gpu_address_out = gpu_address;
index_count_out = converted_index_count;

View File

@ -277,20 +277,19 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
return false;
}
// Create the EDRAM load/store pipeline state objects.
// Create the EDRAM load/store pipelines.
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
const EdramLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i];
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState(
edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.load_shader, mode_info.load_shader_size,
edram_load_store_root_signature_);
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipelineState(
edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.store_shader, mode_info.store_shader_size,
edram_load_store_root_signature_);
if (edram_load_pipelines_[i] == nullptr ||
edram_store_pipelines_[i] == nullptr) {
XELOGE(
"Failed to create the EDRAM load/store pipeline states for mode {}",
i);
XELOGE("Failed to create the EDRAM load/store pipelines for mode {}",
i);
Shutdown();
return false;
}
@ -299,7 +298,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
}
}
// Create the resolve root signatures and pipeline state objects.
// Create the resolve root signatures and pipelines.
D3D12_ROOT_PARAMETER resolve_root_parameters[3];
// Copying root signature.
@ -369,7 +368,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
return false;
}
// Copying pipeline state objects.
// Copying pipelines.
uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1;
for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount);
++i) {
@ -381,63 +380,61 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
continue;
}
const auto& resolve_copy_shader = resolve_copy_shaders_[i];
ID3D12PipelineState* resolve_copy_pipeline_state =
ui::d3d12::util::CreateComputePipelineState(
ID3D12PipelineState* resolve_copy_pipeline =
ui::d3d12::util::CreateComputePipeline(
device, resolve_copy_shader.first, resolve_copy_shader.second,
resolve_copy_root_signature_);
if (resolve_copy_pipeline_state == nullptr) {
XELOGE("Failed to create {} resolve copy pipeline state",
if (resolve_copy_pipeline == nullptr) {
XELOGE("Failed to create {} resolve copy pipeline",
resolve_copy_shader_info.debug_name);
}
resolve_copy_pipeline_state->SetName(reinterpret_cast<LPCWSTR>(
resolve_copy_pipeline->SetName(reinterpret_cast<LPCWSTR>(
xe::to_utf16(resolve_copy_shader_info.debug_name).c_str()));
resolve_copy_pipeline_states_[i] = resolve_copy_pipeline_state;
resolve_copy_pipelines_[i] = resolve_copy_pipeline;
}
// Clearing pipeline state objects.
resolve_clear_32bpp_pipeline_state_ =
ui::d3d12::util::CreateComputePipelineState(
device,
resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs
: resolve_clear_32bpp_cs,
resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs)
: sizeof(resolve_clear_32bpp_cs),
resolve_clear_root_signature_);
if (resolve_clear_32bpp_pipeline_state_ == nullptr) {
XELOGE("Failed to create the 32bpp resolve clear pipeline state");
// Clearing pipelines.
resolve_clear_32bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
device,
resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs
: resolve_clear_32bpp_cs,
resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs)
: sizeof(resolve_clear_32bpp_cs),
resolve_clear_root_signature_);
if (resolve_clear_32bpp_pipeline_ == nullptr) {
XELOGE("Failed to create the 32bpp resolve clear pipeline");
Shutdown();
return false;
}
resolve_clear_32bpp_pipeline_state_->SetName(L"Resolve Clear 32bpp");
resolve_clear_64bpp_pipeline_state_ =
ui::d3d12::util::CreateComputePipelineState(
device,
resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs
: resolve_clear_64bpp_cs,
resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs)
: sizeof(resolve_clear_64bpp_cs),
resolve_clear_root_signature_);
if (resolve_clear_64bpp_pipeline_state_ == nullptr) {
XELOGE("Failed to create the 64bpp resolve clear pipeline state");
resolve_clear_32bpp_pipeline_->SetName(L"Resolve Clear 32bpp");
resolve_clear_64bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
device,
resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs
: resolve_clear_64bpp_cs,
resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs)
: sizeof(resolve_clear_64bpp_cs),
resolve_clear_root_signature_);
if (resolve_clear_64bpp_pipeline_ == nullptr) {
XELOGE("Failed to create the 64bpp resolve clear pipeline");
Shutdown();
return false;
}
resolve_clear_64bpp_pipeline_state_->SetName(L"Resolve Clear 64bpp");
resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp");
if (!edram_rov_used_) {
assert_false(resolution_scale_2x_);
resolve_clear_depth_24_32_pipeline_state_ =
ui::d3d12::util::CreateComputePipelineState(
resolve_clear_depth_24_32_pipeline_ =
ui::d3d12::util::CreateComputePipeline(
device, resolve_clear_depth_24_32_cs,
sizeof(resolve_clear_depth_24_32_cs),
resolve_clear_root_signature_);
if (resolve_clear_depth_24_32_pipeline_state_ == nullptr) {
if (resolve_clear_depth_24_32_pipeline_ == nullptr) {
XELOGE(
"Failed to create the 24-bit and 32-bit depth resolve clear pipeline "
"state");
Shutdown();
return false;
}
resolve_clear_64bpp_pipeline_state_->SetName(
resolve_clear_64bpp_pipeline_->SetName(
L"Resolve Clear 24-bit & 32-bit Depth");
}
@ -451,12 +448,12 @@ void RenderTargetCache::Shutdown() {
edram_snapshot_restore_pool_.reset();
ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_state_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_state_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_state_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_);
ui::d3d12::util::ReleaseAndNull(resolve_clear_root_signature_);
for (size_t i = 0; i < xe::countof(resolve_copy_pipeline_states_); ++i) {
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipeline_states_[i]);
for (size_t i = 0; i < xe::countof(resolve_copy_pipelines_); ++i) {
ui::d3d12::util::ReleaseAndNull(resolve_copy_pipelines_[i]);
}
ui::d3d12::util::ReleaseAndNull(resolve_copy_root_signature_);
for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
@ -1209,8 +1206,8 @@ bool RenderTargetCache::Resolve(const Memory& memory,
0, sizeof(copy_shader_constants) / sizeof(uint32_t),
&copy_shader_constants, 0);
}
command_processor_.SetComputePipelineState(
resolve_copy_pipeline_states_[size_t(copy_shader)]);
command_processor_.SetComputePipeline(
resolve_copy_pipelines_[size_t(copy_shader)]);
command_processor_.SubmitBarriers();
command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1);
@ -1279,9 +1276,9 @@ bool RenderTargetCache::Resolve(const Memory& memory,
command_list.D3DSetComputeRoot32BitConstants(
0, sizeof(depth_clear_constants) / sizeof(uint32_t),
&depth_clear_constants, 0);
command_processor_.SetComputePipelineState(
clear_float32_depth ? resolve_clear_depth_24_32_pipeline_state_
: resolve_clear_32bpp_pipeline_state_);
command_processor_.SetComputePipeline(
clear_float32_depth ? resolve_clear_depth_24_32_pipeline_
: resolve_clear_32bpp_pipeline_);
command_processor_.SubmitBarriers();
command_list.D3DDispatch(clear_group_count.first,
clear_group_count.second, 1);
@ -1301,10 +1298,10 @@ bool RenderTargetCache::Resolve(const Memory& memory,
0, sizeof(color_clear_constants) / sizeof(uint32_t),
&color_clear_constants, 0);
}
command_processor_.SetComputePipelineState(
command_processor_.SetComputePipeline(
resolve_info.color_edram_info.format_is_64bpp
? resolve_clear_64bpp_pipeline_state_
: resolve_clear_32bpp_pipeline_state_);
? resolve_clear_64bpp_pipeline_
: resolve_clear_32bpp_pipeline_);
command_processor_.SubmitBarriers();
command_list.D3DDispatch(clear_group_count.first,
clear_group_count.second, 1);
@ -1816,7 +1813,7 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
render_target->footprints, nullptr, nullptr,
&copy_buffer_size);
render_target->copy_buffer_size = uint32_t(copy_buffer_size);
render_targets_.insert(std::make_pair(key.value, render_target));
render_targets_.emplace(key.value, render_target);
COUNT_profile_set("gpu/render_target_cache/render_targets",
render_targets_.size());
#if 0
@ -2015,8 +2012,7 @@ void RenderTargetCache::StoreRenderTargetsToEdram() {
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format);
command_processor_.SetComputePipelineState(
edram_store_pipelines_[size_t(mode)]);
command_processor_.SetComputePipeline(edram_store_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples.
command_list.D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1);
@ -2140,8 +2136,7 @@ void RenderTargetCache::LoadRenderTargetsFromEdram(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
render_target->key.format);
command_processor_.SetComputePipelineState(
edram_load_pipelines_[size_t(mode)]);
command_processor_.SetComputePipeline(edram_load_pipelines_[size_t(mode)]);
// 1 group per 80x16 samples.
command_list.D3DDispatch(render_target->key.width_ss_div_80, edram_rows, 1);

View File

@ -237,14 +237,13 @@ class D3D12CommandProcessor;
// get each of the 4 host pixels for each sample.
class RenderTargetCache {
public:
// Direct3D 12 debug layer does some kaschenit-style trolling by giving errors
// that contradict each other when you use null RTV descriptors - if you set
// a valid format in RTVFormats in the pipeline state, it says that null
// descriptors can only be used if the format in the pipeline state is
// DXGI_FORMAT_UNKNOWN, however, if DXGI_FORMAT_UNKNOWN is set, it complains
// that the format in the pipeline doesn't match the RTV format. So we have to
// make render target bindings consecutive and remap the output indices in
// pixel shaders.
// Direct3D 12 debug layer is giving errors that contradict each other when
// you use null RTV descriptors - if you set a valid format in RTVFormats in
// the pipeline state, it says that null descriptors can only be used if the
// format in the pipeline state is DXGI_FORMAT_UNKNOWN, however, if
// DXGI_FORMAT_UNKNOWN is set, it complains that the format in the pipeline
// state doesn't match the RTV format. So we have to make render target
// bindings consecutive and remap the output indices in pixel shaders.
struct PipelineRenderTarget {
uint32_t guest_render_target;
DXGI_FORMAT format;
@ -304,8 +303,7 @@ class RenderTargetCache {
// performance difference, but with EDRAM loads/stores less conversion should
// be performed by the shaders if D24S8 is emulated as D24_UNORM_S8_UINT, and
// it's probably more accurate.
static inline DXGI_FORMAT GetDepthDXGIFormat(
xenos::DepthRenderTargetFormat format) {
static DXGI_FORMAT GetDepthDXGIFormat(xenos::DepthRenderTargetFormat format) {
return format == xenos::DepthRenderTargetFormat::kD24FS8
? DXGI_FORMAT_D32_FLOAT_S8X24_UINT
: DXGI_FORMAT_D24_UNORM_S8_UINT;
@ -537,7 +535,7 @@ class RenderTargetCache {
// 16: - EDRAM pitch in tiles.
uint32_t base_samples_2x_depth_pitch;
};
// EDRAM pipeline states for the RTV/DSV path.
// EDRAM pipelines for the RTV/DSV path.
static const EdramLoadStoreModeInfo
edram_load_store_mode_info_[size_t(EdramLoadStoreMode::kCount)];
ID3D12PipelineState*
@ -546,20 +544,20 @@ class RenderTargetCache {
ID3D12PipelineState*
edram_store_pipelines_[size_t(EdramLoadStoreMode::kCount)] = {};
// Resolve root signatures and pipeline state objects.
// Resolve root signatures and pipelines.
ID3D12RootSignature* resolve_copy_root_signature_ = nullptr;
static const std::pair<const uint8_t*, size_t>
resolve_copy_shaders_[size_t(draw_util::ResolveCopyShaderIndex::kCount)];
ID3D12PipelineState* resolve_copy_pipeline_states_[size_t(
ID3D12PipelineState* resolve_copy_pipelines_[size_t(
draw_util::ResolveCopyShaderIndex::kCount)] = {};
ID3D12RootSignature* resolve_clear_root_signature_ = nullptr;
// Clearing 32bpp color, depth with ROV, or unorm depth without ROV.
ID3D12PipelineState* resolve_clear_32bpp_pipeline_state_ = nullptr;
ID3D12PipelineState* resolve_clear_32bpp_pipeline_ = nullptr;
// Clearing 64bpp color.
ID3D12PipelineState* resolve_clear_64bpp_pipeline_state_ = nullptr;
ID3D12PipelineState* resolve_clear_64bpp_pipeline_ = nullptr;
// Clearing float depth without ROV, both the float24 and the host float32
// versions.
ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_state_ = nullptr;
ID3D12PipelineState* resolve_clear_depth_24_32_pipeline_ = nullptr;
// FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on
// Nvidia Maxwell 1st generation and older.

View File

@ -918,27 +918,24 @@ bool TextureCache::Initialize(bool edram_rov_used) {
return false;
}
// Create the loading pipeline state objects.
// Create the loading pipelines.
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
const LoadModeInfo& mode_info = load_mode_info_[i];
load_pipeline_states_[i] = ui::d3d12::util::CreateComputePipelineState(
load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.shader, mode_info.shader_size, load_root_signature_);
if (load_pipeline_states_[i] == nullptr) {
XELOGE(
"Failed to create the texture loading pipeline state object for mode "
"{}",
i);
if (load_pipelines_[i] == nullptr) {
XELOGE("Failed to create the texture loading pipeline for mode {}", i);
Shutdown();
return false;
}
if (IsResolutionScale2X() && mode_info.shader_2x != nullptr) {
load_pipeline_states_2x_[i] = ui::d3d12::util::CreateComputePipelineState(
load_pipelines_2x_[i] = ui::d3d12::util::CreateComputePipeline(
device, mode_info.shader_2x, mode_info.shader_2x_size,
load_root_signature_);
if (load_pipeline_states_2x_[i] == nullptr) {
if (load_pipelines_2x_[i] == nullptr) {
XELOGE(
"Failed to create the 2x-scaled texture loading pipeline state "
"for mode {}",
"Failed to create the 2x-scaled texture loading pipeline for mode "
"{}",
i);
Shutdown();
return false;
@ -1024,8 +1021,8 @@ void TextureCache::Shutdown() {
ui::d3d12::util::ReleaseAndNull(null_srv_descriptor_heap_);
for (uint32_t i = 0; i < uint32_t(LoadMode::kCount); ++i) {
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_2x_[i]);
ui::d3d12::util::ReleaseAndNull(load_pipeline_states_[i]);
ui::d3d12::util::ReleaseAndNull(load_pipelines_2x_[i]);
ui::d3d12::util::ReleaseAndNull(load_pipelines_[i]);
}
ui::d3d12::util::ReleaseAndNull(load_root_signature_);
@ -1892,7 +1889,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
if (IsResolutionScale2X() && key.tiled) {
LoadMode load_mode = GetLoadMode(key);
if (load_mode != LoadMode::kUnknown &&
load_pipeline_states_2x_[uint32_t(load_mode)] != nullptr) {
load_pipelines_2x_[uint32_t(load_mode)] != nullptr) {
uint32_t base_size = 0, mip_size = 0;
texture_util::GetTextureTotalSize(
key.dimension, key.width, key.height, key.depth, key.format,
@ -2047,7 +2044,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
}
texture->base_watch_handle = nullptr;
texture->mip_watch_handle = nullptr;
textures_.insert(std::make_pair(map_key, texture));
textures_.emplace(map_key, texture);
COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
textures_total_size_ += texture->resource_size;
COUNT_profile_set("gpu/texture_cache/total_size_mb",
@ -2079,10 +2076,10 @@ bool TextureCache::LoadTextureData(Texture* texture) {
return false;
}
bool scaled_resolve = texture->key.scaled_resolve ? true : false;
ID3D12PipelineState* pipeline_state =
scaled_resolve ? load_pipeline_states_2x_[uint32_t(load_mode)]
: load_pipeline_states_[uint32_t(load_mode)];
if (pipeline_state == nullptr) {
ID3D12PipelineState* pipeline = scaled_resolve
? load_pipelines_2x_[uint32_t(load_mode)]
: load_pipelines_[uint32_t(load_mode)];
if (pipeline == nullptr) {
return false;
}
const LoadModeInfo& load_mode_info = load_mode_info_[uint32_t(load_mode)];
@ -2296,7 +2293,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
load_mode_info.srv_bpe_log2);
}
}
command_processor_.SetComputePipelineState(pipeline_state);
command_processor_.SetComputePipeline(pipeline);
command_list.D3DSetComputeRootSignature(load_root_signature_);
command_list.D3DSetComputeRootDescriptorTable(2, descriptor_dest.second);
@ -2597,7 +2594,7 @@ uint32_t TextureCache::FindOrCreateTextureDescriptor(Texture& texture,
}
device->CreateShaderResourceView(
texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index));
texture.srv_descriptors.insert({descriptor_key, descriptor_index});
texture.srv_descriptors.emplace(descriptor_key, descriptor_index);
return descriptor_index;
}

View File

@ -106,18 +106,18 @@ class TextureCache {
bool operator!=(const TextureKey& key) const {
return GetMapKey() != key.GetMapKey() || bucket_key != key.bucket_key;
}
inline uint64_t GetMapKey() const {
uint64_t GetMapKey() const {
return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32);
}
inline void SetMapKey(uint64_t key) {
void SetMapKey(uint64_t key) {
map_key[0] = uint32_t(key);
map_key[1] = uint32_t(key >> 32);
}
inline bool IsInvalid() const {
bool IsInvalid() const {
// Zero base and zero width is enough for a binding to be invalid.
return map_key[0] == 0;
}
inline void MakeInvalid() {
void MakeInvalid() {
// Reset all for a stable hash.
SetMapKey(0);
bucket_key = 0;
@ -222,9 +222,7 @@ class TextureCache {
void MarkRangeAsResolved(uint32_t start_unscaled, uint32_t length_unscaled);
inline bool IsResolutionScale2X() const {
return scaled_resolve_buffer_ != nullptr;
}
bool IsResolutionScale2X() const { return scaled_resolve_buffer_ != nullptr; }
ID3D12Resource* GetScaledResolveBuffer() const {
return scaled_resolve_buffer_;
}
@ -233,7 +231,7 @@ class TextureCache {
uint32_t length_unscaled);
void UseScaledResolveBufferForReading();
void UseScaledResolveBufferForWriting();
inline void MarkScaledResolveBufferUAVWritesCommitNeeded() {
void MarkScaledResolveBufferUAVWritesCommitNeeded() {
if (scaled_resolve_buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
scaled_resolve_buffer_uav_writes_commit_needed_ = true;
}
@ -432,7 +430,7 @@ class TextureCache {
// Whether the signed version of the texture has a different representation on
// the host than its unsigned version (for example, if it's a fixed-point
// texture emulated with a larger host pixel format).
static inline bool IsSignedVersionSeparate(xenos::TextureFormat format) {
static bool IsSignedVersionSeparate(xenos::TextureFormat format) {
const HostFormat& host_format = host_formats_[uint32_t(format)];
return host_format.load_mode_snorm != LoadMode::kUnknown &&
host_format.load_mode_snorm != host_format.load_mode;
@ -441,26 +439,24 @@ class TextureCache {
// of block-compressed textures with 4x4-aligned dimensions on PC).
static bool IsDecompressionNeeded(xenos::TextureFormat format, uint32_t width,
uint32_t height);
static inline DXGI_FORMAT GetDXGIResourceFormat(xenos::TextureFormat format,
uint32_t width,
uint32_t height) {
static DXGI_FORMAT GetDXGIResourceFormat(xenos::TextureFormat format,
uint32_t width, uint32_t height) {
const HostFormat& host_format = host_formats_[uint32_t(format)];
return IsDecompressionNeeded(format, width, height)
? host_format.dxgi_format_uncompressed
: host_format.dxgi_format_resource;
}
static inline DXGI_FORMAT GetDXGIResourceFormat(TextureKey key) {
static DXGI_FORMAT GetDXGIResourceFormat(TextureKey key) {
return GetDXGIResourceFormat(key.format, key.width, key.height);
}
static inline DXGI_FORMAT GetDXGIUnormFormat(xenos::TextureFormat format,
uint32_t width,
uint32_t height) {
static DXGI_FORMAT GetDXGIUnormFormat(xenos::TextureFormat format,
uint32_t width, uint32_t height) {
const HostFormat& host_format = host_formats_[uint32_t(format)];
return IsDecompressionNeeded(format, width, height)
? host_format.dxgi_format_uncompressed
: host_format.dxgi_format_unorm;
}
static inline DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) {
static DXGI_FORMAT GetDXGIUnormFormat(TextureKey key) {
return GetDXGIUnormFormat(key.format, key.width, key.height);
}
@ -550,9 +546,9 @@ class TextureCache {
static const LoadModeInfo load_mode_info_[];
ID3D12RootSignature* load_root_signature_ = nullptr;
ID3D12PipelineState* load_pipeline_states_[size_t(LoadMode::kCount)] = {};
// Load pipeline state objects for 2x-scaled resolved targets.
ID3D12PipelineState* load_pipeline_states_2x_[size_t(LoadMode::kCount)] = {};
ID3D12PipelineState* load_pipelines_[size_t(LoadMode::kCount)] = {};
// Load pipelines for 2x-scaled resolved targets.
ID3D12PipelineState* load_pipelines_2x_[size_t(LoadMode::kCount)] = {};
std::unordered_multimap<uint64_t, Texture*> textures_;
uint64_t textures_total_size_ = 0;

View File

@ -111,6 +111,34 @@ int32_t FloatToD3D11Fixed16p8(float f32) {
return result.s;
}
void GetScissor(const RegisterFile& regs, Scissor& scissor_out) {
// FIXME(Triang3l): Screen scissor isn't applied here, but it seems to be
// unused on Xbox 360 Direct3D 9.
auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
uint32_t tl_x = pa_sc_window_scissor_tl.tl_x;
uint32_t tl_y = pa_sc_window_scissor_tl.tl_y;
uint32_t br_x = pa_sc_window_scissor_br.br_x;
uint32_t br_y = pa_sc_window_scissor_br.br_y;
if (!pa_sc_window_scissor_tl.window_offset_disable) {
auto pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();
tl_x = uint32_t(std::max(
int32_t(tl_x) + pa_sc_window_offset.window_x_offset, int32_t(0)));
tl_y = uint32_t(std::max(
int32_t(tl_y) + pa_sc_window_offset.window_y_offset, int32_t(0)));
br_x = uint32_t(std::max(
int32_t(br_x) + pa_sc_window_offset.window_x_offset, int32_t(0)));
br_y = uint32_t(std::max(
int32_t(br_y) + pa_sc_window_offset.window_y_offset, int32_t(0)));
}
br_x = std::max(br_x, tl_x);
br_y = std::max(br_y, tl_y);
scissor_out.left = tl_x;
scissor_out.top = tl_y;
scissor_out.width = br_x - tl_x;
scissor_out.height = br_y - tl_y;
}
xenos::CopySampleSelect SanitizeCopySampleSelect(
xenos::CopySampleSelect copy_sample_select, xenos::MsaaSamples msaa_samples,
bool is_depth) {

View File

@ -33,6 +33,14 @@ namespace draw_util {
// for use with the top-left rasterization rule later.
int32_t FloatToD3D11Fixed16p8(float f32);
struct Scissor {
uint32_t left;
uint32_t top;
uint32_t width;
uint32_t height;
};
void GetScissor(const RegisterFile& regs, Scissor& scissor_out);
// To avoid passing values that the shader won't understand (even though
// Direct3D 9 shouldn't pass them anyway).
xenos::CopySampleSelect SanitizeCopySampleSelect(

View File

@ -68,32 +68,34 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
break;
case AluVectorOpcode::kMul:
case AluVectorOpcode::kMad: {
bool is_mad = instr.vector_opcode == AluVectorOpcode::kMad;
if (is_mad) {
DxbcOpMAd(per_component_dest, operands[0], operands[1], operands[2]);
} else {
DxbcOpMul(per_component_dest, operands[0], operands[1]);
}
// Shader Model 3: 0 or denormal * anything = 0.
// FIXME(Triang3l): Signed zero needs research and handling.
uint32_t absolute_different =
// Not using DXBC mad to prevent fused multiply-add (mul followed by add
// may be optimized into non-fused mad by the driver in the identical
// operands case also).
DxbcOpMul(per_component_dest, operands[0], operands[1]);
uint32_t multiplicands_different =
used_result_components &
~instr.vector_operands[0].GetAbsoluteIdenticalComponents(
~instr.vector_operands[0].GetIdenticalComponents(
instr.vector_operands[1]);
if (absolute_different) {
if (multiplicands_different) {
// Shader Model 3: +-0 or denormal * anything = +0.
uint32_t is_zero_temp = PushSystemTemp();
DxbcOpMin(DxbcDest::R(is_zero_temp, absolute_different),
DxbcOpMin(DxbcDest::R(is_zero_temp, multiplicands_different),
operands[0].Abs(), operands[1].Abs());
// min isn't required to flush denormals, eq is.
DxbcOpEq(DxbcDest::R(is_zero_temp, absolute_different),
DxbcOpEq(DxbcDest::R(is_zero_temp, multiplicands_different),
DxbcSrc::R(is_zero_temp), DxbcSrc::LF(0.0f));
DxbcOpMovC(DxbcDest::R(system_temp_result_, absolute_different),
DxbcSrc::R(is_zero_temp),
is_mad ? operands[2] : DxbcSrc::LF(0.0f),
// Not replacing true `0 + term` with movc of the term because +0 + -0
// should result in +0, not -0.
DxbcOpMovC(DxbcDest::R(system_temp_result_, multiplicands_different),
DxbcSrc::R(is_zero_temp), DxbcSrc::LF(0.0f),
DxbcSrc::R(system_temp_result_));
// Release is_zero_temp.
PopSystemTemp();
}
if (instr.vector_opcode == AluVectorOpcode::kMad) {
DxbcOpAdd(per_component_dest, DxbcSrc::R(system_temp_result_),
operands[2]);
}
} break;
case AluVectorOpcode::kMax:
@ -179,69 +181,40 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
component_count = 4;
}
result_swizzle = DxbcSrc::kXXXX;
uint32_t absolute_different =
uint32_t((1 << component_count) - 1) &
~instr.vector_operands[0].GetAbsoluteIdenticalComponents(
instr.vector_operands[1]);
if (absolute_different) {
// Shader Model 3: 0 or denormal * anything = 0.
// FIXME(Triang3l): Signed zero needs research and handling.
// Add component products only if non-zero. For dp4, 16 scalar
// operations in the worst case (as opposed to always 20 for
// eq/movc/eq/movc/dp4 or min/eq/movc/movc/dp4 for preparing operands
// for dp4).
DxbcOpMul(DxbcDest::R(system_temp_result_, 0b0001),
operands[0].SelectFromSwizzled(0),
operands[1].SelectFromSwizzled(0));
if (absolute_different & 0b0001) {
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0010),
operands[0].SelectFromSwizzled(0).Abs(),
operands[1].SelectFromSwizzled(0).Abs());
DxbcOpEq(DxbcDest::R(system_temp_result_, 0b0010),
DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY),
uint32_t different = uint32_t((1 << component_count) - 1) &
~instr.vector_operands[0].GetIdenticalComponents(
instr.vector_operands[1]);
for (uint32_t i = 0; i < component_count; ++i) {
DxbcOpMul(DxbcDest::R(system_temp_result_, i ? 0b0010 : 0b0001),
operands[0].SelectFromSwizzled(i),
operands[1].SelectFromSwizzled(i));
if ((different & (1 << i)) != 0) {
// Shader Model 3: +-0 or denormal * anything = +0 (also not replacing
// true `0 + term` with movc of the term because +0 + -0 should result
// in +0, not -0).
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0100),
operands[0].SelectFromSwizzled(i).Abs(),
operands[1].SelectFromSwizzled(i).Abs());
DxbcOpEq(DxbcDest::R(system_temp_result_, 0b0100),
DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ),
DxbcSrc::LF(0.0f));
DxbcOpMovC(DxbcDest::R(system_temp_result_, 0b0001),
DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY),
DxbcSrc::LF(0.0f),
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX));
}
for (uint32_t i = 1; i < component_count; ++i) {
bool component_different = (absolute_different & (1 << i)) != 0;
DxbcOpMAd(DxbcDest::R(system_temp_result_,
component_different ? 0b0010 : 0b0001),
operands[0].SelectFromSwizzled(i),
operands[1].SelectFromSwizzled(i),
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX));
if (component_different) {
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0100),
operands[0].SelectFromSwizzled(i).Abs(),
operands[1].SelectFromSwizzled(i).Abs());
DxbcOpEq(DxbcDest::R(system_temp_result_, 0b0100),
DxbcOpMovC(DxbcDest::R(system_temp_result_, i ? 0b0010 : 0b0001),
DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ),
DxbcSrc::LF(0.0f));
DxbcOpMovC(DxbcDest::R(system_temp_result_, 0b0001),
DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ),
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX),
DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY));
}
DxbcSrc::LF(0.0f),
DxbcSrc::R(system_temp_result_,
i ? DxbcSrc::kYYYY : DxbcSrc::kXXXX));
}
} else {
if (component_count == 2) {
DxbcOpDP2(DxbcDest::R(system_temp_result_, 0b0001), operands[0],
operands[1]);
} else if (component_count == 3) {
DxbcOpDP3(DxbcDest::R(system_temp_result_, 0b0001), operands[0],
operands[1]);
} else {
assert_true(component_count == 4);
DxbcOpDP4(DxbcDest::R(system_temp_result_, 0b0001), operands[0],
operands[1]);
if (i) {
// Not using DXBC dp# to avoid fused multiply-add, PC GPUs are scalar
// as of 2020 anyway, and not using mad for the same reason (mul
// followed by add may be optimized into non-fused mad by the driver
// in the identical operands case also).
DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001),
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX),
DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY));
}
}
if (component_count == 2) {
// Add the third operand. Since floating-point addition isn't
// associative, even though adding this in multiply-add for the first
// component would be faster, it's safer to add here, in the end.
DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001),
DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX),
operands[2].SelectFromSwizzled(0));
@ -592,14 +565,13 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
DxbcOpMov(DxbcDest::R(system_temp_result_, 0b0001), DxbcSrc::LF(1.0f));
}
if (used_result_components & 0b0010) {
// Shader Model 3: 0 or denormal * anything = 0.
// FIXME(Triang3l): Signed zero needs research and handling.
DxbcOpMul(DxbcDest::R(system_temp_result_, 0b0010),
operands[0].SelectFromSwizzled(1),
operands[1].SelectFromSwizzled(1));
if (!(instr.vector_operands[0].GetAbsoluteIdenticalComponents(
if (!(instr.vector_operands[0].GetIdenticalComponents(
instr.vector_operands[1]) &
0b0010)) {
// Shader Model 3: +-0 or denormal * anything = +0.
DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0100),
operands[0].SelectFromSwizzled(1).Abs(),
operands[1].SelectFromSwizzled(1).Abs());
@ -700,8 +672,7 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
DxbcOpMul(ps_dest, operand_0_a, operand_0_b);
if (instr.scalar_operands[0].components[0] !=
instr.scalar_operands[0].components[1]) {
// Shader Model 3: 0 or denormal * anything = 0.
// FIXME(Triang3l): Signed zero needs research and handling.
// Shader Model 3: +-0 or denormal * anything = +0.
uint32_t is_zero_temp = PushSystemTemp();
DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(),
operand_0_b.Abs());
@ -714,58 +685,50 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
PopSystemTemp();
}
break;
case AluScalarOpcode::kMulsPrev: {
// Shader Model 3: 0 or denormal * anything = 0.
// FIXME(Triang3l): Signed zero needs research and handling.
uint32_t is_zero_temp = PushSystemTemp();
DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(),
ps_src.Abs());
// min isn't required to flush denormals, eq is.
DxbcOpEq(DxbcDest::R(is_zero_temp, 0b0001),
DxbcSrc::R(is_zero_temp, DxbcSrc::kXXXX), DxbcSrc::LF(0.0f));
DxbcOpMul(ps_dest, operand_0_a, ps_src);
DxbcOpMovC(ps_dest, DxbcSrc::R(is_zero_temp, DxbcSrc::kXXXX),
DxbcSrc::LF(0.0f), ps_src);
// Release is_zero_temp.
PopSystemTemp();
} break;
case AluScalarOpcode::kMulsPrev:
case AluScalarOpcode::kMulsPrev2: {
uint32_t test_temp = PushSystemTemp();
// Check if need to select the src0.a * ps case.
// ps != -FLT_MAX.
DxbcOpNE(DxbcDest::R(test_temp, 0b0001), ps_src, DxbcSrc::LF(-FLT_MAX));
// isfinite(ps), or |ps| <= FLT_MAX, or -|ps| >= -FLT_MAX, since -FLT_MAX
// is already loaded to an SGPR, this is also false if it's NaN.
DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -ps_src.Abs(),
DxbcSrc::LF(-FLT_MAX));
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
// isfinite(src0.b).
DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -operand_0_b.Abs(),
DxbcSrc::LF(-FLT_MAX));
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
// src0.b > 0 (need !(src0.b <= 0), but src0.b has already been checked
// for NaN).
DxbcOpLT(DxbcDest::R(test_temp, 0b0010), DxbcSrc::LF(0.0f), operand_0_b);
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
DxbcOpIf(true, DxbcSrc::R(test_temp, DxbcSrc::kXXXX));
// Shader Model 3: 0 or denormal * anything = 0.
// ps is already known to be not NaN or Infinity, so multiplying it by 0
// will result in 0. However, src0.a can be anything, so the result should
// be zero if ps is zero.
// FIXME(Triang3l): Signed zero needs research and handling.
DxbcOpEq(DxbcDest::R(test_temp, 0b0001), ps_src, DxbcSrc::LF(0.0f));
if (instr.scalar_opcode == AluScalarOpcode::kMulsPrev2) {
// Check if need to select the src0.a * ps case.
// ps != -FLT_MAX.
DxbcOpNE(DxbcDest::R(test_temp, 0b0001), ps_src, DxbcSrc::LF(-FLT_MAX));
// isfinite(ps), or |ps| <= FLT_MAX, or -|ps| >= -FLT_MAX, since
// -FLT_MAX is already loaded to an SGPR, this is also false if it's
// NaN.
DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -ps_src.Abs(),
DxbcSrc::LF(-FLT_MAX));
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
// isfinite(src0.b).
DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -operand_0_b.Abs(),
DxbcSrc::LF(-FLT_MAX));
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
// src0.b > 0 (need !(src0.b <= 0), but src0.b has already been checked
// for NaN).
DxbcOpLT(DxbcDest::R(test_temp, 0b0010), DxbcSrc::LF(0.0f),
operand_0_b);
DxbcOpAnd(DxbcDest::R(test_temp, 0b0001),
DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
DxbcSrc::R(test_temp, DxbcSrc::kYYYY));
DxbcOpIf(true, DxbcSrc::R(test_temp, DxbcSrc::kXXXX));
}
// Shader Model 3: +-0 or denormal * anything = +0.
DxbcOpMin(DxbcDest::R(test_temp, 0b0001), operand_0_a.Abs(),
ps_src.Abs());
// min isn't required to flush denormals, eq is.
DxbcOpEq(DxbcDest::R(test_temp, 0b0001),
DxbcSrc::R(test_temp, DxbcSrc::kXXXX), DxbcSrc::LF(0.0f));
DxbcOpMul(ps_dest, operand_0_a, ps_src);
DxbcOpMovC(ps_dest, DxbcSrc::R(test_temp, DxbcSrc::kXXXX),
DxbcSrc::LF(0.0f), ps_src);
DxbcOpElse();
DxbcOpMov(ps_dest, DxbcSrc::LF(-FLT_MAX));
DxbcOpEndIf();
if (instr.scalar_opcode == AluScalarOpcode::kMulsPrev2) {
DxbcOpElse();
DxbcOpMov(ps_dest, DxbcSrc::LF(-FLT_MAX));
DxbcOpEndIf();
}
// Release test_temp.
PopSystemTemp();
} break;
@ -1023,11 +986,10 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
case AluScalarOpcode::kMulsc0:
case AluScalarOpcode::kMulsc1:
DxbcOpMul(ps_dest, operand_0_a, operand_1);
if (!(instr.scalar_operands[0].GetAbsoluteIdenticalComponents(
if (!(instr.scalar_operands[0].GetIdenticalComponents(
instr.scalar_operands[1]) &
0b0001)) {
// Shader Model 3: 0 or denormal * anything = 0.
// FIXME(Triang3l): Signed zero needs research and handling.
// Shader Model 3: +-0 or denormal * anything = +0.
uint32_t is_zero_temp = PushSystemTemp();
DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(),
operand_1.Abs());

View File

@ -99,8 +99,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0),
DxbcSrc::LU(~uint32_t(3)));
}
// Add the word offset from the instruction, plus the offset of the first
// needed word within the element.
// Add the word offset from the instruction (signed), plus the offset of the
// first needed word within the element.
uint32_t first_word_index;
xe::bit_scan_forward(needed_words, &first_word_index);
int32_t first_word_buffer_offset =
@ -1730,10 +1730,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
}
uint32_t texture_binding_index_unsigned =
FindOrAddTextureBinding(tfetch_index, srv_dimension, false);
const TextureBinding& texture_binding_unsigned =
texture_bindings_[texture_binding_index_unsigned];
uint32_t texture_binding_index_signed =
FindOrAddTextureBinding(tfetch_index, srv_dimension, true);
const TextureBinding& texture_binding_unsigned =
texture_bindings_[texture_binding_index_unsigned];
const TextureBinding& texture_binding_signed =
texture_bindings_[texture_binding_index_signed];
DxbcSrc srv_unsigned(DxbcSrc::LF(0.0f)), srv_signed(DxbcSrc::LF(0.0f));

View File

@ -135,7 +135,7 @@ X_STATUS GraphicsSystem::Setup(cpu::Processor* processor,
}));
// As we run vblank interrupts the debugger must be able to suspend us.
vsync_worker_thread_->set_can_debugger_suspend(true);
vsync_worker_thread_->set_name("GraphicsSystem Vsync");
vsync_worker_thread_->set_name("GPU VSync");
vsync_worker_thread_->Create();
if (cvars::trace_gpu_stream) {

View File

@ -65,17 +65,17 @@ enum class InstructionStorageTarget {
// disassembly (because oPts.x000 will be assembled, but oPts.x00_ has both
// skipped components and zeros, which cannot be encoded, and therefore it will
// not).
constexpr uint32_t GetInstructionStorageTargetUsedComponents(
constexpr uint32_t GetInstructionStorageTargetUsedComponentCount(
InstructionStorageTarget target) {
switch (target) {
case InstructionStorageTarget::kNone:
return 0b0000;
return 0;
case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex:
return 0b0111;
return 3;
case InstructionStorageTarget::kDepth:
return 0b0001;
return 1;
default:
return 0b1111;
return 4;
}
}
@ -136,8 +136,9 @@ struct InstructionResult {
// Returns the write mask containing only components actually present in the
// target.
uint32_t GetUsedWriteMask() const {
return original_write_mask &
GetInstructionStorageTargetUsedComponents(storage_target);
uint32_t target_component_count =
GetInstructionStorageTargetUsedComponentCount(storage_target);
return original_write_mask & ((1 << target_component_count) - 1);
}
// True if the components are in their 'standard' swizzle arrangement (xyzw).
bool IsStandardSwizzle() const {
@ -161,6 +162,28 @@ struct InstructionResult {
}
return used_components;
}
// Returns which components of the used write mask are constant, and what
// values they have.
uint32_t GetUsedConstantComponents(uint32_t& constant_values_out) const {
uint32_t constant_components = 0;
uint32_t constant_values = 0;
uint32_t used_write_mask = GetUsedWriteMask();
for (uint32_t i = 0; i < 4; ++i) {
if (!(used_write_mask & (1 << i))) {
continue;
}
SwizzleSource component = components[i];
if (component >= SwizzleSource::kX && component <= SwizzleSource::kW) {
continue;
}
constant_components |= 1 << i;
if (component == SwizzleSource::k1) {
constant_values |= 1 << i;
}
}
constant_values_out = constant_values;
return constant_components;
}
};
enum class InstructionStorageSource {
@ -212,14 +235,18 @@ struct InstructionOperand {
return false;
}
// Returns which components of two operands are identical, but may have
// different signs (for simplicity of usage with GetComponent, treating the
// rightmost component as replicated).
uint32_t GetAbsoluteIdenticalComponents(
const InstructionOperand& other) const {
// Returns which components of two operands will always be bitwise equal
// (disregarding component_count for simplicity of usage with GetComponent,
// treating the rightmost component as replicated). This, strictly with all
// conditions, must be used when emulating Shader Model 3 +-0 * x = +0
// multiplication behavior with IEEE-compliant multiplication (because
// -0 * |-0|, or -0 * +0, is -0, while the result must be +0).
uint32_t GetIdenticalComponents(const InstructionOperand& other) const {
if (storage_source != other.storage_source ||
storage_index != other.storage_index ||
storage_addressing_mode != other.storage_addressing_mode) {
storage_addressing_mode != other.storage_addressing_mode ||
is_negated != other.is_negated ||
is_absolute_value != other.is_absolute_value) {
return 0;
}
uint32_t identical_components = 0;
@ -229,16 +256,6 @@ struct InstructionOperand {
}
return identical_components;
}
// Returns which components of two operands will always be bitwise equal, but
// may have different signs (disregarding component_count for simplicity of
// usage with GetComponent, treating the rightmost component as replicated).
uint32_t GetIdenticalComponents(const InstructionOperand& other) const {
if (is_negated != other.is_negated ||
is_absolute_value != other.is_absolute_value) {
return 0;
}
return GetAbsoluteIdenticalComponents(other);
}
};
struct ParsedExecInstruction {

View File

@ -25,6 +25,9 @@ namespace gpu {
// system page size granularity.
class SharedMemory {
public:
static constexpr uint32_t kBufferSizeLog2 = 29;
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
virtual ~SharedMemory();
// Call in the implementation-specific ClearCache.
virtual void ClearCache();
@ -98,9 +101,6 @@ class SharedMemory {
// destructor.
void ShutdownCommon();
static constexpr uint32_t kBufferSizeLog2 = 29;
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
// Sparse allocations are 4 MB, so not too many of them are allocated, but
// also not to waste too much memory for padding (with 16 MB there's too
// much).

View File

@ -800,13 +800,26 @@ static_assert_size(TextureFetchInstruction, 12);
// Both are valid only within the current ALU clause. They are not modified
// when the instruction that would write them fails its predication check.
// - Direct3D 9 rules (like in GCN v_*_legacy_f32 instructions) for
// multiplication (0 or denormal * anything = 0) wherever it's present (mul,
// mad, dp, etc.) and for NaN in min/max. It's very important to respect this
// rule for multiplication, as games often rely on it in vector normalization
// (rcp and mul), Infinity * 0 resulting in NaN breaks a lot of things in
// games - causes white screen in Halo 3, white specular on characters in GTA
// IV.
// TODO(Triang3l): Investigate signed zero handling in multiplication.
// multiplication (+-0 or denormal * anything = +0) wherever it's present
// (mul, mad, dp, etc.) and for NaN in min/max. It's very important to respect
// this rule for multiplication, as games often rely on it in vector
// normalization (rcp and mul), Infinity * 0 resulting in NaN breaks a lot of
// things in games - causes white screen in Halo 3, white specular on
// characters in GTA IV. The result is always positive zero in this case, no
// matter what the signs of the other operands are, according to R5xx
// Acceleration section 8.7.5 "Legacy multiply behavior" and testing on
// Adreno 200. This means that the following need to be taken into account
// (according to 8.7.2 "ALU Non-Transcendental Floating Point"):
// - +0 * -0 is -0 with IEEE conformance, however, with this legacy SM3
// handling, it should result in +0.
// - +0 + -0 is +0, so multiply-add should not be replaced with conditional
// move of the third operand in case of zero multiplicands, because the term
// may be -0, while the result should be +0 in this case.
// http://developer.amd.com/wordpress/media/2013/10/R5xx_Acceleration_v1.5.pdf
// Multiply-add also appears to be not fused (the SM3 behavior instruction on
// GCN is called v_mad_legacy_f32, not v_fma_legacy_f32) - shader translators
// should not use instructions that may be interpreted by the host GPU as
// fused multiply-add.
enum class AluScalarOpcode : uint32_t {
// Floating-Point Add

View File

@ -30,17 +30,6 @@ project("xenia-gpu-vulkan-trace-viewer")
kind("WindowedApp")
language("C++")
links({
"aes_128",
"capstone",
"fmt",
"glslang-spirv",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"spirv-tools",
"volk",
"xenia-apu",
"xenia-apu-nop",
"xenia-base",
@ -57,6 +46,19 @@ project("xenia-gpu-vulkan-trace-viewer")
"xenia-ui-vulkan",
"xenia-vfs",
"xenia-patcher",
})
links({
"aes_128",
"capstone",
"fmt",
"glslang-spirv",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"spirv-tools",
"volk",
"xxhash",
})
defines({
@ -98,17 +100,6 @@ project("xenia-gpu-vulkan-trace-dump")
kind("ConsoleApp")
language("C++")
links({
"aes_128",
"capstone",
"fmt",
"glslang-spirv",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"spirv-tools",
"volk",
"xenia-apu",
"xenia-apu-nop",
"xenia-base",
@ -125,6 +116,19 @@ project("xenia-gpu-vulkan-trace-dump")
"xenia-ui-vulkan",
"xenia-vfs",
"xenia-patcher",
})
links({
"aes_128",
"capstone",
"fmt",
"glslang-spirv",
"imgui",
"libavcodec",
"libavutil",
"mspack",
"snappy",
"spirv-tools",
"volk",
"xxhash",
})
defines({

View File

@ -41,11 +41,11 @@ project("xenia-hid-demo")
filter("platforms:Linux")
links({
"SDL2",
"vulkan",
"X11",
"xcb",
"X11-xcb",
"vulkan",
"SDL2",
})
filter("platforms:Windows")

View File

@ -359,7 +359,7 @@ void KernelState::SetExecutableModule(object_ref<UserModule> module) {
}
return 0;
}));
dispatch_thread_->set_name("Kernel Dispatch Thread");
dispatch_thread_->set_name("Kernel Dispatch");
dispatch_thread_->Create();
}
}

View File

@ -8,6 +8,7 @@
*/
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/kernel/util/shim_utils.h"
#include "xenia/kernel/xam/xam_private.h"
@ -235,7 +236,8 @@ dword_result_t XamContentCreateDeviceEnumerator(dword_t content_type,
xe::store_and_swap(&dev->device_type, dummy_device_info_.device_type);
xe::store_and_swap(&dev->total_bytes, dummy_device_info_.total_bytes);
xe::store_and_swap(&dev->free_bytes, dummy_device_info_.free_bytes);
xe::copy_and_swap(dev->name, dummy_device_info_.name, 28);
xe::copy_and_swap(dev->name, dummy_device_info_.name,
xe::countof(dev->name));
}
*handle_out = e->handle();

View File

@ -9,6 +9,7 @@
#include "xenia/base/logging.h"
#include "xenia/base/cvar.h"
#include "xenia/base/string_util.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/kernel/user_module.h"
#include "xenia/kernel/util/shim_utils.h"
@ -77,15 +78,15 @@ static SYSTEMTIME xeGetLocalSystemTime(uint64_t filetime) {
void XamFormatDateString(dword_t unk, qword_t filetime, lpvoid_t output_buffer,
dword_t output_count) {
std::memset(output_buffer, 0, output_count * 2);
std::memset(output_buffer, 0, output_count * sizeof(char16_t));
// TODO: implement this for other platforms
#if XE_PLATFORM_WIN32
auto st = xeGetLocalSystemTime(filetime);
// TODO: format this depending on users locale?
auto str = fmt::format(u"{:02d}/{:02d}/{}", st.wMonth, st.wDay, st.wYear);
auto copy_length = std::min(size_t(output_count), str.size()) * 2;
xe::copy_and_swap(output_buffer.as<char16_t*>(), str.c_str(), copy_length);
xe::string_util::copy_and_swap_truncating(output_buffer.as<char16_t*>(), str,
output_count);
#else
assert_always();
#endif
@ -94,15 +95,15 @@ DECLARE_XAM_EXPORT1(XamFormatDateString, kNone, kImplemented);
void XamFormatTimeString(dword_t unk, qword_t filetime, lpvoid_t output_buffer,
dword_t output_count) {
std::memset(output_buffer, 0, output_count * 2);
std::memset(output_buffer, 0, output_count * sizeof(char16_t));
// TODO: implement this for other platforms
#if XE_PLATFORM_WIN32
auto st = xeGetLocalSystemTime(filetime);
// TODO: format this depending on users locale?
auto str = fmt::format(u"{:02d}:{:02d}", st.wHour, st.wMinute);
auto copy_count = std::min(size_t(output_count), str.size());
xe::copy_and_swap(output_buffer.as<char16_t*>(), str.c_str(), copy_count);
xe::string_util::copy_and_swap_truncating(output_buffer.as<char16_t*>(), str,
output_count);
#else
assert_always();
#endif
@ -124,9 +125,8 @@ dword_result_t keXamBuildResourceLocator(uint64_t module,
path = fmt::format(u"section://{:X},{}#{}", (uint32_t)module, container,
resource);
}
auto copy_count = std::min(size_t(buffer_count), path.size());
xe::copy_and_swap(buffer_ptr.as<char16_t*>(), path.c_str(), copy_count);
(buffer_ptr.as<char16_t*>())[copy_count] = 0;
xe::string_util::copy_and_swap_truncating(buffer_ptr.as<char16_t*>(), path,
buffer_count);
return 0;
}

View File

@ -984,8 +984,7 @@ dword_result_t NetDll___WSAFDIsSet(dword_t socket_handle,
DECLARE_XAM_EXPORT1(NetDll___WSAFDIsSet, kNetworking, kImplemented);
void RegisterNetExports(xe::cpu::ExportResolver* export_resolver,
KernelState* kernel_state) {
}
KernelState* kernel_state) {}
} // namespace xam
} // namespace kernel

View File

@ -9,6 +9,7 @@
#include "third_party/imgui/imgui.h"
#include "xenia/base/logging.h"
#include "xenia/base/string_util.h"
#include "xenia/emulator.h"
#include "xenia/kernel/kernel_flags.h"
#include "xenia/kernel/kernel_state.h"
@ -188,8 +189,8 @@ class KeyboardInputDialog : public xe::ui::ImGuiDialog {
*out_text_ = default_text;
}
text_buffer_.resize(max_length);
std::strncpy(text_buffer_.data(), default_text_.c_str(),
std::min(text_buffer_.size() - 1, default_text_.size()));
xe::string_util::copy_truncating(text_buffer_.data(), default_text_,
text_buffer_.size());
}
void OnDraw(ImGuiIO& io) override {

View File

@ -10,6 +10,8 @@
#include <cstring>
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/string_util.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/kernel/util/shim_utils.h"
#include "xenia/kernel/xam/xam_private.h"
@ -168,7 +170,8 @@ X_HRESULT_result_t XamUserGetSigninInfo(dword_t user_index, dword_t flags,
const auto& user_profile = kernel_state()->user_profile();
info->xuid = user_profile->xuid();
info->signin_state = user_profile->signin_state();
std::strncpy(info->name, user_profile->name().data(), 15);
xe::string_util::copy_truncating(info->name, user_profile->name(),
xe::countof(info->name));
return X_E_SUCCESS;
}
DECLARE_XAM_EXPORT1(XamUserGetSigninInfo, kUserProfiles, kImplemented);
@ -187,10 +190,8 @@ dword_result_t XamUserGetName(dword_t user_index, lpstring_t buffer,
const auto& user_name = user_profile->name();
// Real XAM will only copy a maximum of 15 characters out.
size_t copy_length = std::min(
{size_t(15), user_name.size(), static_cast<size_t>(buffer_len) - 1});
std::memcpy(buffer, user_name.data(), copy_length);
buffer[copy_length] = '\0';
xe::string_util::copy_truncating(buffer, user_name,
std::min(buffer_len.value(), uint32_t(15)));
return X_ERROR_SUCCESS;
}
DECLARE_XAM_EXPORT1(XamUserGetName, kUserProfiles, kImplemented);

View File

@ -226,19 +226,21 @@ DECLARE_XBOXKRNL_EXPORT1(KeSetCurrentStackPointers, kThreading, kImplemented);
dword_result_t KeSetAffinityThread(lpvoid_t thread_ptr, dword_t affinity,
lpdword_t previous_affinity_ptr) {
uint32_t previous_affinity = 0;
// The Xbox 360, according to disassembly of KeSetAffinityThread, unlike
// Windows NT, stores the previous affinity via the pointer provided as an
// argument, not in the return value - the return value is used for the
// result.
if (!affinity) {
return X_STATUS_INVALID_PARAMETER;
}
auto thread = XObject::GetNativeObject<XThread>(kernel_state(), thread_ptr);
if (thread) {
previous_affinity = thread->affinity();
if (previous_affinity_ptr) {
*previous_affinity_ptr = uint32_t(1) << thread->active_cpu();
}
thread->SetAffinity(affinity);
}
if (previous_affinity_ptr) {
*previous_affinity_ptr = previous_affinity;
}
return (uint32_t)affinity;
return X_STATUS_SUCCESS;
}
DECLARE_XBOXKRNL_EXPORT1(KeSetAffinityThread, kThreading, kImplemented);

View File

@ -157,11 +157,17 @@ void XThread::set_name(const std::string_view name) {
}
}
uint8_t next_cpu = 0;
uint8_t GetFakeCpuNumber(uint8_t proc_mask) {
static uint8_t next_cpu = 0;
static uint8_t GetFakeCpuNumber(uint8_t proc_mask) {
// NOTE: proc_mask is logical processors, not physical processors or cores.
if (!proc_mask) {
next_cpu = (next_cpu + 1) % 6;
return next_cpu; // is this reasonable?
// TODO(Triang3l): Does the following apply here?
// https://docs.microsoft.com/en-us/windows/win32/dxtecharts/coding-for-multiple-cores
// "On Xbox 360, you must explicitly assign software threads to a particular
// hardware thread by using XSetThreadProcessor. Otherwise, all child
// threads will stay on the same hardware thread as the parent."
}
assert_false(proc_mask & 0xC0);
@ -206,6 +212,7 @@ void XThread::InitializeGuestObject() {
// 0xA88 = APC
// 0x18 = timer
xe::store_and_swap<uint32_t>(p + 0x09C, 0xFDFFD7FF);
// current_cpu is expected to be initialized externally via SetActiveCpu.
xe::store_and_swap<uint32_t>(p + 0x0D0, stack_base_);
xe::store_and_swap<uint64_t>(p + 0x130, Clock::QueryGuestSystemTime());
xe::store_and_swap<uint32_t>(p + 0x144, guest_object() + 0x144);
@ -347,6 +354,12 @@ X_STATUS XThread::Create() {
// Exports use this to get the kernel.
thread_state_->context()->kernel_state = kernel_state_;
uint8_t cpu_index = GetFakeCpuNumber(
static_cast<uint8_t>(creation_params_.creation_flags >> 24));
// Initialize the KTHREAD object.
InitializeGuestObject();
X_KPCR* pcr = memory()->TranslateVirtual<X_KPCR*>(pcr_address_);
pcr->tls_ptr = tls_static_address_;
@ -356,14 +369,11 @@ X_STATUS XThread::Create() {
pcr->stack_base_ptr = stack_base_;
pcr->stack_end_ptr = stack_limit_;
uint8_t proc_mask =
static_cast<uint8_t>(creation_params_.creation_flags >> 24);
pcr->dpc_active = 0; // DPC active bool?
pcr->current_cpu = GetFakeCpuNumber(proc_mask); // Current CPU(?)
pcr->dpc_active = 0; // DPC active bool?
// Initialize the KTHREAD object.
InitializeGuestObject();
// Assign the thread to the logical processor, and also set up the current CPU
// in KPCR and KTHREAD.
SetActiveCpu(cpu_index);
// Always retain when starting - the thread owns itself until exited.
RetainHandle();
@ -416,10 +426,6 @@ X_STATUS XThread::Create() {
return X_STATUS_NO_MEMORY;
}
if (!cvars::ignore_thread_affinities) {
thread_->set_affinity_mask(proc_mask);
}
// Set the thread name based on host ID (for easier debugging).
if (thread_name_.empty()) {
set_name(fmt::format("XThread{:04X}", thread_->system_id()));
@ -712,37 +718,36 @@ void XThread::SetPriority(int32_t increment) {
}
void XThread::SetAffinity(uint32_t affinity) {
// Affinity mask, as in SetThreadAffinityMask.
// Xbox thread IDs:
// 0 - core 0, thread 0 - user
// 1 - core 0, thread 1 - user
// 2 - core 1, thread 0 - sometimes xcontent
// 3 - core 1, thread 1 - user
// 4 - core 2, thread 0 - xaudio
// 5 - core 2, thread 1 - user
// TODO(benvanik): implement better thread distribution.
// NOTE: these are logical processors, not physical processors or cores.
SetActiveCpu(GetFakeCpuNumber(affinity));
}
uint8_t XThread::active_cpu() const {
const X_KPCR& pcr = *memory()->TranslateVirtual<const X_KPCR*>(pcr_address_);
return pcr.current_cpu;
}
void XThread::SetActiveCpu(uint8_t cpu_index) {
// May be called during thread creation - don't skip if current == new.
assert_true(cpu_index < 6);
X_KPCR& pcr = *memory()->TranslateVirtual<X_KPCR*>(pcr_address_);
pcr.current_cpu = cpu_index;
if (is_guest_thread()) {
X_KTHREAD& thread_object =
*memory()->TranslateVirtual<X_KTHREAD*>(guest_object());
thread_object.current_cpu = cpu_index;
}
if (xe::threading::logical_processor_count() < 6) {
XELOGW("Too few processors - scheduling will be wonky");
}
SetActiveCpu(GetFakeCpuNumber(affinity));
affinity_ = affinity;
if (!cvars::ignore_thread_affinities) {
thread_->set_affinity_mask(affinity);
thread_->set_affinity_mask(uint64_t(1) << cpu_index);
}
}
uint32_t XThread::active_cpu() const {
uint8_t* pcr = memory()->TranslateVirtual(pcr_address_);
return xe::load_and_swap<uint8_t>(pcr + 0x10C);
}
void XThread::SetActiveCpu(uint32_t cpu_index) {
assert_true(cpu_index < 6);
uint8_t* pcr = memory()->TranslateVirtual(pcr_address_);
xe::store_and_swap<uint8_t>(pcr + 0x10C, cpu_index);
}
bool XThread::GetTLSValue(uint32_t slot, uint32_t* value_out) {
if (slot * 4 > tls_total_size_) {
return false;

View File

@ -88,7 +88,8 @@ struct X_KTHREAD {
char unk_10[0xAC]; // 0x10
uint8_t suspend_count; // 0xBC
uint8_t unk_BD; // 0xBD
uint16_t unk_BE; // 0xBE
uint8_t unk_BE; // 0xBE
uint8_t current_cpu; // 0xBF
char unk_C0[0x70]; // 0xC0
xe::be<uint64_t> create_time; // 0x130
xe::be<uint64_t> exit_time; // 0x138
@ -171,10 +172,17 @@ class XThread : public XObject, public cpu::Thread {
int32_t priority() const { return priority_; }
int32_t QueryPriority();
void SetPriority(int32_t increment);
uint32_t affinity() const { return affinity_; }
// Xbox thread IDs:
// 0 - core 0, thread 0 - user
// 1 - core 0, thread 1 - user
// 2 - core 1, thread 0 - sometimes xcontent
// 3 - core 1, thread 1 - user
// 4 - core 2, thread 0 - xaudio
// 5 - core 2, thread 1 - user
void SetAffinity(uint32_t affinity);
uint32_t active_cpu() const;
void SetActiveCpu(uint32_t cpu_index);
uint8_t active_cpu() const;
void SetActiveCpu(uint8_t cpu_index);
bool GetTLSValue(uint32_t slot, uint32_t* value_out);
bool SetTLSValue(uint32_t slot, uint32_t value);
@ -226,7 +234,6 @@ class XThread : public XObject, public cpu::Thread {
bool running_ = false;
int32_t priority_ = 0;
uint32_t affinity_ = 0;
xe::global_critical_region global_critical_region_;
std::atomic<uint32_t> irql_ = {0};

View File

@ -118,15 +118,15 @@ bool D3D12ImmediateDrawer::Initialize() {
return false;
}
// Create the pipeline states.
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_desc = {};
pipeline_state_desc.pRootSignature = root_signature_;
pipeline_state_desc.VS.pShaderBytecode = immediate_vs;
pipeline_state_desc.VS.BytecodeLength = sizeof(immediate_vs);
pipeline_state_desc.PS.pShaderBytecode = immediate_ps;
pipeline_state_desc.PS.BytecodeLength = sizeof(immediate_ps);
// Create the pipelines.
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_desc = {};
pipeline_desc.pRootSignature = root_signature_;
pipeline_desc.VS.pShaderBytecode = immediate_vs;
pipeline_desc.VS.BytecodeLength = sizeof(immediate_vs);
pipeline_desc.PS.pShaderBytecode = immediate_ps;
pipeline_desc.PS.BytecodeLength = sizeof(immediate_ps);
D3D12_RENDER_TARGET_BLEND_DESC& pipeline_blend_desc =
pipeline_state_desc.BlendState.RenderTarget[0];
pipeline_desc.BlendState.RenderTarget[0];
pipeline_blend_desc.BlendEnable = TRUE;
pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA;
pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
@ -138,11 +138,11 @@ bool D3D12ImmediateDrawer::Initialize() {
pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED |
D3D12_COLOR_WRITE_ENABLE_GREEN |
D3D12_COLOR_WRITE_ENABLE_BLUE;
pipeline_state_desc.SampleMask = UINT_MAX;
pipeline_state_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
pipeline_state_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
pipeline_state_desc.RasterizerState.FrontCounterClockwise = FALSE;
pipeline_state_desc.RasterizerState.DepthClipEnable = TRUE;
pipeline_desc.SampleMask = UINT_MAX;
pipeline_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
pipeline_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
pipeline_desc.RasterizerState.FrontCounterClockwise = FALSE;
pipeline_desc.RasterizerState.DepthClipEnable = TRUE;
D3D12_INPUT_ELEMENT_DESC pipeline_input_elements[3] = {};
pipeline_input_elements[0].SemanticName = "POSITION";
pipeline_input_elements[0].Format = DXGI_FORMAT_R32G32_FLOAT;
@ -154,26 +154,24 @@ bool D3D12ImmediateDrawer::Initialize() {
pipeline_input_elements[2].Format = DXGI_FORMAT_R8G8B8A8_UNORM;
pipeline_input_elements[2].AlignedByteOffset =
offsetof(ImmediateVertex, color);
pipeline_state_desc.InputLayout.pInputElementDescs = pipeline_input_elements;
pipeline_state_desc.InputLayout.NumElements =
pipeline_desc.InputLayout.pInputElementDescs = pipeline_input_elements;
pipeline_desc.InputLayout.NumElements =
UINT(xe::countof(pipeline_input_elements));
pipeline_state_desc.PrimitiveTopologyType =
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
pipeline_state_desc.NumRenderTargets = 1;
pipeline_state_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat;
pipeline_state_desc.SampleDesc.Count = 1;
pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
pipeline_desc.NumRenderTargets = 1;
pipeline_desc.RTVFormats[0] = D3D12Context::kSwapChainFormat;
pipeline_desc.SampleDesc.Count = 1;
if (FAILED(device->CreateGraphicsPipelineState(
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_triangle_)))) {
&pipeline_desc, IID_PPV_ARGS(&pipeline_triangle_)))) {
XELOGE(
"Failed to create the Direct3D 12 immediate drawer triangle pipeline "
"state");
Shutdown();
return false;
}
pipeline_state_desc.PrimitiveTopologyType =
D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
pipeline_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
if (FAILED(device->CreateGraphicsPipelineState(
&pipeline_state_desc, IID_PPV_ARGS(&pipeline_state_line_)))) {
&pipeline_desc, IID_PPV_ARGS(&pipeline_line_)))) {
XELOGE(
"Failed to create the Direct3D 12 immediate drawer line pipeline "
"state");
@ -267,8 +265,8 @@ void D3D12ImmediateDrawer::Shutdown() {
util::ReleaseAndNull(sampler_heap_);
util::ReleaseAndNull(pipeline_state_line_);
util::ReleaseAndNull(pipeline_state_triangle_);
util::ReleaseAndNull(pipeline_line_);
util::ReleaseAndNull(pipeline_triangle_);
util::ReleaseAndNull(root_signature_);
}
@ -611,17 +609,17 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
uint32_t(sampler_index)));
}
// Set the primitive type and the pipeline state for it.
// Set the primitive type and the pipeline for it.
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
ID3D12PipelineState* pipeline_state;
ID3D12PipelineState* pipeline;
switch (draw.primitive_type) {
case ImmediatePrimitiveType::kLines:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
pipeline_state = pipeline_state_line_;
pipeline = pipeline_line_;
break;
case ImmediatePrimitiveType::kTriangles:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
pipeline_state = pipeline_state_triangle_;
pipeline = pipeline_triangle_;
break;
default:
assert_unhandled_case(draw.primitive_type);
@ -630,7 +628,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) {
if (current_primitive_topology_ != primitive_topology) {
current_primitive_topology_ = primitive_topology;
current_command_list_->IASetPrimitiveTopology(primitive_topology);
current_command_list_->SetPipelineState(pipeline_state);
current_command_list_->SetPipelineState(pipeline);
}
// Draw.

View File

@ -105,8 +105,8 @@ class D3D12ImmediateDrawer : public ImmediateDrawer {
kCount
};
ID3D12PipelineState* pipeline_state_triangle_ = nullptr;
ID3D12PipelineState* pipeline_state_line_ = nullptr;
ID3D12PipelineState* pipeline_triangle_ = nullptr;
ID3D12PipelineState* pipeline_line_ = nullptr;
ID3D12DescriptorHeap* sampler_heap_ = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_;

View File

@ -46,22 +46,22 @@ class D3D12Provider : public GraphicsProvider {
uint32_t GetRTVDescriptorSize() const { return descriptor_size_rtv_; }
uint32_t GetDSVDescriptorSize() const { return descriptor_size_dsv_; }
template <typename T>
inline T OffsetViewDescriptor(T start, uint32_t index) const {
T OffsetViewDescriptor(T start, uint32_t index) const {
start.ptr += index * descriptor_size_view_;
return start;
}
template <typename T>
inline T OffsetSamplerDescriptor(T start, uint32_t index) const {
T OffsetSamplerDescriptor(T start, uint32_t index) const {
start.ptr += index * descriptor_size_sampler_;
return start;
}
template <typename T>
inline T OffsetRTVDescriptor(T start, uint32_t index) const {
T OffsetRTVDescriptor(T start, uint32_t index) const {
start.ptr += index * descriptor_size_rtv_;
return start;
}
template <typename T>
inline T OffsetDSVDescriptor(T start, uint32_t index) const {
T OffsetDSVDescriptor(T start, uint32_t index) const {
start.ptr += index * descriptor_size_dsv_;
return start;
}
@ -91,32 +91,30 @@ class D3D12Provider : public GraphicsProvider {
}
// Proxies for Direct3D 12 functions since they are loaded dynamically.
inline HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc,
D3D_ROOT_SIGNATURE_VERSION version,
ID3DBlob** blob_out,
ID3DBlob** error_blob_out) const {
HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc,
D3D_ROOT_SIGNATURE_VERSION version,
ID3DBlob** blob_out,
ID3DBlob** error_blob_out) const {
return pfn_d3d12_serialize_root_signature_(desc, version, blob_out,
error_blob_out);
}
inline HRESULT Disassemble(const void* src_data, size_t src_data_size,
UINT flags, const char* comments,
ID3DBlob** disassembly_out) const {
HRESULT Disassemble(const void* src_data, size_t src_data_size, UINT flags,
const char* comments, ID3DBlob** disassembly_out) const {
if (!pfn_d3d_disassemble_) {
return E_NOINTERFACE;
}
return pfn_d3d_disassemble_(src_data, src_data_size, flags, comments,
disassembly_out);
}
inline HRESULT DxbcConverterCreateInstance(const CLSID& rclsid,
const IID& riid,
void** ppv) const {
HRESULT DxbcConverterCreateInstance(const CLSID& rclsid, const IID& riid,
void** ppv) const {
if (!pfn_dxilconv_dxc_create_instance_) {
return E_NOINTERFACE;
}
return pfn_dxilconv_dxc_create_instance_(rclsid, riid, ppv);
}
inline HRESULT DxcCreateInstance(const CLSID& rclsid, const IID& riid,
void** ppv) const {
HRESULT DxcCreateInstance(const CLSID& rclsid, const IID& riid,
void** ppv) const {
if (!pfn_dxcompiler_dxc_create_instance_) {
return E_NOINTERFACE;
}

View File

@ -47,7 +47,7 @@ ID3D12RootSignature* CreateRootSignature(
return root_signature;
}
ID3D12PipelineState* CreateComputePipelineState(
ID3D12PipelineState* CreateComputePipeline(
ID3D12Device* device, const void* shader, size_t shader_size,
ID3D12RootSignature* root_signature) {
D3D12_COMPUTE_PIPELINE_STATE_DESC desc;

View File

@ -27,7 +27,7 @@ extern const D3D12_HEAP_PROPERTIES kHeapPropertiesUpload;
extern const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback;
template <typename T>
inline bool ReleaseAndNull(T& object) {
bool ReleaseAndNull(T& object) {
if (object != nullptr) {
object->Release();
object = nullptr;
@ -39,9 +39,10 @@ inline bool ReleaseAndNull(T& object) {
ID3D12RootSignature* CreateRootSignature(const D3D12Provider& provider,
const D3D12_ROOT_SIGNATURE_DESC& desc);
ID3D12PipelineState* CreateComputePipelineState(
ID3D12Device* device, const void* shader, size_t shader_size,
ID3D12RootSignature* root_signature);
ID3D12PipelineState* CreateComputePipeline(ID3D12Device* device,
const void* shader,
size_t shader_size,
ID3D12RootSignature* root_signature);
constexpr DXGI_FORMAT GetUintPow2DXGIFormat(uint32_t element_size_bytes_log2) {
switch (element_size_bytes_log2) {

View File

@ -71,7 +71,7 @@ void GraphicsUploadBufferPool::FlushWrites() {
GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request(
uint64_t submission_index, size_t size, size_t alignment,
size_t& offset_out) {
assert_not_zero(alignment);
alignment = std::max(alignment, size_t(1));
assert_true(xe::is_pow2(alignment));
size = xe::align(size, alignment);
assert_true(size <= page_size_);
@ -126,7 +126,7 @@ GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request(
GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::RequestPartial(
uint64_t submission_index, size_t size, size_t alignment,
size_t& offset_out, size_t& size_out) {
assert_not_zero(alignment);
alignment = std::max(alignment, size_t(1));
assert_true(xe::is_pow2(alignment));
size = xe::align(size, alignment);
size = std::min(size, page_size_);

View File

@ -18,7 +18,7 @@ project("SDL2")
"SDL2/include",
})
buildoptions({
"/wd4828", -- illegal characters in file
"/wd4828", -- illegal characters in file https://bugzilla.libsdl.org/show_bug.cgi?id=5333
})
files({
-- 1:1 from SDL.vcxproj file

1
third_party/premake-cmake vendored Submodule

@ -0,0 +1 @@
Subproject commit 26fbbb9962aefcb1c24aff1e7952033ce1361190

View File

@ -73,4 +73,4 @@ project("spirv-tools")
buildoptions({
"/wd4800", -- Forcing value to bool 'true' or 'false'
"/wd4996", -- Call to 'std::equal' with parameters that may be unsafe
})
})

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/env python3
# Copyright 2015 Ben Vanik. All Rights Reserved.
@ -107,13 +107,14 @@ def has_bin(bin):
return None
def shell_call(command, throw_on_error=True, stdout_path=None):
def shell_call(command, throw_on_error=True, stdout_path=None, stderr_path=None, shell=False):
"""Executes a shell command.
Args:
command: Command to execute, as a list of parameters.
throw_on_error: Whether to throw an error or return the status code.
stdout_path: File path to write stdout output to.
stderr_path: File path to write stderr output to.
Returns:
If throw_on_error is False the status code of the call will be returned.
@ -121,17 +122,22 @@ def shell_call(command, throw_on_error=True, stdout_path=None):
stdout_file = None
if stdout_path:
stdout_file = open(stdout_path, 'w')
stderr_file = None
if stderr_path:
stderr_file = open(stderr_path, 'w')
result = 0
try:
if throw_on_error:
result = 1
subprocess.check_call(command, shell=False, stdout=stdout_file)
subprocess.check_call(command, shell=shell, stdout=stdout_file, stderr=stderr_file)
result = 0
else:
result = subprocess.call(command, shell=False, stdout=stdout_file)
result = subprocess.call(command, shell=shell, stdout=stdout_file, stderr=stderr_file)
finally:
if stdout_file:
stdout_file.close()
if stderr_file:
stderr_file.close()
return result
@ -196,42 +202,5 @@ def import_subprocess_environment(args):
os.environ[var.upper()] = setting
break
def git_submodule_update():
"""Runs a full recursive git submodule init and update.
Older versions of git do not support 'update --init --recursive'. We could
check and run it on versions that do support it and speed things up a bit.
"""
if True:
shell_call([
'git',
'submodule',
'update',
'--init',
'--recursive',
])
else:
shell_call([
'git',
'submodule',
'init',
])
shell_call([
'git',
'submodule',
'foreach',
'--recursive',
'git',
'submodule',
'init',
])
shell_call([
'git',
'submodule',
'update',
'--recursive',
])
if __name__ == '__main__':
main()

View File

@ -34,8 +34,11 @@ def main():
# Check git exists.
if not has_bin('git'):
print('ERROR: git must be installed and on PATH.')
sys.exit(1)
print('WARNING: Git should be installed and on PATH. Version info will be omitted from all binaries!')
print('')
elif not git_is_repository():
print('WARNING: The source tree is unversioned. Version info will be omitted from all binaries!')
print('')
# Check python version.
if not sys.version_info[:2] >= (3, 6):
@ -85,6 +88,16 @@ def main():
sys.exit(return_code)
def print_box(msg):
"""Prints an important message inside a box
"""
print(
'┌{0:─^{2}}╖\n'
'│{1: ^{2}}║\n'
'╘{0:═^{2}}╝\n'
.format('', msg, len(msg) + 2))
def import_vs_environment():
"""Finds the installed Visual Studio version and imports
interesting environment variables into os.environ.
@ -150,6 +163,7 @@ def import_subprocess_environment(args):
os.environ[var.upper()] = setting
break
def has_bin(binary):
"""Checks whether the given binary is present.
@ -185,13 +199,14 @@ def get_bin(binary):
return None
def shell_call(command, throw_on_error=True, stdout_path=None, shell=False):
def shell_call(command, throw_on_error=True, stdout_path=None, stderr_path=None, shell=False):
"""Executes a shell command.
Args:
command: Command to execute, as a list of parameters.
throw_on_error: Whether to throw an error or return the status code.
stdout_path: File path to write stdout output to.
stderr_path: File path to write stderr output to.
Returns:
If throw_on_error is False the status code of the call will be returned.
@ -199,21 +214,49 @@ def shell_call(command, throw_on_error=True, stdout_path=None, shell=False):
stdout_file = None
if stdout_path:
stdout_file = open(stdout_path, 'w')
stderr_file = None
if stderr_path:
stderr_file = open(stderr_path, 'w')
result = 0
try:
if throw_on_error:
result = 1
subprocess.check_call(command, shell=shell, stdout=stdout_file)
subprocess.check_call(command, shell=shell, stdout=stdout_file, stderr=stderr_file)
result = 0
else:
result = subprocess.call(command, shell=shell, stdout=stdout_file)
result = subprocess.call(command, shell=shell, stdout=stdout_file, stderr=stderr_file)
finally:
if stdout_file:
stdout_file.close()
if stderr_file:
stderr_file.close()
return result
def get_git_head_info():
def generate_version_h():
"""Generates a build/version.h file that contains current git info.
"""
if git_is_repository():
(branch_name, commit, commit_short) = git_get_head_info()
else:
branch_name = 'tarball'
commit = ':(-dont-do-this'
commit_short = ':('
contents = '''// Autogenerated by `xb premake`.
#ifndef GENERATED_VERSION_H_
#define GENERATED_VERSION_H_
#define XE_BUILD_BRANCH "%s"
#define XE_BUILD_COMMIT "%s"
#define XE_BUILD_COMMIT_SHORT "%s"
#define XE_BUILD_DATE __DATE__
#endif // GENERATED_VERSION_H_
''' % (branch_name, commit, commit_short)
with open('build/version.h', 'w') as f:
f.write(contents)
def git_get_head_info():
"""Queries the current branch and commit checksum from git.
Returns:
@ -247,58 +290,28 @@ def get_git_head_info():
return branch_name, commit, commit_short
def generate_version_h():
"""Generates a build/version.h file that contains current git info.
def git_is_repository():
"""Checks if git is available and this source tree is versioned.
"""
(branch_name, commit, commit_short) = get_git_head_info()
contents = '''// Autogenerated by `xb premake`.
#ifndef GENERATED_VERSION_H_
#define GENERATED_VERSION_H_
#define XE_BUILD_BRANCH "%s"
#define XE_BUILD_COMMIT "%s"
#define XE_BUILD_COMMIT_SHORT "%s"
#define XE_BUILD_DATE __DATE__
#endif // GENERATED_VERSION_H_
''' % (branch_name, commit, commit_short)
with open('build/version.h', 'w') as f:
f.write(contents)
if not has_bin('git'):
return False
return shell_call([
'git',
'rev-parse',
'--is-inside-work-tree',
], throw_on_error=False, stdout_path=os.devnull, stderr_path=os.devnull) == 0
def git_submodule_update():
"""Runs a full recursive git submodule init and update.
Older versions of git do not support 'update --init --recursive'. We could
check and run it on versions that do support it and speed things up a bit.
"""
if True:
shell_call([
'git',
'submodule',
'update',
'--init',
'--recursive',
])
else:
shell_call([
'git',
'submodule',
'init',
])
shell_call([
'git',
'submodule',
'foreach',
'--recursive',
'git',
'submodule',
'init',
])
shell_call([
'git',
'submodule',
'update',
'--recursive',
])
shell_call([
'git',
'submodule',
'update',
'--init',
'--recursive',
])
def get_clang_format_binary():
@ -370,9 +383,9 @@ def run_platform_premake(cc='clang', devenv=None):
if 'VSVERSION' in os.environ:
vs_version = os.environ['VSVERSION']
return run_premake('windows', 'vs' + vs_version)
return run_premake('windows', devenv or ('vs' + vs_version))
else:
return run_premake('linux', devenv == 'codelite' and devenv or 'gmake2', cc)
return run_premake('linux', devenv or 'gmake2', cc)
def run_premake_export_commands():
@ -406,6 +419,43 @@ def get_build_bin_path(args):
return os.path.join(self_path, 'build', 'bin', platform.capitalize(), args['config'].capitalize())
def create_clion_workspace():
"""Creates some basic workspace information inside the .idea directory for first start.
"""
if os.path.exists('.idea'):
# No first start
return False
print('Generating CLion workspace files...')
# Might become easier in the future: https://youtrack.jetbrains.com/issue/CPP-7911
# Set the location of the CMakeLists.txt
os.mkdir('.idea')
with open(os.path.join('.idea', 'misc.xml'), 'w') as f:
f.write("""<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$/build">
<contentRoot DIR="$PROJECT_DIR$" />
</component>
</project>
""")
# Set available configurations
# TODO Find a way to trigger a cmake reload
with open(os.path.join('.idea', 'workspace.xml'), 'w') as f:
f.write("""<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeSettings">
<configurations>
<configuration PROFILE_NAME="Checked" CONFIG_NAME="Checked" />
<configuration PROFILE_NAME="Debug" CONFIG_NAME="Debug" />
<configuration PROFILE_NAME="Release" CONFIG_NAME="Release" />
</configurations>
</component>
</project>""")
return True
def discover_commands(subparsers):
"""Looks for all commands and returns a dictionary of them.
In the future commands could be discovered on disk.
@ -491,7 +541,10 @@ class SetupCommand(Command):
# Setup submodules.
print('- git submodule init / update...')
git_submodule_update()
if git_is_repository():
git_submodule_update()
else:
print('WARNING: Git not available or not a repository. Dependencies may be missing.')
print('')
print('- running premake...')
@ -1445,8 +1498,13 @@ class DevenvCommand(Command):
def execute(self, args, pass_args, cwd):
devenv = None
show_reload_prompt = False
if sys.platform == 'win32':
print('Launching Visual Studio...')
elif has_bin('clion') or has_bin('clion.sh'):
print('Launching CLion...')
show_reload_prompt = create_clion_workspace()
devenv = 'cmake'
else:
print('Launching CodeLite...')
devenv = 'codelite'
@ -1457,11 +1515,23 @@ class DevenvCommand(Command):
print('')
print('- launching devenv...')
if show_reload_prompt:
print_box('Please run "File ⇒ ↺ Reload CMake Project" from inside the IDE!')
if sys.platform == 'win32':
shell_call([
'devenv',
'build\\xenia.sln',
])
elif has_bin('clion'):
shell_call([
'clion',
'.',
])
elif has_bin('clion.sh'):
shell_call([
'clion.sh',
'.',
])
else:
shell_call([
'codelite',