[D3D12] Persistent shader and PSO storage

This commit is contained in:
Triang3l 2020-03-21 19:21:00 +03:00
parent b1d3fd2ad3
commit cde092ece1
20 changed files with 1112 additions and 203 deletions

View File

@ -245,8 +245,8 @@ bool EmulatorWindow::Initialize() {
}
gpu_menu->AddChild(MenuItem::Create(MenuItem::Type::kSeparator));
{
gpu_menu->AddChild(
MenuItem::Create(MenuItem::Type::kString, L"&Clear Caches", L"F5",
gpu_menu->AddChild(MenuItem::Create(
MenuItem::Type::kString, L"&Clear Runtime Caches", L"F5",
std::bind(&EmulatorWindow::GpuClearCaches, this)));
}
main_menu->AddChild(std::move(gpu_menu));
@ -454,8 +454,20 @@ void EmulatorWindow::UpdateTitle() {
title += xe::format_string(L" (@%.2fx)", Clock::guest_time_scalar());
}
if (initializing_shader_storage_) {
title += L" (Preloading shaders\u2026)";
}
window_->set_title(title);
}
// Records whether shader storage is currently being initialized and refreshes
// the window title so the "Preloading shaders" suffix appears or disappears.
// Calls that do not change the state leave the title untouched.
void EmulatorWindow::SetInitializingShaderStorage(bool initializing) {
  const bool state_changed = initializing_shader_storage_ != initializing;
  if (state_changed) {
    initializing_shader_storage_ = initializing;
    UpdateTitle();
  }
}
} // namespace app
} // namespace xe

View File

@ -37,6 +37,7 @@ class EmulatorWindow {
void UpdateTitle();
void ToggleFullscreen();
void SetInitializingShaderStorage(bool initializing);
private:
explicit EmulatorWindow(Emulator* emulator);
@ -63,6 +64,7 @@ class EmulatorWindow {
std::unique_ptr<ui::Window> window_;
std::wstring base_title_;
uint64_t cursor_hide_time_ = 0;
bool initializing_shader_storage_ = false;
};
} // namespace app

View File

@ -243,7 +243,7 @@ int xenia_main(const std::vector<std::wstring>& args) {
}
// Create the emulator but don't initialize so we can setup the window.
auto emulator = std::make_unique<Emulator>(L"", content_root);
auto emulator = std::make_unique<Emulator>(L"", storage_root, content_root);
// Main emulator display window.
auto emulator_window = EmulatorWindow::Create(emulator.get());
@ -331,6 +331,11 @@ int xenia_main(const std::vector<std::wstring>& args) {
evt->Set();
});
emulator->on_shader_storage_initialization.AddListener(
[&](bool initializing) {
emulator_window->SetInitializingShaderStorage(initializing);
});
emulator->on_terminate.AddListener([&]() {
if (cvars::discord) {
discord::DiscordPresence::NotPlaying();

View File

@ -58,6 +58,17 @@ bool CreateFile(const std::wstring& path);
// This behaves like fopen and the returned handle can be used with stdio.
FILE* OpenFile(const std::wstring& path, const char* mode);
// Wrapper for the 64-bit version of fseek, returns true on success.
bool Seek(FILE* file, int64_t offset, int origin);
// Wrapper for the 64-bit version of ftell, returns a non-negative value on
// success.
int64_t Tell(FILE* file);
// Reduces the size of a stdio file opened for writing. The file pointer is
// clamped. If this returns false, the size of the file and the file pointer are
// undefined.
bool TruncateStdioFile(FILE* file, uint64_t length);
// Deletes the file at the given path.
// Returns true if the file was found and removed.
bool DeleteFile(const std::wstring& path);

View File

@ -76,6 +76,31 @@ FILE* OpenFile(const std::wstring& path, const char* mode) {
return fopen(xe::to_string(fixed_path).c_str(), mode);
}
// 64-bit seek on a stdio stream. `origin` is one of the SEEK_* constants.
// Returns true when the underlying fseeko64 call reports success (0).
bool Seek(FILE* file, int64_t offset, int origin) {
  const int seek_result = fseeko64(file, off64_t(offset), origin);
  return seek_result == 0;
}
// 64-bit position query on a stdio stream. The value from ftello64 is passed
// through as int64_t; callers such as TruncateStdioFile treat a negative result
// as failure.
int64_t Tell(FILE* file) {
  const off64_t current_offset = ftello64(file);
  return int64_t(current_offset);
}
// Resizes the file behind a stdio stream to `length` bytes. If the stream
// position was beyond the new length, it is moved to the new end of the file.
// Returns false as soon as any step fails.
bool TruncateStdioFile(FILE* file, uint64_t length) {
  // Flush before querying the position so it is not out of sync with buffered
  // data.
  if (fflush(file) != 0) return false;

  const int64_t previous_position = Tell(file);
  if (previous_position < 0) return false;

  if (ftruncate64(fileno(file), off64_t(length)) != 0) return false;

  // Only reposition when the old position no longer exists in the file.
  const bool position_past_end = uint64_t(previous_position) > length;
  return !position_past_end || Seek(file, 0, SEEK_END);
}
// Creates a folder at the given path with mode 0774.
// Returns true if the folder was created, false if mkdir failed.
bool CreateFolder(const std::wstring& path) {
  // mkdir signals success with 0 and failure with -1. Returning the raw value
  // as a bool would report success as false and failure as true, so compare
  // against 0 explicitly.
  return mkdir(xe::to_string(path).c_str(), 0774) == 0;
}

View File

@ -12,6 +12,7 @@
#include <string>
#include <io.h>
#include <shlobj.h>
#include "xenia/base/platform_win.h"
@ -87,6 +88,32 @@ FILE* OpenFile(const std::wstring& path, const char* mode) {
return _wfopen(fixed_path.c_str(), xe::to_wstring(mode).c_str());
}
// 64-bit seek on a stdio stream. `origin` is one of the SEEK_* constants.
// Returns true when the underlying _fseeki64 call reports success (0).
bool Seek(FILE* file, int64_t offset, int origin) {
  const int seek_result = _fseeki64(file, offset, origin);
  return seek_result == 0;
}
// 64-bit position query on a stdio stream. The value from _ftelli64 is returned
// unchanged; callers such as TruncateStdioFile treat a negative result as
// failure.
int64_t Tell(FILE* file) {
  const int64_t current_offset = _ftelli64(file);
  return current_offset;
}
// Resizes the file behind a stdio stream to `length` bytes. If the stream
// position was beyond the new length, it is moved to the new end of the file.
// Returns false as soon as any step fails.
bool TruncateStdioFile(FILE* file, uint64_t length) {
  // Flush is necessary - if not flushing, stream position may be out of sync.
  if (fflush(file) != 0) return false;

  const int64_t previous_position = Tell(file);
  if (previous_position < 0) return false;

  if (_chsize_s(_fileno(file), int64_t(length)) != 0) return false;

  // Only reposition when the old position no longer exists in the file.
  const bool position_past_end = uint64_t(previous_position) > length;
  return !position_past_end || Seek(file, 0, SEEK_END);
}
// Deletes the file at the given path through DeleteFileW.
// Converts the Win32 result to bool: any nonzero result becomes true.
bool DeleteFile(const std::wstring& path) {
  const bool removed = DeleteFileW(path.c_str()) != 0;
  return removed;
}

View File

@ -57,11 +57,13 @@ DEFINE_string(
namespace xe {
Emulator::Emulator(const std::wstring& command_line,
const std::wstring& storage_root,
const std::wstring& content_root)
: on_launch(),
on_terminate(),
on_exit(),
command_line_(command_line),
storage_root_(storage_root),
content_root_(content_root),
game_title_(),
display_window_(nullptr),
@ -685,11 +687,18 @@ X_STATUS Emulator::CompleteLaunch(const std::wstring& path,
}
}
// Initializing the shader storage in a blocking way so the user doesn't miss
// the initial seconds - for instance, sound from an intro video may start
// playing before the video can be seen if doing this in parallel with the
// main thread.
on_shader_storage_initialization(true);
graphics_system_->InitializeShaderStorage(storage_root_, title_id_, true);
on_shader_storage_initialization(false);
auto main_thread = kernel_state_->LaunchModule(module);
if (!main_thread) {
return X_STATUS_UNSUCCESSFUL;
}
main_thread_ = main_thread;
on_launch(title_id_, game_title_);

View File

@ -48,13 +48,17 @@ namespace xe {
class Emulator {
public:
explicit Emulator(const std::wstring& command_line,
const std::wstring& storage_root,
const std::wstring& content_root);
~Emulator();
// Full command line used when launching the process.
const std::wstring& command_line() const { return command_line_; }
// Folder content is stored in.
// Folder persistent internal emulator data is stored in.
const std::wstring& storage_root() const { return storage_root_; }
// Folder guest content is stored in.
const std::wstring& content_root() const { return content_root_; }
// Title of the game in the default language.
@ -146,6 +150,7 @@ class Emulator {
public:
xe::Delegate<uint32_t, const std::wstring&> on_launch;
xe::Delegate<bool> on_shader_storage_initialization;
xe::Delegate<> on_terminate;
xe::Delegate<> on_exit;
@ -159,6 +164,7 @@ class Emulator {
const std::string& module_path);
std::wstring command_line_;
std::wstring storage_root_;
std::wstring content_root_;
std::wstring game_title_;

View File

@ -87,6 +87,10 @@ void CommandProcessor::Shutdown() {
worker_thread_.reset();
}
// Base implementation of the shader storage hook: the body is empty, so all
// three arguments are ignored. The function is declared virtual;
// D3D12CommandProcessor overrides it, calls this base version, and then
// forwards the same arguments to its pipeline cache.
void CommandProcessor::InitializeShaderStorage(const std::wstring& storage_root,
uint32_t title_id,
bool blocking) {}
void CommandProcessor::RequestFrameTrace(const std::wstring& root_path) {
if (trace_state_ == TraceState::kStreaming) {
XELOGE("Streaming trace; cannot also trace frame.");

View File

@ -130,6 +130,12 @@ class CommandProcessor {
swap_request_handler_ = fn;
}
// May be called not only from the command processor thread when the command
// processor is paused, and the termination of this function may be explicitly
// awaited.
virtual void InitializeShaderStorage(const std::wstring& storage_root,
uint32_t title_id, bool blocking);
virtual void RequestFrameTrace(const std::wstring& root_path);
virtual void BeginTracing(const std::wstring& root_path);
virtual void EndTracing();

View File

@ -77,6 +77,12 @@ void D3D12CommandProcessor::ClearCaches() {
cache_clear_requested_ = true;
}
// Initializes persistent shader storage for the D3D12 backend. Runs the base
// CommandProcessor implementation first, then forwards the same storage root,
// title ID and blocking flag to the pipeline cache, which does the work.
void D3D12CommandProcessor::InitializeShaderStorage(
const std::wstring& storage_root, uint32_t title_id, bool blocking) {
CommandProcessor::InitializeShaderStorage(storage_root, title_id, blocking);
pipeline_cache_->InitializeShaderStorage(storage_root, title_id, blocking);
}
void D3D12CommandProcessor::RequestFrameTrace(const std::wstring& root_path) {
// Capture with PIX if attached.
if (GetD3D12Context()->GetD3D12Provider()->GetGraphicsAnalysis() != nullptr) {
@ -2123,7 +2129,7 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
}
bool D3D12CommandProcessor::CanEndSubmissionImmediately() const {
return !submission_open_ || !pipeline_cache_->IsCreatingPipelines();
return !submission_open_ || !pipeline_cache_->IsCreatingPipelineStates();
}
void D3D12CommandProcessor::AwaitAllSubmissionsCompletion() {

View File

@ -43,6 +43,9 @@ class D3D12CommandProcessor : public CommandProcessor {
void ClearCaches() override;
void InitializeShaderStorage(const std::wstring& storage_root,
uint32_t title_id, bool blocking) override;
void RequestFrameTrace(const std::wstring& root_path) override;
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
@ -125,10 +128,11 @@ class D3D12CommandProcessor : public CommandProcessor {
// render targets or copying to depth render targets.
void SetSamplePositions(MsaaSamples sample_positions);
// Returns a pipeline with deferred creation by its handle. May return nullptr
// if failed to create the pipeline.
inline ID3D12PipelineState* GetPipelineStateByHandle(void* handle) const {
return pipeline_cache_->GetPipelineStateByHandle(handle);
// Returns a pipeline state object with deferred creation by its handle. May
// return nullptr if failed to create the pipeline state object.
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
void* handle) const {
return pipeline_cache_->GetD3D12PipelineStateByHandle(handle);
}
// Sets the current pipeline state to a compute pipeline. This is for cache

View File

@ -200,7 +200,8 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
}
} break;
case Command::kSetPipelineStateHandle: {
current_pipeline_state = command_processor_->GetPipelineStateByHandle(
current_pipeline_state =
command_processor_->GetD3D12PipelineStateByHandle(
*reinterpret_cast<void* const*>(stream));
if (current_pipeline_state) {
command_list->SetPipelineState(current_pipeline_state);

File diff suppressed because it is too large Load Diff

View File

@ -11,13 +11,17 @@
#define XENIA_GPU_D3D12_PIPELINE_CACHE_H_
#include <condition_variable>
#include <cstdio>
#include <deque>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>
#include "xenia/base/platform.h"
#include "xenia/base/threading.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/d3d12/render_target_cache.h"
@ -40,10 +44,14 @@ class PipelineCache {
bool Initialize();
void Shutdown();
void ClearCache();
void ClearCache(bool shutting_down = false);
void InitializeShaderStorage(const std::wstring& storage_root,
uint32_t title_id, bool blocking);
void ShutdownShaderStorage();
void EndSubmission();
bool IsCreatingPipelines();
bool IsCreatingPipelineStates();
D3D12Shader* LoadShader(ShaderType shader_type, uint32_t guest_address,
const uint32_t* host_address, uint32_t dword_count);
@ -57,15 +65,32 @@ class PipelineCache {
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, bool tessellated,
PrimitiveType primitive_type, IndexFormat index_format, bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
void** pipeline_state_handle_out,
ID3D12RootSignature** root_signature_out);
// Returns a pipeline with deferred creation by its handle. May return nullptr
// if failed to create the pipeline.
inline ID3D12PipelineState* GetPipelineStateByHandle(void* handle) const {
return reinterpret_cast<const Pipeline*>(handle)->state;
// Returns a pipeline state object with deferred creation by its handle. May
// return nullptr if failed to create the pipeline state object.
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
void* handle) const {
return reinterpret_cast<const PipelineState*>(handle)->state;
}
private:
// Packed per-shader header record.
// NOTE(review): presumably written to the shader storage file alongside each
// shader's microcode - confirm against the implementation file, which is not
// visible here. Do not reorder or resize fields without checking that.
XEPACKEDSTRUCT(ShaderStoredHeader, {
// NOTE(review): by name, a hash of the shader microcode - confirm.
uint64_t ucode_data_hash;
// Dword count of the microcode, stored in 16 bits.
uint32_t ucode_dword_count : 16;
// Shader type, stored in a single bit.
ShaderType type : 1;
// Patch primitive type, stored in 6 bits.
PrimitiveType patch_primitive_type : 6;
reg::SQ_PROGRAM_CNTL sq_program_cntl;
// NOTE(review): looks like a date-style format version (2020-03-01);
// presumably needs bumping whenever this layout changes - confirm.
static constexpr uint32_t kVersion = 0x20200301;
});
// Update PipelineDescription::kVersion if any of the Pipeline* enums are
// changed!
enum class PipelineStripCutIndex : uint32_t {
kNone,
kFFFF,
@ -122,7 +147,8 @@ class PipelineCache {
kSrcAlphaSat,
};
struct PipelineRenderTarget {
// Update PipelineDescription::kVersion if anything is changed!
XEPACKEDSTRUCT(PipelineRenderTarget, {
uint32_t used : 1; // 1
ColorRenderTargetFormat format : 4; // 5
PipelineBlendFactor src_blend : 4; // 9
@ -132,12 +158,12 @@ class PipelineCache {
PipelineBlendFactor dest_blend_alpha : 4; // 24
BlendOp blend_op_alpha : 3; // 27
uint32_t write_mask : 4; // 31
};
});
struct PipelineDescription {
ID3D12RootSignature* root_signature;
D3D12Shader* vertex_shader;
D3D12Shader* pixel_shader;
XEPACKEDSTRUCT(PipelineDescription, {
uint64_t vertex_shader_hash;
// 0 if drawing without a pixel shader.
uint64_t pixel_shader_hash;
int32_t depth_bias;
float depth_bias_slope_scaled;
@ -170,19 +196,34 @@ class PipelineCache {
CompareFunction stencil_back_func : 3; // 32
PipelineRenderTarget render_targets[4];
static constexpr uint32_t kVersion = 0x20200309;
});
// Packed pair of a PipelineDescription and a 64-bit hash. This is the element
// type of storage_write_pipeline_state_queue_, the queue consumed by the
// storage write thread.
XEPACKEDSTRUCT(PipelineStoredDescription, {
// NOTE(review): presumably the hash of `description` - confirm where it is
// computed.
uint64_t description_hash;
PipelineDescription description;
});
// A PipelineDescription together with the runtime objects it refers to.
// PipelineDescription itself only carries the vertex/pixel shader hashes, so
// the root signature and shader pointers are kept alongside it here. Filled by
// GetCurrentStateDescription and consumed by CreateD3D12PipelineState.
struct PipelineRuntimeDescription {
ID3D12RootSignature* root_signature;
D3D12Shader* vertex_shader;
D3D12Shader* pixel_shader;
PipelineDescription description;
};
bool TranslateShader(D3D12Shader* shader, reg::SQ_PROGRAM_CNTL cntl,
bool tessellated, PrimitiveType primitive_type);
bool TranslateShader(DxbcShaderTranslator& translator, D3D12Shader* shader,
reg::SQ_PROGRAM_CNTL cntl,
PrimitiveType patch_primitive_type);
bool GetCurrentStateDescription(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, bool tessellated,
PrimitiveType primitive_type, IndexFormat index_format, bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5],
PipelineDescription& description_out);
PipelineRuntimeDescription& runtime_description_out);
ID3D12PipelineState* CreatePipelineState(
const PipelineDescription& description);
ID3D12PipelineState* CreateD3D12PipelineState(
const PipelineRuntimeDescription& runtime_description);
D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_;
@ -200,40 +241,71 @@ class PipelineCache {
// Xenos pixel shader provided.
std::vector<uint8_t> depth_only_pixel_shader_;
struct Pipeline {
struct PipelineState {
// nullptr if creation has failed.
ID3D12PipelineState* state;
PipelineDescription description;
PipelineRuntimeDescription description;
};
// All previously generated pipelines identified by hash and the description.
std::unordered_multimap<uint64_t, Pipeline*> pipelines_;
// All previously generated pipeline state objects identified by hash and the
// description.
std::unordered_multimap<uint64_t, PipelineState*> pipeline_states_;
// Previously used pipeline. This matches our current state settings
// and allows us to quickly(ish) reuse the pipeline if no registers have
// changed.
Pipeline* current_pipeline_ = nullptr;
// Previously used pipeline state object. This matches our current state
// settings and allows us to quickly(ish) reuse the pipeline state if no
// registers have changed.
PipelineState* current_pipeline_state_ = nullptr;
// Pipeline creation threads.
void CreationThread();
// Currently open shader storage path.
std::wstring shader_storage_root_;
uint32_t shader_storage_title_id_ = 0;
// Shader storage output stream, for preload in the next emulator runs.
FILE* shader_storage_file_ = nullptr;
bool shader_storage_file_flush_needed_ = false;
// Pipeline state storage output stream, for preload in the next emulator
// runs.
FILE* pipeline_state_storage_file_ = nullptr;
bool pipeline_state_storage_file_flush_needed_ = false;
// Thread for asynchronous writing to the storage streams.
void StorageWriteThread();
std::mutex storage_write_request_lock_;
std::condition_variable storage_write_request_cond_;
// Storage thread input is protected with storage_write_request_lock_, and the
// thread is notified about its change via storage_write_request_cond_.
std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>>
storage_write_shader_queue_;
std::deque<PipelineStoredDescription> storage_write_pipeline_state_queue_;
bool storage_write_flush_shaders_ = false;
bool storage_write_flush_pipeline_states_ = false;
bool storage_write_thread_shutdown_ = false;
std::unique_ptr<xe::threading::Thread> storage_write_thread_;
// Pipeline state object creation threads.
void CreationThread(size_t thread_index);
void CreateQueuedPipelineStatesOnProcessorThread();
std::mutex creation_request_lock_;
std::condition_variable creation_request_cond_;
// Protected with creation_request_lock_, notify_one creation_request_cond_
// when set.
std::deque<Pipeline*> creation_queue_;
// Number of threads that are currently creating a pipeline - incremented when
// a pipeline is dequeued (the completion event can't be triggered before this
// is zero). Protected with creation_request_lock_.
uint32_t creation_threads_busy_ = 0;
// Manual-reset event set when the last queued pipeline is created and there
// are no more pipelines to create. This is triggered by the thread creating
// the last pipeline.
std::deque<PipelineState*> creation_queue_;
// Number of threads that are currently creating a pipeline state object -
// incremented when a pipeline state object is dequeued (the completion event
// can't be triggered before this is zero). Protected with
// creation_request_lock_.
size_t creation_threads_busy_ = 0;
// Manual-reset event set when the last queued pipeline state object is
// created and there are no more pipeline state objects to create. This is
// triggered by the thread creating the last pipeline state object.
std::unique_ptr<xe::threading::Event> creation_completion_event_ = nullptr;
// Whether setting the event on completion is queued. Protected with
// creation_request_lock_, notify_one creation_request_cond_ when set.
bool creation_completion_set_event_ = false;
// Whether to shut down the creation threads as soon as possible. Protected
// with creation_request_lock_, notify_all creation_request_cond_ when set.
bool creation_threads_shutdown_ = false;
// Creation threads with this index or above need to be shut down as soon as
// possible. Protected with creation_request_lock_, notify_all
// creation_request_cond_ when set.
size_t creation_threads_shutdown_from_ = SIZE_MAX;
std::vector<std::unique_ptr<xe::threading::Thread>> creation_threads_;
};

View File

@ -13,8 +13,10 @@ DEFINE_string(trace_gpu_prefix, "scratch/gpu/",
"Prefix path for GPU trace files.", "GPU");
DEFINE_bool(trace_gpu_stream, false, "Trace all GPU packets.", "GPU");
DEFINE_string(dump_shaders, "",
"Path to write GPU shaders to as they are compiled.", "GPU");
DEFINE_string(
dump_shaders, "",
"For shader debugging, path to dump GPU shaders to as they are compiled.",
"GPU");
DEFINE_bool(vsync, true, "Enable VSYNC.", "GPU");

View File

@ -20,6 +20,12 @@
#include "xenia/ui/graphics_provider.h"
#include "xenia/ui/loop.h"
// Gates persistent shader storage: when this is false,
// GraphicsSystem::InitializeShaderStorage returns immediately without asking
// the command processor to initialize anything. Defaults to true.
DEFINE_bool(
store_shaders, true,
"Store shaders persistently and load them when loading games to avoid "
"runtime spikes and freezes when playing the game not for the first time.",
"GPU");
namespace xe {
namespace gpu {
@ -269,6 +275,34 @@ void GraphicsSystem::ClearCaches() {
[&]() { command_processor_->ClearCaches(); });
}
void GraphicsSystem::InitializeShaderStorage(const std::wstring& storage_root,
uint32_t title_id, bool blocking) {
if (!cvars::store_shaders) {
return;
}
if (blocking) {
if (command_processor_->is_paused()) {
// Safe to run on any thread while the command processor is paused, no
// race condition.
command_processor_->InitializeShaderStorage(storage_root, title_id, true);
} else {
xe::threading::Fence fence;
command_processor_->CallInThread(
[this, storage_root, title_id, &fence]() {
command_processor_->InitializeShaderStorage(storage_root, title_id,
true);
fence.Signal();
});
fence.Wait();
}
} else {
command_processor_->CallInThread([this, storage_root, title_id]() {
command_processor_->InitializeShaderStorage(storage_root, title_id,
false);
});
}
}
void GraphicsSystem::RequestFrameTrace() {
command_processor_->RequestFrameTrace(
xe::to_wstring(cvars::trace_gpu_prefix));

View File

@ -12,6 +12,7 @@
#include <atomic>
#include <memory>
#include <string>
#include <thread>
#include "xenia/cpu/processor.h"
@ -62,6 +63,9 @@ class GraphicsSystem {
virtual void ClearCaches();
void InitializeShaderStorage(const std::wstring& storage_root,
uint32_t title_id, bool blocking);
void RequestFrameTrace();
void BeginTracing();
void EndTracing();

View File

@ -102,7 +102,7 @@ int TraceDump::Main(const std::vector<std::wstring>& args) {
bool TraceDump::Setup() {
// Create the emulator but don't initialize so we can setup the window.
emulator_ = std::make_unique<Emulator>(L"", L"");
emulator_ = std::make_unique<Emulator>(L"", L"", L"");
X_STATUS result = emulator_->Setup(
nullptr, nullptr, [this]() { return CreateGraphicsSystem(); }, nullptr);
if (XFAILED(result)) {

View File

@ -122,7 +122,7 @@ bool TraceViewer::Setup() {
window_->Resize(1920, 1200);
// Create the emulator but don't initialize so we can setup the window.
emulator_ = std::make_unique<Emulator>(L"", L"");
emulator_ = std::make_unique<Emulator>(L"", L"", L"");
X_STATUS result = emulator_->Setup(
window_.get(), nullptr, [this]() { return CreateGraphicsSystem(); },
nullptr);