Merge remote-tracking branch 'upstream/master' into canary-old-update
This commit is contained in:
commit
1b197e0349
|
@ -286,8 +286,8 @@ bool EmulatorWindow::Initialize() {
|
|||
}
|
||||
gpu_menu->AddChild(MenuItem::Create(MenuItem::Type::kSeparator));
|
||||
{
|
||||
gpu_menu->AddChild(
|
||||
MenuItem::Create(MenuItem::Type::kString, L"&Clear Caches", L"F5",
|
||||
gpu_menu->AddChild(MenuItem::Create(
|
||||
MenuItem::Type::kString, L"&Clear Runtime Caches", L"F5",
|
||||
std::bind(&EmulatorWindow::GpuClearCaches, this)));
|
||||
}
|
||||
main_menu->AddChild(std::move(gpu_menu));
|
||||
|
@ -584,8 +584,20 @@ void EmulatorWindow::UpdateTitle() {
|
|||
title += xe::format_string(L" (@%.2fx)", Clock::guest_time_scalar());
|
||||
}
|
||||
|
||||
if (initializing_shader_storage_) {
|
||||
title += L" (Preloading shaders\u2026)";
|
||||
}
|
||||
|
||||
window_->set_title(title);
|
||||
}
|
||||
|
||||
// Records whether the shader storage is currently being initialized.
// UpdateTitle() reads initializing_shader_storage_ to decide whether to append
// the " (Preloading shaders...)" suffix, so the title is refreshed here, but
// only when the flag actually changes.
void EmulatorWindow::SetInitializingShaderStorage(bool initializing) {
  const bool state_changed = initializing_shader_storage_ != initializing;
  if (state_changed) {
    initializing_shader_storage_ = initializing;
    UpdateTitle();
  }
}
|
||||
|
||||
} // namespace app
|
||||
} // namespace xe
|
||||
|
|
|
@ -38,6 +38,7 @@ class EmulatorWindow {
|
|||
void UpdateTitle();
|
||||
void ToggleFullscreen();
|
||||
static std::wstring SwapNext(int8_t disc_number);
|
||||
void SetInitializingShaderStorage(bool initializing);
|
||||
|
||||
private:
|
||||
explicit EmulatorWindow(Emulator* emulator);
|
||||
|
@ -67,6 +68,7 @@ class EmulatorWindow {
|
|||
std::wstring base_title_;
|
||||
std::wstring global_recent_paths_[10];
|
||||
uint64_t cursor_hide_time_ = 0;
|
||||
bool initializing_shader_storage_ = false;
|
||||
};
|
||||
|
||||
} // namespace app
|
||||
|
|
|
@ -243,7 +243,7 @@ int xenia_main(const std::vector<std::wstring>& args) {
|
|||
}
|
||||
|
||||
// Create the emulator but don't initialize so we can setup the window.
|
||||
auto emulator = std::make_unique<Emulator>(L"", content_root);
|
||||
auto emulator = std::make_unique<Emulator>(L"", storage_root, content_root);
|
||||
|
||||
// Main emulator display window.
|
||||
auto emulator_window = EmulatorWindow::Create(emulator.get());
|
||||
|
@ -335,6 +335,11 @@ int xenia_main(const std::vector<std::wstring>& args) {
|
|||
evt->Set();
|
||||
});
|
||||
|
||||
emulator->on_shader_storage_initialization.AddListener(
|
||||
[&](bool initializing) {
|
||||
emulator_window->SetInitializingShaderStorage(initializing);
|
||||
});
|
||||
|
||||
emulator->on_terminate.AddListener([&]() {
|
||||
if (cvars::discord) {
|
||||
discord::DiscordPresence::NotPlaying();
|
||||
|
|
|
@ -58,6 +58,17 @@ bool CreateFile(const std::wstring& path);
|
|||
// This behaves like fopen and the returned handle can be used with stdio.
|
||||
FILE* OpenFile(const std::wstring& path, const char* mode);
|
||||
|
||||
// Wrapper for the 64-bit version of fseek, returns true on success.
|
||||
bool Seek(FILE* file, int64_t offset, int origin);
|
||||
|
||||
// Wrapper for the 64-bit version of ftell, returns a non-negative value on success.
|
||||
int64_t Tell(FILE* file);
|
||||
|
||||
// Reduces the size of a stdio file opened for writing. The file pointer is
|
||||
// clamped. If this returns false, the size of the file and the file pointer are
|
||||
// undefined.
|
||||
bool TruncateStdioFile(FILE* file, uint64_t length);
|
||||
|
||||
// Deletes the file at the given path.
|
||||
// Returns true if the file was found and removed.
|
||||
bool DeleteFile(const std::wstring& path);
|
||||
|
|
|
@ -76,6 +76,31 @@ FILE* OpenFile(const std::wstring& path, const char* mode) {
|
|||
return fopen(xe::to_string(fixed_path).c_str(), mode);
|
||||
}
|
||||
|
||||
// Moves the position of `file` to `offset` relative to `origin` (SEEK_SET,
// SEEK_CUR or SEEK_END) through the 64-bit fseeko64.
// Returns true when fseeko64 reports success (a zero result).
bool Seek(FILE* file, int64_t offset, int origin) {
  const int result = fseeko64(file, static_cast<off64_t>(offset), origin);
  return result == 0;
}
|
||||
|
||||
// Returns the current position of `file` as reported by the 64-bit ftello64,
// converted to int64_t. Callers in this file treat a negative result as
// failure.
int64_t Tell(FILE* file) {
  const off64_t position = ftello64(file);
  return static_cast<int64_t>(position);
}
|
||||
|
||||
bool TruncateStdioFile(FILE* file, uint64_t length) {
|
||||
if (fflush(file)) {
|
||||
return false;
|
||||
}
|
||||
int64_t position = Tell(file);
|
||||
if (position < 0) {
|
||||
return false;
|
||||
}
|
||||
if (ftruncate64(fileno(file), off64_t(length))) {
|
||||
return false;
|
||||
}
|
||||
if (uint64_t(position) > length) {
|
||||
if (!Seek(file, 0, SEEK_END)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CreateFolder(const std::wstring& path) {
|
||||
return mkdir(xe::to_string(path).c_str(), 0774);
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <string>
|
||||
|
||||
#include <io.h>
|
||||
#include <shlobj.h>
|
||||
|
||||
#include "xenia/base/platform_win.h"
|
||||
|
@ -87,6 +88,32 @@ FILE* OpenFile(const std::wstring& path, const char* mode) {
|
|||
return _wfopen(fixed_path.c_str(), xe::to_wstring(mode).c_str());
|
||||
}
|
||||
|
||||
bool Seek(FILE* file, int64_t offset, int origin) {
|
||||
return _fseeki64(file, offset, origin) == 0;
|
||||
}
|
||||
|
||||
// Returns the current position of `file` as reported by the 64-bit _ftelli64.
// Callers in this file treat a negative result as failure.
int64_t Tell(FILE* file) {
  const int64_t position = _ftelli64(file);
  return position;
}
|
||||
|
||||
bool TruncateStdioFile(FILE* file, uint64_t length) {
|
||||
// Flush is necessary - if not flushing, stream position may be out of sync.
|
||||
if (fflush(file)) {
|
||||
return false;
|
||||
}
|
||||
int64_t position = Tell(file);
|
||||
if (position < 0) {
|
||||
return false;
|
||||
}
|
||||
if (_chsize_s(_fileno(file), int64_t(length))) {
|
||||
return false;
|
||||
}
|
||||
if (uint64_t(position) > length) {
|
||||
if (!Seek(file, 0, SEEK_END)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DeleteFile(const std::wstring& path) {
|
||||
return DeleteFileW(path.c_str()) ? true : false;
|
||||
}
|
||||
|
|
|
@ -60,11 +60,13 @@ DEFINE_string(
|
|||
namespace xe {
|
||||
|
||||
Emulator::Emulator(const std::wstring& command_line,
|
||||
const std::wstring& storage_root,
|
||||
const std::wstring& content_root)
|
||||
: on_launch(),
|
||||
on_terminate(),
|
||||
on_exit(),
|
||||
command_line_(command_line),
|
||||
storage_root_(storage_root),
|
||||
content_root_(content_root),
|
||||
game_title_(),
|
||||
display_window_(nullptr),
|
||||
|
@ -722,11 +724,18 @@ X_STATUS Emulator::CompleteLaunch(const std::wstring& path,
|
|||
}
|
||||
}
|
||||
|
||||
// Initializing the shader storage in a blocking way so the user doesn't miss
|
||||
// the initial seconds - for instance, sound from an intro video may start
|
||||
// playing before the video can be seen if doing this in parallel with the
|
||||
// main thread.
|
||||
on_shader_storage_initialization(true);
|
||||
graphics_system_->InitializeShaderStorage(storage_root_, title_id_, true);
|
||||
on_shader_storage_initialization(false);
|
||||
|
||||
auto main_thread = kernel_state_->LaunchModule(module);
|
||||
if (!main_thread) {
|
||||
return X_STATUS_UNSUCCESSFUL;
|
||||
}
|
||||
|
||||
main_thread_ = main_thread;
|
||||
on_launch(title_id_, game_title_);
|
||||
|
||||
|
|
|
@ -48,13 +48,17 @@ namespace xe {
|
|||
class Emulator {
|
||||
public:
|
||||
explicit Emulator(const std::wstring& command_line,
|
||||
const std::wstring& storage_root,
|
||||
const std::wstring& content_root);
|
||||
~Emulator();
|
||||
|
||||
// Full command line used when launching the process.
|
||||
const std::wstring& command_line() const { return command_line_; }
|
||||
|
||||
// Folder content is stored in.
|
||||
// Folder persistent internal emulator data is stored in.
|
||||
const std::wstring& storage_root() const { return storage_root_; }
|
||||
|
||||
// Folder guest content is stored in.
|
||||
const std::wstring& content_root() const { return content_root_; }
|
||||
|
||||
// Title of the game in the default language.
|
||||
|
@ -149,6 +153,7 @@ class Emulator {
|
|||
|
||||
public:
|
||||
xe::Delegate<uint32_t, const std::wstring&> on_launch;
|
||||
xe::Delegate<bool> on_shader_storage_initialization;
|
||||
xe::Delegate<> on_terminate;
|
||||
xe::Delegate<> on_exit;
|
||||
|
||||
|
@ -162,6 +167,7 @@ class Emulator {
|
|||
const std::string& module_path);
|
||||
|
||||
std::wstring command_line_;
|
||||
std::wstring storage_root_;
|
||||
std::wstring content_root_;
|
||||
|
||||
std::wstring game_title_;
|
||||
|
|
|
@ -87,6 +87,10 @@ void CommandProcessor::Shutdown() {
|
|||
worker_thread_.reset();
|
||||
}
|
||||
|
||||
// Base implementation of the shader storage initialization hook: it
// intentionally does nothing. The method is declared virtual, and
// D3D12CommandProcessor overrides it, calling this base version before
// forwarding the same arguments to its pipeline cache.
void CommandProcessor::InitializeShaderStorage(const std::wstring& storage_root,
|
||||
uint32_t title_id,
|
||||
bool blocking) {}
|
||||
|
||||
void CommandProcessor::RequestFrameTrace(const std::wstring& root_path) {
|
||||
if (trace_state_ == TraceState::kStreaming) {
|
||||
XELOGE("Streaming trace; cannot also trace frame.");
|
||||
|
|
|
@ -130,6 +130,12 @@ class CommandProcessor {
|
|||
swap_request_handler_ = fn;
|
||||
}
|
||||
|
||||
// May be called from a thread other than the command processor thread while
|
||||
// the command processor is paused, and callers may explicitly wait for this
|
||||
// function to finish.
|
||||
virtual void InitializeShaderStorage(const std::wstring& storage_root,
|
||||
uint32_t title_id, bool blocking);
|
||||
|
||||
virtual void RequestFrameTrace(const std::wstring& root_path);
|
||||
virtual void BeginTracing(const std::wstring& root_path);
|
||||
virtual void EndTracing();
|
||||
|
|
|
@ -77,6 +77,12 @@ void D3D12CommandProcessor::ClearCaches() {
|
|||
cache_clear_requested_ = true;
|
||||
}
|
||||
|
||||
// D3D12 override of the shader storage initialization hook. Runs the base
// CommandProcessor implementation first, then forwards the same storage root,
// title ID and blocking flag to the pipeline cache.
void D3D12CommandProcessor::InitializeShaderStorage(
|
||||
const std::wstring& storage_root, uint32_t title_id, bool blocking) {
|
||||
CommandProcessor::InitializeShaderStorage(storage_root, title_id, blocking);
|
||||
pipeline_cache_->InitializeShaderStorage(storage_root, title_id, blocking);
|
||||
}
|
||||
|
||||
void D3D12CommandProcessor::RequestFrameTrace(const std::wstring& root_path) {
|
||||
// Capture with PIX if attached.
|
||||
if (GetD3D12Context()->GetD3D12Provider()->GetGraphicsAnalysis() != nullptr) {
|
||||
|
@ -2123,7 +2129,7 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
|
|||
}
|
||||
|
||||
bool D3D12CommandProcessor::CanEndSubmissionImmediately() const {
|
||||
return !submission_open_ || !pipeline_cache_->IsCreatingPipelines();
|
||||
return !submission_open_ || !pipeline_cache_->IsCreatingPipelineStates();
|
||||
}
|
||||
|
||||
void D3D12CommandProcessor::AwaitAllSubmissionsCompletion() {
|
||||
|
|
|
@ -47,6 +47,9 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
|
||||
void ClearCaches() override;
|
||||
|
||||
void InitializeShaderStorage(const std::wstring& storage_root,
|
||||
uint32_t title_id, bool blocking) override;
|
||||
|
||||
void RequestFrameTrace(const std::wstring& root_path) override;
|
||||
|
||||
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
|
||||
|
@ -129,10 +132,11 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// render targets or copying to depth render targets.
|
||||
void SetSamplePositions(MsaaSamples sample_positions);
|
||||
|
||||
// Returns a pipeline with deferred creation by its handle. May return nullptr
|
||||
// if failed to create the pipeline.
|
||||
inline ID3D12PipelineState* GetPipelineStateByHandle(void* handle) const {
|
||||
return pipeline_cache_->GetPipelineStateByHandle(handle);
|
||||
// Returns a pipeline state object with deferred creation by its handle. May
|
||||
// return nullptr if failed to create the pipeline state object.
|
||||
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
|
||||
void* handle) const {
|
||||
return pipeline_cache_->GetD3D12PipelineStateByHandle(handle);
|
||||
}
|
||||
|
||||
// Sets the current pipeline state to a compute pipeline. This is for cache
|
||||
|
|
|
@ -200,7 +200,8 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list,
|
|||
}
|
||||
} break;
|
||||
case Command::kSetPipelineStateHandle: {
|
||||
current_pipeline_state = command_processor_->GetPipelineStateByHandle(
|
||||
current_pipeline_state =
|
||||
command_processor_->GetD3D12PipelineStateByHandle(
|
||||
*reinterpret_cast<void* const*>(stream));
|
||||
if (current_pipeline_state) {
|
||||
command_list->SetPipelineState(current_pipeline_state);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -11,13 +11,17 @@
|
|||
#define XENIA_GPU_D3D12_PIPELINE_CACHE_H_
|
||||
|
||||
#include <condition_variable>
|
||||
#include <cstdio>
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/platform.h"
|
||||
#include "xenia/base/threading.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||
#include "xenia/gpu/d3d12/render_target_cache.h"
|
||||
|
@ -40,10 +44,14 @@ class PipelineCache {
|
|||
|
||||
bool Initialize();
|
||||
void Shutdown();
|
||||
void ClearCache();
|
||||
void ClearCache(bool shutting_down = false);
|
||||
|
||||
void InitializeShaderStorage(const std::wstring& storage_root,
|
||||
uint32_t title_id, bool blocking);
|
||||
void ShutdownShaderStorage();
|
||||
|
||||
void EndSubmission();
|
||||
bool IsCreatingPipelines();
|
||||
bool IsCreatingPipelineStates();
|
||||
|
||||
D3D12Shader* LoadShader(ShaderType shader_type, uint32_t guest_address,
|
||||
const uint32_t* host_address, uint32_t dword_count);
|
||||
|
@ -57,15 +65,32 @@ class PipelineCache {
|
|||
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, bool tessellated,
|
||||
PrimitiveType primitive_type, IndexFormat index_format, bool early_z,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
|
||||
void** pipeline_state_handle_out,
|
||||
ID3D12RootSignature** root_signature_out);
|
||||
|
||||
// Returns a pipeline with deferred creation by its handle. May return nullptr
|
||||
// if failed to create the pipeline.
|
||||
inline ID3D12PipelineState* GetPipelineStateByHandle(void* handle) const {
|
||||
return reinterpret_cast<const Pipeline*>(handle)->state;
|
||||
// Returns a pipeline state object with deferred creation by its handle. May
|
||||
// return nullptr if failed to create the pipeline state object.
|
||||
inline ID3D12PipelineState* GetD3D12PipelineStateByHandle(
|
||||
void* handle) const {
|
||||
return reinterpret_cast<const PipelineState*>(handle)->state;
|
||||
}
|
||||
|
||||
private:
|
||||
XEPACKEDSTRUCT(ShaderStoredHeader, {
|
||||
uint64_t ucode_data_hash;
|
||||
|
||||
uint32_t ucode_dword_count : 16;
|
||||
ShaderType type : 1;
|
||||
PrimitiveType patch_primitive_type : 6;
|
||||
|
||||
reg::SQ_PROGRAM_CNTL sq_program_cntl;
|
||||
|
||||
static constexpr uint32_t kVersion = 0x20200301;
|
||||
});
|
||||
|
||||
// Update PipelineDescription::kVersion if any of the Pipeline* enums are
|
||||
// changed!
|
||||
|
||||
enum class PipelineStripCutIndex : uint32_t {
|
||||
kNone,
|
||||
kFFFF,
|
||||
|
@ -122,7 +147,8 @@ class PipelineCache {
|
|||
kSrcAlphaSat,
|
||||
};
|
||||
|
||||
struct PipelineRenderTarget {
|
||||
// Update PipelineDescription::kVersion if anything is changed!
|
||||
XEPACKEDSTRUCT(PipelineRenderTarget, {
|
||||
uint32_t used : 1; // 1
|
||||
ColorRenderTargetFormat format : 4; // 5
|
||||
PipelineBlendFactor src_blend : 4; // 9
|
||||
|
@ -132,12 +158,12 @@ class PipelineCache {
|
|||
PipelineBlendFactor dest_blend_alpha : 4; // 24
|
||||
BlendOp blend_op_alpha : 3; // 27
|
||||
uint32_t write_mask : 4; // 31
|
||||
};
|
||||
});
|
||||
|
||||
struct PipelineDescription {
|
||||
ID3D12RootSignature* root_signature;
|
||||
D3D12Shader* vertex_shader;
|
||||
D3D12Shader* pixel_shader;
|
||||
XEPACKEDSTRUCT(PipelineDescription, {
|
||||
uint64_t vertex_shader_hash;
|
||||
// 0 if drawing without a pixel shader.
|
||||
uint64_t pixel_shader_hash;
|
||||
|
||||
int32_t depth_bias;
|
||||
float depth_bias_slope_scaled;
|
||||
|
@ -170,19 +196,34 @@ class PipelineCache {
|
|||
CompareFunction stencil_back_func : 3; // 32
|
||||
|
||||
PipelineRenderTarget render_targets[4];
|
||||
|
||||
static constexpr uint32_t kVersion = 0x20200309;
|
||||
});
|
||||
|
||||
XEPACKEDSTRUCT(PipelineStoredDescription, {
|
||||
uint64_t description_hash;
|
||||
PipelineDescription description;
|
||||
});
|
||||
|
||||
struct PipelineRuntimeDescription {
|
||||
ID3D12RootSignature* root_signature;
|
||||
D3D12Shader* vertex_shader;
|
||||
D3D12Shader* pixel_shader;
|
||||
PipelineDescription description;
|
||||
};
|
||||
|
||||
bool TranslateShader(D3D12Shader* shader, reg::SQ_PROGRAM_CNTL cntl,
|
||||
bool tessellated, PrimitiveType primitive_type);
|
||||
bool TranslateShader(DxbcShaderTranslator& translator, D3D12Shader* shader,
|
||||
reg::SQ_PROGRAM_CNTL cntl,
|
||||
PrimitiveType patch_primitive_type);
|
||||
|
||||
bool GetCurrentStateDescription(
|
||||
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, bool tessellated,
|
||||
PrimitiveType primitive_type, IndexFormat index_format, bool early_z,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[5],
|
||||
PipelineDescription& description_out);
|
||||
PipelineRuntimeDescription& runtime_description_out);
|
||||
|
||||
ID3D12PipelineState* CreatePipelineState(
|
||||
const PipelineDescription& description);
|
||||
ID3D12PipelineState* CreateD3D12PipelineState(
|
||||
const PipelineRuntimeDescription& runtime_description);
|
||||
|
||||
D3D12CommandProcessor* command_processor_;
|
||||
RegisterFile* register_file_;
|
||||
|
@ -200,40 +241,71 @@ class PipelineCache {
|
|||
// Xenos pixel shader provided.
|
||||
std::vector<uint8_t> depth_only_pixel_shader_;
|
||||
|
||||
struct Pipeline {
|
||||
struct PipelineState {
|
||||
// nullptr if creation has failed.
|
||||
ID3D12PipelineState* state;
|
||||
PipelineDescription description;
|
||||
PipelineRuntimeDescription description;
|
||||
};
|
||||
// All previously generated pipelines identified by hash and the description.
|
||||
std::unordered_multimap<uint64_t, Pipeline*> pipelines_;
|
||||
// All previously generated pipeline state objects identified by hash and the
|
||||
// description.
|
||||
std::unordered_multimap<uint64_t, PipelineState*> pipeline_states_;
|
||||
|
||||
// Previously used pipeline. This matches our current state settings
|
||||
// and allows us to quickly(ish) reuse the pipeline if no registers have
|
||||
// changed.
|
||||
Pipeline* current_pipeline_ = nullptr;
|
||||
// Previously used pipeline state object. This matches our current state
|
||||
// settings and allows us to quickly(ish) reuse the pipeline state if no
|
||||
// registers have changed.
|
||||
PipelineState* current_pipeline_state_ = nullptr;
|
||||
|
||||
// Pipeline creation threads.
|
||||
void CreationThread();
|
||||
// Currently open shader storage path.
|
||||
std::wstring shader_storage_root_;
|
||||
uint32_t shader_storage_title_id_ = 0;
|
||||
|
||||
// Shader storage output stream, for preload in the next emulator runs.
|
||||
FILE* shader_storage_file_ = nullptr;
|
||||
bool shader_storage_file_flush_needed_ = false;
|
||||
|
||||
// Pipeline state storage output stream, for preload in the next emulator
|
||||
// runs.
|
||||
FILE* pipeline_state_storage_file_ = nullptr;
|
||||
bool pipeline_state_storage_file_flush_needed_ = false;
|
||||
|
||||
// Thread for asynchronous writing to the storage streams.
|
||||
void StorageWriteThread();
|
||||
std::mutex storage_write_request_lock_;
|
||||
std::condition_variable storage_write_request_cond_;
|
||||
// Storage thread input is protected with storage_write_request_lock_, and the
|
||||
// thread is notified about its change via storage_write_request_cond_.
|
||||
std::deque<std::pair<const Shader*, reg::SQ_PROGRAM_CNTL>>
|
||||
storage_write_shader_queue_;
|
||||
std::deque<PipelineStoredDescription> storage_write_pipeline_state_queue_;
|
||||
bool storage_write_flush_shaders_ = false;
|
||||
bool storage_write_flush_pipeline_states_ = false;
|
||||
bool storage_write_thread_shutdown_ = false;
|
||||
std::unique_ptr<xe::threading::Thread> storage_write_thread_;
|
||||
|
||||
// Pipeline state object creation threads.
|
||||
void CreationThread(size_t thread_index);
|
||||
void CreateQueuedPipelineStatesOnProcessorThread();
|
||||
std::mutex creation_request_lock_;
|
||||
std::condition_variable creation_request_cond_;
|
||||
// Protected with creation_request_lock_, notify_one creation_request_cond_
|
||||
// when set.
|
||||
std::deque<Pipeline*> creation_queue_;
|
||||
// Number of threads that are currently creating a pipeline - incremented when
|
||||
// a pipeline is dequeued (the completion event can't be triggered before this
|
||||
// is zero). Protected with creation_request_lock_.
|
||||
uint32_t creation_threads_busy_ = 0;
|
||||
// Manual-reset event set when the last queued pipeline is created and there
|
||||
// are no more pipelines to create. This is triggered by the thread creating
|
||||
// the last pipeline.
|
||||
std::deque<PipelineState*> creation_queue_;
|
||||
// Number of threads that are currently creating a pipeline state object -
|
||||
// incremented when a pipeline state object is dequeued (the completion event
|
||||
// can't be triggered before this is zero). Protected with
|
||||
// creation_request_lock_.
|
||||
size_t creation_threads_busy_ = 0;
|
||||
// Manual-reset event set when the last queued pipeline state object is
|
||||
// created and there are no more pipeline state objects to create. This is
|
||||
// triggered by the thread creating the last pipeline state object.
|
||||
std::unique_ptr<xe::threading::Event> creation_completion_event_ = nullptr;
|
||||
// Whether setting the event on completion is queued. Protected with
|
||||
// creation_request_lock_, notify_one creation_request_cond_ when set.
|
||||
bool creation_completion_set_event_ = false;
|
||||
// Whether to shut down the creation threads as soon as possible. Protected
|
||||
// with creation_request_lock_, notify_all creation_request_cond_ when set.
|
||||
bool creation_threads_shutdown_ = false;
|
||||
// Creation threads with this index or above need to be shut down as soon as
|
||||
// possible. Protected with creation_request_lock_, notify_all
|
||||
// creation_request_cond_ when set.
|
||||
size_t creation_threads_shutdown_from_ = SIZE_MAX;
|
||||
std::vector<std::unique_ptr<xe::threading::Thread>> creation_threads_;
|
||||
};
|
||||
|
||||
|
|
|
@ -40,17 +40,15 @@ using namespace ucode;
|
|||
// Notes about operands:
|
||||
//
|
||||
// Reading and writing:
|
||||
// - Writes to 4-component registers must be masked.
|
||||
// - Reads from 4-component registers can be swizzled, or 1 component can be
|
||||
// selected.
|
||||
// - r# (temporary registers) are 4-component and can be used anywhere.
|
||||
// - v# (inputs) are 4-component and read-only.
|
||||
// - o# (outputs) are 4-component and write-only.
|
||||
// - oDepth (pixel shader depth output) is 1-component and write-only.
|
||||
// - x# (indexable temporary registers) are 4-component (though not sure what
|
||||
// happens if you dcl them as 1-component) and can be accessed either via
|
||||
// a mov load or a mov store (and those movs are counted as ArrayInstructions
|
||||
// in STAT, not as MovInstructions).
|
||||
// - x# (indexable temporary registers) are 4-component and can be accessed
|
||||
// either via a mov load or a mov store (and those movs are counted as
|
||||
// ArrayInstructions in STAT, not as MovInstructions), even though the D3D11.3
|
||||
// functional specification says x# can be used wherever r# can be used, but
|
||||
// FXC emits only mov load/store in simple tests.
|
||||
//
|
||||
// Indexing:
|
||||
// - Constant buffers use 3D indices in CBx[y][z] format, where x is the ID of
|
||||
|
|
|
@ -30,17 +30,53 @@ namespace gpu {
|
|||
//
|
||||
// IMPORTANT CONTRIBUTION NOTES:
|
||||
//
|
||||
// Not all DXBC instructions accept all kinds of operands equally!
|
||||
// Refer to Shader Model 4 and 5 Assembly on MSDN to see if the needed
|
||||
// swizzle/selection, absolute/negate modifiers and saturation are supported by
|
||||
// the instruction.
|
||||
// While DXBC may look like a flexible and high-level representation with highly
|
||||
// generalized building blocks, actually it has a lot of restrictions on operand
|
||||
// usage!
|
||||
// Check the Direct3D 11.3 Functional Specification before adding anything!
|
||||
// https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm
|
||||
// (the "7. Common Shader Internals" chapter and the documentation of the
|
||||
// specific instruction you want to use).
|
||||
// For instructions, MSDN also provides some information, but it's not as
|
||||
// detailed as the functional specification:
|
||||
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx9-graphics-reference-asm
|
||||
// Before adding anything that behaves in a way that doesn't follow patterns
|
||||
// already used in Xenia, try to write the same logic in HLSL, compile it with
|
||||
// FXC and see the resulting assembly *and preferably binary bytecode* as some
|
||||
// instructions may, for example, require selection rather than swizzling for
|
||||
// certain operands. For bytecode structure, see d3d12TokenizedProgramFormat.hpp
|
||||
// from the Windows Driver Kit.
|
||||
// Most important limitations:
|
||||
// - This is very easy to hit, looks weird at first, and also not very important
|
||||
// for modern drivers using DXILConv, but still needs to be respected for
|
||||
// safety! One instruction can't accept more than one immediate or constant
|
||||
// buffer source operand combined in total:
|
||||
// and r0.x, CB0[0][0].x, l(1)
|
||||
// and r0.x, CB0[0][0].x, CB0[0][0].y
|
||||
// are illegal, even though pretty useful. Copy one of the operands to r#.
|
||||
// - Absolute, negate and saturate are only supported by instructions that
|
||||
// explicitly support them.
|
||||
// - Component selection in the general case (ALU instructions - things like
|
||||
// resource access and flow control mostly explicitly need a specific
|
||||
// component selection mode defined in the specification of the instruction):
|
||||
// - 0-component - for operand types with no data (samplers, labels).
|
||||
// - 1-component - for scalar destination operand types, and for scalar source
|
||||
// operand types when the destination vector has 1 component masked
|
||||
// (including scalar immediates).
|
||||
// - Mask - for vector destination operand types.
|
||||
// - Swizzle - for both vector and scalar (replicated in this case) source
|
||||
// operand types, when the destination vector has 2 or more components
|
||||
// masked. Immediates in this case have XYZW swizzle.
|
||||
// - Select 1 - for vector source operand types, when the destination has 1
|
||||
// component masked or is of a scalar type.
|
||||
// - Input operands (v#) can be used only as sources, output operands (o#) can
|
||||
// be used only as destinations.
|
||||
// - The specification says that x#[] can be used wherever r# can be used,
|
||||
// however, in tests, FXC only emits load/store mov instructions for x#[]
|
||||
// (they are also counted in ArrayInstructions rather than MovInstructions in
|
||||
// STAT), so it's better to only use mov for x#[]. The specification also
|
||||
// permits using x#[] in relative addressing along with r# (as long as
|
||||
// relative addressing isn't nested), but it's probably not very safe either.
|
||||
// Don't do anything that FXC wouldn't do.
|
||||
// TODO(Triang3l): Fix all places violating these rules - currently there are
|
||||
// lots of them in Xenia!
|
||||
//
|
||||
// For bytecode structure, see d3d12TokenizedProgramFormat.hpp from the Windows
|
||||
// Driver Kit.
|
||||
//
|
||||
// Avoid using uninitialized register components - such as registers written to
|
||||
// in "if" and not in "else", but then used outside unconditionally or with a
|
||||
|
|
|
@ -13,8 +13,10 @@ DEFINE_string(trace_gpu_prefix, "scratch/gpu/",
|
|||
"Prefix path for GPU trace files.", "GPU");
|
||||
DEFINE_bool(trace_gpu_stream, false, "Trace all GPU packets.", "GPU");
|
||||
|
||||
DEFINE_string(dump_shaders, "",
|
||||
"Path to write GPU shaders to as they are compiled.", "GPU");
|
||||
DEFINE_string(
|
||||
dump_shaders, "",
|
||||
"For shader debugging, path to dump GPU shaders to as they are compiled.",
|
||||
"GPU");
|
||||
|
||||
DEFINE_bool(vsync, true, "Enable VSYNC.", "GPU");
|
||||
|
||||
|
|
|
@ -20,6 +20,12 @@
|
|||
#include "xenia/ui/graphics_provider.h"
|
||||
#include "xenia/ui/loop.h"
|
||||
|
||||
// Configuration variable gating persistent shader storage.
// GraphicsSystem::InitializeShaderStorage returns immediately, without
// touching the command processor, when this is false.
DEFINE_bool(
|
||||
store_shaders, true,
|
||||
"Store shaders persistently and load them when loading games to avoid "
|
||||
"runtime spikes and freezes when playing the game not for the first time.",
|
||||
"GPU");
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
|
@ -269,6 +275,34 @@ void GraphicsSystem::ClearCaches() {
|
|||
[&]() { command_processor_->ClearCaches(); });
|
||||
}
|
||||
|
||||
void GraphicsSystem::InitializeShaderStorage(const std::wstring& storage_root,
|
||||
uint32_t title_id, bool blocking) {
|
||||
if (!cvars::store_shaders) {
|
||||
return;
|
||||
}
|
||||
if (blocking) {
|
||||
if (command_processor_->is_paused()) {
|
||||
// Safe to run on any thread while the command processor is paused, no
|
||||
// race condition.
|
||||
command_processor_->InitializeShaderStorage(storage_root, title_id, true);
|
||||
} else {
|
||||
xe::threading::Fence fence;
|
||||
command_processor_->CallInThread(
|
||||
[this, storage_root, title_id, &fence]() {
|
||||
command_processor_->InitializeShaderStorage(storage_root, title_id,
|
||||
true);
|
||||
fence.Signal();
|
||||
});
|
||||
fence.Wait();
|
||||
}
|
||||
} else {
|
||||
command_processor_->CallInThread([this, storage_root, title_id]() {
|
||||
command_processor_->InitializeShaderStorage(storage_root, title_id,
|
||||
false);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void GraphicsSystem::RequestFrameTrace() {
|
||||
command_processor_->RequestFrameTrace(
|
||||
xe::to_wstring(cvars::trace_gpu_prefix));
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include "xenia/cpu/processor.h"
|
||||
|
@ -62,6 +63,9 @@ class GraphicsSystem {
|
|||
|
||||
virtual void ClearCaches();
|
||||
|
||||
void InitializeShaderStorage(const std::wstring& storage_root,
|
||||
uint32_t title_id, bool blocking);
|
||||
|
||||
void RequestFrameTrace();
|
||||
void BeginTracing();
|
||||
void EndTracing();
|
||||
|
|
|
@ -102,7 +102,7 @@ int TraceDump::Main(const std::vector<std::wstring>& args) {
|
|||
|
||||
bool TraceDump::Setup() {
|
||||
// Create the emulator but don't initialize so we can setup the window.
|
||||
emulator_ = std::make_unique<Emulator>(L"", L"");
|
||||
emulator_ = std::make_unique<Emulator>(L"", L"", L"");
|
||||
X_STATUS result = emulator_->Setup(
|
||||
nullptr, nullptr, [this]() { return CreateGraphicsSystem(); }, nullptr);
|
||||
if (XFAILED(result)) {
|
||||
|
|
|
@ -122,7 +122,7 @@ bool TraceViewer::Setup() {
|
|||
window_->Resize(1920, 1200);
|
||||
|
||||
// Create the emulator but don't initialize so we can setup the window.
|
||||
emulator_ = std::make_unique<Emulator>(L"", L"");
|
||||
emulator_ = std::make_unique<Emulator>(L"", L"", L"");
|
||||
X_STATUS result = emulator_->Setup(
|
||||
window_.get(), nullptr, [this]() { return CreateGraphicsSystem(); },
|
||||
nullptr);
|
||||
|
|
|
@ -68,13 +68,20 @@ dword_result_t XNotifyGetNext(dword_t handle, dword_t match_id,
|
|||
dequeued = listener->DequeueNotification(&id, &param);
|
||||
}
|
||||
|
||||
// param_ptr may be null - Ghost Recon Advanced Warfighter 2 Demo explicitly
|
||||
// passes nullptr in the code.
|
||||
// https://github.com/xenia-project/xenia/pull/1577
|
||||
if (dequeued) {
|
||||
*id_ptr = id;
|
||||
if (param_ptr) {
|
||||
*param_ptr = param;
|
||||
}
|
||||
} else {
|
||||
*id_ptr = 0;
|
||||
if (param_ptr) {
|
||||
*param_ptr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return dequeued ? 1 : 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue