From 87aecfa1b86ad2304163475cc207deab3a7e82eb Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 24 Jul 2018 14:57:21 +0300 Subject: [PATCH] [D3D12] Shaders (not all compiling yet) --- .../gpu/d3d12/d3d12_command_processor.cc | 116 +++++++++- src/xenia/gpu/d3d12/d3d12_command_processor.h | 25 ++- src/xenia/gpu/d3d12/d3d12_shader.cc | 117 ++++++++++ src/xenia/gpu/d3d12/d3d12_shader.h | 39 ++++ src/xenia/gpu/d3d12/pipeline_cache.cc | 199 ++++++++++++++++++ src/xenia/gpu/d3d12/pipeline_cache.h | 93 ++++++++ src/xenia/gpu/hlsl_shader_translator.cc | 125 +++++++---- src/xenia/ui/d3d12/d3d12_api.h | 1 + src/xenia/ui/d3d12/d3d12_context.cc | 13 +- src/xenia/ui/d3d12/d3d12_context.h | 2 + 10 files changed, 671 insertions(+), 59 deletions(-) create mode 100644 src/xenia/gpu/d3d12/d3d12_shader.cc create mode 100644 src/xenia/gpu/d3d12/d3d12_shader.h create mode 100644 src/xenia/gpu/d3d12/pipeline_cache.cc create mode 100644 src/xenia/gpu/d3d12/pipeline_cache.h diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 04fc5415f..cfc894a36 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -9,6 +9,12 @@ #include "xenia/gpu/d3d12/d3d12_command_processor.h" +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/d3d12/d3d12_shader.h" +#include "xenia/gpu/xenos.h" + namespace xe { namespace gpu { namespace d3d12 { @@ -18,33 +24,133 @@ D3D12CommandProcessor::D3D12CommandProcessor( : CommandProcessor(graphics_system, kernel_state) {} D3D12CommandProcessor::~D3D12CommandProcessor() = default; +void D3D12CommandProcessor::ClearCaches() { + CommandProcessor::ClearCaches(); + cache_clear_requested_ = true; +} + bool D3D12CommandProcessor::SetupContext() { - return CommandProcessor::SetupContext(); + if (!CommandProcessor::SetupContext()) { + XELOGE("Unable to initialize base command processor 
context"); + return false; + } + + auto context = GetD3D12Context(); + + pipeline_cache_ = std::make_unique(register_file_, context); + + return true; } void D3D12CommandProcessor::ShutdownContext() { - return CommandProcessor::ShutdownContext(); + auto context = GetD3D12Context(); + context->AwaitAllFramesCompletion(); + + pipeline_cache_.reset(); + + CommandProcessor::ShutdownContext(); } void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, - uint32_t frontbuffer_height) {} + uint32_t frontbuffer_height) { + SCOPE_profile_cpu_f("gpu"); + + if (current_queue_frame_ != UINT32_MAX) { + EndFrame(); + } + + if (cache_clear_requested_) { + cache_clear_requested_ = false; + GetD3D12Context()->AwaitAllFramesCompletion(); + pipeline_cache_->ClearCache(); + } +} Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count) { - return nullptr; + return pipeline_cache_->LoadShader(shader_type, guest_address, host_address, + dword_count); } -bool D3D12CommandProcessor::IssueDraw(PrimitiveType prim_type, +bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) { + auto& regs = *register_file_; + +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + + auto enable_mode = static_cast( + regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); + if (enable_mode == xenos::ModeControl::kIgnore) { + // Ignored. + return true; + } else if (enable_mode == xenos::ModeControl::kCopy) { + // Special copy handling. + return IssueCopy(); + } + + if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) { + // Doesn't actually draw. + return true; + } + + // Shaders will have already been defined by previous loads. + // We need them to do just about anything so validate here. 
+ auto vertex_shader = static_cast(active_vertex_shader()); + auto pixel_shader = static_cast(active_pixel_shader()); + if (!vertex_shader) { + // Always need a vertex shader. + return false; + } + // Depth-only mode doesn't need a pixel shader (we'll use a fake one). + if (enable_mode == xenos::ModeControl::kDepth) { + // Use a dummy pixel shader when required. + pixel_shader = nullptr; + } else if (!pixel_shader) { + // Need a pixel shader in normal color mode. + return true; + } + + bool full_update = false; + if (current_queue_frame_ == UINT32_MAX) { + BeginFrame(); + full_update = true; + } + + auto pipeline_status = pipeline_cache_->ConfigurePipeline( + vertex_shader, pixel_shader, primitive_type); + if (pipeline_status == PipelineCache::UpdateStatus::kError) { + return false; + } + return true; } bool D3D12CommandProcessor::IssueCopy() { return true; } +void D3D12CommandProcessor::BeginFrame() { + assert_true(current_queue_frame_ == UINT32_MAX); + auto context = GetD3D12Context(); + + context->BeginSwap(); + + current_queue_frame_ = context->GetCurrentQueueFrame(); +} + +void D3D12CommandProcessor::EndFrame() { + assert_true(current_queue_frame_ != UINT32_MAX); + auto context = GetD3D12Context(); + + context->EndSwap(); + + current_queue_frame_ = UINT32_MAX; +} + } // namespace d3d12 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 22e843cf8..8f7856655 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -10,10 +10,14 @@ #ifndef XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_ #define XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_ +#include + #include "xenia/gpu/command_processor.h" #include "xenia/gpu/d3d12/d3d12_graphics_system.h" +#include "xenia/gpu/d3d12/pipeline_cache.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" +#include "xenia/ui/d3d12/d3d12_context.h" namespace xe { 
namespace gpu { @@ -25,7 +29,14 @@ class D3D12CommandProcessor : public CommandProcessor { kernel::KernelState* kernel_state); ~D3D12CommandProcessor(); - private: + void ClearCaches() override; + + // Needed by everything that owns transient objects. + xe::ui::d3d12::D3D12Context* GetD3D12Context() const { + return static_cast(context_.get()); + } + + protected: bool SetupContext() override; void ShutdownContext() override; @@ -36,9 +47,19 @@ class D3D12CommandProcessor : public CommandProcessor { const uint32_t* host_address, uint32_t dword_count) override; - bool IssueDraw(PrimitiveType prim_type, uint32_t index_count, + bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info) override; bool IssueCopy() override; + + private: + void BeginFrame(); + void EndFrame(); + + bool cache_clear_requested_ = false; + + std::unique_ptr pipeline_cache_; + + uint32_t current_queue_frame_ = UINT32_MAX; }; } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/d3d12_shader.cc b/src/xenia/gpu/d3d12/d3d12_shader.cc new file mode 100644 index 000000000..fa9edf390 --- /dev/null +++ b/src/xenia/gpu/d3d12/d3d12_shader.cc @@ -0,0 +1,117 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2018 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/d3d12/d3d12_shader.h" + +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/gpu/gpu_flags.h" + +DEFINE_bool(d3d12_shader_disasm, true, + "Disassemble translated shaders after compilation."); + +namespace xe { +namespace gpu { +namespace d3d12 { + +D3D12Shader::D3D12Shader(ShaderType shader_type, uint64_t data_hash, + const uint32_t* dword_ptr, uint32_t dword_count) + : Shader(shader_type, data_hash, dword_ptr, dword_count) {} + +D3D12Shader::~D3D12Shader() { + if (blob_ != nullptr) { + blob_->Release(); + } +} + +bool D3D12Shader::Prepare() { + assert_null(blob_); + assert_true(is_valid()); + + const char* target; + switch (shader_type_) { + case ShaderType::kVertex: + target = "vs_5_1"; + break; + case ShaderType::kPixel: + target = "ps_5_1"; + break; + default: + assert_unhandled_case(shader_type_); + return false; + } + + // TODO(Triang3l): Choose the appropriate optimization level based on compile + // time and how invariance is handled in vertex shaders. + ID3DBlob* error_blob = nullptr; + bool compiled = + SUCCEEDED(D3DCompile(translated_binary_.data(), translated_binary_.size(), + nullptr, nullptr, nullptr, "main", target, + D3DCOMPILE_OPTIMIZATION_LEVEL0, 0, &blob_, + &error_blob)); + + if (!compiled) { + XELOGE("%s shader %.16llX compilation failed!", target, ucode_data_hash()); + } + if (error_blob != nullptr) { + if (compiled) { + XELOGW("%s shader %.16llX compiled with warnings!", target, + ucode_data_hash()); + XELOGW("%s", reinterpret_cast(error_blob->GetBufferPointer())); + XELOGW("HLSL source:"); + // The buffer isn't terminated. 
+ translated_binary_.push_back(0); + XELOGW("%s", reinterpret_cast(translated_binary_.data())); + translated_binary_.pop_back(); + } else { + XELOGE("%s", reinterpret_cast(error_blob->GetBufferPointer())); + XELOGE("HLSL source:"); + translated_binary_.push_back(0); + XELOGE("%s", reinterpret_cast(translated_binary_.data())); + translated_binary_.pop_back(); + } + error_blob->Release(); + } + + if (!compiled) { + return false; + } + + if (FLAGS_d3d12_shader_disasm) { + ID3DBlob* disassembly_blob; + if (SUCCEEDED(D3DDisassemble(blob_->GetBufferPointer(), + blob_->GetBufferSize(), 0, nullptr, + &disassembly_blob))) { + host_disassembly_ = + reinterpret_cast(disassembly_blob->GetBufferPointer()); + disassembly_blob->Release(); + } else { + XELOGE("Failed to disassemble DXBC for %s shader %.16llX", target, + ucode_data_hash()); + } + } + + return true; +} + +const uint8_t* D3D12Shader::GetDXBC() const { + assert_not_null(blob_); + return reinterpret_cast(blob_->GetBufferPointer()); +} + +size_t D3D12Shader::GetDXBCSize() const { + assert_not_null(blob_); + return blob_->GetBufferSize(); +} + +} // namespace d3d12 +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/d3d12/d3d12_shader.h b/src/xenia/gpu/d3d12/d3d12_shader.h new file mode 100644 index 000000000..f7cca96f0 --- /dev/null +++ b/src/xenia/gpu/d3d12/d3d12_shader.h @@ -0,0 +1,39 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2018 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_D3D12_D3D12_SHADER_H_ +#define XENIA_GPU_D3D12_D3D12_SHADER_H_ + +#include "xenia/gpu/shader.h" +#include "xenia/ui/d3d12/d3d12_api.h" + +namespace xe { +namespace gpu { +namespace d3d12 { + +class D3D12Shader : public Shader { + public: + D3D12Shader(ShaderType shader_type, uint64_t data_hash, + const uint32_t* dword_ptr, uint32_t dword_count); + ~D3D12Shader() override; + + bool Prepare(); + + const uint8_t* GetDXBC() const; + size_t GetDXBCSize() const; + + private: + ID3DBlob* blob_ = nullptr; +}; + +} // namespace d3d12 +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_D3D12_D3D12_SHADER_H_ diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc new file mode 100644 index 000000000..3dfa3090a --- /dev/null +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -0,0 +1,199 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2018 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/d3d12/pipeline_cache.h" + +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/hlsl_shader_translator.h" + +namespace xe { +namespace gpu { +namespace d3d12 { + +PipelineCache::PipelineCache(RegisterFile* register_file, + ui::d3d12::D3D12Context* context) + : register_file_(register_file), context_(context) { + shader_translator_.reset(new HlslShaderTranslator()); +} + +PipelineCache::~PipelineCache() { Shutdown(); } + +void PipelineCache::Shutdown() { + ClearCache(); +} + +D3D12Shader* PipelineCache::LoadShader(ShaderType shader_type, + uint32_t guest_address, + const uint32_t* host_address, + uint32_t dword_count) { + // Hash the input memory and lookup the shader. + uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); + auto it = shader_map_.find(data_hash); + if (it != shader_map_.end()) { + // Shader has been previously loaded. + return it->second; + } + + // Always create the shader and stash it away. + // We need to track it even if it fails translation so we know not to try + // again. + D3D12Shader* shader = new D3D12Shader(shader_type, data_hash, host_address, + dword_count); + shader_map_.insert({data_hash, shader}); + + return shader; +} + +PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline( + D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, + PrimitiveType primitive_type) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + return UpdateState(vertex_shader, pixel_shader, primitive_type); +} + +void PipelineCache::ClearCache() { + // Destroy all shaders. 
+ for (auto it : shader_map_) { + delete it.second; + } + shader_map_.clear(); +} + +bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { + uint32_t value = register_file_->values[register_name].u32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + +bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) { + float value = register_file_->values[register_name].f32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + +bool PipelineCache::TranslateShader(D3D12Shader* shader, + xenos::xe_gpu_program_cntl_t cntl) { + // Perform translation. + // If this fails the shader will be marked as invalid and ignored later. + if (!shader_translator_->Translate(shader, cntl)) { + XELOGE("Shader translation failed; marking shader as ignored"); + return false; + } + + // Prepare the shader for use (creates the Shader Model bytecode). + // It could still fail at this point. + if (!shader->Prepare()) { + XELOGE("Shader preparation failed; marking shader as ignored"); + return false; + } + + if (shader->is_valid()) { + XELOGGPU("Generated %s shader (%db) - hash %.16" PRIX64 ":\n%s\n", + shader->type() == ShaderType::kVertex ? "vertex" : "pixel", + shader->ucode_dword_count() * 4, shader->ucode_data_hash(), + shader->ucode_disassembly().c_str()); + } + + // Dump shader files if desired. + if (!FLAGS_dump_shaders.empty()) { + shader->Dump(FLAGS_dump_shaders, "d3d12"); + } + + return shader->is_valid(); +} + +PipelineCache::UpdateStatus PipelineCache::UpdateState( + D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, + PrimitiveType primitive_type) { + bool mismatch = false; + + // Reset hash so we can build it up. 
+ XXH64_reset(&hash_state_, 0); + +#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ + { \ + if (status == UpdateStatus::kError) { \ + XELOGE(error_message); \ + return status; \ + } else if (status == UpdateStatus::kMismatch) { \ + mismatch = true; \ + } \ + } + + UpdateStatus status; + status = UpdateShaderStages(vertex_shader, pixel_shader, primitive_type); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update shader stages"); + +#undef CHECK_UPDATE_STATUS + + return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( + D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, + PrimitiveType primitive_type) { + auto& regs = update_shader_stages_regs_; + + // These are the constant base addresses/ranges for shaders. + // We have these hardcoded right now cause nothing seems to differ. + assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == + 0x000FF000 || + register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); + assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == + 0x000FF100 || + register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + + bool dirty = false; + dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); + dirty |= regs.vertex_shader != vertex_shader; + dirty |= regs.pixel_shader != pixel_shader; + dirty |= regs.primitive_type != primitive_type; + regs.vertex_shader = vertex_shader; + regs.pixel_shader = pixel_shader; + regs.primitive_type = primitive_type; + XXH64_update(&hash_state_, &regs, sizeof(regs)); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + xenos::xe_gpu_program_cntl_t sq_program_cntl; + sq_program_cntl.dword_0 = regs.sq_program_cntl; + + if (!vertex_shader->is_translated() && + !TranslateShader(vertex_shader, sq_program_cntl)) { + XELOGE("Failed to translate the vertex shader!"); 
+ return UpdateStatus::kError; + } + + if (pixel_shader && !pixel_shader->is_translated() && + !TranslateShader(pixel_shader, sq_program_cntl)) { + XELOGE("Failed to translate the pixel shader!"); + return UpdateStatus::kError; + } + + return UpdateStatus::kMismatch; +} + +} // namespace d3d12 +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h new file mode 100644 index 000000000..9fe2b7dce --- /dev/null +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -0,0 +1,93 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2018 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_D3D12_PIPELINE_CACHE_H_ +#define XENIA_GPU_D3D12_PIPELINE_CACHE_H_ + +#include + +#include "third_party/xxhash/xxhash.h" + +#include "xenia/gpu/d3d12/d3d12_shader.h" +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/shader_translator.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/d3d12/d3d12_context.h" + +namespace xe { +namespace gpu { +namespace d3d12 { + +class PipelineCache { + public: + enum class UpdateStatus { + kCompatible, + kMismatch, + kError, + }; + + PipelineCache(RegisterFile* register_file, ui::d3d12::D3D12Context* context); + ~PipelineCache(); + + void Shutdown(); + + D3D12Shader* LoadShader(ShaderType shader_type, uint32_t guest_address, + const uint32_t* host_address, uint32_t dword_count); + + UpdateStatus ConfigurePipeline(D3D12Shader* vertex_shader, + D3D12Shader* pixel_shader, + PrimitiveType primitive_type); + + void ClearCache(); + + private: + bool SetShadowRegister(uint32_t* dest, uint32_t register_name); + bool SetShadowRegister(float* dest, 
uint32_t register_name); + + bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl); + + UpdateStatus UpdateState(D3D12Shader* vertex_shader, + D3D12Shader* pixel_shader, + PrimitiveType primitive_type); + + UpdateStatus UpdateShaderStages(D3D12Shader* vertex_shader, + D3D12Shader* pixel_shader, + PrimitiveType primitive_type); + + RegisterFile* register_file_ = nullptr; + ui::d3d12::D3D12Context* context_ = nullptr; + + // Reusable shader translator. + std::unique_ptr shader_translator_ = nullptr; + // All loaded shaders mapped by their guest hash key. + std::unordered_map shader_map_; + + // Hash state used to incrementally produce pipeline hashes during update. + // By the time the full update pass has run the hash will represent the + // current state in a way that can uniquely identify the produced + // ID3D12PipelineState. + XXH64_state_t hash_state_; + + struct UpdateShaderStagesRegisters { + PrimitiveType primitive_type; + uint32_t pa_su_sc_mode_cntl; + uint32_t sq_program_cntl; + D3D12Shader* vertex_shader; + D3D12Shader* pixel_shader; + + UpdateShaderStagesRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_shader_stages_regs_; +}; + +} // namespace d3d12 +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_D3D12_PIPELINE_CACHE_H_ diff --git a/src/xenia/gpu/hlsl_shader_translator.cc b/src/xenia/gpu/hlsl_shader_translator.cc index 4bc377340..c75da2adf 100644 --- a/src/xenia/gpu/hlsl_shader_translator.cc +++ b/src/xenia/gpu/hlsl_shader_translator.cc @@ -76,13 +76,22 @@ void HlslShaderTranslator::StartTranslation() { Indent(); // Switch level (3). Indent(); - EmitSourceDepth("case 0:\n"); + EmitSourceDepth("case 0u:\n"); } std::vector HlslShaderTranslator::CompleteTranslation() { // Add the declarations, the prologue and the epilogue knowing what is needed. StringBuffer source; + // Common preprocessor statements. 
+ // 3557 is the "loop only executes for 1 iteration" warning caused by the + // control flow loop design. + source.Append( + "#pragma warning(disable : 3557)\n" + "\n" + "#define XE_FLT_MAX 3.402823466e+38\n" + "\n"); + // Cubemap sampling. XeCubeTo2D emulates the cube vector ALU instruction that // gives (t, s, 2 * major axis, face index), XeCubeTo3D reverts its effects // in tfetchCube because sampling a cubemap as an array doesn't work properly @@ -132,7 +141,6 @@ std::vector HlslShaderTranslator::CompleteTranslation() { "}\n" "\n" "float3 XeCubeTo3D(float3 xe_cube_2d) {\n" - "{\n" " xe_cube_2d.xy = (xe_cube_2d.xy * 2.0) + 1.0;\n" " float3 xe_cube_3d;\n" " uint xe_cube_face_index = uint(xe_cube_2d.z);\n" @@ -157,24 +165,22 @@ std::vector HlslShaderTranslator::CompleteTranslation() { // Common declarations. source.Append( - "#define XE_FLT_MAX 3.402823466e+38\n" - "\n" "cbuffer xe_system_constants : register(b0) {\n" " float2 xe_viewport_inv_scale;\n" " uint xe_vertex_index_endian;\n" " uint xe_textures_are_3d;\n" - "}\n" + "};\n" "\n" "struct XeFloatConstantPage {\n" " float4 c[16];\n" - "}\n" + "};\n" "ConstantBuffer " "xe_float_constants[16] : register(b1);\n" "\n" "cbuffer xe_loop_bool_constants : register(b17) {\n" " uint xe_bool_constants[8];\n" " uint xe_loop_constants[32];\n" - "}\n" + "};\n" "\n"); if (is_vertex_shader()) { @@ -184,15 +190,17 @@ std::vector HlslShaderTranslator::CompleteTranslation() { // 11 for 16-in-32. This means we can check bits 0 ^ 1 to see if we need to // do a 8-in-16 swap, and bit 1 to see if a 16-in-32 swap is needed. // Vertex element is a temporary integer value for fetches. + // -1 point size means the geometry shader will use the global setting by + // default. 
source.AppendFormat( "cbuffer xe_vertex_fetch_constants : register(b18) {\n" " uint2 xe_vertex_fetch[96];\n" - "}\n" + "};\n" "\n" "ByteAddressBuffer xe_virtual_memory : register(t0, space1);\n" "\n" - "#define XE_SWAP_OVERLOAD(XeSwapType) \\\n" - "XeSwapType XeSwap(XeSwapType v, uint endian) { \\\n" + "#define XE_BYTE_SWAP_OVERLOAD(XeByteSwapType) \\\n" + "XeByteSwapType XeByteSwap(XeByteSwapType v, uint endian) { \\\n" " [flatten] if (((endian ^ (endian >> 1u)) & 1u) != 0u) { \\\n" " v = ((v & 0x00FF00FFu) << 8u) | ((v & 0xFF00FF00u) >> 8u); \\\n" " } \\\n" @@ -201,25 +209,30 @@ std::vector HlslShaderTranslator::CompleteTranslation() { " } \\\n" " return v; \\\n" "}\n" - "XE_SWAP_OVERLOAD(uint)\n" - "XE_SWAP_OVERLOAD(uint2)\n" - "XE_SWAP_OVERLOAD(uint3)\n" - "XE_SWAP_OVERLOAD(uint4)\n" + "XE_BYTE_SWAP_OVERLOAD(uint)\n" + "XE_BYTE_SWAP_OVERLOAD(uint2)\n" + "XE_BYTE_SWAP_OVERLOAD(uint3)\n" + "XE_BYTE_SWAP_OVERLOAD(uint4)\n" "\n" "struct XeVertexShaderOutput {\n" " float4 position : SV_Position;\n" " float4 interpolators[%u] : TEXCOORD;\n" - " float4 point_size : PSIZE;\n" - "}\n" + " float point_size : PSIZE;\n" + "};\n" "\n" "XeVertexShaderOutput main(uint xe_vertex_index_be : SV_VertexID) {\n" " float4 xe_r[%u];\n" " uint xe_vertex_index =\n" - " XeSwap(xe_vertex_index_be, xe_vertex_index_endian);\n" + " XeByteSwap(xe_vertex_index_be, xe_vertex_index_endian);\n" " uint4 xe_vertex_element;\n" " xe_r[0].r = float(xe_vertex_index);\n" - " XeVertexShaderOutput xe_output;\n", + " XeVertexShaderOutput xe_output;\n" + " xe_output.position = float4(0.0, 0.0, 0.0, 1.0);\n" + " xe_output.point_size = -1.0;\n", kMaxInterpolators, register_count()); + for (uint32_t i = 0; i < kMaxInterpolators; ++i) { + source.AppendFormat(" xe_output.interpolators[%u] = (0.0).xxxx;\n", i); + } // TODO(Triang3l): Reset interpolators to zero if really needed. } else if (is_pixel_shader()) { // Pixel shader inputs, outputs and prologue. 
@@ -229,18 +242,26 @@ std::vector HlslShaderTranslator::CompleteTranslation() { "struct XePixelShaderInput {\n" " float4 position : SV_Position;\n" " float4 interpolators[%u] : TEXCOORD;\n" - "}\n" + "};\n" "\n" "struct XePixelShaderOutput {\n" " float4 colors[4] : SV_Target;\n" "%s" - "}\n" + "};\n" "\n" "XePixelShaderOutput main(XePixelShaderInput xe_input) {\n" " float4 xe_r[%u];\n" - " XePixelShaderOutput xe_output;\n", + " XePixelShaderOutput xe_output;\n" + " xe_output.colors[0] = (0.0).xxxx;\n" + " xe_output.colors[1] = (0.0).xxxx;\n" + " xe_output.colors[2] = (0.0).xxxx;\n" + " xe_output.colors[3] = (0.0).xxxx;\n", kMaxInterpolators, writes_depth_ ? " float depth : SV_Depth;\n" : "", register_count()); + // Initialize SV_Depth if using it. + if (writes_depth_) { + source.Append(" xe_output.depth = xe_input.position.z;\n"); + } // Copy interpolants to the first registers. uint32_t interpolator_register_count = std::min(register_count(), kMaxInterpolators); @@ -262,9 +283,9 @@ std::vector HlslShaderTranslator::CompleteTranslation() { // Sources for instructions. " float4 xe_src0, xe_src1, xe_src2;\n" // Previous vector result (used as a scratch). - " float4 xe_pv;\n" + " float4 xe_pv = float4(0.0, 0.0, 0.0, 0.0);\n" // Previous scalar result (used for RETAIN_PREV). - " float xe_ps;\n" + " float xe_ps = 0.0;\n" // Predicate temp, clause-local. Initially false like cf_exec_pred_cond_. " bool xe_p0 = false;\n" // Address register when using absolute addressing. @@ -284,9 +305,15 @@ std::vector HlslShaderTranslator::CompleteTranslation() { source.Append(source_inner_.GetString()); // Epilogue. + if (!cf_wrote_pc_) { + source.Append( + " xe_pc = 0xFFFFu;\n" + " break;\n"); + } source.Append( " default:\n" - " pc = 0xFFFFu;\n" + " xe_pc = 0xFFFFu;\n" + " break;\n" " }\n" " } while (xe_pc != 0xFFFFu);\n"); // TODO(Triang3l): Window offset, half pixel offset, alpha test, gamma. 
@@ -300,7 +327,11 @@ std::vector HlslShaderTranslator::CompleteTranslation() { void HlslShaderTranslator::ProcessLabel(uint32_t cf_index) { // 0 is always added in the beginning. if (cf_index != 0) { - EmitSourceDepth("case %u:\n", cf_index); + if (!cf_wrote_pc_) { + EmitSourceDepth("xe_pc = %uu;\n", cf_index); + EmitSourceDepth("break;"); + } + EmitSourceDepth("case %uu:\n", cf_index); } } @@ -311,14 +342,12 @@ void HlslShaderTranslator::ProcessControlFlowNopInstruction(uint32_t cf_index) { void HlslShaderTranslator::ProcessControlFlowInstructionBegin( uint32_t cf_index) { cf_wrote_pc_ = false; - Indent(); } void HlslShaderTranslator::ProcessControlFlowInstructionEnd(uint32_t cf_index) { if (!cf_wrote_pc_) { EmitSourceDepth("// Falling through to L%u\n", cf_index + 1); } - Unindent(); } void HlslShaderTranslator::ProcessExecInstructionBegin( @@ -364,21 +393,23 @@ void HlslShaderTranslator::ProcessLoopStartInstruction( // Setup counter. EmitSourceDepth("xe_loop_count.yzw = xe_loop_count.xyz;\n"); - EmitSourceDepth("xe_loop_count.x = xe_loop_constants[%u] & 0xFFu;\n"); + EmitSourceDepth("xe_loop_count.x = xe_loop_constants[%u] & 0xFFu;\n", + instr.loop_constant_index); // Setup relative indexing. EmitSourceDepth("xe_aL = xe_aL.xxyz;\n"); if (!instr.is_repeat) { // Push new loop starting index if not reusing the current one. - EmitSourceDepth("xe_aL.x = int((xe_loop_constants[%u] >> 8u) & 0xFFu);\n"); + EmitSourceDepth("xe_aL.x = int((xe_loop_constants[%u] >> 8u) & 0xFFu);\n", + instr.loop_constant_index); } // Quick skip loop if zero count. 
EmitSourceDepth("if (xe_loop_count.x == 0u) {\n"); - EmitSourceDepth(" xe_pc = %u; // Skip loop to L%u\n", + EmitSourceDepth(" xe_pc = %uu; // Skip loop to L%u\n", instr.loop_skip_address, instr.loop_skip_address); EmitSourceDepth("} else {\n"); - EmitSourceDepth(" xe_pc = %u; // Fallthrough to loop body L%u\n", + EmitSourceDepth(" xe_pc = %uu; // Fallthrough to loop body L%u\n", instr.dword_index + 1, instr.dword_index + 1); EmitSourceDepth("}\n"); EmitSourceDepth("break;\n"); @@ -406,7 +437,7 @@ void HlslShaderTranslator::ProcessLoopEndInstruction( EmitSourceDepth("xe_loop_count.w = 0u;\n"); EmitSourceDepth("xe_aL.xyz = xe_aL.yzw;\n"); EmitSourceDepth("xe_aL.w = 0;\n"); - EmitSourceDepth("xe_pc = %u; // Exit loop to L%u\n", instr.dword_index + 1, + EmitSourceDepth("xe_pc = %uu; // Exit loop to L%u\n", instr.dword_index + 1, instr.dword_index + 1); Unindent(); @@ -416,7 +447,7 @@ void HlslShaderTranslator::ProcessLoopEndInstruction( // Still looping. Adjust index and jump back to body. 
EmitSourceDepth("xe_aL.x += int(xe_loop_constants[%u] << 8u) >> 24;\n", instr.loop_constant_index); - EmitSourceDepth("xe_pc = %u; // Loop back to body L%u\n", + EmitSourceDepth("xe_pc = %uu; // Loop back to body L%u\n", instr.loop_body_address, instr.loop_body_address); Unindent(); @@ -465,7 +496,7 @@ void HlslShaderTranslator::ProcessJumpInstruction( } Indent(); - EmitSourceDepth("xe_pc = %u; // L%u\n", instr.target_address, + EmitSourceDepth("xe_pc = %uu; // L%u\n", instr.target_address, instr.target_address); EmitSourceDepth("break;\n"); @@ -473,7 +504,7 @@ void HlslShaderTranslator::ProcessJumpInstruction( if (needs_fallthrough) { uint32_t next_address = instr.dword_index + 1; EmitSourceDepth("} else {\n"); - EmitSourceDepth(" xe_pc = %u; // Fallthrough to L%u\n", next_address, + EmitSourceDepth(" xe_pc = %uu; // Fallthrough to L%u\n", next_address, next_address); } EmitSourceDepth("}\n"); @@ -687,9 +718,6 @@ void HlslShaderTranslator::EmitStoreResult(const InstructionResult& result, break; } } else { - if (result.is_clamped) { - EmitSource("saturate("); - } bool has_const_writes = false; uint32_t component_write_count = 0; EmitSource("."); @@ -704,6 +732,9 @@ void HlslShaderTranslator::EmitStoreResult(const InstructionResult& result, } } EmitSource(" = "); + if (result.is_clamped) { + EmitSource("saturate("); + } if (has_const_writes) { if (component_write_count > 1) { EmitSource("float%u(", component_write_count); @@ -804,12 +835,18 @@ void HlslShaderTranslator::ProcessVertexFetchInstruction( load_function_suffix = ""; break; } - EmitSourceDepth("xe_vertex_element%s = XeSwap(xe_virtual_memory.Load%s(\n", - load_swizzle, load_function_suffix); - EmitSourceDepth(" (xe_vertex_fetch[%u].x & 0x1FFFFFFCu) + " - "uint(xe_src0.x) * %u + %u),\n", - instr.operands[1].storage_index, instr.attributes.stride * 4, - instr.attributes.offset * 4); + EmitSourceDepth( + "xe_vertex_element%s = XeByteSwap(xe_virtual_memory.Load%s(\n", + load_swizzle, load_function_suffix); + 
EmitSourceDepth(" (xe_vertex_fetch[%uu].x & 0x1FFFFFFCu)", + instr.operands[1].storage_index); + if (instr.attributes.stride != 0) { + EmitSource(" + uint(xe_src0.x) * %uu", instr.attributes.stride * 4); + } + if (instr.attributes.offset != 0) { + EmitSource(" + %uu", instr.attributes.offset * 4); + } + EmitSource("),\n"); EmitSourceDepth(" xe_vertex_fetch[%u].y);\n", instr.operands[1].storage_index); diff --git a/src/xenia/ui/d3d12/d3d12_api.h b/src/xenia/ui/d3d12/d3d12_api.h index 3add074ad..53d0d3b8a 100644 --- a/src/xenia/ui/d3d12/d3d12_api.h +++ b/src/xenia/ui/d3d12/d3d12_api.h @@ -15,6 +15,7 @@ #include #include +#include #define XELOGD3D XELOGI diff --git a/src/xenia/ui/d3d12/d3d12_context.cc b/src/xenia/ui/d3d12/d3d12_context.cc index 4feefa00c..7fd78acbf 100644 --- a/src/xenia/ui/d3d12/d3d12_context.cc +++ b/src/xenia/ui/d3d12/d3d12_context.cc @@ -9,18 +9,13 @@ #include "xenia/ui/d3d12/d3d12_context.h" -#include - #include "xenia/base/logging.h" #include "xenia/base/math.h" +#include "xenia/gpu/gpu_flags.h" #include "xenia/ui/d3d12/d3d12_immediate_drawer.h" #include "xenia/ui/d3d12/d3d12_provider.h" #include "xenia/ui/window.h" -DEFINE_int32(d3d12_sync_interval, 1, - "Vertical synchronization interval. 0 to disable vertical sync, " - "1 to enable it, 2/3/4 to sync every 2/3/4 vertical blanks."); - namespace xe { namespace ui { namespace d3d12 { @@ -284,8 +279,7 @@ void D3D12Context::EndSwap() { graphics_command_list->ResourceBarrier(1, &barrier); command_list->Execute(); // Present and check if the context was lost. - HRESULT result = - swap_chain_->Present(xe::clamp(FLAGS_d3d12_sync_interval, 0, 4), 0); + HRESULT result = swap_chain_->Present(FLAGS_vsync ? 1 : 0, 0); if (result == DXGI_ERROR_DEVICE_RESET || result == DXGI_ERROR_DEVICE_REMOVED) { context_lost_ = true; @@ -311,6 +305,9 @@ std::unique_ptr D3D12Context::Capture() { void D3D12Context::AwaitAllFramesCompletion() { // Await the last frame since previous frames must be completed before it. 
+ if (context_lost_) { + return; + } uint32_t await_frame = current_queue_frame_ + (kQueuedFrames - 1); if (await_frame >= kQueuedFrames) { await_frame -= kQueuedFrames; diff --git a/src/xenia/ui/d3d12/d3d12_context.h b/src/xenia/ui/d3d12/d3d12_context.h index ff48c8cfd..e64cff97e 100644 --- a/src/xenia/ui/d3d12/d3d12_context.h +++ b/src/xenia/ui/d3d12/d3d12_context.h @@ -18,6 +18,8 @@ #include "xenia/ui/d3d12/d3d12_provider.h" #include "xenia/ui/graphics_context.h" +#define FINE_GRAINED_DRAW_SCOPES 1 + namespace xe { namespace ui { namespace d3d12 {