diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index d73937955..6a697edb9 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -41,7 +41,10 @@ PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor, : command_processor_(command_processor), register_file_(register_file), edram_rov_used_(edram_rov_used) { - shader_translator_ = std::make_unique(edram_rov_used_); + auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); + + shader_translator_ = std::make_unique( + provider->GetAdapterVendorID(), edram_rov_used_); if (edram_rov_used_) { depth_only_pixel_shader_ = diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 56e093644..496210bdb 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -33,7 +33,9 @@ DEFINE_bool(dxbc_switch, true, "on may improve stability, though this heavily depends on the " "driver - on AMD, it's recommended to have this set to true, as " "Halo 3 appears to crash when if is used for flow control " - "(possibly the shader compiler tries to flatten them)."); + "(possibly the shader compiler tries to flatten them). On Intel " + "HD Graphics, this is ignored because of a crash with the switch " + "instruction."); DEFINE_bool(dxbc_source_map, false, "Disassemble Xenos instructions as comments in the resulting DXBC " "for debugging."); @@ -85,8 +87,9 @@ constexpr uint32_t constexpr uint32_t DxbcShaderTranslator::kCbufferIndexUnallocated; constexpr uint32_t DxbcShaderTranslator::kCfExecBoolConstantNone; -DxbcShaderTranslator::DxbcShaderTranslator(bool edram_rov_used) - : edram_rov_used_(edram_rov_used) { +DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id, + bool edram_rov_used) + : vendor_id_(vendor_id), edram_rov_used_(edram_rov_used) { // Don't allocate again and again for the first shader. 
shader_code_.reserve(8192);
   shader_object_.reserve(16384);
@@ -519,6 +522,15 @@ void DxbcShaderTranslator::Reset() {
   std::memset(&stat_, 0, sizeof(stat_));
 }
 
+bool DxbcShaderTranslator::UseSwitchForControlFlow() const {
+  // PCI vendor ID assigned to Intel.
+  constexpr uint32_t kVendorIdIntel = 0x8086;
+  // The switch instruction crashes on Intel HD Graphics (observed on HD
+  // Graphics 4000), so the dxbc_switch flag is ignored on every Intel adapter
+  // and if-based flow control is emitted instead.
+  return FLAGS_dxbc_switch && vendor_id_ != kVendorIdIntel;
+}
+
 uint32_t DxbcShaderTranslator::PushSystemTemp(bool zero) {
   uint32_t register_index = system_temp_count_current_;
   if (!IndexableGPRsUsed() && !is_depth_only_pixel_shader_) {
@@ -1208,7 +1220,7 @@ void DxbcShaderTranslator::StartTranslation() {
   ++stat_.instruction_count;
   ++stat_.dynamic_flow_control_count;
   // Switch and the first label (pc == 0).
-  if (FLAGS_dxbc_switch) {
+  if (UseSwitchForControlFlow()) {
     shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SWITCH) |
                            ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
     shader_code_.push_back(
@@ -6427,7 +6439,7 @@ void DxbcShaderTranslator::CompleteShaderCode() {
   // closing upper-level flow control blocks.
   CloseExecConditionals();
   // Close the last label and the switch.
-  if (FLAGS_dxbc_switch) {
+  if (UseSwitchForControlFlow()) {
     shader_code_.push_back(
         ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
         ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
@@ -7982,7 +7994,7 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) {
   // execs across labels.
   CloseExecConditionals();
 
-  if (FLAGS_dxbc_switch) {
+  if (UseSwitchForControlFlow()) {
     // Fallthrough to the label from the previous one on the next iteration if
     // no `continue` was done. Can't simply fallthrough because in DXBC, a
     // non-empty switch case must end with a break.
@@ -8067,7 +8079,7 @@ void DxbcShaderTranslator::ProcessExecInstructionEnd(
   if (instr.is_end) {
     // Break out of the main loop.
     CloseInstructionPredication();
-    if (FLAGS_dxbc_switch) {
+    if (UseSwitchForControlFlow()) {
       // Write an invalid value to pc.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index a30869365..8a9ca494d 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -24,7 +24,7 @@ namespace gpu { // Generates shader model 5_1 byte code (for Direct3D 12). class DxbcShaderTranslator : public ShaderTranslator { public: - DxbcShaderTranslator(bool edram_rov_used); + DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used); ~DxbcShaderTranslator() override; // Constant buffer bindings in space 0. @@ -758,6 +758,9 @@ class DxbcShaderTranslator : public ShaderTranslator { return is_depth_only_pixel_shader_ || is_pixel_shader(); } + // Whether to use switch-case rather than if (pc >= label) for control flow. + bool UseSwitchForControlFlow() const; + // Allocates a new r# register for internal use and returns its index. uint32_t PushSystemTemp(bool zero = false); // Frees the last allocated internal r# registers for later reuse. @@ -994,6 +997,9 @@ class DxbcShaderTranslator : public ShaderTranslator { // Buffer for instruction disassembly comments. StringBuffer instruction_disassembly_buffer_; + // Vendor ID of the GPU manufacturer, for toggling unsupported features. + uint32_t vendor_id_; + // Whether the output merger should be emulated in pixel shaders. 
bool edram_rov_used_; diff --git a/src/xenia/gpu/shader_compiler_main.cc b/src/xenia/gpu/shader_compiler_main.cc index 2ca7e39f1..11efb4568 100644 --- a/src/xenia/gpu/shader_compiler_main.cc +++ b/src/xenia/gpu/shader_compiler_main.cc @@ -103,7 +103,7 @@ int shader_compiler_main(const std::vector& args) { GlslShaderTranslator::Dialect::kGL45); } else if (FLAGS_shader_output_type == "dxbc") { translator = - std::make_unique(FLAGS_shader_output_dxbc_rov); + std::make_unique(0, FLAGS_shader_output_dxbc_rov); } else { translator = std::make_unique(); } diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index 82a0de913..b51fd6ad0 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc +++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -11,6 +11,9 @@ #include +#include +#include + #include "xenia/base/logging.h" #include "xenia/ui/d3d12/d3d12_context.h" @@ -125,8 +128,7 @@ D3D12Provider::InitializationResult D3D12Provider::Initialize() { return InitializationResult::kDeviceInitializationFailed; } - // Choose the adapter and create a device with required features. - // TODO(Triang3l): Log adapter info (contains a wide string). + // Choose the adapter. 
uint32_t adapter_index = 0;
   IDXGIAdapter1* adapter = nullptr;
   while (dxgi_factory->EnumAdapters1(adapter_index, &adapter) == S_OK) {
@@ -158,6 +160,33 @@ D3D12Provider::InitializationResult D3D12Provider::Initialize() {
     dxgi_factory->Release();
     return InitializationResult::kDeviceInitializationFailed;
   }
+  DXGI_ADAPTER_DESC adapter_desc;
+  if (FAILED(adapter->GetDesc(&adapter_desc))) {
+    XELOGE("Failed to get the DXGI adapter description.");
+    adapter->Release();
+    dxgi_factory->Release();
+    return InitializationResult::kDeviceInitializationFailed;
+  }
+  adapter_vendor_id_ = adapter_desc.VendorId;
+  // wcstombs returns size_t(-1) if the description can't be converted in the
+  // current locale. Log an empty name then, rather than letting length + 1
+  // wrap to a 0-byte buffer that would be printed without a terminator.
+  size_t adapter_name_length =
+      std::wcstombs(nullptr, adapter_desc.Description, 0);
+  if (adapter_name_length == static_cast<size_t>(-1)) {
+    adapter_name_length = 0;
+  }
+  char* adapter_name_mbcs =
+      static_cast<char*>(alloca(adapter_name_length + 1));
+  if (adapter_name_length != 0) {
+    std::wcstombs(adapter_name_mbcs, adapter_desc.Description,
+                  adapter_name_length + 1);
+  }
+  // Always terminate explicitly so the %s below never reads past the buffer.
+  adapter_name_mbcs[adapter_name_length] = '\0';
+  XELOGD3D("DXGI adapter: %s (vendor %.4X, device %.4X)", adapter_name_mbcs,
+           adapter_desc.VendorId, adapter_desc.DeviceId);
+
+  // Create the Direct3D 12 device.
   ID3D12Device* device;
   if (FAILED(pfn_d3d12_create_device_(adapter, D3D_FEATURE_LEVEL_11_0,
                                       IID_PPV_ARGS(&device)))) {
diff --git a/src/xenia/ui/d3d12/d3d12_provider.h b/src/xenia/ui/d3d12/d3d12_provider.h
index f2cdfa8dc..861405f38 100644
--- a/src/xenia/ui/d3d12/d3d12_provider.h
+++ b/src/xenia/ui/d3d12/d3d12_provider.h
@@ -62,6 +62,9 @@ class D3D12Provider : public GraphicsProvider {
     return start;
   }
 
+  // Adapter info.
+  uint32_t GetAdapterVendorID() const { return adapter_vendor_id_; }
+
   // Device features.
   uint32_t GetProgrammableSamplePositionsTier() const {
     return programmable_sample_positions_tier_;
@@ -126,6 +129,8 @@ class D3D12Provider : public GraphicsProvider {
   uint32_t descriptor_size_rtv_;
   uint32_t descriptor_size_dsv_;
 
+  uint32_t adapter_vendor_id_;
+
   uint32_t programmable_sample_positions_tier_;
   bool rasterizer_ordered_views_supported_;
   uint32_t tiled_resources_tier_;