[D3D12] Don't use switch in shaders on Intel, log GPU name

This commit is contained in:
Triang3l 2018-12-03 16:31:49 +03:00
parent 6824905b78
commit bb53d722f9
6 changed files with 54 additions and 12 deletions

View File

@ -41,7 +41,10 @@ PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
: command_processor_(command_processor), : command_processor_(command_processor),
register_file_(register_file), register_file_(register_file),
edram_rov_used_(edram_rov_used) { edram_rov_used_(edram_rov_used) {
shader_translator_ = std::make_unique<DxbcShaderTranslator>(edram_rov_used_); auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
shader_translator_ = std::make_unique<DxbcShaderTranslator>(
provider->GetAdapterVendorID(), edram_rov_used_);
if (edram_rov_used_) { if (edram_rov_used_) {
depth_only_pixel_shader_ = depth_only_pixel_shader_ =

View File

@ -33,7 +33,9 @@ DEFINE_bool(dxbc_switch, true,
"on may improve stability, though this heavily depends on the " "on may improve stability, though this heavily depends on the "
"driver - on AMD, it's recommended to have this set to true, as " "driver - on AMD, it's recommended to have this set to true, as "
"Halo 3 appears to crash when if is used for flow control " "Halo 3 appears to crash when if is used for flow control "
"(possibly the shader compiler tries to flatten them)."); "(possibly the shader compiler tries to flatten them). On Intel "
"HD Graphics, this is ignored because of a crash with the switch "
"instruction.");
DEFINE_bool(dxbc_source_map, false, DEFINE_bool(dxbc_source_map, false,
"Disassemble Xenos instructions as comments in the resulting DXBC " "Disassemble Xenos instructions as comments in the resulting DXBC "
"for debugging."); "for debugging.");
@ -85,8 +87,9 @@ constexpr uint32_t
constexpr uint32_t DxbcShaderTranslator::kCbufferIndexUnallocated; constexpr uint32_t DxbcShaderTranslator::kCbufferIndexUnallocated;
constexpr uint32_t DxbcShaderTranslator::kCfExecBoolConstantNone; constexpr uint32_t DxbcShaderTranslator::kCfExecBoolConstantNone;
DxbcShaderTranslator::DxbcShaderTranslator(bool edram_rov_used) DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id,
: edram_rov_used_(edram_rov_used) { bool edram_rov_used)
: vendor_id_(vendor_id), edram_rov_used_(edram_rov_used) {
// Don't allocate again and again for the first shader. // Don't allocate again and again for the first shader.
shader_code_.reserve(8192); shader_code_.reserve(8192);
shader_object_.reserve(16384); shader_object_.reserve(16384);
@ -519,6 +522,11 @@ void DxbcShaderTranslator::Reset() {
std::memset(&stat_, 0, sizeof(stat_)); std::memset(&stat_, 0, sizeof(stat_));
} }
// Tells the translator whether the main control flow loop should be emitted
// with a switch instruction instead of the if-based fallback.
bool DxbcShaderTranslator::UseSwitchForControlFlow() const {
  // PCI vendor ID of Intel. Xenia crashes on Intel HD Graphics 4000 when the
  // switch instruction is used, so the dxbc_switch flag is ignored there.
  constexpr uint32_t kVendorIdIntel = 0x8086;
  if (vendor_id_ == kVendorIdIntel) {
    return false;
  }
  // On every other vendor, the user-controlled flag decides.
  return FLAGS_dxbc_switch;
}
uint32_t DxbcShaderTranslator::PushSystemTemp(bool zero) { uint32_t DxbcShaderTranslator::PushSystemTemp(bool zero) {
uint32_t register_index = system_temp_count_current_; uint32_t register_index = system_temp_count_current_;
if (!IndexableGPRsUsed() && !is_depth_only_pixel_shader_) { if (!IndexableGPRsUsed() && !is_depth_only_pixel_shader_) {
@ -1208,7 +1216,7 @@ void DxbcShaderTranslator::StartTranslation() {
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.dynamic_flow_control_count; ++stat_.dynamic_flow_control_count;
// Switch and the first label (pc == 0). // Switch and the first label (pc == 0).
if (FLAGS_dxbc_switch) { if (UseSwitchForControlFlow()) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SWITCH) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SWITCH) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back( shader_code_.push_back(
@ -6427,7 +6435,7 @@ void DxbcShaderTranslator::CompleteShaderCode() {
// closing upper-level flow control blocks. // closing upper-level flow control blocks.
CloseExecConditionals(); CloseExecConditionals();
// Close the last label and the switch. // Close the last label and the switch.
if (FLAGS_dxbc_switch) { if (UseSwitchForControlFlow()) {
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
@ -7982,7 +7990,7 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) {
// execs across labels. // execs across labels.
CloseExecConditionals(); CloseExecConditionals();
if (FLAGS_dxbc_switch) { if (UseSwitchForControlFlow()) {
// Fallthrough to the label from the previous one on the next iteration if // Fallthrough to the label from the previous one on the next iteration if
// no `continue` was done. Can't simply fallthrough because in DXBC, a // no `continue` was done. Can't simply fallthrough because in DXBC, a
// non-empty switch case must end with a break. // non-empty switch case must end with a break.
@ -8067,7 +8075,7 @@ void DxbcShaderTranslator::ProcessExecInstructionEnd(
if (instr.is_end) { if (instr.is_end) {
// Break out of the main loop. // Break out of the main loop.
CloseInstructionPredication(); CloseInstructionPredication();
if (FLAGS_dxbc_switch) { if (UseSwitchForControlFlow()) {
// Write an invalid value to pc. // Write an invalid value to pc.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));

View File

@ -24,7 +24,7 @@ namespace gpu {
// Generates shader model 5_1 byte code (for Direct3D 12). // Generates shader model 5_1 byte code (for Direct3D 12).
class DxbcShaderTranslator : public ShaderTranslator { class DxbcShaderTranslator : public ShaderTranslator {
public: public:
DxbcShaderTranslator(bool edram_rov_used); DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used);
~DxbcShaderTranslator() override; ~DxbcShaderTranslator() override;
// Constant buffer bindings in space 0. // Constant buffer bindings in space 0.
@ -758,6 +758,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
return is_depth_only_pixel_shader_ || is_pixel_shader(); return is_depth_only_pixel_shader_ || is_pixel_shader();
} }
// Whether to use switch-case rather than if (pc >= label) for control flow.
bool UseSwitchForControlFlow() const;
// Allocates a new r# register for internal use and returns its index. // Allocates a new r# register for internal use and returns its index.
uint32_t PushSystemTemp(bool zero = false); uint32_t PushSystemTemp(bool zero = false);
// Frees the last allocated internal r# registers for later reuse. // Frees the last allocated internal r# registers for later reuse.
@ -994,6 +997,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Buffer for instruction disassembly comments. // Buffer for instruction disassembly comments.
StringBuffer instruction_disassembly_buffer_; StringBuffer instruction_disassembly_buffer_;
// Vendor ID of the GPU manufacturer, for toggling unsupported features.
uint32_t vendor_id_;
// Whether the output merger should be emulated in pixel shaders. // Whether the output merger should be emulated in pixel shaders.
bool edram_rov_used_; bool edram_rov_used_;

View File

@ -103,7 +103,7 @@ int shader_compiler_main(const std::vector<std::wstring>& args) {
GlslShaderTranslator::Dialect::kGL45); GlslShaderTranslator::Dialect::kGL45);
} else if (FLAGS_shader_output_type == "dxbc") { } else if (FLAGS_shader_output_type == "dxbc") {
translator = translator =
std::make_unique<DxbcShaderTranslator>(FLAGS_shader_output_dxbc_rov); std::make_unique<DxbcShaderTranslator>(0, FLAGS_shader_output_dxbc_rov);
} else { } else {
translator = std::make_unique<UcodeShaderTranslator>(); translator = std::make_unique<UcodeShaderTranslator>();
} }

View File

@ -11,6 +11,9 @@
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <malloc.h>
#include <cstdlib>
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/ui/d3d12/d3d12_context.h" #include "xenia/ui/d3d12/d3d12_context.h"
@ -125,8 +128,7 @@ D3D12Provider::InitializationResult D3D12Provider::Initialize() {
return InitializationResult::kDeviceInitializationFailed; return InitializationResult::kDeviceInitializationFailed;
} }
// Choose the adapter and create a device with required features. // Choose the adapter.
// TODO(Triang3l): Log adapter info (contains a wide string).
uint32_t adapter_index = 0; uint32_t adapter_index = 0;
IDXGIAdapter1* adapter = nullptr; IDXGIAdapter1* adapter = nullptr;
while (dxgi_factory->EnumAdapters1(adapter_index, &adapter) == S_OK) { while (dxgi_factory->EnumAdapters1(adapter_index, &adapter) == S_OK) {
@ -158,6 +160,24 @@ D3D12Provider::InitializationResult D3D12Provider::Initialize() {
dxgi_factory->Release(); dxgi_factory->Release();
return InitializationResult::kDeviceInitializationFailed; return InitializationResult::kDeviceInitializationFailed;
} }
// Query the description of the chosen adapter. A failure here is reported as
// a device initialization failure.
DXGI_ADAPTER_DESC adapter_desc;
if (FAILED(adapter->GetDesc(&adapter_desc))) {
XELOGE("Failed to get the DXGI adapter description.");
// Drop the references held so far before bailing out.
adapter->Release();
dxgi_factory->Release();
return InitializationResult::kDeviceInitializationFailed;
}
// Keep the vendor ID so it can be queried later through GetAdapterVendorID().
adapter_vendor_id_ = adapter_desc.VendorId;
// The adapter name is a wide string; convert it to a multibyte string for
// logging. The first call, with a null destination, only measures the
// converted length (not counting the terminator).
// NOTE(review): std::wcstombs returns static_cast<size_t>(-1) when a character
// cannot be converted in the current C locale. That result is not checked
// here; adapter_name_length + 1 would then wrap to 0 and the buffer below
// would be logged without ever being written - confirm or add a fallback.
size_t adapter_name_length =
std::wcstombs(nullptr, adapter_desc.Description, 0);
// Temporary buffer obtained from alloca, sized for the name plus the
// terminating null.
char* adapter_name_mbcs =
reinterpret_cast<char*>(alloca((adapter_name_length + 1) * sizeof(char)));
// Second call performs the actual conversion into the buffer.
std::wcstombs(adapter_name_mbcs, adapter_desc.Description,
adapter_name_length + 1);
// Log the adapter name with its vendor and device IDs as 4-digit hex.
XELOGD3D("DXGI adapter: %s (vendor %.4X, device %.4X)", adapter_name_mbcs,
adapter_desc.VendorId, adapter_desc.DeviceId);
// Create the Direct3D 12 device.
ID3D12Device* device; ID3D12Device* device;
if (FAILED(pfn_d3d12_create_device_(adapter, D3D_FEATURE_LEVEL_11_0, if (FAILED(pfn_d3d12_create_device_(adapter, D3D_FEATURE_LEVEL_11_0,
IID_PPV_ARGS(&device)))) { IID_PPV_ARGS(&device)))) {

View File

@ -62,6 +62,9 @@ class D3D12Provider : public GraphicsProvider {
return start; return start;
} }
// Adapter info.
// Returns the vendor ID taken from the DXGI adapter description.
uint32_t GetAdapterVendorID() const {
  return adapter_vendor_id_;
}
// Device features. // Device features.
uint32_t GetProgrammableSamplePositionsTier() const { uint32_t GetProgrammableSamplePositionsTier() const {
return programmable_sample_positions_tier_; return programmable_sample_positions_tier_;
@ -126,6 +129,8 @@ class D3D12Provider : public GraphicsProvider {
uint32_t descriptor_size_rtv_; uint32_t descriptor_size_rtv_;
uint32_t descriptor_size_dsv_; uint32_t descriptor_size_dsv_;
uint32_t adapter_vendor_id_;
uint32_t programmable_sample_positions_tier_; uint32_t programmable_sample_positions_tier_;
bool rasterizer_ordered_views_supported_; bool rasterizer_ordered_views_supported_;
uint32_t tiled_resources_tier_; uint32_t tiled_resources_tier_;