[D3D12] Don't use switch in shaders on Intel, log GPU name
This commit is contained in:
parent
6824905b78
commit
bb53d722f9
|
@ -41,7 +41,10 @@ PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
|
||||||
: command_processor_(command_processor),
|
: command_processor_(command_processor),
|
||||||
register_file_(register_file),
|
register_file_(register_file),
|
||||||
edram_rov_used_(edram_rov_used) {
|
edram_rov_used_(edram_rov_used) {
|
||||||
shader_translator_ = std::make_unique<DxbcShaderTranslator>(edram_rov_used_);
|
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||||
|
|
||||||
|
shader_translator_ = std::make_unique<DxbcShaderTranslator>(
|
||||||
|
provider->GetAdapterVendorID(), edram_rov_used_);
|
||||||
|
|
||||||
if (edram_rov_used_) {
|
if (edram_rov_used_) {
|
||||||
depth_only_pixel_shader_ =
|
depth_only_pixel_shader_ =
|
||||||
|
|
|
@ -33,7 +33,9 @@ DEFINE_bool(dxbc_switch, true,
|
||||||
"on may improve stability, though this heavily depends on the "
|
"on may improve stability, though this heavily depends on the "
|
||||||
"driver - on AMD, it's recommended to have this set to true, as "
|
"driver - on AMD, it's recommended to have this set to true, as "
|
||||||
"Halo 3 appears to crash when if is used for flow control "
|
"Halo 3 appears to crash when if is used for flow control "
|
||||||
"(possibly the shader compiler tries to flatten them).");
|
"(possibly the shader compiler tries to flatten them). On Intel "
|
||||||
|
"HD Graphics, this is ignored because of a crash with the switch "
|
||||||
|
"instruction.");
|
||||||
DEFINE_bool(dxbc_source_map, false,
|
DEFINE_bool(dxbc_source_map, false,
|
||||||
"Disassemble Xenos instructions as comments in the resulting DXBC "
|
"Disassemble Xenos instructions as comments in the resulting DXBC "
|
||||||
"for debugging.");
|
"for debugging.");
|
||||||
|
@ -85,8 +87,9 @@ constexpr uint32_t
|
||||||
constexpr uint32_t DxbcShaderTranslator::kCbufferIndexUnallocated;
|
constexpr uint32_t DxbcShaderTranslator::kCbufferIndexUnallocated;
|
||||||
constexpr uint32_t DxbcShaderTranslator::kCfExecBoolConstantNone;
|
constexpr uint32_t DxbcShaderTranslator::kCfExecBoolConstantNone;
|
||||||
|
|
||||||
DxbcShaderTranslator::DxbcShaderTranslator(bool edram_rov_used)
|
DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id,
|
||||||
: edram_rov_used_(edram_rov_used) {
|
bool edram_rov_used)
|
||||||
|
: vendor_id_(vendor_id), edram_rov_used_(edram_rov_used) {
|
||||||
// Don't allocate again and again for the first shader.
|
// Don't allocate again and again for the first shader.
|
||||||
shader_code_.reserve(8192);
|
shader_code_.reserve(8192);
|
||||||
shader_object_.reserve(16384);
|
shader_object_.reserve(16384);
|
||||||
|
@ -519,6 +522,11 @@ void DxbcShaderTranslator::Reset() {
|
||||||
std::memset(&stat_, 0, sizeof(stat_));
|
std::memset(&stat_, 0, sizeof(stat_));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool DxbcShaderTranslator::UseSwitchForControlFlow() const {
|
||||||
|
// Xenia crashes on Intel HD Graphics 4000 with switch.
|
||||||
|
return FLAGS_dxbc_switch && vendor_id_ != 0x8086;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t DxbcShaderTranslator::PushSystemTemp(bool zero) {
|
uint32_t DxbcShaderTranslator::PushSystemTemp(bool zero) {
|
||||||
uint32_t register_index = system_temp_count_current_;
|
uint32_t register_index = system_temp_count_current_;
|
||||||
if (!IndexableGPRsUsed() && !is_depth_only_pixel_shader_) {
|
if (!IndexableGPRsUsed() && !is_depth_only_pixel_shader_) {
|
||||||
|
@ -1208,7 +1216,7 @@ void DxbcShaderTranslator::StartTranslation() {
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.dynamic_flow_control_count;
|
++stat_.dynamic_flow_control_count;
|
||||||
// Switch and the first label (pc == 0).
|
// Switch and the first label (pc == 0).
|
||||||
if (FLAGS_dxbc_switch) {
|
if (UseSwitchForControlFlow()) {
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SWITCH) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SWITCH) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
|
@ -6427,7 +6435,7 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
||||||
// closing upper-level flow control blocks.
|
// closing upper-level flow control blocks.
|
||||||
CloseExecConditionals();
|
CloseExecConditionals();
|
||||||
// Close the last label and the switch.
|
// Close the last label and the switch.
|
||||||
if (FLAGS_dxbc_switch) {
|
if (UseSwitchForControlFlow()) {
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
|
@ -7982,7 +7990,7 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) {
|
||||||
// execs across labels.
|
// execs across labels.
|
||||||
CloseExecConditionals();
|
CloseExecConditionals();
|
||||||
|
|
||||||
if (FLAGS_dxbc_switch) {
|
if (UseSwitchForControlFlow()) {
|
||||||
// Fallthrough to the label from the previous one on the next iteration if
|
// Fallthrough to the label from the previous one on the next iteration if
|
||||||
// no `continue` was done. Can't simply fallthrough because in DXBC, a
|
// no `continue` was done. Can't simply fallthrough because in DXBC, a
|
||||||
// non-empty switch case must end with a break.
|
// non-empty switch case must end with a break.
|
||||||
|
@ -8067,7 +8075,7 @@ void DxbcShaderTranslator::ProcessExecInstructionEnd(
|
||||||
if (instr.is_end) {
|
if (instr.is_end) {
|
||||||
// Break out of the main loop.
|
// Break out of the main loop.
|
||||||
CloseInstructionPredication();
|
CloseInstructionPredication();
|
||||||
if (FLAGS_dxbc_switch) {
|
if (UseSwitchForControlFlow()) {
|
||||||
// Write an invalid value to pc.
|
// Write an invalid value to pc.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||||
|
|
|
@ -24,7 +24,7 @@ namespace gpu {
|
||||||
// Generates shader model 5_1 byte code (for Direct3D 12).
|
// Generates shader model 5_1 byte code (for Direct3D 12).
|
||||||
class DxbcShaderTranslator : public ShaderTranslator {
|
class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
public:
|
public:
|
||||||
DxbcShaderTranslator(bool edram_rov_used);
|
DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used);
|
||||||
~DxbcShaderTranslator() override;
|
~DxbcShaderTranslator() override;
|
||||||
|
|
||||||
// Constant buffer bindings in space 0.
|
// Constant buffer bindings in space 0.
|
||||||
|
@ -758,6 +758,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
return is_depth_only_pixel_shader_ || is_pixel_shader();
|
return is_depth_only_pixel_shader_ || is_pixel_shader();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Whether to use switch-case rather than if (pc >= label) for control flow.
|
||||||
|
bool UseSwitchForControlFlow() const;
|
||||||
|
|
||||||
// Allocates a new r# register for internal use and returns its index.
|
// Allocates a new r# register for internal use and returns its index.
|
||||||
uint32_t PushSystemTemp(bool zero = false);
|
uint32_t PushSystemTemp(bool zero = false);
|
||||||
// Frees the last allocated internal r# registers for later reuse.
|
// Frees the last allocated internal r# registers for later reuse.
|
||||||
|
@ -994,6 +997,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
// Buffer for instruction disassembly comments.
|
// Buffer for instruction disassembly comments.
|
||||||
StringBuffer instruction_disassembly_buffer_;
|
StringBuffer instruction_disassembly_buffer_;
|
||||||
|
|
||||||
|
// Vendor ID of the GPU manufacturer, for toggling unsupported features.
|
||||||
|
uint32_t vendor_id_;
|
||||||
|
|
||||||
// Whether the output merger should be emulated in pixel shaders.
|
// Whether the output merger should be emulated in pixel shaders.
|
||||||
bool edram_rov_used_;
|
bool edram_rov_used_;
|
||||||
|
|
||||||
|
|
|
@ -103,7 +103,7 @@ int shader_compiler_main(const std::vector<std::wstring>& args) {
|
||||||
GlslShaderTranslator::Dialect::kGL45);
|
GlslShaderTranslator::Dialect::kGL45);
|
||||||
} else if (FLAGS_shader_output_type == "dxbc") {
|
} else if (FLAGS_shader_output_type == "dxbc") {
|
||||||
translator =
|
translator =
|
||||||
std::make_unique<DxbcShaderTranslator>(FLAGS_shader_output_dxbc_rov);
|
std::make_unique<DxbcShaderTranslator>(0, FLAGS_shader_output_dxbc_rov);
|
||||||
} else {
|
} else {
|
||||||
translator = std::make_unique<UcodeShaderTranslator>();
|
translator = std::make_unique<UcodeShaderTranslator>();
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,6 +11,9 @@
|
||||||
|
|
||||||
#include <gflags/gflags.h>
|
#include <gflags/gflags.h>
|
||||||
|
|
||||||
|
#include <malloc.h>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
#include "xenia/base/logging.h"
|
#include "xenia/base/logging.h"
|
||||||
#include "xenia/ui/d3d12/d3d12_context.h"
|
#include "xenia/ui/d3d12/d3d12_context.h"
|
||||||
|
|
||||||
|
@ -125,8 +128,7 @@ D3D12Provider::InitializationResult D3D12Provider::Initialize() {
|
||||||
return InitializationResult::kDeviceInitializationFailed;
|
return InitializationResult::kDeviceInitializationFailed;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Choose the adapter and create a device with required features.
|
// Choose the adapter.
|
||||||
// TODO(Triang3l): Log adapter info (contains a wide string).
|
|
||||||
uint32_t adapter_index = 0;
|
uint32_t adapter_index = 0;
|
||||||
IDXGIAdapter1* adapter = nullptr;
|
IDXGIAdapter1* adapter = nullptr;
|
||||||
while (dxgi_factory->EnumAdapters1(adapter_index, &adapter) == S_OK) {
|
while (dxgi_factory->EnumAdapters1(adapter_index, &adapter) == S_OK) {
|
||||||
|
@ -158,6 +160,24 @@ D3D12Provider::InitializationResult D3D12Provider::Initialize() {
|
||||||
dxgi_factory->Release();
|
dxgi_factory->Release();
|
||||||
return InitializationResult::kDeviceInitializationFailed;
|
return InitializationResult::kDeviceInitializationFailed;
|
||||||
}
|
}
|
||||||
|
DXGI_ADAPTER_DESC adapter_desc;
|
||||||
|
if (FAILED(adapter->GetDesc(&adapter_desc))) {
|
||||||
|
XELOGE("Failed to get the DXGI adapter description.");
|
||||||
|
adapter->Release();
|
||||||
|
dxgi_factory->Release();
|
||||||
|
return InitializationResult::kDeviceInitializationFailed;
|
||||||
|
}
|
||||||
|
adapter_vendor_id_ = adapter_desc.VendorId;
|
||||||
|
size_t adapter_name_length =
|
||||||
|
std::wcstombs(nullptr, adapter_desc.Description, 0);
|
||||||
|
char* adapter_name_mbcs =
|
||||||
|
reinterpret_cast<char*>(alloca((adapter_name_length + 1) * sizeof(char)));
|
||||||
|
std::wcstombs(adapter_name_mbcs, adapter_desc.Description,
|
||||||
|
adapter_name_length + 1);
|
||||||
|
XELOGD3D("DXGI adapter: %s (vendor %.4X, device %.4X)", adapter_name_mbcs,
|
||||||
|
adapter_desc.VendorId, adapter_desc.DeviceId);
|
||||||
|
|
||||||
|
// Create the Direct3D 12 device.
|
||||||
ID3D12Device* device;
|
ID3D12Device* device;
|
||||||
if (FAILED(pfn_d3d12_create_device_(adapter, D3D_FEATURE_LEVEL_11_0,
|
if (FAILED(pfn_d3d12_create_device_(adapter, D3D_FEATURE_LEVEL_11_0,
|
||||||
IID_PPV_ARGS(&device)))) {
|
IID_PPV_ARGS(&device)))) {
|
||||||
|
|
|
@ -62,6 +62,9 @@ class D3D12Provider : public GraphicsProvider {
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Adapter info.
|
||||||
|
// Returns the vendor ID taken from the DXGI description of the adapter the
// device is created on (0x8086 is what is checked for Intel, for instance).
// It is only assigned during Initialize, after the adapter description has
// been obtained successfully.
uint32_t GetAdapterVendorID() const { return adapter_vendor_id_; }
|
||||||
|
|
||||||
// Device features.
|
// Device features.
|
||||||
uint32_t GetProgrammableSamplePositionsTier() const {
|
uint32_t GetProgrammableSamplePositionsTier() const {
|
||||||
return programmable_sample_positions_tier_;
|
return programmable_sample_positions_tier_;
|
||||||
|
@ -126,6 +129,8 @@ class D3D12Provider : public GraphicsProvider {
|
||||||
uint32_t descriptor_size_rtv_;
|
uint32_t descriptor_size_rtv_;
|
||||||
uint32_t descriptor_size_dsv_;
|
uint32_t descriptor_size_dsv_;
|
||||||
|
|
||||||
|
uint32_t adapter_vendor_id_;
|
||||||
|
|
||||||
uint32_t programmable_sample_positions_tier_;
|
uint32_t programmable_sample_positions_tier_;
|
||||||
bool rasterizer_ordered_views_supported_;
|
bool rasterizer_ordered_views_supported_;
|
||||||
uint32_t tiled_resources_tier_;
|
uint32_t tiled_resources_tier_;
|
||||||
|
|
Loading…
Reference in New Issue