[D3D12] Don't use switch in shaders on Intel, log GPU name
This commit is contained in:
parent
6824905b78
commit
bb53d722f9
|
@ -41,7 +41,10 @@ PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
|
|||
: command_processor_(command_processor),
|
||||
register_file_(register_file),
|
||||
edram_rov_used_(edram_rov_used) {
|
||||
shader_translator_ = std::make_unique<DxbcShaderTranslator>(edram_rov_used_);
|
||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||
|
||||
shader_translator_ = std::make_unique<DxbcShaderTranslator>(
|
||||
provider->GetAdapterVendorID(), edram_rov_used_);
|
||||
|
||||
if (edram_rov_used_) {
|
||||
depth_only_pixel_shader_ =
|
||||
|
|
|
@ -33,7 +33,9 @@ DEFINE_bool(dxbc_switch, true,
|
|||
"on may improve stability, though this heavily depends on the "
|
||||
"driver - on AMD, it's recommended to have this set to true, as "
|
||||
"Halo 3 appears to crash when if is used for flow control "
|
||||
"(possibly the shader compiler tries to flatten them).");
|
||||
"(possibly the shader compiler tries to flatten them). On Intel "
|
||||
"HD Graphics, this is ignored because of a crash with the switch "
|
||||
"instruction.");
|
||||
// Debugging aid, disabled by default: when set, Xenos instruction disassembly
// is included as comments in the generated DXBC.
DEFINE_bool(dxbc_source_map, false,
|
||||
"Disassemble Xenos instructions as comments in the resulting DXBC "
|
||||
"for debugging.");
|
||||
|
@ -85,8 +87,9 @@ constexpr uint32_t
|
|||
constexpr uint32_t DxbcShaderTranslator::kCbufferIndexUnallocated;
|
||||
constexpr uint32_t DxbcShaderTranslator::kCfExecBoolConstantNone;
|
||||
|
||||
DxbcShaderTranslator::DxbcShaderTranslator(bool edram_rov_used)
|
||||
: edram_rov_used_(edram_rov_used) {
|
||||
DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id,
|
||||
bool edram_rov_used)
|
||||
: vendor_id_(vendor_id), edram_rov_used_(edram_rov_used) {
|
||||
// Don't allocate again and again for the first shader.
|
||||
shader_code_.reserve(8192);
|
||||
shader_object_.reserve(16384);
|
||||
|
@ -519,6 +522,11 @@ void DxbcShaderTranslator::Reset() {
|
|||
std::memset(&stat_, 0, sizeof(stat_));
|
||||
}
|
||||
|
||||
// Returns whether translated control flow should be emitted with the DXBC
// switch instruction (true) or with the non-switch fallback (false). The
// result is true only when the dxbc_switch flag is set and the adapter vendor
// is not Intel.
bool DxbcShaderTranslator::UseSwitchForControlFlow() const {
|
||||
// Xenia crashes on Intel HD Graphics 4000 with switch.
// The test is by vendor ID only (0x8086 is Intel's PCI vendor ID), so switch
// is disabled on every Intel adapter, not just the HD Graphics 4000, and the
// dxbc_switch flag has no effect there.
|
||||
return FLAGS_dxbc_switch && vendor_id_ != 0x8086;
|
||||
}
|
||||
|
||||
uint32_t DxbcShaderTranslator::PushSystemTemp(bool zero) {
|
||||
uint32_t register_index = system_temp_count_current_;
|
||||
if (!IndexableGPRsUsed() && !is_depth_only_pixel_shader_) {
|
||||
|
@ -1208,7 +1216,7 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
++stat_.instruction_count;
|
||||
++stat_.dynamic_flow_control_count;
|
||||
// Switch and the first label (pc == 0).
|
||||
if (FLAGS_dxbc_switch) {
|
||||
if (UseSwitchForControlFlow()) {
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SWITCH) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||
shader_code_.push_back(
|
||||
|
@ -6427,7 +6435,7 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
|||
// closing upper-level flow control blocks.
|
||||
CloseExecConditionals();
|
||||
// Close the last label and the switch.
|
||||
if (FLAGS_dxbc_switch) {
|
||||
if (UseSwitchForControlFlow()) {
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
|
@ -7982,7 +7990,7 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) {
|
|||
// execs across labels.
|
||||
CloseExecConditionals();
|
||||
|
||||
if (FLAGS_dxbc_switch) {
|
||||
if (UseSwitchForControlFlow()) {
|
||||
// Fallthrough to the label from the previous one on the next iteration if
|
||||
// no `continue` was done. Can't simply fallthrough because in DXBC, a
|
||||
// non-empty switch case must end with a break.
|
||||
|
@ -8067,7 +8075,7 @@ void DxbcShaderTranslator::ProcessExecInstructionEnd(
|
|||
if (instr.is_end) {
|
||||
// Break out of the main loop.
|
||||
CloseInstructionPredication();
|
||||
if (FLAGS_dxbc_switch) {
|
||||
if (UseSwitchForControlFlow()) {
|
||||
// Write an invalid value to pc.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||
|
|
|
@ -24,7 +24,7 @@ namespace gpu {
|
|||
// Generates shader model 5_1 byte code (for Direct3D 12).
|
||||
class DxbcShaderTranslator : public ShaderTranslator {
|
||||
public:
|
||||
DxbcShaderTranslator(bool edram_rov_used);
|
||||
DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used);
|
||||
~DxbcShaderTranslator() override;
|
||||
|
||||
// Constant buffer bindings in space 0.
|
||||
|
@ -758,6 +758,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
return is_depth_only_pixel_shader_ || is_pixel_shader();
|
||||
}
|
||||
|
||||
// Whether to use switch-case rather than if (pc >= label) for control flow.
|
||||
bool UseSwitchForControlFlow() const;
|
||||
|
||||
// Allocates a new r# register for internal use and returns its index.
|
||||
uint32_t PushSystemTemp(bool zero = false);
|
||||
// Frees the last allocated internal r# registers for later reuse.
|
||||
|
@ -994,6 +997,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// Buffer for instruction disassembly comments.
|
||||
StringBuffer instruction_disassembly_buffer_;
|
||||
|
||||
// Vendor ID of the GPU manufacturer, for toggling unsupported features.
|
||||
uint32_t vendor_id_;
|
||||
|
||||
// Whether the output merger should be emulated in pixel shaders.
|
||||
bool edram_rov_used_;
|
||||
|
||||
|
|
|
@ -103,7 +103,7 @@ int shader_compiler_main(const std::vector<std::wstring>& args) {
|
|||
GlslShaderTranslator::Dialect::kGL45);
|
||||
} else if (FLAGS_shader_output_type == "dxbc") {
|
||||
translator =
|
||||
std::make_unique<DxbcShaderTranslator>(FLAGS_shader_output_dxbc_rov);
|
||||
std::make_unique<DxbcShaderTranslator>(0, FLAGS_shader_output_dxbc_rov);
|
||||
} else {
|
||||
translator = std::make_unique<UcodeShaderTranslator>();
|
||||
}
|
||||
|
|
|
@ -11,6 +11,9 @@
|
|||
|
||||
#include <gflags/gflags.h>
|
||||
|
||||
#include <malloc.h>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/ui/d3d12/d3d12_context.h"
|
||||
|
||||
|
@ -125,8 +128,7 @@ D3D12Provider::InitializationResult D3D12Provider::Initialize() {
|
|||
return InitializationResult::kDeviceInitializationFailed;
|
||||
}
|
||||
|
||||
// Choose the adapter and create a device with required features.
|
||||
// TODO(Triang3l): Log adapter info (contains a wide string).
|
||||
// Choose the adapter.
|
||||
uint32_t adapter_index = 0;
|
||||
IDXGIAdapter1* adapter = nullptr;
|
||||
while (dxgi_factory->EnumAdapters1(adapter_index, &adapter) == S_OK) {
|
||||
|
@ -158,6 +160,24 @@ D3D12Provider::InitializationResult D3D12Provider::Initialize() {
|
|||
dxgi_factory->Release();
|
||||
return InitializationResult::kDeviceInitializationFailed;
|
||||
}
|
||||
DXGI_ADAPTER_DESC adapter_desc;
|
||||
if (FAILED(adapter->GetDesc(&adapter_desc))) {
|
||||
XELOGE("Failed to get the DXGI adapter description.");
|
||||
adapter->Release();
|
||||
dxgi_factory->Release();
|
||||
return InitializationResult::kDeviceInitializationFailed;
|
||||
}
|
||||
adapter_vendor_id_ = adapter_desc.VendorId;
|
||||
size_t adapter_name_length =
|
||||
std::wcstombs(nullptr, adapter_desc.Description, 0);
|
||||
char* adapter_name_mbcs =
|
||||
reinterpret_cast<char*>(alloca((adapter_name_length + 1) * sizeof(char)));
|
||||
std::wcstombs(adapter_name_mbcs, adapter_desc.Description,
|
||||
adapter_name_length + 1);
|
||||
XELOGD3D("DXGI adapter: %s (vendor %.4X, device %.4X)", adapter_name_mbcs,
|
||||
adapter_desc.VendorId, adapter_desc.DeviceId);
|
||||
|
||||
// Create the Direct3D 12 device.
|
||||
ID3D12Device* device;
|
||||
if (FAILED(pfn_d3d12_create_device_(adapter, D3D_FEATURE_LEVEL_11_0,
|
||||
IID_PPV_ARGS(&device)))) {
|
||||
|
|
|
@ -62,6 +62,9 @@ class D3D12Provider : public GraphicsProvider {
|
|||
return start;
|
||||
}
|
||||
|
||||
// Adapter info.
|
||||
// Returns the vendor ID of the DXGI adapter, as read from
// DXGI_ADAPTER_DESC::VendorId during Initialize().
uint32_t GetAdapterVendorID() const { return adapter_vendor_id_; }
|
||||
|
||||
// Device features.
|
||||
uint32_t GetProgrammableSamplePositionsTier() const {
|
||||
return programmable_sample_positions_tier_;
|
||||
|
@ -126,6 +129,8 @@ class D3D12Provider : public GraphicsProvider {
|
|||
uint32_t descriptor_size_rtv_;
|
||||
uint32_t descriptor_size_dsv_;
|
||||
|
||||
uint32_t adapter_vendor_id_;
|
||||
|
||||
uint32_t programmable_sample_positions_tier_;
|
||||
bool rasterizer_ordered_views_supported_;
|
||||
uint32_t tiled_resources_tier_;
|
||||
|
|
Loading…
Reference in New Issue