From 8268825f3b6bc78bdd792b7da11bb855f47dd0a6 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 30 Aug 2018 20:42:22 +0300 Subject: [PATCH] [D3D12] Switch to DXBC shader translator (currently unusable) --- src/xenia/gpu/d3d12/d3d12_command_processor.h | 4 +- src/xenia/gpu/d3d12/d3d12_shader.cc | 98 +++---------------- src/xenia/gpu/d3d12/d3d12_shader.h | 8 +- src/xenia/gpu/d3d12/pipeline_cache.cc | 45 +++++---- src/xenia/gpu/d3d12/pipeline_cache.h | 4 +- .../shaders/primitive_point_list.gs.hlsl | 14 +-- .../shaders/primitive_rectangle_list.gs.hlsl | 14 +-- src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli | 3 +- src/xenia/gpu/dxbc_shader_translator.cc | 2 +- 9 files changed, 59 insertions(+), 133 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 54528eefb..fd1ca9557 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -20,7 +20,7 @@ #include "xenia/gpu/d3d12/render_target_cache.h" #include "xenia/gpu/d3d12/shared_memory.h" #include "xenia/gpu/d3d12/texture_cache.h" -#include "xenia/gpu/hlsl_shader_translator.h" +#include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" #include "xenia/ui/d3d12/command_list.h" @@ -266,7 +266,7 @@ class D3D12CommandProcessor : public CommandProcessor { ID3D12DescriptorHeap* current_sampler_heap_; // System shader constants. - HlslShaderTranslator::SystemConstants system_constants_; + DxbcShaderTranslator::SystemConstants system_constants_; // Constant buffer bindings. 
struct ConstantBufferBinding { diff --git a/src/xenia/gpu/d3d12/d3d12_shader.cc b/src/xenia/gpu/d3d12/d3d12_shader.cc index 1e049dad4..80c1a653d 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.cc +++ b/src/xenia/gpu/d3d12/d3d12_shader.cc @@ -9,15 +9,10 @@ #include "xenia/gpu/d3d12/d3d12_shader.h" -#include - #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/gpu/gpu_flags.h" -DEFINE_bool(d3d12_shader_disasm, false, - "Disassemble translated shaders after compilation."); - namespace xe { namespace gpu { namespace d3d12 { @@ -32,6 +27,7 @@ D3D12Shader::~D3D12Shader() { } } +#if 0 void D3D12Shader::SetTexturesAndSamplers( const HlslShaderTranslator::TextureSRV* texture_srvs, uint32_t texture_srv_count, const uint32_t* sampler_fetch_constants, @@ -52,93 +48,25 @@ void D3D12Shader::SetTexturesAndSamplers( } sampler_count_ = sampler_count; } +#endif -bool D3D12Shader::Prepare() { - assert_null(blob_); - assert_true(is_valid()); - - const char* target; - switch (shader_type_) { - case ShaderType::kVertex: - target = "vs_5_1"; - break; - case ShaderType::kPixel: - target = "ps_5_1"; - break; - default: - assert_unhandled_case(shader_type_); - is_valid_ = false; - return false; +bool D3D12Shader::DisassembleDXBC() { + if (!host_disassembly_.empty()) { + return true; } - - // TODO(Triang3l): Choose the appropriate optimization level based on compile - // time and how invariance is handled in vertex shaders. 
- ID3DBlob* error_blob = nullptr; - bool compiled = SUCCEEDED( - D3DCompile(translated_binary_.data(), translated_binary_.size(), nullptr, - nullptr, nullptr, "main", target, - D3DCOMPILE_SKIP_OPTIMIZATION, 0, &blob_, &error_blob)); - - if (!compiled) { - XELOGE("%s shader %.16llX compilation failed!", target, ucode_data_hash()); - } - if (error_blob != nullptr) { - const char* error_log = - reinterpret_cast(error_blob->GetBufferPointer()); - host_error_log_ = error_log; - if (compiled) { - XELOGW("%s shader %.16llX compiled with warnings!", target, - ucode_data_hash()); - XELOGW("%s", error_log); - XELOGW("HLSL source:"); - // The buffer isn't terminated. - translated_binary_.push_back(0); - XELOGW("%s", reinterpret_cast(translated_binary_.data())); - translated_binary_.pop_back(); - } else { - XELOGE("%s", error_log); - XELOGE("HLSL source:"); - translated_binary_.push_back(0); - XELOGE("%s", reinterpret_cast(translated_binary_.data())); - translated_binary_.pop_back(); - } - error_blob->Release(); - } - - if (!compiled) { - is_valid_ = false; + ID3DBlob* blob; + if (FAILED(D3DDisassemble(translated_binary().data(), + translated_binary().size(), + D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING | + D3D_DISASM_ENABLE_INSTRUCTION_OFFSET, + nullptr, &blob))) { return false; } - - if (FLAGS_d3d12_shader_disasm) { - ID3DBlob* disassembly_blob; - if (SUCCEEDED(D3DDisassemble(blob_->GetBufferPointer(), - blob_->GetBufferSize(), - D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING | - D3D_DISASM_ENABLE_INSTRUCTION_OFFSET, nullptr, - &disassembly_blob))) { - host_disassembly_ = - reinterpret_cast(disassembly_blob->GetBufferPointer()); - disassembly_blob->Release(); - } else { - XELOGE("Failed to disassemble DXBC for %s shader %.16llX", target, - ucode_data_hash()); - } - } - + host_disassembly_ = reinterpret_cast(blob->GetBufferPointer()); + blob->Release(); return true; } -const uint8_t* D3D12Shader::GetDXBC() const { - assert_not_null(blob_); - return 
reinterpret_cast(blob_->GetBufferPointer()); -} - -size_t D3D12Shader::GetDXBCSize() const { - assert_not_null(blob_); - return blob_->GetBufferSize(); -} - } // namespace d3d12 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/d3d12/d3d12_shader.h b/src/xenia/gpu/d3d12/d3d12_shader.h index a38ac6f02..a374608c3 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.h +++ b/src/xenia/gpu/d3d12/d3d12_shader.h @@ -10,6 +10,7 @@ #ifndef XENIA_GPU_D3D12_D3D12_SHADER_H_ #define XENIA_GPU_D3D12_D3D12_SHADER_H_ +// TODO(Triang3l): Remove hlsl_shader_translator. #include "xenia/gpu/hlsl_shader_translator.h" #include "xenia/gpu/shader.h" #include "xenia/ui/d3d12/d3d12_api.h" @@ -24,15 +25,14 @@ class D3D12Shader : public Shader { const uint32_t* dword_ptr, uint32_t dword_count); ~D3D12Shader() override; +#if 0 void SetTexturesAndSamplers( const HlslShaderTranslator::TextureSRV* texture_srvs, uint32_t texture_srv_count, const uint32_t* sampler_fetch_constants, uint32_t sampler_count); +#endif - bool Prepare(); - - const uint8_t* GetDXBC() const; - size_t GetDXBCSize() const; + bool DisassembleDXBC(); struct TextureSRV { uint32_t fetch_constant; diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index c8abe2fa8..1c47c277f 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -9,6 +9,8 @@ #include "xenia/gpu/d3d12/pipeline_cache.h" +#include + #include #include #include @@ -18,9 +20,10 @@ #include "xenia/base/logging.h" #include "xenia/base/profiling.h" #include "xenia/gpu/d3d12/d3d12_command_processor.h" -#include "xenia/gpu/d3d12/render_target_cache.h" #include "xenia/gpu/gpu_flags.h" -#include "xenia/gpu/hlsl_shader_translator.h" + +DEFINE_bool(d3d12_dxbc_disasm, false, + "Disassemble DXBC shaders after generation."); namespace xe { namespace gpu { @@ -34,7 +37,7 @@ namespace d3d12 { PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor, RegisterFile* register_file) : 
command_processor_(command_processor), register_file_(register_file) { - shader_translator_ = std::make_unique(); + shader_translator_ = std::make_unique(); // Set pipeline state description values we never change. // Zero out tessellation, stream output, blend state and formats for render @@ -94,14 +97,6 @@ bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader, XELOGE("Failed to translate the pixel shader!"); return false; } - if (!vertex_shader->is_valid()) { - XELOGE("Failed to prepare the vertex shader!"); - return false; - } - if (pixel_shader != nullptr && !pixel_shader->is_valid()) { - XELOGE("Failed to prepare the pixel shader!"); - return false; - } return true; } @@ -201,6 +196,9 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader, return false; } + // TODO(Triang3l): Re-enable this when the DXBC shader translator supports + // textures. +#if 0 uint32_t texture_srv_count, sampler_count; const HlslShaderTranslator::TextureSRV* texture_srvs = shader_translator_->GetTextureSRVs(texture_srv_count); @@ -208,14 +206,7 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader, shader_translator_->GetSamplerFetchConstants(sampler_count); shader->SetTexturesAndSamplers(texture_srvs, texture_srv_count, sampler_fetch_constants, sampler_count); - - // Prepare the shader for use (creates the Shader Model bytecode). - // It could still fail at this point. - if (!shader->Prepare()) { - XELOGE("Shader %.16" PRIX64 "preparation failed; marking as ignored", - shader->ucode_data_hash()); - return false; - } +#endif if (shader->is_valid()) { XELOGGPU("Generated %s shader (%db) - hash %.16" PRIX64 ":\n%s\n", @@ -224,6 +215,14 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader, shader->ucode_disassembly().c_str()); } + // Disassemble the shader for dumping. 
+ if (FLAGS_d3d12_dxbc_disasm) { + if (!shader->DisassembleDXBC()) { + XELOGE("Failed to disassemble DXBC shader %.16" PRIX64, + shader->ucode_data_hash()); + } + } + // Dump shader files if desired. if (!FLAGS_dump_shaders.empty()) { shader->Dump(FLAGS_dump_shaders, "d3d12"); @@ -318,11 +317,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( if (update_desc_.pRootSignature == nullptr) { return UpdateStatus::kError; } - update_desc_.VS.pShaderBytecode = vertex_shader->GetDXBC(); - update_desc_.VS.BytecodeLength = vertex_shader->GetDXBCSize(); + update_desc_.VS.pShaderBytecode = vertex_shader->translated_binary().data(); + update_desc_.VS.BytecodeLength = vertex_shader->translated_binary().size(); if (pixel_shader != nullptr) { - update_desc_.PS.pShaderBytecode = pixel_shader->GetDXBC(); - update_desc_.PS.BytecodeLength = pixel_shader->GetDXBCSize(); + update_desc_.PS.pShaderBytecode = pixel_shader->translated_binary().data(); + update_desc_.PS.BytecodeLength = pixel_shader->translated_binary().size(); } else { update_desc_.PS.pShaderBytecode = nullptr; update_desc_.PS.BytecodeLength = 0; diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index 138e274de..593d1eb92 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -16,7 +16,7 @@ #include "xenia/gpu/d3d12/d3d12_shader.h" #include "xenia/gpu/d3d12/render_target_cache.h" -#include "xenia/gpu/hlsl_shader_translator.h" +#include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/register_file.h" #include "xenia/gpu/xenos.h" @@ -86,7 +86,7 @@ class PipelineCache { RegisterFile* register_file_; // Reusable shader translator. - std::unique_ptr shader_translator_ = nullptr; + std::unique_ptr shader_translator_ = nullptr; // All loaded shaders mapped by their guest hash key. 
std::unordered_map shader_map_; diff --git a/src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl b/src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl index 58a84eced..88a9163ed 100644 --- a/src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl +++ b/src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl @@ -5,24 +5,24 @@ void main(point XeVertex xe_in[1], inout TriangleStream xe_stream) { XeVertex xe_out; xe_out.interpolators = xe_in[0].interpolators; xe_out.position.zw = xe_in[0].position.zw; - xe_out.point_size = xe_in[0].point_size; + xe_out.point_params.z = xe_in[0].point_params.z; // Shader header writes -1.0f to point_size by default, so any positive value // means that it was overwritten by the translated vertex shader. float2 point_size = - (xe_in[0].point_size > 0.0f ? xe_in[0].point_size.xx : xe_point_size) * - xe_ndc_scale.xy; + (xe_in[0].point_params.z > 0.0f ? xe_in[0].point_params.zz + : xe_point_size) * xe_ndc_scale.xy; - xe_out.point_coord = float2(0.0, 1.0); + xe_out.point_params.xy = float2(0.0, 1.0); xe_out.position.xy = xe_in[0].position.xy + float2(-1.0, 1.0) * point_size; xe_stream.Append(xe_out); - xe_out.point_coord = float2(1.0, 1.0); + xe_out.point_params.xy = float2(1.0, 1.0); xe_out.position.xy = xe_in[0].position.xy + point_size; xe_stream.Append(xe_out); - xe_out.point_coord = float2(0.0, 0.0); + xe_out.point_params.xy = float2(0.0, 0.0); xe_out.position.xy = xe_in[0].position.xy - point_size; xe_stream.Append(xe_out); - xe_out.point_coord = float2(1.0, 0.0); + xe_out.point_params.xy = float2(1.0, 0.0); xe_out.position.xy = xe_in[0].position.xy + float2(1.0, -1.0) * point_size; xe_stream.Append(xe_out); xe_stream.RestartStrip(); diff --git a/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl b/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl index a4384911f..1a5736a36 100644 --- a/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl +++ 
b/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl @@ -33,9 +33,9 @@ void main(triangle XeVertex xe_in[3], xe_in[0].interpolators[i] + xe_in[2].interpolators[i]; } - xe_out.point_coord = xe_in[1].point_coord + - xe_in[0].point_coord - - xe_in[2].point_coord; + xe_out.point_params.xy = xe_in[1].point_params.xy + + xe_in[0].point_params.xy - + xe_in[2].point_params.xy; xe_out.position = float4(xe_in[1].position.xy - xe_in[0].position.xy + xe_in[2].position.xy, @@ -53,15 +53,15 @@ void main(triangle XeVertex xe_in[3], xe_in[1].interpolators[i] + xe_in[2].interpolators[i]; } - xe_out.point_coord = xe_in[0].point_coord + - xe_in[1].point_coord - - xe_in[2].point_coord; + xe_out.point_params.xy = xe_in[0].point_params.xy + + xe_in[1].point_params.xy - + xe_in[2].point_params.xy; xe_out.position = float4(xe_in[0].position.xy - xe_in[1].position.xy + xe_in[2].position.xy, xe_in[2].position.zw); } - xe_out.point_size = xe_in[2].point_size; + xe_out.point_params.z = xe_in[2].point_params.z; xe_stream.Append(xe_out); xe_stream.RestartStrip(); } diff --git a/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli b/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli index f4bc50fa5..dccaa8342 100644 --- a/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli +++ b/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli @@ -26,9 +26,8 @@ cbuffer xe_system_cbuffer : register(b0) { struct XeVertex { float4 interpolators[16] : TEXCOORD0; - float2 point_coord : TEXCOORD16; + float3 point_params : TEXCOORD16; float4 position : SV_Position; - float point_size : PSIZE; }; #endif // XENIA_GPU_D3D12_SHADERS_XENOS_DRAW_HLSLI_ diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 5cb8f3d5c..165c37308 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -165,7 +165,7 @@ std::vector DxbcShaderTranslator::CompleteTranslation() { uint32_t DxbcShaderTranslator::AppendString(std::vector& dest, const char* source) { size_t 
size = std::strlen(source) + 1; - size_t size_aligned = xe::align(size_aligned, sizeof(uint32_t)); + size_t size_aligned = xe::align(size, sizeof(uint32_t)); size_t dest_position = dest.size(); dest.resize(dest_position + size_aligned / sizeof(uint32_t)); std::memcpy(&dest[dest_position], source, size);