[D3D12] Switch to DXBC shader translator (currently unusable)

This commit is contained in:
Triang3l 2018-08-30 20:42:22 +03:00
parent 8376918bb7
commit 8268825f3b
9 changed files with 59 additions and 133 deletions

View File

@ -20,7 +20,7 @@
#include "xenia/gpu/d3d12/render_target_cache.h" #include "xenia/gpu/d3d12/render_target_cache.h"
#include "xenia/gpu/d3d12/shared_memory.h" #include "xenia/gpu/d3d12/shared_memory.h"
#include "xenia/gpu/d3d12/texture_cache.h" #include "xenia/gpu/d3d12/texture_cache.h"
#include "xenia/gpu/hlsl_shader_translator.h" #include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
#include "xenia/kernel/kernel_state.h" #include "xenia/kernel/kernel_state.h"
#include "xenia/ui/d3d12/command_list.h" #include "xenia/ui/d3d12/command_list.h"
@ -266,7 +266,7 @@ class D3D12CommandProcessor : public CommandProcessor {
ID3D12DescriptorHeap* current_sampler_heap_; ID3D12DescriptorHeap* current_sampler_heap_;
// System shader constants. // System shader constants.
HlslShaderTranslator::SystemConstants system_constants_; DxbcShaderTranslator::SystemConstants system_constants_;
// Constant buffer bindings. // Constant buffer bindings.
struct ConstantBufferBinding { struct ConstantBufferBinding {

View File

@ -9,15 +9,10 @@
#include "xenia/gpu/d3d12/d3d12_shader.h" #include "xenia/gpu/d3d12/d3d12_shader.h"
#include <gflags/gflags.h>
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/gpu_flags.h"
DEFINE_bool(d3d12_shader_disasm, false,
"Disassemble translated shaders after compilation.");
namespace xe { namespace xe {
namespace gpu { namespace gpu {
namespace d3d12 { namespace d3d12 {
@ -32,6 +27,7 @@ D3D12Shader::~D3D12Shader() {
} }
} }
#if 0
void D3D12Shader::SetTexturesAndSamplers( void D3D12Shader::SetTexturesAndSamplers(
const HlslShaderTranslator::TextureSRV* texture_srvs, const HlslShaderTranslator::TextureSRV* texture_srvs,
uint32_t texture_srv_count, const uint32_t* sampler_fetch_constants, uint32_t texture_srv_count, const uint32_t* sampler_fetch_constants,
@ -52,91 +48,23 @@ void D3D12Shader::SetTexturesAndSamplers(
} }
sampler_count_ = sampler_count; sampler_count_ = sampler_count;
} }
#endif
bool D3D12Shader::Prepare() { bool D3D12Shader::DisassembleDXBC() {
assert_null(blob_); if (!host_disassembly_.empty()) {
assert_true(is_valid()); return true;
}
const char* target; ID3DBlob* blob;
switch (shader_type_) { if (FAILED(D3DDisassemble(translated_binary().data(),
case ShaderType::kVertex: translated_binary().size(),
target = "vs_5_1"; D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING |
break; D3D_DISASM_ENABLE_INSTRUCTION_OFFSET,
case ShaderType::kPixel: nullptr, &blob))) {
target = "ps_5_1"; return false;
break; }
default: host_disassembly_ = reinterpret_cast<const char*>(blob->GetBufferPointer());
assert_unhandled_case(shader_type_); blob->Release();
is_valid_ = false;
return false;
}
// TODO(Triang3l): Choose the appropriate optimization level based on compile
// time and how invariance is handled in vertex shaders.
ID3DBlob* error_blob = nullptr;
bool compiled = SUCCEEDED(
D3DCompile(translated_binary_.data(), translated_binary_.size(), nullptr,
nullptr, nullptr, "main", target,
D3DCOMPILE_SKIP_OPTIMIZATION, 0, &blob_, &error_blob));
if (!compiled) {
XELOGE("%s shader %.16llX compilation failed!", target, ucode_data_hash());
}
if (error_blob != nullptr) {
const char* error_log =
reinterpret_cast<const char*>(error_blob->GetBufferPointer());
host_error_log_ = error_log;
if (compiled) {
XELOGW("%s shader %.16llX compiled with warnings!", target,
ucode_data_hash());
XELOGW("%s", error_log);
XELOGW("HLSL source:");
// The buffer isn't terminated.
translated_binary_.push_back(0);
XELOGW("%s", reinterpret_cast<const char*>(translated_binary_.data()));
translated_binary_.pop_back();
} else {
XELOGE("%s", error_log);
XELOGE("HLSL source:");
translated_binary_.push_back(0);
XELOGE("%s", reinterpret_cast<const char*>(translated_binary_.data()));
translated_binary_.pop_back();
}
error_blob->Release();
}
if (!compiled) {
is_valid_ = false;
return false;
}
if (FLAGS_d3d12_shader_disasm) {
ID3DBlob* disassembly_blob;
if (SUCCEEDED(D3DDisassemble(blob_->GetBufferPointer(),
blob_->GetBufferSize(),
D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING |
D3D_DISASM_ENABLE_INSTRUCTION_OFFSET, nullptr,
&disassembly_blob))) {
host_disassembly_ =
reinterpret_cast<const char*>(disassembly_blob->GetBufferPointer());
disassembly_blob->Release();
} else {
XELOGE("Failed to disassemble DXBC for %s shader %.16llX", target,
ucode_data_hash());
}
}
return true; return true;
}
const uint8_t* D3D12Shader::GetDXBC() const {
assert_not_null(blob_);
return reinterpret_cast<const uint8_t*>(blob_->GetBufferPointer());
}
size_t D3D12Shader::GetDXBCSize() const {
assert_not_null(blob_);
return blob_->GetBufferSize();
} }
} // namespace d3d12 } // namespace d3d12

View File

@ -10,6 +10,7 @@
#ifndef XENIA_GPU_D3D12_D3D12_SHADER_H_ #ifndef XENIA_GPU_D3D12_D3D12_SHADER_H_
#define XENIA_GPU_D3D12_D3D12_SHADER_H_ #define XENIA_GPU_D3D12_D3D12_SHADER_H_
// TODO(Triang3l): Remove hlsl_shader_translator.
#include "xenia/gpu/hlsl_shader_translator.h" #include "xenia/gpu/hlsl_shader_translator.h"
#include "xenia/gpu/shader.h" #include "xenia/gpu/shader.h"
#include "xenia/ui/d3d12/d3d12_api.h" #include "xenia/ui/d3d12/d3d12_api.h"
@ -24,15 +25,14 @@ class D3D12Shader : public Shader {
const uint32_t* dword_ptr, uint32_t dword_count); const uint32_t* dword_ptr, uint32_t dword_count);
~D3D12Shader() override; ~D3D12Shader() override;
#if 0
void SetTexturesAndSamplers( void SetTexturesAndSamplers(
const HlslShaderTranslator::TextureSRV* texture_srvs, const HlslShaderTranslator::TextureSRV* texture_srvs,
uint32_t texture_srv_count, const uint32_t* sampler_fetch_constants, uint32_t texture_srv_count, const uint32_t* sampler_fetch_constants,
uint32_t sampler_count); uint32_t sampler_count);
#endif
bool Prepare(); bool DisassembleDXBC();
const uint8_t* GetDXBC() const;
size_t GetDXBCSize() const;
struct TextureSRV { struct TextureSRV {
uint32_t fetch_constant; uint32_t fetch_constant;

View File

@ -9,6 +9,8 @@
#include "xenia/gpu/d3d12/pipeline_cache.h" #include "xenia/gpu/d3d12/pipeline_cache.h"
#include <gflags/gflags.h>
#include <algorithm> #include <algorithm>
#include <cinttypes> #include <cinttypes>
#include <cmath> #include <cmath>
@ -18,9 +20,10 @@
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/profiling.h" #include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h" #include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/d3d12/render_target_cache.h"
#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/hlsl_shader_translator.h"
DEFINE_bool(d3d12_dxbc_disasm, false,
"Disassemble DXBC shaders after generation.");
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -34,7 +37,7 @@ namespace d3d12 {
PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor, PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file) RegisterFile* register_file)
: command_processor_(command_processor), register_file_(register_file) { : command_processor_(command_processor), register_file_(register_file) {
shader_translator_ = std::make_unique<HlslShaderTranslator>(); shader_translator_ = std::make_unique<DxbcShaderTranslator>();
// Set pipeline state description values we never change. // Set pipeline state description values we never change.
// Zero out tessellation, stream output, blend state and formats for render // Zero out tessellation, stream output, blend state and formats for render
@ -94,14 +97,6 @@ bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader,
XELOGE("Failed to translate the pixel shader!"); XELOGE("Failed to translate the pixel shader!");
return false; return false;
} }
if (!vertex_shader->is_valid()) {
XELOGE("Failed to prepare the vertex shader!");
return false;
}
if (pixel_shader != nullptr && !pixel_shader->is_valid()) {
XELOGE("Failed to prepare the pixel shader!");
return false;
}
return true; return true;
} }
@ -201,6 +196,9 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader,
return false; return false;
} }
// TODO(Triang3l): Re-enable this when the DXBC shader translator supports
// textures.
#if 0
uint32_t texture_srv_count, sampler_count; uint32_t texture_srv_count, sampler_count;
const HlslShaderTranslator::TextureSRV* texture_srvs = const HlslShaderTranslator::TextureSRV* texture_srvs =
shader_translator_->GetTextureSRVs(texture_srv_count); shader_translator_->GetTextureSRVs(texture_srv_count);
@ -208,14 +206,7 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader,
shader_translator_->GetSamplerFetchConstants(sampler_count); shader_translator_->GetSamplerFetchConstants(sampler_count);
shader->SetTexturesAndSamplers(texture_srvs, texture_srv_count, shader->SetTexturesAndSamplers(texture_srvs, texture_srv_count,
sampler_fetch_constants, sampler_count); sampler_fetch_constants, sampler_count);
#endif
// Prepare the shader for use (creates the Shader Model bytecode).
// It could still fail at this point.
if (!shader->Prepare()) {
XELOGE("Shader %.16" PRIX64 "preparation failed; marking as ignored",
shader->ucode_data_hash());
return false;
}
if (shader->is_valid()) { if (shader->is_valid()) {
XELOGGPU("Generated %s shader (%db) - hash %.16" PRIX64 ":\n%s\n", XELOGGPU("Generated %s shader (%db) - hash %.16" PRIX64 ":\n%s\n",
@ -224,6 +215,14 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader,
shader->ucode_disassembly().c_str()); shader->ucode_disassembly().c_str());
} }
// Disassemble the shader for dumping.
if (FLAGS_d3d12_dxbc_disasm) {
if (!shader->DisassembleDXBC()) {
XELOGE("Failed to disassemble DXBC shader %.16" PRIX64,
shader->ucode_data_hash());
}
}
// Dump shader files if desired. // Dump shader files if desired.
if (!FLAGS_dump_shaders.empty()) { if (!FLAGS_dump_shaders.empty()) {
shader->Dump(FLAGS_dump_shaders, "d3d12"); shader->Dump(FLAGS_dump_shaders, "d3d12");
@ -318,11 +317,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
if (update_desc_.pRootSignature == nullptr) { if (update_desc_.pRootSignature == nullptr) {
return UpdateStatus::kError; return UpdateStatus::kError;
} }
update_desc_.VS.pShaderBytecode = vertex_shader->GetDXBC(); update_desc_.VS.pShaderBytecode = vertex_shader->translated_binary().data();
update_desc_.VS.BytecodeLength = vertex_shader->GetDXBCSize(); update_desc_.VS.BytecodeLength = vertex_shader->translated_binary().size();
if (pixel_shader != nullptr) { if (pixel_shader != nullptr) {
update_desc_.PS.pShaderBytecode = pixel_shader->GetDXBC(); update_desc_.PS.pShaderBytecode = pixel_shader->translated_binary().data();
update_desc_.PS.BytecodeLength = pixel_shader->GetDXBCSize(); update_desc_.PS.BytecodeLength = pixel_shader->translated_binary().size();
} else { } else {
update_desc_.PS.pShaderBytecode = nullptr; update_desc_.PS.pShaderBytecode = nullptr;
update_desc_.PS.BytecodeLength = 0; update_desc_.PS.BytecodeLength = 0;

View File

@ -16,7 +16,7 @@
#include "xenia/gpu/d3d12/d3d12_shader.h" #include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/d3d12/render_target_cache.h" #include "xenia/gpu/d3d12/render_target_cache.h"
#include "xenia/gpu/hlsl_shader_translator.h" #include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/register_file.h" #include "xenia/gpu/register_file.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
@ -86,7 +86,7 @@ class PipelineCache {
RegisterFile* register_file_; RegisterFile* register_file_;
// Reusable shader translator. // Reusable shader translator.
std::unique_ptr<HlslShaderTranslator> shader_translator_ = nullptr; std::unique_ptr<DxbcShaderTranslator> shader_translator_ = nullptr;
// All loaded shaders mapped by their guest hash key. // All loaded shaders mapped by their guest hash key.
std::unordered_map<uint64_t, D3D12Shader*> shader_map_; std::unordered_map<uint64_t, D3D12Shader*> shader_map_;

View File

@ -5,24 +5,24 @@ void main(point XeVertex xe_in[1], inout TriangleStream<XeVertex> xe_stream) {
XeVertex xe_out; XeVertex xe_out;
xe_out.interpolators = xe_in[0].interpolators; xe_out.interpolators = xe_in[0].interpolators;
xe_out.position.zw = xe_in[0].position.zw; xe_out.position.zw = xe_in[0].position.zw;
xe_out.point_size = xe_in[0].point_size; xe_out.point_params.z = xe_in[0].point_params.z;
// Shader header writes -1.0f to point_size by default, so any positive value // Shader header writes -1.0f to point_size by default, so any positive value
// means that it was overwritten by the translated vertex shader. // means that it was overwritten by the translated vertex shader.
float2 point_size = float2 point_size =
(xe_in[0].point_size > 0.0f ? xe_in[0].point_size.xx : xe_point_size) * (xe_in[0].point_params.z > 0.0f ? xe_in[0].point_params.zz
xe_ndc_scale.xy; : xe_point_size) * xe_ndc_scale.xy;
xe_out.point_coord = float2(0.0, 1.0); xe_out.point_params.xy = float2(0.0, 1.0);
xe_out.position.xy = xe_in[0].position.xy + float2(-1.0, 1.0) * point_size; xe_out.position.xy = xe_in[0].position.xy + float2(-1.0, 1.0) * point_size;
xe_stream.Append(xe_out); xe_stream.Append(xe_out);
xe_out.point_coord = float2(1.0, 1.0); xe_out.point_params.xy = float2(1.0, 1.0);
xe_out.position.xy = xe_in[0].position.xy + point_size; xe_out.position.xy = xe_in[0].position.xy + point_size;
xe_stream.Append(xe_out); xe_stream.Append(xe_out);
xe_out.point_coord = float2(0.0, 0.0); xe_out.point_params.xy = float2(0.0, 0.0);
xe_out.position.xy = xe_in[0].position.xy - point_size; xe_out.position.xy = xe_in[0].position.xy - point_size;
xe_stream.Append(xe_out); xe_stream.Append(xe_out);
xe_out.point_coord = float2(1.0, 0.0); xe_out.point_params.xy = float2(1.0, 0.0);
xe_out.position.xy = xe_in[0].position.xy + float2(1.0, -1.0) * point_size; xe_out.position.xy = xe_in[0].position.xy + float2(1.0, -1.0) * point_size;
xe_stream.Append(xe_out); xe_stream.Append(xe_out);
xe_stream.RestartStrip(); xe_stream.RestartStrip();

View File

@ -33,9 +33,9 @@ void main(triangle XeVertex xe_in[3],
xe_in[0].interpolators[i] + xe_in[0].interpolators[i] +
xe_in[2].interpolators[i]; xe_in[2].interpolators[i];
} }
xe_out.point_coord = xe_in[1].point_coord + xe_out.point_params.xy = xe_in[1].point_params.xy +
xe_in[0].point_coord - xe_in[0].point_params.xy -
xe_in[2].point_coord; xe_in[2].point_params.xy;
xe_out.position = float4(xe_in[1].position.xy - xe_out.position = float4(xe_in[1].position.xy -
xe_in[0].position.xy + xe_in[0].position.xy +
xe_in[2].position.xy, xe_in[2].position.xy,
@ -53,15 +53,15 @@ void main(triangle XeVertex xe_in[3],
xe_in[1].interpolators[i] + xe_in[1].interpolators[i] +
xe_in[2].interpolators[i]; xe_in[2].interpolators[i];
} }
xe_out.point_coord = xe_in[0].point_coord + xe_out.point_params.xy = xe_in[0].point_params.xy +
xe_in[1].point_coord - xe_in[1].point_params.xy -
xe_in[2].point_coord; xe_in[2].point_params.xy;
xe_out.position = float4(xe_in[0].position.xy - xe_out.position = float4(xe_in[0].position.xy -
xe_in[1].position.xy + xe_in[1].position.xy +
xe_in[2].position.xy, xe_in[2].position.xy,
xe_in[2].position.zw); xe_in[2].position.zw);
} }
xe_out.point_size = xe_in[2].point_size; xe_out.point_params.z = xe_in[2].point_params.z;
xe_stream.Append(xe_out); xe_stream.Append(xe_out);
xe_stream.RestartStrip(); xe_stream.RestartStrip();
} }

View File

@ -26,9 +26,8 @@ cbuffer xe_system_cbuffer : register(b0) {
struct XeVertex { struct XeVertex {
float4 interpolators[16] : TEXCOORD0; float4 interpolators[16] : TEXCOORD0;
float2 point_coord : TEXCOORD16; float3 point_params : TEXCOORD16;
float4 position : SV_Position; float4 position : SV_Position;
float point_size : PSIZE;
}; };
#endif // XENIA_GPU_D3D12_SHADERS_XENOS_DRAW_HLSLI_ #endif // XENIA_GPU_D3D12_SHADERS_XENOS_DRAW_HLSLI_

View File

@ -165,7 +165,7 @@ std::vector<uint8_t> DxbcShaderTranslator::CompleteTranslation() {
uint32_t DxbcShaderTranslator::AppendString(std::vector<uint32_t>& dest, uint32_t DxbcShaderTranslator::AppendString(std::vector<uint32_t>& dest,
const char* source) { const char* source) {
size_t size = std::strlen(source) + 1; size_t size = std::strlen(source) + 1;
size_t size_aligned = xe::align(size_aligned, sizeof(uint32_t)); size_t size_aligned = xe::align(size, sizeof(uint32_t));
size_t dest_position = dest.size(); size_t dest_position = dest.size();
dest.resize(dest_position + size_aligned / sizeof(uint32_t)); dest.resize(dest_position + size_aligned / sizeof(uint32_t));
std::memcpy(&dest[dest_position], source, size); std::memcpy(&dest[dest_position], source, size);