[D3D12] Shaders (not all compiling yet)

This commit is contained in:
Triang3l 2018-07-24 14:57:21 +03:00
parent b0421de496
commit 87aecfa1b8
10 changed files with 671 additions and 59 deletions

View File

@ -9,6 +9,12 @@
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/xenos.h"
namespace xe {
namespace gpu {
namespace d3d12 {
@ -18,33 +24,133 @@ D3D12CommandProcessor::D3D12CommandProcessor(
: CommandProcessor(graphics_system, kernel_state) {}
D3D12CommandProcessor::~D3D12CommandProcessor() = default;
// Clears the base command processor caches and records a request to clear the
// pipeline cache. The request flag is consumed in PerformSwap, which performs
// the actual clear.
void D3D12CommandProcessor::ClearCaches() {
CommandProcessor::ClearCaches();
// Only flag the request here; nothing is destroyed in this call.
cache_clear_requested_ = true;
}
// Sets up the base command processor context and then creates the pipeline
// cache on top of the D3D12 context.
// Returns false (after logging) if the base setup fails, true otherwise.
bool D3D12CommandProcessor::SetupContext() {
  if (!CommandProcessor::SetupContext()) {
    XELOGE("Unable to initialize base command processor context");
    return false;
  }

  auto context = GetD3D12Context();

  // LoadShader, IssueDraw and PerformSwap dereference pipeline_cache_, so it
  // has to exist before this function reports success.
  pipeline_cache_ = std::make_unique<PipelineCache>(register_file_, context);

  return true;
}
// Tears down the objects created in SetupContext and then shuts down the base
// command processor context.
void D3D12CommandProcessor::ShutdownContext() {
  auto context = GetD3D12Context();
  // Wait for every queued frame before destroying the pipeline cache.
  context->AwaitAllFramesCompletion();

  pipeline_cache_.reset();

  CommandProcessor::ShutdownContext();
}
void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
uint32_t frontbuffer_width,
uint32_t frontbuffer_height) {}
uint32_t frontbuffer_height) {
SCOPE_profile_cpu_f("gpu");
if (current_queue_frame_ != UINT32_MAX) {
EndFrame();
}
if (cache_clear_requested_) {
cache_clear_requested_ = false;
GetD3D12Context()->AwaitAllFramesCompletion();
pipeline_cache_->ClearCache();
}
}
// Returns the shader for the given microcode, delegating the lookup and the
// creation to the pipeline cache. All arguments are forwarded unchanged.
Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type,
                                          uint32_t guest_address,
                                          const uint32_t* host_address,
                                          uint32_t dword_count) {
  return pipeline_cache_->LoadShader(shader_type, guest_address, host_address,
                                     dword_count);
}
bool D3D12CommandProcessor::IssueDraw(PrimitiveType prim_type,
bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
uint32_t index_count,
IndexBufferInfo* index_buffer_info) {
auto& regs = *register_file_;
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
auto enable_mode = static_cast<xenos::ModeControl>(
regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
if (enable_mode == xenos::ModeControl::kIgnore) {
// Ignored.
return true;
} else if (enable_mode == xenos::ModeControl::kCopy) {
// Special copy handling.
return IssueCopy();
}
if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) {
// Doesn't actually draw.
return true;
}
// Shaders will have already been defined by previous loads.
// We need them to do just about anything so validate here.
auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
auto pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
if (!vertex_shader) {
// Always need a vertex shader.
return false;
}
// Depth-only mode doesn't need a pixel shader (we'll use a fake one).
if (enable_mode == xenos::ModeControl::kDepth) {
// Use a dummy pixel shader when required.
pixel_shader = nullptr;
} else if (!pixel_shader) {
// Need a pixel shader in normal color mode.
return true;
}
bool full_update = false;
if (current_queue_frame_ == UINT32_MAX) {
BeginFrame();
full_update = true;
}
auto pipeline_status = pipeline_cache_->ConfigurePipeline(
vertex_shader, pixel_shader, primitive_type);
if (pipeline_status == PipelineCache::UpdateStatus::kError) {
return false;
}
return true;
}
// Copy requests perform no work here; the call only reports success.
bool D3D12CommandProcessor::IssueCopy() {
  return true;
}
void D3D12CommandProcessor::BeginFrame() {
assert_true(current_queue_frame_ == UINT32_MAX);
auto context = GetD3D12Context();
context->BeginSwap();
current_queue_frame_ = context->GetCurrentQueueFrame();
}
// Closes the frame opened by BeginFrame and marks that no frame is open.
// Must only be called while a frame is open.
void D3D12CommandProcessor::EndFrame() {
  assert_true(current_queue_frame_ != UINT32_MAX);

  GetD3D12Context()->EndSwap();

  // UINT32_MAX is the "no frame open" marker checked by the callers.
  current_queue_frame_ = UINT32_MAX;
}
} // namespace d3d12
} // namespace gpu
} // namespace xe

View File

@ -10,10 +10,14 @@
#ifndef XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_
#define XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_
#include <memory>
#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
#include "xenia/gpu/d3d12/pipeline_cache.h"
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/ui/d3d12/d3d12_context.h"
namespace xe {
namespace gpu {
@ -25,7 +29,14 @@ class D3D12CommandProcessor : public CommandProcessor {
kernel::KernelState* kernel_state);
~D3D12CommandProcessor();
private:
void ClearCaches() override;
// Needed by everything that owns transient objects.
xe::ui::d3d12::D3D12Context* GetD3D12Context() const {
return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get());
}
protected:
bool SetupContext() override;
void ShutdownContext() override;
@ -36,9 +47,19 @@ class D3D12CommandProcessor : public CommandProcessor {
const uint32_t* host_address,
uint32_t dword_count) override;
bool IssueDraw(PrimitiveType prim_type, uint32_t index_count,
bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count,
IndexBufferInfo* index_buffer_info) override;
bool IssueCopy() override;
private:
void BeginFrame();
void EndFrame();
bool cache_clear_requested_ = false;
std::unique_ptr<PipelineCache> pipeline_cache_;
uint32_t current_queue_frame_ = UINT32_MAX;
};
} // namespace d3d12

View File

@ -0,0 +1,117 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include <gflags/gflags.h>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/gpu/gpu_flags.h"
DEFINE_bool(d3d12_shader_disasm, true,
"Disassemble translated shaders after compilation.");
namespace xe {
namespace gpu {
namespace d3d12 {
// Constructs the shader by forwarding every argument to the Shader base
// class; no D3D12-specific work is done here (blob_ stays null until
// Prepare() succeeds).
D3D12Shader::D3D12Shader(ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count)
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}
// Releases the compiled bytecode blob, if one was ever created.
D3D12Shader::~D3D12Shader() {
  if (blob_ == nullptr) {
    return;
  }
  blob_->Release();
}
bool D3D12Shader::Prepare() {
assert_null(blob_);
assert_true(is_valid());
const char* target;
switch (shader_type_) {
case ShaderType::kVertex:
target = "vs_5_1";
break;
case ShaderType::kPixel:
target = "ps_5_1";
break;
default:
assert_unhandled_case(shader_type_);
return false;
}
// TODO(Triang3l): Choose the appropriate optimization level based on compile
// time and how invariance is handled in vertex shaders.
ID3DBlob* error_blob = nullptr;
bool compiled =
SUCCEEDED(D3DCompile(translated_binary_.data(), translated_binary_.size(),
nullptr, nullptr, nullptr, "main", target,
D3DCOMPILE_OPTIMIZATION_LEVEL0, 0, &blob_,
&error_blob));
if (!compiled) {
XELOGE("%s shader %.16llX compilation failed!", target, ucode_data_hash());
}
if (error_blob != nullptr) {
if (compiled) {
XELOGW("%s shader %.16llX compiled with warnings!", target,
ucode_data_hash());
XELOGW("%s", reinterpret_cast<const char*>(error_blob->GetBufferPointer()));
XELOGW("HLSL source:");
// The buffer isn't terminated.
translated_binary_.push_back(0);
XELOGW("%s", reinterpret_cast<const char*>(translated_binary_.data()));
translated_binary_.pop_back();
} else {
XELOGE("%s", reinterpret_cast<const char*>(error_blob->GetBufferPointer()));
XELOGE("HLSL source:");
translated_binary_.push_back(0);
XELOGE("%s", reinterpret_cast<const char*>(translated_binary_.data()));
translated_binary_.pop_back();
}
error_blob->Release();
}
if (!compiled) {
return false;
}
if (FLAGS_d3d12_shader_disasm) {
ID3DBlob* disassembly_blob;
if (SUCCEEDED(D3DDisassemble(blob_->GetBufferPointer(),
blob_->GetBufferSize(), 0, nullptr,
&disassembly_blob))) {
host_disassembly_ =
reinterpret_cast<const char*>(disassembly_blob->GetBufferPointer());
disassembly_blob->Release();
} else {
XELOGE("Failed to disassemble DXBC for %s shader %.16llX", target,
ucode_data_hash());
}
}
return true;
}
// Returns a pointer to the compiled bytecode held by blob_.
// Only valid after a successful Prepare(); asserts that the blob exists.
const uint8_t* D3D12Shader::GetDXBC() const {
  assert_not_null(blob_);
  const void* bytecode = blob_->GetBufferPointer();
  return reinterpret_cast<const uint8_t*>(bytecode);
}
// Returns the size reported by blob_ for the compiled bytecode.
// Only valid after a successful Prepare(); asserts that the blob exists.
size_t D3D12Shader::GetDXBCSize() const {
  assert_not_null(blob_);
  const size_t bytecode_size = blob_->GetBufferSize();
  return bytecode_size;
}
} // namespace d3d12
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,39 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D12_D3D12_SHADER_H_
#define XENIA_GPU_D3D12_D3D12_SHADER_H_
#include "xenia/gpu/shader.h"
#include "xenia/ui/d3d12/d3d12_api.h"
namespace xe {
namespace gpu {
namespace d3d12 {
// Shader that owns the bytecode blob produced for the D3D12 backend.
class D3D12Shader : public Shader {
 public:
  D3D12Shader(ShaderType shader_type, uint64_t data_hash,
              const uint32_t* dword_ptr, uint32_t dword_count);
  ~D3D12Shader() override;

  // blob_ is a reference released manually in the destructor, so a copy of
  // this object would release the same blob twice. Copying is disallowed.
  D3D12Shader(const D3D12Shader&) = delete;
  D3D12Shader& operator=(const D3D12Shader&) = delete;

  // Creates the bytecode blob for this shader. Returns false on failure.
  bool Prepare();

  // Accessors for the bytecode blob; the blob must already exist.
  const uint8_t* GetDXBC() const;
  size_t GetDXBCSize() const;

 private:
  // Bytecode blob, null until it is created; released in the destructor.
  ID3DBlob* blob_ = nullptr;
};
} // namespace d3d12
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D12_D3D12_SHADER_H_

View File

@ -0,0 +1,199 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/d3d12/pipeline_cache.h"
#include <cinttypes>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/hlsl_shader_translator.h"
namespace xe {
namespace gpu {
namespace d3d12 {
// Stores the register file and the D3D12 context, and creates the HLSL shader
// translator that is reused for every shader translated by this cache.
PipelineCache::PipelineCache(RegisterFile* register_file,
                             ui::d3d12::D3D12Context* context)
    : register_file_(register_file),
      context_(context),
      shader_translator_(new HlslShaderTranslator()) {}
// Destroys everything still held by the cache by running Shutdown().
PipelineCache::~PipelineCache() {
  Shutdown();
}
// Releases all cached objects; currently this is exactly a cache clear.
void PipelineCache::Shutdown() { ClearCache(); }
// Returns the shader object for the given microcode, creating it on first use.
//
// Shaders are keyed by the XXH64 hash of the microcode, so identical
// microcode always yields the same object. guest_address is not read here.
// The returned shader is owned by the cache and is not translated by this
// call.
D3D12Shader* PipelineCache::LoadShader(ShaderType shader_type,
                                       uint32_t guest_address,
                                       const uint32_t* host_address,
                                       uint32_t dword_count) {
  // Hash the input memory and look the shader up.
  const uint64_t ucode_hash =
      XXH64(host_address, dword_count * sizeof(uint32_t), 0);
  auto found = shader_map_.find(ucode_hash);
  if (found == shader_map_.end()) {
    // Always create the shader and stash it away.
    // We need to track it even if it fails translation so we know not to try
    // again.
    D3D12Shader* created =
        new D3D12Shader(shader_type, ucode_hash, host_address, dword_count);
    shader_map_.insert({ucode_hash, created});
    return created;
  }

  // Shader has been previously loaded.
  return found->second;
}
// Brings the cached pipeline state in line with the given shaders and
// primitive type, and returns the status produced by UpdateState().
PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
    D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
    PrimitiveType primitive_type) {
#if FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

  const UpdateStatus state_status =
      UpdateState(vertex_shader, pixel_shader, primitive_type);
  return state_status;
}
void PipelineCache::ClearCache() {
// Destroy all shaders.
for (auto it : shader_map_) {
delete it.second;
}
shader_map_.clear();
}
// Copies the current 32-bit value of a register into its shadow copy.
// Returns true if the shadow copy changed, false if it already matched.
bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
  const uint32_t current = register_file_->values[register_name].u32;
  const bool changed = *dest != current;
  if (changed) {
    *dest = current;
  }
  return changed;
}
// Copies the current float value of a register into its shadow copy.
// Returns true if the shadow copy changed, false if it compared equal.
bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) {
  const float current = register_file_->values[register_name].f32;
  // Plain floating-point inequality: values that do not compare equal (for
  // example NaN) are always reported as changed.
  const bool changed = *dest != current;
  if (changed) {
    *dest = current;
  }
  return changed;
}
// Translates the shader microcode and prepares the result for use.
//
// Returns false (after logging) if translation or preparation fails;
// otherwise returns whether the shader is valid. A valid shader has its
// microcode disassembly logged, and the shader is dumped to the directory
// named by the dump_shaders flag when that flag is not empty.
bool PipelineCache::TranslateShader(D3D12Shader* shader,
                                    xenos::xe_gpu_program_cntl_t cntl) {
  // Perform translation.
  // If this fails the shader will be marked as invalid and ignored later.
  const bool translated = shader_translator_->Translate(shader, cntl);
  if (!translated) {
    XELOGE("Shader translation failed; marking shader as ignored");
    return false;
  }

  // Prepare the shader for use (creates the Shader Model bytecode).
  // It could still fail at this point.
  const bool prepared = shader->Prepare();
  if (!prepared) {
    XELOGE("Shader preparation failed; marking shader as ignored");
    return false;
  }

  if (shader->is_valid()) {
    const char* stage_name =
        shader->type() == ShaderType::kVertex ? "vertex" : "pixel";
    XELOGGPU("Generated %s shader (%db) - hash %.16" PRIX64 ":\n%s\n",
             stage_name, shader->ucode_dword_count() * 4,
             shader->ucode_data_hash(), shader->ucode_disassembly().c_str());
  }

  // Dump shader files if desired.
  const bool dump_requested = !FLAGS_dump_shaders.empty();
  if (dump_requested) {
    shader->Dump(FLAGS_dump_shaders, "d3d12");
  }

  return shader->is_valid();
}
// Runs the state update passes and combines their results.
//
// Returns kError (after logging) as soon as a pass fails, kMismatch if any
// pass reported a change, and kCompatible otherwise. The pipeline hash state
// is reset first so that the passes can build it up.
PipelineCache::UpdateStatus PipelineCache::UpdateState(
    D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
    PrimitiveType primitive_type) {
  // Reset hash so we can build it up.
  XXH64_reset(&hash_state_, 0);

  bool any_mismatch = false;

  const UpdateStatus stages_status =
      UpdateShaderStages(vertex_shader, pixel_shader, primitive_type);
  if (stages_status == UpdateStatus::kError) {
    XELOGE("Unable to update shader stages");
    return stages_status;
  }
  if (stages_status == UpdateStatus::kMismatch) {
    any_mismatch = true;
  }

  return any_mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
}
// Updates the shadow copy of the shader stage state and translates the
// shaders when that state changed.
//
// The shadow state is always fed into the pipeline hash. Returns kCompatible
// when nothing changed, kMismatch when something changed and the needed
// translations succeeded, and kError when a shader fails to translate.
// vertex_shader must not be null; pixel_shader may be null.
PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
    D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
    PrimitiveType primitive_type) {
  auto& shadow = update_shader_stages_regs_;

  // These are the constant base addresses/ranges for shaders.
  // We have these hardcoded right now cause nothing seems to differ.
  assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 ==
                  0x000FF000 ||
              register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
  assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 ==
                  0x000FF100 ||
              register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);

  // Both registers are always refreshed; |= does not short-circuit.
  bool dirty = SetShadowRegister(&shadow.pa_su_sc_mode_cntl,
                                 XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&shadow.sq_program_cntl,
                             XE_GPU_REG_SQ_PROGRAM_CNTL);

  // Compare the remaining inputs against the shadow copy and store them.
  if (shadow.vertex_shader != vertex_shader) {
    shadow.vertex_shader = vertex_shader;
    dirty = true;
  }
  if (shadow.pixel_shader != pixel_shader) {
    shadow.pixel_shader = pixel_shader;
    dirty = true;
  }
  if (shadow.primitive_type != primitive_type) {
    shadow.primitive_type = primitive_type;
    dirty = true;
  }

  XXH64_update(&hash_state_, &shadow, sizeof(shadow));
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  xenos::xe_gpu_program_cntl_t program_cntl;
  program_cntl.dword_0 = shadow.sq_program_cntl;

  // Translate each shader only if it hasn't been translated before.
  if (!vertex_shader->is_translated()) {
    if (!TranslateShader(vertex_shader, program_cntl)) {
      XELOGE("Failed to translate the vertex shader!");
      return UpdateStatus::kError;
    }
  }
  if (pixel_shader != nullptr && !pixel_shader->is_translated()) {
    if (!TranslateShader(pixel_shader, program_cntl)) {
      XELOGE("Failed to translate the pixel shader!");
      return UpdateStatus::kError;
    }
  }

  return UpdateStatus::kMismatch;
}
} // namespace d3d12
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,93 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D12_PIPELINE_CACHE_H_
#define XENIA_GPU_D3D12_PIPELINE_CACHE_H_
#include <unordered_map>
#include "third_party/xxhash/xxhash.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader_translator.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/d3d12/d3d12_context.h"
namespace xe {
namespace gpu {
namespace d3d12 {
// Cache of the shaders loaded for the D3D12 backend, together with the shadow
// state used to detect when the shader stage configuration changes.
class PipelineCache {
public:
// Result of a state update pass.
enum class UpdateStatus {
// Nothing changed since the previous update.
kCompatible,
// Something changed and the update succeeded.
kMismatch,
// The update failed (for example, a shader could not be translated).
kError,
};
// Stores both pointers without taking ownership of them.
PipelineCache(RegisterFile* register_file, ui::d3d12::D3D12Context* context);
~PipelineCache();
// Releases everything held by the cache.
void Shutdown();
// Returns the shader for the given microcode, creating it on first use.
// The returned pointer is owned by the cache.
D3D12Shader* LoadShader(ShaderType shader_type, uint32_t guest_address,
const uint32_t* host_address, uint32_t dword_count);
// Updates the cached state for the given shaders and primitive type.
UpdateStatus ConfigurePipeline(D3D12Shader* vertex_shader,
D3D12Shader* pixel_shader,
PrimitiveType primitive_type);
// Deletes all loaded shaders.
void ClearCache();
private:
// Copy the current register value into *dest; return true if it changed.
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
bool SetShadowRegister(float* dest, uint32_t register_name);
// Translates and prepares a shader; returns true if it ends up valid.
bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl);
// Runs all update passes and merges their statuses.
UpdateStatus UpdateState(D3D12Shader* vertex_shader,
D3D12Shader* pixel_shader,
PrimitiveType primitive_type);
// Update pass for the shader stages.
UpdateStatus UpdateShaderStages(D3D12Shader* vertex_shader,
D3D12Shader* pixel_shader,
PrimitiveType primitive_type);
RegisterFile* register_file_ = nullptr;
ui::d3d12::D3D12Context* context_ = nullptr;
// Reusable shader translator.
std::unique_ptr<ShaderTranslator> shader_translator_ = nullptr;
// All loaded shaders mapped by their guest hash key.
std::unordered_map<uint64_t, D3D12Shader*> shader_map_;
// Hash state used to incrementally produce pipeline hashes during update.
// By the time the full update pass has run the hash will represent the
// current state in a way that can uniquely identify the produced
// ID3D12PipelineState.
XXH64_state_t hash_state_;
// Shadow copy of the inputs of the shader stage update pass.
// The object is hashed as raw bytes (sizeof of the whole struct), and Reset()
// zeroes it with memset, so the layout and member order matter.
struct UpdateShaderStagesRegisters {
PrimitiveType primitive_type;
uint32_t pa_su_sc_mode_cntl;
uint32_t sq_program_cntl;
D3D12Shader* vertex_shader;
D3D12Shader* pixel_shader;
UpdateShaderStagesRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_shader_stages_regs_;
};
} // namespace d3d12
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D12_PIPELINE_CACHE_H_

View File

@ -76,13 +76,22 @@ void HlslShaderTranslator::StartTranslation() {
Indent();
// Switch level (3).
Indent();
EmitSourceDepth("case 0:\n");
EmitSourceDepth("case 0u:\n");
}
std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
// Add the declarations, the prologue and the epilogue knowing what is needed.
StringBuffer source;
// Common preprocessor statements.
// 3557 is the "loop only executes for 1 iteration" warning caused by the
// control flow loop design.
source.Append(
"#pragma warning(disable : 3557)\n"
"\n"
"#define XE_FLT_MAX 3.402823466e+38\n"
"\n");
// Cubemap sampling. XeCubeTo2D emulates the cube vector ALU instruction that
// gives (t, s, 2 * major axis, face index), XeCubeTo3D reverts its effects
// in tfetchCube because sampling a cubemap as an array doesn't work properly
@ -132,7 +141,6 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
"}\n"
"\n"
"float3 XeCubeTo3D(float3 xe_cube_2d) {\n"
"{\n"
" xe_cube_2d.xy = (xe_cube_2d.xy * 2.0) + 1.0;\n"
" float3 xe_cube_3d;\n"
" uint xe_cube_face_index = uint(xe_cube_2d.z);\n"
@ -157,24 +165,22 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
// Common declarations.
source.Append(
"#define XE_FLT_MAX 3.402823466e+38\n"
"\n"
"cbuffer xe_system_constants : register(b0) {\n"
" float2 xe_viewport_inv_scale;\n"
" uint xe_vertex_index_endian;\n"
" uint xe_textures_are_3d;\n"
"}\n"
"};\n"
"\n"
"struct XeFloatConstantPage {\n"
" float4 c[16];\n"
"}\n"
"};\n"
"ConstantBuffer<XeFloatConstantPage> "
"xe_float_constants[16] : register(b1);\n"
"\n"
"cbuffer xe_loop_bool_constants : register(b17) {\n"
" uint xe_bool_constants[8];\n"
" uint xe_loop_constants[32];\n"
"}\n"
"};\n"
"\n");
if (is_vertex_shader()) {
@ -184,15 +190,17 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
// 11 for 16-in-32. This means we can check bits 0 ^ 1 to see if we need to
// do a 8-in-16 swap, and bit 1 to see if a 16-in-32 swap is needed.
// Vertex element is a temporary integer value for fetches.
// -1 point size means the geometry shader will use the global setting by
// default.
source.AppendFormat(
"cbuffer xe_vertex_fetch_constants : register(b18) {\n"
" uint2 xe_vertex_fetch[96];\n"
"}\n"
"};\n"
"\n"
"ByteAddressBuffer xe_virtual_memory : register(t0, space1);\n"
"\n"
"#define XE_SWAP_OVERLOAD(XeSwapType) \\\n"
"XeSwapType XeSwap(XeSwapType v, uint endian) { \\\n"
"#define XE_BYTE_SWAP_OVERLOAD(XeByteSwapType) \\\n"
"XeByteSwapType XeByteSwap(XeByteSwapType v, uint endian) { \\\n"
" [flatten] if (((endian ^ (endian >> 1u)) & 1u) != 0u) { \\\n"
" v = ((v & 0x00FF00FFu) << 8u) | ((v & 0xFF00FF00u) >> 8u); \\\n"
" } \\\n"
@ -201,25 +209,30 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
" } \\\n"
" return v; \\\n"
"}\n"
"XE_SWAP_OVERLOAD(uint)\n"
"XE_SWAP_OVERLOAD(uint2)\n"
"XE_SWAP_OVERLOAD(uint3)\n"
"XE_SWAP_OVERLOAD(uint4)\n"
"XE_BYTE_SWAP_OVERLOAD(uint)\n"
"XE_BYTE_SWAP_OVERLOAD(uint2)\n"
"XE_BYTE_SWAP_OVERLOAD(uint3)\n"
"XE_BYTE_SWAP_OVERLOAD(uint4)\n"
"\n"
"struct XeVertexShaderOutput {\n"
" float4 position : SV_Position;\n"
" float4 interpolators[%u] : TEXCOORD;\n"
" float4 point_size : PSIZE;\n"
"}\n"
" float point_size : PSIZE;\n"
"};\n"
"\n"
"XeVertexShaderOutput main(uint xe_vertex_index_be : SV_VertexID) {\n"
" float4 xe_r[%u];\n"
" uint xe_vertex_index =\n"
" XeSwap(xe_vertex_index_be, xe_vertex_index_endian);\n"
" XeByteSwap(xe_vertex_index_be, xe_vertex_index_endian);\n"
" uint4 xe_vertex_element;\n"
" xe_r[0].r = float(xe_vertex_index);\n"
" XeVertexShaderOutput xe_output;\n",
" XeVertexShaderOutput xe_output;\n"
" xe_output.position = float4(0.0, 0.0, 0.0, 1.0);\n"
" xe_output.point_size = -1.0;\n",
kMaxInterpolators, register_count());
for (uint32_t i = 0; i < kMaxInterpolators; ++i) {
source.AppendFormat(" xe_output.interpolators[%u] = (0.0).xxxx;\n", i);
}
// TODO(Triang3l): Reset interpolators to zero if really needed.
} else if (is_pixel_shader()) {
// Pixel shader inputs, outputs and prologue.
@ -229,18 +242,26 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
"struct XePixelShaderInput {\n"
" float4 position : SV_Position;\n"
" float4 interpolators[%u] : TEXCOORD;\n"
"}\n"
"};\n"
"\n"
"struct XePixelShaderOutput {\n"
" float4 colors[4] : SV_Target;\n"
"%s"
"}\n"
"};\n"
"\n"
"XePixelShaderOutput main(XePixelShaderInput xe_input) {\n"
" float4 xe_r[%u];\n"
" XePixelShaderOutput xe_output;\n",
" XePixelShaderOutput xe_output;\n"
" xe_output.colors[0] = (0.0).xxxx;\n"
" xe_output.colors[1] = (0.0).xxxx;\n"
" xe_output.colors[2] = (0.0).xxxx;\n"
" xe_output.colors[3] = (0.0).xxxx;\n",
kMaxInterpolators, writes_depth_ ? " float depth : SV_Depth;\n" : "",
register_count());
// Initialize SV_Depth if using it.
if (writes_depth_) {
source.Append(" xe_output.depth = xe_input.position.z;\n");
}
// Copy interpolants to the first registers.
uint32_t interpolator_register_count =
std::min(register_count(), kMaxInterpolators);
@ -262,9 +283,9 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
// Sources for instructions.
" float4 xe_src0, xe_src1, xe_src2;\n"
// Previous vector result (used as a scratch).
" float4 xe_pv;\n"
" float4 xe_pv = float4(0.0, 0.0, 0.0, 0.0);\n"
// Previous scalar result (used for RETAIN_PREV).
" float xe_ps;\n"
" float xe_ps = 0.0;\n"
// Predicate temp, clause-local. Initially false like cf_exec_pred_cond_.
" bool xe_p0 = false;\n"
// Address register when using absolute addressing.
@ -284,9 +305,15 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
source.Append(source_inner_.GetString());
// Epilogue.
if (!cf_wrote_pc_) {
source.Append(
" xe_pc = 0xFFFFu;\n"
" break;\n");
}
source.Append(
" default:\n"
" pc = 0xFFFFu;\n"
" xe_pc = 0xFFFFu;\n"
" break;\n"
" }\n"
" } while (xe_pc != 0xFFFFu);\n");
// TODO(Triang3l): Window offset, half pixel offset, alpha test, gamma.
@ -300,7 +327,11 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
// Emits the switch case label for a control flow target.
//
// If the preceding control flow instruction did not write the program
// counter, an explicit jump to this label (set xe_pc, then break) is emitted
// before the label itself. Exactly one case label is emitted per target.
void HlslShaderTranslator::ProcessLabel(uint32_t cf_index) {
  // 0 is always added in the beginning.
  if (cf_index == 0) {
    return;
  }

  if (!cf_wrote_pc_) {
    EmitSourceDepth("xe_pc = %uu;\n", cf_index);
    // Newline-terminated like every other emitted statement, so the case
    // label below starts on its own line.
    EmitSourceDepth("break;\n");
  }
  EmitSourceDepth("case %uu:\n", cf_index);
}
}
@ -311,14 +342,12 @@ void HlslShaderTranslator::ProcessControlFlowNopInstruction(uint32_t cf_index) {
// Called at the start of a control flow instruction: clears the flag that
// records whether the instruction wrote the program counter, and increases
// the emit indentation. cf_index is not read here.
void HlslShaderTranslator::ProcessControlFlowInstructionBegin(
uint32_t cf_index) {
cf_wrote_pc_ = false;
Indent();
}
// Called at the end of a control flow instruction: when the instruction did
// not write the program counter, emits a comment naming the next label
// (cf_index + 1), then decreases the emit indentation.
void HlslShaderTranslator::ProcessControlFlowInstructionEnd(uint32_t cf_index) {
if (!cf_wrote_pc_) {
// Only a source comment is emitted here, no executable statement.
EmitSourceDepth("// Falling through to L%u\n", cf_index + 1);
}
Unindent();
}
void HlslShaderTranslator::ProcessExecInstructionBegin(
@ -364,21 +393,23 @@ void HlslShaderTranslator::ProcessLoopStartInstruction(
// Setup counter.
EmitSourceDepth("xe_loop_count.yzw = xe_loop_count.xyz;\n");
EmitSourceDepth("xe_loop_count.x = xe_loop_constants[%u] & 0xFFu;\n");
EmitSourceDepth("xe_loop_count.x = xe_loop_constants[%u] & 0xFFu;\n",
instr.loop_constant_index);
// Setup relative indexing.
EmitSourceDepth("xe_aL = xe_aL.xxyz;\n");
if (!instr.is_repeat) {
// Push new loop starting index if not reusing the current one.
EmitSourceDepth("xe_aL.x = int((xe_loop_constants[%u] >> 8u) & 0xFFu);\n");
EmitSourceDepth("xe_aL.x = int((xe_loop_constants[%u] >> 8u) & 0xFFu);\n",
instr.loop_constant_index);
}
// Quick skip loop if zero count.
EmitSourceDepth("if (xe_loop_count.x == 0u) {\n");
EmitSourceDepth(" xe_pc = %u; // Skip loop to L%u\n",
EmitSourceDepth(" xe_pc = %uu; // Skip loop to L%u\n",
instr.loop_skip_address, instr.loop_skip_address);
EmitSourceDepth("} else {\n");
EmitSourceDepth(" xe_pc = %u; // Fallthrough to loop body L%u\n",
EmitSourceDepth(" xe_pc = %uu; // Fallthrough to loop body L%u\n",
instr.dword_index + 1, instr.dword_index + 1);
EmitSourceDepth("}\n");
EmitSourceDepth("break;\n");
@ -406,7 +437,7 @@ void HlslShaderTranslator::ProcessLoopEndInstruction(
EmitSourceDepth("xe_loop_count.w = 0u;\n");
EmitSourceDepth("xe_aL.xyz = xe_aL.yzw;\n");
EmitSourceDepth("xe_aL.w = 0;\n");
EmitSourceDepth("xe_pc = %u; // Exit loop to L%u\n", instr.dword_index + 1,
EmitSourceDepth("xe_pc = %uu; // Exit loop to L%u\n", instr.dword_index + 1,
instr.dword_index + 1);
Unindent();
@ -416,7 +447,7 @@ void HlslShaderTranslator::ProcessLoopEndInstruction(
// Still looping. Adjust index and jump back to body.
EmitSourceDepth("xe_aL.x += int(xe_loop_constants[%u] << 8u) >> 24;\n",
instr.loop_constant_index);
EmitSourceDepth("xe_pc = %u; // Loop back to body L%u\n",
EmitSourceDepth("xe_pc = %uu; // Loop back to body L%u\n",
instr.loop_body_address, instr.loop_body_address);
Unindent();
@ -465,7 +496,7 @@ void HlslShaderTranslator::ProcessJumpInstruction(
}
Indent();
EmitSourceDepth("xe_pc = %u; // L%u\n", instr.target_address,
EmitSourceDepth("xe_pc = %uu; // L%u\n", instr.target_address,
instr.target_address);
EmitSourceDepth("break;\n");
@ -473,7 +504,7 @@ void HlslShaderTranslator::ProcessJumpInstruction(
if (needs_fallthrough) {
uint32_t next_address = instr.dword_index + 1;
EmitSourceDepth("} else {\n");
EmitSourceDepth(" xe_pc = %u; // Fallthrough to L%u\n", next_address,
EmitSourceDepth(" xe_pc = %uu; // Fallthrough to L%u\n", next_address,
next_address);
}
EmitSourceDepth("}\n");
@ -687,9 +718,6 @@ void HlslShaderTranslator::EmitStoreResult(const InstructionResult& result,
break;
}
} else {
if (result.is_clamped) {
EmitSource("saturate(");
}
bool has_const_writes = false;
uint32_t component_write_count = 0;
EmitSource(".");
@ -704,6 +732,9 @@ void HlslShaderTranslator::EmitStoreResult(const InstructionResult& result,
}
}
EmitSource(" = ");
if (result.is_clamped) {
EmitSource("saturate(");
}
if (has_const_writes) {
if (component_write_count > 1) {
EmitSource("float%u(", component_write_count);
@ -804,12 +835,18 @@ void HlslShaderTranslator::ProcessVertexFetchInstruction(
load_function_suffix = "";
break;
}
EmitSourceDepth("xe_vertex_element%s = XeSwap(xe_virtual_memory.Load%s(\n",
load_swizzle, load_function_suffix);
EmitSourceDepth(" (xe_vertex_fetch[%u].x & 0x1FFFFFFCu) + "
"uint(xe_src0.x) * %u + %u),\n",
instr.operands[1].storage_index, instr.attributes.stride * 4,
instr.attributes.offset * 4);
EmitSourceDepth(
"xe_vertex_element%s = XeByteSwap(xe_virtual_memory.Load%s(\n",
load_swizzle, load_function_suffix);
EmitSourceDepth(" (xe_vertex_fetch[%uu].x & 0x1FFFFFFCu)",
instr.operands[1].storage_index);
if (instr.attributes.stride != 0) {
EmitSource(" + uint(xe_src0.x) * %uu", instr.attributes.stride * 4);
}
if (instr.attributes.offset != 0) {
EmitSource(" + %uu", instr.attributes.offset * 4);
}
EmitSource("),\n");
EmitSourceDepth(" xe_vertex_fetch[%u].y);\n",
instr.operands[1].storage_index);

View File

@ -15,6 +15,7 @@
#include <dxgi1_4.h>
#include <d3d12.h>
#include <d3dcompiler.h>
#define XELOGD3D XELOGI

View File

@ -9,18 +9,13 @@
#include "xenia/ui/d3d12/d3d12_context.h"
#include <gflags/gflags.h>
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/ui/d3d12/d3d12_immediate_drawer.h"
#include "xenia/ui/d3d12/d3d12_provider.h"
#include "xenia/ui/window.h"
DEFINE_int32(d3d12_sync_interval, 1,
"Vertical synchronization interval. 0 to disable vertical sync, "
"1 to enable it, 2/3/4 to sync every 2/3/4 vertical blanks.");
namespace xe {
namespace ui {
namespace d3d12 {
@ -284,8 +279,7 @@ void D3D12Context::EndSwap() {
graphics_command_list->ResourceBarrier(1, &barrier);
command_list->Execute();
// Present and check if the context was lost.
HRESULT result =
swap_chain_->Present(xe::clamp(FLAGS_d3d12_sync_interval, 0, 4), 0);
HRESULT result = swap_chain_->Present(FLAGS_vsync ? 1 : 0, 0);
if (result == DXGI_ERROR_DEVICE_RESET ||
result == DXGI_ERROR_DEVICE_REMOVED) {
context_lost_ = true;
@ -311,6 +305,9 @@ std::unique_ptr<RawImage> D3D12Context::Capture() {
void D3D12Context::AwaitAllFramesCompletion() {
// Await the last frame since previous frames must be completed before it.
if (context_lost_) {
return;
}
uint32_t await_frame = current_queue_frame_ + (kQueuedFrames - 1);
if (await_frame >= kQueuedFrames) {
await_frame -= kQueuedFrames;

View File

@ -18,6 +18,8 @@
#include "xenia/ui/d3d12/d3d12_provider.h"
#include "xenia/ui/graphics_context.h"
#define FINE_GRAINED_DRAW_SCOPES 1
namespace xe {
namespace ui {
namespace d3d12 {