[D3D12] Shaders (not all compiling yet)
This commit is contained in:
parent
b0421de496
commit
87aecfa1b8
|
@ -9,6 +9,12 @@
|
||||||
|
|
||||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||||
|
|
||||||
|
#include "xenia/base/assert.h"
|
||||||
|
#include "xenia/base/logging.h"
|
||||||
|
#include "xenia/base/profiling.h"
|
||||||
|
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||||
|
#include "xenia/gpu/xenos.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace d3d12 {
|
namespace d3d12 {
|
||||||
|
@ -18,33 +24,133 @@ D3D12CommandProcessor::D3D12CommandProcessor(
|
||||||
: CommandProcessor(graphics_system, kernel_state) {}
|
: CommandProcessor(graphics_system, kernel_state) {}
|
||||||
D3D12CommandProcessor::~D3D12CommandProcessor() = default;
|
D3D12CommandProcessor::~D3D12CommandProcessor() = default;
|
||||||
|
|
||||||
|
// Clears the base command processor's caches right away and records that the
// D3D12-side caches must be cleared as well. The request flag is consumed in
// PerformSwap().
void D3D12CommandProcessor::ClearCaches() {
  CommandProcessor::ClearCaches();

  // Deferred: the pipeline cache is only cleared at the next swap.
  cache_clear_requested_ = true;
}
|
||||||
|
|
||||||
bool D3D12CommandProcessor::SetupContext() {
|
bool D3D12CommandProcessor::SetupContext() {
|
||||||
return CommandProcessor::SetupContext();
|
if (!CommandProcessor::SetupContext()) {
|
||||||
|
XELOGE("Unable to initialize base command processor context");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto context = GetD3D12Context();
|
||||||
|
|
||||||
|
pipeline_cache_ = std::make_unique<PipelineCache>(register_file_, context);
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void D3D12CommandProcessor::ShutdownContext() {
|
void D3D12CommandProcessor::ShutdownContext() {
|
||||||
return CommandProcessor::ShutdownContext();
|
auto context = GetD3D12Context();
|
||||||
|
context->AwaitAllFramesCompletion();
|
||||||
|
|
||||||
|
pipeline_cache_.reset();
|
||||||
|
|
||||||
|
CommandProcessor::ShutdownContext();
|
||||||
}
|
}
|
||||||
|
|
||||||
void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
|
void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
|
||||||
uint32_t frontbuffer_width,
|
uint32_t frontbuffer_width,
|
||||||
uint32_t frontbuffer_height) {}
|
uint32_t frontbuffer_height) {
|
||||||
|
SCOPE_profile_cpu_f("gpu");
|
||||||
|
|
||||||
|
if (current_queue_frame_ != UINT32_MAX) {
|
||||||
|
EndFrame();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cache_clear_requested_) {
|
||||||
|
cache_clear_requested_ = false;
|
||||||
|
GetD3D12Context()->AwaitAllFramesCompletion();
|
||||||
|
pipeline_cache_->ClearCache();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Forwards the shader load to the pipeline cache, which returns the shader
// object for the given microcode (see PipelineCache::LoadShader).
Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type,
                                          uint32_t guest_address,
                                          const uint32_t* host_address,
                                          uint32_t dword_count) {
  D3D12Shader* loaded_shader = pipeline_cache_->LoadShader(
      shader_type, guest_address, host_address, dword_count);
  return loaded_shader;
}
|
||||||
|
|
||||||
bool D3D12CommandProcessor::IssueDraw(PrimitiveType prim_type,
|
bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
uint32_t index_count,
|
uint32_t index_count,
|
||||||
IndexBufferInfo* index_buffer_info) {
|
IndexBufferInfo* index_buffer_info) {
|
||||||
|
auto& regs = *register_file_;
|
||||||
|
|
||||||
|
#if FINE_GRAINED_DRAW_SCOPES
|
||||||
|
SCOPE_profile_cpu_f("gpu");
|
||||||
|
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||||
|
|
||||||
|
auto enable_mode = static_cast<xenos::ModeControl>(
|
||||||
|
regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
|
||||||
|
if (enable_mode == xenos::ModeControl::kIgnore) {
|
||||||
|
// Ignored.
|
||||||
|
return true;
|
||||||
|
} else if (enable_mode == xenos::ModeControl::kCopy) {
|
||||||
|
// Special copy handling.
|
||||||
|
return IssueCopy();
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) {
|
||||||
|
// Doesn't actually draw.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shaders will have already been defined by previous loads.
|
||||||
|
// We need them to do just about anything so validate here.
|
||||||
|
auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
|
||||||
|
auto pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
|
||||||
|
if (!vertex_shader) {
|
||||||
|
// Always need a vertex shader.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Depth-only mode doesn't need a pixel shader (we'll use a fake one).
|
||||||
|
if (enable_mode == xenos::ModeControl::kDepth) {
|
||||||
|
// Use a dummy pixel shader when required.
|
||||||
|
pixel_shader = nullptr;
|
||||||
|
} else if (!pixel_shader) {
|
||||||
|
// Need a pixel shader in normal color mode.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool full_update = false;
|
||||||
|
if (current_queue_frame_ == UINT32_MAX) {
|
||||||
|
BeginFrame();
|
||||||
|
full_update = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto pipeline_status = pipeline_cache_->ConfigurePipeline(
|
||||||
|
vertex_shader, pixel_shader, primitive_type);
|
||||||
|
if (pipeline_status == PipelineCache::UpdateStatus::kError) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy handling stub: performs no work and always reports success.
bool D3D12CommandProcessor::IssueCopy() {
  return true;
}
|
||||||
|
|
||||||
|
void D3D12CommandProcessor::BeginFrame() {
|
||||||
|
assert_true(current_queue_frame_ == UINT32_MAX);
|
||||||
|
auto context = GetD3D12Context();
|
||||||
|
|
||||||
|
context->BeginSwap();
|
||||||
|
|
||||||
|
current_queue_frame_ = context->GetCurrentQueueFrame();
|
||||||
|
}
|
||||||
|
|
||||||
|
void D3D12CommandProcessor::EndFrame() {
|
||||||
|
assert_true(current_queue_frame_ != UINT32_MAX);
|
||||||
|
auto context = GetD3D12Context();
|
||||||
|
|
||||||
|
context->EndSwap();
|
||||||
|
|
||||||
|
current_queue_frame_ = UINT32_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace xe
|
} // namespace xe
|
||||||
|
|
|
@ -10,10 +10,14 @@
|
||||||
#ifndef XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_
|
#ifndef XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_
|
||||||
#define XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_
|
#define XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
#include "xenia/gpu/command_processor.h"
|
#include "xenia/gpu/command_processor.h"
|
||||||
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
|
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
|
||||||
|
#include "xenia/gpu/d3d12/pipeline_cache.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
#include "xenia/kernel/kernel_state.h"
|
#include "xenia/kernel/kernel_state.h"
|
||||||
|
#include "xenia/ui/d3d12/d3d12_context.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
|
@ -25,7 +29,14 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
kernel::KernelState* kernel_state);
|
kernel::KernelState* kernel_state);
|
||||||
~D3D12CommandProcessor();
|
~D3D12CommandProcessor();
|
||||||
|
|
||||||
private:
|
void ClearCaches() override;
|
||||||
|
|
||||||
|
// Needed by everything that owns transient objects.
|
||||||
|
xe::ui::d3d12::D3D12Context* GetD3D12Context() const {
|
||||||
|
return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
bool SetupContext() override;
|
bool SetupContext() override;
|
||||||
void ShutdownContext() override;
|
void ShutdownContext() override;
|
||||||
|
|
||||||
|
@ -36,9 +47,19 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
const uint32_t* host_address,
|
const uint32_t* host_address,
|
||||||
uint32_t dword_count) override;
|
uint32_t dword_count) override;
|
||||||
|
|
||||||
bool IssueDraw(PrimitiveType prim_type, uint32_t index_count,
|
bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count,
|
||||||
IndexBufferInfo* index_buffer_info) override;
|
IndexBufferInfo* index_buffer_info) override;
|
||||||
bool IssueCopy() override;
|
bool IssueCopy() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void BeginFrame();
|
||||||
|
void EndFrame();
|
||||||
|
|
||||||
|
bool cache_clear_requested_ = false;
|
||||||
|
|
||||||
|
std::unique_ptr<PipelineCache> pipeline_cache_;
|
||||||
|
|
||||||
|
uint32_t current_queue_frame_ = UINT32_MAX;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
|
|
|
@ -0,0 +1,117 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2018 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||||
|
|
||||||
|
#include <gflags/gflags.h>
|
||||||
|
|
||||||
|
#include "xenia/base/assert.h"
|
||||||
|
#include "xenia/base/logging.h"
|
||||||
|
#include "xenia/gpu/gpu_flags.h"
|
||||||
|
|
||||||
|
// Command-line flag (default: enabled). When set, D3D12Shader::Prepare()
// disassembles the compiled bytecode and stores the text on the shader.
DEFINE_bool(d3d12_shader_disasm, true,
            "Disassemble translated shaders after compilation.");
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace d3d12 {
|
||||||
|
|
||||||
|
// Constructs the shader by forwarding every argument to the Shader base
// class. No bytecode blob exists until Prepare() succeeds.
D3D12Shader::D3D12Shader(ShaderType shader_type, uint64_t data_hash,
                         const uint32_t* dword_ptr, uint32_t dword_count)
    : Shader(shader_type, data_hash, dword_ptr, dword_count) {
}
|
||||||
|
|
||||||
|
// Releases the compiled bytecode blob, if Prepare() ever produced one.
D3D12Shader::~D3D12Shader() {
  if (blob_) {
    blob_->Release();
  }
}
|
||||||
|
|
||||||
|
bool D3D12Shader::Prepare() {
|
||||||
|
assert_null(blob_);
|
||||||
|
assert_true(is_valid());
|
||||||
|
|
||||||
|
const char* target;
|
||||||
|
switch (shader_type_) {
|
||||||
|
case ShaderType::kVertex:
|
||||||
|
target = "vs_5_1";
|
||||||
|
break;
|
||||||
|
case ShaderType::kPixel:
|
||||||
|
target = "ps_5_1";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert_unhandled_case(shader_type_);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(Triang3l): Choose the appropriate optimization level based on compile
|
||||||
|
// time and how invariance is handled in vertex shaders.
|
||||||
|
ID3DBlob* error_blob = nullptr;
|
||||||
|
bool compiled =
|
||||||
|
SUCCEEDED(D3DCompile(translated_binary_.data(), translated_binary_.size(),
|
||||||
|
nullptr, nullptr, nullptr, "main", target,
|
||||||
|
D3DCOMPILE_OPTIMIZATION_LEVEL0, 0, &blob_,
|
||||||
|
&error_blob));
|
||||||
|
|
||||||
|
if (!compiled) {
|
||||||
|
XELOGE("%s shader %.16llX compilation failed!", target, ucode_data_hash());
|
||||||
|
}
|
||||||
|
if (error_blob != nullptr) {
|
||||||
|
if (compiled) {
|
||||||
|
XELOGW("%s shader %.16llX compiled with warnings!", target,
|
||||||
|
ucode_data_hash());
|
||||||
|
XELOGW("%s", reinterpret_cast<const char*>(error_blob->GetBufferPointer()));
|
||||||
|
XELOGW("HLSL source:");
|
||||||
|
// The buffer isn't terminated.
|
||||||
|
translated_binary_.push_back(0);
|
||||||
|
XELOGW("%s", reinterpret_cast<const char*>(translated_binary_.data()));
|
||||||
|
translated_binary_.pop_back();
|
||||||
|
} else {
|
||||||
|
XELOGE("%s", reinterpret_cast<const char*>(error_blob->GetBufferPointer()));
|
||||||
|
XELOGE("HLSL source:");
|
||||||
|
translated_binary_.push_back(0);
|
||||||
|
XELOGE("%s", reinterpret_cast<const char*>(translated_binary_.data()));
|
||||||
|
translated_binary_.pop_back();
|
||||||
|
}
|
||||||
|
error_blob->Release();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!compiled) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (FLAGS_d3d12_shader_disasm) {
|
||||||
|
ID3DBlob* disassembly_blob;
|
||||||
|
if (SUCCEEDED(D3DDisassemble(blob_->GetBufferPointer(),
|
||||||
|
blob_->GetBufferSize(), 0, nullptr,
|
||||||
|
&disassembly_blob))) {
|
||||||
|
host_disassembly_ =
|
||||||
|
reinterpret_cast<const char*>(disassembly_blob->GetBufferPointer());
|
||||||
|
disassembly_blob->Release();
|
||||||
|
} else {
|
||||||
|
XELOGE("Failed to disassemble DXBC for %s shader %.16llX", target,
|
||||||
|
ucode_data_hash());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a pointer to the start of the compiled bytecode buffer.
// Requires a successful Prepare(); asserts that the blob exists.
const uint8_t* D3D12Shader::GetDXBC() const {
  assert_not_null(blob_);
  const void* bytecode = blob_->GetBufferPointer();
  return reinterpret_cast<const uint8_t*>(bytecode);
}
|
||||||
|
|
||||||
|
// Returns the size of the compiled bytecode buffer, as reported by the blob.
// Requires a successful Prepare(); asserts that the blob exists.
size_t D3D12Shader::GetDXBCSize() const {
  assert_not_null(blob_);
  const size_t bytecode_size = blob_->GetBufferSize();
  return bytecode_size;
}
|
||||||
|
|
||||||
|
} // namespace d3d12
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
|
@ -0,0 +1,39 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2018 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XENIA_GPU_D3D12_D3D12_SHADER_H_
|
||||||
|
#define XENIA_GPU_D3D12_D3D12_SHADER_H_
|
||||||
|
|
||||||
|
#include "xenia/gpu/shader.h"
|
||||||
|
#include "xenia/ui/d3d12/d3d12_api.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace d3d12 {
|
||||||
|
|
||||||
|
class D3D12Shader : public Shader {
|
||||||
|
public:
|
||||||
|
D3D12Shader(ShaderType shader_type, uint64_t data_hash,
|
||||||
|
const uint32_t* dword_ptr, uint32_t dword_count);
|
||||||
|
~D3D12Shader() override;
|
||||||
|
|
||||||
|
bool Prepare();
|
||||||
|
|
||||||
|
const uint8_t* GetDXBC() const;
|
||||||
|
size_t GetDXBCSize() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
ID3DBlob* blob_ = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace d3d12
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
||||||
|
|
||||||
|
#endif // XENIA_GPU_D3D12_D3D12_SHADER_H_
|
|
@ -0,0 +1,199 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2018 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "xenia/gpu/d3d12/pipeline_cache.h"
|
||||||
|
|
||||||
|
#include <cinttypes>
|
||||||
|
|
||||||
|
#include "xenia/base/assert.h"
|
||||||
|
#include "xenia/base/logging.h"
|
||||||
|
#include "xenia/gpu/gpu_flags.h"
|
||||||
|
#include "xenia/gpu/hlsl_shader_translator.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace d3d12 {
|
||||||
|
|
||||||
|
// Stores the (non-owned) register file and D3D12 context pointers and creates
// the HLSL shader translator that is reused for every shader.
PipelineCache::PipelineCache(RegisterFile* register_file,
                             ui::d3d12::D3D12Context* context)
    : register_file_(register_file),
      context_(context),
      shader_translator_(std::make_unique<HlslShaderTranslator>()) {}
|
||||||
|
|
||||||
|
// Destroys everything the cache owns by running Shutdown().
PipelineCache::~PipelineCache() {
  Shutdown();
}
|
||||||
|
|
||||||
|
// Releases all cached objects; currently that is exactly what ClearCache()
// does.
void PipelineCache::Shutdown() {
  ClearCache();
}
|
||||||
|
|
||||||
|
// Returns the shader object for the given microcode, creating it on first
// use.
//
// Shaders are keyed by a 64-bit hash of the microcode bytes, so identical
// microcode always yields the same object. guest_address is not used for the
// lookup. The returned pointer is owned by the cache and stays valid until
// ClearCache().
D3D12Shader* PipelineCache::LoadShader(ShaderType shader_type,
                                       uint32_t guest_address,
                                       const uint32_t* host_address,
                                       uint32_t dword_count) {
  // Hash the input memory and lookup the shader.
  const uint64_t data_hash =
      XXH64(host_address, dword_count * sizeof(uint32_t), 0);
  const auto found = shader_map_.find(data_hash);
  if (found != shader_map_.end()) {
    // Shader has been previously loaded.
    return found->second;
  }

  // Always create the shader and stash it away.
  // We need to track it even if it fails translation so we know not to try
  // again.
  auto* new_shader =
      new D3D12Shader(shader_type, data_hash, host_address, dword_count);
  shader_map_.insert({data_hash, new_shader});
  return new_shader;
}
|
||||||
|
|
||||||
|
// Public entry point for configuring the pipeline for a draw. Delegates to
// UpdateState() and returns its status unchanged.
PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
    D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
    PrimitiveType primitive_type) {
#if FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

  const UpdateStatus update_status =
      UpdateState(vertex_shader, pixel_shader, primitive_type);
  return update_status;
}
|
||||||
|
|
||||||
|
void PipelineCache::ClearCache() {
|
||||||
|
// Destroy all shaders.
|
||||||
|
for (auto it : shader_map_) {
|
||||||
|
delete it.second;
|
||||||
|
}
|
||||||
|
shader_map_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
|
||||||
|
uint32_t value = register_file_->values[register_name].u32;
|
||||||
|
if (*dest == value) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*dest = value;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) {
|
||||||
|
float value = register_file_->values[register_name].f32;
|
||||||
|
if (*dest == value) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*dest = value;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Translates a shader's microcode with the shared translator and then
// prepares it for use via D3D12Shader::Prepare().
//
// On success the shader is logged and, when the dump_shaders flag is
// non-empty, dumped to that location. Returns false when translation or
// preparation fails; otherwise returns shader->is_valid().
bool PipelineCache::TranslateShader(D3D12Shader* shader,
                                    xenos::xe_gpu_program_cntl_t cntl) {
  // Perform translation.
  // If this fails the shader will be marked as invalid and ignored later.
  const bool translated = shader_translator_->Translate(shader, cntl);
  if (!translated) {
    XELOGE("Shader translation failed; marking shader as ignored");
    return false;
  }

  // Prepare the shader for use (creates the Shader Model bytecode).
  // It could still fail at this point.
  const bool prepared = shader->Prepare();
  if (!prepared) {
    XELOGE("Shader preparation failed; marking shader as ignored");
    return false;
  }

  if (shader->is_valid()) {
    const char* stage_name =
        shader->type() == ShaderType::kVertex ? "vertex" : "pixel";
    XELOGGPU("Generated %s shader (%db) - hash %.16" PRIX64 ":\n%s\n",
             stage_name, shader->ucode_dword_count() * 4,
             shader->ucode_data_hash(), shader->ucode_disassembly().c_str());
  }

  // Dump shader files if desired.
  if (!FLAGS_dump_shaders.empty()) {
    shader->Dump(FLAGS_dump_shaders, "d3d12");
  }

  return shader->is_valid();
}
|
||||||
|
|
||||||
|
// Runs the state update passes (currently only the shader stages) and
// combines their results.
//
// Returns kError as soon as a pass fails, kMismatch if any pass reported a
// change, and kCompatible otherwise. The incremental pipeline hash is reset
// at the start so the passes can rebuild it.
PipelineCache::UpdateStatus PipelineCache::UpdateState(
    D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
    PrimitiveType primitive_type) {
  bool any_mismatch = false;

  // Reset hash so we can build it up.
  XXH64_reset(&hash_state_, 0);

// Folds one pass result into the overall outcome: log and bail out on error,
// remember a mismatch, ignore kCompatible.
#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \
  do {                                                        \
    if (status == UpdateStatus::kError) {                     \
      XELOGE(error_message);                                  \
      return status;                                          \
    } else if (status == UpdateStatus::kMismatch) {           \
      mismatch = true;                                        \
    }                                                         \
  } while (0)

  UpdateStatus pass_status =
      UpdateShaderStages(vertex_shader, pixel_shader, primitive_type);
  CHECK_UPDATE_STATUS(pass_status, any_mismatch,
                      "Unable to update shader stages");

#undef CHECK_UPDATE_STATUS

  if (any_mismatch) {
    return UpdateStatus::kMismatch;
  }
  return UpdateStatus::kCompatible;
}
|
||||||
|
|
||||||
|
// Updates the shadowed shader-stage state and makes sure both shaders are
// translated.
//
// The shadowed registers, shader pointers and primitive type are compared
// with the values from the previous call and then fed into the incremental
// pipeline hash. Returns kCompatible when nothing changed, kMismatch when
// something changed and the shaders are translated, and kError when
// translating either shader fails. pixel_shader may be null; vertex_shader
// is dereferenced and must not be.
PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
    D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
    PrimitiveType primitive_type) {
  auto& regs = update_shader_stages_regs_;

  // These are the constant base addresses/ranges for shaders.
  // We have these hardcoded right now cause nothing seems to differ.
  assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 ==
                  0x000FF000 ||
              register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
  assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 ==
                  0x000FF100 ||
              register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);

  // Refresh the shadow copies and note whether anything differs from the
  // previous call.
  bool dirty = false;
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
  dirty |= regs.vertex_shader != vertex_shader;
  dirty |= regs.pixel_shader != pixel_shader;
  dirty |= regs.primitive_type != primitive_type;
  regs.vertex_shader = vertex_shader;
  regs.pixel_shader = pixel_shader;
  regs.primitive_type = primitive_type;

  // The hash is always updated, even when nothing changed, because
  // UpdateState() resets it on every call.
  XXH64_update(&hash_state_, &regs, sizeof(regs));
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  xenos::xe_gpu_program_cntl_t sq_program_cntl;
  sq_program_cntl.dword_0 = regs.sq_program_cntl;

  // Translate lazily: only shaders that have not been translated yet.
  if (!vertex_shader->is_translated() &&
      !TranslateShader(vertex_shader, sq_program_cntl)) {
    XELOGE("Failed to translate the vertex shader!");
    return UpdateStatus::kError;
  }

  if (pixel_shader && !pixel_shader->is_translated() &&
      !TranslateShader(pixel_shader, sq_program_cntl)) {
    XELOGE("Failed to translate the pixel shader!");
    return UpdateStatus::kError;
  }

  return UpdateStatus::kMismatch;
}
|
||||||
|
|
||||||
|
} // namespace d3d12
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
|
@ -0,0 +1,93 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2018 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XENIA_GPU_D3D12_PIPELINE_CACHE_H_
|
||||||
|
#define XENIA_GPU_D3D12_PIPELINE_CACHE_H_
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "third_party/xxhash/xxhash.h"
|
||||||
|
|
||||||
|
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||||
|
#include "xenia/gpu/register_file.h"
|
||||||
|
#include "xenia/gpu/shader_translator.h"
|
||||||
|
#include "xenia/gpu/xenos.h"
|
||||||
|
#include "xenia/ui/d3d12/d3d12_context.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace d3d12 {
|
||||||
|
|
||||||
|
class PipelineCache {
|
||||||
|
public:
|
||||||
|
enum class UpdateStatus {
|
||||||
|
kCompatible,
|
||||||
|
kMismatch,
|
||||||
|
kError,
|
||||||
|
};
|
||||||
|
|
||||||
|
PipelineCache(RegisterFile* register_file, ui::d3d12::D3D12Context* context);
|
||||||
|
~PipelineCache();
|
||||||
|
|
||||||
|
void Shutdown();
|
||||||
|
|
||||||
|
D3D12Shader* LoadShader(ShaderType shader_type, uint32_t guest_address,
|
||||||
|
const uint32_t* host_address, uint32_t dword_count);
|
||||||
|
|
||||||
|
UpdateStatus ConfigurePipeline(D3D12Shader* vertex_shader,
|
||||||
|
D3D12Shader* pixel_shader,
|
||||||
|
PrimitiveType primitive_type);
|
||||||
|
|
||||||
|
void ClearCache();
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
|
||||||
|
bool SetShadowRegister(float* dest, uint32_t register_name);
|
||||||
|
|
||||||
|
bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl);
|
||||||
|
|
||||||
|
UpdateStatus UpdateState(D3D12Shader* vertex_shader,
|
||||||
|
D3D12Shader* pixel_shader,
|
||||||
|
PrimitiveType primitive_type);
|
||||||
|
|
||||||
|
UpdateStatus UpdateShaderStages(D3D12Shader* vertex_shader,
|
||||||
|
D3D12Shader* pixel_shader,
|
||||||
|
PrimitiveType primitive_type);
|
||||||
|
|
||||||
|
RegisterFile* register_file_ = nullptr;
|
||||||
|
ui::d3d12::D3D12Context* context_ = nullptr;
|
||||||
|
|
||||||
|
// Reusable shader translator.
|
||||||
|
std::unique_ptr<ShaderTranslator> shader_translator_ = nullptr;
|
||||||
|
// All loaded shaders mapped by their guest hash key.
|
||||||
|
std::unordered_map<uint64_t, D3D12Shader*> shader_map_;
|
||||||
|
|
||||||
|
// Hash state used to incrementally produce pipeline hashes during update.
|
||||||
|
// By the time the full update pass has run the hash will represent the
|
||||||
|
// current state in a way that can uniquely identify the produced
|
||||||
|
// ID3D12PipelineState.
|
||||||
|
XXH64_state_t hash_state_;
|
||||||
|
|
||||||
|
struct UpdateShaderStagesRegisters {
|
||||||
|
PrimitiveType primitive_type;
|
||||||
|
uint32_t pa_su_sc_mode_cntl;
|
||||||
|
uint32_t sq_program_cntl;
|
||||||
|
D3D12Shader* vertex_shader;
|
||||||
|
D3D12Shader* pixel_shader;
|
||||||
|
|
||||||
|
UpdateShaderStagesRegisters() { Reset(); }
|
||||||
|
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||||
|
} update_shader_stages_regs_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace d3d12
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
||||||
|
|
||||||
|
#endif // XENIA_GPU_D3D12_PIPELINE_CACHE_H_
|
|
@ -76,13 +76,22 @@ void HlslShaderTranslator::StartTranslation() {
|
||||||
Indent();
|
Indent();
|
||||||
// Switch level (3).
|
// Switch level (3).
|
||||||
Indent();
|
Indent();
|
||||||
EmitSourceDepth("case 0:\n");
|
EmitSourceDepth("case 0u:\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
||||||
// Add the declarations, the prologue and the epilogue knowing what is needed.
|
// Add the declarations, the prologue and the epilogue knowing what is needed.
|
||||||
StringBuffer source;
|
StringBuffer source;
|
||||||
|
|
||||||
|
// Common preprocessor statements.
|
||||||
|
// 3557 is the "loop only executes for 1 iteration" warning caused by the
|
||||||
|
// control flow loop design.
|
||||||
|
source.Append(
|
||||||
|
"#pragma warning(disable : 3557)\n"
|
||||||
|
"\n"
|
||||||
|
"#define XE_FLT_MAX 3.402823466e+38\n"
|
||||||
|
"\n");
|
||||||
|
|
||||||
// Cubemap sampling. XeCubeTo2D emulates the cube vector ALU instruction that
|
// Cubemap sampling. XeCubeTo2D emulates the cube vector ALU instruction that
|
||||||
// gives (t, s, 2 * major axis, face index), XeCubeTo3D reverts its effects
|
// gives (t, s, 2 * major axis, face index), XeCubeTo3D reverts its effects
|
||||||
// in tfetchCube because sampling a cubemap as an array doesn't work properly
|
// in tfetchCube because sampling a cubemap as an array doesn't work properly
|
||||||
|
@ -132,7 +141,6 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
||||||
"}\n"
|
"}\n"
|
||||||
"\n"
|
"\n"
|
||||||
"float3 XeCubeTo3D(float3 xe_cube_2d) {\n"
|
"float3 XeCubeTo3D(float3 xe_cube_2d) {\n"
|
||||||
"{\n"
|
|
||||||
" xe_cube_2d.xy = (xe_cube_2d.xy * 2.0) + 1.0;\n"
|
" xe_cube_2d.xy = (xe_cube_2d.xy * 2.0) + 1.0;\n"
|
||||||
" float3 xe_cube_3d;\n"
|
" float3 xe_cube_3d;\n"
|
||||||
" uint xe_cube_face_index = uint(xe_cube_2d.z);\n"
|
" uint xe_cube_face_index = uint(xe_cube_2d.z);\n"
|
||||||
|
@ -157,24 +165,22 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
||||||
|
|
||||||
// Common declarations.
|
// Common declarations.
|
||||||
source.Append(
|
source.Append(
|
||||||
"#define XE_FLT_MAX 3.402823466e+38\n"
|
|
||||||
"\n"
|
|
||||||
"cbuffer xe_system_constants : register(b0) {\n"
|
"cbuffer xe_system_constants : register(b0) {\n"
|
||||||
" float2 xe_viewport_inv_scale;\n"
|
" float2 xe_viewport_inv_scale;\n"
|
||||||
" uint xe_vertex_index_endian;\n"
|
" uint xe_vertex_index_endian;\n"
|
||||||
" uint xe_textures_are_3d;\n"
|
" uint xe_textures_are_3d;\n"
|
||||||
"}\n"
|
"};\n"
|
||||||
"\n"
|
"\n"
|
||||||
"struct XeFloatConstantPage {\n"
|
"struct XeFloatConstantPage {\n"
|
||||||
" float4 c[16];\n"
|
" float4 c[16];\n"
|
||||||
"}\n"
|
"};\n"
|
||||||
"ConstantBuffer<XeFloatConstantPage> "
|
"ConstantBuffer<XeFloatConstantPage> "
|
||||||
"xe_float_constants[16] : register(b1);\n"
|
"xe_float_constants[16] : register(b1);\n"
|
||||||
"\n"
|
"\n"
|
||||||
"cbuffer xe_loop_bool_constants : register(b17) {\n"
|
"cbuffer xe_loop_bool_constants : register(b17) {\n"
|
||||||
" uint xe_bool_constants[8];\n"
|
" uint xe_bool_constants[8];\n"
|
||||||
" uint xe_loop_constants[32];\n"
|
" uint xe_loop_constants[32];\n"
|
||||||
"}\n"
|
"};\n"
|
||||||
"\n");
|
"\n");
|
||||||
|
|
||||||
if (is_vertex_shader()) {
|
if (is_vertex_shader()) {
|
||||||
|
@ -184,15 +190,17 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
||||||
// 11 for 16-in-32. This means we can check bits 0 ^ 1 to see if we need to
|
// 11 for 16-in-32. This means we can check bits 0 ^ 1 to see if we need to
|
||||||
// do a 8-in-16 swap, and bit 1 to see if a 16-in-32 swap is needed.
|
// do a 8-in-16 swap, and bit 1 to see if a 16-in-32 swap is needed.
|
||||||
// Vertex element is a temporary integer value for fetches.
|
// Vertex element is a temporary integer value for fetches.
|
||||||
|
// -1 point size means the geometry shader will use the global setting by
|
||||||
|
// default.
|
||||||
source.AppendFormat(
|
source.AppendFormat(
|
||||||
"cbuffer xe_vertex_fetch_constants : register(b18) {\n"
|
"cbuffer xe_vertex_fetch_constants : register(b18) {\n"
|
||||||
" uint2 xe_vertex_fetch[96];\n"
|
" uint2 xe_vertex_fetch[96];\n"
|
||||||
"}\n"
|
"};\n"
|
||||||
"\n"
|
"\n"
|
||||||
"ByteAddressBuffer xe_virtual_memory : register(t0, space1);\n"
|
"ByteAddressBuffer xe_virtual_memory : register(t0, space1);\n"
|
||||||
"\n"
|
"\n"
|
||||||
"#define XE_SWAP_OVERLOAD(XeSwapType) \\\n"
|
"#define XE_BYTE_SWAP_OVERLOAD(XeByteSwapType) \\\n"
|
||||||
"XeSwapType XeSwap(XeSwapType v, uint endian) { \\\n"
|
"XeByteSwapType XeByteSwap(XeByteSwapType v, uint endian) { \\\n"
|
||||||
" [flatten] if (((endian ^ (endian >> 1u)) & 1u) != 0u) { \\\n"
|
" [flatten] if (((endian ^ (endian >> 1u)) & 1u) != 0u) { \\\n"
|
||||||
" v = ((v & 0x00FF00FFu) << 8u) | ((v & 0xFF00FF00u) >> 8u); \\\n"
|
" v = ((v & 0x00FF00FFu) << 8u) | ((v & 0xFF00FF00u) >> 8u); \\\n"
|
||||||
" } \\\n"
|
" } \\\n"
|
||||||
|
@ -201,25 +209,30 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
||||||
" } \\\n"
|
" } \\\n"
|
||||||
" return v; \\\n"
|
" return v; \\\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
"XE_SWAP_OVERLOAD(uint)\n"
|
"XE_BYTE_SWAP_OVERLOAD(uint)\n"
|
||||||
"XE_SWAP_OVERLOAD(uint2)\n"
|
"XE_BYTE_SWAP_OVERLOAD(uint2)\n"
|
||||||
"XE_SWAP_OVERLOAD(uint3)\n"
|
"XE_BYTE_SWAP_OVERLOAD(uint3)\n"
|
||||||
"XE_SWAP_OVERLOAD(uint4)\n"
|
"XE_BYTE_SWAP_OVERLOAD(uint4)\n"
|
||||||
"\n"
|
"\n"
|
||||||
"struct XeVertexShaderOutput {\n"
|
"struct XeVertexShaderOutput {\n"
|
||||||
" float4 position : SV_Position;\n"
|
" float4 position : SV_Position;\n"
|
||||||
" float4 interpolators[%u] : TEXCOORD;\n"
|
" float4 interpolators[%u] : TEXCOORD;\n"
|
||||||
" float4 point_size : PSIZE;\n"
|
" float point_size : PSIZE;\n"
|
||||||
"}\n"
|
"};\n"
|
||||||
"\n"
|
"\n"
|
||||||
"XeVertexShaderOutput main(uint xe_vertex_index_be : SV_VertexID) {\n"
|
"XeVertexShaderOutput main(uint xe_vertex_index_be : SV_VertexID) {\n"
|
||||||
" float4 xe_r[%u];\n"
|
" float4 xe_r[%u];\n"
|
||||||
" uint xe_vertex_index =\n"
|
" uint xe_vertex_index =\n"
|
||||||
" XeSwap(xe_vertex_index_be, xe_vertex_index_endian);\n"
|
" XeByteSwap(xe_vertex_index_be, xe_vertex_index_endian);\n"
|
||||||
" uint4 xe_vertex_element;\n"
|
" uint4 xe_vertex_element;\n"
|
||||||
" xe_r[0].r = float(xe_vertex_index);\n"
|
" xe_r[0].r = float(xe_vertex_index);\n"
|
||||||
" XeVertexShaderOutput xe_output;\n",
|
" XeVertexShaderOutput xe_output;\n"
|
||||||
|
" xe_output.position = float4(0.0, 0.0, 0.0, 1.0);\n"
|
||||||
|
" xe_output.point_size = -1.0;\n",
|
||||||
kMaxInterpolators, register_count());
|
kMaxInterpolators, register_count());
|
||||||
|
for (uint32_t i = 0; i < kMaxInterpolators; ++i) {
|
||||||
|
source.AppendFormat(" xe_output.interpolators[%u] = (0.0).xxxx;\n", i);
|
||||||
|
}
|
||||||
// TODO(Triang3l): Reset interpolators to zero if really needed.
|
// TODO(Triang3l): Reset interpolators to zero if really needed.
|
||||||
} else if (is_pixel_shader()) {
|
} else if (is_pixel_shader()) {
|
||||||
// Pixel shader inputs, outputs and prologue.
|
// Pixel shader inputs, outputs and prologue.
|
||||||
|
@ -229,18 +242,26 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
||||||
"struct XePixelShaderInput {\n"
|
"struct XePixelShaderInput {\n"
|
||||||
" float4 position : SV_Position;\n"
|
" float4 position : SV_Position;\n"
|
||||||
" float4 interpolators[%u] : TEXCOORD;\n"
|
" float4 interpolators[%u] : TEXCOORD;\n"
|
||||||
"}\n"
|
"};\n"
|
||||||
"\n"
|
"\n"
|
||||||
"struct XePixelShaderOutput {\n"
|
"struct XePixelShaderOutput {\n"
|
||||||
" float4 colors[4] : SV_Target;\n"
|
" float4 colors[4] : SV_Target;\n"
|
||||||
"%s"
|
"%s"
|
||||||
"}\n"
|
"};\n"
|
||||||
"\n"
|
"\n"
|
||||||
"XePixelShaderOutput main(XePixelShaderInput xe_input) {\n"
|
"XePixelShaderOutput main(XePixelShaderInput xe_input) {\n"
|
||||||
" float4 xe_r[%u];\n"
|
" float4 xe_r[%u];\n"
|
||||||
" XePixelShaderOutput xe_output;\n",
|
" XePixelShaderOutput xe_output;\n"
|
||||||
|
" xe_output.colors[0] = (0.0).xxxx;\n"
|
||||||
|
" xe_output.colors[1] = (0.0).xxxx;\n"
|
||||||
|
" xe_output.colors[2] = (0.0).xxxx;\n"
|
||||||
|
" xe_output.colors[3] = (0.0).xxxx;\n",
|
||||||
kMaxInterpolators, writes_depth_ ? " float depth : SV_Depth;\n" : "",
|
kMaxInterpolators, writes_depth_ ? " float depth : SV_Depth;\n" : "",
|
||||||
register_count());
|
register_count());
|
||||||
|
// Initialize SV_Depth if using it.
|
||||||
|
if (writes_depth_) {
|
||||||
|
source.Append(" xe_output.depth = xe_input.position.z;\n");
|
||||||
|
}
|
||||||
// Copy interpolants to the first registers.
|
// Copy interpolants to the first registers.
|
||||||
uint32_t interpolator_register_count =
|
uint32_t interpolator_register_count =
|
||||||
std::min(register_count(), kMaxInterpolators);
|
std::min(register_count(), kMaxInterpolators);
|
||||||
|
@ -262,9 +283,9 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
||||||
// Sources for instructions.
|
// Sources for instructions.
|
||||||
" float4 xe_src0, xe_src1, xe_src2;\n"
|
" float4 xe_src0, xe_src1, xe_src2;\n"
|
||||||
// Previous vector result (used as a scratch).
|
// Previous vector result (used as a scratch).
|
||||||
" float4 xe_pv;\n"
|
" float4 xe_pv = float4(0.0, 0.0, 0.0, 0.0);\n"
|
||||||
// Previous scalar result (used for RETAIN_PREV).
|
// Previous scalar result (used for RETAIN_PREV).
|
||||||
" float xe_ps;\n"
|
" float xe_ps = 0.0;\n"
|
||||||
// Predicate temp, clause-local. Initially false like cf_exec_pred_cond_.
|
// Predicate temp, clause-local. Initially false like cf_exec_pred_cond_.
|
||||||
" bool xe_p0 = false;\n"
|
" bool xe_p0 = false;\n"
|
||||||
// Address register when using absolute addressing.
|
// Address register when using absolute addressing.
|
||||||
|
@ -284,9 +305,15 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
||||||
source.Append(source_inner_.GetString());
|
source.Append(source_inner_.GetString());
|
||||||
|
|
||||||
// Epilogue.
|
// Epilogue.
|
||||||
|
if (!cf_wrote_pc_) {
|
||||||
|
source.Append(
|
||||||
|
" xe_pc = 0xFFFFu;\n"
|
||||||
|
" break;\n");
|
||||||
|
}
|
||||||
source.Append(
|
source.Append(
|
||||||
" default:\n"
|
" default:\n"
|
||||||
" pc = 0xFFFFu;\n"
|
" xe_pc = 0xFFFFu;\n"
|
||||||
|
" break;\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
" } while (xe_pc != 0xFFFFu);\n");
|
" } while (xe_pc != 0xFFFFu);\n");
|
||||||
// TODO(Triang3l): Window offset, half pixel offset, alpha test, gamma.
|
// TODO(Triang3l): Window offset, half pixel offset, alpha test, gamma.
|
||||||
|
@ -300,7 +327,11 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
||||||
void HlslShaderTranslator::ProcessLabel(uint32_t cf_index) {
|
void HlslShaderTranslator::ProcessLabel(uint32_t cf_index) {
|
||||||
// 0 is always added in the beginning.
|
// 0 is always added in the beginning.
|
||||||
if (cf_index != 0) {
|
if (cf_index != 0) {
|
||||||
EmitSourceDepth("case %u:\n", cf_index);
|
if (!cf_wrote_pc_) {
|
||||||
|
EmitSourceDepth("xe_pc = %uu;\n", cf_index);
|
||||||
|
EmitSourceDepth("break;");
|
||||||
|
}
|
||||||
|
EmitSourceDepth("case %uu:\n", cf_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -311,14 +342,12 @@ void HlslShaderTranslator::ProcessControlFlowNopInstruction(uint32_t cf_index) {
|
||||||
void HlslShaderTranslator::ProcessControlFlowInstructionBegin(
|
void HlslShaderTranslator::ProcessControlFlowInstructionBegin(
|
||||||
uint32_t cf_index) {
|
uint32_t cf_index) {
|
||||||
cf_wrote_pc_ = false;
|
cf_wrote_pc_ = false;
|
||||||
Indent();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void HlslShaderTranslator::ProcessControlFlowInstructionEnd(uint32_t cf_index) {
|
void HlslShaderTranslator::ProcessControlFlowInstructionEnd(uint32_t cf_index) {
|
||||||
if (!cf_wrote_pc_) {
|
if (!cf_wrote_pc_) {
|
||||||
EmitSourceDepth("// Falling through to L%u\n", cf_index + 1);
|
EmitSourceDepth("// Falling through to L%u\n", cf_index + 1);
|
||||||
}
|
}
|
||||||
Unindent();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void HlslShaderTranslator::ProcessExecInstructionBegin(
|
void HlslShaderTranslator::ProcessExecInstructionBegin(
|
||||||
|
@ -364,21 +393,23 @@ void HlslShaderTranslator::ProcessLoopStartInstruction(
|
||||||
|
|
||||||
// Setup counter.
|
// Setup counter.
|
||||||
EmitSourceDepth("xe_loop_count.yzw = xe_loop_count.xyz;\n");
|
EmitSourceDepth("xe_loop_count.yzw = xe_loop_count.xyz;\n");
|
||||||
EmitSourceDepth("xe_loop_count.x = xe_loop_constants[%u] & 0xFFu;\n");
|
EmitSourceDepth("xe_loop_count.x = xe_loop_constants[%u] & 0xFFu;\n",
|
||||||
|
instr.loop_constant_index);
|
||||||
|
|
||||||
// Setup relative indexing.
|
// Setup relative indexing.
|
||||||
EmitSourceDepth("xe_aL = xe_aL.xxyz;\n");
|
EmitSourceDepth("xe_aL = xe_aL.xxyz;\n");
|
||||||
if (!instr.is_repeat) {
|
if (!instr.is_repeat) {
|
||||||
// Push new loop starting index if not reusing the current one.
|
// Push new loop starting index if not reusing the current one.
|
||||||
EmitSourceDepth("xe_aL.x = int((xe_loop_constants[%u] >> 8u) & 0xFFu);\n");
|
EmitSourceDepth("xe_aL.x = int((xe_loop_constants[%u] >> 8u) & 0xFFu);\n",
|
||||||
|
instr.loop_constant_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Quick skip loop if zero count.
|
// Quick skip loop if zero count.
|
||||||
EmitSourceDepth("if (xe_loop_count.x == 0u) {\n");
|
EmitSourceDepth("if (xe_loop_count.x == 0u) {\n");
|
||||||
EmitSourceDepth(" xe_pc = %u; // Skip loop to L%u\n",
|
EmitSourceDepth(" xe_pc = %uu; // Skip loop to L%u\n",
|
||||||
instr.loop_skip_address, instr.loop_skip_address);
|
instr.loop_skip_address, instr.loop_skip_address);
|
||||||
EmitSourceDepth("} else {\n");
|
EmitSourceDepth("} else {\n");
|
||||||
EmitSourceDepth(" xe_pc = %u; // Fallthrough to loop body L%u\n",
|
EmitSourceDepth(" xe_pc = %uu; // Fallthrough to loop body L%u\n",
|
||||||
instr.dword_index + 1, instr.dword_index + 1);
|
instr.dword_index + 1, instr.dword_index + 1);
|
||||||
EmitSourceDepth("}\n");
|
EmitSourceDepth("}\n");
|
||||||
EmitSourceDepth("break;\n");
|
EmitSourceDepth("break;\n");
|
||||||
|
@ -406,7 +437,7 @@ void HlslShaderTranslator::ProcessLoopEndInstruction(
|
||||||
EmitSourceDepth("xe_loop_count.w = 0u;\n");
|
EmitSourceDepth("xe_loop_count.w = 0u;\n");
|
||||||
EmitSourceDepth("xe_aL.xyz = xe_aL.yzw;\n");
|
EmitSourceDepth("xe_aL.xyz = xe_aL.yzw;\n");
|
||||||
EmitSourceDepth("xe_aL.w = 0;\n");
|
EmitSourceDepth("xe_aL.w = 0;\n");
|
||||||
EmitSourceDepth("xe_pc = %u; // Exit loop to L%u\n", instr.dword_index + 1,
|
EmitSourceDepth("xe_pc = %uu; // Exit loop to L%u\n", instr.dword_index + 1,
|
||||||
instr.dword_index + 1);
|
instr.dword_index + 1);
|
||||||
|
|
||||||
Unindent();
|
Unindent();
|
||||||
|
@ -416,7 +447,7 @@ void HlslShaderTranslator::ProcessLoopEndInstruction(
|
||||||
// Still looping. Adjust index and jump back to body.
|
// Still looping. Adjust index and jump back to body.
|
||||||
EmitSourceDepth("xe_aL.x += int(xe_loop_constants[%u] << 8u) >> 24;\n",
|
EmitSourceDepth("xe_aL.x += int(xe_loop_constants[%u] << 8u) >> 24;\n",
|
||||||
instr.loop_constant_index);
|
instr.loop_constant_index);
|
||||||
EmitSourceDepth("xe_pc = %u; // Loop back to body L%u\n",
|
EmitSourceDepth("xe_pc = %uu; // Loop back to body L%u\n",
|
||||||
instr.loop_body_address, instr.loop_body_address);
|
instr.loop_body_address, instr.loop_body_address);
|
||||||
|
|
||||||
Unindent();
|
Unindent();
|
||||||
|
@ -465,7 +496,7 @@ void HlslShaderTranslator::ProcessJumpInstruction(
|
||||||
}
|
}
|
||||||
Indent();
|
Indent();
|
||||||
|
|
||||||
EmitSourceDepth("xe_pc = %u; // L%u\n", instr.target_address,
|
EmitSourceDepth("xe_pc = %uu; // L%u\n", instr.target_address,
|
||||||
instr.target_address);
|
instr.target_address);
|
||||||
EmitSourceDepth("break;\n");
|
EmitSourceDepth("break;\n");
|
||||||
|
|
||||||
|
@ -473,7 +504,7 @@ void HlslShaderTranslator::ProcessJumpInstruction(
|
||||||
if (needs_fallthrough) {
|
if (needs_fallthrough) {
|
||||||
uint32_t next_address = instr.dword_index + 1;
|
uint32_t next_address = instr.dword_index + 1;
|
||||||
EmitSourceDepth("} else {\n");
|
EmitSourceDepth("} else {\n");
|
||||||
EmitSourceDepth(" xe_pc = %u; // Fallthrough to L%u\n", next_address,
|
EmitSourceDepth(" xe_pc = %uu; // Fallthrough to L%u\n", next_address,
|
||||||
next_address);
|
next_address);
|
||||||
}
|
}
|
||||||
EmitSourceDepth("}\n");
|
EmitSourceDepth("}\n");
|
||||||
|
@ -687,9 +718,6 @@ void HlslShaderTranslator::EmitStoreResult(const InstructionResult& result,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (result.is_clamped) {
|
|
||||||
EmitSource("saturate(");
|
|
||||||
}
|
|
||||||
bool has_const_writes = false;
|
bool has_const_writes = false;
|
||||||
uint32_t component_write_count = 0;
|
uint32_t component_write_count = 0;
|
||||||
EmitSource(".");
|
EmitSource(".");
|
||||||
|
@ -704,6 +732,9 @@ void HlslShaderTranslator::EmitStoreResult(const InstructionResult& result,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EmitSource(" = ");
|
EmitSource(" = ");
|
||||||
|
if (result.is_clamped) {
|
||||||
|
EmitSource("saturate(");
|
||||||
|
}
|
||||||
if (has_const_writes) {
|
if (has_const_writes) {
|
||||||
if (component_write_count > 1) {
|
if (component_write_count > 1) {
|
||||||
EmitSource("float%u(", component_write_count);
|
EmitSource("float%u(", component_write_count);
|
||||||
|
@ -804,12 +835,18 @@ void HlslShaderTranslator::ProcessVertexFetchInstruction(
|
||||||
load_function_suffix = "";
|
load_function_suffix = "";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
EmitSourceDepth("xe_vertex_element%s = XeSwap(xe_virtual_memory.Load%s(\n",
|
EmitSourceDepth(
|
||||||
load_swizzle, load_function_suffix);
|
"xe_vertex_element%s = XeByteSwap(xe_virtual_memory.Load%s(\n",
|
||||||
EmitSourceDepth(" (xe_vertex_fetch[%u].x & 0x1FFFFFFCu) + "
|
load_swizzle, load_function_suffix);
|
||||||
"uint(xe_src0.x) * %u + %u),\n",
|
EmitSourceDepth(" (xe_vertex_fetch[%uu].x & 0x1FFFFFFCu)",
|
||||||
instr.operands[1].storage_index, instr.attributes.stride * 4,
|
instr.operands[1].storage_index);
|
||||||
instr.attributes.offset * 4);
|
if (instr.attributes.stride != 0) {
|
||||||
|
EmitSource(" + uint(xe_src0.x) * %uu", instr.attributes.stride * 4);
|
||||||
|
}
|
||||||
|
if (instr.attributes.offset != 0) {
|
||||||
|
EmitSource(" + %uu", instr.attributes.offset * 4);
|
||||||
|
}
|
||||||
|
EmitSource("),\n");
|
||||||
EmitSourceDepth(" xe_vertex_fetch[%u].y);\n",
|
EmitSourceDepth(" xe_vertex_fetch[%u].y);\n",
|
||||||
instr.operands[1].storage_index);
|
instr.operands[1].storage_index);
|
||||||
|
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
|
|
||||||
#include <dxgi1_4.h>
|
#include <dxgi1_4.h>
|
||||||
#include <d3d12.h>
|
#include <d3d12.h>
|
||||||
|
#include <d3dcompiler.h>
|
||||||
|
|
||||||
#define XELOGD3D XELOGI
|
#define XELOGD3D XELOGI
|
||||||
|
|
||||||
|
|
|
@ -9,18 +9,13 @@
|
||||||
|
|
||||||
#include "xenia/ui/d3d12/d3d12_context.h"
|
#include "xenia/ui/d3d12/d3d12_context.h"
|
||||||
|
|
||||||
#include <gflags/gflags.h>
|
|
||||||
|
|
||||||
#include "xenia/base/logging.h"
|
#include "xenia/base/logging.h"
|
||||||
#include "xenia/base/math.h"
|
#include "xenia/base/math.h"
|
||||||
|
#include "xenia/gpu/gpu_flags.h"
|
||||||
#include "xenia/ui/d3d12/d3d12_immediate_drawer.h"
|
#include "xenia/ui/d3d12/d3d12_immediate_drawer.h"
|
||||||
#include "xenia/ui/d3d12/d3d12_provider.h"
|
#include "xenia/ui/d3d12/d3d12_provider.h"
|
||||||
#include "xenia/ui/window.h"
|
#include "xenia/ui/window.h"
|
||||||
|
|
||||||
DEFINE_int32(d3d12_sync_interval, 1,
|
|
||||||
"Vertical synchronization interval. 0 to disable vertical sync, "
|
|
||||||
"1 to enable it, 2/3/4 to sync every 2/3/4 vertical blanks.");
|
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace ui {
|
namespace ui {
|
||||||
namespace d3d12 {
|
namespace d3d12 {
|
||||||
|
@ -284,8 +279,7 @@ void D3D12Context::EndSwap() {
|
||||||
graphics_command_list->ResourceBarrier(1, &barrier);
|
graphics_command_list->ResourceBarrier(1, &barrier);
|
||||||
command_list->Execute();
|
command_list->Execute();
|
||||||
// Present and check if the context was lost.
|
// Present and check if the context was lost.
|
||||||
HRESULT result =
|
HRESULT result = swap_chain_->Present(FLAGS_vsync ? 1 : 0, 0);
|
||||||
swap_chain_->Present(xe::clamp(FLAGS_d3d12_sync_interval, 0, 4), 0);
|
|
||||||
if (result == DXGI_ERROR_DEVICE_RESET ||
|
if (result == DXGI_ERROR_DEVICE_RESET ||
|
||||||
result == DXGI_ERROR_DEVICE_REMOVED) {
|
result == DXGI_ERROR_DEVICE_REMOVED) {
|
||||||
context_lost_ = true;
|
context_lost_ = true;
|
||||||
|
@ -311,6 +305,9 @@ std::unique_ptr<RawImage> D3D12Context::Capture() {
|
||||||
|
|
||||||
void D3D12Context::AwaitAllFramesCompletion() {
|
void D3D12Context::AwaitAllFramesCompletion() {
|
||||||
// Await the last frame since previous frames must be completed before it.
|
// Await the last frame since previous frames must be completed before it.
|
||||||
|
if (context_lost_) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
uint32_t await_frame = current_queue_frame_ + (kQueuedFrames - 1);
|
uint32_t await_frame = current_queue_frame_ + (kQueuedFrames - 1);
|
||||||
if (await_frame >= kQueuedFrames) {
|
if (await_frame >= kQueuedFrames) {
|
||||||
await_frame -= kQueuedFrames;
|
await_frame -= kQueuedFrames;
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
#include "xenia/ui/d3d12/d3d12_provider.h"
|
#include "xenia/ui/d3d12/d3d12_provider.h"
|
||||||
#include "xenia/ui/graphics_context.h"
|
#include "xenia/ui/graphics_context.h"
|
||||||
|
|
||||||
|
#define FINE_GRAINED_DRAW_SCOPES 1
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace ui {
|
namespace ui {
|
||||||
namespace d3d12 {
|
namespace d3d12 {
|
||||||
|
|
Loading…
Reference in New Issue