[D3D12] Pipeline cache

This commit is contained in:
Triang3l 2018-07-28 16:30:47 +03:00
parent 4f7edff19d
commit ae7ff58f81
5 changed files with 845 additions and 36 deletions

View File

@ -122,6 +122,12 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// Doesn't actually draw.
return true;
}
if ((regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 &&
primitive_type != PrimitiveType::kPointList &&
primitive_type != PrimitiveType::kRectangleList) {
// Both sides are culled - can't reproduce this with rasterizer state.
return true;
}
// Shaders will have already been defined by previous loads.
// We need them to do just about anything so validate here.
@ -131,9 +137,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// Always need a vertex shader.
return false;
}
// Depth-only mode doesn't need a pixel shader (we'll use a fake one).
// Depth-only mode doesn't need a pixel shader.
if (enable_mode == xenos::ModeControl::kDepth) {
// Use a dummy pixel shader when required.
pixel_shader = nullptr;
} else if (!pixel_shader) {
// Need a pixel shader in normal color mode.
@ -142,8 +147,13 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
bool full_update = BeginFrame();
ID3D12PipelineState* pipeline;
ID3D12RootSignature* root_signature;
auto pipeline_status = pipeline_cache_->ConfigurePipeline(
vertex_shader, pixel_shader, primitive_type);
vertex_shader, pixel_shader, primitive_type,
index_buffer_info != nullptr ? index_buffer_info->format :
IndexFormat::kInt16,
&pipeline, &root_signature);
if (pipeline_status == PipelineCache::UpdateStatus::kError) {
return false;
}

View File

@ -28,6 +28,10 @@ class D3D12Shader : public Shader {
const uint8_t* GetDXBC() const;
size_t GetDXBCSize() const;
// TODO(Triang3l): Real texture counts.
uint32_t GetTextureSRVCount() const { return 0; }
uint32_t GetSamplerCount() const { return 0; }
private:
ID3DBlob* blob_ = nullptr;
};

View File

@ -10,6 +10,7 @@
#include "xenia/gpu/d3d12/pipeline_cache.h"
#include <cinttypes>
#include <cmath>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
@ -55,14 +56,70 @@ D3D12Shader* PipelineCache::LoadShader(ShaderType shader_type,
PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
PrimitiveType primitive_type) {
PrimitiveType primitive_type, IndexFormat index_format,
ID3D12PipelineState** pipeline_out,
ID3D12RootSignature** root_signature_out) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
return UpdateState(vertex_shader, pixel_shader, primitive_type);
assert_not_null(pipeline_out);
assert_not_null(root_signature_out);
Pipeline* pipeline = nullptr;
auto update_status = UpdateState(vertex_shader, pixel_shader, primitive_type,
index_format);
switch (update_status) {
case UpdateStatus::kCompatible:
// Requested pipeline is compatible with our previous one, so use that.
// Note that there still may be dynamic state that needs updating.
pipeline = current_pipeline_;
break;
case UpdateStatus::kMismatch:
// Pipeline state has changed. We need to either create a new one or find
// an old one that matches.
current_pipeline_ = nullptr;
break;
case UpdateStatus::kError:
// Error updating state - bail out.
// We are in an indeterminate state, so reset things for the next attempt.
current_pipeline_ = nullptr;
return update_status;
}
if (!pipeline) {
// Should have a hash key produced by the UpdateState pass.
uint64_t hash_key = XXH64_digest(&hash_state_);
pipeline = GetPipeline(hash_key);
current_pipeline_ = pipeline;
if (!pipeline) {
// Unable to create pipeline.
return UpdateStatus::kError;
}
}
*pipeline_out = pipeline->state;
*root_signature_out = pipeline->root_signature;
return update_status;
}
void PipelineCache::ClearCache() {
// Remove references to the current pipeline.
current_pipeline_ = nullptr;
// Destroy all pipelines.
for (auto it : pipelines_) {
it.second->state->Release();
delete it.second;
}
pipelines_.clear();
COUNT_profile_set("gpu/pipeline_cache/pipelines", 0);
// Destroy all root signatures.
for (auto it : root_signatures_) {
it.second->Release();
}
root_signatures_.clear();
// Destroy all shaders.
for (auto it : shader_map_) {
delete it.second;
@ -121,7 +178,7 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader,
PipelineCache::UpdateStatus PipelineCache::UpdateState(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
PrimitiveType primitive_type) {
PrimitiveType primitive_type, IndexFormat index_format) {
bool mismatch = false;
// Reset hash so we can build it up.
@ -136,11 +193,21 @@ PipelineCache::UpdateStatus PipelineCache::UpdateState(
mismatch = true; \
} \
}
UpdateStatus status;
status = UpdateShaderStages(vertex_shader, pixel_shader, primitive_type);
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update shader stages");
status = UpdateBlendState(pixel_shader);
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state");
status = UpdateRasterizerState(primitive_type);
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state");
status = UpdateDepthStencilState();
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");
status = UpdateIBStripCutValue(index_format);
CHECK_UPDATE_STATUS(status, mismatch,
"Unable to update index buffer strip cut value");
status = UpdateRenderTargetFormats();
CHECK_UPDATE_STATUS(status, mismatch,
"Unable to update render target formats");
#undef CHECK_UPDATE_STATUS
return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
@ -160,16 +227,20 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
0x000FF100 ||
register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
bool dirty = false;
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
bool dirty = current_pipeline_ == nullptr;
dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
dirty |= regs.vertex_shader != vertex_shader;
dirty |= regs.pixel_shader != pixel_shader;
dirty |= regs.primitive_type != primitive_type;
regs.vertex_shader = vertex_shader;
regs.pixel_shader = pixel_shader;
regs.primitive_type = primitive_type;
// Points are emulated via a geometry shader because Direct3D 10+ doesn't
// support point sizes other than 1.
bool primitive_topology_is_line =
primitive_type == PrimitiveType::kLineList ||
primitive_type == PrimitiveType::kLineStrip ||
primitive_type == PrimitiveType::kLineLoop ||
primitive_type == PrimitiveType::k2DLineStrip;
dirty |= regs.primitive_topology_is_line != primitive_topology_is_line;
XXH64_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) {
return UpdateStatus::kCompatible;
@ -177,22 +248,630 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
xenos::xe_gpu_program_cntl_t sq_program_cntl;
sq_program_cntl.dword_0 = regs.sq_program_cntl;
if (!vertex_shader->is_translated() &&
!TranslateShader(vertex_shader, sq_program_cntl)) {
XELOGE("Failed to translate the vertex shader!");
return UpdateStatus::kError;
}
if (pixel_shader && !pixel_shader->is_translated() &&
if (pixel_shader != nullptr && !pixel_shader->is_translated() &&
!TranslateShader(pixel_shader, sq_program_cntl)) {
XELOGE("Failed to translate the pixel shader!");
return UpdateStatus::kError;
}
update_desc_.VS.pShaderBytecode = vertex_shader->GetDXBC();
update_desc_.VS.BytecodeLength = vertex_shader->GetDXBCSize();
if (pixel_shader != nullptr) {
update_desc_.PS.pShaderBytecode = pixel_shader->GetDXBC();
update_desc_.PS.BytecodeLength = pixel_shader->GetDXBCSize();
} else {
update_desc_.PS.pShaderBytecode = nullptr;
update_desc_.PS.BytecodeLength = 0;
}
update_desc_.DS.pShaderBytecode = nullptr;
update_desc_.DS.BytecodeLength = 0;
update_desc_.HS.pShaderBytecode = nullptr;
update_desc_.HS.BytecodeLength = 0;
// TODO(Triang3l): Geometry shaders.
update_desc_.GS.pShaderBytecode = nullptr;
update_desc_.GS.BytecodeLength = 0;
update_desc_.pRootSignature = GetRootSignature(vertex_shader, pixel_shader);
if (update_desc_.pRootSignature == nullptr) {
return UpdateStatus::kError;
}
update_desc_.PrimitiveTopologyType =
primitive_topology_is_line ? D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE :
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
return UpdateStatus::kMismatch;
}
// Updates the BlendState and SampleMask fields of the pipeline description
// from the current guest registers and feeds the shadowed blend state into the
// pipeline hash.
//
// pixel_shader may be null (depth-only drawing), in which case all color
// writes are masked out.
//
// Returns kCompatible when nothing relevant changed since the previous call
// and a pipeline is currently bound, kMismatch when the description was
// rewritten.
PipelineCache::UpdateStatus PipelineCache::UpdateBlendState(
    D3D12Shader* pixel_shader) {
  auto& regs = update_blend_state_regs_;

  bool dirty = current_pipeline_ == nullptr;

  uint32_t color_mask;
  if (pixel_shader != nullptr) {
    color_mask = register_file_->values[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF;
    // If the pixel shader doesn't write to a render target, writing to it is
    // disabled in the blend state. Otherwise, in Halo 3, one important render
    // target is destroyed by a shader not writing to one of the outputs.
    for (uint32_t i = 0; i < 4; ++i) {
      if (!pixel_shader->writes_color_target(i)) {
        color_mask &= ~(0xFu << (i * 4));
      }
    }
  } else {
    color_mask = 0;
  }
  dirty |= regs.color_mask != color_mask;
  regs.color_mask = color_mask;

  // The blend disable bit (0x20) lives in RB_COLORCONTROL, not in the color
  // write mask register - bit 5 of RB_COLOR_MASK is a channel write enable of
  // the second render target.
  bool blend_enable =
      color_mask != 0 &&
      !(register_file_->values[XE_GPU_REG_RB_COLORCONTROL].u32 & 0x20);
  dirty |= regs.colorcontrol_blend_enable != blend_enable;
  regs.colorcontrol_blend_enable = blend_enable;

  static const Register kBlendControlRegs[] = {
      XE_GPU_REG_RB_BLENDCONTROL_0, XE_GPU_REG_RB_BLENDCONTROL_1,
      XE_GPU_REG_RB_BLENDCONTROL_2, XE_GPU_REG_RB_BLENDCONTROL_3};
  for (uint32_t i = 0; i < 4; ++i) {
    if (blend_enable && (color_mask & (0xFu << (i * 4)))) {
      dirty |= SetShadowRegister(&regs.blendcontrol[i], kBlendControlRegs[i]);
    } else {
      // Zero out blend color for unused render targets and when not blending
      // for a stable hash.
      regs.blendcontrol[i] = 0;
    }
  }

  XXH64_update(&hash_state_, &regs, sizeof(regs));
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  update_desc_.BlendState.AlphaToCoverageEnable = FALSE;
  // Every render target gets its own blend equation and write mask below.
  // With IndependentBlendEnable == FALSE, Direct3D 12 would only use
  // RenderTarget[0] and ignore the rest.
  update_desc_.BlendState.IndependentBlendEnable = TRUE;

  static const D3D12_BLEND kBlendFactorMap[] = {
      /*  0 */ D3D12_BLEND_ZERO,
      /*  1 */ D3D12_BLEND_ONE,
      /*  2 */ D3D12_BLEND_ZERO,  // ?
      /*  3 */ D3D12_BLEND_ZERO,  // ?
      /*  4 */ D3D12_BLEND_SRC_COLOR,
      /*  5 */ D3D12_BLEND_INV_SRC_COLOR,
      /*  6 */ D3D12_BLEND_SRC_ALPHA,
      /*  7 */ D3D12_BLEND_INV_SRC_ALPHA,
      /*  8 */ D3D12_BLEND_DEST_COLOR,
      /*  9 */ D3D12_BLEND_INV_DEST_COLOR,
      /* 10 */ D3D12_BLEND_DEST_ALPHA,
      /* 11 */ D3D12_BLEND_INV_DEST_ALPHA,
      /* 12 */ D3D12_BLEND_BLEND_FACTOR,      // CONSTANT_COLOR
      /* 13 */ D3D12_BLEND_INV_BLEND_FACTOR,  // ONE_MINUS_CONSTANT_COLOR
      /* 14 */ D3D12_BLEND_BLEND_FACTOR,      // CONSTANT_ALPHA
      /* 15 */ D3D12_BLEND_INV_BLEND_FACTOR,  // ONE_MINUS_CONSTANT_ALPHA
      /* 16 */ D3D12_BLEND_SRC_ALPHA_SAT,
  };
  static const D3D12_BLEND_OP kBlendOpMap[] = {
      /*  0 */ D3D12_BLEND_OP_ADD,
      /*  1 */ D3D12_BLEND_OP_SUBTRACT,
      /*  2 */ D3D12_BLEND_OP_MIN,
      /*  3 */ D3D12_BLEND_OP_MAX,
      /*  4 */ D3D12_BLEND_OP_REV_SUBTRACT,
  };
  // The register fields are wider than the tables (5 bits for 17 factors,
  // 3 bits for 5 operations), so guard against reading past the end when the
  // guest writes an unknown encoding.
  auto blend_factor = [](uint32_t index) -> D3D12_BLEND {
    return index < sizeof(kBlendFactorMap) / sizeof(kBlendFactorMap[0])
               ? kBlendFactorMap[index]
               : D3D12_BLEND_ZERO;
  };
  // Direct3D 12 doesn't accept the *_COLOR factors for the alpha equation, so
  // substitute the alpha counterparts there.
  auto alpha_blend_factor = [&blend_factor](uint32_t index) -> D3D12_BLEND {
    D3D12_BLEND factor = blend_factor(index);
    switch (factor) {
      case D3D12_BLEND_SRC_COLOR:
        return D3D12_BLEND_SRC_ALPHA;
      case D3D12_BLEND_INV_SRC_COLOR:
        return D3D12_BLEND_INV_SRC_ALPHA;
      case D3D12_BLEND_DEST_COLOR:
        return D3D12_BLEND_DEST_ALPHA;
      case D3D12_BLEND_INV_DEST_COLOR:
        return D3D12_BLEND_INV_DEST_ALPHA;
      default:
        return factor;
    }
  };
  auto blend_op = [](uint32_t index) -> D3D12_BLEND_OP {
    return index < sizeof(kBlendOpMap) / sizeof(kBlendOpMap[0])
               ? kBlendOpMap[index]
               : D3D12_BLEND_OP_ADD;
  };

  for (uint32_t i = 0; i < 4; ++i) {
    auto& blend_desc = update_desc_.BlendState.RenderTarget[i];
    if (blend_enable && (color_mask & (0xFu << (i * 4)))) {
      uint32_t blend_control = regs.blendcontrol[i];
      // The other branch sets this to FALSE, so it has to be turned back on
      // explicitly when the target is blended.
      blend_desc.BlendEnable = TRUE;
      // A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND
      blend_desc.SrcBlend = blend_factor((blend_control & 0x0000001F) >> 0);
      // A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND
      blend_desc.DestBlend = blend_factor((blend_control & 0x00001F00) >> 8);
      // A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN
      blend_desc.BlendOp = blend_op((blend_control & 0x000000E0) >> 5);
      // A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND
      blend_desc.SrcBlendAlpha =
          alpha_blend_factor((blend_control & 0x001F0000) >> 16);
      // A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND
      blend_desc.DestBlendAlpha =
          alpha_blend_factor((blend_control & 0x1F000000) >> 24);
      // A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN
      blend_desc.BlendOpAlpha = blend_op((blend_control & 0x00E00000) >> 21);
    } else {
      blend_desc.BlendEnable = FALSE;
      blend_desc.SrcBlend = D3D12_BLEND_ONE;
      blend_desc.DestBlend = D3D12_BLEND_ZERO;
      blend_desc.BlendOp = D3D12_BLEND_OP_ADD;
      blend_desc.SrcBlendAlpha = D3D12_BLEND_ONE;
      blend_desc.DestBlendAlpha = D3D12_BLEND_ZERO;
      blend_desc.BlendOpAlpha = D3D12_BLEND_OP_ADD;
    }
    blend_desc.LogicOpEnable = FALSE;
    blend_desc.LogicOp = D3D12_LOGIC_OP_NOOP;
    // Four write enable bits per render target in the guest mask.
    blend_desc.RenderTargetWriteMask = (color_mask >> (i * 4)) & 0xF;
  }
  update_desc_.SampleMask = UINT_MAX;

  return UpdateStatus::kMismatch;
}
// Updates the RasterizerState field of the pipeline description from the
// current guest registers and feeds the derived rasterizer state into the
// pipeline hash.
//
// primitive_type is needed because points and rectangles are never culled.
//
// Returns kCompatible when nothing relevant changed since the previous call
// and a pipeline is currently bound, kMismatch when the description was
// rewritten.
PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState(
    PrimitiveType primitive_type) {
  auto& regs = update_rasterizer_state_regs_;

  bool dirty = current_pipeline_ == nullptr;

  // All the cull/fill/winding/depth bias bits decoded below belong to
  // PA_SU_SC_MODE_CNTL.
  uint32_t pa_su_sc_mode_cntl =
      register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
  uint32_t cull_mode = pa_su_sc_mode_cntl & 0x3;
  if (primitive_type == PrimitiveType::kPointList ||
      primitive_type == PrimitiveType::kRectangleList) {
    cull_mode = 0;
  }
  dirty |= regs.cull_mode != cull_mode;
  regs.cull_mode = cull_mode;

  // Because Direct3D 12 doesn't support per-side fill mode and depth bias, the
  // values to use depends on the current culling state.
  // If front faces are culled, use the ones for back faces.
  // If back faces are culled, it's the other way around.
  // If culling is not enabled, assume the developer wanted to draw things in a
  // more special way - so if one side is wireframe or has a depth bias, then
  // that's intentional (if both sides have a depth bias, the one for the front
  // faces is used, though it's unlikely that they will ever be different -
  // SetRenderState sets the same offset for both sides).
  // Points fill mode (0) also isn't supported in Direct3D 12, but assume the
  // developer didn't want to fill the whole primitive and use wireframe (like
  // Xenos fill mode 1).
  // Here we also assume that only one side is culled - if two sides are culled,
  // the D3D12 command processor will drop such draw early.
  bool fill_mode_wireframe = false;
  float poly_offset = 0.0f, poly_offset_scale = 0.0f;
  if (!(cull_mode & 1)) {
    // Front faces aren't culled.
    uint32_t fill_mode = (pa_su_sc_mode_cntl >> 5) & 0x7;
    if (fill_mode == 0 || fill_mode == 1) {
      fill_mode_wireframe = true;
    }
    if ((pa_su_sc_mode_cntl >> 11) & 0x1) {
      poly_offset =
          register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
      poly_offset_scale =
          register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
    }
  }
  if (!(cull_mode & 2)) {
    // Back faces aren't culled.
    uint32_t fill_mode = (pa_su_sc_mode_cntl >> 8) & 0x7;
    if (fill_mode == 0 || fill_mode == 1) {
      fill_mode_wireframe = true;
    }
    // Prefer front depth bias because in general, front faces are the ones that
    // are rendered (except for shadow volumes).
    if (((pa_su_sc_mode_cntl >> 12) & 0x1) && poly_offset == 0.0f &&
        poly_offset_scale == 0.0f) {
      poly_offset =
          register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
      poly_offset_scale =
          register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
    }
  }
  if (((pa_su_sc_mode_cntl >> 3) & 0x3) == 0) {
    // Fill mode is disabled.
    fill_mode_wireframe = false;
  }
  dirty |= regs.fill_mode_wireframe != fill_mode_wireframe;
  regs.fill_mode_wireframe = fill_mode_wireframe;
  dirty |= regs.poly_offset != poly_offset;
  regs.poly_offset = poly_offset;
  dirty |= regs.poly_offset_scale != poly_offset_scale;
  regs.poly_offset_scale = poly_offset_scale;

  bool front_counter_clockwise = !(pa_su_sc_mode_cntl & 0x4);
  dirty |= regs.front_counter_clockwise != front_counter_clockwise;
  regs.front_counter_clockwise = front_counter_clockwise;

  uint32_t pa_cl_clip_cntl =
      register_file_->values[XE_GPU_REG_PA_CL_CLIP_CNTL].u32;
  // CLIP_DISABLE
  bool depth_clamp_enable = !!(pa_cl_clip_cntl & (1 << 16));
  // TODO(DrChat): This seem to differ. Need to examine this.
  // https://github.com/decaf-emu/decaf-emu/blob/c017a9ff8128852fb9a5da19466778a171cea6e1/src/libdecaf/src/gpu/latte_registers_pa.h#L11
  // ZCLIP_NEAR_DISABLE
  // bool depth_clamp_enable = !(pa_cl_clip_cntl & (1 << 26));
  // RASTERIZER_DISABLE
  // Disable rendering in command processor if regs.pa_cl_clip_cntl & (1 << 22)?
  dirty |= regs.depth_clamp_enable != depth_clamp_enable;
  regs.depth_clamp_enable = depth_clamp_enable;

  XXH64_update(&hash_state_, &regs, sizeof(regs));
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  update_desc_.RasterizerState.FillMode =
      fill_mode_wireframe ? D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID;
  if (cull_mode & 1) {
    update_desc_.RasterizerState.CullMode = D3D12_CULL_MODE_FRONT;
  } else if (cull_mode & 2) {
    update_desc_.RasterizerState.CullMode = D3D12_CULL_MODE_BACK;
  } else {
    update_desc_.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
  }
  update_desc_.RasterizerState.FrontCounterClockwise =
      front_counter_clockwise ? TRUE : FALSE;
  // Conversion based on the calculations in Call of Duty 4 and the values it
  // writes to the registers, and also on:
  // https://github.com/mesa3d/mesa/blob/54ad9b444c8e73da498211870e785239ad3ff1aa/src/gallium/drivers/radeonsi/si_state.c#L943
  // Call of Duty 4 sets the constant bias of 1/32768 and the slope scale of 32.
  // However, it's calculated from a console variable in 2 parts: first it's
  // divided by 65536, and then it's multiplied by 2.
  // TODO(Triang3l): Find the best scale. According to si_state.c, the value in
  // the register should be divided by 2 to get the value suitable for PC
  // graphics APIs if the depth buffer is 24-bit. However, even multiplying by
  // 65536 rather than 32768 still doesn't remove shadow acne in Bomberman Live
  // completely. Maybe 131072 would work the best.
  // Using ceil here just in case a game wants the offset but passes a value
  // that is too small - it's better to apply more offset than to make depth
  // fighting worse or to disable the offset completely (Direct3D 12 takes an
  // integer value).
  update_desc_.RasterizerState.DepthBias =
      int32_t(std::ceil(std::abs(poly_offset) * 131072.0f));
  update_desc_.RasterizerState.DepthBias *= poly_offset < 0.0f ? -1 : 1;
  update_desc_.RasterizerState.DepthBiasClamp = 0.0f;
  update_desc_.RasterizerState.SlopeScaledDepthBias =
      poly_offset_scale * (1.0f / 16.0f);
  update_desc_.RasterizerState.DepthClipEnable =
      !depth_clamp_enable ? TRUE : FALSE;
  update_desc_.RasterizerState.MultisampleEnable = FALSE;
  update_desc_.RasterizerState.AntialiasedLineEnable = FALSE;
  update_desc_.RasterizerState.ForcedSampleCount = 0;
  update_desc_.RasterizerState.ConservativeRaster =
      D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;

  return UpdateStatus::kMismatch;
}
// Refreshes the DepthStencilState field of the pipeline description from
// RB_DEPTHCONTROL and RB_STENCILREFMASK and adds the shadowed registers to the
// pipeline hash. Returns kCompatible if a pipeline is bound and neither
// register changed, kMismatch after rewriting the description.
PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
  auto& shadow = update_depth_stencil_state_regs_;

  // Both shadow copies are refreshed unconditionally, so they are evaluated
  // as separate statements rather than in a short-circuiting expression.
  const bool depth_control_changed =
      SetShadowRegister(&shadow.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
  const bool stencil_ref_mask_changed = SetShadowRegister(
      &shadow.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);

  XXH64_update(&hash_state_, &shadow, sizeof(shadow));

  if (current_pipeline_ != nullptr && !depth_control_changed &&
      !stencil_ref_mask_changed) {
    return UpdateStatus::kCompatible;
  }

  const uint32_t depth_control = shadow.rb_depthcontrol;
  const uint32_t stencil_ref_mask = shadow.rb_stencilrefmask;
  auto& desc = update_desc_.DepthStencilState;

  // Comparison functions and stencil operations are both 3-bit fields whose
  // Direct3D 12 enum value is the guest value plus one (for comparisons:
  // bit 0 for less, bit 1 for equal, bit 2 for greater).
  auto plus_one_field = [depth_control](uint32_t shift) -> uint32_t {
    return ((depth_control >> shift) & 0x7) + 1;
  };

  desc.DepthEnable = (depth_control & 0x2) ? TRUE : FALSE;
  if (depth_control & 0x4) {
    desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
  } else {
    desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
  }
  desc.DepthFunc = D3D12_COMPARISON_FUNC(plus_one_field(4));

  desc.StencilEnable = (depth_control & 0x1) ? TRUE : FALSE;
  desc.StencilReadMask = (stencil_ref_mask >> 8) & 0xFF;
  desc.StencilWriteMask = (stencil_ref_mask >> 16) & 0xFF;

  auto& front = desc.FrontFace;
  front.StencilFailOp = D3D12_STENCIL_OP(plus_one_field(11));
  front.StencilDepthFailOp = D3D12_STENCIL_OP(plus_one_field(17));
  front.StencilPassOp = D3D12_STENCIL_OP(plus_one_field(14));
  front.StencilFunc = D3D12_COMPARISON_FUNC(plus_one_field(8));

  // Start with the back face mirroring the front face; the four fields are
  // overridden only when BACKFACE_ENABLE is set.
  desc.BackFace = front;
  if (depth_control & 0x80) {
    auto& back = desc.BackFace;
    back.StencilFailOp = D3D12_STENCIL_OP(plus_one_field(23));
    back.StencilDepthFailOp = D3D12_STENCIL_OP(plus_one_field(29));
    back.StencilPassOp = D3D12_STENCIL_OP(plus_one_field(26));
    back.StencilFunc = D3D12_COMPARISON_FUNC(plus_one_field(20));
  }

  // TODO(Triang3l): EARLY_Z_ENABLE (needs to be enabled in shaders, but alpha
  // test is dynamic - should be enabled anyway if there's no alpha test,
  // discarding and depth output).

  return UpdateStatus::kMismatch;
}
// Updates the IBStripCutValue field of the pipeline description and feeds it
// into the pipeline hash.
//
// index_format selects between the 16-bit and the 32-bit cut value when
// bit 21 of PA_SU_SC_MODE_CNTL is set; otherwise strip cutting is disabled.
//
// Returns kCompatible when the value is unchanged and a pipeline is currently
// bound, kMismatch when the description was rewritten.
PipelineCache::UpdateStatus PipelineCache::UpdateIBStripCutValue(
    IndexFormat index_format) {
  auto& regs = update_ib_strip_cut_value_regs_;

  bool dirty = current_pipeline_ == nullptr;

  D3D12_INDEX_BUFFER_STRIP_CUT_VALUE ib_strip_cut_value =
      D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
  if (register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)) {
    ib_strip_cut_value = index_format == IndexFormat::kInt32
                             ? D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF
                             : D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
  }
  dirty |= regs.ib_strip_cut_value != ib_strip_cut_value;
  regs.ib_strip_cut_value = ib_strip_cut_value;

  // The strip cut value is part of the pipeline state object, so it must be
  // part of the hash key as well - otherwise two pipelines differing only in
  // the cut value would map to the same cache entry.
  XXH64_update(&hash_state_, &regs, sizeof(regs));
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  update_desc_.IBStripCutValue = ib_strip_cut_value;

  // TODO(Triang3l): Geometry shaders for non-0xFFFF values if they are used.

  return UpdateStatus::kMismatch;
}
// Fills NumRenderTargets and DSVFormat of the pipeline description. Nothing
// here depends on guest state yet, so the description is only rewritten when
// no pipeline is currently bound.
PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargetFormats() {
  if (current_pipeline_ != nullptr) {
    // A pipeline is bound and there is no state that could have changed.
    return UpdateStatus::kCompatible;
  }

  // TODO(Triang3l): Set the formats when RT cache is added.
  update_desc_.DSVFormat = DXGI_FORMAT_UNKNOWN;
  update_desc_.NumRenderTargets = 0;

  return UpdateStatus::kMismatch;
}
// Returns the pipeline stored under hash_key, creating it from the current
// pipeline description (update_desc_) and caching it if there is none yet.
// Returns nullptr if the pipeline state object can't be created.
PipelineCache::Pipeline* PipelineCache::GetPipeline(uint64_t hash_key) {
  // Reuse a previously created pipeline when the key is already known.
  auto cached = pipelines_.find(hash_key);
  if (cached != pipelines_.end()) {
    return cached->second;
  }

  // Fill in the parts of the description that none of the Update* passes
  // maintain: no stream output, no input layout, single-sampled, no cached
  // blob.
  auto& stream_output = update_desc_.StreamOutput;
  stream_output.pSODeclaration = nullptr;
  stream_output.NumEntries = 0;
  stream_output.pBufferStrides = nullptr;
  stream_output.NumStrides = 0;
  stream_output.RasterizedStream = 0;

  auto& input_layout = update_desc_.InputLayout;
  input_layout.pInputElementDescs = nullptr;
  input_layout.NumElements = 0;

  auto& sample_desc = update_desc_.SampleDesc;
  sample_desc.Count = 1;
  sample_desc.Quality = 0;

  update_desc_.NodeMask = 0;

  // TODO(Triang3l): Cache create pipelines.
  auto& cached_pso = update_desc_.CachedPSO;
  cached_pso.pCachedBlob = nullptr;
  cached_pso.CachedBlobSizeInBytes = 0;

  update_desc_.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;

  ID3D12PipelineState* new_state;
  auto device = context_->GetD3D12Provider()->GetDevice();
  if (FAILED(device->CreateGraphicsPipelineState(&update_desc_,
                                                 IID_PPV_ARGS(&new_state)))) {
    XELOGE("Failed to create graphics pipeline state");
    return nullptr;
  }
  // TODO(Triang3l): Set the name for the pipeline, with shader hashes.

  // Remember the new pipeline under its hash key so later draws can reuse it.
  Pipeline* entry = new Pipeline;
  entry->root_signature = update_desc_.pRootSignature;
  entry->state = new_state;
  pipelines_.insert({hash_key, entry});
  return entry;
}
// Returns a root signature suitable for the given shader pair, creating and
// caching it on first use.
//
// Root signatures are shared between all shader pairs that need the same
// number of texture SRVs and samplers per stage. The layout is described by
// RootParameter: four constant/shared memory tables that are always present,
// followed by texture and sampler tables for the first stage that samples
// textures (pixel if it does, vertex otherwise), followed by the vertex tables
// when both stages sample textures.
//
// vertex_shader must not be null; pixel_shader may be null for depth-only
// drawing. Returns nullptr if serialization or creation fails.
ID3D12RootSignature* PipelineCache::GetRootSignature(
    const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) {
  uint32_t pixel_textures =
      pixel_shader != nullptr ? pixel_shader->GetTextureSRVCount() : 0;
  uint32_t pixel_samplers =
      pixel_shader != nullptr ? pixel_shader->GetSamplerCount() : 0;
  uint32_t vertex_textures = vertex_shader->GetTextureSRVCount();
  uint32_t vertex_samplers = vertex_shader->GetSamplerCount();
  // Max 96 textures (if all kinds of tfetch instructions are used for all fetch
  // registers) and 32 samplers (one sampler per used fetch), but different
  // shader stages have different texture sets.
  // A count of up to 96 needs 7 bits and a count of up to 32 (inclusive) needs
  // 6 bits, so the fields are 7, 6, 7 and 6 bits wide. Narrower sampler fields
  // would let a count of 32 spill into the neighboring texture count and make
  // two different layouts share a key.
  assert_true(pixel_textures < (1u << 7) && vertex_textures < (1u << 7));
  assert_true(pixel_samplers < (1u << 6) && vertex_samplers < (1u << 6));
  uint32_t index = pixel_textures | (pixel_samplers << 7) |
                   (vertex_textures << 13) | (vertex_samplers << 20);

  // Try an existing root signature.
  auto it = root_signatures_.find(index);
  if (it != root_signatures_.end()) {
    return it->second;
  }

  // Create a new one.
  D3D12_ROOT_SIGNATURE_DESC desc;
  D3D12_ROOT_PARAMETER
  parameters[size_t(RootParameter::kCountWithTwoStageTextures)];
  // One range per parameter; they have to stay alive until serialization
  // because the parameters only store pointers to them.
  D3D12_DESCRIPTOR_RANGE
  ranges[size_t(RootParameter::kCountWithTwoStageTextures)];
  desc.NumParameters = UINT(RootParameter::kCountNoTextures);
  desc.pParameters = parameters;
  desc.NumStaticSamplers = 0;
  desc.pStaticSamplers = nullptr;
  desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;

  // Every root parameter is a descriptor table with a single range starting at
  // the beginning of the table.
  auto set_table = [&parameters, &ranges](
                       RootParameter slot, D3D12_DESCRIPTOR_RANGE_TYPE type,
                       UINT descriptor_count, UINT base_register,
                       UINT register_space,
                       D3D12_SHADER_VISIBILITY visibility) {
    auto& parameter = parameters[size_t(slot)];
    auto& range = ranges[size_t(slot)];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range;
    parameter.ShaderVisibility = visibility;
    range.RangeType = type;
    range.NumDescriptors = descriptor_count;
    range.BaseShaderRegister = base_register;
    range.RegisterSpace = register_space;
    range.OffsetInDescriptorsFromTableStart = 0;
  };

  // Vertex constants - float and fetch (b2-b10).
  set_table(RootParameter::kVertexConstants, D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 9,
            2, 0, D3D12_SHADER_VISIBILITY_VERTEX);
  // Pixel constants - float (b2-b9).
  set_table(RootParameter::kPixelConstants, D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 8,
            2, 0, D3D12_SHADER_VISIBILITY_PIXEL);
  // Common constants - system and loop/bool (b0-b1).
  set_table(RootParameter::kCommonConstants, D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 2,
            0, 0, D3D12_SHADER_VISIBILITY_ALL);
  // Virtual shared memory (t0, space1).
  set_table(RootParameter::kVirtualMemory, D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1,
            0, 1, D3D12_SHADER_VISIBILITY_VERTEX);

  if (pixel_textures > 0 || vertex_textures > 0) {
    desc.NumParameters = UINT(RootParameter::kCountWithOneStageTextures);

    // The first texture/sampler pair belongs to the pixel shader if it samples
    // textures, otherwise to the vertex shader. The same stage is used for
    // both tables so they can't end up describing different shaders.
    bool first_stage_is_pixel = pixel_textures > 0;
    if (first_stage_is_pixel) {
      assert_true(pixel_samplers > 0);
    } else {
      assert_true(vertex_samplers > 0);
    }
    D3D12_SHADER_VISIBILITY first_stage_visibility =
        first_stage_is_pixel ? D3D12_SHADER_VISIBILITY_PIXEL
                             : D3D12_SHADER_VISIBILITY_VERTEX;

    // Pixel or vertex textures.
    set_table(RootParameter::kPixelOrVertexTextures,
              D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
              first_stage_is_pixel ? pixel_textures : vertex_textures, 0, 0,
              first_stage_visibility);
    // Pixel or vertex samplers.
    set_table(RootParameter::kPixelOrVertexSamplers,
              D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
              first_stage_is_pixel ? pixel_samplers : vertex_samplers, 0, 0,
              first_stage_visibility);

    if (pixel_textures > 0 && vertex_textures > 0) {
      assert_true(vertex_samplers > 0);

      desc.NumParameters = UINT(RootParameter::kCountWithTwoStageTextures);

      // Vertex textures.
      set_table(RootParameter::kVertexTextures, D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
                vertex_textures, 0, 0, D3D12_SHADER_VISIBILITY_VERTEX);
      // Vertex samplers.
      set_table(RootParameter::kVertexSamplers,
                D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, vertex_samplers, 0, 0,
                D3D12_SHADER_VISIBILITY_VERTEX);
    }
  }

  ID3DBlob* blob;
  ID3DBlob* error_blob = nullptr;
  if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1,
                                         &blob, &error_blob))) {
    XELOGE("Failed to serialize a root signature with %u pixel textures, %u "
           "pixel samplers, %u vertex textures and %u vertex samplers",
           pixel_textures, pixel_samplers, vertex_textures, vertex_samplers);
    if (error_blob != nullptr) {
      XELOGE("%s",
             reinterpret_cast<const char*>(error_blob->GetBufferPointer()));
      error_blob->Release();
    }
    return nullptr;
  }
  // The serializer may return a message blob even on success.
  if (error_blob != nullptr) {
    error_blob->Release();
  }

  auto device = context_->GetD3D12Provider()->GetDevice();
  ID3D12RootSignature* root_signature;
  if (FAILED(device->CreateRootSignature(0, blob->GetBufferPointer(),
                                         blob->GetBufferSize(),
                                         IID_PPV_ARGS(&root_signature)))) {
    XELOGE("Failed to create a root signature with %u pixel textures, %u pixel "
           "samplers, %u vertex textures and %u vertex samplers",
           pixel_textures, pixel_samplers, vertex_textures, vertex_samplers);
    blob->Release();
    return nullptr;
  }
  blob->Release();

  root_signatures_.insert({index, root_signature});
  return root_signature;
}
} // namespace d3d12
} // namespace gpu
} // namespace xe

View File

@ -42,10 +42,57 @@ class PipelineCache {
UpdateStatus ConfigurePipeline(D3D12Shader* vertex_shader,
D3D12Shader* pixel_shader,
PrimitiveType primitive_type);
PrimitiveType primitive_type,
IndexFormat index_format,
ID3D12PipelineState** pipeline_out,
ID3D12RootSignature** root_signature_out);
void ClearCache();
// Indices of the root parameters in the root signatures created by the
// pipeline cache. The kCount* values are the numbers of parameters in each of
// the three possible layouts, and also the index of the first parameter of the
// next group.
enum class RootParameter {
// These are always present.
// Most frequently changed (for one object drawn multiple times, for
// instance - may contain projection matrices, also vertex offsets for
// objects drawn in multiple parts).
// This contains 8 pages of float constants (b2-b9) and fetch constants
// (b10).
kVertexConstants,
// Less frequently changed (per-material) - 8 pages of float constants
// (b2-b9).
kPixelConstants,
// Rarely changed - system constants like viewport and alpha testing (b0)
// and loop and bool constants (b1).
kCommonConstants,
// Never changed - shared memory byte address buffer (t0, space1).
kVirtualMemory,
// Number of parameters when neither shader stage fetches textures.
kCountNoTextures,
// These are there only if textures are fetched (they are changed pretty
// frequently, but for the ease of maintenance they're in the end).
// If the pixel shader samples textures, these are for pixel textures
// (changed more frequently), otherwise, if the vertex shader samples
// textures, these are for vertex textures.
// Used textures of all types (t0+, space0).
kPixelOrVertexTextures = kCountNoTextures,
// Used samplers (s0+).
kPixelOrVertexSamplers,
// Number of parameters when exactly one shader stage fetches textures.
kCountWithOneStageTextures,
// These are present only if both the pixel and the vertex shaders sample
// textures; in that case they hold the vertex shader's textures and
// samplers.
// Used textures of all types (t0+, space0).
kVertexTextures = kCountWithOneStageTextures,
// Used samplers (s0+).
kVertexSamplers,
// Number of parameters when both shader stages fetch textures.
kCountWithTwoStageTextures,
};
private:
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
bool SetShadowRegister(float* dest, uint32_t register_name);
@ -54,11 +101,23 @@ class PipelineCache {
// Full state update for the given shaders, primitive type and index format.
// Presumably drives the per-section Update* helpers declared below - verify
// against the implementation.
// NOTE(review): two `PrimitiveType primitive_type` lines follow (one closing
// the declaration, one continuing it) - this looks like old and new diff lines
// shown together; confirm that the variant taking index_format is the intended
// one.
UpdateStatus UpdateState(D3D12Shader* vertex_shader,
D3D12Shader* pixel_shader,
PrimitiveType primitive_type);
PrimitiveType primitive_type,
IndexFormat index_format);
// pRootSignature, VS, PS, DS, HS, GS, PrimitiveTopologyType.
UpdateStatus UpdateShaderStages(D3D12Shader* vertex_shader,
D3D12Shader* pixel_shader,
PrimitiveType primitive_type);
// BlendState, SampleMask.
UpdateStatus UpdateBlendState(D3D12Shader* pixel_shader);
// RasterizerState.
UpdateStatus UpdateRasterizerState(PrimitiveType primitive_type);
// DepthStencilState.
UpdateStatus UpdateDepthStencilState();
// IBStripCutValue.
UpdateStatus UpdateIBStripCutValue(IndexFormat index_format);
// NumRenderTargets, RTVFormats, DSVFormat.
UpdateStatus UpdateRenderTargetFormats();
RegisterFile* register_file_ = nullptr;
ui::d3d12::D3D12Context* context_ = nullptr;
@ -68,22 +127,82 @@ class PipelineCache {
// All loaded shaders mapped by their guest hash key.
std::unordered_map<uint64_t, D3D12Shader*> shader_map_;
// Root signatures for different descriptor counts.
std::unordered_map<uint32_t, ID3D12RootSignature*> root_signatures_;
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
const D3D12Shader* pixel_shader);
// Hash state used to incrementally produce pipeline hashes during update.
// By the time the full update pass has run the hash will represent the
// current state in a way that can uniquely identify the produced
// ID3D12PipelineState.
XXH64_state_t hash_state_;
// A pipeline state object paired with the root signature it is used with.
struct Pipeline {
// The D3D12 pipeline state object (ownership not visible here - presumably
// held by the cache; confirm in the implementation).
ID3D12PipelineState* state;
// From root_signatures_ - not owned.
ID3D12RootSignature* root_signature;
};
// All previously generated pipelines mapped by hash.
std::unordered_map<uint64_t, Pipeline*> pipelines_;
// Sets StreamOutput, InputLayout, SampleDesc, NodeMask, CachedPSO, Flags.
Pipeline* GetPipeline(uint64_t hash_key);
// Previously used pipeline. This matches our current state settings
// and allows us to quickly(ish) reuse the pipeline if no registers have
// changed.
Pipeline* current_pipeline_ = nullptr;
// Description of the pipeline being created.
D3D12_GRAPHICS_PIPELINE_STATE_DESC update_desc_;
// Cached values for the shader stages section of the pipeline description
// (presumably compared against by UpdateShaderStages - confirm in the
// implementation). Construction and Reset() zero-fill the whole object.
// NOTE(review): sq_program_cntl is declared twice below, which cannot compile
// as written; this looks like removed and added diff lines shown together.
// Confirm which of the fields (primitive_type, pa_su_sc_mode_cntl and the two
// sq_program_cntl lines) belong to the final struct before relying on it.
struct UpdateShaderStagesRegisters {
PrimitiveType primitive_type;
uint32_t pa_su_sc_mode_cntl;
uint32_t sq_program_cntl;
D3D12Shader* vertex_shader;
D3D12Shader* pixel_shader;
uint32_t sq_program_cntl;
bool primitive_topology_is_line;
UpdateShaderStagesRegisters() { Reset(); }
// Zero-fills every byte of the object.
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_shader_stages_regs_;
// Cached values for the blend state section of the pipeline description.
struct UpdateBlendStateRegisters {
  // RB_COLOR_MASK restricted to the render targets that are in use.
  uint32_t color_mask;
  // Blend control; only the entries of used render targets are updated.
  uint32_t blendcontrol[4];
  bool colorcontrol_blend_enable;

  // A fresh instance starts out completely zeroed.
  UpdateBlendStateRegisters() { Reset(); }

  // Zero-fills every byte of the object.
  void Reset() {
    void* const self = this;
    std::memset(self, 0, sizeof(UpdateBlendStateRegisters));
  }
} update_blend_state_regs_;
// Cached values for the rasterizer state section of the pipeline description.
struct UpdateRasterizerStateRegisters {
  // Polygon offset values are kept in Xenos units.
  float poly_offset;
  float poly_offset_scale;
  uint8_t cull_mode;
  bool fill_mode_wireframe;
  bool front_counter_clockwise;
  bool depth_clamp_enable;

  // A fresh instance starts out completely zeroed.
  UpdateRasterizerStateRegisters() { Reset(); }

  // Zero-fills every byte of the object.
  void Reset() {
    void* const self = this;
    std::memset(self, 0, sizeof(UpdateRasterizerStateRegisters));
  }
} update_rasterizer_state_regs_;
// Cached values for the depth/stencil state section of the pipeline
// description.
struct UpdateDepthStencilStateRegisters {
  uint32_t rb_depthcontrol;
  uint32_t rb_stencilrefmask;

  // A fresh instance starts out completely zeroed.
  UpdateDepthStencilStateRegisters() { Reset(); }

  // Zero-fills every byte of the object.
  void Reset() {
    void* const self = this;
    std::memset(self, 0, sizeof(UpdateDepthStencilStateRegisters));
  }
} update_depth_stencil_state_regs_;
// Cached value for the index buffer strip cut section of the pipeline
// description.
struct UpdateIBStripCutValueRegisters {
  D3D12_INDEX_BUFFER_STRIP_CUT_VALUE ib_strip_cut_value;

  // A fresh instance starts out completely zeroed.
  UpdateIBStripCutValueRegisters() { Reset(); }

  // Zero-fills every byte of the object.
  void Reset() {
    void* const self = this;
    std::memset(self, 0, sizeof(UpdateIBStripCutValueRegisters));
  }
} update_ib_strip_cut_value_regs_;
};
} // namespace d3d12

View File

@ -164,6 +164,7 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
}
// Common declarations.
// Only up to 14 constant buffers can be used on binding tiers 1 and 2.
source.Append(
"cbuffer xe_system_constants : register(b0) {\n"
" float2 xe_viewport_inv_scale;\n"
@ -171,16 +172,16 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
" uint xe_textures_are_3d;\n"
"};\n"
"\n"
"struct XeFloatConstantPage {\n"
" float4 c[16];\n"
"};\n"
"ConstantBuffer<XeFloatConstantPage> "
"xe_float_constants[16] : register(b1);\n"
"\n"
"cbuffer xe_loop_bool_constants : register(b17) {\n"
"cbuffer xe_loop_bool_constants : register(b1) {\n"
" uint xe_bool_constants[8];\n"
" uint xe_loop_constants[32];\n"
"};\n"
"\n"
"struct XeFloatConstantPage {\n"
" float4 c[32];\n"
"};\n"
"ConstantBuffer<XeFloatConstantPage> "
"xe_float_constants[8] : register(b2);\n"
"\n");
if (is_vertex_shader()) {
@ -193,7 +194,7 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
// -1 point size means the geometry shader will use the global setting by
// default.
source.AppendFormat(
"cbuffer xe_vertex_fetch_constants : register(b18) {\n"
"cbuffer xe_vertex_fetch_constants : register(b10) {\n"
" uint2 xe_vertex_fetch[96];\n"
"};\n"
"\n"
@ -268,10 +269,6 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
for (uint32_t i = 0; i < interpolator_register_count; ++i) {
source.AppendFormat(" xe_r[%u] = xe_input.interpolators[%u];\n", i, i);
}
// No need to write zero to every output because in case an output is
// completely unused, writing to that render target will be disabled in the
// blending state (in Halo 3, one important render target is destroyed by a
// shader not writing to one of the outputs otherwise).
// TODO(Triang3l): ps_param_gen.
}
@ -581,8 +578,8 @@ void HlslShaderTranslator::EmitLoadOperand(size_t src_index,
EmitSource("xe_r[%u]", op.storage_index);
break;
case InstructionStorageSource::kConstantFloat:
EmitSource("xe_float_constants[%u].c[%u]", op.storage_index >> 4,
op.storage_index & 15);
EmitSource("xe_float_constants[%u].c[%u]", op.storage_index >> 5,
op.storage_index & 31);
break;
case InstructionStorageSource::kConstantInt:
EmitSource("xe_loop_constants[%u]", op.storage_index);
@ -602,7 +599,7 @@ void HlslShaderTranslator::EmitLoadOperand(size_t src_index,
break;
case InstructionStorageSource::kConstantFloat:
EmitSource(
"xe_float_constants[xe_src_index >> 4u].c[xe_src_index & 15u]");
"xe_float_constants[xe_src_index >> 5u].c[xe_src_index & 31u]");
break;
case InstructionStorageSource::kConstantInt:
EmitSource("xe_loop_constants[xe_src_index]");