[D3D12] Pipeline cache
This commit is contained in:
parent
4f7edff19d
commit
ae7ff58f81
|
@ -122,6 +122,12 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
// Doesn't actually draw.
|
||||
return true;
|
||||
}
|
||||
if ((regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 &&
|
||||
primitive_type != PrimitiveType::kPointList &&
|
||||
primitive_type != PrimitiveType::kRectangleList) {
|
||||
// Both sides are culled - can't reproduce this with rasterizer state.
|
||||
return true;
|
||||
}
|
||||
|
||||
// Shaders will have already been defined by previous loads.
|
||||
// We need them to do just about anything so validate here.
|
||||
|
@ -131,9 +137,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
// Always need a vertex shader.
|
||||
return false;
|
||||
}
|
||||
// Depth-only mode doesn't need a pixel shader (we'll use a fake one).
|
||||
// Depth-only mode doesn't need a pixel shader.
|
||||
if (enable_mode == xenos::ModeControl::kDepth) {
|
||||
// Use a dummy pixel shader when required.
|
||||
pixel_shader = nullptr;
|
||||
} else if (!pixel_shader) {
|
||||
// Need a pixel shader in normal color mode.
|
||||
|
@ -142,8 +147,13 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
|
||||
bool full_update = BeginFrame();
|
||||
|
||||
ID3D12PipelineState* pipeline;
|
||||
ID3D12RootSignature* root_signature;
|
||||
auto pipeline_status = pipeline_cache_->ConfigurePipeline(
|
||||
vertex_shader, pixel_shader, primitive_type);
|
||||
vertex_shader, pixel_shader, primitive_type,
|
||||
index_buffer_info != nullptr ? index_buffer_info->format :
|
||||
IndexFormat::kInt16,
|
||||
&pipeline, &root_signature);
|
||||
if (pipeline_status == PipelineCache::UpdateStatus::kError) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -28,6 +28,10 @@ class D3D12Shader : public Shader {
|
|||
const uint8_t* GetDXBC() const;
|
||||
size_t GetDXBCSize() const;
|
||||
|
||||
// TODO(Triang3l): Real texture counts.
|
||||
uint32_t GetTextureSRVCount() const { return 0; }
|
||||
uint32_t GetSamplerCount() const { return 0; }
|
||||
|
||||
private:
|
||||
ID3DBlob* blob_ = nullptr;
|
||||
};
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "xenia/gpu/d3d12/pipeline_cache.h"
|
||||
|
||||
#include <cinttypes>
|
||||
#include <cmath>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/logging.h"
|
||||
|
@ -55,14 +56,70 @@ D3D12Shader* PipelineCache::LoadShader(ShaderType shader_type,
|
|||
|
||||
// Selects the pipeline state object and root signature to use for a draw.
//
// UpdateState is run first to refresh the shadowed state and rebuild the
// running hash. If nothing changed, the current pipeline is reused; otherwise
// the hash digest is used as the key to fetch (or create) a pipeline through
// GetPipeline.
//
// vertex_shader / pixel_shader: shaders for the draw, forwarded to UpdateState.
// primitive_type / index_format: draw parameters, forwarded to UpdateState.
// pipeline_out / root_signature_out: must be non-null; written only when the
//   returned status is not kError.
//
// Returns the status reported by UpdateState, or kError if no pipeline could
// be obtained.
PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
    D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
    PrimitiveType primitive_type, IndexFormat index_format,
    ID3D12PipelineState** pipeline_out,
    ID3D12RootSignature** root_signature_out) {
#if FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

  assert_not_null(pipeline_out);
  assert_not_null(root_signature_out);

  Pipeline* pipeline = nullptr;
  auto update_status = UpdateState(vertex_shader, pixel_shader, primitive_type,
                                   index_format);
  switch (update_status) {
    case UpdateStatus::kCompatible:
      // Requested pipeline is compatible with our previous one, so use that.
      // Note that there still may be dynamic state that needs updating.
      pipeline = current_pipeline_;
      break;
    case UpdateStatus::kMismatch:
      // Pipeline state has changed. We need to either create a new one or find
      // an old one that matches.
      current_pipeline_ = nullptr;
      break;
    case UpdateStatus::kError:
      // Error updating state - bail out.
      // We are in an indeterminate state, so reset things for the next attempt.
      current_pipeline_ = nullptr;
      return update_status;
  }

  if (!pipeline) {
    // Should have a hash key produced by the UpdateState pass.
    uint64_t hash_key = XXH64_digest(&hash_state_);
    pipeline = GetPipeline(hash_key);
    // Stored even when null so the next call starts from a clean state.
    current_pipeline_ = pipeline;
    if (!pipeline) {
      // Unable to create pipeline.
      return UpdateStatus::kError;
    }
  }

  *pipeline_out = pipeline->state;
  *root_signature_out = pipeline->root_signature;
  return update_status;
}
|
||||
|
||||
void PipelineCache::ClearCache() {
|
||||
// Remove references to the current pipeline.
|
||||
current_pipeline_ = nullptr;
|
||||
|
||||
// Destroy all pipelines.
|
||||
for (auto it : pipelines_) {
|
||||
it.second->state->Release();
|
||||
delete it.second;
|
||||
}
|
||||
pipelines_.clear();
|
||||
COUNT_profile_set("gpu/pipeline_cache/pipelines", 0);
|
||||
|
||||
// Destroy all root signatures.
|
||||
for (auto it : root_signatures_) {
|
||||
it.second->Release();
|
||||
}
|
||||
root_signatures_.clear();
|
||||
|
||||
// Destroy all shaders.
|
||||
for (auto it : shader_map_) {
|
||||
delete it.second;
|
||||
|
@ -121,7 +178,7 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader,
|
|||
|
||||
PipelineCache::UpdateStatus PipelineCache::UpdateState(
|
||||
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
|
||||
PrimitiveType primitive_type) {
|
||||
PrimitiveType primitive_type, IndexFormat index_format) {
|
||||
bool mismatch = false;
|
||||
|
||||
// Reset hash so we can build it up.
|
||||
|
@ -136,11 +193,21 @@ PipelineCache::UpdateStatus PipelineCache::UpdateState(
|
|||
mismatch = true; \
|
||||
} \
|
||||
}
|
||||
|
||||
UpdateStatus status;
|
||||
status = UpdateShaderStages(vertex_shader, pixel_shader, primitive_type);
|
||||
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update shader stages");
|
||||
|
||||
status = UpdateBlendState(pixel_shader);
|
||||
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state");
|
||||
status = UpdateRasterizerState(primitive_type);
|
||||
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state");
|
||||
status = UpdateDepthStencilState();
|
||||
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");
|
||||
status = UpdateIBStripCutValue(index_format);
|
||||
CHECK_UPDATE_STATUS(status, mismatch,
|
||||
"Unable to update index buffer strip cut value");
|
||||
status = UpdateRenderTargetFormats();
|
||||
CHECK_UPDATE_STATUS(status, mismatch,
|
||||
"Unable to update render target formats");
|
||||
#undef CHECK_UPDATE_STATUS
|
||||
|
||||
return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
|
||||
|
@ -160,16 +227,20 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
|
|||
0x000FF100 ||
|
||||
register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
|
||||
|
||||
bool dirty = false;
|
||||
dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl,
|
||||
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
|
||||
bool dirty = current_pipeline_ == nullptr;
|
||||
dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
|
||||
dirty |= regs.vertex_shader != vertex_shader;
|
||||
dirty |= regs.pixel_shader != pixel_shader;
|
||||
dirty |= regs.primitive_type != primitive_type;
|
||||
regs.vertex_shader = vertex_shader;
|
||||
regs.pixel_shader = pixel_shader;
|
||||
regs.primitive_type = primitive_type;
|
||||
// Points are emulated via a geometry shader because Direct3D 10+ doesn't
|
||||
// support point sizes other than 1.
|
||||
bool primitive_topology_is_line =
|
||||
primitive_type == PrimitiveType::kLineList ||
|
||||
primitive_type == PrimitiveType::kLineStrip ||
|
||||
primitive_type == PrimitiveType::kLineLoop ||
|
||||
primitive_type == PrimitiveType::k2DLineStrip;
|
||||
dirty |= regs.primitive_topology_is_line != primitive_topology_is_line;
|
||||
XXH64_update(&hash_state_, ®s, sizeof(regs));
|
||||
if (!dirty) {
|
||||
return UpdateStatus::kCompatible;
|
||||
|
@ -177,22 +248,630 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
|
|||
|
||||
xenos::xe_gpu_program_cntl_t sq_program_cntl;
|
||||
sq_program_cntl.dword_0 = regs.sq_program_cntl;
|
||||
|
||||
if (!vertex_shader->is_translated() &&
|
||||
!TranslateShader(vertex_shader, sq_program_cntl)) {
|
||||
XELOGE("Failed to translate the vertex shader!");
|
||||
return UpdateStatus::kError;
|
||||
}
|
||||
|
||||
if (pixel_shader && !pixel_shader->is_translated() &&
|
||||
if (pixel_shader != nullptr && !pixel_shader->is_translated() &&
|
||||
!TranslateShader(pixel_shader, sq_program_cntl)) {
|
||||
XELOGE("Failed to translate the pixel shader!");
|
||||
return UpdateStatus::kError;
|
||||
}
|
||||
|
||||
update_desc_.VS.pShaderBytecode = vertex_shader->GetDXBC();
|
||||
update_desc_.VS.BytecodeLength = vertex_shader->GetDXBCSize();
|
||||
if (pixel_shader != nullptr) {
|
||||
update_desc_.PS.pShaderBytecode = pixel_shader->GetDXBC();
|
||||
update_desc_.PS.BytecodeLength = pixel_shader->GetDXBCSize();
|
||||
} else {
|
||||
update_desc_.PS.pShaderBytecode = nullptr;
|
||||
update_desc_.PS.BytecodeLength = 0;
|
||||
}
|
||||
update_desc_.DS.pShaderBytecode = nullptr;
|
||||
update_desc_.DS.BytecodeLength = 0;
|
||||
update_desc_.HS.pShaderBytecode = nullptr;
|
||||
update_desc_.HS.BytecodeLength = 0;
|
||||
// TODO(Triang3l): Geometry shaders.
|
||||
update_desc_.GS.pShaderBytecode = nullptr;
|
||||
update_desc_.GS.BytecodeLength = 0;
|
||||
update_desc_.pRootSignature = GetRootSignature(vertex_shader, pixel_shader);
|
||||
if (update_desc_.pRootSignature == nullptr) {
|
||||
return UpdateStatus::kError;
|
||||
}
|
||||
update_desc_.PrimitiveTopologyType =
|
||||
primitive_topology_is_line ? D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE :
|
||||
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
|
||||
|
||||
return UpdateStatus::kMismatch;
|
||||
}
|
||||
|
||||
// Refreshes the blend portion of the pipeline description.
//
// Computes the effective color write mask and blend enable, shadows them (and
// the per-target blend control registers that are actually in use) into
// update_blend_state_regs_, feeds that structure into the running pipeline
// hash, and - only if something changed - rewrites update_desc_.BlendState and
// update_desc_.SampleMask.
//
// pixel_shader: may be null; in that case all color writes are masked off.
//
// Returns kCompatible when the shadowed state is unchanged, kMismatch after
// the description has been rewritten.
PipelineCache::UpdateStatus PipelineCache::UpdateBlendState(
    D3D12Shader* pixel_shader) {
  auto& regs = update_blend_state_regs_;

  bool dirty = current_pipeline_ == nullptr;
  uint32_t color_mask;
  if (pixel_shader != nullptr) {
    color_mask = register_file_->values[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF;
    // If the pixel shader doesn't write to a render target, writing to it is
    // disabled in the blend state. Otherwise, in Halo 3, one important render
    // target is destroyed by a shader not writing to one of the outputs.
    for (uint32_t i = 0; i < 4; ++i) {
      if (!pixel_shader->writes_color_target(i)) {
        color_mask &= ~(0xF << (i * 4));
      }
    }
  } else {
    color_mask = 0;
  }
  dirty |= regs.color_mask != color_mask;
  regs.color_mask = color_mask;
  // The blend disable bit (0x20) belongs to RB_COLORCONTROL, which is what the
  // colorcontrol_blend_enable shadow field tracks. In RB_COLOR_MASK that bit
  // is one of the per-target write-enable bits that color_mask is built from.
  bool blend_enable =
      color_mask != 0 &&
      !(register_file_->values[XE_GPU_REG_RB_COLORCONTROL].u32 & 0x20);
  dirty |= regs.colorcontrol_blend_enable != blend_enable;
  regs.colorcontrol_blend_enable = blend_enable;
  static const Register kBlendControlRegs[] = {
      XE_GPU_REG_RB_BLENDCONTROL_0, XE_GPU_REG_RB_BLENDCONTROL_1,
      XE_GPU_REG_RB_BLENDCONTROL_2, XE_GPU_REG_RB_BLENDCONTROL_3};
  for (uint32_t i = 0; i < 4; ++i) {
    if (blend_enable && (color_mask & (0xF << (i * 4)))) {
      dirty |= SetShadowRegister(&regs.blendcontrol[i], kBlendControlRegs[i]);
    } else {
      // Zero out blend color for unused render targets and when not blending
      // for a stable hash.
      regs.blendcontrol[i] = 0;
    }
  }
  XXH64_update(&hash_state_, &regs, sizeof(regs));
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  update_desc_.BlendState.AlphaToCoverageEnable = FALSE;
  // NOTE(review): per the D3D12_BLEND_DESC documentation, only RenderTarget[0]
  // is honored while IndependentBlendEnable is FALSE, yet the loop below fills
  // distinct per-target state and write masks - confirm whether this should be
  // TRUE.
  update_desc_.BlendState.IndependentBlendEnable = FALSE;
  // NOTE(review): the factor fields are 5 bits wide and the op fields 3 bits
  // wide, but the tables below have 17 and 5 entries - register values outside
  // the tables would index out of bounds; confirm the guest can't produce them.
  static const D3D12_BLEND kBlendFactorMap[] = {
      /*  0 */ D3D12_BLEND_ZERO,
      /*  1 */ D3D12_BLEND_ONE,
      /*  2 */ D3D12_BLEND_ZERO,  // ?
      /*  3 */ D3D12_BLEND_ZERO,  // ?
      /*  4 */ D3D12_BLEND_SRC_COLOR,
      /*  5 */ D3D12_BLEND_INV_SRC_COLOR,
      /*  6 */ D3D12_BLEND_SRC_ALPHA,
      /*  7 */ D3D12_BLEND_INV_SRC_ALPHA,
      /*  8 */ D3D12_BLEND_DEST_COLOR,
      /*  9 */ D3D12_BLEND_INV_DEST_COLOR,
      /* 10 */ D3D12_BLEND_DEST_ALPHA,
      /* 11 */ D3D12_BLEND_INV_DEST_ALPHA,
      /* 12 */ D3D12_BLEND_BLEND_FACTOR,      // CONSTANT_COLOR
      /* 13 */ D3D12_BLEND_INV_BLEND_FACTOR,  // ONE_MINUS_CONSTANT_COLOR
      /* 14 */ D3D12_BLEND_BLEND_FACTOR,      // CONSTANT_ALPHA
      /* 15 */ D3D12_BLEND_INV_BLEND_FACTOR,  // ONE_MINUS_CONSTANT_ALPHA
      /* 16 */ D3D12_BLEND_SRC_ALPHA_SAT,
  };
  static const D3D12_BLEND_OP kBlendOpMap[] = {
      /*  0 */ D3D12_BLEND_OP_ADD,
      /*  1 */ D3D12_BLEND_OP_SUBTRACT,
      /*  2 */ D3D12_BLEND_OP_MIN,
      /*  3 */ D3D12_BLEND_OP_MAX,
      /*  4 */ D3D12_BLEND_OP_REV_SUBTRACT,
  };
  for (uint32_t i = 0; i < 4; ++i) {
    auto& blend_desc = update_desc_.BlendState.RenderTarget[i];
    if (blend_enable && (color_mask & (0xF << (i * 4)))) {
      uint32_t blend_control = regs.blendcontrol[i];
      // update_desc_ is reused between calls, so the flag must be set
      // explicitly here; otherwise a FALSE written by the other branch on an
      // earlier update would stick and the factors below would have no effect.
      blend_desc.BlendEnable = TRUE;
      // A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND
      blend_desc.SrcBlend = kBlendFactorMap[(blend_control & 0x0000001F) >> 0];
      // A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND
      blend_desc.DestBlend = kBlendFactorMap[(blend_control & 0x00001F00) >> 8];
      // A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN
      blend_desc.BlendOp = kBlendOpMap[(blend_control & 0x000000E0) >> 5];
      // A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND
      blend_desc.SrcBlendAlpha =
          kBlendFactorMap[(blend_control & 0x001F0000) >> 16];
      // A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND
      blend_desc.DestBlendAlpha =
          kBlendFactorMap[(blend_control & 0x1F000000) >> 24];
      // A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN
      blend_desc.BlendOpAlpha = kBlendOpMap[(blend_control & 0x00E00000) >> 21];
    } else {
      blend_desc.BlendEnable = FALSE;
      blend_desc.SrcBlend = D3D12_BLEND_ONE;
      blend_desc.DestBlend = D3D12_BLEND_ZERO;
      blend_desc.BlendOp = D3D12_BLEND_OP_ADD;
      blend_desc.SrcBlendAlpha = D3D12_BLEND_ONE;
      blend_desc.DestBlendAlpha = D3D12_BLEND_ZERO;
      blend_desc.BlendOpAlpha = D3D12_BLEND_OP_ADD;
    }
    blend_desc.LogicOpEnable = FALSE;
    blend_desc.LogicOp = D3D12_LOGIC_OP_NOOP;
    // Each render target owns one nibble of the combined color mask.
    blend_desc.RenderTargetWriteMask = (color_mask >> (i * 4)) & 0xF;
  }
  update_desc_.SampleMask = UINT_MAX;

  return UpdateStatus::kMismatch;
}
|
||||
|
||||
// Refreshes the rasterizer portion of the pipeline description.
//
// Derives cull mode, fill mode, polygon offset, winding order and depth clamp
// from the guest registers, shadows them into update_rasterizer_state_regs_,
// feeds that structure into the running pipeline hash, and - only if something
// changed - rewrites update_desc_.RasterizerState.
//
// primitive_type: point lists and rectangle lists are never culled.
//
// Returns kCompatible when the shadowed state is unchanged, kMismatch after
// the description has been rewritten.
PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState(
    PrimitiveType primitive_type) {
  auto& regs = update_rasterizer_state_regs_;

  bool dirty = current_pipeline_ == nullptr;
  // All of the bits decoded below (cull mode, winding, fill mode, polygon
  // offset enables) come from PA_SU_SC_MODE_CNTL - the same register IssueDraw
  // checks for both-sides culling - not from the color write mask.
  uint32_t pa_su_sc_mode_cntl =
      register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
  uint32_t cull_mode = pa_su_sc_mode_cntl & 0x3;
  if (primitive_type == PrimitiveType::kPointList ||
      primitive_type == PrimitiveType::kRectangleList) {
    cull_mode = 0;
  }
  dirty |= regs.cull_mode != cull_mode;
  regs.cull_mode = cull_mode;
  // Because Direct3D 12 doesn't support per-side fill mode and depth bias, the
  // values to use depends on the current culling state.
  // If front faces are culled, use the ones for back faces.
  // If back faces are culled, it's the other way around.
  // If culling is not enabled, assume the developer wanted to draw things in a
  // more special way - so if one side is wireframe or has a depth bias, then
  // that's intentional (if both sides have a depth bias, the one for the front
  // faces is used, though it's unlikely that they will ever be different -
  // SetRenderState sets the same offset for both sides).
  // Points fill mode (0) also isn't supported in Direct3D 12, but assume the
  // developer didn't want to fill the whole primitive and use wireframe (like
  // Xenos fill mode 1).
  // Here we also assume that only one side is culled - if two sides are culled,
  // the D3D12 command processor will drop such draw early.
  bool fill_mode_wireframe = false;
  float poly_offset = 0.0f, poly_offset_scale = 0.0f;
  if (!(cull_mode & 1)) {
    // Front faces aren't culled.
    uint32_t fill_mode = (pa_su_sc_mode_cntl >> 5) & 0x7;
    if (fill_mode == 0 || fill_mode == 1) {
      fill_mode_wireframe = true;
    }
    if ((pa_su_sc_mode_cntl >> 11) & 0x1) {
      poly_offset =
          register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
      poly_offset_scale =
          register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
    }
  }
  if (!(cull_mode & 2)) {
    // Back faces aren't culled.
    uint32_t fill_mode = (pa_su_sc_mode_cntl >> 8) & 0x7;
    if (fill_mode == 0 || fill_mode == 1) {
      fill_mode_wireframe = true;
    }
    // Prefer front depth bias because in general, front faces are the ones that
    // are rendered (except for shadow volumes).
    if (((pa_su_sc_mode_cntl >> 12) & 0x1) && poly_offset == 0.0f &&
        poly_offset_scale == 0.0f) {
      poly_offset =
          register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
      poly_offset_scale =
          register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
    }
  }
  if (((pa_su_sc_mode_cntl >> 3) & 0x3) == 0) {
    // Fill mode is disabled.
    fill_mode_wireframe = false;
  }
  dirty |= regs.fill_mode_wireframe != fill_mode_wireframe;
  regs.fill_mode_wireframe = fill_mode_wireframe;
  dirty |= regs.poly_offset != poly_offset;
  regs.poly_offset = poly_offset;
  dirty |= regs.poly_offset_scale != poly_offset_scale;
  regs.poly_offset_scale = poly_offset_scale;
  bool front_counter_clockwise = !(pa_su_sc_mode_cntl & 0x4);
  dirty |= regs.front_counter_clockwise != front_counter_clockwise;
  regs.front_counter_clockwise = front_counter_clockwise;
  uint32_t pa_cl_clip_cntl =
      register_file_->values[XE_GPU_REG_PA_CL_CLIP_CNTL].u32;
  // CLIP_DISABLE
  bool depth_clamp_enable = !!(pa_cl_clip_cntl & (1 << 16));
  // TODO(DrChat): This seem to differ. Need to examine this.
  // https://github.com/decaf-emu/decaf-emu/blob/c017a9ff8128852fb9a5da19466778a171cea6e1/src/libdecaf/src/gpu/latte_registers_pa.h#L11
  // ZCLIP_NEAR_DISABLE
  // bool depth_clamp_enable = !(pa_cl_clip_cntl & (1 << 26));
  // RASTERIZER_DISABLE
  // Disable rendering in command processor if regs.pa_cl_clip_cntl & (1 << 22)?
  dirty |= regs.depth_clamp_enable != depth_clamp_enable;
  regs.depth_clamp_enable = depth_clamp_enable;
  XXH64_update(&hash_state_, &regs, sizeof(regs));
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  update_desc_.RasterizerState.FillMode =
      fill_mode_wireframe ? D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID;
  if (cull_mode & 1) {
    update_desc_.RasterizerState.CullMode = D3D12_CULL_MODE_FRONT;
  } else if (cull_mode & 2) {
    update_desc_.RasterizerState.CullMode = D3D12_CULL_MODE_BACK;
  } else {
    update_desc_.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
  }
  update_desc_.RasterizerState.FrontCounterClockwise =
      front_counter_clockwise ? TRUE : FALSE;
  // Conversion based on the calculations in Call of Duty 4 and the values it
  // writes to the registers, and also on:
  // https://github.com/mesa3d/mesa/blob/54ad9b444c8e73da498211870e785239ad3ff1aa/src/gallium/drivers/radeonsi/si_state.c#L943
  // Call of Duty 4 sets the constant bias of 1/32768 and the slope scale of 32.
  // However, it's calculated from a console variable in 2 parts: first it's
  // divided by 65536, and then it's multiplied by 2.
  // TODO(Triang3l): Find the best scale. According to si_state.c, the value in
  // the register should be divided by 2 to get the value suitable for PC
  // graphics APIs if the depth buffer is 24-bit. However, even multiplying by
  // 65536 rather than 32768 still doesn't remove shadow acne in Bomberman Live
  // completely. Maybe 131072 would work the best.
  // Using ceil here just in case a game wants the offset but passes a value
  // that is too small - it's better to apply more offset than to make depth
  // fighting worse or to disable the offset completely (Direct3D 12 takes an
  // integer value).
  update_desc_.RasterizerState.DepthBias =
      int32_t(std::ceil(std::abs(poly_offset) * 131072.0f));
  // The magnitude was rounded up above; restore the sign afterwards.
  update_desc_.RasterizerState.DepthBias *= poly_offset < 0.0f ? -1 : 1;
  update_desc_.RasterizerState.DepthBiasClamp = 0.0f;
  update_desc_.RasterizerState.SlopeScaledDepthBias =
      poly_offset_scale * (1.0f / 16.0f);
  update_desc_.RasterizerState.DepthClipEnable =
      !depth_clamp_enable ? TRUE : FALSE;
  update_desc_.RasterizerState.MultisampleEnable = FALSE;
  update_desc_.RasterizerState.AntialiasedLineEnable = FALSE;
  update_desc_.RasterizerState.ForcedSampleCount = 0;
  update_desc_.RasterizerState.ConservativeRaster =
      D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;

  return UpdateStatus::kMismatch;
}
|
||||
|
||||
// Refreshes the depth/stencil portion of the pipeline description.
//
// Shadows RB_DEPTHCONTROL and RB_STENCILREFMASK, feeds the shadow structure
// into the running pipeline hash, and - only if something changed - decodes
// the registers into update_desc_.DepthStencilState.
//
// Returns kCompatible when both registers are unchanged, kMismatch after the
// description has been rewritten.
PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
  auto& regs = update_depth_stencil_state_regs_;

  bool dirty = current_pipeline_ == nullptr;
  dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
  dirty |=
      SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
  XXH64_update(&hash_state_, &regs, sizeof(regs));
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  const auto depth_control = regs.rb_depthcontrol;
  const auto stencil_ref_mask = regs.rb_stencilrefmask;
  auto& ds_desc = update_desc_.DepthStencilState;

  // Both comparison functions and stencil operations are 3-bit fields whose
  // value plus one is used directly as the Direct3D 12 enumerator.
  auto compare_func_at = [depth_control](uint32_t shift) {
    return D3D12_COMPARISON_FUNC(((depth_control >> shift) & 0x7) + 1);
  };
  auto stencil_op_at = [depth_control](uint32_t shift) {
    return D3D12_STENCIL_OP(((depth_control >> shift) & 0x7) + 1);
  };

  // Depth test, depth write and depth comparison.
  ds_desc.DepthEnable = (depth_control & 0x2) ? TRUE : FALSE;
  ds_desc.DepthWriteMask = (depth_control & 0x4) ? D3D12_DEPTH_WRITE_MASK_ALL
                                                 : D3D12_DEPTH_WRITE_MASK_ZERO;
  ds_desc.DepthFunc = compare_func_at(4);

  // Stencil enable and masks.
  ds_desc.StencilEnable = (depth_control & 0x1) ? TRUE : FALSE;
  ds_desc.StencilReadMask = (stencil_ref_mask >> 8) & 0xFF;
  ds_desc.StencilWriteMask = (stencil_ref_mask >> 16) & 0xFF;

  // Front-facing stencil state.
  ds_desc.FrontFace.StencilFailOp = stencil_op_at(11);
  ds_desc.FrontFace.StencilDepthFailOp = stencil_op_at(17);
  ds_desc.FrontFace.StencilPassOp = stencil_op_at(14);
  ds_desc.FrontFace.StencilFunc = compare_func_at(8);

  if (depth_control & 0x80) {
    // BACKFACE_ENABLE - back faces have their own set of fields.
    ds_desc.BackFace.StencilFailOp = stencil_op_at(23);
    ds_desc.BackFace.StencilDepthFailOp = stencil_op_at(29);
    ds_desc.BackFace.StencilPassOp = stencil_op_at(26);
    ds_desc.BackFace.StencilFunc = compare_func_at(20);
  } else {
    // Without it, back faces mirror the front face state.
    ds_desc.BackFace = ds_desc.FrontFace;
  }
  // TODO(Triang3l): EARLY_Z_ENABLE (needs to be enabled in shaders, but alpha
  // test is dynamic - should be enabled anyway if there's no alpha test,
  // discarding and depth output).

  return UpdateStatus::kMismatch;
}
|
||||
|
||||
// Refreshes the index buffer strip cut value of the pipeline description.
//
// The strip cut value is disabled unless bit 21 of PA_SU_SC_MODE_CNTL is set,
// in which case it is 0xFFFFFFFF for 32-bit indices and 0xFFFF otherwise.
//
// index_format: index format of the draw.
//
// Returns kCompatible when the value is unchanged, kMismatch after
// update_desc_.IBStripCutValue has been rewritten.
PipelineCache::UpdateStatus PipelineCache::UpdateIBStripCutValue(
    IndexFormat index_format) {
  auto& regs = update_ib_strip_cut_value_regs_;

  bool dirty = current_pipeline_ == nullptr;
  D3D12_INDEX_BUFFER_STRIP_CUT_VALUE ib_strip_cut_value =
      D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
  if (register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)) {
    ib_strip_cut_value = index_format == IndexFormat::kInt32
                             ? D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF
                             : D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
  }
  dirty |= regs.ib_strip_cut_value != ib_strip_cut_value;
  regs.ib_strip_cut_value = ib_strip_cut_value;
  // The strip cut value is part of the pipeline description, so it has to
  // contribute to the cache key like the other state blocks do - otherwise two
  // pipelines differing only in this value would share one hash and the wrong
  // cached pipeline would be returned.
  XXH64_update(&hash_state_, &regs, sizeof(regs));
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  update_desc_.IBStripCutValue = ib_strip_cut_value;

  // TODO(Triang3l): Geometry shaders for non-0xFFFF values if they are used.

  return UpdateStatus::kMismatch;
}
|
||||
|
||||
// Fills in the render target part of the pipeline description.
//
// Nothing is shadowed or hashed here: the description is only (re)written
// when there is no current pipeline, and it is written with no render targets
// and an unknown depth/stencil format.
//
// Returns kCompatible while a current pipeline exists, kMismatch otherwise.
PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargetFormats() {
  if (current_pipeline_ != nullptr) {
    // A pipeline is already bound, so there is nothing to refresh.
    return UpdateStatus::kCompatible;
  }

  // TODO(Triang3l): Set the formats when RT cache is added.
  update_desc_.NumRenderTargets = 0;
  update_desc_.DSVFormat = DXGI_FORMAT_UNKNOWN;

  return UpdateStatus::kMismatch;
}
|
||||
|
||||
// Returns the cached pipeline for hash_key, creating it from update_desc_ if
// it is not in the cache yet.
//
// hash_key: digest of the state accumulated by the UpdateState pass.
//
// Returns nullptr (after logging) if the device fails to create the pipeline
// state object; nothing is added to the cache in that case.
PipelineCache::Pipeline* PipelineCache::GetPipeline(uint64_t hash_key) {
  // Reuse a previously created pipeline when the key is already known.
  auto cached = pipelines_.find(hash_key);
  if (cached != pipelines_.end()) {
    return cached->second;
  }

  // Fill in the parts of the description that no Update* stage touches.
  auto& stream_output = update_desc_.StreamOutput;
  stream_output.pSODeclaration = nullptr;
  stream_output.NumEntries = 0;
  stream_output.pBufferStrides = nullptr;
  stream_output.NumStrides = 0;
  stream_output.RasterizedStream = 0;

  auto& input_layout = update_desc_.InputLayout;
  input_layout.pInputElementDescs = nullptr;
  input_layout.NumElements = 0;

  auto& sample_desc = update_desc_.SampleDesc;
  sample_desc.Count = 1;
  sample_desc.Quality = 0;

  update_desc_.NodeMask = 0;

  // TODO(Triang3l): Cache create pipelines.
  auto& cached_pso = update_desc_.CachedPSO;
  cached_pso.pCachedBlob = nullptr;
  cached_pso.CachedBlobSizeInBytes = 0;

  update_desc_.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;

  // Ask the device to build the state object.
  auto device = context_->GetD3D12Provider()->GetDevice();
  ID3D12PipelineState* state;
  HRESULT create_result =
      device->CreateGraphicsPipelineState(&update_desc_, IID_PPV_ARGS(&state));
  if (FAILED(create_result)) {
    XELOGE("Failed to create graphics pipeline state");
    return nullptr;
  }
  // TODO(Triang3l): Set the name for the pipeline, with shader hashes.

  // Remember the new pipeline, together with the root signature it was built
  // with, under the hash key so later lookups can find it.
  Pipeline* created = new Pipeline;
  created->state = state;
  created->root_signature = update_desc_.pRootSignature;
  pipelines_.insert({hash_key, created});
  return created;
}
|
||||
|
||||
ID3D12RootSignature* PipelineCache::GetRootSignature(
|
||||
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) {
|
||||
uint32_t pixel_textures =
|
||||
pixel_shader != nullptr ? pixel_shader->GetTextureSRVCount() : 0;
|
||||
uint32_t pixel_samplers =
|
||||
pixel_shader != nullptr ? pixel_shader->GetSamplerCount() : 0;
|
||||
uint32_t vertex_textures = vertex_shader->GetTextureSRVCount();
|
||||
uint32_t vertex_samplers = vertex_shader->GetSamplerCount();
|
||||
// Max 96 textures (if all kinds of tfetch instructions are used for all fetch
|
||||
// registers) and 32 samplers (one sampler per used fetch), but different
|
||||
// shader stages have different texture sets.
|
||||
uint32_t index = pixel_textures | (pixel_samplers << 7) |
|
||||
(vertex_textures << 12) | (vertex_samplers << 19);
|
||||
|
||||
// Try an existing root signature.
|
||||
auto it = root_signatures_.find(index);
|
||||
if (it != root_signatures_.end()) {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
// Create a new one.
|
||||
D3D12_ROOT_SIGNATURE_DESC desc;
|
||||
D3D12_ROOT_PARAMETER parameters[RootParameter::kCountWithTwoStageTextures];
|
||||
D3D12_DESCRIPTOR_RANGE ranges[RootParameter::kCountWithTwoStageTextures];
|
||||
desc.NumParameters = UINT(RootParameter::kCountNoTextures);
|
||||
desc.pParameters = parameters;
|
||||
desc.NumStaticSamplers = 0;
|
||||
desc.pStaticSamplers = nullptr;
|
||||
desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
|
||||
|
||||
// Vertex constants - float and fetch.
|
||||
{
|
||||
auto& parameter = parameters[size_t(RootParameter::kVertexConstants)];
|
||||
auto& range = ranges[size_t(RootParameter::kVertexConstants)];
|
||||
parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
parameter.DescriptorTable.NumDescriptorRanges = 1;
|
||||
parameter.DescriptorTable.pDescriptorRanges = ⦥
|
||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
|
||||
range.NumDescriptors = 9;
|
||||
range.BaseShaderRegister = 2;
|
||||
range.RegisterSpace = 0;
|
||||
range.OffsetInDescriptorsFromTableStart = 0;
|
||||
}
|
||||
|
||||
// Pixel constants - float.
|
||||
{
|
||||
auto& parameter = parameters[size_t(RootParameter::kPixelConstants)];
|
||||
auto& range = ranges[size_t(RootParameter::kPixelConstants)];
|
||||
parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
parameter.DescriptorTable.NumDescriptorRanges = 1;
|
||||
parameter.DescriptorTable.pDescriptorRanges = ⦥
|
||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
|
||||
range.NumDescriptors = 8;
|
||||
range.BaseShaderRegister = 2;
|
||||
range.RegisterSpace = 0;
|
||||
range.OffsetInDescriptorsFromTableStart = 0;
|
||||
}
|
||||
|
||||
// Common constants - system and loop/bool.
|
||||
{
|
||||
auto& parameter = parameters[size_t(RootParameter::kCommonConstants)];
|
||||
auto& range = ranges[size_t(RootParameter::kCommonConstants)];
|
||||
parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
parameter.DescriptorTable.NumDescriptorRanges = 1;
|
||||
parameter.DescriptorTable.pDescriptorRanges = ⦥
|
||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
|
||||
range.NumDescriptors = 2;
|
||||
range.BaseShaderRegister = 0;
|
||||
range.RegisterSpace = 0;
|
||||
range.OffsetInDescriptorsFromTableStart = 0;
|
||||
}
|
||||
|
||||
// Virtual shared memory.
|
||||
{
|
||||
auto& parameter = parameters[size_t(RootParameter::kVirtualMemory)];
|
||||
auto& range = ranges[size_t(RootParameter::kVirtualMemory)];
|
||||
parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
parameter.DescriptorTable.NumDescriptorRanges = 1;
|
||||
parameter.DescriptorTable.pDescriptorRanges = ⦥
|
||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
range.NumDescriptors = 1;
|
||||
range.BaseShaderRegister = 0;
|
||||
range.RegisterSpace = 1;
|
||||
range.OffsetInDescriptorsFromTableStart = 0;
|
||||
}
|
||||
|
||||
if (pixel_textures > 0 || vertex_textures > 0) {
|
||||
desc.NumParameters = UINT(RootParameter::kCountWithOneStageTextures);
|
||||
|
||||
// Pixel or vertex textures.
|
||||
{
|
||||
auto& parameter =
|
||||
parameters[size_t(RootParameter::kPixelOrVertexTextures)];
|
||||
auto& range = ranges[size_t(RootParameter::kPixelOrVertexTextures)];
|
||||
parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
parameter.DescriptorTable.NumDescriptorRanges = 1;
|
||||
parameter.DescriptorTable.pDescriptorRanges = ⦥
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
range.BaseShaderRegister = 0;
|
||||
range.RegisterSpace = 0;
|
||||
range.OffsetInDescriptorsFromTableStart = 0;
|
||||
if (pixel_textures > 0) {
|
||||
assert_true(pixel_samplers > 0);
|
||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
|
||||
range.NumDescriptors = pixel_textures;
|
||||
} else {
|
||||
assert_true(vertex_samplers > 0);
|
||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
|
||||
range.NumDescriptors = vertex_textures;
|
||||
}
|
||||
}
|
||||
|
||||
// Pixel or vertex samplers.
|
||||
{
|
||||
auto& parameter =
|
||||
parameters[size_t(RootParameter::kPixelOrVertexSamplers)];
|
||||
auto& range = ranges[size_t(RootParameter::kPixelOrVertexSamplers)];
|
||||
parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
parameter.DescriptorTable.NumDescriptorRanges = 1;
|
||||
parameter.DescriptorTable.pDescriptorRanges = ⦥
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
|
||||
range.BaseShaderRegister = 0;
|
||||
range.RegisterSpace = 0;
|
||||
range.OffsetInDescriptorsFromTableStart = 0;
|
||||
if (pixel_samplers > 0) {
|
||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
|
||||
range.NumDescriptors = pixel_samplers;
|
||||
} else {
|
||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
|
||||
range.NumDescriptors = vertex_samplers;
|
||||
}
|
||||
}
|
||||
|
||||
if (pixel_textures > 0 && vertex_textures > 0) {
|
||||
assert_true(vertex_samplers > 0);
|
||||
|
||||
desc.NumParameters = UINT(RootParameter::kCountWithTwoStageTextures);
|
||||
|
||||
// Vertex textures.
|
||||
{
|
||||
auto& parameter = parameters[size_t(RootParameter::kVertexTextures)];
|
||||
auto& range = ranges[size_t(RootParameter::kVertexTextures)];
|
||||
parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
parameter.DescriptorTable.NumDescriptorRanges = 1;
|
||||
parameter.DescriptorTable.pDescriptorRanges = ⦥
|
||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
range.NumDescriptors = vertex_textures;
|
||||
range.BaseShaderRegister = 0;
|
||||
range.RegisterSpace = 0;
|
||||
range.OffsetInDescriptorsFromTableStart = 0;
|
||||
}
|
||||
|
||||
// Vertex samplers.
|
||||
{
|
||||
auto& parameter = parameters[size_t(RootParameter::kVertexSamplers)];
|
||||
auto& range = ranges[size_t(RootParameter::kVertexSamplers)];
|
||||
parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
parameter.DescriptorTable.NumDescriptorRanges = 1;
|
||||
parameter.DescriptorTable.pDescriptorRanges = ⦥
|
||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
|
||||
range.NumDescriptors = vertex_samplers;
|
||||
range.BaseShaderRegister = 0;
|
||||
range.RegisterSpace = 0;
|
||||
range.OffsetInDescriptorsFromTableStart = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ID3DBlob* blob;
|
||||
ID3DBlob* error_blob = nullptr;
|
||||
if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1,
|
||||
&blob, &error_blob))) {
|
||||
XELOGE("Failed to serialize a root signature with %u pixel textures, %u "
|
||||
"pixel samplers, %u vertex textures and %u vertex samplers",
|
||||
pixel_textures, pixel_samplers, vertex_textures, vertex_samplers);
|
||||
if (error_blob != nullptr) {
|
||||
XELOGE("%s",
|
||||
reinterpret_cast<const char*>(error_blob->GetBufferPointer()));
|
||||
error_blob->Release();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
if (error_blob != nullptr) {
|
||||
error_blob->Release();
|
||||
}
|
||||
|
||||
auto device = context_->GetD3D12Provider()->GetDevice();
|
||||
ID3D12RootSignature* root_signature;
|
||||
if (FAILED(device->CreateRootSignature(0, blob->GetBufferPointer(),
|
||||
blob->GetBufferSize(),
|
||||
IID_PPV_ARGS(&root_signature)))) {
|
||||
XELOGE("Failed to create a root signature with %u pixel textures, %u pixel "
|
||||
"samplers, %u vertex textures and %u vertex samplers",
|
||||
pixel_textures, pixel_samplers, vertex_textures, vertex_samplers);
|
||||
blob->Release();
|
||||
return nullptr;
|
||||
}
|
||||
blob->Release();
|
||||
|
||||
root_signatures_.insert({index, root_signature});
|
||||
return root_signature;
|
||||
}
|
||||
|
||||
} // namespace d3d12
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -42,10 +42,57 @@ class PipelineCache {
|
|||
|
||||
UpdateStatus ConfigurePipeline(D3D12Shader* vertex_shader,
|
||||
D3D12Shader* pixel_shader,
|
||||
PrimitiveType primitive_type);
|
||||
PrimitiveType primitive_type,
|
||||
IndexFormat index_format,
|
||||
ID3D12PipelineState** pipeline_out,
|
||||
ID3D12RootSignature** root_signature_out);
|
||||
|
||||
void ClearCache();
|
||||
|
||||
// Indices of the root parameters in the root signatures built by the
// pipeline cache. The kCount* enumerators are not parameters themselves - each
// one is the total number of parameters for the corresponding texture usage
// case, and the first enumerator of the next group aliases it.
enum class RootParameter {
  // These are always present.

  // Most frequently changed (for one object drawn multiple times, for
  // instance - may contain projection matrices, also vertex offsets for
  // objects drawn in multiple parts).
  // This contains 8 pages of float constants (b2-b9) and fetch constants
  // (b10).
  kVertexConstants,
  // Less frequently changed (per-material) - 8 pages of float constants
  // (b2-b9).
  kPixelConstants,
  // Rarely changed - system constants like viewport and alpha testing (b0)
  // and loop and bool constants (b1).
  kCommonConstants,
  // Never changed - shared memory byte address buffer (t0, space1).
  kVirtualMemory,

  // Number of root parameters when no textures are fetched.
  kCountNoTextures,

  // These are there only if textures are fetched (they are changed pretty
  // frequently, but for the ease of maintenance they're at the end).
  // If the pixel shader samples textures, these are for pixel textures
  // (changed more frequently), otherwise, if the vertex shader samples
  // textures, these are for vertex textures.

  // Used textures of all types (t0+, space0).
  kPixelOrVertexTextures = kCountNoTextures,
  // Used samplers (s0+).
  kPixelOrVertexSamplers,

  // Number of root parameters when only one shader stage fetches textures.
  kCountWithOneStageTextures,

  // These are only present if both the pixel shader and the vertex shader
  // sample textures - in this case, they are for the vertex textures.

  // Used textures of all types (t0+, space0).
  kVertexTextures = kCountWithOneStageTextures,
  // Used samplers (s0+).
  kVertexSamplers,

  // Number of root parameters when both shader stages fetch textures.
  kCountWithTwoStageTextures,
};
|
||||
|
||||
private:
|
||||
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
|
||||
bool SetShadowRegister(float* dest, uint32_t register_name);
|
||||
|
@ -54,11 +101,23 @@ class PipelineCache {
|
|||
|
||||
UpdateStatus UpdateState(D3D12Shader* vertex_shader,
|
||||
D3D12Shader* pixel_shader,
|
||||
PrimitiveType primitive_type);
|
||||
PrimitiveType primitive_type,
|
||||
IndexFormat index_format);
|
||||
|
||||
// pRootSignature, VS, PS, DS, HS, GS, PrimitiveTopologyType.
|
||||
UpdateStatus UpdateShaderStages(D3D12Shader* vertex_shader,
|
||||
D3D12Shader* pixel_shader,
|
||||
PrimitiveType primitive_type);
|
||||
// BlendState, SampleMask.
|
||||
UpdateStatus UpdateBlendState(D3D12Shader* pixel_shader);
|
||||
// RasterizerState.
|
||||
UpdateStatus UpdateRasterizerState(PrimitiveType primitive_type);
|
||||
// DepthStencilState.
|
||||
UpdateStatus UpdateDepthStencilState();
|
||||
// IBStripCutValue.
|
||||
UpdateStatus UpdateIBStripCutValue(IndexFormat index_format);
|
||||
// NumRenderTargets, RTVFormats, DSVFormat.
|
||||
UpdateStatus UpdateRenderTargetFormats();
|
||||
|
||||
RegisterFile* register_file_ = nullptr;
|
||||
ui::d3d12::D3D12Context* context_ = nullptr;
|
||||
|
@ -68,22 +127,82 @@ class PipelineCache {
|
|||
// All loaded shaders mapped by their guest hash key.
|
||||
std::unordered_map<uint64_t, D3D12Shader*> shader_map_;
|
||||
|
||||
// Root signatures for different descriptor counts.
|
||||
std::unordered_map<uint32_t, ID3D12RootSignature*> root_signatures_;
|
||||
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
|
||||
const D3D12Shader* pixel_shader);
|
||||
|
||||
// Hash state used to incrementally produce pipeline hashes during update.
|
||||
// By the time the full update pass has run the hash will represent the
|
||||
// current state in a way that can uniquely identify the produced
|
||||
// ID3D12PipelineState.
|
||||
XXH64_state_t hash_state_;
|
||||
struct Pipeline {
|
||||
ID3D12PipelineState* state;
|
||||
// From root_signatures_ - not owned.
|
||||
ID3D12RootSignature* root_signature;
|
||||
};
|
||||
// All previously generated pipelines mapped by hash.
|
||||
std::unordered_map<uint64_t, Pipeline*> pipelines_;
|
||||
// Sets StreamOutput, InputLayout, SampleDesc, NodeMask, CachedPSO, Flags.
|
||||
Pipeline* GetPipeline(uint64_t hash_key);
|
||||
|
||||
// Previously used pipeline. This matches our current state settings
|
||||
// and allows us to quickly(ish) reuse the pipeline if no registers have
|
||||
// changed.
|
||||
Pipeline* current_pipeline_ = nullptr;
|
||||
|
||||
// Description of the pipeline being created.
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC update_desc_;
|
||||
|
||||
struct UpdateShaderStagesRegisters {
|
||||
PrimitiveType primitive_type;
|
||||
uint32_t pa_su_sc_mode_cntl;
|
||||
uint32_t sq_program_cntl;
|
||||
D3D12Shader* vertex_shader;
|
||||
D3D12Shader* pixel_shader;
|
||||
uint32_t sq_program_cntl;
|
||||
bool primitive_topology_is_line;
|
||||
|
||||
UpdateShaderStagesRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_shader_stages_regs_;
|
||||
|
||||
// Values recorded for the blend state part of the pipeline description.
// NOTE(review): presumably compared against the current registers in
// UpdateBlendState to detect changes - confirm against the implementation.
struct UpdateBlendStateRegisters {
  // RB_COLOR_MASK with unused render targets removed.
  uint32_t color_mask;
  // Blend control updated only for used render targets.
  uint32_t blendcontrol[4];
  bool colorcontrol_blend_enable;

  // Starts out with every byte of the structure zeroed.
  UpdateBlendStateRegisters() { Reset(); }
  // Zeroes the whole object, including any padding bytes. The cast to void*
  // keeps compilers from warning about memset on a type with a user-provided
  // constructor.
  void Reset() { std::memset(static_cast<void*>(this), 0, sizeof(*this)); }
} update_blend_state_regs_;
|
||||
|
||||
// Values recorded for the rasterizer state part of the pipeline description.
// NOTE(review): presumably compared against the current registers in
// UpdateRasterizerState to detect changes - confirm against the
// implementation.
struct UpdateRasterizerStateRegisters {
  // Polygon offset is in Xenos units.
  float poly_offset;
  float poly_offset_scale;
  uint8_t cull_mode;
  bool fill_mode_wireframe;
  bool front_counter_clockwise;
  bool depth_clamp_enable;

  // Starts out with every byte of the structure zeroed.
  UpdateRasterizerStateRegisters() { Reset(); }
  // Zeroes the whole object. The cast to void* keeps compilers from warning
  // about memset on a type with a user-provided constructor.
  void Reset() { std::memset(static_cast<void*>(this), 0, sizeof(*this)); }
} update_rasterizer_state_regs_;
|
||||
|
||||
// Values recorded for the depth/stencil state part of the pipeline
// description.
// NOTE(review): presumably compared against the current registers in
// UpdateDepthStencilState to detect changes - confirm against the
// implementation.
struct UpdateDepthStencilStateRegisters {
  uint32_t rb_depthcontrol;
  uint32_t rb_stencilrefmask;

  // Starts out with every byte of the structure zeroed.
  UpdateDepthStencilStateRegisters() { Reset(); }
  // Zeroes the whole object. The cast to void* keeps compilers from warning
  // about memset on a type with a user-provided constructor.
  void Reset() { std::memset(static_cast<void*>(this), 0, sizeof(*this)); }
} update_depth_stencil_state_regs_;
|
||||
|
||||
struct UpdateIBStripCutValueRegisters {
|
||||
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE ib_strip_cut_value;
|
||||
|
||||
UpdateIBStripCutValueRegisters() { Reset(); }
|
||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||
} update_ib_strip_cut_value_regs_;
|
||||
};
|
||||
|
||||
} // namespace d3d12
|
||||
|
|
|
@ -164,6 +164,7 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
|||
}
|
||||
|
||||
// Common declarations.
|
||||
// Only up to 14 constant buffers can be used on binding tiers 1 and 2.
|
||||
source.Append(
|
||||
"cbuffer xe_system_constants : register(b0) {\n"
|
||||
" float2 xe_viewport_inv_scale;\n"
|
||||
|
@ -171,16 +172,16 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
|||
" uint xe_textures_are_3d;\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"struct XeFloatConstantPage {\n"
|
||||
" float4 c[16];\n"
|
||||
"};\n"
|
||||
"ConstantBuffer<XeFloatConstantPage> "
|
||||
"xe_float_constants[16] : register(b1);\n"
|
||||
"\n"
|
||||
"cbuffer xe_loop_bool_constants : register(b17) {\n"
|
||||
"cbuffer xe_loop_bool_constants : register(b1) {\n"
|
||||
" uint xe_bool_constants[8];\n"
|
||||
" uint xe_loop_constants[32];\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"struct XeFloatConstantPage {\n"
|
||||
" float4 c[32];\n"
|
||||
"};\n"
|
||||
"ConstantBuffer<XeFloatConstantPage> "
|
||||
"xe_float_constants[8] : register(b2);\n"
|
||||
"\n");
|
||||
|
||||
if (is_vertex_shader()) {
|
||||
|
@ -193,7 +194,7 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
|||
// -1 point size means the geometry shader will use the global setting by
|
||||
// default.
|
||||
source.AppendFormat(
|
||||
"cbuffer xe_vertex_fetch_constants : register(b18) {\n"
|
||||
"cbuffer xe_vertex_fetch_constants : register(b10) {\n"
|
||||
" uint2 xe_vertex_fetch[96];\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
|
@ -268,10 +269,6 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
|||
for (uint32_t i = 0; i < interpolator_register_count; ++i) {
|
||||
source.AppendFormat(" xe_r[%u] = xe_input.interpolators[%u];\n", i, i);
|
||||
}
|
||||
// No need to write zero to every output because in case an output is
|
||||
// completely unused, writing to that render target will be disabled in the
|
||||
// blending state (in Halo 3, one important render target is destroyed by a
|
||||
// shader not writing to one of the outputs otherwise).
|
||||
// TODO(Triang3l): ps_param_gen.
|
||||
}
|
||||
|
||||
|
@ -581,8 +578,8 @@ void HlslShaderTranslator::EmitLoadOperand(size_t src_index,
|
|||
EmitSource("xe_r[%u]", op.storage_index);
|
||||
break;
|
||||
case InstructionStorageSource::kConstantFloat:
|
||||
EmitSource("xe_float_constants[%u].c[%u]", op.storage_index >> 4,
|
||||
op.storage_index & 15);
|
||||
EmitSource("xe_float_constants[%u].c[%u]", op.storage_index >> 5,
|
||||
op.storage_index & 31);
|
||||
break;
|
||||
case InstructionStorageSource::kConstantInt:
|
||||
EmitSource("xe_loop_constants[%u]", op.storage_index);
|
||||
|
@ -602,7 +599,7 @@ void HlslShaderTranslator::EmitLoadOperand(size_t src_index,
|
|||
break;
|
||||
case InstructionStorageSource::kConstantFloat:
|
||||
EmitSource(
|
||||
"xe_float_constants[xe_src_index >> 4u].c[xe_src_index & 15u]");
|
||||
"xe_float_constants[xe_src_index >> 5u].c[xe_src_index & 31u]");
|
||||
break;
|
||||
case InstructionStorageSource::kConstantInt:
|
||||
EmitSource("xe_loop_constants[xe_src_index]");
|
||||
|
|
Loading…
Reference in New Issue