From ae7ff58f818c3b86e60d93395213dbc007cbad3d Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 28 Jul 2018 16:30:47 +0300 Subject: [PATCH] [D3D12] Pipeline cache --- .../gpu/d3d12/d3d12_command_processor.cc | 16 +- src/xenia/gpu/d3d12/d3d12_shader.h | 4 + src/xenia/gpu/d3d12/pipeline_cache.cc | 705 +++++++++++++++++- src/xenia/gpu/d3d12/pipeline_cache.h | 129 +++- src/xenia/gpu/hlsl_shader_translator.cc | 27 +- 5 files changed, 845 insertions(+), 36 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 43116e91a..f1cc85b0b 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -122,6 +122,12 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // Doesn't actually draw. return true; } + if ((regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 && + primitive_type != PrimitiveType::kPointList && + primitive_type != PrimitiveType::kRectangleList) { + // Both sides are culled - can't reproduce this with rasterizer state. + return true; + } // Shaders will have already been defined by previous loads. // We need them to do just about anything so validate here. @@ -131,9 +137,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // Always need a vertex shader. return false; } - // Depth-only mode doesn't need a pixel shader (we'll use a fake one). + // Depth-only mode doesn't need a pixel shader. if (enable_mode == xenos::ModeControl::kDepth) { - // Use a dummy pixel shader when required. pixel_shader = nullptr; } else if (!pixel_shader) { // Need a pixel shader in normal color mode. 
@@ -142,8 +147,13 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, bool full_update = BeginFrame(); + ID3D12PipelineState* pipeline; + ID3D12RootSignature* root_signature; auto pipeline_status = pipeline_cache_->ConfigurePipeline( - vertex_shader, pixel_shader, primitive_type); + vertex_shader, pixel_shader, primitive_type, + index_buffer_info != nullptr ? index_buffer_info->format : + IndexFormat::kInt16, + &pipeline, &root_signature); if (pipeline_status == PipelineCache::UpdateStatus::kError) { return false; } diff --git a/src/xenia/gpu/d3d12/d3d12_shader.h b/src/xenia/gpu/d3d12/d3d12_shader.h index f7cca96f0..893548794 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.h +++ b/src/xenia/gpu/d3d12/d3d12_shader.h @@ -28,6 +28,10 @@ class D3D12Shader : public Shader { const uint8_t* GetDXBC() const; size_t GetDXBCSize() const; + // TODO(Triang3l): Real texture counts. + uint32_t GetTextureSRVCount() const { return 0; } + uint32_t GetSamplerCount() const { return 0; } + private: ID3DBlob* blob_ = nullptr; }; diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 169c76fb2..3210612a2 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -10,6 +10,7 @@ #include "xenia/gpu/d3d12/pipeline_cache.h" #include +#include #include "xenia/base/assert.h" #include "xenia/base/logging.h" @@ -55,14 +56,70 @@ D3D12Shader* PipelineCache::LoadShader(ShaderType shader_type, PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline( D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, - PrimitiveType primitive_type) { + PrimitiveType primitive_type, IndexFormat index_format, + ID3D12PipelineState** pipeline_out, + ID3D12RootSignature** root_signature_out) { #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES - return UpdateState(vertex_shader, pixel_shader, primitive_type); + + assert_not_null(pipeline_out); + 
assert_not_null(root_signature_out); + + Pipeline* pipeline = nullptr; + auto update_status = UpdateState(vertex_shader, pixel_shader, primitive_type, + index_format); + switch (update_status) { + case UpdateStatus::kCompatible: + // Requested pipeline is compatible with our previous one, so use that. + // Note that there still may be dynamic state that needs updating. + pipeline = current_pipeline_; + break; + case UpdateStatus::kMismatch: + // Pipeline state has changed. We need to either create a new one or find + // an old one that matches. + current_pipeline_ = nullptr; + break; + case UpdateStatus::kError: + // Error updating state - bail out. + // We are in an indeterminate state, so reset things for the next attempt. + current_pipeline_ = nullptr; + return update_status; + } + if (!pipeline) { + // Should have a hash key produced by the UpdateState pass. + uint64_t hash_key = XXH64_digest(&hash_state_); + pipeline = GetPipeline(hash_key); + current_pipeline_ = pipeline; + if (!pipeline) { + // Unable to create pipeline. + return UpdateStatus::kError; + } + } + + *pipeline_out = pipeline->state; + *root_signature_out = pipeline->root_signature; + return update_status; } void PipelineCache::ClearCache() { + // Remove references to the current pipeline. + current_pipeline_ = nullptr; + + // Destroy all pipelines. + for (auto it : pipelines_) { + it.second->state->Release(); + delete it.second; + } + pipelines_.clear(); + COUNT_profile_set("gpu/pipeline_cache/pipelines", 0); + + // Destroy all root signatures. + for (auto it : root_signatures_) { + it.second->Release(); + } + root_signatures_.clear(); + // Destroy all shaders. 
for (auto it : shader_map_) { delete it.second; @@ -121,7 +178,7 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader, PipelineCache::UpdateStatus PipelineCache::UpdateState( D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, - PrimitiveType primitive_type) { + PrimitiveType primitive_type, IndexFormat index_format) { bool mismatch = false; // Reset hash so we can build it up. @@ -136,11 +193,21 @@ PipelineCache::UpdateStatus PipelineCache::UpdateState( mismatch = true; \ } \ } - UpdateStatus status; status = UpdateShaderStages(vertex_shader, pixel_shader, primitive_type); CHECK_UPDATE_STATUS(status, mismatch, "Unable to update shader stages"); - + status = UpdateBlendState(pixel_shader); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state"); + status = UpdateRasterizerState(primitive_type); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state"); + status = UpdateDepthStencilState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state"); + status = UpdateIBStripCutValue(index_format); + CHECK_UPDATE_STATUS(status, mismatch, + "Unable to update index buffer strip cut value"); + status = UpdateRenderTargetFormats(); + CHECK_UPDATE_STATUS(status, mismatch, + "Unable to update render target formats"); #undef CHECK_UPDATE_STATUS return mismatch ? 
UpdateStatus::kMismatch : UpdateStatus::kCompatible;
@@ -160,16 +227,22 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
                   0x000FF100 ||
               register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
 
-  bool dirty = false;
-  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
-                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
+  bool dirty = current_pipeline_ == nullptr;
   dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
   dirty |= regs.vertex_shader != vertex_shader;
   dirty |= regs.pixel_shader != pixel_shader;
-  dirty |= regs.primitive_type != primitive_type;
   regs.vertex_shader = vertex_shader;
   regs.pixel_shader = pixel_shader;
-  regs.primitive_type = primitive_type;
+  // Points are emulated via a geometry shader because Direct3D 10+ doesn't
+  // support point sizes other than 1.
+  bool primitive_topology_is_line =
+      primitive_type == PrimitiveType::kLineList ||
+      primitive_type == PrimitiveType::kLineStrip ||
+      primitive_type == PrimitiveType::kLineLoop ||
+      primitive_type == PrimitiveType::k2DLineStrip;
+  dirty |= regs.primitive_topology_is_line != primitive_topology_is_line;
+  // Store the new value so the comparison above and the hash below track it.
+  regs.primitive_topology_is_line = primitive_topology_is_line;
   XXH64_update(&hash_state_, &regs, sizeof(regs));
   if (!dirty) {
     return UpdateStatus::kCompatible;
@@ -177,22 +250,630 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
 
   xenos::xe_gpu_program_cntl_t sq_program_cntl;
   sq_program_cntl.dword_0 = regs.sq_program_cntl;
-
   if (!vertex_shader->is_translated() &&
       !TranslateShader(vertex_shader, sq_program_cntl)) {
     XELOGE("Failed to translate the vertex shader!");
     return UpdateStatus::kError;
   }
-
-  if (pixel_shader && !pixel_shader->is_translated() &&
+  if (pixel_shader != nullptr && !pixel_shader->is_translated() &&
       !TranslateShader(pixel_shader, sq_program_cntl)) {
     XELOGE("Failed to translate the pixel shader!");
     return UpdateStatus::kError;
   }
 
+  update_desc_.VS.pShaderBytecode = vertex_shader->GetDXBC();
+  update_desc_.VS.BytecodeLength = vertex_shader->GetDXBCSize();
+  if (pixel_shader != nullptr) {
+
update_desc_.PS.pShaderBytecode = pixel_shader->GetDXBC(); + update_desc_.PS.BytecodeLength = pixel_shader->GetDXBCSize(); + } else { + update_desc_.PS.pShaderBytecode = nullptr; + update_desc_.PS.BytecodeLength = 0; + } + update_desc_.DS.pShaderBytecode = nullptr; + update_desc_.DS.BytecodeLength = 0; + update_desc_.HS.pShaderBytecode = nullptr; + update_desc_.HS.BytecodeLength = 0; + // TODO(Triang3l): Geometry shaders. + update_desc_.GS.pShaderBytecode = nullptr; + update_desc_.GS.BytecodeLength = 0; + update_desc_.pRootSignature = GetRootSignature(vertex_shader, pixel_shader); + if (update_desc_.pRootSignature == nullptr) { + return UpdateStatus::kError; + } + update_desc_.PrimitiveTopologyType = + primitive_topology_is_line ? D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE : + D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + return UpdateStatus::kMismatch; } +PipelineCache::UpdateStatus PipelineCache::UpdateBlendState( + D3D12Shader* pixel_shader) { + auto& regs = update_blend_state_regs_; + + bool dirty = current_pipeline_ == nullptr; + uint32_t color_mask; + if (pixel_shader != nullptr) { + color_mask = register_file_->values[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF; + // If the pixel shader doesn't write to a render target, writing to it is + // disabled in the blend state. Otherwise, in Halo 3, one important render + // target is destroyed by a shader not writing to one of the outputs. 
+    for (uint32_t i = 0; i < 4; ++i) {
+      if (!pixel_shader->writes_color_target(i)) {
+        color_mask &= ~(0xF << (i * 4));
+      }
+    }
+  } else {
+    color_mask = 0;
+  }
+  dirty |= regs.color_mask != color_mask;
+  regs.color_mask = color_mask;
+  // The blend-disable flag (0x20) lives in RB_COLORCONTROL, not RB_COLOR_MASK.
+  bool blend_enable = color_mask != 0 &&
+      !(register_file_->values[XE_GPU_REG_RB_COLORCONTROL].u32 & 0x20);
+  dirty |= regs.colorcontrol_blend_enable != blend_enable;
+  regs.colorcontrol_blend_enable = blend_enable;
+  static const Register kBlendControlRegs[] = {
+    XE_GPU_REG_RB_BLENDCONTROL_0, XE_GPU_REG_RB_BLENDCONTROL_1,
+    XE_GPU_REG_RB_BLENDCONTROL_2, XE_GPU_REG_RB_BLENDCONTROL_3
+  };
+  for (uint32_t i = 0; i < 4; ++i) {
+    if (blend_enable && (color_mask & (0xF << (i * 4)))) {
+      dirty |= SetShadowRegister(&regs.blendcontrol[i], kBlendControlRegs[i]);
+    } else {
+      // Zero out blend color for unused render targets and when not blending
+      // for a stable hash.
+      regs.blendcontrol[i] = 0;
+    }
+  }
+  XXH64_update(&hash_state_, &regs, sizeof(regs));
+  if (!dirty) {
+    return UpdateStatus::kCompatible;
+  }
+
+  update_desc_.BlendState.AlphaToCoverageEnable = FALSE;
+  update_desc_.BlendState.IndependentBlendEnable = TRUE;  // Per-RT state below.
+  static const D3D12_BLEND kBlendFactorMap[] = {
+    /* 0 */ D3D12_BLEND_ZERO,
+    /* 1 */ D3D12_BLEND_ONE,
+    /* 2 */ D3D12_BLEND_ZERO,  // ?
+    /* 3 */ D3D12_BLEND_ZERO,  // ?
+    /* 4 */ D3D12_BLEND_SRC_COLOR,
+    /* 5 */ D3D12_BLEND_INV_SRC_COLOR,
+    /* 6 */ D3D12_BLEND_SRC_ALPHA,
+    /* 7 */ D3D12_BLEND_INV_SRC_ALPHA,
+    /* 8 */ D3D12_BLEND_DEST_COLOR,
+    /* 9 */ D3D12_BLEND_INV_DEST_COLOR,
+    /* 10 */ D3D12_BLEND_DEST_ALPHA,
+    /* 11 */ D3D12_BLEND_INV_DEST_ALPHA,
+    /* 12 */ D3D12_BLEND_BLEND_FACTOR,  // CONSTANT_COLOR
+    /* 13 */ D3D12_BLEND_INV_BLEND_FACTOR,  // ONE_MINUS_CONSTANT_COLOR
+    /* 14 */ D3D12_BLEND_BLEND_FACTOR,  // CONSTANT_ALPHA
+    /* 15 */ D3D12_BLEND_INV_BLEND_FACTOR,  // ONE_MINUS_CONSTANT_ALPHA
+    /* 16 */ D3D12_BLEND_SRC_ALPHA_SAT,
+  };
+  static const D3D12_BLEND_OP kBlendOpMap[] = {
+    /* 0 */ D3D12_BLEND_OP_ADD,
+    /* 1 */ D3D12_BLEND_OP_SUBTRACT,
+    /* 2 */ D3D12_BLEND_OP_MIN,
+    /* 3 */ D3D12_BLEND_OP_MAX,
+    /* 4 */ D3D12_BLEND_OP_REV_SUBTRACT,
+  };
+  for (uint32_t i = 0; i < 4; ++i) {
+    auto& blend_desc = update_desc_.BlendState.RenderTarget[i];
+    // update_desc_ is reused between calls, so BlendEnable has to be written
+    // on the enabled path too - nothing else ever sets it to TRUE.
+    blend_desc.BlendEnable = blend_enable && (color_mask & (0xF << (i * 4)));
+    if (blend_desc.BlendEnable) {
+      uint32_t blend_control = regs.blendcontrol[i];
+      // A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND
+      blend_desc.SrcBlend = kBlendFactorMap[(blend_control & 0x0000001F) >> 0];
+      // A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND
+      blend_desc.DestBlend = kBlendFactorMap[(blend_control & 0x00001F00) >> 8];
+      // A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN
+      blend_desc.BlendOp = kBlendOpMap[(blend_control & 0x000000E0) >> 5];
+      // A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND
+      blend_desc.SrcBlendAlpha =
+          kBlendFactorMap[(blend_control & 0x001F0000) >> 16];
+      // A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND
+      blend_desc.DestBlendAlpha =
+          kBlendFactorMap[(blend_control & 0x1F000000) >> 24];
+      // A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN
+      blend_desc.BlendOpAlpha = kBlendOpMap[(blend_control & 0x00E00000) >> 21];
+    } else {
+      blend_desc.SrcBlend = blend_desc.SrcBlendAlpha = D3D12_BLEND_ONE;
+      blend_desc.DestBlend = blend_desc.DestBlendAlpha = D3D12_BLEND_ZERO;
+      blend_desc.BlendOp = D3D12_BLEND_OP_ADD;
+
blend_desc.BlendOpAlpha = D3D12_BLEND_OP_ADD;
+    }
+    blend_desc.LogicOpEnable = FALSE;
+    blend_desc.LogicOp = D3D12_LOGIC_OP_NOOP;
+    blend_desc.RenderTargetWriteMask = (color_mask >> (i * 4)) & 0xF;
+  }
+  update_desc_.SampleMask = UINT_MAX;
+
+  return UpdateStatus::kMismatch;
+}
+
+PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState(
+    PrimitiveType primitive_type) {
+  auto& regs = update_rasterizer_state_regs_;
+
+  bool dirty = current_pipeline_ == nullptr;
+  uint32_t pa_su_sc_mode_cntl =  // Culling, winding, fill and offset bits.
+      register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
+  uint32_t cull_mode = pa_su_sc_mode_cntl & 0x3;
+  if (primitive_type == PrimitiveType::kPointList ||
+      primitive_type == PrimitiveType::kRectangleList) {
+    cull_mode = 0;
+  }
+  dirty |= regs.cull_mode != cull_mode;
+  regs.cull_mode = cull_mode;
+  // Because Direct3D 12 doesn't support per-side fill mode and depth bias, the
+  // values to use depend on the current culling state.
+  // If front faces are culled, use the ones for back faces.
+  // If back faces are culled, it's the other way around.
+  // If culling is not enabled, assume the developer wanted to draw things in a
+  // more special way - so if one side is wireframe or has a depth bias, then
+  // that's intentional (if both sides have a depth bias, the one for the front
+  // faces is used, though it's unlikely that they will ever be different -
+  // SetRenderState sets the same offset for both sides).
+  // Points fill mode (0) also isn't supported in Direct3D 12, but assume the
+  // developer didn't want to fill the whole primitive and use wireframe (like
+  // Xenos fill mode 1).
+  // Here we also assume that only one side is culled - if two sides are culled,
+  // the D3D12 command processor will drop such draw early.
+  bool fill_mode_wireframe = false;
+  float poly_offset = 0.0f, poly_offset_scale = 0.0f;
+  if (!(cull_mode & 1)) {
+    // Front faces aren't culled.
+ uint32_t fill_mode = (pa_su_sc_mode_cntl >> 5) & 0x7; + if (fill_mode == 0 || fill_mode == 1) { + fill_mode_wireframe = true; + } + if ((pa_su_sc_mode_cntl >> 11) & 0x1) { + poly_offset = + register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + poly_offset_scale = + register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + } + } + if (!(cull_mode & 2)) { + // Back faces aren't culled. + uint32_t fill_mode = (pa_su_sc_mode_cntl >> 8) & 0x7; + if (fill_mode == 0 || fill_mode == 1) { + fill_mode_wireframe = true; + } + // Prefer front depth bias because in general, front faces are the ones that + // are rendered (except for shadow volumes). + if (((pa_su_sc_mode_cntl >> 12) & 0x1) && poly_offset == 0.0f && + poly_offset_scale == 0.0f) { + poly_offset = + register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; + poly_offset_scale = + register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; + } + } + if (((pa_su_sc_mode_cntl >> 3) & 0x3) == 0) { + // Fill mode is disabled. + fill_mode_wireframe = false; + } + dirty |= regs.fill_mode_wireframe != fill_mode_wireframe; + regs.fill_mode_wireframe = fill_mode_wireframe; + dirty |= regs.poly_offset != poly_offset; + regs.poly_offset = poly_offset; + dirty |= regs.poly_offset_scale != poly_offset_scale; + regs.poly_offset_scale = poly_offset_scale; + bool front_counter_clockwise = !(pa_su_sc_mode_cntl & 0x4); + dirty |= regs.front_counter_clockwise != front_counter_clockwise; + regs.front_counter_clockwise = front_counter_clockwise; + uint32_t pa_cl_clip_cntl = + register_file_->values[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; + // CLIP_DISABLE + bool depth_clamp_enable = !!(pa_cl_clip_cntl & (1 << 16)); + // TODO(DrChat): This seem to differ. Need to examine this. 
+ // https://github.com/decaf-emu/decaf-emu/blob/c017a9ff8128852fb9a5da19466778a171cea6e1/src/libdecaf/src/gpu/latte_registers_pa.h#L11 + // ZCLIP_NEAR_DISABLE + // bool depth_clamp_enable = !(pa_cl_clip_cntl & (1 << 26)); + // RASTERIZER_DISABLE + // Disable rendering in command processor if regs.pa_cl_clip_cntl & (1 << 22)? + dirty |= regs.depth_clamp_enable != depth_clamp_enable; + regs.depth_clamp_enable = depth_clamp_enable; + XXH64_update(&hash_state_, ®s, sizeof(regs)); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + update_desc_.RasterizerState.FillMode = + fill_mode_wireframe ? D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID; + if (cull_mode & 1) { + update_desc_.RasterizerState.CullMode = D3D12_CULL_MODE_FRONT; + } else if (cull_mode & 2) { + update_desc_.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; + } else { + update_desc_.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + } + update_desc_.RasterizerState.FrontCounterClockwise = + front_counter_clockwise ? TRUE : FALSE; + // Conversion based on the calculations in Call of Duty 4 and the values it + // writes to the registers, and also on: + // https://github.com/mesa3d/mesa/blob/54ad9b444c8e73da498211870e785239ad3ff1aa/src/gallium/drivers/radeonsi/si_state.c#L943 + // Call of Duty 4 sets the constant bias of 1/32768 and the slope scale of 32. + // However, it's calculated from a console variable in 2 parts: first it's + // divided by 65536, and then it's multiplied by 2. + // TODO(Triang3l): Find the best scale. According to si_state.c, the value in + // the register should be divided by 2 to get the value suitable for PC + // graphics APIs if the depth buffer is 24-bit. However, even multiplying by + // 65536 rather than 32768 still doesn't remove shadow acne in Bomberman Live + // completely. Maybe 131072 would work the best. 
+ // Using ceil here just in case a game wants the offset but passes a value + // that is too small - it's better to apply more offset than to make depth + // fighting worse or to disable the offset completely (Direct3D 12 takes an + // integer value). + update_desc_.RasterizerState.DepthBias = + int32_t(std::ceil(std::abs(poly_offset) * 131072.0f)); + update_desc_.RasterizerState.DepthBias *= poly_offset < 0.0f ? -1 : 1; + update_desc_.RasterizerState.DepthBiasClamp = 0.0f; + update_desc_.RasterizerState.SlopeScaledDepthBias = + poly_offset_scale * (1.0f / 16.0f); + update_desc_.RasterizerState.DepthClipEnable = + !depth_clamp_enable ? TRUE : FALSE; + update_desc_.RasterizerState.MultisampleEnable = FALSE; + update_desc_.RasterizerState.AntialiasedLineEnable = FALSE; + update_desc_.RasterizerState.ForcedSampleCount = 0; + update_desc_.RasterizerState.ConservativeRaster = + D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { + auto& regs = update_depth_stencil_state_regs_; + + bool dirty = current_pipeline_ == nullptr; + dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); + dirty |= + SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); + XXH64_update(&hash_state_, ®s, sizeof(regs)); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + update_desc_.DepthStencilState.DepthEnable = + (regs.rb_depthcontrol & 0x2) ? TRUE : FALSE; + update_desc_.DepthStencilState.DepthWriteMask = + (regs.rb_depthcontrol & 0x4) ? D3D12_DEPTH_WRITE_MASK_ALL : + D3D12_DEPTH_WRITE_MASK_ZERO; + // Comparison functions are the same in Direct3D 12 but plus one (minus one, + // bit 0 for less, bit 1 for equal, bit 2 for greater). + update_desc_.DepthStencilState.DepthFunc = + D3D12_COMPARISON_FUNC(((regs.rb_depthcontrol >> 4) & 0x7) + 1); + update_desc_.DepthStencilState.StencilEnable = + (regs.rb_depthcontrol & 0x1) ? 
TRUE : FALSE; + update_desc_.DepthStencilState.StencilReadMask = + (regs.rb_stencilrefmask >> 8) & 0xFF; + update_desc_.DepthStencilState.StencilWriteMask = + (regs.rb_stencilrefmask >> 16) & 0xFF; + // Stencil operations are the same in Direct3D 12 too but plus one. + update_desc_.DepthStencilState.FrontFace.StencilFailOp = + D3D12_STENCIL_OP(((regs.rb_depthcontrol >> 11) & 0x7) + 1); + update_desc_.DepthStencilState.FrontFace.StencilDepthFailOp = + D3D12_STENCIL_OP(((regs.rb_depthcontrol >> 17) & 0x7) + 1); + update_desc_.DepthStencilState.FrontFace.StencilPassOp = + D3D12_STENCIL_OP(((regs.rb_depthcontrol >> 14) & 0x7) + 1); + update_desc_.DepthStencilState.FrontFace.StencilFunc = + D3D12_COMPARISON_FUNC(((regs.rb_depthcontrol >> 8) & 0x7) + 1); + // BACKFACE_ENABLE. + if (regs.rb_depthcontrol & 0x80) { + update_desc_.DepthStencilState.BackFace.StencilFailOp = + D3D12_STENCIL_OP(((regs.rb_depthcontrol >> 23) & 0x7) + 1); + update_desc_.DepthStencilState.BackFace.StencilDepthFailOp = + D3D12_STENCIL_OP(((regs.rb_depthcontrol >> 29) & 0x7) + 1); + update_desc_.DepthStencilState.BackFace.StencilPassOp = + D3D12_STENCIL_OP(((regs.rb_depthcontrol >> 26) & 0x7) + 1); + update_desc_.DepthStencilState.BackFace.StencilFunc = + D3D12_COMPARISON_FUNC(((regs.rb_depthcontrol >> 20) & 0x7) + 1); + } else { + // Back state is identical to front state. + update_desc_.DepthStencilState.BackFace = + update_desc_.DepthStencilState.FrontFace; + } + // TODO(Triang3l): EARLY_Z_ENABLE (needs to be enabled in shaders, but alpha + // test is dynamic - should be enabled anyway if there's no alpha test, + // discarding and depth output). 
+
+  return UpdateStatus::kMismatch;
+}
+
+PipelineCache::UpdateStatus PipelineCache::UpdateIBStripCutValue(
+    IndexFormat index_format) {
+  auto& regs = update_ib_strip_cut_value_regs_;
+
+  bool dirty = current_pipeline_ == nullptr;
+  auto ib_strip_cut_value = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
+  if (register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)) {
+    ib_strip_cut_value = index_format == IndexFormat::kInt32 ?
+                             D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF :
+                             D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
+  }
+  dirty |= regs.ib_strip_cut_value != ib_strip_cut_value;
+  regs.ib_strip_cut_value = ib_strip_cut_value;
+  XXH64_update(&hash_state_, &regs, sizeof(regs));  // Part of the PSO identity.
+  if (!dirty) {
+    return UpdateStatus::kCompatible;
+  }
+
+  update_desc_.IBStripCutValue = ib_strip_cut_value;
+
+  // TODO(Triang3l): Geometry shaders for non-0xFFFF values if they are used.
+
+  return UpdateStatus::kMismatch;
+}
+
+PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargetFormats() {
+  bool dirty = current_pipeline_ == nullptr;
+  if (!dirty) {
+    return UpdateStatus::kCompatible;
+  }
+
+  // TODO(Triang3l): Set the formats when RT cache is added.
+  update_desc_.NumRenderTargets = 0;
+  update_desc_.DSVFormat = DXGI_FORMAT_UNKNOWN;
+
+  return UpdateStatus::kMismatch;
+}
+
+PipelineCache::Pipeline* PipelineCache::GetPipeline(uint64_t hash_key) {
+  // Lookup the pipeline in the cache.
+  auto it = pipelines_.find(hash_key);
+  if (it != pipelines_.end()) {
+    // Found existing pipeline.
+    return it->second;
+  }
+
+  // Set the unused fields of the pipeline description.
+ update_desc_.StreamOutput.pSODeclaration = nullptr; + update_desc_.StreamOutput.NumEntries = 0; + update_desc_.StreamOutput.pBufferStrides = nullptr; + update_desc_.StreamOutput.NumStrides = 0; + update_desc_.StreamOutput.RasterizedStream = 0; + update_desc_.InputLayout.pInputElementDescs = nullptr; + update_desc_.InputLayout.NumElements = 0; + update_desc_.SampleDesc.Count = 1; + update_desc_.SampleDesc.Quality = 0; + update_desc_.NodeMask = 0; + // TODO(Triang3l): Cache create pipelines. + update_desc_.CachedPSO.pCachedBlob = nullptr; + update_desc_.CachedPSO.CachedBlobSizeInBytes = 0; + update_desc_.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + + auto device = context_->GetD3D12Provider()->GetDevice(); + ID3D12PipelineState* state; + if (FAILED(device->CreateGraphicsPipelineState(&update_desc_, + IID_PPV_ARGS(&state)))) { + XELOGE("Failed to create graphics pipeline state"); + return nullptr; + } + // TODO(Triang3l): Set the name for the pipeline, with shader hashes. + + // Add to cache with the hash key for reuse. + Pipeline* pipeline = new Pipeline; + pipeline->state = state; + pipeline->root_signature = update_desc_.pRootSignature; + pipelines_.insert({hash_key, pipeline}); + return pipeline; +} + +ID3D12RootSignature* PipelineCache::GetRootSignature( + const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) { + uint32_t pixel_textures = + pixel_shader != nullptr ? pixel_shader->GetTextureSRVCount() : 0; + uint32_t pixel_samplers = + pixel_shader != nullptr ? pixel_shader->GetSamplerCount() : 0; + uint32_t vertex_textures = vertex_shader->GetTextureSRVCount(); + uint32_t vertex_samplers = vertex_shader->GetSamplerCount(); + // Max 96 textures (if all kinds of tfetch instructions are used for all fetch + // registers) and 32 samplers (one sampler per used fetch), but different + // shader stages have different texture sets. 
+  uint32_t index = pixel_textures | (pixel_samplers << 7) |  // 0-32 => 6 bits.
+                   (vertex_textures << 13) | (vertex_samplers << 20);
+
+  // Try an existing root signature.
+  auto it = root_signatures_.find(index);
+  if (it != root_signatures_.end()) {
+    return it->second;
+  }
+
+  // Create a new one.
+  D3D12_ROOT_SIGNATURE_DESC desc;
+  D3D12_ROOT_PARAMETER parameters[RootParameter::kCountWithTwoStageTextures];
+  D3D12_DESCRIPTOR_RANGE ranges[RootParameter::kCountWithTwoStageTextures];
+  desc.NumParameters = UINT(RootParameter::kCountNoTextures);
+  desc.pParameters = parameters;
+  desc.NumStaticSamplers = 0;
+  desc.pStaticSamplers = nullptr;
+  desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
+
+  // Vertex constants - float and fetch.
+  {
+    auto& parameter = parameters[size_t(RootParameter::kVertexConstants)];
+    auto& range = ranges[size_t(RootParameter::kVertexConstants)];
+    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+    parameter.DescriptorTable.NumDescriptorRanges = 1;
+    parameter.DescriptorTable.pDescriptorRanges = &range;
+    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
+    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
+    range.NumDescriptors = 9;
+    range.BaseShaderRegister = 2;
+    range.RegisterSpace = 0;
+    range.OffsetInDescriptorsFromTableStart = 0;
+  }
+
+  // Pixel constants - float.
+  {
+    auto& parameter = parameters[size_t(RootParameter::kPixelConstants)];
+    auto& range = ranges[size_t(RootParameter::kPixelConstants)];
+    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+    parameter.DescriptorTable.NumDescriptorRanges = 1;
+    parameter.DescriptorTable.pDescriptorRanges = &range;
+    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
+    range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
+    range.NumDescriptors = 8;
+    range.BaseShaderRegister = 2;
+    range.RegisterSpace = 0;
+    range.OffsetInDescriptorsFromTableStart = 0;
+  }
+
+  // Common constants - system and loop/bool.
+ { + auto& parameter = parameters[size_t(RootParameter::kCommonConstants)]; + auto& range = ranges[size_t(RootParameter::kCommonConstants)]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = 1; + parameter.DescriptorTable.pDescriptorRanges = ⦥ + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + range.NumDescriptors = 2; + range.BaseShaderRegister = 0; + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = 0; + } + + // Virtual shared memory. + { + auto& parameter = parameters[size_t(RootParameter::kVirtualMemory)]; + auto& range = ranges[size_t(RootParameter::kVirtualMemory)]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = 1; + parameter.DescriptorTable.pDescriptorRanges = ⦥ + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = 1; + range.BaseShaderRegister = 0; + range.RegisterSpace = 1; + range.OffsetInDescriptorsFromTableStart = 0; + } + + if (pixel_textures > 0 || vertex_textures > 0) { + desc.NumParameters = UINT(RootParameter::kCountWithOneStageTextures); + + // Pixel or vertex textures. 
+ { + auto& parameter = + parameters[size_t(RootParameter::kPixelOrVertexTextures)]; + auto& range = ranges[size_t(RootParameter::kPixelOrVertexTextures)]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = 1; + parameter.DescriptorTable.pDescriptorRanges = ⦥ + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.BaseShaderRegister = 0; + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = 0; + if (pixel_textures > 0) { + assert_true(pixel_samplers > 0); + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + range.NumDescriptors = pixel_textures; + } else { + assert_true(vertex_samplers > 0); + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + range.NumDescriptors = vertex_textures; + } + } + + // Pixel or vertex samplers. + { + auto& parameter = + parameters[size_t(RootParameter::kPixelOrVertexSamplers)]; + auto& range = ranges[size_t(RootParameter::kPixelOrVertexSamplers)]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = 1; + parameter.DescriptorTable.pDescriptorRanges = ⦥ + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + range.BaseShaderRegister = 0; + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = 0; + if (pixel_samplers > 0) { + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + range.NumDescriptors = pixel_samplers; + } else { + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + range.NumDescriptors = vertex_samplers; + } + } + + if (pixel_textures > 0 && vertex_textures > 0) { + assert_true(vertex_samplers > 0); + + desc.NumParameters = UINT(RootParameter::kCountWithTwoStageTextures); + + // Vertex textures. 
+ { + auto& parameter = parameters[size_t(RootParameter::kVertexTextures)]; + auto& range = ranges[size_t(RootParameter::kVertexTextures)]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = 1; + parameter.DescriptorTable.pDescriptorRanges = ⦥ + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = vertex_textures; + range.BaseShaderRegister = 0; + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = 0; + } + + // Vertex samplers. + { + auto& parameter = parameters[size_t(RootParameter::kVertexSamplers)]; + auto& range = ranges[size_t(RootParameter::kVertexSamplers)]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = 1; + parameter.DescriptorTable.pDescriptorRanges = ⦥ + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + range.NumDescriptors = vertex_samplers; + range.BaseShaderRegister = 0; + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = 0; + } + } + } + + ID3DBlob* blob; + ID3DBlob* error_blob = nullptr; + if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, + &blob, &error_blob))) { + XELOGE("Failed to serialize a root signature with %u pixel textures, %u " + "pixel samplers, %u vertex textures and %u vertex samplers", + pixel_textures, pixel_samplers, vertex_textures, vertex_samplers); + if (error_blob != nullptr) { + XELOGE("%s", + reinterpret_cast(error_blob->GetBufferPointer())); + error_blob->Release(); + } + return nullptr; + } + if (error_blob != nullptr) { + error_blob->Release(); + } + + auto device = context_->GetD3D12Provider()->GetDevice(); + ID3D12RootSignature* root_signature; + if (FAILED(device->CreateRootSignature(0, blob->GetBufferPointer(), + blob->GetBufferSize(), + 
IID_PPV_ARGS(&root_signature)))) { + XELOGE("Failed to create a root signature with %u pixel textures, %u pixel " + "samplers, %u vertex textures and %u vertex samplers", + pixel_textures, pixel_samplers, vertex_textures, vertex_samplers); + blob->Release(); + return nullptr; + } + blob->Release(); + + root_signatures_.insert({index, root_signature}); + return root_signature; +} + } // namespace d3d12 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index 9fe2b7dce..fedb4865a 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -42,10 +42,57 @@ class PipelineCache { UpdateStatus ConfigurePipeline(D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, - PrimitiveType primitive_type); + PrimitiveType primitive_type, + IndexFormat index_format, + ID3D12PipelineState** pipeline_out, + ID3D12RootSignature** root_signature_out); void ClearCache(); + enum class RootParameter { + // These are always present. + + // Most frequently changed (for one object drawn multiple times, for + // instance - may contain projection matrices, also vertex offsets for + // objects drawn in multiple parts). + // This contains 8 pages of float constants (b2-b9) and fetch constants + // (b10). + kVertexConstants, + // Less frequently changed (per-material) - 8 pages of float constants + // (b2-b9). + kPixelConstants, + // Rarely changed - system constants like viewport and alpha testing (b0) + // and loop and bool constants (b1). + kCommonConstants, + // Never changed - shared memory byte address buffer (t0, space1). + kVirtualMemory, + + kCountNoTextures, + + // These are there only if textures are fetched (they are changed pretty + // frequently, but for the ease of maintenance they're at the end).
+ // If the pixel shader samples textures, these are for pixel textures + // (changed more frequently), otherwise, if the vertex shader samples + // textures, these are for vertex textures. + + // Used textures of all types (t0+, space0). + kPixelOrVertexTextures = kCountNoTextures, + // Used samplers (s0+). + kPixelOrVertexSamplers, + + kCountWithOneStageTextures, + + // These are only present if both pixel and vertex shaders sample textures, + // and are for vertex textures. + + // Used textures of all types (t0+, space0). + kVertexTextures = kCountWithOneStageTextures, + // Used samplers (s0+). + kVertexSamplers, + + kCountWithTwoStageTextures, + }; + private: bool SetShadowRegister(uint32_t* dest, uint32_t register_name); bool SetShadowRegister(float* dest, uint32_t register_name); @@ -54,11 +101,23 @@ class PipelineCache { UpdateStatus UpdateState(D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, - PrimitiveType primitive_type); + PrimitiveType primitive_type, + IndexFormat index_format); + // pRootSignature, VS, PS, DS, HS, GS, PrimitiveTopologyType. UpdateStatus UpdateShaderStages(D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, PrimitiveType primitive_type); + // BlendState, SampleMask. + UpdateStatus UpdateBlendState(D3D12Shader* pixel_shader); + // RasterizerState. + UpdateStatus UpdateRasterizerState(PrimitiveType primitive_type); + // DepthStencilState. + UpdateStatus UpdateDepthStencilState(); + // IBStripCutValue. + UpdateStatus UpdateIBStripCutValue(IndexFormat index_format); + // NumRenderTargets, RTVFormats, DSVFormat. + UpdateStatus UpdateRenderTargetFormats(); RegisterFile* register_file_ = nullptr; ui::d3d12::D3D12Context* context_ = nullptr; @@ -68,22 +127,82 @@ class PipelineCache { // All loaded shaders mapped by their guest hash key. std::unordered_map shader_map_; + // Root signatures for different descriptor counts.
+ std::unordered_map root_signatures_; + ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader, + const D3D12Shader* pixel_shader); + // Hash state used to incrementally produce pipeline hashes during update. // By the time the full update pass has run the hash will represent the // current state in a way that can uniquely identify the produced // ID3D12PipelineState. XXH64_state_t hash_state_; + struct Pipeline { + ID3D12PipelineState* state; + // From root_signatures_ - not owned. + ID3D12RootSignature* root_signature; + }; + // All previously generated pipelines mapped by hash. + std::unordered_map pipelines_; + // Sets StreamOutput, InputLayout, SampleDesc, NodeMask, CachedPSO, Flags. + Pipeline* GetPipeline(uint64_t hash_key); + + // Previously used pipeline. This matches our current state settings + // and allows us to quickly(ish) reuse the pipeline if no registers have + // changed. + Pipeline* current_pipeline_ = nullptr; + + // Description of the pipeline being created. + D3D12_GRAPHICS_PIPELINE_STATE_DESC update_desc_; struct UpdateShaderStagesRegisters { - PrimitiveType primitive_type; - uint32_t pa_su_sc_mode_cntl; - uint32_t sq_program_cntl; D3D12Shader* vertex_shader; D3D12Shader* pixel_shader; + uint32_t sq_program_cntl; + bool primitive_topology_is_line; UpdateShaderStagesRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } } update_shader_stages_regs_; + + struct UpdateBlendStateRegisters { + // RB_COLOR_MASK with unused render targets removed. + uint32_t color_mask; + // Blend control updated only for used render targets. + uint32_t blendcontrol[4]; + bool colorcontrol_blend_enable; + + UpdateBlendStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_blend_state_regs_; + + struct UpdateRasterizerStateRegisters { + // Polygon offset is in Xenos units.
+ float poly_offset; + float poly_offset_scale; + uint8_t cull_mode; + bool fill_mode_wireframe; + bool front_counter_clockwise; + bool depth_clamp_enable; + + UpdateRasterizerStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_rasterizer_state_regs_; + + struct UpdateDepthStencilStateRegisters { + uint32_t rb_depthcontrol; + uint32_t rb_stencilrefmask; + + UpdateDepthStencilStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_depth_stencil_state_regs_; + + struct UpdateIBStripCutValueRegisters { + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE ib_strip_cut_value; + + UpdateIBStripCutValueRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_ib_strip_cut_value_regs_; }; } // namespace d3d12 diff --git a/src/xenia/gpu/hlsl_shader_translator.cc b/src/xenia/gpu/hlsl_shader_translator.cc index 197b58294..8451fdbba 100644 --- a/src/xenia/gpu/hlsl_shader_translator.cc +++ b/src/xenia/gpu/hlsl_shader_translator.cc @@ -164,6 +164,7 @@ std::vector HlslShaderTranslator::CompleteTranslation() { } // Common declarations. + // Only up to 14 constant buffers can be used on binding tiers 1 and 2.
source.Append( "cbuffer xe_system_constants : register(b0) {\n" " float2 xe_viewport_inv_scale;\n" @@ -171,16 +172,16 @@ std::vector HlslShaderTranslator::CompleteTranslation() { " uint xe_textures_are_3d;\n" "};\n" "\n" - "struct XeFloatConstantPage {\n" - " float4 c[16];\n" - "};\n" - "ConstantBuffer<XeFloatConstantPage> " - "xe_float_constants[16] : register(b1);\n" - "\n" - "cbuffer xe_loop_bool_constants : register(b17) {\n" + "cbuffer xe_loop_bool_constants : register(b1) {\n" " uint xe_bool_constants[8];\n" " uint xe_loop_constants[32];\n" "};\n" + "\n" + "struct XeFloatConstantPage {\n" + " float4 c[32];\n" + "};\n" + "ConstantBuffer<XeFloatConstantPage> " + "xe_float_constants[8] : register(b2);\n" "\n"); if (is_vertex_shader()) { @@ -193,7 +194,7 @@ std::vector HlslShaderTranslator::CompleteTranslation() { // -1 point size means the geometry shader will use the global setting by // default. source.AppendFormat( - "cbuffer xe_vertex_fetch_constants : register(b18) {\n" + "cbuffer xe_vertex_fetch_constants : register(b10) {\n" " uint2 xe_vertex_fetch[96];\n" "};\n" "\n" @@ -268,10 +269,6 @@ std::vector HlslShaderTranslator::CompleteTranslation() { for (uint32_t i = 0; i < interpolator_register_count; ++i) { source.AppendFormat(" xe_r[%u] = xe_input.interpolators[%u];\n", i, i); } - // No need to write zero to every output because in case an output is - // completely unused, writing to that render target will be disabled in the - // blending state (in Halo 3, one important render target is destroyed by a - // shader not writing to one of the outputs otherwise). // TODO(Triang3l): ps_param_gen.
} @@ -581,8 +578,8 @@ void HlslShaderTranslator::EmitLoadOperand(size_t src_index, EmitSource("xe_r[%u]", op.storage_index); break; case InstructionStorageSource::kConstantFloat: - EmitSource("xe_float_constants[%u].c[%u]", op.storage_index >> 4, - op.storage_index & 15); + EmitSource("xe_float_constants[%u].c[%u]", op.storage_index >> 5, + op.storage_index & 31); break; case InstructionStorageSource::kConstantInt: EmitSource("xe_loop_constants[%u]", op.storage_index); @@ -602,7 +599,7 @@ void HlslShaderTranslator::EmitLoadOperand(size_t src_index, break; case InstructionStorageSource::kConstantFloat: EmitSource( - "xe_float_constants[xe_src_index >> 4u].c[xe_src_index & 15u]"); + "xe_float_constants[xe_src_index >> 5u].c[xe_src_index & 31u]"); break; case InstructionStorageSource::kConstantInt: EmitSource("xe_loop_constants[xe_src_index]");