diff --git a/src/xenia/gpu/shader_compiler_main.cc b/src/xenia/gpu/shader_compiler_main.cc index 422ca06cf..ec2e20184 100644 --- a/src/xenia/gpu/shader_compiler_main.cc +++ b/src/xenia/gpu/shader_compiler_main.cc @@ -25,6 +25,7 @@ #include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/shader_translator.h" #include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/spirv_tools_context.h" // For D3DDisassemble: @@ -168,10 +169,11 @@ int shader_compiler_main(const std::vector& args) { switch (shader_type) { case xenos::ShaderType::kVertex: modification = translator->GetDefaultVertexShaderModification( - 64, host_vertex_shader_type); + xenos::kMaxShaderTempRegisters, host_vertex_shader_type); break; case xenos::ShaderType::kPixel: - modification = translator->GetDefaultPixelShaderModification(64); + modification = translator->GetDefaultPixelShaderModification( + xenos::kMaxShaderTempRegisters); break; default: assert_unhandled_case(shader_type); diff --git a/src/xenia/gpu/shader_interpreter.h b/src/xenia/gpu/shader_interpreter.h index 6182acecf..dca530221 100644 --- a/src/xenia/gpu/shader_interpreter.h +++ b/src/xenia/gpu/shader_interpreter.h @@ -110,14 +110,15 @@ class ShaderInterpreter { return *reinterpret_cast(&bits); } - const float* GetTempRegister(uint32_t address, bool is_relative) const { - return temp_registers_[( - int32_t(address) + (is_relative ? state_.GetLoopAddress() : 0) & 63)]; + uint32_t GetTempRegisterIndex(uint32_t address, bool is_relative) const { + return (int32_t(address) + (is_relative ? state_.GetLoopAddress() : 0)) & + ((UINT32_C(1) << xenos::kMaxShaderTempRegistersLog2) - 1); + } + const float* GetTempRegister(uint32_t address, bool is_relative) const { + return temp_registers_[GetTempRegisterIndex(address, is_relative)]; } - // For simplicity (due to writability), not bounds-checking. float* GetTempRegister(uint32_t address, bool is_relative) { - return temp_registers_[( - int32_t(address) + (is_relative ? state_.GetLoopAddress() : 0) & 63)]; + return temp_registers_[GetTempRegisterIndex(address, is_relative)]; } const float* GetFloatConstant(uint32_t address, bool is_relative, bool relative_address_is_a0) const; @@ -138,7 +139,7 @@ class ShaderInterpreter { const uint32_t* ucode_ = nullptr; // For both inputs and locals. - float temp_registers_[64][4]; + float temp_registers_[xenos::kMaxShaderTempRegisters][4]; State state_; }; diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index ee0f0221a..593907952 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -55,7 +55,9 @@ class ShaderTranslator { // Register count from SQ_PROGRAM_CNTL, stored by the implementation in its // modification bits. - virtual uint32_t GetModificationRegisterCount() const { return 64; } + virtual uint32_t GetModificationRegisterCount() const { + return xenos::kMaxShaderTempRegisters; + } // True if the current shader is a vertex shader. bool is_vertex_shader() const { diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 95c104a01..6a6fb55cc 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -714,6 +714,16 @@ enum class ArbitraryFilter : uint32_t { kUseFetchConst = 7, }; +// While instructions contain 6-bit register index fields (allowing literal +// indices, or literal index offsets, depending on the addressing mode, of up to +// 63), the maximum total register count for a vertex and a pixel shader +// combined is 128, and the boundary between vertex and pixel shaders can be +// moved via SQ_PROGRAM_CNTL::VS/PS_NUM_REG, according to the IPR2015-00325 +// specification (section 8 "Register file allocation"). +constexpr uint32_t kMaxShaderTempRegistersLog2 = 7; +constexpr uint32_t kMaxShaderTempRegisters = UINT32_C(1) + << kMaxShaderTempRegistersLog2; + // a2xx_sq_ps_vtx_mode enum class VertexShaderExportMode : uint32_t { kPosition1Vector = 0,