[GPU] GPR count limit is 128, not 64

This commit is contained in:
Triang3l 2022-06-26 14:45:49 +03:00
parent 4812b4ba8b
commit b787f2dec1
4 changed files with 25 additions and 10 deletions

View File

@ -22,6 +22,7 @@
#include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/shader_translator.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/spirv/spirv_disassembler.h"
// For D3DDisassemble:
@ -164,10 +165,11 @@ int shader_compiler_main(const std::vector<std::string>& args) {
switch (shader_type) {
case xenos::ShaderType::kVertex:
modification = translator->GetDefaultVertexShaderModification(
64, host_vertex_shader_type);
xenos::kMaxShaderTempRegisters, host_vertex_shader_type);
break;
case xenos::ShaderType::kPixel:
modification = translator->GetDefaultPixelShaderModification(64);
modification = translator->GetDefaultPixelShaderModification(
xenos::kMaxShaderTempRegisters);
break;
default:
assert_unhandled_case(shader_type);

View File

@ -110,14 +110,15 @@ class ShaderInterpreter {
return *reinterpret_cast<const float*>(&bits);
}
const float* GetTempRegister(uint32_t address, bool is_relative) const {
return temp_registers_[(
int32_t(address) + (is_relative ? state_.GetLoopAddress() : 0) & 63)];
// Resolves a temporary register operand to an index into the register file.
// `address` is taken as-is, or, when `is_relative` is set, has the current
// loop address added to it. The sum is then wrapped with a power-of-two mask
// so that the result fits in xenos::kMaxShaderTempRegistersLog2 bits.
uint32_t GetTempRegisterIndex(uint32_t address, bool is_relative) const {
  constexpr uint32_t kIndexMask =
      (UINT32_C(1) << xenos::kMaxShaderTempRegistersLog2) - 1;
  const auto relative_offset = is_relative ? state_.GetLoopAddress() : 0;
  return (int32_t(address) + relative_offset) & kIndexMask;
}
// Read-only access to the 4-component temporary register selected by
// `address` (optionally loop-relative); the index is wrapped by
// GetTempRegisterIndex before the lookup.
const float* GetTempRegister(uint32_t address, bool is_relative) const {
  const uint32_t register_index = GetTempRegisterIndex(address, is_relative);
  return temp_registers_[register_index];
}
// For simplicity (due to writability), not bounds-checking.
float* GetTempRegister(uint32_t address, bool is_relative) {
return temp_registers_[(
int32_t(address) + (is_relative ? state_.GetLoopAddress() : 0) & 63)];
return temp_registers_[GetTempRegisterIndex(address, is_relative)];
}
const float* GetFloatConstant(uint32_t address, bool is_relative,
bool relative_address_is_a0) const;
@ -138,7 +139,7 @@ class ShaderInterpreter {
const uint32_t* ucode_ = nullptr;
// For both inputs and locals.
float temp_registers_[64][4];
float temp_registers_[xenos::kMaxShaderTempRegisters][4];
State state_;
};

View File

@ -55,7 +55,9 @@ class ShaderTranslator {
// Register count from SQ_PROGRAM_CNTL, stored by the implementation in its
// modification bits.
virtual uint32_t GetModificationRegisterCount() const { return 64; }
virtual uint32_t GetModificationRegisterCount() const {
  // Base-class fallback: report the maximum temporary register count.
  constexpr uint32_t kFallbackRegisterCount = xenos::kMaxShaderTempRegisters;
  return kFallbackRegisterCount;
}
// True if the current shader is a vertex shader.
bool is_vertex_shader() const {

View File

@ -714,6 +714,16 @@ enum class ArbitraryFilter : uint32_t {
kUseFetchConst = 7,
};
// Upper bound on the number of temporary registers (GPRs) a shader may use.
//
// Instructions only carry 6-bit register index fields, so a literal index (or
// a literal index offset, depending on the addressing mode) cannot exceed 63.
// However, the maximum total register count for a vertex and a pixel shader
// combined is 128, and the boundary between vertex and pixel shaders can be
// moved via SQ_PROGRAM_CNTL::VS/PS_NUM_REG, according to the IPR2015-00325
// specification (section 8 "Register file allocation").
constexpr uint32_t kMaxShaderTempRegistersLog2 = 7;
constexpr uint32_t kMaxShaderTempRegisters =
    uint32_t{1} << kMaxShaderTempRegistersLog2;
// a2xx_sq_ps_vtx_mode
enum class VertexShaderExportMode : uint32_t {
kPosition1Vector = 0,