diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 4d65a8d37..900ec42b2 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -42,6 +42,8 @@ using namespace ucode; // Lots of naming comes from the disassembly spit out by the XNA GS compiler // and dumps of d3dcompiler and games: https://pastebin.com/i4kAv7bB +constexpr uint32_t ShaderTranslator::kMaxMemExports; + ShaderTranslator::ShaderTranslator() = default; ShaderTranslator::~ShaderTranslator() = default; @@ -63,6 +65,9 @@ void ShaderTranslator::Reset() { writes_color_targets_[i] = false; } writes_depth_ = false; + memexport_alloc_count_ = 0; + memexport_eA_written_ = 0; + std::memset(&memexport_eM_written_, 0, sizeof(memexport_eM_written_)); memexport_stream_constants_.clear(); } @@ -287,17 +292,30 @@ void ShaderTranslator::GatherInstructionInformation( writes_depth_ = true; } } - // Store used memexport constants because CPU code needs addresses - // and sizes. eA is (hopefully) always written to using: - // mad eA, r#, const0100, c# - // (though there are some exceptions, shaders in Halo 3 for some - // reason set eA to zeros, but the swizzle of the constant is not - // .xyzw in this case, and they don't write to eM#). - if (op.vector_dest() == 32 && - op.vector_opcode() == AluVectorOpcode::kMad && - op.vector_write_mask() == 0b1111 && !op.src_is_temp(3) && - op.src_swizzle(3) == 0) { - memexport_stream_constants_.insert(op.src_reg(3)); + if (memexport_alloc_count_ > 0 && + memexport_alloc_count_ <= kMaxMemExports) { + // Store used memexport constants because CPU code needs + // addresses and sizes, and also whether there have been writes + // to eA and eM# for register allocation in shader translator + // implementations. + // eA is (hopefully) always written to using: + // mad eA, r#, const0100, c# + // (though there are some exceptions, shaders in Halo 3 for some + // reason set eA to zeros, but the swizzle of the constant is + // not .xyzw in this case, and they don't write to eM#). + uint32_t memexport_alloc_index = memexport_alloc_count_ - 1; + if (op.vector_dest() == 32 && + op.vector_opcode() == AluVectorOpcode::kMad && + op.vector_write_mask() == 0b1111 && !op.src_is_temp(3) && + op.src_swizzle(3) == 0) { + memexport_eA_written_ |= 1u << memexport_alloc_index; + memexport_stream_constants_.insert(op.src_reg(3)); + } else if (op.vector_dest() >= 33 && op.vector_dest() <= 37) { + if (memexport_eA_written_ & (1u << memexport_alloc_index)) { + memexport_eM_written_[memexport_alloc_index] |= + 1 << (op.vector_dest() - 33); + } + } } } else { if (op.is_vector_dest_relative()) { @@ -320,6 +338,15 @@ void ShaderTranslator::GatherInstructionInformation( writes_depth_ = true; } } + if (memexport_alloc_count_ > 0 && + memexport_alloc_count_ <= kMaxMemExports && + op.scalar_dest() >= 33 && op.scalar_dest() <= 37) { + uint32_t memexport_alloc_index = memexport_alloc_count_ - 1; + if (memexport_eA_written_ & (1u << memexport_alloc_index)) { + memexport_eM_written_[memexport_alloc_index] |= + 1 << (op.scalar_dest() - 33); + } + } } else { if (op.is_scalar_dest_relative()) { uses_register_dynamic_addressing_ = true; @@ -329,6 +356,11 @@ void ShaderTranslator::GatherInstructionInformation( } } } break; + case ControlFlowOpcode::kAlloc: + if (cf.alloc.alloc_type() == AllocType::kMemory) { + ++memexport_alloc_count_; + } + break; default: break; } diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index b79bf2fac..9aacba3bb 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -66,6 +66,15 @@ class ShaderTranslator { const std::vector& texture_bindings() const { return texture_bindings_; } + + // Based on the number of AS_VS/PS_EXPORT_STREAM_* enum sets found in a game + // .pdb. + static constexpr uint32_t kMaxMemExports = 16; + // Bits indicating which eM# registers have been written to after each + // `alloc export`, for up to kMaxMemExports exports. This will contain zero + // for certain corrupt exports - that don't write to eA before writing to eM#, + // or if the write was done any way other than MAD with a stream constant. + const uint8_t* memexport_eM_written() const { return memexport_eM_written_; } // All c# registers used as the addend in MAD operations to eA, populated // before translation occurs. const std::set& memexport_stream_constants() const { @@ -236,6 +245,12 @@ class ShaderTranslator { bool uses_register_dynamic_addressing_ = false; bool writes_color_targets_[4] = {false, false, false, false}; bool writes_depth_ = false; + + uint32_t memexport_alloc_count_ = 0; + // For register allocation in implementations - what was used after each + // `alloc export`. + uint32_t memexport_eA_written_ = 0; + uint8_t memexport_eM_written_[kMaxMemExports] = {0}; std::set memexport_stream_constants_; static const AluOpcodeInfo alu_vector_opcode_infos_[0x20];