VertexLoader: Convert count register to remaining register
This more accurately represents what's going on, and the register now ends at 0 instead of 1, which makes some indexing operations easier. It also means that position_matrix_index_cache is filled starting from index 0 instead of index 1.
parent 97d0ff58c8
commit 39b2854b98
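
The core of the change in the JIT loaders: the old count_reg entered the per-vertex loop at count and exited at 1, so the zfreeze caches were written at indices 1-3 and slot 0 of position_matrix_index_cache was never touched (hence its old size of 4). The new remaining_reg enters at count - 1 and exits at 0, putting the last three vertices in slots 2, 1, 0. (The C++ loader already counted down to 0, but compensated with a + 1 when indexing the cache.) A minimal sketch of the two indexing schemes, with illustrative names standing in for the real loader state:

#include <array>
#include <cstdint>

// Illustrative only: 'posmtx' and the local arrays stand in for loader state.
void old_scheme(int count, uint32_t posmtx)
{
  std::array<uint32_t, 4> cache{};  // slot 0 was never written
  for (int counter = count; counter >= 1; counter--)
  {
    if (counter <= 3)
      cache[counter] = posmtx;  // last three vertices land in slots 3, 2, 1
  }
}

void new_scheme(int count, uint32_t posmtx)
{
  std::array<uint32_t, 3> cache{};  // every slot is used
  for (int remaining = count - 1; remaining >= 0; remaining--)
  {
    if (remaining < 3)
      cache[remaining] = posmtx;  // last three vertices land in slots 2, 1, 0
  }
}

Correspondingly, the JIT entry points are now handed count - 1 (see the ARM64 RunVertices change below), and the x64 prologue derives the initial value with a single LEA of ABI_PARAM3 - 1.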
@@ -22,8 +22,8 @@ u8* g_vertex_manager_write_ptr;
 static void PosMtx_ReadDirect_UByte(VertexLoader* loader)
 {
   u32 posmtx = DataRead<u8>() & 0x3f;
-  if (loader->m_counter < 3)
-    VertexLoaderManager::position_matrix_index_cache[loader->m_counter + 1] = posmtx;
+  if (loader->m_remaining < 3)
+    VertexLoaderManager::position_matrix_index_cache[loader->m_remaining] = posmtx;
   DataWrite<u32>(posmtx);
   PRIM_LOG("posmtx: {}, ", posmtx);
 }
@@ -257,7 +257,7 @@ int VertexLoader::RunVertices(DataReader src, DataReader dst, int count)
   m_numLoadedVertices += count;
   m_skippedVertices = 0;

-  for (m_counter = count - 1; m_counter >= 0; m_counter--)
+  for (m_remaining = count - 1; m_remaining >= 0; m_remaining--)
   {
     m_tcIndex = 0;
     m_colIndex = 0;
@@ -35,7 +35,7 @@ public:
   int m_texmtxread;
   bool m_vertexSkip;
   int m_skippedVertices;
-  int m_counter;
+  int m_remaining;

 private:
   // Pipeline.
@@ -14,7 +14,7 @@ using namespace Arm64Gen;

 constexpr ARM64Reg src_reg = ARM64Reg::X0;
 constexpr ARM64Reg dst_reg = ARM64Reg::X1;
-constexpr ARM64Reg count_reg = ARM64Reg::W2;
+constexpr ARM64Reg remaining_reg = ARM64Reg::W2;
 constexpr ARM64Reg skipped_reg = ARM64Reg::W17;
 constexpr ARM64Reg scratch1_reg = ARM64Reg::W16;
 constexpr ARM64Reg scratch2_reg = ARM64Reg::W15;
@@ -209,13 +209,10 @@ int VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentFormat
   // Z-Freeze
   if (native_format == &m_native_vtx_decl.position)
   {
-    CMP(count_reg, 3);
-    FixupBranch dont_store = B(CC_GT);
+    CMP(remaining_reg, 3);
+    FixupBranch dont_store = B(CC_GE);
     MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::position_cache.data());
-    ADD(EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg), EncodeRegTo64(count_reg),
-        ArithOption(EncodeRegTo64(count_reg), ShiftType::LSL, 4));
-    m_float_emit.STUR(write_size, coords, EncodeRegTo64(scratch1_reg),
-                      -int(sizeof(decltype(VertexLoaderManager::position_cache[0]))));
+    m_float_emit.STR(128, coords, EncodeRegTo64(scratch2_reg), ArithOption(remaining_reg, true));
     SetJumpTarget(dont_store);
   }

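With remaining_reg bottoming out at 0, the zfreeze store above collapses to a single instruction: a 128-bit STR whose register offset is scaled by the access size (what ArithOption(remaining_reg, true) requests), i.e. base + remaining * 16. The old sequence needed an explicit ADD with an LSL #4 shift plus a STUR with a -16 byte bias, because count_reg ran 1-3. A sketch of the address math, assuming 16-byte position_cache rows:

#include <cstdint>

uintptr_t old_store_addr(uintptr_t base, int count)      // count_reg in [1, 3] here
{
  return base + count * 16 - 16;  // ADD (LSL #4), then STUR at offset -16
}

uintptr_t new_store_addr(uintptr_t base, int remaining)  // remaining_reg in [0, 2] here
{
  return base + remaining * 16;   // one STR with a scaled register offset
}

Note that the new store always writes a full 16 bytes; the 4-float row padding declared in VertexLoaderManager.cpp below is what makes that overwrite safe.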
@@ -404,7 +401,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
   AlignCode16();
   if (IsIndexed(m_VtxDesc.low.Position))
     MOV(skipped_reg, ARM64Reg::WZR);
-  MOV(saved_count, count_reg);
+  ADD(saved_count, remaining_reg, 1);

   MOVP2R(stride_reg, g_main_cp_state.array_strides.data());
   MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases.data());
@@ -421,10 +418,10 @@ void VertexLoaderARM64::GenerateVertexLoader()
     STR(IndexType::Unsigned, scratch1_reg, dst_reg, m_dst_ofs);

     // Z-Freeze
-    CMP(count_reg, 3);
-    FixupBranch dont_store = B(CC_GT);
+    CMP(remaining_reg, 3);
+    FixupBranch dont_store = B(CC_GE);
     MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::position_matrix_index_cache.data());
-    STR(scratch1_reg, EncodeRegTo64(scratch2_reg), ArithOption(count_reg, true));
+    STR(scratch1_reg, EncodeRegTo64(scratch2_reg), ArithOption(remaining_reg, true));
     SetJumpTarget(dont_store);

     m_native_vtx_decl.posmtx.components = 4;
@@ -584,8 +581,8 @@ void VertexLoaderARM64::GenerateVertexLoader()
   const u8* cont = GetCodePtr();
   ADD(src_reg, src_reg, m_src_ofs);

-  SUB(count_reg, count_reg, 1);
-  CBNZ(count_reg, loop_start);
+  SUBS(remaining_reg, remaining_reg, 1);
+  B(CCFlags::CC_GE, loop_start);

   if (IsIndexed(m_VtxDesc.low.Position))
   {
@@ -612,5 +609,5 @@ int VertexLoaderARM64::RunVertices(DataReader src, DataReader dst, int count)
 {
   m_numLoadedVertices += count;
   return ((int (*)(u8 * src, u8 * dst, int count)) region)(src.GetPointer(), dst.GetPointer(),
-                                                           count);
+                                                           count - 1);
 }
@@ -32,10 +32,9 @@
 namespace VertexLoaderManager
 {
 // Used by zfreeze
-std::array<std::array<float, 4>, 3> position_cache;
-// The counter added to the address of the array is 1, 2, or 3, but never zero.
-// So only index 1 - 3 are used.
-std::array<u32, 4> position_matrix_index_cache;
+std::array<u32, 3> position_matrix_index_cache;
+// 3 vertices, 4 floats each to allow SIMD overwrite
+alignas(sizeof(std::array<float, 4>)) std::array<std::array<float, 4>, 3> position_cache;

 static NativeVertexFormatMap s_native_vertex_map;
 static NativeVertexFormat* s_current_vtx_fmt;
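The reordered declarations also spell out why each row is 4 floats wide: the loaders store a full 16-byte vector per position regardless of how many components it has, and the padding lane absorbs the overshoot. If rows were only 3 floats, a full-vector store at row r would spill into row r + 1, which already holds an earlier vertex (rows are written in the order 2, 1, 0). A rough scalar illustration of that layout assumption:

#include <array>

// Rows are 16 bytes, so a 4-lane store at row 'remaining' stays inside the row.
alignas(16) std::array<std::array<float, 4>, 3> cache;  // mirrors position_cache

void store_position(int remaining, const float* pos, int components)
{
  for (int lane = 0; lane < 4; lane++)  // a SIMD store writes all 4 lanes at once
    cache[remaining][lane] = lane < components ? pos[lane] : 0.0f;
}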
@@ -251,8 +250,9 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false)
 int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src,
                 bool is_preprocess)
 {
-  if (!count)
+  if (count == 0)
     return 0;
+  ASSERT(count > 0);

   VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, is_preprocess);

@@ -54,7 +54,7 @@ void UpdateVertexArrayPointers();
 // Position cache for zfreeze (3 vertices, 4 floats each to allow SIMD overwrite).
 // These arrays are in reverse order.
 extern std::array<std::array<float, 4>, 3> position_cache;
-extern std::array<u32, 4> position_matrix_index_cache;
+extern std::array<u32, 3> position_matrix_index_cache;

 // VB_HAS_X. Bitmask telling what vertex components are present.
 extern u32 g_current_components;
@@ -26,7 +26,9 @@ static const X64Reg dst_reg = ABI_PARAM2;
 static const X64Reg scratch1 = RAX;
 static const X64Reg scratch2 = ABI_PARAM3;
 static const X64Reg scratch3 = ABI_PARAM4;
-static const X64Reg count_reg = R10;
+// The remaining number of vertices to be processed. Starts at count - 1, and the final loop has it
+// at 0.
+static const X64Reg remaining_reg = R10;
 static const X64Reg skipped_reg = R11;
 static const X64Reg base_reg = RBX;

@@ -117,10 +119,11 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, ComponentFormat
   const auto write_zfreeze = [&]() {  // zfreeze
     if (native_format == &m_native_vtx_decl.position)
     {
-      CMP(32, R(count_reg), Imm8(3));
-      FixupBranch dont_store = J_CC(CC_A);
-      LEA(32, scratch3,
-          MScaled(count_reg, SCALE_4, -int(VertexLoaderManager::position_cache[0].size())));
+      CMP(32, R(remaining_reg), Imm8(3));
+      FixupBranch dont_store = J_CC(CC_AE);
+      // The position cache is composed of 3 rows of 4 floats each; since each float is 4 bytes,
+      // we need to scale by 4 twice to cover the 4 floats.
+      LEA(32, scratch3, MScaled(remaining_reg, SCALE_4, 0));
       MOVUPS(MPIC(VertexLoaderManager::position_cache.data(), scratch3, SCALE_4), coords);
       SetJumpTarget(dont_store);
     }
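The doubled scaling is a workaround for x86 addressing, which caps the index scale at 8: the LEA leaves remaining * 4 in scratch3, and the memory operand scales that by 4 again, producing the 16-byte row stride. The old LEA folded in a displacement of -4 (one row, pre-scale) to turn the 1-based count into a 0-based offset; with remaining_reg already 0-based, the displacement is simply 0. A sketch of the computed address:

#include <cstdint>

uintptr_t zfreeze_store_addr(uintptr_t cache_base, uint32_t remaining)
{
  uint32_t scratch3 = remaining * 4;  // LEA(32, scratch3, MScaled(remaining_reg, SCALE_4, 0))
  return cache_base + scratch3 * 4;   // MPIC(..., scratch3, SCALE_4): remaining * 16 bytes total
}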
@@ -381,7 +384,7 @@ void VertexLoaderX64::ReadColor(OpArg data, VertexComponentFormat attribute, ColorFormat
 void VertexLoaderX64::GenerateVertexLoader()
 {
   BitSet32 regs = {src_reg,  dst_reg,       scratch1,    scratch2,
-                   scratch3, count_reg,     skipped_reg, base_reg};
+                   scratch3, remaining_reg, skipped_reg, base_reg};
   regs &= ABI_ALL_CALLEE_SAVED;
   ABI_PushRegistersAndAdjustStack(regs, 0);

@@ -389,7 +392,9 @@ void VertexLoaderX64::GenerateVertexLoader()
   PUSH(32, R(ABI_PARAM3));

   // ABI_PARAM3 is one of the lower registers, so free it for scratch2.
-  MOV(32, R(count_reg), R(ABI_PARAM3));
+  // We also have it end at a value of 0, to simplify indexing for zfreeze;
+  // this requires subtracting 1 at the start.
+  LEA(32, remaining_reg, MDisp(ABI_PARAM3, -1));

   MOV(64, R(base_reg), R(ABI_PARAM4));

@@ -407,9 +412,9 @@ void VertexLoaderX64::GenerateVertexLoader()
     MOV(32, MDisp(dst_reg, m_dst_ofs), R(scratch1));

     // zfreeze
-    CMP(32, R(count_reg), Imm8(3));
-    FixupBranch dont_store = J_CC(CC_A);
-    MOV(32, MPIC(VertexLoaderManager::position_matrix_index_cache.data(), count_reg, SCALE_4),
+    CMP(32, R(remaining_reg), Imm8(3));
+    FixupBranch dont_store = J_CC(CC_AE);
+    MOV(32, MPIC(VertexLoaderManager::position_matrix_index_cache.data(), remaining_reg, SCALE_4),
         R(scratch1));
     SetJumpTarget(dont_store);

@@ -509,8 +514,8 @@ void VertexLoaderX64::GenerateVertexLoader()
   const u8* cont = GetCodePtr();
   ADD(64, R(src_reg), Imm32(m_src_ofs));

-  SUB(32, R(count_reg), Imm8(1));
-  J_CC(CC_NZ, loop_start);
+  SUB(32, R(remaining_reg), Imm8(1));
+  J_CC(CC_AE, loop_start);

   // Get the original count.
   POP(32, R(ABI_RETURN));
@@ -41,8 +41,8 @@ void Pos_ReadDirect(VertexLoader* loader)
   for (int i = 0; i < N; ++i)
   {
     const float value = PosScale(src.Read<T>(), scale);
-    if (loader->m_counter < 3)
-      VertexLoaderManager::position_cache[loader->m_counter][i] = value;
+    if (loader->m_remaining < 3)
+      VertexLoaderManager::position_cache[loader->m_remaining][i] = value;
     dst.Write(value);
   }

|
@ -68,8 +68,8 @@ void Pos_ReadIndex(VertexLoader* loader)
|
||||||
for (int i = 0; i < N; ++i)
|
for (int i = 0; i < N; ++i)
|
||||||
{
|
{
|
||||||
const float value = PosScale(Common::FromBigEndian(data[i]), scale);
|
const float value = PosScale(Common::FromBigEndian(data[i]), scale);
|
||||||
if (loader->m_counter < 3)
|
if (loader->m_remaining < 3)
|
||||||
VertexLoaderManager::position_cache[loader->m_counter][i] = value;
|
VertexLoaderManager::position_cache[loader->m_remaining][i] = value;
|
||||||
dst.Write(value);
|
dst.Write(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@@ -558,7 +558,7 @@ void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format)
   {
     // If this vertex format has per-vertex position matrix IDs, look it up.
     if (vert_decl.posmtx.enable)
-      mtxIdx = VertexLoaderManager::position_matrix_index_cache[3 - i];
+      mtxIdx = VertexLoaderManager::position_matrix_index_cache[2 - i];

     if (vert_decl.position.components == 2)
       VertexLoaderManager::position_cache[2 - i][2] = 0;
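
Because the loaders fill the caches with the most recent vertex at index 0, CalculateZSlope reads slot 2 - i for i = 0, 1, 2 to recover the last three vertices in submission order. The position_cache lookup just below already used this 2 - i convention; the change simply brings the matrix-index cache in line with it. A sketch of the lookup, with the surrounding loop assumed rather than taken from the diff:

#include <cstdint>

#include "VideoCommon/VertexLoaderManager.h"  // declares the two caches

void read_last_three_vertices()
{
  for (int i = 0; i < 3; i++)  // i = 0 is the earliest of the last three vertices
  {
    const uint32_t mtxIdx = VertexLoaderManager::position_matrix_index_cache[2 - i];
    const auto& pos = VertexLoaderManager::position_cache[2 - i];
    // ... transform pos with matrix mtxIdx and accumulate the Z slope ...
    (void)mtxIdx;
    (void)pos;
  }
}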