diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.cpp b/Source/Core/VideoCommon/VertexLoaderARM64.cpp
index cf321baa60..ebd0026237 100644
--- a/Source/Core/VideoCommon/VertexLoaderARM64.cpp
+++ b/Source/Core/VideoCommon/VertexLoaderARM64.cpp
@@ -47,17 +47,36 @@ VertexLoaderARM64::VertexLoaderARM64(const TVtxDesc& vtx_desc, const VAT& vtx_at
 
 void VertexLoaderARM64::GetVertexAddr(int array, u64 attribute, ARM64Reg reg)
 {
-  ADD(reg, src_reg, m_src_ofs);
   if (attribute & MASK_INDEXED)
   {
     if (attribute == INDEX8)
     {
-      LDRB(INDEX_UNSIGNED, scratch1_reg, reg, 0);
+      if (m_src_ofs < 4096)
+      {
+        LDRB(INDEX_UNSIGNED, scratch1_reg, src_reg, m_src_ofs);
+      }
+      else
+      {
+        ADD(reg, src_reg, m_src_ofs);
+        LDRB(INDEX_UNSIGNED, scratch1_reg, reg, 0);
+      }
       m_src_ofs += 1;
     }
     else
     {
-      LDRH(INDEX_UNSIGNED, scratch1_reg, reg, 0);
+      if (m_src_ofs < 256)
+      {
+        LDURH(scratch1_reg, src_reg, m_src_ofs);
+      }
+      else if (m_src_ofs <= 8190 && !(m_src_ofs & 1))
+      {
+        LDRH(INDEX_UNSIGNED, scratch1_reg, src_reg, m_src_ofs);
+      }
+      else
+      {
+        ADD(reg, src_reg, m_src_ofs);
+        LDRH(INDEX_UNSIGNED, scratch1_reg, reg, 0);
+      }
       m_src_ofs += 2;
       REV16(scratch1_reg, scratch1_reg);
     }
@@ -74,6 +93,8 @@ void VertexLoaderARM64::GetVertexAddr(int array, u64 attribute, ARM64Reg reg)
     LDR(INDEX_UNSIGNED, EncodeRegTo64(scratch2_reg), arraybase_reg, array * 8);
     ADD(EncodeRegTo64(reg), EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg));
   }
+  else
+    ADD(reg, src_reg, m_src_ofs);
 }
 
 s32 VertexLoaderARM64::GetAddressImm(int array, u64 attribute, Arm64Gen::ARM64Reg reg, u32 align)
@@ -171,8 +192,7 @@ int VertexLoaderARM64::ReadVertex(u64 attribute, int format, int count_in, int c
     CMP(count_reg, 3);
     FixupBranch dont_store = B(CC_GT);
     MOVI2R(EncodeRegTo64(scratch2_reg), (u64)VertexLoaderManager::position_cache);
-    ORR(scratch1_reg, WSP, count_reg, ArithOption(count_reg, ST_LSL, 4));
-    ADD(EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg));
+    ADD(EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg), EncodeRegTo64(count_reg), ArithOption(EncodeRegTo64(count_reg), ST_LSL, 4));
     m_float_emit.STUR(write_size, coords, EncodeRegTo64(scratch1_reg), -16);
     SetJumpTarget(dont_store);
   }
@@ -347,12 +367,33 @@ void VertexLoaderARM64::GenerateVertexLoader()
 
   // We can touch all except v8-v15
   // If we need to use those, we need to retain the lower 64bits(!) of the register
-  MOV(skipped_reg, WSP);
+  const u64 tc[8] = {
+      m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
+      m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord,
+  };
+
+  bool has_tc = false;
+  bool has_tc_scale = false;
+  for (int i = 0; i < 8; i++)
+  {
+    has_tc |= tc[i];
+    has_tc_scale |= !!m_VtxAttr.texCoord[i].Frac;
+  }
+
+  bool need_scale = (m_VtxAttr.ByteDequant && m_VtxAttr.PosFrac) ||
+                    (has_tc && has_tc_scale) ||
+                    m_VtxDesc.Normal;
+
+  AlignCode16();
+  if (m_VtxDesc.Position & MASK_INDEXED)
+    MOV(skipped_reg, WZR);
   MOV(saved_count, count_reg);
 
   MOVI2R(stride_reg, (u64)&g_main_cp_state.array_strides);
   MOVI2R(arraybase_reg, (u64)&VertexLoaderManager::cached_arraybases);
-  MOVI2R(scale_reg, (u64)&scale_factors);
+
+  if (need_scale)
+    MOVI2R(scale_reg, (u64)&scale_factors);
 
   const u8* loop_start = GetCodePtr();
 
@@ -465,10 +506,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
     }
   }
 
-  const u64 tc[8] = {
-      m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
-      m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord,
-  };
+
   for (int i = 0; i < 8; i++)
   {
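
Note on the first hunk: below is a minimal standalone sketch of the addressing-mode selection the patch performs before emitting each indexed load. The helper and enum names (FitsLdrbImm, SelectHalfwordLoad, HalfwordLoad) are hypothetical, not from the Dolphin codebase; the encoding limits are the standard AArch64 ones the patch relies on. LDRB (unsigned immediate) takes a 12-bit byte offset (0..4095); LDURH takes a signed 9-bit unscaled offset (-256..255); LDRH (unsigned immediate) scales its 12-bit field by 2, so the offset must be even and at most 4095 * 2 = 8190. Anything outside those ranges still needs the old two-instruction ADD-then-load sequence.

// Hypothetical sketch of the offset-range checks in GetVertexAddr (not part
// of the patch itself).
#include <cstdint>

enum class HalfwordLoad
{
  Ldurh,        // unscaled load, offset fits in signed 9 bits
  LdrhImm,      // scaled unsigned-immediate load, offset even and <= 8190
  AddThenLdrh,  // materialize the address with ADD, then load with offset 0
};

constexpr bool FitsLdrbImm(uint32_t ofs)
{
  return ofs < 4096;  // 12-bit unsigned byte offset for LDRB
}

constexpr HalfwordLoad SelectHalfwordLoad(uint32_t ofs)
{
  if (ofs < 256)
    return HalfwordLoad::Ldurh;
  if (ofs <= 8190 && (ofs & 1) == 0)
    return HalfwordLoad::LdrhImm;
  return HalfwordLoad::AddThenLdrh;
}

static_assert(SelectHalfwordLoad(200) == HalfwordLoad::Ldurh);
static_assert(SelectHalfwordLoad(4096) == HalfwordLoad::LdrhImm);
static_assert(SelectHalfwordLoad(8191) == HalfwordLoad::AddThenLdrh);

The ReadVertex hunk applies the same instruction-count reasoning: the former ORR (shifted register) + ADD pair is folded into a single ADD with a shifted-register operand, computing position_cache + count * 16 in one instruction.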