From 9b5a6907065639350cb8095ba97d4d08047ff3d6 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Tue, 21 May 2024 10:00:57 -0700 Subject: [PATCH] [a64] Optimize `OPCODE_MEMSET` Use pair stores (`STP`) rather than single stores (`STR`) to write 32 bytes of data at a time. Also zero `Q0` with `MOVI` instead of `EOR`. --- src/xenia/cpu/backend/a64/a64_seq_memory.cc | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/xenia/cpu/backend/a64/a64_seq_memory.cc b/src/xenia/cpu/backend/a64/a64_seq_memory.cc index 8b66c1c68..dc9cfca7e 100644 --- a/src/xenia/cpu/backend/a64/a64_seq_memory.cc +++ b/src/xenia/cpu/backend/a64/a64_seq_memory.cc @@ -1126,22 +1126,17 @@ struct MEMSET_I64_I8_I64 assert_true(i.src2.is_constant); assert_true(i.src3.is_constant); assert_true(i.src2.constant() == 0); - e.EOR(Q0.B16(), Q0.B16(), Q0.B16()); + e.MOVI(Q0.B16(), 0); auto addr_reg = ComputeMemoryAddress(e, i.src1); switch (i.src3.constant()) { case 32: - e.STR(Q0, addr_reg, 0 * 16); - e.STR(Q0, addr_reg, 1 * 16); + e.STP(Q0, Q0, addr_reg, 0 * 16); break; case 128: - e.STR(Q0, addr_reg, 0 * 16); - e.STR(Q0, addr_reg, 1 * 16); - e.STR(Q0, addr_reg, 2 * 16); - e.STR(Q0, addr_reg, 3 * 16); - e.STR(Q0, addr_reg, 4 * 16); - e.STR(Q0, addr_reg, 5 * 16); - e.STR(Q0, addr_reg, 6 * 16); - e.STR(Q0, addr_reg, 7 * 16); + e.STP(Q0, Q0, addr_reg, 0 * 16); + e.STP(Q0, Q0, addr_reg, 2 * 16); + e.STP(Q0, Q0, addr_reg, 4 * 16); + e.STP(Q0, Q0, addr_reg, 6 * 16); break; default: assert_unhandled_case(i.src3.constant());