Smaller ComputeMemoryAddress/Offset sequence

Replace the movzx after setae in both ComputeMemoryAddressOffset and ComputeMemoryAddress with an xor of eax with itself before the cmp (the xor must come first because it overwrites the flags that setae reads). This shortens each sequence by 1 byte and should moderately reduce ICache usage, given how frequently these sequences occur.
2020-01-17 07:28:36 -08:00 · 2020-01-17 07:28:36 -08:00 · 9dfbef8acf
parent 166be463be
commit 9dfbef8acf
1 changed file with 2 additions and 2 deletions
--- a/src/xenia/cpu/backend/x64/x64_seq_memory.cc
+++ b/src/xenia/cpu/backend/x64/x64_seq_memory.cc
@ -52,9 +52,9 @@ RegExp ComputeMemoryAddressOffset(X64Emitter& e, const T& guest,
    if (xe::memory::allocation_granularity() > 0x1000) {
      // Emulate the 4 KB physical address offset in 0xE0000000+ when can't do
      // it via memory mapping.
+      e.xor_(e.eax, e.eax);
      e.cmp(guest.reg().cvt32(), 0xE0000000 - offset_const);
      e.setae(e.al);
-      e.movzx(e.eax, e.al);
      e.shl(e.eax, 12);
      e.add(e.eax, guest.reg().cvt32());
    } else {
@ -89,9 +89,9 @@ RegExp ComputeMemoryAddress(X64Emitter& e, const T& guest) {
    if (xe::memory::allocation_granularity() > 0x1000) {
      // Emulate the 4 KB physical address offset in 0xE0000000+ when can't do
      // it via memory mapping.
+      e.xor_(e.eax, e.eax);
      e.cmp(guest.reg().cvt32(), 0xE0000000);
      e.setae(e.al);
-      e.movzx(e.eax, e.al);
      e.shl(e.eax, 12);
      e.add(e.eax, guest.reg().cvt32());
    } else {