Merge pull request #10162 from JosJuice/jitarm64-memcheck

JitArm64: Implement memcheck
This commit is contained in:
JMC47 2021-11-20 23:36:43 -05:00 committed by GitHub
commit d2ca1037d9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 316 additions and 190 deletions

View File

@ -498,24 +498,41 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external, bool always
B(dispatcher);
}
void JitArm64::WriteConditionalExceptionExit(int exception)
void JitArm64::WriteConditionalExceptionExit(int exception, u64 increment_sp_on_exit)
{
ARM64Reg WA = gpr.GetReg();
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch noException = TBZ(WA, IntLog2(exception));
WriteConditionalExceptionExit(exception, WA, Arm64Gen::ARM64Reg::INVALID_REG,
increment_sp_on_exit);
gpr.Unlock(WA);
}
FixupBranch handleException = B();
SwitchToFarCode();
SetJumpTarget(handleException);
void JitArm64::WriteConditionalExceptionExit(int exception, ARM64Reg temp_gpr, ARM64Reg temp_fpr,
u64 increment_sp_on_exit)
{
LDR(IndexType::Unsigned, temp_gpr, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch no_exception = TBZ(temp_gpr, IntLog2(exception));
gpr.Flush(FlushMode::MaintainState, WA);
fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
const bool switch_to_far_code = !IsInFarCode();
if (switch_to_far_code)
{
FixupBranch handle_exception = B();
SwitchToFarCode();
SetJumpTarget(handle_exception);
}
if (increment_sp_on_exit != 0)
ADDI2R(ARM64Reg::SP, ARM64Reg::SP, increment_sp_on_exit, temp_gpr);
gpr.Flush(FlushMode::MaintainState, temp_gpr);
fpr.Flush(FlushMode::MaintainState, temp_fpr);
WriteExceptionExit(js.compilerPC, false, true);
SwitchToNearCode();
SetJumpTarget(noException);
gpr.Unlock(WA);
if (switch_to_far_code)
SwitchToNearCode();
SetJumpTarget(no_exception);
}
bool JitArm64::HandleFunctionHooking(u32 address)

View File

@ -221,7 +221,7 @@ protected:
BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
// Loadstore routines
void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset);
void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset, bool update);
// If lookup succeeds, writes upper 15 bits of physical address to addr_out. If not,
// jumps to the returned FixupBranch. Clobbers tmp and the 17 lower bits of addr_out.
Arm64Gen::FixupBranch BATAddressLookup(Arm64Gen::ARM64Reg addr_out, Arm64Gen::ARM64Reg addr_in,
@ -265,7 +265,10 @@ protected:
bool always_exception = false);
void WriteExceptionExit(Arm64Gen::ARM64Reg dest, bool only_external = false,
bool always_exception = false);
void WriteConditionalExceptionExit(int exception);
void WriteConditionalExceptionExit(int exception, u64 increment_sp_on_exit = 0);
void WriteConditionalExceptionExit(int exception, Arm64Gen::ARM64Reg temp_gpr,
Arm64Gen::ARM64Reg temp_fpr = Arm64Gen::ARM64Reg::INVALID_REG,
u64 increment_sp_on_exit = 0);
void FakeLKExit(u32 exit_address_after_return);
void WriteBLRExit(Arm64Gen::ARM64Reg dest);

View File

@ -6,6 +6,7 @@
#include <optional>
#include <string>
#include "Common/Align.h"
#include "Common/BitSet.h"
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
@ -15,6 +16,7 @@
#include "Common/Swap.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/Jit_Util.h"
#include "Core/PowerPC/JitArmCommon/BackPatch.h"
@ -56,6 +58,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
ARM64Reg addr, BitSet32 gprs_to_push, BitSet32 fprs_to_push,
bool emitting_routine)
{
const u32 access_size = BackPatchInfo::GetFlagSize(flags);
bool in_far_code = false;
const u8* fastmem_start = GetCodePtr();
std::optional<FixupBranch> slowmem_fixup;
@ -75,11 +79,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
ARM64Reg temp = ARM64Reg::D0;
temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);
m_float_emit.STR(BackPatchInfo::GetFlagSize(flags), temp, MEM_REG, addr);
m_float_emit.STR(access_size, temp, MEM_REG, addr);
}
else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT))
{
m_float_emit.LDR(BackPatchInfo::GetFlagSize(flags), EncodeRegToDouble(RS), MEM_REG, addr);
m_float_emit.LDR(access_size, EncodeRegToDouble(RS), MEM_REG, addr);
ByteswapAfterLoad(this, &m_float_emit, EncodeRegToDouble(RS), EncodeRegToDouble(RS), flags,
true, false);
@ -120,6 +124,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
if (!fastmem || do_farcode)
{
const bool memcheck = jo.memcheck && !emitting_routine;
if (fastmem && do_farcode)
{
in_far_code = true;
@ -136,12 +142,28 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
if (slowmem_fixup)
SetJumpTarget(*slowmem_fixup);
ABI_PushRegisters(gprs_to_push);
const ARM64Reg temp_gpr = flags & BackPatchInfo::FLAG_LOAD ? ARM64Reg::W30 : ARM64Reg::W0;
const int temp_gpr_index = DecodeReg(temp_gpr);
BitSet32 gprs_to_push_early = {};
if (memcheck)
gprs_to_push_early[temp_gpr_index] = true;
if (flags & BackPatchInfo::FLAG_LOAD)
gprs_to_push_early[0] = true;
// If we're already pushing one register in the first PushRegisters call, we can push a
// second one for free. Let's do so, since it might save one instruction in the second
// PushRegisters call. (Do not do this for caller-saved registers which may be in the register
// cache, or else EmitMemcheck will not be able to flush the register cache correctly!)
if (gprs_to_push & gprs_to_push_early)
gprs_to_push_early[30] = true;
ABI_PushRegisters(gprs_to_push & gprs_to_push_early);
ABI_PushRegisters(gprs_to_push & ~gprs_to_push_early);
m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);
if (flags & BackPatchInfo::FLAG_STORE)
{
const u32 access_size = BackPatchInfo::GetFlagSize(flags);
ARM64Reg src_reg = RS;
const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X0 : ARM64Reg::W0;
@ -185,8 +207,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
}
else
{
const u32 access_size = BackPatchInfo::GetFlagSize(flags);
if (access_size == 64)
MOVP2R(ARM64Reg::X8, &PowerPC::Read_U64);
else if (access_size == 32)
@ -197,7 +217,21 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
MOVP2R(ARM64Reg::X8, &PowerPC::Read_U8);
BLR(ARM64Reg::X8);
}
m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
ABI_PopRegisters(gprs_to_push & ~gprs_to_push_early);
if (memcheck)
{
const ARM64Reg temp_fpr = fprs_to_push[0] ? ARM64Reg::INVALID_REG : ARM64Reg::Q0;
const u64 early_push_size = Common::AlignUp(gprs_to_push_early.Count(), 2) * 8;
WriteConditionalExceptionExit(EXCEPTION_DSI, temp_gpr, temp_fpr, early_push_size);
}
if (flags & BackPatchInfo::FLAG_LOAD)
{
ARM64Reg src_reg = access_size == 64 ? ARM64Reg::X0 : ARM64Reg::W0;
if (flags & BackPatchInfo::FLAG_PAIR)
@ -221,8 +255,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
ByteswapAfterLoad(this, &m_float_emit, RS, src_reg, flags, false, false);
}
m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
ABI_PopRegisters(gprs_to_push);
ABI_PopRegisters(gprs_to_push & gprs_to_push_early);
}
if (in_far_code)

View File

@ -26,7 +26,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg);
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
ARM64Reg dest_reg = gpr.R(dest);
ARM64Reg up_reg = ARM64Reg::INVALID_REG;
ARM64Reg off_reg = ARM64Reg::INVALID_REG;
@ -101,19 +101,26 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
ARM64Reg XA = EncodeRegTo64(addr_reg);
if (is_immediate)
MOVI2R(XA, imm_addr);
bool addr_reg_set = !is_immediate;
const auto set_addr_reg_if_needed = [&] {
if (!addr_reg_set)
MOVI2R(XA, imm_addr);
};
if (update)
const bool early_update = !jo.memcheck && dest != static_cast<u32>(addr);
if (update && early_update)
{
gpr.BindToRegister(addr, false);
set_addr_reg_if_needed();
MOV(gpr.R(addr), addr_reg);
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(dest_reg)] = 0;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;
u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@ -122,6 +129,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0));
}
else if (mmio_address)
@ -131,13 +139,25 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, XA, regs_in_use, fprs_in_use);
}
gpr.BindToRegister(dest, false, true);
ASSERT(dest_reg == gpr.R(dest));
if (update && !early_update)
{
gpr.BindToRegister(addr, false);
set_addr_reg_if_needed();
MOV(gpr.R(addr), addr_reg);
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
}
void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset)
void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset,
bool update)
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
@ -152,11 +172,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
if (dest != -1 && !gpr.IsImm(dest))
reg_dest = gpr.R(dest);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
ARM64Reg addr_reg = ARM64Reg::W1;
u32 imm_addr = 0;
@ -222,6 +237,26 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
ARM64Reg XA = EncodeRegTo64(addr_reg);
bool addr_reg_set = !is_immediate;
const auto set_addr_reg_if_needed = [&] {
if (!addr_reg_set)
MOVI2R(XA, imm_addr);
};
const bool early_update = !jo.memcheck && value != static_cast<u32>(dest);
if (update && early_update)
{
gpr.BindToRegister(dest, false);
set_addr_reg_if_needed();
MOV(gpr.R(dest), addr_reg);
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
if (is_immediate)
@ -255,7 +290,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
}
else if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
{
MOVI2R(XA, imm_addr);
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0));
}
else if (mmio_address)
@ -265,12 +300,17 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
}
else
{
if (is_immediate)
MOVI2R(XA, imm_addr);
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, RS, XA, regs_in_use, fprs_in_use);
}
if (update && !early_update)
{
gpr.BindToRegister(dest, false);
set_addr_reg_if_needed();
MOV(gpr.R(dest), addr_reg);
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
}
@ -306,7 +346,6 @@ void JitArm64::lXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
FALLBACK_IF(jo.memcheck);
u32 a = inst.RA, b = inst.RB, d = inst.RD;
s32 offset = inst.SIMM_16;
@ -385,7 +424,6 @@ void JitArm64::stX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
FALLBACK_IF(jo.memcheck);
u32 a = inst.RA, b = inst.RB, s = inst.RS;
s32 offset = inst.SIMM_16;
@ -444,122 +482,104 @@ void JitArm64::stX(UGeckoInstruction inst)
break;
}
SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, flags, offset);
if (update)
{
gpr.BindToRegister(a, false);
ARM64Reg WA = gpr.GetReg();
ARM64Reg RB = {};
ARM64Reg RA = gpr.R(a);
if (regOffset != -1)
RB = gpr.R(regOffset);
if (regOffset == -1)
{
ADDI2R(RA, RA, offset, WA);
}
else
{
ADD(RA, RA, RB);
}
gpr.Unlock(WA);
}
SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, flags, offset, update);
}
void JitArm64::lmw(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
FALLBACK_IF(!jo.fastmem || jo.memcheck);
u32 a = inst.RA;
u32 a = inst.RA, d = inst.RD;
s32 offset = inst.SIMM_16;
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
// MMU games make use of a >= d despite this being invalid according to the PEM.
// Because of this, make sure to not re-read rA after starting doing the loads.
ARM64Reg addr_reg = ARM64Reg::W0;
if (a)
{
ADDI2R(WA, gpr.R(a), inst.SIMM_16, WA);
ADD(XA, XA, MEM_REG);
if (gpr.IsImm(a))
MOVI2R(addr_reg, gpr.GetImm(a) + offset);
else
ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);
}
else
{
ADDI2R(XA, MEM_REG, (u32)(s32)(s16)inst.SIMM_16, XA);
MOVI2R(addr_reg, offset);
}
for (int i = inst.RD; i < 32; i++)
// TODO: This doesn't handle rollback on DSI correctly
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_SIZE_32;
for (u32 i = d; i < 32; i++)
{
int remaining = 32 - i;
if (remaining >= 4)
{
gpr.BindToRegister(i + 3, false);
gpr.BindToRegister(i + 2, false);
gpr.BindToRegister(i + 1, false);
gpr.BindToRegister(i, false);
ARM64Reg RX4 = gpr.R(i + 3);
ARM64Reg RX3 = gpr.R(i + 2);
ARM64Reg RX2 = gpr.R(i + 1);
ARM64Reg RX1 = gpr.R(i);
LDP(IndexType::Post, EncodeRegTo64(RX1), EncodeRegTo64(RX3), XA, 16);
REV32(EncodeRegTo64(RX1), EncodeRegTo64(RX1));
REV32(EncodeRegTo64(RX3), EncodeRegTo64(RX3));
LSR(EncodeRegTo64(RX2), EncodeRegTo64(RX1), 32);
LSR(EncodeRegTo64(RX4), EncodeRegTo64(RX3), 32);
i += 3;
}
else if (remaining >= 2)
{
gpr.BindToRegister(i + 1, false);
gpr.BindToRegister(i, false);
ARM64Reg RX2 = gpr.R(i + 1);
ARM64Reg RX1 = gpr.R(i);
LDP(IndexType::Post, RX1, RX2, XA, 8);
REV32(RX1, RX1);
REV32(RX2, RX2);
++i;
}
else
{
gpr.BindToRegister(i, false);
ARM64Reg RX = gpr.R(i);
LDR(IndexType::Post, RX, XA, 4);
REV32(RX, RX);
}
gpr.BindToRegister(i, false, false);
ARM64Reg dest_reg = gpr.R(i);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (i == 31)
regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, EncodeRegTo64(addr_reg),
regs_in_use, fprs_in_use);
gpr.BindToRegister(i, false, true);
ASSERT(dest_reg == gpr.R(i));
if (i != 31)
ADD(addr_reg, addr_reg, 4);
}
gpr.Unlock(WA);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
}
void JitArm64::stmw(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
FALLBACK_IF(!jo.fastmem || jo.memcheck);
u32 a = inst.RA;
u32 a = inst.RA, s = inst.RS;
s32 offset = inst.SIMM_16;
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
ARM64Reg WB = gpr.GetReg();
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
ARM64Reg addr_reg = ARM64Reg::W1;
if (a)
{
ADDI2R(WA, gpr.R(a), inst.SIMM_16, WA);
ADD(XA, XA, MEM_REG);
if (gpr.IsImm(a))
MOVI2R(addr_reg, gpr.GetImm(a) + offset);
else
ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);
}
else
{
ADDI2R(XA, MEM_REG, (u32)(s32)(s16)inst.SIMM_16, XA);
MOVI2R(addr_reg, offset);
}
for (int i = inst.RD; i < 32; i++)
// TODO: This doesn't handle rollback on DSI correctly
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_SIZE_32;
for (u32 i = s; i < 32; i++)
{
ARM64Reg RX = gpr.R(i);
REV32(WB, RX);
STR(IndexType::Unsigned, WB, XA, (i - inst.RD) * 4);
ARM64Reg src_reg = gpr.R(i);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (i == 31)
regs_in_use[DecodeReg(addr_reg)] = 0;
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, src_reg, EncodeRegTo64(addr_reg),
regs_in_use, fprs_in_use);
if (i != 31)
ADD(addr_reg, addr_reg, 4);
}
gpr.Unlock(WA, WB);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
}
void JitArm64::dcbx(UGeckoInstruction inst)
@ -743,7 +763,6 @@ void JitArm64::dcbz(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
FALLBACK_IF(jo.memcheck || !jo.fastmem_arena);
FALLBACK_IF(SConfig::GetInstance().bLowDCBZHack);
int a = inst.RA, b = inst.RB;
@ -796,7 +815,7 @@ void JitArm64::dcbz(UGeckoInstruction inst)
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, true, true, ARM64Reg::W0,
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, jo.fastmem, jo.fastmem, ARM64Reg::W0,
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);

View File

@ -21,7 +21,6 @@ void JitArm64::lfXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
FALLBACK_IF(jo.memcheck);
u32 a = inst.RA, b = inst.RB;
@ -80,7 +79,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
const ARM64Reg VD = fpr.RW(inst.FD, type);
const ARM64Reg VD = fpr.RW(inst.FD, type, false);
ARM64Reg addr_reg = ARM64Reg::W0;
if (update)
@ -155,7 +154,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
if (is_immediate)
MOVI2R(XA, imm_addr);
if (update)
const bool early_update = !jo.memcheck;
if (update && early_update)
{
gpr.BindToRegister(a, false);
MOV(gpr.R(a), addr_reg);
@ -163,9 +163,11 @@ void JitArm64::lfXX(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
fprs_in_use[DecodeReg(VD)] = 0;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VD)] = 0;
if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
{
@ -176,6 +178,15 @@ void JitArm64::lfXX(UGeckoInstruction inst)
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VD, XA, regs_in_use, fprs_in_use);
}
const ARM64Reg VD_again = fpr.RW(inst.FD, type, true);
ASSERT(VD == VD_again);
if (update && !early_update)
{
gpr.BindToRegister(a, false);
MOV(gpr.R(a), addr_reg);
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
}
@ -184,7 +195,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
FALLBACK_IF(jo.memcheck);
u32 a = inst.RA, b = inst.RB;
@ -334,26 +344,25 @@ void JitArm64::stfXX(UGeckoInstruction inst)
ARM64Reg XA = EncodeRegTo64(addr_reg);
if (is_immediate && !(jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr)))
{
MOVI2R(XA, imm_addr);
bool addr_reg_set = !is_immediate;
const auto set_addr_reg_if_needed = [&] {
if (!addr_reg_set)
MOVI2R(XA, imm_addr);
};
if (update)
{
gpr.BindToRegister(a, false);
MOV(gpr.R(a), addr_reg);
}
}
else if (!is_immediate && update)
const bool early_update = !jo.memcheck;
if (update && early_update)
{
gpr.BindToRegister(a, false);
set_addr_reg_if_needed();
MOV(gpr.R(a), addr_reg);
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (is_immediate)
@ -378,28 +387,31 @@ void JitArm64::stfXX(UGeckoInstruction inst)
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
js.fifoBytesSinceCheck += accessSize >> 3;
if (update)
{
// Chance of this happening is fairly low, but support it
gpr.BindToRegister(a, false);
MOVI2R(gpr.R(a), imm_addr);
}
}
else if (jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(imm_addr))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, true, false, V0, XA, BitSet32(0), BitSet32(0));
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, false, false, V0, XA, regs_in_use, fprs_in_use);
}
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, V0, XA, regs_in_use, fprs_in_use);
}
if (update && !early_update)
{
gpr.BindToRegister(a, false);
set_addr_reg_if_needed();
MOV(gpr.R(a), addr_reg);
}
if (want_single && !have_single)
fpr.Unlock(V0);

View File

@ -8,6 +8,7 @@
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/PPCTables.h"
@ -19,7 +20,6 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(jo.memcheck);
// If we have a fastmem arena, the asm routines assume address translation is on.
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem_arena && !MSR.DR);
@ -47,7 +47,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
constexpr ARM64Reg addr_reg = ARM64Reg::W0;
constexpr ARM64Reg scale_reg = ARM64Reg::W1;
constexpr ARM64Reg type_reg = ARM64Reg::W2;
ARM64Reg VS = fpr.RW(inst.RS, RegType::Single);
ARM64Reg VS = fpr.RW(inst.RS, RegType::Single, false);
if (inst.RA || update) // Always uses the register on update
{
@ -66,7 +66,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
MOVI2R(addr_reg, (u32)offset);
}
if (update)
const bool early_update = !jo.memcheck;
if (update && early_update)
{
gpr.BindToRegister(inst.RA, false);
MOV(gpr.R(inst.RA), addr_reg);
@ -78,9 +79,11 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
// Wipe the registers we are using as temporaries
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
fprs_in_use[DecodeReg(VS)] = 0;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VS)] = 0;
u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
@ -99,6 +102,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
LDR(EncodeRegTo64(type_reg), ARM64Reg::X30, ArithOption(EncodeRegTo64(type_reg), true));
BLR(EncodeRegTo64(type_reg));
WriteConditionalExceptionExit(EXCEPTION_DSI, ARM64Reg::X30, ARM64Reg::Q1);
m_float_emit.ORR(EncodeRegToDouble(VS), ARM64Reg::D0, ARM64Reg::D0);
}
@ -108,6 +113,15 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
m_float_emit.INS(32, VS, 1, ARM64Reg::Q0, 0);
}
const ARM64Reg VS_again = fpr.RW(inst.RS, RegType::Single, true);
ASSERT(VS == VS_again);
if (update && !early_update)
{
gpr.BindToRegister(inst.RA, false);
MOV(gpr.R(inst.RA), addr_reg);
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
@ -121,7 +135,6 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(jo.memcheck);
// If we have a fastmem arena, the asm routines assume address translation is on.
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem_arena && !MSR.DR);
@ -198,7 +211,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
MOVI2R(addr_reg, (u32)offset);
}
if (update)
const bool early_update = !jo.memcheck;
if (update && early_update)
{
gpr.BindToRegister(inst.RA, false);
MOV(gpr.R(inst.RA), addr_reg);
@ -211,7 +225,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
// Wipe the registers we are using as temporaries
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
@ -229,6 +244,14 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
MOVP2R(ARM64Reg::X30, w ? single_store_quantized : paired_store_quantized);
LDR(EncodeRegTo64(type_reg), ARM64Reg::X30, ArithOption(EncodeRegTo64(type_reg), true));
BLR(EncodeRegTo64(type_reg));
WriteConditionalExceptionExit(EXCEPTION_DSI, ARM64Reg::X30, ARM64Reg::Q1);
}
if (update && !early_update)
{
gpr.BindToRegister(inst.RA, false);
MOV(gpr.R(inst.RA), addr_reg);
}
if (js.assumeNoPairedQuantize && !have_single)

View File

@ -343,25 +343,29 @@ void Arm64GPRCache::SetImmediate(const GuestRegInfo& guest_reg, u32 imm)
reg.LoadToImm(imm);
}
void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool do_load)
void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool do_load, bool set_dirty)
{
OpArg& reg = guest_reg.reg;
const size_t bitsize = guest_reg.bitsize;
reg.ResetLastUsed();
reg.SetDirty(true);
const RegType reg_type = reg.GetType();
if (reg_type == RegType::NotLoaded || reg_type == RegType::Discarded)
{
const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
reg.Load(host_reg);
reg.SetDirty(set_dirty);
if (do_load)
{
ASSERT_MSG(DYNA_REC, reg_type != RegType::Discarded, "Attempted to load a discarded value");
m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
}
}
else if (set_dirty)
{
reg.SetDirty(true);
}
}
void Arm64GPRCache::GetAllocationOrder()
@ -570,26 +574,15 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
return ARM64Reg::INVALID_REG;
}
ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type, bool set_dirty)
{
OpArg& reg = m_guest_registers[preg];
bool was_dirty = reg.IsDirty();
IncrementAllUsed();
reg.ResetLastUsed();
reg.SetDirty(true);
// If not loaded at all, just alloc a new one.
if (reg.GetType() == RegType::NotLoaded || reg.GetType() == RegType::Discarded)
{
reg.Load(GetReg(), type);
return reg.GetReg();
}
// Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty.
if ((type == RegType::LowerPair || type == RegType::LowerPairSingle) && was_dirty)
if (reg.IsDirty() && (type == RegType::LowerPair || type == RegType::LowerPairSingle))
{
// We must *not* change host_reg as this register might still be in use. So it's fine to
// store this register, but it's *not* fine to convert it to double. So for double conversion,
@ -612,6 +605,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
m_jit->ConvertSingleToDoubleLower(preg, flush_reg, flush_reg, scratch_reg);
m_float_emit->STR(64, IndexType::Unsigned, flush_reg, PPC_REG, u32(PPCSTATE_OFF_PS1(preg)));
Unlock(scratch_reg);
reg.Load(host_reg, RegType::LowerPairSingle);
break;
}
else
@ -619,6 +613,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
m_jit->ConvertSingleToDoublePair(preg, flush_reg, host_reg, flush_reg);
m_float_emit->STR(128, IndexType::Unsigned, flush_reg, PPC_REG,
u32(PPCSTATE_OFF_PS0(preg)));
reg.SetDirty(false);
}
break;
case RegType::Register:
@ -627,6 +622,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
// It would take longer to do an insert to a temporary and a 64bit store than to just do this.
m_float_emit->STR(128, IndexType::Unsigned, flush_reg, PPC_REG,
static_cast<s32>(PPCSTATE_OFF_PS0(preg)));
reg.SetDirty(false);
break;
case RegType::DuplicatedSingle:
flush_reg = GetReg();
@ -636,6 +632,8 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
// Store PSR1 (which is equal to PSR0) in memory.
m_float_emit->STR(64, IndexType::Unsigned, flush_reg, PPC_REG,
static_cast<s32>(PPCSTATE_OFF_PS1(preg)));
reg.Load(host_reg, reg.GetType() == RegType::DuplicatedSingle ? RegType::LowerPairSingle :
RegType::LowerPair);
break;
default:
// All other types doesn't store anything in PSR1.
@ -646,7 +644,18 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
Unlock(flush_reg);
}
reg.Load(reg.GetReg(), type);
if (reg.GetType() == RegType::NotLoaded || reg.GetType() == RegType::Discarded)
{
// If not loaded at all, just alloc a new one.
reg.Load(GetReg(), type);
reg.SetDirty(set_dirty);
}
else if (set_dirty)
{
reg.Load(reg.GetReg(), type);
reg.SetDirty(true);
}
return reg.GetReg();
}

View File

@ -266,9 +266,15 @@ public:
// Gets the immediate that a register is set to, only valid for guest GPRs
u32 GetImm(size_t preg) const { return GetGuestGPROpArg(preg).GetImm(); }
// Binds a guest GPR to a host register, optionally loading its value
void BindToRegister(size_t preg, bool do_load) { BindToRegister(GetGuestGPR(preg), do_load); }
void BindToRegister(size_t preg, bool do_load, bool set_dirty = true)
{
BindToRegister(GetGuestGPR(preg), do_load, set_dirty);
}
// Binds a guest CR to a host register, optionally loading its value
void BindCRToRegister(size_t preg, bool do_load) { BindToRegister(GetGuestCR(preg), do_load); }
void BindCRToRegister(size_t preg, bool do_load, bool set_dirty = true)
{
BindToRegister(GetGuestCR(preg), do_load, set_dirty);
}
BitSet32 GetCallerSavedUsed() const override;
void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG)
@ -307,7 +313,7 @@ private:
Arm64Gen::ARM64Reg R(const GuestRegInfo& guest_reg);
void SetImmediate(const GuestRegInfo& guest_reg, u32 imm);
void BindToRegister(const GuestRegInfo& guest_reg, bool do_load);
void BindToRegister(const GuestRegInfo& guest_reg, bool do_load, bool set_dirty = true);
void FlushRegisters(BitSet32 regs, bool maintain_state, Arm64Gen::ARM64Reg tmp_reg);
void FlushCRRegisters(BitSet32 regs, bool maintain_state, Arm64Gen::ARM64Reg tmp_reg);
@ -326,7 +332,7 @@ public:
// Will dump an immediate to the host register as well
Arm64Gen::ARM64Reg R(size_t preg, RegType type);
Arm64Gen::ARM64Reg RW(size_t preg, RegType type);
Arm64Gen::ARM64Reg RW(size_t preg, RegType type, bool set_dirty = true);
BitSet32 GetCallerSavedUsed() const override;

View File

@ -495,7 +495,9 @@ void JitArm64::GenerateQuantizedLoads()
// Q1 is a temporary
ARM64Reg addr_reg = ARM64Reg::X0;
ARM64Reg scale_reg = ARM64Reg::X1;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2, 3};
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{2, 3};
if (!jo.memcheck)
gprs_to_push &= ~BitSet32{0};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
ARM64FloatEmitter float_emit(this);
@ -524,8 +526,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -542,8 +544,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -559,8 +561,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -576,8 +578,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -605,8 +607,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -623,8 +625,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -640,8 +642,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -657,8 +659,8 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
@ -701,7 +703,9 @@ void JitArm64::GenerateQuantizedStores()
// Q1 is a temporary
ARM64Reg scale_reg = ARM64Reg::X0;
ARM64Reg addr_reg = ARM64Reg::X1;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1, 2};
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2};
if (!jo.memcheck)
gprs_to_push &= ~BitSet32{1};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
ARM64FloatEmitter float_emit(this);