JitArm64: Implement indexed paired loadstore instructions

Commit 23b81ef was written without realizing that the indexed paired
loadstore instructions (psq_lx, psq_stx, psq_lux, psq_stux) had not
actually been implemented in JitArm64 yet. This commit implements them.
This commit is contained in:
JosJuice 2021-08-17 11:29:38 +02:00
parent d162015112
commit b24b79e373
3 changed files with 33 additions and 26 deletions

View File

@ -159,8 +159,8 @@ public:
void ps_cmpXX(UGeckoInstruction inst); void ps_cmpXX(UGeckoInstruction inst);
// Loadstore paired // Loadstore paired
void psq_l(UGeckoInstruction inst); void psq_lXX(UGeckoInstruction inst);
void psq_st(UGeckoInstruction inst); void psq_stXX(UGeckoInstruction inst);
void ConvertDoubleToSingleLower(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg, void ConvertDoubleToSingleLower(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg,
Arm64Gen::ARM64Reg src_reg); Arm64Gen::ARM64Reg src_reg);

View File

@ -15,7 +15,7 @@
using namespace Arm64Gen; using namespace Arm64Gen;
void JitArm64::psq_l(UGeckoInstruction inst) void JitArm64::psq_lXX(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff); JITDISABLE(bJITLoadStorePairedOff);
@ -39,7 +39,6 @@ void JitArm64::psq_l(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1); fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
const ARM64Reg arm_addr = gpr.R(inst.RA);
constexpr ARM64Reg scale_reg = ARM64Reg::W0; constexpr ARM64Reg scale_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1; constexpr ARM64Reg addr_reg = ARM64Reg::W1;
constexpr ARM64Reg type_reg = ARM64Reg::W2; constexpr ARM64Reg type_reg = ARM64Reg::W2;
@ -47,20 +46,25 @@ void JitArm64::psq_l(UGeckoInstruction inst)
if (inst.RA || update) // Always uses the register on update if (inst.RA || update) // Always uses the register on update
{ {
if (offset >= 0) if (indexed)
ADD(addr_reg, arm_addr, offset); ADD(addr_reg, gpr.R(inst.RA), gpr.R(inst.RB));
else if (offset >= 0)
ADD(addr_reg, gpr.R(inst.RA), offset);
else else
SUB(addr_reg, arm_addr, std::abs(offset)); SUB(addr_reg, gpr.R(inst.RA), std::abs(offset));
} }
else else
{ {
MOVI2R(addr_reg, (u32)offset); if (indexed)
MOV(addr_reg, gpr.R(inst.RB));
else
MOVI2R(addr_reg, (u32)offset);
} }
if (update) if (update)
{ {
gpr.BindToRegister(inst.RA, true); gpr.BindToRegister(inst.RA, false);
MOV(arm_addr, addr_reg); MOV(gpr.R(inst.RA), addr_reg);
} }
if (js.assumeNoPairedQuantize) if (js.assumeNoPairedQuantize)
@ -101,7 +105,7 @@ void JitArm64::psq_l(UGeckoInstruction inst)
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1); fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
} }
void JitArm64::psq_st(UGeckoInstruction inst) void JitArm64::psq_stXX(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff); JITDISABLE(bJITLoadStorePairedOff);
@ -158,8 +162,6 @@ void JitArm64::psq_st(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
const ARM64Reg arm_addr = gpr.R(inst.RA);
constexpr ARM64Reg scale_reg = ARM64Reg::W0; constexpr ARM64Reg scale_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1; constexpr ARM64Reg addr_reg = ARM64Reg::W1;
constexpr ARM64Reg type_reg = ARM64Reg::W2; constexpr ARM64Reg type_reg = ARM64Reg::W2;
@ -173,20 +175,25 @@ void JitArm64::psq_st(UGeckoInstruction inst)
if (inst.RA || update) // Always uses the register on update if (inst.RA || update) // Always uses the register on update
{ {
if (offset >= 0) if (indexed)
ADD(addr_reg, gpr.R(inst.RA), gpr.R(inst.RB));
else if (offset >= 0)
ADD(addr_reg, gpr.R(inst.RA), offset); ADD(addr_reg, gpr.R(inst.RA), offset);
else else
SUB(addr_reg, gpr.R(inst.RA), std::abs(offset)); SUB(addr_reg, gpr.R(inst.RA), std::abs(offset));
} }
else else
{ {
MOVI2R(addr_reg, (u32)offset); if (indexed)
MOV(addr_reg, gpr.R(inst.RB));
else
MOVI2R(addr_reg, (u32)offset);
} }
if (update) if (update)
{ {
gpr.BindToRegister(inst.RA, true); gpr.BindToRegister(inst.RA, false);
MOV(arm_addr, addr_reg); MOV(gpr.R(inst.RA), addr_reg);
} }
if (js.assumeNoPairedQuantize) if (js.assumeNoPairedQuantize)

View File

@ -78,12 +78,12 @@ constexpr std::array<GekkoOPTemplate, 54> primarytable{{
{54, &JitArm64::stfXX}, // stfd {54, &JitArm64::stfXX}, // stfd
{55, &JitArm64::stfXX}, // stfdu {55, &JitArm64::stfXX}, // stfdu
{56, &JitArm64::psq_l}, // psq_l {56, &JitArm64::psq_lXX}, // psq_l
{57, &JitArm64::psq_l}, // psq_lu {57, &JitArm64::psq_lXX}, // psq_lu
{60, &JitArm64::psq_st}, // psq_st {60, &JitArm64::psq_stXX}, // psq_st
{61, &JitArm64::psq_st}, // psq_stu {61, &JitArm64::psq_stXX}, // psq_stu
// missing: 0, 1, 2, 5, 6, 9, 22, 30, 62, 58 // missing: 0, 1, 2, 5, 6, 9, 22, 30, 58, 62
}}; }};
constexpr std::array<GekkoOPTemplate, 13> table4{{ constexpr std::array<GekkoOPTemplate, 13> table4{{
@ -125,10 +125,10 @@ constexpr std::array<GekkoOPTemplate, 17> table4_2{{
}}; }};
constexpr std::array<GekkoOPTemplate, 4> table4_3{{ constexpr std::array<GekkoOPTemplate, 4> table4_3{{
{6, &JitArm64::FallBackToInterpreter}, // psq_lx {6, &JitArm64::psq_lXX}, // psq_lx
{7, &JitArm64::FallBackToInterpreter}, // psq_stx {7, &JitArm64::psq_stXX}, // psq_stx
{38, &JitArm64::FallBackToInterpreter}, // psq_lux {38, &JitArm64::psq_lXX}, // psq_lux
{39, &JitArm64::FallBackToInterpreter}, // psq_stux {39, &JitArm64::psq_stXX}, // psq_stux
}}; }};
constexpr std::array<GekkoOPTemplate, 13> table19{{ constexpr std::array<GekkoOPTemplate, 13> table19{{