JitArm64: Implement FPRF updates
This commit is contained in:
parent
749db94dec
commit
bfe8b1068d
|
@ -399,6 +399,7 @@ union UReg_MSR
|
||||||
};
|
};
|
||||||
|
|
||||||
#define FPRF_SHIFT 12
|
#define FPRF_SHIFT 12
|
||||||
|
#define FPRF_WIDTH 5
|
||||||
#define FPRF_MASK (0x1F << FPRF_SHIFT)
|
#define FPRF_MASK (0x1F << FPRF_SHIFT)
|
||||||
|
|
||||||
// FPSCR exception flags
|
// FPSCR exception flags
|
||||||
|
|
|
@ -234,6 +234,7 @@ protected:
|
||||||
void GenerateCommonAsm();
|
void GenerateCommonAsm();
|
||||||
void GenerateConvertDoubleToSingle();
|
void GenerateConvertDoubleToSingle();
|
||||||
void GenerateConvertSingleToDouble();
|
void GenerateConvertSingleToDouble();
|
||||||
|
void GenerateFPRF(bool single);
|
||||||
void GenerateQuantizedLoadStores();
|
void GenerateQuantizedLoadStores();
|
||||||
|
|
||||||
// Profiling
|
// Profiling
|
||||||
|
@ -262,6 +263,8 @@ protected:
|
||||||
Arm64Gen::ARM64Reg),
|
Arm64Gen::ARM64Reg),
|
||||||
bool Rc = false);
|
bool Rc = false);
|
||||||
|
|
||||||
|
void SetFPRFIfNeeded(bool single, Arm64Gen::ARM64Reg reg);
|
||||||
|
|
||||||
// <Fastmem fault location, slowmem handler location>
|
// <Fastmem fault location, slowmem handler location>
|
||||||
std::map<const u8*, FastmemArea> m_fault_to_handler;
|
std::map<const u8*, FastmemArea> m_fault_to_handler;
|
||||||
std::map<SlowmemHandler, const u8*> m_handler_to_loc;
|
std::map<SlowmemHandler, const u8*> m_handler_to_loc;
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "Core/ConfigManager.h"
|
#include "Core/ConfigManager.h"
|
||||||
#include "Core/Core.h"
|
#include "Core/Core.h"
|
||||||
#include "Core/CoreTiming.h"
|
#include "Core/CoreTiming.h"
|
||||||
|
#include "Core/PowerPC/Gekko.h"
|
||||||
#include "Core/PowerPC/JitArm64/Jit.h"
|
#include "Core/PowerPC/JitArm64/Jit.h"
|
||||||
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
|
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
|
||||||
#include "Core/PowerPC/PPCTables.h"
|
#include "Core/PowerPC/PPCTables.h"
|
||||||
|
@ -16,12 +17,25 @@
|
||||||
|
|
||||||
using namespace Arm64Gen;
|
using namespace Arm64Gen;
|
||||||
|
|
||||||
|
void JitArm64::SetFPRFIfNeeded(bool single, ARM64Reg reg)
|
||||||
|
{
|
||||||
|
if (!SConfig::GetInstance().bFPRF || !js.op->wantsFPRF)
|
||||||
|
return;
|
||||||
|
|
||||||
|
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
|
||||||
|
|
||||||
|
reg = single ? EncodeRegToSingle(reg) : EncodeRegToDouble(reg);
|
||||||
|
m_float_emit.FMOV(single ? ARM64Reg::W0 : ARM64Reg::X0, reg);
|
||||||
|
BL(single ? GetAsmRoutines()->fprf_single : GetAsmRoutines()->fprf_double);
|
||||||
|
|
||||||
|
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W4, ARM64Reg::W30);
|
||||||
|
}
|
||||||
|
|
||||||
void JitArm64::fp_arith(UGeckoInstruction inst)
|
void JitArm64::fp_arith(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITFloatingPointOff);
|
JITDISABLE(bJITFloatingPointOff);
|
||||||
FALLBACK_IF(inst.Rc);
|
FALLBACK_IF(inst.Rc);
|
||||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
|
||||||
|
|
||||||
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
|
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
|
||||||
u32 op5 = inst.SUBOP5;
|
u32 op5 = inst.SUBOP5;
|
||||||
|
@ -120,13 +134,17 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (single || packed)
|
const bool outputs_are_singles = single || packed;
|
||||||
|
|
||||||
|
if (outputs_are_singles)
|
||||||
{
|
{
|
||||||
ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(),
|
ASSERT_MSG(DYNA_REC, inputs_are_singles == inputs_are_singles_func(),
|
||||||
"Register allocation turned singles into doubles in the middle of fp_arith");
|
"Register allocation turned singles into doubles in the middle of fp_arith");
|
||||||
|
|
||||||
fpr.FixSinglePrecision(d);
|
fpr.FixSinglePrecision(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SetFPRFIfNeeded(outputs_are_singles, VD);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::fp_logic(UGeckoInstruction inst)
|
void JitArm64::fp_logic(UGeckoInstruction inst)
|
||||||
|
@ -252,7 +270,6 @@ void JitArm64::frspx(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITFloatingPointOff);
|
JITDISABLE(bJITFloatingPointOff);
|
||||||
FALLBACK_IF(inst.Rc);
|
FALLBACK_IF(inst.Rc);
|
||||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
|
||||||
|
|
||||||
const u32 b = inst.FB;
|
const u32 b = inst.FB;
|
||||||
const u32 d = inst.FD;
|
const u32 d = inst.FD;
|
||||||
|
@ -269,6 +286,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
|
||||||
|
|
||||||
ASSERT_MSG(DYNA_REC, fpr.IsSingle(b, true),
|
ASSERT_MSG(DYNA_REC, fpr.IsSingle(b, true),
|
||||||
"Register allocation turned singles into doubles in the middle of frspx");
|
"Register allocation turned singles into doubles in the middle of frspx");
|
||||||
|
|
||||||
|
SetFPRFIfNeeded(true, VD);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -276,6 +295,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
|
||||||
const ARM64Reg VD = fpr.RW(d, RegType::DuplicatedSingle);
|
const ARM64Reg VD = fpr.RW(d, RegType::DuplicatedSingle);
|
||||||
|
|
||||||
m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
|
m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
|
||||||
|
|
||||||
|
SetFPRFIfNeeded(true, VD);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -283,7 +304,8 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITFloatingPointOff);
|
JITDISABLE(bJITFloatingPointOff);
|
||||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
|
||||||
|
const bool fprf = SConfig::GetInstance().bFPRF && js.op->wantsFPRF;
|
||||||
|
|
||||||
const u32 a = inst.FA;
|
const u32 a = inst.FA;
|
||||||
const u32 b = inst.FB;
|
const u32 b = inst.FB;
|
||||||
|
@ -299,6 +321,14 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
||||||
gpr.BindCRToRegister(crf, false);
|
gpr.BindCRToRegister(crf, false);
|
||||||
const ARM64Reg XA = gpr.CR(crf);
|
const ARM64Reg XA = gpr.CR(crf);
|
||||||
|
|
||||||
|
ARM64Reg fpscr_reg;
|
||||||
|
if (fprf)
|
||||||
|
{
|
||||||
|
fpscr_reg = gpr.GetReg();
|
||||||
|
LDR(IndexType::Unsigned, fpscr_reg, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||||
|
ANDI2R(fpscr_reg, fpscr_reg, ~FPRF_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
FixupBranch pNaN, pLesser, pGreater;
|
FixupBranch pNaN, pLesser, pGreater;
|
||||||
FixupBranch continue1, continue2, continue3;
|
FixupBranch continue1, continue2, continue3;
|
||||||
ORR(XA, ARM64Reg::ZR, 32, 0, true);
|
ORR(XA, ARM64Reg::ZR, 32, 0, true);
|
||||||
|
@ -317,11 +347,16 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
||||||
|
|
||||||
// A == B
|
// A == B
|
||||||
ORR(XA, XA, 64 - 63, 0, true);
|
ORR(XA, XA, 64 - 63, 0, true);
|
||||||
|
if (fprf)
|
||||||
|
ORRI2R(fpscr_reg, fpscr_reg, PowerPC::CR_EQ << FPRF_SHIFT);
|
||||||
|
|
||||||
continue1 = B();
|
continue1 = B();
|
||||||
|
|
||||||
SetJumpTarget(pNaN);
|
SetJumpTarget(pNaN);
|
||||||
|
|
||||||
MOVI2R(XA, PowerPC::ConditionRegister::PPCToInternal(PowerPC::CR_SO));
|
MOVI2R(XA, PowerPC::ConditionRegister::PPCToInternal(PowerPC::CR_SO));
|
||||||
|
if (fprf)
|
||||||
|
ORRI2R(fpscr_reg, fpscr_reg, PowerPC::CR_SO << FPRF_SHIFT);
|
||||||
|
|
||||||
if (a != b)
|
if (a != b)
|
||||||
{
|
{
|
||||||
|
@ -329,12 +364,16 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
||||||
|
|
||||||
SetJumpTarget(pGreater);
|
SetJumpTarget(pGreater);
|
||||||
ORR(XA, XA, 0, 0, true);
|
ORR(XA, XA, 0, 0, true);
|
||||||
|
if (fprf)
|
||||||
|
ORRI2R(fpscr_reg, fpscr_reg, PowerPC::CR_GT << FPRF_SHIFT);
|
||||||
|
|
||||||
continue3 = B();
|
continue3 = B();
|
||||||
|
|
||||||
SetJumpTarget(pLesser);
|
SetJumpTarget(pLesser);
|
||||||
ORR(XA, XA, 64 - 62, 1, true);
|
ORR(XA, XA, 64 - 62, 1, true);
|
||||||
ORR(XA, XA, 0, 0, true);
|
ORR(XA, XA, 0, 0, true);
|
||||||
|
if (fprf)
|
||||||
|
ORRI2R(fpscr_reg, fpscr_reg, PowerPC::CR_LT << FPRF_SHIFT);
|
||||||
|
|
||||||
SetJumpTarget(continue2);
|
SetJumpTarget(continue2);
|
||||||
SetJumpTarget(continue3);
|
SetJumpTarget(continue3);
|
||||||
|
@ -343,6 +382,12 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
||||||
|
|
||||||
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a, true) && fpr.IsSingle(b, true)),
|
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a, true) && fpr.IsSingle(b, true)),
|
||||||
"Register allocation turned singles into doubles in the middle of fcmpX");
|
"Register allocation turned singles into doubles in the middle of fcmpX");
|
||||||
|
|
||||||
|
if (fprf)
|
||||||
|
{
|
||||||
|
STR(IndexType::Unsigned, fpscr_reg, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||||
|
gpr.Unlock(fpscr_reg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::fctiwzx(UGeckoInstruction inst)
|
void JitArm64::fctiwzx(UGeckoInstruction inst)
|
||||||
|
@ -371,12 +416,12 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const ARM64Reg V1 = gpr.GetReg();
|
const ARM64Reg WA = gpr.GetReg();
|
||||||
|
|
||||||
m_float_emit.FCVTS(V1, EncodeRegToDouble(VB), RoundingMode::Z);
|
m_float_emit.FCVTS(WA, EncodeRegToDouble(VB), RoundingMode::Z);
|
||||||
m_float_emit.FMOV(EncodeRegToSingle(VD), V1);
|
m_float_emit.FMOV(EncodeRegToSingle(VD), WA);
|
||||||
|
|
||||||
gpr.Unlock(V1);
|
gpr.Unlock(WA);
|
||||||
}
|
}
|
||||||
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
|
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
|
||||||
fpr.Unlock(V0);
|
fpr.Unlock(V0);
|
||||||
|
|
|
@ -76,7 +76,6 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITPairedOff);
|
JITDISABLE(bJITPairedOff);
|
||||||
FALLBACK_IF(inst.Rc);
|
FALLBACK_IF(inst.Rc);
|
||||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
|
||||||
|
|
||||||
const u32 a = inst.FA;
|
const u32 a = inst.FA;
|
||||||
const u32 c = inst.FC;
|
const u32 c = inst.FC;
|
||||||
|
@ -99,6 +98,8 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)
|
||||||
"Register allocation turned singles into doubles in the middle of ps_mulsX");
|
"Register allocation turned singles into doubles in the middle of ps_mulsX");
|
||||||
|
|
||||||
fpr.FixSinglePrecision(d);
|
fpr.FixSinglePrecision(d);
|
||||||
|
|
||||||
|
SetFPRFIfNeeded(true, VD);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::ps_maddXX(UGeckoInstruction inst)
|
void JitArm64::ps_maddXX(UGeckoInstruction inst)
|
||||||
|
@ -106,7 +107,6 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITPairedOff);
|
JITDISABLE(bJITPairedOff);
|
||||||
FALLBACK_IF(inst.Rc);
|
FALLBACK_IF(inst.Rc);
|
||||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
|
||||||
|
|
||||||
const u32 a = inst.FA;
|
const u32 a = inst.FA;
|
||||||
const u32 b = inst.FB;
|
const u32 b = inst.FB;
|
||||||
|
@ -257,13 +257,15 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (V0Q != ARM64Reg::INVALID_REG)
|
||||||
|
fpr.Unlock(V0Q);
|
||||||
|
|
||||||
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
|
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
|
||||||
"Register allocation turned singles into doubles in the middle of ps_maddXX");
|
"Register allocation turned singles into doubles in the middle of ps_maddXX");
|
||||||
|
|
||||||
fpr.FixSinglePrecision(d);
|
fpr.FixSinglePrecision(d);
|
||||||
|
|
||||||
if (V0Q != ARM64Reg::INVALID_REG)
|
SetFPRFIfNeeded(true, VD);
|
||||||
fpr.Unlock(V0Q);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::ps_sel(UGeckoInstruction inst)
|
void JitArm64::ps_sel(UGeckoInstruction inst)
|
||||||
|
@ -311,7 +313,6 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITPairedOff);
|
JITDISABLE(bJITPairedOff);
|
||||||
FALLBACK_IF(inst.Rc);
|
FALLBACK_IF(inst.Rc);
|
||||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
|
||||||
|
|
||||||
const u32 a = inst.FA;
|
const u32 a = inst.FA;
|
||||||
const u32 b = inst.FB;
|
const u32 b = inst.FB;
|
||||||
|
@ -343,10 +344,12 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
|
||||||
m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0);
|
m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fpr.Unlock(V0);
|
||||||
|
|
||||||
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
|
ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)),
|
||||||
"Register allocation turned singles into doubles in the middle of ps_sumX");
|
"Register allocation turned singles into doubles in the middle of ps_sumX");
|
||||||
|
|
||||||
fpr.FixSinglePrecision(d);
|
fpr.FixSinglePrecision(d);
|
||||||
|
|
||||||
fpr.Unlock(V0);
|
SetFPRFIfNeeded(true, VD);
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,11 +4,14 @@
|
||||||
|
|
||||||
#include "Common/Arm64Emitter.h"
|
#include "Common/Arm64Emitter.h"
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
|
#include "Common/FloatUtils.h"
|
||||||
#include "Common/JitRegister.h"
|
#include "Common/JitRegister.h"
|
||||||
#include "Common/MathUtil.h"
|
#include "Common/MathUtil.h"
|
||||||
|
|
||||||
#include "Core/CoreTiming.h"
|
#include "Core/CoreTiming.h"
|
||||||
#include "Core/HW/CPU.h"
|
#include "Core/HW/CPU.h"
|
||||||
#include "Core/HW/Memmap.h"
|
#include "Core/HW/Memmap.h"
|
||||||
|
#include "Core/PowerPC/Gekko.h"
|
||||||
#include "Core/PowerPC/JitArm64/Jit.h"
|
#include "Core/PowerPC/JitArm64/Jit.h"
|
||||||
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
|
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
|
||||||
#include "Core/PowerPC/JitCommon/JitCache.h"
|
#include "Core/PowerPC/JitCommon/JitCache.h"
|
||||||
|
@ -203,6 +206,12 @@ void JitArm64::GenerateCommonAsm()
|
||||||
GenerateConvertSingleToDouble();
|
GenerateConvertSingleToDouble();
|
||||||
JitRegister::Register(GetAsmRoutines()->cstd, GetCodePtr(), "JIT_cstd");
|
JitRegister::Register(GetAsmRoutines()->cstd, GetCodePtr(), "JIT_cstd");
|
||||||
|
|
||||||
|
GetAsmRoutines()->fprf_single = GetCodePtr();
|
||||||
|
GenerateFPRF(true);
|
||||||
|
GetAsmRoutines()->fprf_double = GetCodePtr();
|
||||||
|
GenerateFPRF(false);
|
||||||
|
JitRegister::Register(GetAsmRoutines()->fprf_single, GetCodePtr(), "JIT_FPRF");
|
||||||
|
|
||||||
GenerateQuantizedLoadStores();
|
GenerateQuantizedLoadStores();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -272,6 +281,91 @@ void JitArm64::GenerateConvertSingleToDouble()
|
||||||
RET();
|
RET();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Input in X0. Outputs to memory (PPCState). Clobbers X0-X4 and flags.
|
||||||
|
void JitArm64::GenerateFPRF(bool single)
|
||||||
|
{
|
||||||
|
const auto reg_encoder = single ? EncodeRegTo32 : EncodeRegTo64;
|
||||||
|
|
||||||
|
const ARM64Reg input_reg = reg_encoder(ARM64Reg::W0);
|
||||||
|
const ARM64Reg temp_reg = reg_encoder(ARM64Reg::W1);
|
||||||
|
const ARM64Reg exp_reg = reg_encoder(ARM64Reg::W2);
|
||||||
|
|
||||||
|
constexpr ARM64Reg fprf_reg = ARM64Reg::W3;
|
||||||
|
constexpr ARM64Reg fpscr_reg = ARM64Reg::W4;
|
||||||
|
|
||||||
|
const auto INPUT_EXP_MASK = single ? Common::FLOAT_EXP : Common::DOUBLE_EXP;
|
||||||
|
const auto INPUT_FRAC_MASK = single ? Common::FLOAT_FRAC : Common::DOUBLE_FRAC;
|
||||||
|
constexpr u32 OUTPUT_SIGN_MASK = 0xC;
|
||||||
|
|
||||||
|
// This code is duplicated for the most common cases for performance.
|
||||||
|
// For the less common cases, we branch to an existing copy of this code.
|
||||||
|
auto emit_write_fprf_and_ret = [&] {
|
||||||
|
BFI(fpscr_reg, fprf_reg, FPRF_SHIFT, FPRF_WIDTH);
|
||||||
|
STR(IndexType::Unsigned, fpscr_reg, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||||
|
RET();
|
||||||
|
};
|
||||||
|
|
||||||
|
// First of all, start the load of the old FPSCR value, in case it takes a while
|
||||||
|
LDR(IndexType::Unsigned, fpscr_reg, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||||
|
|
||||||
|
CMP(input_reg, 0); // Grab sign bit (conveniently the same bit for floats as for integers)
|
||||||
|
ANDI2R(exp_reg, input_reg, INPUT_EXP_MASK); // Grab exponent
|
||||||
|
|
||||||
|
// Most branches handle the sign in the same way. Perform that handling before branching
|
||||||
|
MOVI2R(ARM64Reg::W3, Common::PPC_FPCLASS_PN);
|
||||||
|
MOVI2R(ARM64Reg::W1, Common::PPC_FPCLASS_NN);
|
||||||
|
CSEL(fprf_reg, ARM64Reg::W1, ARM64Reg::W3, CCFlags::CC_LT);
|
||||||
|
|
||||||
|
FixupBranch zero_or_denormal = CBZ(exp_reg);
|
||||||
|
|
||||||
|
// exp != 0
|
||||||
|
MOVI2R(temp_reg, INPUT_EXP_MASK);
|
||||||
|
CMP(exp_reg, temp_reg);
|
||||||
|
FixupBranch nan_or_inf = B(CCFlags::CC_EQ);
|
||||||
|
|
||||||
|
// exp != 0 && exp != EXP_MASK
|
||||||
|
const u8* normal = GetCodePtr();
|
||||||
|
emit_write_fprf_and_ret();
|
||||||
|
|
||||||
|
// exp == 0
|
||||||
|
SetJumpTarget(zero_or_denormal);
|
||||||
|
TSTI2R(input_reg, INPUT_FRAC_MASK);
|
||||||
|
FixupBranch denormal;
|
||||||
|
if (single)
|
||||||
|
{
|
||||||
|
// To match the interpreter, what we output should be based on how the input would be classified
|
||||||
|
// after conversion to double. Converting a denormal single to a double always results in a
|
||||||
|
// normal double, so for denormal singles we need to output PPC_FPCLASS_PN/PPC_FPCLASS_NN.
|
||||||
|
// TODO: Hardware test that the interpreter actually is correct.
|
||||||
|
B(CCFlags::CC_NEQ, normal);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
denormal = B(CCFlags::CC_NEQ);
|
||||||
|
}
|
||||||
|
|
||||||
|
// exp == 0 && frac == 0
|
||||||
|
LSR(ARM64Reg::W1, fprf_reg, 3);
|
||||||
|
MOVI2R(fprf_reg, Common::PPC_FPCLASS_PZ & ~OUTPUT_SIGN_MASK);
|
||||||
|
BFI(fprf_reg, ARM64Reg::W1, 4, 1);
|
||||||
|
const u8* write_fprf_and_ret = GetCodePtr();
|
||||||
|
emit_write_fprf_and_ret();
|
||||||
|
|
||||||
|
// exp == 0 && frac != 0
|
||||||
|
if (!single)
|
||||||
|
SetJumpTarget(denormal);
|
||||||
|
ORRI2R(fprf_reg, fprf_reg, Common::PPC_FPCLASS_PD & ~OUTPUT_SIGN_MASK);
|
||||||
|
B(write_fprf_and_ret);
|
||||||
|
|
||||||
|
// exp == EXP_MASK
|
||||||
|
SetJumpTarget(nan_or_inf);
|
||||||
|
TSTI2R(input_reg, INPUT_FRAC_MASK);
|
||||||
|
ORRI2R(ARM64Reg::W1, fprf_reg, Common::PPC_FPCLASS_PINF & ~OUTPUT_SIGN_MASK);
|
||||||
|
MOVI2R(ARM64Reg::W2, Common::PPC_FPCLASS_QNAN);
|
||||||
|
CSEL(fprf_reg, ARM64Reg::W1, ARM64Reg::W2, CCFlags::CC_EQ);
|
||||||
|
B(write_fprf_and_ret);
|
||||||
|
}
|
||||||
|
|
||||||
void JitArm64::GenerateQuantizedLoadStores()
|
void JitArm64::GenerateQuantizedLoadStores()
|
||||||
{
|
{
|
||||||
// X0 is the scale
|
// X0 is the scale
|
||||||
|
|
|
@ -27,6 +27,8 @@ struct CommonAsmRoutinesBase
|
||||||
const u8* mfcr;
|
const u8* mfcr;
|
||||||
const u8* cdts;
|
const u8* cdts;
|
||||||
const u8* cstd;
|
const u8* cstd;
|
||||||
|
const u8* fprf_single;
|
||||||
|
const u8* fprf_double;
|
||||||
|
|
||||||
// In: array index: GQR to use.
|
// In: array index: GQR to use.
|
||||||
// In: ECX: Address to read from.
|
// In: ECX: Address to read from.
|
||||||
|
|
Loading…
Reference in New Issue