[AArch64] Implement fdivx/fdivsx/mfcr/mtcrf.
Gets the povray benchmark to better times than on the Wii.
This commit is contained in:
parent
d96be9250c
commit
0666c0750b
|
@ -119,6 +119,8 @@ public:
|
|||
void mftb(UGeckoInstruction inst);
|
||||
void mtspr(UGeckoInstruction inst);
|
||||
void crXXX(UGeckoInstruction inst);
|
||||
void mfcr(UGeckoInstruction inst);
|
||||
void mtcrf(UGeckoInstruction inst);
|
||||
|
||||
// LoadStore
|
||||
void lXX(UGeckoInstruction inst);
|
||||
|
@ -154,6 +156,8 @@ public:
|
|||
void fcmpx(UGeckoInstruction inst);
|
||||
void frspx(UGeckoInstruction inst);
|
||||
void fctiwzx(UGeckoInstruction inst);
|
||||
void fdivx(UGeckoInstruction inst);
|
||||
void fdivsx(UGeckoInstruction inst);
|
||||
|
||||
// Paired
|
||||
void ps_abs(UGeckoInstruction inst);
|
||||
|
|
|
@ -593,3 +593,46 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
|
|||
}
|
||||
fpr.Unlock(V0);
|
||||
}
|
||||
|
||||
// Emits AArch64 code for the PowerPC fdivx instruction: FD = FA / FB,
// computed as a double-precision divide.
// The record form (Rc set) is not handled here and goes through FALLBACK_IF.
void JitArm64::fdivx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const u32 src_a = inst.FA;
	const u32 src_b = inst.FB;
	const u32 dest = inst.FD;

	// The destination is bound before the host registers are queried.
	// NOTE(review): the `true` argument presumably requests that the current
	// guest value be loaded - confirm against the FPR cache.
	fpr.BindToRegister(dest, true);

	const ARM64Reg reg_a = fpr.R(src_a);
	const ARM64Reg reg_b = fpr.R(src_b);
	const ARM64Reg reg_d = fpr.R(dest);

	if (!fpr.IsLower(dest))
	{
		// Divide into a scratch register, then insert only the 64-bit
		// element 0 into the destination so the rest of reg_d is untouched.
		const ARM64Reg scratch = fpr.GetReg();
		m_float_emit.FDIV(EncodeRegToDouble(scratch),
		                  EncodeRegToDouble(reg_a),
		                  EncodeRegToDouble(reg_b));
		m_float_emit.INS(64, reg_d, 0, scratch, 0);
		fpr.Unlock(scratch);
	}
	else
	{
		// The cache reports the destination as "lower", so the quotient is
		// written straight into it.
		m_float_emit.FDIV(EncodeRegToDouble(reg_d),
		                  EncodeRegToDouble(reg_a),
		                  EncodeRegToDouble(reg_b));
	}
}
|
||||
|
||||
// Emits AArch64 code for the PowerPC fdivsx instruction: FD = FA / FB,
// with the quotient copied into both 64-bit elements of the destination.
// The record form (Rc set) is not handled here and goes through FALLBACK_IF.
void JitArm64::fdivsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	const u32 src_a = inst.FA;
	const u32 src_b = inst.FB;
	const u32 dest = inst.FD;

	// The second BindToRegister argument is true only when the destination
	// is also one of the operands.
	// NOTE(review): the meaning of the trailing `false` here and in
	// fpr.R(dest, false) is not visible in this file - confirm against the
	// FPR cache.
	const bool dest_is_operand = (dest == src_a) || (dest == src_b);
	fpr.BindToRegister(dest, dest_is_operand, false);

	const ARM64Reg reg_a = fpr.R(src_a);
	const ARM64Reg reg_b = fpr.R(src_b);
	const ARM64Reg reg_d = fpr.R(dest, false);

	m_float_emit.FDIV(EncodeRegToDouble(reg_d),
	                  EncodeRegToDouble(reg_a),
	                  EncodeRegToDouble(reg_b));

	// Duplicate the quotient: element 0 of reg_d is inserted into element 1.
	m_float_emit.INS(64, reg_d, 1, reg_d, 0);
}
|
||||
|
|
|
@ -579,3 +579,56 @@ void JitArm64::crXXX(UGeckoInstruction inst)
|
|||
gpr.Unlock(WA);
|
||||
gpr.Unlock(WB);
|
||||
}
|
||||
|
||||
// Emits AArch64 code for the PowerPC mfcr instruction by calling the shared
// asm_routines.mfcr routine and moving its W0 result into guest register RD.
void JitArm64::mfcr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	// Reserve every host register involved in the call: X0 holds the call
	// target (W0 is read afterwards as the result), W1/W2 are locked as
	// scratch for the routine, and W30 is the link register written by BLR.
	gpr.Lock(W0, W1, W2, W30);
	MOVI2R(X0, (u64)asm_routines.mfcr);
	BLR(X0);
	gpr.Unlock(W1, W2, W30);

	// W0 stays locked until the result has been copied out.
	const u32 dest = inst.RD;
	gpr.BindToRegister(dest, false);
	const ARM64Reg dest_reg = gpr.R(dest);
	MOV(dest_reg, W0);

	gpr.Unlock(W0);
}
|
||||
|
||||
// Emits AArch64 code for the PowerPC mtcrf instruction.
//
// For each CR field selected by the 8-bit CRM mask (mask bit 0x80 selects
// field 0), the matching 4-bit nibble of RS is extracted, used as an index
// into m_crTable, and the 64-bit table entry is stored into that field's
// cr_val slot in the PPC state (addressed relative to X29).
// Nothing is emitted when CRM is zero.
void JitArm64::mtcrf(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITSystemRegistersOff);

	const u32 crm = inst.CRM;
	if (crm == 0)
		return;

	ARM64Reg RS = gpr.R(inst.RS);
	ARM64Reg WA = gpr.GetReg();
	ARM64Reg XA = EncodeRegTo64(WA);
	ARM64Reg WB = gpr.GetReg();
	ARM64Reg XB = EncodeRegTo64(WB);

	// XB holds the table base for every selected field.
	MOVI2R(XB, (u64)m_crTable);

	for (int i = 0; i < 8; ++i)
	{
		if ((crm & (0x80 >> i)) == 0)
			continue;

		// Field i occupies bits [31 - 4*i : 28 - 4*i] of RS. A single UBFX
		// extracts the nibble for every field: for i == 0 it is the same
		// operation as LSR #28, and for i == 7 it is UBFX #0, #4. This
		// replaces the previous LSR + UBFX pair emitted for fields 1..6.
		UBFX(WA, RS, 28 - i * 4, 4);

		// Writing WA zero-extends into XA, so XA is a clean table index.
		// NOTE(review): ArithOption(XA, true) is presumably a register
		// index scaled by the 8-byte element size - confirm in the emitter.
		LDR(XA, XB, ArithOption(XA, true));
		STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * i);
	}

	gpr.Unlock(WA, WB);
}
|
||||
|
||||
|
|
|
@ -286,9 +286,9 @@ static GekkoOPTemplate table31[] =
|
|||
{759, &JitArm64::stfXX}, // stfdux
|
||||
{983, &JitArm64::FallBackToInterpreter}, // stfiwx
|
||||
|
||||
{19, &JitArm64::FallBackToInterpreter}, // mfcr
|
||||
{19, &JitArm64::mfcr}, // mfcr
|
||||
{83, &JitArm64::mfmsr}, // mfmsr
|
||||
{144, &JitArm64::FallBackToInterpreter}, // mtcrf
|
||||
{144, &JitArm64::mtcrf}, // mtcrf
|
||||
{146, &JitArm64::mtmsr}, // mtmsr
|
||||
{210, &JitArm64::mtsr}, // mtsr
|
||||
{242, &JitArm64::mtsrin}, // mtsrin
|
||||
|
@ -313,7 +313,7 @@ static GekkoOPTemplate table31[] =
|
|||
|
||||
static GekkoOPTemplate table59[] =
|
||||
{
|
||||
{18, &JitArm64::FallBackToInterpreter}, // fdivsx
|
||||
{18, &JitArm64::fdivsx}, // fdivsx
|
||||
{20, &JitArm64::fsubsx}, // fsubsx
|
||||
{21, &JitArm64::faddsx}, // faddsx
|
||||
{24, &JitArm64::FallBackToInterpreter}, // fresx
|
||||
|
@ -346,7 +346,7 @@ static GekkoOPTemplate table63[] =
|
|||
|
||||
static GekkoOPTemplate table63_2[] =
|
||||
{
|
||||
{18, &JitArm64::FallBackToInterpreter}, // fdivx
|
||||
{18, &JitArm64::fdivx}, // fdivx
|
||||
{20, &JitArm64::fsubx}, // fsubx
|
||||
{21, &JitArm64::faddx}, // faddx
|
||||
{23, &JitArm64::fselx}, // fselx
|
||||
|
|
|
@ -569,4 +569,47 @@ void JitArm64AsmRoutineManager::GenerateCommon()
|
|||
pairedStoreQuantized[30] = storeSingleS8Slow;
|
||||
pairedStoreQuantized[31] = storeSingleS16Slow;
|
||||
|
||||
mfcr = AlignCode16();
|
||||
GenMfcr();
|
||||
}
|
||||
|
||||
void JitArm64AsmRoutineManager::GenMfcr()
|
||||
{
|
||||
// Input: Nothing
|
||||
// Returns: W0
|
||||
// Clobbers: X1, X2
|
||||
const u8* start = GetCodePtr();
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
LDR(INDEX_UNSIGNED, X1, X29, PPCSTATE_OFF(cr_val) + 8 * i);
|
||||
|
||||
// SO
|
||||
if (i == 0)
|
||||
{
|
||||
UBFX(X0, X1, 61, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
ORR(W0, WZR, W0, ArithOption(W0, ST_LSL, 4));
|
||||
UBFX(X2, X1, 61, 1);
|
||||
ORR(X0, X0, X2);
|
||||
}
|
||||
|
||||
// EQ
|
||||
ORR(W2, W0, 32 - 1, 0); // W0 | 1<<1
|
||||
CMP(W1, WZR);
|
||||
CSEL(W0, W2, W0, CC_EQ);
|
||||
|
||||
// GT
|
||||
ORR(W2, W0, 32 - 2, 0); // W0 | 1<<2
|
||||
CMP(X1, ZR);
|
||||
CSEL(W0, W2, W0, CC_GT);
|
||||
|
||||
// LT
|
||||
UBFX(X2, X1, 62, 1);
|
||||
ORR(W0, W0, W2, ArithOption(W2, ST_LSL, 3));
|
||||
}
|
||||
|
||||
RET(X30);
|
||||
JitRegister::Register(start, GetCodePtr(), "JIT_Mfcr");
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@ class JitArm64AsmRoutineManager : public CommonAsmRoutinesBase, public Arm64Gen:
|
|||
private:
|
||||
void Generate();
|
||||
void GenerateCommon();
|
||||
void GenMfcr();
|
||||
|
||||
public:
|
||||
void Init()
|
||||
|
|
Loading…
Reference in New Issue