JitArm64: Optimize ConvertDoubleToSingle

This commit is contained in:
JosJuice 2021-01-24 20:18:43 +01:00
parent 6e0a5876ef
commit 28e4869c43
3 changed files with 43 additions and 10 deletions

View File

@ -220,6 +220,8 @@ private:
// AsmRoutines
// Generators for assembly routines that are emitted up front.
void GenerateAsm();
// Emits the common routines: the double-to-single conversion routine followed by the
// quantized load/store routines.
void GenerateCommonAsm();
// Emits the routine whose entry point is stored in `cdts` (input in X0, output in W1).
void GenerateConvertDoubleToSingle();
// Emits the quantized load/store routines; called from GenerateCommonAsm().
void GenerateQuantizedLoadStores();
// Profiling
void BeginTimeProfile(JitBlock* b);

View File

@ -397,12 +397,12 @@ void JitArm64::ConvertDoubleToSingleLower(ARM64Reg dest_reg, ARM64Reg src_reg)
{
// Emits code that takes the 64-bit value in lane 0 of src_reg, converts it to a 32-bit
// single-precision bit pattern, and inserts the result into 32-bit lane 0 of dest_reg.
//
// NOTE(review): this listing looks like a rendered diff with the +/- markers stripped, so
// each pair of near-identical adjacent lines below is presumably "removed line, added line".
// Confirm against the real file before treating both as live code.
FlushCarry();
// Older form: save every caller-saved GPR currently in use.
const BitSet32 gpr_saved = gpr.GetCallerSavedUsed();
// Newer form: only save the in-use registers among X0-X3 and X30. X0-X3 are what the cdts
// routine is documented to clobber; X30 is presumably included because BL overwrites the
// link register.
const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30};
ABI_PushRegisters(gpr_saved);
// Move the 64-bit lane 0 of the source vector register into X0 (the conversion input).
m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 0);
// Older form: call the ConvertToSingle function and take the result from W0.
QuickCallFunction(ARM64Reg::X1, &ConvertToSingle);
m_float_emit.INS(32, dest_reg, 0, ARM64Reg::W0);
// Newer form: branch-and-link to the cdts routine and take the result from W1.
BL(cdts);
m_float_emit.INS(32, dest_reg, 0, ARM64Reg::W1);
ABI_PopRegisters(gpr_saved);
}
@ -411,16 +411,16 @@ void JitArm64::ConvertDoubleToSinglePair(ARM64Reg dest_reg, ARM64Reg src_reg)
{
// Emits code that converts both 64-bit lanes of src_reg to 32-bit single-precision bit
// patterns and inserts them into 32-bit lanes 0 and 1 of dest_reg, one lane at a time.
//
// NOTE(review): this listing looks like a rendered diff with the +/- markers stripped, so
// each pair of near-identical adjacent lines below is presumably "removed line, added line".
// Confirm against the real file before treating both as live code.
FlushCarry();
// Older form: save every caller-saved GPR currently in use.
const BitSet32 gpr_saved = gpr.GetCallerSavedUsed();
// Newer form: only save the in-use registers among X0-X3 and X30. X0-X3 are what the cdts
// routine is documented to clobber; X30 is presumably included because BL overwrites the
// link register.
const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30};
ABI_PushRegisters(gpr_saved);
// Lane 0: load the input into X0, convert, store into dest lane 0.
m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 0);
// Older form: call ConvertToSingle, result in W0.
QuickCallFunction(ARM64Reg::X1, &ConvertToSingle);
m_float_emit.INS(32, dest_reg, 0, ARM64Reg::W0);
// Newer form: call the cdts routine, result in W1.
BL(cdts);
m_float_emit.INS(32, dest_reg, 0, ARM64Reg::W1);
// Lane 1: X0 has to be reloaded because the previous call does not preserve it.
m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 1);
// Older form: call ConvertToSingle, result in W0.
QuickCallFunction(ARM64Reg::X1, &ConvertToSingle);
m_float_emit.INS(32, dest_reg, 1, ARM64Reg::W0);
// Newer form: call the cdts routine, result in W1.
BL(cdts);
m_float_emit.INS(32, dest_reg, 1, ARM64Reg::W1);
ABI_PopRegisters(gpr_saved);
}

View File

@ -194,6 +194,39 @@ void JitArm64::GenerateAsm()
}
void JitArm64::GenerateCommonAsm()
{
  // The double-to-single conversion routine is emitted first. Remember where it begins so
  // the same address can be published as the routine's entry point and used as the start of
  // the registered code range.
  const auto cdts_begin = GetCodePtr();
  GetAsmRoutines()->cdts = cdts_begin;
  GenerateConvertDoubleToSingle();
  // The emitter's current position is now the end of the routine.
  JitRegister::Register(cdts_begin, GetCodePtr(), "JIT_cdts");

  // The quantized load/store routines follow.
  GenerateQuantizedLoadStores();
}
// Input in X0, output in W1, clobbers X0-X3 and flags.
//
// Emits a routine that turns the bit pattern of a double (in X0) into the bit pattern of a
// single (in W1) using integer operations only.
//
// The comments below assume each emitter call maps onto the AArch64 instruction of the same
// name (ANDI2R/ORRI2R being AND/ORR with an immediate) - confirm against the emitter.
void JitArm64::GenerateConvertDoubleToSingle()
{
// X2 = bits 52..62 of the input, i.e. the 11-bit biased exponent of the double.
UBFX(ARM64Reg::X2, ARM64Reg::X0, 52, 11);
// Unsigned range check: (exponent - 874) <= 22 holds exactly when 874 <= exponent <= 896.
// 874 = 1023 - 149 and 896 = 1023 - 127, so this selects magnitudes that fall in the
// single-precision denormal range.
SUB(ARM64Reg::W3, ARM64Reg::W2, 874);
CMP(ARM64Reg::W3, 896 - 874);
// X1 = upper 32 bits of the input. Computed before the branch because both paths read it.
LSR(ARM64Reg::X1, ARM64Reg::X0, 32);
FixupBranch denormal = B(CCFlags::CC_LS);
// Every other exponent value: build the result by repacking bits.
// Keep only bits 31 and 30 of X1, which are bits 63 (sign) and 62 (top exponent bit) of
// the input.
ANDI2R(ARM64Reg::X1, ARM64Reg::X1, 0xc0000000);
// Copy input bits 29..58 (the low 7 exponent bits and the top 23 mantissa bits) into bits
// 0..29 of X1.
BFXIL(ARM64Reg::X1, ARM64Reg::X0, 29, 30);
RET();
// Denormal path: the result has a zero exponent field and a right-shifted mantissa.
SetJumpTarget(denormal);
// W3 = input bits 21..52: mantissa bits 51..21 in bits 30..0, exponent LSB in bit 31.
LSR(ARM64Reg::X3, ARM64Reg::X0, 21);
// The input in X0 is no longer needed, so X0 is reused to hold the constant 905.
MOVZ(ARM64Reg::X0, 905);
// Force bit 31 to 1 so it acts as the explicit leading one of the mantissa.
ORRI2R(ARM64Reg::W3, ARM64Reg::W3, 0x80000000);
// Shift amount = 905 - exponent, which is 9..31 for exponents 896..874.
SUB(ARM64Reg::W2, ARM64Reg::W0, ARM64Reg::W2);
// W2 = mantissa (with leading one) shifted right into its denormal position.
LSRV(ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W2);
// X3 = sign bit only (bit 31 of the upper input word held in X1).
ANDI2R(ARM64Reg::X3, ARM64Reg::X1, 0x80000000);
// Result = sign | shifted mantissa.
ORR(ARM64Reg::X1, ARM64Reg::X3, ARM64Reg::X2);
RET();
}
void JitArm64::GenerateQuantizedLoadStores()
{
// X0 is the scale
// X1 is address
@ -654,6 +687,4 @@ void JitArm64::GenerateCommonAsm()
paired_store_quantized[29] = storeSingleU16Slow;
paired_store_quantized[30] = storeSingleS8Slow;
paired_store_quantized[31] = storeSingleS16Slow;
GetAsmRoutines()->mfcr = nullptr;
}