Merge pull request #8158 from Sintendo/jitopts

x64 micro-optimizations
This commit is contained in:
Tilka 2020-01-06 14:09:43 +01:00 committed by GitHub
commit 98f645daac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 12 additions and 41 deletions

View File

@ -701,9 +701,7 @@ void DSPEmitter::addi(const UDSPInstruction opc)
// s64 imm = (s16)dsp_fetch_code();
s16 imm = dsp_imem_read(m_compile_pc + 1);
// imm <<= 16;
MOV(16, R(RDX), Imm16(imm));
MOVSX(64, 16, RDX, R(RDX));
SHL(64, R(RDX), Imm8(16));
MOV(64, R(RDX), Imm32(imm << 16));
// s64 res = acc + imm;
ADD(64, R(RAX), R(RDX));
// dsp_set_long_acc(areg, res);
@ -737,9 +735,8 @@ void DSPEmitter::addis(const UDSPInstruction opc)
MOV(64, R(RAX), R(tmp1));
// s64 imm = (s8)(u8)opc;
// imm <<= 16;
MOV(8, R(RDX), Imm8((u8)opc));
MOVSX(64, 8, RDX, R(RDX));
SHL(64, R(RDX), Imm8(16));
s32 imm = static_cast<u8>(opc) << 24 >> 8;
MOV(64, R(RDX), Imm32(imm));
// s64 res = acc + imm;
ADD(64, R(RAX), R(RDX));
// dsp_set_long_acc(dreg, res);

View File

@ -317,12 +317,11 @@ void DSPEmitter::increment_addr_reg(int reg)
m_gpr.PutReg(DSP_REG_WR0 + reg, false);
const OpArg ar_reg = m_gpr.GetReg(DSP_REG_AR0 + reg);
MOVZX(32, 16, EAX, ar_reg);
X64Reg tmp1 = m_gpr.GetFreeXReg();
MOVZX(32, 16, tmp1, ar_reg);
// u32 nar = ar + 1;
MOV(32, R(tmp1), R(EAX));
ADD(32, R(EAX), Imm8(1));
LEA(32, EAX, MDisp(tmp1, 1));
// if ((nar ^ ar) > ((wr | 1) << 1))
// nar -= wr + 1;
@ -666,28 +665,12 @@ void DSPEmitter::get_long_prod(X64Reg long_prod)
}
// Returns s64 in RAX
// Clobbers RCX
void DSPEmitter::get_long_prod_round_prodl(X64Reg long_prod)
{
// s64 prod = dsp_get_long_prod();
get_long_prod(long_prod);
X64Reg tmp = m_gpr.GetFreeXReg();
// if (prod & 0x10000) prod = (prod + 0x8000) & ~0xffff;
TEST(32, R(long_prod), Imm32(0x10000));
FixupBranch jump = J_CC(CC_Z);
ADD(64, R(long_prod), Imm32(0x8000));
MOV(64, R(tmp), Imm64(~0xffff));
AND(64, R(long_prod), R(tmp));
FixupBranch _ret = J();
// else prod = (prod + 0x7fff) & ~0xffff;
SetJumpTarget(jump);
ADD(64, R(long_prod), Imm32(0x7fff));
MOV(64, R(tmp), Imm64(~0xffff));
AND(64, R(long_prod), R(tmp));
SetJumpTarget(_ret);
// return prod;
m_gpr.PutXReg(tmp);
round_long_acc(long_prod);
}
// For accurate emulation, this is wrong - but the real prod registers behave
@ -708,22 +691,13 @@ void DSPEmitter::set_long_prod()
}
// Returns s64 in RAX
// Clobbers RCX
void DSPEmitter::round_long_acc(X64Reg long_acc)
{
// if (prod & 0x10000) prod = (prod + 0x8000) & ~0xffff;
TEST(32, R(long_acc), Imm32(0x10000));
FixupBranch jump = J_CC(CC_Z);
ADD(64, R(long_acc), Imm32(0x8000));
MOV(64, R(ECX), Imm64(~0xffff));
AND(64, R(long_acc), R(RCX));
FixupBranch _ret = J();
// else prod = (prod + 0x7fff) & ~0xffff;
SetJumpTarget(jump);
ADD(64, R(long_acc), Imm32(0x7fff));
MOV(64, R(RCX), Imm64(~0xffff));
AND(64, R(long_acc), R(RCX));
SetJumpTarget(_ret);
BT(32, R(long_acc), Imm8(16));
ADC(64, R(long_acc), Imm32(0x7FFF));
XOR(16, R(long_acc), R(long_acc));
// return prod;
}

View File

@ -101,8 +101,8 @@ void CommonAsmRoutines::GenFrsqrte()
MOVQ_xmm(XMM0, R(RSCRATCH));
RET();
SetJumpTarget(inf);
BT(64, R(RSCRATCH), Imm8(63));
FixupBranch negative = J_CC(CC_C);
TEST(64, R(RSCRATCH), R(RSCRATCH));
FixupBranch negative = J_CC(CC_S);
XORPD(XMM0, R(XMM0));
RET();