Merge pull request #8158 from Sintendo/jitopts

x64 micro-optimizations
This commit is contained in:
Tilka 2020-01-06 14:09:43 +01:00 committed by GitHub
commit 98f645daac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 12 additions and 41 deletions

View File

@@ -701,9 +701,7 @@ void DSPEmitter::addi(const UDSPInstruction opc)
// s64 imm = (s16)dsp_fetch_code(); // s64 imm = (s16)dsp_fetch_code();
s16 imm = dsp_imem_read(m_compile_pc + 1); s16 imm = dsp_imem_read(m_compile_pc + 1);
// imm <<= 16; // imm <<= 16;
MOV(16, R(RDX), Imm16(imm)); MOV(64, R(RDX), Imm32(imm << 16));
MOVSX(64, 16, RDX, R(RDX));
SHL(64, R(RDX), Imm8(16));
// s64 res = acc + imm; // s64 res = acc + imm;
ADD(64, R(RAX), R(RDX)); ADD(64, R(RAX), R(RDX));
// dsp_set_long_acc(areg, res); // dsp_set_long_acc(areg, res);
@@ -737,9 +735,8 @@ void DSPEmitter::addis(const UDSPInstruction opc)
MOV(64, R(RAX), R(tmp1)); MOV(64, R(RAX), R(tmp1));
// s64 imm = (s8)(u8)opc; // s64 imm = (s8)(u8)opc;
// imm <<= 16; // imm <<= 16;
MOV(8, R(RDX), Imm8((u8)opc)); s32 imm = static_cast<u8>(opc) << 24 >> 8;
MOVSX(64, 8, RDX, R(RDX)); MOV(64, R(RDX), Imm32(imm));
SHL(64, R(RDX), Imm8(16));
// s64 res = acc + imm; // s64 res = acc + imm;
ADD(64, R(RAX), R(RDX)); ADD(64, R(RAX), R(RDX));
// dsp_set_long_acc(dreg, res); // dsp_set_long_acc(dreg, res);

View File

@@ -317,12 +317,11 @@ void DSPEmitter::increment_addr_reg(int reg)
m_gpr.PutReg(DSP_REG_WR0 + reg, false); m_gpr.PutReg(DSP_REG_WR0 + reg, false);
const OpArg ar_reg = m_gpr.GetReg(DSP_REG_AR0 + reg); const OpArg ar_reg = m_gpr.GetReg(DSP_REG_AR0 + reg);
MOVZX(32, 16, EAX, ar_reg);
X64Reg tmp1 = m_gpr.GetFreeXReg(); X64Reg tmp1 = m_gpr.GetFreeXReg();
MOVZX(32, 16, tmp1, ar_reg);
// u32 nar = ar + 1; // u32 nar = ar + 1;
MOV(32, R(tmp1), R(EAX)); LEA(32, EAX, MDisp(tmp1, 1));
ADD(32, R(EAX), Imm8(1));
// if ((nar ^ ar) > ((wr | 1) << 1)) // if ((nar ^ ar) > ((wr | 1) << 1))
// nar -= wr + 1; // nar -= wr + 1;
@@ -666,28 +665,12 @@ void DSPEmitter::get_long_prod(X64Reg long_prod)
} }
// Returns s64 in RAX // Returns s64 in RAX
// Clobbers RCX
void DSPEmitter::get_long_prod_round_prodl(X64Reg long_prod) void DSPEmitter::get_long_prod_round_prodl(X64Reg long_prod)
{ {
// s64 prod = dsp_get_long_prod(); // s64 prod = dsp_get_long_prod();
get_long_prod(long_prod); get_long_prod(long_prod);
X64Reg tmp = m_gpr.GetFreeXReg(); round_long_acc(long_prod);
// if (prod & 0x10000) prod = (prod + 0x8000) & ~0xffff;
TEST(32, R(long_prod), Imm32(0x10000));
FixupBranch jump = J_CC(CC_Z);
ADD(64, R(long_prod), Imm32(0x8000));
MOV(64, R(tmp), Imm64(~0xffff));
AND(64, R(long_prod), R(tmp));
FixupBranch _ret = J();
// else prod = (prod + 0x7fff) & ~0xffff;
SetJumpTarget(jump);
ADD(64, R(long_prod), Imm32(0x7fff));
MOV(64, R(tmp), Imm64(~0xffff));
AND(64, R(long_prod), R(tmp));
SetJumpTarget(_ret);
// return prod;
m_gpr.PutXReg(tmp);
} }
// For accurate emulation, this is wrong - but the real prod registers behave // For accurate emulation, this is wrong - but the real prod registers behave
@@ -708,22 +691,13 @@ void DSPEmitter::set_long_prod()
} }
// Returns s64 in RAX // Returns s64 in RAX
// Clobbers RCX
void DSPEmitter::round_long_acc(X64Reg long_acc) void DSPEmitter::round_long_acc(X64Reg long_acc)
{ {
// if (prod & 0x10000) prod = (prod + 0x8000) & ~0xffff; // if (prod & 0x10000) prod = (prod + 0x8000) & ~0xffff;
TEST(32, R(long_acc), Imm32(0x10000));
FixupBranch jump = J_CC(CC_Z);
ADD(64, R(long_acc), Imm32(0x8000));
MOV(64, R(ECX), Imm64(~0xffff));
AND(64, R(long_acc), R(RCX));
FixupBranch _ret = J();
// else prod = (prod + 0x7fff) & ~0xffff; // else prod = (prod + 0x7fff) & ~0xffff;
SetJumpTarget(jump); BT(32, R(long_acc), Imm8(16));
ADD(64, R(long_acc), Imm32(0x7fff)); ADC(64, R(long_acc), Imm32(0x7FFF));
MOV(64, R(RCX), Imm64(~0xffff)); XOR(16, R(long_acc), R(long_acc));
AND(64, R(long_acc), R(RCX));
SetJumpTarget(_ret);
// return prod; // return prod;
} }

View File

@@ -101,8 +101,8 @@ void CommonAsmRoutines::GenFrsqrte()
MOVQ_xmm(XMM0, R(RSCRATCH)); MOVQ_xmm(XMM0, R(RSCRATCH));
RET(); RET();
SetJumpTarget(inf); SetJumpTarget(inf);
BT(64, R(RSCRATCH), Imm8(63)); TEST(64, R(RSCRATCH), R(RSCRATCH));
FixupBranch negative = J_CC(CC_C); FixupBranch negative = J_CC(CC_S);
XORPD(XMM0, R(XMM0)); XORPD(XMM0, R(XMM0));
RET(); RET();