Merge pull request #11144 from JosJuice/jit64-arith-inline-avx

Jit64: Inline avx_op into fp_arith
This commit is contained in:
Tilka 2024-04-20 22:03:47 +01:00 committed by GitHub
commit 020e7b3e2d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 71 additions and 34 deletions

View File

@ -282,7 +282,9 @@ void Jit64::fp_arith(UGeckoInstruction inst)
RCOpArg Rarg2 = fpr.Use(arg2, RCMode::Read);
RegCache::Realize(Rd, Ra, Rarg2);
X64Reg dest = preserve_inputs ? XMM1 : static_cast<X64Reg>(Rd);
X64Reg dest = X64Reg(Rd);
if (preserve_inputs && (a == d || arg2 == d))
dest = XMM1;
if (round_rhs)
{
if (a == d && !preserve_inputs)
@ -298,7 +300,33 @@ void Jit64::fp_arith(UGeckoInstruction inst)
}
else
{
avx_op(avxOp, sseOp, dest, Ra, Rarg2, packed, reversible);
if (Ra.IsSimpleReg(dest))
{
(this->*sseOp)(dest, Rarg2);
}
else if (reversible && Rarg2.IsSimpleReg(dest))
{
(this->*sseOp)(dest, Ra);
}
else if (cpu_info.bAVX && Ra.IsSimpleReg())
{
(this->*avxOp)(dest, Ra.GetSimpleReg(), Rarg2);
}
else if (cpu_info.bAVX && reversible && Rarg2.IsSimpleReg())
{
(this->*avxOp)(dest, Rarg2.GetSimpleReg(), Ra);
}
else
{
if (Rarg2.IsSimpleReg(dest))
dest = XMM1;
if (packed)
MOVAPD(dest, Ra);
else
MOVSD(dest, Ra);
(this->*sseOp)(dest, a == arg2 ? R(dest) : Rarg2);
}
}
switch (inst.SUBOP5)

View File

@ -748,36 +748,19 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&),
{
(this->*sseOp)(regOp, arg2);
}
else if (arg1.IsSimpleReg() && cpu_info.bAVX)
else if (reversible && arg2.IsSimpleReg(regOp))
{
(this->*sseOp)(regOp, arg1);
}
else if (cpu_info.bAVX && arg1.IsSimpleReg())
{
(this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2);
}
else if (arg2.IsSimpleReg(regOp))
else if (cpu_info.bAVX && reversible && arg2.IsSimpleReg())
{
if (reversible)
{
(this->*sseOp)(regOp, arg1);
}
else
{
// The ugly case: regOp == arg2 without AVX, or with arg1 == memory
if (!arg1.IsSimpleReg(XMM0))
MOVAPD(XMM0, arg1);
if (cpu_info.bAVX)
{
(this->*avxOp)(regOp, XMM0, arg2);
}
else
{
(this->*sseOp)(XMM0, arg2);
if (packed)
MOVAPD(regOp, R(XMM0));
else
MOVSD(regOp, R(XMM0));
}
}
(this->*avxOp)(regOp, arg2.GetSimpleReg(), arg1);
}
else
else if (!arg2.IsSimpleReg(regOp))
{
if (packed)
MOVAPD(regOp, arg1);
@ -785,6 +768,32 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&),
MOVSD(regOp, arg1);
(this->*sseOp)(regOp, arg1 == arg2 ? R(regOp) : arg2);
}
else if (reversible && !arg1.IsSimpleReg(regOp))
{
if (packed)
MOVAPD(regOp, arg2);
else
MOVSD(regOp, arg2);
(this->*sseOp)(regOp, arg1 == arg2 ? R(regOp) : arg1);
}
else
{
// The ugly case: Not reversible, and we have regOp == arg2 without AVX or with arg1 == memory
if (!arg1.IsSimpleReg(XMM0))
MOVAPD(XMM0, arg1);
if (cpu_info.bAVX)
{
(this->*avxOp)(regOp, XMM0, arg2);
}
else
{
(this->*sseOp)(XMM0, arg2);
if (packed)
MOVAPD(regOp, R(XMM0));
else
MOVSD(regOp, R(XMM0));
}
}
}
// Abstract between AVX and SSE: automatically handle 3-operand instructions
@ -796,11 +805,16 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&,
{
(this->*sseOp)(regOp, arg2, imm);
}
else if (arg1.IsSimpleReg() && cpu_info.bAVX)
else if (cpu_info.bAVX && arg1.IsSimpleReg())
{
(this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2, imm);
}
else if (arg2.IsSimpleReg(regOp))
else if (!arg2.IsSimpleReg(regOp))
{
MOVAPD(regOp, arg1);
(this->*sseOp)(regOp, arg1 == arg2 ? R(regOp) : arg2, imm);
}
else
{
// The ugly case: regOp == arg2 without AVX, or with arg1 == memory
if (!arg1.IsSimpleReg(XMM0))
@ -816,11 +830,6 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&,
MOVAPD(regOp, R(XMM0));
}
}
else
{
MOVAPD(regOp, arg1);
(this->*sseOp)(regOp, arg1 == arg2 ? R(regOp) : arg2, imm);
}
}
alignas(16) static const u64 psMantissaTruncate[2] = {0xFFFFFFFFF8000000ULL, 0xFFFFFFFFF8000000ULL};