Jit64AsmCommon: Reduce branches in fast-path and inline most behavior
The only behavior requires a function call is denormal behaviour. We also fix fpscr exception raising.
This commit is contained in:
parent
826bcad3a5
commit
89a0b0de32
|
@ -32,17 +32,14 @@ void CommonAsmRoutines::GenFrsqrte()
|
|||
// This function clobbers all three RSCRATCH.
|
||||
MOVQ_xmm(R(RSCRATCH), XMM0);
|
||||
|
||||
// Negative and zero inputs set an exception and take the complex path.
|
||||
TEST(64, R(RSCRATCH), R(RSCRATCH));
|
||||
FixupBranch zero = J_CC(CC_Z, true);
|
||||
FixupBranch negative = J_CC(CC_S, true);
|
||||
// Extract exponent
|
||||
MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH));
|
||||
SHR(64, R(RSCRATCH_EXTRA), Imm8(52));
|
||||
|
||||
// Zero and max exponents (non-normal floats) take the complex path.
|
||||
FixupBranch complex1 = J_CC(CC_Z, true);
|
||||
CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
|
||||
FixupBranch complex2 = J_CC(CC_E, true);
|
||||
// Negatives, zeros, denormals, infinities and NaNs take the complex path.
|
||||
LEA(32, RSCRATCH2, MDisp(RSCRATCH_EXTRA, -1));
|
||||
CMP(32, R(RSCRATCH2), Imm32(0x7FE));
|
||||
FixupBranch complex = J_CC(CC_AE, true);
|
||||
|
||||
SUB(32, R(RSCRATCH_EXTRA), Imm32(0x3FD));
|
||||
SAR(32, R(RSCRATCH_EXTRA), Imm8(1));
|
||||
|
@ -75,24 +72,53 @@ void CommonAsmRoutines::GenFrsqrte()
|
|||
MOVQ_xmm(XMM0, R(RSCRATCH2));
|
||||
RET();
|
||||
|
||||
// Exception flags for zero input.
|
||||
SetJumpTarget(zero);
|
||||
SetJumpTarget(complex);
|
||||
AND(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
|
||||
CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
|
||||
FixupBranch nan_or_inf = J_CC(CC_E);
|
||||
|
||||
MOV(64, R(RSCRATCH2), R(RSCRATCH));
|
||||
SHL(64, R(RSCRATCH2), Imm8(1));
|
||||
FixupBranch nonzero = J_CC(CC_NZ);
|
||||
|
||||
// +0.0 or -0.0
|
||||
TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX));
|
||||
FixupBranch skip_set_fx1 = J_CC(CC_NZ);
|
||||
OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_ZX));
|
||||
FixupBranch complex3 = J();
|
||||
SetJumpTarget(skip_set_fx1);
|
||||
MOV(64, R(RSCRATCH2), Imm64(0x7FF0'0000'0000'0000));
|
||||
OR(64, R(RSCRATCH2), R(RSCRATCH));
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH2));
|
||||
RET();
|
||||
|
||||
// Exception flags for negative input.
|
||||
// SNaN or QNaN or +Inf or -Inf
|
||||
SetJumpTarget(nan_or_inf);
|
||||
MOV(64, R(RSCRATCH2), R(RSCRATCH));
|
||||
SHL(64, R(RSCRATCH2), Imm8(12));
|
||||
FixupBranch inf = J_CC(CC_Z);
|
||||
BTS(64, R(RSCRATCH), Imm8(51));
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH));
|
||||
RET();
|
||||
SetJumpTarget(inf);
|
||||
BT(64, R(RSCRATCH), Imm8(63));
|
||||
FixupBranch negative = J_CC(CC_C);
|
||||
XORPD(XMM0, R(XMM0));
|
||||
RET();
|
||||
|
||||
SetJumpTarget(nonzero);
|
||||
FixupBranch denormal = J_CC(CC_NC);
|
||||
|
||||
// Negative sign
|
||||
SetJumpTarget(negative);
|
||||
TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT));
|
||||
FixupBranch skip_set_fx2 = J_CC(CC_NZ);
|
||||
OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_VXSQRT));
|
||||
|
||||
SetJumpTarget(skip_set_fx1);
|
||||
SetJumpTarget(skip_set_fx2);
|
||||
SetJumpTarget(complex1);
|
||||
SetJumpTarget(complex2);
|
||||
SetJumpTarget(complex3);
|
||||
MOV(64, R(RSCRATCH2), Imm64(0x7FF8'0000'0000'0000));
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH2));
|
||||
RET();
|
||||
|
||||
SetJumpTarget(denormal);
|
||||
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
|
||||
ABI_CallFunction(Common::ApproximateReciprocalSquareRoot);
|
||||
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
|
||||
|
|
Loading…
Reference in New Issue