Jit64AsmCommon: Reduce branches in fast-path and inline most behavior
The only behavior that still requires a function call is the denormal behavior. We also fix FPSCR exception raising.
This commit is contained in:
parent
826bcad3a5
commit
89a0b0de32
|
@ -32,17 +32,14 @@ void CommonAsmRoutines::GenFrsqrte()
|
||||||
// This function clobbers all three RSCRATCH.
|
// This function clobbers all three RSCRATCH.
|
||||||
MOVQ_xmm(R(RSCRATCH), XMM0);
|
MOVQ_xmm(R(RSCRATCH), XMM0);
|
||||||
|
|
||||||
// Negative and zero inputs set an exception and take the complex path.
|
// Extract exponent
|
||||||
TEST(64, R(RSCRATCH), R(RSCRATCH));
|
|
||||||
FixupBranch zero = J_CC(CC_Z, true);
|
|
||||||
FixupBranch negative = J_CC(CC_S, true);
|
|
||||||
MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH));
|
MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH));
|
||||||
SHR(64, R(RSCRATCH_EXTRA), Imm8(52));
|
SHR(64, R(RSCRATCH_EXTRA), Imm8(52));
|
||||||
|
|
||||||
// Zero and max exponents (non-normal floats) take the complex path.
|
// Negatives, zeros, denormals, infinities and NaNs take the complex path.
|
||||||
FixupBranch complex1 = J_CC(CC_Z, true);
|
LEA(32, RSCRATCH2, MDisp(RSCRATCH_EXTRA, -1));
|
||||||
CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
|
CMP(32, R(RSCRATCH2), Imm32(0x7FE));
|
||||||
FixupBranch complex2 = J_CC(CC_E, true);
|
FixupBranch complex = J_CC(CC_AE, true);
|
||||||
|
|
||||||
SUB(32, R(RSCRATCH_EXTRA), Imm32(0x3FD));
|
SUB(32, R(RSCRATCH_EXTRA), Imm32(0x3FD));
|
||||||
SAR(32, R(RSCRATCH_EXTRA), Imm8(1));
|
SAR(32, R(RSCRATCH_EXTRA), Imm8(1));
|
||||||
|
@ -75,24 +72,53 @@ void CommonAsmRoutines::GenFrsqrte()
|
||||||
MOVQ_xmm(XMM0, R(RSCRATCH2));
|
MOVQ_xmm(XMM0, R(RSCRATCH2));
|
||||||
RET();
|
RET();
|
||||||
|
|
||||||
// Exception flags for zero input.
|
SetJumpTarget(complex);
|
||||||
SetJumpTarget(zero);
|
AND(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
|
||||||
|
CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
|
||||||
|
FixupBranch nan_or_inf = J_CC(CC_E);
|
||||||
|
|
||||||
|
MOV(64, R(RSCRATCH2), R(RSCRATCH));
|
||||||
|
SHL(64, R(RSCRATCH2), Imm8(1));
|
||||||
|
FixupBranch nonzero = J_CC(CC_NZ);
|
||||||
|
|
||||||
|
// +0.0 or -0.0
|
||||||
TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX));
|
TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX));
|
||||||
FixupBranch skip_set_fx1 = J_CC(CC_NZ);
|
FixupBranch skip_set_fx1 = J_CC(CC_NZ);
|
||||||
OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_ZX));
|
OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_ZX));
|
||||||
FixupBranch complex3 = J();
|
SetJumpTarget(skip_set_fx1);
|
||||||
|
MOV(64, R(RSCRATCH2), Imm64(0x7FF0'0000'0000'0000));
|
||||||
|
OR(64, R(RSCRATCH2), R(RSCRATCH));
|
||||||
|
MOVQ_xmm(XMM0, R(RSCRATCH2));
|
||||||
|
RET();
|
||||||
|
|
||||||
// Exception flags for negative input.
|
// SNaN or QNaN or +Inf or -Inf
|
||||||
|
SetJumpTarget(nan_or_inf);
|
||||||
|
MOV(64, R(RSCRATCH2), R(RSCRATCH));
|
||||||
|
SHL(64, R(RSCRATCH2), Imm8(12));
|
||||||
|
FixupBranch inf = J_CC(CC_Z);
|
||||||
|
BTS(64, R(RSCRATCH), Imm8(51));
|
||||||
|
MOVQ_xmm(XMM0, R(RSCRATCH));
|
||||||
|
RET();
|
||||||
|
SetJumpTarget(inf);
|
||||||
|
BT(64, R(RSCRATCH), Imm8(63));
|
||||||
|
FixupBranch negative = J_CC(CC_C);
|
||||||
|
XORPD(XMM0, R(XMM0));
|
||||||
|
RET();
|
||||||
|
|
||||||
|
SetJumpTarget(nonzero);
|
||||||
|
FixupBranch denormal = J_CC(CC_NC);
|
||||||
|
|
||||||
|
// Negative sign
|
||||||
SetJumpTarget(negative);
|
SetJumpTarget(negative);
|
||||||
TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT));
|
TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT));
|
||||||
FixupBranch skip_set_fx2 = J_CC(CC_NZ);
|
FixupBranch skip_set_fx2 = J_CC(CC_NZ);
|
||||||
OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_VXSQRT));
|
OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_VXSQRT));
|
||||||
|
|
||||||
SetJumpTarget(skip_set_fx1);
|
|
||||||
SetJumpTarget(skip_set_fx2);
|
SetJumpTarget(skip_set_fx2);
|
||||||
SetJumpTarget(complex1);
|
MOV(64, R(RSCRATCH2), Imm64(0x7FF8'0000'0000'0000));
|
||||||
SetJumpTarget(complex2);
|
MOVQ_xmm(XMM0, R(RSCRATCH2));
|
||||||
SetJumpTarget(complex3);
|
RET();
|
||||||
|
|
||||||
|
SetJumpTarget(denormal);
|
||||||
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
|
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
|
||||||
ABI_CallFunction(Common::ApproximateReciprocalSquareRoot);
|
ABI_CallFunction(Common::ApproximateReciprocalSquareRoot);
|
||||||
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
|
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
|
||||||
|
|
Loading…
Reference in New Issue