diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
index 65f72805c8..17442add83 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -32,17 +32,15 @@ void CommonAsmRoutines::GenFrsqrte()
   // This function clobbers all three RSCRATCH.
   MOVQ_xmm(R(RSCRATCH), XMM0);
 
-  // Negative and zero inputs set an exception and take the complex path.
-  TEST(64, R(RSCRATCH), R(RSCRATCH));
-  FixupBranch zero = J_CC(CC_Z, true);
-  FixupBranch negative = J_CC(CC_S, true);
+  // Extract exponent
   MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH));
   SHR(64, R(RSCRATCH_EXTRA), Imm8(52));
 
-  // Zero and max exponents (non-normal floats) take the complex path.
-  FixupBranch complex1 = J_CC(CC_Z, true);
-  CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
-  FixupBranch complex2 = J_CC(CC_E, true);
+  // Negatives, zeros, denormals, infinities and NaNs take the complex path.
+  // Unsigned (sign:exponent) - 1 >= 0x7FE iff exponent is 0 (wraps) or 0x7FF, or sign is set.
+  LEA(32, RSCRATCH2, MDisp(RSCRATCH_EXTRA, -1));
+  CMP(32, R(RSCRATCH2), Imm32(0x7FE));
+  FixupBranch complex = J_CC(CC_AE, true);
 
   SUB(32, R(RSCRATCH_EXTRA), Imm32(0x3FD));
   SAR(32, R(RSCRATCH_EXTRA), Imm8(1));
@@ -75,24 +73,59 @@ void CommonAsmRoutines::GenFrsqrte()
   MOVQ_xmm(XMM0, R(RSCRATCH2));
   RET();
 
-  // Exception flags for zero input.
-  SetJumpTarget(zero);
+  SetJumpTarget(complex);
+  // RSCRATCH_EXTRA still holds sign:exponent; keep only the 11-bit exponent.
+  AND(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
+  CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
+  FixupBranch nan_or_inf = J_CC(CC_E);
+
+  // Shift the sign out: ZF is set for +/-0.0 and CF holds the sign (tested again at nonzero).
+  MOV(64, R(RSCRATCH2), R(RSCRATCH));
+  SHL(64, R(RSCRATCH2), Imm8(1));
+  FixupBranch nonzero = J_CC(CC_NZ);
+
+  // +0.0 or -0.0
   TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX));
   FixupBranch skip_set_fx1 = J_CC(CC_NZ);
   OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_ZX));
-  FixupBranch complex3 = J();
+  SetJumpTarget(skip_set_fx1);
+  MOV(64, R(RSCRATCH2), Imm64(0x7FF0'0000'0000'0000));
+  OR(64, R(RSCRATCH2), R(RSCRATCH));
+  MOVQ_xmm(XMM0, R(RSCRATCH2));
+  RET();
 
-  // Exception flags for negative input.
+  // SNaN or QNaN or +Inf or -Inf
+  SetJumpTarget(nan_or_inf);
+  // Shift sign and exponent out: ZF is set when the mantissa is zero (infinity).
+  MOV(64, R(RSCRATCH2), R(RSCRATCH));
+  SHL(64, R(RSCRATCH2), Imm8(12));
+  FixupBranch inf = J_CC(CC_Z);
+  // NaN: set bit 51 (turning an SNaN into a QNaN) and return the input otherwise unchanged.
+  BTS(64, R(RSCRATCH), Imm8(51));
+  MOVQ_xmm(XMM0, R(RSCRATCH));
+  RET();
+  SetJumpTarget(inf);
+  // -Inf is handled as a negative input; +Inf returns +0.0.
+  BT(64, R(RSCRATCH), Imm8(63));
+  FixupBranch negative = J_CC(CC_C);
+  XORPD(XMM0, R(XMM0));
+  RET();
+
+  SetJumpTarget(nonzero);
+  // The flags are still those of the SHL by 1 above: CF clear means a positive denormal.
+  FixupBranch denormal = J_CC(CC_NC);
+
+  // Negative sign
   SetJumpTarget(negative);
   TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT));
   FixupBranch skip_set_fx2 = J_CC(CC_NZ);
   OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_VXSQRT));
-
-  SetJumpTarget(skip_set_fx1);
   SetJumpTarget(skip_set_fx2);
-  SetJumpTarget(complex1);
-  SetJumpTarget(complex2);
-  SetJumpTarget(complex3);
+  MOV(64, R(RSCRATCH2), Imm64(0x7FF8'0000'0000'0000));
+  MOVQ_xmm(XMM0, R(RSCRATCH2));
+  RET();
+
+  SetJumpTarget(denormal);
   ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
   ABI_CallFunction(Common::ApproximateReciprocalSquareRoot);
   ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
diff --git a/Source/UnitTests/Core/CMakeLists.txt b/Source/UnitTests/Core/CMakeLists.txt
index 621cfd3f61..894310e126 100644
--- a/Source/UnitTests/Core/CMakeLists.txt
+++ b/Source/UnitTests/Core/CMakeLists.txt
@@ -13,3 +13,7 @@ add_dolphin_test(DSPAssemblyTest
 add_dolphin_test(ESFormatsTest IOS/ES/FormatsTest.cpp IOS/ES/TestBinaryData.cpp)
 
 add_dolphin_test(FileSystemTest IOS/FS/FileSystemTest.cpp)
+
+if(_M_X86)
+  add_dolphin_test(PowerPCTest PowerPC/Jit64Common/Frsqrte.cpp)
+endif()
diff --git a/Source/UnitTests/Core/PowerPC/Jit64Common/Frsqrte.cpp b/Source/UnitTests/Core/PowerPC/Jit64Common/Frsqrte.cpp
new file mode 100644
index 0000000000..472bfdf450
--- /dev/null
+++ b/Source/UnitTests/Core/PowerPC/Jit64Common/Frsqrte.cpp
@@ -0,0 +1,105 @@
+// Copyright 2018 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <cinttypes>
+#include <cstdio>
+#include <vector>
+
+#include "Common/BitUtils.h"
+#include "Common/CommonTypes.h"
+#include "Common/FloatUtils.h"
+#include "Common/x64ABI.h"
+#include "Core/PowerPC/Gekko.h"
+#include "Core/PowerPC/Jit64Common/Jit64AsmCommon.h"
+#include "Core/PowerPC/Jit64Common/Jit64Base.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
+
+#include <gtest/gtest.h>
+
+// Emits GenFrsqrte() followed by a wrapper that lets the test call it like a normal function.
+class TestCommonAsmRoutines : public CommonAsmRoutines
+{
+public:
+  TestCommonAsmRoutines()
+  {
+    using namespace Gen;
+
+    AllocCodeSpace(4096);
+    m_const_pool.Init(AllocChildCodeSpace(1024), 1024);
+
+    const auto raw_frsqrte = reinterpret_cast<double (*)(double)>(AlignCode4());
+    GenFrsqrte();
+
+    wrapped_frsqrte = reinterpret_cast<u64 (*)(u64, UReg_FPSCR&)>(AlignCode4());
+    ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16);
+
+    // We know the frsqrte implementation only accesses the fpscr. We manufacture a
+    // PPCSTATE pointer so we read/write to our provided fpscr argument instead.
+    XOR(32, R(RPPCSTATE), R(RPPCSTATE));
+    LEA(64, RSCRATCH, PPCSTATE(fpscr));
+    SUB(64, R(ABI_PARAM2), R(RSCRATCH));
+    MOV(64, R(RPPCSTATE), R(ABI_PARAM2));
+
+    // Call
+    MOVQ_xmm(XMM0, R(ABI_PARAM1));
+    ABI_CallFunction(raw_frsqrte);
+    MOVQ_xmm(R(ABI_RETURN), XMM0);
+
+    ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16);
+    RET();
+  }
+
+  // Takes the input's bit pattern and the FPSCR to update; returns the result's bit pattern.
+  u64 (*wrapped_frsqrte)(u64, UReg_FPSCR&);
+};
+
+// The JIT routine must return the same bits as Common::ApproximateReciprocalSquareRoot.
+TEST(Jit64, Frsqrte)
+{
+  TestCommonAsmRoutines routines;
+
+  const std::vector<u64> special_values{
+      0x0000'0000'0000'0000,  // positive zero
+      0x0000'0000'0000'0001,  // smallest positive denormal
+      0x0000'0000'0100'0000,
+      0x000F'FFFF'FFFF'FFFF,  // largest positive denormal
+      0x0010'0000'0000'0000,  // smallest positive normal
+      0x0010'0000'0000'0002,
+      0x3FF0'0000'0000'0000,  // 1.0
+      0x7FEF'FFFF'FFFF'FFFF,  // largest positive normal
+      0x7FF0'0000'0000'0000,  // positive infinity
+      0x7FF0'0000'0000'0001,  // first positive SNaN
+      0x7FF7'FFFF'FFFF'FFFF,  // last positive SNaN
+      0x7FF8'0000'0000'0000,  // first positive QNaN
+      0x7FFF'FFFF'FFFF'FFFF,  // last positive QNaN
+      0x8000'0000'0000'0000,  // negative zero
+      0x8000'0000'0000'0001,  // smallest negative denormal
+      0x8000'0000'0100'0000,
+      0x800F'FFFF'FFFF'FFFF,  // largest negative denormal
+      0x8010'0000'0000'0000,  // smallest negative normal
+      0x8010'0000'0000'0002,
+      0xBFF0'0000'0000'0000,  // -1.0
+      0xFFEF'FFFF'FFFF'FFFF,  // largest negative normal
+      0xFFF0'0000'0000'0000,  // negative infinity
+      0xFFF0'0000'0000'0001,  // first negative SNaN
+      0xFFF7'FFFF'FFFF'FFFF,  // last negative SNaN
+      0xFFF8'0000'0000'0000,  // first negative QNaN
+      0xFFFF'FFFF'FFFF'FFFF,  // last negative QNaN
+  };
+
+  UReg_FPSCR fpscr;
+
+  for (const u64 ivalue : special_values)
+  {
+    const double dvalue = Common::BitCast<double>(ivalue);
+
+    const u64 expected = Common::BitCast<u64>(Common::ApproximateReciprocalSquareRoot(dvalue));
+
+    const u64 actual = routines.wrapped_frsqrte(ivalue, fpscr);
+
+    printf("%016" PRIx64 " -> %016" PRIx64 " == %016" PRIx64 "\n", ivalue, actual, expected);
+
+    EXPECT_EQ(expected, actual);
+  }
+}