diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 681fb3cd60..7928cc22fa 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -69,6 +69,7 @@ target_sources(common PRIVATE Easing.h EnumOps.h Error.h + FPControl.h FastJmp.h FileSystem.h General.h @@ -122,7 +123,6 @@ target_sources(common PRIVATE emitter/legacy_instructions.h emitter/legacy_internal.h emitter/legacy_types.h - emitter/tools.h emitter/x86emitter.h emitter/x86types.h Darwin/DarwinMisc.h diff --git a/common/Darwin/DarwinThreads.cpp b/common/Darwin/DarwinThreads.cpp index 454b776dec..860a093389 100644 --- a/common/Darwin/DarwinThreads.cpp +++ b/common/Darwin/DarwinThreads.cpp @@ -29,7 +29,11 @@ __forceinline void Threading::SpinWait() { // If this doesn't compile you can just comment it out (it only serves as a // performance hint and isn't required). +#if defined(_M_X86) __asm__("pause"); +#elif defined(_M_ARM64) + __asm__ __volatile__("isb"); +#endif } __forceinline void Threading::EnableHiresScheduler() diff --git a/common/FPControl.h b/common/FPControl.h new file mode 100644 index 0000000000..fc9a8e0660 --- /dev/null +++ b/common/FPControl.h @@ -0,0 +1,216 @@ +// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team +// SPDX-License-Identifier: LGPL-3.0+ + +// This file abstracts the floating-point control registers, known as MXCSR on x86, and FPCR on AArch64. 
+ +#pragma once + +#include "common/Pcsx2Defs.h" +#include "common/VectorIntrin.h" + +enum class FPRoundMode : u8 +{ + Nearest, + NegativeInfinity, + PositiveInfinity, + ChopZero, + + MaxCount +}; + +struct FPControlRegister +{ +#ifdef _M_X86 + u32 bitmask; + + static constexpr u32 EXCEPTION_MASK = (0x3Fu << 7); + static constexpr u32 ROUNDING_CONTROL_SHIFT = 13; + static constexpr u32 ROUNDING_CONTROL_MASK = 3u; + static constexpr u32 ROUNDING_CONTROL_BITS = (ROUNDING_CONTROL_MASK << ROUNDING_CONTROL_SHIFT); + static constexpr u32 DENORMALS_ARE_ZERO_BIT = (1u << 6); + static constexpr u32 FLUSH_TO_ZERO_BIT = (1u << 15); + + __fi static FPControlRegister GetCurrent() + { + return FPControlRegister{_mm_getcsr()}; + } + + __fi static void SetCurrent(FPControlRegister value) + { + _mm_setcsr(value.bitmask); + } + + __fi static constexpr FPControlRegister GetDefault() + { + // 0x1f80 - all exceptions masked, nearest rounding + return FPControlRegister{0x1f80}; + } + + __fi constexpr FPControlRegister& EnableExceptions() + { + bitmask &= ~EXCEPTION_MASK; + return *this; + } + + __fi constexpr FPControlRegister DisableExceptions() + { + bitmask |= EXCEPTION_MASK; + return *this; + } + + __fi constexpr FPRoundMode GetRoundMode() const + { + return static_cast((bitmask >> ROUNDING_CONTROL_SHIFT) & ROUNDING_CONTROL_MASK); + } + + __fi constexpr FPControlRegister& SetRoundMode(FPRoundMode mode) + { + // These bits match on x86. 
+ bitmask = (bitmask & ~ROUNDING_CONTROL_BITS) | ((static_cast(mode) & ROUNDING_CONTROL_MASK) << ROUNDING_CONTROL_SHIFT); + return *this; + } + + __fi constexpr bool GetDenormalsAreZero() const + { + return ((bitmask & DENORMALS_ARE_ZERO_BIT) != 0); + } + + __fi constexpr FPControlRegister SetDenormalsAreZero(bool daz) + { + if (daz) + bitmask |= DENORMALS_ARE_ZERO_BIT; + else + bitmask &= ~DENORMALS_ARE_ZERO_BIT; + return *this; + } + + __fi constexpr bool GetFlushToZero() const + { + return ((bitmask & FLUSH_TO_ZERO_BIT) != 0); + } + + __fi constexpr FPControlRegister SetFlushToZero(bool ftz) + { + if (ftz) + bitmask |= FLUSH_TO_ZERO_BIT; + else + bitmask &= ~FLUSH_TO_ZERO_BIT; + return *this; + } + + __fi constexpr bool operator==(const FPControlRegister& rhs) const { return bitmask == rhs.bitmask; } + __fi constexpr bool operator!=(const FPControlRegister& rhs) const { return bitmask != rhs.bitmask; } + +#elif defined(_M_ARM64) + u64 bitmask; + + static constexpr u64 FZ_BIT = (0x1ULL << 24); + static constexpr u32 RMODE_SHIFT = 22; + static constexpr u64 RMODE_MASK = 0x3ULL; + static constexpr u64 RMODE_BITS = (RMODE_MASK << RMODE_SHIFT); + static constexpr u32 EXCEPTION_MASK = (0x3Fu << 5); + + __fi static FPControlRegister GetCurrent() + { + u64 value; + asm volatile("\tmrs %0, FPCR\n" + : "=r"(value)); + return FPControlRegister{value}; + } + + __fi static void SetCurrent(FPControlRegister value) + { + asm volatile("\tmsr FPCR, %0\n" ::"r"(value.bitmask)); + } + + __fi static constexpr FPControlRegister GetDefault() + { + // 0x0 - all exceptions masked, nearest rounding + return FPControlRegister{0x0}; + } + + __fi constexpr FPControlRegister& EnableExceptions() + { + bitmask |= EXCEPTION_MASK; + return *this; + } + + __fi constexpr FPControlRegister& DisableExceptions() + { + bitmask &= ~EXCEPTION_MASK; + return *this; + } + + __fi constexpr FPRoundMode GetRoundMode() const + { + // Negative/Positive infinity rounding is flipped on A64. 
+ const u64 RMode = (bitmask >> RMODE_SHIFT) & RMODE_MASK; + return static_cast((RMode == 0b00 || RMode == 0b11) ? RMode : (RMode ^ 0b11)); + } + + __fi constexpr FPControlRegister& SetRoundMode(FPRoundMode mode) + { + const u64 RMode = ((mode == FPRoundMode::Nearest || mode == FPRoundMode::ChopZero) ? static_cast(mode) : (static_cast(mode) ^ 0b11)); + bitmask = (bitmask & ~RMODE_BITS) | ((RMode & RMODE_MASK) << RMODE_SHIFT); + return *this; + } + + __fi constexpr bool GetDenormalsAreZero() const + { + // Without FEAT_AFP, most ARM chips don't have separate DaZ/FtZ. This includes Apple Silicon, which + // implements x86-like behavior with a vendor-specific extension that we cannot access from usermode. + // The FZ bit causes both inputs and outputs to be flushed to zero. + return ((bitmask & FZ_BIT) != 0); + } + + __fi constexpr FPControlRegister SetDenormalsAreZero(bool daz) + { + if (daz) + bitmask |= FZ_BIT; + else + bitmask &= ~FZ_BIT; + return *this; + } + + __fi constexpr bool GetFlushToZero() const + { + // See note in GetDenormalsAreZero(). + return ((bitmask & FZ_BIT) != 0); + } + + __fi constexpr FPControlRegister SetFlushToZero(bool ftz) + { + if (ftz) + bitmask |= FZ_BIT; + else + bitmask &= ~FZ_BIT; + return *this; + } + + __fi constexpr bool operator==(const FPControlRegister& rhs) const { return bitmask == rhs.bitmask; } + __fi constexpr bool operator!=(const FPControlRegister& rhs) const { return bitmask != rhs.bitmask; } +#else +#error Unknown architecture. +#endif +}; + +/// Helper to back up/restore FPCR. 
+class FPControlRegisterBackup +{ +public: + __fi FPControlRegisterBackup(FPControlRegister new_value) + : m_prev_val(FPControlRegister::GetCurrent()) + { + FPControlRegister::SetCurrent(new_value); + } + __fi ~FPControlRegisterBackup() + { + FPControlRegister::SetCurrent(m_prev_val); + } + + FPControlRegisterBackup(const FPControlRegisterBackup&) = delete; + FPControlRegisterBackup& operator=(const FPControlRegisterBackup&) = delete; + +private: + FPControlRegister m_prev_val; +}; diff --git a/common/FastJmp.cpp b/common/FastJmp.cpp index a458012208..435f1bbd2d 100644 --- a/common/FastJmp.cpp +++ b/common/FastJmp.cpp @@ -3,7 +3,7 @@ #include "FastJmp.h" -#ifndef _WIN32 +#if !defined(_MSC_VER) || defined(__clang__) #if defined(__APPLE__) #define PREFIX "_" @@ -11,6 +11,8 @@ #define PREFIX "" #endif +#if defined(_M_X86) + asm( "\t.global " PREFIX "fastjmp_set\n" "\t.global " PREFIX "fastjmp_jmp\n" @@ -43,4 +45,47 @@ asm( jmp *%rdx )"); +#elif defined(_M_ARM64) + +asm( + "\t.global " PREFIX "fastjmp_set\n" + "\t.global " PREFIX "fastjmp_jmp\n" + "\t.text\n" + "\t.align 16\n" + "\t" PREFIX "fastjmp_set:" R"( + mov x16, sp + stp x16, x30, [x0] + stp x19, x20, [x0, #16] + stp x21, x22, [x0, #32] + stp x23, x24, [x0, #48] + stp x25, x26, [x0, #64] + stp x27, x28, [x0, #80] + str x29, [x0, #96] + stp d8, d9, [x0, #112] + stp d10, d11, [x0, #128] + stp d12, d13, [x0, #144] + stp d14, d15, [x0, #160] + mov w0, wzr + br x30 +)" +".align 16\n" +"\t" PREFIX "fastjmp_jmp:" R"( + ldp x16, x30, [x0] + mov sp, x16 + ldp x19, x20, [x0, #16] + ldp x21, x22, [x0, #32] + ldp x23, x24, [x0, #48] + ldp x25, x26, [x0, #64] + ldp x27, x28, [x0, #80] + ldr x29, [x0, #96] + ldp d8, d9, [x0, #112] + ldp d10, d11, [x0, #128] + ldp d12, d13, [x0, #144] + ldp d14, d15, [x0, #160] + mov w0, w1 + br x30 +)"); + +#endif + #endif // __WIN32 diff --git a/common/FastJmp.h b/common/FastJmp.h index 63ae80ef1c..9b34a6e3d5 100644 --- a/common/FastJmp.h +++ b/common/FastJmp.h @@ -10,6 +10,8 @@ struct 
fastjmp_buf { #if defined(_WIN32) static constexpr std::size_t BUF_SIZE = 240; +#elif defined(_M_ARM64) + static constexpr std::size_t BUF_SIZE = 168; #else static constexpr std::size_t BUF_SIZE = 64; #endif diff --git a/common/General.h b/common/General.h index 923452e64d..c9cef33640 100644 --- a/common/General.h +++ b/common/General.h @@ -152,6 +152,25 @@ namespace HostSys /// Removes the page fault handler. handler is only specified to check against the active callback. void RemovePageFaultHandler(PageFaultHandler handler); + + /// JIT write protect for Apple Silicon. Needs to be called prior to writing to any RWX pages. +#if !defined(__APPLE__) || !defined(_M_ARM64) + // clang-format -off + [[maybe_unused]] __fi static void BeginCodeWrite() {} + [[maybe_unused]] __fi static void EndCodeWrite() {} + // clang-format on +#else + void BeginCodeWrite(); + void EndCodeWrite(); +#endif + + /// Flushes the instruction cache on the host for the specified range. + /// Only needed on ARM64, X86 has coherent D/I cache. 
+#ifdef _M_X86 + [[maybe_unused]] __fi static void FlushInstructionCache(void* address, u32 size) {} +#else + void FlushInstructionCache(void* address, u32 size); +#endif } class SharedMemoryMappingArea diff --git a/common/Linux/LnxHostSys.cpp b/common/Linux/LnxHostSys.cpp index d3ad89c0b3..9a8efa483d 100644 --- a/common/Linux/LnxHostSys.cpp +++ b/common/Linux/LnxHostSys.cpp @@ -1,6 +1,10 @@ // SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team // SPDX-License-Identifier: LGPL-3.0+ +#if defined(__APPLE__) +#define _XOPEN_SOURCE +#endif + #if !defined(_WIN32) #include #include @@ -38,14 +42,23 @@ static PageFaultHandler s_exception_handler_callback; static bool s_in_exception_handler; #ifdef __APPLE__ +#include +#include +#include +#endif + +#if defined(__APPLE__) || defined(__aarch64__) static struct sigaction s_old_sigbus_action; -#else +#endif +#if !defined(__APPLE__) || defined(__aarch64__) static struct sigaction s_old_sigsegv_action; #endif static void CallExistingSignalHandler(int signal, siginfo_t* siginfo, void* ctx) { -#ifdef __APPLE__ +#if defined(__aarch64__) + const struct sigaction& sa = (signal == SIGBUS) ? 
s_old_sigbus_action : s_old_sigsegv_action; +#elif defined(__APPLE__) const struct sigaction& sa = s_old_sigbus_action; #else const struct sigaction& sa = s_old_sigsegv_action; @@ -92,6 +105,12 @@ static void SysPageFaultSignalFilter(int signal, siginfo_t* siginfo, void* ctx) void* const exception_pc = reinterpret_cast(static_cast(ctx)->uc_mcontext.mc_rip); #elif defined(__x86_64__) void* const exception_pc = reinterpret_cast(static_cast(ctx)->uc_mcontext.gregs[REG_RIP]); +#elif defined(__aarch64__) + #ifndef __APPLE__ + void* const exception_pc = reinterpret_cast(static_cast(ctx)->uc_mcontext.pc); + #else + void* const exception_pc = reinterpret_cast(static_cast(ctx)->uc_mcontext->__ss.__pc); + #endif #else void* const exception_pc = nullptr; #endif @@ -129,13 +148,18 @@ bool HostSys::InstallPageFaultHandler(PageFaultHandler handler) // Don't block the signal from executing recursively, we want to fire the original handler. sa.sa_flags |= SA_NODEFER; #endif -#ifdef __APPLE__ - // MacOS uses SIGBUS for memory permission violations +#if defined(__APPLE__) || defined(__aarch64__) + // MacOS uses SIGBUS for memory permission violations, as well as SIGSEGV on ARM64. if (sigaction(SIGBUS, &sa, &s_old_sigbus_action) != 0) return false; -#else +#endif +#if !defined(__APPLE__) || defined(__aarch64__) if (sigaction(SIGSEGV, &sa, &s_old_sigsegv_action) != 0) return false; +#endif +#if defined(__APPLE__) && defined(__aarch64__) + // Stops LLDB getting in a EXC_BAD_ACCESS loop when passing page faults to PCSX2. 
+ task_set_exception_ports(mach_task_self(), EXC_MASK_BAD_ACCESS, MACH_PORT_NULL, EXCEPTION_DEFAULT, 0); #endif } @@ -154,9 +178,10 @@ void HostSys::RemovePageFaultHandler(PageFaultHandler handler) s_exception_handler_callback = nullptr; struct sigaction sa; -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__aarch64__) sigaction(SIGBUS, &s_old_sigbus_action, &sa); -#else +#endif +#if !defined(__APPLE__) || defined(__aarch64__) sigaction(SIGSEGV, &s_old_sigsegv_action, &sa); #endif } @@ -188,6 +213,11 @@ void* HostSys::Mmap(void* base, size_t size, const PageProtectionMode& mode) if (base) flags |= MAP_FIXED; +#if defined(__APPLE__) && defined(_M_ARM64) + if (mode.CanExecute()) + flags |= MAP_JIT; +#endif + void* res = mmap(base, size, prot, flags, -1, 0); if (res == MAP_FAILED) return nullptr; @@ -270,6 +300,15 @@ void HostSys::UnmapSharedMemory(void* baseaddr, size_t size) pxFailRel("Failed to unmap shared memory"); } +#ifdef _M_ARM64 + +void HostSys::FlushInstructionCache(void* address, u32 size) +{ + __builtin___clear_cache(reinterpret_cast(address), reinterpret_cast(address) + size); +} + +#endif + SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages) : m_base_ptr(base_ptr) , m_size(size) @@ -323,3 +362,22 @@ bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size) } #endif + +#if defined(_M_ARM64) && defined(__APPLE__) + +static thread_local int s_code_write_depth = 0; + +void HostSys::BeginCodeWrite() +{ + if ((s_code_write_depth++) == 0) + pthread_jit_write_protect_np(0); +} + +void HostSys::EndCodeWrite() +{ + pxAssert(s_code_write_depth > 0); + if ((--s_code_write_depth) == 0) + pthread_jit_write_protect_np(1); +} + +#endif diff --git a/common/Linux/LnxThreads.cpp b/common/Linux/LnxThreads.cpp index ec458e883d..bc6e719978 100644 --- a/common/Linux/LnxThreads.cpp +++ b/common/Linux/LnxThreads.cpp @@ -28,10 +28,6 @@ #include "common/Threading.h" #include "common/Assertions.h" -// We wont need this until 
we actually have this more then just stubbed out, so I'm commenting this out -// to remove an unneeded dependency. -//#include "x86emitter/tools.h" - #if !defined(__unix__) #pragma message("LnxThreads.cpp should only be compiled by projects or makefiles targeted at Linux/BSD distros.") @@ -53,7 +49,11 @@ __forceinline void Threading::SpinWait() { // If this doesn't compile you can just comment it out (it only serves as a // performance hint and isn't required). +#if defined(_M_X86) __asm__("pause"); +#elif defined(_M_ARM64) + __asm__ __volatile__("isb"); +#endif } __forceinline void Threading::EnableHiresScheduler() diff --git a/common/Perf.cpp b/common/Perf.cpp index 9435f93d7d..35893d79ed 100644 --- a/common/Perf.cpp +++ b/common/Perf.cpp @@ -138,7 +138,13 @@ namespace Perf pxAssertRel(perf_marker != MAP_FAILED, "Map perf marker"); JITDUMP_HEADER jh = {}; +#if defined(_M_X86) jh.elf_mach = EM_X86_64; +#elif defined(_M_ARM64) + jh.elf_mach = EM_AARCH64; +#else +#error Unhandled architecture. +#endif jh.pid = getpid(); jh.timestamp = JitDumpTimestamp(); std::fwrite(&jh, sizeof(jh), 1, s_jitdump_file); diff --git a/common/StringUtil.h b/common/StringUtil.h index 2a8e5792a8..438a16488f 100644 --- a/common/StringUtil.h +++ b/common/StringUtil.h @@ -14,7 +14,16 @@ #include #include +// Work around us defining _M_ARM64 but fast_float thinking that it means MSVC. +#if defined(_M_ARM64) && !defined(_WIN32) +#define HAD_M_ARM64 _M_ARM64 +#undef _M_ARM64 +#endif #include "fast_float/fast_float.h" +#if defined(HAD_M_ARM64) && !defined(_WIN32) +#define _M_ARM64 HAD_M_ARM64 +#undef HAD_M_ARM64 +#endif // Older versions of libstdc++ are missing support for from_chars() with floats, and was only recently // merged in libc++. So, just fall back to stringstream (yuck!) on everywhere except MSVC. 
diff --git a/common/Windows/WinHostSys.cpp b/common/Windows/WinHostSys.cpp index 59d2d9b091..c9828661a5 100644 --- a/common/Windows/WinHostSys.cpp +++ b/common/Windows/WinHostSys.cpp @@ -34,7 +34,13 @@ long __stdcall SysPageFaultExceptionFilter(EXCEPTION_POINTERS* eps) if (eps->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) return EXCEPTION_CONTINUE_SEARCH; +#if defined(_M_AMD64) void* const exception_pc = reinterpret_cast(eps->ContextRecord->Rip); +#elif defined(_M_ARM64) + void* const exception_pc = reinterpret_cast(eps->ContextRecord->Pc); +#else + void* const exception_pc = nullptr; +#endif const PageFaultInfo pfi{(uptr)exception_pc, (uptr)eps->ExceptionRecord->ExceptionInformation[1]}; @@ -161,6 +167,15 @@ void HostSys::UnmapSharedMemory(void* baseaddr, size_t size) pxFail("Failed to unmap shared memory"); } +#ifdef _M_ARM64 + +void HostSys::FlushInstructionCache(void* address, u32 size) +{ + ::FlushInstructionCache(GetCurrentProcess(), address, size); +} + +#endif + SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages) : m_base_ptr(base_ptr) , m_size(size) diff --git a/common/Windows/WinThreads.cpp b/common/Windows/WinThreads.cpp index 8c8e80e055..55bf48360a 100644 --- a/common/Windows/WinThreads.cpp +++ b/common/Windows/WinThreads.cpp @@ -5,7 +5,6 @@ #include "common/Threading.h" #include "common/Assertions.h" -#include "common/emitter/tools.h" #include "common/RedtapeWindows.h" #include #include @@ -20,7 +19,11 @@ __fi void Threading::Timeslice() // improve performance and reduce cpu power consumption. 
__fi void Threading::SpinWait() { +#ifdef _M_X86 _mm_pause(); +#else + YieldProcessor(); +#endif } __fi void Threading::EnableHiresScheduler() diff --git a/common/common.vcxproj b/common/common.vcxproj index 2bc8fc8458..1353ed22cc 100644 --- a/common/common.vcxproj +++ b/common/common.vcxproj @@ -111,6 +111,7 @@ + @@ -151,7 +152,6 @@ - diff --git a/common/common.vcxproj.filters b/common/common.vcxproj.filters index db89b4f2aa..416e15b660 100644 --- a/common/common.vcxproj.filters +++ b/common/common.vcxproj.filters @@ -234,9 +234,6 @@ Header Files - - Header Files - Header Files @@ -349,6 +346,7 @@ Header Files + diff --git a/common/emitter/avx.cpp b/common/emitter/avx.cpp index b5a2abc4cc..c33c6f2628 100644 --- a/common/emitter/avx.cpp +++ b/common/emitter/avx.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: LGPL-3.0+ #include "common/emitter/internal.h" -#include "common/emitter/tools.h" // warning: suggest braces around initialization of subobject [-Wmissing-braces] #ifdef __clang__ diff --git a/common/emitter/bmi.cpp b/common/emitter/bmi.cpp index b74e9df04d..c801c147ce 100644 --- a/common/emitter/bmi.cpp +++ b/common/emitter/bmi.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: LGPL-3.0+ #include "common/emitter/internal.h" -#include "common/emitter/tools.h" namespace x86Emitter { diff --git a/common/emitter/simd.cpp b/common/emitter/simd.cpp index 420052e1c1..cc899dd0c5 100644 --- a/common/emitter/simd.cpp +++ b/common/emitter/simd.cpp @@ -2,68 +2,8 @@ // SPDX-License-Identifier: LGPL-3.0+ #include "common/emitter/internal.h" -#include "common/emitter/tools.h" #include "common/VectorIntrin.h" -const char* EnumToString(SSE_RoundMode sse) -{ - switch (sse) - { - case SSEround_Nearest: - return "Nearest"; - case SSEround_NegInf: - return "NegativeInfinity"; - case SSEround_PosInf: - return "PositiveInfinity"; - case SSEround_Chop: - return "Chop"; - default: - return "Invalid"; - } -} - -SSE_MXCSR SSE_MXCSR::GetCurrent() -{ - SSE_MXCSR ret; - ret.bitmask = 
_mm_getcsr(); - return ret; -} - -void SSE_MXCSR::SetCurrent(const SSE_MXCSR& value) -{ - _mm_setcsr(value.bitmask); -} - -SSE_RoundMode SSE_MXCSR::GetRoundMode() const -{ - return (SSE_RoundMode)RoundingControl; -} - -SSE_MXCSR& SSE_MXCSR::SetRoundMode(SSE_RoundMode mode) -{ - pxAssert((uint)mode < 4); - RoundingControl = (u32)mode; - return *this; -} - -SSE_MXCSR& SSE_MXCSR::ClearExceptionFlags() -{ - bitmask &= ~0x3f; - return *this; -} - -SSE_MXCSR& SSE_MXCSR::EnableExceptions() -{ - bitmask &= ~(0x3f << 7); - return *this; -} - -SSE_MXCSR& SSE_MXCSR::DisableExceptions() -{ - bitmask |= 0x3f << 7; - return *this; -} - namespace x86Emitter { diff --git a/common/emitter/tools.h b/common/emitter/tools.h deleted file mode 100644 index 89b1854543..0000000000 --- a/common/emitter/tools.h +++ /dev/null @@ -1,75 +0,0 @@ -// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team -// SPDX-License-Identifier: LGPL-3.0+ - -#pragma once - -#include "common/Pcsx2Defs.h" - -enum SSE_RoundMode -{ - SSE_RoundMode_FIRST = 0, - SSEround_Nearest = 0, - SSEround_NegInf, - SSEround_PosInf, - SSEround_Chop, - SSE_RoundMode_COUNT -}; - -ImplementEnumOperators(SSE_RoundMode); - - -// -------------------------------------------------------------------------------------- -// SSE_MXCSR - Control/Status Register (bitfield) -// -------------------------------------------------------------------------------------- -// Bits 0-5 are exception flags; used only if SSE exceptions have been enabled. -// Bits in this field are "sticky" and, once an exception has occured, must be manually -// cleared using LDMXCSR or FXRSTOR. -// -// Bits 7-12 are the masks for disabling the exceptions in bits 0-5. Cleared bits allow -// exceptions, set bits mask exceptions from being raised. 
-// -union SSE_MXCSR -{ - u32 bitmask; - struct - { - u32 - InvalidOpFlag : 1, - DenormalFlag : 1, - DivideByZeroFlag : 1, - OverflowFlag : 1, - UnderflowFlag : 1, - PrecisionFlag : 1, - - DenormalsAreZero : 1, - - InvalidOpMask : 1, - DenormalMask : 1, - DivideByZeroMask : 1, - OverflowMask : 1, - UnderflowMask : 1, - PrecisionMask : 1, - - RoundingControl : 2, - FlushToZero : 1; - }; - - static SSE_MXCSR GetCurrent(); - static void SetCurrent(const SSE_MXCSR& value); - - SSE_RoundMode GetRoundMode() const; - SSE_MXCSR& SetRoundMode(SSE_RoundMode mode); - SSE_MXCSR& ClearExceptionFlags(); - SSE_MXCSR& EnableExceptions(); - SSE_MXCSR& DisableExceptions(); - - bool operator==(const SSE_MXCSR& right) const - { - return bitmask == right.bitmask; - } - - bool operator!=(const SSE_MXCSR& right) const - { - return bitmask != right.bitmask; - } -}; diff --git a/common/emitter/x86emitter.cpp b/common/emitter/x86emitter.cpp index 2d00cf1c73..8d13e1b0c0 100644 --- a/common/emitter/x86emitter.cpp +++ b/common/emitter/x86emitter.cpp @@ -17,7 +17,6 @@ */ #include "common/emitter/internal.h" -#include "common/emitter/tools.h" #include // ------------------------------------------------------------------------ diff --git a/common/emitter/x86emitter.h b/common/emitter/x86emitter.h index c6c74103da..8b682f28a3 100644 --- a/common/emitter/x86emitter.h +++ b/common/emitter/x86emitter.h @@ -30,7 +30,6 @@ #pragma once #include "common/emitter/x86types.h" -#include "common/emitter/tools.h" #include "common/emitter/instructions.h" // Including legacy items for now, but these should be removed eventually, diff --git a/pcsx2/Config.h b/pcsx2/Config.h index cffc89f4cc..fe9afaac36 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -3,8 +3,8 @@ #pragma once -#include "common/emitter/tools.h" #include "common/General.h" +#include "common/FPControl.h" #include #include #include @@ -385,11 +385,6 @@ typename std::underlying_type::type enum_cast(Enumeration E) ImplementEnumOperators(GamefixId); 
-//------------ DEFAULT sseMXCSR VALUES --------------- -#define DEFAULT_sseMXCSR 0xffc0 //FPU rounding > DaZ, FtZ, "chop" -#define DEFAULT_sseVUMXCSR 0xffc0 //VU rounding > DaZ, FtZ, "chop" -#define SYSTEM_sseMXCSR 0x1f80 - // -------------------------------------------------------------------------------------- // TraceFiltersEE // -------------------------------------------------------------------------------------- @@ -597,9 +592,9 @@ struct Pcsx2Config { RecompilerOptions Recompiler; - SSE_MXCSR sseMXCSR; - SSE_MXCSR sseVU0MXCSR; - SSE_MXCSR sseVU1MXCSR; + FPControlRegister FPUFPCR; + FPControlRegister VU0FPCR; + FPControlRegister VU1FPCR; u32 AffinityControlMode; @@ -611,7 +606,7 @@ struct Pcsx2Config bool operator==(const CpuOptions& right) const { - return OpEqu(sseMXCSR) && OpEqu(sseVU0MXCSR) && OpEqu(sseVU1MXCSR) && OpEqu(AffinityControlMode) && OpEqu(Recompiler); + return OpEqu(FPUFPCR) && OpEqu(VU0FPCR) && OpEqu(VU1FPCR) && OpEqu(AffinityControlMode) && OpEqu(Recompiler); } bool operator!=(const CpuOptions& right) const diff --git a/pcsx2/GS/Renderers/Common/GSFunctionMap.h b/pcsx2/GS/Renderers/Common/GSFunctionMap.h index 949054f93d..2107043cde 100644 --- a/pcsx2/GS/Renderers/Common/GSFunctionMap.h +++ b/pcsx2/GS/Renderers/Common/GSFunctionMap.h @@ -6,7 +6,6 @@ #include "GS/GSExtra.h" #include "GS/Renderers/SW/GSScanlineEnvironment.h" #include "System.h" -#include "common/emitter/tools.h" template class GSFunctionMap diff --git a/pcsx2/GameDatabase.cpp b/pcsx2/GameDatabase.cpp index d5e166047c..164a046c6e 100644 --- a/pcsx2/GameDatabase.cpp +++ b/pcsx2/GameDatabase.cpp @@ -114,26 +114,42 @@ void GameDatabase::parseAndInsert(const std::string_view& serial, const c4::yml: { int eeVal = -1; node["roundModes"]["eeRoundMode"] >> eeVal; - gameEntry.eeRoundMode = static_cast(eeVal); + if (eeVal >= 0 && eeVal < static_cast(FPRoundMode::MaxCount)) + gameEntry.eeRoundMode = static_cast(eeVal); + else + Console.Error(fmt::format("[GameDB] Invalid EE round mode 
'{}', specified for serial: '{}'.", eeVal, serial)); } if (node["roundModes"].has_child("vuRoundMode")) { int vuVal = -1; node["roundModes"]["vuRoundMode"] >> vuVal; - gameEntry.vu0RoundMode = static_cast(vuVal); - gameEntry.vu1RoundMode = static_cast(vuVal); + if (vuVal >= 0 && vuVal < static_cast(FPRoundMode::MaxCount)) + { + gameEntry.vu0RoundMode = static_cast(vuVal); + gameEntry.vu1RoundMode = static_cast(vuVal); + } + else + { + Console.Error(fmt::format("[GameDB] Invalid VU round mode '{}', specified for serial: '{}'.", vuVal, serial)); + } } if (node["roundModes"].has_child("vu0RoundMode")) { int vuVal = -1; node["roundModes"]["vu0RoundMode"] >> vuVal; - gameEntry.vu0RoundMode = static_cast(vuVal); + if (vuVal >= 0 && vuVal < static_cast(FPRoundMode::MaxCount)) + gameEntry.vu0RoundMode = static_cast(vuVal); + else + Console.Error(fmt::format("[GameDB] Invalid VU0 round mode '{}', specified for serial: '{}'.", vuVal, serial)); } if (node["roundModes"].has_child("vu1RoundMode")) { int vuVal = -1; node["roundModes"]["vu1RoundMode"] >> vuVal; - gameEntry.vu1RoundMode = static_cast(vuVal); + if (vuVal >= 0 && vuVal < static_cast(FPRoundMode::MaxCount)) + gameEntry.vu1RoundMode = static_cast(vuVal); + else + Console.Error(fmt::format("[GameDB] Invalid VU1 round mode '{}', specified for serial: '{}'.", vuVal, serial)); } } if (node.has_child("clampModes")) @@ -327,6 +343,13 @@ void GameDatabase::parseAndInsert(const std::string_view& serial, const c4::yml: s_game_db.emplace(std::move(serial), std::move(gameEntry)); } +static const char* s_round_modes[static_cast(FPRoundMode::MaxCount)] = { + "Nearest", + "NegativeInfinity", + "PositiveInfinity", + "Chop" +}; + static const char* s_gs_hw_fix_names[] = { "autoFlush", "cpuFramebufferConversion", @@ -410,48 +433,42 @@ void GameDatabaseSchema::GameEntry::applyGameFixes(Pcsx2Config& config, bool app if (!applyAuto) Console.Warning("[GameDB] Game Fixes are disabled"); - if (eeRoundMode != 
GameDatabaseSchema::RoundMode::Undefined) + if (eeRoundMode < FPRoundMode::MaxCount) { - const SSE_RoundMode eeRM = (SSE_RoundMode)enum_cast(eeRoundMode); - if (EnumIsValid(eeRM)) + if (applyAuto) { - if (applyAuto) - { - Console.WriteLn("(GameDB) Changing EE/FPU roundmode to %d [%s]", eeRM, EnumToString(eeRM)); - config.Cpu.sseMXCSR.SetRoundMode(eeRM); - } - else - Console.Warning("[GameDB] Skipping changing EE/FPU roundmode to %d [%s]", eeRM, EnumToString(eeRM)); + Console.WriteLn("(GameDB) Changing EE/FPU roundmode to %d [%s]", eeRoundMode, s_round_modes[static_cast(eeRoundMode)]); + config.Cpu.FPUFPCR.SetRoundMode(eeRoundMode); + } + else + { + Console.Warning("[GameDB] Skipping changing EE/FPU roundmode to %d [%s]", eeRoundMode, s_round_modes[static_cast(eeRoundMode)]); } } - if (vu0RoundMode != GameDatabaseSchema::RoundMode::Undefined) + if (vu0RoundMode < FPRoundMode::MaxCount) { - const SSE_RoundMode vuRM = (SSE_RoundMode)enum_cast(vu0RoundMode); - if (EnumIsValid(vuRM)) + if (applyAuto) { - if (applyAuto) - { - Console.WriteLn("(GameDB) Changing VU0 roundmode to %d [%s]", vuRM, EnumToString(vuRM)); - config.Cpu.sseVU0MXCSR.SetRoundMode(vuRM); - } - else - Console.Warning("[GameDB] Skipping changing VU0 roundmode to %d [%s]", vuRM, EnumToString(vuRM)); + Console.WriteLn("(GameDB) Changing VU0 roundmode to %d [%s]", vu0RoundMode, s_round_modes[static_cast(vu0RoundMode)]); + config.Cpu.VU0FPCR.SetRoundMode(vu0RoundMode); + } + else + { + Console.Warning("[GameDB] Skipping changing VU0 roundmode to %d [%s]", vu0RoundMode, s_round_modes[static_cast(vu0RoundMode)]); } } - if (vu1RoundMode != GameDatabaseSchema::RoundMode::Undefined) + if (vu1RoundMode < FPRoundMode::MaxCount) { - const SSE_RoundMode vuRM = (SSE_RoundMode)enum_cast(vu1RoundMode); - if (EnumIsValid(vuRM)) + if (applyAuto) { - if (applyAuto) - { - Console.WriteLn("(GameDB) Changing VU1 roundmode to %d [%s]", vuRM, EnumToString(vuRM)); - config.Cpu.sseVU1MXCSR.SetRoundMode(vuRM); - } - else - 
Console.Warning("[GameDB] Skipping changing VU1 roundmode to %d [%s]", vuRM, EnumToString(vuRM)); + Console.WriteLn("(GameDB) Changing VU1 roundmode to %d [%s]", vu1RoundMode, s_round_modes[static_cast(vu1RoundMode)]); + config.Cpu.VU1FPCR.SetRoundMode(vu1RoundMode); + } + else + { + Console.Warning("[GameDB] Skipping changing VU1 roundmode to %d [%s]", vu1RoundMode, s_round_modes[static_cast(vu1RoundMode)]); } } diff --git a/pcsx2/GameDatabase.h b/pcsx2/GameDatabase.h index 159d1a80de..f2f4f9af42 100644 --- a/pcsx2/GameDatabase.h +++ b/pcsx2/GameDatabase.h @@ -5,6 +5,9 @@ #include "Config.h" #include "Patch.h" + +#include "common/FPControl.h" + #include #include #include @@ -27,22 +30,14 @@ namespace GameDatabaseSchema Perfect }; - enum class RoundMode - { - Undefined = -1, - Nearest = 0, - NegativeInfinity, - PositiveInfinity, - ChopZero - }; - enum class ClampMode { Undefined = -1, Disabled = 0, Normal, Extra, - Full + Full, + Count }; enum class GSHWFixId : u32 @@ -96,9 +91,9 @@ namespace GameDatabaseSchema std::string name_en; std::string region; Compatibility compat = Compatibility::Unknown; - RoundMode eeRoundMode = RoundMode::Undefined; - RoundMode vu0RoundMode = RoundMode::Undefined; - RoundMode vu1RoundMode = RoundMode::Undefined; + FPRoundMode eeRoundMode = FPRoundMode::MaxCount; + FPRoundMode vu0RoundMode = FPRoundMode::MaxCount; + FPRoundMode vu1RoundMode = FPRoundMode::MaxCount; ClampMode eeClampMode = ClampMode::Undefined; ClampMode vu0ClampMode = ClampMode::Undefined; ClampMode vu1ClampMode = ClampMode::Undefined; diff --git a/pcsx2/ImGui/ImGuiOverlays.cpp b/pcsx2/ImGui/ImGuiOverlays.cpp index f0dd5d4d90..54efe28374 100644 --- a/pcsx2/ImGui/ImGuiOverlays.cpp +++ b/pcsx2/ImGui/ImGuiOverlays.cpp @@ -345,8 +345,8 @@ void ImGuiManager::DrawSettingsOverlay() if (EmuConfig.Speedhacks.vuThread) APPEND("MTVU "); - APPEND("EER={} EEC={} VUR={} VUC={} VQS={} ", static_cast(EmuConfig.Cpu.sseMXCSR.GetRoundMode()), - EmuConfig.Cpu.Recompiler.GetEEClampMode(), 
static_cast(EmuConfig.Cpu.sseVU0MXCSR.GetRoundMode()), + APPEND("EER={} EEC={} VUR={} VUC={} VQS={} ", static_cast(EmuConfig.Cpu.FPUFPCR.GetRoundMode()), + EmuConfig.Cpu.Recompiler.GetEEClampMode(), static_cast(EmuConfig.Cpu.VU0FPCR.GetRoundMode()), EmuConfig.Cpu.Recompiler.GetVUClampMode(), EmuConfig.GS.VsyncQueueSize); if (EmuConfig.EnableCheats || EmuConfig.EnableWideScreenPatches || EmuConfig.EnableNoInterlacingPatches) diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 2b40881585..4f4c5a4ddf 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -6,6 +6,7 @@ #include "common/SettingsInterface.h" #include "common/SettingsWrapper.h" #include "common/StringUtil.h" +#include "common/SmallString.h" #include "Config.h" #include "GS.h" #include "CDVD/CDVDcommon.h" @@ -21,6 +22,18 @@ #include #endif +// Default EE/VU control registers have exceptions off, DaZ/FTZ, and the rounding mode set to Chop/Zero. +static constexpr FPControlRegister DEFAULT_FPU_FP_CONTROL_REGISTER = FPControlRegister::GetDefault() + .DisableExceptions() + .SetDenormalsAreZero(true) + .SetFlushToZero(true) + .SetRoundMode(FPRoundMode::ChopZero); +static constexpr FPControlRegister DEFAULT_VU_FP_CONTROL_REGISTER = FPControlRegister::GetDefault() + .DisableExceptions() + .SetDenormalsAreZero(true) + .SetFlushToZero(true) + .SetRoundMode(FPRoundMode::ChopZero); + const char* SettingInfo::StringDefaultValue() const { return default_value ? 
default_value : ""; @@ -389,17 +402,14 @@ bool Pcsx2Config::CpuOptions::CpusChanged(const CpuOptions& right) const Pcsx2Config::CpuOptions::CpuOptions() { - sseMXCSR.bitmask = DEFAULT_sseMXCSR; - sseVU0MXCSR.bitmask = DEFAULT_sseVUMXCSR; - sseVU1MXCSR.bitmask = DEFAULT_sseVUMXCSR; + FPUFPCR = DEFAULT_FPU_FP_CONTROL_REGISTER; + VU0FPCR = DEFAULT_VU_FP_CONTROL_REGISTER; + VU1FPCR = DEFAULT_VU_FP_CONTROL_REGISTER; AffinityControlMode = 0; } void Pcsx2Config::CpuOptions::ApplySanityCheck() { - sseMXCSR.ClearExceptionFlags().DisableExceptions(); - sseVU0MXCSR.ClearExceptionFlags().DisableExceptions(); - sseVU1MXCSR.ClearExceptionFlags().DisableExceptions(); AffinityControlMode = std::min(AffinityControlMode, 6); Recompiler.ApplySanityCheck(); @@ -409,17 +419,23 @@ void Pcsx2Config::CpuOptions::LoadSave(SettingsWrapper& wrap) { SettingsWrapSection("EmuCore/CPU"); - SettingsWrapBitBoolEx(sseMXCSR.DenormalsAreZero, "FPU.DenormalsAreZero"); - SettingsWrapBitBoolEx(sseMXCSR.FlushToZero, "FPU.FlushToZero"); - SettingsWrapBitfieldEx(sseMXCSR.RoundingControl, "FPU.Roundmode"); - SettingsWrapEntry(AffinityControlMode); + const auto read_fpcr = [&wrap, &CURRENT_SETTINGS_SECTION](FPControlRegister& fpcr, std::string_view prefix) { + fpcr.SetDenormalsAreZero(wrap.EntryBitBool(CURRENT_SETTINGS_SECTION, TinyString::from_fmt("{}.DenormalsAreZero", prefix), + fpcr.GetDenormalsAreZero(), fpcr.GetDenormalsAreZero())); + fpcr.SetFlushToZero(wrap.EntryBitBool(CURRENT_SETTINGS_SECTION, TinyString::from_fmt("{}.FlushToZero", prefix), + fpcr.GetFlushToZero(), fpcr.GetFlushToZero())); - SettingsWrapBitBoolEx(sseVU0MXCSR.DenormalsAreZero, "VU0.DenormalsAreZero"); - SettingsWrapBitBoolEx(sseVU0MXCSR.FlushToZero, "VU0.FlushToZero"); - SettingsWrapBitfieldEx(sseVU0MXCSR.RoundingControl, "VU0.Roundmode"); - SettingsWrapBitBoolEx(sseVU1MXCSR.DenormalsAreZero, "VU1.DenormalsAreZero"); - SettingsWrapBitBoolEx(sseVU1MXCSR.FlushToZero, "VU1.FlushToZero"); - 
SettingsWrapBitfieldEx(sseVU1MXCSR.RoundingControl, "VU1.Roundmode"); + uint round_mode = static_cast(fpcr.GetRoundMode()); + wrap.Entry(CURRENT_SETTINGS_SECTION, TinyString::from_fmt("{}.Roundmode", prefix), round_mode, round_mode); + round_mode = std::min(round_mode, static_cast(FPRoundMode::MaxCount) - 1u); + fpcr.SetRoundMode(static_cast(round_mode)); + }; + + read_fpcr(FPUFPCR, "FPU"); + read_fpcr(VU0FPCR, "VU0"); + read_fpcr(VU1FPCR, "VU1"); + + SettingsWrapEntry(AffinityControlMode); Recompiler.LoadSave(wrap); } @@ -461,6 +477,7 @@ const char* Pcsx2Config::GSOptions::GetRendererName(GSRendererType type) { switch (type) { + // clang-format off case GSRendererType::Auto: return "Auto"; case GSRendererType::DX11: return "Direct3D 11"; case GSRendererType::DX12: return "Direct3D 12"; @@ -470,6 +487,7 @@ const char* Pcsx2Config::GSOptions::GetRendererName(GSRendererType type) case GSRendererType::SW: return "Software"; case GSRendererType::Null: return "Null"; default: return ""; + // clang-format on } } @@ -636,7 +654,7 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const OpEqu(AudioCaptureBitrate) && OpEqu(Adapter) && - + OpEqu(HWDumpDirectory) && OpEqu(SWDumpDirectory)); } @@ -1093,26 +1111,26 @@ std::string Pcsx2Config::DEV9Options::SaveIPHelper(u8* field) } static const char* const tbl_GamefixNames[] = -{ - "FpuMul", - "FpuNegDiv", - "GoemonTlb", - "SoftwareRendererFMV", - "SkipMPEG", - "OPHFlag", - "EETiming", - "InstantDMA", - "DMABusy", - "GIFFIFO", - "VIFFIFO", - "VIF1Stall", - "VuAddSub", - "Ibit", - "VUSync", - "VUOverflow", - "XGKick", - "BlitInternalFPS", - "FullVU0Sync", + { + "FpuMul", + "FpuNegDiv", + "GoemonTlb", + "SoftwareRendererFMV", + "SkipMPEG", + "OPHFlag", + "EETiming", + "InstantDMA", + "DMABusy", + "GIFFIFO", + "VIFFIFO", + "VIF1Stall", + "VuAddSub", + "Ibit", + "VUSync", + "VUOverflow", + "XGKick", + "BlitInternalFPS", + "FullVU0Sync", }; const char* EnumToString(GamefixId id) @@ -1137,6 +1155,7 @@ void 
Pcsx2Config::GamefixOptions::Set(GamefixId id, bool enabled) pxAssert(EnumIsValid(id)); switch (id) { + // clang-format off case Fix_VuAddSub: VuAddSubHack = enabled; break; case Fix_FpuMultiply: FpuMulHack = enabled; break; case Fix_FpuNegDiv: FpuNegDivHack = enabled; break; @@ -1157,6 +1176,7 @@ void Pcsx2Config::GamefixOptions::Set(GamefixId id, bool enabled) case Fix_BlitInternalFPS: BlitInternalFPSHack = enabled; break; case Fix_FullVU0Sync: FullVU0SyncHack = enabled; break; jNO_DEFAULT; + // clang-format on } } @@ -1165,6 +1185,7 @@ bool Pcsx2Config::GamefixOptions::Get(GamefixId id) const pxAssert(EnumIsValid(id)); switch (id) { + // clang-format off case Fix_VuAddSub: return VuAddSubHack; case Fix_FpuMultiply: return FpuMulHack; case Fix_FpuNegDiv: return FpuNegDivHack; @@ -1185,6 +1206,7 @@ bool Pcsx2Config::GamefixOptions::Get(GamefixId id) const case Fix_BlitInternalFPS: return BlitInternalFPSHack; case Fix_FullVU0Sync: return FullVU0SyncHack; jNO_DEFAULT; + // clang-format on } return false; // unreachable, but we still need to suppress warnings >_< } @@ -1497,11 +1519,6 @@ Pcsx2Config::Pcsx2Config() void Pcsx2Config::LoadSaveCore(SettingsWrapper& wrap) { - // Switch the rounding mode back to the system default for loading settings. - // That way, we'll get exactly the same values as what we loaded when we first started. 
- const SSE_MXCSR prev_mxcsr(SSE_MXCSR::GetCurrent()); - SSE_MXCSR::SetCurrent(SSE_MXCSR{SYSTEM_sseMXCSR}); - SettingsWrapSection("EmuCore"); SettingsWrapBitBool(CdvdVerboseReads); @@ -1561,8 +1578,6 @@ void Pcsx2Config::LoadSaveCore(SettingsWrapper& wrap) { CurrentAspectRatio = GS.AspectRatio; } - - SSE_MXCSR::SetCurrent(prev_mxcsr); } void Pcsx2Config::LoadSave(SettingsWrapper& wrap) @@ -1622,6 +1637,8 @@ void Pcsx2Config::CopyRuntimeConfig(Pcsx2Config& cfg) void Pcsx2Config::CopyConfiguration(SettingsInterface* dest_si, SettingsInterface& src_si) { + FPControlRegisterBackup fpcr_backup(FPControlRegister::GetDefault()); + Pcsx2Config temp; { SettingsLoadWrapper wrapper(src_si); @@ -1635,6 +1652,8 @@ void Pcsx2Config::CopyConfiguration(SettingsInterface* dest_si, SettingsInterfac void Pcsx2Config::ClearConfiguration(SettingsInterface* dest_si) { + FPControlRegisterBackup fpcr_backup(FPControlRegister::GetDefault()); + Pcsx2Config temp; SettingsClearWrapper wrapper(*dest_si); temp.LoadSaveCore(wrapper); diff --git a/pcsx2/System.cpp b/pcsx2/System.cpp index cce0e8cb36..89bb614b4d 100644 --- a/pcsx2/System.cpp +++ b/pcsx2/System.cpp @@ -21,18 +21,10 @@ #include "common/Perf.h" #include "common/StringUtil.h" -#ifdef _M_X86 -#include "common/emitter/tools.h" -#endif - extern R5900cpu GSDumpReplayerCpu; Pcsx2Config EmuConfig; -SSE_MXCSR g_sseMXCSR = {DEFAULT_sseMXCSR}; -SSE_MXCSR g_sseVU0MXCSR = {DEFAULT_sseVUMXCSR}; -SSE_MXCSR g_sseVU1MXCSR = {DEFAULT_sseVUMXCSR}; - namespace SysMemory { static u8* TryAllocateVirtualMemory(const char* name, void* file_handle, uptr base, size_t size); @@ -47,19 +39,6 @@ namespace SysMemory static u8* s_code_memory; } // namespace SysMemory -// SetCPUState -- for assignment of SSE roundmodes and clampmodes. 
-// -void SetCPUState(SSE_MXCSR sseMXCSR, SSE_MXCSR sseVU0MXCSR, SSE_MXCSR sseVU1MXCSR) -{ - //Msgbox::Alert("SetCPUState: Config.sseMXCSR = %x; Config.sseVUMXCSR = %x \n", Config.sseMXCSR, Config.sseVUMXCSR); - - g_sseMXCSR = sseMXCSR; - g_sseVU0MXCSR = sseVU0MXCSR; - g_sseVU1MXCSR = sseVU1MXCSR; - - _mm_setcsr(g_sseMXCSR.bitmask); -} - // This function should be called once during program execution. void SysLogMachineCaps() { diff --git a/pcsx2/System.h b/pcsx2/System.h index 3ddff7978e..54ae1e51f1 100644 --- a/pcsx2/System.h +++ b/pcsx2/System.h @@ -156,10 +156,8 @@ public: // GetCpuProviders - this function is not implemented by PCSX2 core -- it must be // implemented by the provisioning interface. +// TODO: Purge this and the class above extern SysCpuProviderPack& GetCpuProviders(); extern void SysLogMachineCaps(); // Detects cpu type and fills cpuInfo structs. extern void SysClearExecutionCache(); // clears recompiled execution caches! - -extern void SetCPUState(SSE_MXCSR sseMXCSR, SSE_MXCSR sseVU0MXCSR, SSE_MXCSR sseVU1MXCSR); -extern SSE_MXCSR g_sseVU0MXCSR, g_sseVU1MXCSR, g_sseMXCSR; diff --git a/pcsx2/VMManager.cpp b/pcsx2/VMManager.cpp index a1e95360a6..517d37169a 100644 --- a/pcsx2/VMManager.cpp +++ b/pcsx2/VMManager.cpp @@ -43,6 +43,7 @@ #include "common/Console.h" #include "common/Error.h" #include "common/FileSystem.h" +#include "common/FPControl.h" #include "common/ScopedGuard.h" #include "common/SettingsWrapper.h" #include "common/SmallString.h" @@ -59,10 +60,6 @@ #include #include -#ifdef _M_X86 -#include "common/emitter/tools.h" -#endif - #ifdef _WIN32 #include "common/RedtapeWindows.h" #include @@ -160,7 +157,6 @@ static bool s_elf_executed = false; static std::string s_elf_override; static std::string s_input_profile_name; static u32 s_frame_advance_count = 0; -static u32 s_mxcsr_saved; static bool s_fast_boot_requested = false; static bool s_gs_open_on_initialize = false; @@ -342,6 +338,9 @@ bool VMManager::Internal::CPUThreadInitialize() } 
#endif + // Use the default rounding mode, just in case it differs on some platform. + FPControlRegister::SetCurrent(FPControlRegister::GetDefault()); + if (!cpuinfo_initialize()) Console.Error("cpuinfo_initialize() failed."); @@ -431,6 +430,8 @@ void VMManager::Internal::LoadStartupSettings() void VMManager::SetDefaultSettings( SettingsInterface& si, bool folders, bool core, bool controllers, bool hotkeys, bool ui) { + FPControlRegisterBackup fpcr_backup(FPControlRegister::GetDefault()); + if (si.GetUIntValue("UI", "SettingsVersion", 0u) != SETTINGS_VERSION) si.SetUIntValue("UI", "SettingsVersion", SETTINGS_VERSION); @@ -461,6 +462,11 @@ void VMManager::SetDefaultSettings( void VMManager::LoadSettings() { + // Switch the rounding mode back to the system default for loading settings. + // We might have a different mode, because this can be called during setting updates while a VM is active, + // and the rounding mode has an impact on the conversion of floating-point values to/from strings. 
+ FPControlRegisterBackup fpcr_backup(FPControlRegister::GetDefault()); + std::unique_lock lock = Host::GetSettingsLock(); SettingsInterface* si = Host::GetSettingsInterface(); LoadCoreSettings(si); @@ -561,6 +567,7 @@ void VMManager::ApplyCoreSettings() EmuConfig.CopyRuntimeConfig(old_config); { + FPControlRegisterBackup fpcr_backup(FPControlRegister::GetDefault()); std::unique_lock lock = Host::GetSettingsLock(); LoadCoreSettings(Host::GetSettingsInterface()); WarnAboutUnsafeSettings(); @@ -1226,7 +1233,7 @@ bool VMManager::Initialize(VMBootParameters boot_params) s_cpu_implementation_changed = false; s_cpu_provider_pack->ApplyConfig(); - SetCPUState(EmuConfig.Cpu.sseMXCSR, EmuConfig.Cpu.sseVU0MXCSR, EmuConfig.Cpu.sseVU1MXCSR); + FPControlRegister::SetCurrent(EmuConfig.Cpu.FPUFPCR); SysClearExecutionCache(); memBindConditionalHandlers(); SysMemory::Reset(); @@ -1324,12 +1331,6 @@ bool VMManager::Initialize(VMBootParameters boot_params) close_cdvd_files.Cancel(); close_state.Cancel(); -#if defined(_M_X86) - s_mxcsr_saved = _mm_getcsr(); -#elif defined(_M_ARM64) - s_mxcsr_saved = static_cast(a64_getfpcr()); -#endif - hwReset(); Console.WriteLn("VM subsystems initialized in %.2f ms", init_timer.GetTimeMilliseconds()); @@ -1393,11 +1394,7 @@ void VMManager::Shutdown(bool save_resume_state) UpdateGameSettingsLayer(); -#ifdef _M_X86 - _mm_setcsr(s_mxcsr_saved); -#elif defined(_M_ARM64) - a64_setfpcr(s_mxcsr_saved); -#endif + FPControlRegister::SetCurrent(FPControlRegister::GetDefault()); Patch::UnloadPatches(); R3000A::ioman::reset(); @@ -2293,7 +2290,7 @@ void VMManager::CheckForCPUConfigChanges(const Pcsx2Config& old_config) } Console.WriteLn("Updating CPU configuration..."); - SetCPUState(EmuConfig.Cpu.sseMXCSR, EmuConfig.Cpu.sseVU0MXCSR, EmuConfig.Cpu.sseVU1MXCSR); + FPControlRegister::SetCurrent(EmuConfig.Cpu.FPUFPCR); SysClearExecutionCache(); memBindConditionalHandlers(); @@ -2600,7 +2597,7 @@ void VMManager::WarnAboutUnsafeSettings() TRANSLATE_SV( "VMManager", 
"Hardware Download Mode is not set to Accurate, this may break rendering in some games.")); } - if (EmuConfig.Cpu.sseMXCSR.GetRoundMode() != SSEround_Chop) + if (EmuConfig.Cpu.FPUFPCR.GetRoundMode() != FPRoundMode::ChopZero) { append(ICON_FA_MICROCHIP, TRANSLATE_SV("VMManager", "EE FPU Round Mode is not set to default, this may break some games.")); @@ -2611,11 +2608,15 @@ void VMManager::WarnAboutUnsafeSettings() append(ICON_FA_MICROCHIP, TRANSLATE_SV("VMManager", "EE FPU Clamp Mode is not set to default, this may break some games.")); } - if (EmuConfig.Cpu.sseVU0MXCSR.GetRoundMode() != SSEround_Chop || - EmuConfig.Cpu.sseVU1MXCSR.GetRoundMode() != SSEround_Chop) + if (EmuConfig.Cpu.VU0FPCR.GetRoundMode() != FPRoundMode::ChopZero) { append(ICON_FA_MICROCHIP, - TRANSLATE_SV("VMManager", "VU Round Mode is not set to default, this may break some games.")); + TRANSLATE_SV("VMManager", "VU0 Round Mode is not set to default, this may break some games.")); + } + if (EmuConfig.Cpu.VU1FPCR.GetRoundMode() != FPRoundMode::ChopZero) + { + append(ICON_FA_MICROCHIP, + TRANSLATE_SV("VMManager", "VU1 Round Mode is not set to default, this may break some games.")); } if (!EmuConfig.Cpu.Recompiler.vu0Overflow || EmuConfig.Cpu.Recompiler.vu0ExtraOverflow || EmuConfig.Cpu.Recompiler.vu0SignOverflow || !EmuConfig.Cpu.Recompiler.vu1Overflow || diff --git a/pcsx2/VU0microInterp.cpp b/pcsx2/VU0microInterp.cpp index b683eab41e..2a467324a2 100644 --- a/pcsx2/VU0microInterp.cpp +++ b/pcsx2/VU0microInterp.cpp @@ -248,8 +248,7 @@ void InterpVU0::Step() void InterpVU0::Execute(u32 cycles) { - const int originalRounding = fegetround(); - fesetround(g_sseVU0MXCSR.RoundingControl << 8); + const FPControlRegisterBackup fpcr_backup(EmuConfig.Cpu.VU0FPCR); VU0.VI[REG_TPC].UL <<= 3; VU0.flags &= ~VUFLAG_MFLAGSET; @@ -302,7 +301,6 @@ void InterpVU0::Execute(u32 cycles) } VU0.cycle += cycle_change; } - fesetround(originalRounding); VU0.nextBlockCycles = (VU0.cycle - cpuRegs.cycle) + 1; } diff --git 
a/pcsx2/VU1microInterp.cpp b/pcsx2/VU1microInterp.cpp index d9a6f9fcf0..13844f1eac 100644 --- a/pcsx2/VU1microInterp.cpp +++ b/pcsx2/VU1microInterp.cpp @@ -258,8 +258,7 @@ void InterpVU1::Step() void InterpVU1::Execute(u32 cycles) { - const int originalRounding = fegetround(); - fesetround(g_sseVU1MXCSR.RoundingControl << 8); + const FPControlRegisterBackup fpcr_backup(EmuConfig.Cpu.VU1FPCR); VU1.VI[REG_TPC].UL <<= 3; u32 startcycles = VU1.cycle; @@ -279,5 +278,4 @@ void InterpVU1::Execute(u32 cycles) } VU1.VI[REG_TPC].UL >>= 3; VU1.nextBlockCycles = (VU1.cycle - cpuRegs.cycle) + 1; - fesetround(originalRounding); } diff --git a/pcsx2/x86/iFPU.cpp b/pcsx2/x86/iFPU.cpp index 5acad9ee8d..5267fcfd71 100644 --- a/pcsx2/x86/iFPU.cpp +++ b/pcsx2/x86/iFPU.cpp @@ -1093,7 +1093,7 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags ClampValues(regd); } -alignas(16) static SSE_MXCSR roundmode_nearest, roundmode_neg; +alignas(16) static FPControlRegister roundmode_nearest, roundmode_neg; void recDIV_S_xmm(int info) { @@ -1104,26 +1104,26 @@ void recDIV_S_xmm(int info) if (CHECK_FPUNEGDIVHACK) { - if (g_sseMXCSR.GetRoundMode() != SSEround_NegInf) + if (EmuConfig.Cpu.FPUFPCR.GetRoundMode() != FPRoundMode::NegativeInfinity) { // Set roundmode to nearest since it isn't already //Console.WriteLn("div to negative inf"); - roundmode_neg = g_sseMXCSR; - roundmode_neg.SetRoundMode(SSEround_NegInf); + roundmode_neg = EmuConfig.Cpu.FPUFPCR; + roundmode_neg.SetRoundMode(FPRoundMode::NegativeInfinity); xLDMXCSR(ptr32[&roundmode_neg.bitmask]); roundmodeFlag = true; } } else { - if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest) + if (EmuConfig.Cpu.FPUFPCR.GetRoundMode() != FPRoundMode::Nearest) { // Set roundmode to nearest since it isn't already //Console.WriteLn("div to nearest"); - roundmode_nearest = g_sseMXCSR; - roundmode_nearest.SetRoundMode(SSEround_Nearest); + roundmode_nearest = EmuConfig.Cpu.FPUFPCR; + roundmode_nearest.SetRoundMode(FPRoundMode::Nearest); 
xLDMXCSR(ptr32[&roundmode_nearest.bitmask]); roundmodeFlag = true; } @@ -1191,7 +1191,7 @@ void recDIV_S_xmm(int info) break; } if (roundmodeFlag) - xLDMXCSR(ptr32[&g_sseMXCSR.bitmask]); + xLDMXCSR(ptr32[&EmuConfig.Cpu.FPUFPCR.bitmask]); _freeXMMreg(t0reg); } @@ -1768,12 +1768,12 @@ void recSQRT_S_xmm(int info) bool roundmodeFlag = false; //Console.WriteLn("FPU: SQRT"); - if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest) + if (EmuConfig.Cpu.FPUFPCR.GetRoundMode() != FPRoundMode::Nearest) { // Set roundmode to nearest if it isn't already //Console.WriteLn("sqrt to nearest"); - roundmode_nearest = g_sseMXCSR; - roundmode_nearest.SetRoundMode(SSEround_Nearest); + roundmode_nearest = EmuConfig.Cpu.FPUFPCR; + roundmode_nearest.SetRoundMode(FPRoundMode::Nearest); xLDMXCSR(ptr32[&roundmode_nearest.bitmask]); roundmodeFlag = true; } @@ -1805,7 +1805,7 @@ void recSQRT_S_xmm(int info) ClampValues(EEREC_D); if (roundmodeFlag) - xLDMXCSR(ptr32[&g_sseMXCSR.bitmask]); + xLDMXCSR(ptr32[&EmuConfig.Cpu.FPUFPCR.bitmask]); } FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED | XMMINFO_READT); diff --git a/pcsx2/x86/iFPUd.cpp b/pcsx2/x86/iFPUd.cpp index b0897996d1..62f5510bad 100644 --- a/pcsx2/x86/iFPUd.cpp +++ b/pcsx2/x86/iFPUd.cpp @@ -652,7 +652,7 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags ToPS2FPU(regd, false, regt, false); } -alignas(16) static SSE_MXCSR roundmode_nearest, roundmode_neg; +alignas(16) static FPControlRegister roundmode_nearest, roundmode_neg; void recDIV_S_xmm(int info) { @@ -662,26 +662,26 @@ void recDIV_S_xmm(int info) if (CHECK_FPUNEGDIVHACK) { - if (g_sseMXCSR.GetRoundMode() != SSEround_NegInf) + if (EmuConfig.Cpu.FPUFPCR.GetRoundMode() != FPRoundMode::NegativeInfinity) { // Set roundmode to nearest since it isn't already //Console.WriteLn("div to negative inf"); - roundmode_neg = g_sseMXCSR; - roundmode_neg.SetRoundMode(SSEround_NegInf); + roundmode_neg = EmuConfig.Cpu.FPUFPCR; + roundmode_neg.SetRoundMode(FPRoundMode::NegativeInfinity); 
xLDMXCSR(ptr32[&roundmode_neg.bitmask]); roundmodeFlag = true; } } else { - if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest) + if (EmuConfig.Cpu.FPUFPCR.GetRoundMode() != FPRoundMode::Nearest) { // Set roundmode to nearest since it isn't already //Console.WriteLn("div to nearest"); - roundmode_nearest = g_sseMXCSR; - roundmode_nearest.SetRoundMode(SSEround_Nearest); + roundmode_nearest = EmuConfig.Cpu.FPUFPCR; + roundmode_nearest.SetRoundMode(FPRoundMode::Nearest); xLDMXCSR(ptr32[&roundmode_nearest.bitmask]); roundmodeFlag = true; } @@ -699,7 +699,7 @@ void recDIV_S_xmm(int info) xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(sreg)); if (roundmodeFlag) - xLDMXCSR(ptr32[&g_sseMXCSR]); + xLDMXCSR(ptr32[&EmuConfig.Cpu.FPUFPCR.bitmask]); _freeXMMreg(sreg); _freeXMMreg(treg); } @@ -952,12 +952,12 @@ void recSQRT_S_xmm(int info) const int t1reg = _allocTempXMMreg(XMMT_FPS); //Console.WriteLn("FPU: SQRT"); - if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest) + if (EmuConfig.Cpu.FPUFPCR.GetRoundMode() != FPRoundMode::Nearest) { // Set roundmode to nearest if it isn't already //Console.WriteLn("sqrt to nearest"); - roundmode_nearest = g_sseMXCSR; - roundmode_nearest.SetRoundMode(SSEround_Nearest); + roundmode_nearest = EmuConfig.Cpu.FPUFPCR; + roundmode_nearest.SetRoundMode(FPRoundMode::Nearest); xLDMXCSR(ptr32[&roundmode_nearest.bitmask]); roundmodeFlag = 1; } @@ -989,7 +989,7 @@ void recSQRT_S_xmm(int info) ToPS2FPU(EEREC_D, false, t1reg, false); if (roundmodeFlag == 1) - xLDMXCSR(ptr32[&g_sseMXCSR.bitmask]); + xLDMXCSR(ptr32[&EmuConfig.Cpu.FPUFPCR.bitmask]); _freeXMMreg(t1reg); } @@ -1074,12 +1074,12 @@ void recRSQRT_S_xmm(int info) // behavior for both recs? 
--air bool roundmodeFlag = false; - if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest) + if (EmuConfig.Cpu.FPUFPCR.GetRoundMode() != FPRoundMode::Nearest) { // Set roundmode to nearest if it isn't already //Console.WriteLn("sqrt to nearest"); - roundmode_nearest = g_sseMXCSR; - roundmode_nearest.SetRoundMode(SSEround_Nearest); + roundmode_nearest = EmuConfig.Cpu.FPUFPCR; + roundmode_nearest.SetRoundMode(FPRoundMode::Nearest); xLDMXCSR(ptr32[&roundmode_nearest.bitmask]); roundmodeFlag = true; } @@ -1096,7 +1096,7 @@ void recRSQRT_S_xmm(int info) _freeXMMreg(treg); _freeXMMreg(sreg); if (roundmodeFlag) - xLDMXCSR(ptr32[&g_sseMXCSR.bitmask]); + xLDMXCSR(ptr32[&EmuConfig.Cpu.FPUFPCR.bitmask]); } FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 8d02e747c3..f2e5d1f580 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -3,6 +3,7 @@ #pragma once +#include "Config.h" #include "cpuinfo.h" //------------------------------------------------------------------ @@ -15,7 +16,7 @@ static bool mvuNeedsFPCRUpdate(mV) return true; // otherwise only emit when it's different to the EE - return g_sseMXCSR.bitmask != (isVU0 ? g_sseVU0MXCSR.bitmask : g_sseVU1MXCSR.bitmask); + return EmuConfig.Cpu.FPUFPCR.bitmask != (isVU0 ? EmuConfig.Cpu.VU0FPCR.bitmask : EmuConfig.Cpu.VU1FPCR.bitmask); } // Generates the code for entering/exit recompiled blocks @@ -32,7 +33,7 @@ void mVUdispatcherAB(mV) // Load VU's MXCSR state if (mvuNeedsFPCRUpdate(mVU)) - xLDMXCSR(ptr32[isVU0 ? &g_sseVU0MXCSR.bitmask : &g_sseVU1MXCSR.bitmask]); + xLDMXCSR(ptr32[isVU0 ? 
&EmuConfig.Cpu.VU0FPCR.bitmask : &EmuConfig.Cpu.VU1FPCR.bitmask]); // Load Regs xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_P].UL]); @@ -72,7 +73,7 @@ void mVUdispatcherAB(mV) // Load EE's MXCSR state if (mvuNeedsFPCRUpdate(mVU)) - xLDMXCSR(ptr32[&g_sseMXCSR.bitmask]); + xLDMXCSR(ptr32[&EmuConfig.Cpu.FPUFPCR.bitmask]); // = The first two DWORD or smaller arguments are passed in ECX and EDX registers; // all other arguments are passed right to left. @@ -96,7 +97,7 @@ void mVUdispatcherCD(mV) // Load VU's MXCSR state if (mvuNeedsFPCRUpdate(mVU)) - xLDMXCSR(ptr32[isVU0 ? &g_sseVU0MXCSR.bitmask : &g_sseVU1MXCSR.bitmask]); + xLDMXCSR(ptr32[isVU0 ? &EmuConfig.Cpu.VU0FPCR.bitmask : &EmuConfig.Cpu.VU1FPCR.bitmask]); mVUrestoreRegs(mVU); xMOV(gprF0, ptr32[&mVU.regs().micro_statusflags[0]]); @@ -117,7 +118,7 @@ void mVUdispatcherCD(mV) // Load EE's MXCSR state if (mvuNeedsFPCRUpdate(mVU)) - xLDMXCSR(ptr32[&g_sseMXCSR.bitmask]); + xLDMXCSR(ptr32[&EmuConfig.Cpu.FPUFPCR.bitmask]); } xRET(); diff --git a/tests/ctest/core/GS/swizzle_test_main.cpp b/tests/ctest/core/GS/swizzle_test_main.cpp index dae58cf8e9..f8f4fc8717 100644 --- a/tests/ctest/core/GS/swizzle_test_main.cpp +++ b/tests/ctest/core/GS/swizzle_test_main.cpp @@ -4,7 +4,6 @@ #include "pcsx2/GS/GSBlock.h" #include "pcsx2/GS/GSClut.h" #include "pcsx2/GS/MultiISA.h" -#include "common/emitter/tools.h" #include #include @@ -12,8 +11,6 @@ #ifdef MULTI_ISA_UNSHARED_COMPILATION -#include "common/emitter/tools.h" - enum class TestISA { isa_sse4,