dolphin/Source/Core/Common/ArmFPURoundMode.cpp

95 lines
2.5 KiB
C++

// Copyright 2021 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Common/CPUDetect.h"
#include "Common/CommonTypes.h"
#include "Common/FPURoundMode.h"
#include "Common/Logging/Log.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
// Reads the current value of the AArch64 FPCR system register and returns it.
static u64 GetFPCR()
{
#ifndef _MSC_VER
  // GCC/Clang: read the register with an MRS instruction via inline assembly.
  u64 value;
  __asm__ __volatile__("mrs %0, fpcr" : "=r"(value));
  return value;
#else
  // MSVC has no inline assembly on this target, so use the status-register intrinsic.
  return _ReadStatusReg(ARM64_FPCR);
#endif
}
// Writes `fpcr` to the AArch64 FPCR system register.
static void SetFPCR(u64 fpcr)
{
#ifdef _MSC_VER
  _WriteStatusReg(ARM64_FPCR, fpcr);
#else
  // The operand must live in a general-purpose register: the immediate form of MSR only
  // targets PSTATE fields, not FPCR, so an "i" alternative in the constraint could make the
  // compiler emit an instruction that fails to assemble when the value is a constant.
  __asm__ __volatile__("msr fpcr, %0" : : "r"(fpcr));
#endif
}
namespace FPURoundMode
{
static const u64 default_fpcr = GetFPCR();
static u64 saved_fpcr = default_fpcr;
void SetRoundMode(int mode)
{
// We don't need to do anything here since SetSIMDMode is always called after calling this
}
void SetSIMDMode(int rounding_mode, bool non_ieee_mode)
{
// When AH is disabled, FZ controls flush-to-zero for both inputs and outputs. When AH is enabled,
// FZ controls flush-to-zero for outputs, and FIZ controls flush-to-zero for inputs.
constexpr u32 FZ = 1 << 24;
constexpr u32 AH = 1 << 1;
constexpr u32 FIZ = 1 << 0;
constexpr u32 flush_to_zero_mask = FZ | AH | FIZ;
// On CPUs with FEAT_AFP support, setting AH = 1, FZ = 1, FIZ = 0 emulates the GC/Wii CPU's
// "non-IEEE mode". Unfortunately, FEAT_AFP didn't exist until 2020, so we can't count on setting
// AH actually doing anything. But flushing both inputs and outputs seems to cause less problems
// than flushing nothing, so let's just set FZ and AH and roll with whatever behavior we get.
const u32 flush_to_zero_bits = (non_ieee_mode ? FZ | AH : 0);
static bool afp_warning_shown = false;
if (!afp_warning_shown && !cpu_info.bAFP && non_ieee_mode)
{
afp_warning_shown = true;
WARN_LOG_FMT(POWERPC,
"Non-IEEE mode was requested, but host CPU is not known to support FEAT_AFP");
}
// lookup table for FPSCR.RN-to-FPCR.RMode translation
constexpr u32 rounding_mode_table[] = {
(0 << 22), // nearest
(3 << 22), // zero
(1 << 22), // +inf
(2 << 22), // -inf
};
constexpr u32 rounding_mode_mask = 3 << 22;
const u32 rounding_mode_bits = rounding_mode_table[rounding_mode];
const u64 base = default_fpcr & ~(flush_to_zero_mask | rounding_mode_mask);
SetFPCR(base | rounding_mode_bits | flush_to_zero_bits);
}
void SaveSIMDState()
{
saved_fpcr = GetFPCR();
}
void LoadSIMDState()
{
SetFPCR(saved_fpcr);
}
void LoadDefaultSIMDState()
{
SetFPCR(default_fpcr);
}
} // namespace FPURoundMode