BizHawk/waterbox/ares64/ares/nall/float-env.hpp

150 lines
4.3 KiB
C++

#pragma once
#include <nall/platform.hpp>
//the c/c++ standard library fenv.h has numerous design and implementation flaws:
//- forces updates to both x87 and sse state on amd64
//- 'set' operations require a register read + modify + write
//- some implementations define api flags differently from native registers (msvc)
//- some implementations are so buggy they don't even use the correct registers (mingw/arm64)
//here we provide our own thin abstraction that falls back on fenv.h as a last resort.
//note: the state of the control register is cached, so changes made via external means
//will be reverted the next time it is modified by this wrapper.
namespace nall {
struct float_env {
#if defined(ARCHITECTURE_AMD64)
static constexpr u32 allExcept = _MM_EXCEPT_MASK;
static constexpr u32 denormal = _MM_EXCEPT_DENORM;
static constexpr u32 inexact = _MM_EXCEPT_INEXACT;
static constexpr u32 underflow = _MM_EXCEPT_UNDERFLOW;
static constexpr u32 overflow = _MM_EXCEPT_OVERFLOW;
static constexpr u32 divByZero = _MM_EXCEPT_DIV_ZERO;
static constexpr u32 invalid = _MM_EXCEPT_INVALID;
static constexpr u32 downward = _MM_ROUND_DOWN;
static constexpr u32 toNearest = _MM_ROUND_NEAREST;
static constexpr u32 towardZero = _MM_ROUND_TOWARD_ZERO;
static constexpr u32 upward = _MM_ROUND_UP;
#elif defined(ARCHITECTURE_ARM64)
static constexpr u32 allExcept = 0x3f;
static constexpr u32 denormal = 0x20;
static constexpr u32 inexact = 0x10;
static constexpr u32 underflow = 0x08;
static constexpr u32 overflow = 0x04;
static constexpr u32 divByZero = 0x02;
static constexpr u32 invalid = 0x01;
static constexpr u32 downward = 2 << 22;
static constexpr u32 toNearest = 0 << 22;
static constexpr u32 towardZero = 3 << 22;
static constexpr u32 upward = 1 << 22;
#else
static constexpr u32 allExcept = FE_ALL_EXCEPT;
#if defined(FE_DENORMAL)
static constexpr u32 denormal = FE_DENORMAL;
#else
static constexpr u32 denormal = 0;
#endif
static constexpr u32 inexact = FE_INEXACT;
static constexpr u32 underflow = FE_UNDERFLOW;
static constexpr u32 overflow = FE_OVERFLOW;
static constexpr u32 divByZero = FE_DIVBYZERO;
static constexpr u32 invalid = FE_INVALID;
static constexpr u32 downward = FE_DOWNWARD;
static constexpr u32 toNearest = FE_TONEAREST;
static constexpr u32 towardZero = FE_TOWARDZERO;
static constexpr u32 upward = FE_UPWARD;
#endif
static constexpr u32 roundMask = downward | toNearest | towardZero | upward;
u32 control = 0;
float_env() {
control = getControl();
}
auto setRound(u32 mode) -> void {
control &= ~roundMask;
control |= mode & roundMask;
setControl();
}
auto getRound() -> u32 {
return control & roundMask;
}
auto testExcept(u32 mask) -> u32 {
return getStatus() & mask & allExcept;
}
auto clearExcept() -> void {
clearStatus();
}
private:
auto getControl() -> u32 {
#if defined(ARCHITECTURE_AMD64)
return _mm_getcsr() & ~allExcept;
#elif defined(ARCHITECTURE_ARM64)
#if defined(COMPILER_MICROSOFT)
return _ReadStatusReg(ARM64_FPCR);
#else
u64 value;
__asm__ __volatile__("mrs %0, FPCR" : "=r"(value));
return value;
#endif
#else
return fegetround();
#endif
}
auto getStatus() -> u32 {
#if defined(ARCHITECTURE_AMD64)
return _mm_getcsr() & allExcept;
#elif defined(ARCHITECTURE_ARM64)
#if defined(COMPILER_MICROSOFT)
return _ReadStatusReg(ARM64_FPSR);
#else
u64 value;
__asm__ __volatile__("mrs %0, FPSR" : "=r"(value));
return value;
#endif
#else
return fetestexcept(allExcept);
#endif
}
auto setControl() -> void {
#if defined(ARCHITECTURE_AMD64)
_mm_setcsr(control | getStatus());
#elif defined(ARCHITECTURE_ARM64)
#if defined(COMPILER_MICROSOFT)
_WriteStatusReg(ARM64_FPCR, control);
#else
u64 value = control;
__asm__ __volatile__("msr FPCR, %0" : : "r"(value));
#endif
#else
fesetround(control & roundMask);
#endif
}
auto clearStatus() -> void {
#if defined(ARCHITECTURE_AMD64)
_mm_setcsr(control);
#elif defined(ARCHITECTURE_ARM64)
#if defined(COMPILER_MICROSOFT)
_WriteStatusReg(ARM64_FPSR, 0);
#else
u64 value = 0;
__asm__ __volatile__("msr FPSR, %0" : : "r"(value));
#endif
#else
feclearexcept(allExcept);
#endif
}
};
}