150 lines
4.3 KiB
C++
150 lines
4.3 KiB
C++
#pragma once
|
|
|
|
#include <nall/platform.hpp>
|
|
|
|
//the c/c++ standard library fenv.h has numerous design and implementation flaws:
|
|
//- forces updates to both x87 and sse state on amd64
|
|
//- 'set' operations require a register read + modify + write
|
|
//- some implementations define api flags differently from native registers (msvc)
|
|
//- some implementations are so buggy they don't even use the correct registers (mingw/arm64)
|
|
|
|
//here we provide our own thin abstraction that falls back on fenv.h as a last resort.
|
|
//note: the state of the control register is cached, so changes made via external means
|
|
//will be reverted the next time it is modified by this wrapper.
|
|
|
|
namespace nall {
|
|
|
|
struct float_env {
|
|
#if defined(ARCHITECTURE_AMD64)
|
|
static constexpr u32 allExcept = _MM_EXCEPT_MASK;
|
|
static constexpr u32 denormal = _MM_EXCEPT_DENORM;
|
|
static constexpr u32 inexact = _MM_EXCEPT_INEXACT;
|
|
static constexpr u32 underflow = _MM_EXCEPT_UNDERFLOW;
|
|
static constexpr u32 overflow = _MM_EXCEPT_OVERFLOW;
|
|
static constexpr u32 divByZero = _MM_EXCEPT_DIV_ZERO;
|
|
static constexpr u32 invalid = _MM_EXCEPT_INVALID;
|
|
static constexpr u32 downward = _MM_ROUND_DOWN;
|
|
static constexpr u32 toNearest = _MM_ROUND_NEAREST;
|
|
static constexpr u32 towardZero = _MM_ROUND_TOWARD_ZERO;
|
|
static constexpr u32 upward = _MM_ROUND_UP;
|
|
#elif defined(ARCHITECTURE_ARM64)
|
|
static constexpr u32 allExcept = 0x3f;
|
|
static constexpr u32 denormal = 0x20;
|
|
static constexpr u32 inexact = 0x10;
|
|
static constexpr u32 underflow = 0x08;
|
|
static constexpr u32 overflow = 0x04;
|
|
static constexpr u32 divByZero = 0x02;
|
|
static constexpr u32 invalid = 0x01;
|
|
static constexpr u32 downward = 2 << 22;
|
|
static constexpr u32 toNearest = 0 << 22;
|
|
static constexpr u32 towardZero = 3 << 22;
|
|
static constexpr u32 upward = 1 << 22;
|
|
#else
|
|
static constexpr u32 allExcept = FE_ALL_EXCEPT;
|
|
#if defined(FE_DENORMAL)
|
|
static constexpr u32 denormal = FE_DENORMAL;
|
|
#else
|
|
static constexpr u32 denormal = 0;
|
|
#endif
|
|
static constexpr u32 inexact = FE_INEXACT;
|
|
static constexpr u32 underflow = FE_UNDERFLOW;
|
|
static constexpr u32 overflow = FE_OVERFLOW;
|
|
static constexpr u32 divByZero = FE_DIVBYZERO;
|
|
static constexpr u32 invalid = FE_INVALID;
|
|
static constexpr u32 downward = FE_DOWNWARD;
|
|
static constexpr u32 toNearest = FE_TONEAREST;
|
|
static constexpr u32 towardZero = FE_TOWARDZERO;
|
|
static constexpr u32 upward = FE_UPWARD;
|
|
#endif
|
|
static constexpr u32 roundMask = downward | toNearest | towardZero | upward;
|
|
|
|
u32 control = 0;
|
|
|
|
float_env() {
|
|
control = getControl();
|
|
}
|
|
|
|
auto setRound(u32 mode) -> void {
|
|
control &= ~roundMask;
|
|
control |= mode & roundMask;
|
|
setControl();
|
|
}
|
|
|
|
auto getRound() -> u32 {
|
|
return control & roundMask;
|
|
}
|
|
|
|
auto testExcept(u32 mask) -> u32 {
|
|
return getStatus() & mask & allExcept;
|
|
}
|
|
|
|
auto clearExcept() -> void {
|
|
clearStatus();
|
|
}
|
|
|
|
private:
|
|
auto getControl() -> u32 {
|
|
#if defined(ARCHITECTURE_AMD64)
|
|
return _mm_getcsr() & ~allExcept;
|
|
#elif defined(ARCHITECTURE_ARM64)
|
|
#if defined(COMPILER_MICROSOFT)
|
|
return _ReadStatusReg(ARM64_FPCR);
|
|
#else
|
|
u64 value;
|
|
__asm__ __volatile__("mrs %0, FPCR" : "=r"(value));
|
|
return value;
|
|
#endif
|
|
#else
|
|
return fegetround();
|
|
#endif
|
|
}
|
|
|
|
auto getStatus() -> u32 {
|
|
#if defined(ARCHITECTURE_AMD64)
|
|
return _mm_getcsr() & allExcept;
|
|
#elif defined(ARCHITECTURE_ARM64)
|
|
#if defined(COMPILER_MICROSOFT)
|
|
return _ReadStatusReg(ARM64_FPSR);
|
|
#else
|
|
u64 value;
|
|
__asm__ __volatile__("mrs %0, FPSR" : "=r"(value));
|
|
return value;
|
|
#endif
|
|
#else
|
|
return fetestexcept(allExcept);
|
|
#endif
|
|
}
|
|
|
|
auto setControl() -> void {
|
|
#if defined(ARCHITECTURE_AMD64)
|
|
_mm_setcsr(control | getStatus());
|
|
#elif defined(ARCHITECTURE_ARM64)
|
|
#if defined(COMPILER_MICROSOFT)
|
|
_WriteStatusReg(ARM64_FPCR, control);
|
|
#else
|
|
u64 value = control;
|
|
__asm__ __volatile__("msr FPCR, %0" : : "r"(value));
|
|
#endif
|
|
#else
|
|
fesetround(control & roundMask);
|
|
#endif
|
|
}
|
|
|
|
auto clearStatus() -> void {
|
|
#if defined(ARCHITECTURE_AMD64)
|
|
_mm_setcsr(control);
|
|
#elif defined(ARCHITECTURE_ARM64)
|
|
#if defined(COMPILER_MICROSOFT)
|
|
_WriteStatusReg(ARM64_FPSR, 0);
|
|
#else
|
|
u64 value = 0;
|
|
__asm__ __volatile__("msr FPSR, %0" : : "r"(value));
|
|
#endif
|
|
#else
|
|
feclearexcept(allExcept);
|
|
#endif
|
|
}
|
|
};
|
|
|
|
}
|