dolphin/Source/Core/Common/x64FPURoundMode.cpp

123 lines
3.1 KiB
C++

// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/Common.h"
#include "Common/CPUDetect.h"
#include "Common/FPURoundMode.h"
#ifdef _WIN32
# include <mmintrin.h>
#else
# include <xmmintrin.h>
#endif
namespace FPURoundMode
{
// Get the default SSE states here.
static u32 saved_sse_state = _mm_getcsr();
static const u32 default_sse_state = _mm_getcsr();
void SetRoundMode(RoundModes mode)
{
// Set FPU rounding mode to mimic the PowerPC's
#ifdef _M_IX86
// This shouldn't really be needed anymore since we use SSE
#ifdef _WIN32
const int table[4] =
{
_RC_NEAR,
_RC_CHOP,
_RC_UP,
_RC_DOWN
};
_set_controlfp(_MCW_RC, table[mode]);
#else
const unsigned short X87_ROUND_MASK = 3 << 10;
const unsigned short x87_rounding_table[] =
{
0 << 10, // nearest
3 << 10, // zero
2 << 10, // +inf
1 << 10, // -inf
};
unsigned short _mode;
asm ("fstcw %0" : "=m" (_mode));
_mode = (_mode & ~X87_ROUND_MASK) | x87_rounding_table[mode];
asm ("fldcw %0" : : "m" (_mode));
#endif
#endif
}
void SetPrecisionMode(PrecisionModes mode)
{
#ifdef _M_IX86
// sets the floating-point lib to 53-bit
// PowerPC has a 53bit floating pipeline only
// eg: sscanf is very sensitive
#ifdef _WIN32
_control87(_PC_53, MCW_PC);
#else
const unsigned short PRECISION_MASK = 3 << 8;
const unsigned short precision_table[] = {
0 << 8, // 24 bits
2 << 8, // 53 bits
3 << 8, // 64 bits
};
unsigned short _mode;
asm ("fstcw %0" : "=m" (_mode));
_mode = (_mode & ~PRECISION_MASK) | precision_table[mode];
asm ("fldcw %0" : : "m" (_mode));
#endif
#else
//x64 doesn't need this - fpu is done with SSE
//but still - set any useful sse options here
#endif
}
void SetSIMDMode(RoundModes rounding_mode, bool non_ieee_mode)
{
// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
const u32 EXCEPTION_MASK = 0x1F80;
// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
const u32 DAZ = 0x40;
// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
const u32 FTZ = 0x8000;
// lookup table for FPSCR.RN-to-MXCSR.RC translation
static const u32 simd_rounding_table[] =
{
(0 << 13) | EXCEPTION_MASK, // nearest
(3 << 13) | EXCEPTION_MASK, // -inf
(2 << 13) | EXCEPTION_MASK, // +inf
(1 << 13) | EXCEPTION_MASK, // zero
};
u32 csr = simd_rounding_table[rounding_mode];
// Some initial steppings of Pentium 4 CPUs support FTZ but not DAZ.
// They will not flush input operands but flushing outputs only is better than nothing.
static const u32 denormalLUT[2] =
{
FTZ, // flush-to-zero only
FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported)
};
if (non_ieee_mode)
{
csr |= denormalLUT[cpu_info.bFlushToZero];
}
_mm_setcsr(csr);
}
void SaveSIMDState()
{
saved_sse_state = _mm_getcsr();
}
void LoadSIMDState()
{
_mm_setcsr(saved_sse_state);
}
void LoadDefaultSIMDState()
{
_mm_setcsr(default_sse_state);
}
}