Interpreter: software-based flush-to-zero

bDAZ is now called bFlushToZero to better reflect what it's actually
used for.

I decided not to support any hardware-based flush-to-zero on systems
that don't support it for both inputs _and_ outputs. This keeps the code
cleaner, and the set of CPUs that support SSE2 but not DAZ
should be very small.
This commit is contained in:
Tillmann Karras 2013-10-24 22:05:53 +02:00
parent 466a7afde3
commit cd069fdce1
5 changed files with 26 additions and 22 deletions

View File

@ -45,7 +45,10 @@ struct CPUInfo
bool bAES;
// FXSAVE/FXRSTOR
bool bFXSR;
bool bDAZ;
// This flag indicates that the hardware supports some mode
// in which denormal inputs _and_ outputs are automatically set to (signed) zero.
// TODO: ARM
bool bFlushToZero;
bool bLAHFSAHF64;
bool bLongMode;

View File

@ -64,10 +64,10 @@ inline float FlushToZero(float f)
return x.f;
}
// Flushes a double to signed zero when its exponent bits (x.i & DOUBLE_EXP)
// satisfy the test below; otherwise the value is returned unchanged.
// NOTE(review): two signatures and two conditions are listed here
// (FlushToZeroAsFloat with the "< 0x3800000000000000" test, and FlushToZero
// with the "== 0" test) -- they look like the previous and the current form
// of the same function; confirm which one is live before relying on either.
inline double FlushToZeroAsFloat(double d)
inline double FlushToZero(double d)
{
// Access the bit pattern of d through the IntDouble helper (x.d / x.i).
IntDouble x; x.d = d;
// Threshold test on the masked exponent bits -- presumably meant to catch
// values too small for a normal single-precision float; TODO confirm.
if ((x.i & DOUBLE_EXP) < 0x3800000000000000ULL)
// Masked exponent bits are all zero -- assuming DOUBLE_EXP covers the
// exponent field, that is a zero or a denormal double.
if ((x.i & DOUBLE_EXP) == 0)
x.i &= DOUBLE_SIGN; // turn into signed zero
return x.d;
}

View File

@ -162,6 +162,7 @@ void CPUInfo::Detect()
if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true;
if ((cpu_id[2] >> 25) & 1) bAES = true;
// To check DAZ support, we first need to check FXSAVE support.
if ((cpu_id[3] >> 24) & 1)
{
// We can use FXSAVE.
@ -181,7 +182,12 @@ void CPUInfo::Detect()
// lowest byte of MXCSR_MASK
if ((fx_state[0x1C] >> 6) & 1)
bDAZ = true;
{
// On x86, the FTZ field (supported since SSE1) only flushes denormal _outputs_ to zero.
// Now that we have checked DAZ support (flushing denormal _inputs_ to zero),
// we can set our generic flag.
bFlushToZero = true;
}
}
// AVX support requires 3 separate checks:

View File

@ -103,7 +103,7 @@ namespace FPURoundMode
};
if (nonIEEEMode)
{
csr |= denormalLUT[cpu_info.bDAZ];
csr |= denormalLUT[cpu_info.bFlushToZero];
}
_mm_setcsr(csr);
}

View File

@ -5,6 +5,7 @@
#ifndef _INTERPRETER_FPUTILS_H
#define _INTERPRETER_FPUTILS_H
#include "CPUDetect.h"
#include "Interpreter.h"
#include "MathUtil.h"
@ -69,28 +70,22 @@ inline void UpdateFPSCR()
// Rounds _x to single precision and returns the result widened back to double.
// When FPSCR.NI is set, the value is additionally passed through a software
// flush-to-zero helper; the cpu_info.bFlushToZero check skips that software
// step when the host CPU is flagged as doing the flush in hardware.
// NOTE(review): two variants of the body are listed below (flush on the double
// via FlushToZeroAsFloat followed by static_cast, and conversion to float
// followed by FlushToZero) -- they declare x twice, so only one can be live.
inline double ForceSingle(double _x)
{
//if (FPSCR.RN != 0)
// PanicAlert("RN = %d at %x", (int)FPSCR.RN, PC);
// Variant 1: flush the double first, then narrow to float precision.
if (FPSCR.NI)
_x = FlushToZeroAsFloat(_x);
double x = static_cast<float>(_x);
// Variant 2: narrow first, then flush the float in software if needed.
// convert to float...
float x = _x;
// Software fallback only: requires NI to be set and no hardware flush-to-zero.
if (!cpu_info.bFlushToZero && FPSCR.NI)
{
x = FlushToZero(x);
}
// ...and back to double:
return x;
}
// Returns d, passed through FlushToZero when FPSCR.NI is set and
// cpu_info.bFlushToZero is false (i.e. the host is not flagged as flushing
// denormals in hardware). In every other case d is returned unchanged.
inline double ForceDouble(double d)
{
// Disabled code kept for reference: a rounding-mode alert and an inline
// version of the exponent == 0 flush.
//if (FPSCR.RN != 0)
// PanicAlert("RN = %d at %x", (int)FPSCR.RN, PC);
//if (FPSCR.NI)
//{
// IntDouble x; x.d = d;
//if ((x.i & DOUBLE_EXP) == 0)
// x.i &= DOUBLE_SIGN; // turn into signed zero
// return x.d;
//}
// Software fallback only: requires NI to be set and no hardware flush-to-zero.
if (!cpu_info.bFlushToZero && FPSCR.NI)
{
d = FlushToZero(d);
}
return d;
}