Merge pull request #202 from Tilka/mathutil
MathUtil: add IntFloat/IntDouble constructors and test FlushToZero()
This commit is contained in:
commit
c929c3ebc1
|
@ -15,8 +15,7 @@ namespace MathUtil
|
||||||
u32 ClassifyDouble(double dvalue)
|
u32 ClassifyDouble(double dvalue)
|
||||||
{
|
{
|
||||||
// TODO: Optimize the below to be as fast as possible.
|
// TODO: Optimize the below to be as fast as possible.
|
||||||
IntDouble value;
|
IntDouble value(dvalue);
|
||||||
value.d = dvalue;
|
|
||||||
u64 sign = value.i & DOUBLE_SIGN;
|
u64 sign = value.i & DOUBLE_SIGN;
|
||||||
u64 exp = value.i & DOUBLE_EXP;
|
u64 exp = value.i & DOUBLE_EXP;
|
||||||
if (exp > DOUBLE_ZERO && exp < DOUBLE_EXP)
|
if (exp > DOUBLE_ZERO && exp < DOUBLE_EXP)
|
||||||
|
@ -55,8 +54,7 @@ u32 ClassifyDouble(double dvalue)
|
||||||
u32 ClassifyFloat(float fvalue)
|
u32 ClassifyFloat(float fvalue)
|
||||||
{
|
{
|
||||||
// TODO: Optimize the below to be as fast as possible.
|
// TODO: Optimize the below to be as fast as possible.
|
||||||
IntFloat value;
|
IntFloat value(fvalue);
|
||||||
value.f = fvalue;
|
|
||||||
u32 sign = value.i & FLOAT_SIGN;
|
u32 sign = value.i & FLOAT_SIGN;
|
||||||
u32 exp = value.i & FLOAT_EXP;
|
u32 exp = value.i & FLOAT_EXP;
|
||||||
if (exp > FLOAT_ZERO && exp < FLOAT_EXP)
|
if (exp > FLOAT_ZERO && exp < FLOAT_EXP)
|
||||||
|
|
|
@ -36,35 +36,41 @@ static const u32 FLOAT_SIGN = 0x80000000,
|
||||||
union IntDouble {
|
union IntDouble {
|
||||||
double d;
|
double d;
|
||||||
u64 i;
|
u64 i;
|
||||||
|
|
||||||
|
explicit IntDouble(u64 _i) : i(_i) {}
|
||||||
|
explicit IntDouble(double _d) : d(_d) {}
|
||||||
};
|
};
|
||||||
union IntFloat {
|
union IntFloat {
|
||||||
float f;
|
float f;
|
||||||
u32 i;
|
u32 i;
|
||||||
|
|
||||||
|
explicit IntFloat(u32 _i) : i(_i) {}
|
||||||
|
explicit IntFloat(float _f) : f(_f) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
inline bool IsINF(double d)
|
inline bool IsINF(double d)
|
||||||
{
|
{
|
||||||
IntDouble x; x.d = d;
|
IntDouble x(d);
|
||||||
return (x.i & ~DOUBLE_SIGN) == DOUBLE_EXP;
|
return (x.i & ~DOUBLE_SIGN) == DOUBLE_EXP;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool IsNAN(double d)
|
inline bool IsNAN(double d)
|
||||||
{
|
{
|
||||||
IntDouble x; x.d = d;
|
IntDouble x(d);
|
||||||
return ((x.i & DOUBLE_EXP) == DOUBLE_EXP) &&
|
return ((x.i & DOUBLE_EXP) == DOUBLE_EXP) &&
|
||||||
((x.i & DOUBLE_FRAC) != DOUBLE_ZERO);
|
((x.i & DOUBLE_FRAC) != DOUBLE_ZERO);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool IsQNAN(double d)
|
inline bool IsQNAN(double d)
|
||||||
{
|
{
|
||||||
IntDouble x; x.d = d;
|
IntDouble x(d);
|
||||||
return ((x.i & DOUBLE_EXP) == DOUBLE_EXP) &&
|
return ((x.i & DOUBLE_EXP) == DOUBLE_EXP) &&
|
||||||
((x.i & DOUBLE_QBIT) == DOUBLE_QBIT);
|
((x.i & DOUBLE_QBIT) == DOUBLE_QBIT);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool IsSNAN(double d)
|
inline bool IsSNAN(double d)
|
||||||
{
|
{
|
||||||
IntDouble x; x.d = d;
|
IntDouble x(d);
|
||||||
return ((x.i & DOUBLE_EXP) == DOUBLE_EXP) &&
|
return ((x.i & DOUBLE_EXP) == DOUBLE_EXP) &&
|
||||||
((x.i & DOUBLE_FRAC) != DOUBLE_ZERO) &&
|
((x.i & DOUBLE_FRAC) != DOUBLE_ZERO) &&
|
||||||
((x.i & DOUBLE_QBIT) == DOUBLE_ZERO);
|
((x.i & DOUBLE_QBIT) == DOUBLE_ZERO);
|
||||||
|
@ -72,7 +78,7 @@ inline bool IsSNAN(double d)
|
||||||
|
|
||||||
inline float FlushToZero(float f)
|
inline float FlushToZero(float f)
|
||||||
{
|
{
|
||||||
IntFloat x; x.f = f;
|
IntFloat x(f);
|
||||||
if ((x.i & FLOAT_EXP) == 0)
|
if ((x.i & FLOAT_EXP) == 0)
|
||||||
{
|
{
|
||||||
x.i &= FLOAT_SIGN; // turn into signed zero
|
x.i &= FLOAT_SIGN; // turn into signed zero
|
||||||
|
@ -82,7 +88,7 @@ inline float FlushToZero(float f)
|
||||||
|
|
||||||
inline double FlushToZero(double d)
|
inline double FlushToZero(double d)
|
||||||
{
|
{
|
||||||
IntDouble x; x.d = d;
|
IntDouble x(d);
|
||||||
if ((x.i & DOUBLE_EXP) == 0)
|
if ((x.i & DOUBLE_EXP) == 0)
|
||||||
{
|
{
|
||||||
x.i &= DOUBLE_SIGN; // turn into signed zero
|
x.i &= DOUBLE_SIGN; // turn into signed zero
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
#include "Common/MathUtil.h"
|
#include "Common/MathUtil.h"
|
||||||
|
|
||||||
|
@ -29,7 +30,7 @@ TEST(MathUtil, Clamp)
|
||||||
|
|
||||||
// IsINF must recognise both positive and negative infinity.
TEST(MathUtil, IsINF)
{
    const double inf = std::numeric_limits<double>::infinity();

    EXPECT_TRUE(MathUtil::IsINF(+inf));
    EXPECT_TRUE(MathUtil::IsINF(-inf));
}
|
||||||
|
|
||||||
|
@ -63,3 +64,43 @@ TEST(MathUtil, Log2)
|
||||||
EXPECT_EQ(3, Log2(15));
|
EXPECT_EQ(3, Log2(15));
|
||||||
EXPECT_EQ(63, Log2(0xFFFFFFFFFFFFFFFFull));
|
EXPECT_EQ(63, Log2(0xFFFFFFFFFFFFFFFFull));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks the software FlushToZero(): subnormal inputs must collapse to a zero
// that keeps the input's sign, and every other input must pass through
// unchanged.
TEST(MathUtil, FlushToZero)
{
    // To test the software implementation we need to make sure FTZ and DAZ are disabled.
    // Using volatile here to ensure the compiler doesn't constant-fold it,
    // we want the multiplication to occur at test runtime.
    volatile float s = std::numeric_limits<float>::denorm_min();
    volatile double d = std::numeric_limits<double>::denorm_min();
    // Typed literals keep both sides of the comparison the same type.
    EXPECT_LT(0.0f, s * 2);
    EXPECT_LT(0.0, d * 2);

    // Signed zeros are compared by bit pattern: `-0` is just the integer 0, and
    // floating-point == treats +0.0 and -0.0 as equal, so a value comparison
    // cannot tell whether the sign survived.
    auto double_bits = [](double value) { return MathUtil::IntDouble(value).i; };
    auto float_bits = [](float value) { return MathUtil::IntFloat(value).i; };
    const u64 double_pos_zero = 0x0000000000000000ull;
    const u64 double_neg_zero = 0x8000000000000000ull;
    const u32 float_pos_zero = 0x00000000u;
    const u32 float_neg_zero = 0x80000000u;

    EXPECT_EQ(double_pos_zero, double_bits(MathUtil::FlushToZero(+std::numeric_limits<double>::denorm_min())));
    EXPECT_EQ(double_neg_zero, double_bits(MathUtil::FlushToZero(-std::numeric_limits<double>::denorm_min())));
    EXPECT_EQ(double_pos_zero, double_bits(MathUtil::FlushToZero(+std::numeric_limits<double>::min() / 2)));
    EXPECT_EQ(double_neg_zero, double_bits(MathUtil::FlushToZero(-std::numeric_limits<double>::min() / 2)));
    EXPECT_EQ(std::numeric_limits<double>::min(), MathUtil::FlushToZero(std::numeric_limits<double>::min()));
    EXPECT_EQ(std::numeric_limits<double>::max(), MathUtil::FlushToZero(std::numeric_limits<double>::max()));
    EXPECT_EQ(+std::numeric_limits<double>::infinity(), MathUtil::FlushToZero(+std::numeric_limits<double>::infinity()));
    EXPECT_EQ(-std::numeric_limits<double>::infinity(), MathUtil::FlushToZero(-std::numeric_limits<double>::infinity()));

    // Test all subnormals as well as an equally large set of random
    // non-subnormal bit patterns (0x00800000..0x7fffffff covers the normal
    // floats and also the infinity/NaN encodings).
    std::default_random_engine engine(0);
    std::uniform_int_distribution<u32> dist(0x00800000u, 0x7fffffffu);
    for (u32 i = 0; i <= 0x007fffffu; ++i)
    {
        // Positive zero/subnormal -> +0.
        MathUtil::IntFloat x(i);
        EXPECT_EQ(float_pos_zero, float_bits(MathUtil::FlushToZero(x.f)));

        // Same magnitude with the sign bit set -> -0.
        x.i = i | 0x80000000u;
        EXPECT_EQ(float_neg_zero, float_bits(MathUtil::FlushToZero(x.f)));

        // A non-subnormal pattern must come back bit-for-bit identical...
        x.i = dist(engine);
        MathUtil::IntFloat y(MathUtil::FlushToZero(x.f));
        EXPECT_EQ(x.i, y.i);

        // ...and so must its negated counterpart.
        x.i |= 0x80000000u;
        y.f = MathUtil::FlushToZero(x.f);
        EXPECT_EQ(x.i, y.i);
    }
}
|
||||||
|
|
Loading…
Reference in New Issue