From 26c24b4726f40a785d59129455901e1da276e035 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 19 Jul 2019 23:12:11 +0300 Subject: [PATCH] [CPU] Move saturating add/sub to base/math.h --- src/xenia/base/math.h | 33 +++++++++++ src/xenia/cpu/hir/value.cc | 112 +++++++++++-------------------------- 2 files changed, 65 insertions(+), 80 deletions(-) diff --git a/src/xenia/base/math.h b/src/xenia/base/math.h index d2c58ee3a..06b0b5002 100644 --- a/src/xenia/base/math.h +++ b/src/xenia/base/math.h @@ -14,6 +14,7 @@ #include #include #include +#include <limits> #include #include "xenia/base/platform.h" @@ -302,6 +303,38 @@ int64_t m128_i64(const __m128& v) { uint16_t float_to_half(float value); float half_to_float(uint16_t value); +// http://locklessinc.com/articles/sat_arithmetic/ +template <typename T> +inline T sat_add(T a, T b) { + using TU = typename std::make_unsigned<T>::type; + TU result = TU(a) + TU(b); + if (std::is_unsigned<T>::value) { + result |= TU(-static_cast<typename std::make_signed<T>::type>(result < TU(a))); + } else { + TU overflowed = + (TU(a) >> (sizeof(T) * 8 - 1)) + std::numeric_limits<T>::max(); + if (T((overflowed ^ TU(b)) | ~(TU(b) ^ result)) >= 0) { + result = overflowed; + } + } + return T(result); +} +template <typename T> +inline T sat_sub(T a, T b) { + using TU = typename std::make_unsigned<T>::type; + TU result = TU(a) - TU(b); + if (std::is_unsigned<T>::value) { + result &= TU(-static_cast<typename std::make_signed<T>::type>(result <= TU(a))); + } else { + TU overflowed = + (TU(a) >> (sizeof(T) * 8 - 1)) + std::numeric_limits<T>::max(); + if (T((overflowed ^ TU(b)) & (overflowed ^ result)) < 0) { + result = overflowed; + } + } + return T(result); +} + } // namespace xe #endif // XENIA_BASE_MATH_H_ diff --git a/src/xenia/cpu/hir/value.cc b/src/xenia/cpu/hir/value.cc index 07d48ad12..2bf02bae1 100644 --- a/src/xenia/cpu/hir/value.cc +++ b/src/xenia/cpu/hir/value.cc @@ -1202,20 +1202,14 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned, break; case INT8_TYPE: if (saturate) { - // http://locklessinc.com/articles/sat_arithmetic/ for (int i = 0;
i < 16; i++) { - uint8_t src1 = constant.v128.u8[i]; - uint8_t src2 = other->constant.v128.u8[i]; - uint8_t result = src1 + src2; if (is_unsigned) { - result |= -int8_t(result < src1); + constant.v128.u8[i] = + xe::sat_add(constant.v128.u8[i], other->constant.v128.u8[i]); } else { - uint8_t overflowed = (src1 >> 7) + INT8_MAX; - if (int8_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) { - result = overflowed; - } + constant.v128.i8[i] = + xe::sat_add(constant.v128.i8[i], other->constant.v128.i8[i]); } - constant.v128.u8[i] = result; } // TODO(Triang3l): Trace DID_SATURATE. } else { @@ -1230,20 +1224,14 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned, break; case INT16_TYPE: if (saturate) { - // http://locklessinc.com/articles/sat_arithmetic/ for (int i = 0; i < 8; i++) { - uint16_t src1 = constant.v128.u16[i]; - uint16_t src2 = other->constant.v128.u16[i]; - uint16_t result = src1 + src2; if (is_unsigned) { - result |= -int16_t(result < src1); + constant.v128.u16[i] = + xe::sat_add(constant.v128.u16[i], other->constant.v128.u16[i]); } else { - uint16_t overflowed = (src1 >> 15) + INT16_MAX; - if (int16_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) { - result = overflowed; - } + constant.v128.i16[i] = + xe::sat_add(constant.v128.i16[i], other->constant.v128.i16[i]); } - constant.v128.u16[i] = result; } // TODO(Triang3l): Trace DID_SATURATE. 
} else { @@ -1258,20 +1246,14 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned, break; case INT32_TYPE: if (saturate) { - // http://locklessinc.com/articles/sat_arithmetic/ for (int i = 0; i < 4; i++) { - uint32_t src1 = constant.v128.u32[i]; - uint32_t src2 = other->constant.v128.u32[i]; - uint32_t result = src1 + src2; if (is_unsigned) { - result |= -int32_t(result < src1); + constant.v128.u32[i] = + xe::sat_add(constant.v128.u32[i], other->constant.v128.u32[i]); } else { - uint32_t overflowed = (src1 >> 31) + INT32_MAX; - if (int32_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) { - result = overflowed; - } + constant.v128.i32[i] = + xe::sat_add(constant.v128.i32[i], other->constant.v128.i32[i]); } - constant.v128.u32[i] = result; } // TODO(Triang3l): Trace DID_SATURATE. } else { @@ -1286,20 +1268,14 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned, break; case INT64_TYPE: if (saturate) { - // http://locklessinc.com/articles/sat_arithmetic/ for (int i = 0; i < 2; i++) { - uint64_t src1 = constant.v128.u64[i]; - uint64_t src2 = other->constant.v128.u64[i]; - uint64_t result = src1 + src2; if (is_unsigned) { - result |= -int64_t(result < src1); + constant.v128.u64[i] = + xe::sat_add(constant.v128.u64[i], other->constant.v128.u64[i]); } else { - uint64_t overflowed = (src1 >> 63) + INT64_MAX; - if (int64_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) { - result = overflowed; - } + constant.v128.i64[i] = + xe::sat_add(constant.v128.i64[i], other->constant.v128.i64[i]); } - constant.v128.u64[i] = result; } // TODO(Triang3l): Trace DID_SATURATE. 
} else { @@ -1342,20 +1318,14 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned, break; case INT8_TYPE: if (saturate) { - // http://locklessinc.com/articles/sat_arithmetic/ for (int i = 0; i < 16; i++) { - uint8_t src1 = constant.v128.u8[i]; - uint8_t src2 = other->constant.v128.u8[i]; - uint8_t result = src1 - src2; if (is_unsigned) { - result &= -int8_t(result <= src1); + constant.v128.u8[i] = + xe::sat_sub(constant.v128.u8[i], other->constant.v128.u8[i]); } else { - uint8_t overflowed = (src1 >> 7) + INT8_MAX; - if (int8_t((overflowed ^ src2) & (overflowed ^ result)) < 0) { - result = overflowed; - } + constant.v128.i8[i] = + xe::sat_sub(constant.v128.i8[i], other->constant.v128.i8[i]); } - constant.v128.u8[i] = result; } // TODO(Triang3l): Trace DID_SATURATE. } else { @@ -1370,20 +1340,14 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned, break; case INT16_TYPE: if (saturate) { - // http://locklessinc.com/articles/sat_arithmetic/ for (int i = 0; i < 8; i++) { - uint16_t src1 = constant.v128.u16[i]; - uint16_t src2 = other->constant.v128.u16[i]; - uint16_t result = src1 - src2; if (is_unsigned) { - result &= -int16_t(result <= src1); + constant.v128.u16[i] = + xe::sat_sub(constant.v128.u16[i], other->constant.v128.u16[i]); } else { - uint16_t overflowed = (src1 >> 15) + INT16_MAX; - if (int16_t((overflowed ^ src2) & (overflowed ^ result)) < 0) { - result = overflowed; - } + constant.v128.i16[i] = + xe::sat_sub(constant.v128.i16[i], other->constant.v128.i16[i]); } - constant.v128.u16[i] = result; } // TODO(Triang3l): Trace DID_SATURATE. 
} else { @@ -1398,20 +1362,14 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned, break; case INT32_TYPE: if (saturate) { - // http://locklessinc.com/articles/sat_arithmetic/ for (int i = 0; i < 4; i++) { - uint32_t src1 = constant.v128.u32[i]; - uint32_t src2 = other->constant.v128.u32[i]; - uint32_t result = src1 - src2; if (is_unsigned) { - result &= -int32_t(result <= src1); + constant.v128.u32[i] = + xe::sat_sub(constant.v128.u32[i], other->constant.v128.u32[i]); } else { - uint32_t overflowed = (src1 >> 31) + INT32_MAX; - if (int32_t((overflowed ^ src2) & (overflowed ^ result)) < 0) { - result = overflowed; - } + constant.v128.i32[i] = + xe::sat_sub(constant.v128.i32[i], other->constant.v128.i32[i]); } - constant.v128.u32[i] = result; } // TODO(Triang3l): Trace DID_SATURATE. } else { @@ -1426,20 +1384,14 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned, break; case INT64_TYPE: if (saturate) { - // http://locklessinc.com/articles/sat_arithmetic/ for (int i = 0; i < 2; i++) { - uint64_t src1 = constant.v128.u64[i]; - uint64_t src2 = other->constant.v128.u64[i]; - uint64_t result = src1 - src2; if (is_unsigned) { - result &= -int64_t(result <= src1); + constant.v128.u64[i] = + xe::sat_sub(constant.v128.u64[i], other->constant.v128.u64[i]); } else { - uint64_t overflowed = (src1 >> 63) + INT64_MAX; - if (int64_t((overflowed ^ src2) & (overflowed ^ result)) < 0) { - result = overflowed; - } + constant.v128.i64[i] = + xe::sat_sub(constant.v128.i64[i], other->constant.v128.i64[i]); } - constant.v128.u64[i] = result; } // TODO(Triang3l): Trace DID_SATURATE. } else {