Merge branch 'master' into d3d12
This commit is contained in:
commit
0488766452
|
@ -14,6 +14,7 @@
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <limits>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include "xenia/base/platform.h"
|
#include "xenia/base/platform.h"
|
||||||
|
|
||||||
|
@ -302,6 +303,38 @@ int64_t m128_i64(const __m128& v) {
|
||||||
uint16_t float_to_half(float value);
|
uint16_t float_to_half(float value);
|
||||||
float half_to_float(uint16_t value);
|
float half_to_float(uint16_t value);
|
||||||
|
|
||||||
|
// Saturating addition for integer types.
//
// Returns a + b, clamped to the representable range of T instead of wrapping:
// - unsigned T: clamps to the maximum value of T when the sum wraps around;
// - signed T: clamps to numeric_limits<T>::max() on positive overflow and to
//   the minimum value of T on negative overflow.
//
// All arithmetic is performed on the unsigned counterpart of T so that the
// wraparound is well-defined. Branch-light technique from:
// http://locklessinc.com/articles/sat_arithmetic/
template <typename T>
inline T sat_add(T a, T b) {
  // `typename` is required here: ::type is a dependent name.
  using TU = typename std::make_unsigned<T>::type;
  using TS = typename std::make_signed<T>::type;
  // Narrow explicitly - operands narrower than int are promoted to int.
  TU result = TU(TU(a) + TU(b));
  if (std::is_unsigned<T>::value) {
    // The sum wrapped iff it is smaller than an operand. -1 converted to TU
    // is all ones, so OR-ing it in forces the result to the maximum value.
    result |= TU(-static_cast<TS>(result < TU(a)));
  } else {
    // Saturation value chosen by the sign of a: max for non-negative a,
    // max + 1 (the bit pattern of the minimum value) for negative a.
    TU overflowed = TU((TU(a) >> (sizeof(T) * 8 - 1)) +
                       TU(std::numeric_limits<T>::max()));
    // Sign bit of the expression is clear only when a and b have the same
    // sign and the wrapped result has a different sign, i.e. on overflow.
    if (T((overflowed ^ TU(b)) | ~(TU(b) ^ result)) >= 0) {
      result = overflowed;
    }
  }
  return T(result);
}
|
||||||
|
// Saturating subtraction for integer types.
//
// Returns a - b, clamped to the representable range of T instead of wrapping:
// - unsigned T: clamps to 0 when b is greater than a;
// - signed T: clamps to numeric_limits<T>::max() on positive overflow and to
//   the minimum value of T on negative overflow.
//
// All arithmetic is performed on the unsigned counterpart of T so that the
// wraparound is well-defined. Branch-light technique from:
// http://locklessinc.com/articles/sat_arithmetic/
template <typename T>
inline T sat_sub(T a, T b) {
  // `typename` is required here: ::type is a dependent name.
  using TU = typename std::make_unsigned<T>::type;
  using TS = typename std::make_signed<T>::type;
  // Narrow explicitly - operands narrower than int are promoted to int.
  TU result = TU(TU(a) - TU(b));
  if (std::is_unsigned<T>::value) {
    // No borrow iff the difference does not exceed a. The mask is all ones
    // in that case and zero otherwise, which clamps the result to 0.
    result &= TU(-static_cast<TS>(result <= TU(a)));
  } else {
    // Saturation value chosen by the sign of a: max for non-negative a,
    // max + 1 (the bit pattern of the minimum value) for negative a.
    TU overflowed = TU((TU(a) >> (sizeof(T) * 8 - 1)) +
                       TU(std::numeric_limits<T>::max()));
    // Sign bit of the expression is set only when a and b have different
    // signs and the wrapped result's sign differs from a's, i.e. on overflow.
    if (T((overflowed ^ TU(b)) & (overflowed ^ result)) < 0) {
      result = overflowed;
    }
  }
  return T(result);
}
|
||||||
|
|
||||||
} // namespace xe
|
} // namespace xe
|
||||||
|
|
||||||
#endif // XENIA_BASE_MATH_H_
|
#endif // XENIA_BASE_MATH_H_
|
||||||
|
|
|
@ -1202,20 +1202,14 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT8_TYPE:
|
case INT8_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
// http://locklessinc.com/articles/sat_arithmetic/
|
|
||||||
for (int i = 0; i < 16; i++) {
|
for (int i = 0; i < 16; i++) {
|
||||||
uint8_t src1 = constant.v128.u8[i];
|
|
||||||
uint8_t src2 = other->constant.v128.u8[i];
|
|
||||||
uint8_t result = src1 + src2;
|
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
result |= -int8_t(result < src1);
|
constant.v128.u8[i] =
|
||||||
|
xe::sat_add(constant.v128.u8[i], other->constant.v128.u8[i]);
|
||||||
} else {
|
} else {
|
||||||
uint8_t overflowed = (src1 >> 7) + INT8_MAX;
|
constant.v128.i8[i] =
|
||||||
if (int8_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) {
|
xe::sat_add(constant.v128.i8[i], other->constant.v128.i8[i]);
|
||||||
result = overflowed;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
constant.v128.u8[i] = result;
|
|
||||||
}
|
}
|
||||||
// TODO(Triang3l): Trace DID_SATURATE.
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
|
@ -1230,20 +1224,14 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT16_TYPE:
|
case INT16_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
// http://locklessinc.com/articles/sat_arithmetic/
|
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 0; i < 8; i++) {
|
||||||
uint16_t src1 = constant.v128.u16[i];
|
|
||||||
uint16_t src2 = other->constant.v128.u16[i];
|
|
||||||
uint16_t result = src1 + src2;
|
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
result |= -int16_t(result < src1);
|
constant.v128.u16[i] =
|
||||||
|
xe::sat_add(constant.v128.u16[i], other->constant.v128.u16[i]);
|
||||||
} else {
|
} else {
|
||||||
uint16_t overflowed = (src1 >> 15) + INT16_MAX;
|
constant.v128.i16[i] =
|
||||||
if (int16_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) {
|
xe::sat_add(constant.v128.i16[i], other->constant.v128.i16[i]);
|
||||||
result = overflowed;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
constant.v128.u16[i] = result;
|
|
||||||
}
|
}
|
||||||
// TODO(Triang3l): Trace DID_SATURATE.
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
|
@ -1258,20 +1246,14 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT32_TYPE:
|
case INT32_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
// http://locklessinc.com/articles/sat_arithmetic/
|
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
uint32_t src1 = constant.v128.u32[i];
|
|
||||||
uint32_t src2 = other->constant.v128.u32[i];
|
|
||||||
uint32_t result = src1 + src2;
|
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
result |= -int32_t(result < src1);
|
constant.v128.u32[i] =
|
||||||
|
xe::sat_add(constant.v128.u32[i], other->constant.v128.u32[i]);
|
||||||
} else {
|
} else {
|
||||||
uint32_t overflowed = (src1 >> 31) + INT32_MAX;
|
constant.v128.i32[i] =
|
||||||
if (int32_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) {
|
xe::sat_add(constant.v128.i32[i], other->constant.v128.i32[i]);
|
||||||
result = overflowed;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
constant.v128.u32[i] = result;
|
|
||||||
}
|
}
|
||||||
// TODO(Triang3l): Trace DID_SATURATE.
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
|
@ -1286,20 +1268,14 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT64_TYPE:
|
case INT64_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
// http://locklessinc.com/articles/sat_arithmetic/
|
|
||||||
for (int i = 0; i < 2; i++) {
|
for (int i = 0; i < 2; i++) {
|
||||||
uint64_t src1 = constant.v128.u64[i];
|
|
||||||
uint64_t src2 = other->constant.v128.u64[i];
|
|
||||||
uint64_t result = src1 + src2;
|
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
result |= -int64_t(result < src1);
|
constant.v128.u64[i] =
|
||||||
|
xe::sat_add(constant.v128.u64[i], other->constant.v128.u64[i]);
|
||||||
} else {
|
} else {
|
||||||
uint64_t overflowed = (src1 >> 63) + INT64_MAX;
|
constant.v128.i64[i] =
|
||||||
if (int64_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) {
|
xe::sat_add(constant.v128.i64[i], other->constant.v128.i64[i]);
|
||||||
result = overflowed;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
constant.v128.u64[i] = result;
|
|
||||||
}
|
}
|
||||||
// TODO(Triang3l): Trace DID_SATURATE.
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
|
@ -1342,20 +1318,14 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT8_TYPE:
|
case INT8_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
// http://locklessinc.com/articles/sat_arithmetic/
|
|
||||||
for (int i = 0; i < 16; i++) {
|
for (int i = 0; i < 16; i++) {
|
||||||
uint8_t src1 = constant.v128.u8[i];
|
|
||||||
uint8_t src2 = other->constant.v128.u8[i];
|
|
||||||
uint8_t result = src1 - src2;
|
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
result &= -int8_t(result <= src1);
|
constant.v128.u8[i] =
|
||||||
|
xe::sat_sub(constant.v128.u8[i], other->constant.v128.u8[i]);
|
||||||
} else {
|
} else {
|
||||||
uint8_t overflowed = (src1 >> 7) + INT8_MAX;
|
constant.v128.i8[i] =
|
||||||
if (int8_t((overflowed ^ src2) & (overflowed ^ result)) < 0) {
|
xe::sat_sub(constant.v128.i8[i], other->constant.v128.i8[i]);
|
||||||
result = overflowed;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
constant.v128.u8[i] = result;
|
|
||||||
}
|
}
|
||||||
// TODO(Triang3l): Trace DID_SATURATE.
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
|
@ -1370,20 +1340,14 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT16_TYPE:
|
case INT16_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
// http://locklessinc.com/articles/sat_arithmetic/
|
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 0; i < 8; i++) {
|
||||||
uint16_t src1 = constant.v128.u16[i];
|
|
||||||
uint16_t src2 = other->constant.v128.u16[i];
|
|
||||||
uint16_t result = src1 - src2;
|
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
result &= -int16_t(result <= src1);
|
constant.v128.u16[i] =
|
||||||
|
xe::sat_sub(constant.v128.u16[i], other->constant.v128.u16[i]);
|
||||||
} else {
|
} else {
|
||||||
uint16_t overflowed = (src1 >> 15) + INT16_MAX;
|
constant.v128.i16[i] =
|
||||||
if (int16_t((overflowed ^ src2) & (overflowed ^ result)) < 0) {
|
xe::sat_sub(constant.v128.i16[i], other->constant.v128.i16[i]);
|
||||||
result = overflowed;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
constant.v128.u16[i] = result;
|
|
||||||
}
|
}
|
||||||
// TODO(Triang3l): Trace DID_SATURATE.
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
|
@ -1398,20 +1362,14 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT32_TYPE:
|
case INT32_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
// http://locklessinc.com/articles/sat_arithmetic/
|
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
uint32_t src1 = constant.v128.u32[i];
|
|
||||||
uint32_t src2 = other->constant.v128.u32[i];
|
|
||||||
uint32_t result = src1 - src2;
|
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
result &= -int32_t(result <= src1);
|
constant.v128.u32[i] =
|
||||||
|
xe::sat_sub(constant.v128.u32[i], other->constant.v128.u32[i]);
|
||||||
} else {
|
} else {
|
||||||
uint32_t overflowed = (src1 >> 31) + INT32_MAX;
|
constant.v128.i32[i] =
|
||||||
if (int32_t((overflowed ^ src2) & (overflowed ^ result)) < 0) {
|
xe::sat_sub(constant.v128.i32[i], other->constant.v128.i32[i]);
|
||||||
result = overflowed;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
constant.v128.u32[i] = result;
|
|
||||||
}
|
}
|
||||||
// TODO(Triang3l): Trace DID_SATURATE.
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
|
@ -1426,20 +1384,14 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT64_TYPE:
|
case INT64_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
// http://locklessinc.com/articles/sat_arithmetic/
|
|
||||||
for (int i = 0; i < 2; i++) {
|
for (int i = 0; i < 2; i++) {
|
||||||
uint64_t src1 = constant.v128.u64[i];
|
|
||||||
uint64_t src2 = other->constant.v128.u64[i];
|
|
||||||
uint64_t result = src1 - src2;
|
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
result &= -int64_t(result <= src1);
|
constant.v128.u64[i] =
|
||||||
|
xe::sat_sub(constant.v128.u64[i], other->constant.v128.u64[i]);
|
||||||
} else {
|
} else {
|
||||||
uint64_t overflowed = (src1 >> 63) + INT64_MAX;
|
constant.v128.i64[i] =
|
||||||
if (int64_t((overflowed ^ src2) & (overflowed ^ result)) < 0) {
|
xe::sat_sub(constant.v128.i64[i], other->constant.v128.i64[i]);
|
||||||
result = overflowed;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
constant.v128.u64[i] = result;
|
|
||||||
}
|
}
|
||||||
// TODO(Triang3l): Trace DID_SATURATE.
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in New Issue