From f14f44e7291cc6076644fbb4f6da862e5e3a8e23 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 19 Jul 2019 08:17:55 +0300 Subject: [PATCH] [CPU] Saturating VectorAdd/Sub of constants without DID_SATURATE yet --- src/xenia/cpu/hir/value.cc | 136 ++++++++++++++++++++++++++++++++++--- 1 file changed, 128 insertions(+), 8 deletions(-) diff --git a/src/xenia/cpu/hir/value.cc b/src/xenia/cpu/hir/value.cc index ed79d65a2..23fd12403 100644 --- a/src/xenia/cpu/hir/value.cc +++ b/src/xenia/cpu/hir/value.cc @@ -1202,7 +1202,22 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned, break; case INT8_TYPE: if (saturate) { - assert_always(); + // http://locklessinc.com/articles/sat_arithmetic/ + for (int i = 0; i < 16; i++) { + uint8_t src1 = constant.v128.u8[i]; + uint8_t src2 = other->constant.v128.u8[i]; + uint8_t result = src1 + src2; + if (is_unsigned) { + result |= -int8_t(result < src1); + } else { + uint8_t overflowed = (src1 >> 7) + INT8_MAX; + if (int8_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) { + result = overflowed; + } + } + constant.v128.u8[i] = result; + } + // TODO(Triang3l): Trace DID_SATURATE. } else { for (int i = 0; i < 16; i++) { if (is_unsigned) { @@ -1215,7 +1230,22 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned, break; case INT16_TYPE: if (saturate) { - assert_always(); + // http://locklessinc.com/articles/sat_arithmetic/ + for (int i = 0; i < 8; i++) { + uint16_t src1 = constant.v128.u16[i]; + uint16_t src2 = other->constant.v128.u16[i]; + uint16_t result = src1 + src2; + if (is_unsigned) { + result |= -int16_t(result < src1); + } else { + uint16_t overflowed = (src1 >> 15) + INT16_MAX; + if (int16_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) { + result = overflowed; + } + } + constant.v128.u16[i] = result; + } + // TODO(Triang3l): Trace DID_SATURATE. 
} else { for (int i = 0; i < 8; i++) { if (is_unsigned) { @@ -1228,7 +1258,22 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned, break; case INT32_TYPE: if (saturate) { - assert_always(); + // http://locklessinc.com/articles/sat_arithmetic/ + for (int i = 0; i < 4; i++) { + uint32_t src1 = constant.v128.u32[i]; + uint32_t src2 = other->constant.v128.u32[i]; + uint32_t result = src1 + src2; + if (is_unsigned) { + result |= -int32_t(result < src1); + } else { + uint32_t overflowed = (src1 >> 31) + INT32_MAX; + if (int32_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) { + result = overflowed; + } + } + constant.v128.u32[i] = result; + } + // TODO(Triang3l): Trace DID_SATURATE. } else { for (int i = 0; i < 4; i++) { if (is_unsigned) { @@ -1241,7 +1286,22 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned, break; case INT64_TYPE: if (saturate) { - assert_always(); + // http://locklessinc.com/articles/sat_arithmetic/ + for (int i = 0; i < 2; i++) { + uint64_t src1 = constant.v128.u64[i]; + uint64_t src2 = other->constant.v128.u64[i]; + uint64_t result = src1 + src2; + if (is_unsigned) { + result |= -int64_t(result < src1); + } else { + uint64_t overflowed = (src1 >> 63) + INT64_MAX; + if (int64_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) { + result = overflowed; + } + } + constant.v128.u64[i] = result; + } + // TODO(Triang3l): Trace DID_SATURATE. 
} else { if (is_unsigned) { constant.v128.u64[0] += other->constant.v128.u64[0]; @@ -1282,7 +1342,22 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned, break; case INT8_TYPE: if (saturate) { - assert_always(); + // http://locklessinc.com/articles/sat_arithmetic/ + for (int i = 0; i < 16; i++) { + uint8_t src1 = constant.v128.u8[i]; + uint8_t src2 = other->constant.v128.u8[i]; + uint8_t result = src1 - src2; + if (is_unsigned) { + result &= -int8_t(result <= src1); + } else { + uint8_t overflowed = (src1 >> 7) + INT8_MAX; + if (int8_t((overflowed ^ src2) & (overflowed ^ result)) < 0) { + result = overflowed; + } + } + constant.v128.u8[i] = result; + } + // TODO(Triang3l): Trace DID_SATURATE. } else { for (int i = 0; i < 16; i++) { if (is_unsigned) { @@ -1295,7 +1370,22 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned, break; case INT16_TYPE: if (saturate) { - assert_always(); + // http://locklessinc.com/articles/sat_arithmetic/ + for (int i = 0; i < 8; i++) { + uint16_t src1 = constant.v128.u16[i]; + uint16_t src2 = other->constant.v128.u16[i]; + uint16_t result = src1 - src2; + if (is_unsigned) { + result &= -int16_t(result <= src1); + } else { + uint16_t overflowed = (src1 >> 15) + INT16_MAX; + if (int16_t((overflowed ^ src2) & (overflowed ^ result)) < 0) { + result = overflowed; + } + } + constant.v128.u16[i] = result; + } + // TODO(Triang3l): Trace DID_SATURATE. 
} else { for (int i = 0; i < 8; i++) { if (is_unsigned) { @@ -1308,7 +1398,22 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned, break; case INT32_TYPE: if (saturate) { - assert_always(); + // http://locklessinc.com/articles/sat_arithmetic/ + for (int i = 0; i < 4; i++) { + uint32_t src1 = constant.v128.u32[i]; + uint32_t src2 = other->constant.v128.u32[i]; + uint32_t result = src1 - src2; + if (is_unsigned) { + result &= -int32_t(result <= src1); + } else { + uint32_t overflowed = (src1 >> 31) + INT32_MAX; + if (int32_t((overflowed ^ src2) & (overflowed ^ result)) < 0) { + result = overflowed; + } + } + constant.v128.u32[i] = result; + } + // TODO(Triang3l): Trace DID_SATURATE. } else { for (int i = 0; i < 4; i++) { if (is_unsigned) { @@ -1321,7 +1426,22 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned, break; case INT64_TYPE: if (saturate) { - assert_always(); + // http://locklessinc.com/articles/sat_arithmetic/ + for (int i = 0; i < 2; i++) { + uint64_t src1 = constant.v128.u64[i]; + uint64_t src2 = other->constant.v128.u64[i]; + uint64_t result = src1 - src2; + if (is_unsigned) { + result &= -int64_t(result <= src1); + } else { + uint64_t overflowed = (src1 >> 63) + INT64_MAX; + if (int64_t((overflowed ^ src2) & (overflowed ^ result)) < 0) { + result = overflowed; + } + } + constant.v128.u64[i] = result; + } + // TODO(Triang3l): Trace DID_SATURATE. } else { if (is_unsigned) { constant.v128.u64[0] -= other->constant.v128.u64[0];