[CPU] Implement saturating VectorAdd/VectorSub for constants (DID_SATURATE is not tracked yet)
This commit is contained in:
parent
0bed6e565b
commit
f14f44e729
|
@ -1202,7 +1202,22 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT8_TYPE:
|
case INT8_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
assert_always();
|
// http://locklessinc.com/articles/sat_arithmetic/
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
uint8_t src1 = constant.v128.u8[i];
|
||||||
|
uint8_t src2 = other->constant.v128.u8[i];
|
||||||
|
uint8_t result = src1 + src2;
|
||||||
|
if (is_unsigned) {
|
||||||
|
result |= -int8_t(result < src1);
|
||||||
|
} else {
|
||||||
|
uint8_t overflowed = (src1 >> 7) + INT8_MAX;
|
||||||
|
if (int8_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) {
|
||||||
|
result = overflowed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
constant.v128.u8[i] = result;
|
||||||
|
}
|
||||||
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < 16; i++) {
|
for (int i = 0; i < 16; i++) {
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
|
@ -1215,7 +1230,22 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT16_TYPE:
|
case INT16_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
assert_always();
|
// http://locklessinc.com/articles/sat_arithmetic/
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
uint16_t src1 = constant.v128.u16[i];
|
||||||
|
uint16_t src2 = other->constant.v128.u16[i];
|
||||||
|
uint16_t result = src1 + src2;
|
||||||
|
if (is_unsigned) {
|
||||||
|
result |= -int16_t(result < src1);
|
||||||
|
} else {
|
||||||
|
uint16_t overflowed = (src1 >> 15) + INT16_MAX;
|
||||||
|
if (int16_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) {
|
||||||
|
result = overflowed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
constant.v128.u16[i] = result;
|
||||||
|
}
|
||||||
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 0; i < 8; i++) {
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
|
@ -1228,7 +1258,22 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT32_TYPE:
|
case INT32_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
assert_always();
|
// http://locklessinc.com/articles/sat_arithmetic/
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
uint32_t src1 = constant.v128.u32[i];
|
||||||
|
uint32_t src2 = other->constant.v128.u32[i];
|
||||||
|
uint32_t result = src1 + src2;
|
||||||
|
if (is_unsigned) {
|
||||||
|
result |= -int32_t(result < src1);
|
||||||
|
} else {
|
||||||
|
uint32_t overflowed = (src1 >> 31) + INT32_MAX;
|
||||||
|
if (int32_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) {
|
||||||
|
result = overflowed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
constant.v128.u32[i] = result;
|
||||||
|
}
|
||||||
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
|
@ -1241,7 +1286,22 @@ void Value::VectorAdd(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT64_TYPE:
|
case INT64_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
assert_always();
|
// http://locklessinc.com/articles/sat_arithmetic/
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
uint64_t src1 = constant.v128.u64[i];
|
||||||
|
uint64_t src2 = other->constant.v128.u64[i];
|
||||||
|
uint64_t result = src1 + src2;
|
||||||
|
if (is_unsigned) {
|
||||||
|
result |= -int64_t(result < src1);
|
||||||
|
} else {
|
||||||
|
uint64_t overflowed = (src1 >> 63) + INT64_MAX;
|
||||||
|
if (int64_t((overflowed ^ src2) | ~(src2 ^ result)) >= 0) {
|
||||||
|
result = overflowed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
constant.v128.u64[i] = result;
|
||||||
|
}
|
||||||
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
constant.v128.u64[0] += other->constant.v128.u64[0];
|
constant.v128.u64[0] += other->constant.v128.u64[0];
|
||||||
|
@ -1282,7 +1342,22 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT8_TYPE:
|
case INT8_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
assert_always();
|
// http://locklessinc.com/articles/sat_arithmetic/
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
uint8_t src1 = constant.v128.u8[i];
|
||||||
|
uint8_t src2 = other->constant.v128.u8[i];
|
||||||
|
uint8_t result = src1 - src2;
|
||||||
|
if (is_unsigned) {
|
||||||
|
result &= -int8_t(result <= src1);
|
||||||
|
} else {
|
||||||
|
uint8_t overflowed = (src1 >> 7) + INT8_MAX;
|
||||||
|
if (int8_t((overflowed ^ src2) & (overflowed ^ result)) >= 0) {
|
||||||
|
result = overflowed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
constant.v128.u8[i] = result;
|
||||||
|
}
|
||||||
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < 16; i++) {
|
for (int i = 0; i < 16; i++) {
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
|
@ -1295,7 +1370,22 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT16_TYPE:
|
case INT16_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
assert_always();
|
// http://locklessinc.com/articles/sat_arithmetic/
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
uint16_t src1 = constant.v128.u16[i];
|
||||||
|
uint16_t src2 = other->constant.v128.u16[i];
|
||||||
|
uint16_t result = src1 - src2;
|
||||||
|
if (is_unsigned) {
|
||||||
|
result &= -int16_t(result <= src1);
|
||||||
|
} else {
|
||||||
|
uint16_t overflowed = (src1 >> 15) + INT16_MAX;
|
||||||
|
if (int16_t((overflowed ^ src2) & (overflowed ^ result)) >= 0) {
|
||||||
|
result = overflowed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
constant.v128.u16[i] = result;
|
||||||
|
}
|
||||||
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 0; i < 8; i++) {
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
|
@ -1308,7 +1398,22 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT32_TYPE:
|
case INT32_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
assert_always();
|
// http://locklessinc.com/articles/sat_arithmetic/
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
uint32_t src1 = constant.v128.u32[i];
|
||||||
|
uint32_t src2 = other->constant.v128.u32[i];
|
||||||
|
uint32_t result = src1 - src2;
|
||||||
|
if (is_unsigned) {
|
||||||
|
result &= -int32_t(result <= src1);
|
||||||
|
} else {
|
||||||
|
uint32_t overflowed = (src1 >> 31) + INT32_MAX;
|
||||||
|
if (int32_t((overflowed ^ src2) & (overflowed ^ result)) >= 0) {
|
||||||
|
result = overflowed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
constant.v128.u32[i] = result;
|
||||||
|
}
|
||||||
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
|
@ -1321,7 +1426,22 @@ void Value::VectorSub(Value* other, TypeName type, bool is_unsigned,
|
||||||
break;
|
break;
|
||||||
case INT64_TYPE:
|
case INT64_TYPE:
|
||||||
if (saturate) {
|
if (saturate) {
|
||||||
assert_always();
|
// http://locklessinc.com/articles/sat_arithmetic/
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
uint64_t src1 = constant.v128.u64[i];
|
||||||
|
uint64_t src2 = other->constant.v128.u64[i];
|
||||||
|
uint64_t result = src1 - src2;
|
||||||
|
if (is_unsigned) {
|
||||||
|
result &= -int64_t(result <= src1);
|
||||||
|
} else {
|
||||||
|
uint64_t overflowed = (src1 >> 63) + INT64_MAX;
|
||||||
|
if (int64_t((overflowed ^ src2) & (overflowed ^ result)) >= 0) {
|
||||||
|
result = overflowed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
constant.v128.u64[i] = result;
|
||||||
|
}
|
||||||
|
// TODO(Triang3l): Trace DID_SATURATE.
|
||||||
} else {
|
} else {
|
||||||
if (is_unsigned) {
|
if (is_unsigned) {
|
||||||
constant.v128.u64[0] -= other->constant.v128.u64[0];
|
constant.v128.u64[0] -= other->constant.v128.u64[0];
|
||||||
|
|
Loading…
Reference in New Issue