JIT - vctsxs: Properly saturate signed integers
This commit is contained in:
parent
ea89a5d179
commit
d5e7bc8dca
|
@ -670,9 +670,9 @@ static const vec128_t xmm_consts[] = {
|
|||
0x80000000u),
|
||||
/* XMMShortMinPS */ vec128f(SHRT_MIN),
|
||||
/* XMMShortMaxPS */ vec128f(SHRT_MAX),
|
||||
/* XMMIntMaxPS */ vec128f(float(INT_MAX)),
|
||||
/* XMMIntMin */ vec128i(INT_MIN),
|
||||
/* XMMIntMax */ vec128i(INT_MAX),
|
||||
/* XMMIntMaxPD */ vec128d(INT_MAX),
|
||||
/* XMMInt64MaxPD */ vec128d(double(INT64_MAX)),
|
||||
};
|
||||
|
||||
// First location to try and place constants.
|
||||
|
|
|
@ -89,9 +89,9 @@ enum XmmConst {
|
|||
XMMSignMaskF32,
|
||||
XMMShortMinPS,
|
||||
XMMShortMaxPS,
|
||||
XMMIntMaxPS,
|
||||
XMMIntMin,
|
||||
XMMIntMax,
|
||||
XMMIntMaxPD,
|
||||
XMMInt64MaxPD,
|
||||
};
|
||||
|
||||
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
||||
|
|
|
@ -1429,8 +1429,11 @@ struct CONVERT_I32_F32
|
|||
: Sequence<CONVERT_I32_F32, I<OPCODE_CONVERT, I32Op, F32Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// TODO(benvanik): saturation check? cvtt* (trunc?)
|
||||
e.vminss(e.xmm0, i.src1, e.GetXmmConstPtr(XMMIntMaxPS));
|
||||
e.vcvtss2si(i.dest, e.xmm0);
|
||||
if (i.instr->flags == ROUND_TO_ZERO) {
|
||||
e.vcvttss2si(i.dest, e.xmm0);
|
||||
} else {
|
||||
e.vcvtss2si(i.dest, e.xmm0);
|
||||
}
|
||||
}
|
||||
};
|
||||
struct CONVERT_I32_F64
|
||||
|
@ -1440,14 +1443,22 @@ struct CONVERT_I32_F64
|
|||
// PPC saturates the value instead.
|
||||
// So, we can clamp the double value to (double)0x7FFFFFFF.
|
||||
e.vminsd(e.xmm0, i.src1, e.GetXmmConstPtr(XMMIntMaxPD));
|
||||
e.vcvttsd2si(i.dest, e.xmm0);
|
||||
if (i.instr->flags == ROUND_TO_ZERO) {
|
||||
e.vcvttsd2si(i.dest, e.xmm0);
|
||||
} else {
|
||||
e.vcvtsd2si(i.dest, e.xmm0);
|
||||
}
|
||||
}
|
||||
};
|
||||
struct CONVERT_I64_F64
|
||||
: Sequence<CONVERT_I64_F64, I<OPCODE_CONVERT, I64Op, F64Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.vminsd(e.xmm0, i.src1, e.GetXmmConstPtr(XMMInt64MaxPD));
|
||||
e.vcvttsd2si(i.dest, e.xmm0);
|
||||
// TODO(benvanik): saturation check? cvtt* (trunc?)
|
||||
if (i.instr->flags == ROUND_TO_ZERO) {
|
||||
e.vcvttsd2si(i.dest, e.xmm0);
|
||||
} else {
|
||||
e.vcvtsd2si(i.dest, e.xmm0);
|
||||
}
|
||||
}
|
||||
};
|
||||
struct CONVERT_F32_I32
|
||||
|
@ -1568,13 +1579,28 @@ struct VECTOR_CONVERT_F2I
|
|||
: Sequence<VECTOR_CONVERT_F2I,
|
||||
I<OPCODE_VECTOR_CONVERT_F2I, V128Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// flags = ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED
|
||||
// TODO(benvanik): are these really the same? VC++ thinks so.
|
||||
e.vcvttps2dq(i.dest, i.src1);
|
||||
if (i.instr->flags & ARITHMETIC_SATURATE) {
|
||||
// TODO(benvanik): check saturation.
|
||||
// In theory cvt throws if it saturates.
|
||||
Xmm src1 = i.src1;
|
||||
|
||||
// Copy src1 if necessary.
|
||||
bool copy_src1 = !!(i.instr->flags & ARITHMETIC_SATURATE);
|
||||
if (copy_src1 && i.dest == i.src1) {
|
||||
e.vmovdqa(e.xmm1, i.src1);
|
||||
src1 = e.xmm1;
|
||||
}
|
||||
|
||||
e.vcvttps2dq(i.dest, i.src1);
|
||||
if (i.instr->flags & ARITHMETIC_SATURATE &&
|
||||
!(i.instr->flags & ARITHMETIC_UNSIGNED)) {
|
||||
// if dest is indeterminate and i.src1 >= 0 (i.e. !(i.src1 & 0x80000000))
|
||||
// i.dest = 0x7FFFFFFF
|
||||
e.vpcmpeqd(e.xmm0, i.dest, e.GetXmmConstPtr(XMMIntMin));
|
||||
e.vpandn(e.xmm0, src1, e.xmm0);
|
||||
|
||||
// (high bit of xmm0 = is ind. && i.src1 >= 0)
|
||||
e.vblendvps(i.dest, i.dest, e.GetXmmConstPtr(XMMIntMax), e.xmm0);
|
||||
}
|
||||
|
||||
// TODO(DrChat): Unsigned saturation!
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_VECTOR_CONVERT_F2I, VECTOR_CONVERT_F2I);
|
||||
|
|
|
@ -32,6 +32,7 @@ enum RoundMode {
|
|||
ROUND_TO_NEAREST,
|
||||
ROUND_TO_MINUS_INFINITY,
|
||||
ROUND_TO_POSITIVE_INFINITY,
|
||||
ROUND_DYNAMIC, // Round based on the host's rounding mode.
|
||||
};
|
||||
|
||||
enum LoadStoreFlags {
|
||||
|
|
|
@ -1,32 +1,71 @@
|
|||
# 0 * 2^31
|
||||
test_vctsxs_1:
|
||||
#_ REGISTER_IN v3 [3f800000, 3fc00000, 3f8ccccd, 3ff33333]
|
||||
# 1.0, 1.5, 1.1, 1.9
|
||||
vctsxs v3, v3, 0
|
||||
#_ REGISTER_IN v0 [00000000, 00000000, 00000000, 00000000]
|
||||
vctsxs v3, v0, 31
|
||||
blr
|
||||
#_ REGISTER_OUT v0 [00000000, 00000000, 00000000, 00000000]
|
||||
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||
|
||||
# -0 * 2^31
|
||||
test_vctsxs_2:
|
||||
#_ REGISTER_IN v0 [80000000, 80000000, 80000000, 80000000]
|
||||
vctsxs v3, v0, 31
|
||||
blr
|
||||
#_ REGISTER_OUT v0 [80000000, 80000000, 80000000, 80000000]
|
||||
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||
|
||||
# smallest positive subnormal * 2^31
|
||||
test_vctsxs_3:
|
||||
#_ REGISTER_IN v0 [00000001, 00000001, 00000001, 00000001]
|
||||
vctsxs v3, v0, 31
|
||||
blr
|
||||
#_ REGISTER_OUT v0 [00000001, 00000001, 00000001, 00000001]
|
||||
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||
|
||||
# largest subnormal * 2^31
|
||||
test_vctsxs_4:
|
||||
#_ REGISTER_IN v0 [007FFFFF, 007FFFFF, 007FFFFF, 007FFFFF]
|
||||
vctsxs v3, v0, 31
|
||||
blr
|
||||
#_ REGISTER_OUT v0 [007FFFFF, 007FFFFF, 007FFFFF, 007FFFFF]
|
||||
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||
|
||||
# +1 * 2^0
|
||||
test_vctsxs_5:
|
||||
#_ REGISTER_IN v0 [3F800000, 3F800000, 3F800000, 3F800000]
|
||||
vctsxs v3, v0, 0
|
||||
blr
|
||||
#_ REGISTER_OUT v0 [3F800000, 3F800000, 3F800000, 3F800000]
|
||||
#_ REGISTER_OUT v3 [00000001, 00000001, 00000001, 00000001]
|
||||
|
||||
test_vctsxs_2:
|
||||
#_ REGISTER_IN v3 [3f800000, 3fc00000, 3f8ccccd, 3ff33333]
|
||||
# 1.0, 1.5, 1.1, 1.9
|
||||
vctsxs v3, v3, 1
|
||||
# -1 * 2^0
|
||||
test_vctsxs_6:
|
||||
#_ REGISTER_IN v0 [BF800000, BF800000, BF800000, BF800000]
|
||||
vctsxs v3, v0, 0
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [00000002, 00000003, 00000002, 00000003]
|
||||
#_ REGISTER_OUT v0 [BF800000, BF800000, BF800000, BF800000]
|
||||
#_ REGISTER_OUT v3 [FFFFFFFF, FFFFFFFF, FFFFFFFF, FFFFFFFF]
|
||||
|
||||
test_vctsxs_3:
|
||||
#_ REGISTER_IN v3 [3f800000, 3fc00000, 3f8ccccd, 3ff33333]
|
||||
# 1.0, 1.5, 1.1, 1.9
|
||||
vctsxs v3, v3, 2
|
||||
# 2^31 * 2^0
|
||||
test_vctsxs_7:
|
||||
#_ REGISTER_IN v0 [4F000000, 4F000000, 4F000000, 4F000000]
|
||||
vctsxs v3, v0, 0
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [00000004, 00000006, 00000004, 00000007]
|
||||
#_ REGISTER_OUT v0 [4F000000, 4F000000, 4F000000, 4F000000]
|
||||
#_ REGISTER_OUT v3 [7FFFFFFF, 7FFFFFFF, 7FFFFFFF, 7FFFFFFF]
|
||||
|
||||
test_vctsxs_4:
|
||||
#_ REGISTER_IN v3 [42c83333, 43480000, 449a4000, c49a4000]
|
||||
vctsxs v3, v3, 0
|
||||
# +infinity * 2^0
|
||||
test_vctsxs_8:
|
||||
#_ REGISTER_IN v0 [7F800000, 7F800000, 7F800000, 7F800000]
|
||||
vctsxs v3, v0, 0
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [00000064, 000000c8, 000004d2, fffffb2e]
|
||||
#_ REGISTER_OUT v0 [7F800000, 7F800000, 7F800000, 7F800000]
|
||||
#_ REGISTER_OUT v3 [7FFFFFFF, 7FFFFFFF, 7FFFFFFF, 7FFFFFFF]
|
||||
|
||||
test_vctsxs_5:
|
||||
#_ REGISTER_IN v3 [42c83333, 43480000, 449a4000, c49a4000]
|
||||
vctsxs v3, v3, 1
|
||||
# -infinity * 2^0
|
||||
test_vctsxs_9:
|
||||
#_ REGISTER_IN v0 [FF800000, FF800000, FF800000, FF800000]
|
||||
vctsxs v3, v0, 0
|
||||
blr
|
||||
#_ REGISTER_OUT v3 [000000c8, 00000190, 000009a4, fffff65c]
|
||||
#_ REGISTER_OUT v0 [FF800000, FF800000, FF800000, FF800000]
|
||||
#_ REGISTER_OUT v3 [80000000, 80000000, 80000000, 80000000]
|
||||
|
|
Loading…
Reference in New Issue