[JIT] Full vctuxs support
This commit is contained in:
parent
4766a5ec24
commit
0577b6c9be
|
@ -677,6 +677,7 @@ static const vec128_t xmm_consts[] = {
|
||||||
/* XMMIntMin */ vec128i(INT_MIN),
|
/* XMMIntMin */ vec128i(INT_MIN),
|
||||||
/* XMMIntMax */ vec128i(INT_MAX),
|
/* XMMIntMax */ vec128i(INT_MAX),
|
||||||
/* XMMIntMaxPD */ vec128d(INT_MAX),
|
/* XMMIntMaxPD */ vec128d(INT_MAX),
|
||||||
|
/* XMMPosIntMinPS */ vec128f((float)0x80000000u),
|
||||||
};
|
};
|
||||||
|
|
||||||
// First location to try and place constants.
|
// First location to try and place constants.
|
||||||
|
|
|
@ -95,6 +95,7 @@ enum XmmConst {
|
||||||
XMMIntMin,
|
XMMIntMin,
|
||||||
XMMIntMax,
|
XMMIntMax,
|
||||||
XMMIntMaxPD,
|
XMMIntMaxPD,
|
||||||
|
XMMPosIntMinPS,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
||||||
|
|
|
@ -1601,18 +1601,38 @@ struct VECTOR_CONVERT_F2I
|
||||||
: Sequence<VECTOR_CONVERT_F2I,
|
: Sequence<VECTOR_CONVERT_F2I,
|
||||||
I<OPCODE_VECTOR_CONVERT_F2I, V128Op, V128Op>> {
|
I<OPCODE_VECTOR_CONVERT_F2I, V128Op, V128Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
Xmm src1 = i.src1;
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
||||||
|
// clamp to min 0
|
||||||
|
e.vmaxps(e.xmm0, i.src1, e.GetXmmConstPtr(XMMZero));
|
||||||
|
|
||||||
// Copy src1 if necessary.
|
// xmm1 = mask of values >= (unsigned)INT_MIN
|
||||||
bool copy_src1 = !!(i.instr->flags & ARITHMETIC_SATURATE);
|
e.vcmpgeps(e.xmm1, e.xmm0, e.GetXmmConstPtr(XMMPosIntMinPS));
|
||||||
if (copy_src1 && i.dest == i.src1) {
|
|
||||||
e.vmovdqa(e.xmm1, i.src1);
|
// scale any values >= (unsigned)INT_MIN back to [0, ...]
|
||||||
src1 = e.xmm1;
|
e.vsubps(e.xmm2, e.xmm0, e.GetXmmConstPtr(XMMPosIntMinPS));
|
||||||
}
|
e.vandps(e.xmm2, e.xmm1, e.xmm2); // 0 if < (unsigned)INT_MIN
|
||||||
|
e.vandnps(e.xmm0, e.xmm1, e.xmm0); // 0 if >= (unsigned)INT_MIN
|
||||||
|
|
||||||
|
// xmm0 = [0, INT_MAX]
|
||||||
|
// this may still contain values > INT_MAX (if src has vals > UINT_MAX)
|
||||||
|
e.vorps(e.xmm0, e.xmm0, e.xmm2);
|
||||||
|
e.vcvttps2dq(i.dest, e.xmm0);
|
||||||
|
|
||||||
|
// xmm0 = mask of values that need saturation
|
||||||
|
e.vpcmpeqd(e.xmm0, i.dest, e.GetXmmConstPtr(XMMIntMin));
|
||||||
|
|
||||||
|
// add 2^31 back to the lanes that were scaled down, giving [INT_MIN, UINT_MAX] (as unsigned)
|
||||||
|
e.vpand(e.xmm1, e.xmm1, e.GetXmmConstPtr(XMMIntMin));
|
||||||
|
e.vpaddd(i.dest, i.dest, e.xmm1);
|
||||||
|
|
||||||
|
// saturate values > UINT_MAX
|
||||||
|
e.vpor(i.dest, i.dest, e.xmm0);
|
||||||
|
} else {
|
||||||
|
Xmm src1 = e.xmm2;
|
||||||
|
e.vmovdqa(src1, i.src1); // Duplicate src1.
|
||||||
|
|
||||||
e.vcvttps2dq(i.dest, i.src1);
|
e.vcvttps2dq(i.dest, i.src1);
|
||||||
if (i.instr->flags & ARITHMETIC_SATURATE &&
|
|
||||||
!(i.instr->flags & ARITHMETIC_UNSIGNED)) {
|
|
||||||
// if dest is indeterminate and i.src1 >= 0 (i.e. !(i.src1 & 0x80000000))
|
// if dest is indeterminate and i.src1 >= 0 (i.e. !(i.src1 & 0x80000000))
|
||||||
// i.dest = 0x7FFFFFFF
|
// i.dest = 0x7FFFFFFF
|
||||||
e.vpcmpeqd(e.xmm0, i.dest, e.GetXmmConstPtr(XMMIntMin));
|
e.vpcmpeqd(e.xmm0, i.dest, e.GetXmmConstPtr(XMMIntMin));
|
||||||
|
@ -1621,8 +1641,6 @@ struct VECTOR_CONVERT_F2I
|
||||||
// (high bit of xmm0 = is ind. && i.src1 >= 0)
|
// (high bit of xmm0 = is ind. && i.src1 >= 0)
|
||||||
e.vblendvps(i.dest, i.dest, e.GetXmmConstPtr(XMMIntMax), e.xmm0);
|
e.vblendvps(i.dest, i.dest, e.GetXmmConstPtr(XMMIntMax), e.xmm0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(DrChat): Unsigned saturation!
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
EMITTER_OPCODE_TABLE(OPCODE_VECTOR_CONVERT_F2I, VECTOR_CONVERT_F2I);
|
EMITTER_OPCODE_TABLE(OPCODE_VECTOR_CONVERT_F2I, VECTOR_CONVERT_F2I);
|
||||||
|
|
|
@ -554,7 +554,7 @@ int InstrEmit_vctsxs_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb,
|
||||||
float fuimm = static_cast<float>(std::exp2(uimm));
|
float fuimm = static_cast<float>(std::exp2(uimm));
|
||||||
Value* v =
|
Value* v =
|
||||||
f.Mul(f.LoadVR(vb), f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE));
|
f.Mul(f.LoadVR(vb), f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE));
|
||||||
v = f.VectorConvertF2I(v, ARITHMETIC_SATURATE);
|
v = f.VectorConvertF2I(v);
|
||||||
f.StoreSAT(f.DidSaturate(v));
|
f.StoreSAT(f.DidSaturate(v));
|
||||||
f.StoreVR(vd, v);
|
f.StoreVR(vd, v);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -572,7 +572,7 @@ int InstrEmit_vctuxs_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb,
|
||||||
float fuimm = static_cast<float>(std::exp2(uimm));
|
float fuimm = static_cast<float>(std::exp2(uimm));
|
||||||
Value* v =
|
Value* v =
|
||||||
f.Mul(f.LoadVR(vb), f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE));
|
f.Mul(f.LoadVR(vb), f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE));
|
||||||
v = f.VectorConvertF2I(v, ARITHMETIC_UNSIGNED | ARITHMETIC_SATURATE);
|
v = f.VectorConvertF2I(v, ARITHMETIC_UNSIGNED);
|
||||||
f.StoreSAT(f.DidSaturate(v));
|
f.StoreSAT(f.DidSaturate(v));
|
||||||
f.StoreVR(vd, v);
|
f.StoreVR(vd, v);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -0,0 +1,79 @@
|
||||||
|
# 0 * 2^31
|
||||||
|
test_vctuxs_1:
|
||||||
|
#_ REGISTER_IN v0 [00000000, 00000000, 00000000, 00000000]
|
||||||
|
vctuxs v3, v0, 31
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v0 [00000000, 00000000, 00000000, 00000000]
|
||||||
|
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||||
|
|
||||||
|
# -0 * 2^31
|
||||||
|
test_vctuxs_2:
|
||||||
|
#_ REGISTER_IN v0 [80000000, 80000000, 80000000, 80000000]
|
||||||
|
vctuxs v3, v0, 31
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v0 [80000000, 80000000, 80000000, 80000000]
|
||||||
|
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||||
|
|
||||||
|
# smallest positive subnormal * 2^31
|
||||||
|
test_vctuxs_3:
|
||||||
|
#_ REGISTER_IN v0 [00000001, 00000001, 00000001, 00000001]
|
||||||
|
vctuxs v3, v0, 31
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v0 [00000001, 00000001, 00000001, 00000001]
|
||||||
|
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||||
|
|
||||||
|
# largest subnormal * 2^31
|
||||||
|
test_vctuxs_4:
|
||||||
|
#_ REGISTER_IN v0 [007FFFFF, 007FFFFF, 007FFFFF, 007FFFFF]
|
||||||
|
vctuxs v3, v0, 31
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v0 [007FFFFF, 007FFFFF, 007FFFFF, 007FFFFF]
|
||||||
|
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||||
|
|
||||||
|
# +1 * 2^0
|
||||||
|
test_vctuxs_5:
|
||||||
|
#_ REGISTER_IN v0 [3F800000, 3F800000, 3F800000, 3F800000]
|
||||||
|
vctuxs v3, v0, 0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v0 [3F800000, 3F800000, 3F800000, 3F800000]
|
||||||
|
#_ REGISTER_OUT v3 [00000001, 00000001, 00000001, 00000001]
|
||||||
|
|
||||||
|
# -1 * 2^0
|
||||||
|
test_vctuxs_6:
|
||||||
|
#_ REGISTER_IN v0 [BF800000, BF800000, BF800000, BF800000]
|
||||||
|
vctuxs v3, v0, 0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v0 [BF800000, BF800000, BF800000, BF800000]
|
||||||
|
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
||||||
|
|
||||||
|
# 2^31 * 2^0
|
||||||
|
test_vctuxs_7:
|
||||||
|
#_ REGISTER_IN v0 [4F000000, 4F000000, 4F000000, 4F000000]
|
||||||
|
vctuxs v3, v0, 0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v0 [4F000000, 4F000000, 4F000000, 4F000000]
|
||||||
|
#_ REGISTER_OUT v3 [80000000, 80000000, 80000000, 80000000]
|
||||||
|
|
||||||
|
# 2^32 * 2^0
|
||||||
|
test_vctuxs_8:
|
||||||
|
#_ REGISTER_IN v0 [4F800000, 4F800000, 4F800000, 4F800000]
|
||||||
|
vctuxs v3, v0, 0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v0 [4F800000, 4F800000, 4F800000, 4F800000]
|
||||||
|
#_ REGISTER_OUT v3 [FFFFFFFF, FFFFFFFF, FFFFFFFF, FFFFFFFF]
|
||||||
|
|
||||||
|
# +infinity * 2^0
|
||||||
|
test_vctuxs_9:
|
||||||
|
#_ REGISTER_IN v0 [7F800000, 7F800000, 7F800000, 7F800000]
|
||||||
|
vctuxs v3, v0, 0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v0 [7F800000, 7F800000, 7F800000, 7F800000]
|
||||||
|
#_ REGISTER_OUT v3 [FFFFFFFF, FFFFFFFF, FFFFFFFF, FFFFFFFF]
|
||||||
|
|
||||||
|
# -infinity * 2^0
|
||||||
|
test_vctuxs_10:
|
||||||
|
#_ REGISTER_IN v0 [FF800000, FF800000, FF800000, FF800000]
|
||||||
|
vctuxs v3, v0, 0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v0 [FF800000, FF800000, FF800000, FF800000]
|
||||||
|
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
|
Loading…
Reference in New Issue