Saturating unsigned VECTOR_ADD.
This commit is contained in:
parent
8619a15ee3
commit
5436cde0fc
|
@ -444,6 +444,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
|||
/* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f),
|
||||
/* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
|
||||
/* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu),
|
||||
/* XMMUnsignedDwordMax */ vec128i(0xFFFFFFFFu, 0x00000000u, 0xFFFFFFFFu, 0x00000000u),
|
||||
};
|
||||
// TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to
|
||||
// prevent this move.
|
||||
|
|
|
@ -52,6 +52,7 @@ enum XmmConst {
|
|||
XMMOneOver255 = 13,
|
||||
XMMShiftMaskPS = 14,
|
||||
XMMShiftByteMask = 15,
|
||||
XMMUnsignedDwordMax = 16,
|
||||
};
|
||||
|
||||
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
||||
|
|
|
@ -2566,7 +2566,41 @@ EMITTER(VECTOR_ADD, MATCH(I<OPCODE_VECTOR_ADD, V128<>, V128<>, V128<>>)) {
|
|||
}
|
||||
break;
|
||||
case INT32_TYPE:
|
||||
XEASSERTALWAYS();
|
||||
if (saturate) {
|
||||
if (is_unsigned) {
|
||||
// We reuse all these temps...
|
||||
XEASSERT(src1 != e.xmm0 && src1 != e.xmm1 && src1 != e.xmm2);
|
||||
XEASSERT(src2 != e.xmm0 && src2 != e.xmm1 && src2 != e.xmm2);
|
||||
// Clamp to 0xFFFFFFFF.
|
||||
// Wish there was a vpaddusd...
|
||||
// | A | B | C | D |
|
||||
// |     B |     D |
|
||||
e.db(0xCC);
|
||||
e.vpsllq(e.xmm0, src1, 32);
|
||||
e.vpsllq(e.xmm1, src2, 32);
|
||||
e.vpsrlq(e.xmm0, 32);
|
||||
e.vpsrlq(e.xmm1, 32);
|
||||
e.vpaddq(e.xmm0, e.xmm1);
|
||||
e.vpcmpgtq(e.xmm0, e.GetXmmConstPtr(XMMUnsignedDwordMax));
|
||||
e.vpsllq(e.xmm0, 32);
|
||||
e.vpsrlq(e.xmm0, 32);
|
||||
// |     A |     C |
|
||||
e.vpsrlq(e.xmm1, src1, 32);
|
||||
e.vpsrlq(e.xmm2, src2, 32);
|
||||
e.vpaddq(e.xmm1, e.xmm2);
|
||||
e.vpcmpgtq(e.xmm1, e.GetXmmConstPtr(XMMUnsignedDwordMax));
|
||||
e.vpsllq(e.xmm1, 32);
|
||||
// xmm0 = mask whose saturated dwords are all ones (111...)
|
||||
e.vpor(e.xmm0, e.xmm1);
|
||||
e.vpaddd(dest, src1, src2);
|
||||
// dest.f[n] = xmm1.f[n] ? xmm1.f[n] : dest.f[n];
|
||||
e.vblendvps(dest, dest, e.xmm1, e.xmm1);
|
||||
} else {
|
||||
XEASSERTALWAYS();
|
||||
}
|
||||
} else {
|
||||
e.vpaddd(dest, src1, src2);
|
||||
}
|
||||
break;
|
||||
case FLOAT32_TYPE:
|
||||
e.vaddps(dest, src1, src2);
|
||||
|
|
Loading…
Reference in New Issue