Saturating unsigned VECTOR_ADD.
This commit is contained in:
parent
8619a15ee3
commit
5436cde0fc
|
@ -444,6 +444,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
||||||
/* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f),
|
/* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f),
|
||||||
/* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
|
/* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
|
||||||
/* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu),
|
/* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu),
|
||||||
|
/* XMMUnsignedDwordMax */ vec128i(0xFFFFFFFFu, 0x00000000u, 0xFFFFFFFFu, 0x00000000u),
|
||||||
};
|
};
|
||||||
// TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to
|
// TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to
|
||||||
// prevent this move.
|
// prevent this move.
|
||||||
|
|
|
@ -52,6 +52,7 @@ enum XmmConst {
|
||||||
XMMOneOver255 = 13,
|
XMMOneOver255 = 13,
|
||||||
XMMShiftMaskPS = 14,
|
XMMShiftMaskPS = 14,
|
||||||
XMMShiftByteMask = 15,
|
XMMShiftByteMask = 15,
|
||||||
|
XMMUnsignedDwordMax = 16,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
// Unfortunately due to the design of xbyak we have to pass this to the ctor.
|
||||||
|
|
|
@ -2566,7 +2566,41 @@ EMITTER(VECTOR_ADD, MATCH(I<OPCODE_VECTOR_ADD, V128<>, V128<>, V128<>>)) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case INT32_TYPE:
|
case INT32_TYPE:
|
||||||
XEASSERTALWAYS();
|
if (saturate) {
|
||||||
|
if (is_unsigned) {
|
||||||
|
// We reuse all these temps...
|
||||||
|
XEASSERT(src1 != e.xmm0 && src1 != e.xmm1 && src1 != e.xmm2);
|
||||||
|
XEASSERT(src2 != e.xmm0 && src2 != e.xmm1 && src2 != e.xmm2);
|
||||||
|
// Clamp to 0xFFFFFFFF.
|
||||||
|
// Wish there was a vpaddusd...
|
||||||
|
// | A | B | C | D |
|
||||||
|
// | B | D |
|
||||||
|
e.db(0xCC);
|
||||||
|
e.vpsllq(e.xmm0, src1, 32);
|
||||||
|
e.vpsllq(e.xmm1, src2, 32);
|
||||||
|
e.vpsrlq(e.xmm0, 32);
|
||||||
|
e.vpsrlq(e.xmm1, 32);
|
||||||
|
e.vpaddq(e.xmm0, e.xmm1);
|
||||||
|
e.vpcmpgtq(e.xmm0, e.GetXmmConstPtr(XMMUnsignedDwordMax));
|
||||||
|
e.vpsllq(e.xmm0, 32);
|
||||||
|
e.vpsrlq(e.xmm0, 32);
|
||||||
|
// | A | C |
|
||||||
|
e.vpsrlq(e.xmm1, src1, 32);
|
||||||
|
e.vpsrlq(e.xmm2, src2, 32);
|
||||||
|
e.vpaddq(e.xmm1, e.xmm2);
|
||||||
|
e.vpcmpgtq(e.xmm1, e.GetXmmConstPtr(XMMUnsignedDwordMax));
|
||||||
|
e.vpsllq(e.xmm1, 32);
|
||||||
|
// xmm0 = mask with all bits set (111...) in each saturated dword.
|
||||||
|
e.vpor(e.xmm0, e.xmm1);
|
||||||
|
e.vpaddd(dest, src1, src2);
|
||||||
|
// dest.f[n] = xmm1.f[n] ? xmm1.f[n] : dest.f[n];
|
||||||
|
e.vblendvps(dest, dest, e.xmm1, e.xmm1);
|
||||||
|
} else {
|
||||||
|
XEASSERTALWAYS();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
e.vpaddd(dest, src1, src2);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case FLOAT32_TYPE:
|
case FLOAT32_TYPE:
|
||||||
e.vaddps(dest, src1, src2);
|
e.vaddps(dest, src1, src2);
|
||||||
|
|
Loading…
Reference in New Issue