[CPU] Bring back saturation in D3DCOLOR vpkd3d128

This commit is contained in:
Triang3l 2018-05-31 16:17:27 +03:00
parent 958aa04cc5
commit 06fcdef968
3 changed files with 12 additions and 14 deletions

View File

@@ -626,6 +626,7 @@ static const vec128_t xmm_consts[] = {
 vec128i(0x01000302u, 0x05040706u, 0x09080B0Au, 0x0D0C0F0Eu),
 /* XMMPermuteControl15 */ vec128b(15),
 /* XMMPermuteByteMask */ vec128b(0x1F),
+/* XMMPackD3DCOLORSat */ vec128i(0x404000FFu),
 /* XMMPackD3DCOLOR */
 vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u),
 /* XMMUnpackD3DCOLOR */

View File

@@ -64,6 +64,7 @@ enum XmmConst {
 XMMByteOrderMask,
 XMMPermuteControl15,
 XMMPermuteByteMask,
+XMMPackD3DCOLORSat,
 XMMPackD3DCOLOR,
 XMMUnpackD3DCOLOR,
 XMMPackFLOAT16_2,

View File

@@ -6964,24 +6964,20 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
 }
 static void EmitD3DCOLOR(X64Emitter& e, const EmitArgType& i) {
 assert_true(i.src2.value->IsConstantZero());
-// No saturation done here.
-// Unpacking D3DCOLOR gives (1.0f | bits), or from 3F800000 to 3F8000FF.
-// However, you can pack 3.0f + (value / (float) (1 << 22)), which creates
-// a number between 40400000 and 404000FF:
-// https://github.com/ValveSoftware/source-sdk-2013/blob/master/sp/src/public/pixelwriter.h#L648
-// With saturation, you will get 0 when re-packing after unpacking.
-// The above code also has to perform clamping explicitly.
+// Saturate to [3,3....] so that only values between 3...[00] and 3...[FF]
+// are valid.
+if (i.src1.is_constant) {
+e.LoadConstantXmm(i.dest, i.src1.constant());
+e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLORSat));
+} else {
+e.vminps(i.dest, i.src1, e.GetXmmConstPtr(XMMPackD3DCOLORSat));
+}
+e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMM3333));
 // Extract bytes.
 // RGBA (XYZW) -> ARGB (WXYZ)
 // w = ((src1.uw & 0xFF) << 24) | ((src1.ux & 0xFF) << 16) |
 // ((src1.uy & 0xFF) << 8) | (src1.uz & 0xFF)
-if (i.src1.is_constant) {
-e.LoadConstantXmm(i.dest, i.src1.constant());
-e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR));
-} else {
-e.vpshufb(i.dest, i.src1, e.GetXmmConstPtr(XMMPackD3DCOLOR));
-}
+e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR));
 }
 static __m128i EmulateFLOAT16_2(void*, __m128 src1) {
 alignas(16) float a[4];