Merge pull request #1167 from Triang3l/vpkd3dcolorsat

[CPU] Bring back saturation in D3DCOLOR vpkd3d128
This commit is contained in:
Rick Gibbed 2018-05-31 08:19:46 -05:00 committed by GitHub
commit 0dc8ed4411
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 12 additions and 14 deletions

View File

@ -626,6 +626,7 @@ static const vec128_t xmm_consts[] = {
vec128i(0x01000302u, 0x05040706u, 0x09080B0Au, 0x0D0C0F0Eu),
/* XMMPermuteControl15 */ vec128b(15),
/* XMMPermuteByteMask */ vec128b(0x1F),
/* XMMPackD3DCOLORSat */ vec128i(0x404000FFu),
/* XMMPackD3DCOLOR */
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u),
/* XMMUnpackD3DCOLOR */

View File

@ -64,6 +64,7 @@ enum XmmConst {
XMMByteOrderMask,
XMMPermuteControl15,
XMMPermuteByteMask,
XMMPackD3DCOLORSat,
XMMPackD3DCOLOR,
XMMUnpackD3DCOLOR,
XMMPackFLOAT16_2,

View File

@ -6964,24 +6964,20 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
}
// Emits x64 code for the D3DCOLOR variant of PACK: clamps each float lane of
// src1 into the packable range, then gathers the low byte of every lane into
// the last 32-bit lane of dest in ARGB order.
//
// e: emitter that receives the generated instructions.
// i: instruction arguments; src2 must be constant zero (asserted), src1 is
//    the vector to pack and dest receives the result.
static void EmitD3DCOLOR(X64Emitter& e, const EmitArgType& i) {
  assert_true(i.src2.value->IsConstantZero());
  // Saturate to [3,3....] so that only values between 3...[00] and 3...[FF]
  // are valid, i.e. floats whose bit patterns lie between 0x40400000 and
  // 0x404000FF (XMMPackD3DCOLORSat holds the upper bound; XMM3333 is
  // presumably 3.0f in every lane - defined outside this block).
  if (i.src1.is_constant) {
    // A constant source has no register yet, so materialize it in dest first.
    e.LoadConstantXmm(i.dest, i.src1.constant());
    e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLORSat));
  } else {
    e.vminps(i.dest, i.src1, e.GetXmmConstPtr(XMMPackD3DCOLORSat));
  }
  e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMM3333));
  // Extract bytes.
  // RGBA (XYZW) -> ARGB (WXYZ)
  // w = ((src1.uw & 0xFF) << 24) | ((src1.ux & 0xFF) << 16) |
  //     ((src1.uy & 0xFF) << 8) | (src1.uz & 0xFF)
  // The shuffle must read the clamped value already in dest. Shuffling from
  // src1 again would overwrite dest and silently discard the saturation.
  e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR));
}
static __m128i EmulateFLOAT16_2(void*, __m128 src1) {
alignas(16) float a[4];