Merge pull request #1167 from Triang3l/vpkd3dcolorsat
[CPU] Bring back saturation in D3DCOLOR vpkd3d128
This commit is contained in:
commit
0dc8ed4411
|
@ -626,6 +626,7 @@ static const vec128_t xmm_consts[] = {
|
|||
vec128i(0x01000302u, 0x05040706u, 0x09080B0Au, 0x0D0C0F0Eu),
|
||||
/* XMMPermuteControl15 */ vec128b(15),
|
||||
/* XMMPermuteByteMask */ vec128b(0x1F),
|
||||
/* XMMPackD3DCOLORSat */ vec128i(0x404000FFu),
|
||||
/* XMMPackD3DCOLOR */
|
||||
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u),
|
||||
/* XMMUnpackD3DCOLOR */
|
||||
|
|
|
@ -64,6 +64,7 @@ enum XmmConst {
|
|||
XMMByteOrderMask,
|
||||
XMMPermuteControl15,
|
||||
XMMPermuteByteMask,
|
||||
XMMPackD3DCOLORSat,
|
||||
XMMPackD3DCOLOR,
|
||||
XMMUnpackD3DCOLOR,
|
||||
XMMPackFLOAT16_2,
|
||||
|
|
|
@ -6964,24 +6964,20 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
|||
}
|
||||
// Emits x64 code that packs a float4 color (RGBA in XYZW) into a single
// 32-bit D3DCOLOR (ARGB) value.
//
// Inputs are expected in the "3.0f + (value / (float)(1 << 22))" form, whose
// bit patterns range from 0x40400000 to 0x404000FF, so the low byte of each
// lane already holds the 8-bit channel value. Lanes outside that range are
// saturated first; without the clamp, the low byte of an out-of-range float
// would be packed as if it were a valid channel.
//
// i.src1 is the vector to pack (may be a constant), i.src2 must be constant
// zero, and i.dest receives the packed result.
static void EmitD3DCOLOR(X64Emitter& e, const EmitArgType& i) {
  assert_true(i.src2.value->IsConstantZero());
  // Saturate to [3.0f, 3.0f + 255 / (1 << 22)] so that only bit patterns
  // between 0x404000[00] and 0x404000[FF] reach the byte extraction below.
  // Upper bound: XMMPackD3DCOLORSat (0x404000FF).
  // Lower bound: XMM3333 (presumably 3.0f in every lane - defined elsewhere).
  if (i.src1.is_constant) {
    e.LoadConstantXmm(i.dest, i.src1.constant());
    e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLORSat));
  } else {
    e.vminps(i.dest, i.src1, e.GetXmmConstPtr(XMMPackD3DCOLORSat));
  }
  e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMM3333));
  // Extract bytes from the *saturated* value now held in dest. Shuffling from
  // src1 (or reloading the constant) here would throw the clamp away.
  // RGBA (XYZW) -> ARGB (WXYZ)
  // w = ((src1.uw & 0xFF) << 24) | ((src1.ux & 0xFF) << 16) |
  //     ((src1.uy & 0xFF) << 8) | (src1.uz & 0xFF)
  e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR));
}
|
||||
static __m128i EmulateFLOAT16_2(void*, __m128 src1) {
|
||||
alignas(16) float a[4];
|
||||
|
|
Loading…
Reference in New Issue