From 06fcdef96825a9dd4f94dcdda605d696227a8758 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 31 May 2018 16:17:27 +0300 Subject: [PATCH] [CPU] Bring back saturation in D3DCOLOR vpkd3d128 --- src/xenia/cpu/backend/x64/x64_emitter.cc | 1 + src/xenia/cpu/backend/x64/x64_emitter.h | 1 + src/xenia/cpu/backend/x64/x64_sequences.cc | 24 +++++++++------------- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index 6479da0e3..bff56de0a 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -626,6 +626,7 @@ static const vec128_t xmm_consts[] = { vec128i(0x01000302u, 0x05040706u, 0x09080B0Au, 0x0D0C0F0Eu), /* XMMPermuteControl15 */ vec128b(15), /* XMMPermuteByteMask */ vec128b(0x1F), + /* XMMPackD3DCOLORSat */ vec128i(0x404000FFu), /* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u), /* XMMUnpackD3DCOLOR */ diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h index 039d67509..5d9b91241 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.h +++ b/src/xenia/cpu/backend/x64/x64_emitter.h @@ -64,6 +64,7 @@ enum XmmConst { XMMByteOrderMask, XMMPermuteControl15, XMMPermuteByteMask, + XMMPackD3DCOLORSat, XMMPackD3DCOLOR, XMMUnpackD3DCOLOR, XMMPackFLOAT16_2, diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 84b91ea10..b8b337230 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -6964,24 +6964,20 @@ struct PACK : Sequence> { } static void EmitD3DCOLOR(X64Emitter& e, const EmitArgType& i) { assert_true(i.src2.value->IsConstantZero()); - // No saturation done here. - // Unpacking D3DCOLOR gives (1.0f | bits), or from 3F800000 to 3F8000FF. 
- // However, you can pack 3.0f + (value / (float) (1 << 22)), which creates - // a number between 40400000 and 404000FF: - // https://github.com/ValveSoftware/source-sdk-2013/blob/master/sp/src/public/pixelwriter.h#L648 - // With saturation, you will get 0 when re-packing after unpacking. - // The above code also has to perform clamping explicitly. - + // Clamp to [3.0, 3.0 + 255/2^22]. vmaxps goes first so a NaN lane becomes + // 3.0 and packs as 0x00; vminps first would turn NaN into 0xFF instead. + if (i.src1.is_constant) { + e.LoadConstantXmm(i.dest, i.src1.constant()); + e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMM3333)); + } else { + e.vmaxps(i.dest, i.src1, e.GetXmmConstPtr(XMM3333)); + } + e.vminps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLORSat)); // Extract bytes. // RGBA (XYZW) -> ARGB (WXYZ) // w = ((src1.uw & 0xFF) << 24) | ((src1.ux & 0xFF) << 16) | // ((src1.uy & 0xFF) << 8) | (src1.uz & 0xFF) - if (i.src1.is_constant) { - e.LoadConstantXmm(i.dest, i.src1.constant()); - e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR)); - } else { - e.vpshufb(i.dest, i.src1, e.GetXmmConstPtr(XMMPackD3DCOLOR)); - } + e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR)); } static __m128i EmulateFLOAT16_2(void*, __m128 src1) { alignas(16) float a[4];