diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc
index 684fcaa86..3ee6ab59c 100644
--- a/src/alloy/backend/x64/x64_emitter.cc
+++ b/src/alloy/backend/x64/x64_emitter.cc
@@ -431,8 +431,8 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
     /* XMMZero */ vec128f(0.0f, 0.0f, 0.0f, 0.0f),
     /* XMMOne */ vec128f(1.0f, 1.0f, 1.0f, 1.0f),
     /* XMMNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f),
-    /* XMMMaskX16Y16 */ vec128i(0x0000FFFF, 0xFFFF0000, 0x00000000, 0x00000000),
-    /* XMMFlipX16Y16 */ vec128i(0x00008000, 0x00000000, 0x00000000, 0x00000000),
+    /* XMMMaskX16Y16 */ vec128i(0x0000FFFFu, 0xFFFF0000u, 0x00000000u, 0x00000000u),
+    /* XMMFlipX16Y16 */ vec128i(0x00008000u, 0x00000000u, 0x00000000u, 0x00000000u),
     /* XMMFixX16Y16 */ vec128f(-32768.0f, 0.0f, 0.0f, 0.0f),
     /* XMMNormalizeX16Y16 */ vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
     /* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
@@ -440,11 +440,13 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
     /* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
     /* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
     /* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15),
-    /* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF02, 0xFFFFFF01, 0xFFFFFF00, 0xFFFFFF02),
+    /* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u),
+    /* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF02u, 0xFFFFFF01u, 0xFFFFFF00u, 0xFFFFFF03u),
     /* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f),
     /* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
     /* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu),
     /* XMMUnsignedDwordMax */ vec128i(0xFFFFFFFFu, 0x00000000u, 0xFFFFFFFFu, 0x00000000u),
+    /* XMM255 */ vec128f(255.0f, 255.0f, 255.0f, 255.0f),
   };
   // TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to
   // prevent this move.
diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h
index 3ac92be3f..4b05e5134 100644
--- a/src/alloy/backend/x64/x64_emitter.h
+++ b/src/alloy/backend/x64/x64_emitter.h
@@ -48,11 +48,13 @@ enum XmmConst {
   XMMSignMaskPD = 9,
   XMMByteSwapMask = 10,
   XMMPermuteControl15 = 11,
-  XMMUnpackD3DCOLOR = 12,
-  XMMOneOver255 = 13,
-  XMMShiftMaskPS = 14,
-  XMMShiftByteMask = 15,
-  XMMUnsignedDwordMax = 16,
+  XMMPackD3DCOLOR = 12,
+  XMMUnpackD3DCOLOR = 13,
+  XMMOneOver255 = 14,
+  XMMShiftMaskPS = 15,
+  XMMShiftByteMask = 16,
+  XMMUnsignedDwordMax = 17,
+  XMM255 = 18,
 };
 
 // Unfortunately due to the design of xbyak we have to pass this to the ctor.
diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc
index d60404aa1..3c5da0755 100644
--- a/src/alloy/backend/x64/x64_sequences.cc
+++ b/src/alloy/backend/x64/x64_sequences.cc
@@ -4582,7 +4582,30 @@ EMITTER(PACK, MATCH(I, V128<>>)) {
     }
   }
   static void EmitD3DCOLOR(X64Emitter& e, const EmitArgType& i) {
-    XEASSERTALWAYS();
+    // RGBA (XYZW) -> ARGB (WXYZ)
+    // float r = roundf(((src1.x < 0) ? 0 : ((1 < src1.x) ? 1 : src1.x)) * 255);
+    // float g = roundf(((src1.y < 0) ? 0 : ((1 < src1.y) ? 1 : src1.y)) * 255);
+    // float b = roundf(((src1.z < 0) ? 0 : ((1 < src1.z) ? 1 : src1.z)) * 255);
+    // float a = roundf(((src1.w < 0) ? 0 : ((1 < src1.w) ? 1 : src1.w)) * 255);
+    // dest.iw = ((uint32_t)a << 24) |
+    //           ((uint32_t)r << 16) |
+    //           ((uint32_t)g << 8) |
+    //           ((uint32_t)b);
+    // f2i(clamp(src, 0, 1) * 255)
+    e.vpxor(e.xmm0, e.xmm0);
+    if (i.src1.is_constant) {
+      e.LoadConstantXmm(e.xmm1, i.src1.constant());
+      e.vmaxps(e.xmm0, e.xmm1);
+    } else {
+      e.vmaxps(e.xmm0, i.src1);
+    }
+    e.vminps(e.xmm0, e.GetXmmConstPtr(XMMOne));
+    e.vmulps(e.xmm0, e.GetXmmConstPtr(XMM255));
+    // Convert with rounding rather than truncation: the reference above uses
+    // roundf(), and vcvttps2dq would map e.g. 0.999f * 255 = 254.745 to 254.
+    // NOTE(review): assumes MXCSR is in its default round-to-nearest mode.
+    e.vcvtps2dq(e.xmm0, e.xmm0);
+    e.vpshufb(i.dest, e.xmm0, e.GetXmmConstPtr(XMMPackD3DCOLOR));
   }
   static void EmitFLOAT16_2(X64Emitter& e, const EmitArgType& i) {
     XEASSERTALWAYS();