diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index bff56de0a..77361aa54 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -611,6 +611,7 @@ static const vec128_t xmm_consts[] = { vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f), /* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f), /* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f), + /* XMM3331 */ vec128f(3.0f, 3.0f, 3.0f, 1.0f), /* XMM3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f), /* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u), @@ -647,8 +648,6 @@ static const vec128_t xmm_consts[] = { vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000504u, 0x09080D0Cu), /* XMMUnpackSHORT_2 */ vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu, 0xFFFFFFFFu), - /* XMMUnpackSHORT_2_Min */ - vec128i(0x403F8001u, 0x403F8001u, 0x00000000u, 0x00000000u), /* XMMUnpackSHORT_4 */ vec128i(0xFFFF0B0Au, 0xFFFF0908u, 0xFFFF0F0Eu, 0xFFFF0D0Cu), /* XMMPackUINT_2101010_MinUnpacked */ diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h index 5d9b91241..479fcc865 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.h +++ b/src/xenia/cpu/backend/x64/x64_emitter.h @@ -55,6 +55,7 @@ enum XmmConst { XMMNormalizeX16Y16, XMM0001, XMM3301, + XMM3331, XMM3333, XMMSignMaskPS, XMMSignMaskPD, @@ -76,7 +77,6 @@ enum XmmConst { XMMPackSHORT_2, XMMPackSHORT_4, XMMUnpackSHORT_2, - XMMUnpackSHORT_2_Min, XMMUnpackSHORT_4, XMMPackUINT_2101010_MinUnpacked, XMMPackUINT_2101010_MaxUnpacked, diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index b8b337230..d0eb88e11 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -7426,8 +7426,6 @@ struct UNPACK : Sequence> { e.vpsrad(i.dest, 16); // Add 3,3,0,1. e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3301)); - // Clamp the absolute value to the maximum positive value. - e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMMUnpackSHORT_2_Min)); } static void EmitSHORT_4(X64Emitter& e, const EmitArgType& i) { // (VD.x) = 3.0 + (VB.x>>16)*2^-22 @@ -7454,14 +7452,12 @@ struct UNPACK : Sequence> { e.vpsrad(i.dest, 16); // Add 3,3,3,3. e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333)); - // Clamp the absolute value to the maximum positive value. - e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_Min)); } static void EmitUINT_2101010(X64Emitter& e, const EmitArgType& i) { Xmm src; if (i.src1.is_constant) { if (i.src1.value->IsConstantZero()) { - e.vmovdqa(i.dest, e.GetXmmConstPtr(XMM3333)); + e.vmovdqa(i.dest, e.GetXmmConstPtr(XMM3331)); return; } src = e.xmm0; @@ -7487,14 +7483,14 @@ struct UNPACK : Sequence> { // Remove higher duplicate components. e.vpand(i.dest, e.GetXmmConstPtr(XMMPackUINT_2101010_MaskUnpacked)); } - // If negative, make smaller than 3 - sign extend XYZ before adding. + // If XYZ are negative, make smaller than 3 - sign extend XYZ before adding. + // W is unsigned. e.vpslld(i.dest, 22); e.vpsrad(i.dest, 22); - // Add 3,3,3,3. - e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333)); - // Clamp the absolute values of XYZ to the maximum positive value. - e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackUINT_2101010_MinUnpacked)); - // To convert XYZ to -1 to 1, games multiply by 0x46004020 & add 0xC6C06030. + // Add 3,3,3,1. + e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3331)); + // To convert XYZ to -1 to 1, games multiply by 0x46004020 & sub 0x46C06030. + // For W to 0 to 1, they multiply by and subtract 0x4A2AAAAB. } static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) { assert_false(IsPackOutSaturate(flags));