[CPU] Fix W in 2101010 vupkd3d and revert untested saturation

This commit is contained in:
Triang3l 2018-06-11 17:22:15 +03:00
parent 5f16e46282
commit 0378f532ab
3 changed files with 9 additions and 14 deletions

View File

@ -611,6 +611,7 @@ static const vec128_t xmm_consts[] = {
vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f), vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
/* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f), /* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f),
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f), /* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
/* XMM3331 */ vec128f(3.0f, 3.0f, 3.0f, 1.0f),
/* XMM3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f), /* XMM3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f),
/* XMMSignMaskPS */ /* XMMSignMaskPS */
vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u), vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
@ -647,8 +648,6 @@ static const vec128_t xmm_consts[] = {
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000504u, 0x09080D0Cu), vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000504u, 0x09080D0Cu),
/* XMMUnpackSHORT_2 */ /* XMMUnpackSHORT_2 */
vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu, 0xFFFFFFFFu), vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu, 0xFFFFFFFFu),
/* XMMUnpackSHORT_2_Min */
vec128i(0x403F8001u, 0x403F8001u, 0x00000000u, 0x00000000u),
/* XMMUnpackSHORT_4 */ /* XMMUnpackSHORT_4 */
vec128i(0xFFFF0B0Au, 0xFFFF0908u, 0xFFFF0F0Eu, 0xFFFF0D0Cu), vec128i(0xFFFF0B0Au, 0xFFFF0908u, 0xFFFF0F0Eu, 0xFFFF0D0Cu),
/* XMMPackUINT_2101010_MinUnpacked */ /* XMMPackUINT_2101010_MinUnpacked */

View File

@ -55,6 +55,7 @@ enum XmmConst {
XMMNormalizeX16Y16, XMMNormalizeX16Y16,
XMM0001, XMM0001,
XMM3301, XMM3301,
XMM3331,
XMM3333, XMM3333,
XMMSignMaskPS, XMMSignMaskPS,
XMMSignMaskPD, XMMSignMaskPD,
@ -76,7 +77,6 @@ enum XmmConst {
XMMPackSHORT_2, XMMPackSHORT_2,
XMMPackSHORT_4, XMMPackSHORT_4,
XMMUnpackSHORT_2, XMMUnpackSHORT_2,
XMMUnpackSHORT_2_Min,
XMMUnpackSHORT_4, XMMUnpackSHORT_4,
XMMPackUINT_2101010_MinUnpacked, XMMPackUINT_2101010_MinUnpacked,
XMMPackUINT_2101010_MaxUnpacked, XMMPackUINT_2101010_MaxUnpacked,

View File

@ -7426,8 +7426,6 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
e.vpsrad(i.dest, 16); e.vpsrad(i.dest, 16);
// Add 3,3,0,1. // Add 3,3,0,1.
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3301)); e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3301));
// Clamp the absolute value to the maximum positive value.
e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMMUnpackSHORT_2_Min));
} }
static void EmitSHORT_4(X64Emitter& e, const EmitArgType& i) { static void EmitSHORT_4(X64Emitter& e, const EmitArgType& i) {
// (VD.x) = 3.0 + (VB.x>>16)*2^-22 // (VD.x) = 3.0 + (VB.x>>16)*2^-22
@ -7454,14 +7452,12 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
e.vpsrad(i.dest, 16); e.vpsrad(i.dest, 16);
// Add 3,3,3,3. // Add 3,3,3,3.
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333)); e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333));
// Clamp the absolute value to the maximum positive value.
e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_Min));
} }
static void EmitUINT_2101010(X64Emitter& e, const EmitArgType& i) { static void EmitUINT_2101010(X64Emitter& e, const EmitArgType& i) {
Xmm src; Xmm src;
if (i.src1.is_constant) { if (i.src1.is_constant) {
if (i.src1.value->IsConstantZero()) { if (i.src1.value->IsConstantZero()) {
e.vmovdqa(i.dest, e.GetXmmConstPtr(XMM3333)); e.vmovdqa(i.dest, e.GetXmmConstPtr(XMM3331));
return; return;
} }
src = e.xmm0; src = e.xmm0;
@ -7487,14 +7483,14 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
// Remove higher duplicate components. // Remove higher duplicate components.
e.vpand(i.dest, e.GetXmmConstPtr(XMMPackUINT_2101010_MaskUnpacked)); e.vpand(i.dest, e.GetXmmConstPtr(XMMPackUINT_2101010_MaskUnpacked));
} }
// If negative, make smaller than 3 - sign extend XYZ before adding. // If XYZ are negative, make smaller than 3 - sign extend XYZ before adding.
// W is unsigned.
e.vpslld(i.dest, 22); e.vpslld(i.dest, 22);
e.vpsrad(i.dest, 22); e.vpsrad(i.dest, 22);
// Add 3,3,3,3. // Add 3,3,3,1.
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333)); e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3331));
// Clamp the absolute values of XYZ to the maximum positive value. // To convert XYZ to -1 to 1, games multiply by 0x46004020 and subtract 0x46C06030.
e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackUINT_2101010_MinUnpacked)); // To convert W to 0 to 1, they multiply by 0x4A2AAAAB and subtract 0x4A2AAAAB.
// To convert XYZ to -1 to 1, games multiply by 0x46004020 & add 0xC6C06030.
} }
static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) { static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
assert_false(IsPackOutSaturate(flags)); assert_false(IsPackOutSaturate(flags));