Merge pull request #1185 from Triang3l/vupkd3dfixes.
[CPU] Fix W in 2101010 vupkd3d and revert untested saturation
This commit is contained in:
commit
03099bd6c0
|
@ -611,6 +611,7 @@ static const vec128_t xmm_consts[] = {
|
||||||
vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
|
vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
|
||||||
/* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f),
|
/* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f),
|
||||||
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
|
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
|
||||||
|
/* XMM3331 */ vec128f(3.0f, 3.0f, 3.0f, 1.0f),
|
||||||
/* XMM3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f),
|
/* XMM3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f),
|
||||||
/* XMMSignMaskPS */
|
/* XMMSignMaskPS */
|
||||||
vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
|
vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
|
||||||
|
@ -647,8 +648,6 @@ static const vec128_t xmm_consts[] = {
|
||||||
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000504u, 0x09080D0Cu),
|
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000504u, 0x09080D0Cu),
|
||||||
/* XMMUnpackSHORT_2 */
|
/* XMMUnpackSHORT_2 */
|
||||||
vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
||||||
/* XMMUnpackSHORT_2_Min */
|
|
||||||
vec128i(0x403F8001u, 0x403F8001u, 0x00000000u, 0x00000000u),
|
|
||||||
/* XMMUnpackSHORT_4 */
|
/* XMMUnpackSHORT_4 */
|
||||||
vec128i(0xFFFF0B0Au, 0xFFFF0908u, 0xFFFF0F0Eu, 0xFFFF0D0Cu),
|
vec128i(0xFFFF0B0Au, 0xFFFF0908u, 0xFFFF0F0Eu, 0xFFFF0D0Cu),
|
||||||
/* XMMPackUINT_2101010_MinUnpacked */
|
/* XMMPackUINT_2101010_MinUnpacked */
|
||||||
|
|
|
@ -55,6 +55,7 @@ enum XmmConst {
|
||||||
XMMNormalizeX16Y16,
|
XMMNormalizeX16Y16,
|
||||||
XMM0001,
|
XMM0001,
|
||||||
XMM3301,
|
XMM3301,
|
||||||
|
XMM3331,
|
||||||
XMM3333,
|
XMM3333,
|
||||||
XMMSignMaskPS,
|
XMMSignMaskPS,
|
||||||
XMMSignMaskPD,
|
XMMSignMaskPD,
|
||||||
|
@ -76,7 +77,6 @@ enum XmmConst {
|
||||||
XMMPackSHORT_2,
|
XMMPackSHORT_2,
|
||||||
XMMPackSHORT_4,
|
XMMPackSHORT_4,
|
||||||
XMMUnpackSHORT_2,
|
XMMUnpackSHORT_2,
|
||||||
XMMUnpackSHORT_2_Min,
|
|
||||||
XMMUnpackSHORT_4,
|
XMMUnpackSHORT_4,
|
||||||
XMMPackUINT_2101010_MinUnpacked,
|
XMMPackUINT_2101010_MinUnpacked,
|
||||||
XMMPackUINT_2101010_MaxUnpacked,
|
XMMPackUINT_2101010_MaxUnpacked,
|
||||||
|
|
|
@ -7426,8 +7426,6 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
||||||
e.vpsrad(i.dest, 16);
|
e.vpsrad(i.dest, 16);
|
||||||
// Add 3,3,0,1.
|
// Add 3,3,0,1.
|
||||||
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3301));
|
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3301));
|
||||||
// Clamp the absolute value to the maximum positive value.
|
|
||||||
e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMMUnpackSHORT_2_Min));
|
|
||||||
}
|
}
|
||||||
static void EmitSHORT_4(X64Emitter& e, const EmitArgType& i) {
|
static void EmitSHORT_4(X64Emitter& e, const EmitArgType& i) {
|
||||||
// (VD.x) = 3.0 + (VB.x>>16)*2^-22
|
// (VD.x) = 3.0 + (VB.x>>16)*2^-22
|
||||||
|
@ -7454,14 +7452,12 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
||||||
e.vpsrad(i.dest, 16);
|
e.vpsrad(i.dest, 16);
|
||||||
// Add 3,3,3,3.
|
// Add 3,3,3,3.
|
||||||
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333));
|
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333));
|
||||||
// Clamp the absolute value to the maximum positive value.
|
|
||||||
e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_Min));
|
|
||||||
}
|
}
|
||||||
static void EmitUINT_2101010(X64Emitter& e, const EmitArgType& i) {
|
static void EmitUINT_2101010(X64Emitter& e, const EmitArgType& i) {
|
||||||
Xmm src;
|
Xmm src;
|
||||||
if (i.src1.is_constant) {
|
if (i.src1.is_constant) {
|
||||||
if (i.src1.value->IsConstantZero()) {
|
if (i.src1.value->IsConstantZero()) {
|
||||||
e.vmovdqa(i.dest, e.GetXmmConstPtr(XMM3333));
|
e.vmovdqa(i.dest, e.GetXmmConstPtr(XMM3331));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
src = e.xmm0;
|
src = e.xmm0;
|
||||||
|
@ -7487,14 +7483,14 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
||||||
// Remove higher duplicate components.
|
// Remove higher duplicate components.
|
||||||
e.vpand(i.dest, e.GetXmmConstPtr(XMMPackUINT_2101010_MaskUnpacked));
|
e.vpand(i.dest, e.GetXmmConstPtr(XMMPackUINT_2101010_MaskUnpacked));
|
||||||
}
|
}
|
||||||
// If negative, make smaller than 3 - sign extend XYZ before adding.
|
// If XYZ are negative, make smaller than 3 - sign extend XYZ before adding.
|
||||||
|
// W is unsigned.
|
||||||
e.vpslld(i.dest, 22);
|
e.vpslld(i.dest, 22);
|
||||||
e.vpsrad(i.dest, 22);
|
e.vpsrad(i.dest, 22);
|
||||||
// Add 3,3,3,3.
|
// Add 3,3,3,1.
|
||||||
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333));
|
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3331));
|
||||||
// Clamp the absolute values of XYZ to the maximum positive value.
|
// To convert XYZ to -1 to 1, games multiply by 0x46004020 & sub 0x46C06030.
|
||||||
e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackUINT_2101010_MinUnpacked));
|
// For W to 0 to 1, they multiply by and subtract 0x4A2AAAAB.
|
||||||
// To convert XYZ to -1 to 1, games multiply by 0x46004020 & add 0xC6C06030.
|
|
||||||
}
|
}
|
||||||
static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
|
static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
|
||||||
assert_false(IsPackOutSaturate(flags));
|
assert_false(IsPackOutSaturate(flags));
|
||||||
|
|
Loading…
Reference in New Issue