Merge pull request #1185 from Triang3l/vupkd3dfixes.

[CPU] Fix W in 2101010 vupkd3d and revert untested saturation
This commit is contained in:
Rick Gibbed 2018-06-11 11:35:18 -05:00 committed by GitHub
commit 03099bd6c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 9 additions and 14 deletions

View File

@@ -611,6 +611,7 @@ static const vec128_t xmm_consts[] = {
vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
/* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f),
/* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
/* XMM3331 */ vec128f(3.0f, 3.0f, 3.0f, 1.0f),
/* XMM3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f),
/* XMMSignMaskPS */
vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
@@ -647,8 +648,6 @@ static const vec128_t xmm_consts[] = {
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000504u, 0x09080D0Cu),
/* XMMUnpackSHORT_2 */
vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu, 0xFFFFFFFFu),
/* XMMUnpackSHORT_2_Min */
vec128i(0x403F8001u, 0x403F8001u, 0x00000000u, 0x00000000u),
/* XMMUnpackSHORT_4 */
vec128i(0xFFFF0B0Au, 0xFFFF0908u, 0xFFFF0F0Eu, 0xFFFF0D0Cu),
/* XMMPackUINT_2101010_MinUnpacked */

View File

@@ -55,6 +55,7 @@ enum XmmConst {
XMMNormalizeX16Y16,
XMM0001,
XMM3301,
XMM3331,
XMM3333,
XMMSignMaskPS,
XMMSignMaskPD,
@@ -76,7 +77,6 @@ enum XmmConst {
XMMPackSHORT_2,
XMMPackSHORT_4,
XMMUnpackSHORT_2,
XMMUnpackSHORT_2_Min,
XMMUnpackSHORT_4,
XMMPackUINT_2101010_MinUnpacked,
XMMPackUINT_2101010_MaxUnpacked,

View File

@@ -7426,8 +7426,6 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
e.vpsrad(i.dest, 16);
// Add 3,3,0,1.
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3301));
// Clamp the absolute value to the maximum positive value.
e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMMUnpackSHORT_2_Min));
}
static void EmitSHORT_4(X64Emitter& e, const EmitArgType& i) {
// (VD.x) = 3.0 + (VB.x>>16)*2^-22
@@ -7454,14 +7452,12 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
e.vpsrad(i.dest, 16);
// Add 3,3,3,3.
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333));
// Clamp the absolute value to the maximum positive value.
e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackSHORT_Min));
}
static void EmitUINT_2101010(X64Emitter& e, const EmitArgType& i) {
Xmm src;
if (i.src1.is_constant) {
if (i.src1.value->IsConstantZero()) {
e.vmovdqa(i.dest, e.GetXmmConstPtr(XMM3333));
e.vmovdqa(i.dest, e.GetXmmConstPtr(XMM3331));
return;
}
src = e.xmm0;
@@ -7487,14 +7483,14 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
// Remove higher duplicate components.
e.vpand(i.dest, e.GetXmmConstPtr(XMMPackUINT_2101010_MaskUnpacked));
}
// If negative, make smaller than 3 - sign extend XYZ before adding.
// If XYZ are negative, make smaller than 3 - sign extend XYZ before adding.
// W is unsigned.
e.vpslld(i.dest, 22);
e.vpsrad(i.dest, 22);
// Add 3,3,3,3.
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333));
// Clamp the absolute values of XYZ to the maximum positive value.
e.vmaxps(i.dest, i.dest, e.GetXmmConstPtr(XMMPackUINT_2101010_MinUnpacked));
// To convert XYZ to -1 to 1, games multiply by 0x46004020 & add 0xC6C06030.
// Add 3,3,3,1.
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3331));
// To convert XYZ to -1 to 1, games multiply by 0x46004020 & sub 0x46C06030.
// To convert W to 0 to 1, they multiply by 0x4A2AAAAB and subtract 0x4A2AAAAB.
}
static void Emit8_IN_16(X64Emitter& e, const EmitArgType& i, uint32_t flags) {
assert_false(IsPackOutSaturate(flags));