Merge pull request #1186 from Triang3l/vupkd3d_nan.
[CPU] vupkd3d: NaN on negative overflow and tests
This commit is contained in:
commit
d124e17618
|
@ -650,6 +650,7 @@ static const vec128_t xmm_consts[] = {
|
||||||
vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
||||||
/* XMMUnpackSHORT_4 */
|
/* XMMUnpackSHORT_4 */
|
||||||
vec128i(0xFFFF0B0Au, 0xFFFF0908u, 0xFFFF0F0Eu, 0xFFFF0D0Cu),
|
vec128i(0xFFFF0B0Au, 0xFFFF0908u, 0xFFFF0F0Eu, 0xFFFF0D0Cu),
|
||||||
|
/* XMMUnpackSHORT_Overflow */ vec128i(0x403F8000u),
|
||||||
/* XMMPackUINT_2101010_MinUnpacked */
|
/* XMMPackUINT_2101010_MinUnpacked */
|
||||||
vec128i(0x403FFE01u, 0x403FFE01u, 0x403FFE01u, 0x40400000u),
|
vec128i(0x403FFE01u, 0x403FFE01u, 0x403FFE01u, 0x40400000u),
|
||||||
/* XMMPackUINT_2101010_MaxUnpacked */
|
/* XMMPackUINT_2101010_MaxUnpacked */
|
||||||
|
@ -659,6 +660,8 @@ static const vec128_t xmm_consts[] = {
|
||||||
/* XMMPackUINT_2101010_MaskPacked */
|
/* XMMPackUINT_2101010_MaskPacked */
|
||||||
vec128i(0x3FFu, 0x3FFu << 10, 0x3FFu << 20, 0x3u << 30),
|
vec128i(0x3FFu, 0x3FFu << 10, 0x3FFu << 20, 0x3u << 30),
|
||||||
/* XMMPackUINT_2101010_Shift */ vec128i(0, 10, 20, 30),
|
/* XMMPackUINT_2101010_Shift */ vec128i(0, 10, 20, 30),
|
||||||
|
/* XMMUnpackUINT_2101010_Overflow */ vec128i(0x403FFE00u),
|
||||||
|
/* XMMUnpackOverflowNaN */ vec128i(0x7FC00000u),
|
||||||
/* XMMOneOver255 */ vec128f(1.0f / 255.0f),
|
/* XMMOneOver255 */ vec128f(1.0f / 255.0f),
|
||||||
/* XMMMaskEvenPI16 */
|
/* XMMMaskEvenPI16 */
|
||||||
vec128i(0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu),
|
vec128i(0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu),
|
||||||
|
|
|
@ -78,11 +78,14 @@ enum XmmConst {
|
||||||
XMMPackSHORT_4,
|
XMMPackSHORT_4,
|
||||||
XMMUnpackSHORT_2,
|
XMMUnpackSHORT_2,
|
||||||
XMMUnpackSHORT_4,
|
XMMUnpackSHORT_4,
|
||||||
|
XMMUnpackSHORT_Overflow,
|
||||||
XMMPackUINT_2101010_MinUnpacked,
|
XMMPackUINT_2101010_MinUnpacked,
|
||||||
XMMPackUINT_2101010_MaxUnpacked,
|
XMMPackUINT_2101010_MaxUnpacked,
|
||||||
XMMPackUINT_2101010_MaskUnpacked,
|
XMMPackUINT_2101010_MaskUnpacked,
|
||||||
XMMPackUINT_2101010_MaskPacked,
|
XMMPackUINT_2101010_MaskPacked,
|
||||||
XMMPackUINT_2101010_Shift,
|
XMMPackUINT_2101010_Shift,
|
||||||
|
XMMUnpackUINT_2101010_Overflow,
|
||||||
|
XMMUnpackOverflowNaN,
|
||||||
XMMOneOver255,
|
XMMOneOver255,
|
||||||
XMMMaskEvenPI16,
|
XMMMaskEvenPI16,
|
||||||
XMMShiftMaskEvenPI16,
|
XMMShiftMaskEvenPI16,
|
||||||
|
|
|
@ -7426,6 +7426,9 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
||||||
e.vpsrad(i.dest, 16);
|
e.vpsrad(i.dest, 16);
|
||||||
// Add 3,3,0,1.
|
// Add 3,3,0,1.
|
||||||
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3301));
|
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3301));
|
||||||
|
// Return quiet NaNs in case of negative overflow.
|
||||||
|
e.vcmpeqps(e.xmm0, i.dest, e.GetXmmConstPtr(XMMUnpackSHORT_Overflow));
|
||||||
|
e.vblendvps(i.dest, i.dest, e.GetXmmConstPtr(XMMUnpackOverflowNaN), e.xmm0);
|
||||||
}
|
}
|
||||||
static void EmitSHORT_4(X64Emitter& e, const EmitArgType& i) {
|
static void EmitSHORT_4(X64Emitter& e, const EmitArgType& i) {
|
||||||
// (VD.x) = 3.0 + (VB.x>>16)*2^-22
|
// (VD.x) = 3.0 + (VB.x>>16)*2^-22
|
||||||
|
@ -7452,6 +7455,9 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
||||||
e.vpsrad(i.dest, 16);
|
e.vpsrad(i.dest, 16);
|
||||||
// Add 3,3,3,3.
|
// Add 3,3,3,3.
|
||||||
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333));
|
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3333));
|
||||||
|
// Return quiet NaNs in case of negative overflow.
|
||||||
|
e.vcmpeqps(e.xmm0, i.dest, e.GetXmmConstPtr(XMMUnpackSHORT_Overflow));
|
||||||
|
e.vblendvps(i.dest, i.dest, e.GetXmmConstPtr(XMMUnpackOverflowNaN), e.xmm0);
|
||||||
}
|
}
|
||||||
static void EmitUINT_2101010(X64Emitter& e, const EmitArgType& i) {
|
static void EmitUINT_2101010(X64Emitter& e, const EmitArgType& i) {
|
||||||
Xmm src;
|
Xmm src;
|
||||||
|
@ -7489,6 +7495,10 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
||||||
e.vpsrad(i.dest, 22);
|
e.vpsrad(i.dest, 22);
|
||||||
// Add 3,3,3,1.
|
// Add 3,3,3,1.
|
||||||
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3331));
|
e.vpaddd(i.dest, e.GetXmmConstPtr(XMM3331));
|
||||||
|
// Return quiet NaNs in case of negative overflow.
|
||||||
|
e.vcmpeqps(e.xmm0, i.dest,
|
||||||
|
e.GetXmmConstPtr(XMMUnpackUINT_2101010_Overflow));
|
||||||
|
e.vblendvps(i.dest, i.dest, e.GetXmmConstPtr(XMMUnpackOverflowNaN), e.xmm0);
|
||||||
// To convert XYZ to -1 to 1, games multiply by 0x46004020 & sub 0x46C06030.
|
// To convert XYZ to -1 to 1, games multiply by 0x46004020 & sub 0x46C06030.
|
||||||
// For W to 0 to 1, they multiply by 0x4A2AAAAB and then subtract that same constant.
|
// For W to 0 to 1, they multiply by 0x4A2AAAAB and then subtract that same constant.
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,12 @@ test_vupkd3d128_short2_2:
|
||||||
.long 0x18641FF0
|
.long 0x18641FF0
|
||||||
blr
|
blr
|
||||||
#_ REGISTER_OUT v3 [40407FFF, 403FF333, 00000000, 3f800000]
|
#_ REGISTER_OUT v3 [40407FFF, 403FF333, 00000000, 3f800000]
|
||||||
|
test_vupkd3d128_short2_3:
|
||||||
|
#_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 00008000]
|
||||||
|
# vupkd3d128 v3, v3, 1
|
||||||
|
.long 0x18641FF0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v3 [40400000, 7FC00000, 00000000, 3f800000]
|
||||||
|
|
||||||
test_vupkd3d128_short4_0:
|
test_vupkd3d128_short4_0:
|
||||||
#_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, 7FFFFFFF, 007FFFF8]
|
#_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, 7FFFFFFF, 007FFFF8]
|
||||||
|
@ -53,3 +59,22 @@ test_vupkd3d128_float16_4_0:
|
||||||
.long 0x18741FF0
|
.long 0x18741FF0
|
||||||
blr
|
blr
|
||||||
#_ REGISTER_OUT v3 [3F000000, bf002000, 3f004000, bf006000]
|
#_ REGISTER_OUT v3 [3F000000, bf002000, 3f004000, bf006000]
|
||||||
|
|
||||||
|
test_vupkd3d128_uint_2101010_0:
|
||||||
|
#_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 400001FF]
|
||||||
|
# vupkd3d128 v3, v3, 2
|
||||||
|
.long 0x18681FF0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v3 [404001FF, 40400000, 40400000, 3F800001]
|
||||||
|
test_vupkd3d128_uint_2101010_1:
|
||||||
|
#_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 40000201]
|
||||||
|
# vupkd3d128 v3, v3, 2
|
||||||
|
.long 0x18681FF0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v3 [403FFE01, 40400000, 40400000, 3F800001]
|
||||||
|
test_vupkd3d128_uint_2101010_2:
|
||||||
|
#_ REGISTER_IN v3 [CDCDCDCD, CDCDCDCD, CDCDCDCD, 40000200]
|
||||||
|
# vupkd3d128 v3, v3, 2
|
||||||
|
.long 0x18681FF0
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v3 [7FC00000, 40400000, 40400000, 3F800001]
|
||||||
|
|
Loading…
Reference in New Issue