From ea89a5d179d103419ad49e0c8c87271a2767f45e Mon Sep 17 00:00:00 2001
From: "Dr. Chat"
Date: Sun, 5 Mar 2017 14:59:52 -0600
Subject: [PATCH] JIT - fctixx: Properly handle numbers > INT_MAX

---
 src/xenia/base/vec128.h                    | 15 ++++
 src/xenia/cpu/backend/x64/x64_emitter.cc   |  3 +
 src/xenia/cpu/backend/x64/x64_emitter.h    |  3 +
 src/xenia/cpu/backend/x64/x64_sequences.cc | 28 ++++++-
 src/xenia/cpu/ppc/testing/instr_fcti.s     | 97 ++++++++++++++++++++++
 5 files changed, 144 insertions(+), 2 deletions(-)
 create mode 100644 src/xenia/cpu/ppc/testing/instr_fcti.s

diff --git a/src/xenia/base/vec128.h b/src/xenia/base/vec128.h
index 49743f8bb..9d694a5f0 100644
--- a/src/xenia/base/vec128.h
+++ b/src/xenia/base/vec128.h
@@ -171,6 +171,21 @@ static inline vec128_t vec128i(uint32_t x, uint32_t y, uint32_t z, uint32_t w) {
   v.u32[3] = w;
   return v;
 }
+// Builds a vector with src copied into both f64 lanes.
+static inline vec128_t vec128d(double src) {
+  vec128_t v;
+  for (auto i = 0; i < 2; ++i) {
+    v.f64[i] = src;
+  }
+  return v;
+}
+// Builds a vector with x in f64 lane 0 and y in f64 lane 1.
+static inline vec128_t vec128d(double x, double y) {
+  vec128_t v;
+  v.f64[0] = x;
+  v.f64[1] = y;
+  return v;
+}
 static inline vec128_t vec128f(float src) {
   vec128_t v;
   for (auto i = 0; i < 4; ++i) {
diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc
index 14e7ae508..69ab8b169 100644
--- a/src/xenia/cpu/backend/x64/x64_emitter.cc
+++ b/src/xenia/cpu/backend/x64/x64_emitter.cc
@@ -670,6 +670,9 @@ static const vec128_t xmm_consts[] = {
                                 0x80000000u),
     /* XMMShortMinPS */ vec128f(SHRT_MIN),
     /* XMMShortMaxPS */ vec128f(SHRT_MAX),
+    /* XMMIntMaxPS */ vec128f(float(INT_MAX)),  // Rounds up to 2^31.
+    /* XMMIntMaxPD */ vec128d(INT_MAX),  // Exactly 2147483647.0.
+    /* XMMInt64MaxPD */ vec128d(double(INT64_MAX)),  // Rounds up to 2^63.
 };
 
 // First location to try and place constants.
diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h
index c3d3a4356..79f863091 100644
--- a/src/xenia/cpu/backend/x64/x64_emitter.h
+++ b/src/xenia/cpu/backend/x64/x64_emitter.h
@@ -89,6 +89,9 @@ enum XmmConst {
   XMMSignMaskF32,
   XMMShortMinPS,
   XMMShortMaxPS,
+  XMMIntMaxPS,
+  XMMIntMaxPD,
+  XMMInt64MaxPD,
 };
 
 // Unfortunately due to the design of xbyak we have to pass this to the ctor.
diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc
index 98b3bf3e0..498d8ab3c 100644
--- a/src/xenia/cpu/backend/x64/x64_sequences.cc
+++ b/src/xenia/cpu/backend/x64/x64_sequences.cc
@@ -1429,21 +1429,45 @@ struct CONVERT_I32_F32
     : Sequence<CONVERT_I32_F32, I<OPCODE_CONVERT, I32Op, F32Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     // TODO(benvanik): saturation check? cvtt* (trunc?)
+    // cvtss2si returns 0x80000000 when the input is out of int32 range or NaN.
+    // That is already the saturated result for negative overflow, so only
+    // positive overflow needs patching up. float(INT_MAX) rounds to 2^31, so
+    // src1 >= XMMIntMaxPS is exactly "too large"; NaN compares unordered
+    // (CF=1) and keeps 0x80000000.
+    // NOTE(review): assumes eax is scratch and never aliases i.dest - confirm.
     e.vcvtss2si(i.dest, i.src1);
+    e.mov(e.eax, 0x7FFFFFFF);
+    e.vcomiss(i.src1, e.GetXmmConstPtr(XMMIntMaxPS));
+    e.cmovae(i.dest, e.eax);
   }
 };
 struct CONVERT_I32_F64
     : Sequence<CONVERT_I32_F64, I<OPCODE_CONVERT, I32Op, F64Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    // TODO(benvanik): saturation check? cvtt* (trunc?)
+    // Intel returns 0x80000000 if the double value does not fit within an
+    // int32 (or is NaN); PPC saturates the value instead.
+    // INT_MAX is exactly representable as a double, so src1 >= XMMIntMaxPD
+    // selects precisely the inputs that must become 0x7FFFFFFF. Negative
+    // overflow and NaN (unordered, CF=1) keep 0x80000000.
+    // NOTE(review): assumes eax is scratch and never aliases i.dest - confirm.
     e.vcvttsd2si(i.dest, i.src1);
+    e.mov(e.eax, 0x7FFFFFFF);
+    e.vcomisd(i.src1, e.GetXmmConstPtr(XMMIntMaxPD));
+    e.cmovae(i.dest, e.eax);
   }
 };
 struct CONVERT_I64_F64
     : Sequence<CONVERT_I64_F64, I<OPCODE_CONVERT, I64Op, F64Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    // TODO(benvanik): saturation check? cvtt* (trunc?)
+    // cvttsd2si returns 0x8000000000000000 on overflow/NaN, which is already
+    // the saturated result for negative overflow. double(INT64_MAX) rounds to
+    // 2^63, so src1 >= XMMInt64MaxPD is exactly positive overflow; substitute
+    // INT64_MAX there. NaN compares unordered (CF=1) and is left untouched.
+    // NOTE(review): assumes rax is scratch and never aliases i.dest - confirm.
     e.vcvttsd2si(i.dest, i.src1);
+    e.mov(e.rax, 0x7FFFFFFFFFFFFFFFull);
+    e.vcomisd(i.src1, e.GetXmmConstPtr(XMMInt64MaxPD));
+    e.cmovae(i.dest, e.rax);
   }
 };
 struct CONVERT_F32_I32
diff --git a/src/xenia/cpu/ppc/testing/instr_fcti.s b/src/xenia/cpu/ppc/testing/instr_fcti.s
new file mode 100644
index 000000000..3ea41eeec
--- /dev/null
+++ b/src/xenia/cpu/ppc/testing/instr_fcti.s
@@ -0,0 +1,97 @@
+# Credits: These tests stolen from https://github.com/dolphin-emu/hwtests
+# +0
+test_fctiwz_1:
+  #_ REGISTER_IN f0 0x0000000000000000
+  fctiwz f1, f0
+  blr
+  #_ REGISTER_OUT f0 0x0000000000000000
+  #_ REGISTER_OUT f1 0x0000000000000000
+
+# -0
+test_fctiwz_2:
+  #_ REGISTER_IN f0 0x8000000000000000
+  fctiwz f1, f0
+  blr
+  #_ REGISTER_OUT f0 0x8000000000000000
+  #_ REGISTER_OUT f1 0x0000000000000000
+
+# smallest positive subnormal
+test_fctiwz_3:
+  #_ REGISTER_IN f0 0x0000000000000001
+  fctiwz f1, f0
+  blr
+  #_ REGISTER_OUT f0 0x0000000000000001
+  #_ REGISTER_OUT f1 0x0000000000000000
+
+# largest subnormal
+test_fctiwz_4:
+  #_ REGISTER_IN f0 0x000fffffffffffff
+  fctiwz f1, f0
+  blr
+  #_ REGISTER_OUT f0 0x000fffffffffffff
+  #_ REGISTER_OUT f1 0x0000000000000000
+
+# +1
+test_fctiwz_5:
+  #_ REGISTER_IN f0 0x3ff0000000000000
+  fctiwz f1, f0
+  blr
+  #_ REGISTER_OUT f0 0x3ff0000000000000
+  #_ REGISTER_OUT f1 0x0000000000000001
+
+# -1
+test_fctiwz_6:
+  #_ REGISTER_IN f0 0xbff0000000000000
+  fctiwz f1, f0
+  blr
+  #_ REGISTER_OUT f0 0xbff0000000000000
+  #_ REGISTER_OUT f1 0x00000000ffffffff
+
+# -(2^31)
+test_fctiwz_7:
+  #_ REGISTER_IN f0 0xc1e0000000000000
+  fctiwz f1, f0
+  blr
+  #_ REGISTER_OUT f0 0xc1e0000000000000
+  #_ REGISTER_OUT f1 0x0000000080000000
+
+# 2^31 - 1
+test_fctiwz_8:
+  #_ REGISTER_IN f0 0x41dfffffffc00000
+  fctiwz f1, f0
+  blr
+  #_ REGISTER_OUT f0 0x41dfffffffc00000
+  #_ REGISTER_OUT f1 0x000000007fffffff
+
+# +infinity
+test_fctiwz_9:
+  #_ REGISTER_IN f0 0x7ff0000000000000
+  fctiwz f1, f0
+  blr
+  #_ REGISTER_OUT f0 0x7ff0000000000000
+  #_ REGISTER_OUT f1 0x000000007fffffff
+
+# -infinity
+test_fctiwz_10:
+  #_ REGISTER_IN f0 0xfff0000000000000
+  fctiwz f1, f0
+  blr
+  #_ REGISTER_OUT f0 0xfff0000000000000
+  #_ REGISTER_OUT f1 0x0000000080000000
+
+# TODO(DrChat): Xenia doesn't handle NaNs yet.
+# # QNaN
+# test_fctiwz_11:
+#   #_ REGISTER_IN f0 0xfff8000000000000
+#   fctiwz f1, f0
+#   blr
+#   #_ REGISTER_OUT f0 0xfff8000000000000
+#   #_ REGISTER_OUT f1 0x0000000080000000
+#
+# # SNaN
+# test_fctiwz_12:
+#   #_ REGISTER_IN f0 0xfff4000000000000
+#   fctiwz f1, f0
+#   blr
+#   #_ REGISTER_OUT f0 0xfff4000000000000
+#   #_ REGISTER_OUT f1 0x0000000080000000