diff --git a/src/xenia/cpu/backend/x64/x64_seq_vector.cc b/src/xenia/cpu/backend/x64/x64_seq_vector.cc index 146fc8ca1..b97f44f1f 100644 --- a/src/xenia/cpu/backend/x64/x64_seq_vector.cc +++ b/src/xenia/cpu/backend/x64/x64_seq_vector.cc @@ -99,6 +99,19 @@ struct VECTOR_CONVERT_F2I e.ChangeMxcsrMode(MXCSRMode::Vmx); Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm3); if (i.instr->flags & ARITHMETIC_UNSIGNED) { + if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) { + Opmask mask = e.k1; + // Mask positive values and unordered values + // _CMP_NLT_UQ + e.vcmpps(mask, i.src1, e.GetXmmConstPtr(XMMZero), 0x15); + + // vcvttps2udq will saturate overflowing positive values and unordered + // values to UINT_MAX. Mask registers will write zero everywhere + // else (negative values) + e.vcvttps2udq(i.dest.reg() | mask | e.T_z, i.src1); + return; + } + // clamp to min 0 e.vmaxps(e.xmm0, src1, e.GetXmmConstPtr(XMMZero)); @@ -621,6 +634,15 @@ struct VECTOR_ADD case INT32_TYPE: if (saturate) { if (is_unsigned) { + if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) { + e.vpaddd(dest, src1, src2); + Opmask saturate = e.k1; + // _mm_cmplt_epu32_mask + e.vpcmpud(saturate, dest, src1, 0x1); + e.vpternlogd(dest | saturate, dest, dest, 0xFF); + return; + } + // xmm0 is the only temp register that can be used by // src1/src2. e.vpaddd(e.xmm1, src1, src2); @@ -637,6 +659,20 @@ struct VECTOR_ADD } else { e.vpaddd(e.xmm1, src1, src2); + if (e.IsFeatureEnabled(kX64EmitAVX512Ortho | + kX64EmitAVX512DQ)) { + e.vmovdqa32(e.xmm3, src1); + e.vpternlogd(e.xmm3, e.xmm1, src2, 0b00100100); + + const Opmask saturate = e.k1; + e.vpmovd2m(saturate, e.xmm3); + + e.vpsrad(e.xmm2, e.xmm1, 31); + e.vpxord(e.xmm2, e.xmm2, e.GetXmmConstPtr(XMMSignMaskI32)); + e.vpblendmd(dest | saturate, e.xmm1, e.xmm2); + return; + } + // Overflow results if two inputs are the same sign and the // result isn't the same sign. if ((s32b)(~(src1 ^ src2) & // (src1 ^ res)) < 0) then overflowed diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 28b33fd76..57b038613 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -2839,6 +2839,10 @@ struct NOT_I64 : Sequence> { }; struct NOT_V128 : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { + if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) { + e.vpternlogd(i.dest, i.src1, i.src1, 0b01010101); + return; + } SimdDomain domain = e.DeduceSimdDomain(i.src1.value); if (domain == SimdDomain::FLOATING) { e.vxorps(i.dest, i.src1, e.GetXmmConstPtr(XMMFFFF /* FF... */)); diff --git a/src/xenia/cpu/ppc/testing/ppc_testing_main.cc b/src/xenia/cpu/ppc/testing/ppc_testing_main.cc index bcc7e0c6c..5faa4998e 100644 --- a/src/xenia/cpu/ppc/testing/ppc_testing_main.cc +++ b/src/xenia/cpu/ppc/testing/ppc_testing_main.cc @@ -349,8 +349,8 @@ class TestRunner { uint32_t expected = std::strtoul(ccs, nullptr, 16); uint8_t actual = *p; - expecteds.AppendFormat(" %02X", expected); - actuals.AppendFormat(" %02X", actual); + expecteds.AppendFormat(" {:02X}", expected); + actuals.AppendFormat(" {:02X}", actual); if (expected != actual) { any_failed = true;