From b79f33526dbe0fe293f3940d1eeb6d916905fee6 Mon Sep 17 00:00:00 2001
From: chrisps
Date: Wed, 15 Jan 2020 15:57:09 -0800
Subject: [PATCH] Optimized CONVERT_I64_TO_F64 with neat overflow trick

Reduced instruction count from 11 to 8, eliminated a movq stall.
---
Review notes (ignored by git am):
 * The saturation test must be `cmp dest, 1`: dest - 1 overflows only for
   0x8000000000000000, the value cvt(t)sd2si returns on overflow/NaN.
   `cmp dest, -1` computes dest + 1, which never sets OF for that value, so
   positive overflow was left as INT64_MIN instead of INT64_MAX.
 * The `: Sequence<...>` context line was garbled in the copy under review
   and has been restored from the upstream file -- TODO confirm it matches.
 * The `index` blob hash below predates the fix and is not recomputed.

 src/xenia/cpu/backend/x64/x64_sequences.cc | 31 ++++++++++----------
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc
index 70956b52b..d91a674f1 100644
--- a/src/xenia/cpu/backend/x64/x64_sequences.cc
+++ b/src/xenia/cpu/backend/x64/x64_sequences.cc
@@ -317,31 +317,22 @@ struct CONVERT_I32_F64
 struct CONVERT_I64_F64
     : Sequence<CONVERT_I64_F64, I<OPCODE_CONVERT, I64Op, F64Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    // Copy src1.
-    e.movq(e.rcx, i.src1);
-
     // TODO(benvanik): saturation check? cvtt* (trunc?)
+    // Zero rax up front: xor rewrites EFLAGS, so it cannot follow vcomisd.
+    e.xor_(e.eax, e.eax);
+    e.vcomisd(i.src1, e.GetXmmConstPtr(XmmConst::XMMZero));
     if (i.instr->flags == ROUND_TO_ZERO) {
       e.vcvttsd2si(i.dest, i.src1);
-    } else {
+    }
+    else {
       e.vcvtsd2si(i.dest, i.src1);
     }
-
-    // 0x8000000000000000
-    e.mov(e.rax, 0x1);
-    e.shl(e.rax, 63);
-
-    // Saturate positive overflow
-    // TODO(DrChat): Find a shorter equivalent sequence.
-    // if (result ind. && src1 >= 0)
-    //   result = 0x7FFFFFFFFFFFFFFF;
-    e.cmp(e.rax, i.dest);
-    e.sete(e.al);
-    e.movzx(e.rax, e.al);
-    e.shr(e.rcx, 63);
-    e.xor_(e.rcx, 0x01);
-    e.and_(e.rax, e.rcx);
-
+    // cl = 1 unless src1 < 0 or NaN (CF from vcomisd; the convert keeps EFLAGS).
+    e.setnc(e.cl);
+    // OF iff dest == INT64_MIN (indefinite); sub below wraps that to INT64_MAX.
+    e.cmp(i.dest, 1);
+    e.seto(e.al);
+    e.and_(e.al, e.cl);
     e.sub(i.dest, e.rax);
   }
 };