Optimized CONVERT_I64_F64 (F64 to I64 conversion) with neat overflow trick

Reduced instruction count from 11 to 8, eliminated a movq stall.
This commit is contained in:
chrisps 2020-01-15 15:57:09 -08:00 committed by Gliniak
parent 9dfbef8acf
commit 3ad80810b5
1 changed file with 8 additions and 19 deletions

View File

@@ -317,31 +317,20 @@ struct CONVERT_I32_F64
struct CONVERT_I64_F64 struct CONVERT_I64_F64
: Sequence<CONVERT_I64_F64, I<OPCODE_CONVERT, I64Op, F64Op>> { : Sequence<CONVERT_I64_F64, I<OPCODE_CONVERT, I64Op, F64Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
// Copy src1. e.xor_(e.eax, e.eax);
e.movq(e.rcx, i.src1);
// TODO(benvanik): saturation check? cvtt* (trunc?) e.vcomisd(i.src1, e.GetXmmConstPtr(XmmConst::XMMZero));
if (i.instr->flags == ROUND_TO_ZERO) { if (i.instr->flags == ROUND_TO_ZERO) {
e.vcvttsd2si(i.dest, i.src1); e.vcvttsd2si(i.dest, i.src1);
} else { } else {
e.vcvtsd2si(i.dest, i.src1); e.vcvtsd2si(i.dest, i.src1);
} }
// cf set if less than
// 0x8000000000000000 e.setnc(e.cl);
e.mov(e.rax, 0x1); e.cmp(i.dest, -1LL);
e.shl(e.rax, 63); // if dest == 0x80000000 and not inp < 0 then dest = 0x7FFFFFFF
e.seto(e.al);
// Saturate positive overflow e.and_(e.al, e.cl);
// TODO(DrChat): Find a shorter equivalent sequence.
// if (result ind. && src1 >= 0)
// result = 0x7FFFFFFFFFFFFFFF;
e.cmp(e.rax, i.dest);
e.sete(e.al);
e.movzx(e.rax, e.al);
e.shr(e.rcx, 63);
e.xor_(e.rcx, 0x01);
e.and_(e.rax, e.rcx);
e.sub(i.dest, e.rax); e.sub(i.dest, e.rax);
} }
}; };