Jit64: divwx - Optimize constant divisor
Optimize division by a constant into multiplication. This method is also
used by GCC and LLVM.
We also add dedicated paths for divisors 0, 1, and -1, because the
multiplication method does not work for them. These divisors don't occur
very often, but handling them is necessary for correctness.
- Division by 1
Before:
41 BF 01 00 00 00 mov r15d,1
41 8B C5 mov eax,r13d
45 85 FF test r15d,r15d
74 0D je overflow
3D 00 00 00 80 cmp eax,80000000h
75 0E jne normal_path
41 83 FF FF cmp r15d,0FFFFFFFFh
75 08 jne normal_path
overflow:
C1 F8 1F sar eax,1Fh
44 8B F8 mov r15d,eax
EB 07 jmp done
normal_path:
99 cdq
41 F7 FF idiv eax,r15d
44 8B F8 mov r15d,eax
done:
After:
45 8B FD mov r15d,r13d
- Division by 30307
Before:
41 BA 63 76 00 00 mov r10d,7663h
41 8B C5 mov eax,r13d
45 85 D2 test r10d,r10d
74 0D je overflow
3D 00 00 00 80 cmp eax,80000000h
75 0E jne normal_path
41 83 FA FF cmp r10d,0FFFFFFFFh
75 08 jne normal_path
overflow:
C1 F8 1F sar eax,1Fh
44 8B C0 mov r8d,eax
EB 07 jmp done
normal_path:
99 cdq
41 F7 FA idiv eax,r10d
44 8B C0 mov r8d,eax
done:
After:
49 63 C5 movsxd rax,r13d
48 69 C0 65 6B 32 45 imul rax,rax,45326B65h
4C 8B C0 mov r8,rax
48 C1 E8 3F shr rax,3Fh
49 C1 F8 2D sar r8,2Dh
44 03 C0 add r8d,eax
- Division by 30323
Before:
41 BA 73 76 00 00 mov r10d,7673h
41 8B C5 mov eax,r13d
45 85 D2 test r10d,r10d
74 0D je overflow
3D 00 00 00 80 cmp eax,80000000h
75 0E jne normal_path
41 83 FA FF cmp r10d,0FFFFFFFFh
75 08 jne normal_path
overflow:
C1 F8 1F sar eax,1Fh
44 8B C0 mov r8d,eax
EB 07 jmp done
normal_path:
99 cdq
41 F7 FA idiv eax,r10d
44 8B C0 mov r8d,eax
done:
After:
49 63 C5 movsxd rax,r13d
4C 69 C0 19 25 52 8A imul r8,rax,0FFFFFFFF8A522519h
49 C1 E8 20 shr r8,20h
44 03 C0 add r8d,eax
C1 E8 1F shr eax,1Fh
41 C1 F8 0E sar r8d,0Eh
44 03 C0 add r8d,eax