Improve perf of vcmp cr6 update.
This commit is contained in:
parent
e2842ea4b4
commit
d3bf7813ea
|
@ -442,38 +442,27 @@ XEEMITTER(vcmpbfp128, VX128(6, 384), VX128_R)(X64Emitter& e, X86Compiler&
|
||||||
void InstrEmit_vcmp_cr6_(X64Emitter& e, X86Compiler& c, XmmVar& v) {
|
void InstrEmit_vcmp_cr6_(X64Emitter& e, X86Compiler& c, XmmVar& v) {
|
||||||
// Testing for all 1's and all 0's.
|
// Testing for all 1's and all 0's.
|
||||||
// if (Rc) CR6 = all_equal | 0 | none_equal | 0
|
// if (Rc) CR6 = all_equal | 0 | none_equal | 0
|
||||||
// Since none_equal and all_equal are mutually exclusive we optimize
|
|
||||||
// a bit here. This is still terrible.
|
|
||||||
GpVar lo(c.newGpVar());
|
|
||||||
GpVar hi(c.newGpVar());
|
|
||||||
c.pextrq(hi.m64(), v, imm(1));
|
|
||||||
c.movq(lo.m64(), v);
|
|
||||||
|
|
||||||
GpVar gt(c.newGpVar());
|
GpVar gt(c.newGpVar());
|
||||||
GpVar cr(c.newGpVar());
|
GpVar cr(c.newGpVar());
|
||||||
c.xor_(cr, cr);
|
c.xor_(cr, cr);
|
||||||
Label skip(c.newLabel());
|
|
||||||
|
|
||||||
// cmp with 0xFF... and set all_equal
|
// We do this fast by extracting the high bits (as all bits are the same)
|
||||||
c.mov(gt, lo);
|
// and testing those.
|
||||||
c.and_(gt, hi);
|
GpVar bmask(c.newGpVar());
|
||||||
c.test(gt, imm(0));
|
c.pmovmskb(bmask, v);
|
||||||
// !eq = all_equal
|
|
||||||
// all_equal= 0b1000
|
|
||||||
c.mov(gt, imm(0x8)); // 0b1000
|
|
||||||
c.cmovne(cr, gt);
|
|
||||||
c.jne(skip);
|
|
||||||
|
|
||||||
// cmp with 0 and set none_equal
|
// zero = none_equal
|
||||||
c.mov(gt, lo);
|
c.test(bmask, bmask);
|
||||||
c.or_(gt, hi);
|
c.mov(gt, imm(0x2)); // none_equal=0b0010
|
||||||
c.test(gt, imm(0));
|
c.cmovz(cr, gt);
|
||||||
// eq = none_equal
|
|
||||||
// none_equal= 0b0010
|
// !zero = all_equal
|
||||||
c.mov(gt, imm(0x2)); // 0b0010
|
c.not_(bmask);
|
||||||
c.cmove(cr, gt);
|
c.test(bmask, bmask);
|
||||||
|
c.mov(gt, imm(0x8)); // all_equal=0b1000
|
||||||
|
c.cmovz(cr, gt);
|
||||||
|
|
||||||
c.bind(skip);
|
|
||||||
e.update_cr_value(6, cr);
|
e.update_cr_value(6, cr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue