From 43f9cebb0ccb7b1b41616baeeaa946d2968ba42d Mon Sep 17 00:00:00 2001 From: DrChat Date: Fri, 16 Feb 2018 12:51:39 -0600 Subject: [PATCH] [JIT] Rewrite CNTLZ and remove one branch --- src/xenia/cpu/backend/x64/x64_sequences.cc | 84 ++++++++-------------- 1 file changed, 28 insertions(+), 56 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index ac2a158c0..cc868d758 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -6484,24 +6484,17 @@ struct CNTLZ_I8 : Sequence> { e.lzcnt(i.dest.reg().cvt16(), i.dest.reg().cvt16()); e.sub(i.dest, 8); } else { - Xbyak::Label jz, jend; - + Xbyak::Label end; e.inLocalLabel(); - // BSR: searches $2 until MSB 1 found, stores idx (from bit 0) in $1 - // if input is 0, results are undefined (and ZF is set) - e.bsr(i.dest, i.src1); - e.jz(jz); // Jump if zero + e.bsr(e.rax, i.src1); // ZF set if i.src1 is 0 + e.mov(i.dest, 0x8); + e.jz(end); - // Invert the result (7 - i.dest) - e.xor_(i.dest, 0x7); - e.jmp(jend); // Jmp to end + e.xor_(e.rax, 0x7); + e.mov(i.dest, e.rax); - // src1 was zero, so write 8 to the dest reg - e.L(jz); - e.mov(i.dest, 8); - - e.L(jend); + e.L(end); e.outLocalLabel(); } } @@ -6512,24 +6505,17 @@ struct CNTLZ_I16 : Sequence> { // LZCNT: searches $2 until MSB 1 found, stores idx (from last bit) in $1 e.lzcnt(i.dest.reg().cvt32(), i.src1); } else { - Xbyak::Label jz, jend; - + Xbyak::Label end; e.inLocalLabel(); - // BSR: searches $2 until MSB 1 found, stores idx (from bit 0) in $1 - // if input is 0, results are undefined (and ZF is set) - e.bsr(i.dest, i.src1); - e.jz(jz); // Jump if zero + e.bsr(e.rax, i.src1); // ZF set if i.src1 is 0 + e.mov(i.dest, 0x10); + e.jz(end); - // Invert the result (15 - i.dest) - e.xor_(i.dest, 0xF); - e.jmp(jend); // Jmp to end + e.xor_(e.rax, 0x0F); + e.mov(i.dest, e.rax); - // src1 was zero, so write 16 to the dest reg - e.L(jz); - e.mov(i.dest, 16); - - e.L(jend); + e.L(end); e.outLocalLabel(); } } @@ -6539,24 +6525,17 @@ struct CNTLZ_I32 : Sequence> { if (e.IsFeatureEnabled(kX64EmitLZCNT)) { e.lzcnt(i.dest.reg().cvt32(), i.src1); } else { - Xbyak::Label jz, jend; - + Xbyak::Label end; e.inLocalLabel(); - // BSR: searches $2 until MSB 1 found, stores idx (from bit 0) in $1 - // if input is 0, results are undefined (and ZF is set) - e.bsr(i.dest, i.src1); - e.jz(jz); // Jump if zero + e.bsr(e.rax, i.src1); // ZF set if i.src1 is 0 + e.mov(i.dest, 0x20); + e.jz(end); - // Invert the result (31 - i.dest) - e.xor_(i.dest, 0x1F); - e.jmp(jend); // Jmp to end + e.xor_(e.rax, 0x1F); + e.mov(i.dest, e.rax); - // src1 was zero, so write 32 to the dest reg - e.L(jz); - e.mov(i.dest, 32); - - e.L(jend); + e.L(end); e.outLocalLabel(); } } @@ -6566,24 +6545,17 @@ struct CNTLZ_I64 : Sequence> { if (e.IsFeatureEnabled(kX64EmitLZCNT)) { e.lzcnt(i.dest.reg().cvt64(), i.src1); } else { - Xbyak::Label jz, jend; - + Xbyak::Label end; e.inLocalLabel(); - // BSR: searches $2 until MSB 1 found, stores idx (from bit 0) in $1 - // if input is 0, results are undefined (and ZF is set) - e.bsr(i.dest, i.src1); - e.jz(jz); // Jump if zero + e.bsr(e.rax, i.src1); // ZF set if i.src1 is 0 + e.mov(i.dest, 0x40); + e.jz(end); - // Invert the result (63 - i.dest) - e.xor_(i.dest, 0x3F); - e.jmp(jend); // Jmp to end + e.xor_(e.rax, 0x3F); + e.mov(i.dest, e.rax); - // src1 was zero, so write 64 to the dest reg - e.L(jz); - e.mov(i.dest, 64); - - e.L(jend); + e.L(end); e.outLocalLabel(); } }