Merge pull request #1083 from FioraAeterna/lzcnt
Add LZCNT support, use in cntlzw
This commit is contained in:
commit
4e16abd742
|
@ -197,6 +197,7 @@ void CPUInfo::Detect()
|
|||
// Check for more features.
|
||||
__cpuid(cpu_id, 0x80000001);
|
||||
if (cpu_id[2] & 1) bLAHFSAHF64 = true;
|
||||
if ((cpu_id[2] >> 5) & 1) bLZCNT = true;
|
||||
if ((cpu_id[3] >> 29) & 1) bLongMode = true;
|
||||
}
|
||||
|
||||
|
|
|
@ -750,12 +750,14 @@ void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);}
|
|||
// NEG: forwards to the shared WriteMulDivType encoder with ext = 3.
// `bits` is the operand width and `src` the operand; both are passed through unchanged.
void XEmitter::NEG(int bits, OpArg src)
{
	WriteMulDivType(bits, src, 3);
}
|
||||
// NOT: forwards to the shared WriteMulDivType encoder with ext = 2.
// `bits` is the operand width and `src` the operand; both are passed through unchanged.
void XEmitter::NOT(int bits, OpArg src)
{
	WriteMulDivType(bits, src, 2);
}
|
||||
|
||||
void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2)
|
||||
void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep)
|
||||
{
|
||||
_assert_msg_(DYNA_REC, !src.IsImm(), "WriteBitSearchType - Imm argument");
|
||||
src.operandReg = (u8)dest;
|
||||
if (bits == 16)
|
||||
Write8(0x66);
|
||||
if (rep)
|
||||
Write8(0xF3);
|
||||
src.WriteRex(this, bits, bits);
|
||||
Write8(0x0F);
|
||||
Write8(byte2);
|
||||
|
@ -772,6 +774,19 @@ void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src)
|
|||
// BSF: bit scan from the bottom bit to the top bit.
// Encoded by WriteBitSearchType with second opcode byte 0xBC.
void XEmitter::BSF(int bits, X64Reg dest, OpArg src)
{
	WriteBitSearchType(bits, dest, src, 0xBC);
}
|
||||
// BSR: bit scan from the top bit to the bottom bit.
// Encoded by WriteBitSearchType with second opcode byte 0xBD.
void XEmitter::BSR(int bits, X64Reg dest, OpArg src)
{
	WriteBitSearchType(bits, dest, src, 0xBD);
}
|
||||
|
||||
void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src)
|
||||
{
|
||||
if (!cpu_info.bBMI1)
|
||||
PanicAlert("Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
|
||||
WriteBitSearchType(bits, dest, src, 0xBC, true);
|
||||
}
|
||||
void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src)
|
||||
{
|
||||
if (!cpu_info.bLZCNT)
|
||||
PanicAlert("Trying to use LZCNT on a system that doesn't support it. Bad programmer.");
|
||||
WriteBitSearchType(bits, dest, src, 0xBD, true);
|
||||
}
|
||||
|
||||
void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src)
|
||||
{
|
||||
_assert_msg_(DYNA_REC, !src.IsImm(), "MOVSX - Imm argument");
|
||||
|
|
|
@ -266,7 +266,7 @@ private:
|
|||
void WriteSimple1Byte(int bits, u8 byte, X64Reg reg);
|
||||
void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
|
||||
void WriteMulDivType(int bits, OpArg src, int ext);
|
||||
void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2);
|
||||
void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
|
||||
void WriteShift(int bits, OpArg dest, OpArg &shift, int ext);
|
||||
void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext);
|
||||
void WriteMXCSR(OpArg arg, int ext);
|
||||
|
@ -454,6 +454,11 @@ public:
|
|||
// Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE.
|
||||
void MOVBE(int dbits, const OpArg& dest, const OpArg& src);
|
||||
|
||||
// Available only on AMD >= Phenom or Intel >= Haswell
|
||||
void LZCNT(int bits, X64Reg dest, OpArg src);
|
||||
// Note: this one is actually part of BMI1
|
||||
void TZCNT(int bits, X64Reg dest, OpArg src);
|
||||
|
||||
// WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
|
||||
void STMXCSR(OpArg memloc);
|
||||
void LDMXCSR(OpArg memloc);
|
||||
|
|
|
@ -1905,13 +1905,19 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
|
|||
else
|
||||
{
|
||||
gpr.Lock(a, s);
|
||||
gpr.KillImmediate(s, true, false);
|
||||
gpr.BindToRegister(a, (a == s), true);
|
||||
BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s));
|
||||
gpr.BindToRegister(a, a == s, true);
|
||||
if (cpu_info.bLZCNT)
|
||||
{
|
||||
LZCNT(32, gpr.RX(a), gpr.R(s));
|
||||
}
|
||||
else
|
||||
{
|
||||
BSR(32, gpr.RX(a), gpr.R(s));
|
||||
FixupBranch gotone = J_CC(CC_NZ);
|
||||
MOV(32, gpr.R(a), Imm32(63));
|
||||
SetJumpTarget(gotone);
|
||||
XOR(32, gpr.R(a), Imm8(0x1f)); // flip order
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
|
|
@ -318,41 +318,35 @@ TEST_F(x64EmitterTest, CMOVcc_Register)
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(x64EmitterTest, BSF)
|
||||
{
|
||||
emitter->BSF(64, R12, R(RAX));
|
||||
emitter->BSF(32, R12, R(RAX));
|
||||
emitter->BSF(16, R12, R(RAX));
|
||||
|
||||
emitter->BSF(64, R12, MatR(RAX));
|
||||
emitter->BSF(32, R12, MatR(RAX));
|
||||
emitter->BSF(16, R12, MatR(RAX));
|
||||
|
||||
ExpectDisassembly("bsf r12, rax "
|
||||
"bsf r12d, eax "
|
||||
"bsf r12w, ax "
|
||||
"bsf r12, qword ptr ds:[rax] "
|
||||
"bsf r12d, dword ptr ds:[rax] "
|
||||
"bsf r12w, word ptr ds:[rax]");
|
||||
/*
 * BITSEARCH_TEST(Name) defines TEST_F(x64EmitterTest, Name) for a bit-search
 * style emitter method called Name.
 *
 * For each operand width (16, 32, 64) and each named register in the matching
 * table (reg16names / reg32names / reg64names) the test emits three forms:
 *   Name reg, rax      Name rax, reg      Name reg, [rax]
 * and then checks them with a single ExpectDisassembly call per register.
 * The expected mnemonic is the stringized macro argument exactly as written.
 *
 * Keep comments out of the macro body: a line comment before a trailing
 * backslash would swallow the continuation line.
 */
#define BITSEARCH_TEST(Name) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		struct { \
			int bits; \
			std::vector<NamedReg> regs; \
			std::string size; \
			std::string rax_name; \
		} operand_sets[] = { \
			{ 16, reg16names, "word", "ax" }, \
			{ 32, reg32names, "dword", "eax" }, \
			{ 64, reg64names, "qword", "rax" }, \
		}; \
		for (const auto& set : operand_sets) \
		{ \
			for (const auto& named : set.regs) \
			{ \
				emitter->Name(set.bits, named.reg, R(RAX)); \
				emitter->Name(set.bits, RAX, R(named.reg)); \
				emitter->Name(set.bits, named.reg, MatR(RAX)); \
				const std::string reg_from_rax = \
					#Name " " + named.name + ", " + set.rax_name + " "; \
				const std::string rax_from_reg = \
					#Name " " + set.rax_name + ", " + named.name + " "; \
				const std::string reg_from_mem = \
					#Name " " + named.name + ", " + set.size + " ptr ds:[rax] "; \
				ExpectDisassembly(reg_from_rax + rax_from_reg + reg_from_mem); \
			} \
		} \
	}
|
||||
|
||||
TEST_F(x64EmitterTest, BSR)
|
||||
{
|
||||
emitter->BSR(64, R12, R(RAX));
|
||||
emitter->BSR(32, R12, R(RAX));
|
||||
emitter->BSR(16, R12, R(RAX));
|
||||
|
||||
emitter->BSR(64, R12, MatR(RAX));
|
||||
emitter->BSR(32, R12, MatR(RAX));
|
||||
emitter->BSR(16, R12, MatR(RAX));
|
||||
|
||||
ExpectDisassembly("bsr r12, rax "
|
||||
"bsr r12d, eax "
|
||||
"bsr r12w, ax "
|
||||
"bsr r12, qword ptr ds:[rax] "
|
||||
"bsr r12d, dword ptr ds:[rax] "
|
||||
"bsr r12w, word ptr ds:[rax]");
|
||||
}
|
||||
// Instantiate the generated emitter test once per bit-search mnemonic; each
// line expands to one TEST_F(x64EmitterTest, <Name>) through BITSEARCH_TEST.
BITSEARCH_TEST(BSR);
|
||||
BITSEARCH_TEST(BSF);
|
||||
BITSEARCH_TEST(LZCNT);
|
||||
BITSEARCH_TEST(TZCNT);
|
||||
|
||||
TEST_F(x64EmitterTest, PREFETCH)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue