Significantly faster byte swapping on xmm read/write.
This commit is contained in:
parent
d3bf7813ea
commit
e8429b75e0
|
@ -1869,6 +1869,7 @@ GpVar X64Emitter::ReadMemory(
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Shuffle-control mask for PSHUFB that reverses the order of all 16 bytes of
// an XMM register: result byte i is taken from source byte (15 - i).
// _mm_set_epi8 takes its arguments from the most significant byte (byte 15)
// down to byte 0, so listing 0..15 stores index 15 in byte 0 and index 0 in
// byte 15.
// The mask is never written after initialization, so it is declared const;
// the emitter only takes its address to use it as a pshufb memory operand.
static const __m128i __xmm_byte_swap = _mm_set_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||||
XmmVar X64Emitter::ReadMemoryXmm(
|
XmmVar X64Emitter::ReadMemoryXmm(
|
||||||
uint32_t cia, GpVar& addr, uint32_t alignment) {
|
uint32_t cia, GpVar& addr, uint32_t alignment) {
|
||||||
X86Compiler& c = compiler_;
|
X86Compiler& c = compiler_;
|
||||||
|
@ -1892,15 +1893,9 @@ XmmVar X64Emitter::ReadMemoryXmm(
|
||||||
c.movaps(value, xmmword_ptr(real_address));
|
c.movaps(value, xmmword_ptr(real_address));
|
||||||
|
|
||||||
// Byte swap.
|
// Byte swap.
|
||||||
// http://www.asmcommunity.net/forums/topic/?id=29743
|
GpVar byte_swap_addr(c.newGpVar());
|
||||||
XmmVar temp(c.newXmmVar());
|
c.mov(byte_swap_addr, imm((sysint_t)&__xmm_byte_swap));
|
||||||
c.pshufd(value, value, imm(0x1B)); // 00011011b
|
c.pshufb(value, xmmword_ptr(byte_swap_addr));
|
||||||
c.pshuflw(value, value, imm(0xB1)); // 10110001b
|
|
||||||
c.pshufhw(value, value, imm(0xB1)); // 10110001b
|
|
||||||
c.movdqa(temp, value);
|
|
||||||
c.psrlw(temp, imm(8));
|
|
||||||
c.psllw(value, imm(8));
|
|
||||||
c.por(value, temp);
|
|
||||||
|
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
@ -1971,14 +1966,9 @@ void X64Emitter::WriteMemoryXmm(
|
||||||
|
|
||||||
// Byte swap.
|
// Byte swap.
|
||||||
// TODO(benvanik): clone value before modifying it?
|
// TODO(benvanik): clone value before modifying it?
|
||||||
XmmVar temp(c.newXmmVar());
|
GpVar byte_swap_addr(c.newGpVar());
|
||||||
c.pshufd(value, value, imm(0x1B)); // 00011011b
|
c.mov(byte_swap_addr, imm((sysint_t)&__xmm_byte_swap));
|
||||||
c.pshuflw(value, value, imm(0xB1)); // 10110001b
|
c.pshufb(value, xmmword_ptr(byte_swap_addr));
|
||||||
c.pshufhw(value, value, imm(0xB1)); // 10110001b
|
|
||||||
c.movdqa(temp, value);
|
|
||||||
c.psrlw(temp, imm(8));
|
|
||||||
c.psllw(value, imm(8));
|
|
||||||
c.por(value, temp);
|
|
||||||
|
|
||||||
c.movaps(xmmword_ptr(real_address), value);
|
c.movaps(xmmword_ptr(real_address), value);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue