Adding flag to make LOAD/STORE also perform a byte swap.

This commit is contained in:
Ben Vanik 2015-06-15 18:56:08 -07:00
parent 84fa09a2db
commit ddaf08ca8d
4 changed files with 81 additions and 37 deletions

View File

@ -1523,7 +1523,11 @@ EMITTER(LOAD_I8, MATCH(I<OPCODE_LOAD, I8<>, I64<>>)) {
EMITTER(LOAD_I16, MATCH(I<OPCODE_LOAD, I16<>, I64<>>)) { EMITTER(LOAD_I16, MATCH(I<OPCODE_LOAD, I16<>, I64<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
e.mov(i.dest, e.word[addr]); if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
e.movbe(i.dest, e.word[addr]);
} else {
e.mov(i.dest, e.word[addr]);
}
if (IsTracingData()) { if (IsTracingData()) {
e.mov(e.r8w, i.dest); e.mov(e.r8w, i.dest);
e.lea(e.rdx, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]);
@ -1534,7 +1538,11 @@ EMITTER(LOAD_I16, MATCH(I<OPCODE_LOAD, I16<>, I64<>>)) {
EMITTER(LOAD_I32, MATCH(I<OPCODE_LOAD, I32<>, I64<>>)) { EMITTER(LOAD_I32, MATCH(I<OPCODE_LOAD, I32<>, I64<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
e.mov(i.dest, e.dword[addr]); if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
e.movbe(i.dest, e.dword[addr]);
} else {
e.mov(i.dest, e.dword[addr]);
}
if (IsTracingData()) { if (IsTracingData()) {
e.mov(e.r8d, i.dest); e.mov(e.r8d, i.dest);
e.lea(e.rdx, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]);
@ -1545,7 +1553,11 @@ EMITTER(LOAD_I32, MATCH(I<OPCODE_LOAD, I32<>, I64<>>)) {
EMITTER(LOAD_I64, MATCH(I<OPCODE_LOAD, I64<>, I64<>>)) { EMITTER(LOAD_I64, MATCH(I<OPCODE_LOAD, I64<>, I64<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
e.mov(i.dest, e.qword[addr]); if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
e.movbe(i.dest, e.qword[addr]);
} else {
e.mov(i.dest, e.qword[addr]);
}
if (IsTracingData()) { if (IsTracingData()) {
e.mov(e.r8, i.dest); e.mov(e.r8, i.dest);
e.lea(e.rdx, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]);
@ -1557,6 +1569,9 @@ EMITTER(LOAD_F32, MATCH(I<OPCODE_LOAD, F32<>, I64<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
e.vmovss(i.dest, e.dword[addr]); e.vmovss(i.dest, e.dword[addr]);
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
assert_always("not implemented yet");
}
if (IsTracingData()) { if (IsTracingData()) {
e.lea(e.r8, e.dword[addr]); e.lea(e.r8, e.dword[addr]);
e.lea(e.rdx, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]);
@ -1568,6 +1583,9 @@ EMITTER(LOAD_F64, MATCH(I<OPCODE_LOAD, F64<>, I64<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
e.vmovsd(i.dest, e.qword[addr]); e.vmovsd(i.dest, e.qword[addr]);
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
assert_always("not implemented yet");
}
if (IsTracingData()) { if (IsTracingData()) {
e.lea(e.r8, e.qword[addr]); e.lea(e.r8, e.qword[addr]);
e.lea(e.rdx, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]);
@ -1580,6 +1598,10 @@ EMITTER(LOAD_V128, MATCH(I<OPCODE_LOAD, V128<>, I64<>>)) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
// TODO(benvanik): we should try to stick to movaps if possible. // TODO(benvanik): we should try to stick to movaps if possible.
e.vmovups(i.dest, e.ptr[addr]); e.vmovups(i.dest, e.ptr[addr]);
if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
// TODO(benvanik): find a way to do this without the memory load.
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMByteSwapMask));
}
if (IsTracingData()) { if (IsTracingData()) {
e.lea(e.r8, e.ptr[addr]); e.lea(e.r8, e.ptr[addr]);
e.lea(e.rdx, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]);
@ -1621,10 +1643,15 @@ EMITTER(STORE_I8, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I8<>>)) {
EMITTER(STORE_I16, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I16<>>)) { EMITTER(STORE_I16, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I16<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
if (i.src2.is_constant) { if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
e.mov(e.word[addr], i.src2.constant()); assert_false(i.src2.is_constant);
e.movbe(e.word[addr], i.src2);
} else { } else {
e.mov(e.word[addr], i.src2); if (i.src2.is_constant) {
e.mov(e.word[addr], i.src2.constant());
} else {
e.mov(e.word[addr], i.src2);
}
} }
if (IsTracingData()) { if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1); addr = ComputeMemoryAddress(e, i.src1);
@ -1637,10 +1664,15 @@ EMITTER(STORE_I16, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I16<>>)) {
EMITTER(STORE_I32, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I32<>>)) { EMITTER(STORE_I32, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I32<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
if (i.src2.is_constant) { if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
e.mov(e.dword[addr], i.src2.constant()); assert_false(i.src2.is_constant);
e.movbe(e.dword[addr], i.src2);
} else { } else {
e.mov(e.dword[addr], i.src2); if (i.src2.is_constant) {
e.mov(e.dword[addr], i.src2.constant());
} else {
e.mov(e.dword[addr], i.src2);
}
} }
if (IsTracingData()) { if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1); addr = ComputeMemoryAddress(e, i.src1);
@ -1653,10 +1685,15 @@ EMITTER(STORE_I32, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I32<>>)) {
EMITTER(STORE_I64, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I64<>>)) { EMITTER(STORE_I64, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I64<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
if (i.src2.is_constant) { if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
e.MovMem64(addr, i.src2.constant()); assert_false(i.src2.is_constant);
e.movbe(e.qword[addr], i.src2);
} else { } else {
e.mov(e.qword[addr], i.src2); if (i.src2.is_constant) {
e.MovMem64(addr, i.src2.constant());
} else {
e.mov(e.qword[addr], i.src2);
}
} }
if (IsTracingData()) { if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1); addr = ComputeMemoryAddress(e, i.src1);
@ -1669,10 +1706,15 @@ EMITTER(STORE_I64, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I64<>>)) {
EMITTER(STORE_F32, MATCH(I<OPCODE_STORE, VoidOp, I64<>, F32<>>)) { EMITTER(STORE_F32, MATCH(I<OPCODE_STORE, VoidOp, I64<>, F32<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
if (i.src2.is_constant) { if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
e.mov(e.dword[addr], i.src2.value->constant.i32); assert_false(i.src2.is_constant);
assert_always("not yet implemented");
} else { } else {
e.vmovss(e.dword[addr], i.src2); if (i.src2.is_constant) {
e.mov(e.dword[addr], i.src2.value->constant.i32);
} else {
e.vmovss(e.dword[addr], i.src2);
}
} }
if (IsTracingData()) { if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1); addr = ComputeMemoryAddress(e, i.src1);
@ -1685,10 +1727,15 @@ EMITTER(STORE_F32, MATCH(I<OPCODE_STORE, VoidOp, I64<>, F32<>>)) {
EMITTER(STORE_F64, MATCH(I<OPCODE_STORE, VoidOp, I64<>, F64<>>)) { EMITTER(STORE_F64, MATCH(I<OPCODE_STORE, VoidOp, I64<>, F64<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
if (i.src2.is_constant) { if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
e.MovMem64(addr, i.src2.value->constant.i64); assert_false(i.src2.is_constant);
assert_always("not yet implemented");
} else { } else {
e.vmovsd(e.qword[addr], i.src2); if (i.src2.is_constant) {
e.MovMem64(addr, i.src2.value->constant.i64);
} else {
e.vmovsd(e.qword[addr], i.src2);
}
} }
if (IsTracingData()) { if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1); addr = ComputeMemoryAddress(e, i.src1);
@ -1701,11 +1748,17 @@ EMITTER(STORE_F64, MATCH(I<OPCODE_STORE, VoidOp, I64<>, F64<>>)) {
EMITTER(STORE_V128, MATCH(I<OPCODE_STORE, VoidOp, I64<>, V128<>>)) { EMITTER(STORE_V128, MATCH(I<OPCODE_STORE, VoidOp, I64<>, V128<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
if (i.src2.is_constant) { if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
e.LoadConstantXmm(e.xmm0, i.src2.constant()); assert_false(i.src2.is_constant);
e.vpshufb(e.xmm0, i.src2, e.GetXmmConstPtr(XMMByteSwapMask));
e.vmovaps(e.ptr[addr], e.xmm0); e.vmovaps(e.ptr[addr], e.xmm0);
} else { } else {
e.vmovaps(e.ptr[addr], i.src2); if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.vmovaps(e.ptr[addr], e.xmm0);
} else {
e.vmovaps(e.ptr[addr], i.src2);
}
} }
if (IsTracingData()) { if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1); addr = ComputeMemoryAddress(e, i.src1);

View File

@ -31,17 +31,8 @@ enum RoundMode {
ROUND_TO_MINUS_INFINITY, ROUND_TO_MINUS_INFINITY,
ROUND_TO_POSITIVE_INFINITY, ROUND_TO_POSITIVE_INFINITY,
}; };
enum LoadFlags { enum LoadStoreFlags {
LOAD_NO_ALIAS = (1 << 1), LOAD_STORE_BYTE_SWAP = 1 << 0,
LOAD_ALIGNED = (1 << 2),
LOAD_UNALIGNED = (1 << 3),
LOAD_VOLATILE = (1 << 4),
};
enum StoreFlags {
STORE_NO_ALIAS = (1 << 1),
STORE_ALIGNED = (1 << 2),
STORE_UNALIGNED = (1 << 3),
STORE_VOLATILE = (1 << 4),
}; };
enum PrefetchFlags { enum PrefetchFlags {
PREFETCH_LOAD = (1 << 1), PREFETCH_LOAD = (1 << 1),

View File

@ -219,10 +219,10 @@ DEFINE_OPCODE(
OPCODE_FLAG_MEMORY) OPCODE_FLAG_MEMORY)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_STORE_MMIO, OPCODE_STORE_MMIO,
"store_mmio", "store_mmio",
OPCODE_SIG_X_O_O_V, OPCODE_SIG_X_O_O_V,
OPCODE_FLAG_MEMORY) OPCODE_FLAG_MEMORY)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD, OPCODE_LOAD,

2
third_party/xbyak vendored

@ -1 +1 @@
Subproject commit 9d5bc03b264bc66434337db634af82e5f67db217 Subproject commit 77a774de97741027a90b12fd70c6d7ac0c20a431