From ddaf08ca8daecc4665188b8b6ee53e40d4089e78 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Mon, 15 Jun 2015 18:56:08 -0700 Subject: [PATCH] Adding flag to make LOAD/STORE also perform a byte swap. --- src/xenia/cpu/backend/x64/x64_sequences.cc | 95 +++++++++++++++++----- src/xenia/cpu/hir/opcodes.h | 13 +-- src/xenia/cpu/hir/opcodes.inl | 8 +- third_party/xbyak | 2 +- 4 files changed, 81 insertions(+), 37 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 65fd55efa..b8ab2456d 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -1523,7 +1523,11 @@ EMITTER(LOAD_I8, MATCH(I, I64<>>)) { EMITTER(LOAD_I16, MATCH(I, I64<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); - e.mov(i.dest, e.word[addr]); + if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) { + e.movbe(i.dest, e.word[addr]); + } else { + e.mov(i.dest, e.word[addr]); + } if (IsTracingData()) { e.mov(e.r8w, i.dest); e.lea(e.rdx, e.ptr[addr]); @@ -1534,7 +1538,11 @@ EMITTER(LOAD_I16, MATCH(I, I64<>>)) { EMITTER(LOAD_I32, MATCH(I, I64<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); - e.mov(i.dest, e.dword[addr]); + if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) { + e.movbe(i.dest, e.dword[addr]); + } else { + e.mov(i.dest, e.dword[addr]); + } if (IsTracingData()) { e.mov(e.r8d, i.dest); e.lea(e.rdx, e.ptr[addr]); @@ -1545,7 +1553,11 @@ EMITTER(LOAD_I32, MATCH(I, I64<>>)) { EMITTER(LOAD_I64, MATCH(I, I64<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); - e.mov(i.dest, e.qword[addr]); + if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) { + e.movbe(i.dest, e.qword[addr]); + } else { + e.mov(i.dest, e.qword[addr]); + } if (IsTracingData()) { e.mov(e.r8, i.dest); e.lea(e.rdx, e.ptr[addr]); @@ -1557,6 +1569,9 @@ EMITTER(LOAD_F32, MATCH(I, I64<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); e.vmovss(i.dest, e.dword[addr]); + if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) { + assert_always("not implemented yet"); + } if (IsTracingData()) { e.lea(e.r8, e.dword[addr]); e.lea(e.rdx, e.ptr[addr]); @@ -1568,6 +1583,9 @@ EMITTER(LOAD_F64, MATCH(I, I64<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); e.vmovsd(i.dest, e.qword[addr]); + if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) { + assert_always("not implemented yet"); + } if (IsTracingData()) { e.lea(e.r8, e.qword[addr]); e.lea(e.rdx, e.ptr[addr]); @@ -1580,6 +1598,10 @@ EMITTER(LOAD_V128, MATCH(I, I64<>>)) { auto addr = ComputeMemoryAddress(e, i.src1); // TODO(benvanik): we should try to stick to movaps if possible. e.vmovups(i.dest, e.ptr[addr]); + if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) { + // TODO(benvanik): find a way to do this without the memory load. + e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMByteSwapMask)); + } if (IsTracingData()) { e.lea(e.r8, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]); @@ -1621,10 +1643,15 @@ EMITTER(STORE_I8, MATCH(I, I8<>>)) { EMITTER(STORE_I16, MATCH(I, I16<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); - if (i.src2.is_constant) { - e.mov(e.word[addr], i.src2.constant()); + if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) { + assert_false(i.src2.is_constant); + e.movbe(e.word[addr], i.src2); } else { - e.mov(e.word[addr], i.src2); + if (i.src2.is_constant) { + e.mov(e.word[addr], i.src2.constant()); + } else { + e.mov(e.word[addr], i.src2); + } } if (IsTracingData()) { addr = ComputeMemoryAddress(e, i.src1); @@ -1637,10 +1664,15 @@ EMITTER(STORE_I16, MATCH(I, I16<>>)) { EMITTER(STORE_I32, MATCH(I, I32<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); - if (i.src2.is_constant) { - e.mov(e.dword[addr], i.src2.constant()); + if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) { + assert_false(i.src2.is_constant); + e.movbe(e.dword[addr], i.src2); } else { - e.mov(e.dword[addr], i.src2); + if (i.src2.is_constant) { + e.mov(e.dword[addr], i.src2.constant()); + } else { + e.mov(e.dword[addr], i.src2); + } } if (IsTracingData()) { addr = ComputeMemoryAddress(e, i.src1); @@ -1653,10 +1685,15 @@ EMITTER(STORE_I32, MATCH(I, I32<>>)) { EMITTER(STORE_I64, MATCH(I, I64<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); - if (i.src2.is_constant) { - e.MovMem64(addr, i.src2.constant()); + if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) { + assert_false(i.src2.is_constant); + e.movbe(e.qword[addr], i.src2); } else { - e.mov(e.qword[addr], i.src2); + if (i.src2.is_constant) { + e.MovMem64(addr, i.src2.constant()); + } else { + e.mov(e.qword[addr], i.src2); + } } if (IsTracingData()) { addr = ComputeMemoryAddress(e, i.src1); @@ -1669,10 +1706,15 @@ EMITTER(STORE_I64, MATCH(I, I64<>>)) { EMITTER(STORE_F32, MATCH(I, F32<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); - if (i.src2.is_constant) { - e.mov(e.dword[addr], i.src2.value->constant.i32); + if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) { + assert_false(i.src2.is_constant); + assert_always("not yet implemented"); } else { - e.vmovss(e.dword[addr], i.src2); + if (i.src2.is_constant) { + e.mov(e.dword[addr], i.src2.value->constant.i32); + } else { + e.vmovss(e.dword[addr], i.src2); + } } if (IsTracingData()) { addr = ComputeMemoryAddress(e, i.src1); @@ -1685,10 +1727,15 @@ EMITTER(STORE_F32, MATCH(I, F32<>>)) { EMITTER(STORE_F64, MATCH(I, F64<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); - if (i.src2.is_constant) { - e.MovMem64(addr, i.src2.value->constant.i64); + if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) { + assert_false(i.src2.is_constant); + assert_always("not yet implemented"); } else { - e.vmovsd(e.qword[addr], i.src2); + if (i.src2.is_constant) { + e.MovMem64(addr, i.src2.value->constant.i64); + } else { + e.vmovsd(e.qword[addr], i.src2); + } } if (IsTracingData()) { addr = ComputeMemoryAddress(e, i.src1); @@ -1701,11 +1748,17 @@ EMITTER(STORE_F64, MATCH(I, F64<>>)) { EMITTER(STORE_V128, MATCH(I, V128<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); - if (i.src2.is_constant) { - e.LoadConstantXmm(e.xmm0, i.src2.constant()); + if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) { + assert_false(i.src2.is_constant); + e.vpshufb(e.xmm0, i.src2, e.GetXmmConstPtr(XMMByteSwapMask)); e.vmovaps(e.ptr[addr], e.xmm0); } else { - e.vmovaps(e.ptr[addr], i.src2); + if (i.src2.is_constant) { + e.LoadConstantXmm(e.xmm0, i.src2.constant()); + e.vmovaps(e.ptr[addr], e.xmm0); + } else { + e.vmovaps(e.ptr[addr], i.src2); + } } if (IsTracingData()) { addr = ComputeMemoryAddress(e, i.src1); diff --git a/src/xenia/cpu/hir/opcodes.h b/src/xenia/cpu/hir/opcodes.h index 6ff5f8a36..ceeb3cca9 100644 --- a/src/xenia/cpu/hir/opcodes.h +++ b/src/xenia/cpu/hir/opcodes.h @@ -31,17 +31,8 @@ enum RoundMode { ROUND_TO_MINUS_INFINITY, ROUND_TO_POSITIVE_INFINITY, }; -enum LoadFlags { - LOAD_NO_ALIAS = (1 << 1), - LOAD_ALIGNED = (1 << 2), - LOAD_UNALIGNED = (1 << 3), - LOAD_VOLATILE = (1 << 4), -}; -enum StoreFlags { - STORE_NO_ALIAS = (1 << 1), - STORE_ALIGNED = (1 << 2), - STORE_UNALIGNED = (1 << 3), - STORE_VOLATILE = (1 << 4), +enum LoadStoreFlags { + LOAD_STORE_BYTE_SWAP = 1 << 0, }; enum PrefetchFlags { PREFETCH_LOAD = (1 << 1), diff --git a/src/xenia/cpu/hir/opcodes.inl b/src/xenia/cpu/hir/opcodes.inl index 58a23f6e4..da2fa978c 100644 --- a/src/xenia/cpu/hir/opcodes.inl +++ b/src/xenia/cpu/hir/opcodes.inl @@ -219,10 +219,10 @@ DEFINE_OPCODE( OPCODE_FLAG_MEMORY) DEFINE_OPCODE( - OPCODE_STORE_MMIO, - "store_mmio", - OPCODE_SIG_X_O_O_V, - OPCODE_FLAG_MEMORY) + OPCODE_STORE_MMIO, + "store_mmio", + OPCODE_SIG_X_O_O_V, + OPCODE_FLAG_MEMORY) DEFINE_OPCODE( OPCODE_LOAD, diff --git a/third_party/xbyak b/third_party/xbyak index 9d5bc03b2..77a774de9 160000 --- a/third_party/xbyak +++ b/third_party/xbyak @@ -1 +1 @@ -Subproject commit 9d5bc03b264bc66434337db634af82e5f67db217 +Subproject commit 77a774de97741027a90b12fd70c6d7ac0c20a431