From 526b1a85b7dba00361e3f785c33afbb925fc0904 Mon Sep 17 00:00:00 2001
From: Ben Vanik
Date: Sat, 28 Sep 2013 23:14:43 -0700
Subject: [PATCH] Simple altivec loads/stores.

---
Review notes (dropped by git-am, not part of the commit message):
- lvx and stvx128 now test the RA field of their own instruction format.
- ReadMemoryXmm/WriteMemoryXmm rebase the masked (16-byte aligned) address;
  the mask was computed but unused on loads and missing on stores.
- 128-bit moves use movdqu (memory) / movdqa (register); movq only
  transfers the low 64 bits of an XMM register.
- WriteMemoryXmm byte-swaps a copy so a cached vr local is not clobbered.
- Blob ids on the "index" lines are those of the original commit.

 src/xenia/cpu/x64/x64_emit_altivec.cc |  84 +++++++-----
 src/xenia/cpu/x64/x64_emitter.cc      | 193 +++++++++++++++++++++++---
 src/xenia/cpu/x64/x64_emitter.h       |   8 ++
 3 files changed, 236 insertions(+), 49 deletions(-)

diff --git a/src/xenia/cpu/x64/x64_emit_altivec.cc b/src/xenia/cpu/x64/x64_emit_altivec.cc
index 18df5010e..60c1a83da 100644
--- a/src/xenia/cpu/x64/x64_emit_altivec.cc
+++ b/src/xenia/cpu/x64/x64_emit_altivec.cc
@@ -94,23 +94,37 @@ XEEMITTER(lvsr128, VX128_1(4, 67), VX128_1)(X64Emitter& e, X86Compiler&
 }
 
 XEEMITTER(lvx, 0x7C0000CE, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  GpVar ea(c.newGpVar());
+  c.mov(ea, e.gpr_value(i.X.RB));
+  if (i.X.RA) {
+    c.add(ea, e.gpr_value(i.X.RA));
+  }
+  XmmVar v = e.ReadMemoryXmm(i.address, ea, 4);
+  e.update_vr_value(i.X.RT, v);
+
+  return 0;
 }
 
 XEEMITTER(lvx128, VX128_1(4, 195), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  const uint32_t vd = i.VX128_1.VD128l | (i.VX128_1.VD128h << 5);
+
+  GpVar ea(c.newGpVar());
+  c.mov(ea, e.gpr_value(i.VX128_1.RB));
+  if (i.VX128_1.RA) {
+    c.add(ea, e.gpr_value(i.VX128_1.RA));
+  }
+  XmmVar v = e.ReadMemoryXmm(i.address, ea, 4);
+  e.update_vr_value(vd, v);
+
+  return 0;
 }
 
 XEEMITTER(lvxl, 0x7C0002CE, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_lvx(e, c, i);
 }
 
 XEEMITTER(lvxl128, VX128_1(4, 707), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_lvx128(e, c, i);
 }
 
 XEEMITTER(stvebx, 0x7C00010E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
@@ -134,23 +148,37 @@ XEEMITTER(stvewx128, VX128_1(4, 387), VX128_1)(X64Emitter& e, X86Compiler&
 }
 
 XEEMITTER(stvx, 0x7C0001CE, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  GpVar ea(c.newGpVar());
+  c.mov(ea, e.gpr_value(i.X.RB));
+  if (i.X.RA) {
+    c.add(ea, e.gpr_value(i.X.RA));
+  }
+  XmmVar v = e.vr_value(i.X.RT);
+  e.WriteMemoryXmm(i.address, ea, 4, v);
+
+  return 0;
 }
 
 XEEMITTER(stvx128, VX128_1(4, 451), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  const uint32_t vd = i.VX128_1.VD128l | (i.VX128_1.VD128h << 5);
+
+  GpVar ea(c.newGpVar());
+  c.mov(ea, e.gpr_value(i.VX128_1.RB));
+  if (i.VX128_1.RA) {
+    c.add(ea, e.gpr_value(i.VX128_1.RA));
+  }
+  XmmVar v = e.vr_value(vd);
+  e.WriteMemoryXmm(i.address, ea, 4, v);
+
+  return 0;
 }
 
 XEEMITTER(stvxl, 0x7C0003CE, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_stvx(e, c, i);
 }
 
 XEEMITTER(stvxl128, VX128_1(4, 963), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_stvx128(e, c, i);
 }
 
 XEEMITTER(lvlx, 0x7C00040E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
@@ -164,13 +192,11 @@ XEEMITTER(lvlx128, VX128_1(4, 1027), VX128_1)(X64Emitter& e, X86Compiler&
 }
 
 XEEMITTER(lvlxl, 0x7C00060E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_lvlx(e, c, i);
 }
 
 XEEMITTER(lvlxl128, VX128_1(4, 1539), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_lvlx128(e, c, i);
 }
 
 XEEMITTER(lvrx, 0x7C00044E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
@@ -184,13 +210,11 @@ XEEMITTER(lvrx128, VX128_1(4, 1091), VX128_1)(X64Emitter& e, X86Compiler&
 }
 
 XEEMITTER(lvrxl, 0x7C00064E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_lvrx(e, c, i);
 }
 
 XEEMITTER(lvrxl128, VX128_1(4, 1603), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_lvrx128(e, c, i);
 }
 
 XEEMITTER(stvlx, 0x7C00050E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
@@ -204,13 +228,11 @@ XEEMITTER(stvlx128, VX128_1(4, 1283), VX128_1)(X64Emitter& e, X86Compiler&
 }
 
 XEEMITTER(stvlxl, 0x7C00070E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_stvlx(e, c, i);
 }
 
 XEEMITTER(stvlxl128, VX128_1(4, 1795), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_stvlx128(e, c, i);
 }
 
 XEEMITTER(stvrx, 0x7C00054E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
@@ -224,13 +246,11 @@ XEEMITTER(stvrx128, VX128_1(4, 1347), VX128_1)(X64Emitter& e, X86Compiler&
 }
 
 XEEMITTER(stvrxl, 0x7C00074E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_stvrx(e, c, i);
 }
 
 XEEMITTER(stvrxl128, VX128_1(4, 1859), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  return InstrEmit_stvrx128(e, c, i);
 }
 
 XEEMITTER(mfvscr, 0x10000604, VX )(X64Emitter& e, X86Compiler& c, InstrData& i) {
diff --git a/src/xenia/cpu/x64/x64_emitter.cc b/src/xenia/cpu/x64/x64_emitter.cc
index e5118da16..1a2069735 100644
--- a/src/xenia/cpu/x64/x64_emitter.cc
+++ b/src/xenia/cpu/x64/x64_emitter.cc
@@ -1095,6 +1095,39 @@ void X64Emitter::SetupLocals() {
     }
     fpr_t >>= 2;
   }
+
+  uint64_t vr31_0_t = access_bits_.vr31_0;
+  for (int n = 0; n < 32; n++) {
+    if (vr31_0_t & 3) {
+      xesnprintfa(name, XECOUNT(name), "vr%d", n);
+      locals_.vr[n] = c.newXmmVar(kX86VarTypeXmmPS, name);
+    }
+    vr31_0_t >>= 2;
+  }
+  uint64_t vr63_32_t = access_bits_.vr63_32;
+  for (int n = 0; n < 32; n++) {
+    if (vr63_32_t & 3) {
+      xesnprintfa(name, XECOUNT(name), "vr%d", n + 32);
+      locals_.vr[n + 32] = c.newXmmVar(kX86VarTypeXmmPS, name);
+    }
+    vr63_32_t >>= 2;
+  }
+  uint64_t vr95_64_t = access_bits_.vr95_64;
+  for (int n = 0; n < 32; n++) {
+    if (vr95_64_t & 3) {
+      xesnprintfa(name, XECOUNT(name), "vr%d", n + 64);
+      locals_.vr[n + 64] = c.newXmmVar(kX86VarTypeXmmPS, name);
+    }
+    vr95_64_t >>= 2;
+  }
+  uint64_t vr127_96_t = access_bits_.vr127_96;
+  for (int n = 0; n < 32; n++) {
+    if (vr127_96_t & 3) {
+      xesnprintfa(name, XECOUNT(name), "vr%d", n + 96);
+      locals_.vr[n + 96] = c.newXmmVar(kX86VarTypeXmmPS, name);
+    }
+    vr127_96_t >>= 2;
+  }
 }
 
 void X64Emitter::FillRegisters() {
@@ -1182,6 +1215,17 @@ void X64Emitter::FillRegisters() {
                qword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, f) + 8 * n));
     }
   }
+
+  for (size_t n = 0; n < XECOUNT(locals_.vr); n++) {
+    if (locals_.vr[n].getId() != kInvalidValue) {
+      if (FLAGS_annotate_disassembly) {
+        c.comment("Filling vr%d", n);
+      }
+      c.movdqu(locals_.vr[n],
+               xmmword_ptr(c.getGpArg(0),
+                           offsetof(xe_ppc_state_t, v) + 16 * n));
+    }
+  }
 }
 
 void X64Emitter::SpillRegisters() {
@@ -1273,6 +1317,18 @@ void X64Emitter::SpillRegisters() {
              v);
     }
   }
+
+  for (size_t n = 0; n < XECOUNT(locals_.vr); n++) {
+    XmmVar& v = locals_.vr[n];
+    if (v.getId() != kInvalidValue) {
+      if (FLAGS_annotate_disassembly) {
+        c.comment("Spilling vr%d", n);
+      }
+      c.movdqu(xmmword_ptr(c.getGpArg(0),
+                           offsetof(xe_ppc_state_t, v) + 16 * n),
+               v);
+    }
+  }
 }
 
 bool X64Emitter::get_constant_gpr_value(uint32_t n, uint64_t* value) {
@@ -1605,9 +1661,51 @@ void X64Emitter::update_fpr_value(uint32_t n, XmmVar& value) {
   }
 }
 
+// Returns the current value of vector register |n| (0-127): the cached local
+// when FLAGS_cache_registers is set, otherwise a fresh load from the PPC
+// state block.
+XmmVar X64Emitter::vr_value(uint32_t n) {
+  X86Compiler& c = compiler_;
+  XEASSERT(n >= 0 && n < 128);
+  if (FLAGS_cache_registers) {
+    XEASSERT(locals_.vr[n].getId() != kInvalidValue);
+    return locals_.vr[n];
+  } else {
+    XmmVar value(c.newXmmVar());
+    // movdqu, not movq: movq only transfers the low 64 bits.
+    c.movdqu(value,
+             xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n));
+    return value;
+  }
+}
+
+// Stores |value| into vector register |n| (0-127): into the cached local
+// when FLAGS_cache_registers is set, otherwise directly into the PPC state
+// block.
+void X64Emitter::update_vr_value(uint32_t n, XmmVar& value) {
+  X86Compiler& c = compiler_;
+  XEASSERT(n >= 0 && n < 128);
+  if (FLAGS_cache_registers) {
+    XEASSERT(locals_.vr[n].getId() != kInvalidValue);
+    // Full 128-bit register copy (movq would drop the upper 64 bits).
+    c.movdqa(locals_.vr[n], value);
+  } else {
+    c.movdqu(xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n),
+             value);
+  }
+}
+
 GpVar X64Emitter::TouchMemoryAddress(uint32_t cia, GpVar& addr) {
   X86Compiler& c = compiler_;
 
+#if 0
+  Label no_match(c.newLabel());
+  c.cmp(addr, imm(0x21004220));
+  c.jne(no_match, kCondHintLikely);
+  c.int3();
+  c.bind(no_match);
+#endif
+
   // Input address is always in 32-bit space.
   GpVar real_address(c.newGpVar());
   c.mov(real_address.r32(), addr.r32());
@@ -1643,14 +1741,6 @@ GpVar X64Emitter::ReadMemory(
     uint32_t cia, GpVar& addr, uint32_t size, bool acquire) {
   X86Compiler& c = compiler_;
 
-#if 0
-  Label no_match(c.newLabel());
-  c.cmp(addr, imm(0x21004220));
-  c.jne(no_match, kCondHintLikely);
-  c.int3();
-  c.bind(no_match);
-#endif
-
   // Rebase off of memory base pointer.
   GpVar real_address = TouchMemoryAddress(cia, addr);
 
@@ -1663,7 +1753,6 @@ GpVar X64Emitter::ReadMemory(
   }
 
   GpVar value(c.newGpVar());
-  bool needs_swap = false;
   switch (size) {
   case 1:
     c.mov(value.r8(), byte_ptr(real_address));
@@ -1692,19 +1781,52 @@ GpVar X64Emitter::ReadMemory(
   return value;
 }
 
+// Emits a 16-byte vector load from guest address |addr| and returns the
+// value with its 16 bytes reversed. An |alignment| of 4 masks the low 4
+// address bits (16-byte alignment); other values assert. |addr| itself is
+// not modified.
+XmmVar X64Emitter::ReadMemoryXmm(
+    uint32_t cia, GpVar& addr, uint32_t alignment) {
+  X86Compiler& c = compiler_;
+
+  // Align memory address.
+  GpVar aligned_addr(c.newGpVar());
+  c.mov(aligned_addr, addr);
+  switch (alignment) {
+  case 4:
+    c.and_(aligned_addr, imm(~0xF));
+    break;
+  default:
+    XEASSERTALWAYS();
+    break;
+  }
+
+  // Rebase off of memory base pointer (the aligned address, not |addr|).
+  GpVar real_address = TouchMemoryAddress(cia, aligned_addr);
+
+  // movdqu, not movq: movq would only load the low 64 bits.
+  XmmVar value(c.newXmmVar());
+  c.movdqu(value, xmmword_ptr(real_address));
+
+  // Byte swap.
+  // http://www.asmcommunity.net/forums/topic/?id=29743
+  XmmVar temp(c.newXmmVar());
+  c.pshufd(value, value, imm(0x1B));  // 00011011b
+  c.pshuflw(value, value, imm(0xB1)); // 10110001b
+  c.pshufhw(value, value, imm(0xB1)); // 10110001b
+  c.movdqa(temp, value);
+  c.psrlw(temp, imm(8));
+  c.psllw(value, imm(8));
+  c.por(value, temp);
+
+  return value;
+}
+
 void X64Emitter::WriteMemory(
     uint32_t cia, GpVar& addr, uint32_t size, GpVar& value,
     bool release) {
   X86Compiler& c = compiler_;
 
-#if 0
-  Label no_match(c.newLabel());
-  c.cmp(addr, imm(0x21004220));
-  c.jne(no_match, kCondHintLikely);
-  c.int3();
-  c.bind(no_match);
-#endif
-
   // Rebase off of memory base pointer.
   GpVar real_address = TouchMemoryAddress(cia, addr);
 
@@ -1745,6 +1867,43 @@ void X64Emitter::WriteMemory(
   }
 }
 
+// Emits a 16-byte vector store of |value|, with its 16 bytes reversed, to
+// guest address |addr|. An |alignment| of 4 masks the low 4 address bits;
+// other values assert. Neither |addr| nor |value| is modified.
+void X64Emitter::WriteMemoryXmm(
+    uint32_t cia, GpVar& addr, uint32_t alignment, XmmVar& value) {
+  X86Compiler& c = compiler_;
+
+  // Align memory address.
+  GpVar aligned_addr(c.newGpVar());
+  c.mov(aligned_addr, addr);
+  switch (alignment) {
+  case 4:
+    c.and_(aligned_addr, imm(~0xF));
+    break;
+  default:
+    XEASSERTALWAYS();
+    break;
+  }
+
+  // Rebase off of memory base pointer.
+  GpVar real_address = TouchMemoryAddress(cia, aligned_addr);
+
+  // Byte swap a copy: |value| may be a cached register local (vr_value) and
+  // must keep its contents for later uses of the register.
+  XmmVar swapped(c.newXmmVar());
+  XmmVar temp(c.newXmmVar());
+  c.pshufd(swapped, value, imm(0x1B));    // 00011011b
+  c.pshuflw(swapped, swapped, imm(0xB1)); // 10110001b
+  c.pshufhw(swapped, swapped, imm(0xB1)); // 10110001b
+  c.movdqa(temp, swapped);
+  c.psrlw(temp, imm(8));
+  c.psllw(swapped, imm(8));
+  c.por(swapped, temp);
+
+  c.movdqu(xmmword_ptr(real_address), swapped);
+}
+
 GpVar X64Emitter::get_uint64(uint64_t value) {
   X86Compiler& c = compiler_;
   GpVar v(c.newGpVar());
diff --git a/src/xenia/cpu/x64/x64_emitter.h b/src/xenia/cpu/x64/x64_emitter.h
index 9517f7858..c52094879 100644
--- a/src/xenia/cpu/x64/x64_emitter.h
+++ b/src/xenia/cpu/x64/x64_emitter.h
@@ -92,13 +92,20 @@ public:
   void update_gpr_value(uint32_t n, AsmJit::GpVar& value);
   AsmJit::XmmVar fpr_value(uint32_t n);
   void update_fpr_value(uint32_t n, AsmJit::XmmVar& value);
+  AsmJit::XmmVar vr_value(uint32_t n);
+  void update_vr_value(uint32_t n, AsmJit::XmmVar& value);
 
   AsmJit::GpVar TouchMemoryAddress(uint32_t cia, AsmJit::GpVar& addr);
   AsmJit::GpVar ReadMemory(
       uint32_t cia, AsmJit::GpVar& addr, uint32_t size, bool acquire = false);
+  AsmJit::XmmVar ReadMemoryXmm(
+      uint32_t cia, AsmJit::GpVar& addr, uint32_t alignment);
   void WriteMemory(
       uint32_t cia, AsmJit::GpVar& addr, uint32_t size, AsmJit::GpVar& value,
       bool release = false);
+  void WriteMemoryXmm(
+      uint32_t cia, AsmJit::GpVar& addr, uint32_t alignment,
+      AsmJit::XmmVar& value);
 
   AsmJit::GpVar get_uint64(uint64_t value);
   AsmJit::GpVar sign_extend(AsmJit::GpVar& value, int from_size, int to_size);
@@ -153,6 +160,7 @@ private:
     AsmJit::GpVar cr[8];
     AsmJit::GpVar gpr[32];
     AsmJit::XmmVar fpr[32];
+    AsmJit::XmmVar vr[128];
   } locals_;
 };
 