Simple altivec loads/stores.

This commit is contained in:
Ben Vanik 2013-09-28 23:14:43 -07:00
parent f398ccba56
commit 526b1a85b7
3 changed files with 209 additions and 49 deletions

View File

@ -94,23 +94,37 @@ XEEMITTER(lvsr128, VX128_1(4, 67), VX128_1)(X64Emitter& e, X86Compiler&
}
// lvx: load a 128-bit vector register from memory (X-form encoding).
//
// Emits code that computes the effective address as gpr[RB], plus gpr[RA]
// when the RA field is non-zero, reads a vector through
// X64Emitter::ReadMemoryXmm (alignment selector 4) and stores the result in
// vector register RT.
//
// Returns 0 once the code has been emitted; the not-implemented stubs in
// this file return 1 instead.
XEEMITTER(lvx, 0x7C0000CE, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
  // ea = (RA ? gpr[RA] : 0) + gpr[RB]
  GpVar ea(c.newGpVar());
  c.mov(ea, e.gpr_value(i.X.RB));
  // Use the X-form view of the instruction for RA, matching RB/RT below.
  if (i.X.RA) {
    c.add(ea, e.gpr_value(i.X.RA));
  }
  XmmVar v = e.ReadMemoryXmm(i.address, ea, 4);
  e.update_vr_value(i.X.RT, v);
  return 0;
}
// lvx128: load a 128-bit vector register from memory (VX128_1 encoding).
//
// Same address computation as lvx, but the destination register index is
// assembled from the split VD128l / VD128h fields (VD128h supplies the bits
// from position 5 upwards).
//
// Returns 0 once the code has been emitted.
XEEMITTER(lvx128, VX128_1(4, 195), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
  const uint32_t vd = i.VX128_1.VD128l | (i.VX128_1.VD128h << 5);
  // ea = (RA ? gpr[RA] : 0) + gpr[RB]
  GpVar ea(c.newGpVar());
  c.mov(ea, e.gpr_value(i.VX128_1.RB));
  if (i.VX128_1.RA) {
    c.add(ea, e.gpr_value(i.VX128_1.RA));
  }
  XmmVar v = e.ReadMemoryXmm(i.address, ea, 4);
  e.update_vr_value(vd, v);
  return 0;
}
// lvxl: emitted identically to lvx by forwarding to its emitter and
// returning that emitter's result.
XEEMITTER(lvxl, 0x7C0002CE, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
  return InstrEmit_lvx(e, c, i);
}
// lvxl128: emitted identically to lvx128 by forwarding to its emitter and
// returning that emitter's result.
XEEMITTER(lvxl128, VX128_1(4, 707), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
  return InstrEmit_lvx128(e, c, i);
}
XEEMITTER(stvebx, 0x7C00010E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
@ -134,23 +148,37 @@ XEEMITTER(stvewx128, VX128_1(4, 387), VX128_1)(X64Emitter& e, X86Compiler&
}
// stvx: store a 128-bit vector register to memory (X-form encoding).
//
// Emits code that computes the effective address as gpr[RB], plus gpr[RA]
// when the RA field is non-zero, then writes vector register RT through
// X64Emitter::WriteMemoryXmm (alignment selector 4).
//
// Returns 0 once the code has been emitted; the not-implemented stubs in
// this file return 1 instead.
XEEMITTER(stvx, 0x7C0001CE, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
  // ea = (RA ? gpr[RA] : 0) + gpr[RB]
  GpVar ea(c.newGpVar());
  c.mov(ea, e.gpr_value(i.X.RB));
  if (i.X.RA) {
    c.add(ea, e.gpr_value(i.X.RA));
  }
  XmmVar v = e.vr_value(i.X.RT);
  e.WriteMemoryXmm(i.address, ea, 4, v);
  return 0;
}
// stvx128: store a 128-bit vector register to memory (VX128_1 encoding).
//
// Same address computation as stvx, but the source register index is
// assembled from the split VD128l / VD128h fields (VD128h supplies the bits
// from position 5 upwards).
//
// Returns 0 once the code has been emitted.
XEEMITTER(stvx128, VX128_1(4, 451), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
  const uint32_t vd = i.VX128_1.VD128l | (i.VX128_1.VD128h << 5);
  // ea = (RA ? gpr[RA] : 0) + gpr[RB]
  GpVar ea(c.newGpVar());
  c.mov(ea, e.gpr_value(i.VX128_1.RB));
  // Test RA through the VX128_1 view, the same view used to read it below.
  if (i.VX128_1.RA) {
    c.add(ea, e.gpr_value(i.VX128_1.RA));
  }
  XmmVar v = e.vr_value(vd);
  e.WriteMemoryXmm(i.address, ea, 4, v);
  return 0;
}
// stvxl: emitted identically to stvx by forwarding to its emitter and
// returning that emitter's result.
XEEMITTER(stvxl, 0x7C0003CE, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
  return InstrEmit_stvx(e, c, i);
}
// stvxl128: emitted identically to stvx128 by forwarding to its emitter and
// returning that emitter's result.
XEEMITTER(stvxl128, VX128_1(4, 963), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
  return InstrEmit_stvx128(e, c, i);
}
XEEMITTER(lvlx, 0x7C00040E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
@ -164,13 +192,11 @@ XEEMITTER(lvlx128, VX128_1(4, 1027), VX128_1)(X64Emitter& e, X86Compiler&
}
// lvlxl: emitted identically to lvlx by forwarding to its emitter and
// returning that emitter's result.
XEEMITTER(lvlxl, 0x7C00060E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
  return InstrEmit_lvlx(e, c, i);
}
// lvlxl128: emitted identically to lvlx128 by forwarding to its emitter and
// returning that emitter's result.
XEEMITTER(lvlxl128, VX128_1(4, 1539), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
  return InstrEmit_lvlx128(e, c, i);
}
XEEMITTER(lvrx, 0x7C00044E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
@ -184,13 +210,11 @@ XEEMITTER(lvrx128, VX128_1(4, 1091), VX128_1)(X64Emitter& e, X86Compiler&
}
// lvrxl: emitted identically to lvrx by forwarding to its emitter and
// returning that emitter's result.
XEEMITTER(lvrxl, 0x7C00064E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
  return InstrEmit_lvrx(e, c, i);
}
// lvrxl128: emitted identically to lvrx128 by forwarding to its emitter and
// returning that emitter's result.
XEEMITTER(lvrxl128, VX128_1(4, 1603), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
  return InstrEmit_lvrx128(e, c, i);
}
XEEMITTER(stvlx, 0x7C00050E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
@ -204,13 +228,11 @@ XEEMITTER(stvlx128, VX128_1(4, 1283), VX128_1)(X64Emitter& e, X86Compiler&
}
// stvlxl: emitted identically to stvlx by forwarding to its emitter and
// returning that emitter's result.
XEEMITTER(stvlxl, 0x7C00070E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
  return InstrEmit_stvlx(e, c, i);
}
// stvlxl128: emitted identically to stvlx128 by forwarding to its emitter
// and returning that emitter's result.
XEEMITTER(stvlxl128, VX128_1(4, 1795), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
  return InstrEmit_stvlx128(e, c, i);
}
XEEMITTER(stvrx, 0x7C00054E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
@ -224,13 +246,11 @@ XEEMITTER(stvrx128, VX128_1(4, 1347), VX128_1)(X64Emitter& e, X86Compiler&
}
// stvrxl: emitted identically to stvrx by forwarding to its emitter and
// returning that emitter's result.
XEEMITTER(stvrxl, 0x7C00074E, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
  return InstrEmit_stvrx(e, c, i);
}
// stvrxl128: emitted identically to stvrx128 by forwarding to its emitter
// and returning that emitter's result.
XEEMITTER(stvrxl128, VX128_1(4, 1859), VX128_1)(X64Emitter& e, X86Compiler& c, InstrData& i) {
  return InstrEmit_stvrx128(e, c, i);
}
XEEMITTER(mfvscr, 0x10000604, VX )(X64Emitter& e, X86Compiler& c, InstrData& i) {

View File

@ -1095,6 +1095,39 @@ void X64Emitter::SetupLocals() {
}
fpr_t >>= 2;
}
uint64_t vr31_0_t = access_bits_.vr31_0;
for (int n = 0; n < 32; n++) {
if (vr31_0_t & 3) {
xesnprintfa(name, XECOUNT(name), "vr%d", n);
locals_.vr[n] = c.newXmmVar(kX86VarTypeXmmPS, name);
}
vr31_0_t >>= 2;
}
uint64_t vr63_32_t = access_bits_.vr63_32;
for (int n = 0; n < 32; n++) {
if (vr63_32_t & 3) {
xesnprintfa(name, XECOUNT(name), "vr%d", n + 32);
locals_.vr[n + 32] = c.newXmmVar(kX86VarTypeXmmPS, name);
}
vr63_32_t >>= 2;
}
uint64_t vr95_64_t = access_bits_.vr95_64;
for (int n = 0; n < 32; n++) {
if (vr95_64_t & 3) {
xesnprintfa(name, XECOUNT(name), "vr%d", n + 64);
locals_.vr[n + 64] = c.newXmmVar(kX86VarTypeXmmPS, name);
}
vr95_64_t >>= 2;
}
uint64_t vr127_96_t = access_bits_.vr127_96;
for (int n = 0; n < 32; n++) {
if (vr127_96_t & 3) {
xesnprintfa(name, XECOUNT(name), "vr%d", n + 96);
locals_.vr[n + 96] = c.newXmmVar(kX86VarTypeXmmPS, name);
}
vr127_96_t >>= 2;
}
}
void X64Emitter::FillRegisters() {
@ -1182,6 +1215,17 @@ void X64Emitter::FillRegisters() {
qword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, f) + 8 * n));
}
}
for (size_t n = 0; n < XECOUNT(locals_.vr); n++) {
if (locals_.vr[n].getId() != kInvalidValue) {
if (FLAGS_annotate_disassembly) {
c.comment("Filling vr%d", n);
}
c.movq(locals_.vr[n],
xmmword_ptr(c.getGpArg(0),
offsetof(xe_ppc_state_t, v) + 16 * n));
}
}
}
void X64Emitter::SpillRegisters() {
@ -1273,6 +1317,18 @@ void X64Emitter::SpillRegisters() {
v);
}
}
for (size_t n = 0; n < XECOUNT(locals_.vr); n++) {
XmmVar& v = locals_.vr[n];
if (v.getId() != kInvalidValue) {
if (FLAGS_annotate_disassembly) {
c.comment("Spilling vr%d", n);
}
c.movq(xmmword_ptr(c.getGpArg(0),
offsetof(xe_ppc_state_t, v) + 16 * n),
v);
}
}
}
bool X64Emitter::get_constant_gpr_value(uint32_t n, uint64_t* value) {
@ -1605,9 +1661,43 @@ void X64Emitter::update_fpr_value(uint32_t n, XmmVar& value) {
}
}
// Returns an XMM variable holding the current value of vector register |n|.
//
// n: vector register index; must be below 128.
//
// With FLAGS_cache_registers set, the function-local variable for the
// register is returned directly (it must already have been created).
// Otherwise code is emitted to load all 16 bytes of the register from the
// xe_ppc_state_t pointed to by the first function argument.
XmmVar X64Emitter::vr_value(uint32_t n) {
  X86Compiler& c = compiler_;
  // n is unsigned, so only the upper bound needs checking.
  XEASSERT(n < 128);
  if (FLAGS_cache_registers) {
    XEASSERT(locals_.vr[n].getId() != kInvalidValue);
    return locals_.vr[n];
  } else {
    XmmVar value(c.newXmmVar());
    // movq would only transfer the low 64 bits; movdqu moves the whole
    // 128-bit register and makes no assumption about the alignment of the
    // state structure.
    c.movdqu(value,
             xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n));
    return value;
  }
}
// Emits code that sets vector register |n| to |value|.
//
// n:     vector register index; must be below 128.
// value: XMM variable holding the new 128-bit contents; it is only read.
//
// With FLAGS_cache_registers set, the value is copied into the
// function-local variable for the register (it must already have been
// created). Otherwise it is stored straight into the xe_ppc_state_t pointed
// to by the first function argument.
void X64Emitter::update_vr_value(uint32_t n, XmmVar& value) {
  X86Compiler& c = compiler_;
  // n is unsigned, so only the upper bound needs checking.
  XEASSERT(n < 128);
  if (FLAGS_cache_registers) {
    XEASSERT(locals_.vr[n].getId() != kInvalidValue);
    // Full 128-bit register copy; movq would zero the upper half.
    c.movdqa(locals_.vr[n], value);
  } else {
    // Full 128-bit store; movdqu makes no assumption about the alignment of
    // the state structure.
    c.movdqu(xmmword_ptr(c.getGpArg(0), offsetof(xe_ppc_state_t, v) + 16 * n),
             value);
  }
}
GpVar X64Emitter::TouchMemoryAddress(uint32_t cia, GpVar& addr) {
X86Compiler& c = compiler_;
#if 0
Label no_match(c.newLabel());
c.cmp(addr, imm(0x21004220));
c.jne(no_match, kCondHintLikely);
c.int3();
c.bind(no_match);
#endif
// Input address is always in 32-bit space.
GpVar real_address(c.newGpVar());
c.mov(real_address.r32(), addr.r32());
@ -1643,14 +1733,6 @@ GpVar X64Emitter::ReadMemory(
uint32_t cia, GpVar& addr, uint32_t size, bool acquire) {
X86Compiler& c = compiler_;
#if 0
Label no_match(c.newLabel());
c.cmp(addr, imm(0x21004220));
c.jne(no_match, kCondHintLikely);
c.int3();
c.bind(no_match);
#endif
// Rebase off of memory base pointer.
GpVar real_address = TouchMemoryAddress(cia, addr);
@ -1663,7 +1745,6 @@ GpVar X64Emitter::ReadMemory(
}
GpVar value(c.newGpVar());
bool needs_swap = false;
switch (size) {
case 1:
c.mov(value.r8(), byte_ptr(real_address));
@ -1692,19 +1773,47 @@ GpVar X64Emitter::ReadMemory(
return value;
}
// Emits code that loads a 128-bit value from guest memory and byte swaps it.
//
// cia:       forwarded unchanged to TouchMemoryAddress.
// addr:      guest address to read from; left unmodified.
// alignment: alignment selector. Only 4 is handled, which clears the low
//            four address bits (16-byte alignment); any other value trips
//            XEASSERTALWAYS.
//
// Returns a new XMM variable containing the loaded value with the order of
// all 16 bytes reversed.
XmmVar X64Emitter::ReadMemoryXmm(
    uint32_t cia, GpVar& addr, uint32_t alignment) {
  X86Compiler& c = compiler_;

  // Align memory address (on a copy, so the caller's variable is untouched).
  GpVar aligned_addr(c.newGpVar());
  c.mov(aligned_addr, addr);
  switch (alignment) {
  case 4:
    c.and_(aligned_addr, imm(~0xF));
    break;
  default:
    XEASSERTALWAYS();
    break;
  }

  // Rebase off of memory base pointer. This must use the aligned address;
  // passing |addr| here would silently discard the masking above.
  GpVar real_address = TouchMemoryAddress(cia, aligned_addr);

  // Load all 16 bytes (movq would only load the low 8).
  XmmVar value(c.newXmmVar());
  c.movdqu(value, xmmword_ptr(real_address));

  // Byte swap.
  // http://www.asmcommunity.net/forums/topic/?id=29743
  // 1) reverse the four dwords, 2) swap the two words inside each dword,
  // 3) swap the two bytes inside each word => full 16-byte reversal.
  XmmVar temp(c.newXmmVar());
  c.pshufd(value, value, imm(0x1B));    // 00011011b
  c.pshuflw(value, value, imm(0xB1));   // 10110001b
  c.pshufhw(value, value, imm(0xB1));   // 10110001b
  c.movdqa(temp, value);
  c.psrlw(temp, imm(8));
  c.psllw(value, imm(8));
  c.por(value, temp);

  return value;
}
void X64Emitter::WriteMemory(
uint32_t cia, GpVar& addr, uint32_t size, GpVar& value,
bool release) {
X86Compiler& c = compiler_;
#if 0
Label no_match(c.newLabel());
c.cmp(addr, imm(0x21004220));
c.jne(no_match, kCondHintLikely);
c.int3();
c.bind(no_match);
#endif
// Rebase off of memory base pointer.
GpVar real_address = TouchMemoryAddress(cia, addr);
@ -1745,6 +1854,29 @@ void X64Emitter::WriteMemory(
}
}
// Emits code that byte swaps a 128-bit value and stores it to guest memory.
//
// cia:       forwarded unchanged to TouchMemoryAddress.
// addr:      guest address to write to; left unmodified.
// alignment: alignment selector. Only 4 is handled, which clears the low
//            four address bits (16-byte alignment); any other value trips
//            XEASSERTALWAYS. This mirrors ReadMemoryXmm.
// value:     XMM variable to store; it is only read. The swap is done on a
//            scratch copy because callers may pass the variable that caches
//            a vector register (see vr_value), which must keep its contents.
void X64Emitter::WriteMemoryXmm(
    uint32_t cia, GpVar& addr, uint32_t alignment, XmmVar& value) {
  X86Compiler& c = compiler_;

  // Align memory address (on a copy, so the caller's variable is untouched).
  GpVar aligned_addr(c.newGpVar());
  c.mov(aligned_addr, addr);
  switch (alignment) {
  case 4:
    c.and_(aligned_addr, imm(~0xF));
    break;
  default:
    XEASSERTALWAYS();
    break;
  }

  // Rebase off of memory base pointer.
  GpVar real_address = TouchMemoryAddress(cia, aligned_addr);

  // Byte swap a copy of the value.
  // 1) reverse the four dwords, 2) swap the two words inside each dword,
  // 3) swap the two bytes inside each word => full 16-byte reversal.
  XmmVar swapped(c.newXmmVar());
  XmmVar temp(c.newXmmVar());
  c.movdqa(swapped, value);
  c.pshufd(swapped, swapped, imm(0x1B));    // 00011011b
  c.pshuflw(swapped, swapped, imm(0xB1));   // 10110001b
  c.pshufhw(swapped, swapped, imm(0xB1));   // 10110001b
  c.movdqa(temp, swapped);
  c.psrlw(temp, imm(8));
  c.psllw(swapped, imm(8));
  c.por(swapped, temp);

  // Store all 16 bytes (movq would only store the low 8).
  c.movdqu(xmmword_ptr(real_address), swapped);
}
GpVar X64Emitter::get_uint64(uint64_t value) {
X86Compiler& c = compiler_;
GpVar v(c.newGpVar());

View File

@ -92,13 +92,20 @@ public:
void update_gpr_value(uint32_t n, AsmJit::GpVar& value);
AsmJit::XmmVar fpr_value(uint32_t n);
void update_fpr_value(uint32_t n, AsmJit::XmmVar& value);
AsmJit::XmmVar vr_value(uint32_t n);
void update_vr_value(uint32_t n, AsmJit::XmmVar& value);
AsmJit::GpVar TouchMemoryAddress(uint32_t cia, AsmJit::GpVar& addr);
AsmJit::GpVar ReadMemory(
uint32_t cia, AsmJit::GpVar& addr, uint32_t size, bool acquire = false);
AsmJit::XmmVar ReadMemoryXmm(
uint32_t cia, AsmJit::GpVar& addr, uint32_t alignment);
void WriteMemory(
uint32_t cia, AsmJit::GpVar& addr, uint32_t size, AsmJit::GpVar& value,
bool release = false);
void WriteMemoryXmm(
uint32_t cia, AsmJit::GpVar& addr, uint32_t alignment,
AsmJit::XmmVar& value);
AsmJit::GpVar get_uint64(uint64_t value);
AsmJit::GpVar sign_extend(AsmJit::GpVar& value, int from_size, int to_size);
@ -153,6 +160,7 @@ private:
AsmJit::GpVar cr[8];
AsmJit::GpVar gpr[32];
AsmJit::XmmVar fpr[32];
AsmJit::XmmVar vr[128];
} locals_;
};