vspltisb/ldarx/stdcx.

ldarx/stdcx are just as faked as their word-sized equivalents (lwarx/stwcx).
This commit is contained in:
Ben Vanik 2013-10-14 00:13:40 -07:00
parent 72b54cfe2a
commit 602ed5393e
3 changed files with 120 additions and 55 deletions

View File

@ -1554,9 +1554,63 @@ XEEMITTER(vsl, 0x100001C4, VX )(X64Emitter& e, X86Compiler& c, Instr
// Emitter for vslb (opcode 0x10000104, VX form).
// Not implemented yet: flags the instruction via XEINSTRNOTIMPLEMENTED() and
// returns 1, the same value the other stubbed emitters in this view return.
XEEMITTER(vslb, 0x10000104, VX )(X64Emitter& e, X86Compiler& c, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
// Sketch of the intended implementation (nothing below is emitted yet):
// o = {0}
// for each byte:
// t = shift input by VB[b] *bits*
// o = o | (t & mask)
// write o
return 1;
}
// Emitter for vslh (opcode 0x10000144, VX form).
// Not implemented yet: flags the instruction via XEINSTRNOTIMPLEMENTED() and
// returns 1 without emitting any code.
XEEMITTER(vslh, 0x10000144, VX )(X64Emitter& e, X86Compiler& c, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// Shared emitter body for vslw / vslw128: shifts each of the four 32-bit lanes
// of vector register `va` left by a per-lane amount taken from vector register
// `vb`, and stores the result in vector register `vd`.
//
// e  - emitter state; supplies vr_value()/update_vr_value()/TraceVR().
// c  - compiler the x64 instructions are appended to.
// vd - destination vector register index.
// va - source vector register index (values to shift).
// vb - source vector register index (shift amounts).
// Returns 0 (the stubbed emitters in this view return 1 instead).
int InstrEmit_vslw_(X64Emitter& e, X86Compiler& c, uint32_t vd, uint32_t va, uint32_t vb) {
// VA = |xxxxx|yyyyy|zzzzz|wwwww|
// VB = |...sh|...sh|...sh|...sh|
// VD = |x<<sh|y<<sh|z<<sh|w<<sh|
// There is no SSE op to do this, so we have to do each individually.
// TODO(benvanik): update to do in two ops by doing 0/2 and 1/3.
// Scratch registers: sh holds the current lane's shift count, vt the lane
// value being shifted, and v accumulates the result. v is not cleared first;
// all four of its dword lanes are written by the pinsrd calls below.
GpVar sh(c.newGpVar());
GpVar vt(c.newGpVar());
XmmVar v(c.newXmmVar());
// Each lane follows the same five steps:
//   pextrb - read byte (lane * 4) of VB, i.e. the first byte of that dword lane
//   and_   - keep only the low 5 bits, so the shift count is 0..31
//   pextrd - read dword `lane` of VA
//   shl    - shift the value left by the count
//   pinsrd - write the low 32 bits of the result into dword `lane` of v
// 0
c.pextrb(sh, e.vr_value(vb), imm(0));
c.and_(sh, imm(0x1F));
c.pextrd(vt, e.vr_value(va), imm(0));
c.shl(vt, sh);
c.pinsrd(v, vt.r32(), imm(0));
// 1
c.pextrb(sh, e.vr_value(vb), imm(1 * 4));
c.and_(sh, imm(0x1F));
c.pextrd(vt, e.vr_value(va), imm(1));
c.shl(vt, sh);
c.pinsrd(v, vt.r32(), imm(1));
// 2
c.pextrb(sh, e.vr_value(vb), imm(2 * 4));
c.and_(sh, imm(0x1F));
c.pextrd(vt, e.vr_value(va), imm(2));
c.shl(vt, sh);
c.pinsrd(v, vt.r32(), imm(2));
// 3
c.pextrb(sh, e.vr_value(vb), imm(3 * 4));
c.and_(sh, imm(0x1F));
c.pextrd(vt, e.vr_value(va), imm(3));
c.shl(vt, sh);
c.pinsrd(v, vt.r32(), imm(3));
// Publish the assembled vector as VD, then trace the registers involved.
e.update_vr_value(vd, v);
e.TraceVR(vd, va, vb);
return 0;
}
// Emitter for vslw (opcode 0x10000184, VX form): forwards the VD/VA/VB
// register fields of the VX encoding to InstrEmit_vslw_ and returns its result.
XEEMITTER(vslw, 0x10000184, VX )(X64Emitter& e, X86Compiler& c, InstrData& i) {
return InstrEmit_vslw_(e, c, i.VX.VD, i.VX.VA, i.VX.VB);
}
// Emitter for vslw128 (VX128(6, 208) encoding): same operation as vslw, with
// the register indices supplied by the VX128_VD128/VA128/VB128 macros.
XEEMITTER(vslw128, VX128(6, 208), VX128 )(X64Emitter& e, X86Compiler& c, InstrData& i) {
return InstrEmit_vslw_(e, c, VX128_VD128, VX128_VA128, VX128_VB128);
}
static __m128i __shift_table_out[16] = {
_mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), // unused
_mm_set_epi8( 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1),
@ -1637,11 +1691,6 @@ XEEMITTER(vsldoi128, VX128_5(4, 16), VX128_5)(X64Emitter& e, X86Compiler&
return InstrEmit_vsldoi_(e, c, VX128_5_VD128, VX128_5_VA128, VX128_5_VB128, VX128_5_SH);
}
// Emitter for vslh (opcode 0x10000144, VX form).
// Not implemented yet: flags the instruction via XEINSTRNOTIMPLEMENTED() and
// returns 1 without emitting any code.
// NOTE(review): an identical vslh definition appears earlier in this view;
// presumably this is the pre-move copy that the commit removes - confirm.
XEEMITTER(vslh, 0x10000144, VX )(X64Emitter& e, X86Compiler& c, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
XEEMITTER(vslo, 0x1000040C, VX )(X64Emitter& e, X86Compiler& c, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
@ -1651,50 +1700,6 @@ XEEMITTER(vslo128, VX128(5, 912), VX128 )(X64Emitter& e, X86Compiler&
return 1;
}
// Shared emitter body for vslw / vslw128: shifts each of the four 32-bit lanes
// of vector register `va` left by a per-lane amount taken from vector register
// `vb`, and stores the result in vector register `vd`. Returns 0.
// NOTE(review): an identical definition appears earlier in this view;
// presumably this is the pre-move copy that the commit removes - confirm.
int InstrEmit_vslw_(X64Emitter& e, X86Compiler& c, uint32_t vd, uint32_t va, uint32_t vb) {
// VA = |xxxxx|yyyyy|zzzzz|wwwww|
// VB = |...sh|...sh|...sh|...sh|
// VD = |x<<sh|y<<sh|z<<sh|w<<sh|
// There is no SSE op to do this, so we have to do each individually.
// TODO(benvanik): update to do in two ops by doing 0/2 and 1/3.
// sh = current lane's shift count, vt = lane value, v = result accumulator.
GpVar sh(c.newGpVar());
GpVar vt(c.newGpVar());
XmmVar v(c.newXmmVar());
// Per lane: read byte (lane * 4) of VB, mask it to 5 bits, read dword `lane`
// of VA, shift it left, and insert the low 32 bits into dword `lane` of v.
// 0
c.pextrb(sh, e.vr_value(vb), imm(0));
c.and_(sh, imm(0x1F));
c.pextrd(vt, e.vr_value(va), imm(0));
c.shl(vt, sh);
c.pinsrd(v, vt.r32(), imm(0));
// 1
c.pextrb(sh, e.vr_value(vb), imm(1 * 4));
c.and_(sh, imm(0x1F));
c.pextrd(vt, e.vr_value(va), imm(1));
c.shl(vt, sh);
c.pinsrd(v, vt.r32(), imm(1));
// 2
c.pextrb(sh, e.vr_value(vb), imm(2 * 4));
c.and_(sh, imm(0x1F));
c.pextrd(vt, e.vr_value(va), imm(2));
c.shl(vt, sh);
c.pinsrd(v, vt.r32(), imm(2));
// 3
c.pextrb(sh, e.vr_value(vb), imm(3 * 4));
c.and_(sh, imm(0x1F));
c.pextrd(vt, e.vr_value(va), imm(3));
c.shl(vt, sh);
c.pinsrd(v, vt.r32(), imm(3));
// Publish the assembled vector as VD, then trace the registers involved.
e.update_vr_value(vd, v);
e.TraceVR(vd, va, vb);
return 0;
}
// Emitter for vslw (opcode 0x10000184, VX form): forwards the VD/VA/VB
// register fields of the VX encoding to InstrEmit_vslw_ and returns its result.
// NOTE(review): an identical definition appears earlier in this view;
// presumably this is the pre-move copy that the commit removes - confirm.
XEEMITTER(vslw, 0x10000184, VX )(X64Emitter& e, X86Compiler& c, InstrData& i) {
return InstrEmit_vslw_(e, c, i.VX.VD, i.VX.VA, i.VX.VB);
}
// Emitter for vslw128 (VX128(6, 208) encoding): same operation as vslw, with
// the register indices supplied by the VX128_VD128/VA128/VB128 macros.
// NOTE(review): an identical definition appears earlier in this view;
// presumably this is the pre-move copy that the commit removes - confirm.
XEEMITTER(vslw128, VX128(6, 208), VX128 )(X64Emitter& e, X86Compiler& c, InstrData& i) {
return InstrEmit_vslw_(e, c, VX128_VD128, VX128_VA128, VX128_VB128);
}
XEEMITTER(vspltb, 0x1000020C, VX )(X64Emitter& e, X86Compiler& c, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
@ -1706,8 +1711,24 @@ XEEMITTER(vsplth, 0x1000024C, VX )(X64Emitter& e, X86Compiler& c, Instr
}
// Emitter for vspltisb (opcode 0x1000030C, VX form): fills every byte of VD
// with the 5-bit immediate held in the VA field, sign-extended to 8 bits.
// NOTE(review): this view looks like a diff with its +/- markers stripped. The
// two statements directly below appear to be the old stub that the commit
// removes; read literally they return 1 before any of the implementation runs.
XEEMITTER(vspltisb, 0x1000030C, VX )(X64Emitter& e, X86Compiler& c, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
// (VD.xyzw) <- sign_extend(uimm)
XmmVar v(c.newXmmVar());
// This branch is decided while emitting: the immediate is an instruction
// field, so no runtime compare is generated for it.
if (i.VX.VA) {
// Sign extend from 5bits -> 8 and load.
// Bit 4 (0x10) is the sign bit of the 5-bit field; OR-ing in 0xF0 sets the
// upper bits of the byte when it is set.
int32_t simm = (i.VX.VA & 0x10) ? (i.VX.VA | 0xF0) : i.VX.VA;
GpVar simm_v(c.newGpVar());
c.mov(simm_v, imm(simm));
// Move the value into the low dword of v.
c.movd(v, simm_v.r32());
// An all-zero pshufb control makes every destination byte take source
// byte 0, which broadcasts the immediate across the whole register.
XmmVar z(c.newXmmVar());
c.xorps(z, z);
c.pshufb(v, z);
} else {
// Zero out the register.
c.xorps(v, v);
}
e.update_vr_value(i.VX.VD, v);
e.TraceVR(i.VX.VD);
return 0;
}
XEEMITTER(vspltish, 0x1000034C, VX )(X64Emitter& e, X86Compiler& c, InstrData& i) {

View File

@ -990,8 +990,29 @@ XEEMITTER(isync, 0x4C00012C, XL )(X64Emitter& e, X86Compiler& c, InstrDat
}
// Emitter for ldarx (opcode 0x7C0000A8, X form): loads 8 bytes from the
// effective address into RT. Only the load is emitted; none of the RESERVE*
// state from the pseudocode below is recorded (see the TODO).
// NOTE(review): this view looks like a diff with its +/- markers stripped. The
// two statements directly below appear to be the old stub that the commit
// removes; read literally they return 1 before any of the implementation runs.
XEEMITTER(ldarx, 0x7C0000A8, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
// if RA = 0 then
// b <- 0
// else
// b <- (RA)
// EA <- b + (RB)
// RESERVE <- 1
// RESERVE_LENGTH <- 8
// RESERVE_ADDR <- real_addr(EA)
// RT <- MEM(EA, 8)
// TODO(benvanik): make this right
// EA starts as (RB); (RA) is added only when the RA field is non-zero, which
// matches the "if RA = 0 then b <- 0" rule above.
GpVar ea(c.newGpVar());
c.mov(ea, e.gpr_value(i.X.RB));
if (i.X.RA) {
c.add(ea, e.gpr_value(i.X.RA));
}
// 8-byte read with the acquire flag set.
GpVar v = e.ReadMemory(i.address, ea, 8, /* acquire */ true);
e.update_gpr_value(i.X.RT, v);
// presumably drops any compile-time constant tracked for RT, since the
// loaded value is only known at runtime - confirm against X64Emitter.
e.clear_constant_gpr_value(i.X.RT);
return 0;
}
XEEMITTER(lwarx, 0x7C000028, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
@ -1021,8 +1042,30 @@ XEEMITTER(lwarx, 0x7C000028, X )(X64Emitter& e, X86Compiler& c, InstrDat
}
// Emitter for stdcx (opcode 0x7C0001AD, X form): stores 8 bytes to the
// effective address and then writes CR0. No reservation is checked; the store
// is emitted unconditionally (see the TODO and "We always succeed." below).
// NOTE(review): this view looks like a diff with its +/- markers stripped. The
// two statements directly below appear to be the old stub that the commit
// removes; read literally they return 1 before any of the implementation runs.
XEEMITTER(stdcx, 0x7C0001AD, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
// if RA = 0 then
// b <- 0
// else
// b <- (RA)
// EA <- b + (RB)
// RESERVE stuff...
// MEM(EA, 8) <- (RS)
// n <- 1 if store performed
// CR0[LT GT EQ SO] = 0b00 || n || XER[SO]
// TODO(benvanik): make this right
// EA starts as (RB); (RA) is added only when the RA field is non-zero, which
// matches the "if RA = 0 then b <- 0" rule above.
GpVar ea(c.newGpVar());
c.mov(ea, e.gpr_value(i.X.RB));
if (i.X.RA) {
c.add(ea, e.gpr_value(i.X.RA));
}
// NOTE(review): the pseudocode names the source register RS, but the code
// reads the RT field; presumably both name the same bits of the X-form
// encoding - confirm against the InstrData definition.
GpVar v = e.gpr_value(i.X.RT);
// 8-byte write with the release flag set.
e.WriteMemory(i.address, ea, 8, v, /* release */ true);
// We always succeed.
// NOTE(review): the pseudocode puts n in the second-lowest bit of the CR0
// field and copies XER[SO] into the lowest, yet the constant written here is
// 1 << 2 and XER[SO] is not read. Confirm the bit layout update_cr_value
// expects before relying on this value.
e.update_cr_value(0, e.get_uint64(1 << 2));
return 0;
}
XEEMITTER(stwcx, 0x7C00012D, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {

View File

@ -650,6 +650,7 @@ void X64Emitter::GenerateBasicBlock(FunctionBlock* block) {
// TODO(benvanik): assert this doesn't occur - means a bad sdb run!
XELOGCPU("SDB function scan error in %.8X: bb %.8X has unknown exit",
symbol_->start_address, block->start_address);
XEASSERTALWAYS();
c.ret();
}