From 091957e72e189082e0e9708610e6c4d587f86d2d Mon Sep 17 00:00:00 2001
From: Ben Vanik
Date: Fri, 4 Oct 2013 09:34:36 -0700
Subject: [PATCH] Untested srawx/sradix (makes things run further, at least).

---
 src/xenia/cpu/x64/x64_emit_altivec.cc |   1 -
 src/xenia/cpu/x64/x64_emit_alu.cc     | 123 +++++++++++++++++++++++++-
 2 files changed, 119 insertions(+), 5 deletions(-)

diff --git a/src/xenia/cpu/x64/x64_emit_altivec.cc b/src/xenia/cpu/x64/x64_emit_altivec.cc
index a184073bf..46bec20bf 100644
--- a/src/xenia/cpu/x64/x64_emit_altivec.cc
+++ b/src/xenia/cpu/x64/x64_emit_altivec.cc
@@ -1678,7 +1678,6 @@ XEEMITTER(vupkd3d128, VX128_3(6, 2032), VX128_3)(X64Emitter& e, X86Compiler&
     {
       // http://hlssmod.net/he_code/public/pixelwriter.h
      // ARGB (WXYZ) -> RGBA (XYZW)
-      c.int3();  // UNTESTED CONVERSION
       // zzzzZZZZzzzzARGB
       c.movaps(vt, e.vr_value(vb));
       // zzzzZZZZzzzzARGB
diff --git a/src/xenia/cpu/x64/x64_emit_alu.cc b/src/xenia/cpu/x64/x64_emit_alu.cc
index 3e41e500f..59b63ef47 100644
--- a/src/xenia/cpu/x64/x64_emit_alu.cc
+++ b/src/xenia/cpu/x64/x64_emit_alu.cc
@@ -1293,6 +1293,9 @@ XEEMITTER(sradx, 0x7C000634, X )(X64Emitter& e, X86Compiler& c, InstrDat
   // rA <- (r & m) | (((64)S) & ¬ m)
   // XER[CA] <- S & ((r & ¬ m) ¦ 0)
 
+  // if n == 0: rA <- rS, XER[CA] = 0
+  // if n >= 64: rA <- 64 sign bits of rS, XER[CA] = sign bit of rS
+
   GpVar v(c.newGpVar());
   c.mov(v, e.gpr_value(i.X.RT));
   GpVar sh(c.newGpVar());
@@ -1338,13 +1341,122 @@ XEEMITTER(sradx, 0x7C000634, X )(X64Emitter& e, X86Compiler& c, InstrDat
 }
 
 XEEMITTER(sradix, 0x7C000674, XS )(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // n <- sh[5] || sh[0-4]
+  // r <- ROTL[64](rS, 64 - n)
+  // m ← MASK(n, 63)
+  // S ← rS[0]
+  // rA <- (r & m) | (((64)S) & ¬ m)
+  // XER[CA] <- S & ((r & ¬ m) ¦ 0)
+
+  // if n == 0: rA <- rS, XER[CA] = 0
+  // if n >= 64: rA <- 64 sign bits of rS, XER[CA] = sign bit of rS
+
+  GpVar v(c.newGpVar());
+  c.mov(v, e.gpr_value(i.XS.RA));
+  GpVar sh(c.newGpVar());
+  c.mov(sh, imm((i.XS.SH5 << 5) | i.XS.SH));
+
+  // CA is set if any bits are shifted out of the right and if the result
+  // is negative. Start tracking that here.
+  GpVar ca(c.newGpVar());
+  c.mov(ca, imm(0xFFFFFFFFFFFFFFFF));
+  GpVar ca_sh(c.newGpVar());
+  c.mov(ca_sh, imm(63));
+  c.sub(ca_sh, sh);
+  c.shl(ca, ca_sh);
+  c.shr(ca, ca_sh);
+  c.and_(ca, v);
+  c.cmp(ca, imm(0));
+  c.xor_(ca, ca);
+  c.setnz(ca.r8());
+
+  // Shift right.
+  c.sar(v, sh);
+
+  // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number
+  // and any 1-bits are shifted out of position 63; otherwise CA is set to 0.
+  // We already have ca set to indicate the pos 63 bit, now just and in sign.
+  GpVar ca_2(c.newGpVar());
+  c.mov(ca_2, v);
+  c.shr(ca_2, imm(63));
+  c.and_(ca, ca_2);
+
+  e.update_gpr_value(i.XS.RT, v);
+  e.update_xer_with_carry(ca);
+
+  if (i.X.Rc) {
+    // With cr0 update.
+    e.update_cr_with_cond(0, v);
+  }
+
+  e.clear_constant_gpr_value(i.X.RA);
+
+  return 0;
 }
 
 XEEMITTER(srawx, 0x7C000630, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // n <- rB[59-63]
+  // r <- ROTL32((RS)[32:63], 64-n)
+  // m <- MASK(n+32, 63)
+  // s <- (RS)[32]
+  // RA <- r&m | (i64.s)&¬m
+  // CA <- s & ((r&¬m)[32:63]≠0)
+
+  // if n == 0: rA <- sign_extend(rS), XER[CA] = 0
+  // if n >= 32: rA <- 64 sign bits of rS, XER[CA] = sign bit of lo_32(rS)
+
+  GpVar v(c.newGpVar());
+  c.mov(v, e.gpr_value(i.X.RT));
+  GpVar sh(c.newGpVar());
+  c.mov(sh, e.gpr_value(i.X.RB));
+  c.and_(sh, imm(0x7F));
+
+  GpVar ca(c.newGpVar());
+  Label skip(c.newLabel());
+  Label full(c.newLabel());
+  c.test(sh, imm(0));
+  c.jnz(full);
+  {
+    // No shift, just a fancy sign extend and CA clearer.
+    c.cdqe(v);
+    c.mov(ca, imm(0));
+  }
+  c.jmp(skip);
+  c.bind(full);
+  {
+    // CA is set if any bits are shifted out of the right and if the result
+    // is negative. Start tracking that here.
+    c.mov(ca, v);
+    c.and_(ca, imm(~XEMASK(32 + i.X.RB, 64)));
+    c.cmp(ca, imm(0));
+    c.xor_(ca, ca);
+    c.setnz(ca.r8());
+
+    // Shift right and sign extend the 32bit part.
+    c.sar(v.r32(), imm(i.X.RB));
+    c.cdqe(v);
+
+    // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number
+    // and any 1-bits are shifted out of position 63; otherwise CA is set to 0.
+    // We already have ca set to indicate the shift bits, now just and in sign.
+    GpVar ca_2(c.newGpVar());
+    c.mov(ca_2, v.r32());
+    c.shr(ca_2, imm(31));
+    c.and_(ca, ca_2);
+  }
+  c.bind(skip);
+
+  e.update_gpr_value(i.X.RA, v);
+  e.update_xer_with_carry(ca);
+
+  if (i.X.Rc) {
+    // With cr0 update.
+    e.update_cr_with_cond(0, v);
+  }
+
+  e.clear_constant_gpr_value(i.X.RA);
+
+  return 0;
 }
 
 XEEMITTER(srawix, 0x7C000670, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
@@ -1355,6 +1467,9 @@ XEEMITTER(srawix, 0x7C000670, X )(X64Emitter& e, X86Compiler& c, InstrDat
   // RA <- r&m | (i64.s)&¬m
   // CA <- s & ((r&¬m)[32:63]≠0)
 
+  // if n == 0: rA <- sign_extend(rS), XER[CA] = 0
+  // if n >= 32: rA <- 64 sign bits of rS, XER[CA] = sign bit of lo_32(rS)
+
   GpVar v(c.newGpVar());
   c.mov(v, e.gpr_value(i.X.RT));
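Reference note (not part of the commit above): the comment blocks in the new emitters paraphrase the PowerPC shift-right-algebraic definitions. A minimal host-side C++ sketch of the result and XER[CA] computation those definitions describe, assuming an arithmetic '>>' on signed integers as produced by typical compilers; the function names are illustrative, not xenia APIs:

  #include <cstdint>

  // sradi rA,rS,n (0 <= n <= 63): arithmetic shift of the full 64 bits.
  // CA is set only when rS is negative and at least one 1 bit is shifted out.
  static uint64_t ppc_sradi(uint64_t rs, unsigned n, bool* ca) {
    int64_t s = static_cast<int64_t>(rs);
    uint64_t dropped = n ? (rs & ((uint64_t(1) << n) - 1)) : 0;
    *ca = (s < 0) && dropped != 0;
    return static_cast<uint64_t>(s >> n);  // assumes arithmetic >> on int64_t
  }

  // sraw rA,rS,rB: n is the low 6 bits of rB (0..63). n >= 32 yields 64
  // copies of the sign bit of the low 32 bits of rS, and CA is that sign bit.
  static uint64_t ppc_sraw(uint64_t rs, uint64_t rb, bool* ca) {
    unsigned n = static_cast<unsigned>(rb & 0x3F);
    int32_t lo = static_cast<int32_t>(rs);
    if (n >= 32) {
      *ca = lo < 0;
      return lo < 0 ? ~uint64_t(0) : 0;
    }
    uint32_t dropped = n ? (static_cast<uint32_t>(lo) & ((uint32_t(1) << n) - 1)) : 0;
    *ca = (lo < 0) && dropped != 0;
    return static_cast<uint64_t>(static_cast<int64_t>(lo >> n));  // sign-extend 32 -> 64
  }

The n >= 32 branch of ppc_sraw corresponds to the "if n >= 32" special case noted in the srawx/srawix comments; for sradi the immediate is always in [0, 63], so only the sign test and the dropped-bit test are needed.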