srawx, vsldoi, vupkd3d of D3DCOLOR -- all untested
This commit is contained in:
parent
3ec930d9fc
commit
eb2d596c27
|
@ -1249,85 +1249,51 @@ XEEMITTER(vslw128, VX128(6, 208), VX128 )(PPCFunctionBuilder& f, Inst
|
|||
return InstrEmit_vslw_(f, VX128_VD128, VX128_VA128, VX128_VB128);
|
||||
}
|
||||
|
||||
// static __m128i __shift_table_out[16] = {
|
||||
// _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), // unused
|
||||
// _mm_set_epi8( 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1),
|
||||
// _mm_set_epi8( 0, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2),
|
||||
// _mm_set_epi8( 0, 0, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3),
|
||||
// _mm_set_epi8( 0, 0, 0, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4),
|
||||
// _mm_set_epi8( 0, 0, 0, 0, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5),
|
||||
// _mm_set_epi8( 0, 0, 0, 0, 0, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6),
|
||||
// _mm_set_epi8( 0, 0, 0, 0, 0, 0, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7),
|
||||
// _mm_set_epi8( 0, 0, 0, 0, 0, 0, 0, 0, 15, 14, 13, 12, 11, 10, 9, 8),
|
||||
// _mm_set_epi8( 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 14, 13, 12, 11, 10, 9),
|
||||
// _mm_set_epi8( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 14, 13, 12, 11, 10),
|
||||
// _mm_set_epi8( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 14, 13, 12, 11),
|
||||
// _mm_set_epi8( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 14, 13, 12),
|
||||
// _mm_set_epi8( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 14, 13),
|
||||
// _mm_set_epi8( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 14),
|
||||
// _mm_set_epi8( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15),
|
||||
// };
|
||||
// static __m128i __shift_table_in[16] = {
|
||||
// _mm_set_epi8(15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15), // unused
|
||||
// _mm_set_epi8( 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15),
|
||||
// _mm_set_epi8( 1, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15),
|
||||
// _mm_set_epi8( 2, 1, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15),
|
||||
// _mm_set_epi8( 3, 2, 1, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15),
|
||||
// _mm_set_epi8( 4, 3, 2, 1, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15),
|
||||
// _mm_set_epi8( 5, 4, 3, 2, 1, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15),
|
||||
// _mm_set_epi8( 6, 5, 4, 3, 2, 1, 0, 15, 15, 15, 15, 15, 15, 15, 15, 15),
|
||||
// _mm_set_epi8( 7, 6, 5, 4, 3, 2, 1, 0, 15, 15, 15, 15, 15, 15, 15, 15),
|
||||
// _mm_set_epi8( 8, 7, 6, 5, 4, 3, 2, 1, 0, 15, 15, 15, 15, 15, 15, 15),
|
||||
// _mm_set_epi8( 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15, 15, 15, 15, 15, 15),
|
||||
// _mm_set_epi8(10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15, 15, 15, 15, 15),
|
||||
// _mm_set_epi8(11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15, 15, 15, 15),
|
||||
// _mm_set_epi8(12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15, 15, 15),
|
||||
// _mm_set_epi8(13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15, 15),
|
||||
// _mm_set_epi8(14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15),
|
||||
// };
|
||||
// int InstrEmit_vsldoi_(PPCFunctionBuilder& f, uint32_t vd, uint32_t va, uint32_t vb, uint32_t sh) {
|
||||
// // (VD) <- ((VA) || (VB)) << (SH << 3)
|
||||
// if (!sh) {
|
||||
// // No shift?
|
||||
// f.StoreVR(vd, f.LoadVR(va));
|
||||
// e.TraceVR(vd, va, vb);
|
||||
// return 0;
|
||||
// } else if (sh == 16) {
|
||||
// f.StoreVR(vd, f.LoadVR(vb));
|
||||
// e.TraceVR(vd, va, vb);
|
||||
// return 0;
|
||||
// }
|
||||
// // TODO(benvanik): optimize for the rotation case:
|
||||
// // vsldoi128 vr63,vr63,vr63,4
|
||||
// // (ABCD ABCD) << 4b = (BCDA)
|
||||
// // TODO(benvanik): rewrite this piece of shit.
|
||||
// XmmVar v(c.newXmmVar());
|
||||
// c.movaps(v, f.LoadVR(va));
|
||||
// XmmVar v_r(c.newXmmVar());
|
||||
// c.movaps(v_r, f.LoadVR(vb));
|
||||
// // (VA << SH) OR (VB >> (16 - SH))
|
||||
// GpVar gt(c.newGpVar());
|
||||
// c.xor_(gt, gt);
|
||||
// c.pinsrb(v, gt.r8(), imm(0));
|
||||
// c.pinsrb(v_r, gt.r8(), imm(15));
|
||||
// c.mov(gt, imm((sysint_t)&__shift_table_out[sh]));
|
||||
// XmmVar shuf(c.newXmmVar());
|
||||
// c.movaps(shuf, xmmword_ptr(gt));
|
||||
// c.pshufb(v, shuf);
|
||||
// c.mov(gt, imm((sysint_t)&__shift_table_in[sh]));
|
||||
// c.movaps(shuf, xmmword_ptr(gt));
|
||||
// c.pshufb(v_r, shuf);
|
||||
// c.por(v, v_r);
|
||||
// f.StoreVR(vd, v);
|
||||
// e.TraceVR(vd, va, vb);
|
||||
// return 0;
|
||||
// }
|
||||
// XEEMITTER(vsldoi, 0x1000002C, VXA )(PPCFunctionBuilder& f, InstrData& i) {
|
||||
// return InstrEmit_vsldoi_(f, i.VXA.VD, i.VXA.VA, i.VXA.VB, i.VXA.VC & 0xF);
|
||||
// }
|
||||
// XEEMITTER(vsldoi128, VX128_5(4, 16), VX128_5)(PPCFunctionBuilder& f, InstrData& i) {
|
||||
// return InstrEmit_vsldoi_(f, VX128_5_VD128, VX128_5_VA128, VX128_5_VB128, VX128_5_SH);
|
||||
// }
|
||||
// Permute control bytes used by InstrEmit_vsldoi_, indexed as [sh][byte].
// Row `sh` holds the descending run (sh + 15) .. sh, so entry [sh][n] is
// 15 + sh - n. Row 0 is never read: a shift of 0 is emitted as a plain copy
// before the table lookup.
// NOTE(review): values 16 and above presumably select bytes from the second
// Permute source -- confirm against the Permute implementation.
// The table is read-only, hence const.
static const uint8_t __vsldoi_table[16][16] = {
  {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, // unused
  {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1},
  {17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2},
  {18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3},
  {19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4},
  {20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5},
  {21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6},
  {22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7},
  {23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8},
  {24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9},
  {25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10},
  {26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11},
  {27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12},
  {28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13},
  {29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14},
  {30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15},
};
|
||||
// Emits IR for the vsldoi family:
//   (VD) <- ((VA) || (VB)) << (SH << 3)
//
// f  - function builder the IR is emitted through.
// vd - index of the destination vector register.
// va - index of the first source vector register.
// vb - index of the second source vector register.
// sh - shift amount in bytes. 0 stores VA unchanged and 16 stores VB
//      unchanged; any other value indexes __vsldoi_table and must therefore
//      be in 1..15 (both visible callers pass a decoded instruction field).
//
// Always returns 0.
int InstrEmit_vsldoi_(PPCFunctionBuilder& f, uint32_t vd, uint32_t va, uint32_t vb, uint32_t sh) {
  // (VD) <- ((VA) || (VB)) << (SH << 3)
  if (!sh) {
    // No shift: the result is VA.
    f.StoreVR(vd, f.LoadVR(va));
    return 0;
  } else if (sh == 16) {
    // Full-width shift: the result is VB.
    f.StoreVR(vd, f.LoadVR(vb));
    return 0;
  }
  // TODO(benvanik): optimize for the rotation case:
  // vsldoi128 vr63,vr63,vr63,4
  // (ABCD ABCD) << 4b = (BCDA)
  // (VA << SH) OR (VB >> (16 - SH))
  // Copy the control bytes into a real vec128_t instead of reinterpreting the
  // uint8_t table row through a vec128_t pointer: the row carries no vector
  // alignment guarantee and the cast violates the aliasing rules.
  // NOTE(review): assumes vec128_t::b16 is the 16-entry byte view of the
  // vector in memory order -- confirm against the vec128_t definition.
  vec128_t control_v = { 0 };
  for (int n = 0; n < 16; ++n) {
    control_v.b16[n] = __vsldoi_table[sh][n];
  }
  Value* control = f.LoadConstant(control_v);
  Value* v = f.Permute(
      control,
      f.LoadVR(va),
      f.LoadVR(vb), INT8_TYPE);
  f.StoreVR(vd, v);
  return 0;
}
|
||||
// vsldoi (VXA form): forwards the decoded register fields to the shared
// emitter; the byte shift is the low four bits of the VC field.
XEEMITTER(vsldoi, 0x1000002C, VXA )(PPCFunctionBuilder& f, InstrData& i) {
  const uint32_t shift_bytes = i.VXA.VC & 0xF;
  return InstrEmit_vsldoi_(f, i.VXA.VD, i.VXA.VA, i.VXA.VB, shift_bytes);
}
|
||||
// vsldoi128 (VX128_5 form): decodes the 128-register operand fields and the
// shift amount, then defers to the shared vsldoi emitter.
XEEMITTER(vsldoi128, VX128_5(4, 16), VX128_5)(PPCFunctionBuilder& f, InstrData& i) {
  const uint32_t dest_reg = VX128_5_VD128;
  const uint32_t src_a_reg = VX128_5_VA128;
  const uint32_t src_b_reg = VX128_5_VB128;
  const uint32_t shift_bytes = VX128_5_SH;
  return InstrEmit_vsldoi_(f, dest_reg, src_a_reg, src_b_reg, shift_bytes);
}
|
||||
|
||||
XEEMITTER(vslo, 0x1000040C, VX )(PPCFunctionBuilder& f, InstrData& i) {
|
||||
XEINSTRNOTIMPLEMENTED();
|
||||
|
@ -1637,45 +1603,27 @@ XEEMITTER(vupkd3d128, VX128_3(6, 2032), VX128_3)(PPCFunctionBuilder& f, Inst
|
|||
switch (type) {
|
||||
case 0: // VPACK_D3DCOLOR
|
||||
{
|
||||
XEASSERTALWAYS();
|
||||
return 1;
|
||||
// http://hlssmod.net/he_code/public/pixelwriter.h
|
||||
// ARGB (WXYZ) -> RGBA (XYZW)
|
||||
// zzzzZZZZzzzzARGB
|
||||
//c.movaps(vt, f.LoadVR(vb));
|
||||
//// zzzzZZZZzzzzARGB
|
||||
//// 000R000G000B000A
|
||||
//c.mov(gt, imm(
|
||||
// ((1ull << 7) << 56) |
|
||||
// ((1ull << 7) << 48) |
|
||||
// ((1ull << 7) << 40) |
|
||||
// ((0ull) << 32) | // B
|
||||
// ((1ull << 7) << 24) |
|
||||
// ((1ull << 7) << 16) |
|
||||
// ((1ull << 7) << 8) |
|
||||
// ((3ull) << 0)) // A
|
||||
// ); // lo
|
||||
//c.movq(v, gt);
|
||||
//c.mov(gt, imm(
|
||||
// ((1ull << 7) << 56) |
|
||||
// ((1ull << 7) << 48) |
|
||||
// ((1ull << 7) << 40) |
|
||||
// ((2ull) << 32) | // R
|
||||
// ((1ull << 7) << 24) |
|
||||
// ((1ull << 7) << 16) |
|
||||
// ((1ull << 7) << 8) |
|
||||
// ((1ull) << 0)) // G
|
||||
// ); // hi
|
||||
//c.pinsrq(v, gt, imm(1));
|
||||
//c.pshufb(vt, v);
|
||||
//// {256*R.0, 256*G.0, 256*B.0, 256*A.0}
|
||||
//c.cvtdq2ps(v, vt);
|
||||
//// {R.0, G.0, B.0 A.0}
|
||||
//// 1/256 = 0.00390625 = 0x3B800000
|
||||
//c.mov(gt, imm(0x3B800000));
|
||||
//c.movd(vt, gt.r32());
|
||||
//c.shufps(vt, vt, imm(0));
|
||||
//c.mulps(v, vt);
|
||||
v = f.LoadVR(vb);
|
||||
// 0zzzZZZZzzzzARGB
|
||||
v = f.Insert(v, 0, f.LoadConstant((int8_t)0));
|
||||
// 000R000G000B000A
|
||||
vec128_t shuf_v = { 0 };
|
||||
shuf_v.b16[3] = 13;
|
||||
shuf_v.b16[7] = 14;
|
||||
shuf_v.b16[11] = 15;
|
||||
shuf_v.b16[15] = 12;
|
||||
Value* shuf = f.LoadConstant(shuf_v);
|
||||
v = f.Permute(shuf, v, v, INT8_TYPE);
|
||||
// {256*R.0, 256*G.0, 256*B.0, 256*A.0}
|
||||
v = f.VectorConvertI2F(v);
|
||||
// {R.0, G.0, B.0 A.0}
|
||||
// 1/256 = 0.00390625 = 0x3B800000
|
||||
v = f.Mul(
|
||||
v,
|
||||
f.Splat(f.LoadConstant((uint32_t)0x3B800000), VEC128_TYPE));
|
||||
}
|
||||
break;
|
||||
case 1: // VPACK_NORMSHORT2
|
||||
|
@ -1955,8 +1903,8 @@ void RegisterEmitCategoryAltivec() {
|
|||
XEREGISTERINSTR(vslo128, VX128(5, 912));
|
||||
XEREGISTERINSTR(vslw, 0x10000184);
|
||||
XEREGISTERINSTR(vslw128, VX128(6, 208));
|
||||
// XEREGISTERINSTR(vsldoi, 0x1000002C);
|
||||
// XEREGISTERINSTR(vsldoi128, VX128_5(4, 16));
|
||||
XEREGISTERINSTR(vsldoi, 0x1000002C);
|
||||
XEREGISTERINSTR(vsldoi128, VX128_5(4, 16));
|
||||
XEREGISTERINSTR(vspltb, 0x1000020C);
|
||||
XEREGISTERINSTR(vsplth, 0x1000024C);
|
||||
XEREGISTERINSTR(vspltw, 0x1000028C);
|
||||
|
|
|
@ -1159,65 +1159,34 @@ XEEMITTER(sradix, 0x7C000674, XS )(PPCFunctionBuilder& f, InstrData& i) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// XEEMITTER(srawx, 0x7C000630, X )(PPCFunctionBuilder& f, InstrData& i) {
|
||||
// // n <- rB[59-63]
|
||||
// // r <- ROTL32((RS)[32:63], 64-n)
|
||||
// // m <- MASK(n+32, 63)
|
||||
// // s <- (RS)[32]
|
||||
// // RA <- r&m | (i64.s)&¬m
|
||||
// // CA <- s & ((r&¬m)[32:63]≠0)
|
||||
|
||||
// // if n == 0: rA <- sign_extend(rS), XER[CA] = 0
|
||||
// // if n >= 32: rA <- 64 sign bits of rS, XER[CA] = sign bit of lo_32(rS)
|
||||
|
||||
// GpVar v(c.newGpVar());
|
||||
// c.mov(v, f.LoadGPR(i.X.RT));
|
||||
// GpVar sh(c.newGpVar());
|
||||
// c.mov(sh, f.LoadGPR(i.X.RB));
|
||||
// c.and_(sh, imm(0x7F));
|
||||
|
||||
// GpVar ca(c.newGpVar());
|
||||
// Label skip(c.newLabel());
|
||||
// Label full(c.newLabel());
|
||||
// c.test(sh, sh);
|
||||
// c.jnz(full);
|
||||
// {
|
||||
// // No shift, just a fancy sign extend and CA clearer.
|
||||
// c.cdqe(v);
|
||||
// c.mov(ca, imm(0));
|
||||
// }
|
||||
// c.jmp(skip);
|
||||
// c.bind(full);
|
||||
// {
|
||||
// // CA is set if any bits are shifted out of the right and if the result
|
||||
// // is negative. Start tracking that here.
|
||||
// c.mov(ca, v);
|
||||
// c.and_(ca, imm(~XEMASK(32 + i.X.RB, 64)));
|
||||
// c.cmp(ca, imm(0));
|
||||
// c.xor_(ca, ca);
|
||||
// c.setnz(ca.r8());
|
||||
|
||||
// // Shift right and sign extend the 32bit part.
|
||||
// c.sar(v.r32(), imm(i.X.RB));
|
||||
// c.cdqe(v);
|
||||
|
||||
// // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number
|
||||
// // and any 1-bits are shifted out of position 63; otherwise CA is set to 0.
|
||||
// // We already have ca set to indicate the shift bits, now just and in sign.
|
||||
// GpVar ca_2(c.newGpVar());
|
||||
// c.mov(ca_2, v.r32());
|
||||
// c.shr(ca_2, imm(31));
|
||||
// c.and_(ca, ca_2);
|
||||
// }
|
||||
// c.bind(skip);
|
||||
|
||||
// f.StoreGPR(i.X.RA, v);
|
||||
// e.update_xer_with_carry(ca);
|
||||
// if (i.X.Rc) {
|
||||
// f.UpdateCR(0, v);
|
||||
// }
|
||||
// return 0;
|
||||
// }
|
||||
// Emits IR for srawx.
//
// The GPR named by the RT field is truncated to 32 bits, shifted right with
// Sha by an amount taken from the GPR named by RB, sign-extended to 64 bits
// and stored to the GPR named by RA. CA is stored as
//   (v >> 31) AND ((v AND ~(-1 << sh)) is nonzero)
// i.e. the value is negative and at least one set bit is shifted out.
// When the Rc field is set, CR field 0 is updated from the result.
//
// Always returns 0.
XEEMITTER(srawx, 0x7C000630, X )(PPCFunctionBuilder& f, InstrData& i) {
  // n <- rB[59-63]
  // r <- ROTL32((RS)[32:63], 64-n)
  // m <- MASK(n+32, 63)
  // s <- (RS)[32]
  // RA <- r&m | (i64.s)&¬m
  // CA <- s & ((r&¬m)[32:63]≠0)
  // if n == 0: rA <- sign_extend(rS), XER[CA] = 0
  // if n >= 32: rA <- 64 sign bits of rS, XER[CA] = sign bit of lo_32(rS)
  Value* v = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
  // NOTE(review): the pseudo-code above uses a 5-bit n, but the mask applied
  // here is 0x7F and is built as an int8_t constant while the other operand
  // is INT32 -- confirm the intended mask width and operand types.
  Value* sh = f.And(
      f.Truncate(f.LoadGPR(i.X.RB), INT32_TYPE),
      f.LoadConstant((int8_t)0x7F));
  // CA is set if any bits are shifted out of the right and if the result
  // is negative.
  // mask covers the low `sh` bits, i.e. the bits that the shift discards.
  Value* mask = f.Not(f.Shl(f.LoadConstant(-1), sh));
  Value* ca = f.And(
      f.Shr(v, 31),
      f.IsTrue(f.And(v, mask)));
  f.StoreCA(ca);
  // Previously terminated by a stray comma operator; made an ordinary
  // statement. Evaluation order and result are unchanged.
  v = f.Sha(v, sh);
  v = f.SignExtend(v, INT64_TYPE);
  f.StoreGPR(i.X.RA, v);
  if (i.X.Rc) {
    f.UpdateCR(0, v);
  }
  return 0;
}
|
||||
|
||||
XEEMITTER(srawix, 0x7C000670, X )(PPCFunctionBuilder& f, InstrData& i) {
|
||||
// n <- SH
|
||||
|
@ -1226,10 +1195,8 @@ XEEMITTER(srawix, 0x7C000670, X )(PPCFunctionBuilder& f, InstrData& i) {
|
|||
// s <- (RS)[32]
|
||||
// RA <- r&m | (i64.s)&¬m
|
||||
// CA <- s & ((r&¬m)[32:63]≠0)
|
||||
|
||||
// if n == 0: rA <- sign_extend(rS), XER[CA] = 0
|
||||
// if n >= 32: rA <- 64 sign bits of rS, XER[CA] = sign bit of lo_32(rS)
|
||||
|
||||
Value* v = f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE);
|
||||
Value* ca;
|
||||
if (!i.X.RB) {
|
||||
|
@ -1323,7 +1290,7 @@ void RegisterEmitCategoryALU() {
|
|||
XEREGISTERINSTR(srwx, 0x7C000430);
|
||||
// XEREGISTERINSTR(sradx, 0x7C000634);
|
||||
XEREGISTERINSTR(sradix, 0x7C000674);
|
||||
// XEREGISTERINSTR(srawx, 0x7C000630);
|
||||
XEREGISTERINSTR(srawx, 0x7C000630);
|
||||
XEREGISTERINSTR(srawix, 0x7C000670);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue