Fixing permute functions.

This commit is contained in:
Ben Vanik 2013-12-14 06:24:29 -08:00
parent a1f41c656a
commit 21d1e7236b
3 changed files with 20 additions and 18 deletions

View File

@@ -3071,12 +3071,10 @@ uint32_t IntCode_PERMUTE_V128_BY_INT32(IntCodeState& ics, const IntCode* i) {
const vec128_t& src3 = ics.rf[i->src3_reg].v128; const vec128_t& src3 = ics.rf[i->src3_reg].v128;
vec128_t& dest = ics.rf[i->dest_reg].v128; vec128_t& dest = ics.rf[i->dest_reg].v128;
for (size_t i = 0; i < 4; i++) { for (size_t i = 0; i < 4; i++) {
#define SWAP_INLINE(x) (((x) & ~0x3) + (3 - ((x) % 4))) size_t b = (src1 >> ((3 - i) * 8)) & 0x7;
size_t m = SWAP_INLINE(i); dest.i4[i] = b < 4 ?
size_t b = (src1 >> (m * 8)) & 0x3; src2.i4[b] :
dest.i4[m] = b < 4 ? src3.i4[b - 4];
src2.i4[SWAP_INLINE(b)] :
src3.i4[SWAP_INLINE(b - 4)];
} }
return IA_NEXT; return IA_NEXT;
} }
@@ -3086,12 +3084,10 @@ uint32_t IntCode_PERMUTE_V128_BY_V128(IntCodeState& ics, const IntCode* i) {
const vec128_t& src3 = ics.rf[i->src3_reg].v128; const vec128_t& src3 = ics.rf[i->src3_reg].v128;
vec128_t& dest = ics.rf[i->dest_reg].v128; vec128_t& dest = ics.rf[i->dest_reg].v128;
for (size_t i = 0; i < 16; i++) { for (size_t i = 0; i < 16; i++) {
#define SWAP_INLINE(x) (((x) & ~0x3) + (3 - ((x) % 4))) size_t b = src1.b16[i] & 0x1F;
size_t m = SWAP_INLINE(i); dest.b16[i] = b < 16 ?
size_t b = src1.b16[m] & 0x1F; src2.b16[b] :
dest.b16[m] = b < 16 ? src3.b16[b - 16];
src2.b16[SWAP_INLINE(b)] :
src3.b16[SWAP_INLINE(b - 16)];
} }
return IA_NEXT; return IA_NEXT;
} }

View File

@@ -801,7 +801,7 @@ int InstrEmit_vmrghw_(PPCFunctionBuilder& f, uint32_t vd, uint32_t va, uint32_t
// (VD.z) = (VA.y) // (VD.z) = (VA.y)
// (VD.w) = (VB.y) // (VD.w) = (VB.y)
Value* v = f.Permute( Value* v = f.Permute(
f.LoadConstant(0x05010400), f.LoadConstant(0x00040105),
f.LoadVR(va), f.LoadVR(va),
f.LoadVR(vb), f.LoadVR(vb),
INT32_TYPE); INT32_TYPE);
@@ -831,7 +831,7 @@ int InstrEmit_vmrglw_(PPCFunctionBuilder& f, uint32_t vd, uint32_t va, uint32_t
// (VD.z) = (VA.w) // (VD.z) = (VA.w)
// (VD.w) = (VB.w) // (VD.w) = (VB.w)
Value* v = f.Permute( Value* v = f.Permute(
f.LoadConstant(0x07030602), f.LoadConstant(0x02060307),
f.LoadVR(va), f.LoadVR(va),
f.LoadVR(vb), f.LoadVR(vb),
INT32_TYPE); INT32_TYPE);
@@ -1164,7 +1164,11 @@ XEEMITTER(vrlw128, VX128(6, 80), VX128 )(PPCFunctionBuilder& f, Inst
XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCFunctionBuilder& f, InstrData& i) { XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCFunctionBuilder& f, InstrData& i) {
const uint32_t vd = i.VX128_4.VD128l | (i.VX128_4.VD128h << 5); const uint32_t vd = i.VX128_4.VD128l | (i.VX128_4.VD128h << 5);
const uint32_t vb = i.VX128_4.VB128l | (i.VX128_4.VB128h << 5); const uint32_t vb = i.VX128_4.VB128l | (i.VX128_4.VB128h << 5);
uint32_t blend_mask = i.VX128_4.IMM; uint32_t blend_mask_src = i.VX128_4.IMM;
uint32_t blend_mask = 0;
for (int n = 0; n < 4; n++) {
blend_mask |= ((blend_mask_src >> n) ? n : (n + 4)) << ((3 - n) * 8);
}
uint32_t rotate = i.VX128_4.z; uint32_t rotate = i.VX128_4.z;
// This is just a fancy permute. // This is just a fancy permute.
// X Y Z W, rotated left by 2 = Z W X Y // X Y Z W, rotated left by 2 = Z W X Y
@@ -1193,8 +1197,10 @@ XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCFunctionBuilder& f, Inst
} else { } else {
v = f.LoadVR(vb); v = f.LoadVR(vb);
} }
v = f.Permute( if (blend_mask != 0x00010203) {
f.LoadConstant(blend_mask), v, f.LoadVR(vd), FLOAT32_TYPE); v = f.Permute(
f.LoadConstant(blend_mask), v, f.LoadVR(vd), INT32_TYPE);
}
f.StoreVR(vd, v); f.StoreVR(vd, v);
return 0; return 0;
} }

View File

@@ -49,7 +49,7 @@ enum ArithmeticFlags {
ARITHMETIC_SET_CARRY = (1 << 1), ARITHMETIC_SET_CARRY = (1 << 1),
}; };
enum Permutes { enum Permutes {
PERMUTE_XY_ZW = 0x05040100, PERMUTE_XY_ZW = 0x00010405,
}; };
enum Swizzles { enum Swizzles {
SWIZZLE_XYZW_TO_XYZW = 0xE4, SWIZZLE_XYZW_TO_XYZW = 0xE4,