Fixing permute functions.

This commit is contained in:
Ben Vanik 2013-12-14 06:24:29 -08:00
parent a1f41c656a
commit 21d1e7236b
3 changed files with 20 additions and 18 deletions

View File

@@ -3071,12 +3071,10 @@ uint32_t IntCode_PERMUTE_V128_BY_INT32(IntCodeState& ics, const IntCode* i) {
const vec128_t& src3 = ics.rf[i->src3_reg].v128;
vec128_t& dest = ics.rf[i->dest_reg].v128;
for (size_t i = 0; i < 4; i++) {
#define SWAP_INLINE(x) (((x) & ~0x3) + (3 - ((x) % 4)))
size_t m = SWAP_INLINE(i);
size_t b = (src1 >> (m * 8)) & 0x3;
dest.i4[m] = b < 4 ?
src2.i4[SWAP_INLINE(b)] :
src3.i4[SWAP_INLINE(b - 4)];
size_t b = (src1 >> ((3 - i) * 8)) & 0x7;
dest.i4[i] = b < 4 ?
src2.i4[b] :
src3.i4[b - 4];
}
return IA_NEXT;
}
@@ -3086,12 +3084,10 @@ uint32_t IntCode_PERMUTE_V128_BY_V128(IntCodeState& ics, const IntCode* i) {
const vec128_t& src3 = ics.rf[i->src3_reg].v128;
vec128_t& dest = ics.rf[i->dest_reg].v128;
for (size_t i = 0; i < 16; i++) {
#define SWAP_INLINE(x) (((x) & ~0x3) + (3 - ((x) % 4)))
size_t m = SWAP_INLINE(i);
size_t b = src1.b16[m] & 0x1F;
dest.b16[m] = b < 16 ?
src2.b16[SWAP_INLINE(b)] :
src3.b16[SWAP_INLINE(b - 16)];
size_t b = src1.b16[i] & 0x1F;
dest.b16[i] = b < 16 ?
src2.b16[b] :
src3.b16[b - 16];
}
return IA_NEXT;
}

View File

@@ -801,7 +801,7 @@ int InstrEmit_vmrghw_(PPCFunctionBuilder& f, uint32_t vd, uint32_t va, uint32_t
// (VD.z) = (VA.y)
// (VD.w) = (VB.y)
Value* v = f.Permute(
f.LoadConstant(0x05010400),
f.LoadConstant(0x00040105),
f.LoadVR(va),
f.LoadVR(vb),
INT32_TYPE);
@@ -831,7 +831,7 @@ int InstrEmit_vmrglw_(PPCFunctionBuilder& f, uint32_t vd, uint32_t va, uint32_t
// (VD.z) = (VA.w)
// (VD.w) = (VB.w)
Value* v = f.Permute(
f.LoadConstant(0x07030602),
f.LoadConstant(0x02060307),
f.LoadVR(va),
f.LoadVR(vb),
INT32_TYPE);
@@ -1164,7 +1164,11 @@ XEEMITTER(vrlw128, VX128(6, 80), VX128 )(PPCFunctionBuilder& f, Inst
XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCFunctionBuilder& f, InstrData& i) {
const uint32_t vd = i.VX128_4.VD128l | (i.VX128_4.VD128h << 5);
const uint32_t vb = i.VX128_4.VB128l | (i.VX128_4.VB128h << 5);
uint32_t blend_mask = i.VX128_4.IMM;
uint32_t blend_mask_src = i.VX128_4.IMM;
uint32_t blend_mask = 0;
for (int n = 0; n < 4; n++) {
blend_mask |= ((blend_mask_src >> n) ? n : (n + 4)) << ((3 - n) * 8);
}
uint32_t rotate = i.VX128_4.z;
// This is just a fancy permute.
// X Y Z W, rotated left by 2 = Z W X Y
@@ -1193,8 +1197,10 @@ XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCFunctionBuilder& f, Inst
} else {
v = f.LoadVR(vb);
}
v = f.Permute(
f.LoadConstant(blend_mask), v, f.LoadVR(vd), FLOAT32_TYPE);
if (blend_mask != 0x00010203) {
v = f.Permute(
f.LoadConstant(blend_mask), v, f.LoadVR(vd), INT32_TYPE);
}
f.StoreVR(vd, v);
return 0;
}

View File

@@ -49,7 +49,7 @@ enum ArithmeticFlags {
ARITHMETIC_SET_CARRY = (1 << 1),
};
enum Permutes {
PERMUTE_XY_ZW = 0x05040100,
PERMUTE_XY_ZW = 0x00010405,
};
enum Swizzles {
SWIZZLE_XYZW_TO_XYZW = 0xE4,