Fixing permute functions.
This commit is contained in:
parent
a1f41c656a
commit
21d1e7236b
|
@ -3071,12 +3071,10 @@ uint32_t IntCode_PERMUTE_V128_BY_INT32(IntCodeState& ics, const IntCode* i) {
|
|||
const vec128_t& src3 = ics.rf[i->src3_reg].v128;
|
||||
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
#define SWAP_INLINE(x) (((x) & ~0x3) + (3 - ((x) % 4)))
|
||||
size_t m = SWAP_INLINE(i);
|
||||
size_t b = (src1 >> (m * 8)) & 0x3;
|
||||
dest.i4[m] = b < 4 ?
|
||||
src2.i4[SWAP_INLINE(b)] :
|
||||
src3.i4[SWAP_INLINE(b - 4)];
|
||||
size_t b = (src1 >> ((3 - i) * 8)) & 0x7;
|
||||
dest.i4[i] = b < 4 ?
|
||||
src2.i4[b] :
|
||||
src3.i4[b - 4];
|
||||
}
|
||||
return IA_NEXT;
|
||||
}
|
||||
|
@ -3086,12 +3084,10 @@ uint32_t IntCode_PERMUTE_V128_BY_V128(IntCodeState& ics, const IntCode* i) {
|
|||
const vec128_t& src3 = ics.rf[i->src3_reg].v128;
|
||||
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
||||
for (size_t i = 0; i < 16; i++) {
|
||||
#define SWAP_INLINE(x) (((x) & ~0x3) + (3 - ((x) % 4)))
|
||||
size_t m = SWAP_INLINE(i);
|
||||
size_t b = src1.b16[m] & 0x1F;
|
||||
dest.b16[m] = b < 16 ?
|
||||
src2.b16[SWAP_INLINE(b)] :
|
||||
src3.b16[SWAP_INLINE(b - 16)];
|
||||
size_t b = src1.b16[i] & 0x1F;
|
||||
dest.b16[i] = b < 16 ?
|
||||
src2.b16[b] :
|
||||
src3.b16[b - 16];
|
||||
}
|
||||
return IA_NEXT;
|
||||
}
|
||||
|
|
|
@ -801,7 +801,7 @@ int InstrEmit_vmrghw_(PPCFunctionBuilder& f, uint32_t vd, uint32_t va, uint32_t
|
|||
// (VD.z) = (VA.y)
|
||||
// (VD.w) = (VB.y)
|
||||
Value* v = f.Permute(
|
||||
f.LoadConstant(0x05010400),
|
||||
f.LoadConstant(0x00040105),
|
||||
f.LoadVR(va),
|
||||
f.LoadVR(vb),
|
||||
INT32_TYPE);
|
||||
|
@ -831,7 +831,7 @@ int InstrEmit_vmrglw_(PPCFunctionBuilder& f, uint32_t vd, uint32_t va, uint32_t
|
|||
// (VD.z) = (VA.w)
|
||||
// (VD.w) = (VB.w)
|
||||
Value* v = f.Permute(
|
||||
f.LoadConstant(0x07030602),
|
||||
f.LoadConstant(0x02060307),
|
||||
f.LoadVR(va),
|
||||
f.LoadVR(vb),
|
||||
INT32_TYPE);
|
||||
|
@ -1164,7 +1164,11 @@ XEEMITTER(vrlw128, VX128(6, 80), VX128 )(PPCFunctionBuilder& f, Inst
|
|||
XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCFunctionBuilder& f, InstrData& i) {
|
||||
const uint32_t vd = i.VX128_4.VD128l | (i.VX128_4.VD128h << 5);
|
||||
const uint32_t vb = i.VX128_4.VB128l | (i.VX128_4.VB128h << 5);
|
||||
uint32_t blend_mask = i.VX128_4.IMM;
|
||||
uint32_t blend_mask_src = i.VX128_4.IMM;
|
||||
uint32_t blend_mask = 0;
|
||||
for (int n = 0; n < 4; n++) {
|
||||
blend_mask |= ((blend_mask_src >> n) ? n : (n + 4)) << ((3 - n) * 8);
|
||||
}
|
||||
uint32_t rotate = i.VX128_4.z;
|
||||
// This is just a fancy permute.
|
||||
// X Y Z W, rotated left by 2 = Z W X Y
|
||||
|
@ -1193,8 +1197,10 @@ XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCFunctionBuilder& f, Inst
|
|||
} else {
|
||||
v = f.LoadVR(vb);
|
||||
}
|
||||
v = f.Permute(
|
||||
f.LoadConstant(blend_mask), v, f.LoadVR(vd), FLOAT32_TYPE);
|
||||
if (blend_mask != 0x00010203) {
|
||||
v = f.Permute(
|
||||
f.LoadConstant(blend_mask), v, f.LoadVR(vd), INT32_TYPE);
|
||||
}
|
||||
f.StoreVR(vd, v);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -49,7 +49,7 @@ enum ArithmeticFlags {
|
|||
ARITHMETIC_SET_CARRY = (1 << 1),
|
||||
};
|
||||
enum Permutes {
|
||||
PERMUTE_XY_ZW = 0x05040100,
|
||||
PERMUTE_XY_ZW = 0x00010405,
|
||||
};
|
||||
enum Swizzles {
|
||||
SWIZZLE_XYZW_TO_XYZW = 0xE4,
|
||||
|
|
Loading…
Reference in New Issue