Fixing permute functions.
This commit is contained in:
parent
a1f41c656a
commit
21d1e7236b
|
@ -3071,12 +3071,10 @@ uint32_t IntCode_PERMUTE_V128_BY_INT32(IntCodeState& ics, const IntCode* i) {
|
||||||
const vec128_t& src3 = ics.rf[i->src3_reg].v128;
|
const vec128_t& src3 = ics.rf[i->src3_reg].v128;
|
||||||
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
||||||
for (size_t i = 0; i < 4; i++) {
|
for (size_t i = 0; i < 4; i++) {
|
||||||
#define SWAP_INLINE(x) (((x) & ~0x3) + (3 - ((x) % 4)))
|
size_t b = (src1 >> ((3 - i) * 8)) & 0x7;
|
||||||
size_t m = SWAP_INLINE(i);
|
dest.i4[i] = b < 4 ?
|
||||||
size_t b = (src1 >> (m * 8)) & 0x3;
|
src2.i4[b] :
|
||||||
dest.i4[m] = b < 4 ?
|
src3.i4[b - 4];
|
||||||
src2.i4[SWAP_INLINE(b)] :
|
|
||||||
src3.i4[SWAP_INLINE(b - 4)];
|
|
||||||
}
|
}
|
||||||
return IA_NEXT;
|
return IA_NEXT;
|
||||||
}
|
}
|
||||||
|
@ -3086,12 +3084,10 @@ uint32_t IntCode_PERMUTE_V128_BY_V128(IntCodeState& ics, const IntCode* i) {
|
||||||
const vec128_t& src3 = ics.rf[i->src3_reg].v128;
|
const vec128_t& src3 = ics.rf[i->src3_reg].v128;
|
||||||
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
#define SWAP_INLINE(x) (((x) & ~0x3) + (3 - ((x) % 4)))
|
size_t b = src1.b16[i] & 0x1F;
|
||||||
size_t m = SWAP_INLINE(i);
|
dest.b16[i] = b < 16 ?
|
||||||
size_t b = src1.b16[m] & 0x1F;
|
src2.b16[b] :
|
||||||
dest.b16[m] = b < 16 ?
|
src3.b16[b - 16];
|
||||||
src2.b16[SWAP_INLINE(b)] :
|
|
||||||
src3.b16[SWAP_INLINE(b - 16)];
|
|
||||||
}
|
}
|
||||||
return IA_NEXT;
|
return IA_NEXT;
|
||||||
}
|
}
|
||||||
|
|
|
@ -801,7 +801,7 @@ int InstrEmit_vmrghw_(PPCFunctionBuilder& f, uint32_t vd, uint32_t va, uint32_t
|
||||||
// (VD.z) = (VA.y)
|
// (VD.z) = (VA.y)
|
||||||
// (VD.w) = (VB.y)
|
// (VD.w) = (VB.y)
|
||||||
Value* v = f.Permute(
|
Value* v = f.Permute(
|
||||||
f.LoadConstant(0x05010400),
|
f.LoadConstant(0x00040105),
|
||||||
f.LoadVR(va),
|
f.LoadVR(va),
|
||||||
f.LoadVR(vb),
|
f.LoadVR(vb),
|
||||||
INT32_TYPE);
|
INT32_TYPE);
|
||||||
|
@ -831,7 +831,7 @@ int InstrEmit_vmrglw_(PPCFunctionBuilder& f, uint32_t vd, uint32_t va, uint32_t
|
||||||
// (VD.z) = (VA.w)
|
// (VD.z) = (VA.w)
|
||||||
// (VD.w) = (VB.w)
|
// (VD.w) = (VB.w)
|
||||||
Value* v = f.Permute(
|
Value* v = f.Permute(
|
||||||
f.LoadConstant(0x07030602),
|
f.LoadConstant(0x02060307),
|
||||||
f.LoadVR(va),
|
f.LoadVR(va),
|
||||||
f.LoadVR(vb),
|
f.LoadVR(vb),
|
||||||
INT32_TYPE);
|
INT32_TYPE);
|
||||||
|
@ -1164,7 +1164,11 @@ XEEMITTER(vrlw128, VX128(6, 80), VX128 )(PPCFunctionBuilder& f, Inst
|
||||||
XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCFunctionBuilder& f, InstrData& i) {
|
XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCFunctionBuilder& f, InstrData& i) {
|
||||||
const uint32_t vd = i.VX128_4.VD128l | (i.VX128_4.VD128h << 5);
|
const uint32_t vd = i.VX128_4.VD128l | (i.VX128_4.VD128h << 5);
|
||||||
const uint32_t vb = i.VX128_4.VB128l | (i.VX128_4.VB128h << 5);
|
const uint32_t vb = i.VX128_4.VB128l | (i.VX128_4.VB128h << 5);
|
||||||
uint32_t blend_mask = i.VX128_4.IMM;
|
uint32_t blend_mask_src = i.VX128_4.IMM;
|
||||||
|
uint32_t blend_mask = 0;
|
||||||
|
// Expand the 4-bit blend immediate into a 4-byte permute control word.
// The control byte for destination lane n sits at bit offset (3 - n) * 8.
// A byte value of n selects lane n of the first permute operand; n + 4
// selects lane n of the second operand.
//
// Each immediate bit has to be tested in isolation. Without the `& 0x1`
// the test is true whenever ANY bit at position >= n is set, so higher
// bits leak into the decision for lower lanes (e.g. IMM = 0b0100 would
// pick the first operand for lanes 0, 1 and 2 instead of lane 2 only).
//
// NOTE(review): bit n is taken to control lane n, exactly as the previous
// expression did; confirm the bit-to-lane order against the instruction
// definition before relying on it.
for (int n = 0; n < 4; n++) {
  const uint32_t lane_bit = (blend_mask_src >> n) & 0x1;
  const uint32_t selector = lane_bit ? n : (n + 4);
  blend_mask |= selector << ((3 - n) * 8);
}
|
||||||
uint32_t rotate = i.VX128_4.z;
|
uint32_t rotate = i.VX128_4.z;
|
||||||
// This is just a fancy permute.
|
// This is just a fancy permute.
|
||||||
// X Y Z W, rotated left by 2 = Z W X Y
|
// X Y Z W, rotated left by 2 = Z W X Y
|
||||||
|
@ -1193,8 +1197,10 @@ XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCFunctionBuilder& f, Inst
|
||||||
} else {
|
} else {
|
||||||
v = f.LoadVR(vb);
|
v = f.LoadVR(vb);
|
||||||
}
|
}
|
||||||
v = f.Permute(
|
if (blend_mask != 0x00010203) {
|
||||||
f.LoadConstant(blend_mask), v, f.LoadVR(vd), FLOAT32_TYPE);
|
v = f.Permute(
|
||||||
|
f.LoadConstant(blend_mask), v, f.LoadVR(vd), INT32_TYPE);
|
||||||
|
}
|
||||||
f.StoreVR(vd, v);
|
f.StoreVR(vd, v);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,7 +49,7 @@ enum ArithmeticFlags {
|
||||||
ARITHMETIC_SET_CARRY = (1 << 1),
|
ARITHMETIC_SET_CARRY = (1 << 1),
|
||||||
};
|
};
|
||||||
enum Permutes {
|
enum Permutes {
|
||||||
PERMUTE_XY_ZW = 0x05040100,
|
PERMUTE_XY_ZW = 0x00010405,
|
||||||
};
|
};
|
||||||
enum Swizzles {
|
enum Swizzles {
|
||||||
SWIZZLE_XYZW_TO_XYZW = 0xE4,
|
SWIZZLE_XYZW_TO_XYZW = 0xE4,
|
||||||
|
|
Loading…
Reference in New Issue