PACK/UNPACK.

This commit is contained in:
Ben Vanik 2014-01-10 00:28:52 -08:00
parent 3fbebcfa08
commit 8085678f5a
6 changed files with 321 additions and 183 deletions

View File

@ -21,6 +21,10 @@ using namespace alloy::hir;
using namespace alloy::runtime; using namespace alloy::runtime;
// TODO(benvanik): reimplement packing functions
#include <DirectXPackedVector.h>
// TODO(benvanik): make a compile time flag? // TODO(benvanik): make a compile time flag?
//#define DYNAMIC_REGISTER_ACCESS_CHECK(address) false //#define DYNAMIC_REGISTER_ACCESS_CHECK(address) false
#define DYNAMIC_REGISTER_ACCESS_CHECK(address) ((address & 0xFF000000) == 0x7F000000) #define DYNAMIC_REGISTER_ACCESS_CHECK(address) ((address & 0xFF000000) == 0x7F000000)
@ -3726,6 +3730,128 @@ int Translate_SWIZZLE(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, fns[i->src1.value->type]); return DispatchToC(ctx, i, fns[i->src1.value->type]);
} }
// Interpreter handler for PACK with PACK_TYPE_D3DCOLOR.
// Saturates the four float lanes of src1 (x=R, y=G, z=B, w=A) to [0, 1],
// scales them to 0..255 and stores them as one ARGB dword in dest.iw.
// dest.ix/iy/iz are cleared. Always returns IA_NEXT.
uint32_t IntCode_PACK_D3DCOLOR(IntCodeState& ics, const IntCode* i) {
  const vec128_t& src1 = ics.rf[i->src1_reg].v128;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  // RGBA (XYZW) -> ARGB (WXYZ)
  // Convert every channel BEFORE writing to dest: if src1_reg and dest_reg
  // name the same register, clearing dest first would wipe the inputs
  // (assumes the float and integer lanes of vec128_t overlay each other).
  const float channels[4] = {src1.x, src1.y, src1.z, src1.w};
  uint32_t bytes[4];
  for (int n = 0; n < 4; n++) {
    const float c = channels[n];
    // !(c > 0) is also true for NaN, which keeps the float -> uint32_t cast
    // below well-defined (a NaN channel packs as 0).
    const float saturated = !(c > 0.0f) ? 0.0f : ((1.0f < c) ? 1.0f : c);
    bytes[n] = (uint32_t)roundf(saturated * 255);
  }
  dest.ix = dest.iy = dest.iz = 0;
  dest.iw = (bytes[3] << 24) |  // A
            (bytes[0] << 16) |  // R
            (bytes[1] << 8) |   // G
            (bytes[2]);         // B
  return IA_NEXT;
}
// Interpreter handler for PACK with PACK_TYPE_FLOAT16_2.
// Converts src1.x and src1.y to half floats and stores them in dest.iw
// (x in the high 16 bits, y in the low 16 bits). dest.ix/iy/iz are cleared.
// Always returns IA_NEXT.
uint32_t IntCode_PACK_FLOAT16_2(IntCodeState& ics, const IntCode* i) {
  const vec128_t& src1 = ics.rf[i->src1_reg].v128;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  // Do the conversion before clearing dest so the inputs survive when
  // src1_reg and dest_reg refer to the same register.
  const uint32_t packed =
      ((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.x) << 16) |
      DirectX::PackedVector::XMConvertFloatToHalf(src1.y);
  dest.ix = dest.iy = dest.iz = 0;
  dest.iw = packed;
  return IA_NEXT;
}
// Interpreter handler for PACK with PACK_TYPE_FLOAT16_4.
// Converts all four float lanes of src1 to half floats: x/y go to dest.iz
// (x in the high 16 bits), z/w go to dest.iw (z in the high 16 bits).
// dest.ix/iy are cleared, matching the other pack handlers which zero the
// lanes they do not fill. Always returns IA_NEXT.
uint32_t IntCode_PACK_FLOAT16_4(IntCodeState& ics, const IntCode* i) {
  const vec128_t& src1 = ics.rf[i->src1_reg].v128;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  // Both dwords are computed before any store: writing dest.iz first would
  // corrupt src1.z when src1_reg and dest_reg are the same register.
  const uint32_t packed_xy =
      ((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.x) << 16) |
      DirectX::PackedVector::XMConvertFloatToHalf(src1.y);
  const uint32_t packed_zw =
      ((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.z) << 16) |
      DirectX::PackedVector::XMConvertFloatToHalf(src1.w);
  dest.ix = dest.iy = 0;
  dest.iz = packed_xy;
  dest.iw = packed_zw;
  return IA_NEXT;
}
// Interpreter handler for PACK with PACK_TYPE_SHORT_2.
// Converts src1.x and src1.y from the biased float form (3 + n / 2^22) back
// to 16-bit integers and stores them in dest.iw (x in the high 16 bits, y in
// the low 16 bits). dest.ix/iy/iz are cleared. Always returns IA_NEXT.
uint32_t IntCode_PACK_SHORT_2(IntCodeState& ics, const IntCode* i) {
  const vec128_t& src1 = ics.rf[i->src1_reg].v128;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  // sx = 3 + (x / 1<<22)
  // x = (sx - 3) * 1<<22
  const float sx = src1.x;
  const float sy = src1.y;
  // Two independent shorts. They must not share storage: as members of one
  // anonymous union the second assignment would overwrite the first.
  // NOTE(review): values outside the int16_t range are not saturated here;
  // confirm whether clamping is required.
  const int16_t dx = (int16_t)((sx - 3.0f) * (float)(1 << 22));
  const int16_t dy = (int16_t)((sy - 3.0f) * (float)(1 << 22));
  dest.ix = dest.iy = dest.iz = 0;
  // Go through uint16_t so a negative value is not sign-extended into the
  // other half of the dword.
  dest.iw = ((uint32_t)(uint16_t)dx << 16) | (uint32_t)(uint16_t)dy;
  return IA_NEXT;
}
// Translates a PACK instruction by selecting the interpreter handler that
// matches the instruction's flags. The table order follows the PackType
// enum values (D3DCOLOR, FLOAT16_2, FLOAT16_4, SHORT_2).
int Translate_PACK(TranslationContext& ctx, Instr* i) {
  static IntCodeFn pack_handlers[] = {
      IntCode_PACK_D3DCOLOR,   // PACK_TYPE_D3DCOLOR
      IntCode_PACK_FLOAT16_2,  // PACK_TYPE_FLOAT16_2
      IntCode_PACK_FLOAT16_4,  // PACK_TYPE_FLOAT16_4
      IntCode_PACK_SHORT_2,    // PACK_TYPE_SHORT_2
  };
  IntCodeFn handler = pack_handlers[i->flags];
  return DispatchToC(ctx, i, handler);
}
// Interpreter handler for UNPACK with PACK_TYPE_D3DCOLOR.
// Splits the ARGB dword in src1.iw into four bytes and writes each one,
// scaled by 1/255, to dest.f4[0..3] in R, G, B, A order.
// Always returns IA_NEXT.
uint32_t IntCode_UNPACK_D3DCOLOR(IntCodeState& ics, const IntCode* i) {
  const vec128_t& src1 = ics.rf[i->src1_reg].v128;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  // ARGB (WXYZ) -> RGBA (XYZW)
  // XMLoadColor
  const int32_t argb = (int32_t)src1.iw;
  // Bit offset of the byte feeding each output lane: R, G, B, A.
  static const int kLaneShift[4] = {16, 8, 0, 24};
  for (int lane = 0; lane < 4; lane++) {
    const int32_t channel = (argb >> kLaneShift[lane]) & 0xFF;
    dest.f4[lane] = (float)channel * (1.0f / 255.0f);
  }
  return IA_NEXT;
}
// Interpreter handler for UNPACK with PACK_TYPE_FLOAT16_2.
// Expands the two half floats held in src1.iw: the low 16 bits go to
// dest.f4[0], the high 16 bits to dest.f4[1]. f4[2] is set to 0.0 and f4[3]
// to 1.0. Always returns IA_NEXT.
// NOTE(review): IntCode_PACK_FLOAT16_2 stores x in the HIGH half and y in the
// low half; confirm that the lane order here is the intended inverse.
uint32_t IntCode_UNPACK_FLOAT16_2(IntCodeState& ics, const IntCode* i) {
  const vec128_t& src1 = ics.rf[i->src1_reg].v128;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  const uint32_t packed = src1.iw;
  const uint16_t low_half = (uint16_t)packed;
  const uint16_t high_half = (uint16_t)(packed >> 16);
  dest.f4[0] = DirectX::PackedVector::XMConvertHalfToFloat(low_half);
  dest.f4[1] = DirectX::PackedVector::XMConvertHalfToFloat(high_half);
  dest.f4[2] = 0.0f;
  dest.f4[3] = 1.0f;
  return IA_NEXT;
}
// Interpreter handler for UNPACK with PACK_TYPE_FLOAT16_4.
// Expands four half floats: src1.iz supplies dest.f4[0] (low 16 bits) and
// dest.f4[1] (high 16 bits); src1.iw supplies dest.f4[2] (low) and
// dest.f4[3] (high). Always returns IA_NEXT.
// NOTE(review): IntCode_PACK_FLOAT16_4 stores the first lane of each pair in
// the HIGH half; confirm that the lane order here is the intended inverse.
uint32_t IntCode_UNPACK_FLOAT16_4(IntCodeState& ics, const IntCode* i) {
  const vec128_t& src1 = ics.rf[i->src1_reg].v128;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  // Snapshot both source dwords before any lane of dest is written.
  const uint32_t lower = src1.iz;
  const uint32_t upper = src1.iw;
  dest.f4[0] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)lower);
  dest.f4[1] =
      DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)(lower >> 16));
  dest.f4[2] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)upper);
  dest.f4[3] =
      DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)(upper >> 16));
  return IA_NEXT;
}
// Interpreter handler for UNPACK with PACK_TYPE_SHORT_2.
// Treats src1.iw as two signed 16-bit integers (high half, low half) and
// expands them to the biased float form 3 + n / 2^22 in dest.f4[0] and
// dest.f4[1]. f4[2] is set to 0.0 and f4[3] to 1.0. Always returns IA_NEXT.
uint32_t IntCode_UNPACK_SHORT_2(IntCodeState& ics, const IntCode* i) {
  const vec128_t& src1 = ics.rf[i->src1_reg].v128;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  // XMLoadShortN2
  const uint32_t packed = src1.iw;
  // Two independent shorts. They must not share storage: as members of one
  // anonymous union both lanes would end up holding the low half.
  const int16_t sx = (int16_t)(packed >> 16);
  const int16_t sy = (int16_t)packed;
  dest.f4[0] = 3.0f + ((float)sx / (float)(1 << 22));
  dest.f4[1] = 3.0f + ((float)sy / (float)(1 << 22));
  dest.f4[2] = 0.0f;
  dest.f4[3] = 1.0f; // 3?
  return IA_NEXT;
}
// Translates an UNPACK instruction by selecting the interpreter handler that
// matches the instruction's flags. The table order follows the PackType
// enum values (D3DCOLOR, FLOAT16_2, FLOAT16_4, SHORT_2).
int Translate_UNPACK(TranslationContext& ctx, Instr* i) {
  static IntCodeFn unpack_handlers[] = {
      IntCode_UNPACK_D3DCOLOR,   // PACK_TYPE_D3DCOLOR
      IntCode_UNPACK_FLOAT16_2,  // PACK_TYPE_FLOAT16_2
      IntCode_UNPACK_FLOAT16_4,  // PACK_TYPE_FLOAT16_4
      IntCode_UNPACK_SHORT_2,    // PACK_TYPE_SHORT_2
  };
  IntCodeFn handler = unpack_handlers[i->flags];
  return DispatchToC(ctx, i, handler);
}
uint32_t IntCode_ATOMIC_EXCHANGE_I32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_ATOMIC_EXCHANGE_I32(IntCodeState& ics, const IntCode* i) {
auto address = (uint8_t*)ics.rf[i->src1_reg].u64; auto address = (uint8_t*)ics.rf[i->src1_reg].u64;
auto new_value = ics.rf[i->src2_reg].u32; auto new_value = ics.rf[i->src2_reg].u32;
@ -3860,6 +3986,8 @@ static const TranslateFn dispatch_table[] = {
Translate_SPLAT, Translate_SPLAT,
Translate_PERMUTE, Translate_PERMUTE,
Translate_SWIZZLE, Translate_SWIZZLE,
Translate_PACK,
Translate_UNPACK,
TranslateInvalid, //Translate_COMPARE_EXCHANGE, TranslateInvalid, //Translate_COMPARE_EXCHANGE,
Translate_ATOMIC_EXCHANGE, Translate_ATOMIC_EXCHANGE,

View File

@ -1128,88 +1128,6 @@ XEEMITTER(vpermwi128, VX128_P(6, 528), VX128_P)(PPCHIRBuilder& f, InstrData
return 0; return 0;
} }
// vpkpx: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkpx, 0x1000030E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkshss: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkshss, 0x1000018E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkshss128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkshss128, VX128(5, 512), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkswss: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkswss, 0x100001CE, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkswss128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkswss128, VX128(5, 640), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkswus: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkswus, 0x1000014E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkswus128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkswus128, VX128(5, 704), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuhum: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkuhum, 0x1000000E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuhum128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkuhum128, VX128(5, 768), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuhus: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkuhus, 0x1000008E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuhus128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkuhus128, VX128(5, 832), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkshus: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkshus, 0x1000010E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkshus128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkshus128, VX128(5, 576), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuwum: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkuwum, 0x1000004E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuwum128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkuwum128, VX128(5, 896), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuwus: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkuwus, 0x100000CE, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuwus128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkuwus128, VX128(5, 960), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkd3d128: emitter stub. Not implemented here; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkd3d128, VX128_4(6, 1552), VX128_4)(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
int InstrEmit_vrefp_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) { int InstrEmit_vrefp_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) {
// (VD) <- 1/(VB) // (VD) <- 1/(VB)
vec128_t one = { 1, 1, 1, 1 }; vec128_t one = { 1, 1, 1, 1 };
@ -1696,6 +1614,84 @@ XEEMITTER(vsum4ubs, 0x10000608, VX )(PPCHIRBuilder& f, InstrData& i) {
return 1; return 1;
} }
// vpkpx: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkpx, 0x1000030E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkshss: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkshss, 0x1000018E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkshss128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkshss128, VX128(5, 512), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkswss: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkswss, 0x100001CE, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkswss128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkswss128, VX128(5, 640), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkswus: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkswus, 0x1000014E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkswus128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkswus128, VX128(5, 704), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuhum: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkuhum, 0x1000000E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuhum128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkuhum128, VX128(5, 768), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuhus: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkuhus, 0x1000008E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuhus128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkuhus128, VX128(5, 832), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkshus: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkshus, 0x1000010E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkshus128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkshus128, VX128(5, 576), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuwum: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkuwum, 0x1000004E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuwum128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkuwum128, VX128(5, 896), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuwus: emitter stub. Not implemented yet; invokes XEINSTRNOTIMPLEMENTED()
// and returns 1 (non-zero presumably signals failure -- TODO confirm).
XEEMITTER(vpkuwus, 0x100000CE, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
// vpkuwus128: emitter stub. Not implemented yet; invokes
// XEINSTRNOTIMPLEMENTED() and returns 1 (non-zero presumably signals failure
// -- TODO confirm).
XEEMITTER(vpkuwus128, VX128(5, 960), VX128 )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED();
return 1;
}
XEEMITTER(vupkhpx, 0x1000034E, VX )(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vupkhpx, 0x1000034E, VX )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); XEINSTRNOTIMPLEMENTED();
return 1; return 1;
@ -1734,31 +1730,69 @@ XEEMITTER(vupklsh, 0x100002CE, VX )(PPCHIRBuilder& f, InstrData& i) {
return 1; return 1;
} }
// __m128 half_to_float5_SSE2(__m128i h) { XEEMITTER(vpkd3d128, VX128_4(6, 1552), VX128_4)(PPCHIRBuilder& f, InstrData& i) {
// #define SSE_CONST4(name, val) static const __declspec(align(16)) uint name[4] = { (val), (val), (val), (val) } const uint32_t vd = i.VX128_4.VD128l | (i.VX128_4.VD128h << 5);
// #define SSE_CONST(name) *(const __m128i *)&name const uint32_t vb = i.VX128_4.VB128l | (i.VX128_4.VB128h << 5);
// #define SSE_CONSTF(name) *(const __m128 *)&name uint32_t type = i.VX128_4.IMM >> 2;
// SSE_CONST4(mask_nosign, 0x7fff); uint32_t shift = i.VX128_4.IMM & 0x3;
// SSE_CONST4(magic, (254 - 15) << 23); uint32_t pack = i.VX128_4.z;
// SSE_CONST4(was_infnan, 0x7bff); Value* v = f.LoadVR(vb);
// SSE_CONST4(exp_infnan, 255 << 23); switch (type) {
// __m128i mnosign = SSE_CONST(mask_nosign); case 0: // VPACK_D3DCOLOR
// __m128i expmant = _mm_and_si128(mnosign, h); v = f.Pack(v, PACK_TYPE_D3DCOLOR);
// __m128i justsign = _mm_xor_si128(h, expmant); break;
// __m128i expmant2 = expmant; // copy (just here for counting purposes) case 1: // VPACK_NORMSHORT2
// __m128i shifted = _mm_slli_epi32(expmant, 13); v = f.Pack(v, PACK_TYPE_SHORT_2);
// __m128 scaled = _mm_mul_ps(_mm_castsi128_ps(shifted), *(const __m128 *)&magic); break;
// __m128i b_wasinfnan = _mm_cmpgt_epi32(expmant2, SSE_CONST(was_infnan)); case 3: // VPACK_... 2 FLOAT16s DXGI_FORMAT_R16G16_FLOAT
// __m128i sign = _mm_slli_epi32(justsign, 16); v = f.Pack(v, PACK_TYPE_FLOAT16_2);
// __m128 infnanexp = _mm_and_ps(_mm_castsi128_ps(b_wasinfnan), SSE_CONSTF(exp_infnan)); break;
// __m128 sign_inf = _mm_or_ps(_mm_castsi128_ps(sign), infnanexp); case 5: // VPACK_... 4 FLOAT16s DXGI_FORMAT_R16G16B16A16_FLOAT
// __m128 final = _mm_or_ps(scaled, sign_inf); v = f.Pack(v, PACK_TYPE_FLOAT16_4);
// // ~11 SSE2 ops. break;
// return final; default:
// #undef SSE_CONST4 XEASSERTALWAYS();
// #undef CONST return 1;
// #undef CONSTF }
// } // http://hlssmod.net/he_code/public/pixelwriter.h
// control = prev:0123 | new:4567
uint32_t control = 0x00010203; // original
uint32_t src = _rotl(0x04050607, shift * 8);
uint32_t mask = 0;
switch (pack) {
case 1: // VPACK_32
// VPACK_32 & shift = 3 puts lower 32 bits in x (leftmost slot).
mask = 0x000000FF << (shift * 8);
control = (control & ~mask) | (src & mask);
break;
case 2: // 64bit
if (shift < 3) {
mask = 0x0000FFFF << (shift * 8);
} else {
// w
src = 0x00000007;
mask = 0x000000FF;
}
control = (control & ~mask) | (src & mask);
break;
case 3: // 64bit
if (shift < 3) {
mask = 0x0000FFFF << (shift * 8);
} else {
// z
src = 0x00000006;
mask = 0x000000FF;
}
control = (control & ~mask) | (src & mask);
break;
default:
XEASSERTALWAYS();
return 1;
}
v = f.Permute(f.LoadConstant(control), f.LoadVR(vd), v, INT32_TYPE);
f.StoreVR(vd, v);
return 0;
}
XEEMITTER(vupkd3d128, VX128_3(6, 2032), VX128_3)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vupkd3d128, VX128_3(6, 2032), VX128_3)(PPCHIRBuilder& f, InstrData& i) {
// Can't find many docs on this. Best reference is // Can't find many docs on this. Best reference is
@ -1768,86 +1802,19 @@ XEEMITTER(vupkd3d128, VX128_3(6, 2032), VX128_3)(PPCHIRBuilder& f, InstrData
const uint32_t vd = i.VX128_3.VD128l | (i.VX128_3.VD128h << 5); const uint32_t vd = i.VX128_3.VD128l | (i.VX128_3.VD128h << 5);
const uint32_t vb = i.VX128_3.VB128l | (i.VX128_3.VB128h << 5); const uint32_t vb = i.VX128_3.VB128l | (i.VX128_3.VB128h << 5);
const uint32_t type = i.VX128_3.IMM >> 2; const uint32_t type = i.VX128_3.IMM >> 2;
Value* v; Value* v = f.LoadVR(vb);
switch (type) { switch (type) {
case 0: // VPACK_D3DCOLOR case 0: // VPACK_D3DCOLOR
{ v = f.Unpack(v, PACK_TYPE_D3DCOLOR);
// http://hlssmod.net/he_code/public/pixelwriter.h
// ARGB (WXYZ) -> RGBA (XYZW)
// zzzzZZZZzzzzARGB
v = f.LoadVR(vb);
// 0zzzZZZZzzzzARGB
v = f.Insert(v, 0ull, f.LoadConstant((int8_t)0));
// 000R000G000B000A
vec128_t shuf_v = { 0 };
shuf_v.b16[3] = 13;
shuf_v.b16[7] = 14;
shuf_v.b16[11] = 15;
shuf_v.b16[15] = 12;
Value* shuf = f.LoadConstant(shuf_v);
v = f.Permute(shuf, v, v, INT8_TYPE);
// {256*R.0, 256*G.0, 256*B.0, 256*A.0}
v = f.VectorConvertI2F(v);
// {R.0, G.0, B.0 A.0}
// 1/256 = 0.00390625 = 0x3B800000
v = f.Mul(
v,
f.Splat(f.LoadConstant((uint32_t)0x3B800000), VEC128_TYPE));
}
break; break;
case 1: // VPACK_NORMSHORT2 case 1: // VPACK_NORMSHORT2
{ v = f.Unpack(v, PACK_TYPE_SHORT_2);
// (VD.x) = 3.0 + (VB.x)*2^-22
// (VD.y) = 3.0 + (VB.y)*2^-22
// (VD.z) = 0.0
// (VD.w) = 1.0
// v = VB.x|VB.y|0|0
v = f.Permute(
f.LoadConstant(PERMUTE_XY_ZW),
f.LoadVR(vb),
f.LoadZero(VEC128_TYPE),
INT32_TYPE);
// *= 2^-22 + {3.0, 3.0, 0, 1.0}
vec128_t v3301 = { 3.0, 3.0, 0, 1.0 };
v = f.MulAdd(
v,
f.Splat(f.LoadConstant(0x34800000), VEC128_TYPE),
f.LoadConstant(v3301));
}
break; break;
case 3: // VPACK_... 2 FLOAT16s case 3: // VPACK_... 2 FLOAT16s DXGI_FORMAT_R16G16_FLOAT
{ v = f.Unpack(v, PACK_TYPE_FLOAT16_2);
// (VD.x) = fixed_16_to_32(VB.x (low)) break;
// (VD.y) = fixed_16_to_32(VB.x (high)) case 5: // VPACK_... 4 FLOAT16s DXGI_FORMAT_R16G16B16A16_FLOAT
// (VD.z) = 0.0 v = f.Unpack(v, PACK_TYPE_FLOAT16_4);
// (VD.w) = 1.0
v = f.LoadZero(VEC128_TYPE);
f.DebugBreak();
// 1 bit sign, 5 bit exponent, 10 bit mantissa
// D3D10 half float format
// TODO(benvanik): fixed_16_to_32 in SSE?
// TODO(benvanik): http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx
// Use _mm_cvtph_ps -- requires very modern processors (SSE5+)
// Unpacking half floats: http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
// Packing half floats: https://gist.github.com/rygorous/2156668
// Load source, move from tight pack of X16Y16.... to X16...Y16...
// Also zero out the high end.
//c.int3();
//c.movaps(vt, f.LoadVR(vb));
//c.save(vt);
//c.lea(gt, vt.m128());
//X86CompilerFuncCall* call = c.call(half_to_float5_SSE2);
//uint32_t args[] = {kX86VarTypeGpq};
//call->setPrototype(kX86FuncConvDefault, kX86VarTypeXmm, args, XECOUNT(args));
//call->setArgument(0, gt);
//call->setReturn(v);
//// Select XY00.
//c.xorps(vt, vt);
//c.shufps(v, vt, imm(0x04));
//// {0.0, 0.0, 0.0, 1.0}
//c.mov(gt, imm(0x3F800000));
//c.pinsrd(v, gt.r32(), imm(3));
}
break; break;
default: default:
XEASSERTALWAYS(); XEASSERTALWAYS();

View File

@ -1677,6 +1677,28 @@ Value* HIRBuilder::Swizzle(
return i->dest; return i->dest;
} }
// Appends an OPCODE_PACK instruction that packs `value` (must be a vector)
// according to `pack_flags`, and returns the instruction's destination
// value, which is allocated as VEC128_TYPE.
Value* HIRBuilder::Pack(Value* value, uint32_t pack_flags) {
  ASSERT_VECTOR_TYPE(value);
  Value* result = AllocValue(VEC128_TYPE);
  Instr* instr = AppendInstr(OPCODE_PACK_info, pack_flags, result);
  instr->set_src1(value);
  // Only src1 is used; the remaining operand slots are cleared.
  instr->src3.value = NULL;
  instr->src2.value = NULL;
  return instr->dest;
}
// Appends an OPCODE_UNPACK instruction that unpacks `value` (must be a
// vector) according to `pack_flags`, and returns the instruction's
// destination value, which is allocated as VEC128_TYPE.
Value* HIRBuilder::Unpack(Value* value, uint32_t pack_flags) {
  ASSERT_VECTOR_TYPE(value);
  // TODO(benvanik): check if this is a constant - sometimes this is just used
  //                 to initialize registers.
  Value* result = AllocValue(VEC128_TYPE);
  Instr* instr = AppendInstr(OPCODE_UNPACK_info, pack_flags, result);
  instr->set_src1(value);
  // Only src1 is used; the remaining operand slots are cleared.
  instr->src3.value = NULL;
  instr->src2.value = NULL;
  return instr->dest;
}
Value* HIRBuilder::CompareExchange( Value* HIRBuilder::CompareExchange(
Value* address, Value* compare_value, Value* exchange_value) { Value* address, Value* compare_value, Value* exchange_value) {
ASSERT_ADDRESS_TYPE(address); ASSERT_ADDRESS_TYPE(address);

View File

@ -194,7 +194,8 @@ public:
TypeName part_type); TypeName part_type);
Value* Swizzle(Value* value, TypeName part_type, uint32_t swizzle_mask); Value* Swizzle(Value* value, TypeName part_type, uint32_t swizzle_mask);
// SelectBits(cond, value1, value2) // SelectBits(cond, value1, value2)
// pack/unpack/etc Value* Pack(Value* value, uint32_t pack_flags = 0);
Value* Unpack(Value* value, uint32_t pack_flags = 0);
Value* CompareExchange(Value* address, Value* CompareExchange(Value* address,
Value* compare_value, Value* exchange_value); Value* compare_value, Value* exchange_value);

View File

@ -66,6 +66,12 @@ enum Swizzles {
SWIZZLE_XYZW_TO_ZWXY = SWIZZLE_MASK(2, 3, 0, 1), SWIZZLE_XYZW_TO_ZWXY = SWIZZLE_MASK(2, 3, 0, 1),
SWIZZLE_XYZW_TO_WXYZ = SWIZZLE_MASK(3, 0, 1, 2), SWIZZLE_XYZW_TO_WXYZ = SWIZZLE_MASK(3, 0, 1, 2),
}; };
// Packed formats selectable on OPCODE_PACK / OPCODE_UNPACK. The value is
// passed as the pack_flags argument of HIRBuilder::Pack/Unpack and is used
// as an index into the handler tables of Translate_PACK / Translate_UNPACK,
// so the numeric values must stay in step with the order of those tables.
enum PackType {
// Four channels packed into one 32-bit ARGB dword.
PACK_TYPE_D3DCOLOR = 0,
// Two 16-bit half floats in one dword.
PACK_TYPE_FLOAT16_2 = 1,
// Four 16-bit half floats in two dwords.
PACK_TYPE_FLOAT16_4 = 2,
// Two signed 16-bit integers in one dword.
PACK_TYPE_SHORT_2 = 3,
};
enum Opcode { enum Opcode {
@ -173,6 +179,8 @@ enum Opcode {
OPCODE_SPLAT, OPCODE_SPLAT,
OPCODE_PERMUTE, OPCODE_PERMUTE,
OPCODE_SWIZZLE, OPCODE_SWIZZLE,
OPCODE_PACK,
OPCODE_UNPACK,
OPCODE_COMPARE_EXCHANGE, OPCODE_COMPARE_EXCHANGE,
OPCODE_ATOMIC_EXCHANGE, OPCODE_ATOMIC_EXCHANGE,

View File

@ -539,6 +539,18 @@ DEFINE_OPCODE(
OPCODE_SIG_V_V_O, OPCODE_SIG_V_V_O,
0); 0);
// Describes OPCODE_PACK: display name "pack", signature OPCODE_SIG_V_V
// (presumably one value result from one value operand -- TODO confirm) and
// a flags argument of 0.
DEFINE_OPCODE(
OPCODE_PACK,
"pack",
OPCODE_SIG_V_V,
0);
// Describes OPCODE_UNPACK: display name "unpack", signature OPCODE_SIG_V_V
// (presumably one value result from one value operand -- TODO confirm) and
// a flags argument of 0.
DEFINE_OPCODE(
OPCODE_UNPACK,
"unpack",
OPCODE_SIG_V_V,
0);
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_COMPARE_EXCHANGE, OPCODE_COMPARE_EXCHANGE,
"compare_exchange", "compare_exchange",