Untested PACK float16_2/_4.

This commit is contained in:
Ben Vanik 2014-05-31 11:23:10 -07:00
parent a7c0c1327a
commit e42460039f
2 changed files with 36 additions and 6 deletions

View File

@ -3927,6 +3927,7 @@ uint32_t IntCode_PACK_FLOAT16_2(IntCodeState& ics, const IntCode* i) {
uint32_t IntCode_PACK_FLOAT16_4(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_PACK_FLOAT16_4(IntCodeState& ics, const IntCode* i) {
const vec128_t& src1 = ics.rf[i->src1_reg].v128; const vec128_t& src1 = ics.rf[i->src1_reg].v128;
vec128_t& dest = ics.rf[i->dest_reg].v128; vec128_t& dest = ics.rf[i->dest_reg].v128;
dest.ix = dest.iy = 0;
dest.iz = dest.iz =
((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.x) << 16) | ((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.x) << 16) |
DirectX::PackedVector::XMConvertFloatToHalf(src1.y); DirectX::PackedVector::XMConvertFloatToHalf(src1.y);

View File

@ -2976,7 +2976,12 @@ EMITTER(MUL_HI_I8, MATCH(I<OPCODE_MUL_HI, I8<>, I8<>, I8<>>)) {
e.mulx(i.dest.reg().cvt32(), e.eax, i.src2.reg().cvt32()); e.mulx(i.dest.reg().cvt32(), e.eax, i.src2.reg().cvt32());
} else { } else {
e.mov(e.al, i.src1); e.mov(e.al, i.src1);
if (i.src2.is_constant) {
e.mov(e.al, i.src2.constant());
e.imul(e.al);
} else {
e.imul(i.src2); e.imul(i.src2);
}
e.mov(i.dest, e.ah); e.mov(i.dest, e.ah);
} }
e.ReloadEDX(); e.ReloadEDX();
@ -2990,7 +2995,12 @@ EMITTER(MUL_HI_I16, MATCH(I<OPCODE_MUL_HI, I16<>, I16<>, I16<>>)) {
e.mulx(i.dest.reg().cvt32(), e.eax, i.src2.reg().cvt32()); e.mulx(i.dest.reg().cvt32(), e.eax, i.src2.reg().cvt32());
} else { } else {
e.mov(e.ax, i.src1); e.mov(e.ax, i.src1);
if (i.src2.is_constant) {
e.mov(e.dx, i.src2.constant());
e.imul(e.dx);
} else {
e.imul(i.src2); e.imul(i.src2);
}
e.mov(i.dest, e.dx); e.mov(i.dest, e.dx);
} }
e.ReloadEDX(); e.ReloadEDX();
@ -3009,7 +3019,12 @@ EMITTER(MUL_HI_I32, MATCH(I<OPCODE_MUL_HI, I32<>, I32<>, I32<>>)) {
} }
} else { } else {
e.mov(e.eax, i.src1); e.mov(e.eax, i.src1);
if (i.src2.is_constant) {
e.mov(e.edx, i.src2.constant());
e.imul(e.edx);
} else {
e.imul(i.src2); e.imul(i.src2);
}
e.mov(i.dest, e.edx); e.mov(i.dest, e.edx);
} }
e.ReloadEDX(); e.ReloadEDX();
@ -3028,7 +3043,12 @@ EMITTER(MUL_HI_I64, MATCH(I<OPCODE_MUL_HI, I64<>, I64<>, I64<>>)) {
} }
} else { } else {
e.mov(e.rax, i.src1); e.mov(e.rax, i.src1);
if (i.src2.is_constant) {
e.mov(e.rdx, i.src2.constant());
e.imul(e.rdx);
} else {
e.imul(i.src2); e.imul(i.src2);
}
e.mov(i.dest, e.rdx); e.mov(i.dest, e.rdx);
} }
e.ReloadEDX(); e.ReloadEDX();
@ -4781,10 +4801,19 @@ EMITTER(PACK, MATCH(I<OPCODE_PACK, V128<>, V128<>>)) {
e.vpshufb(i.dest, e.xmm0, e.GetXmmConstPtr(XMMPackD3DCOLOR)); e.vpshufb(i.dest, e.xmm0, e.GetXmmConstPtr(XMMPackD3DCOLOR));
} }
// Emits the x64 sequence for PACK FLOAT16_2: converts src1 to half precision
// and keeps only the two lowest 16-bit words, zeroing the rest of dest.
//   e: emitter receiving the instructions (xmm0 is used as scratch here).
//   i: matched instruction arguments; reads i.src1, writes i.dest.
static void EmitFLOAT16_2(X64Emitter& e, const EmitArgType& i) {
  // http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx
  // dest = [(src1.x | src1.y), 0, 0, 0]
  // 0xCC is the x86 int3 opcode. NOTE(review): presumably left in so this
  // untested sequence traps under a debugger the first time it runs -- remove
  // once the path has been verified.
  e.db(0xCC);
  // Convert into the scratch register before touching dest, so the source is
  // consumed before dest is cleared.
  // NOTE(review): assuming B00000011 == 3, the immediate selects rounding
  // mode 3 (truncate) from imm8 rather than MXCSR -- confirm this is intended.
  e.vcvtps2ph(e.xmm0, i.src1, B00000011);
  // Zero dest, then blend in only words 0 and 1 (mask 0b00000011) from the
  // converted halves.
  e.vxorps(i.dest, i.dest);
  e.vpblendw(i.dest, e.xmm0, B00000011);
}
// Emits the x64 sequence for PACK FLOAT16_4: converts src1 to half precision
// and keeps the four lowest 16-bit words, zeroing the rest of dest.
//   e: emitter receiving the instructions (xmm0 is used as scratch here).
//   i: matched instruction arguments; reads i.src1, writes i.dest.
static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) {
  // dest = [(src1.x | src1.y), (src1.z | src1.w), 0, 0]
  // NOTE(review): the interpreter's IntCode_PACK_FLOAT16_4 zeroes ix/iy and
  // builds iz as (half(src1.x) << 16) | half(src1.y); verify that the lane
  // placement and x/y ordering produced here agree with it.
  // 0xCC is the x86 int3 opcode. NOTE(review): presumably left in so this
  // untested sequence traps under a debugger the first time it runs -- remove
  // once the path has been verified.
  e.db(0xCC);
  // Convert into the scratch register before touching dest, so the source is
  // consumed before dest is cleared.
  // NOTE(review): assuming B00000011 == 3, the immediate selects rounding
  // mode 3 (truncate) from imm8 rather than MXCSR -- confirm this is intended.
  e.vcvtps2ph(e.xmm0, i.src1, B00000011);
  // Zero dest, then blend in words 0-3 (mask 0b00001111) from the converted
  // halves.
  e.vxorps(i.dest, i.dest);
  e.vpblendw(i.dest, e.xmm0, B00001111);
}
static void EmitSHORT_2(X64Emitter& e, const EmitArgType& i) { static void EmitSHORT_2(X64Emitter& e, const EmitArgType& i) {
XEASSERTALWAYS(); XEASSERTALWAYS();