Implementing the vavg instructions (mostly).

Fixes #155.
This commit is contained in:
Ben Vanik 2015-02-11 12:46:37 -08:00
parent 585e0b0e46
commit d19519e63c
6 changed files with 129 additions and 12 deletions

View File

@ -4748,6 +4748,83 @@ EMITTER_OPCODE_TABLE(
VECTOR_ROTATE_LEFT_V128); VECTOR_ROTATE_LEFT_V128);
// ============================================================================
// OPCODE_VECTOR_AVERAGE
// ============================================================================
// x64 lowering for OPCODE_VECTOR_AVERAGE on 128-bit vectors.
//
// The instruction flags pack the lane type in the low 8 bits and the
// arithmetic flags (e.g. ARITHMETIC_UNSIGNED) in the bits above them.
// Unsigned 8/16-bit lanes map directly onto vpavgb/vpavgw. 32-bit lanes have
// no AVX average instruction and are emulated by calling out to native code.
// Signed 8/16-bit lanes are not implemented yet and assert.
EMITTER(VECTOR_AVERAGE, MATCH(I<OPCODE_VECTOR_AVERAGE, V128<>, V128<>, V128<>>)) {
  // Per-lane unsigned 32-bit average, rounding up: (a + b + 1) >> 1.
  // The sum is formed in 64 bits so the carry out of bit 31 is kept.
  static __m128i EmulateVectorAverageUnsignedI32(void*, __m128i src1,
                                                 __m128i src2) {
    alignas(16) uint32_t src1v[4];
    alignas(16) uint32_t src2v[4];
    alignas(16) uint32_t value[4];
    _mm_store_si128(reinterpret_cast<__m128i*>(src1v), src1);
    _mm_store_si128(reinterpret_cast<__m128i*>(src2v), src2);
    for (size_t i = 0; i < 4; ++i) {
      auto t = (uint64_t(src1v[i]) + uint64_t(src2v[i]) + 1) >> 1;
      value[i] = uint32_t(t);
    }
    return _mm_load_si128(reinterpret_cast<__m128i*>(value));
  }

  // Per-lane signed 32-bit average: (a + b + 1) >> 1, with the sum formed in
  // 64 bits so it cannot overflow.
  // NOTE(review): for negative sums this relies on >> being an arithmetic
  // shift on signed values - confirm for every supported compiler.
  static __m128i EmulateVectorAverageSignedI32(void*, __m128i src1,
                                               __m128i src2) {
    alignas(16) int32_t src1v[4];
    alignas(16) int32_t src2v[4];
    alignas(16) int32_t value[4];
    _mm_store_si128(reinterpret_cast<__m128i*>(src1v), src1);
    _mm_store_si128(reinterpret_cast<__m128i*>(src2v), src2);
    for (size_t i = 0; i < 4; ++i) {
      auto t = (int64_t(src1v[i]) + int64_t(src2v[i]) + 1) >> 1;
      value[i] = int32_t(t);
    }
    return _mm_load_si128(reinterpret_cast<__m128i*>(value));
  }

  // Emits the machine code for one vector-average instruction.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    EmitCommutativeBinaryXmmOp(
        e, i, [&i](X64Emitter& e, const Xmm& dest, const Xmm& src1,
                   const Xmm& src2) {
          const TypeName part_type =
              static_cast<TypeName>(i.instr->flags & 0xFF);
          const uint32_t arithmetic_flags = i.instr->flags >> 8;
          const bool is_unsigned =
              (arithmetic_flags & ARITHMETIC_UNSIGNED) != 0;
          switch (part_type) {
            case INT8_TYPE:
              if (is_unsigned) {
                e.vpavgb(dest, src1, src2);
              } else {
                // Signed byte average is not implemented.
                assert_always();
              }
              break;
            case INT16_TYPE:
              if (is_unsigned) {
                e.vpavgw(dest, src1, src2);
              } else {
                // Signed halfword average is not implemented.
                assert_always();
              }
              break;
            case INT32_TYPE: {
              // No 32bit averages in AVX.
              // Use the operands handed to this callback (as the 8/16-bit
              // cases do) rather than i.src1/i.src2/i.dest, so whatever
              // registers the commutative helper resolved are honored
              // (presumably it may substitute a scratch register for a
              // constant operand - confirm against the helper).
              // NOTE(review): the operands are handed over as addresses of
              // stashed copies in r8/r9 while the helpers declare __m128i
              // by-value parameters; this presumably relies on the target
              // ABI passing __m128i by reference - confirm.
              auto emulate = is_unsigned ? EmulateVectorAverageUnsignedI32
                                         : EmulateVectorAverageSignedI32;
              e.lea(e.r8, e.StashXmm(0, src1));
              e.lea(e.r9, e.StashXmm(1, src2));
              e.CallNativeSafe(reinterpret_cast<void*>(emulate));
              e.vmovaps(dest, e.xmm0);
              break;
            }
            default:
              assert_unhandled_case(part_type);
              break;
          }
        });
  }
};
// Emitter table for OPCODE_VECTOR_AVERAGE; VECTOR_AVERAGE is the only
// sequence listed for it. The table is hooked up in RegisterSequences() via
// REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_AVERAGE).
EMITTER_OPCODE_TABLE(
OPCODE_VECTOR_AVERAGE,
VECTOR_AVERAGE);
// ============================================================================ // ============================================================================
// OPCODE_BYTE_SWAP // OPCODE_BYTE_SWAP
// ============================================================================ // ============================================================================
@ -5751,6 +5828,7 @@ void RegisterSequences() {
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHA); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHA);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ROTATE_LEFT); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ROTATE_LEFT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ROTATE_LEFT); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ROTATE_LEFT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_AVERAGE);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_BYTE_SWAP); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_BYTE_SWAP);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CNTLZ); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CNTLZ);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_INSERT); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_INSERT);

View File

@ -460,13 +460,17 @@ XEEMITTER(vandc128, VX128(5, 592), VX128)(PPCHIRBuilder& f, InstrData& i) {
} }
// vavgsb: averages VA and VB as vectors of 8-bit parts with no arithmetic
// flags set (i.e. not ARITHMETIC_UNSIGNED) and stores the result in VD.
// Returns 0 (the previous not-implemented stub returned 1).
// NOTE(review): the x64 VECTOR_AVERAGE sequence in this commit asserts for
// INT8_TYPE without ARITHMETIC_UNSIGNED, so this path is not yet usable there.
XEEMITTER(vavgsb, 0x10000502, VX)(PPCHIRBuilder& f, InstrData& i) {
  Value* v =
      f.VectorAverage(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE, 0);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vavgsh: averages VA and VB as vectors of 16-bit parts with no arithmetic
// flags set (i.e. not ARITHMETIC_UNSIGNED) and stores the result in VD.
// Returns 0 (the previous not-implemented stub returned 1).
// NOTE(review): the x64 VECTOR_AVERAGE sequence in this commit asserts for
// INT16_TYPE without ARITHMETIC_UNSIGNED, so this path is not yet usable
// there.
XEEMITTER(vavgsh, 0x10000542, VX)(PPCHIRBuilder& f, InstrData& i) {
  Value* v =
      f.VectorAverage(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE, 0);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
XEEMITTER(vavgsw, 0x10000582, VX)(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(vavgsw, 0x10000582, VX)(PPCHIRBuilder& f, InstrData& i) {
@ -474,23 +478,31 @@ XEEMITTER(vavgsw, 0x10000582, VX)(PPCHIRBuilder& f, InstrData& i) {
// aop = EXTS((VRA)i:i + 31) // aop = EXTS((VRA)i:i + 31)
// bop = EXTS((VRB)i:i + 31) // bop = EXTS((VRB)i:i + 31)
// VRTi:i + 31 = Chop((aop + int bop + int 1) >> 1, 32) // VRTi:i + 31 = Chop((aop + int bop + int 1) >> 1, 32)
XEINSTRNOTIMPLEMENTED(); Value* v =
return 1; f.VectorAverage(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE, 0);
f.StoreVR(i.VX.VD, v);
return 0;
} }
// vavgub: averages VA and VB as vectors of 8-bit parts with
// ARITHMETIC_UNSIGNED set and stores the result in VD.
// Returns 0 (the previous not-implemented stub returned 1).
XEEMITTER(vavgub, 0x10000402, VX)(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.VectorAverage(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE,
                             ARITHMETIC_UNSIGNED);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vavguh: averages VA and VB as vectors of 16-bit parts with
// ARITHMETIC_UNSIGNED set and stores the result in VD.
// Returns 0 (the previous not-implemented stub returned 1).
XEEMITTER(vavguh, 0x10000442, VX)(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.VectorAverage(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE,
                             ARITHMETIC_UNSIGNED);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vavguw: averages VA and VB as vectors of 32-bit parts with
// ARITHMETIC_UNSIGNED set and stores the result in VD.
// Returns 0 (the previous not-implemented stub returned 1).
XEEMITTER(vavguw, 0x10000482, VX)(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.VectorAverage(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE,
                             ARITHMETIC_UNSIGNED);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
int InstrEmit_vcfsx_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb, int InstrEmit_vcfsx_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb,

View File

@ -1779,6 +1779,24 @@ Value* HIRBuilder::VectorRotateLeft(Value* value1, Value* value2,
return i->dest; return i->dest;
} }
// Appends an OPCODE_VECTOR_AVERAGE instruction averaging value1 and value2.
//
// Both inputs must be vector-typed. part_type and arithmetic_flags are packed
// into the instruction's 16-bit flags field: part_type in the low byte,
// arithmetic_flags shifted up by 8. The result value has the same type as
// value1 and is returned.
Value* HIRBuilder::VectorAverage(Value* value1, Value* value2,
                                 TypeName part_type,
                                 uint32_t arithmetic_flags) {
  ASSERT_VECTOR_TYPE(value1);
  ASSERT_VECTOR_TYPE(value2);

  // This is shady: two unrelated fields share one flags word, so make sure
  // nothing spills past the 16 bits the instruction can carry.
  const uint32_t packed_flags = part_type | (arithmetic_flags << 8);
  assert_zero(packed_flags >> 16);

  Value* result = AllocValue(value1->type);
  Instr* instr = AppendInstr(OPCODE_VECTOR_AVERAGE_info,
                             uint16_t(packed_flags), result);
  instr->set_src1(value1);
  instr->set_src2(value2);
  instr->src3.value = NULL;  // Third operand is unused.
  return instr->dest;
}
Value* HIRBuilder::ByteSwap(Value* value) { Value* HIRBuilder::ByteSwap(Value* value) {
if (value->type == INT8_TYPE) { if (value->type == INT8_TYPE) {
return value; return value;

View File

@ -204,6 +204,8 @@ class HIRBuilder {
Value* VectorSha(Value* value1, Value* value2, TypeName part_type); Value* VectorSha(Value* value1, Value* value2, TypeName part_type);
Value* RotateLeft(Value* value1, Value* value2); Value* RotateLeft(Value* value1, Value* value2);
Value* VectorRotateLeft(Value* value1, Value* value2, TypeName part_type); Value* VectorRotateLeft(Value* value1, Value* value2, TypeName part_type);
Value* VectorAverage(Value* value1, Value* value2, TypeName part_type,
uint32_t arithmetic_flags);
Value* ByteSwap(Value* value); Value* ByteSwap(Value* value);
Value* CountLeadingZeros(Value* value); Value* CountLeadingZeros(Value* value);
Value* Insert(Value* value, Value* index, Value* part); Value* Insert(Value* value, Value* index, Value* part);

View File

@ -200,6 +200,7 @@ enum Opcode {
OPCODE_VECTOR_SHA, OPCODE_VECTOR_SHA,
OPCODE_ROTATE_LEFT, OPCODE_ROTATE_LEFT,
OPCODE_VECTOR_ROTATE_LEFT, OPCODE_VECTOR_ROTATE_LEFT,
OPCODE_VECTOR_AVERAGE,
OPCODE_BYTE_SWAP, OPCODE_BYTE_SWAP,
OPCODE_CNTLZ, OPCODE_CNTLZ,
OPCODE_INSERT, OPCODE_INSERT,

View File

@ -551,6 +551,12 @@ DEFINE_OPCODE(
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0) 0)
// Opcode table entry for OPCODE_VECTOR_AVERAGE, printed as "vector_average",
// using the OPCODE_SIG_V_V_V signature with no opcode flags set.
// NOTE(review): the x64 backend lowers this op through
// EmitCommutativeBinaryXmmOp - confirm whether a commutative opcode flag
// should be set here instead of 0.
DEFINE_OPCODE(
OPCODE_VECTOR_AVERAGE,
"vector_average",
OPCODE_SIG_V_V_V,
0)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_BYTE_SWAP, OPCODE_BYTE_SWAP,
"byte_swap", "byte_swap",