VECTOR_MIN and VECTOR_MAX instructions.

This commit is contained in:
Ben Vanik 2014-08-04 18:54:06 -07:00
parent e6275691cb
commit f0e9fd92a0
7 changed files with 348 additions and 33 deletions

View File

@ -9,6 +9,8 @@
#include <alloy/backend/ivm/ivm_intcode.h>
#include <algorithm>
#include <poly/poly.h>
#include <alloy/hir/label.h>
#include <alloy/runtime/runtime.h>
@ -1636,6 +1638,77 @@ int Translate_MAX(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, fns[i->dest->type]);
}
// IVM handler: lane-wise maximum of the src1 and src2 vector registers over
// the 16 b16 lanes, comparing lane values exactly as stored.
// (Assumes vec128_t::b16 holds unsigned 8-bit lanes - confirm.)
// Stores every result lane into the dest register and returns IA_NEXT.
uint32_t IntCode_VECTOR_MAX_I8_UNSIGNED(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  for (int lane = 0; lane < 16; ++lane) {
    out.b16[lane] =
        (lhs.b16[lane] < rhs.b16[lane]) ? rhs.b16[lane] : lhs.b16[lane];
  }
  return IA_NEXT;
}
// IVM handler: lane-wise maximum of the src1 and src2 vector registers over
// the 8 s8 lanes, comparing lane values exactly as stored.
// (Assumes vec128_t::s8 holds unsigned 16-bit lanes - confirm.)
// Stores every result lane into the dest register and returns IA_NEXT.
uint32_t IntCode_VECTOR_MAX_I16_UNSIGNED(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  for (int lane = 0; lane < 8; ++lane) {
    out.s8[lane] = (lhs.s8[lane] < rhs.s8[lane]) ? rhs.s8[lane] : lhs.s8[lane];
  }
  return IA_NEXT;
}
// IVM handler: lane-wise maximum of the src1 and src2 vector registers over
// the 4 i4 lanes, comparing lane values exactly as stored.
// (Assumes vec128_t::i4 holds unsigned 32-bit lanes - confirm.)
// Stores every result lane into the dest register and returns IA_NEXT.
uint32_t IntCode_VECTOR_MAX_I32_UNSIGNED(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  for (int lane = 0; lane < 4; ++lane) {
    out.i4[lane] = (lhs.i4[lane] < rhs.i4[lane]) ? rhs.i4[lane] : lhs.i4[lane];
  }
  return IA_NEXT;
}
// IVM handler: lane-wise signed maximum of the src1 and src2 vector registers.
// Each of the 16 b16 lanes is reinterpreted as int8_t before comparing; the
// winning value is stored back into the matching dest lane.
// Returns IA_NEXT.
uint32_t IntCode_VECTOR_MAX_I8_SIGNED(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  for (int lane = 0; lane < 16; ++lane) {
    const int8_t a = static_cast<int8_t>(lhs.b16[lane]);
    const int8_t b = static_cast<int8_t>(rhs.b16[lane]);
    out.b16[lane] = (a < b) ? b : a;
  }
  return IA_NEXT;
}
// IVM handler: lane-wise signed maximum of the src1 and src2 vector registers.
// Each of the 8 s8 lanes is reinterpreted as int16_t before comparing; the
// winning value is stored back into the matching dest lane.
// Returns IA_NEXT.
uint32_t IntCode_VECTOR_MAX_I16_SIGNED(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  for (int lane = 0; lane < 8; ++lane) {
    const int16_t a = static_cast<int16_t>(lhs.s8[lane]);
    const int16_t b = static_cast<int16_t>(rhs.s8[lane]);
    out.s8[lane] = (a < b) ? b : a;
  }
  return IA_NEXT;
}
// IVM handler: lane-wise signed maximum of the src1 and src2 vector registers.
// Each of the 4 i4 lanes is reinterpreted as int32_t before comparing; the
// winning value is stored back into the matching dest lane.
// Returns IA_NEXT.
uint32_t IntCode_VECTOR_MAX_I32_SIGNED(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  for (int lane = 0; lane < 4; ++lane) {
    const int32_t a = static_cast<int32_t>(lhs.i4[lane]);
    const int32_t b = static_cast<int32_t>(rhs.i4[lane]);
    out.i4[lane] = (a < b) ? b : a;
  }
  return IA_NEXT;
}
// Translates a VECTOR_MAX instruction into the IVM handler that matches the
// lane type and signedness encoded in the instruction flags.
//
// Flag layout as read here: the bits above bit 7 carry the lane ("part") type,
// used directly as the handler-table index (table order: I8, I16, I32), and
// ARITHMETIC_UNSIGNED selects the unsigned handler set.
//
// Returns whatever DispatchToC returns for a supported lane type, or 1 when
// the lane type has no handler.
int Translate_VECTOR_MAX(TranslationContext& ctx, Instr* i) {
  static IntCodeFn unsigned_fns[] = {
      IntCode_VECTOR_MAX_I8_UNSIGNED, IntCode_VECTOR_MAX_I16_UNSIGNED,
      IntCode_VECTOR_MAX_I32_UNSIGNED,
  };
  static IntCodeFn signed_fns[] = {
      IntCode_VECTOR_MAX_I8_SIGNED, IntCode_VECTOR_MAX_I16_SIGNED,
      IntCode_VECTOR_MAX_I32_SIGNED,
  };
  const uint32_t handler_count = sizeof(signed_fns) / sizeof(signed_fns[0]);
  const uint32_t part_type = i->flags >> 8;
  if (part_type >= handler_count) {
    // Only 8/16/32-bit lanes have handlers. Indexing past the tables would
    // dispatch through an arbitrary pointer, so refuse instead.
    // NOTE(review): nonzero is assumed to signal a translation failure to the
    // caller - confirm against the code that walks dispatch_table.
    return 1;
  }
  if (i->flags & ARITHMETIC_UNSIGNED) {
    return DispatchToC(ctx, i, unsigned_fns[part_type]);
  }
  return DispatchToC(ctx, i, signed_fns[part_type]);
}
uint32_t IntCode_MIN_I8_I8(IntCodeState& ics, const IntCode* i) {
int8_t a = ics.rf[i->src1_reg].i8;
int8_t b = ics.rf[i->src2_reg].i8;
@ -1688,6 +1761,77 @@ int Translate_MIN(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, fns[i->dest->type]);
}
// IVM handler: lane-wise minimum of the src1 and src2 vector registers over
// the 16 b16 lanes, comparing lane values exactly as stored.
// (Assumes vec128_t::b16 holds unsigned 8-bit lanes - confirm.)
// Stores every result lane into the dest register and returns IA_NEXT.
uint32_t IntCode_VECTOR_MIN_I8_UNSIGNED(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  for (int lane = 0; lane < 16; ++lane) {
    out.b16[lane] =
        (rhs.b16[lane] < lhs.b16[lane]) ? rhs.b16[lane] : lhs.b16[lane];
  }
  return IA_NEXT;
}
// IVM handler: lane-wise minimum of the src1 and src2 vector registers over
// the 8 s8 lanes, comparing lane values exactly as stored.
// (Assumes vec128_t::s8 holds unsigned 16-bit lanes - confirm.)
// Stores every result lane into the dest register and returns IA_NEXT.
uint32_t IntCode_VECTOR_MIN_I16_UNSIGNED(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  for (int lane = 0; lane < 8; ++lane) {
    out.s8[lane] = (rhs.s8[lane] < lhs.s8[lane]) ? rhs.s8[lane] : lhs.s8[lane];
  }
  return IA_NEXT;
}
// IVM handler: lane-wise minimum of the src1 and src2 vector registers over
// the 4 i4 lanes, comparing lane values exactly as stored.
// (Assumes vec128_t::i4 holds unsigned 32-bit lanes - confirm.)
// Stores every result lane into the dest register and returns IA_NEXT.
uint32_t IntCode_VECTOR_MIN_I32_UNSIGNED(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  for (int lane = 0; lane < 4; ++lane) {
    out.i4[lane] = (rhs.i4[lane] < lhs.i4[lane]) ? rhs.i4[lane] : lhs.i4[lane];
  }
  return IA_NEXT;
}
// IVM handler: lane-wise signed minimum of the src1 and src2 vector registers.
// Each of the 16 b16 lanes is reinterpreted as int8_t before comparing; the
// winning value is stored back into the matching dest lane.
// Returns IA_NEXT.
uint32_t IntCode_VECTOR_MIN_I8_SIGNED(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  for (int lane = 0; lane < 16; ++lane) {
    const int8_t a = static_cast<int8_t>(lhs.b16[lane]);
    const int8_t b = static_cast<int8_t>(rhs.b16[lane]);
    out.b16[lane] = (b < a) ? b : a;
  }
  return IA_NEXT;
}
// IVM handler: lane-wise signed minimum of the src1 and src2 vector registers.
// Each of the 8 s8 lanes is reinterpreted as int16_t before comparing; the
// winning value is stored back into the matching dest lane.
// Returns IA_NEXT.
uint32_t IntCode_VECTOR_MIN_I16_SIGNED(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  for (int lane = 0; lane < 8; ++lane) {
    const int16_t a = static_cast<int16_t>(lhs.s8[lane]);
    const int16_t b = static_cast<int16_t>(rhs.s8[lane]);
    out.s8[lane] = (b < a) ? b : a;
  }
  return IA_NEXT;
}
// IVM handler: lane-wise signed minimum of the src1 and src2 vector registers.
// Each of the 4 i4 lanes is reinterpreted as int32_t before comparing; the
// winning value is stored back into the matching dest lane.
// Returns IA_NEXT.
uint32_t IntCode_VECTOR_MIN_I32_SIGNED(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  for (int lane = 0; lane < 4; ++lane) {
    const int32_t a = static_cast<int32_t>(lhs.i4[lane]);
    const int32_t b = static_cast<int32_t>(rhs.i4[lane]);
    out.i4[lane] = (b < a) ? b : a;
  }
  return IA_NEXT;
}
// Translates a VECTOR_MIN instruction into the IVM handler that matches the
// lane type and signedness encoded in the instruction flags.
//
// Flag layout as read here: the bits above bit 7 carry the lane ("part") type,
// used directly as the handler-table index (table order: I8, I16, I32), and
// ARITHMETIC_UNSIGNED selects the unsigned handler set.
//
// Returns whatever DispatchToC returns for a supported lane type, or 1 when
// the lane type has no handler.
int Translate_VECTOR_MIN(TranslationContext& ctx, Instr* i) {
  static IntCodeFn unsigned_fns[] = {
      IntCode_VECTOR_MIN_I8_UNSIGNED, IntCode_VECTOR_MIN_I16_UNSIGNED,
      IntCode_VECTOR_MIN_I32_UNSIGNED,
  };
  static IntCodeFn signed_fns[] = {
      IntCode_VECTOR_MIN_I8_SIGNED, IntCode_VECTOR_MIN_I16_SIGNED,
      IntCode_VECTOR_MIN_I32_SIGNED,
  };
  const uint32_t handler_count = sizeof(signed_fns) / sizeof(signed_fns[0]);
  const uint32_t part_type = i->flags >> 8;
  if (part_type >= handler_count) {
    // Only 8/16/32-bit lanes have handlers. Indexing past the tables would
    // dispatch through an arbitrary pointer, so refuse instead.
    // NOTE(review): nonzero is assumed to signal a translation failure to the
    // caller - confirm against the code that walks dispatch_table.
    return 1;
  }
  if (i->flags & ARITHMETIC_UNSIGNED) {
    return DispatchToC(ctx, i, unsigned_fns[part_type]);
  }
  return DispatchToC(ctx, i, signed_fns[part_type]);
}
uint32_t IntCode_SELECT_I8(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i8 =
ics.rf[i->src1_reg].i8 ? ics.rf[i->src2_reg].i8 : ics.rf[i->src3_reg].i8;
@ -4041,7 +4185,8 @@ static const TranslateFn dispatch_table[] = {
Translate_LOAD_CONTEXT, Translate_STORE_CONTEXT,
Translate_LOAD, Translate_STORE,
Translate_PREFETCH, Translate_MAX,
Translate_MIN, Translate_SELECT,
Translate_VECTOR_MAX, Translate_MIN,
Translate_VECTOR_MIN, Translate_SELECT,
Translate_IS_TRUE, Translate_IS_FALSE,
Translate_COMPARE_EQ, Translate_COMPARE_NE,
Translate_COMPARE_SLT, Translate_COMPARE_SLE,

View File

@ -1739,6 +1739,53 @@ EMITTER_OPCODE_TABLE(
MAX_V128);
// ============================================================================
// OPCODE_VECTOR_MAX
// ============================================================================
// x64 sequence for OPCODE_VECTOR_MAX on V128 operands.
// The lane type is read from the instruction flags above bit 7 and the
// ARITHMETIC_UNSIGNED flag picks between the vpmaxu* and vpmaxs* forms.
// Lane types other than INT8/INT16/INT32 hit assert_unhandled_case.
EMITTER(VECTOR_MAX, MATCH(I<OPCODE_VECTOR_MAX, V128<>, V128<>, V128<>>)) {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    EmitCommutativeBinaryXmmOp(
        e, i, [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
          const bool is_unsigned =
              (i.instr->flags & ARITHMETIC_UNSIGNED) != 0;
          uint32_t part_type = i.instr->flags >> 8;
          switch (part_type) {
            case INT8_TYPE:
              if (is_unsigned) {
                e.vpmaxub(dest, src1, src2);
              } else {
                e.vpmaxsb(dest, src1, src2);
              }
              break;
            case INT16_TYPE:
              if (is_unsigned) {
                e.vpmaxuw(dest, src1, src2);
              } else {
                e.vpmaxsw(dest, src1, src2);
              }
              break;
            case INT32_TYPE:
              if (is_unsigned) {
                e.vpmaxud(dest, src1, src2);
              } else {
                e.vpmaxsd(dest, src1, src2);
              }
              break;
            default:
              assert_unhandled_case(part_type);
              break;
          }
        });
  }
};
EMITTER_OPCODE_TABLE(
    OPCODE_VECTOR_MAX,
    VECTOR_MAX);
// ============================================================================
// OPCODE_MIN
// ============================================================================
@ -1773,6 +1820,53 @@ EMITTER_OPCODE_TABLE(
MIN_V128);
// ============================================================================
// OPCODE_VECTOR_MIN
// ============================================================================
// x64 sequence for OPCODE_VECTOR_MIN on V128 operands.
// The lane type is read from the instruction flags above bit 7 and the
// ARITHMETIC_UNSIGNED flag picks between the vpminu* and vpmins* forms.
// Lane types other than INT8/INT16/INT32 hit assert_unhandled_case.
EMITTER(VECTOR_MIN, MATCH(I<OPCODE_VECTOR_MIN, V128<>, V128<>, V128<>>)) {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    EmitCommutativeBinaryXmmOp(
        e, i, [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
          const bool is_unsigned =
              (i.instr->flags & ARITHMETIC_UNSIGNED) != 0;
          uint32_t part_type = i.instr->flags >> 8;
          switch (part_type) {
            case INT8_TYPE:
              if (is_unsigned) {
                e.vpminub(dest, src1, src2);
              } else {
                e.vpminsb(dest, src1, src2);
              }
              break;
            case INT16_TYPE:
              if (is_unsigned) {
                e.vpminuw(dest, src1, src2);
              } else {
                e.vpminsw(dest, src1, src2);
              }
              break;
            case INT32_TYPE:
              if (is_unsigned) {
                e.vpminud(dest, src1, src2);
              } else {
                e.vpminsd(dest, src1, src2);
              }
              break;
            default:
              assert_unhandled_case(part_type);
              break;
          }
        });
  }
};
EMITTER_OPCODE_TABLE(
    OPCODE_VECTOR_MIN,
    VECTOR_MIN);
// ============================================================================
// OPCODE_SELECT
// ============================================================================
@ -5042,7 +5136,9 @@ void RegisterSequences() {
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_PREFETCH);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MAX);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_MAX);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MIN);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_MIN);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SELECT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_IS_TRUE);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_IS_FALSE);

View File

@ -803,33 +803,48 @@ XEEMITTER(vmaxfp128, VX128(6, 640), VX128)(PPCHIRBuilder& f, InstrData& i) {
}
// vmaxsb: per-lane maximum of VA and VB over signed 8-bit lanes, stored to VD.
// No arithmetic flags are passed, so VectorMax uses its default (0) flags.
// Returns 0 after the HIR has been emitted.
XEEMITTER(vmaxsb, 0x10000102, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- max((VA), (VB)) (signed int8)
  Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vmaxsh: per-lane maximum of VA and VB over signed 16-bit lanes, stored to VD.
// No arithmetic flags are passed, so VectorMax uses its default (0) flags.
// Returns 0 after the HIR has been emitted.
XEEMITTER(vmaxsh, 0x10000142, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- max((VA), (VB)) (signed int16)
  Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vmaxsw: per-lane maximum of VA and VB over signed 32-bit lanes, stored to VD.
// No arithmetic flags are passed, so VectorMax uses its default (0) flags.
// Returns 0 after the HIR has been emitted.
XEEMITTER(vmaxsw, 0x10000182, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- max((VA), (VB)) (signed int32)
  Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vmaxub: per-lane maximum of VA and VB over unsigned 8-bit lanes, stored to
// VD. ARITHMETIC_UNSIGNED requests the unsigned comparison.
// Returns 0 after the HIR has been emitted.
XEEMITTER(vmaxub, 0x10000002, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- max((VA), (VB)) (unsigned int8)
  Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE,
                         ARITHMETIC_UNSIGNED);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vmaxuh: per-lane maximum of VA and VB over unsigned 16-bit lanes, stored to
// VD. ARITHMETIC_UNSIGNED requests the unsigned comparison.
// Returns 0 after the HIR has been emitted.
XEEMITTER(vmaxuh, 0x10000042, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- max((VA), (VB)) (unsigned int16)
  Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE,
                         ARITHMETIC_UNSIGNED);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vmaxuw: per-lane maximum of VA and VB over unsigned 32-bit lanes, stored to
// VD. ARITHMETIC_UNSIGNED requests the unsigned comparison.
// Returns 0 after the HIR has been emitted.
XEEMITTER(vmaxuw, 0x10000082, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- max((VA), (VB)) (unsigned int32)
  Value* v = f.VectorMax(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE,
                         ARITHMETIC_UNSIGNED);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
XEEMITTER(vmhaddshs, 0x10000020, VXA)(PPCHIRBuilder& f, InstrData& i) {
@ -856,33 +871,48 @@ XEEMITTER(vminfp128, VX128(6, 704), VX128)(PPCHIRBuilder& f, InstrData& i) {
}
// vminsb: per-lane minimum of VA and VB over signed 8-bit lanes, stored to VD.
// No arithmetic flags are passed, so VectorMin uses its default (0) flags.
// Returns 0 after the HIR has been emitted.
XEEMITTER(vminsb, 0x10000302, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- min((VA), (VB)) (signed int8)
  Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vminsh: per-lane minimum of VA and VB over signed 16-bit lanes, stored to VD.
// No arithmetic flags are passed, so VectorMin uses its default (0) flags.
// Returns 0 after the HIR has been emitted.
XEEMITTER(vminsh, 0x10000342, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- min((VA), (VB)) (signed int16)
  Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vminsw: per-lane minimum of VA and VB over signed 32-bit lanes, stored to VD.
// No arithmetic flags are passed, so VectorMin uses its default (0) flags.
// Returns 0 after the HIR has been emitted.
XEEMITTER(vminsw, 0x10000382, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- min((VA), (VB)) (signed int32)
  Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vminub: per-lane minimum of VA and VB over unsigned 8-bit lanes, stored to
// VD. ARITHMETIC_UNSIGNED requests the unsigned comparison.
// Returns 0 after the HIR has been emitted.
XEEMITTER(vminub, 0x10000202, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- min((VA), (VB)) (unsigned int8)
  Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE,
                         ARITHMETIC_UNSIGNED);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vminuh: per-lane minimum of VA and VB over unsigned 16-bit lanes, stored to
// VD. ARITHMETIC_UNSIGNED requests the unsigned comparison.
// Returns 0 after the HIR has been emitted.
XEEMITTER(vminuh, 0x10000242, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- min((VA), (VB)) (unsigned int16)
  Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE,
                         ARITHMETIC_UNSIGNED);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vminuw: per-lane minimum of VA and VB over unsigned 32-bit lanes, stored to
// VD. ARITHMETIC_UNSIGNED requests the unsigned comparison.
// Returns 0 after the HIR has been emitted.
XEEMITTER(vminuw, 0x10000282, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- min((VA), (VB)) (unsigned int32)
  Value* v = f.VectorMin(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT32_TYPE,
                         ARITHMETIC_UNSIGNED);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
XEEMITTER(vmladduhm, 0x10000022, VXA)(PPCHIRBuilder& f, InstrData& i) {

View File

@ -1035,6 +1035,19 @@ Value* HIRBuilder::Max(Value* value1, Value* value2) {
return i->dest;
}
// Appends an OPCODE_VECTOR_MAX instruction computing the per-lane maximum of
// value1 and value2.
//
// value1/value2: operands; must have the same type.
// part_type: lane type. Only INT8_TYPE, INT16_TYPE and INT32_TYPE are
//     handled by the backends visible alongside this builder.
// arithmetic_flags: ARITHMETIC_* bits (e.g. ARITHMETIC_UNSIGNED). They share
//     the 16-bit instruction flags with part_type, which is packed starting
//     at bit 8, so they must fit in the low byte.
//
// Returns the destination value of the new instruction, allocated with the
// same type as value1.
Value* HIRBuilder::VectorMax(Value* value1, Value* value2, TypeName part_type,
                             uint32_t arithmetic_flags) {
  ASSERT_TYPES_EQUAL(value1, value2);
  // Reject lane types no backend can lower before they reach a backend table.
  assert_true(part_type == INT8_TYPE || part_type == INT16_TYPE ||
              part_type == INT32_TYPE);
  // Flag bits at or above bit 8 would be read back as part of the lane type.
  assert_true((arithmetic_flags & ~0xFFu) == 0);

  const uint16_t flags =
      static_cast<uint16_t>(arithmetic_flags | (part_type << 8));
  Instr* i =
      AppendInstr(OPCODE_VECTOR_MAX_info, flags, AllocValue(value1->type));
  i->set_src1(value1);
  i->set_src2(value2);
  i->src3.value = NULL;
  return i->dest;
}
Value* HIRBuilder::Min(Value* value1, Value* value2) {
ASSERT_TYPES_EQUAL(value1, value2);
@ -1050,6 +1063,19 @@ Value* HIRBuilder::Min(Value* value1, Value* value2) {
return i->dest;
}
// Appends an OPCODE_VECTOR_MIN instruction computing the per-lane minimum of
// value1 and value2.
//
// value1/value2: operands; must have the same type.
// part_type: lane type. Only INT8_TYPE, INT16_TYPE and INT32_TYPE are
//     handled by the backends visible alongside this builder.
// arithmetic_flags: ARITHMETIC_* bits (e.g. ARITHMETIC_UNSIGNED). They share
//     the 16-bit instruction flags with part_type, which is packed starting
//     at bit 8, so they must fit in the low byte.
//
// Returns the destination value of the new instruction, allocated with the
// same type as value1.
Value* HIRBuilder::VectorMin(Value* value1, Value* value2, TypeName part_type,
                             uint32_t arithmetic_flags) {
  ASSERT_TYPES_EQUAL(value1, value2);
  // Reject lane types no backend can lower before they reach a backend table.
  assert_true(part_type == INT8_TYPE || part_type == INT16_TYPE ||
              part_type == INT32_TYPE);
  // Flag bits at or above bit 8 would be read back as part of the lane type.
  assert_true((arithmetic_flags & ~0xFFu) == 0);

  const uint16_t flags =
      static_cast<uint16_t>(arithmetic_flags | (part_type << 8));
  Instr* i =
      AppendInstr(OPCODE_VECTOR_MIN_info, flags, AllocValue(value1->type));
  i->set_src1(value1);
  i->set_src2(value2);
  i->src3.value = NULL;
  return i->dest;
}
Value* HIRBuilder::Select(Value* cond, Value* value1, Value* value2) {
assert_true(cond->type == INT8_TYPE); // for now
ASSERT_TYPES_EQUAL(value1, value2);

View File

@ -136,7 +136,11 @@ class HIRBuilder {
void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0);
Value* Max(Value* value1, Value* value2);
// Appends a per-lane vector maximum of value1 and value2 and returns the
// result value. part_type names the lane type (the visible callers pass
// INT8_TYPE, INT16_TYPE or INT32_TYPE). Pass ARITHMETIC_UNSIGNED in
// arithmetic_flags for an unsigned comparison; with the default of 0 the
// backends take their signed path.
Value* VectorMax(Value* value1, Value* value2, TypeName part_type,
uint32_t arithmetic_flags = 0);
Value* Min(Value* value1, Value* value2);
// Appends a per-lane vector minimum of value1 and value2 and returns the
// result value. part_type names the lane type (the visible callers pass
// INT8_TYPE, INT16_TYPE or INT32_TYPE). Pass ARITHMETIC_UNSIGNED in
// arithmetic_flags for an unsigned comparison; with the default of 0 the
// backends take their signed path.
Value* VectorMin(Value* value1, Value* value2, TypeName part_type,
uint32_t arithmetic_flags = 0);
Value* Select(Value* cond, Value* value1, Value* value2);
Value* IsTrue(Value* value);
Value* IsFalse(Value* value);

View File

@ -112,7 +112,9 @@ enum Opcode {
OPCODE_STORE,
OPCODE_PREFETCH,
OPCODE_MAX,
OPCODE_VECTOR_MAX,
OPCODE_MIN,
OPCODE_VECTOR_MIN,
OPCODE_SELECT,
OPCODE_IS_TRUE,
OPCODE_IS_FALSE,

View File

@ -236,12 +236,24 @@ DEFINE_OPCODE(
OPCODE_SIG_V_V_V,
0)
// Per-lane vector maximum; instructions of this opcode are created by
// HIRBuilder::VectorMax.
// NOTE(review): OPCODE_SIG_V_V_V presumably means a value destination with two
// value sources, and the trailing 0 presumably means no opcode flags - confirm.
DEFINE_OPCODE(
OPCODE_VECTOR_MAX,
"vector_max",
OPCODE_SIG_V_V_V,
0)
// Table entry for the "min" opcode.
// NOTE(review): OPCODE_SIG_V_V_V presumably means a value destination with two
// value sources, and the trailing 0 presumably means no opcode flags - confirm.
DEFINE_OPCODE(
OPCODE_MIN,
"min",
OPCODE_SIG_V_V_V,
0)
// Per-lane vector minimum; instructions of this opcode are created by
// HIRBuilder::VectorMin.
// NOTE(review): OPCODE_SIG_V_V_V presumably means a value destination with two
// value sources, and the trailing 0 presumably means no opcode flags - confirm.
DEFINE_OPCODE(
OPCODE_VECTOR_MIN,
"vector_min",
OPCODE_SIG_V_V_V,
0)
DEFINE_OPCODE(
OPCODE_SELECT,
"select",