VectorAdd and saturation checks.

This commit is contained in:
Ben Vanik 2014-01-09 21:57:07 -08:00
parent 2980a30f30
commit 3fbebcfa08
11 changed files with 251 additions and 23 deletions

View File

@ -115,6 +115,7 @@ int IVMFunction::CallImpl(ThreadState* thread_state, uint64_t return_address) {
ics.membase = memory->membase();
ics.reserve_address = memory->reserve_address();
ics.did_carry = 0;
ics.did_saturate = 0;
ics.access_callbacks = thread_state->runtime()->access_callbacks();
ics.thread_state = thread_state;
ics.return_address = return_address;

View File

@ -1164,8 +1164,10 @@ uint32_t IntCode_VECTOR_CONVERT_F2I_SAT(IntCodeState& ics, const IntCode* i) {
float src = src1.f4[n];
if (src < 0) {
dest.i4[n] = 0;
ics.did_saturate = 1;
} else if (src > UINT_MAX) {
dest.i4[n] = UINT_MAX;
ics.did_saturate = 1;
} else {
dest.i4[n] = (uint32_t)src;
}
@ -1175,8 +1177,10 @@ uint32_t IntCode_VECTOR_CONVERT_F2I_SAT(IntCodeState& ics, const IntCode* i) {
float src = src1.f4[n];
if (src < INT_MIN) {
dest.i4[n] = INT_MIN;
ics.did_saturate = 1;
} else if (src > INT_MAX) {
dest.i4[n] = INT_MAX;
ics.did_saturate = 1;
} else {
dest.i4[n] = (int32_t)src;
}
@ -2018,6 +2022,14 @@ int Translate_DID_CARRY(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, IntCode_DID_CARRY);
}
// Interpreter handler for DID_SATURATE: copies the interpreter's
// did_saturate flag into the 8-bit slot of the destination register and
// advances to the next instruction.
uint32_t IntCode_DID_SATURATE(IntCodeState& ics, const IntCode* i) {
  auto& dest_slot = ics.rf[i->dest_reg];
  dest_slot.i8 = ics.did_saturate;
  return IA_NEXT;
}
// Translator entry for DID_SATURATE: forwards the instruction to the C
// handler IntCode_DID_SATURATE via DispatchToC and returns its result.
int Translate_DID_SATURATE(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, IntCode_DID_SATURATE);
}
#define VECTOR_COMPARER(type, value, count, op) \
const vec128_t& src1 = ics.rf[i->src1_reg].v128; \
const vec128_t& src2 = ics.rf[i->src2_reg].v128; \
@ -2157,6 +2169,7 @@ uint32_t IntCode_ADD_F64_F64(IntCodeState& ics, const IntCode* i) {
return IA_NEXT;
}
uint32_t IntCode_ADD_V128_V128(IntCodeState& ics, const IntCode* i) {
XEASSERT(!i->flags);
const vec128_t& src1 = ics.rf[i->src1_reg].v128;
const vec128_t& src2 = ics.rf[i->src2_reg].v128;
vec128_t& dest = ics.rf[i->dest_reg].v128;
@ -2235,6 +2248,140 @@ int Translate_ADD_CARRY(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, fns[i->dest->type]);
}
// Interpreter handler: lane-wise add over the sixteen b16 lanes of two
// 128-bit registers.
//
// The arithmetic flags live in bits 8 and up of i->flags. When
// ARITHMETIC_SATURATE is set, each sum is clamped (to 0..0xFF if
// ARITHMETIC_UNSIGNED is also set, otherwise to -0x80..0x7F) and
// ics.did_saturate is set to 1 for every clamped lane. Without
// ARITHMETIC_SATURATE the sums are stored directly and did_saturate is left
// untouched.
uint32_t Translate_VECTOR_ADD_I8(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  const uint32_t arithmetic_flags = i->flags >> 8;
  const bool saturating = (arithmetic_flags & ARITHMETIC_SATURATE) != 0;
  const bool unsigned_lanes = (arithmetic_flags & ARITHMETIC_UNSIGNED) != 0;

  if (!saturating) {
    // Plain add; the result is narrowed back to the lane on assignment.
    for (int lane = 0; lane < 16; ++lane) {
      out.b16[lane] = lhs.b16[lane] + rhs.b16[lane];
    }
    return IA_NEXT;
  }

  if (unsigned_lanes) {
    for (int lane = 0; lane < 16; ++lane) {
      const uint16_t sum = lhs.b16[lane] + rhs.b16[lane];
      if (sum <= 0xFF) {
        out.b16[lane] = static_cast<uint8_t>(sum);
        continue;
      }
      out.b16[lane] = 0xFF;
      ics.did_saturate = 1;
    }
    return IA_NEXT;
  }

  // Signed saturation: reinterpret each lane as int8_t before adding.
  for (int lane = 0; lane < 16; ++lane) {
    const int16_t sum = static_cast<int8_t>(lhs.b16[lane]) +
                        static_cast<int8_t>(rhs.b16[lane]);
    if (sum < -0x80) {
      out.b16[lane] = -0x80;
      ics.did_saturate = 1;
    } else if (sum > 0x7F) {
      out.b16[lane] = 0x7F;
      ics.did_saturate = 1;
    } else {
      out.b16[lane] = static_cast<uint8_t>(sum);
    }
  }
  return IA_NEXT;
}
// Interpreter handler: lane-wise add over the eight s8 lanes of two 128-bit
// registers.
//
// The arithmetic flags live in bits 8 and up of i->flags. When
// ARITHMETIC_SATURATE is set, each sum is clamped (to 0..0xFFFF if
// ARITHMETIC_UNSIGNED is also set, otherwise to -0x8000..0x7FFF) and
// ics.did_saturate is set to 1 for every clamped lane. Without
// ARITHMETIC_SATURATE the sums are stored directly and did_saturate is left
// untouched.
uint32_t Translate_VECTOR_ADD_I16(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  const uint32_t arithmetic_flags = i->flags >> 8;
  const bool saturating = (arithmetic_flags & ARITHMETIC_SATURATE) != 0;
  const bool unsigned_lanes = (arithmetic_flags & ARITHMETIC_UNSIGNED) != 0;

  if (!saturating) {
    // Plain add; the result is narrowed back to the lane on assignment.
    for (int lane = 0; lane < 8; ++lane) {
      out.s8[lane] = lhs.s8[lane] + rhs.s8[lane];
    }
    return IA_NEXT;
  }

  if (unsigned_lanes) {
    for (int lane = 0; lane < 8; ++lane) {
      const uint32_t sum = lhs.s8[lane] + rhs.s8[lane];
      if (sum <= 0xFFFF) {
        out.s8[lane] = static_cast<uint16_t>(sum);
        continue;
      }
      out.s8[lane] = 0xFFFF;
      ics.did_saturate = 1;
    }
    return IA_NEXT;
  }

  // Signed saturation: reinterpret each lane as int16_t before adding.
  for (int lane = 0; lane < 8; ++lane) {
    const int32_t sum = static_cast<int16_t>(lhs.s8[lane]) +
                        static_cast<int16_t>(rhs.s8[lane]);
    if (sum < -0x8000) {
      out.s8[lane] = -0x8000;
      ics.did_saturate = 1;
    } else if (sum > 0x7FFF) {
      out.s8[lane] = 0x7FFF;
      ics.did_saturate = 1;
    } else {
      out.s8[lane] = static_cast<uint16_t>(sum);
    }
  }
  return IA_NEXT;
}
// Interpreter handler: lane-wise add over the four i4 lanes of two 128-bit
// registers.
//
// The arithmetic flags live in bits 8 and up of i->flags. When
// ARITHMETIC_SATURATE is set, each sum is clamped (to 0..0xFFFFFFFF if
// ARITHMETIC_UNSIGNED is also set, otherwise to -0x80000000..0x7FFFFFFF) and
// ics.did_saturate is set to 1 for every clamped lane. Without
// ARITHMETIC_SATURATE the sums are stored directly and did_saturate is left
// untouched.
uint32_t Translate_VECTOR_ADD_I32(IntCodeState& ics, const IntCode* i) {
  const vec128_t& src1 = ics.rf[i->src1_reg].v128;
  const vec128_t& src2 = ics.rf[i->src2_reg].v128;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  const uint32_t arithmetic_flags = i->flags >> 8;
  if (arithmetic_flags & ARITHMETIC_SATURATE) {
    if (arithmetic_flags & ARITHMETIC_UNSIGNED) {
      for (int n = 0; n < 4; n++) {
        // Widen each operand BEFORE adding: a 32-bit add wraps, so the carry
        // out of bit 31 would be lost and the overflow never detected.
        const uint64_t v =
            static_cast<uint64_t>(static_cast<uint32_t>(src1.i4[n])) +
            static_cast<uint64_t>(static_cast<uint32_t>(src2.i4[n]));
        if (v > 0xFFFFFFFFull) {
          dest.i4[n] = 0xFFFFFFFF;
          ics.did_saturate = 1;
        } else {
          dest.i4[n] = static_cast<uint32_t>(v);
        }
      }
    } else {
      for (int n = 0; n < 4; n++) {
        // Sign-extend each operand to 64 bits before adding so the sum cannot
        // overflow (signed 32-bit overflow is undefined behavior).
        const int64_t v =
            static_cast<int64_t>(static_cast<int32_t>(src1.i4[n])) +
            static_cast<int64_t>(static_cast<int32_t>(src2.i4[n]));
        // Both bounds are signed 64-bit literals; comparing against an
        // unsigned literal would convert v to unsigned and invert the test.
        if (v > 0x7FFFFFFFll) {
          dest.i4[n] = 0x7FFFFFFF;
          ics.did_saturate = 1;
        } else if (v < -0x80000000ll) {
          dest.i4[n] = 0x80000000;
          ics.did_saturate = 1;
        } else {
          dest.i4[n] = static_cast<uint32_t>(v);
        }
      }
    }
  } else {
    // Plain wrapping add; the saturation flag is not modified.
    for (int n = 0; n < 4; n++) {
      dest.i4[n] = src1.i4[n] + src2.i4[n];
    }
  }
  return IA_NEXT;
}
// Interpreter handler: lane-wise add over the four f4 lanes of two 128-bit
// registers. i->flags is not consulted, so no saturation handling applies.
uint32_t Translate_VECTOR_ADD_F32(IntCodeState& ics, const IntCode* i) {
  const vec128_t& lhs = ics.rf[i->src1_reg].v128;
  const vec128_t& rhs = ics.rf[i->src2_reg].v128;
  vec128_t& out = ics.rf[i->dest_reg].v128;
  int lane = 0;
  while (lane < 4) {
    out.f4[lane] = lhs.f4[lane] + rhs.f4[lane];
    ++lane;
  }
  return IA_NEXT;
}
// Translator entry for VECTOR_ADD.
//
// The low 8 bits of i->flags hold the lane type and select the C handler from
// the table below. Any value that falls outside the table is routed to
// IntCode_INVALID_TYPE instead of reading past the end of the array.
int Translate_VECTOR_ADD(TranslationContext& ctx, Instr* i) {
  const TypeName part_type = (TypeName)(i->flags & 0xFF);
  // NOTE(review): entry order presumably mirrors the TypeName enumerators
  // (8/16/32/64-bit ints, 32/64-bit floats, 128-bit vector) -- confirm
  // against the enum declaration.
  static IntCodeFn fns[] = {
    Translate_VECTOR_ADD_I8,
    Translate_VECTOR_ADD_I16,
    Translate_VECTOR_ADD_I32,
    IntCode_INVALID_TYPE,
    Translate_VECTOR_ADD_F32,
    IntCode_INVALID_TYPE,
    IntCode_INVALID_TYPE,
  };
  const uint32_t index = (uint32_t)part_type;
  if (index >= sizeof(fns) / sizeof(fns[0])) {
    return DispatchToC(ctx, i, IntCode_INVALID_TYPE);
  }
  return DispatchToC(ctx, i, fns[index]);
}
#define SUB_DID_CARRY(a, b) \
((b) == 0) || CHECK_DID_CARRY(a, 0 - b)
uint32_t IntCode_SUB_I8_I8(IntCodeState& ics, const IntCode* i) {
@ -3670,6 +3817,7 @@ static const TranslateFn dispatch_table[] = {
Translate_COMPARE_UGE,
Translate_DID_CARRY,
TranslateInvalid, //Translate_DID_OVERFLOW,
Translate_DID_SATURATE,
Translate_VECTOR_COMPARE_EQ,
Translate_VECTOR_COMPARE_SGT,
Translate_VECTOR_COMPARE_SGE,
@ -3678,6 +3826,7 @@ static const TranslateFn dispatch_table[] = {
Translate_ADD,
Translate_ADD_CARRY,
Translate_VECTOR_ADD,
Translate_SUB,
Translate_MUL,
Translate_MUL_HI,

View File

@ -45,6 +45,7 @@ typedef struct {
uint8_t* membase;
uint32_t* reserve_address;
int8_t did_carry;
int8_t did_saturate;
runtime::RegisterAccessCallbacks* access_callbacks;
runtime::ThreadState* thread_state;
uint64_t return_address;

View File

@ -175,6 +175,8 @@ typedef struct XECACHEALIGN64 PPCContext_s {
} bits;
} fpscr; // Floating-point status and control register
uint8_t vscr_sat;
double f[32]; // Floating-point registers
vec128_t v[128]; // VMX128 vector registers

View File

@ -404,48 +404,75 @@ XEEMITTER(vaddfp128, VX128(5, 16), VX128 )(PPCHIRBuilder& f, InstrData
}
// vaddsbs: emits a VectorAdd of VA and VB over INT8_TYPE lanes with
// ARITHMETIC_SATURATE (signed saturation, since ARITHMETIC_UNSIGNED is not
// passed), stores the DidSaturate result through StoreSAT, and writes VD.
// The old "not implemented" early return is gone: it made this unreachable.
XEEMITTER(vaddsbs, 0x10000300, VX )(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.VectorAdd(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
                         INT8_TYPE, ARITHMETIC_SATURATE);
  // NOTE(review): this overwrites vscr_sat rather than OR-ing into it;
  // confirm whether the flag is meant to be sticky.
  f.StoreSAT(f.DidSaturate(v));
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vaddshs: emits a VectorAdd of VA and VB over INT16_TYPE lanes with
// ARITHMETIC_SATURATE (signed saturation, since ARITHMETIC_UNSIGNED is not
// passed), stores the DidSaturate result through StoreSAT, and writes VD.
// The old "not implemented" early return is gone: it made this unreachable.
XEEMITTER(vaddshs, 0x10000340, VX )(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.VectorAdd(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
                         INT16_TYPE, ARITHMETIC_SATURATE);
  // NOTE(review): this overwrites vscr_sat rather than OR-ing into it;
  // confirm whether the flag is meant to be sticky.
  f.StoreSAT(f.DidSaturate(v));
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vaddsws: emits a VectorAdd of VA and VB over INT32_TYPE lanes with
// ARITHMETIC_SATURATE (signed saturation, since ARITHMETIC_UNSIGNED is not
// passed), stores the DidSaturate result through StoreSAT, and writes VD.
// The old "not implemented" early return is gone: it made this unreachable.
XEEMITTER(vaddsws, 0x10000380, VX )(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.VectorAdd(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
                         INT32_TYPE, ARITHMETIC_SATURATE);
  // NOTE(review): this overwrites vscr_sat rather than OR-ing into it;
  // confirm whether the flag is meant to be sticky.
  f.StoreSAT(f.DidSaturate(v));
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vaddubm: emits a VectorAdd of VA and VB over INT8_TYPE lanes with
// ARITHMETIC_UNSIGNED only (modulo add, no saturation) and writes VD.
// The old "not implemented" early return is gone: it made this unreachable.
XEEMITTER(vaddubm, 0x10000000, VX )(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.VectorAdd(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
                         INT8_TYPE, ARITHMETIC_UNSIGNED);
  // No StoreSAT here: a modulo add never saturates, and the non-saturating
  // add path does not update the saturation flag, so DidSaturate would only
  // return a stale value left behind by an earlier instruction.
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vaddubs: emits a VectorAdd of VA and VB over INT8_TYPE lanes with
// ARITHMETIC_UNSIGNED | ARITHMETIC_SATURATE (unsigned saturation), stores the
// DidSaturate result through StoreSAT, and writes VD.
// The old "not implemented" early return is gone: it made this unreachable.
XEEMITTER(vaddubs, 0x10000200, VX )(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.VectorAdd(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
                         INT8_TYPE, ARITHMETIC_UNSIGNED | ARITHMETIC_SATURATE);
  // NOTE(review): this overwrites vscr_sat rather than OR-ing into it;
  // confirm whether the flag is meant to be sticky.
  f.StoreSAT(f.DidSaturate(v));
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vadduhm: emits a VectorAdd of VA and VB over INT16_TYPE lanes with
// ARITHMETIC_UNSIGNED only (modulo add, no saturation) and writes VD.
// The old "not implemented" early return is gone: it made this unreachable.
XEEMITTER(vadduhm, 0x10000040, VX )(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.VectorAdd(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
                         INT16_TYPE, ARITHMETIC_UNSIGNED);
  // No StoreSAT here: a modulo add never saturates, and the non-saturating
  // add path does not update the saturation flag, so DidSaturate would only
  // return a stale value left behind by an earlier instruction.
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vadduhs: emits a VectorAdd of VA and VB over INT16_TYPE lanes with
// ARITHMETIC_UNSIGNED | ARITHMETIC_SATURATE (unsigned saturation), stores the
// DidSaturate result through StoreSAT, and writes VD.
// The old "not implemented" early return is gone: it made this unreachable.
XEEMITTER(vadduhs, 0x10000240, VX )(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.VectorAdd(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
                         INT16_TYPE, ARITHMETIC_UNSIGNED | ARITHMETIC_SATURATE);
  // NOTE(review): this overwrites vscr_sat rather than OR-ing into it;
  // confirm whether the flag is meant to be sticky.
  f.StoreSAT(f.DidSaturate(v));
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vadduwm: emits a VectorAdd of VA and VB over INT32_TYPE lanes with
// ARITHMETIC_UNSIGNED only (modulo add, no saturation) and writes VD.
// The old "not implemented" early return is gone: it made this unreachable.
XEEMITTER(vadduwm, 0x10000080, VX )(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.VectorAdd(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
                         INT32_TYPE, ARITHMETIC_UNSIGNED);
  // No StoreSAT here: a modulo add never saturates, and the non-saturating
  // add path does not update the saturation flag, so DidSaturate would only
  // return a stale value left behind by an earlier instruction.
  f.StoreVR(i.VX.VD, v);
  return 0;
}
// vadduws: emits a VectorAdd of VA and VB over INT32_TYPE lanes with
// ARITHMETIC_UNSIGNED | ARITHMETIC_SATURATE (unsigned saturation), stores the
// DidSaturate result through StoreSAT, and writes VD.
// The old "not implemented" early return is gone: it made this unreachable.
XEEMITTER(vadduws, 0x10000280, VX )(PPCHIRBuilder& f, InstrData& i) {
  Value* v = f.VectorAdd(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB),
                         INT32_TYPE, ARITHMETIC_UNSIGNED | ARITHMETIC_SATURATE);
  // NOTE(review): this overwrites vscr_sat rather than OR-ing into it;
  // confirm whether the flag is meant to be sticky.
  f.StoreSAT(f.DidSaturate(v));
  f.StoreVR(i.VX.VD, v);
  return 0;
}
int InstrEmit_vand_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {

View File

@ -286,6 +286,14 @@ void PPCHIRBuilder::StoreCA(Value* value) {
StoreContext(offsetof(PPCContext, xer_ca), value);
}
// Loads the vscr_sat field of PPCContext as an INT8_TYPE value.
Value* PPCHIRBuilder::LoadSAT() {
  const auto sat_offset = offsetof(PPCContext, vscr_sat);
  return LoadContext(sat_offset, INT8_TYPE);
}
// Stores `value` into the vscr_sat field of PPCContext.
void PPCHIRBuilder::StoreSAT(Value* value) {
  const auto sat_offset = offsetof(PPCContext, vscr_sat);
  StoreContext(sat_offset, value);
}
Value* PPCHIRBuilder::LoadGPR(uint32_t reg) {
return LoadContext(
offsetof(PPCContext, r) + reg * 8, INT64_TYPE);

View File

@ -55,6 +55,8 @@ public:
//void StoreOV(Value* value);
Value* LoadCA();
void StoreCA(Value* value);
Value* LoadSAT();
void StoreSAT(Value* value);
Value* LoadGPR(uint32_t reg);
void StoreGPR(uint32_t reg, Value* value);

View File

@ -1052,6 +1052,15 @@ Value* HIRBuilder::DidOverflow(Value* value) {
return i->dest;
}
// Appends an OPCODE_DID_SATURATE instruction (flags 0) whose first source is
// `value` and whose destination is a freshly allocated INT8_TYPE value; the
// second and third sources are cleared. Returns the instruction's destination.
Value* HIRBuilder::DidSaturate(Value* value) {
  Value* result = AllocValue(INT8_TYPE);
  Instr* instr = AppendInstr(OPCODE_DID_SATURATE_info, 0, result);
  instr->set_src1(value);
  instr->src3.value = NULL;
  instr->src2.value = NULL;
  return instr->dest;
}
Value* HIRBuilder::VectorCompareXX(
const OpcodeInfo& opcode, Value* value1, Value* value2,
TypeName part_type) {
@ -1140,6 +1149,24 @@ Value* HIRBuilder::AddWithCarry(
return i->dest;
}
// Appends an OPCODE_VECTOR_ADD instruction adding `value1` and `value2`, both
// asserted to be vector-typed. The instruction's 16-bit flags field packs
// `part_type` into the low byte and `arithmetic_flags` into the bits above it;
// an assertion checks that the packed value fits in 16 bits. The destination
// is a new value with the same type as `value1`, and it is returned.
Value* HIRBuilder::VectorAdd(Value* value1, Value* value2, TypeName part_type,
                             uint32_t arithmetic_flags) {
  ASSERT_VECTOR_TYPE(value1);
  ASSERT_VECTOR_TYPE(value2);

  // This is shady: two unrelated fields share one flags word.
  uint32_t flags = (arithmetic_flags << 8) | part_type;
  XEASSERTZERO(flags >> 16);

  Value* result = AllocValue(value1->type);
  Instr* instr = AppendInstr(OPCODE_VECTOR_ADD_info, (uint16_t)flags, result);
  instr->set_src1(value1);
  instr->set_src2(value2);
  instr->src3.value = NULL;
  return instr->dest;
}
Value* HIRBuilder::Sub(
Value* value1, Value* value2, uint32_t arithmetic_flags) {
ASSERT_TYPES_EQUAL(value1, value2);

View File

@ -94,10 +94,6 @@ public:
Value* Convert(Value* value, TypeName target_type,
RoundMode round_mode = ROUND_TO_ZERO);
Value* Round(Value* value, RoundMode round_mode);
// TODO(benvanik): make this cleaner -- not happy with it.
// It'd be nice if Convert() supported this, however then we'd need a
// VEC128_INT32_TYPE or something.
Value* VectorConvertI2F(Value* value, uint32_t arithmetic_flags = 0);
Value* VectorConvertF2I(Value* value, uint32_t arithmetic_flags = 0);
@ -143,6 +139,7 @@ public:
Value* CompareUGE(Value* value1, Value* value2);
Value* DidCarry(Value* value);
Value* DidOverflow(Value* value);
Value* DidSaturate(Value* value);
Value* VectorCompareEQ(Value* value1, Value* value2, TypeName part_type);
Value* VectorCompareSGT(Value* value1, Value* value2, TypeName part_type);
Value* VectorCompareSGE(Value* value1, Value* value2, TypeName part_type);
@ -152,6 +149,8 @@ public:
Value* Add(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
Value* AddWithCarry(Value* value1, Value* value2, Value* value3,
uint32_t arithmetic_flags = 0);
Value* VectorAdd(Value* value1, Value* value2, TypeName part_type,
uint32_t arithmetic_flags = 0);
Value* Sub(Value* value1, Value* value2,
uint32_t arithmetic_flags = 0);
Value* Mul(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);

View File

@ -130,6 +130,7 @@ enum Opcode {
OPCODE_COMPARE_UGE,
OPCODE_DID_CARRY,
OPCODE_DID_OVERFLOW,
OPCODE_DID_SATURATE,
OPCODE_VECTOR_COMPARE_EQ,
OPCODE_VECTOR_COMPARE_SGT,
OPCODE_VECTOR_COMPARE_SGE,
@ -138,6 +139,7 @@ enum Opcode {
OPCODE_ADD,
OPCODE_ADD_CARRY,
OPCODE_VECTOR_ADD,
OPCODE_SUB,
OPCODE_MUL,
OPCODE_MUL_HI, // TODO(benvanik): remove this and add INT128 type.

View File

@ -292,12 +292,16 @@ DEFINE_OPCODE(
"did_carry",
OPCODE_SIG_V_V,
0);
// did_overflow: registered with the OPCODE_SIG_V_V signature and no opcode
// flags.
DEFINE_OPCODE(
OPCODE_DID_OVERFLOW,
"did_overflow",
OPCODE_SIG_V_V,
0);
// did_saturate: registered with the OPCODE_SIG_V_V signature and no opcode
// flags.
DEFINE_OPCODE(
OPCODE_DID_SATURATE,
"did_saturate",
OPCODE_SIG_V_V,
0);
DEFINE_OPCODE(
OPCODE_VECTOR_COMPARE_EQ,
@ -337,6 +341,12 @@ DEFINE_OPCODE(
OPCODE_SIG_V_V_V_V,
OPCODE_FLAG_COMMUNATIVE);
// vector_add: registered with the OPCODE_SIG_V_V_V signature and flagged
// OPCODE_FLAG_COMMUNATIVE (the project's spelling of "commutative").
DEFINE_OPCODE(
OPCODE_VECTOR_ADD,
"vector_add",
OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE);
DEFINE_OPCODE(
OPCODE_SUB,
"sub",