Removing REM, adding MUL_HI, renaming MULADD/MULSUB.

This commit is contained in:
Ben Vanik 2014-01-06 22:17:49 -08:00
parent 3dcbcce38d
commit d1528e24bb
10 changed files with 315 additions and 114 deletions

View File

@ -2204,6 +2204,22 @@ uint32_t IntCode_MUL_V128_V128(IntCodeState& ics, const IntCode* i) {
} }
return IA_NEXT; return IA_NEXT;
} }
// Unsigned 8-bit multiply: reads the u8 fields of the src1 and src2
// registers, multiplies them and stores the result into the u8 field of the
// destination register. Always continues with the next intcode.
uint32_t IntCode_MUL_I8_I8_U(IntCodeState& ics, const IntCode* i) {
  const auto lhs = ics.rf[i->src1_reg].u8;
  const auto rhs = ics.rf[i->src2_reg].u8;
  // Both operands are read before the store, so dest may alias a source.
  ics.rf[i->dest_reg].u8 = lhs * rhs;
  return IA_NEXT;
}
// Unsigned 16-bit multiply: reads the u16 fields of the src1 and src2
// registers, multiplies them and stores the result into the u16 field of the
// destination register. Always continues with the next intcode.
uint32_t IntCode_MUL_I16_I16_U(IntCodeState& ics, const IntCode* i) {
  // Widen to uint32_t before multiplying. Multiplying the 16-bit fields
  // directly would promote both operands to (signed) int, and a product such
  // as 0xFFFF * 0xFFFF exceeds INT_MAX, which is signed-overflow undefined
  // behaviour. Unsigned 32-bit arithmetic is well defined and the low 16 bits
  // kept by the store are the same.
  const uint32_t lhs = ics.rf[i->src1_reg].u16;
  const uint32_t rhs = ics.rf[i->src2_reg].u16;
  ics.rf[i->dest_reg].u16 = lhs * rhs;
  return IA_NEXT;
}
// Unsigned 32-bit multiply: reads the u32 fields of the src1 and src2
// registers, multiplies them and stores the result into the u32 field of the
// destination register. Always continues with the next intcode.
uint32_t IntCode_MUL_I32_I32_U(IntCodeState& ics, const IntCode* i) {
  const auto lhs = ics.rf[i->src1_reg].u32;
  const auto rhs = ics.rf[i->src2_reg].u32;
  // Both operands are read before the store, so dest may alias a source.
  ics.rf[i->dest_reg].u32 = lhs * rhs;
  return IA_NEXT;
}
// Unsigned 64-bit multiply: reads the u64 fields of the src1 and src2
// registers, multiplies them and stores the result into the u64 field of the
// destination register. Always continues with the next intcode.
uint32_t IntCode_MUL_I64_I64_U(IntCodeState& ics, const IntCode* i) {
  const auto lhs = ics.rf[i->src1_reg].u64;
  const auto rhs = ics.rf[i->src2_reg].u64;
  // Both operands are read before the store, so dest may alias a source.
  ics.rf[i->dest_reg].u64 = lhs * rhs;
  return IA_NEXT;
}
int Translate_MUL(TranslationContext& ctx, Instr* i) { int Translate_MUL(TranslationContext& ctx, Instr* i) {
static IntCodeFn fns[] = { static IntCodeFn fns[] = {
IntCode_MUL_I8_I8, IntCode_MUL_I8_I8,
@ -2214,34 +2230,184 @@ int Translate_MUL(TranslationContext& ctx, Instr* i) {
IntCode_MUL_F64_F64, IntCode_MUL_F64_F64,
IntCode_MUL_V128_V128, IntCode_MUL_V128_V128,
}; };
return DispatchToC(ctx, i, fns[i->dest->type]); static IntCodeFn fns_unsigned[] = {
IntCode_MUL_I8_I8_U,
IntCode_MUL_I16_I16_U,
IntCode_MUL_I32_I32_U,
IntCode_MUL_I64_I64_U,
IntCode_INVALID_TYPE,
IntCode_INVALID_TYPE,
IntCode_INVALID_TYPE,
};
if (i->flags & ARITHMETIC_UNSIGNED) {
return DispatchToC(ctx, i, fns_unsigned[i->dest->type]);
} else {
return DispatchToC(ctx, i, fns[i->dest->type]);
}
} }
uint32_t IntCode_DIV_I8(IntCodeState& ics, const IntCode* i) { namespace {
ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].u8 / ics.rf[i->src2_reg].u8; uint64_t Mul128(uint64_t xi_low, uint64_t xi_high,
uint64_t yi_low, uint64_t yi_high) {
// 128bit multiply, simplified for two input 64bit integers.
// http://mrob.com/pub/math/int128.c.txt
#define HI_WORD 0xFFFFFFFF00000000LL
#define LO_WORD 0x00000000FFFFFFFFLL
uint64_t d = xi_low & LO_WORD;
uint64_t c = (xi_low & HI_WORD) >> 32LL;
uint64_t b = xi_high & LO_WORD;
uint64_t a = (xi_high & HI_WORD) >> 32LL;
uint64_t h = yi_low & LO_WORD;
uint64_t g = (yi_low & HI_WORD) >> 32LL;
uint64_t f = yi_high & LO_WORD;
uint64_t e = (yi_high & HI_WORD) >> 32LL;
uint64_t acc = d * h;
uint64_t o1 = acc & LO_WORD;
acc >>= 32LL;
uint64_t carry = 0;
uint64_t ac2 = acc + c * h; if (ac2 < acc) { carry++; }
acc = ac2 + d * g; if (acc < ac2) { carry++; }
uint64_t rv2_lo = o1 | (acc << 32LL);
ac2 = (acc >> 32LL) | (carry << 32LL); carry = 0;
acc = ac2 + b * h; if (acc < ac2) { carry++; }
ac2 = acc + c * g; if (ac2 < acc) { carry++; }
acc = ac2 + d * f; if (acc < ac2) { carry++; }
uint64_t o2 = acc & LO_WORD;
ac2 = (acc >> 32LL) | (carry << 32LL);
acc = ac2 + a * h;
ac2 = acc + b * g;
acc = ac2 + c * f;
ac2 = acc + d * e;
uint64_t rv2_hi = (ac2 << 32LL) | o2;
return rv2_hi;
}
}
uint32_t IntCode_MUL_HI_I8_I8(IntCodeState& ics, const IntCode* i) {
int16_t v =
(int16_t)ics.rf[i->src1_reg].i8 * (int16_t)ics.rf[i->src2_reg].i8;
ics.rf[i->dest_reg].i8 = (v >> 8);
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_DIV_I16(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_HI_I16_I16(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i16 = ics.rf[i->src1_reg].u16 / ics.rf[i->src2_reg].u16; int32_t v =
(int32_t)ics.rf[i->src1_reg].i16 * (int32_t)ics.rf[i->src2_reg].i16;
ics.rf[i->dest_reg].i16 = (v >> 16);
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_DIV_I32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_HI_I32_I32(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i32 = ics.rf[i->src1_reg].u32 / ics.rf[i->src2_reg].u32; int64_t v =
(int64_t)ics.rf[i->src1_reg].i32 * (int64_t)ics.rf[i->src2_reg].i32;
ics.rf[i->dest_reg].i32 = (v >> 32);
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_DIV_I64(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_HI_I64_I64(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i64 = ics.rf[i->src1_reg].u64 / ics.rf[i->src2_reg].u64; #if !XE_COMPILER(MSVC)
// GCC can, in theory, do this:
__int128 v =
(__int128)ics.rf[i->src1_reg].i64 * (__int128)ics.rf[i->src2_reg].i64;
ics.rf[i->dest_reg].i64 = (v >> 64);
#else
// 128bit multiply, simplified for two input 64bit integers.
// http://mrob.com/pub/math/int128.c.txt
int64_t xi_low = ics.rf[i->src1_reg].i64;
int64_t xi_high = xi_low < 0 ? -1 : 0;
int64_t yi_low = ics.rf[i->src2_reg].i64;
int64_t yi_high = yi_low < 0 ? -1 : 0;
ics.rf[i->dest_reg].i64 = Mul128(xi_low, xi_high, yi_low, yi_high);
#endif // !MSVC
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_DIV_F32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_HI_I8_I8_U(IntCodeState& ics, const IntCode* i) {
uint16_t v =
(uint16_t)ics.rf[i->src1_reg].u8 * (uint16_t)ics.rf[i->src2_reg].u8;
ics.rf[i->dest_reg].u8 = (v >> 8);
return IA_NEXT;
}
// Unsigned 16-bit "multiply high": widens the u16 fields of src1 and src2 to
// 32 bits, multiplies them, and stores the product shifted right by 16 into
// the u16 field of the destination register.
uint32_t IntCode_MUL_HI_I16_I16_U(IntCodeState& ics, const IntCode* i) {
  const uint32_t lhs = ics.rf[i->src1_reg].u16;
  const uint32_t rhs = ics.rf[i->src2_reg].u16;
  const uint32_t product = lhs * rhs;
  ics.rf[i->dest_reg].u16 = (product >> 16);
  return IA_NEXT;
}
// Unsigned 32-bit "multiply high": widens the u32 fields of src1 and src2 to
// 64 bits, multiplies them, and stores the product shifted right by 32 into
// the u32 field of the destination register.
uint32_t IntCode_MUL_HI_I32_I32_U(IntCodeState& ics, const IntCode* i) {
  const uint64_t lhs = ics.rf[i->src1_reg].u32;
  const uint64_t rhs = ics.rf[i->src2_reg].u32;
  const uint64_t product = lhs * rhs;
  ics.rf[i->dest_reg].u32 = (product >> 32);
  return IA_NEXT;
}
// Unsigned 64-bit "multiply high": stores the upper 64 bits of the 128-bit
// product of the src1 and src2 registers into the destination register.
//
// This is the unsigned variant, so the operands must be read through the u64
// fields and widened without sign extension. Using the signed fields and a
// signed 128-bit multiply would produce the signed high half instead (for
// example 0xFFFFFFFFFFFFFFFF * 2 would give -1 rather than 1).
uint32_t IntCode_MUL_HI_I64_I64_U(IntCodeState& ics, const IntCode* i) {
#if !XE_COMPILER(MSVC)
  // Compilers providing the __int128 extension can do the widening multiply
  // directly.
  unsigned __int128 v =
      (unsigned __int128)ics.rf[i->src1_reg].u64 *
      (unsigned __int128)ics.rf[i->src2_reg].u64;
  ics.rf[i->dest_reg].u64 = (uint64_t)(v >> 64);
#else
  // 128bit multiply, simplified for two input 64bit integers.
  // http://mrob.com/pub/math/int128.c.txt
  // The high words are zero because the inputs are zero-extended.
  uint64_t xi_low = ics.rf[i->src1_reg].u64;
  uint64_t xi_high = 0;
  uint64_t yi_low = ics.rf[i->src2_reg].u64;
  uint64_t yi_high = 0;
  ics.rf[i->dest_reg].u64 = Mul128(xi_low, xi_high, yi_low, yi_high);
#endif  // !MSVC
  return IA_NEXT;
}
// Translates a MUL_HI instruction by picking the interpreter handler that
// matches the destination value type and the signedness requested through the
// instruction flags, then handing it to DispatchToC.
int Translate_MUL_HI(TranslationContext& ctx, Instr* i) {
  // Both tables are indexed by i->dest->type. Only the four integer widths
  // have handlers; the remaining three slots are marked invalid.
  static IntCodeFn signed_fns[] = {
    IntCode_MUL_HI_I8_I8,
    IntCode_MUL_HI_I16_I16,
    IntCode_MUL_HI_I32_I32,
    IntCode_MUL_HI_I64_I64,
    IntCode_INVALID_TYPE,
    IntCode_INVALID_TYPE,
    IntCode_INVALID_TYPE,
  };
  static IntCodeFn unsigned_fns[] = {
    IntCode_MUL_HI_I8_I8_U,
    IntCode_MUL_HI_I16_I16_U,
    IntCode_MUL_HI_I32_I32_U,
    IntCode_MUL_HI_I64_I64_U,
    IntCode_INVALID_TYPE,
    IntCode_INVALID_TYPE,
    IntCode_INVALID_TYPE,
  };
  // ARITHMETIC_UNSIGNED selects the unsigned handlers.
  IntCodeFn* table =
      (i->flags & ARITHMETIC_UNSIGNED) ? unsigned_fns : signed_fns;
  return DispatchToC(ctx, i, table[i->dest->type]);
}
// Signed 8-bit divide: divides the i8 field of src1 by the i8 field of src2
// and stores the quotient into the i8 field of the destination register.
// The divisor is not checked for zero here.
uint32_t IntCode_DIV_I8_I8(IntCodeState& ics, const IntCode* i) {
  const auto dividend = ics.rf[i->src1_reg].i8;
  const auto divisor = ics.rf[i->src2_reg].i8;
  ics.rf[i->dest_reg].i8 = dividend / divisor;
  return IA_NEXT;
}
// Signed 16-bit divide: divides the i16 field of src1 by the i16 field of
// src2 and stores the quotient into the i16 field of the destination
// register. The divisor is not checked for zero here.
uint32_t IntCode_DIV_I16_I16(IntCodeState& ics, const IntCode* i) {
  const auto dividend = ics.rf[i->src1_reg].i16;
  const auto divisor = ics.rf[i->src2_reg].i16;
  ics.rf[i->dest_reg].i16 = dividend / divisor;
  return IA_NEXT;
}
// Signed 32-bit divide: divides the i32 field of src1 by the i32 field of
// src2 and stores the quotient into the i32 field of the destination
// register. The divisor is not checked for zero here.
uint32_t IntCode_DIV_I32_I32(IntCodeState& ics, const IntCode* i) {
  const auto dividend = ics.rf[i->src1_reg].i32;
  const auto divisor = ics.rf[i->src2_reg].i32;
  ics.rf[i->dest_reg].i32 = dividend / divisor;
  return IA_NEXT;
}
// Signed 64-bit divide: divides the i64 field of src1 by the i64 field of
// src2 and stores the quotient into the i64 field of the destination
// register. The divisor is not checked for zero here.
uint32_t IntCode_DIV_I64_I64(IntCodeState& ics, const IntCode* i) {
  const auto dividend = ics.rf[i->src1_reg].i64;
  const auto divisor = ics.rf[i->src2_reg].i64;
  ics.rf[i->dest_reg].i64 = dividend / divisor;
  return IA_NEXT;
}
uint32_t IntCode_DIV_F32_F32(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].f32 = ics.rf[i->src1_reg].f32 / ics.rf[i->src2_reg].f32; ics.rf[i->dest_reg].f32 = ics.rf[i->src1_reg].f32 / ics.rf[i->src2_reg].f32;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_DIV_F64(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_DIV_F64_F64(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].f64 = ics.rf[i->src1_reg].f64 / ics.rf[i->src2_reg].f64; ics.rf[i->dest_reg].f64 = ics.rf[i->src1_reg].f64 / ics.rf[i->src2_reg].f64;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_DIV_V128(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_DIV_V128_V128(IntCodeState& ics, const IntCode* i) {
const vec128_t& src1 = ics.rf[i->src1_reg].v128; const vec128_t& src1 = ics.rf[i->src1_reg].v128;
const vec128_t& src2 = ics.rf[i->src2_reg].v128; const vec128_t& src2 = ics.rf[i->src2_reg].v128;
vec128_t& dest = ics.rf[i->dest_reg].v128; vec128_t& dest = ics.rf[i->dest_reg].v128;
@ -2250,45 +2416,74 @@ uint32_t IntCode_DIV_V128(IntCodeState& ics, const IntCode* i) {
} }
return IA_NEXT; return IA_NEXT;
} }
// Unsigned 8-bit divide: divides the u8 field of src1 by the u8 field of src2
// and stores the quotient into the u8 field of the destination register.
// The divisor is not checked for zero here.
uint32_t IntCode_DIV_I8_I8_U(IntCodeState& ics, const IntCode* i) {
  const auto dividend = ics.rf[i->src1_reg].u8;
  const auto divisor = ics.rf[i->src2_reg].u8;
  ics.rf[i->dest_reg].u8 = dividend / divisor;
  return IA_NEXT;
}
// Unsigned 16-bit divide: divides the u16 field of src1 by the u16 field of
// src2 and stores the quotient into the u16 field of the destination
// register. The divisor is not checked for zero here.
uint32_t IntCode_DIV_I16_I16_U(IntCodeState& ics, const IntCode* i) {
  const auto dividend = ics.rf[i->src1_reg].u16;
  const auto divisor = ics.rf[i->src2_reg].u16;
  ics.rf[i->dest_reg].u16 = dividend / divisor;
  return IA_NEXT;
}
// Unsigned 32-bit divide: divides the u32 field of src1 by the u32 field of
// src2 and stores the quotient into the u32 field of the destination
// register. The divisor is not checked for zero here.
uint32_t IntCode_DIV_I32_I32_U(IntCodeState& ics, const IntCode* i) {
  const auto dividend = ics.rf[i->src1_reg].u32;
  const auto divisor = ics.rf[i->src2_reg].u32;
  ics.rf[i->dest_reg].u32 = dividend / divisor;
  return IA_NEXT;
}
// Unsigned 64-bit divide: divides the u64 field of src1 by the u64 field of
// src2 and stores the quotient into the u64 field of the destination
// register. The divisor is not checked for zero here.
uint32_t IntCode_DIV_I64_I64_U(IntCodeState& ics, const IntCode* i) {
  const auto dividend = ics.rf[i->src1_reg].u64;
  const auto divisor = ics.rf[i->src2_reg].u64;
  ics.rf[i->dest_reg].u64 = dividend / divisor;
  return IA_NEXT;
}
int Translate_DIV(TranslationContext& ctx, Instr* i) { int Translate_DIV(TranslationContext& ctx, Instr* i) {
static IntCodeFn fns[] = { static IntCodeFn fns[] = {
IntCode_DIV_I8, IntCode_DIV_I8_I8,
IntCode_DIV_I16, IntCode_DIV_I16_I16,
IntCode_DIV_I32, IntCode_DIV_I32_I32,
IntCode_DIV_I64, IntCode_DIV_I64_I64,
IntCode_DIV_F32, IntCode_DIV_F32_F32,
IntCode_DIV_F64, IntCode_DIV_F64_F64,
IntCode_DIV_V128, IntCode_DIV_V128_V128,
}; };
return DispatchToC(ctx, i, fns[i->dest->type]); static IntCodeFn fns_unsigned[] = {
IntCode_DIV_I8_I8_U,
IntCode_DIV_I16_I16_U,
IntCode_DIV_I32_I32_U,
IntCode_DIV_I64_I64_U,
IntCode_INVALID_TYPE,
IntCode_INVALID_TYPE,
IntCode_INVALID_TYPE,
};
if (i->flags & ARITHMETIC_UNSIGNED) {
return DispatchToC(ctx, i, fns_unsigned[i->dest->type]);
} else {
return DispatchToC(ctx, i, fns[i->dest->type]);
}
} }
// TODO(benvanik): use intrinsics or something // TODO(benvanik): use intrinsics or something
uint32_t IntCode_MULADD_I8(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_ADD_I8(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].i8 * ics.rf[i->src2_reg].i8 + ics.rf[i->src3_reg].i8; ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].i8 * ics.rf[i->src2_reg].i8 + ics.rf[i->src3_reg].i8;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_MULADD_I16(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_ADD_I16(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i16 = ics.rf[i->src1_reg].i16 * ics.rf[i->src2_reg].i16 + ics.rf[i->src3_reg].i16; ics.rf[i->dest_reg].i16 = ics.rf[i->src1_reg].i16 * ics.rf[i->src2_reg].i16 + ics.rf[i->src3_reg].i16;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_MULADD_I32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_ADD_I32(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i32 = ics.rf[i->src1_reg].i32 * ics.rf[i->src2_reg].i32 + ics.rf[i->src3_reg].i32; ics.rf[i->dest_reg].i32 = ics.rf[i->src1_reg].i32 * ics.rf[i->src2_reg].i32 + ics.rf[i->src3_reg].i32;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_MULADD_I64(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_ADD_I64(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i64 = ics.rf[i->src1_reg].i64 * ics.rf[i->src2_reg].i64 + ics.rf[i->src3_reg].i64; ics.rf[i->dest_reg].i64 = ics.rf[i->src1_reg].i64 * ics.rf[i->src2_reg].i64 + ics.rf[i->src3_reg].i64;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_MULADD_F32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_ADD_F32(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].f32 = ics.rf[i->src1_reg].f32 * ics.rf[i->src2_reg].f32 + ics.rf[i->src3_reg].f32; ics.rf[i->dest_reg].f32 = ics.rf[i->src1_reg].f32 * ics.rf[i->src2_reg].f32 + ics.rf[i->src3_reg].f32;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_MULADD_F64(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_ADD_F64(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].f64 = ics.rf[i->src1_reg].f64 * ics.rf[i->src2_reg].f64 + ics.rf[i->src3_reg].f64; ics.rf[i->dest_reg].f64 = ics.rf[i->src1_reg].f64 * ics.rf[i->src2_reg].f64 + ics.rf[i->src3_reg].f64;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_MULADD_V128(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_ADD_V128(IntCodeState& ics, const IntCode* i) {
const vec128_t& src1 = ics.rf[i->src1_reg].v128; const vec128_t& src1 = ics.rf[i->src1_reg].v128;
const vec128_t& src2 = ics.rf[i->src2_reg].v128; const vec128_t& src2 = ics.rf[i->src2_reg].v128;
const vec128_t& src3 = ics.rf[i->src3_reg].v128; const vec128_t& src3 = ics.rf[i->src3_reg].v128;
@ -2298,45 +2493,45 @@ uint32_t IntCode_MULADD_V128(IntCodeState& ics, const IntCode* i) {
} }
return IA_NEXT; return IA_NEXT;
} }
int Translate_MULADD(TranslationContext& ctx, Instr* i) { int Translate_MUL_ADD(TranslationContext& ctx, Instr* i) {
static IntCodeFn fns[] = { static IntCodeFn fns[] = {
IntCode_MULADD_I8, IntCode_MUL_ADD_I8,
IntCode_MULADD_I16, IntCode_MUL_ADD_I16,
IntCode_MULADD_I32, IntCode_MUL_ADD_I32,
IntCode_MULADD_I64, IntCode_MUL_ADD_I64,
IntCode_MULADD_F32, IntCode_MUL_ADD_F32,
IntCode_MULADD_F64, IntCode_MUL_ADD_F64,
IntCode_MULADD_V128, IntCode_MUL_ADD_V128,
}; };
return DispatchToC(ctx, i, fns[i->dest->type]); return DispatchToC(ctx, i, fns[i->dest->type]);
} }
// TODO(benvanik): use intrinsics or something // TODO(benvanik): use intrinsics or something
uint32_t IntCode_MULSUB_I8(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_SUB_I8(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].i8 * ics.rf[i->src2_reg].i8 - ics.rf[i->src3_reg].i8; ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].i8 * ics.rf[i->src2_reg].i8 - ics.rf[i->src3_reg].i8;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_MULSUB_I16(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_SUB_I16(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i16 = ics.rf[i->src1_reg].i16 * ics.rf[i->src2_reg].i16 - ics.rf[i->src3_reg].i16; ics.rf[i->dest_reg].i16 = ics.rf[i->src1_reg].i16 * ics.rf[i->src2_reg].i16 - ics.rf[i->src3_reg].i16;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_MULSUB_I32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_SUB_I32(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i32 = ics.rf[i->src1_reg].i32 * ics.rf[i->src2_reg].i32 - ics.rf[i->src3_reg].i32; ics.rf[i->dest_reg].i32 = ics.rf[i->src1_reg].i32 * ics.rf[i->src2_reg].i32 - ics.rf[i->src3_reg].i32;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_MULSUB_I64(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_SUB_I64(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i64 = ics.rf[i->src1_reg].i64 * ics.rf[i->src2_reg].i64 - ics.rf[i->src3_reg].i64; ics.rf[i->dest_reg].i64 = ics.rf[i->src1_reg].i64 * ics.rf[i->src2_reg].i64 - ics.rf[i->src3_reg].i64;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_MULSUB_F32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_SUB_F32(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].f32 = ics.rf[i->src1_reg].f32 * ics.rf[i->src2_reg].f32 - ics.rf[i->src3_reg].f32; ics.rf[i->dest_reg].f32 = ics.rf[i->src1_reg].f32 * ics.rf[i->src2_reg].f32 - ics.rf[i->src3_reg].f32;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_MULSUB_F64(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_SUB_F64(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].f64 = ics.rf[i->src1_reg].f64 * ics.rf[i->src2_reg].f64 - ics.rf[i->src3_reg].f64; ics.rf[i->dest_reg].f64 = ics.rf[i->src1_reg].f64 * ics.rf[i->src2_reg].f64 - ics.rf[i->src3_reg].f64;
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_MULSUB_V128(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_MUL_SUB_V128(IntCodeState& ics, const IntCode* i) {
const vec128_t& src1 = ics.rf[i->src1_reg].v128; const vec128_t& src1 = ics.rf[i->src1_reg].v128;
const vec128_t& src2 = ics.rf[i->src2_reg].v128; const vec128_t& src2 = ics.rf[i->src2_reg].v128;
const vec128_t& src3 = ics.rf[i->src3_reg].v128; const vec128_t& src3 = ics.rf[i->src3_reg].v128;
@ -2346,15 +2541,15 @@ uint32_t IntCode_MULSUB_V128(IntCodeState& ics, const IntCode* i) {
} }
return IA_NEXT; return IA_NEXT;
} }
int Translate_MULSUB(TranslationContext& ctx, Instr* i) { int Translate_MUL_SUB(TranslationContext& ctx, Instr* i) {
static IntCodeFn fns[] = { static IntCodeFn fns[] = {
IntCode_MULSUB_I8, IntCode_MUL_SUB_I8,
IntCode_MULSUB_I16, IntCode_MUL_SUB_I16,
IntCode_MULSUB_I32, IntCode_MUL_SUB_I32,
IntCode_MULSUB_I64, IntCode_MUL_SUB_I64,
IntCode_MULSUB_F32, IntCode_MUL_SUB_F32,
IntCode_MULSUB_F64, IntCode_MUL_SUB_F64,
IntCode_MULSUB_V128, IntCode_MUL_SUB_V128,
}; };
return DispatchToC(ctx, i, fns[i->dest->type]); return DispatchToC(ctx, i, fns[i->dest->type]);
} }
@ -3273,10 +3468,10 @@ static const TranslateFn dispatch_table[] = {
Translate_ADD_CARRY, Translate_ADD_CARRY,
Translate_SUB, Translate_SUB,
Translate_MUL, Translate_MUL,
Translate_MUL_HI,
Translate_DIV, Translate_DIV,
TranslateInvalid, //Translate_REM, Translate_MUL_ADD,
Translate_MULADD, Translate_MUL_SUB,
Translate_MULSUB,
Translate_NEG, Translate_NEG,
Translate_ABS, Translate_ABS,
Translate_SQRT, Translate_SQRT,

View File

@ -448,19 +448,13 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) {
return true; return true;
}); });
table->AddSequence(OPCODE_REM, [](LIRBuilder& lb, Instr*& instr) { table->AddSequence(OPCODE_MUL_ADD, [](LIRBuilder& lb, Instr*& instr) {
// TODO // TODO
instr = instr->next; instr = instr->next;
return true; return true;
}); });
table->AddSequence(OPCODE_MULADD, [](LIRBuilder& lb, Instr*& instr) { table->AddSequence(OPCODE_MUL_SUB, [](LIRBuilder& lb, Instr*& instr) {
// TODO
instr = instr->next;
return true;
});
table->AddSequence(OPCODE_MULSUB, [](LIRBuilder& lb, Instr*& instr) {
// TODO // TODO
instr = instr->next; instr = instr->next;
return true; return true;

View File

@ -209,15 +209,8 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
i->Remove(); i->Remove();
} }
break; break;
case OPCODE_REM: // case OPCODE_MUL_ADD:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) { // case OPCODE_MUL_SUB
v->set_from(i->src1.value);
v->Rem(i->src2.value);
i->Remove();
}
break;
// case OPCODE_MULADD:
// case OPCODE_MULSUB
case OPCODE_NEG: case OPCODE_NEG:
if (i->src1.value->IsConstant()) { if (i->src1.value->IsConstant()) {
v->set_from(i->src1.value); v->set_from(i->src1.value);

View File

@ -212,8 +212,8 @@ XEEMITTER(divdux, 0x7C000392, XO )(PPCHIRBuilder& f, InstrData& i) {
// TODO(benvanik): check if zero // TODO(benvanik): check if zero
// if OE=1, set XER[OV] = 1 // if OE=1, set XER[OV] = 1
// else skip the divide // else skip the divide
Value* v = f.Div(f.LoadGPR(i.XO.RA), divisor); Value* v = f.Div(f.LoadGPR(i.XO.RA), divisor, ARITHMETIC_UNSIGNED);
f.StoreGPR(i.XO.RT, v); f.StoreGPR(i.XO.RT, v);
if (i.XO.OE) { if (i.XO.OE) {
// If we are OE=1 we need to clear the overflow bit. // If we are OE=1 we need to clear the overflow bit.
//e.update_xer_with_overflow(e.get_uint64(0)); //e.update_xer_with_overflow(e.get_uint64(0));
@ -240,7 +240,7 @@ XEEMITTER(divwx, 0x7C0003D6, XO )(PPCHIRBuilder& f, InstrData& i) {
// if OE=1, set XER[OV] = 1 // if OE=1, set XER[OV] = 1
// else skip the divide // else skip the divide
Value* v = f.Div(f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE), divisor); Value* v = f.Div(f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE), divisor);
v = f.ZeroExtend(v, INT64_TYPE); v = f.SignExtend(v, INT64_TYPE);
f.StoreGPR(i.XO.RT, v); f.StoreGPR(i.XO.RT, v);
if (i.XO.OE) { if (i.XO.OE) {
// If we are OE=1 we need to clear the overflow bit. // If we are OE=1 we need to clear the overflow bit.
@ -267,7 +267,8 @@ XEEMITTER(divwux, 0x7C000396, XO )(PPCHIRBuilder& f, InstrData& i) {
// TODO(benvanik): check if zero // TODO(benvanik): check if zero
// if OE=1, set XER[OV] = 1 // if OE=1, set XER[OV] = 1
// else skip the divide // else skip the divide
Value* v = f.Div(f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE), divisor); Value* v = f.Div(f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE), divisor,
ARITHMETIC_UNSIGNED);
v = f.ZeroExtend(v, INT64_TYPE); v = f.ZeroExtend(v, INT64_TYPE);
f.StoreGPR(i.XO.RT, v); f.StoreGPR(i.XO.RT, v);
if (i.XO.OE) { if (i.XO.OE) {
@ -283,13 +284,34 @@ XEEMITTER(divwux, 0x7C000396, XO )(PPCHIRBuilder& f, InstrData& i) {
} }
XEEMITTER(mulhdx, 0x7C000092, XO )(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(mulhdx, 0x7C000092, XO )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); // RT <- ((RA) × (RB) as 128)[0:63]
return 1; if (i.XO.OE) {
// With XER update.
XEINSTRNOTIMPLEMENTED();
return 1;
}
Value* v = f.MulHi(f.LoadGPR(i.XO.RA), f.LoadGPR(i.XO.RB));
f.StoreGPR(i.XO.RT, v);
if (i.XO.Rc) {
f.UpdateCR(0, v);
}
return 0;
} }
XEEMITTER(mulhdux, 0x7C000012, XO )(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(mulhdux, 0x7C000012, XO )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); // RT <- ((RA) × (RB) as 128)[0:63]
return 1; if (i.XO.OE) {
// With XER update.
XEINSTRNOTIMPLEMENTED();
return 1;
}
Value* v = f.MulHi(
f.LoadGPR(i.XO.RA), f.LoadGPR(i.XO.RB), ARITHMETIC_UNSIGNED);
f.StoreGPR(i.XO.RT, v);
if (i.XO.Rc) {
f.UpdateCR(0, v);
}
return 0;
} }
XEEMITTER(mulhwx, 0x7C000096, XO )(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(mulhwx, 0x7C000096, XO )(PPCHIRBuilder& f, InstrData& i) {
@ -299,10 +321,9 @@ XEEMITTER(mulhwx, 0x7C000096, XO )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); XEINSTRNOTIMPLEMENTED();
return 1; return 1;
} }
Value* v = f.Mul( Value* v = f.SignExtend(f.MulHi(
f.ZeroExtend(f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE), INT64_TYPE), f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE),
f.ZeroExtend(f.Truncate(f.LoadGPR(i.XO.RB), INT32_TYPE), INT64_TYPE)); f.Truncate(f.LoadGPR(i.XO.RB), INT32_TYPE)), INT64_TYPE);
v = f.Shr(v, 32);
f.StoreGPR(i.XO.RT, v); f.StoreGPR(i.XO.RT, v);
if (i.XO.Rc) { if (i.XO.Rc) {
f.UpdateCR(0, v); f.UpdateCR(0, v);
@ -317,10 +338,10 @@ XEEMITTER(mulhwux, 0x7C000016, XO )(PPCHIRBuilder& f, InstrData& i) {
XEINSTRNOTIMPLEMENTED(); XEINSTRNOTIMPLEMENTED();
return 1; return 1;
} }
Value* v = f.Mul( Value* v = f.ZeroExtend(f.MulHi(
f.ZeroExtend(f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE), INT64_TYPE), f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE),
f.ZeroExtend(f.Truncate(f.LoadGPR(i.XO.RB), INT32_TYPE), INT64_TYPE)); f.Truncate(f.LoadGPR(i.XO.RB), INT32_TYPE),
v = f.Shr(v, 32); ARITHMETIC_UNSIGNED), INT64_TYPE);
f.StoreGPR(i.XO.RT, v); f.StoreGPR(i.XO.RT, v);
if (i.XO.Rc) { if (i.XO.Rc) {
f.UpdateCR(0, v, false); f.UpdateCR(0, v, false);

View File

@ -1145,11 +1145,12 @@ Value* HIRBuilder::Sub(
return i->dest; return i->dest;
} }
Value* HIRBuilder::Mul(Value* value1, Value* value2) { Value* HIRBuilder::Mul(
Value* value1, Value* value2, uint32_t arithmetic_flags) {
ASSERT_TYPES_EQUAL(value1, value2); ASSERT_TYPES_EQUAL(value1, value2);
Instr* i = AppendInstr( Instr* i = AppendInstr(
OPCODE_MUL_info, 0, OPCODE_MUL_info, arithmetic_flags,
AllocValue(value1->type)); AllocValue(value1->type));
i->set_src1(value1); i->set_src1(value1);
i->set_src2(value2); i->set_src2(value2);
@ -1157,11 +1158,12 @@ Value* HIRBuilder::Mul(Value* value1, Value* value2) {
return i->dest; return i->dest;
} }
Value* HIRBuilder::Div(Value* value1, Value* value2) { Value* HIRBuilder::MulHi(
Value* value1, Value* value2, uint32_t arithmetic_flags) {
ASSERT_TYPES_EQUAL(value1, value2); ASSERT_TYPES_EQUAL(value1, value2);
Instr* i = AppendInstr( Instr* i = AppendInstr(
OPCODE_DIV_info, 0, OPCODE_MUL_HI_info, arithmetic_flags,
AllocValue(value1->type)); AllocValue(value1->type));
i->set_src1(value1); i->set_src1(value1);
i->set_src2(value2); i->set_src2(value2);
@ -1169,11 +1171,12 @@ Value* HIRBuilder::Div(Value* value1, Value* value2) {
return i->dest; return i->dest;
} }
Value* HIRBuilder::Rem(Value* value1, Value* value2) { Value* HIRBuilder::Div(
Value* value1, Value* value2, uint32_t arithmetic_flags) {
ASSERT_TYPES_EQUAL(value1, value2); ASSERT_TYPES_EQUAL(value1, value2);
Instr* i = AppendInstr( Instr* i = AppendInstr(
OPCODE_REM_info, 0, OPCODE_DIV_info, arithmetic_flags,
AllocValue(value1->type)); AllocValue(value1->type));
i->set_src1(value1); i->set_src1(value1);
i->set_src2(value2); i->set_src2(value2);
@ -1194,7 +1197,7 @@ Value* HIRBuilder::MulAdd(Value* value1, Value* value2, Value* value3) {
} }
Instr* i = AppendInstr( Instr* i = AppendInstr(
OPCODE_MULADD_info, 0, OPCODE_MUL_ADD_info, 0,
AllocValue(value1->type)); AllocValue(value1->type));
i->set_src1(value1); i->set_src1(value1);
i->set_src2(value2); i->set_src2(value2);
@ -1215,7 +1218,7 @@ Value* HIRBuilder::MulSub(Value* value1, Value* value2, Value* value3) {
} }
Instr* i = AppendInstr( Instr* i = AppendInstr(
OPCODE_MULSUB_info, 0, OPCODE_MUL_SUB_info, 0,
AllocValue(value1->type)); AllocValue(value1->type));
i->set_src1(value1); i->set_src1(value1);
i->set_src2(value2); i->set_src2(value2);

View File

@ -152,9 +152,9 @@ public:
uint32_t arithmetic_flags = 0); uint32_t arithmetic_flags = 0);
Value* Sub(Value* value1, Value* value2, Value* Sub(Value* value1, Value* value2,
uint32_t arithmetic_flags = 0); uint32_t arithmetic_flags = 0);
Value* Mul(Value* value1, Value* value2); Value* Mul(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
Value* Div(Value* value1, Value* value2); Value* MulHi(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
Value* Rem(Value* value1, Value* value2); Value* Div(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
Value* MulAdd(Value* value1, Value* value2, Value* value3); // (1 * 2) + 3 Value* MulAdd(Value* value1, Value* value2, Value* value3); // (1 * 2) + 3
Value* MulSub(Value* value1, Value* value2, Value* value3); // (1 * 2) - 3 Value* MulSub(Value* value1, Value* value2, Value* value3); // (1 * 2) - 3
Value* Neg(Value* value); Value* Neg(Value* value);

View File

@ -47,6 +47,7 @@ enum PrefetchFlags {
}; };
enum ArithmeticFlags { enum ArithmeticFlags {
ARITHMETIC_SET_CARRY = (1 << 1), ARITHMETIC_SET_CARRY = (1 << 1),
ARITHMETIC_UNSIGNED = (1 << 2),
}; };
enum Permutes { enum Permutes {
PERMUTE_XY_ZW = 0x00010405, PERMUTE_XY_ZW = 0x00010405,
@ -134,10 +135,10 @@ enum Opcode {
OPCODE_ADD_CARRY, OPCODE_ADD_CARRY,
OPCODE_SUB, OPCODE_SUB,
OPCODE_MUL, OPCODE_MUL,
OPCODE_MUL_HI, // TODO(benvanik): remove this and add INT128 type.
OPCODE_DIV, OPCODE_DIV,
OPCODE_REM, OPCODE_MUL_ADD,
OPCODE_MULADD, OPCODE_MUL_SUB,
OPCODE_MULSUB,
OPCODE_NEG, OPCODE_NEG,
OPCODE_ABS, OPCODE_ABS,
OPCODE_SQRT, OPCODE_SQRT,

View File

@ -343,6 +343,12 @@ DEFINE_OPCODE(
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE); OPCODE_FLAG_COMMUNATIVE);
DEFINE_OPCODE(
OPCODE_MUL_HI,
"mul_hi",
OPCODE_SIG_V_V_V,
OPCODE_FLAG_COMMUNATIVE);
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DIV, OPCODE_DIV,
"div", "div",
@ -350,19 +356,13 @@ DEFINE_OPCODE(
0); 0);
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_REM, OPCODE_MUL_ADD,
"rem",
OPCODE_SIG_V_V_V,
0);
DEFINE_OPCODE(
OPCODE_MULADD,
"mul_add", "mul_add",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0);
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MULSUB, OPCODE_MUL_SUB,
"mul_sub", "mul_sub",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0); 0);

View File

@ -278,11 +278,6 @@ void Value::Div(Value* other) {
} }
} }
void Value::Rem(Value* other) {
// TODO(benvanik): big matrix.
XEASSERTALWAYS();
}
void Value::MulAdd(Value* dest, Value* value1, Value* value2, Value* value3) { void Value::MulAdd(Value* dest, Value* value1, Value* value2, Value* value3) {
// TODO(benvanik): big matrix. // TODO(benvanik): big matrix.
XEASSERTALWAYS(); XEASSERTALWAYS();

View File

@ -185,7 +185,6 @@ public:
void Sub(Value* other); void Sub(Value* other);
void Mul(Value* other); void Mul(Value* other);
void Div(Value* other); void Div(Value* other);
void Rem(Value* other);
static void MulAdd(Value* dest, Value* value1, Value* value2, Value* value3); static void MulAdd(Value* dest, Value* value1, Value* value2, Value* value3);
static void MulSub(Value* dest, Value* value1, Value* value2, Value* value3); static void MulSub(Value* dest, Value* value1, Value* value2, Value* value3);
void Neg(); void Neg();