From d1528e24bb27c0f6577ebea29d9f4e610068b28a Mon Sep 17 00:00:00 2001
From: Ben Vanik
Date: Mon, 6 Jan 2014 22:17:49 -0800
Subject: [PATCH] Removing REM, adding MUL_HI, renaming MULADD/MULSUB.

---
 src/alloy/backend/ivm/ivm_intcode.cc        | 301 +++++++++++++++---
 .../x64/lowering/lowering_sequences.cc      |  10 +-
 .../passes/constant_propagation_pass.cc     |  11 +-
 src/alloy/frontend/ppc/ppc_emit_alu.cc      |  53 ++-
 src/alloy/hir/hir_builder.cc                |  19 +-
 src/alloy/hir/hir_builder.h                 |   6 +-
 src/alloy/hir/opcodes.h                     |   7 +-
 src/alloy/hir/opcodes.inl                   |  16 +-
 src/alloy/hir/value.cc                      |   5 -
 src/alloy/hir/value.h                       |   1 -
 10 files changed, 315 insertions(+), 114 deletions(-)

diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc
index 277819185..923cdb621 100644
--- a/src/alloy/backend/ivm/ivm_intcode.cc
+++ b/src/alloy/backend/ivm/ivm_intcode.cc
@@ -2204,6 +2204,22 @@ uint32_t IntCode_MUL_V128_V128(IntCodeState& ics, const IntCode* i) {
   }
   return IA_NEXT;
 }
+uint32_t IntCode_MUL_I8_I8_U(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].u8 = ics.rf[i->src1_reg].u8 * ics.rf[i->src2_reg].u8;
+  return IA_NEXT;
+}
+uint32_t IntCode_MUL_I16_I16_U(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].u16 = ics.rf[i->src1_reg].u16 * ics.rf[i->src2_reg].u16;
+  return IA_NEXT;
+}
+uint32_t IntCode_MUL_I32_I32_U(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].u32 = ics.rf[i->src1_reg].u32 * ics.rf[i->src2_reg].u32;
+  return IA_NEXT;
+}
+uint32_t IntCode_MUL_I64_I64_U(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].u64 = ics.rf[i->src1_reg].u64 * ics.rf[i->src2_reg].u64;
+  return IA_NEXT;
+}
 int Translate_MUL(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_MUL_I8_I8,
@@ -2214,34 +2230,184 @@ int Translate_MUL(TranslationContext& ctx, Instr* i) {
     IntCode_MUL_F64_F64,
     IntCode_MUL_V128_V128,
   };
-  return DispatchToC(ctx, i, fns[i->dest->type]);
+  static IntCodeFn fns_unsigned[] = {
+    IntCode_MUL_I8_I8_U,
+    IntCode_MUL_I16_I16_U,
+    IntCode_MUL_I32_I32_U,
+    IntCode_MUL_I64_I64_U,
+    IntCode_INVALID_TYPE,
+    IntCode_INVALID_TYPE,
+    IntCode_INVALID_TYPE,
+  };
+  if (i->flags & ARITHMETIC_UNSIGNED) {
+    return DispatchToC(ctx, i, fns_unsigned[i->dest->type]);
+  } else {
+    return DispatchToC(ctx, i, fns[i->dest->type]);
+  }
 }
-uint32_t IntCode_DIV_I8(IntCodeState& ics, const IntCode* i) {
-  ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].u8 / ics.rf[i->src2_reg].u8;
+namespace {
+uint64_t Mul128(uint64_t xi_low, uint64_t xi_high,
+                uint64_t yi_low, uint64_t yi_high) {
+  // 128bit multiply, simplified for two input 64bit integers.
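+  // Only the high 64 bits of the 128-bit product are returned; rv2_lo
+  // below assembles the low half but is otherwise unused. Each operand is
+  // split into 32-bit limbs (a..d for x, e..h for y) and the partial
+  // products are summed column by column, detecting carry out of each
+  // 64-bit accumulation with the (sum < addend) comparison.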
+  // http://mrob.com/pub/math/int128.c.txt
+#define HI_WORD 0xFFFFFFFF00000000LL
+#define LO_WORD 0x00000000FFFFFFFFLL
+  uint64_t d = xi_low & LO_WORD;
+  uint64_t c = (xi_low & HI_WORD) >> 32LL;
+  uint64_t b = xi_high & LO_WORD;
+  uint64_t a = (xi_high & HI_WORD) >> 32LL;
+  uint64_t h = yi_low & LO_WORD;
+  uint64_t g = (yi_low & HI_WORD) >> 32LL;
+  uint64_t f = yi_high & LO_WORD;
+  uint64_t e = (yi_high & HI_WORD) >> 32LL;
+  uint64_t acc = d * h;
+  uint64_t o1 = acc & LO_WORD;
+  acc >>= 32LL;
+  uint64_t carry = 0;
+
+  uint64_t ac2 = acc + c * h; if (ac2 < acc) { carry++; }
+  acc = ac2 + d * g; if (acc < ac2) { carry++; }
+  uint64_t rv2_lo = o1 | (acc << 32LL);
+  ac2 = (acc >> 32LL) | (carry << 32LL); carry = 0;
+
+  acc = ac2 + b * h; if (acc < ac2) { carry++; }
+  ac2 = acc + c * g; if (ac2 < acc) { carry++; }
+  acc = ac2 + d * f; if (acc < ac2) { carry++; }
+  uint64_t o2 = acc & LO_WORD;
+  ac2 = (acc >> 32LL) | (carry << 32LL);
+
+  acc = ac2 + a * h;
+  ac2 = acc + b * g;
+  acc = ac2 + c * f;
+  ac2 = acc + d * e;
+  uint64_t rv2_hi = (ac2 << 32LL) | o2;
+
+  return rv2_hi;
+}
+}
+
+uint32_t IntCode_MUL_HI_I8_I8(IntCodeState& ics, const IntCode* i) {
+  int16_t v =
+      (int16_t)ics.rf[i->src1_reg].i8 * (int16_t)ics.rf[i->src2_reg].i8;
+  ics.rf[i->dest_reg].i8 = (v >> 8);
   return IA_NEXT;
 }
-uint32_t IntCode_DIV_I16(IntCodeState& ics, const IntCode* i) {
-  ics.rf[i->dest_reg].i16 = ics.rf[i->src1_reg].u16 / ics.rf[i->src2_reg].u16;
+uint32_t IntCode_MUL_HI_I16_I16(IntCodeState& ics, const IntCode* i) {
+  int32_t v =
+      (int32_t)ics.rf[i->src1_reg].i16 * (int32_t)ics.rf[i->src2_reg].i16;
+  ics.rf[i->dest_reg].i16 = (v >> 16);
   return IA_NEXT;
 }
-uint32_t IntCode_DIV_I32(IntCodeState& ics, const IntCode* i) {
-  ics.rf[i->dest_reg].i32 = ics.rf[i->src1_reg].u32 / ics.rf[i->src2_reg].u32;
+uint32_t IntCode_MUL_HI_I32_I32(IntCodeState& ics, const IntCode* i) {
+  int64_t v =
+      (int64_t)ics.rf[i->src1_reg].i32 * (int64_t)ics.rf[i->src2_reg].i32;
+  ics.rf[i->dest_reg].i32 = (v >> 32);
   return IA_NEXT;
 }
-uint32_t IntCode_DIV_I64(IntCodeState& ics, const IntCode* i) {
-  ics.rf[i->dest_reg].i64 = ics.rf[i->src1_reg].u64 / ics.rf[i->src2_reg].u64;
+uint32_t IntCode_MUL_HI_I64_I64(IntCodeState& ics, const IntCode* i) {
+#if !XE_COMPILER(MSVC)
+  // GCC can, in theory, do this:
+  __int128 v =
+      (__int128)ics.rf[i->src1_reg].i64 * (__int128)ics.rf[i->src2_reg].i64;
+  ics.rf[i->dest_reg].i64 = (v >> 64);
+#else
+  // 128bit multiply, simplified for two input 64bit integers.
+  // http://mrob.com/pub/math/int128.c.txt
+  int64_t xi_low = ics.rf[i->src1_reg].i64;
+  int64_t xi_high = xi_low < 0 ? -1 : 0;
+  int64_t yi_low = ics.rf[i->src2_reg].i64;
+  int64_t yi_high = yi_low < 0 ? -1 : 0;
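+  // Sign-extending the high words makes the unsigned helper compute the
+  // two's-complement product: modulo 2**128 the results are identical, so
+  // the top 64 bits that come back are the signed high half.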
+  ics.rf[i->dest_reg].i64 = Mul128(xi_low, xi_high, yi_low, yi_high);
+#endif  // !MSVC
   return IA_NEXT;
 }
-uint32_t IntCode_DIV_F32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_HI_I8_I8_U(IntCodeState& ics, const IntCode* i) {
+  uint16_t v =
+      (uint16_t)ics.rf[i->src1_reg].u8 * (uint16_t)ics.rf[i->src2_reg].u8;
+  ics.rf[i->dest_reg].u8 = (v >> 8);
+  return IA_NEXT;
+}
+uint32_t IntCode_MUL_HI_I16_I16_U(IntCodeState& ics, const IntCode* i) {
+  uint32_t v =
+      (uint32_t)ics.rf[i->src1_reg].u16 * (uint32_t)ics.rf[i->src2_reg].u16;
+  ics.rf[i->dest_reg].u16 = (v >> 16);
+  return IA_NEXT;
+}
+uint32_t IntCode_MUL_HI_I32_I32_U(IntCodeState& ics, const IntCode* i) {
+  uint64_t v =
+      (uint64_t)ics.rf[i->src1_reg].u32 * (uint64_t)ics.rf[i->src2_reg].u32;
+  ics.rf[i->dest_reg].u32 = (v >> 32);
+  return IA_NEXT;
+}
+uint32_t IntCode_MUL_HI_I64_I64_U(IntCodeState& ics, const IntCode* i) {
+#if !XE_COMPILER(MSVC)
+  // GCC can, in theory, do this:
+  unsigned __int128 v = (unsigned __int128)ics.rf[i->src1_reg].u64 *
+                        (unsigned __int128)ics.rf[i->src2_reg].u64;
+  ics.rf[i->dest_reg].u64 = (v >> 64);
+#else
+  // 128bit multiply, simplified for two input 64bit integers.
+  // http://mrob.com/pub/math/int128.c.txt
+  uint64_t xi_low = ics.rf[i->src1_reg].u64;
+  uint64_t xi_high = 0;
+  uint64_t yi_low = ics.rf[i->src2_reg].u64;
+  uint64_t yi_high = 0;
+  ics.rf[i->dest_reg].u64 = Mul128(xi_low, xi_high, yi_low, yi_high);
+#endif  // !MSVC
+  return IA_NEXT;
+}
+int Translate_MUL_HI(TranslationContext& ctx, Instr* i) {
+  static IntCodeFn fns[] = {
+    IntCode_MUL_HI_I8_I8,
+    IntCode_MUL_HI_I16_I16,
+    IntCode_MUL_HI_I32_I32,
+    IntCode_MUL_HI_I64_I64,
+    IntCode_INVALID_TYPE,
+    IntCode_INVALID_TYPE,
+    IntCode_INVALID_TYPE,
+  };
+  static IntCodeFn fns_unsigned[] = {
+    IntCode_MUL_HI_I8_I8_U,
+    IntCode_MUL_HI_I16_I16_U,
+    IntCode_MUL_HI_I32_I32_U,
+    IntCode_MUL_HI_I64_I64_U,
+    IntCode_INVALID_TYPE,
+    IntCode_INVALID_TYPE,
+    IntCode_INVALID_TYPE,
+  };
+  if (i->flags & ARITHMETIC_UNSIGNED) {
+    return DispatchToC(ctx, i, fns_unsigned[i->dest->type]);
+  } else {
+    return DispatchToC(ctx, i, fns[i->dest->type]);
+  }
+}
+
+uint32_t IntCode_DIV_I8_I8(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].i8 / ics.rf[i->src2_reg].i8;
+  return IA_NEXT;
+}
+uint32_t IntCode_DIV_I16_I16(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].i16 = ics.rf[i->src1_reg].i16 / ics.rf[i->src2_reg].i16;
+  return IA_NEXT;
+}
+uint32_t IntCode_DIV_I32_I32(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].i32 = ics.rf[i->src1_reg].i32 / ics.rf[i->src2_reg].i32;
+  return IA_NEXT;
+}
+uint32_t IntCode_DIV_I64_I64(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].i64 = ics.rf[i->src1_reg].i64 / ics.rf[i->src2_reg].i64;
+  return IA_NEXT;
+}
+uint32_t IntCode_DIV_F32_F32(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].f32 = ics.rf[i->src1_reg].f32 / ics.rf[i->src2_reg].f32;
   return IA_NEXT;
 }
-uint32_t IntCode_DIV_F64(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_DIV_F64_F64(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].f64 = ics.rf[i->src1_reg].f64 / ics.rf[i->src2_reg].f64;
   return IA_NEXT;
 }
-uint32_t IntCode_DIV_V128(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_DIV_V128_V128(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
   const vec128_t& src2 = ics.rf[i->src2_reg].v128;
   vec128_t& dest = ics.rf[i->dest_reg].v128;
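
Worth noting about the handlers above: every narrow MUL_HI simply widens to the next integer size, multiplies, and keeps the top half; only the 64-bit case needs __int128 or the Mul128 helper. The signed and unsigned variants are not interchangeable, which a small host-side check makes obvious (standalone C++, illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Mirrors IntCode_MUL_HI_I32_I32: widen, multiply, keep the top 32 bits.
    int32_t mul_hi_i32(int32_t a, int32_t b) {
      int64_t v = (int64_t)a * (int64_t)b;
      return (int32_t)(v >> 32);
    }

    // Mirrors IntCode_MUL_HI_I32_I32_U: same shape, unsigned throughout.
    uint32_t mul_hi_u32(uint32_t a, uint32_t b) {
      uint64_t v = (uint64_t)a * (uint64_t)b;
      return (uint32_t)(v >> 32);
    }

    int main() {
      // Same bit pattern, different answers: (-1) * (-1) = 1 has a zero
      // high half, while 0xFFFFFFFF * 0xFFFFFFFF = 0xFFFFFFFE00000001.
      assert(mul_hi_i32(-1, -1) == 0);
      assert(mul_hi_u32(0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFEu);
      return 0;
    }
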
@@ -2250,45 +2416,74 @@ uint32_t IntCode_DIV_V128(IntCodeState& ics, const IntCode* i) {
   }
   return IA_NEXT;
 }
+uint32_t IntCode_DIV_I8_I8_U(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].u8 = ics.rf[i->src1_reg].u8 / ics.rf[i->src2_reg].u8;
+  return IA_NEXT;
+}
+uint32_t IntCode_DIV_I16_I16_U(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].u16 = ics.rf[i->src1_reg].u16 / ics.rf[i->src2_reg].u16;
+  return IA_NEXT;
+}
+uint32_t IntCode_DIV_I32_I32_U(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].u32 = ics.rf[i->src1_reg].u32 / ics.rf[i->src2_reg].u32;
+  return IA_NEXT;
+}
+uint32_t IntCode_DIV_I64_I64_U(IntCodeState& ics, const IntCode* i) {
+  ics.rf[i->dest_reg].u64 = ics.rf[i->src1_reg].u64 / ics.rf[i->src2_reg].u64;
+  return IA_NEXT;
+}
 int Translate_DIV(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
-    IntCode_DIV_I8,
-    IntCode_DIV_I16,
-    IntCode_DIV_I32,
-    IntCode_DIV_I64,
-    IntCode_DIV_F32,
-    IntCode_DIV_F64,
-    IntCode_DIV_V128,
+    IntCode_DIV_I8_I8,
+    IntCode_DIV_I16_I16,
+    IntCode_DIV_I32_I32,
+    IntCode_DIV_I64_I64,
+    IntCode_DIV_F32_F32,
+    IntCode_DIV_F64_F64,
+    IntCode_DIV_V128_V128,
   };
-  return DispatchToC(ctx, i, fns[i->dest->type]);
+  static IntCodeFn fns_unsigned[] = {
+    IntCode_DIV_I8_I8_U,
+    IntCode_DIV_I16_I16_U,
+    IntCode_DIV_I32_I32_U,
+    IntCode_DIV_I64_I64_U,
+    IntCode_INVALID_TYPE,
+    IntCode_INVALID_TYPE,
+    IntCode_INVALID_TYPE,
+  };
+  if (i->flags & ARITHMETIC_UNSIGNED) {
+    return DispatchToC(ctx, i, fns_unsigned[i->dest->type]);
+  } else {
+    return DispatchToC(ctx, i, fns[i->dest->type]);
+  }
 }
 
 // TODO(benvanik): use intrinsics or something
-uint32_t IntCode_MULADD_I8(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_ADD_I8(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].i8 * ics.rf[i->src2_reg].i8 + ics.rf[i->src3_reg].i8;
   return IA_NEXT;
 }
-uint32_t IntCode_MULADD_I16(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_ADD_I16(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i16 = ics.rf[i->src1_reg].i16 * ics.rf[i->src2_reg].i16 + ics.rf[i->src3_reg].i16;
   return IA_NEXT;
 }
-uint32_t IntCode_MULADD_I32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_ADD_I32(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i32 = ics.rf[i->src1_reg].i32 * ics.rf[i->src2_reg].i32 + ics.rf[i->src3_reg].i32;
   return IA_NEXT;
 }
-uint32_t IntCode_MULADD_I64(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_ADD_I64(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i64 = ics.rf[i->src1_reg].i64 * ics.rf[i->src2_reg].i64 + ics.rf[i->src3_reg].i64;
   return IA_NEXT;
 }
-uint32_t IntCode_MULADD_F32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_ADD_F32(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].f32 = ics.rf[i->src1_reg].f32 * ics.rf[i->src2_reg].f32 + ics.rf[i->src3_reg].f32;
   return IA_NEXT;
 }
-uint32_t IntCode_MULADD_F64(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_ADD_F64(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].f64 = ics.rf[i->src1_reg].f64 * ics.rf[i->src2_reg].f64 + ics.rf[i->src3_reg].f64;
   return IA_NEXT;
 }
-uint32_t IntCode_MULADD_V128(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_ADD_V128(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
   const vec128_t& src2 = ics.rf[i->src2_reg].v128;
   const vec128_t& src3 = ics.rf[i->src3_reg].v128;
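
The duplicated table in Translate_DIV exists because the quotient depends on whether the operands' bits are interpreted as signed or unsigned; the register file only stores bits, so signedness has to ride along on the instruction flags. A standalone illustration (assumed values, not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t bits = 0xFFFFFFF6u;  // the two's-complement encoding of -10
      // Signed view: -10 / 3 == -3 (C++ division truncates toward zero).
      assert((int32_t)bits / 3 == -3);
      // Unsigned view: 4294967286 / 3 == 1431655762.
      assert(bits / 3u == 1431655762u);
      return 0;
    }
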
@@ -2298,45 +2493,45 @@ uint32_t IntCode_MULADD_V128(IntCodeState& ics, const IntCode* i) {
   }
   return IA_NEXT;
 }
-int Translate_MULADD(TranslationContext& ctx, Instr* i) {
+int Translate_MUL_ADD(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
-    IntCode_MULADD_I8,
-    IntCode_MULADD_I16,
-    IntCode_MULADD_I32,
-    IntCode_MULADD_I64,
-    IntCode_MULADD_F32,
-    IntCode_MULADD_F64,
-    IntCode_MULADD_V128,
+    IntCode_MUL_ADD_I8,
+    IntCode_MUL_ADD_I16,
+    IntCode_MUL_ADD_I32,
+    IntCode_MUL_ADD_I64,
+    IntCode_MUL_ADD_F32,
+    IntCode_MUL_ADD_F64,
+    IntCode_MUL_ADD_V128,
   };
   return DispatchToC(ctx, i, fns[i->dest->type]);
 }
 
 // TODO(benvanik): use intrinsics or something
-uint32_t IntCode_MULSUB_I8(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_SUB_I8(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i8 = ics.rf[i->src1_reg].i8 * ics.rf[i->src2_reg].i8 - ics.rf[i->src3_reg].i8;
   return IA_NEXT;
 }
-uint32_t IntCode_MULSUB_I16(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_SUB_I16(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i16 = ics.rf[i->src1_reg].i16 * ics.rf[i->src2_reg].i16 - ics.rf[i->src3_reg].i16;
   return IA_NEXT;
 }
-uint32_t IntCode_MULSUB_I32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_SUB_I32(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i32 = ics.rf[i->src1_reg].i32 * ics.rf[i->src2_reg].i32 - ics.rf[i->src3_reg].i32;
   return IA_NEXT;
 }
-uint32_t IntCode_MULSUB_I64(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_SUB_I64(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i64 = ics.rf[i->src1_reg].i64 * ics.rf[i->src2_reg].i64 - ics.rf[i->src3_reg].i64;
   return IA_NEXT;
 }
-uint32_t IntCode_MULSUB_F32(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_SUB_F32(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].f32 = ics.rf[i->src1_reg].f32 * ics.rf[i->src2_reg].f32 - ics.rf[i->src3_reg].f32;
   return IA_NEXT;
 }
-uint32_t IntCode_MULSUB_F64(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_SUB_F64(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].f64 = ics.rf[i->src1_reg].f64 * ics.rf[i->src2_reg].f64 - ics.rf[i->src3_reg].f64;
   return IA_NEXT;
 }
-uint32_t IntCode_MULSUB_V128(IntCodeState& ics, const IntCode* i) {
+uint32_t IntCode_MUL_SUB_V128(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
   const vec128_t& src2 = ics.rf[i->src2_reg].v128;
   const vec128_t& src3 = ics.rf[i->src3_reg].v128;
@@ -2346,15 +2541,15 @@ uint32_t IntCode_MULSUB_V128(IntCodeState& ics, const IntCode* i) {
   }
   return IA_NEXT;
 }
-int Translate_MULSUB(TranslationContext& ctx, Instr* i) {
+int Translate_MUL_SUB(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
-    IntCode_MULSUB_I8,
-    IntCode_MULSUB_I16,
-    IntCode_MULSUB_I32,
-    IntCode_MULSUB_I64,
-    IntCode_MULSUB_F32,
-    IntCode_MULSUB_F64,
-    IntCode_MULSUB_V128,
+    IntCode_MUL_SUB_I8,
+    IntCode_MUL_SUB_I16,
+    IntCode_MUL_SUB_I32,
+    IntCode_MUL_SUB_I64,
+    IntCode_MUL_SUB_F32,
+    IntCode_MUL_SUB_F64,
+    IntCode_MUL_SUB_V128,
   };
   return DispatchToC(ctx, i, fns[i->dest->type]);
 }
@@ -3273,10 +3468,10 @@ static const TranslateFn dispatch_table[] = {
   Translate_ADD_CARRY,
   Translate_SUB,
   Translate_MUL,
+  Translate_MUL_HI,
   Translate_DIV,
-  TranslateInvalid,  //Translate_REM,
-  Translate_MULADD,
-  Translate_MULSUB,
+  Translate_MUL_ADD,
+  Translate_MUL_SUB,
   Translate_NEG,
   Translate_ABS,
   Translate_SQRT,
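
MUL_ADD and MUL_SUB keep the (value1 * value2) ± value3 operand order documented in hir_builder.h. The TODO about intrinsics presumably has host fused-multiply-add instructions in mind; a sketch of the scalar semantics (illustrative, not the backend code):

    #include <cassert>
    #include <cmath>

    int main() {
      float a = 2.0f, b = 3.0f, c = 1.0f;
      assert(a * b + c == 7.0f);  // mul_add: (1 * 2) + 3
      assert(a * b - c == 5.0f);  // mul_sub: (1 * 2) - 3
      // A true FMA rounds once instead of twice; for exactly representable
      // values like these it agrees with the two-step form.
      assert(std::fma(a, b, c) == 7.0f);
      return 0;
    }
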
diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.cc b/src/alloy/backend/x64/lowering/lowering_sequences.cc
index ec215f84e..0419effa8 100644
--- a/src/alloy/backend/x64/lowering/lowering_sequences.cc
+++ b/src/alloy/backend/x64/lowering/lowering_sequences.cc
@@ -448,19 +448,13 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) {
     return true;
   });
 
-  table->AddSequence(OPCODE_REM, [](LIRBuilder& lb, Instr*& instr) {
+  table->AddSequence(OPCODE_MUL_ADD, [](LIRBuilder& lb, Instr*& instr) {
     // TODO
     instr = instr->next;
     return true;
   });
 
-  table->AddSequence(OPCODE_MULADD, [](LIRBuilder& lb, Instr*& instr) {
-    // TODO
-    instr = instr->next;
-    return true;
-  });
-
-  table->AddSequence(OPCODE_MULSUB, [](LIRBuilder& lb, Instr*& instr) {
+  table->AddSequence(OPCODE_MUL_SUB, [](LIRBuilder& lb, Instr*& instr) {
     // TODO
     instr = instr->next;
     return true;
diff --git a/src/alloy/compiler/passes/constant_propagation_pass.cc b/src/alloy/compiler/passes/constant_propagation_pass.cc
index af00e1608..0bf269334 100644
--- a/src/alloy/compiler/passes/constant_propagation_pass.cc
+++ b/src/alloy/compiler/passes/constant_propagation_pass.cc
@@ -209,15 +209,8 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
           i->Remove();
         }
         break;
-      case OPCODE_REM:
-        if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
-          v->set_from(i->src1.value);
-          v->Rem(i->src2.value);
-          i->Remove();
-        }
-        break;
-      // case OPCODE_MULADD:
-      // case OPCODE_MULSUB
+      // case OPCODE_MUL_ADD:
+      // case OPCODE_MUL_SUB
       case OPCODE_NEG:
         if (i->src1.value->IsConstant()) {
           v->set_from(i->src1.value);
diff --git a/src/alloy/frontend/ppc/ppc_emit_alu.cc b/src/alloy/frontend/ppc/ppc_emit_alu.cc
index 019b99b52..ed9fbdf38 100644
--- a/src/alloy/frontend/ppc/ppc_emit_alu.cc
+++ b/src/alloy/frontend/ppc/ppc_emit_alu.cc
@@ -212,8 +212,8 @@ XEEMITTER(divdux, 0x7C000392, XO )(PPCHIRBuilder& f, InstrData& i) {
   // TODO(benvanik): check if zero
   // if OE=1, set XER[OV] = 1
   // else skip the divide
-  Value* v = f.Div(f.LoadGPR(i.XO.RA), divisor);
-f.StoreGPR(i.XO.RT, v);
+  Value* v = f.Div(f.LoadGPR(i.XO.RA), divisor, ARITHMETIC_UNSIGNED);
+  f.StoreGPR(i.XO.RT, v);
   if (i.XO.OE) {
     // If we are OE=1 we need to clear the overflow bit.
     //e.update_xer_with_overflow(e.get_uint64(0));
@@ -240,7 +240,7 @@ XEEMITTER(divwx, 0x7C0003D6, XO )(PPCHIRBuilder& f, InstrData& i) {
   // if OE=1, set XER[OV] = 1
   // else skip the divide
   Value* v = f.Div(f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE), divisor);
-  v = f.ZeroExtend(v, INT64_TYPE);
+  v = f.SignExtend(v, INT64_TYPE);
   f.StoreGPR(i.XO.RT, v);
   if (i.XO.OE) {
     // If we are OE=1 we need to clear the overflow bit.
@@ -267,7 +267,8 @@ XEEMITTER(divwux, 0x7C000396, XO )(PPCHIRBuilder& f, InstrData& i) {
   // TODO(benvanik): check if zero
   // if OE=1, set XER[OV] = 1
   // else skip the divide
-  Value* v = f.Div(f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE), divisor);
+  Value* v = f.Div(f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE), divisor,
+                   ARITHMETIC_UNSIGNED);
   v = f.ZeroExtend(v, INT64_TYPE);
   f.StoreGPR(i.XO.RT, v);
   if (i.XO.OE) {
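
The divwx change above is a sign bug fix as much as a rename: a 32-bit quotient can be negative, and zero-extending it into the 64-bit GPR plants garbage in the high word. (The PowerPC spec leaves the upper 32 bits of RT undefined for divw, but keeping 32-bit results sign-extended in 64-bit registers is the convention these emitters follow.) A standalone illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t quotient = -2;  // e.g. a divw result of -6 / 3
      // SignExtend: the 64-bit register holds -2.
      assert((int64_t)quotient == -2LL);
      // ZeroExtend (the old behavior): the register holds 0x00000000FFFFFFFE.
      assert((uint64_t)(uint32_t)quotient == 0x00000000FFFFFFFEull);
      return 0;
    }
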
@@ -283,13 +284,34 @@
 }
 
 XEEMITTER(mulhdx, 0x7C000092, XO )(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // RT <- ((RA) × (RB) as 128)[0:63]
+  if (i.XO.OE) {
+    // With XER update.
+    XEINSTRNOTIMPLEMENTED();
+    return 1;
+  }
+  Value* v = f.MulHi(f.LoadGPR(i.XO.RA), f.LoadGPR(i.XO.RB));
+  f.StoreGPR(i.XO.RT, v);
+  if (i.XO.Rc) {
+    f.UpdateCR(0, v);
+  }
+  return 0;
 }
 
 XEEMITTER(mulhdux, 0x7C000012, XO )(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  // RT <- ((RA) × (RB) as 128)[0:63]
+  if (i.XO.OE) {
+    // With XER update.
+    XEINSTRNOTIMPLEMENTED();
+    return 1;
+  }
+  Value* v = f.MulHi(
+      f.LoadGPR(i.XO.RA), f.LoadGPR(i.XO.RB), ARITHMETIC_UNSIGNED);
+  f.StoreGPR(i.XO.RT, v);
+  if (i.XO.Rc) {
+    f.UpdateCR(0, v);
+  }
+  return 0;
 }
 
 XEEMITTER(mulhwx, 0x7C000096, XO )(PPCHIRBuilder& f, InstrData& i) {
@@ -299,10 +321,9 @@ XEEMITTER(mulhwx, 0x7C000096, XO )(PPCHIRBuilder& f, InstrData& i) {
     XEINSTRNOTIMPLEMENTED();
     return 1;
   }
-  Value* v = f.Mul(
-      f.ZeroExtend(f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE), INT64_TYPE),
-      f.ZeroExtend(f.Truncate(f.LoadGPR(i.XO.RB), INT32_TYPE), INT64_TYPE));
-  v = f.Shr(v, 32);
+  Value* v = f.SignExtend(f.MulHi(
+      f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE),
+      f.Truncate(f.LoadGPR(i.XO.RB), INT32_TYPE)), INT64_TYPE);
   f.StoreGPR(i.XO.RT, v);
   if (i.XO.Rc) {
     f.UpdateCR(0, v);
@@ -317,10 +338,10 @@ XEEMITTER(mulhwux, 0x7C000016, XO )(PPCHIRBuilder& f, InstrData& i) {
     XEINSTRNOTIMPLEMENTED();
     return 1;
   }
-  Value* v = f.Mul(
-      f.ZeroExtend(f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE), INT64_TYPE),
-      f.ZeroExtend(f.Truncate(f.LoadGPR(i.XO.RB), INT32_TYPE), INT64_TYPE));
-  v = f.Shr(v, 32);
+  Value* v = f.ZeroExtend(f.MulHi(
+      f.Truncate(f.LoadGPR(i.XO.RA), INT32_TYPE),
+      f.Truncate(f.LoadGPR(i.XO.RB), INT32_TYPE),
+      ARITHMETIC_UNSIGNED), INT64_TYPE);
   f.StoreGPR(i.XO.RT, v);
   if (i.XO.Rc) {
     f.UpdateCR(0, v, false);
diff --git a/src/alloy/hir/hir_builder.cc b/src/alloy/hir/hir_builder.cc
index b31907438..861e70853 100644
--- a/src/alloy/hir/hir_builder.cc
+++ b/src/alloy/hir/hir_builder.cc
@@ -1145,11 +1145,12 @@ Value* HIRBuilder::Sub(
   return i->dest;
 }
 
-Value* HIRBuilder::Mul(Value* value1, Value* value2) {
+Value* HIRBuilder::Mul(
+    Value* value1, Value* value2, uint32_t arithmetic_flags) {
   ASSERT_TYPES_EQUAL(value1, value2);
 
   Instr* i = AppendInstr(
-      OPCODE_MUL_info, 0,
+      OPCODE_MUL_info, arithmetic_flags,
       AllocValue(value1->type));
   i->set_src1(value1);
   i->set_src2(value2);
@@ -1157,11 +1158,12 @@ Value* HIRBuilder::Mul(Value* value1, Value* value2) {
   return i->dest;
 }
 
-Value* HIRBuilder::Div(Value* value1, Value* value2) {
+Value* HIRBuilder::MulHi(
+    Value* value1, Value* value2, uint32_t arithmetic_flags) {
   ASSERT_TYPES_EQUAL(value1, value2);
 
   Instr* i = AppendInstr(
-      OPCODE_DIV_info, 0,
+      OPCODE_MUL_HI_info, arithmetic_flags,
       AllocValue(value1->type));
   i->set_src1(value1);
   i->set_src2(value2);
@@ -1169,11 +1171,12 @@ Value* HIRBuilder::Div(Value* value1, Value* value2) {
   return i->dest;
 }
 
-Value* HIRBuilder::Rem(Value* value1, Value* value2) {
+Value* HIRBuilder::Div(
+    Value* value1, Value* value2, uint32_t arithmetic_flags) {
   ASSERT_TYPES_EQUAL(value1, value2);
 
   Instr* i = AppendInstr(
-      OPCODE_REM_info, 0,
+      OPCODE_DIV_info, arithmetic_flags,
       AllocValue(value1->type));
   i->set_src1(value1);
   i->set_src2(value2);
@@ -1194,7 +1197,7 @@ Value* HIRBuilder::MulAdd(Value* value1, Value* value2, Value* value3) {
   }
 
   Instr* i = AppendInstr(
-      OPCODE_MULADD_info, 0,
+      OPCODE_MUL_ADD_info, 0,
       AllocValue(value1->type));
   i->set_src1(value1);
   i->set_src2(value2);
@@ -1215,7 +1218,7 @@ Value* HIRBuilder::MulSub(Value* value1, Value* value2, Value* value3) {
   }
 
   Instr* i = AppendInstr(
-      OPCODE_MULSUB_info, 0,
+      OPCODE_MUL_SUB_info, 0,
       AllocValue(value1->type));
   i->set_src1(value1);
   i->set_src2(value2);
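
With MulHi in the builder, mulhwx and mulhwux no longer zero-extend to 64 bits, multiply, and shift right by 32; they ask for the high word directly and then extend it. In scalar terms the new mulhwux lowering computes roughly the following (a sketch of the semantics, not the emitter code):

    #include <cassert>
    #include <cstdint>

    uint64_t mulhwux(uint64_t ra, uint64_t rb) {
      uint32_t a = (uint32_t)ra;  // Truncate(..., INT32_TYPE)
      uint32_t b = (uint32_t)rb;
      uint32_t hi = (uint32_t)(((uint64_t)a * b) >> 32);  // unsigned MulHi
      return hi;                  // ZeroExtend(..., INT64_TYPE)
    }

    int main() {
      // 0x80000000 * 2 == 0x100000000, so the high 32 bits are 1.
      assert(mulhwux(0x80000000ull, 2) == 1);
      return 0;
    }
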
diff --git a/src/alloy/hir/hir_builder.h b/src/alloy/hir/hir_builder.h
index 117ff79f3..86dd94784 100644
--- a/src/alloy/hir/hir_builder.h
+++ b/src/alloy/hir/hir_builder.h
@@ -152,9 +152,9 @@ public:
              uint32_t arithmetic_flags = 0);
   Value* Sub(Value* value1, Value* value2,
              uint32_t arithmetic_flags = 0);
-  Value* Mul(Value* value1, Value* value2);
-  Value* Div(Value* value1, Value* value2);
-  Value* Rem(Value* value1, Value* value2);
+  Value* Mul(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
+  Value* MulHi(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
+  Value* Div(Value* value1, Value* value2, uint32_t arithmetic_flags = 0);
   Value* MulAdd(Value* value1, Value* value2, Value* value3); // (1 * 2) + 3
   Value* MulSub(Value* value1, Value* value2, Value* value3); // (1 * 2) - 3
   Value* Neg(Value* value);
diff --git a/src/alloy/hir/opcodes.h b/src/alloy/hir/opcodes.h
index e6b0e3b54..abe7f3940 100644
--- a/src/alloy/hir/opcodes.h
+++ b/src/alloy/hir/opcodes.h
@@ -47,6 +47,7 @@ enum PrefetchFlags {
 };
 enum ArithmeticFlags {
   ARITHMETIC_SET_CARRY = (1 << 1),
+  ARITHMETIC_UNSIGNED = (1 << 2),
 };
 enum Permutes {
   PERMUTE_XY_ZW = 0x00010405,
@@ -134,10 +135,10 @@ enum Opcode {
   OPCODE_ADD_CARRY,
   OPCODE_SUB,
   OPCODE_MUL,
+  OPCODE_MUL_HI,  // TODO(benvanik): remove this and add INT128 type.
   OPCODE_DIV,
-  OPCODE_REM,
-  OPCODE_MULADD,
-  OPCODE_MULSUB,
+  OPCODE_MUL_ADD,
+  OPCODE_MUL_SUB,
   OPCODE_NEG,
   OPCODE_ABS,
   OPCODE_SQRT,
diff --git a/src/alloy/hir/opcodes.inl b/src/alloy/hir/opcodes.inl
index a1b56775e..ef47de819 100644
--- a/src/alloy/hir/opcodes.inl
+++ b/src/alloy/hir/opcodes.inl
@@ -343,6 +343,12 @@ DEFINE_OPCODE(
     OPCODE_SIG_V_V_V,
     OPCODE_FLAG_COMMUNATIVE);
 
+DEFINE_OPCODE(
+    OPCODE_MUL_HI,
+    "mul_hi",
+    OPCODE_SIG_V_V_V,
+    OPCODE_FLAG_COMMUNATIVE);
+
 DEFINE_OPCODE(
     OPCODE_DIV,
     "div",
@@ -350,19 +356,13 @@ DEFINE_OPCODE(
     0);
 
 DEFINE_OPCODE(
-    OPCODE_REM,
-    "rem",
-    OPCODE_SIG_V_V_V,
-    0);
-
-DEFINE_OPCODE(
-    OPCODE_MULADD,
+    OPCODE_MUL_ADD,
     "mul_add",
     OPCODE_SIG_V_V_V_V,
     0);
 
 DEFINE_OPCODE(
-    OPCODE_MULSUB,
+    OPCODE_MUL_SUB,
     "mul_sub",
     OPCODE_SIG_V_V_V_V,
     0);
diff --git a/src/alloy/hir/value.cc b/src/alloy/hir/value.cc
index 88201ccf1..43d40d647 100644
--- a/src/alloy/hir/value.cc
+++ b/src/alloy/hir/value.cc
@@ -278,11 +278,6 @@ void Value::Div(Value* other) {
   }
 }
 
-void Value::Rem(Value* other) {
-  // TODO(benvanik): big matrix.
-  XEASSERTALWAYS();
-}
-
 void Value::MulAdd(Value* dest, Value* value1, Value* value2, Value* value3) {
   // TODO(benvanik): big matrix.
   XEASSERTALWAYS();
diff --git a/src/alloy/hir/value.h b/src/alloy/hir/value.h
index 8bf9f0135..814fe081d 100644
--- a/src/alloy/hir/value.h
+++ b/src/alloy/hir/value.h
@@ -185,7 +185,6 @@ public:
   void Sub(Value* other);
   void Mul(Value* other);
   void Div(Value* other);
-  void Rem(Value* other);
   static void MulAdd(Value* dest, Value* value1, Value* value2, Value* value3);
   static void MulSub(Value* dest, Value* value1, Value* value2, Value* value3);
   void Neg();
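
REM could be dropped wholesale because it was never really implemented: the IVM dispatch table already mapped it to TranslateInvalid, the x64 lowering was a stub, and Value::Rem only asserted. If a frontend later needs a remainder, it can be composed from the surviving opcodes through the truncating-division identity a % b == a - (a / b) * b; a hypothetical scalar helper:

    #include <cassert>
    #include <cstdint>

    // A frontend would emit the same shape with f.Div/f.Mul/f.Sub on values.
    int32_t rem_via_div(int32_t a, int32_t b) {
      return a - (a / b) * b;
    }

    int main() {
      assert(rem_via_div(7, 3) == 1);
      assert(rem_via_div(-7, 3) == -1);  // matches C/C++ truncated division
      return 0;
    }
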