From d948308e198c8792f0321d9a9bb01ad38df0cfba Mon Sep 17 00:00:00 2001
From: "chss95cs@gmail.com"
Date: Tue, 14 Jan 2020 20:48:18 -0800
Subject: [PATCH] Added an lea-based multiply optimization for pow2+1
 multipliers below 16 (3, 5, and 9). Added magic-number unsigned-division
 optimization (magicu, from Hacker's Delight) for INT64 and INT32. Added my
 HIR optimization subpass, with all but 3 of the optimization rules disabled.

---
 src/xenia/cpu/backend/x64/x64_sequences.cc    | 143 ++-
 .../cpu/compiler/passes/peephole_rules_impl.h | 520 +++++++++++
 .../repetitive_computation_merger_pass.cc     | 857 ++++++++++++++++++
 .../repetitive_computation_merger_pass.h      |  36 +
 4 files changed, 1546 insertions(+), 10 deletions(-)
 create mode 100644 src/xenia/cpu/compiler/passes/peephole_rules_impl.h
 create mode 100644 src/xenia/cpu/compiler/passes/repetitive_computation_merger_pass.cc
 create mode 100644 src/xenia/cpu/compiler/passes/repetitive_computation_merger_pass.h

diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc
index 6ae28b468..26a642612 100644
--- a/src/xenia/cpu/backend/x64/x64_sequences.cc
+++ b/src/xenia/cpu/backend/x64/x64_sequences.cc
@@ -1395,6 +1395,13 @@ struct MUL_I16 : Sequence<MUL_I16, I<OPCODE_MUL, I16Op, I16Op, I16Op>> {
 };
 struct MUL_I32 : Sequence<MUL_I32, I<OPCODE_MUL, I32Op, I32Op, I32Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
+    if (i.src2.is_constant) {
+      uint32_t multiplier = i.src2.value->constant.u32;
+      if (multiplier == 3 || multiplier == 5 || multiplier == 9) {
+        e.lea(i.dest, e.ptr[i.src1.reg() * (multiplier - 1) + i.src1.reg()]);
+        return;
+      }
+    }
     if (e.IsFeatureEnabled(kX64EmitBMI2)) {
       // mulx: $1:$2 = EDX * $3
@@ -1437,6 +1444,14 @@ struct MUL_I32 : Sequence<MUL_I32, I<OPCODE_MUL, I32Op, I32Op, I32Op>> {
 };
 struct MUL_I64 : Sequence<MUL_I64, I<OPCODE_MUL, I64Op, I64Op, I64Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
+    if (i.src2.is_constant) {
+      uint64_t multiplier = i.src2.value->constant.u64;
+      if (multiplier == 3 || multiplier == 5 || multiplier == 9) {
+        e.lea(i.dest,
+              e.ptr[i.src1.reg() * ((int)multiplier - 1) + i.src1.reg()]);
+        return;
+      }
+    }
     if (e.IsFeatureEnabled(kX64EmitBMI2)) {
       // mulx: $1:$2 = RDX * $3
@@ -1694,7 +1709,62 @@ struct MUL_HI_I64
 };
 EMITTER_OPCODE_TABLE(OPCODE_MUL_HI, MUL_HI_I8, MUL_HI_I16, MUL_HI_I32,
                      MUL_HI_I64);
+/* From Hacker's Delight, by Henry S. Warren Jr.: calculate the magic number
+ * for unsigned division. */
+template <typename T>
+auto magicu(T d) {
+  constexpr unsigned NBITS = sizeof(T) * CHAR_BIT;
+  constexpr unsigned NBITS_M1 = NBITS - 1;
+  constexpr T SIGNBIT = T(1) << NBITS_M1;
+  constexpr T POSMASK = ~SIGNBIT;
+
+  struct mu {
+    T M;    // Magic number,
+    int a;  // "add" indicator,
+    int s;  // and shift amount.
+  };
+
+  // Must have 1 <= d <= 2**NBITS - 1.
+  int p, gt = 0;
+  T nc, delta, q1, r1, q2, r2;
+  struct mu magu;
+
+  magu.a = 0;  // Initialize "add" indicator.
+  nc = -1 - ((T) - (std::make_signed_t<T>)d) % d;  // Unsigned arithmetic here.
+  p = NBITS_M1;            // Init. p.
+  q1 = SIGNBIT / nc;       // Init. q1 = 2**p/nc.
+  r1 = SIGNBIT - q1 * nc;  // Init. r1 = rem(2**p, nc).
+  q2 = POSMASK / d;        // Init. q2 = (2**p - 1)/d.
+  r2 = POSMASK - q2 * d;   // Init. r2 = rem(2**p - 1, d).
+  do {
+    p = p + 1;
+    if (q1 >= SIGNBIT) gt = 1;  // Means q1 > delta.
+    if (r1 >= nc - r1) {
+      q1 = 2 * q1 + 1;  // Update q1.
+      r1 = 2 * r1 - nc;
+    }  // Update r1.
+    else {
+      q1 = 2 * q1;
+      r1 = 2 * r1;
+    }
+    if (r2 + 1 >= d - r2) {
+      if (q2 >= POSMASK) magu.a = 1;
+      q2 = 2 * q2 + 1;  // Update q2.
+      r2 = 2 * r2 + 1 - d;
+    }  // Update r2.
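+    // q1 tracks 2**p/nc and q2 tracks (2**p - 1)/d as p grows, with r1/r2
+    // their remainders. In the else below, 2*r2 + 1 < d, so q2 can double
+    // with no round-up needed.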
+    else {
+      if (q2 >= SIGNBIT) magu.a = 1;
+      q2 = 2 * q2;
+      r2 = 2 * r2 + 1;
+    }
+    delta = d - 1 - r2;
+  } while (gt == 0 && (q1 < delta || (q1 == delta && r1 == 0)));
+
+  magu.M = q2 + 1;     // Magic number
+  magu.s = p - NBITS;  // and shift amount to return
+  return magu;         // (magu.a was set above).
+}
 // ============================================================================
 // OPCODE_DIV
 // ============================================================================
@@ -1797,13 +1867,40 @@ struct DIV_I32 : Sequence<DIV_I32, I<OPCODE_DIV, I32Op, I32Op, I32Op>> {
 
     if (i.src2.is_constant) {
       assert_true(!i.src1.is_constant);
-      e.mov(e.ecx, i.src2.constant());
+
       if (i.instr->flags & ARITHMETIC_UNSIGNED) {
-        e.mov(e.eax, i.src1);
-        // Zero upper bits.
-        e.xor_(e.edx, e.edx);
-        e.div(e.ecx);
+        auto div_shift_info = magicu(i.src2.value->constant.u32);
+        // Disabling the add ("a") path for now; the code handling it below
+        // is incorrect.
+        if (!div_shift_info.a) {
+          e.mov(i.dest, i.src1);
+          if (e.IsFeatureEnabled(kX64EmitAVX2)) {
+            e.mov(e.edx, div_shift_info.M);
+            e.mulx(i.dest, i.dest, i.dest);
+          } else {
+            e.mov(e.eax, div_shift_info.M);
+            e.mul(i.dest);
+            e.mov(i.dest, e.edx);
+          }
+          if (div_shift_info.a) {
+            // Unreachable until the a == 1 case is accepted above.
+            // Pg. 228, Hacker's Delight, 2nd edition: add the initial input
+            // to the product; the result may carry, so treat it as 33 bits
+            // with the carry as bit 33.
+            e.add(i.dest, i.src1);
+            e.rcr(i.dest, 1);
+          }
+          if (div_shift_info.s != 0) {
+            e.shr(i.dest, div_shift_info.s);
+          }
+          return;
+        } else {
+          e.mov(e.ecx, i.src2.constant());
+          e.mov(e.eax, i.src1);
+          // Zero upper bits.
+          e.xor_(e.edx, e.edx);
+          e.div(e.ecx);
+        }
     } else {
+      e.mov(e.ecx, i.src2.constant());
       e.mov(e.eax, i.src1);
       e.cdq();  // edx:eax = sign-extend eax
       e.idiv(e.ecx);
@@ -1845,13 +1942,39 @@ struct DIV_I64 : Sequence<DIV_I64, I<OPCODE_DIV, I64Op, I64Op, I64Op>> {
 
     if (i.src2.is_constant) {
       assert_true(!i.src1.is_constant);
-      e.mov(e.rcx, i.src2.constant());
       if (i.instr->flags & ARITHMETIC_UNSIGNED) {
-        e.mov(e.rax, i.src1);
-        // Zero upper bits.
-        e.xor_(e.rdx, e.rdx);
-        e.div(e.rcx);
+        auto div_shift_info = magicu(i.src2.value->constant.u64);
+        // Disabling the add ("a") path for now; the code handling it below
+        // is incorrect.
+        if (!div_shift_info.a) {
+          if (e.IsFeatureEnabled(kX64EmitAVX2)) {
+            e.mov(e.rdx, div_shift_info.M);
+            e.mov(i.dest, i.src1);
+            e.mulx(i.dest, i.dest, i.dest);
+          } else {
+            e.mov(e.rax, div_shift_info.M);
+            e.mul(i.src1);
+            e.mov(i.dest, e.rdx);
+          }
+          if (div_shift_info.a) {
+            // Unreachable until the a == 1 case is accepted above.
+            // Pg. 228, Hacker's Delight, 2nd edition: add the initial input
+            // to the product; the result may carry, so treat it as 65 bits
+            // with the carry as bit 65.
+            e.add(i.dest, i.src1);
+            e.rcr(i.dest, 1);
+          }
+          if (div_shift_info.s) {
+            e.shr(i.dest, div_shift_info.s);
+          }
+          return;
+        } else {
+          e.mov(e.rcx, i.src2.constant());
+          e.mov(e.rax, i.src1);
+          // Zero upper bits.
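+          // div takes its 128-bit dividend from rdx:rax, so rdx must be
+          // zeroed for a plain 64-bit unsigned divide.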
+          e.xor_(e.rdx, e.rdx);
+          e.div(e.rcx);
+        }
     } else {
+      e.mov(e.rcx, i.src2.constant());
       e.mov(e.rax, i.src1);
       e.cqo();  // rdx:rax = sign-extend rax
       e.idiv(e.rcx);
diff --git a/src/xenia/cpu/compiler/passes/peephole_rules_impl.h b/src/xenia/cpu/compiler/passes/peephole_rules_impl.h
new file mode 100644
index 000000000..0fdb6fcea
--- /dev/null
+++ b/src/xenia/cpu/compiler/passes/peephole_rules_impl.h
@@ -0,0 +1,520 @@
+#pragma once
+
+static size_t n_cvt_opts = 0, n_vec_shift_opts = 0,
+              n_useless_ctx_store_opts = 0;
+
+static bool do_cvt_opt(HIRBuilder* builder, Block* block) {
+  bool did_change = false;
+  for (auto* insn = block->instr_head; insn; insn = insn->next) {
+    if (!is_op(insn, OPCODE_CONVERT)) {
+      continue;
+    }
+
+    /*if (insn->dest->type != FLOAT32_TYPE ||
+        insn->src1.value->type != FLOAT64_TYPE) {
+      continue;
+    }*/
+
+    if (!is_rvalue(insn->dest) || !is_rvalue(insn->src1)) continue;
+
+    Instr* target_next_use = find_next_use(insn->dest, block, insn);
+
+    if (!target_next_use) {
+      continue;
+    }
+
+    if (!is_op(target_next_use, OPCODE_CONVERT)) continue;
+
+    if (target_next_use->src1.value != insn->dest) continue;
+
+    if (target_next_use->dest->type != insn->src1.value->type ||
+        !is_rvalue(target_next_use->dest))
+      continue;
+    ++n_cvt_opts;
+    make_assignment(target_next_use, insn->src1);
+
+    did_change = true;
+  }
+  return did_change;
+}
+
+static bool do_vector_shift_opt(HIRBuilder* builder, Block* block) {
+  bool did_change = false;
+  for (auto* insn = block->instr_head; insn; insn = insn->next) {
+    if (!is_op(insn, OPCODE_LOAD_VECTOR_SHR) &&
+        !is_op(insn, OPCODE_LOAD_VECTOR_SHL)) {
+      continue;
+    }
+
+    if (!is_rvalue(insn->dest) || !is_rvalue(insn->src1)) continue;
+
+    for (Instr* nextu = find_next_use(insn->src1, block, insn); nextu;
+         nextu = find_next_use(insn->src1, block, nextu)) {
+      if (!nextu || nextu->opcode != insn->opcode) continue;
+      ++n_vec_shift_opts;
+      make_assignment(nextu, insn->dest);
+      did_change = true;
+    }
+  }
+  return did_change;
+}
+template <bool creg_only>
+static bool elim_useless_ctx_stores(HIRBuilder* builder, Block* block) {
+  bool did_change = false;
+
+  for (auto* insn = block->instr_head; insn; insn = insn->next) {
+    if (!is_op(insn, OPCODE_STORE_CONTEXT)) continue;
+    ppc_ctx_vset_t trace{};
+    add_store_to_bitset(&trace, insn);
+
+    if (creg_only && !has_any_creg(&trace)) {
+      continue;
+    }
+    contextuse_res_t redefed = USED;
+    auto next = find_next_context_use(&trace, block, insn->next, &redefed);
+
+    if (redefed == USED && !next) {
+      bool all_redef = true;
+      for (auto succ = block->outgoing_edge_head; succ;
+           succ = succ->outgoing_next) {
+        if (find_next_context_use(&trace, succ->dest, nullptr, &redefed) &&
+            redefed == REDEFED) {
+        } else {
+          all_redef = false;
+          break;
+        }
+      }
+
+      if (!all_redef) {
+        continue;
+      }
+
+      // continue;
+    } else if (redefed == INDET || redefed == USED) {
+      continue;
+    }
+    ++n_useless_ctx_store_opts;
+
+    make_nop(insn);
+    did_change = true;
+  }
+  return did_change;
+}
+static uint64_t n_replaced_assignments = 0;
+
+static bool replace_assignment_uses(HIRBuilder* builder, Block* block) {
+  bool did_change = false;
+
+  for (auto* insn = block->instr_head; insn; insn = insn->next) {
+    if (!is_op(insn, OPCODE_ASSIGN)) continue;
+    if (!is_rvalue(insn->dest) || !is_rvalue(insn->src1)) continue;
+
+    replace_uses(builder, insn->dest, insn->src1.value);
+    n_replaced_assignments++;
+
+    make_nop(insn);
+    did_change = true;
+  }
+  return did_change;
+}
+static uint64_t n_replaced_loads = 0;
+static bool optimize_repeated_loads(HIRBuilder*
builder, Block* block) { + bool did_change = false; + + for (auto* insn = block->instr_head; insn; insn = insn->next) { + if (!is_op(insn, OPCODE_LOAD)) { + continue; + } + + if (!is_rvalue(insn->src1)) continue; + + auto n = find_next_use(insn->src1, block, insn); + + if (!n) continue; + + if (!is_op(n, OPCODE_LOAD)) continue; + + if (insn->dest->type != n->dest->type) continue; + + if (has_any_intervening_store(insn, n)) continue; + + make_assignment(n, insn->dest); + n_replaced_loads++; + did_change = true; + } + return did_change; +} +static uint64_t n_and1s_optimized = 0; + +static bool and1_optimizer(HIRBuilder* builder, Block* block) { + bool did_change = false; + + for (auto* insn = block->instr_head; insn; insn = insn->next) { + if (!is_op(insn, OPCODE_IS_TRUE) && + !(is_op(insn, OPCODE_COMPARE_EQ) && is_const_x(insn->src2, 1ULL)) && + !(is_op(insn, OPCODE_COMPARE_NE) && is_const_x(insn->src2, 0ULL))) { + continue; + } + + auto def = insn->src1.value->def; + + if (def_is_onebit(def)) { + if (insn->src1.value->type == insn->dest->type) { + make_assignment(insn, insn->src1); + } else { + insn->Replace(&OPCODE_TRUNCATE_info, 0); + insn->set_src1(def->dest); + } + n_and1s_optimized++; + did_change = true; + } + } + return did_change; +} +static uint64_t n_trunc_exts = 0; + +static bool trunc_ext_optimizer(HIRBuilder* builder, Block* block) { + bool did_change = false; + + for (auto* insn = block->instr_head; insn; insn = insn->next) { + if (!is_op(insn, OPCODE_TRUNCATE)) continue; + + auto def = insn->src1.value->def; + + if (!is_op(def, OPCODE_ZERO_EXTEND) && !is_op(def, OPCODE_SIGN_EXTEND)) + continue; + + if (def->src1.value->type != insn->dest->type) continue; + + make_assignment(insn, def->src1); + ++n_trunc_exts; + did_change = true; + // __debugbreak(); + } + return did_change; +} +static uint64_t n_eliminated_unused_conversion_results = 0; +static bool unused_conversion_eliminator(HIRBuilder* builder, Block* block) { + bool did_change = false; + + for (auto* insn = block->instr_head; insn; insn = insn->next) { + if (!is_elimable(insn)) continue; + + if (!insn->dest->use_head || insn->dest->use_head->instr == nullptr) { + make_nop(insn); + did_change = true; + n_eliminated_unused_conversion_results++; + } + } + return did_change; +} +static uint64_t n_useless_operations = 0; +static bool useless_operation_eliminator(HIRBuilder* builder, Block* block) { + bool did_change = false; + auto signal_change = [&did_change](auto insn, auto& src) { + make_assignment(insn, insn->src1); + n_useless_operations++; + did_change = true; + }; + for (auto* insn = block->instr_head; insn; insn = insn->next) { + if (is_op(insn, OPCODE_ADD) || is_op(insn, OPCODE_SUB) || + is_op(insn, OPCODE_OR) || is_op(insn, OPCODE_XOR)) { + if (is_const_x(insn->src2, 0)) { + signal_change(insn, insn->src1); + + continue; + } + } + + if (is_op(insn, OPCODE_AND) && + is_const_x(insn->src2, mask_for_typename(insn->src1.value->type))) { + signal_change(insn, insn->src1); + continue; + } + + if ((is_op(insn, OPCODE_DIV) || is_op(insn, OPCODE_MUL)) && + is_const_x(insn->src2, 1ULL)) { + signal_change(insn, insn->src1); + continue; + } + } + return did_change; +} +static uint64_t n_rotates_lowered_to_shifts = 0; +static bool rlwinm_sanitizer(HIRBuilder* builder, Block* block) { + bool did_change = false; + for (auto* insn = block->instr_head; insn; insn = insn->next) { + if (!is_op(insn, OPCODE_ROTATE_LEFT)) continue; + + if (!insn->src2.value->IsConstant()) continue; + + auto defed_value = insn->dest; + + auto nxtuse = 
get_solo_use(defed_value); + if (!nxtuse) continue; + + if (!is_op(nxtuse, OPCODE_AND)) continue; + + if (!nxtuse->src2.value->IsConstant()) continue; + + uint64_t v = extract_constant(nxtuse->src2.value); + + uint64_t rotamt = extract_constant(insn->src2.value); + + if ((v & ((1ULL << rotamt) - 1)) == 0ULL) { + // the bits rotated in to the left are discarded, can be shl + insn->opcode = &OPCODE_SHL_info; + n_rotates_lowered_to_shifts++; + did_change = true; + } + } + + return did_change; +} +static uint64_t n_useless_rlwinm_ors_removed = 0; +static bool rlwinm_sanitizer_useless_or(HIRBuilder* builder, Block* block) { + bool did_change = false; + for (auto* insn = block->instr_head; insn; insn = insn->next) { + if (!is_op(insn, OPCODE_SHL) || !insn->src2.value->IsConstant()) continue; + + auto solo_use_shl_result = get_solo_use(insn->dest); + + if (!solo_use_shl_result) continue; + + if (!is_op(solo_use_shl_result, OPCODE_OR) || + solo_use_shl_result->src1.value != insn->dest) { + continue; + } + + auto solo_use_or_result = get_solo_use(solo_use_shl_result->dest); + if (!solo_use_or_result) continue; + + auto mask = mask_for_typename(insn->dest->type); + + mask <<= extract_constant(insn->src2.value); + + /* if (is_op(solo_use_or_result, OPCODE_TRUNCATE)) { + auto newmask = mask_for_typename(solo_use_or_result->dest->type); + if ((newmask & mask) == 0) { + // none of our shl bits survive + make_nop(insn); + make_assignment(solo_use_or_result, solo_use_shl_result->src2); + n_useless_rlwinm_ors_removed++; + did_change = true; + } + } + + else*/ + if (is_op(solo_use_or_result, OPCODE_AND) && + solo_use_or_result->src2.value->IsConstant()) { + auto newmask = extract_constant(solo_use_or_result->src2.value); + + if ((newmask & mask) == 0) { + // none of our shl bits survive + make_nop(insn); + auto newandoper = solo_use_shl_result->src2.value; + + // make_nop(solo_use_shl_result); + make_assignment(solo_use_shl_result, solo_use_shl_result->src2); + n_useless_rlwinm_ors_removed++; + did_change = true; + } + } + } + + return did_change; +} + +static uint64_t n_ssa_merges = 0; +static bool ssa_merger(HIRBuilder* builder, Block* block) { + bool did_change = false; + for (auto* insn = block->instr_head; insn; insn = insn->next) { + if (!is_op(insn, OPCODE_ASSIGN)) continue; + + auto solo = get_solo_use(insn->dest); + if (!solo || !is_like_assign(solo)) continue; + + solo->set_src1(insn->src1.value); + + make_nop(insn); + ++n_ssa_merges; + + did_change = true; + } + return did_change; +} +static uint64_t n_signbit_rols_optimized = 0; +static bool signbit_rol_optimizer(HIRBuilder* builder, Block* block) { + bool did_change = false; + for (auto* insn = block->instr_head; insn; insn = insn->next) { + if (!is_op(insn, OPCODE_ROTATE_LEFT) || !is_const_x(insn->src2, 1ULL)) + continue; + + auto solo_use = get_solo_use(insn->dest); + if (!solo_use) continue; + + if (!is_op(solo_use, OPCODE_AND) || !is_const_x(solo_use->src2, 1ULL)) + continue; + + Value* shift_constant = constant_for_type( + builder, highbit_for_typename(insn->dest->type), INT8_TYPE); + if (!shift_constant) continue; + + solo_use->Replace(&OPCODE_SHR_info, 0); + + solo_use->set_src1(insn->src1.value); + solo_use->set_src2(shift_constant); + make_nop(insn); + did_change = true; + n_signbit_rols_optimized++; + } + return did_change; +} + +static uint64_t n_rol_bitextracts_optimized = 0; +static bool rol_bitextract_optimizer(HIRBuilder* builder, Block* block) { + bool did_change = false; + for (auto* insn = block->instr_head; insn; insn = 
insn->next) {
+    if (!is_op(insn, OPCODE_ROTATE_LEFT) || !insn->src2.value->IsConstant())
+      continue;
+
+    auto defuse = get_solo_use(insn->dest);
+
+    if (!defuse) continue;
+
+    if (!is_op(defuse, OPCODE_AND) || !is_const_x(defuse->src2, 1)) continue;
+
+    unsigned bitsz = highbit_for_typename(insn->dest->type);
+
+    uint64_t realtarget = (bitsz + 1) - extract_constant(insn->src2.value);
+    auto old_inp = insn->src1.value;
+
+    Value* shrconst = constant_for_type(builder, realtarget, INT8_TYPE);
+
+    if (!shrconst) continue;
+    insn->Replace(&OPCODE_SHR_info, 0);
+
+    insn->set_src1(old_inp);
+    insn->set_src2(shrconst);
+    n_rol_bitextracts_optimized++;
+    did_change = true;
+  }
+  return did_change;
+}
+
+static uint64_t n_shl_shr_masks_gen = 0;
+
+static bool shl_shr_mask_optimizer(HIRBuilder* builder, Block* block) {
+  bool did_change = false;
+  for (auto* insn = block->instr_head; insn; insn = insn->next) {
+    if (!is_op(insn, OPCODE_SHL) || !insn->src2.value->IsConstant()) continue;
+
+    auto defuse = get_solo_use(insn->dest);
+
+    if (!defuse || !is_op(defuse, OPCODE_SHR) ||
+        !insn->src2.value->IsConstantEQ(defuse->src2.value))
+      continue;
+
+    uint64_t mask = mask_for_typename(insn->src1.value->type);
+    uint64_t shfactor = extract_constant(insn->src2.value);
+    uint64_t msk = mask;
+    mask <<= shfactor;
+    mask >>= shfactor;
+    mask &= msk;
+    auto oldsrc1 = insn->src1.value;
+
+    auto maskval = constant_for_type(builder, mask, oldsrc1->type);
+    if (!maskval) continue;
+
+    make_nop(insn);
+
+    defuse->Replace(&OPCODE_AND_info, 0);
+    defuse->set_src1(oldsrc1);
+    defuse->set_src2(maskval);
+    n_shl_shr_masks_gen++;
+    did_change = true;
+  }
+  return did_change;
+}
+static uint64_t n_redundant_loads_local = 0;
+static bool redundant_local_slot_load_optimizer(HIRBuilder* builder,
+                                                Block* block) {
+  bool did_change = false;
+  for (auto* insn = block->instr_head; insn; insn = insn->next) {
+    if (!is_op(insn, OPCODE_LOAD_LOCAL)) {
+      continue;
+    }
+
+    auto next_load = find_next_local_load(insn->src1.value, block, insn);
+
+    if (!next_load) continue;
+
+    make_assignment(next_load, insn->dest);
+
+    ++n_redundant_loads_local;
+    did_change = true;
+  }
+  return did_change;
+}
+/*
+  Re-enable these one by one, or add flags to control them, until each is
+  proven to be stable.
+*/
+#if 0
+static const optblock_pass_t g_passes[] = {do_cvt_opt,
+                                           do_vector_shift_opt,
+                                           // creg_only arguments assumed:
+                                           // creg-only first, general second.
+                                           elim_useless_ctx_stores<true>,
+                                           replace_assignment_uses,
+                                           elim_useless_ctx_stores<false>,
+                                           optimize_repeated_loads,
+                                           and1_optimizer,
+                                           trunc_ext_optimizer,
+                                           unused_conversion_eliminator,
+                                           useless_operation_eliminator,
+                                           rlwinm_sanitizer,
+                                           rlwinm_sanitizer_useless_or,
+                                           ssa_merger,
+                                           signbit_rol_optimizer,
+                                           rol_bitextract_optimizer,
+                                           shl_shr_mask_optimizer,
+                                           redundant_local_slot_load_optimizer,
+                                           nop_deleter};
+void dump_opts() {
+  FILE* lel = fopen("Optdump.txt", "w");
+
+  fprintf(
+      lel,
+      "Optimized %lld converts\nOptimized %lld vector shifts\nOptimized %lld "
+      "context stores\nOptimized away %lld useless assignments\nOptimized "
+      "away %lld redundant loads."
+ "\nOptimized away %lld and 1 instructions\nOptimized away %lld useless " + "truncate-extends\nEliminated %lld unused operation " + "results.\nOptimized away %lld no-op operations.\nLowered %lld left " + "rotates to left shifts.\nRemoved %lld useless rlwinm rotate-or " + "sequences.\nShortened %lld assignment chains\nOptimized %lld signbit " + "rotate lefts.\nOptimized %lld rol bitextracts.\nConverted %lld shift " + "sequences to bitmasks.\n Optimized %lld redundant local loads.", + n_cvt_opts, n_vec_shift_opts, n_useless_ctx_store_opts, + n_replaced_assignments, n_replaced_loads, n_and1s_optimized, n_trunc_exts, + n_eliminated_unused_conversion_results, n_useless_operations, + n_rotates_lowered_to_shifts, n_useless_rlwinm_ors_removed, n_ssa_merges, + n_signbit_rols_optimized, n_rol_bitextracts_optimized, + n_shl_shr_masks_gen, n_redundant_loads_local); + fclose(lel); +} +#else +static const optblock_pass_t g_passes[] = {rlwinm_sanitizer_useless_or, + rlwinm_sanitizer, + + nop_deleter, + rol_bitextract_optimizer, + signbit_rol_optimizer, + shl_shr_mask_optimizer +}; +#endif diff --git a/src/xenia/cpu/compiler/passes/repetitive_computation_merger_pass.cc b/src/xenia/cpu/compiler/passes/repetitive_computation_merger_pass.cc new file mode 100644 index 000000000..a4b52c903 --- /dev/null +++ b/src/xenia/cpu/compiler/passes/repetitive_computation_merger_pass.cc @@ -0,0 +1,857 @@ +#include "repetitive_computation_merger_pass.h" +#include +#include "xenia/cpu/ppc/ppc_context.h" +namespace xe { +namespace cpu { +namespace compiler { +namespace passes { +using namespace xe::cpu::hir; +static bool didit = false; +using xe::cpu::hir::HIRBuilder; +using xe::cpu::hir::Instr; +using xe::cpu::hir::Value; + +using ppc_ctx_vset_underlying_t = std::bitset; + +struct alignas(__m256i) ppc_ctx_vset_t : public ppc_ctx_vset_underlying_t { + constexpr ppc_ctx_vset_t() : ppc_ctx_vset_underlying_t(), pad(){}; + + char pad[sizeof(ppc_ctx_vset_underlying_t) % sizeof(__m256i)]; +}; + +#ifdef _MSC_VER + +#define add_ppc_offset(field) \ + __builtin_offsetof(ppc::PPCContext, field), \ + __builtin_offsetof(ppc::PPCContext, field) + 1, \ + __builtin_offsetof(ppc::PPCContext, field) + 2, \ + __builtin_offsetof(ppc::PPCContext, field) + 3 +#else +#define add_ppc_offset(field) offsetof(ppc::PPCContext, field) +#endif + +constexpr unsigned bitset_cregs[] = {add_ppc_offset(cr0), add_ppc_offset(cr1), + add_ppc_offset(cr2), add_ppc_offset(cr3), + add_ppc_offset(cr4), add_ppc_offset(cr5), + add_ppc_offset(cr6), add_ppc_offset(cr7)}; + +RepetitiveComputationMergerPass::RepetitiveComputationMergerPass() {} +RepetitiveComputationMergerPass::~RepetitiveComputationMergerPass() {} + +static Instr* create_instr(hir::HIRBuilder* builder, hir::Block* blk, + const hir::OpcodeInfo* opcode, unsigned flags, + Value* dest) { + Instr* instr = builder->arena()->Alloc(); + instr->next = NULL; + instr->prev = nullptr; + + instr->ordinal = UINT32_MAX; + instr->block = blk; + instr->opcode = opcode; + instr->flags = flags; + instr->dest = dest; + instr->src1.value = instr->src2.value = instr->src3.value = NULL; + instr->src1_use = instr->src2_use = instr->src3_use = NULL; + if (dest) { + dest->def = instr; + } + return instr; +} + +static uint64_t extract_constant(Value* v) { + switch (v->type) { + case INT8_TYPE: + return v->constant.u8; + case INT16_TYPE: + return v->constant.u16; + case INT32_TYPE: + return v->constant.u32; + case INT64_TYPE: + return v->constant.u64; + } + return 0; +} + +static Instr* find_next_use(Value* val, hir::Block* blk, 
+                            Instr* after, Instr* end = nullptr) {
+  if (!after || after->next == end) {
+    return nullptr;
+  }
+
+  for (Instr* pos = after->next; pos != end; pos = pos->next) {
+    if (pos->opcode->flags & OPCODE_FLAG_VOLATILE) return nullptr;
+    if (pos->src1.value == val || pos->src2.value == val ||
+        pos->src3.value == val) {
+      return pos;
+    }
+  }
+  return nullptr;
+}
+
+static Instr* find_next_use(Instr::Op& val, hir::Block* blk, Instr* after,
+                            Instr* end = nullptr) {
+  return find_next_use(val.value, blk, after, end);
+}
+
+static Instr* get_solo_use(Value* val) {
+  if (!val || !val->use_head || val->use_head->next) {
+    return nullptr;
+  }
+  return val->use_head->instr;
+}
+
+static void insert_after(Instr* i, Instr* after) {
+  after->prev = i;
+  after->next = i->next;
+  i->next = after;
+  if (after->next) {
+    after->next->prev = after;
+  }
+}
+
+static bool is_before_in_block(Instr* first, Instr* second, hir::Block* blk) {
+  for (auto s = blk->instr_head; s; s = s->next) {
+    if (s == first) {
+      return true;
+    } else if (s == second) {
+      return false;
+    }
+  }
+  return false;
+}
+
+static unsigned size_for_typename(TypeName type) {
+  switch (type) {
+    case INT8_TYPE:
+      return 1;
+    case INT16_TYPE:
+      return 2;
+    case INT32_TYPE:
+      return 4;
+    case INT64_TYPE:
+      return 8;
+    case FLOAT32_TYPE:
+      return 4;
+    case FLOAT64_TYPE:
+      return 8;
+    case VEC128_TYPE:
+      return 16;
+  }
+  xenia_assert(false);
+  return 0;
+}
+
+static Value* constant_for_type(HIRBuilder* builder, uint64_t v,
+                                TypeName type) {
+  switch (type) {
+    case INT8_TYPE:
+      return builder->LoadConstantUint8((uint8_t)v);
+    case INT16_TYPE:
+      return builder->LoadConstantUint16((uint16_t)v);
+    case INT32_TYPE:
+      return builder->LoadConstantUint32((uint32_t)v);
+    case INT64_TYPE:
+      return builder->LoadConstantUint64((uint64_t)v);
+  }
+  return nullptr;
+}
+
+static unsigned bitsize_for_typename(TypeName type) {
+  return size_for_typename(type) * CHAR_BIT;
+}
+
+static unsigned highbit_for_typename(TypeName type) {
+  return bitsize_for_typename(type) - 1;
+}
+
+static uint64_t mask_for_typename(TypeName type) {
+  unsigned v = size_for_typename(type);
+  xenia_assert(v <= 8);
+  if (v == 8) {
+    return ~0ULL;  // shifting 1ULL by 64 would be undefined behavior
+  }
+  return (1ULL << (CHAR_BIT * v)) - 1;
+}
+
+static bool is_const_x(Value* v, uint64_t val) {
+  return v->IsConstant() && extract_constant(v) == val;
+}
+
+static bool is_const_x(const Instr::Op& v, uint64_t val) {
+  return is_const_x(v.value, val);
+}
+
+static bool is_op(Instr* insn, Opcode opcode) {
+  return insn && insn->opcode && insn->opcode->num == opcode;
+}
+
+static bool has_any_intervening_memop(Instr* start, Instr* end) {
+  for (auto pos = start->next; pos != end; pos = pos->next) {
+    if (pos->opcode->flags & OPCODE_FLAG_MEMORY) return true;
+  }
+  return false;
+}
+static bool has_any_intervening_store(Instr* start, Instr* end) {
+  for (auto pos = start->next; pos != end; pos = pos->next) {
+    if (is_op(pos, OPCODE_STORE)) return true;
+  }
+  return false;
+}
+static bool is_rvalue(const Value* v) { return v && v->local_slot == nullptr; }
+static bool is_rvalue(const Instr::Op& v) {
+  return v.value && v.value->local_slot == nullptr;
+}
+
+static void make_assignment(Instr* insn, Value* from) {
+  insn->Replace(&OPCODE_ASSIGN_info, 0);
+  insn->set_src1(from);
+}
+
+static void make_assignment(Instr* insn, Instr::Op& from) {
+  make_assignment(insn, from.value);
+}
+
+static void add_store_to_bitset(ppc_ctx_vset_t* out, Instr* insn) {
+  out->set(insn->src1.offset);
+}
+
+static bool load_target_in_bitset(const ppc_ctx_vset_t* to_test, Instr* insn) {
+  if (!is_op(insn, OPCODE_LOAD_CONTEXT)) {
+    return false;
+  }
+  return to_test->test(insn->src1.offset);
+}
+
+static bool store_target_in_bitset(const ppc_ctx_vset_t* to_test,
+                                   Instr* insn) {
+  if (!is_op(insn, OPCODE_STORE_CONTEXT)) {
+    return false;
+  }
+  return to_test->test(insn->src1.offset);
+}
+static bool has_any_creg(const ppc_ctx_vset_t* b) {
+  for (auto&& bt : bitset_cregs) {
+    if (b->test(bt)) return true;
+  }
+  return false;
+}
+static Instr* find_next_local_store(Value* slot, hir::Block* blk, Instr* after,
+                                    Instr* end = nullptr) {
+  if (!after || after->next == end) {
+    return nullptr;
+  }
+
+  for (Instr* pos = after->next; pos != end; pos = pos->next) {
+    if (is_op(pos, OPCODE_STORE_LOCAL) && pos->src1.value == slot) {
+      return pos;
+    }
+  }
+
+  return nullptr;
+}
+
+static bool is_like_assign(Instr* insn) {
+  return is_op(insn, OPCODE_ASSIGN) || is_op(insn, OPCODE_SIGN_EXTEND) ||
+         is_op(insn, OPCODE_ZERO_EXTEND);
+}
+
+static bool produces_boolean(Instr* i) {
+  constexpr Opcode opcodes[] = {
+      OPCODE_IS_FALSE,    OPCODE_IS_TRUE,     OPCODE_COMPARE_EQ,
+      OPCODE_COMPARE_NE,  OPCODE_COMPARE_SLT, OPCODE_COMPARE_SLE,
+      OPCODE_COMPARE_SGT, OPCODE_COMPARE_SGE, OPCODE_COMPARE_ULT,
+      OPCODE_COMPARE_ULE, OPCODE_COMPARE_UGT, OPCODE_COMPARE_UGE,
+      OPCODE_DID_SATURATE,
+  };
+  for (auto&& op : opcodes) {
+    if (is_op(i, op)) return true;
+  }
+  return false;
+}
+
+static void replace_uses(HIRBuilder* builder, Value* to_replace, Value* with) {
+  for (auto start = to_replace->use_head; start;) {
+    // set_srcN() unlinks this use from to_replace's list, so advance first.
+    auto next = start->next;
+    auto i = start->instr;
+
+    if (i->src1.value == to_replace) {
+      i->set_src1(with);
+    }
+    if (i->src2.value == to_replace) {
+      i->set_src2(with);
+    }
+    if (i->src3.value == to_replace) {
+      i->set_src3(with);
+    }
+    start = next;
+  }
+}
+
+static bool is_any_call(Instr* i) {
+  return is_op(i, OPCODE_CALL) || is_op(i, OPCODE_CALL_EXTERN) ||
+         is_op(i, OPCODE_CALL_INDIRECT) ||
+         is_op(i, OPCODE_CALL_INDIRECT_TRUE) || is_op(i, OPCODE_CALL_TRUE) ||
+         is_op(i, OPCODE_DEBUG_BREAK) || is_op(i, OPCODE_DEBUG_BREAK_TRUE) ||
+         is_op(i, OPCODE_RETURN);
+}
+
+static void make_nop(Instr* i) {
+  i->Replace(&OPCODE_NOP_info, 0);
+  // i->Remove();
+}
+
+static Instr* find_next_local_load(Value* slot, hir::Block* blk, Instr* after,
+                                   Instr* end = nullptr) {
+  if (!after || after->next == end) {
+    return nullptr;
+  }
+
+  for (Instr* pos = after->next; pos != end; pos = pos->next) {
+    if (is_op(pos, OPCODE_LOAD_LOCAL) && pos->src1.value == slot) {
+      return pos;
+    } else if (is_op(pos, OPCODE_STORE_LOCAL) && pos->src1.value == slot) {
+      return nullptr;  // redefined before the next load
+    }
+  }
+
+  return nullptr;
+}
+
+enum contextuse_res_t { USED, REDEFED, INDET };
+
+// break_on_call's default is assumed; callers rely on it being implicit.
+template <bool break_on_call = true>
+static Instr* find_next_context_use(const ppc_ctx_vset_t* val, hir::Block* blk,
+                                    Instr* start, contextuse_res_t* redefed,
+                                    Instr* end = nullptr) {
+  if (!start) {
+    start = blk->instr_head;
+  }
+  *redefed = USED;
+  for (Instr* pos = start; pos != end; pos = pos->next) {
+    if (pos->opcode->flags & OPCODE_FLAG_VOLATILE) {
+      *redefed = INDET;
+
+      return nullptr;
+    }
+    if (break_on_call && is_any_call(pos)) {
+      *redefed = REDEFED;
+      return nullptr;
+    }
+    if (load_target_in_bitset(val, pos)) {
+      return pos;
+    } else if (store_target_in_bitset(val, pos)) {
+      *redefed = REDEFED;
+      return nullptr;
+    }
+  }
+  return nullptr;
+}
+
+static bool def_is_onebit(Instr* def) {
+  if (((is_op(def, OPCODE_AND) || is_op(def, OPCODE_XOR)) &&
+       def->src2.value->IsConstant() && def->src2.value->constant.u64 == 1) ||
+      produces_boolean(def)) {
+    return true;
+  }
+
+  if (is_op(def,
OPCODE_ASSIGN)) { + return def_is_onebit(def->src1.value->def); + } + + if (is_op(def, OPCODE_AND) || is_op(def, OPCODE_OR) || + is_op(def, OPCODE_XOR) || is_op(def, OPCODE_SHR) || + is_op(def, OPCODE_MUL)) { + return def_is_onebit(def->src1.value->def) && + def_is_onebit(def->src2.value->def); + } + + if (is_op(def, OPCODE_ZERO_EXTEND)) { + return def_is_onebit(def->src1.value->def); + } + + if (is_op(def, OPCODE_SIGN_EXTEND)) { + return def_is_onebit(def->src1.value->def); + } + + if (is_op(def, OPCODE_LOAD_CONTEXT) && def->dest->type == INT8_TYPE) { + ppc_ctx_vset_t bset{}; + + bset.set(def->src1.offset); + if (has_any_creg(&bset)) { + return true; + } + } + return false; +} + +static constexpr Opcode elimable_results[] = { + OPCODE_ADD, OPCODE_SUB, OPCODE_ASSIGN, + OPCODE_BYTE_SWAP, OPCODE_OR, OPCODE_XOR, + OPCODE_AND, OPCODE_NOT, OPCODE_NEG, + OPCODE_MUL, OPCODE_CONVERT, OPCODE_IS_FALSE, + OPCODE_IS_TRUE, OPCODE_COMPARE_EQ, OPCODE_COMPARE_NE, + OPCODE_COMPARE_SLT, OPCODE_COMPARE_SLE, OPCODE_COMPARE_SGT, + OPCODE_COMPARE_SGE, OPCODE_COMPARE_ULT, OPCODE_COMPARE_ULE, + OPCODE_COMPARE_UGT, OPCODE_COMPARE_UGE, OPCODE_DID_SATURATE, + OPCODE_DIV, OPCODE_SHL, OPCODE_SHR, + OPCODE_SHA, OPCODE_ABS, OPCODE_ADD_CARRY, + OPCODE_ZERO_EXTEND, OPCODE_SIGN_EXTEND, OPCODE_TRUNCATE, + OPCODE_CNTLZ, OPCODE_LOAD_LOCAL, OPCODE_LOAD, + OPCODE_LOAD_CONTEXT, OPCODE_LOG2, OPCODE_SQRT, + OPCODE_ROTATE_LEFT, OPCODE_ROUND + +}; +static bool is_elimable(Instr* i) { + for (auto&& opcode : elimable_results) { + if (is_op(i, opcode)) return true; + } + return false; +} +static bool nop_deleter(HIRBuilder* builder, Block* block) { + #if 0 + Instr* pos = block->instr_head; + bool did_change = false; + while (pos) { + Instr* nextdude = pos->next; + + if (is_op(pos, OPCODE_NOP)) { + pos->Remove(); + did_change = true; + } + pos = nextdude; + } + return did_change; + #else + return false; + #endif +} + +class optrule_t { + protected: + HIRBuilder* builder; + Block* block; + uint64_t n_runs; + + public: + optrule_t() : builder(nullptr), block(nullptr), n_runs(0) {} + virtual const char* name() = 0; + virtual bool run(Instr* insn) = 0; + uint64_t get_execs() const { return n_runs; } + bool exec(HIRBuilder* blder, Block* blk); +}; + +bool optrule_t::exec(HIRBuilder* blder, Block* blk) { + bool did_change = false; + block = blk; + builder = blder; + for (auto* insn = block->instr_head; insn; insn = insn->next) { + if (run(insn)) { + did_change = true; + ++n_runs; + } + } + return did_change; +} + +#define DECL_RULE(typenam, printname) \ + class typenam##_t : public optrule_t { \ + virtual const char* name() override { return printname; } \ + virtual bool run(Instr* insn); \ + }; \ + static typenam##_t typenam{}; + +#define IMPL_RULE(typenam) bool typenam##_t ::run(Instr* insn) + +#define RULE(typenam, printname) \ + DECL_RULE(typenam, printname); \ + IMPL_RULE(typenam) + +using optblock_pass_t = bool (*)(HIRBuilder*, Block*); + +#if 1 +#include "peephole_rules_impl.h" +#else +RULE(shr_shl_mask, + "Convert shift-left followed by shift-right to and constant") { + if (!is_op(insn, OPCODE_SHL) || !insn->src2.value->IsConstant()) return false; + + auto defuse = get_solo_use(insn->dest); + + if (!defuse || !is_op(defuse, OPCODE_SHR) || + !insn->src2.value->IsConstantEQ(defuse->src2.value)) + return false; + + uint64_t mask = mask_for_typename(insn->src1.value->type); + uint64_t shfactor = extract_constant(insn->src2.value); + uint64_t msk = mask; + mask <<= shfactor; + mask >>= shfactor; + mask &= msk; + auto oldsrc1 = 
insn->src1.value; + + auto maskval = constant_for_type(builder, mask, oldsrc1->type); + if (!maskval) return false; + + make_nop(insn); + + defuse->Replace(&OPCODE_AND_info, 0); + defuse->set_src1(oldsrc1); + defuse->set_src2(maskval); + return true; +} + +RULE(cvt_op_optimizer, "Remove redundant conversion pair") { + if (!is_op(insn, OPCODE_CONVERT)) { + return false; + } + if (!is_rvalue(insn->dest) || !is_rvalue(insn->src1)) return false; + + Instr* target_next_use = find_next_use(insn->dest, block, insn); + + if (!target_next_use) { + return false; + } + + if (!is_op(target_next_use, OPCODE_CONVERT)) return false; + + if (target_next_use->src1.value != insn->dest) return false; + + if (target_next_use->dest->type != insn->src1.value->type || + !is_rvalue(target_next_use->dest)) + return false; + + make_assignment(target_next_use, insn->src1); + return true; +} + +RULE(vector_shift_op, "Reuse load vector shift results") { + if (!is_op(insn, OPCODE_LOAD_VECTOR_SHR) && + !is_op(insn, OPCODE_LOAD_VECTOR_SHL)) { + return false; + } + + if (!is_rvalue(insn->dest) || !is_rvalue(insn->src1)) return false; + bool did_change = false; + for (Instr* nextu = find_next_use(insn->src1, block, insn); nextu; + nextu = find_next_use(insn->src1, block, nextu)) { + if (!nextu || nextu->opcode != insn->opcode) return false; + make_assignment(nextu, insn->dest); + did_change = true; + } + return did_change; +} + +RULE(useless_ctx_store_elim, "Eliminate useless context stores") { + if (!is_op(insn, OPCODE_STORE_CONTEXT)) return false; + ppc_ctx_vset_t trace{}; + add_store_to_bitset(&trace, insn); + + if (true && !has_any_creg(&trace)) { + return false; + } + contextuse_res_t redefed = USED; + auto next = + find_next_context_use(&trace, block, insn->next, &redefed); + + if (redefed == USED && !next) { + bool all_redef = true; + for (auto succ = block->outgoing_edge_head; succ; + succ = succ->outgoing_next) { + if (find_next_context_use(&trace, succ->dest, nullptr, + &redefed) && + redefed == REDEFED) { + } else { + all_redef = false; + break; + } + } + + if (!all_redef) { + return false; + } + + // continue; + } else if (redefed == INDET || redefed == USED) { + return false; + } + + make_nop(insn); + return true; +} +RULE(replace_assignment_uses, "Replace rvalue assignment uses") { + if (!is_op(insn, OPCODE_ASSIGN)) return false; + if (!is_rvalue(insn->dest) || !is_rvalue(insn->src1)) return false; + + replace_uses(builder, insn->dest, insn->src1.value); + + make_nop(insn); + return true; +} + +RULE(optimize_rep_loads, "Remove repeated loads") { + if (!is_op(insn, OPCODE_LOAD)) { + return false; + } + + if (!is_rvalue(insn->src1)) return false; + + auto n = find_next_use(insn->src1, block, insn); + + if (!n) return false; + + if (!is_op(n, OPCODE_LOAD)) return false; + + if (insn->dest->type != n->dest->type) return false; + + if (has_any_intervening_store(insn, n)) return false; + + make_assignment(n, insn->dest); + return true; +} + +RULE(optimize_and1, "Replace onebit istrue evaluation with evaluated term") { + if (!is_op(insn, OPCODE_IS_TRUE) && + !(is_op(insn, OPCODE_COMPARE_EQ) && is_const_x(insn->src2, 1ULL)) && + !(is_op(insn, OPCODE_COMPARE_NE) && is_const_x(insn->src2, 0ULL))) { + return false; + } + + auto def = insn->src1.value->def; + + if (def_is_onebit(def)) { + if (insn->src1.value->type == insn->dest->type) { + make_assignment(insn, insn->src1); + } else { + insn->Replace(&OPCODE_TRUNCATE_info, 0); + insn->set_src1(def->dest); + } + return true; + } + return false; +} + 
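+// The rule below folds a truncate of an extended value, e.g. (HIR sketch,
+// value names hypothetical):
+//   v1.i64 = zero_extend v0.i32
+//   v2.i32 = truncate v1.i64
+// Since the truncate restores the source width exactly, v2 becomes
+// assign v0.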
+RULE(truncate_extend_opt, "Remove useless truncate-extends") { + if (!is_op(insn, OPCODE_TRUNCATE)) return false; + + auto def = insn->src1.value->def; + + if (!is_op(def, OPCODE_ZERO_EXTEND) && !is_op(def, OPCODE_SIGN_EXTEND)) + return false; + + if (def->src1.value->type != insn->dest->type) return false; + + make_assignment(insn, def->src1); + return true; +} + +RULE(elim_unused_opres, "Eliminate operations with unused results") { + if (!is_elimable(insn)) return false; + + if (!insn->dest->use_head || insn->dest->use_head->instr == nullptr) { + make_nop(insn); + return true; + } + return false; +} + +RULE(elim_useless_op, "Eliminate useless operations") { + if (is_op(insn, OPCODE_ADD) || is_op(insn, OPCODE_SUB) || + is_op(insn, OPCODE_OR) || is_op(insn, OPCODE_XOR)) { + if (is_const_x(insn->src2, 0)) { + make_assignment(insn, insn->src1); + return true; + } + } + + if (is_op(insn, OPCODE_AND) && + is_const_x(insn->src2, mask_for_typename(insn->src1.value->type))) { + make_assignment(insn, insn->src1); + return true; + } + + if ((is_op(insn, OPCODE_DIV) || is_op(insn, OPCODE_MUL)) && + is_const_x(insn->src2, 1ULL)) { + make_assignment(insn, insn->src1); + return true; + } + return false; +} + +RULE(rol_to_shl, "Convert rotates to shifts") { + if (!is_op(insn, OPCODE_ROTATE_LEFT)) return false; + + if (!insn->src2.value->IsConstant()) return false; + + auto defed_value = insn->dest; + + auto nxtuse = get_solo_use(defed_value); + if (!nxtuse) return false; + + if (!is_op(nxtuse, OPCODE_AND)) return false; + + if (!nxtuse->src2.value->IsConstant()) return false; + + uint64_t v = extract_constant(nxtuse->src2.value); + + uint64_t rotamt = extract_constant(insn->src2.value); + + if ((v & ((1ULL << rotamt) - 1)) == 0ULL) { + // the bits rotated in to the left are discarded, can be shl + insn->opcode = &OPCODE_SHL_info; + return true; + } + return false; +} + +RULE(remove_useless_or, "Remove useless or if bits are not used") { + if (!is_op(insn, OPCODE_SHL) || !insn->src2.value->IsConstant()) return false; + + auto solo_use_shl_result = get_solo_use(insn->dest); + + if (!solo_use_shl_result) return false; + + if (!is_op(solo_use_shl_result, OPCODE_OR) || + solo_use_shl_result->src1.value != insn->dest) { + return false; + } + + auto solo_use_or_result = get_solo_use(solo_use_shl_result->dest); + if (!solo_use_or_result) return false; + + auto mask = mask_for_typename(insn->dest->type); + + mask <<= extract_constant(insn->src2.value); + + + if (is_op(solo_use_or_result, OPCODE_AND) && + solo_use_or_result->src2.value->IsConstant()) { + auto newmask = extract_constant(solo_use_or_result->src2.value); + + if ((newmask & mask) == 0) { + // none of our shl bits survive + make_nop(insn); + auto newandoper = solo_use_shl_result->src2.value; + + // make_nop(solo_use_shl_result); + make_assignment(solo_use_shl_result, solo_use_shl_result->src2); + return true; + } + } + return false; +} + +RULE(signbit_rol_opt, "Optimize rotates to extract signbit") { + if (!is_op(insn, OPCODE_ROTATE_LEFT) || !is_const_x(insn->src2, 1ULL)) + return false; + + auto solo_use = get_solo_use(insn->dest); + if (!solo_use) return false; + + if (!is_op(solo_use, OPCODE_AND) || !is_const_x(solo_use->src2, 1ULL)) + return false; + + Value* shift_constant = constant_for_type( + builder, highbit_for_typename(insn->dest->type), INT8_TYPE); + if (!shift_constant) return false; + + solo_use->Replace(&OPCODE_SHR_info, 0); + + solo_use->set_src1(insn->src1.value); + solo_use->set_src2(shift_constant); + make_nop(insn); + return true; 
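+  // rotl(x, 1) & 1 == x >> (nbits - 1) (the sign bit), which is why the
+  // rotate/and pair above collapses into a single shr.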
+} + +RULE(bitextract_by_rol_opt, "Optimize rotates intended to extract one bit") { + if (!is_op(insn, OPCODE_ROTATE_LEFT) || !insn->src2.value->IsConstant()) + return false; + + auto defuse = get_solo_use(insn->dest); + + if (!defuse) return false; + + if (!is_op(defuse, OPCODE_AND) || !is_const_x(defuse->src2, 1)) return false; + + unsigned bitsz = highbit_for_typename(insn->dest->type); + + uint64_t realtarget = + + (bitsz + 1) - extract_constant(insn->src2.value); + auto old_inp = insn->src1.value; + + Value* shrconst = constant_for_type(builder, realtarget, INT8_TYPE); + + if (!shrconst) return false; + insn->Replace(&OPCODE_SHR_info, 0); + + insn->set_src1(old_inp); + insn->set_src2(shrconst); + return true; +} + +RULE(optimize_redundant_local_load, "Optimize away redundant local loads") { + if (!is_op(insn, OPCODE_LOAD_LOCAL)) { + return false; + } + + auto next_load = find_next_local_load(insn->src1.value, block, insn); + + if (!next_load) return false; + + make_assignment(next_load, insn->dest); + return true; +} +static optrule_t* g_optrules[] = {&shr_shl_mask, &cvt_op_optimizer, &vector_shift_op, + &useless_ctx_store_elim, &replace_assignment_uses, &optimize_rep_loads, + &optimize_and1, &truncate_extend_opt, &elim_unused_opres, + &elim_useless_op, &remove_useless_or, &signbit_rol_opt, + &bitextract_by_rol_opt, + &optimize_redundant_local_load}; + + +static bool run_optrules(HIRBuilder* builder, Block* block) { + bool did_change = false; + for (auto&& rule : g_optrules) { + did_change |= rule->exec(builder, block); + } + return did_change; +} + +void dump_opts() { + FILE* lel = fopen("Optdump.txt", "w"); + + for (auto&& rule : g_optrules) { + fprintf(lel, "(%s) matched and optimized %lld times.\n", rule->name(), + rule->get_execs()); + } + fclose(lel); +} +static const optblock_pass_t g_passes[] = {run_optrules, nop_deleter}; +#endif + +static bool did_atexit = false; + +bool RepetitiveComputationMergerPass::RunPerBlock(hir::HIRBuilder* builder, + hir::Block* block) { + bool did_change = false; + for (auto&& pass : g_passes) { + did_change |= pass(builder, block); + } + return did_change; +} +bool RepetitiveComputationMergerPass::Run(hir::HIRBuilder* builder) { + if (!did_atexit) { + // atexit(dump_opts); + did_atexit = true; + } + auto block = builder->first_block(); + + while (block) { + // rerun until no changes + while (RunPerBlock(builder, block)) + ; + + block = block->next; + } + return true; +} +} // namespace passes +} // namespace compiler +} // namespace cpu +} // namespace xe \ No newline at end of file diff --git a/src/xenia/cpu/compiler/passes/repetitive_computation_merger_pass.h b/src/xenia/cpu/compiler/passes/repetitive_computation_merger_pass.h new file mode 100644 index 000000000..51d5a5147 --- /dev/null +++ b/src/xenia/cpu/compiler/passes/repetitive_computation_merger_pass.h @@ -0,0 +1,36 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_CPU_COMPILER_PASSES_REPETITIVE_COMPUTATION_MERGER_PASS_H_ +#define XENIA_CPU_COMPILER_PASSES_REPETITIVE_COMPUTATION_MERGER_PASS_H_ + +#include "xenia/cpu/compiler/compiler_pass.h" + +namespace xe { +namespace cpu { +namespace compiler { +namespace passes { + +class RepetitiveComputationMergerPass : public CompilerPass { + public: + RepetitiveComputationMergerPass(); + ~RepetitiveComputationMergerPass() override; + + bool Run(hir::HIRBuilder* builder) override; + + private: + bool RunPerBlock(hir::HIRBuilder* builder, hir::Block* block); +}; + +} // namespace passes +} // namespace compiler +} // namespace cpu +} // namespace xe + +#endif // XENIA_CPU_COMPILER_PASSES_REPETITIVE_COMPUTATION_MERGER_PASS_H_
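
A quick standalone way to sanity-check magicu() against plain division (a
sketch only; check_magicu() and its sampling loop are hypothetical test
scaffolding that assumes the magicu() template from the first hunk is
visible):

    #include <cassert>
    #include <cstdint>

    static void check_magicu(uint32_t d) {
      auto magu = magicu<uint32_t>(d);
      if (magu.a) return;  // the emitter also rejects the a == 1 case for now
      for (uint64_t n = 0; n <= 0xFFFFFFFFull; n += 65537) {
        // q = high 32 bits of n * M, then shifted right by s.
        uint32_t q =
            (uint32_t)(((uint64_t)(uint32_t)n * magu.M) >> 32) >> magu.s;
        assert(q == (uint32_t)n / d);
      }
    }
    // e.g. d == 10 yields M == 0xCCCCCCCD, a == 0, s == 3.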