Merge pull request #47 from chrisps/canary_experimental
drastically reduce size of final generated code for rlwinm by adding …
This commit is contained in:
commit f8f6a20569
@@ -106,7 +106,16 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator)
#undef TEST_EMIT_FEATURE
  /*
    fix for an xbyak bug/omission: AMD CPUs are never checked for lzcnt.
    Fixed in the latest version of xbyak.
  */
  unsigned int data[4];
  Xbyak::util::Cpu::getCpuid(0x80000001, data);
  if (data[2] & (1U << 5)) {
    if ((cvars::x64_extension_mask & kX64EmitLZCNT) == kX64EmitLZCNT) {
      feature_flags_ |= kX64EmitLZCNT;
    }
  }
  if (cpu_.has(Xbyak::util::Cpu::tAMD)) {
    bool is_zennish = cpu_.displayFamily >= 0x17;
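For readers unfamiliar with this CPUID leaf: bit 5 of ECX in leaf 0x80000001 is AMD's ABM/LZCNT feature bit, which is what the raw getCpuid call above tests. A minimal standalone sketch of the same query (x86-only; CpuHasLzcnt is a hypothetical helper, and the MSVC/GCC intrinsics differ as shown):

#include <cstdint>
#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <cpuid.h>
#endif

// Hypothetical helper: returns true if the CPU reports LZCNT/ABM support via
// extended leaf 0x80000001, ECX bit 5 (the same bit checked above).
static bool CpuHasLzcnt() {
  unsigned int regs[4] = {0, 0, 0, 0};
#if defined(_MSC_VER)
  __cpuid(reinterpret_cast<int*>(regs), 0x80000001);
#else
  __get_cpuid(0x80000001, &regs[0], &regs[1], &regs[2], &regs[3]);
#endif
  return (regs[2] & (1U << 5)) != 0;  // regs[2] holds ECX
}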
@@ -2749,11 +2749,17 @@ struct AND_I32 : Sequence<AND_I32, I<OPCODE_AND, I32Op, I32Op, I32Op>> {
};
struct AND_I64 : Sequence<AND_I64, I<OPCODE_AND, I64Op, I64Op, I64Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    if (i.src2.is_constant && i.src2.constant() == 0xFFFFFFFF) {
      // special case for rlwinm codegen: AND with 0xFFFFFFFF is just a 32-bit
      // mov, which implicitly zeroes the upper 32 bits
      e.mov(((Reg64)i.dest).cvt32(), ((Reg64)i.src1).cvt32());
    } else {
      EmitAndXX<AND_I64, Reg64>(e, i);
    }
  }
};
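A quick aside on why that special case is a single instruction (my note, not part of the diff): on x86-64, writing a 32-bit register zeroes the upper 32 bits of the full register, so AND-ing with 0xFFFFFFFF reduces to a 32-bit mov. A trivial standalone check of the equivalent C semantics:

#include <cassert>
#include <cstdint>

// Equivalent C semantics of the emitted `mov dest32, src32`:
// the upper 32 bits of the result are zero.
static uint64_t and_low32(uint64_t src) {
  return src & 0xFFFFFFFFull;  // compilers emit a plain 32-bit mov for this
}

int main() {
  assert(and_low32(0x123456789ABCDEF0ull) == 0x9ABCDEF0ull);
  return 0;
}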
struct AND_V128 : Sequence<AND_V128, I<OPCODE_AND, V128Op, V128Op, V128Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    EmitCommutativeBinaryXmmOp(e, i,
                               [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
                                 e.vpand(dest, src1, src2);
@@ -9,8 +9,8 @@

#include "xenia/cpu/compiler/passes/simplification_pass.h"

#include "xenia/base/byte_order.h"
#include "xenia/base/profiling.h"

namespace xe {
namespace cpu {
namespace compiler {
@@ -29,11 +29,241 @@ SimplificationPass::~SimplificationPass() {}

bool SimplificationPass::Run(HIRBuilder* builder, bool& result) {
  result = false;
  result |= SimplifyBitArith(builder);
  result |= EliminateConversions(builder);
  result |= SimplifyAssignments(builder);
  return true;
}
// simplifications that apply to both OR and XOR
bool SimplificationPass::CheckOrXorZero(hir::Instr* i) {
  auto [constant_value, variable_value] = i->BinaryValueArrangeAsConstAndVar();

  if (constant_value && constant_value->IsConstantZero()) {
    i->Replace(&OPCODE_ASSIGN_info, 0);
    i->set_src1(variable_value);
    return true;
  }
  return false;
}
bool SimplificationPass::CheckOr(hir::Instr* i) { return CheckOrXorZero(i); }
bool SimplificationPass::CheckXor(hir::Instr* i) {
  if (CheckOrXorZero(i)) {
    return true;
  } else {
    uint64_t type_mask = GetScalarTypeMask(i->dest->type);

    auto [constant_value, variable_value] =
        i->BinaryValueArrangeAsConstAndVar();

    if (!constant_value) return false;

    if (constant_value->AsUint64() == type_mask) {
      i->Replace(&OPCODE_NOT_info, 0);
      i->set_src1(variable_value);
      return true;
    }
  }
  return false;
}
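A tiny illustration of the XOR rewrite above (mine, not from the commit): XOR-ing a value with the full type mask is the same as bitwise NOT, which is why the instruction is replaced with OPCODE_NOT:

#include <cstdint>

// x ^ all_ones == ~x for any fixed-width unsigned type.
static_assert((uint8_t)(0x5Au ^ 0xFFu) == (uint8_t)~0x5Au,
              "xor with the 8-bit type mask is NOT");
static_assert((0x12345678u ^ 0xFFFFFFFFu) == ~0x12345678u,
              "xor with the 32-bit type mask is NOT");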
bool SimplificationPass::Is1BitOpcode(hir::Opcode def_opcode) {
  return def_opcode >= OPCODE_IS_TRUE && def_opcode <= OPCODE_DID_SATURATE;
}
uint64_t SimplificationPass::GetScalarNZM(hir::Value* value, hir::Instr* def,
                                          uint64_t typemask,
                                          hir::Opcode def_opcode) {
  if (def_opcode == OPCODE_SHL) {
    hir::Value* shifted = def->src1.value;
    hir::Value* shiftby = def->src2.value;
    // todo: nzm shift
    if (shiftby->IsConstant()) {
      uint64_t shifted_nzm = GetScalarNZM(shifted);
      return shifted_nzm << shiftby->AsUint64();
    }
  } else if (def_opcode == OPCODE_SHR) {
    hir::Value* shifted = def->src1.value;
    hir::Value* shiftby = def->src2.value;
    // todo: nzm shift
    if (shiftby->IsConstant()) {
      uint64_t shifted_nzm = GetScalarNZM(shifted);
      return shifted_nzm >> shiftby->AsUint64();
    }
  }
  // todo: sha, check signbit
  else if (def_opcode == OPCODE_ROTATE_LEFT) {
    hir::Value* shifted = def->src1.value;
    hir::Value* shiftby = def->src2.value;
    // todo: nzm shift
    if (shiftby->IsConstant()) {
      uint64_t shifted_nzm = GetScalarNZM(shifted);
      return xe::rotate_left(shifted_nzm,
                             static_cast<uint8_t>(shiftby->AsUint64()));
    }
  } else if (def_opcode == OPCODE_XOR || def_opcode == OPCODE_OR) {
    return GetScalarNZM(def->src1.value) | GetScalarNZM(def->src2.value);
  } else if (def_opcode == OPCODE_NOT) {
    return typemask;
  } else if (def_opcode == OPCODE_ASSIGN) {
    return GetScalarNZM(def->src1.value);
  } else if (def_opcode == OPCODE_BYTE_SWAP) {
    uint64_t input_nzm = GetScalarNZM(def->src1.value);
    switch (GetTypeSize(def->dest->type)) {
      case 1:
        return input_nzm;
      case 2:
        return xe::byte_swap<unsigned short>(
            static_cast<unsigned short>(input_nzm));
      case 4:
        return xe::byte_swap<unsigned int>(
            static_cast<unsigned int>(input_nzm));
      case 8:
        return xe::byte_swap<unsigned long long>(input_nzm);
      default:
        xenia_assert(0);
        return typemask;
    }
  } else if (def_opcode == OPCODE_ZERO_EXTEND) {
    return GetScalarNZM(def->src1.value);
  } else if (def_opcode == OPCODE_TRUNCATE) {
    return GetScalarNZM(def->src1.value);  // caller will truncate by masking
  } else if (def_opcode == OPCODE_AND) {
    return GetScalarNZM(def->src1.value) & GetScalarNZM(def->src2.value);
  } else if (def_opcode == OPCODE_SELECT) {
    return GetScalarNZM(def->src2.value) | GetScalarNZM(def->src3.value);
  } else if (def_opcode == OPCODE_MIN) {
    /*
      the nzm would be that of the narrower operand, because if one value is
      capable of being much larger than the other it can never actually reach
      a value that is outside the range of the other value's nzm; that would
      make it not the minimum of the two.

      ahh, actually, we have to be careful about constants then... for now,
      just return the OR of both.
    */
    return GetScalarNZM(def->src2.value) | GetScalarNZM(def->src1.value);
  } else if (def_opcode == OPCODE_MAX) {
    return GetScalarNZM(def->src2.value) | GetScalarNZM(def->src1.value);
  } else if (Is1BitOpcode(def_opcode)) {
    return 1ULL;
  } else if (def_opcode == OPCODE_CAST) {
    return GetScalarNZM(def->src1.value);
  }

  return typemask;
}
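To make the NZM ("maybe non-zero mask") rules above concrete, here is a small standalone sketch using plain integers as stand-ins for hir::Value nodes (helper names are mine): SHL shifts the mask, AND intersects it, OR unions it, which is exactly the information CheckAnd exploits below.

#include <cassert>
#include <cstdint>

// Simplified stand-ins for the propagation rules above, operating directly on
// masks instead of hir::Value nodes.
static uint64_t NzmShl(uint64_t src_nzm, uint64_t amount) { return src_nzm << amount; }
static uint64_t NzmAnd(uint64_t a_nzm, uint64_t b_nzm) { return a_nzm & b_nzm; }
static uint64_t NzmOr(uint64_t a_nzm, uint64_t b_nzm) { return a_nzm | b_nzm; }

int main() {
  // An 8-bit value zero-extended to 64 bits can only have bits 0..7 set.
  uint64_t byte_nzm = 0xFFull;
  // Shifting it left by 8 moves the possibly-set bits to 8..15.
  uint64_t shifted_nzm = NzmShl(byte_nzm, 8);
  assert(shifted_nzm == 0xFF00ull);
  // AND narrows the mask, OR unions it.
  assert(NzmAnd(shifted_nzm, 0xF0F0ull) == 0xF000ull);
  assert(NzmOr(shifted_nzm, byte_nzm) == 0xFFFFull);
  return 0;
}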
uint64_t SimplificationPass::GetScalarNZM(hir::Value* value) {
  if (value->IsConstant()) {
    return value->AsUint64();
  }

  uint64_t default_return = GetScalarTypeMask(value->type);

  hir::Instr* def = value->def;
  if (!def) {
    return default_return;
  }
  return GetScalarNZM(value, def, default_return, def->opcode->num) &
         default_return;
}
bool SimplificationPass::CheckAnd(hir::Instr* i) {
retry_and_simplification:
  auto [constant_value, variable_value] = i->BinaryValueArrangeAsConstAndVar();
  if (!constant_value) return false;

  // todo: check if masking with a mask that covers all of the zero-extension
  // source
  uint64_t type_mask = GetScalarTypeMask(i->dest->type);
  // if masking with the entire width, the instruction is pointless, so turn it
  // into an assign

  if (constant_value->AsUint64() == type_mask) {
    i->Replace(&OPCODE_ASSIGN_info, 0);
    i->set_src1(variable_value);
    return true;
  }

  auto variable_def = variable_value->def;

  if (variable_def) {
    auto true_variable_def = variable_def->GetDestDefSkipAssigns();
    if (true_variable_def) {
      if (true_variable_def->opcode == &OPCODE_AND_info) {
        auto [variable_def_constant, variable_def_variable] =
            true_variable_def->BinaryValueArrangeAsConstAndVar();

        if (variable_def_constant) {
          // todo: check if masked with a mask that was a subset of the current
          // one and eliminate if so
          if (variable_def_constant->AsUint64() == constant_value->AsUint64()) {
            // we already masked the input with the same mask
            i->Replace(&OPCODE_ASSIGN_info, 0);
            i->set_src1(variable_value);
            return true;
          }
        }
      } else if (true_variable_def->opcode == &OPCODE_OR_info) {
        Value* or_left = true_variable_def->src1.value;
        Value* or_right = true_variable_def->src2.value;

        uint64_t left_nzm = GetScalarNZM(or_left);

        // use the other OR input instead of the OR output
        if ((constant_value->AsUint64() & left_nzm) == 0) {
          i->Replace(&OPCODE_AND_info, 0);
          i->set_src1(or_right);
          i->set_src2(constant_value);
          return true;
        }

        uint64_t right_nzm = GetScalarNZM(or_right);

        if ((constant_value->AsUint64() & right_nzm) == 0) {
          i->Replace(&OPCODE_AND_info, 0);
          i->set_src1(or_left);
          i->set_src2(constant_value);
          return true;
        }
      } else if (true_variable_def->opcode == &OPCODE_ROTATE_LEFT_info) {
        if (true_variable_def->src2.value->IsConstant()) {
          if (((type_mask << true_variable_def->src2.value->AsUint64()) &
               type_mask) ==
              constant_value->AsUint64()) {  // rotated bits are unused; convert
                                             // to shift if we are the only use
            if (true_variable_def->dest->use_head->next == nullptr) {
              // one use, convert to shift
              true_variable_def->opcode = &OPCODE_SHL_info;
              goto retry_and_simplification;
            }
          }
        }
      }
    }
  }

  return false;
}
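Two of the CheckAnd rewrites deserve a worked example (my own illustration, not emulator code): the OR-operand rule drops an OR input whose NZM does not intersect the AND mask, and the rotate rule treats a masked rotate-left as a plain shift when every wrapped-in bit is masked away.

#include <cassert>
#include <cstdint>

int main() {
  // OR-operand rule: if the AND mask shares no bits with one OR input's NZM,
  // that input cannot affect the result.
  uint64_t hi = 0xAB00ull;    // NZM 0xFF00: only bits 8..15 can be set
  uint64_t lo = 0x00CDull;    // NZM 0x00FF: only bits 0..7 can be set
  uint64_t mask = 0x00FFull;  // mask & 0xFF00 == 0, so `hi` is irrelevant
  assert(((hi | lo) & mask) == (lo & mask));

  // Rotate-left rule: if the mask discards every bit wrapped in from the top,
  // the rotate behaves exactly like a shift left.
  uint32_t x = 0xDEADBEEFu;
  uint32_t rot = (x << 8) | (x >> 24);  // rotate left by 8
  uint32_t rl_mask = 0xFFFFFF00u;       // == (type_mask << 8) & type_mask
  assert((rot & rl_mask) == ((x << 8) & rl_mask));
  return 0;
}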
bool SimplificationPass::SimplifyBitArith(hir::HIRBuilder* builder) {
  bool result = false;
  auto block = builder->first_block();
  while (block) {
    auto i = block->instr_head;
    while (i) {
      // vector types use the same opcodes as scalar ones for AND/OR/XOR! we
      // don't handle these in our simplifications, so skip
      if (i->dest && i->dest->type != VEC128_TYPE) {
        if (i->opcode == &OPCODE_OR_info) {
          result |= CheckOr(i);
        } else if (i->opcode == &OPCODE_XOR_info) {
          result |= CheckXor(i);
        } else if (i->opcode == &OPCODE_AND_info) {
          result |= CheckAnd(i);
        }
      }
      i = i->next;
    }
    block = block->next;
  }
  return result;
}
bool SimplificationPass::EliminateConversions(HIRBuilder* builder) {
  // First, we check for truncates/extensions that can be skipped.
  // This generates some assignments which the second step then cleans up.
@@ -158,6 +388,7 @@ bool SimplificationPass::SimplifyAssignments(HIRBuilder* builder) {
        i->set_src3(CheckValue(i->src3.value, modified));
        result |= modified;
      }

      i = i->next;
    }
    block = block->next;
@@ -31,6 +31,19 @@ class SimplificationPass : public ConditionalGroupSubpass {

  bool SimplifyAssignments(hir::HIRBuilder* builder);
  hir::Value* CheckValue(hir::Value* value, bool& result);
  bool SimplifyBitArith(hir::HIRBuilder* builder);
  // handles either OR or XOR with 0
  bool CheckOrXorZero(hir::Instr* i);
  bool CheckOr(hir::Instr* i);
  bool CheckXor(hir::Instr* i);
  bool CheckAnd(hir::Instr* i);
  static bool Is1BitOpcode(hir::Opcode def_opcode);
  static uint64_t GetScalarNZM(hir::Value* value, hir::Instr* def,
                               uint64_t typemask, hir::Opcode def_opcode);
  // todo: use ValueMask
  // returns the maybe-nonzero mask for value (the mask of bits that may
  // possibly hold information)
  static uint64_t GetScalarNZM(hir::Value* value);
};

}  // namespace passes
@@ -114,7 +114,20 @@ void Instr::Remove() {
    block->instr_tail = prev;
  }
}
Instr* Instr::GetDestDefSkipAssigns() {
  Instr* current_def = this;

  while (current_def->opcode == &OPCODE_ASSIGN_info) {
    Instr* next_def = current_def->src1.value->def;

    if (!next_def) {
      return nullptr;
    }

    current_def = next_def;
  }
  return current_def;
}
}  // namespace hir
}  // namespace cpu
}  // namespace xe
@@ -59,6 +59,52 @@ class Instr {
  void MoveBefore(Instr* other);
  void Replace(const OpcodeInfo* new_opcode, uint16_t new_flags);
  void Remove();

  template <typename TPredicate>
  std::pair<Value*, Value*> BinaryValueArrangeByPredicateExclusive(
      TPredicate&& pred) {
    auto src1_value = src1.value;
    auto src2_value = src2.value;
    if (!src1_value || !src2_value) return {nullptr, nullptr};

    if (!opcode) return {nullptr, nullptr};  // impossible!

    // check that this is a binary opcode taking two values; we don't care
    // whether the dest is a value
    if (!IsOpcodeBinaryValue(opcode->signature)) return {nullptr, nullptr};

    if (pred(src1_value)) {
      if (pred(src2_value)) {
        return {nullptr, nullptr};
      } else {
        return {src1_value, src2_value};
      }
    } else if (pred(src2_value)) {
      return {src2_value, src1_value};
    } else {
      return {nullptr, nullptr};
    }
  }

  /*
    if src1 is constant and src2 is not, return [src1, src2]
    if src2 is constant and src1 is not, return [src2, src1]
    if neither is constant, return {nullptr, nullptr}
    if both are constant, return {nullptr, nullptr}
  */
  std::pair<Value*, Value*> BinaryValueArrangeAsConstAndVar() {
    return BinaryValueArrangeByPredicateExclusive(
        [](Value* value) { return value->IsConstant(); });
  }
  std::pair<Value*, Value*> BinaryValueArrangeByDefiningOpcode(
      const OpcodeInfo* op_ptr) {
    return BinaryValueArrangeByPredicateExclusive([op_ptr](Value* value) {
      return value->def && value->def->opcode == op_ptr;
    });
  }

  Instr* GetDestDefSkipAssigns();
};

}  // namespace hir
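The "exclusive" arrangement above is a small generic pattern: return the operands ordered as (matching, other) only when exactly one of them satisfies the predicate. A standalone sketch of the same idea on plain values (names are mine, not part of the header):

#include <cassert>
#include <optional>
#include <utility>

// Returns {matching, other} if exactly one of a, b satisfies pred; otherwise nothing.
template <typename T, typename TPredicate>
std::optional<std::pair<T, T>> ArrangeByPredicateExclusive(T a, T b, TPredicate pred) {
  bool pa = pred(a), pb = pred(b);
  if (pa == pb) return std::nullopt;  // none or both match: ambiguous, bail out
  return pa ? std::make_pair(a, b) : std::make_pair(b, a);
}

int main() {
  auto is_even = [](int v) { return v % 2 == 0; };
  assert(ArrangeByPredicateExclusive(3, 8, is_even)->first == 8);   // 8 matches, so it comes first
  assert(!ArrangeByPredicateExclusive(2, 8, is_even).has_value());  // both match: rejected
  return 0;
}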
@@ -347,6 +347,10 @@ enum OpcodeSignature {
#define GET_OPCODE_SIG_TYPE_SRC1(sig) (OpcodeSignatureType)((sig >> 3) & 0x7)
#define GET_OPCODE_SIG_TYPE_SRC2(sig) (OpcodeSignatureType)((sig >> 6) & 0x7)
#define GET_OPCODE_SIG_TYPE_SRC3(sig) (OpcodeSignatureType)((sig >> 9) & 0x7)
static bool IsOpcodeBinaryValue(uint32_t signature) {
  return (signature & ~(0x7)) ==
         ((OPCODE_SIG_TYPE_V << 3) | (OPCODE_SIG_TYPE_V << 6));
}

typedef struct {
  uint32_t flags;
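Per the GET_OPCODE_SIG_TYPE_* macros above, the signature packs the dest type in bits 0..2 and the source types in bits 3..5, 6..8 and 9..11, so masking off the low 3 bits and comparing against two value fields means "two value sources, no third source, any dest". A quick standalone check of that bit arithmetic (the enum values here are stand-ins, not the real OpcodeSignatureType):

#include <cassert>
#include <cstdint>

// Assumed stand-ins; the real OpcodeSignatureType lives in opcodes.h.
enum SigType : uint32_t { SIG_TYPE_X = 0, SIG_TYPE_V = 4 };

constexpr uint32_t MakeSig(uint32_t dest, uint32_t src1, uint32_t src2, uint32_t src3) {
  return dest | (src1 << 3) | (src2 << 6) | (src3 << 9);
}

int main() {
  // "Binary value" means: src1 and src2 are values, src3 is unused; the dest
  // field (low 3 bits) is ignored by the comparison.
  uint32_t sig = MakeSig(SIG_TYPE_V, SIG_TYPE_V, SIG_TYPE_V, SIG_TYPE_X);
  assert((sig & ~0x7u) == ((SIG_TYPE_V << 3) | (SIG_TYPE_V << 6)));
  return 0;
}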
@@ -57,6 +57,15 @@ inline size_t GetTypeSize(TypeName type_name) {
      return 0;
  }
}
inline uint64_t GetScalarTypeMask(TypeName type_name) {
  size_t mask_width = GetTypeSize(type_name);

  if (mask_width == 8) {
    return ~0ULL;
  } else {
    return (1ULL << (mask_width * CHAR_BIT)) - 1;
  }
}

enum ValueFlags {
  VALUE_IS_CONSTANT = (1 << 1),
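One note on the 8-byte special case (my comment, not the commit's): shifting a 64-bit integer by 64 is undefined behavior in C++, hence the separate ~0ULL branch. The resulting masks, for illustration:

#include <cstdint>

// mask_width in bytes -> type mask, same arithmetic as GetScalarTypeMask.
static_assert(((1ULL << (1 * 8)) - 1) == 0xFFull, "1-byte mask");
static_assert(((1ULL << (2 * 8)) - 1) == 0xFFFFull, "2-byte mask");
static_assert(((1ULL << (4 * 8)) - 1) == 0xFFFFFFFFull, "4-byte mask");
// 8-byte types take the ~0ULL branch: 1ULL << 64 would be undefined behavior.
static_assert(~0ULL == 0xFFFFFFFFFFFFFFFFull, "8-byte mask");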
@@ -68,6 +77,23 @@ struct RegAssignment {
  int32_t index;
};

struct ValueMask {
  uint64_t low;   // low 64 bits, usually for scalar values
  uint64_t high;  // high 64 bits, only used for vector types

  ValueMask(uint64_t _low, uint64_t _high) : low(_low), high(_high) {}

  ValueMask operator&(ValueMask other) const {
    return ValueMask{low & other.low, high & other.high};
  }
  ValueMask operator|(ValueMask other) const {
    return ValueMask{low | other.low, high | other.high};
  }
  ValueMask operator^(ValueMask other) const {
    return ValueMask{low ^ other.low, high ^ other.high};
  }
};

class Value {
 public:
  typedef struct Use_s {
@@ -1023,6 +1023,17 @@ int InstrEmit_rlwimix(PPCHIRBuilder& f, const InstrData& i) {
  }
  return 0;
}
static bool InstrCheck_rlx_only_needs_low(unsigned rotation, uint64_t mask) {
  uint32_t mask32 = static_cast<uint32_t>(mask);
  if (static_cast<uint64_t>(mask32) != mask) {
    return false;
  }
  uint32_t all_ones_32 = ~0U;
  all_ones_32 <<= rotation;

  return all_ones_32 == mask32;  // mask fits in 32 bits and discards every bit
                                 // that the rotation wraps into the low end
}

int InstrEmit_rlwinmx(PPCHIRBuilder& f, const InstrData& i) {
  // n <- SH
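To spell out what that predicate accepts (a worked example of mine): for rotation = 8, ~0u << 8 is 0xFFFFFF00, so a 32-bit mask equal to 0xFFFFFF00 passes, because the mask discards exactly the bits the rotation wraps into the low end; masks that keep wrapped bits or extend past 32 bits do not.

#include <cassert>
#include <cstdint>

// Same arithmetic as InstrCheck_rlx_only_needs_low, written inline:
// accept only masks that fit in 32 bits and equal ~0u << rotation.
static bool OnlyNeedsLow(unsigned rotation, uint64_t mask) {
  uint32_t mask32 = static_cast<uint32_t>(mask);
  if (static_cast<uint64_t>(mask32) != mask) return false;
  return (~0u << rotation) == mask32;
}

int main() {
  assert(OnlyNeedsLow(8, 0xFFFFFF00ull));     // e.g. rlwinm rd, rs, 8, 0, 23 (slwi-like)
  assert(!OnlyNeedsLow(8, 0x0000FF00ull));    // keeps only some of the shifted bits
  assert(!OnlyNeedsLow(8, 0xFFFFFF00FFull));  // mask does not fit in 32 bits
  return 0;
}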
@@ -1031,23 +1042,47 @@ int InstrEmit_rlwinmx(PPCHIRBuilder& f, const InstrData& i) {
  // RA <- r & m
  Value* v = f.LoadGPR(i.M.RT);
  unsigned rotation = i.M.SH;
  uint64_t m = XEMASK(i.M.MB + 32, i.M.ME + 32);

  // in uint32 range (so no register concat/truncate/zx needed) and no rotation
  if (m < (1ULL << 32) && (rotation == 0)) {
    v = f.And(v, f.LoadConstantUint64(m));
  }
  // masks out all the bits that are rotated in from the right, so just do a
  // shift + and. the and with 0xFFFFFFFF is done instead of a truncate/zx
  // because we have a special case for it in the emitters that will just do a
  // single insn (mov reg32, lowpartofreg64); otherwise we generate
  // significantly more code from setting up the opnds of the truncate/zx
  else if (InstrCheck_rlx_only_needs_low(rotation, m)) {
    // this path is taken for like 90% of all rlwinms
    v = f.And(f.Shl(v, rotation), f.LoadConstantUint64(0xFFFFFFFF));
  } else {
    // (x||x)
    // cs: changed this to mask with UINT32_MAX instead of doing the
    // truncate/extend; this generates better code in the backend and is easier
    // to do analysis on
    v = f.And(v, f.LoadConstantUint64(0xFFFFFFFF));
    v = f.Or(f.Shl(v, 32), v);

    // TODO(benvanik): optimize srwi
    // TODO(benvanik): optimize slwi
    // The compiler will generate a bunch of these for the special case of SH=0,
    // which seems to just select some bits and set cr0 for use with a branch.
    // We can detect this and do less work.
    if (i.M.SH) {
      v = f.RotateLeft(v, f.LoadConstantInt8(rotation));
    }
    // Compiler sometimes masks with 0xFFFFFFFF (identity) - avoid the work here
    // as our truncation/zero-extend does it for us.
    if (m != 0xFFFFFFFFFFFFFFFFull) {
      v = f.And(v, f.LoadConstantUint64(m));
    }
  }
  f.StoreGPR(i.M.RA, v);
  if (i.M.Rc) {
    f.UpdateCR(0, v);
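For readers unfamiliar with rlwinm: it rotates the low 32 bits of RS left by SH and ANDs the result with MASK(MB+32, ME+32). The (x||x) doubling in the fallback path makes a 64-bit rotate reproduce the 32-bit rotate in the low word; a small standalone check of that equivalence (not emulator code):

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t x, unsigned n) {
  return n ? (x << n) | (x >> (32 - n)) : x;
}
static uint64_t rotl64(uint64_t x, unsigned n) {
  return n ? (x << n) | (x >> (64 - n)) : x;
}

int main() {
  uint64_t gpr = 0x0123456789ABCDEFull;
  unsigned sh = 5;
  // Duplicate the low word into the high word; a 64-bit rotate then keeps the
  // 32-bit rotate's result in the low 32 bits.
  uint32_t low = static_cast<uint32_t>(gpr);
  uint64_t doubled = (static_cast<uint64_t>(low) << 32) | low;
  assert(static_cast<uint32_t>(rotl64(doubled, sh)) == rotl32(low, sh));
  return 0;
}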