Merge pull request #47 from chrisps/canary_experimental
drastically reduce size of final generated code for rlwinm by adding …
commit f8f6a20569
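For context, rlwinm (rotate left word immediate then AND with mask) is the PowerPC instruction this commit targets. Below is a minimal standalone sketch of its semantics; the helper name and layout are illustrative only and are not part of the diff:

#include <cstdint>

// Reference model of rlwinm, assuming the usual 64-bit PPC semantics: the low
// 32 bits of the source are duplicated into both halves ((x||x)), rotated
// left by SH, then ANDed with the 64-bit mask built from MB/ME.
static inline uint64_t RlwinmReference(uint64_t rs, unsigned sh, uint64_t mask) {
  uint64_t lo = static_cast<uint32_t>(rs);
  uint64_t x = (lo << 32) | lo;  // (x||x)
  uint64_t rotated = sh ? ((x << sh) | (x >> (64 - sh))) : x;
  return rotated & mask;
}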
@@ -106,7 +106,16 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator)
 #undef TEST_EMIT_FEATURE
+  /*
+    fix for xbyak bug/omission, amd cpus are never checked for lzcnt. fixed in
+    latest version of xbyak
+  */
+  unsigned int data[4];
+  Xbyak::util::Cpu::getCpuid(0x80000001, data);
+  if (data[2] & (1U << 5)) {
+    if ((cvars::x64_extension_mask & kX64EmitLZCNT) == kX64EmitLZCNT) {
+      feature_flags_ |= kX64EmitLZCNT;
+    }
+  }
   if (cpu_.has(Xbyak::util::Cpu::tAMD)) {
     bool is_zennish = cpu_.displayFamily >= 0x17;
@@ -2749,11 +2749,17 @@ struct AND_I32 : Sequence<AND_I32, I<OPCODE_AND, I32Op, I32Op, I32Op>> {
 };
 struct AND_I64 : Sequence<AND_I64, I<OPCODE_AND, I64Op, I64Op, I64Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    EmitAndXX<AND_I64, Reg64>(e, i);
+    if (i.src2.is_constant && i.src2.constant() == 0xFFFFFFFF) {
+      // special case for rlwinm codegen
+      e.mov(((Reg64)i.dest).cvt32(), ((Reg64)i.src1).cvt32());
+    } else {
+      EmitAndXX<AND_I64, Reg64>(e, i);
+    }
   }
 };
 struct AND_V128 : Sequence<AND_V128, I<OPCODE_AND, V128Op, V128Op, V128Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     EmitCommutativeBinaryXmmOp(e, i,
                                [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
                                  e.vpand(dest, src1, src2);
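The special case above relies on x64 behavior: writing a 32-bit register implicitly zero-extends into the full 64-bit register, so a single mov of the low 32 bits is equivalent to an AND with 0xFFFFFFFF. A small standalone check of that equivalence (illustrative only, not part of the diff):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t src = 0x1234'5678'9ABC'DEF0ull;
  // What the generic path computes: AND with the 32-bit all-ones mask.
  uint64_t via_and = src & 0xFFFFFFFFull;
  // What "mov dest32, src32" produces: the low 32 bits, zero-extended.
  uint64_t via_mov32 = static_cast<uint32_t>(src);
  assert(via_and == via_mov32);
  return 0;
}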
@@ -9,8 +9,8 @@

 #include "xenia/cpu/compiler/passes/simplification_pass.h"

+#include "xenia/base/byte_order.h"
 #include "xenia/base/profiling.h"

 namespace xe {
 namespace cpu {
 namespace compiler {
@@ -29,11 +29,241 @@ SimplificationPass::~SimplificationPass() {}

 bool SimplificationPass::Run(HIRBuilder* builder, bool& result) {
   result = false;
+  result |= SimplifyBitArith(builder);
   result |= EliminateConversions(builder);
   result |= SimplifyAssignments(builder);
   return true;
 }
+// simplifications that apply to both or and xor
+bool SimplificationPass::CheckOrXorZero(hir::Instr* i) {
+  auto [constant_value, variable_value] = i->BinaryValueArrangeAsConstAndVar();
+
+  if (constant_value && constant_value->IsConstantZero()) {
+    i->Replace(&OPCODE_ASSIGN_info, 0);
+    i->set_src1(variable_value);
+    return true;
+  }
+  return false;
+}
+bool SimplificationPass::CheckOr(hir::Instr* i) { return CheckOrXorZero(i); }
+bool SimplificationPass::CheckXor(hir::Instr* i) {
+  if (CheckOrXorZero(i)) {
+    return true;
+  } else {
+    uint64_t type_mask = GetScalarTypeMask(i->dest->type);
+
+    auto [constant_value, variable_value] =
+        i->BinaryValueArrangeAsConstAndVar();
+
+    if (!constant_value) return false;
+
+    if (constant_value->AsUint64() == type_mask) {
+      i->Replace(&OPCODE_NOT_info, 0);
+      i->set_src1(variable_value);
+      return true;
+    }
+  }
+  return false;
+}
+bool SimplificationPass::Is1BitOpcode(hir::Opcode def_opcode) {
+  return def_opcode >= OPCODE_IS_TRUE && def_opcode <= OPCODE_DID_SATURATE;
+}
+uint64_t SimplificationPass::GetScalarNZM(hir::Value* value, hir::Instr* def,
+                                          uint64_t typemask,
+                                          hir::Opcode def_opcode) {
+  if (def_opcode == OPCODE_SHL) {
+    hir::Value* shifted = def->src1.value;
+    hir::Value* shiftby = def->src2.value;
+    // todo: nzm shift
+    if (shiftby->IsConstant()) {
+      uint64_t shifted_nzm = GetScalarNZM(shifted);
+      return shifted_nzm << shiftby->AsUint64();
+    }
+  } else if (def_opcode == OPCODE_SHR) {
+    hir::Value* shifted = def->src1.value;
+    hir::Value* shiftby = def->src2.value;
+    // todo: nzm shift
+    if (shiftby->IsConstant()) {
+      uint64_t shifted_nzm = GetScalarNZM(shifted);
+      return shifted_nzm >> shiftby->AsUint64();
+    }
+  }
+  // todo : sha, check signbit
+  else if (def_opcode == OPCODE_ROTATE_LEFT) {
+    hir::Value* shifted = def->src1.value;
+    hir::Value* shiftby = def->src2.value;
+    // todo: nzm shift
+    if (shiftby->IsConstant()) {
+      uint64_t shifted_nzm = GetScalarNZM(shifted);
+      return xe::rotate_left(shifted_nzm,
+                             static_cast<uint8_t>(shiftby->AsUint64()));
+    }
+  } else if (def_opcode == OPCODE_XOR || def_opcode == OPCODE_OR) {
+    return GetScalarNZM(def->src1.value) | GetScalarNZM(def->src2.value);
+  } else if (def_opcode == OPCODE_NOT) {
+    return typemask;
+  } else if (def_opcode == OPCODE_ASSIGN) {
+    return GetScalarNZM(def->src1.value);
+  } else if (def_opcode == OPCODE_BYTE_SWAP) {
+    uint64_t input_nzm = GetScalarNZM(def->src1.value);
+    switch (GetTypeSize(def->dest->type)) {
+      case 1:
+        return input_nzm;
+      case 2:
+        return xe::byte_swap<unsigned short>(
+            static_cast<unsigned short>(input_nzm));
+      case 4:
+        return xe::byte_swap<unsigned int>(
+            static_cast<unsigned int>(input_nzm));
+      case 8:
+        return xe::byte_swap<unsigned long long>(input_nzm);
+      default:
+        xenia_assert(0);
+        return typemask;
+    }
+  } else if (def_opcode == OPCODE_ZERO_EXTEND) {
+    return GetScalarNZM(def->src1.value);
+  } else if (def_opcode == OPCODE_TRUNCATE) {
+    return GetScalarNZM(def->src1.value);  // caller will truncate by masking
+  } else if (def_opcode == OPCODE_AND) {
+    return GetScalarNZM(def->src1.value) & GetScalarNZM(def->src2.value);
+  } else if (def_opcode == OPCODE_SELECT) {
+    return GetScalarNZM(def->src2.value) | GetScalarNZM(def->src3.value);
+  } else if (def_opcode == OPCODE_MIN) {
+    /*
+      the nzm will be that of the narrowest operand, because if one value is
+      capable of being much larger than the other it can never actually reach
+      a value that is outside the range of the other values nzm, because that
+      would make it not the minimum of the two
+
+      ahh, actually, we have to be careful about constants then.... for now,
+      just return or
+    */
+    return GetScalarNZM(def->src2.value) | GetScalarNZM(def->src1.value);
+  } else if (def_opcode == OPCODE_MAX) {
+    return GetScalarNZM(def->src2.value) | GetScalarNZM(def->src1.value);
+  } else if (Is1BitOpcode(def_opcode)) {
+    return 1ULL;
+  } else if (def_opcode == OPCODE_CAST) {
+    return GetScalarNZM(def->src1.value);
+  }
+
+  return typemask;
+}
+uint64_t SimplificationPass::GetScalarNZM(hir::Value* value) {
+  if (value->IsConstant()) {
+    return value->AsUint64();
+  }
+
+  uint64_t default_return = GetScalarTypeMask(value->type);
+
+  hir::Instr* def = value->def;
+  if (!def) {
+    return default_return;
+  }
+  return GetScalarNZM(value, def, default_return, def->opcode->num) &
+         default_return;
+}
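To make the NZM ("maybe nonzero mask") idea concrete, here is a small standalone worked example applying the same propagation rules by hand; it is illustrative only and independent of the HIR types used above:

#include <cassert>
#include <cstdint>

int main() {
  // NZM of a constant is the constant; a shift shifts the mask; an AND with a
  // constant intersects the masks.
  uint64_t nzm_x = 0xFFull;                // x is already masked to 8 bits
  uint64_t nzm_shl = nzm_x << 8;           // (x << 8) can only set bits 8..15
  uint64_t nzm_and = nzm_shl & 0xF000ull;  // ((x << 8) & 0xF000): bits 12..15
  assert(nzm_shl == 0xFF00ull);
  assert(nzm_and == 0xF000ull);
  // An OR operand whose NZM does not intersect a following AND constant
  // contributes nothing to the masked result, which is the case CheckAnd
  // handles below.
  return 0;
}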
+bool SimplificationPass::CheckAnd(hir::Instr* i) {
+retry_and_simplification:
+  auto [constant_value, variable_value] = i->BinaryValueArrangeAsConstAndVar();
+  if (!constant_value) return false;
+
+  // todo: check if masking with mask that covers all of zero extension source
+  uint64_t type_mask = GetScalarTypeMask(i->dest->type);
+  // if masking with entire width, pointless instruction so become an assign
+
+  if (constant_value->AsUint64() == type_mask) {
+    i->Replace(&OPCODE_ASSIGN_info, 0);
+    i->set_src1(variable_value);
+    return true;
+  }
+
+  auto variable_def = variable_value->def;
+
+  if (variable_def) {
+    auto true_variable_def = variable_def->GetDestDefSkipAssigns();
+    if (true_variable_def) {
+      if (true_variable_def->opcode == &OPCODE_AND_info) {
+        auto [variable_def_constant, variable_def_variable] =
+            true_variable_def->BinaryValueArrangeAsConstAndVar();
+
+        if (variable_def_constant) {
+          // todo: check if masked with mask that was a subset of the current
+          // one and elim if so
+          if (variable_def_constant->AsUint64() == constant_value->AsUint64()) {
+            // we already masked the input with the same mask
+            i->Replace(&OPCODE_ASSIGN_info, 0);
+            i->set_src1(variable_value);
+            return true;
+          }
+        }
+      } else if (true_variable_def->opcode == &OPCODE_OR_info) {
+        Value* or_left = true_variable_def->src1.value;
+        Value* or_right = true_variable_def->src2.value;
+
+        uint64_t left_nzm = GetScalarNZM(or_left);
+
+        // use the other or input instead of the or output
+        if ((constant_value->AsUint64() & left_nzm) == 0) {
+          i->Replace(&OPCODE_AND_info, 0);
+          i->set_src1(or_right);
+          i->set_src2(constant_value);
+          return true;
+        }
+
+        uint64_t right_nzm = GetScalarNZM(or_right);
+
+        if ((constant_value->AsUint64() & right_nzm) == 0) {
+          i->Replace(&OPCODE_AND_info, 0);
+          i->set_src1(or_left);
+          i->set_src2(constant_value);
+          return true;
+        }
+      } else if (true_variable_def->opcode == &OPCODE_ROTATE_LEFT_info) {
+        if (true_variable_def->src2.value->IsConstant()) {
+          if (((type_mask << true_variable_def->src2.value->AsUint64()) &
+               type_mask) ==
+              constant_value->AsUint64()) {  // rotated bits are unused, convert
+                                             // to shift if we are the only use
+            if (true_variable_def->dest->use_head->next == nullptr) {
+              // one use, convert to shift
+              true_variable_def->opcode = &OPCODE_SHL_info;
+              goto retry_and_simplification;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return false;
+}
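The OR case above uses the NZM to drop an OR operand that cannot contribute to the masked result. A standalone arithmetic check of that identity on one sample input (illustrative, not from the diff):

#include <cassert>
#include <cstdint>

int main() {
  // left can only populate the high 32 bits, so its NZM is 0xFFFFFFFF00000000.
  uint64_t left = 0x1234'5678'0000'0000ull;
  uint64_t right = 0x0000'0000'9ABC'DEF0ull;
  uint64_t mask = 0x0000'0000'FFFF'FFFFull;  // mask & nzm(left) == 0
  // ((left | right) & mask) can therefore be rewritten as (right & mask).
  assert(((left | right) & mask) == (right & mask));
  return 0;
}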
+bool SimplificationPass::SimplifyBitArith(hir::HIRBuilder* builder) {
+  bool result = false;
+  auto block = builder->first_block();
+  while (block) {
+    auto i = block->instr_head;
+    while (i) {
+      // vector types use the same opcodes as scalar ones for AND/OR/XOR! we
+      // don't handle these in our simplifications, so skip
+      if (i->dest && i->dest->type != VEC128_TYPE) {
+        if (i->opcode == &OPCODE_OR_info) {
+          result |= CheckOr(i);
+        } else if (i->opcode == &OPCODE_XOR_info) {
+          result |= CheckXor(i);
+        } else if (i->opcode == &OPCODE_AND_info) {
+          result |= CheckAnd(i);
+        }
+      }
+      i = i->next;
+    }
+    block = block->next;
+  }
+  return result;
+}
 bool SimplificationPass::EliminateConversions(HIRBuilder* builder) {
   // First, we check for truncates/extensions that can be skipped.
   // This generates some assignments which then the second step will clean up.
@@ -158,6 +388,7 @@ bool SimplificationPass::SimplifyAssignments(HIRBuilder* builder) {
         i->set_src3(CheckValue(i->src3.value, modified));
         result |= modified;
       }

       i = i->next;
     }
     block = block->next;
@@ -31,6 +31,19 @@ class SimplificationPass : public ConditionalGroupSubpass {

   bool SimplifyAssignments(hir::HIRBuilder* builder);
   hir::Value* CheckValue(hir::Value* value, bool& result);
+  bool SimplifyBitArith(hir::HIRBuilder* builder);
+  // handle either or or xor with 0
+  bool CheckOrXorZero(hir::Instr* i);
+  bool CheckOr(hir::Instr* i);
+  bool CheckXor(hir::Instr* i);
+  bool CheckAnd(hir::Instr* i);
+  static bool Is1BitOpcode(hir::Opcode def_opcode);
+  static uint64_t GetScalarNZM(hir::Value* value, hir::Instr* def,
+                               uint64_t typemask, hir::Opcode def_opcode);
+  // todo: use valuemask
+  // returns maybenonzeromask for value (mask of bits that may possibly hold
+  // information)
+  static uint64_t GetScalarNZM(hir::Value* value);
 };

 }  // namespace passes
@@ -114,7 +114,20 @@ void Instr::Remove() {
     block->instr_tail = prev;
   }
 }
+Instr* Instr::GetDestDefSkipAssigns() {
+  Instr* current_def = this;
+
+  while (current_def->opcode == &OPCODE_ASSIGN_info) {
+    Instr* next_def = current_def->src1.value->def;
+
+    if (!next_def) {
+      return nullptr;
+    }
+
+    current_def = next_def;
+  }
+  return current_def;
+}
 }  // namespace hir
 }  // namespace cpu
 }  // namespace xe
@@ -59,6 +59,52 @@ class Instr {
   void MoveBefore(Instr* other);
   void Replace(const OpcodeInfo* new_opcode, uint16_t new_flags);
   void Remove();
+
+  template <typename TPredicate>
+  std::pair<Value*, Value*> BinaryValueArrangeByPredicateExclusive(
+      TPredicate&& pred) {
+    auto src1_value = src1.value;
+    auto src2_value = src2.value;
+    if (!src1_value || !src2_value) return {nullptr, nullptr};
+
+    if (!opcode) return {nullptr, nullptr};  // impossible!
+
+    // check if binary opcode taking two values. we dont care if the dest is a
+    // value
+
+    if (!IsOpcodeBinaryValue(opcode->signature)) return {nullptr, nullptr};
+
+    if (pred(src1_value)) {
+      if (pred(src2_value)) {
+        return {nullptr, nullptr};
+      } else {
+        return {src1_value, src2_value};
+      }
+    } else if (pred(src2_value)) {
+      return {src2_value, src1_value};
+    } else {
+      return {nullptr, nullptr};
+    }
+  }
+
+  /*
+    if src1 is constant, and src2 is not, return [src1, src2]
+    if src2 is constant, and src1 is not, return [src2, src1]
+    if neither is constant, return nullptr, nullptr
+    if both are constant, return nullptr, nullptr
+  */
+  std::pair<Value*, Value*> BinaryValueArrangeAsConstAndVar() {
+    return BinaryValueArrangeByPredicateExclusive(
+        [](Value* value) { return value->IsConstant(); });
+  }
+  std::pair<Value*, Value*> BinaryValueArrangeByDefiningOpcode(
+      const OpcodeInfo* op_ptr) {
+    return BinaryValueArrangeByPredicateExclusive([op_ptr](Value* value) {
+      return value->def && value->def->opcode == op_ptr;
+    });
+  }
+
+  Instr* GetDestDefSkipAssigns();
 };

 }  // namespace hir
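The arrange helpers above canonicalize a commutative binary instruction's operands so callers can treat "the one matching a predicate" and "the other one" uniformly. A standalone sketch of the same idea over plain integers (the names here are illustrative and not the HIR API):

#include <cassert>
#include <optional>
#include <utility>

// Returns {matching, other} when exactly one of the two operands satisfies the
// predicate; otherwise returns nothing, mirroring the "exclusive" contract of
// BinaryValueArrangeByPredicateExclusive.
template <typename T, typename Pred>
std::optional<std::pair<T, T>> ArrangeByPredicateExclusive(T a, T b, Pred pred) {
  if (pred(a) == pred(b)) return std::nullopt;
  return pred(a) ? std::make_pair(a, b) : std::make_pair(b, a);
}

int main() {
  auto is_even = [](int v) { return v % 2 == 0; };
  auto arranged = ArrangeByPredicateExclusive(3, 8, is_even);
  assert(arranged && arranged->first == 8 && arranged->second == 3);
  assert(!ArrangeByPredicateExclusive(2, 4, is_even));  // both match: no result
  return 0;
}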
@@ -347,6 +347,10 @@ enum OpcodeSignature {
 #define GET_OPCODE_SIG_TYPE_SRC1(sig) (OpcodeSignatureType)((sig >> 3) & 0x7)
 #define GET_OPCODE_SIG_TYPE_SRC2(sig) (OpcodeSignatureType)((sig >> 6) & 0x7)
 #define GET_OPCODE_SIG_TYPE_SRC3(sig) (OpcodeSignatureType)((sig >> 9) & 0x7)
+static bool IsOpcodeBinaryValue(uint32_t signature) {
+  return (signature & ~(0x7)) ==
+         ((OPCODE_SIG_TYPE_V << 3) | (OPCODE_SIG_TYPE_V << 6));
+}
 typedef struct {
   uint32_t flags;
@@ -57,6 +57,15 @@ inline size_t GetTypeSize(TypeName type_name) {
       return 0;
   }
 }
+inline uint64_t GetScalarTypeMask(TypeName type_name) {
+  size_t mask_width = GetTypeSize(type_name);
+
+  if (mask_width == 8) {
+    return ~0ULL;
+  } else {
+    return (1ULL << (mask_width * CHAR_BIT)) - 1;
+  }
+}
 enum ValueFlags {
   VALUE_IS_CONSTANT = (1 << 1),
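For reference, the mask is simply all ones across the scalar's byte width. A quick standalone check of the expression used above; the type names in the comments indicate the assumed mapping from HIR scalar types to byte widths:

#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>

// Local restatement of the width -> mask rule, checked for a few widths.
static uint64_t MaskForWidth(size_t width_in_bytes) {
  return width_in_bytes == 8 ? ~0ULL
                             : (1ULL << (width_in_bytes * CHAR_BIT)) - 1;
}

int main() {
  assert(MaskForWidth(1) == 0xFFull);        // INT8_TYPE
  assert(MaskForWidth(2) == 0xFFFFull);      // INT16_TYPE
  assert(MaskForWidth(4) == 0xFFFFFFFFull);  // INT32_TYPE
  assert(MaskForWidth(8) == ~0ULL);          // INT64_TYPE
  return 0;
}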
@@ -68,6 +77,23 @@ struct RegAssignment {
   int32_t index;
 };

+struct ValueMask {
+  uint64_t low;   // low 64 bits, usually for scalar values
+  uint64_t high;  // high 64 bits, only used for vector types
+
+  ValueMask(uint64_t _low, uint64_t _high) : low(_low), high(_high) {}
+
+  ValueMask operator&(ValueMask other) const {
+    return ValueMask{low & other.low, high & other.high};
+  }
+  ValueMask operator|(ValueMask other) const {
+    return ValueMask{low | other.low, high | other.high};
+  }
+  ValueMask operator^(ValueMask other) const {
+    return ValueMask{low ^ other.low, high ^ other.high};
+  }
+};
+
 class Value {
  public:
  typedef struct Use_s {
@@ -1023,6 +1023,17 @@ int InstrEmit_rlwimix(PPCHIRBuilder& f, const InstrData& i) {
   }
   return 0;
 }
+static bool InstrCheck_rlx_only_needs_low(unsigned rotation, uint64_t mask) {
+  uint32_t mask32 = static_cast<uint32_t>(mask);
+  if (static_cast<uint64_t>(mask32) != mask) {
+    return false;
+  }
+  uint32_t all_ones_32 = ~0U;
+  all_ones_32 <<= rotation;
+
+  return all_ones_32 == mask32;  // mask is only 32 bits and all bits from the
+                                 // rotation are discarded
+}
 int InstrEmit_rlwinmx(PPCHIRBuilder& f, const InstrData& i) {
   // n <- SH
@@ -1031,23 +1042,47 @@ int InstrEmit_rlwinmx(PPCHIRBuilder& f, const InstrData& i) {
   // RA <- r & m
   Value* v = f.LoadGPR(i.M.RT);
-  // (x||x)
-  v = f.Or(f.Shl(v, 32), f.ZeroExtend(f.Truncate(v, INT32_TYPE), INT64_TYPE));
-  // TODO(benvanik): optimize srwi
-  // TODO(benvanik): optimize slwi
-  // The compiler will generate a bunch of these for the special case of SH=0.
-  // Which seems to just select some bits and set cr0 for use with a branch.
-  // We can detect this and do less work.
-  if (i.M.SH) {
-    v = f.RotateLeft(v, f.LoadConstantInt8(i.M.SH));
-  }
-  // Compiler sometimes masks with 0xFFFFFFFF (identity) - avoid the work here
-  // as our truncation/zero-extend does it for us.
+  unsigned rotation = i.M.SH;
   uint64_t m = XEMASK(i.M.MB + 32, i.M.ME + 32);
-  if (m != 0xFFFFFFFFFFFFFFFFull) {
+  // in uint32 range (so no register concat/truncate/zx needed) and no rotation
+  if (m < (1ULL << 32) && (rotation == 0)) {
     v = f.And(v, f.LoadConstantUint64(m));
   }
+  // masks out all the bits that are rotated in from the right, so just do a
+  // shift + and. the and with 0xFFFFFFFF is done instead of a truncate/zx
+  // because we have a special case for it in the emitters that will just do a
+  // single insn (mov reg32, lowpartofreg64), otherwise we generate
+  // significantly more code from setting up the opnds of the truncate/zx
+  else if (InstrCheck_rlx_only_needs_low(rotation, m)) {
+    // this path is taken for like 90% of all rlwinms
+    v = f.And(f.Shl(v, rotation), f.LoadConstantUint64(0xFFFFFFFF));
+  }
+
+  else {
+    // (x||x)
+    // cs: changed this to mask with UINT32_MAX instead of doing the
+    // truncate/extend, this generates better code in the backend and is easier
+    // to do analysis on
+    v = f.And(v, f.LoadConstantUint64(0xFFFFFFFF));
+
+    v = f.Or(f.Shl(v, 32), v);
+
+    // TODO(benvanik): optimize srwi
+    // TODO(benvanik): optimize slwi
+    // The compiler will generate a bunch of these for the special case of SH=0.
+    // Which seems to just select some bits and set cr0 for use with a branch.
+    // We can detect this and do less work.
+    if (i.M.SH) {
+      v = f.RotateLeft(v, f.LoadConstantInt8(rotation));
+    }
+    // Compiler sometimes masks with 0xFFFFFFFF (identity) - avoid the work here
+    // as our truncation/zero-extend does it for us.
+    if (m != 0xFFFFFFFFFFFFFFFFull) {
+      v = f.And(v, f.LoadConstantUint64(m));
+    }
+  }
   f.StoreGPR(i.M.RA, v);
   if (i.M.Rc) {
     f.UpdateCR(0, v);
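As a worked example of the dominant path (values chosen here for illustration): rlwinm rA, rS, 2, 0, 29 is the canonical encoding of slwi rA, rS, 2, so SH = 2 and XEMASK(32, 61) = 0xFFFFFFFC = ~0u << 2, which is exactly the shape InstrCheck_rlx_only_needs_low accepts. The emitter can then use a shift plus the 32-bit mask instead of the concat/rotate/mask sequence. A standalone check that the shortcut matches the full (x||x)-rotate-mask model for this sample:

#include <cassert>
#include <cstdint>

// Full reference: duplicate the low 32 bits, rotate left, then apply the mask.
static uint64_t RlwinmFull(uint64_t rs, unsigned sh, uint64_t mask) {
  uint64_t lo = static_cast<uint32_t>(rs);
  uint64_t x = (lo << 32) | lo;
  uint64_t rotated = sh ? ((x << sh) | (x >> (64 - sh))) : x;
  return rotated & mask;
}

// Shortcut taken when the mask discards every bit rotated in from the right:
// a plain shift of the 64-bit register followed by the 32-bit mask, matching
// the f.And(f.Shl(v, rotation), 0xFFFFFFFF) path above.
static uint64_t RlwinmShortcut(uint64_t rs, unsigned sh) {
  return (rs << sh) & 0xFFFFFFFFull;
}

int main() {
  unsigned sh = 2;
  uint64_t mask = 0xFFFFFFFCull;  // XEMASK(32, 61) for MB=0, ME=29
  uint64_t rs = 0xDEAD'BEEF'8001'F00Dull;
  assert(RlwinmFull(rs, sh, mask) == RlwinmShortcut(rs, sh));
  return 0;
}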
|
|
Loading…
Reference in New Issue