Merge branch 'chris_cpu_changes' of https://github.com/Gliniak/xenia.git into canary_experimental
commit 0b183a3582
@@ -818,6 +818,12 @@ void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v) {
    // 1111...
    vpcmpeqb(dest, dest);
  } else {
    for (size_t i = 0; i < (kConstDataSize / sizeof(vec128_t)); ++i) {
      if (xmm_consts[i] == v) {
        vmovapd(dest, GetXmmConstPtr((XmmConst)i));
        return;
      }
    }
    // TODO(benvanik): see what other common values are.
    // TODO(benvanik): build constant table - 99% are reused.
    MovMem64(rsp + kStashOffset, v.low);
@@ -833,11 +839,19 @@ void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, float v) {
  } x = {v};
  if (!x.i) {
    // +0.0f (but not -0.0f because it may be used to flip the sign via xor).
    vpxor(dest, dest);
    vxorps(dest, dest);
  } else if (x.i == ~uint32_t(0)) {
    // 1111...
    vpcmpeqb(dest, dest);
    vcmpeqss(dest, dest);
  } else {
    unsigned raw_bits = *reinterpret_cast<unsigned*>(&v);

    for (size_t i = 0; i < (kConstDataSize / sizeof(vec128_t)); ++i) {
      if (xmm_consts[i].u32[0] == raw_bits) {
        vmovss(dest, GetXmmConstPtr((XmmConst)i));
        return;
      }
    }
    // TODO(benvanik): see what other common values are.
    // TODO(benvanik): build constant table - 99% are reused.
    mov(eax, x.i);
@@ -852,11 +866,19 @@ void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, double v) {
  } x = {v};
  if (!x.i) {
    // +0.0 (but not -0.0 because it may be used to flip the sign via xor).
    vpxor(dest, dest);
    vxorpd(dest, dest);
  } else if (x.i == ~uint64_t(0)) {
    // 1111...
    vpcmpeqb(dest, dest);
    vcmpeqpd(dest, dest);
  } else {
    uint64_t raw_bits = *reinterpret_cast<uint64_t*>(&v);

    for (size_t i = 0; i < (kConstDataSize / sizeof(vec128_t)); ++i) {
      if (xmm_consts[i].u64[0] == raw_bits) {
        vmovsd(dest, GetXmmConstPtr((XmmConst)i));
        return;
      }
    }
    // TODO(benvanik): see what other common values are.
    // TODO(benvanik): build constant table - 99% are reused.
    mov(rax, x.i);
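The three hunks above make LoadConstantXmm scan the emitter's xmm_consts pool before materializing a constant, so a vector, float, or double value already present in the pool is loaded with a single vmovapd/vmovss/vmovsd instead of being rebuilt through the stash slot. A minimal standalone sketch of that lookup idea, with hypothetical names standing in for the emitter's real types:

#include <array>
#include <cstdint>
#include <optional>

// Hypothetical 128-bit constant record, standing in for xenia's vec128_t.
struct Vec128 {
  uint64_t lo;
  uint64_t hi;
  bool operator==(const Vec128& other) const {
    return lo == other.lo && hi == other.hi;
  }
};

// Stand-in for the emitter's constant pool (xmm_consts).
static const std::array<Vec128, 3> kPool = {{
    {0x0000000000000000ull, 0x0000000000000000ull},  // zero
    {0x3F8000003F800000ull, 0x3F8000003F800000ull},  // 1.0f splat
    {0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull},  // all ones
}};

// Returns the pool index when the requested constant is already present,
// mirroring the "for (i ...) if (xmm_consts[i] == v)" scan in the diff.
std::optional<size_t> LookupConstant(const Vec128& v) {
  for (size_t i = 0; i < kPool.size(); ++i) {
    if (kPool[i] == v) return i;
  }
  return std::nullopt;  // caller falls back to materializing the value
}

int main() {
  Vec128 one_splat{0x3F8000003F800000ull, 0x3F8000003F800000ull};
  return LookupConstant(one_splat).value_or(99) == 1 ? 0 : 1;
}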
@@ -20,7 +20,59 @@ namespace backend {
namespace x64 {

volatile int anchor_control = 0;

template <typename T>
static void EmitFusedBranch(X64Emitter& e, const T& i) {
  bool valid = i.instr->prev && i.instr->prev->dest == i.src1.value;
  auto opcode = valid ? i.instr->prev->opcode->num : -1;
  if (valid) {
    auto name = i.src2.value->name;
    switch (opcode) {
      case OPCODE_IS_TRUE:
        e.jnz(name, e.T_NEAR);
        break;
      case OPCODE_IS_FALSE:
        e.jz(name, e.T_NEAR);
        break;
      case OPCODE_COMPARE_EQ:
        e.je(name, e.T_NEAR);
        break;
      case OPCODE_COMPARE_NE:
        e.jne(name, e.T_NEAR);
        break;
      case OPCODE_COMPARE_SLT:
        e.jl(name, e.T_NEAR);
        break;
      case OPCODE_COMPARE_SLE:
        e.jle(name, e.T_NEAR);
        break;
      case OPCODE_COMPARE_SGT:
        e.jg(name, e.T_NEAR);
        break;
      case OPCODE_COMPARE_SGE:
        e.jge(name, e.T_NEAR);
        break;
      case OPCODE_COMPARE_ULT:
        e.jb(name, e.T_NEAR);
        break;
      case OPCODE_COMPARE_ULE:
        e.jbe(name, e.T_NEAR);
        break;
      case OPCODE_COMPARE_UGT:
        e.ja(name, e.T_NEAR);
        break;
      case OPCODE_COMPARE_UGE:
        e.jae(name, e.T_NEAR);
        break;
      default:
        e.test(i.src1, i.src1);
        e.jnz(name, e.T_NEAR);
        break;
    }
  } else {
    e.test(i.src1, i.src1);
    e.jnz(i.src2.value->name, e.T_NEAR);
  }
}
// ============================================================================
// OPCODE_DEBUG_BREAK
// ============================================================================
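EmitFusedBranch lets a branch reuse the flags left by the instruction that produced its condition: when the previous HIR instruction defined the branch operand, the switch above picks the jcc that matches that compare instead of emitting test + jnz. A condensed view of the same opcode-to-condition mapping as a hypothetical lookup helper (not part of the commit; the real code emits via xbyak):

#include <cstdio>

// Hypothetical stand-ins for the HIR compare opcodes handled in the switch.
enum class CmpOp { IsTrue, IsFalse, Eq, Ne, Slt, Sle, Sgt, Sge, Ult, Ule, Ugt, Uge };

// Signed compares map to jl/jle/jg/jge, unsigned ones to jb/jbe/ja/jae,
// exactly as in the EmitFusedBranch switch above.
const char* JccFor(CmpOp op) {
  switch (op) {
    case CmpOp::IsTrue:  return "jnz";
    case CmpOp::IsFalse: return "jz";
    case CmpOp::Eq:      return "je";
    case CmpOp::Ne:      return "jne";
    case CmpOp::Slt:     return "jl";
    case CmpOp::Sle:     return "jle";
    case CmpOp::Sgt:     return "jg";
    case CmpOp::Sge:     return "jge";
    case CmpOp::Ult:     return "jb";
    case CmpOp::Ule:     return "jbe";
    case CmpOp::Ugt:     return "ja";
    case CmpOp::Uge:     return "jae";
  }
  return "jnz";  // fallback path: test src, src; jnz label
}

int main() { std::printf("%s\n", JccFor(CmpOp::Ult)); }  // prints "jb"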
@@ -450,43 +502,57 @@ EMITTER_OPCODE_TABLE(OPCODE_BRANCH, BRANCH);
struct BRANCH_TRUE_I8
    : Sequence<BRANCH_TRUE_I8, I<OPCODE_BRANCH_TRUE, VoidOp, I8Op, LabelOp>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    e.test(i.src1, i.src1);
    e.jnz(i.src2.value->name, e.T_NEAR);
    EmitFusedBranch(e, i);
  }
};
struct BRANCH_TRUE_I16
    : Sequence<BRANCH_TRUE_I16, I<OPCODE_BRANCH_TRUE, VoidOp, I16Op, LabelOp>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    e.test(i.src1, i.src1);
    e.jnz(i.src2.value->name, e.T_NEAR);
    EmitFusedBranch(e, i);
  }
};
struct BRANCH_TRUE_I32
    : Sequence<BRANCH_TRUE_I32, I<OPCODE_BRANCH_TRUE, VoidOp, I32Op, LabelOp>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    e.test(i.src1, i.src1);
    e.jnz(i.src2.value->name, e.T_NEAR);
    EmitFusedBranch(e, i);
  }
};
struct BRANCH_TRUE_I64
    : Sequence<BRANCH_TRUE_I64, I<OPCODE_BRANCH_TRUE, VoidOp, I64Op, LabelOp>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    e.test(i.src1, i.src1);
    e.jnz(i.src2.value->name, e.T_NEAR);
    EmitFusedBranch(e, i);
  }
};
struct BRANCH_TRUE_F32
    : Sequence<BRANCH_TRUE_F32, I<OPCODE_BRANCH_TRUE, VoidOp, F32Op, LabelOp>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    e.vptest(i.src1, i.src1);
    e.jnz(i.src2.value->name, e.T_NEAR);
    if (i.instr->prev && i.instr->prev->opcode == &OPCODE_IS_TRUE_info &&
        i.instr->prev->dest == i.src1.value) {
      e.jnz(i.src2.value->name, e.T_NEAR);
    } else if (i.instr->prev &&
               i.instr->prev->opcode == &OPCODE_IS_FALSE_info &&
               i.instr->prev->dest == i.src1.value) {
      e.jz(i.src2.value->name, e.T_NEAR);
    } else {
      e.vptest(i.src1, i.src1);
      e.jnz(i.src2.value->name, e.T_NEAR);
    }
  }
};
struct BRANCH_TRUE_F64
    : Sequence<BRANCH_TRUE_F64, I<OPCODE_BRANCH_TRUE, VoidOp, F64Op, LabelOp>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    e.vptest(i.src1, i.src1);
    e.jnz(i.src2.value->name, e.T_NEAR);
    if (i.instr->prev && i.instr->prev->opcode == &OPCODE_IS_TRUE_info &&
        i.instr->prev->dest == i.src1.value) {
      e.jnz(i.src2.value->name, e.T_NEAR);
    } else if (i.instr->prev &&
               i.instr->prev->opcode == &OPCODE_IS_FALSE_info &&
               i.instr->prev->dest == i.src1.value) {
      e.jz(i.src2.value->name, e.T_NEAR);
    } else {
      e.vptest(i.src1, i.src1);
      e.jnz(i.src2.value->name, e.T_NEAR);
    }
  }
};
EMITTER_OPCODE_TABLE(OPCODE_BRANCH_TRUE, BRANCH_TRUE_I8, BRANCH_TRUE_I16,
@@ -52,9 +52,9 @@ RegExp ComputeMemoryAddressOffset(X64Emitter& e, const T& guest,
  if (xe::memory::allocation_granularity() > 0x1000) {
    // Emulate the 4 KB physical address offset in 0xE0000000+ when we can't
    // do it via memory mapping.
    e.xor_(e.eax, e.eax);
    e.cmp(guest.reg().cvt32(), 0xE0000000 - offset_const);
    e.setae(e.al);
    e.movzx(e.eax, e.al);
    e.shl(e.eax, 12);
    e.add(e.eax, guest.reg().cvt32());
  } else {
@@ -89,9 +89,9 @@ RegExp ComputeMemoryAddress(X64Emitter& e, const T& guest) {
  if (xe::memory::allocation_granularity() > 0x1000) {
    // Emulate the 4 KB physical address offset in 0xE0000000+ when we can't
    // do it via memory mapping.
    e.xor_(e.eax, e.eax);
    e.cmp(guest.reg().cvt32(), 0xE0000000);
    e.setae(e.al);
    e.movzx(e.eax, e.al);
    e.shl(e.eax, 12);
    e.add(e.eax, guest.reg().cvt32());
  } else {
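Both memory hunks compute the same adjustment when the host allocation granularity is larger than 4 KB: guest addresses at or above 0xE0000000 get an extra 0x1000 added, emulating the physical-address offset that would otherwise come from memory mapping. The emitted cmp/setae/movzx/shl/add sequence is equivalent to this scalar computation (a sketch only, assuming a plain uint32_t guest address):

#include <cassert>
#include <cstdint>

// Mirrors the emitted sequence: eax = ((guest >= 0xE0000000) ? 1 : 0) << 12,
// then eax += guest. offset_const folds a known displacement into the compare,
// as in ComputeMemoryAddressOffset; ComputeMemoryAddress uses offset_const = 0.
uint32_t AdjustGuestAddress(uint32_t guest, uint32_t offset_const = 0) {
  uint32_t bias = (guest >= 0xE0000000u - offset_const) ? 1u : 0u;  // setae
  return (bias << 12) + guest;                                      // shl 12; add
}

int main() {
  assert(AdjustGuestAddress(0xDFFFFFFFu) == 0xDFFFFFFFu);
  assert(AdjustGuestAddress(0xE0000000u) == 0xE0001000u);
  return 0;
}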
@@ -175,7 +175,7 @@ struct ZERO_EXTEND_I32_I8
struct ZERO_EXTEND_I64_I8
    : Sequence<ZERO_EXTEND_I64_I8, I<OPCODE_ZERO_EXTEND, I64Op, I8Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    e.movzx(i.dest, i.src1);
    e.movzx(i.dest.reg().cvt32(), i.src1);
  }
};
struct ZERO_EXTEND_I32_I16
@@ -187,7 +187,7 @@ struct ZERO_EXTEND_I32_I16
struct ZERO_EXTEND_I64_I16
    : Sequence<ZERO_EXTEND_I64_I16, I<OPCODE_ZERO_EXTEND, I64Op, I16Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    e.movzx(i.dest, i.src1);
    e.movzx(i.dest.reg().cvt32(), i.src1);
  }
};
struct ZERO_EXTEND_I64_I32
@@ -317,31 +317,20 @@ struct CONVERT_I32_F64
struct CONVERT_I64_F64
    : Sequence<CONVERT_I64_F64, I<OPCODE_CONVERT, I64Op, F64Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    // Copy src1.
    e.movq(e.rcx, i.src1);
    e.xor_(e.eax, e.eax);

    // TODO(benvanik): saturation check? cvtt* (trunc?)
    e.vcomisd(i.src1, e.GetXmmConstPtr(XmmConst::XMMZero));
    if (i.instr->flags == ROUND_TO_ZERO) {
      e.vcvttsd2si(i.dest, i.src1);
    } else {
      e.vcvtsd2si(i.dest, i.src1);
    }

    // 0x8000000000000000
    e.mov(e.rax, 0x1);
    e.shl(e.rax, 63);

    // Saturate positive overflow
    // TODO(DrChat): Find a shorter equivalent sequence.
    // if (result is indefinite && src1 >= 0)
    //   result = 0x7FFFFFFFFFFFFFFF;
    e.cmp(e.rax, i.dest);
    e.sete(e.al);
    e.movzx(e.rax, e.al);
    e.shr(e.rcx, 63);
    e.xor_(e.rcx, 0x01);
    e.and_(e.rax, e.rcx);

    // cf set if less than
    e.setnc(e.cl);
    e.cmp(i.dest, -1LL);
    // if dest == 0x8000000000000000 and not inp < 0 then dest = 0x7FFFFFFFFFFFFFFF
    e.seto(e.al);
    e.and_(e.al, e.cl);
    e.sub(i.dest, e.rax);
  }
};
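Both the old and the new sequence in this hunk patch up positive overflow after the f64-to-i64 conversion: when cvtsd2si/cvttsd2si produces the x86 "integer indefinite" value 0x8000000000000000 but the input was non-negative, the result is corrected to 0x7FFFFFFFFFFFFFFF. A host-side sketch of that behavior (an assumption-laden scalar model, not the emitter path; rounding here uses the C++ default mode rather than MXCSR):

#include <cassert>
#include <cmath>
#include <cstdint>

// x86 cvtsd2si/cvttsd2si return this "integer indefinite" value on overflow or NaN.
constexpr int64_t kIndefinite = INT64_MIN;

// Mirrors the hunk's saturation rule: a non-negative input that overflowed the
// conversion is clamped to INT64_MAX; negative overflow keeps the indefinite value.
int64_t ConvertF64ToI64Saturating(double v, bool round_to_zero) {
  double rounded = round_to_zero ? std::trunc(v) : std::nearbyint(v);
  bool overflow = std::isnan(v) || rounded >= 9223372036854775808.0 ||
                  rounded < -9223372036854775808.0;
  int64_t raw = overflow ? kIndefinite : static_cast<int64_t>(rounded);
  if (raw == kIndefinite && v >= 0.0) {
    raw = INT64_MAX;  // "Saturate positive overflow"
  }
  return raw;
}

int main() {
  assert(ConvertF64ToI64Saturating(1e19, true) == INT64_MAX);
  assert(ConvertF64ToI64Saturating(-1e19, true) == INT64_MIN);
  assert(ConvertF64ToI64Saturating(42.7, true) == 42);
  return 0;
}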
@@ -1220,14 +1209,7 @@ void EmitAddCarryXX(X64Emitter& e, const ARGS& i) {
      e.clc();
    }
  } else {
    if (i.src3.reg().getIdx() <= 4) {
      // Can move from A/B/C/DX to AH.
      e.mov(e.ah, i.src3.reg().cvt8());
    } else {
      e.mov(e.al, i.src3);
      e.mov(e.ah, e.al);
    }
    e.sahf();
    e.bt(i.src3.reg().cvt32(), 0);
  }
  SEQ::EmitCommutativeBinaryOp(
      e, i,
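The hunk drops the mov-to-AH/sahf dance and loads the carry flag directly with bt src3, 0, so only bit 0 of the carry operand matters before the flag-consuming add. Semantically, the value the sequence feeds into EmitCommutativeBinaryOp works out to the following (a sketch of the add-with-carry semantics, not the emitter code; the OPCODE_ADD_CARRY reading is an assumption from context):

#include <cassert>
#include <cstdint>

// Assumed OPCODE_ADD_CARRY semantics: dest = src1 + src2 + carry-in, where the
// carry-in is bit 0 of src3 -- exactly what `bt src3, 0` loads into CF.
uint64_t AddCarry(uint64_t src1, uint64_t src2, uint64_t src3) {
  return src1 + src2 + (src3 & 1);
}

int main() {
  assert(AddCarry(1, 2, 0) == 3);
  assert(AddCarry(1, 2, 1) == 4);
  assert(AddCarry(1, 2, 0xFE) == 3);  // only bit 0 of src3 participates
  return 0;
}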
@@ -1337,6 +1319,18 @@ EMITTER_OPCODE_TABLE(OPCODE_SUB, SUB_I8, SUB_I16, SUB_I32, SUB_I64, SUB_F32,
// We exploit mulx here to avoid creating too much register pressure.
struct MUL_I8 : Sequence<MUL_I8, I<OPCODE_MUL, I8Op, I8Op, I8Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    if (i.src1.is_constant || i.src2.is_constant) {
      uint64_t cval =
          i.src1.is_constant ? i.src1.constant() : i.src2.constant();

      if (cval < (1ull << 32)) {
        auto& whichevs = i.src1.is_constant ? i.src2 : i.src1;

        e.imul(i.dest, whichevs, (int)cval);
        return;
      }
    }

    if (e.IsFeatureEnabled(kX64EmitBMI2)) {
      // mulx: $1:$2 = EDX * $3
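The comment above explains why mulx is preferred when BMI2 is available: it takes its implicit operand in (E/R)DX, writes the high:low halves of the product to two freely chosen destinations, and leaves the flags untouched, which keeps register and flag pressure down. What mulx computes, as a host-side sketch (assumes unsigned __int128 support, i.e. a 64-bit GCC/Clang target):

#include <cassert>
#include <cstdint>

// mulx dst_hi, dst_lo, src: {dst_hi:dst_lo} = RDX * src (unsigned), flags unchanged.
// This models the 64-bit form; the I8/I16/I32 sequences use the 32-bit form with
// EDX in the same implicit role.
void Mulx64(uint64_t rdx, uint64_t src, uint64_t& dst_hi, uint64_t& dst_lo) {
  unsigned __int128 product = static_cast<unsigned __int128>(rdx) * src;
  dst_lo = static_cast<uint64_t>(product);
  dst_hi = static_cast<uint64_t>(product >> 64);
}

int main() {
  uint64_t hi = 0, lo = 0;
  Mulx64(0xFFFFFFFFFFFFFFFFull, 2, hi, lo);
  assert(hi == 1 && lo == 0xFFFFFFFFFFFFFFFEull);
  return 0;
}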
@@ -1378,6 +1372,18 @@ struct MUL_I8 : Sequence<MUL_I8, I<OPCODE_MUL, I8Op, I8Op, I8Op>> {
};
struct MUL_I16 : Sequence<MUL_I16, I<OPCODE_MUL, I16Op, I16Op, I16Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    if (i.src1.is_constant || i.src2.is_constant) {
      uint64_t cval =
          i.src1.is_constant ? i.src1.constant() : i.src2.constant();

      if (cval < (1ull << 32)) {
        auto& whichevs = i.src1.is_constant ? i.src2 : i.src1;

        e.imul(i.dest, whichevs, (int)cval);
        return;
      }
    }

    if (e.IsFeatureEnabled(kX64EmitBMI2)) {
      // mulx: $1:$2 = EDX * $3
@@ -1419,6 +1425,26 @@ struct MUL_I16 : Sequence<MUL_I16, I<OPCODE_MUL, I16Op, I16Op, I16Op>> {
};
struct MUL_I32 : Sequence<MUL_I32, I<OPCODE_MUL, I32Op, I32Op, I32Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    if (i.src2.is_constant) {
      uint32_t multiplier = i.src2.value->constant.u32;
      if (multiplier == 3 || multiplier == 5 || multiplier == 9) {
        e.lea(i.dest, e.ptr[i.src1.reg() * (multiplier - 1) + i.src1.reg()]);
        return;
      }
    }

    if (i.src1.is_constant || i.src2.is_constant) {
      uint64_t cval =
          i.src1.is_constant ? i.src1.constant() : i.src2.constant();

      if (cval < (1ull << 32)) {
        auto& whichevs = i.src1.is_constant ? i.src2 : i.src1;

        e.imul(i.dest, whichevs, (int)cval);
        return;
      }
    }

    if (e.IsFeatureEnabled(kX64EmitBMI2)) {
      // mulx: $1:$2 = EDX * $3
@@ -1461,6 +1487,27 @@ struct MUL_I32 : Sequence<MUL_I32, I<OPCODE_MUL, I32Op, I32Op, I32Op>> {
};
struct MUL_I64 : Sequence<MUL_I64, I<OPCODE_MUL, I64Op, I64Op, I64Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    if (i.src2.is_constant) {
      uint64_t multiplier = i.src2.value->constant.u64;
      if (multiplier == 3 || multiplier == 5 || multiplier == 9) {
        e.lea(i.dest,
              e.ptr[i.src1.reg() * ((int)multiplier - 1) + i.src1.reg()]);
        return;
      }
    }

    if (i.src1.is_constant || i.src2.is_constant) {
      uint64_t cval =
          i.src1.is_constant ? i.src1.constant() : i.src2.constant();

      if (cval < (1ull << 32)) {
        auto& whichevs = i.src1.is_constant ? i.src2 : i.src1;

        e.imul(i.dest, whichevs, (int)cval);
        return;
      }
    }

    if (e.IsFeatureEnabled(kX64EmitBMI2)) {
      // mulx: $1:$2 = RDX * $3
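For constant multipliers of 3, 5, and 9 the MUL_I32/MUL_I64 hunks emit a single lea instead of imul: lea dest, [src * (m - 1) + src] uses scale-index-base addressing (scale 2, 4, or 8) to compute src * m in one cheap instruction. The arithmetic identity being relied on, as a sketch:

#include <cassert>
#include <cstdint>

// lea dest, [src * (m - 1) + src] computes src * m whenever m - 1 is a valid
// SIB scale, i.e. m is 3, 5, or 9 (scales 2, 4, 8).
uint64_t MulViaLea(uint64_t src, unsigned m) {
  assert(m == 3 || m == 5 || m == 9);
  return src * (m - 1) + src;  // identity: src*(m-1) + src == src*m
}

int main() {
  for (unsigned m : {3u, 5u, 9u}) {
    assert(MulViaLea(12345, m) == 12345ull * m);
  }
  return 0;
}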
@@ -2628,6 +2675,34 @@ void EmitAndXX(X64Emitter& e, const ARGS& i) {
        e.and_(dest_src, src);
      },
      [](X64Emitter& e, const REG& dest_src, int32_t constant) {
        if (constant == 0xFF) {
          if (dest_src.getBit() == 16 || dest_src.getBit() == 32) {
            e.movzx(dest_src, dest_src.cvt8());
            return;
          } else if (dest_src.getBit() == 64) {
            // take advantage of automatic zeroing of upper 32 bits
            e.movzx(dest_src.cvt32(), dest_src.cvt8());
            return;
          }
        } else if (constant == 0xFFFF) {
          if (dest_src.getBit() == 32) {
            e.movzx(dest_src, dest_src.cvt16());
            return;
          } else if (dest_src.getBit() == 64) {
            e.movzx(dest_src.cvt32(), dest_src.cvt16());
            return;
          }
        } else if (constant == -1) {
          if (dest_src.getBit() == 64) {
            // todo: verify that mov eax, eax will properly zero the upper
            // 32 bits of the 64-bit register
          }
        } else if (dest_src.getBit() == 64 && constant > 0) {
          // do a 32-bit and, not the full 64, because the upper 32 bits of
          // the mask are zero and the 32-bit op will auto clear the top;
          // saves space on the immediate and avoids a REX prefix
          e.and_(dest_src.cvt32(), constant);
          return;
        }
        e.and_(dest_src, constant);
      });
}
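The new constant cases in EmitAndXX recognize masks that are really zero-extensions: and reg, 0xFF is the same as a movzx from the 8-bit subregister, and reg, 0xFFFF the same as a movzx from the 16-bit one, and on 64-bit registers any 32-bit operation already clears the upper 32 bits, so the shorter REX-free encoding can be used for non-negative 32-bit masks. The equivalences being exploited, as a sketch:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0x1122334455667788ull;

  // and reg, 0xFF   ==  movzx reg, reg8
  assert((x & 0xFFull) == static_cast<uint64_t>(static_cast<uint8_t>(x)));

  // and reg, 0xFFFF ==  movzx reg, reg16
  assert((x & 0xFFFFull) == static_cast<uint64_t>(static_cast<uint16_t>(x)));

  // A 32-bit operation on a 64-bit register zeroes its upper 32 bits, so a
  // 64-bit AND with a non-negative 32-bit immediate can be done as a 32-bit AND:
  // and r64, imm32 (imm >= 0)  ==  and r32, imm32.
  uint32_t imm = 0x0FF00FF0u;
  assert((x & static_cast<uint64_t>(imm)) ==
         static_cast<uint64_t>(static_cast<uint32_t>(x) & imm));
  return 0;
}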
@@ -720,7 +720,14 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
            result = true;
          }
          break;
        // TODO(benvanik): ROTATE_LEFT
        case OPCODE_ROTATE_LEFT:
          if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
            v->set_from(i->src1.value);
            v->RotateLeft(i->src2.value);
            i->Remove();
            result = true;
          }
          break;
        case OPCODE_BYTE_SWAP:
          if (i->src1.value->IsConstant()) {
            v->set_from(i->src1.value);
@@ -820,6 +820,29 @@ void Value::Sha(Value* other) {
  }
}

void Value::RotateLeft(Value* other) {
  assert_true(other->type == INT8_TYPE);
  auto rotation = other->constant.u8;

  switch (type) {
    case INT8_TYPE:
      constant.u8 = rotate_left<uint8_t>(constant.u8, rotation);
      break;
    case INT16_TYPE:
      constant.u16 = rotate_left<uint16_t>(constant.u16, rotation);
      break;
    case INT32_TYPE:
      constant.u32 = rotate_left<uint32_t>(constant.u32, rotation);
      break;
    case INT64_TYPE:
      constant.u64 = rotate_left<uint64_t>(constant.u64, rotation);
      break;
    default:
      assert_unhandled_case(type);
      break;
  }
}

void Value::Extract(Value* vec, Value* index) {
  assert_true(vec->type == VEC128_TYPE);
  switch (type) {
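With Value::RotateLeft in place, the constant propagation pass can fold OPCODE_ROTATE_LEFT whenever both operands are constant and delete the instruction. A standalone sketch of the width-dispatching fold, using a local rotl helper in place of xenia's rotate_left<T> (names here are illustrative only):

#include <cassert>
#include <cstdint>

// Stand-in for xenia's rotate_left<T>: rotate an unsigned value left by n bits.
template <typename T>
T Rotl(T value, uint8_t n) {
  constexpr unsigned bits = sizeof(T) * 8;
  n %= bits;
  return n == 0 ? value : static_cast<T>((value << n) | (value >> (bits - n)));
}

int main() {
  // Same dispatch as Value::RotateLeft: the rotation count is a u8 constant and
  // the fold is applied at the value's own integer width.
  assert(Rotl<uint8_t>(0x81u, 1) == 0x03u);
  assert(Rotl<uint16_t>(0x8001u, 4) == 0x0018u);
  assert(Rotl<uint32_t>(0x80000001u, 1) == 0x00000003u);
  assert(Rotl<uint64_t>(0x8000000000000001ull, 1) == 0x3ull);
  return 0;
}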
@@ -520,6 +520,7 @@ class Value {
  void Shl(Value* other);
  void Shr(Value* other);
  void Sha(Value* other);
  void RotateLeft(Value* other);
  void Extract(Value* vec, Value* index);
  void Select(Value* other, Value* ctrl);
  void Splat(Value* other);