Re-enabling x64 backend, fixing many bugs.
This commit is contained in:
parent
4a584129d2
commit
74c9df6697
|
@ -11,6 +11,6 @@
|
||||||
|
|
||||||
'includes': [
|
'includes': [
|
||||||
'ivm/sources.gypi',
|
'ivm/sources.gypi',
|
||||||
#'x64/sources.gypi',
|
'x64/sources.gypi',
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
|
@ -2500,7 +2500,7 @@ table->AddSequence(OPCODE_NOT, [](X64Emitter& e, Instr*& i) {
|
||||||
} else if (IsVecType(i->dest->type)) {
|
} else if (IsVecType(i->dest->type)) {
|
||||||
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||||
// dest_src ^= 0xFFFF...
|
// dest_src ^= 0xFFFF...
|
||||||
if (dest != src) {
|
if (dest.getIdx() != src.getIdx()) {
|
||||||
e.movaps(dest, src);
|
e.movaps(dest, src);
|
||||||
}
|
}
|
||||||
e.mov(e.rax, XMMCONSTBASE);
|
e.mov(e.rax, XMMCONSTBASE);
|
||||||
|
@ -2697,7 +2697,7 @@ table->AddSequence(OPCODE_BYTE_SWAP, [](X64Emitter& e, Instr*& i) {
|
||||||
Reg32 dest, src1;
|
Reg32 dest, src1;
|
||||||
e.BeginOp(i->dest, dest, REG_DEST,
|
e.BeginOp(i->dest, dest, REG_DEST,
|
||||||
i->src1.value, src1, 0);
|
i->src1.value, src1, 0);
|
||||||
if (dest != src1) {
|
if (dest.getIdx() != src1.getIdx()) {
|
||||||
e.mov(dest, src1);
|
e.mov(dest, src1);
|
||||||
e.bswap(dest);
|
e.bswap(dest);
|
||||||
} else {
|
} else {
|
||||||
|
@ -2708,7 +2708,7 @@ table->AddSequence(OPCODE_BYTE_SWAP, [](X64Emitter& e, Instr*& i) {
|
||||||
Reg64 dest, src1;
|
Reg64 dest, src1;
|
||||||
e.BeginOp(i->dest, dest, REG_DEST,
|
e.BeginOp(i->dest, dest, REG_DEST,
|
||||||
i->src1.value, src1, 0);
|
i->src1.value, src1, 0);
|
||||||
if (dest != src1) {
|
if (dest.getIdx() != src1.getIdx()) {
|
||||||
e.mov(dest, src1);
|
e.mov(dest, src1);
|
||||||
e.bswap(dest);
|
e.bswap(dest);
|
||||||
} else {
|
} else {
|
||||||
|
@ -2972,7 +2972,7 @@ table->AddSequence(OPCODE_PERMUTE, [](X64Emitter& e, Instr*& i) {
|
||||||
(((control >> 18) & 0x1) << 1) |
|
(((control >> 18) & 0x1) << 1) |
|
||||||
(((control >> 10) & 0x1) << 2) |
|
(((control >> 10) & 0x1) << 2) |
|
||||||
(((control >> 2) & 0x1) << 3);
|
(((control >> 2) & 0x1) << 3);
|
||||||
if (dest != src3) {
|
if (dest.getIdx() != src3.getIdx()) {
|
||||||
e.pshufd(dest, src2, src_control);
|
e.pshufd(dest, src2, src_control);
|
||||||
e.pshufd(e.xmm0, src3, src_control);
|
e.pshufd(e.xmm0, src3, src_control);
|
||||||
e.blendps(dest, e.xmm0, blend_control);
|
e.blendps(dest, e.xmm0, blend_control);
|
||||||
|
|
|
@ -311,9 +311,9 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_sign
|
||||||
if (op == VECTOR_CMP_EQ) {
|
if (op == VECTOR_CMP_EQ) {
|
||||||
// Commutative, so simple.
|
// Commutative, so simple.
|
||||||
Xmm real_src;
|
Xmm real_src;
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
real_src = src2;
|
real_src = src2;
|
||||||
} else if (dest == src2) {
|
} else if (dest.getIdx() == src2.getIdx()) {
|
||||||
real_src = src1;
|
real_src = src1;
|
||||||
} else {
|
} else {
|
||||||
e.movaps(dest, src1);
|
e.movaps(dest, src1);
|
||||||
|
@ -334,9 +334,9 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_sign
|
||||||
// Float GT/GE must be emulated.
|
// Float GT/GE must be emulated.
|
||||||
if (op == VECTOR_CMP_GT) {
|
if (op == VECTOR_CMP_GT) {
|
||||||
// Have to swap: src2 < src1.
|
// Have to swap: src2 < src1.
|
||||||
if (dest == src2) {
|
if (dest.getIdx() == src2.getIdx()) {
|
||||||
e.cmpltps(dest, src1);
|
e.cmpltps(dest, src1);
|
||||||
} else if (dest == src1) {
|
} else if (dest.getIdx() == src1.getIdx()) {
|
||||||
e.movaps(e.xmm0, src1);
|
e.movaps(e.xmm0, src1);
|
||||||
e.movaps(dest, src2);
|
e.movaps(dest, src2);
|
||||||
e.cmpltps(dest, e.xmm0);
|
e.cmpltps(dest, e.xmm0);
|
||||||
|
@ -346,9 +346,9 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_sign
|
||||||
}
|
}
|
||||||
} else if (op == VECTOR_CMP_GE) {
|
} else if (op == VECTOR_CMP_GE) {
|
||||||
// Have to swap: src2 <= src1.
|
// Have to swap: src2 <= src1.
|
||||||
if (dest == src2) {
|
if (dest.getIdx() == src2.getIdx()) {
|
||||||
e.cmpleps(dest, src1);
|
e.cmpleps(dest, src1);
|
||||||
} else if (dest == src1) {
|
} else if (dest.getIdx() == src1.getIdx()) {
|
||||||
e.movaps(e.xmm0, src1);
|
e.movaps(e.xmm0, src1);
|
||||||
e.movaps(dest, src2);
|
e.movaps(dest, src2);
|
||||||
e.cmpleps(dest, e.xmm0);
|
e.cmpleps(dest, e.xmm0);
|
||||||
|
@ -362,9 +362,9 @@ void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_sign
|
||||||
} else {
|
} else {
|
||||||
// Integer types are easier.
|
// Integer types are easier.
|
||||||
Xmm real_src;
|
Xmm real_src;
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
real_src = src2;
|
real_src = src2;
|
||||||
} else if (dest == src2) {
|
} else if (dest.getIdx() == src2.getIdx()) {
|
||||||
e.movaps(e.xmm0, src2);
|
e.movaps(e.xmm0, src2);
|
||||||
e.movaps(dest, src1);
|
e.movaps(dest, src1);
|
||||||
real_src = e.xmm0;
|
real_src = e.xmm0;
|
||||||
|
@ -429,7 +429,7 @@ void IntUnaryOpV(X64Emitter& e, Instr*& i, v_fn v_fn,
|
||||||
T& dest, T& src1) {
|
T& dest, T& src1) {
|
||||||
e.BeginOp(i->dest, dest, REG_DEST,
|
e.BeginOp(i->dest, dest, REG_DEST,
|
||||||
i->src1.value, src1, 0);
|
i->src1.value, src1, 0);
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
v_fn(e, *i, dest);
|
v_fn(e, *i, dest);
|
||||||
} else {
|
} else {
|
||||||
e.mov(dest, src1);
|
e.mov(dest, src1);
|
||||||
|
@ -486,9 +486,9 @@ void IntBinaryOpVV(X64Emitter& e, Instr*& i, vv_fn vv_fn,
|
||||||
e.BeginOp(i->dest, dest, REG_DEST,
|
e.BeginOp(i->dest, dest, REG_DEST,
|
||||||
i->src1.value, src1, 0,
|
i->src1.value, src1, 0,
|
||||||
i->src2.value, src2, 0);
|
i->src2.value, src2, 0);
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
vv_fn(e, *i, dest, src2);
|
vv_fn(e, *i, dest, src2);
|
||||||
} else if (dest == src2) {
|
} else if (dest.getIdx() == src2.getIdx()) {
|
||||||
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
||||||
vv_fn(e, *i, dest, src1);
|
vv_fn(e, *i, dest, src1);
|
||||||
} else {
|
} else {
|
||||||
|
@ -511,7 +511,7 @@ void IntBinaryOpVC(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn,
|
||||||
i->src1.value, src1, 0);
|
i->src1.value, src1, 0);
|
||||||
if (dest.getBit() <= 32) {
|
if (dest.getBit() <= 32) {
|
||||||
// 32-bit.
|
// 32-bit.
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
vc_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()));
|
vc_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()));
|
||||||
} else {
|
} else {
|
||||||
e.mov(dest, src1);
|
e.mov(dest, src1);
|
||||||
|
@ -519,7 +519,7 @@ void IntBinaryOpVC(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// 64-bit.
|
// 64-bit.
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
e.mov(TEMP_REG, src2->constant.i64);
|
e.mov(TEMP_REG, src2->constant.i64);
|
||||||
vv_fn(e, *i, dest, TEMP_REG);
|
vv_fn(e, *i, dest, TEMP_REG);
|
||||||
} else {
|
} else {
|
||||||
|
@ -537,7 +537,7 @@ void IntBinaryOpCV(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn,
|
||||||
i->src2.value, src2, 0);
|
i->src2.value, src2, 0);
|
||||||
if (dest.getBit() <= 32) {
|
if (dest.getBit() <= 32) {
|
||||||
// 32-bit.
|
// 32-bit.
|
||||||
if (dest == src2) {
|
if (dest.getIdx() == src2.getIdx()) {
|
||||||
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
||||||
vc_fn(e, *i, dest, (uint32_t)src1->get_constant(CT()));
|
vc_fn(e, *i, dest, (uint32_t)src1->get_constant(CT()));
|
||||||
} else {
|
} else {
|
||||||
|
@ -559,7 +559,7 @@ void IntBinaryOpCV(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// 64-bit.
|
// 64-bit.
|
||||||
if (dest == src2) {
|
if (dest.getIdx() == src2.getIdx()) {
|
||||||
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
||||||
e.mov(TEMP_REG, src1->constant.i64);
|
e.mov(TEMP_REG, src1->constant.i64);
|
||||||
vv_fn(e, *i, dest, TEMP_REG);
|
vv_fn(e, *i, dest, TEMP_REG);
|
||||||
|
@ -669,14 +669,19 @@ void IntTernaryOpVVV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn,
|
||||||
i->src1.value, src1, 0,
|
i->src1.value, src1, 0,
|
||||||
i->src2.value, src2, 0,
|
i->src2.value, src2, 0,
|
||||||
i->src3.value, src3, 0);
|
i->src3.value, src3, 0);
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
vvv_fn(e, *i, dest, src2, src3);
|
vvv_fn(e, *i, dest, src2, src3);
|
||||||
} else if (dest == src2) {
|
} else if (dest.getIdx() == src2.getIdx()) {
|
||||||
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
||||||
vvv_fn(e, *i, dest, src1, src3);
|
vvv_fn(e, *i, dest, src1, src3);
|
||||||
} else {
|
} else {
|
||||||
UNIMPLEMENTED_SEQ();
|
UNIMPLEMENTED_SEQ();
|
||||||
}
|
}
|
||||||
|
} else if (dest.getIdx() == src3.getIdx()) {
|
||||||
|
auto Ntx = TEMP_LIKE(src3);
|
||||||
|
e.mov(Ntx, src3);
|
||||||
|
e.mov(dest, src1);
|
||||||
|
vvv_fn(e, *i, dest, src2, Ntx);
|
||||||
} else {
|
} else {
|
||||||
e.mov(dest, src1);
|
e.mov(dest, src1);
|
||||||
vvv_fn(e, *i, dest, src2, src3);
|
vvv_fn(e, *i, dest, src2, src3);
|
||||||
|
@ -691,7 +696,7 @@ void IntTernaryOpVVC(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn,
|
||||||
i->src2.value, src2, 0);
|
i->src2.value, src2, 0);
|
||||||
if (dest.getBit() <= 32) {
|
if (dest.getBit() <= 32) {
|
||||||
// 32-bit.
|
// 32-bit.
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
vvc_fn(e, *i, dest, src2, (uint32_t)src3->get_constant(CT()));
|
vvc_fn(e, *i, dest, src2, (uint32_t)src3->get_constant(CT()));
|
||||||
} else if (dest == src2) {
|
} else if (dest == src2) {
|
||||||
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
||||||
|
@ -709,10 +714,10 @@ void IntTernaryOpVVC(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// 64-bit.
|
// 64-bit.
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
e.mov(TEMP_REG, src3->constant.i64);
|
e.mov(TEMP_REG, src3->constant.i64);
|
||||||
vvv_fn(e, *i, dest, src2, TEMP_REG);
|
vvv_fn(e, *i, dest, src2, TEMP_REG);
|
||||||
} else if (dest == src2) {
|
} else if (dest.getIdx() == src2.getIdx()) {
|
||||||
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
||||||
e.mov(TEMP_REG, src3->constant.i64);
|
e.mov(TEMP_REG, src3->constant.i64);
|
||||||
vvv_fn(e, *i, dest, src1, TEMP_REG);
|
vvv_fn(e, *i, dest, src1, TEMP_REG);
|
||||||
|
@ -740,9 +745,9 @@ void IntTernaryOpVCV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vcv_fn vcv_fn,
|
||||||
i->src3.value, src3, 0);
|
i->src3.value, src3, 0);
|
||||||
if (dest.getBit() <= 32) {
|
if (dest.getBit() <= 32) {
|
||||||
// 32-bit.
|
// 32-bit.
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
vcv_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()), src3);
|
vcv_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()), src3);
|
||||||
} else if (dest == src3) {
|
} else if (dest.getIdx() == src3.getIdx()) {
|
||||||
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
||||||
vcv_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()), src1);
|
vcv_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()), src1);
|
||||||
} else {
|
} else {
|
||||||
|
@ -758,10 +763,10 @@ void IntTernaryOpVCV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vcv_fn vcv_fn,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// 64-bit.
|
// 64-bit.
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
e.mov(TEMP_REG, src2->constant.i64);
|
e.mov(TEMP_REG, src2->constant.i64);
|
||||||
vvv_fn(e, *i, dest, TEMP_REG, src3);
|
vvv_fn(e, *i, dest, TEMP_REG, src3);
|
||||||
} else if (dest == src3) {
|
} else if (dest.getIdx() == src3.getIdx()) {
|
||||||
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
||||||
e.mov(TEMP_REG, src2->constant.i64);
|
e.mov(TEMP_REG, src2->constant.i64);
|
||||||
vvv_fn(e, *i, dest, src1, TEMP_REG);
|
vvv_fn(e, *i, dest, src1, TEMP_REG);
|
||||||
|
@ -817,16 +822,20 @@ void IntTernaryOp(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, vcv_fn
|
||||||
IntTernaryOpVVC<int8_t>(e, i, vvv_fn, vvc_fn, dest, src1, src2, i->src3.value);
|
IntTernaryOpVVC<int8_t>(e, i, vvv_fn, vvc_fn, dest, src1, src2, i->src3.value);
|
||||||
//
|
//
|
||||||
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I8, SIG_TYPE_I8C, SIG_TYPE_I8)) {
|
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I8, SIG_TYPE_I8C, SIG_TYPE_I8)) {
|
||||||
Reg8 dest, src1, src3;
|
Reg8 dest, src1;
|
||||||
|
Reg8 src3;
|
||||||
IntTernaryOpVCV<int8_t>(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
|
IntTernaryOpVCV<int8_t>(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
|
||||||
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I16, SIG_TYPE_I16C, SIG_TYPE_I8)) {
|
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I16, SIG_TYPE_I16C, SIG_TYPE_I8)) {
|
||||||
Reg16 dest, src1, src3;
|
Reg16 dest, src1;
|
||||||
|
Reg8 src3;
|
||||||
IntTernaryOpVCV<int16_t>(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
|
IntTernaryOpVCV<int16_t>(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
|
||||||
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I32, SIG_TYPE_I32C, SIG_TYPE_I8)) {
|
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I32, SIG_TYPE_I32C, SIG_TYPE_I8)) {
|
||||||
Reg32 dest, src1, src3;
|
Reg32 dest, src1;
|
||||||
|
Reg8 src3;
|
||||||
IntTernaryOpVCV<int32_t>(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
|
IntTernaryOpVCV<int32_t>(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
|
||||||
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I64, SIG_TYPE_I64C, SIG_TYPE_I8)) {
|
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I64, SIG_TYPE_I64C, SIG_TYPE_I8)) {
|
||||||
Reg64 dest, src1, src3;
|
Reg64 dest, src1;
|
||||||
|
Reg8 src3;
|
||||||
IntTernaryOpVCV<int64_t>(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
|
IntTernaryOpVCV<int64_t>(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3);
|
||||||
} else {
|
} else {
|
||||||
ASSERT_INVALID_TYPE();
|
ASSERT_INVALID_TYPE();
|
||||||
|
@ -856,7 +865,7 @@ void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn,
|
||||||
e.mov(e.rax, (uint64_t)src1->constant.i64);
|
e.mov(e.rax, (uint64_t)src1->constant.i64);
|
||||||
e.movq(dest, e.rax);
|
e.movq(dest, e.rax);
|
||||||
} else {
|
} else {
|
||||||
UNIMPLEMENTED_SEQ();
|
LoadXmmConstant(e, dest, src1->constant.v128);
|
||||||
}
|
}
|
||||||
v_fn(e, *i, dest, dest);
|
v_fn(e, *i, dest, dest);
|
||||||
e.EndOp(dest);
|
e.EndOp(dest);
|
||||||
|
@ -901,9 +910,9 @@ void XmmBinaryOpVV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
|
||||||
e.BeginOp(i->dest, dest, REG_DEST,
|
e.BeginOp(i->dest, dest, REG_DEST,
|
||||||
i->src1.value, src1, 0,
|
i->src1.value, src1, 0,
|
||||||
i->src2.value, src2, 0);
|
i->src2.value, src2, 0);
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
vv_fn(e, *i, dest, src2);
|
vv_fn(e, *i, dest, src2);
|
||||||
} else if (dest == src2) {
|
} else if (dest.getIdx() == src2.getIdx()) {
|
||||||
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
||||||
vv_fn(e, *i, dest, src1);
|
vv_fn(e, *i, dest, src1);
|
||||||
} else {
|
} else {
|
||||||
|
@ -934,7 +943,7 @@ void XmmBinaryOpVC(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
|
||||||
}
|
}
|
||||||
vv_fn(e, *i, dest, src1);
|
vv_fn(e, *i, dest, src1);
|
||||||
} else {
|
} else {
|
||||||
if (dest != src1) {
|
if (dest.getIdx() != src1.getIdx()) {
|
||||||
e.movaps(dest, src1);
|
e.movaps(dest, src1);
|
||||||
}
|
}
|
||||||
if (src2->type == FLOAT32_TYPE) {
|
if (src2->type == FLOAT32_TYPE) {
|
||||||
|
@ -967,7 +976,7 @@ void XmmBinaryOpCV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
|
||||||
vv_fn(e, *i, dest, src2);
|
vv_fn(e, *i, dest, src2);
|
||||||
} else {
|
} else {
|
||||||
auto real_src2 = src2;
|
auto real_src2 = src2;
|
||||||
if (dest == src2) {
|
if (dest.getIdx() == src2.getIdx()) {
|
||||||
e.movaps(e.xmm0, src2);
|
e.movaps(e.xmm0, src2);
|
||||||
real_src2 = e.xmm0;
|
real_src2 = e.xmm0;
|
||||||
}
|
}
|
||||||
|
@ -1010,9 +1019,9 @@ void XmmTernaryOpVVV(X64Emitter& e, Instr*& i, xmm_vvv_fn vvv_fn,
|
||||||
i->src1.value, src1, 0,
|
i->src1.value, src1, 0,
|
||||||
i->src2.value, src2, 0,
|
i->src2.value, src2, 0,
|
||||||
i->src3.value, src3, 0);
|
i->src3.value, src3, 0);
|
||||||
if (dest == src1) {
|
if (dest.getIdx() == src1.getIdx()) {
|
||||||
vvv_fn(e, *i, dest, src2, src3);
|
vvv_fn(e, *i, dest, src2, src3);
|
||||||
} else if (dest == src2) {
|
} else if (dest.getIdx() == src2.getIdx()) {
|
||||||
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
||||||
vvv_fn(e, *i, dest, src1, src3);
|
vvv_fn(e, *i, dest, src1, src3);
|
||||||
} else {
|
} else {
|
||||||
|
@ -1021,7 +1030,7 @@ void XmmTernaryOpVVV(X64Emitter& e, Instr*& i, xmm_vvv_fn vvv_fn,
|
||||||
vvv_fn(e, *i, e.xmm0, src2, src3);
|
vvv_fn(e, *i, e.xmm0, src2, src3);
|
||||||
e.movaps(dest, e.xmm0);
|
e.movaps(dest, e.xmm0);
|
||||||
}
|
}
|
||||||
} else if (dest == src3) {
|
} else if (dest.getIdx() == src3.getIdx()) {
|
||||||
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) {
|
||||||
vvv_fn(e, *i, dest, src1, src2);
|
vvv_fn(e, *i, dest, src1, src2);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -45,14 +45,14 @@ int X64Backend::Initialize() {
|
||||||
0,
|
0,
|
||||||
"gpr",
|
"gpr",
|
||||||
MachineInfo::RegisterSet::INT_TYPES,
|
MachineInfo::RegisterSet::INT_TYPES,
|
||||||
10,
|
X64Emitter::GPR_COUNT,
|
||||||
};
|
};
|
||||||
machine_info_.register_sets[1] = {
|
machine_info_.register_sets[1] = {
|
||||||
1,
|
1,
|
||||||
"xmm",
|
"xmm",
|
||||||
MachineInfo::RegisterSet::FLOAT_TYPES |
|
MachineInfo::RegisterSet::FLOAT_TYPES |
|
||||||
MachineInfo::RegisterSet::VEC_TYPES,
|
MachineInfo::RegisterSet::VEC_TYPES,
|
||||||
10,
|
X64Emitter::XMM_COUNT,
|
||||||
};
|
};
|
||||||
|
|
||||||
code_cache_ = new X64CodeCache();
|
code_cache_ = new X64CodeCache();
|
||||||
|
|
|
@ -36,6 +36,16 @@ static const size_t MAX_CODE_SIZE = 1 * 1024 * 1024;
|
||||||
} // namespace alloy
|
} // namespace alloy
|
||||||
|
|
||||||
|
|
||||||
|
const uint32_t X64Emitter::gpr_reg_map_[X64Emitter::GPR_COUNT] = {
|
||||||
|
Operand::RBX,
|
||||||
|
Operand::R12, Operand::R13, Operand::R14, Operand::R15,
|
||||||
|
};
|
||||||
|
|
||||||
|
const uint32_t X64Emitter::xmm_reg_map_[X64Emitter::XMM_COUNT] = {
|
||||||
|
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) :
|
X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) :
|
||||||
runtime_(backend->runtime()),
|
runtime_(backend->runtime()),
|
||||||
backend_(backend),
|
backend_(backend),
|
||||||
|
@ -43,7 +53,6 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) :
|
||||||
allocator_(allocator),
|
allocator_(allocator),
|
||||||
current_instr_(0),
|
current_instr_(0),
|
||||||
CodeGenerator(MAX_CODE_SIZE, AutoGrow, allocator) {
|
CodeGenerator(MAX_CODE_SIZE, AutoGrow, allocator) {
|
||||||
xe_zero_struct(&reg_state_, sizeof(reg_state_));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
X64Emitter::~X64Emitter() {
|
X64Emitter::~X64Emitter() {
|
||||||
|
@ -99,28 +108,6 @@ void* X64Emitter::Emplace(size_t stack_size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
|
int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
|
||||||
// These are the registers we will not be using. All others are fare game.
|
|
||||||
const uint32_t reserved_regs =
|
|
||||||
GetRegBit(rax) | // scratch
|
|
||||||
GetRegBit(rcx) | // arg
|
|
||||||
GetRegBit(rdx) | // arg/clobbered
|
|
||||||
GetRegBit(rsp) |
|
|
||||||
GetRegBit(rbp) |
|
|
||||||
GetRegBit(rsi) |
|
|
||||||
GetRegBit(rdi) |
|
|
||||||
GetRegBit(r8) | // arg/clobbered
|
|
||||||
GetRegBit(xmm0) | // scratch
|
|
||||||
GetRegBit(xmm1) | // sometimes used for scratch, could be fixed
|
|
||||||
|
|
||||||
// TODO(benvanik): save so that we can use these.
|
|
||||||
GetRegBit(r9) |
|
|
||||||
GetRegBit(r10) |
|
|
||||||
GetRegBit(r11) |
|
|
||||||
GetRegBit(xmm2) |
|
|
||||||
GetRegBit(xmm3) |
|
|
||||||
GetRegBit(xmm4) |
|
|
||||||
GetRegBit(xmm5);
|
|
||||||
|
|
||||||
// Calculate stack size. We need to align things to their natural sizes.
|
// Calculate stack size. We need to align things to their natural sizes.
|
||||||
// This could be much better (sort by type/etc).
|
// This could be much better (sort by type/etc).
|
||||||
auto locals = builder->locals();
|
auto locals = builder->locals();
|
||||||
|
@ -164,8 +151,6 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
|
||||||
|
|
||||||
auto lowering_table = backend_->lowering_table();
|
auto lowering_table = backend_->lowering_table();
|
||||||
|
|
||||||
reg_state_.active_regs = reg_state_.live_regs = reserved_regs;
|
|
||||||
|
|
||||||
// Body.
|
// Body.
|
||||||
auto block = builder->first_block();
|
auto block = builder->first_block();
|
||||||
while (block) {
|
while (block) {
|
||||||
|
@ -176,11 +161,6 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
|
||||||
label = label->next;
|
label = label->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset reg allocation state.
|
|
||||||
// If we start keeping regs across blocks this needs to change.
|
|
||||||
// We mark a few active so that the allocator doesn't use them.
|
|
||||||
ResetRegisters(reserved_regs);
|
|
||||||
|
|
||||||
// Add instructions.
|
// Add instructions.
|
||||||
// The table will process sequences of instructions to (try to)
|
// The table will process sequences of instructions to (try to)
|
||||||
// generate optimal code.
|
// generate optimal code.
|
||||||
|
@ -211,201 +191,6 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void X64Emitter::ResetRegisters(uint32_t reserved_regs) {
|
|
||||||
// Just need to reset the register for each live value.
|
|
||||||
uint32_t live_regs = reg_state_.live_regs;
|
|
||||||
for (size_t n = 0; n < 32; n++, live_regs >>= 1) {
|
|
||||||
if (live_regs & 0x1) {
|
|
||||||
auto v = reg_state_.reg_values[n];
|
|
||||||
if (v) {
|
|
||||||
v->reg.index = -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
reg_state_.reg_values[n] = 0;
|
|
||||||
}
|
|
||||||
reg_state_.active_regs = reg_state_.live_regs = reserved_regs;
|
|
||||||
}
|
|
||||||
|
|
||||||
void X64Emitter::EvictStaleRegisters() {
|
|
||||||
// NOTE: if we are getting called it's because we *need* a register.
|
|
||||||
// We must get rid of something.
|
|
||||||
|
|
||||||
uint32_t current_ordinal = current_instr_ ?
|
|
||||||
current_instr_->ordinal : 0xFFFFFFFF;
|
|
||||||
|
|
||||||
// Remove any register with no more uses.
|
|
||||||
uint32_t new_live_regs = 0;
|
|
||||||
for (size_t n = 0; n < 32; n++) {
|
|
||||||
uint32_t bit = 1 << n;
|
|
||||||
if (bit & reg_state_.active_regs) {
|
|
||||||
// Register is active and cannot be freed.
|
|
||||||
new_live_regs |= bit;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (!(bit & reg_state_.live_regs)) {
|
|
||||||
// Register is not alive - nothing to do.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Register is live, not active. Check and see if we get rid of it.
|
|
||||||
auto v = reg_state_.reg_values[n];
|
|
||||||
if (!v->last_use ||
|
|
||||||
v->last_use->ordinal < current_ordinal) {
|
|
||||||
reg_state_.reg_values[n] = NULL;
|
|
||||||
v->reg = -1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Register still in use.
|
|
||||||
new_live_regs |= bit;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Hrm. We have spilled.
|
|
||||||
if (reg_state_.live_regs == new_live_regs) {
|
|
||||||
XEASSERTALWAYS();
|
|
||||||
}
|
|
||||||
|
|
||||||
reg_state_.live_regs = new_live_regs;
|
|
||||||
|
|
||||||
// Assert that live is a superset of active.
|
|
||||||
XEASSERTZERO((reg_state_.live_regs ^ reg_state_.active_regs) & reg_state_.active_regs);
|
|
||||||
}
|
|
||||||
|
|
||||||
void X64Emitter::FindFreeRegs(
|
|
||||||
Value* v0, uint32_t& v0_idx, uint32_t v0_flags) {
|
|
||||||
// If the value is already in a register, use it.
|
|
||||||
if (v0->reg != -1) {
|
|
||||||
// Already in a register. Mark active and return.
|
|
||||||
v0_idx = v0->reg;
|
|
||||||
reg_state_.active_regs |= 1 << v0_idx;
|
|
||||||
|
|
||||||
// Assert that live is a superset of active.
|
|
||||||
XEASSERTZERO((reg_state_.live_regs ^ reg_state_.active_regs) & reg_state_.active_regs);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t avail_regs = 0;
|
|
||||||
if (IsIntType(v0->type)) {
|
|
||||||
if (v0_flags & REG_ABCD) {
|
|
||||||
avail_regs = B00001111;
|
|
||||||
} else {
|
|
||||||
avail_regs = 0xFFFF;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
avail_regs = 0xFFFF0000;
|
|
||||||
}
|
|
||||||
uint32_t free_regs = avail_regs & ~reg_state_.live_regs;
|
|
||||||
if (!free_regs) {
|
|
||||||
// Need to evict something.
|
|
||||||
EvictStaleRegisters();
|
|
||||||
free_regs = avail_regs & ~reg_state_.live_regs;
|
|
||||||
XEASSERT(free_regs);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find the first available.
|
|
||||||
// We start from the MSB so that we get the non-rNx regs that are often
|
|
||||||
// in short supply.
|
|
||||||
_BitScanReverse((DWORD*)&v0_idx, free_regs);
|
|
||||||
|
|
||||||
reg_state_.active_regs |= 1 << v0_idx;
|
|
||||||
reg_state_.live_regs |= 1 << v0_idx;
|
|
||||||
v0->reg = v0_idx;
|
|
||||||
reg_state_.reg_values[v0_idx] = v0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void X64Emitter::FindFreeRegs(
|
|
||||||
Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
|
|
||||||
Value* v1, uint32_t& v1_idx, uint32_t v1_flags) {
|
|
||||||
// TODO(benvanik): support REG_DEST reuse/etc.
|
|
||||||
// Grab all already-present registers first.
|
|
||||||
// This way we won't spill them trying to get new registers.
|
|
||||||
bool need_v0 = v0->reg == -1;
|
|
||||||
bool need_v1 = v1->reg == -1;
|
|
||||||
if (!need_v0) {
|
|
||||||
FindFreeRegs(v0, v0_idx, v0_flags);
|
|
||||||
}
|
|
||||||
if (!need_v1) {
|
|
||||||
FindFreeRegs(v1, v1_idx, v1_flags);
|
|
||||||
}
|
|
||||||
// Grab any registers we still need. These calls may evict.
|
|
||||||
if (need_v0) {
|
|
||||||
FindFreeRegs(v0, v0_idx, v0_flags);
|
|
||||||
}
|
|
||||||
if (need_v1) {
|
|
||||||
FindFreeRegs(v1, v1_idx, v1_flags);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void X64Emitter::FindFreeRegs(
|
|
||||||
Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
|
|
||||||
Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
|
|
||||||
Value* v2, uint32_t& v2_idx, uint32_t v2_flags) {
|
|
||||||
// TODO(benvanik): support REG_DEST reuse/etc.
|
|
||||||
// Grab all already-present registers first.
|
|
||||||
// This way we won't spill them trying to get new registers.
|
|
||||||
bool need_v0 = v0->reg == -1;
|
|
||||||
bool need_v1 = v1->reg == -1;
|
|
||||||
bool need_v2 = v2->reg == -1;
|
|
||||||
if (!need_v0) {
|
|
||||||
FindFreeRegs(v0, v0_idx, v0_flags);
|
|
||||||
}
|
|
||||||
if (!need_v1) {
|
|
||||||
FindFreeRegs(v1, v1_idx, v1_flags);
|
|
||||||
}
|
|
||||||
if (!need_v2) {
|
|
||||||
FindFreeRegs(v2, v2_idx, v2_flags);
|
|
||||||
}
|
|
||||||
// Grab any registers we still need. These calls may evict.
|
|
||||||
if (need_v0) {
|
|
||||||
FindFreeRegs(v0, v0_idx, v0_flags);
|
|
||||||
}
|
|
||||||
if (need_v1) {
|
|
||||||
FindFreeRegs(v1, v1_idx, v1_flags);
|
|
||||||
}
|
|
||||||
if (need_v2) {
|
|
||||||
FindFreeRegs(v2, v2_idx, v2_flags);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void X64Emitter::FindFreeRegs(
|
|
||||||
Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
|
|
||||||
Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
|
|
||||||
Value* v2, uint32_t& v2_idx, uint32_t v2_flags,
|
|
||||||
Value* v3, uint32_t& v3_idx, uint32_t v3_flags) {
|
|
||||||
// TODO(benvanik): support REG_DEST reuse/etc.
|
|
||||||
// Grab all already-present registers first.
|
|
||||||
// This way we won't spill them trying to get new registers.
|
|
||||||
bool need_v0 = v0->reg == -1;
|
|
||||||
bool need_v1 = v1->reg == -1;
|
|
||||||
bool need_v2 = v2->reg == -1;
|
|
||||||
bool need_v3 = v3->reg == -1;
|
|
||||||
if (!need_v0) {
|
|
||||||
FindFreeRegs(v0, v0_idx, v0_flags);
|
|
||||||
}
|
|
||||||
if (!need_v1) {
|
|
||||||
FindFreeRegs(v1, v1_idx, v1_flags);
|
|
||||||
}
|
|
||||||
if (!need_v2) {
|
|
||||||
FindFreeRegs(v2, v2_idx, v2_flags);
|
|
||||||
}
|
|
||||||
if (!need_v3) {
|
|
||||||
FindFreeRegs(v3, v3_idx, v3_flags);
|
|
||||||
}
|
|
||||||
// Grab any registers we still need. These calls may evict.
|
|
||||||
if (need_v0) {
|
|
||||||
FindFreeRegs(v0, v0_idx, v0_flags);
|
|
||||||
}
|
|
||||||
if (need_v1) {
|
|
||||||
FindFreeRegs(v1, v1_idx, v1_flags);
|
|
||||||
}
|
|
||||||
if (need_v2) {
|
|
||||||
FindFreeRegs(v2, v2_idx, v2_flags);
|
|
||||||
}
|
|
||||||
if (need_v3) {
|
|
||||||
FindFreeRegs(v3, v3_idx, v3_flags);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Instr* X64Emitter::Advance(Instr* i) {
|
Instr* X64Emitter::Advance(Instr* i) {
|
||||||
auto next = i->next;
|
auto next = i->next;
|
||||||
current_instr_ = next;
|
current_instr_ = next;
|
||||||
|
|
|
@ -56,90 +56,73 @@ public:
|
||||||
public:
|
public:
|
||||||
template<typename V0>
|
template<typename V0>
|
||||||
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags) {
|
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags) {
|
||||||
uint32_t v0_idx;
|
SetupReg(v0, r0);
|
||||||
FindFreeRegs(v0, v0_idx, r0_flags);
|
|
||||||
SetupReg(v0_idx, r0);
|
|
||||||
}
|
}
|
||||||
template<typename V0, typename V1>
|
template<typename V0, typename V1>
|
||||||
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
|
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
|
||||||
hir::Value* v1, V1& r1, uint32_t r1_flags) {
|
hir::Value* v1, V1& r1, uint32_t r1_flags) {
|
||||||
uint32_t v0_idx, v1_idx;
|
SetupReg(v0, r0);
|
||||||
FindFreeRegs(v0, v0_idx, r0_flags,
|
SetupReg(v1, r1);
|
||||||
v1, v1_idx, r1_flags);
|
|
||||||
SetupReg(v0_idx, r0);
|
|
||||||
SetupReg(v1_idx, r1);
|
|
||||||
}
|
}
|
||||||
template<typename V0, typename V1, typename V2>
|
template<typename V0, typename V1, typename V2>
|
||||||
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
|
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
|
||||||
hir::Value* v1, V1& r1, uint32_t r1_flags,
|
hir::Value* v1, V1& r1, uint32_t r1_flags,
|
||||||
hir::Value* v2, V2& r2, uint32_t r2_flags) {
|
hir::Value* v2, V2& r2, uint32_t r2_flags) {
|
||||||
uint32_t v0_idx, v1_idx, v2_idx;
|
SetupReg(v0, r0);
|
||||||
FindFreeRegs(v0, v0_idx, r0_flags,
|
SetupReg(v1, r1);
|
||||||
v1, v1_idx, r1_flags,
|
SetupReg(v2, r2);
|
||||||
v2, v2_idx, r2_flags);
|
|
||||||
SetupReg(v0_idx, r0);
|
|
||||||
SetupReg(v1_idx, r1);
|
|
||||||
SetupReg(v2_idx, r2);
|
|
||||||
}
|
}
|
||||||
template<typename V0, typename V1, typename V2, typename V3>
|
template<typename V0, typename V1, typename V2, typename V3>
|
||||||
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
|
void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags,
|
||||||
hir::Value* v1, V1& r1, uint32_t r1_flags,
|
hir::Value* v1, V1& r1, uint32_t r1_flags,
|
||||||
hir::Value* v2, V2& r2, uint32_t r2_flags,
|
hir::Value* v2, V2& r2, uint32_t r2_flags,
|
||||||
hir::Value* v3, V3& r3, uint32_t r3_flags) {
|
hir::Value* v3, V3& r3, uint32_t r3_flags) {
|
||||||
uint32_t v0_idx, v1_idx, v2_idx, v3_idx;
|
SetupReg(v0, r0);
|
||||||
FindFreeRegs(v0, v0_idx, r0_flags,
|
SetupReg(v1, r1);
|
||||||
v1, v1_idx, r1_flags,
|
SetupReg(v2, r2);
|
||||||
v2, v2_idx, r2_flags,
|
SetupReg(v3, r3);
|
||||||
v3, v3_idx, r3_flags);
|
|
||||||
SetupReg(v0_idx, r0);
|
|
||||||
SetupReg(v1_idx, r1);
|
|
||||||
SetupReg(v2_idx, r2);
|
|
||||||
SetupReg(v3_idx, r3);
|
|
||||||
}
|
}
|
||||||
template<typename V0>
|
template<typename V0>
|
||||||
void EndOp(V0& r0) {
|
void EndOp(V0& r0) {
|
||||||
reg_state_.active_regs = reg_state_.active_regs ^ GetRegBit(r0);
|
|
||||||
}
|
}
|
||||||
template<typename V0, typename V1>
|
template<typename V0, typename V1>
|
||||||
void EndOp(V0& r0, V1& r1) {
|
void EndOp(V0& r0, V1& r1) {
|
||||||
reg_state_.active_regs = reg_state_.active_regs ^ (
|
|
||||||
GetRegBit(r0) | GetRegBit(r1));
|
|
||||||
}
|
}
|
||||||
template<typename V0, typename V1, typename V2>
|
template<typename V0, typename V1, typename V2>
|
||||||
void EndOp(V0& r0, V1& r1, V2& r2) {
|
void EndOp(V0& r0, V1& r1, V2& r2) {
|
||||||
reg_state_.active_regs = reg_state_.active_regs ^ (
|
|
||||||
GetRegBit(r0) | GetRegBit(r1) | GetRegBit(r2));
|
|
||||||
}
|
}
|
||||||
template<typename V0, typename V1, typename V2, typename V3>
|
template<typename V0, typename V1, typename V2, typename V3>
|
||||||
void EndOp(V0& r0, V1& r1, V2& r2, V3& r3) {
|
void EndOp(V0& r0, V1& r1, V2& r2, V3& r3) {
|
||||||
reg_state_.active_regs = reg_state_.active_regs ^ (
|
|
||||||
GetRegBit(r0) | GetRegBit(r1) | GetRegBit(r2) | GetRegBit(r3));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ResetRegisters(uint32_t reserved_regs);
|
// Reserved: rsp
|
||||||
void EvictStaleRegisters();
|
// Scratch: rax/rcx/rdx
|
||||||
|
// xmm0-1
|
||||||
|
// Available: rbx, r12-r15 (maybe r8-r11, rbp, rsi, rdi?)
|
||||||
|
// xmm2-xmm15
|
||||||
|
static const int GPR_COUNT = 5;
|
||||||
|
static const int XMM_COUNT = 14;
|
||||||
|
|
||||||
void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags);
|
static void SetupReg(hir::Value* v, Xbyak::Reg8& r) {
|
||||||
void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
|
auto idx = gpr_reg_map_[v->reg.index];
|
||||||
hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags);
|
r = Xbyak::Reg8(idx);
|
||||||
void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
|
}
|
||||||
hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
|
static void SetupReg(hir::Value* v, Xbyak::Reg16& r) {
|
||||||
hir::Value* v2, uint32_t& v2_idx, uint32_t v2_flags);
|
auto idx = gpr_reg_map_[v->reg.index];
|
||||||
void FindFreeRegs(hir::Value* v0, uint32_t& v0_idx, uint32_t v0_flags,
|
r = Xbyak::Reg16(idx);
|
||||||
hir::Value* v1, uint32_t& v1_idx, uint32_t v1_flags,
|
}
|
||||||
hir::Value* v2, uint32_t& v2_idx, uint32_t v2_flags,
|
static void SetupReg(hir::Value* v, Xbyak::Reg32& r) {
|
||||||
hir::Value* v3, uint32_t& v3_idx, uint32_t v3_flags);
|
auto idx = gpr_reg_map_[v->reg.index];
|
||||||
|
r = Xbyak::Reg32(idx);
|
||||||
static void SetupReg(uint32_t idx, Xbyak::Reg8& r) { r = Xbyak::Reg8(idx); }
|
}
|
||||||
static void SetupReg(uint32_t idx, Xbyak::Reg16& r) { r = Xbyak::Reg16(idx); }
|
static void SetupReg(hir::Value* v, Xbyak::Reg64& r) {
|
||||||
static void SetupReg(uint32_t idx, Xbyak::Reg32& r) { r = Xbyak::Reg32(idx); }
|
auto idx = gpr_reg_map_[v->reg.index];
|
||||||
static void SetupReg(uint32_t idx, Xbyak::Reg64& r) { r = Xbyak::Reg64(idx); }
|
r = Xbyak::Reg64(idx);
|
||||||
static void SetupReg(uint32_t idx, Xbyak::Xmm& r) { r = Xbyak::Xmm(idx - 16); }
|
}
|
||||||
static uint32_t GetRegBit(const Xbyak::Reg8& r) { return 1 << r.getIdx(); }
|
static void SetupReg(hir::Value* v, Xbyak::Xmm& r) {
|
||||||
static uint32_t GetRegBit(const Xbyak::Reg16& r) { return 1 << r.getIdx(); }
|
auto idx = xmm_reg_map_[v->reg.index];
|
||||||
static uint32_t GetRegBit(const Xbyak::Reg32& r) { return 1 << r.getIdx(); }
|
r = Xbyak::Xmm(idx);
|
||||||
static uint32_t GetRegBit(const Xbyak::Reg64& r) { return 1 << r.getIdx(); }
|
}
|
||||||
static uint32_t GetRegBit(const Xbyak::Xmm& r) { return 1 << (16 + r.getIdx()); }
|
|
||||||
|
|
||||||
hir::Instr* Advance(hir::Instr* i);
|
hir::Instr* Advance(hir::Instr* i);
|
||||||
|
|
||||||
|
@ -157,21 +140,15 @@ protected:
|
||||||
X64CodeCache* code_cache_;
|
X64CodeCache* code_cache_;
|
||||||
XbyakAllocator* allocator_;
|
XbyakAllocator* allocator_;
|
||||||
|
|
||||||
struct {
|
|
||||||
// Registers currently active within a begin/end op block. These
|
|
||||||
// cannot be reused.
|
|
||||||
uint32_t active_regs;
|
|
||||||
// Registers with values in them.
|
|
||||||
uint32_t live_regs;
|
|
||||||
// Current register values.
|
|
||||||
hir::Value* reg_values[32];
|
|
||||||
} reg_state_;
|
|
||||||
hir::Instr* current_instr_;
|
hir::Instr* current_instr_;
|
||||||
|
|
||||||
size_t source_map_count_;
|
size_t source_map_count_;
|
||||||
Arena source_map_arena_;
|
Arena source_map_arena_;
|
||||||
|
|
||||||
size_t stack_size_;
|
size_t stack_size_;
|
||||||
|
|
||||||
|
static const uint32_t gpr_reg_map_[GPR_COUNT];
|
||||||
|
static const uint32_t xmm_reg_map_[XMM_COUNT];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -179,6 +179,76 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// TODO(benvanik): compares
|
// TODO(benvanik): compares
|
||||||
|
case OPCODE_COMPARE_EQ:
|
||||||
|
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||||
|
bool value = i->src1.value->IsConstantEQ(i->src2.value);
|
||||||
|
i->dest->set_constant(value);
|
||||||
|
i->Remove();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OPCODE_COMPARE_NE:
|
||||||
|
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||||
|
bool value = i->src1.value->IsConstantNE(i->src2.value);
|
||||||
|
i->dest->set_constant(value);
|
||||||
|
i->Remove();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OPCODE_COMPARE_SLT:
|
||||||
|
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||||
|
bool value = i->src1.value->IsConstantSLT(i->src2.value);
|
||||||
|
i->dest->set_constant(value);
|
||||||
|
i->Remove();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OPCODE_COMPARE_SLE:
|
||||||
|
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||||
|
bool value = i->src1.value->IsConstantSLE(i->src2.value);
|
||||||
|
i->dest->set_constant(value);
|
||||||
|
i->Remove();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OPCODE_COMPARE_SGT:
|
||||||
|
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||||
|
bool value = i->src1.value->IsConstantSGT(i->src2.value);
|
||||||
|
i->dest->set_constant(value);
|
||||||
|
i->Remove();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OPCODE_COMPARE_SGE:
|
||||||
|
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||||
|
bool value = i->src1.value->IsConstantSGE(i->src2.value);
|
||||||
|
i->dest->set_constant(value);
|
||||||
|
i->Remove();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OPCODE_COMPARE_ULT:
|
||||||
|
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||||
|
bool value = i->src1.value->IsConstantULT(i->src2.value);
|
||||||
|
i->dest->set_constant(value);
|
||||||
|
i->Remove();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OPCODE_COMPARE_ULE:
|
||||||
|
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||||
|
bool value = i->src1.value->IsConstantULE(i->src2.value);
|
||||||
|
i->dest->set_constant(value);
|
||||||
|
i->Remove();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OPCODE_COMPARE_UGT:
|
||||||
|
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||||
|
bool value = i->src1.value->IsConstantUGT(i->src2.value);
|
||||||
|
i->dest->set_constant(value);
|
||||||
|
i->Remove();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OPCODE_COMPARE_UGE:
|
||||||
|
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||||
|
bool value = i->src1.value->IsConstantUGE(i->src2.value);
|
||||||
|
i->dest->set_constant(value);
|
||||||
|
i->Remove();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case OPCODE_ADD:
|
case OPCODE_ADD:
|
||||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||||
|
|
|
@ -280,6 +280,7 @@ Value* PPCHIRBuilder::LoadCA() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPCHIRBuilder::StoreCA(Value* value) {
|
void PPCHIRBuilder::StoreCA(Value* value) {
|
||||||
|
value = Truncate(value, INT8_TYPE);
|
||||||
StoreContext(offsetof(PPCContext, xer_ca), value);
|
StoreContext(offsetof(PPCContext, xer_ca), value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -288,6 +289,7 @@ Value* PPCHIRBuilder::LoadSAT() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPCHIRBuilder::StoreSAT(Value* value) {
|
void PPCHIRBuilder::StoreSAT(Value* value) {
|
||||||
|
value = Truncate(value, INT8_TYPE);
|
||||||
StoreContext(offsetof(PPCContext, vscr_sat), value);
|
StoreContext(offsetof(PPCContext, vscr_sat), value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -363,7 +363,7 @@ DEFINE_OPCODE(
|
||||||
OPCODE_ADD_CARRY,
|
OPCODE_ADD_CARRY,
|
||||||
"add_carry",
|
"add_carry",
|
||||||
OPCODE_SIG_V_V_V_V,
|
OPCODE_SIG_V_V_V_V,
|
||||||
OPCODE_FLAG_COMMUNATIVE);
|
0);
|
||||||
|
|
||||||
DEFINE_OPCODE(
|
DEFINE_OPCODE(
|
||||||
OPCODE_VECTOR_ADD,
|
OPCODE_VECTOR_ADD,
|
||||||
|
|
|
@ -221,6 +221,150 @@ public:
|
||||||
(other->flags & VALUE_IS_CONSTANT) &&
|
(other->flags & VALUE_IS_CONSTANT) &&
|
||||||
constant.i64 != other->constant.i64;
|
constant.i64 != other->constant.i64;
|
||||||
}
|
}
|
||||||
|
bool IsConstantSLT(Value* other) const {
|
||||||
|
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
|
||||||
|
switch (type) {
|
||||||
|
case INT8_TYPE:
|
||||||
|
return constant.i8 < other->constant.i8;
|
||||||
|
case INT16_TYPE:
|
||||||
|
return constant.i16 < other->constant.i16;
|
||||||
|
case INT32_TYPE:
|
||||||
|
return constant.i32 < other->constant.i32;
|
||||||
|
case INT64_TYPE:
|
||||||
|
return constant.i64 < other->constant.i64;
|
||||||
|
case FLOAT32_TYPE:
|
||||||
|
return constant.f32 < other->constant.f32;
|
||||||
|
case FLOAT64_TYPE:
|
||||||
|
return constant.f64 < other->constant.f64;
|
||||||
|
default: XEASSERTALWAYS(); return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool IsConstantSLE(Value* other) const {
|
||||||
|
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
|
||||||
|
switch (type) {
|
||||||
|
case INT8_TYPE:
|
||||||
|
return constant.i8 <= other->constant.i8;
|
||||||
|
case INT16_TYPE:
|
||||||
|
return constant.i16 <= other->constant.i16;
|
||||||
|
case INT32_TYPE:
|
||||||
|
return constant.i32 <= other->constant.i32;
|
||||||
|
case INT64_TYPE:
|
||||||
|
return constant.i64 <= other->constant.i64;
|
||||||
|
case FLOAT32_TYPE:
|
||||||
|
return constant.f32 <= other->constant.f32;
|
||||||
|
case FLOAT64_TYPE:
|
||||||
|
return constant.f64 <= other->constant.f64;
|
||||||
|
default: XEASSERTALWAYS(); return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool IsConstantSGT(Value* other) const {
|
||||||
|
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
|
||||||
|
switch (type) {
|
||||||
|
case INT8_TYPE:
|
||||||
|
return constant.i8 > other->constant.i8;
|
||||||
|
case INT16_TYPE:
|
||||||
|
return constant.i16 > other->constant.i16;
|
||||||
|
case INT32_TYPE:
|
||||||
|
return constant.i32 > other->constant.i32;
|
||||||
|
case INT64_TYPE:
|
||||||
|
return constant.i64 > other->constant.i64;
|
||||||
|
case FLOAT32_TYPE:
|
||||||
|
return constant.f32 > other->constant.f32;
|
||||||
|
case FLOAT64_TYPE:
|
||||||
|
return constant.f64 > other->constant.f64;
|
||||||
|
default: XEASSERTALWAYS(); return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool IsConstantSGE(Value* other) const {
|
||||||
|
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
|
||||||
|
switch (type) {
|
||||||
|
case INT8_TYPE:
|
||||||
|
return constant.i8 >= other->constant.i8;
|
||||||
|
case INT16_TYPE:
|
||||||
|
return constant.i16 >= other->constant.i16;
|
||||||
|
case INT32_TYPE:
|
||||||
|
return constant.i32 >= other->constant.i32;
|
||||||
|
case INT64_TYPE:
|
||||||
|
return constant.i64 >= other->constant.i64;
|
||||||
|
case FLOAT32_TYPE:
|
||||||
|
return constant.f32 >= other->constant.f32;
|
||||||
|
case FLOAT64_TYPE:
|
||||||
|
return constant.f64 >= other->constant.f64;
|
||||||
|
default: XEASSERTALWAYS(); return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool IsConstantULT(Value* other) const {
|
||||||
|
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
|
||||||
|
switch (type) {
|
||||||
|
case INT8_TYPE:
|
||||||
|
return (uint8_t)constant.i8 < (uint8_t)other->constant.i8;
|
||||||
|
case INT16_TYPE:
|
||||||
|
return (uint16_t)constant.i16 < (uint16_t)other->constant.i16;
|
||||||
|
case INT32_TYPE:
|
||||||
|
return (uint32_t)constant.i32 < (uint32_t)other->constant.i32;
|
||||||
|
case INT64_TYPE:
|
||||||
|
return (uint64_t)constant.i64 < (uint64_t)other->constant.i64;
|
||||||
|
case FLOAT32_TYPE:
|
||||||
|
return constant.f32 < other->constant.f32;
|
||||||
|
case FLOAT64_TYPE:
|
||||||
|
return constant.f64 < other->constant.f64;
|
||||||
|
default: XEASSERTALWAYS(); return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool IsConstantULE(Value* other) const {
|
||||||
|
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
|
||||||
|
switch (type) {
|
||||||
|
case INT8_TYPE:
|
||||||
|
return (uint8_t)constant.i8 <= (uint8_t)other->constant.i8;
|
||||||
|
case INT16_TYPE:
|
||||||
|
return (uint16_t)constant.i16 <= (uint16_t)other->constant.i16;
|
||||||
|
case INT32_TYPE:
|
||||||
|
return (uint32_t)constant.i32 <= (uint32_t)other->constant.i32;
|
||||||
|
case INT64_TYPE:
|
||||||
|
return (uint64_t)constant.i64 <= (uint64_t)other->constant.i64;
|
||||||
|
case FLOAT32_TYPE:
|
||||||
|
return constant.f32 <= other->constant.f32;
|
||||||
|
case FLOAT64_TYPE:
|
||||||
|
return constant.f64 <= other->constant.f64;
|
||||||
|
default: XEASSERTALWAYS(); return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool IsConstantUGT(Value* other) const {
|
||||||
|
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
|
||||||
|
switch (type) {
|
||||||
|
case INT8_TYPE:
|
||||||
|
return (uint8_t)constant.i8 > (uint8_t)other->constant.i8;
|
||||||
|
case INT16_TYPE:
|
||||||
|
return (uint16_t)constant.i16 > (uint16_t)other->constant.i16;
|
||||||
|
case INT32_TYPE:
|
||||||
|
return (uint32_t)constant.i32 > (uint32_t)other->constant.i32;
|
||||||
|
case INT64_TYPE:
|
||||||
|
return (uint64_t)constant.i64 > (uint64_t)other->constant.i64;
|
||||||
|
case FLOAT32_TYPE:
|
||||||
|
return constant.f32 > other->constant.f32;
|
||||||
|
case FLOAT64_TYPE:
|
||||||
|
return constant.f64 > other->constant.f64;
|
||||||
|
default: XEASSERTALWAYS(); return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool IsConstantUGE(Value* other) const {
|
||||||
|
XEASSERT(flags & VALUE_IS_CONSTANT && other->flags & VALUE_IS_CONSTANT);
|
||||||
|
switch (type) {
|
||||||
|
case INT8_TYPE:
|
||||||
|
return (uint8_t)constant.i8 >= (uint8_t)other->constant.i8;
|
||||||
|
case INT16_TYPE:
|
||||||
|
return (uint16_t)constant.i16 >= (uint16_t)other->constant.i16;
|
||||||
|
case INT32_TYPE:
|
||||||
|
return (uint32_t)constant.i32 >= (uint32_t)other->constant.i32;
|
||||||
|
case INT64_TYPE:
|
||||||
|
return (uint64_t)constant.i64 >= (uint64_t)other->constant.i64;
|
||||||
|
case FLOAT32_TYPE:
|
||||||
|
return constant.f32 >= other->constant.f32;
|
||||||
|
case FLOAT64_TYPE:
|
||||||
|
return constant.f64 >= other->constant.f64;
|
||||||
|
default: XEASSERTALWAYS(); return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
uint32_t AsUint32();
|
uint32_t AsUint32();
|
||||||
uint64_t AsUint64();
|
uint64_t AsUint64();
|
||||||
|
|
||||||
|
|
|
@ -58,7 +58,7 @@ Runtime::~Runtime() {
|
||||||
|
|
||||||
// TODO(benvanik): based on compiler support
|
// TODO(benvanik): based on compiler support
|
||||||
#include <alloy/backend/ivm/ivm_backend.h>
|
#include <alloy/backend/ivm/ivm_backend.h>
|
||||||
//#include <alloy/backend/x64/x64_backend.h>
|
#include <alloy/backend/x64/x64_backend.h>
|
||||||
|
|
||||||
int Runtime::Initialize(Frontend* frontend, Backend* backend) {
|
int Runtime::Initialize(Frontend* frontend, Backend* backend) {
|
||||||
// Must be initialized by subclass before calling into this.
|
// Must be initialized by subclass before calling into this.
|
||||||
|
|
Loading…
Reference in New Issue