Hacking.
This commit is contained in:
parent
5421108b9e
commit
465c3a41dd
|
@ -44,6 +44,11 @@ namespace {
|
|||
// Basically, this identity must hold:
|
||||
// shuffle(vec, b00011011) -> {x,y,z,w} => {x,y,z,w}
|
||||
// All indices and operations must respect that.
|
||||
//
|
||||
// Memory (big endian):
|
||||
// [00 01 02 03] [04 05 06 07] [08 09 0A 0B] [0C 0D 0E 0F] (x, y, z, w)
|
||||
// load into xmm register:
|
||||
// [0F 0E 0D 0C] [0B 0A 09 08] [07 06 05 04] [03 02 01 00] (w, z, y, x)
|
||||
|
||||
void Dummy() {
|
||||
//
|
||||
|
@ -498,7 +503,63 @@ table->AddSequence(OPCODE_TRUNCATE, [](X64Emitter& e, Instr*& i) {
|
|||
});
|
||||
|
||||
table->AddSequence(OPCODE_CONVERT, [](X64Emitter& e, Instr*& i) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
if (i->Match(SIG_TYPE_I32, SIG_TYPE_F32)) {
|
||||
Reg32 dest;
|
||||
Xmm src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
// TODO(benvanik): additional checks for saturation/etc? cvtt* (trunc?)
|
||||
e.cvtss2si(dest, src);
|
||||
e.EndOp(dest, src);
|
||||
} else if (i->Match(SIG_TYPE_I32, SIG_TYPE_F64)) {
|
||||
Reg32 dest;
|
||||
Xmm src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
// TODO(benvanik): additional checks for saturation/etc? cvtt* (trunc?)
|
||||
e.cvtsd2ss(e.xmm0, src);
|
||||
e.cvtss2si(dest, e.xmm0);
|
||||
e.EndOp(dest, src);
|
||||
} else if (i->Match(SIG_TYPE_I64, SIG_TYPE_F64)) {
|
||||
Reg64 dest;
|
||||
Xmm src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
// TODO(benvanik): additional checks for saturation/etc? cvtt* (trunc?)
|
||||
e.cvtsd2si(dest, src);
|
||||
e.EndOp(dest, src);
|
||||
} else if (i->Match(SIG_TYPE_F32, SIG_TYPE_I32)) {
|
||||
Xmm dest;
|
||||
Reg32 src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
// TODO(benvanik): additional checks for saturation/etc?
|
||||
e.cvtsi2ss(dest, src);
|
||||
e.EndOp(dest, src);
|
||||
} else if (i->Match(SIG_TYPE_F32, SIG_TYPE_F64)) {
|
||||
Xmm dest, src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
// TODO(benvanik): additional checks for saturation/etc?
|
||||
e.cvtsd2ss(dest, src);
|
||||
e.EndOp(dest, src);
|
||||
} else if (i->Match(SIG_TYPE_F64, SIG_TYPE_I64)) {
|
||||
Xmm dest;
|
||||
Reg64 src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
// TODO(benvanik): additional checks for saturation/etc?
|
||||
e.cvtsi2sd(dest, src);
|
||||
e.EndOp(dest, src);
|
||||
} else if (i->Match(SIG_TYPE_F64, SIG_TYPE_F32)) {
|
||||
Xmm dest, src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
e.cvtss2sd(dest, src);
|
||||
e.EndOp(dest, src);
|
||||
} else {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
}
|
||||
i = e.Advance(i);
|
||||
return true;
|
||||
});
|
||||
|
@ -506,9 +567,56 @@ table->AddSequence(OPCODE_CONVERT, [](X64Emitter& e, Instr*& i) {
|
|||
table->AddSequence(OPCODE_ROUND, [](X64Emitter& e, Instr*& i) {
|
||||
// flags = ROUND_TO_*
|
||||
if (IsFloatType(i->dest->type)) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
XmmUnaryOp(e, i, 0, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||
if (i.src1.value->type == FLOAT32_TYPE) {
|
||||
switch (i.flags) {
|
||||
case ROUND_TO_ZERO:
|
||||
e.roundss(dest, src, B00000011);
|
||||
break;
|
||||
case ROUND_TO_NEAREST:
|
||||
e.roundss(dest, src, B00000000);
|
||||
break;
|
||||
case ROUND_TO_MINUS_INFINITY:
|
||||
e.roundss(dest, src, B00000001);
|
||||
break;
|
||||
case ROUND_TO_POSITIVE_INFINITY:
|
||||
e.roundss(dest, src, B00000010);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (i.flags) {
|
||||
case ROUND_TO_ZERO:
|
||||
e.roundsd(dest, src, B00000011);
|
||||
break;
|
||||
case ROUND_TO_NEAREST:
|
||||
e.roundsd(dest, src, B00000000);
|
||||
break;
|
||||
case ROUND_TO_MINUS_INFINITY:
|
||||
e.roundsd(dest, src, B00000001);
|
||||
break;
|
||||
case ROUND_TO_POSITIVE_INFINITY:
|
||||
e.roundsd(dest, src, B00000010);
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
} else if (IsVecType(i->dest->type)) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
XmmUnaryOp(e, i, 0, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
|
||||
switch (i.flags) {
|
||||
case ROUND_TO_ZERO:
|
||||
e.roundps(dest, src, B00000011);
|
||||
break;
|
||||
case ROUND_TO_NEAREST:
|
||||
e.roundps(dest, src, B00000000);
|
||||
break;
|
||||
case ROUND_TO_MINUS_INFINITY:
|
||||
e.roundps(dest, src, B00000001);
|
||||
break;
|
||||
case ROUND_TO_POSITIVE_INFINITY:
|
||||
e.roundps(dest, src, B00000010);
|
||||
break;
|
||||
}
|
||||
});
|
||||
} else {
|
||||
ASSERT_INVALID_TYPE();
|
||||
}
|
||||
|
@ -634,7 +742,7 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
|
|||
e.EndOp(dest);
|
||||
#if DTRACE
|
||||
e.mov(e.rdx, i->src1.offset);
|
||||
e.movaps(e.xmm0, dest);
|
||||
e.lea(e.r8, Stash(e, dest));
|
||||
CallNative(e, TraceContextLoadV128);
|
||||
#endif // DTRACE
|
||||
} else {
|
||||
|
@ -755,7 +863,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
|
|||
e.EndOp(src);
|
||||
#if DTRACE
|
||||
e.mov(e.rdx, i->src1.offset);
|
||||
e.movaps(e.xmm0, src);
|
||||
e.lea(e.r8, Stash(e, src));
|
||||
CallNative(e, TraceContextStoreV128);
|
||||
#endif // DTRACE
|
||||
} else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128C)) {
|
||||
|
@ -765,7 +873,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
|
|||
MovMem64(e, e.rcx + i->src1.offset + 8, i->src2.value->constant.v128.high);
|
||||
#if DTRACE
|
||||
e.mov(e.rdx, i->src1.offset);
|
||||
e.movups(e.xmm0, e.ptr[e.rcx + i->src1.offset]);
|
||||
e.lea(e.r8, e.ptr[e.rcx + i->src1.offset]);
|
||||
CallNative(e, TraceContextStoreV128);
|
||||
#endif // DTRACE
|
||||
} else {
|
||||
|
@ -886,7 +994,7 @@ table->AddSequence(OPCODE_LOAD, [](X64Emitter& e, Instr*& i) {
|
|||
e.db(0xCC);
|
||||
#if DTRACE
|
||||
e.lea(e.rdx, e.ptr[addr]);
|
||||
e.movaps(e.xmm0, dest);
|
||||
e.lea(e.r8, Stash(e, dest));
|
||||
CallNative(e, TraceMemoryLoadV128);
|
||||
#endif // DTRACE
|
||||
} else {
|
||||
|
@ -1063,7 +1171,7 @@ table->AddSequence(OPCODE_STORE, [](X64Emitter& e, Instr*& i) {
|
|||
e.db(0xCC);
|
||||
#if DTRACE
|
||||
e.lea(e.rdx, e.ptr[addr]);
|
||||
e.movaps(e.xmm0, src);
|
||||
e.lea(e.r8, Stash(e, src));
|
||||
CallNative(e, TraceMemoryStoreV128);
|
||||
#endif // DTRACE
|
||||
} else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128C)) {
|
||||
|
@ -1073,7 +1181,7 @@ table->AddSequence(OPCODE_STORE, [](X64Emitter& e, Instr*& i) {
|
|||
MovMem64(e, addr + 8, i->src2.value->constant.v128.high);
|
||||
#if DTRACE
|
||||
e.lea(e.rdx, e.ptr[addr]);
|
||||
e.movups(e.xmm0, e.ptr[addr]);
|
||||
e.lea(e.r8, e.ptr[addr]);
|
||||
CallNative(e, TraceMemoryStoreV128);
|
||||
#endif // DTRACE
|
||||
} else {
|
||||
|
@ -2107,14 +2215,57 @@ table->AddSequence(OPCODE_INSERT, [](X64Emitter& e, Instr*& i) {
|
|||
return true;
|
||||
});
|
||||
|
||||
// TODO(benvanik): sequence extract/splat:
|
||||
// v0.i32 = extract v0.v128, 0
|
||||
// v0.v128 = splat v0.i32
|
||||
// This can be a single broadcast.
|
||||
|
||||
table->AddSequence(OPCODE_EXTRACT, [](X64Emitter& e, Instr*& i) {
|
||||
if (IsVecType(i->src1.value->type)) {
|
||||
if (i->dest->type == INT8_TYPE) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
Reg8 dest;
|
||||
Xmm src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
if (i->src2.value->IsConstant()) {
|
||||
e.pextrb(dest, src, i->src2.value->constant.i8);
|
||||
} else {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
}
|
||||
e.EndOp(dest, src);
|
||||
} else if (i->dest->type == INT16_TYPE) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
Reg16 dest;
|
||||
Xmm src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
if (i->src2.value->IsConstant()) {
|
||||
e.pextrw(dest, src, i->src2.value->constant.i8);
|
||||
} else {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
}
|
||||
e.EndOp(dest, src);
|
||||
} else if (i->dest->type == INT32_TYPE) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
Reg32 dest;
|
||||
Xmm src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
if (i->src2.value->IsConstant()) {
|
||||
e.pextrd(dest, src, i->src2.value->constant.i8);
|
||||
} else {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
}
|
||||
e.EndOp(dest, src);
|
||||
} else if (i->dest->type == FLOAT32_TYPE) {
|
||||
Reg32 dest;
|
||||
Xmm src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
if (i->src2.value->IsConstant()) {
|
||||
e.extractps(dest, src, i->src2.value->constant.i8);
|
||||
} else {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
}
|
||||
e.EndOp(dest, src);
|
||||
} else {
|
||||
ASSERT_INVALID_TYPE();
|
||||
}
|
||||
|
@ -2128,13 +2279,35 @@ table->AddSequence(OPCODE_EXTRACT, [](X64Emitter& e, Instr*& i) {
|
|||
table->AddSequence(OPCODE_SPLAT, [](X64Emitter& e, Instr*& i) {
|
||||
if (IsVecType(i->dest->type)) {
|
||||
if (i->src1.value->type == INT8_TYPE) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
Xmm dest;
|
||||
Reg8 src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
e.pinsrb(e.xmm0, src, 0);
|
||||
e.vpbroadcastb(dest, e.xmm0);
|
||||
e.EndOp(dest, src);
|
||||
} else if (i->src1.value->type == INT16_TYPE) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
Xmm dest;
|
||||
Reg16 src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
e.pinsrw(e.xmm0, src, 0);
|
||||
e.vpbroadcastw(dest, e.xmm0);
|
||||
e.EndOp(dest, src);
|
||||
} else if (i->src1.value->type == INT32_TYPE) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
Xmm dest;
|
||||
Reg32 src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
e.pinsrd(e.xmm0, src, 0);
|
||||
e.vpbroadcastd(dest, e.xmm0);
|
||||
e.EndOp(dest, src);
|
||||
} else if (i->src1.value->type == FLOAT32_TYPE) {
|
||||
UNIMPLEMENTED_SEQ();
|
||||
Xmm dest, src;
|
||||
e.BeginOp(i->dest, dest, REG_DEST,
|
||||
i->src1.value, src, 0);
|
||||
e.vbroadcastss(dest, src);
|
||||
e.EndOp(dest, src);
|
||||
} else {
|
||||
ASSERT_INVALID_TYPE();
|
||||
}
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace {
|
|||
#define LIKE_REG(dest, like) Reg(dest.getIdx(), dest.getKind(), like.getBit(), false)
|
||||
#define NAX_LIKE(like) Reg(e.rax.getIdx(), e.rax.getKind(), like.getBit(), false)
|
||||
|
||||
Address Stash(X64Emitter& e, const Xmm& r) {
|
||||
auto addr = e.ptr[e.rsp + 40];
|
||||
e.movaps(addr, r);
|
||||
return addr;
|
||||
}
|
||||
|
||||
// Moves a 64bit immediate into memory.
|
||||
void MovMem64(X64Emitter& e, RegExp& addr, uint64_t v) {
|
||||
if ((v & ~0x7FFFFFFF) == 0) {
|
||||
|
@ -869,19 +875,13 @@ void XmmBinaryOpCV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn,
|
|||
}
|
||||
void XmmBinaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_vv_fn vv_fn) {
|
||||
// TODO(benvanik): table lookup. This linear scan is slow.
|
||||
if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F32, SIG_TYPE_F32) ||
|
||||
i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F64, SIG_TYPE_F64) ||
|
||||
i->Match(SIG_TYPE_IGNORE, SIG_TYPE_V128, SIG_TYPE_V128)) {
|
||||
if (!i->src1.value->IsConstant() && !i->src2.value->IsConstant()) {
|
||||
Xmm dest, src1, src2;
|
||||
XmmBinaryOpVV(e, i, vv_fn, dest, src1, src2);
|
||||
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F32, SIG_TYPE_F32C) ||
|
||||
i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F64, SIG_TYPE_F64C) ||
|
||||
i->Match(SIG_TYPE_IGNORE, SIG_TYPE_V128, SIG_TYPE_V128C)) {
|
||||
} else if (!i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
Xmm dest, src1;
|
||||
XmmBinaryOpVC(e, i, vv_fn, dest, src1, i->src2.value);
|
||||
} else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F32C, SIG_TYPE_F32) ||
|
||||
i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F64C, SIG_TYPE_F64) ||
|
||||
i->Match(SIG_TYPE_IGNORE, SIG_TYPE_V128C, SIG_TYPE_V128)) {
|
||||
} else if (i->src1.value->IsConstant() && !i->src2.value->IsConstant()) {
|
||||
Xmm dest, src2;
|
||||
XmmBinaryOpCV(e, i, vv_fn, dest, i->src1.value, src2);
|
||||
} else {
|
||||
|
|
|
@ -77,9 +77,10 @@ void TraceContextLoadF64(void* raw_context, uint64_t offset, double value) {
|
|||
}
|
||||
void TraceContextLoadV128(void* raw_context, uint64_t offset, __m128 value) {
|
||||
auto thread_state = *((ThreadState**)raw_context);
|
||||
DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = ctx v128 +%d\n", offset,
|
||||
DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = ctx v128 +%d\n",
|
||||
value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
|
||||
value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3]);
|
||||
value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3],
|
||||
offset);
|
||||
}
|
||||
|
||||
void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value) {
|
||||
|
|
|
@ -74,34 +74,40 @@ int ValueReductionPass::Run(HIRBuilder* builder) {
|
|||
OpcodeSignatureType src1_type = GET_OPCODE_SIG_TYPE_SRC1(info->signature);
|
||||
OpcodeSignatureType src2_type = GET_OPCODE_SIG_TYPE_SRC2(info->signature);
|
||||
OpcodeSignatureType src3_type = GET_OPCODE_SIG_TYPE_SRC3(info->signature);
|
||||
if (src1_type == OPCODE_SIG_TYPE_V && !instr->src1.value->IsConstant()) {
|
||||
if (src1_type == OPCODE_SIG_TYPE_V) {
|
||||
auto v = instr->src1.value;
|
||||
if (!v->last_use) {
|
||||
ComputeLastUse(v);
|
||||
}
|
||||
if (v->last_use == instr) {
|
||||
// Available.
|
||||
ordinals.set(v->ordinal, false);
|
||||
if (!instr->src1.value->IsConstant()) {
|
||||
ordinals.set(v->ordinal, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (src2_type == OPCODE_SIG_TYPE_V && !instr->src2.value->IsConstant()) {
|
||||
if (src2_type == OPCODE_SIG_TYPE_V) {
|
||||
auto v = instr->src2.value;
|
||||
if (!v->last_use) {
|
||||
ComputeLastUse(v);
|
||||
}
|
||||
if (v->last_use == instr) {
|
||||
// Available.
|
||||
ordinals.set(v->ordinal, false);
|
||||
if (!instr->src2.value->IsConstant()) {
|
||||
ordinals.set(v->ordinal, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (src3_type == OPCODE_SIG_TYPE_V && !instr->src3.value->IsConstant()) {
|
||||
if (src3_type == OPCODE_SIG_TYPE_V) {
|
||||
auto v = instr->src3.value;
|
||||
if (!v->last_use) {
|
||||
ComputeLastUse(v);
|
||||
}
|
||||
if (v->last_use == instr) {
|
||||
// Available.
|
||||
ordinals.set(v->ordinal, false);
|
||||
if (!instr->src3.value->IsConstant()) {
|
||||
ordinals.set(v->ordinal, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (dest_type == OPCODE_SIG_TYPE_V) {
|
||||
|
|
Loading…
Reference in New Issue