More SSE work.
parent b2e9086932, commit 8cddfcbf19

@@ -2093,19 +2093,19 @@ int Translate_DID_SATURATE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, IntCode_DID_SATURATE);
 }
 
-#define VECTOR_COMPARER(type, value, count, op) \
+#define VECTOR_COMPARER(type, value, dest_value, count, op) \
   const vec128_t& src1 = ics.rf[i->src1_reg].v128; \
   const vec128_t& src2 = ics.rf[i->src2_reg].v128; \
   vec128_t& dest = ics.rf[i->dest_reg].v128; \
   for (int n = 0; n < count; n++) { \
-    dest.value[n] = ((type)src1.value[n] op (type)src2.value[n]) ? (type)0xFFFFFFFF : 0; \
+    dest.dest_value[n] = ((type)src1.value[n] op (type)src2.value[n]) ? 0xFFFFFFFF : 0; \
   } \
   return IA_NEXT;
 
-uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, 16, ==) };
-uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, 8, ==) };
-uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, 4, ==) };
-uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, b16, 16, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, s8, 8, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, i4, 4, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, ==) };
 int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_EQ_I8,
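Note on the two-field macro: the float comparer now names f4 as the compare view and i4 as the destination view, because a compare must produce an all-ones lane mask, and assigning 0xFFFFFFFF through a float lane stores the converted value rather than the bit pattern. A minimal standalone illustration (this vec128_t layout is assumed from the usage above):

    #include <cstdint>
    #include <cstdio>

    union vec128_t {
      float f4[4];
      uint32_t i4[4];
    };

    int main() {
      vec128_t v;
      v.f4[0] = (float)0xFFFFFFFF;  // value conversion: rounds to 2^32, bits 4F800000
      v.i4[1] = 0xFFFFFFFF;         // bitwise store: the intended all-ones mask
      printf("%08X vs %08X\n", v.i4[0], v.i4[1]);  // 4F800000 vs FFFFFFFF
      return 0;
    }
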
@@ -2119,10 +2119,10 @@ int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, 16, >) };
-uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, 8, >) };
-uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, 4, >) };
-uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, b16, 16, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, s8, 8, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, i4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >) };
 int Translate_VECTOR_COMPARE_SGT(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_SGT_I8,
@@ -2136,10 +2136,10 @@ int Translate_VECTOR_COMPARE_SGT(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_SGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, 16, >=) };
-uint32_t IntCode_VECTOR_COMPARE_SGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, 8, >=) };
-uint32_t IntCode_VECTOR_COMPARE_SGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, 4, >=) };
-uint32_t IntCode_VECTOR_COMPARE_SGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, b16, 16, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, s8, 8, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, i4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >=) };
 int Translate_VECTOR_COMPARE_SGE(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_SGE_I8,
@@ -2153,10 +2153,10 @@ int Translate_VECTOR_COMPARE_SGE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_UGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, 16, >) };
-uint32_t IntCode_VECTOR_COMPARE_UGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, 8, >) };
-uint32_t IntCode_VECTOR_COMPARE_UGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, 4, >) };
-uint32_t IntCode_VECTOR_COMPARE_UGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, b16, 16, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, s8, 8, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, i4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >) };
 int Translate_VECTOR_COMPARE_UGT(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_UGT_I8,
@@ -2170,10 +2170,10 @@ int Translate_VECTOR_COMPARE_UGT(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
 
-uint32_t IntCode_VECTOR_COMPARE_UGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, 16, >=) };
-uint32_t IntCode_VECTOR_COMPARE_UGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, 8, >=) };
-uint32_t IntCode_VECTOR_COMPARE_UGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, 4, >=) };
-uint32_t IntCode_VECTOR_COMPARE_UGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, b16, 16, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, s8, 8, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, i4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >=) };
 int Translate_VECTOR_COMPARE_UGE(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_UGE_I8,

@@ -17,6 +17,9 @@
 #include <alloy/runtime/runtime.h>
 #include <alloy/runtime/thread_state.h>
 
+// TODO(benvanik): reimplement packing functions
+#include <DirectXPackedVector.h>
+
 using namespace alloy;
 using namespace alloy::backend::x64;
 using namespace alloy::backend::x64::lowering;
@@ -87,6 +90,14 @@ void Dummy() {
   //
 }
 
+void Unpack_FLOAT16_2(void* raw_context, __m128& v) {
+  uint32_t src = v.m128_i32[3];
+  v.m128_f32[0] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)src);
+  v.m128_f32[1] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)(src >> 16));
+  v.m128_f32[2] = 0.0f;
+  v.m128_f32[3] = 1.0f;
+}
+
 uint64_t LoadClock(void* raw_context) {
   LARGE_INTEGER counter;
   uint64_t time = 0;
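Unpack_FLOAT16_2 reads the packed halves out of the high lane (where the stashed source leaves them) and expands them to {x, y, 0, 1}. A rough standalone equivalent, with DirectXMath doing the half-to-float conversion (the low-half-is-x lane order is taken from the function above):

    #include <DirectXPackedVector.h>
    #include <cstdint>

    // packed = two IEEE half floats in one 32-bit word (low half = x).
    void UnpackFloat16_2(uint32_t packed, float out[4]) {
      out[0] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)packed);
      out[1] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)(packed >> 16));
      out[2] = 0.0f;  // z is fixed
      out[3] = 1.0f;  // w is fixed
    }
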
@@ -378,7 +389,7 @@ table->AddSequence(OPCODE_CAST, [](X64Emitter& e, Instr*& i) {
     Xmm src;
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src, 0);
-    e.pextrd(dest, src, 0);
+    e.vmovd(dest, src);
     e.EndOp(dest, src);
   } else {
     UNIMPLEMENTED_SEQ();
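These CAST fixups swap pextrd/pextrq (extract lane 0 by immediate) for plain vmovd/vmovq, which move the low element directly; the pinsr* cases below get the same treatment in the other direction. In intrinsic form the two spellings of the 32-bit extract look like this (a sketch, not the emitter's code):

    #include <emmintrin.h>
    #include <smmintrin.h>  // SSE4.1 pextrd
    #include <cstdint>

    uint32_t LowLaneViaPextrd(__m128i v) { return _mm_extract_epi32(v, 0); }  // pextrd r32, xmm, 0
    uint32_t LowLaneViaMovd(__m128i v)   { return _mm_cvtsi128_si32(v); }     // (v)movd r32, xmm
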
@@ -389,7 +400,7 @@ table->AddSequence(OPCODE_CAST, [](X64Emitter& e, Instr*& i) {
     Xmm src;
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src, 0);
-    e.pextrq(dest, src, 0);
+    e.vmovq(dest, src);
     e.EndOp(dest, src);
   } else {
     UNIMPLEMENTED_SEQ();
@@ -400,7 +411,7 @@ table->AddSequence(OPCODE_CAST, [](X64Emitter& e, Instr*& i) {
     Reg32 src;
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src, 0);
-    e.pinsrd(dest, src, 0);
+    e.vmovd(dest, src);
     e.EndOp(dest, src);
   } else {
     UNIMPLEMENTED_SEQ();
@@ -411,7 +422,7 @@ table->AddSequence(OPCODE_CAST, [](X64Emitter& e, Instr*& i) {
     Reg64 src;
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src, 0);
-    e.pinsrq(dest, src, 0);
+    e.vmovq(dest, src);
     e.EndOp(dest, src);
   } else {
     UNIMPLEMENTED_SEQ();
@@ -582,7 +593,7 @@ table->AddSequence(OPCODE_CONVERT, [](X64Emitter& e, Instr*& i) {
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src, 0);
     // TODO(benvanik): additional checks for saturation/etc? cvtt* (trunc?)
-    e.cvtss2si(dest, src);
+    e.cvttss2si(dest, src);
     e.EndOp(dest, src);
   } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_F64)) {
     Reg32 dest;
@@ -591,7 +602,7 @@ table->AddSequence(OPCODE_CONVERT, [](X64Emitter& e, Instr*& i) {
               i->src1.value, src, 0);
     // TODO(benvanik): additional checks for saturation/etc? cvtt* (trunc?)
     e.cvtsd2ss(e.xmm0, src);
-    e.cvtss2si(dest, e.xmm0);
+    e.cvttss2si(dest, e.xmm0);
     e.EndOp(dest, src);
   } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_F64)) {
     Reg64 dest;
@@ -599,7 +610,7 @@ table->AddSequence(OPCODE_CONVERT, [](X64Emitter& e, Instr*& i) {
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src, 0);
     // TODO(benvanik): additional checks for saturation/etc? cvtt* (trunc?)
-    e.cvtsd2si(dest, src);
+    e.cvttsd2si(dest, src);
     e.EndOp(dest, src);
   } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_I32)) {
     Xmm dest;
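The cvt* to cvtt* switch matters for semantics: cvtss2si rounds using the current MXCSR mode (nearest-even by default), while cvttss2si truncates toward zero like a C integer cast, which is what these CONVERT paths want. A quick check:

    #include <xmmintrin.h>
    #include <cstdio>

    int main() {
      __m128 v = _mm_set_ss(2.7f);
      printf("cvtss2si:  %d\n", _mm_cvt_ss2si(v));   // 3 (round to nearest)
      printf("cvttss2si: %d\n", _mm_cvtt_ss2si(v));  // 2 (truncate, matches (int)2.7f)
      return 0;
    }
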
@@ -764,10 +775,11 @@ table->AddSequence(OPCODE_LOAD_CLOCK, [](X64Emitter& e, Instr*& i) {
 // --------------------------------------------------------------------------
 
 table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
+  auto addr = e.rcx + i->src1.offset;
   if (i->Match(SIG_TYPE_I8, SIG_TYPE_IGNORE)) {
     Reg8 dest;
     e.BeginOp(i->dest, dest, REG_DEST);
-    e.mov(dest, e.byte[e.rcx + i->src1.offset]);
+    e.mov(dest, e.byte[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -777,7 +789,7 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_IGNORE)) {
     Reg16 dest;
     e.BeginOp(i->dest, dest, REG_DEST);
-    e.mov(dest, e.word[e.rcx + i->src1.offset]);
+    e.mov(dest, e.word[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -787,7 +799,7 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_IGNORE)) {
     Reg32 dest;
     e.BeginOp(i->dest, dest, REG_DEST);
-    e.mov(dest, e.dword[e.rcx + i->src1.offset]);
+    e.mov(dest, e.dword[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -797,7 +809,7 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_IGNORE)) {
     Reg64 dest;
     e.BeginOp(i->dest, dest, REG_DEST);
-    e.mov(dest, e.qword[e.rcx + i->src1.offset]);
+    e.mov(dest, e.qword[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -807,28 +819,28 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_IGNORE)) {
     Xmm dest;
     e.BeginOp(i->dest, dest, REG_DEST);
-    e.movss(dest, e.dword[e.rcx + i->src1.offset]);
+    e.movss(dest, e.dword[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.movaps(e.xmm0, dest);
+    e.lea(e.r8, Stash(e, dest));
     CallNative(e, TraceContextLoadF32);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_IGNORE)) {
     Xmm dest;
     e.BeginOp(i->dest, dest, REG_DEST);
-    e.movsd(dest, e.qword[e.rcx + i->src1.offset]);
+    e.movsd(dest, e.qword[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.movaps(e.xmm0, dest);
+    e.lea(e.r8, Stash(e, dest));
     CallNative(e, TraceContextLoadF64);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_IGNORE)) {
     Xmm dest;
     e.BeginOp(i->dest, dest, REG_DEST);
     // NOTE: we always know we are aligned.
-    e.movaps(dest, e.ptr[e.rcx + i->src1.offset]);
+    e.movaps(dest, e.ptr[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
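The hoisted `auto addr = e.rcx + i->src1.offset;` builds an Xbyak address expression once; each memory operand below just re-encodes it as a base-plus-displacement, so no extra instructions are emitted. A minimal Xbyak sketch of the idiom (registers and offset illustrative):

    #include <xbyak/xbyak.h>

    struct Sample : Xbyak::CodeGenerator {
      Sample() {
        auto addr = rcx + 0x20;  // RegExp: evaluated at encode time, emits nothing
        mov(eax, dword[addr]);   // mov eax, [rcx+0x20]
        mov(dword[addr], eax);   // mov [rcx+0x20], eax
        ret();
      }
    };
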
@@ -843,10 +855,11 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
 });
 
 table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
+  auto addr = e.rcx + i->src1.offset;
   if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I8)) {
     Reg8 src;
     e.BeginOp(i->src2.value, src, 0);
-    e.mov(e.byte[e.rcx + i->src1.offset], src);
+    e.mov(e.byte[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -854,7 +867,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
     CallNative(e, TraceContextStoreI8);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I8C)) {
-    e.mov(e.byte[e.rcx + i->src1.offset], i->src2.value->constant.i8);
+    e.mov(e.byte[addr], i->src2.value->constant.i8);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
     e.mov(e.r8b, i->src2.value->constant.i8);
@@ -863,7 +876,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16)) {
     Reg16 src;
     e.BeginOp(i->src2.value, src, 0);
-    e.mov(e.word[e.rcx + i->src1.offset], src);
+    e.mov(e.word[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -871,7 +884,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
     CallNative(e, TraceContextStoreI16);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16C)) {
-    e.mov(e.word[e.rcx + i->src1.offset], i->src2.value->constant.i16);
+    e.mov(e.word[addr], i->src2.value->constant.i16);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
     e.mov(e.r8w, i->src2.value->constant.i16);
@@ -880,7 +893,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32)) {
     Reg32 src;
     e.BeginOp(i->src2.value, src, 0);
-    e.mov(e.dword[e.rcx + i->src1.offset], src);
+    e.mov(e.dword[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -888,7 +901,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
     CallNative(e, TraceContextStoreI32);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32C)) {
-    e.mov(e.dword[e.rcx + i->src1.offset], i->src2.value->constant.i32);
+    e.mov(e.dword[addr], i->src2.value->constant.i32);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
     e.mov(e.r8d, i->src2.value->constant.i32);
@@ -897,7 +910,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64)) {
     Reg64 src;
     e.BeginOp(i->src2.value, src, 0);
-    e.mov(e.qword[e.rcx + i->src1.offset], src);
+    e.mov(e.qword[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -905,7 +918,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
     CallNative(e, TraceContextStoreI64);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64C)) {
-    MovMem64(e, e.rcx + i->src1.offset, i->src2.value->constant.i64);
+    MovMem64(e, addr, i->src2.value->constant.i64);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
     e.mov(e.r8, i->src2.value->constant.i64);
@@ -914,42 +927,46 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32)) {
     Xmm src;
     e.BeginOp(i->src2.value, src, 0);
-    e.movss(e.dword[e.rcx + i->src1.offset], src);
+    e.movss(e.dword[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.movss(e.xmm0, src);
+    e.lea(e.r8, Stash(e, src));
     CallNative(e, TraceContextStoreF32);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32C)) {
-    e.mov(e.dword[e.rcx + i->src1.offset], i->src2.value->constant.i32);
+    e.mov(e.dword[addr], i->src2.value->constant.i32);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.movss(e.xmm0, e.dword[e.rcx + i->src1.offset]);
+    e.mov(e.eax, i->src2.value->constant.i32);
+    e.vmovd(e.xmm0, e.eax);
+    e.lea(e.r8, Stash(e, e.xmm0));
     CallNative(e, TraceContextStoreF32);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64)) {
     Xmm src;
     e.BeginOp(i->src2.value, src, 0);
-    e.movsd(e.qword[e.rcx + i->src1.offset], src);
+    e.movsd(e.qword[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.movsd(e.xmm0, src);
+    e.lea(e.r8, Stash(e, src));
     CallNative(e, TraceContextStoreF64);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64C)) {
-    MovMem64(e, e.rcx + i->src1.offset, i->src2.value->constant.i64);
+    MovMem64(e, addr, i->src2.value->constant.i64);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.movsd(e.xmm0, e.qword[e.rcx + i->src1.offset]);
+    e.mov(e.rax, i->src2.value->constant.i64);
+    e.vmovq(e.xmm0, e.rax);
+    e.lea(e.r8, Stash(e, e.xmm0));
     CallNative(e, TraceContextStoreF64);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128)) {
     Xmm src;
     e.BeginOp(i->src2.value, src, 0);
     // NOTE: we always know we are aligned.
-    e.movaps(e.ptr[e.rcx + i->src1.offset], src);
+    e.movaps(e.ptr[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
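For the constant-store traces, the DTRACE path no longer reads the just-written value back from context memory; it materializes the constant in xmm0 through a GPR (mov eax/rax, imm then vmovd/vmovq) and passes the stash address in r8. The equivalent bit-preserving move, sketched with intrinsics:

    #include <emmintrin.h>
    #include <cstdint>

    __m128 ConstF32ToXmm(uint32_t bits) {
      // mov eax, imm32 / vmovd xmm0, eax: raw bits into the low float lane.
      return _mm_castsi128_ps(_mm_cvtsi32_si128((int)bits));
    }
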
@@ -959,11 +976,11 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128C)) {
     // TODO(benvanik): check zero
     // TODO(benvanik): correct order?
-    MovMem64(e, e.rcx + i->src1.offset, i->src2.value->constant.v128.low);
-    MovMem64(e, e.rcx + i->src1.offset + 8, i->src2.value->constant.v128.high);
+    MovMem64(e, addr, i->src2.value->constant.v128.low);
+    MovMem64(e, addr + 8, i->src2.value->constant.v128.high);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.lea(e.r8, e.ptr[e.rcx + i->src1.offset]);
+    e.lea(e.r8, e.ptr[addr]);
     CallNative(e, TraceContextStoreV128);
 #endif  // DTRACE
   } else {
@@ -1062,7 +1079,7 @@ table->AddSequence(OPCODE_LOAD, [](X64Emitter& e, Instr*& i) {
     e.EndOp(dest);
 #if DTRACE
     e.lea(e.rdx, e.ptr[addr]);
-    e.movss(e.xmm0, dest);
+    e.lea(e.r8, Stash(e, dest));
     CallNative(e, TraceMemoryLoadF32);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_IGNORE)) {
@@ -1072,7 +1089,7 @@ table->AddSequence(OPCODE_LOAD, [](X64Emitter& e, Instr*& i) {
     e.EndOp(dest);
 #if DTRACE
     e.lea(e.rdx, e.ptr[addr]);
-    e.movsd(e.xmm0, dest);
+    e.lea(e.r8, Stash(e, dest));
     CallNative(e, TraceMemoryLoadF64);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_IGNORE)) {
@@ -1224,14 +1241,16 @@ table->AddSequence(OPCODE_STORE, [](X64Emitter& e, Instr*& i) {
     e.EndOp(src);
 #if DTRACE
     e.lea(e.rdx, e.ptr[addr]);
-    e.movss(e.xmm0, src);
+    e.lea(e.r8, Stash(e, src));
     CallNative(e, TraceMemoryStoreF32);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32C)) {
     e.mov(e.dword[addr], i->src2.value->constant.i32);
 #if DTRACE
     e.lea(e.rdx, e.ptr[addr]);
-    e.movss(e.xmm0, e.ptr[addr]);
+    e.mov(e.eax, i->src2.value->constant.i32);
+    e.vmovd(e.xmm0, e.eax);
+    e.lea(e.r8, Stash(e, e.xmm0));
     CallNative(e, TraceMemoryStoreF32);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64)) {
@@ -1241,7 +1260,7 @@ table->AddSequence(OPCODE_STORE, [](X64Emitter& e, Instr*& i) {
     e.EndOp(src);
 #if DTRACE
     e.lea(e.rdx, e.ptr[addr]);
-    e.movsd(e.xmm0, src);
+    e.lea(e.r8, Stash(e, src));
     CallNative(e, TraceMemoryStoreF64);
 #endif  // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64C)) {
@@ -2160,7 +2179,6 @@ table->AddSequence(OPCODE_VECTOR_SHL, [](X64Emitter& e, Instr*& i) {
   XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
     // src shift mask may have values >31, and x86 sets to zero when
     // that happens so we mask.
-    e.db(0xCC);
     e.mov(e.eax, 0x1F);
     e.vmovd(e.xmm0, e.eax);
     e.vpbroadcastd(e.xmm0, e.xmm0);
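The dropped `e.db(0xCC)` was a leftover int3 breakpoint byte. The surrounding sequence masks each shift count with 0x1F because PowerPC's vslw uses only the low five bits of the count, while x86 zeroes a lane whose count is 32 or more. With AVX2 intrinsics the fixup looks roughly like this (the emitter's exact instruction choice may differ):

    #include <immintrin.h>

    __m128i VectorShlW(__m128i values, __m128i counts) {
      __m128i mask = _mm_set1_epi32(0x1F);  // vpbroadcastd of 0x1F
      return _mm_sllv_epi32(values, _mm_and_si128(counts, mask));
    }
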
@@ -2637,16 +2655,14 @@ table->AddSequence(OPCODE_UNPACK, [](X64Emitter& e, Instr*& i) {
     XmmUnaryOp(e, i, 0, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
       // sx = src.iw >> 16;
       // sy = src.iw & 0xFFFF;
-      // dest = { 3.0 + (sx / float(1 << 22)),
-      //          3.0 + (sy / float(1 << 22)),
+      // dest = { XMConvertHalfToFloat(sx),
+      //          XMConvertHalfToFloat(sy),
       //          0.0,
-      //          1.0); --- or 3.0?
-      // So:
-      // xmm = {0,0,0,packed}
-      // xmm <<= 1w {0,0,packed,0}
-      // xmm = VCVTPH2PS(xmm) {sx,sy,0,0}
-      // xmm /=
-      UNIMPLEMENTED_SEQ();
+      //          1.0 };
+      auto addr = Stash(e, src);
+      e.lea(e.rdx, addr);
+      CallNative(e, Unpack_FLOAT16_2);
+      e.movaps(dest, addr);
     });
   } else if (i->flags == PACK_TYPE_FLOAT16_4) {
     // Could be shared with FLOAT16_2.
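UNPACK(FLOAT16_2) now stashes the source, calls the native Unpack_FLOAT16_2 helper defined earlier, and reloads the result. The VCVTPH2PS route sketched in the old comment remains a possible inline replacement on F16C-capable CPUs, something like the following (z and w would still need patching to 0.0 and 1.0 afterwards):

    #include <immintrin.h>
    #include <cstdint>

    __m128 HalfPairToFloats(uint32_t packed) {
      __m128i halves = _mm_cvtsi32_si128((int)packed);  // 16-bit lanes: {x_half, y_half, 0, ...}
      return _mm_cvtph_ps(halves);                      // vcvtph2ps: {x, y, 0, 0}
    }
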
@@ -57,23 +57,18 @@ void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value) {
   auto thread_state = *((ThreadState**)raw_context);
   DPRINT("%lld (%llX) = ctx i64 +%d\n", (int64_t)value, value, offset);
 }
-void TraceContextLoadF32(void* raw_context, uint64_t offset, float value) {
+void TraceContextLoadF32(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
-  union {
-    float f;
-    uint32_t u;
-  } x;
-  x.f = value;
-  DPRINT("%e (%X) = ctx f32 +%d\n", x.f, x.u, offset);
+  DPRINT("%e (%X) = ctx f32 +%d\n", value.m128_f32[0], value.m128_i32[0], offset);
 }
-void TraceContextLoadF64(void* raw_context, uint64_t offset, double value) {
+void TraceContextLoadF64(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
   union {
-    double f;
-    uint64_t u;
-  } x;
-  x.f = value;
-  DPRINT("%lle (%llX) = ctx f64 +%d\n", x.f, x.u, offset);
+    double d;
+    uint64_t x;
+  } f;
+  f.x = value.m128_i64[0];
+  DPRINT("%lle (%llX) = ctx f64 +%d\n", f.d, value.m128_i64[0], offset);
 }
 void TraceContextLoadV128(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
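The float/double trace helpers switch to __m128 because, under the MSVC x64 calling convention, a __m128 argument is passed by reference; the emitter can therefore just `lea r8, [stash]` instead of arranging a scalar in an argument register. Reading the scalar back is a lane access plus a type pun, as the F64 bodies above do. A self-contained sketch:

    #include <xmmintrin.h>
    #include <cstdint>
    #include <cstdio>

    void PrintF64Lane(__m128 value) {
      union { double d; uint64_t x; } f;
      f.x = value.m128_i64[0];  // MSVC-specific lane accessor, as used in the diff
      printf("%e (%llX)\n", f.d, (unsigned long long)f.x);
    }
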
@@ -99,23 +94,18 @@ void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value) {
   auto thread_state = *((ThreadState**)raw_context);
   DPRINT("ctx i64 +%d = %lld (%llX)\n", offset, (int64_t)value, value);
 }
-void TraceContextStoreF32(void* raw_context, uint64_t offset, float value) {
+void TraceContextStoreF32(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
-  union {
-    float f;
-    uint32_t u;
-  } x;
-  x.f = value;
-  DPRINT("ctx f32 +%d = %e (%.X)\n", offset, x.f, x.u);
+  DPRINT("ctx f32 +%d = %e (%X)\n", offset, value.m128_f32[0], value.m128_i32[0]);
 }
-void TraceContextStoreF64(void* raw_context, uint64_t offset, double value) {
+void TraceContextStoreF64(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
   union {
-    double f;
-    uint64_t u;
-  } x;
-  x.f = value;
-  DPRINT("ctx f64 +%d = %lle (%.llX)\n", offset, x.f, x.u);
+    double d;
+    uint64_t x;
+  } f;
+  f.x = value.m128_i64[0];
+  DPRINT("ctx f64 +%d = %lle (%llX)\n", offset, f.d, value.m128_i64[0]);
 }
 void TraceContextStoreV128(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
@@ -140,23 +130,18 @@ void TraceMemoryLoadI64(void* raw_context, uint64_t address, uint64_t value) {
   auto thread_state = *((ThreadState**)raw_context);
   DPRINT("%lld (%llX) = load.i64 %.8X\n", (int64_t)value, value, address);
 }
-void TraceMemoryLoadF32(void* raw_context, uint64_t address, float value) {
+void TraceMemoryLoadF32(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
-  union {
-    float f;
-    uint32_t u;
-  } x;
-  x.f = value;
-  DPRINT("%e (%X) = load.f32 %.8X\n", x.f, x.u, address);
+  DPRINT("%e (%X) = load.f32 %.8X\n", value.m128_f32[0], value.m128_i32[0], address);
 }
-void TraceMemoryLoadF64(void* raw_context, uint64_t address, double value) {
+void TraceMemoryLoadF64(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
   union {
-    double f;
-    uint64_t u;
-  } x;
-  x.f = value;
-  DPRINT("%lle (%llX) = load.f64 %.8X\n", x.f, x.u, address);
+    double d;
+    uint64_t x;
+  } f;
+  f.x = value.m128_i64[0];
+  DPRINT("%lle (%llX) = load.f64 %.8X\n", f.d, value.m128_i64[0], address);
 }
 void TraceMemoryLoadV128(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
@@ -182,23 +167,18 @@ void TraceMemoryStoreI64(void* raw_context, uint64_t address, uint64_t value) {
   auto thread_state = *((ThreadState**)raw_context);
   DPRINT("store.i64 %.8X = %lld (%llX)\n", address, (int64_t)value, value);
 }
-void TraceMemoryStoreF32(void* raw_context, uint64_t address, float value) {
+void TraceMemoryStoreF32(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
-  union {
-    float f;
-    uint32_t u;
-  } x;
-  x.f = value;
-  DPRINT("store.f32 %.8X = %e (%X)\n", address, x.f, x.u);
+  DPRINT("store.f32 %.8X = %e (%X)\n", address, value.m128_f32[0], value.m128_i32[0]);
 }
-void TraceMemoryStoreF64(void* raw_context, uint64_t address, double value) {
+void TraceMemoryStoreF64(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
   union {
-    double f;
-    uint64_t u;
-  } x;
-  x.f = value;
-  DPRINT("store.f64 %.8X = %lle (%llX)\n", address, x.f, x.u);
+    double d;
+    uint64_t x;
+  } f;
+  f.x = value.m128_i64[0];
+  DPRINT("store.f64 %.8X = %lle (%llX)\n", address, f.d, value.m128_i64[0]);
 }
 void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);

@@ -25,32 +25,32 @@ void TraceContextLoadI8(void* raw_context, uint64_t offset, uint8_t value);
 void TraceContextLoadI16(void* raw_context, uint64_t offset, uint16_t value);
 void TraceContextLoadI32(void* raw_context, uint64_t offset, uint32_t value);
 void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value);
-void TraceContextLoadF32(void* raw_context, uint64_t offset, float value);
-void TraceContextLoadF64(void* raw_context, uint64_t offset, double value);
+void TraceContextLoadF32(void* raw_context, uint64_t offset, __m128 value);
+void TraceContextLoadF64(void* raw_context, uint64_t offset, __m128 value);
 void TraceContextLoadV128(void* raw_context, uint64_t offset, __m128 value);
 
 void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value);
 void TraceContextStoreI16(void* raw_context, uint64_t offset, uint16_t value);
 void TraceContextStoreI32(void* raw_context, uint64_t offset, uint32_t value);
 void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value);
-void TraceContextStoreF32(void* raw_context, uint64_t offset, float value);
-void TraceContextStoreF64(void* raw_context, uint64_t offset, double value);
+void TraceContextStoreF32(void* raw_context, uint64_t offset, __m128 value);
+void TraceContextStoreF64(void* raw_context, uint64_t offset, __m128 value);
 void TraceContextStoreV128(void* raw_context, uint64_t offset, __m128 value);
 
 void TraceMemoryLoadI8(void* raw_context, uint64_t address, uint8_t value);
 void TraceMemoryLoadI16(void* raw_context, uint64_t address, uint16_t value);
 void TraceMemoryLoadI32(void* raw_context, uint64_t address, uint32_t value);
 void TraceMemoryLoadI64(void* raw_context, uint64_t address, uint64_t value);
-void TraceMemoryLoadF32(void* raw_context, uint64_t address, float value);
-void TraceMemoryLoadF64(void* raw_context, uint64_t address, double value);
+void TraceMemoryLoadF32(void* raw_context, uint64_t address, __m128 value);
+void TraceMemoryLoadF64(void* raw_context, uint64_t address, __m128 value);
 void TraceMemoryLoadV128(void* raw_context, uint64_t address, __m128 value);
 
 void TraceMemoryStoreI8(void* raw_context, uint64_t address, uint8_t value);
 void TraceMemoryStoreI16(void* raw_context, uint64_t address, uint16_t value);
 void TraceMemoryStoreI32(void* raw_context, uint64_t address, uint32_t value);
 void TraceMemoryStoreI64(void* raw_context, uint64_t address, uint64_t value);
-void TraceMemoryStoreF32(void* raw_context, uint64_t address, float value);
-void TraceMemoryStoreF64(void* raw_context, uint64_t address, double value);
+void TraceMemoryStoreF32(void* raw_context, uint64_t address, __m128 value);
+void TraceMemoryStoreF64(void* raw_context, uint64_t address, __m128 value);
 void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value);
 
 }  // namespace lowering

@@ -116,7 +116,9 @@ int X64Emitter::Emit(HIRBuilder* builder) {
       GetRegBit(r11) |
       GetRegBit(xmm1) |
       GetRegBit(xmm2) |
-      GetRegBit(xmm3);
+      GetRegBit(xmm3) |
+      GetRegBit(xmm4) |
+      GetRegBit(xmm5);
 
   // Function prolog.
   // Must be 16b aligned.
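Reserving xmm4 and xmm5 takes two more XMM registers out of the allocator's hands, presumably as scratch space for the new sequences. The reserved set is a plain bitmask; a self-contained sketch of the idiom (names and bit positions hypothetical):

    #include <cstdint>

    enum Reg { xmm1 = 17, xmm2, xmm3, xmm4, xmm5 };
    constexpr uint64_t GetRegBit(Reg r) { return 1ull << r; }

    constexpr uint64_t kReserved =
        GetRegBit(xmm1) | GetRegBit(xmm2) | GetRegBit(xmm3) |
        GetRegBit(xmm4) | GetRegBit(xmm5);  // xmm4/xmm5 newly reserved
    static_assert((kReserved & GetRegBit(xmm4)) != 0, "xmm4 is reserved");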