More SSE work.

Ben Vanik 2014-01-28 22:06:45 -08:00
parent b2e9086932
commit 8cddfcbf19
5 changed files with 132 additions and 134 deletions

View File

@@ -2093,19 +2093,19 @@ int Translate_DID_SATURATE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, IntCode_DID_SATURATE);
 }
-#define VECTOR_COMPARER(type, value, count, op) \
+#define VECTOR_COMPARER(type, value, dest_value, count, op) \
   const vec128_t& src1 = ics.rf[i->src1_reg].v128; \
   const vec128_t& src2 = ics.rf[i->src2_reg].v128; \
   vec128_t& dest = ics.rf[i->dest_reg].v128; \
   for (int n = 0; n < count; n++) { \
-    dest.value[n] = ((type)src1.value[n] op (type)src2.value[n]) ? (type)0xFFFFFFFF : 0; \
+    dest.dest_value[n] = ((type)src1.value[n] op (type)src2.value[n]) ? 0xFFFFFFFF : 0; \
   } \
   return IA_NEXT;
-uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, 16, ==) };
-uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, 8, ==) };
-uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, 4, ==) };
-uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, b16, 16, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, s8, 8, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, i4, 4, ==) };
+uint32_t IntCode_VECTOR_COMPARE_EQ_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, ==) };
 int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_EQ_I8,
@@ -2119,10 +2119,10 @@ int Translate_VECTOR_COMPARE_EQ(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
-uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, 16, >) };
-uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, 8, >) };
-uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, 4, >) };
-uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, b16, 16, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, s8, 8, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, i4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_SGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >) };
 int Translate_VECTOR_COMPARE_SGT(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_SGT_I8,
@@ -2136,10 +2136,10 @@ int Translate_VECTOR_COMPARE_SGT(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
-uint32_t IntCode_VECTOR_COMPARE_SGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, 16, >=) };
-uint32_t IntCode_VECTOR_COMPARE_SGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, 8, >=) };
-uint32_t IntCode_VECTOR_COMPARE_SGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, 4, >=) };
-uint32_t IntCode_VECTOR_COMPARE_SGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int8_t, b16, b16, 16, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int16_t, s8, s8, 8, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(int32_t, i4, i4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_SGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >=) };
 int Translate_VECTOR_COMPARE_SGE(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_SGE_I8,
@@ -2153,10 +2153,10 @@ int Translate_VECTOR_COMPARE_SGE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
-uint32_t IntCode_VECTOR_COMPARE_UGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, 16, >) };
-uint32_t IntCode_VECTOR_COMPARE_UGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, 8, >) };
-uint32_t IntCode_VECTOR_COMPARE_UGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, 4, >) };
-uint32_t IntCode_VECTOR_COMPARE_UGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, b16, 16, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, s8, 8, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, i4, 4, >) };
+uint32_t IntCode_VECTOR_COMPARE_UGT_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >) };
 int Translate_VECTOR_COMPARE_UGT(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_UGT_I8,
@@ -2170,10 +2170,10 @@ int Translate_VECTOR_COMPARE_UGT(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->flags]);
 }
-uint32_t IntCode_VECTOR_COMPARE_UGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, 16, >=) };
-uint32_t IntCode_VECTOR_COMPARE_UGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, 8, >=) };
-uint32_t IntCode_VECTOR_COMPARE_UGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, 4, >=) };
-uint32_t IntCode_VECTOR_COMPARE_UGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_I8(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint8_t, b16, b16, 16, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_I16(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint16_t, s8, s8, 8, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_I32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(uint32_t, i4, i4, 4, >=) };
+uint32_t IntCode_VECTOR_COMPARE_UGE_F32(IntCodeState& ics, const IntCode* i) { VECTOR_COMPARER(float, f4, i4, 4, >=) };
 int Translate_VECTOR_COMPARE_UGE(TranslationContext& ctx, Instr* i) {
   static IntCodeFn fns[] = {
     IntCode_VECTOR_COMPARE_UGE_I8,
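
Note: the new dest_value macro parameter exists so the F32 comparers can write their all-ones result mask through the vector's integer view (i4) instead of the float view (f4). A minimal sketch of the difference, using a hypothetical vec128_demo_t union standing in for vec128_t:

    #include <cstdint>
    #include <cstdio>

    // Hypothetical stand-in for vec128_t: the same storage viewed either as
    // floats or as raw 32-bit lanes.
    union vec128_demo_t {
      float f4[4];
      uint32_t i4[4];
    };

    int main() {
      vec128_demo_t dest;
      // Old macro path: (float)0xFFFFFFFF converts the mask to 4294967296.0f,
      // whose bit pattern is 0x4F800000, not an all-ones lane.
      dest.f4[0] = (float)0xFFFFFFFF;
      // New macro path for F32 compares: write the mask bits directly.
      dest.i4[1] = 0xFFFFFFFF;
      printf("%08X %08X\n", dest.i4[0], dest.i4[1]);  // 4F800000 FFFFFFFF
      return 0;
    }
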

View File

@@ -17,6 +17,9 @@
 #include <alloy/runtime/runtime.h>
 #include <alloy/runtime/thread_state.h>
 
+// TODO(benvanik): reimplement packing functions
+#include <DirectXPackedVector.h>
+
 using namespace alloy;
 using namespace alloy::backend::x64;
 using namespace alloy::backend::x64::lowering;
@@ -87,6 +90,14 @@ void Dummy() {
   //
 }
 
+void Unpack_FLOAT16_2(void* raw_context, __m128& v) {
+  uint32_t src = v.m128_i32[3];
+  v.m128_f32[0] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)src);
+  v.m128_f32[1] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)(src >> 16));
+  v.m128_f32[2] = 0.0f;
+  v.m128_f32[3] = 1.0f;
+}
+
 uint64_t LoadClock(void* raw_context) {
   LARGE_INTEGER counter;
   uint64_t time = 0;
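
Note: Unpack_FLOAT16_2 expands two IEEE 754 half-precision values packed into the top 32-bit lane to (x, y, 0, 1). For reference, a portable sketch of what XMConvertHalfToFloat computes, as an illustration under the assumption of IEEE binary16 input rather than the DirectXMath implementation:

    #include <cstdint>
    #include <cstring>

    float HalfToFloat(uint16_t h) {
      uint32_t sign = (uint32_t)(h >> 15) << 31;
      uint32_t exp = (h >> 10) & 0x1F;
      uint32_t man = h & 0x3FF;
      uint32_t bits;
      if (exp == 0) {
        if (man == 0) {
          bits = sign;  // +/- zero
        } else {
          // Subnormal half: renormalize into the float format.
          int e = -1;
          do { e++; man <<= 1; } while ((man & 0x400) == 0);
          bits = sign | ((uint32_t)(127 - 15 - e) << 23) | ((man & 0x3FF) << 13);
        }
      } else if (exp == 0x1F) {
        bits = sign | 0x7F800000 | (man << 13);  // inf/NaN, payload preserved
      } else {
        bits = sign | ((exp - 15 + 127) << 23) | (man << 13);
      }
      float f;
      std::memcpy(&f, &bits, sizeof(f));
      return f;
    }
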
@@ -378,7 +389,7 @@ table->AddSequence(OPCODE_CAST, [](X64Emitter& e, Instr*& i) {
     Xmm src;
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src, 0);
-    e.pextrd(dest, src, 0);
+    e.vmovd(dest, src);
     e.EndOp(dest, src);
   } else {
     UNIMPLEMENTED_SEQ();
@@ -389,7 +400,7 @@ table->AddSequence(OPCODE_CAST, [](X64Emitter& e, Instr*& i) {
     Xmm src;
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src, 0);
-    e.pextrq(dest, src, 0);
+    e.vmovq(dest, src);
     e.EndOp(dest, src);
   } else {
     UNIMPLEMENTED_SEQ();
@@ -400,7 +411,7 @@ table->AddSequence(OPCODE_CAST, [](X64Emitter& e, Instr*& i) {
     Reg32 src;
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src, 0);
-    e.pinsrd(dest, src, 0);
+    e.vmovd(dest, src);
     e.EndOp(dest, src);
   } else {
     UNIMPLEMENTED_SEQ();
@@ -411,7 +422,7 @@ table->AddSequence(OPCODE_CAST, [](X64Emitter& e, Instr*& i) {
     Reg64 src;
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src, 0);
-    e.pinsrq(dest, src, 0);
+    e.vmovq(dest, src);
     e.EndOp(dest, src);
   } else {
     UNIMPLEMENTED_SEQ();
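
Note: the CAST changes drop the SSE4.1 pextrd/pextrq/pinsrd/pinsrq forms in favor of vmovd/vmovq, which move lane 0 between a GPR and an xmm register directly. One behavioral nuance: movd/movq zero the upper lanes on insert, while pinsrd/pinsrq would preserve them, which is acceptable for a scalar bitcast. An intrinsics-level sketch with hypothetical helper names:

    #include <cstdint>
    #include <immintrin.h>

    // f32 bits -> i32: what "vmovd r32, xmm" does for lane 0.
    uint32_t CastF32ToI32(__m128 v) {
      return (uint32_t)_mm_cvtsi128_si32(_mm_castps_si128(v));
    }

    // i32 -> f32 bits: what "vmovd xmm, r32" does; lanes 1-3 are zeroed,
    // whereas pinsrd would have left them unchanged.
    __m128 CastI32ToF32(uint32_t bits) {
      return _mm_castsi128_ps(_mm_cvtsi32_si128((int)bits));
    }
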
@@ -582,7 +593,7 @@ table->AddSequence(OPCODE_CONVERT, [](X64Emitter& e, Instr*& i) {
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src, 0);
     // TODO(benvanik): additional checks for saturation/etc? cvtt* (trunc?)
-    e.cvtss2si(dest, src);
+    e.cvttss2si(dest, src);
     e.EndOp(dest, src);
   } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_F64)) {
     Reg32 dest;
@@ -591,7 +602,7 @@ table->AddSequence(OPCODE_CONVERT, [](X64Emitter& e, Instr*& i) {
               i->src1.value, src, 0);
     // TODO(benvanik): additional checks for saturation/etc? cvtt* (trunc?)
     e.cvtsd2ss(e.xmm0, src);
-    e.cvtss2si(dest, e.xmm0);
+    e.cvttss2si(dest, e.xmm0);
     e.EndOp(dest, src);
   } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_F64)) {
     Reg64 dest;
@@ -599,7 +610,7 @@ table->AddSequence(OPCODE_CONVERT, [](X64Emitter& e, Instr*& i) {
     e.BeginOp(i->dest, dest, REG_DEST,
               i->src1.value, src, 0);
     // TODO(benvanik): additional checks for saturation/etc? cvtt* (trunc?)
-    e.cvtsd2si(dest, src);
+    e.cvttsd2si(dest, src);
     e.EndOp(dest, src);
   } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_I32)) {
     Xmm dest;
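
Note: swapping cvtss2si/cvtsd2si for cvttss2si/cvttsd2si changes the rounding behavior. The plain forms round according to the current MXCSR mode (round-to-nearest-even by default), while the cvtt* forms always truncate toward zero, matching C cast semantics. A small demonstration:

    #include <cstdio>
    #include <xmmintrin.h>

    int main() {
      __m128 v = _mm_set_ss(2.7f);
      int rounded = _mm_cvt_ss2si(v);     // cvtss2si: MXCSR rounding -> 3
      int truncated = _mm_cvtt_ss2si(v);  // cvttss2si: truncate toward zero -> 2
      printf("%d %d\n", rounded, truncated);
      return 0;
    }
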
@@ -764,10 +775,11 @@ table->AddSequence(OPCODE_LOAD_CLOCK, [](X64Emitter& e, Instr*& i) {
 // --------------------------------------------------------------------------
 
 table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
+  auto addr = e.rcx + i->src1.offset;
   if (i->Match(SIG_TYPE_I8, SIG_TYPE_IGNORE)) {
     Reg8 dest;
     e.BeginOp(i->dest, dest, REG_DEST);
-    e.mov(dest, e.byte[e.rcx + i->src1.offset]);
+    e.mov(dest, e.byte[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -777,7 +789,7 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_IGNORE)) {
     Reg16 dest;
     e.BeginOp(i->dest, dest, REG_DEST);
-    e.mov(dest, e.word[e.rcx + i->src1.offset]);
+    e.mov(dest, e.word[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -787,7 +799,7 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_IGNORE)) {
     Reg32 dest;
     e.BeginOp(i->dest, dest, REG_DEST);
-    e.mov(dest, e.dword[e.rcx + i->src1.offset]);
+    e.mov(dest, e.dword[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -797,7 +809,7 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_IGNORE)) {
     Reg64 dest;
     e.BeginOp(i->dest, dest, REG_DEST);
-    e.mov(dest, e.qword[e.rcx + i->src1.offset]);
+    e.mov(dest, e.qword[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -807,28 +819,28 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_IGNORE)) {
     Xmm dest;
     e.BeginOp(i->dest, dest, REG_DEST);
-    e.movss(dest, e.dword[e.rcx + i->src1.offset]);
+    e.movss(dest, e.dword[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.movaps(e.xmm0, dest);
+    e.lea(e.r8, Stash(e, dest));
     CallNative(e, TraceContextLoadF32);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_IGNORE)) {
     Xmm dest;
     e.BeginOp(i->dest, dest, REG_DEST);
-    e.movsd(dest, e.qword[e.rcx + i->src1.offset]);
+    e.movsd(dest, e.qword[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.movaps(e.xmm0, dest);
+    e.lea(e.r8, Stash(e, dest));
     CallNative(e, TraceContextLoadF64);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_IGNORE)) {
     Xmm dest;
     e.BeginOp(i->dest, dest, REG_DEST);
     // NOTE: we always know we are aligned.
-    e.movaps(dest, e.ptr[e.rcx + i->src1.offset]);
+    e.movaps(dest, e.ptr[addr]);
     e.EndOp(dest);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
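
Note: the lea(r8, Stash(e, dest)) pattern appears because the trace helpers (see the tracers below) now take a full __m128, and the Windows x64 calling convention passes __m128 arguments by address in the corresponding integer argument register. Stash presumably spills the xmm register to a scratch slot and returns an addressable operand for it; a hypothetical sketch of that shape, where the slot offset and xbyak details are assumptions:

    // Hypothetical shape of the Stash helper (xbyak syntax): spill the xmm
    // to a scratch slot in the emitter's frame and hand back an address
    // operand that lea and movaps can take.
    Xbyak::Address Stash(X64Emitter& e, const Xbyak::Xmm& v) {
      const int kStashOffset = 48;  // assumed scratch slot in the frame
      auto addr = e.ptr[e.rsp + kStashOffset];
      e.movaps(addr, v);
      return addr;
    }
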
@@ -843,10 +855,11 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) {
 });
 
 table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
+  auto addr = e.rcx + i->src1.offset;
   if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I8)) {
     Reg8 src;
     e.BeginOp(i->src2.value, src, 0);
-    e.mov(e.byte[e.rcx + i->src1.offset], src);
+    e.mov(e.byte[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -854,7 +867,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
     CallNative(e, TraceContextStoreI8);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I8C)) {
-    e.mov(e.byte[e.rcx + i->src1.offset], i->src2.value->constant.i8);
+    e.mov(e.byte[addr], i->src2.value->constant.i8);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
     e.mov(e.r8b, i->src2.value->constant.i8);
@@ -863,7 +876,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16)) {
     Reg16 src;
     e.BeginOp(i->src2.value, src, 0);
-    e.mov(e.word[e.rcx + i->src1.offset], src);
+    e.mov(e.word[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -871,7 +884,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
     CallNative(e, TraceContextStoreI16);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16C)) {
-    e.mov(e.word[e.rcx + i->src1.offset], i->src2.value->constant.i16);
+    e.mov(e.word[addr], i->src2.value->constant.i16);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
     e.mov(e.r8w, i->src2.value->constant.i16);
@@ -880,7 +893,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32)) {
     Reg32 src;
     e.BeginOp(i->src2.value, src, 0);
-    e.mov(e.dword[e.rcx + i->src1.offset], src);
+    e.mov(e.dword[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -888,7 +901,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
     CallNative(e, TraceContextStoreI32);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32C)) {
-    e.mov(e.dword[e.rcx + i->src1.offset], i->src2.value->constant.i32);
+    e.mov(e.dword[addr], i->src2.value->constant.i32);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
     e.mov(e.r8d, i->src2.value->constant.i32);
@@ -897,7 +910,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64)) {
     Reg64 src;
     e.BeginOp(i->src2.value, src, 0);
-    e.mov(e.qword[e.rcx + i->src1.offset], src);
+    e.mov(e.qword[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -905,7 +918,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
     CallNative(e, TraceContextStoreI64);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64C)) {
-    MovMem64(e, e.rcx + i->src1.offset, i->src2.value->constant.i64);
+    MovMem64(e, addr, i->src2.value->constant.i64);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
     e.mov(e.r8, i->src2.value->constant.i64);
@@ -914,42 +927,46 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32)) {
     Xmm src;
     e.BeginOp(i->src2.value, src, 0);
-    e.movss(e.dword[e.rcx + i->src1.offset], src);
+    e.movss(e.dword[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.movss(e.xmm0, src);
+    e.lea(e.r8, Stash(e, src));
     CallNative(e, TraceContextStoreF32);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32C)) {
-    e.mov(e.dword[e.rcx + i->src1.offset], i->src2.value->constant.i32);
+    e.mov(e.dword[addr], i->src2.value->constant.i32);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.movss(e.xmm0, e.dword[e.rcx + i->src1.offset]);
+    e.mov(e.eax, i->src2.value->constant.i32);
+    e.vmovd(e.xmm0, e.eax);
+    e.lea(e.r8, Stash(e, e.xmm0));
     CallNative(e, TraceContextStoreF32);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64)) {
     Xmm src;
     e.BeginOp(i->src2.value, src, 0);
-    e.movsd(e.qword[e.rcx + i->src1.offset], src);
+    e.movsd(e.qword[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.movsd(e.xmm0, src);
+    e.lea(e.r8, Stash(e, src));
     CallNative(e, TraceContextStoreF64);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64C)) {
-    MovMem64(e, e.rcx + i->src1.offset, i->src2.value->constant.i64);
+    MovMem64(e, addr, i->src2.value->constant.i64);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.movsd(e.xmm0, e.qword[e.rcx + i->src1.offset]);
+    e.mov(e.rax, i->src2.value->constant.i64);
+    e.vmovq(e.xmm0, e.rax);
+    e.lea(e.r8, Stash(e, e.xmm0));
    CallNative(e, TraceContextStoreF64);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128)) {
     Xmm src;
     e.BeginOp(i->src2.value, src, 0);
     // NOTE: we always know we are aligned.
-    e.movaps(e.ptr[e.rcx + i->src1.offset], src);
+    e.movaps(e.ptr[addr], src);
     e.EndOp(src);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
@@ -959,11 +976,11 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128C)) {
     // TODO(benvanik): check zero
     // TODO(benvanik): correct order?
-    MovMem64(e, e.rcx + i->src1.offset, i->src2.value->constant.v128.low);
-    MovMem64(e, e.rcx + i->src1.offset + 8, i->src2.value->constant.v128.high);
+    MovMem64(e, addr, i->src2.value->constant.v128.low);
+    MovMem64(e, addr + 8, i->src2.value->constant.v128.high);
 #if DTRACE
     e.mov(e.rdx, i->src1.offset);
-    e.lea(e.r8, e.ptr[e.rcx + i->src1.offset]);
+    e.lea(e.r8, e.ptr[addr]);
     CallNative(e, TraceContextStoreV128);
 #endif // DTRACE
   } else {
@@ -1062,7 +1079,7 @@ table->AddSequence(OPCODE_LOAD, [](X64Emitter& e, Instr*& i) {
     e.EndOp(dest);
 #if DTRACE
     e.lea(e.rdx, e.ptr[addr]);
-    e.movss(e.xmm0, dest);
+    e.lea(e.r8, Stash(e, dest));
     CallNative(e, TraceMemoryLoadF32);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_IGNORE)) {
@@ -1072,7 +1089,7 @@ table->AddSequence(OPCODE_LOAD, [](X64Emitter& e, Instr*& i) {
     e.EndOp(dest);
 #if DTRACE
     e.lea(e.rdx, e.ptr[addr]);
-    e.movsd(e.xmm0, dest);
+    e.lea(e.r8, Stash(e, dest));
     CallNative(e, TraceMemoryLoadF64);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_IGNORE)) {
@@ -1224,14 +1241,16 @@ table->AddSequence(OPCODE_STORE, [](X64Emitter& e, Instr*& i) {
     e.EndOp(src);
 #if DTRACE
     e.lea(e.rdx, e.ptr[addr]);
-    e.movss(e.xmm0, src);
+    e.lea(e.r8, Stash(e, src));
     CallNative(e, TraceMemoryStoreF32);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32C)) {
     e.mov(e.dword[addr], i->src2.value->constant.i32);
 #if DTRACE
     e.lea(e.rdx, e.ptr[addr]);
-    e.movss(e.xmm0, e.ptr[addr]);
+    e.mov(e.eax, i->src2.value->constant.i32);
+    e.vmovd(e.xmm0, e.eax);
+    e.lea(e.r8, Stash(e, e.xmm0));
     CallNative(e, TraceMemoryStoreF32);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64)) {
@@ -1241,7 +1260,7 @@ table->AddSequence(OPCODE_STORE, [](X64Emitter& e, Instr*& i) {
     e.EndOp(src);
 #if DTRACE
     e.lea(e.rdx, e.ptr[addr]);
-    e.movsd(e.xmm0, src);
+    e.lea(e.r8, Stash(e, src));
     CallNative(e, TraceMemoryStoreF64);
 #endif // DTRACE
   } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64C)) {
@@ -2160,7 +2179,6 @@ table->AddSequence(OPCODE_VECTOR_SHL, [](X64Emitter& e, Instr*& i) {
     XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) {
       // src shift mask may have values >31, and x86 sets to zero when
       // that happens so we mask.
-      e.db(0xCC);
       e.mov(e.eax, 0x1F);
       e.vmovd(e.xmm0, e.eax);
       e.vpbroadcastd(e.xmm0, e.xmm0);
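
Note: the removed e.db(0xCC) was an int3 breakpoint byte left over from debugging. The masking itself is needed because guest shift counts may be 32 or larger: vpsllvd writes zero for any lane whose count is out of range, whereas the emulated semantics use the count modulo 32, so each lane is first ANDed with 0x1F. An intrinsics sketch of the same sequence, assuming AVX2:

    #include <immintrin.h>

    // Per-lane 32-bit shift-left where each count is taken modulo 32,
    // matching the masking done above.
    __m128i VectorShl32(__m128i value, __m128i count) {
      __m128i mask = _mm_set1_epi32(0x1F);           // vpbroadcastd of 0x1F
      __m128i clamped = _mm_and_si128(count, mask);  // counts now in [0, 31]
      return _mm_sllv_epi32(value, clamped);         // vpsllvd
    }
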
@@ -2637,16 +2655,14 @@ table->AddSequence(OPCODE_UNPACK, [](X64Emitter& e, Instr*& i) {
     XmmUnaryOp(e, i, 0, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) {
       // sx = src.iw >> 16;
       // sy = src.iw & 0xFFFF;
-      // dest = { 3.0 + (sx / float(1 << 22)),
-      //          3.0 + (sy / float(1 << 22)),
+      // dest = { XMConvertHalfToFloat(sx),
+      //          XMConvertHalfToFloat(sy),
       //          0.0,
-      //          1.0); --- or 3.0?
-      // So:
-      // xmm = {0,0,0,packed}
-      // xmm <<= 1w {0,0,packed,0}
-      // xmm = VCVTPH2PS(xmm) {sx,sy,0,0}
-      // xmm /=
-      UNIMPLEMENTED_SEQ();
+      //          1.0 };
+      auto addr = Stash(e, src);
+      e.lea(e.rdx, addr);
+      CallNative(e, Unpack_FLOAT16_2);
+      e.movaps(dest, addr);
     });
   } else if (i->flags == PACK_TYPE_FLOAT16_4) {
     // Could be shared with FLOAT16_2.

View File

@@ -57,23 +57,18 @@ void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value) {
   auto thread_state = *((ThreadState**)raw_context);
   DPRINT("%lld (%llX) = ctx i64 +%d\n", (int64_t)value, value, offset);
 }
-void TraceContextLoadF32(void* raw_context, uint64_t offset, float value) {
+void TraceContextLoadF32(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
-  union {
-    float f;
-    uint32_t u;
-  } x;
-  x.f = value;
-  DPRINT("%e (%X) = ctx f32 +%d\n", x.f, x.u, offset);
+  DPRINT("%e (%X) = ctx f32 +%d\n", value.m128_f32[0], value.m128_i32[0], offset);
 }
-void TraceContextLoadF64(void* raw_context, uint64_t offset, double value) {
+void TraceContextLoadF64(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
   union {
-    double f;
-    uint64_t u;
-  } x;
-  x.f = value;
-  DPRINT("%lle (%llX) = ctx f64 +%d\n", x.f, x.u, offset);
+    double d;
+    uint64_t x;
+  } f;
+  f.x = value.m128_i64[0];
+  DPRINT("%lle (%llX) = ctx f64 +%d\n", f.d, value.m128_i64[0], offset);
 }
 void TraceContextLoadV128(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
@@ -99,23 +94,18 @@ void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value) {
   auto thread_state = *((ThreadState**)raw_context);
   DPRINT("ctx i64 +%d = %lld (%llX)\n", offset, (int64_t)value, value);
 }
-void TraceContextStoreF32(void* raw_context, uint64_t offset, float value) {
+void TraceContextStoreF32(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
-  union {
-    float f;
-    uint32_t u;
-  } x;
-  x.f = value;
-  DPRINT("ctx f32 +%d = %e (%.X)\n", offset, x.f, x.u);
+  DPRINT("ctx f32 +%d = %e (%X)\n", offset, value.m128_f32[0], value.m128_i32[0]);
 }
-void TraceContextStoreF64(void* raw_context, uint64_t offset, double value) {
+void TraceContextStoreF64(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
   union {
-    double f;
-    uint64_t u;
-  } x;
-  x.f = value;
-  DPRINT("ctx f64 +%d = %lle (%.llX)\n", offset, x.f, x.u);
+    double d;
+    uint64_t x;
+  } f;
+  f.x = value.m128_i64[0];
+  DPRINT("ctx f64 +%d = %lle (%llX)\n", offset, f.d, value.m128_i64[0]);
 }
 void TraceContextStoreV128(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
@@ -140,23 +130,18 @@ void TraceMemoryLoadI64(void* raw_context, uint64_t address, uint64_t value) {
   auto thread_state = *((ThreadState**)raw_context);
   DPRINT("%lld (%llX) = load.i64 %.8X\n", (int64_t)value, value, address);
 }
-void TraceMemoryLoadF32(void* raw_context, uint64_t address, float value) {
+void TraceMemoryLoadF32(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
-  union {
-    float f;
-    uint32_t u;
-  } x;
-  x.f = value;
-  DPRINT("%e (%X) = load.f32 %.8X\n", x.f, x.u, address);
+  DPRINT("%e (%X) = load.f32 %.8X\n", value.m128_f32[0], value.m128_i32[0], address);
 }
-void TraceMemoryLoadF64(void* raw_context, uint64_t address, double value) {
+void TraceMemoryLoadF64(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
   union {
-    double f;
-    uint64_t u;
-  } x;
-  x.f = value;
-  DPRINT("%lle (%llX) = load.f64 %.8X\n", x.f, x.u, address);
+    double d;
+    uint64_t x;
+  } f;
+  f.x = value.m128_i64[0];
+  DPRINT("%lle (%llX) = load.f64 %.8X\n", f.d, value.m128_i64[0], address);
 }
 void TraceMemoryLoadV128(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
@@ -182,23 +167,18 @@ void TraceMemoryStoreI64(void* raw_context, uint64_t address, uint64_t value) {
   auto thread_state = *((ThreadState**)raw_context);
   DPRINT("store.i64 %.8X = %lld (%llX)\n", address, (int64_t)value, value);
 }
-void TraceMemoryStoreF32(void* raw_context, uint64_t address, float value) {
+void TraceMemoryStoreF32(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
-  union {
-    float f;
-    uint32_t u;
-  } x;
-  x.f = value;
-  DPRINT("store.f32 %.8X = %e (%X)\n", address, x.f, x.u);
+  DPRINT("store.f32 %.8X = %e (%X)\n", address, value.m128_f32[0], value.m128_i32[0]);
 }
-void TraceMemoryStoreF64(void* raw_context, uint64_t address, double value) {
+void TraceMemoryStoreF64(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
   union {
-    double f;
-    uint64_t u;
-  } x;
-  x.f = value;
-  DPRINT("store.f64 %.8X = %lle (%llX)\n", address, x.f, x.u);
+    double d;
+    uint64_t x;
+  } f;
+  f.x = value.m128_i64[0];
+  DPRINT("store.f64 %.8X = %lle (%llX)\n", address, f.d, value.m128_i64[0]);
 }
 void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
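
Note: the rewritten F64 tracers keep a union only to type-pun the low 64 bits of the __m128 into a double for printing; the F32 tracers can read the m128_f32/m128_i32 views directly, though those accessors are MSVC-specific. A portable sketch of the F64 pattern:

    #include <cstdint>
    #include <cstdio>
    #include <emmintrin.h>

    // Print the low 64 bits of an __m128 as a double plus its raw bit
    // pattern, mirroring the union trick above without m128_i64.
    void PrintF64Bits(__m128 value) {
      union {
        double d;
        uint64_t x;
      } f;
      _mm_storel_epi64((__m128i*)&f.x, _mm_castps_si128(value));
      printf("%e (%llX)\n", f.d, (unsigned long long)f.x);
    }
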

View File

@@ -25,32 +25,32 @@ void TraceContextLoadI8(void* raw_context, uint64_t offset, uint8_t value);
 void TraceContextLoadI16(void* raw_context, uint64_t offset, uint16_t value);
 void TraceContextLoadI32(void* raw_context, uint64_t offset, uint32_t value);
 void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value);
-void TraceContextLoadF32(void* raw_context, uint64_t offset, float value);
-void TraceContextLoadF64(void* raw_context, uint64_t offset, double value);
+void TraceContextLoadF32(void* raw_context, uint64_t offset, __m128 value);
+void TraceContextLoadF64(void* raw_context, uint64_t offset, __m128 value);
 void TraceContextLoadV128(void* raw_context, uint64_t offset, __m128 value);
 
 void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value);
 void TraceContextStoreI16(void* raw_context, uint64_t offset, uint16_t value);
 void TraceContextStoreI32(void* raw_context, uint64_t offset, uint32_t value);
 void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value);
-void TraceContextStoreF32(void* raw_context, uint64_t offset, float value);
-void TraceContextStoreF64(void* raw_context, uint64_t offset, double value);
+void TraceContextStoreF32(void* raw_context, uint64_t offset, __m128 value);
+void TraceContextStoreF64(void* raw_context, uint64_t offset, __m128 value);
 void TraceContextStoreV128(void* raw_context, uint64_t offset, __m128 value);
 
 void TraceMemoryLoadI8(void* raw_context, uint64_t address, uint8_t value);
 void TraceMemoryLoadI16(void* raw_context, uint64_t address, uint16_t value);
 void TraceMemoryLoadI32(void* raw_context, uint64_t address, uint32_t value);
 void TraceMemoryLoadI64(void* raw_context, uint64_t address, uint64_t value);
-void TraceMemoryLoadF32(void* raw_context, uint64_t address, float value);
-void TraceMemoryLoadF64(void* raw_context, uint64_t address, double value);
+void TraceMemoryLoadF32(void* raw_context, uint64_t address, __m128 value);
+void TraceMemoryLoadF64(void* raw_context, uint64_t address, __m128 value);
 void TraceMemoryLoadV128(void* raw_context, uint64_t address, __m128 value);
 
 void TraceMemoryStoreI8(void* raw_context, uint64_t address, uint8_t value);
 void TraceMemoryStoreI16(void* raw_context, uint64_t address, uint16_t value);
 void TraceMemoryStoreI32(void* raw_context, uint64_t address, uint32_t value);
 void TraceMemoryStoreI64(void* raw_context, uint64_t address, uint64_t value);
-void TraceMemoryStoreF32(void* raw_context, uint64_t address, float value);
-void TraceMemoryStoreF64(void* raw_context, uint64_t address, double value);
+void TraceMemoryStoreF32(void* raw_context, uint64_t address, __m128 value);
+void TraceMemoryStoreF64(void* raw_context, uint64_t address, __m128 value);
 void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value);
 
 }  // namespace lowering

View File

@@ -116,7 +116,9 @@ int X64Emitter::Emit(HIRBuilder* builder) {
       GetRegBit(r11) |
       GetRegBit(xmm1) |
       GetRegBit(xmm2) |
-      GetRegBit(xmm3);
+      GetRegBit(xmm3) |
+      GetRegBit(xmm4) |
+      GetRegBit(xmm5);
 
   // Function prolog.
   // Must be 16b aligned.