diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc
index c5bfdd4b3..873a083b0 100644
--- a/src/alloy/backend/ivm/ivm_intcode.cc
+++ b/src/alloy/backend/ivm/ivm_intcode.cc
@@ -40,10 +40,10 @@ namespace ivm {
 #define DPRINT
 #define DFLUSH()
 
-//#define IPRINT if (ics.thread_state->thread_id() == 1) printf
-//#define IFLUSH() fflush(stdout)
-//#define DPRINT if (ics.thread_state->thread_id() == 1) printf
-//#define DFLUSH() fflush(stdout)
+#define IPRINT if (ics.thread_state->thread_id() == 1) printf
+#define IFLUSH() fflush(stdout)
+#define DPRINT if (ics.thread_state->thread_id() == 1) printf
+#define DFLUSH() fflush(stdout)
 
 #if XE_CPU_BIGENDIAN
 #define VECB16(v,n) (v.b16[n])
@@ -1515,7 +1515,7 @@ uint32_t IntCode_LOAD_V128(IntCodeState& ics, const IntCode* i) {
   for (int n = 0; n < 4; n++) {
     VECI4(dest,n) = *((uint32_t*)(ics.membase + address + n * 4));
   }
-  DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = load v128 %.8X\n",
+  DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = load.v128 %.8X\n",
         VECF4(dest,0), VECF4(dest,1), VECF4(dest,2), VECF4(dest,3),
         VECI4(dest,0), VECI4(dest,1), VECI4(dest,2), VECI4(dest,3),
         address);
@@ -1610,7 +1610,7 @@ uint32_t IntCode_STORE_F64(IntCodeState& ics, const IntCode* i) {
 }
 uint32_t IntCode_STORE_V128(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
-  DPRINT("store v128 %.8X = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n",
+  DPRINT("store.v128 %.8X = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n",
         address,
         VECF4(ics.rf[i->src2_reg].v128,0), VECF4(ics.rf[i->src2_reg].v128,1), VECF4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3),
         VECI4(ics.rf[i->src2_reg].v128,0), VECI4(ics.rf[i->src2_reg].v128,1), VECI4(ics.rf[i->src2_reg].v128,2), VECI4(ics.rf[i->src2_reg].v128,3));
diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.cc b/src/alloy/backend/x64/lowering/lowering_sequences.cc
index e433991e4..8c37e7f5b 100644
--- a/src/alloy/backend/x64/lowering/lowering_sequences.cc
+++ b/src/alloy/backend/x64/lowering/lowering_sequences.cc
@@ -756,7 +756,7 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) {
 #if DTRACE
       e.mov(e.rdx, i->src1.offset);
       e.movaps(e.xmm0, src);
-      CallNative(e, TraceContextStoreF64);
+      CallNative(e, TraceContextStoreV128);
 #endif  // DTRACE
     } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128C)) {
       // TODO(benvanik): check zero
diff --git a/src/alloy/backend/x64/lowering/tracers.cc b/src/alloy/backend/x64/lowering/tracers.cc
index a0a2f212b..1115f360d 100644
--- a/src/alloy/backend/x64/lowering/tracers.cc
+++ b/src/alloy/backend/x64/lowering/tracers.cc
@@ -77,7 +77,9 @@ void TraceContextLoadF64(void* raw_context, uint64_t offset, double value) {
 }
 void TraceContextLoadV128(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
-  //DPRINT("%d (%.X) = ctx i8 +%d\n", (int8_t)value, value, offset);
+  DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = ctx v128 +%d\n",
+         value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
+         value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3], offset);
 }
 
 void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value) {
@@ -116,9 +118,9 @@ void TraceContextStoreF64(void* raw_context, uint64_t offset, double value) {
 }
 void TraceContextStoreV128(void* raw_context, uint64_t offset, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
-  /*DPRINT("ctx v128 +%d = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n", ics.rf[i->src1_reg].u64,
-        VECF4(ics.rf[i->src2_reg].v128,0), VECF4(ics.rf[i->src2_reg].v128,1), VECF4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3),
-        VECI4(ics.rf[i->src2_reg].v128,0), VECI4(ics.rf[i->src2_reg].v128,1), VECI4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3));*/
+  DPRINT("ctx v128 +%d = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n", offset,
+         value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
+         value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3]);
 }
 
 void TraceMemoryLoadI8(void* raw_context, uint64_t address, uint8_t value) {
@@ -157,7 +159,10 @@ void TraceMemoryLoadF64(void* raw_context, uint64_t address, double value) {
 }
 void TraceMemoryLoadV128(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
-  //DPRINT("%d (%.X) = load.v128 +%d\n", (int8_t)value, value, offset);
+  DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = load.v128 %.8X\n",
+         value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
+         value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3],
+         address);
 }
 
 void TraceMemoryStoreI8(void* raw_context, uint64_t address, uint8_t value) {
@@ -196,9 +201,9 @@ void TraceMemoryStoreF64(void* raw_context, uint64_t address, double value) {
 }
 void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value) {
   auto thread_state = *((ThreadState**)raw_context);
-  /*DPRINT("ctx v128 +%d = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n", ics.rf[i->src1_reg].u64,
-        VECF4(ics.rf[i->src2_reg].v128,0), VECF4(ics.rf[i->src2_reg].v128,1), VECF4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3),
-        VECI4(ics.rf[i->src2_reg].v128,0), VECI4(ics.rf[i->src2_reg].v128,1), VECI4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3));*/
+  DPRINT("store.v128 %.8X = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n", address,
+         value.m128_f32[0], value.m128_f32[1], value.m128_f32[2], value.m128_f32[3],
+         value.m128_i32[0], value.m128_i32[1], value.m128_i32[2], value.m128_i32[3]);
 }
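
For reference, a minimal standalone sketch (not part of the change above) of the lane-splitting format these tracers share: print the four float lanes of an __m128 alongside their raw 32-bit patterns. It spills the register with _mm_storeu_ps instead of reading the MSVC-only m128_f32/m128_i32 union members, so it also builds with GCC/Clang; PrintV128 is a hypothetical helper, not a function in the tree.

    // Hypothetical repro of the "[floats] [hex lanes]" trace format used above.
    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <xmmintrin.h>

    static void PrintV128(const char* label, __m128 value) {
      float f[4];
      uint32_t i[4];
      _mm_storeu_ps(f, value);       // spill the four float lanes to memory
      std::memcpy(i, f, sizeof(i));  // same 128 bits, viewed as u32 lanes
      std::printf("%s [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n",
                  label, f[0], f[1], f[2], f[3], i[0], i[1], i[2], i[3]);
    }

    int main() {
      // Lane 0 of _mm_set_ps(4,3,2,1) is 1.0f, whose bit pattern is 3F800000.
      PrintV128("load.v128", _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f));
      return 0;
    }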