diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc index cef1e7930..3bc84a771 100644 --- a/src/alloy/backend/ivm/ivm_intcode.cc +++ b/src/alloy/backend/ivm/ivm_intcode.cc @@ -1337,32 +1337,32 @@ int Translate_LOAD_CLOCK(TranslationContext& ctx, Instr* i) { uint32_t IntCode_LOAD_CONTEXT_I8(IntCodeState& ics, const IntCode* i) { ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.context + ics.rf[i->src1_reg].u64)); - DPRINT("%d (%.X) = ctx i8 +%d\n", ics.rf[i->dest_reg].i8, ics.rf[i->dest_reg].u8, ics.rf[i->src1_reg].u64); + DPRINT("%d (%X) = ctx i8 +%d\n", ics.rf[i->dest_reg].i8, ics.rf[i->dest_reg].u8, ics.rf[i->src1_reg].u64); return IA_NEXT; } uint32_t IntCode_LOAD_CONTEXT_I16(IntCodeState& ics, const IntCode* i) { ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.context + ics.rf[i->src1_reg].u64)); - DPRINT("%d (%.X) = ctx i16 +%d\n", ics.rf[i->dest_reg].i16, ics.rf[i->dest_reg].u16, ics.rf[i->src1_reg].u64); + DPRINT("%d (%X) = ctx i16 +%d\n", ics.rf[i->dest_reg].i16, ics.rf[i->dest_reg].u16, ics.rf[i->src1_reg].u64); return IA_NEXT; } uint32_t IntCode_LOAD_CONTEXT_I32(IntCodeState& ics, const IntCode* i) { ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.context + ics.rf[i->src1_reg].u64)); - DPRINT("%d (%.X) = ctx i32 +%d\n", ics.rf[i->dest_reg].i32, ics.rf[i->dest_reg].u32, ics.rf[i->src1_reg].u64); + DPRINT("%d (%X) = ctx i32 +%d\n", ics.rf[i->dest_reg].i32, ics.rf[i->dest_reg].u32, ics.rf[i->src1_reg].u64); return IA_NEXT; } uint32_t IntCode_LOAD_CONTEXT_I64(IntCodeState& ics, const IntCode* i) { ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.context + ics.rf[i->src1_reg].u64)); - DPRINT("%lld (%.llX) = ctx i64 +%d\n", ics.rf[i->dest_reg].i64, ics.rf[i->dest_reg].u64, ics.rf[i->src1_reg].u64); + DPRINT("%lld (%llX) = ctx i64 +%d\n", ics.rf[i->dest_reg].i64, ics.rf[i->dest_reg].u64, ics.rf[i->src1_reg].u64); return IA_NEXT; } uint32_t IntCode_LOAD_CONTEXT_F32(IntCodeState& ics, const IntCode* i) { ics.rf[i->dest_reg].f32 = *((float*)(ics.context + ics.rf[i->src1_reg].u64)); - DPRINT("%e (%.X) = ctx f32 +%d\n", ics.rf[i->dest_reg].f32, ics.rf[i->dest_reg].u32, ics.rf[i->src1_reg].u64); + DPRINT("%e (%X) = ctx f32 +%d\n", ics.rf[i->dest_reg].f32, ics.rf[i->dest_reg].u32, ics.rf[i->src1_reg].u64); return IA_NEXT; } uint32_t IntCode_LOAD_CONTEXT_F64(IntCodeState& ics, const IntCode* i) { ics.rf[i->dest_reg].f64 = *((double*)(ics.context + ics.rf[i->src1_reg].u64)); - DPRINT("%lle (%.llX) = ctx f64 +%d\n", ics.rf[i->dest_reg].f64, ics.rf[i->dest_reg].u64, ics.rf[i->src1_reg].u64); + DPRINT("%lle (%llX) = ctx f64 +%d\n", ics.rf[i->dest_reg].f64, ics.rf[i->dest_reg].u64, ics.rf[i->src1_reg].u64); return IA_NEXT; } uint32_t IntCode_LOAD_CONTEXT_V128(IntCodeState& ics, const IntCode* i) { @@ -1388,32 +1388,32 @@ int Translate_LOAD_CONTEXT(TranslationContext& ctx, Instr* i) { uint32_t IntCode_STORE_CONTEXT_I8(IntCodeState& ics, const IntCode* i) { *((int8_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i8; - DPRINT("ctx i8 +%d = %d (%.X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i8, ics.rf[i->src2_reg].u8); + DPRINT("ctx i8 +%d = %d (%X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i8, ics.rf[i->src2_reg].u8); return IA_NEXT; } uint32_t IntCode_STORE_CONTEXT_I16(IntCodeState& ics, const IntCode* i) { *((int16_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i16; - DPRINT("ctx i16 +%d = %d (%.X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i16, ics.rf[i->src2_reg].u16); + DPRINT("ctx i16 +%d = %d (%X)\n", ics.rf[i->src1_reg].u64, 
ics.rf[i->src2_reg].i16, ics.rf[i->src2_reg].u16); return IA_NEXT; } uint32_t IntCode_STORE_CONTEXT_I32(IntCodeState& ics, const IntCode* i) { *((int32_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i32; - DPRINT("ctx i32 +%d = %d (%.X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i32, ics.rf[i->src2_reg].u32); + DPRINT("ctx i32 +%d = %d (%X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i32, ics.rf[i->src2_reg].u32); return IA_NEXT; } uint32_t IntCode_STORE_CONTEXT_I64(IntCodeState& ics, const IntCode* i) { *((int64_t*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i64; - DPRINT("ctx i64 +%d = %lld (%.llX)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i64, ics.rf[i->src2_reg].u64); + DPRINT("ctx i64 +%d = %lld (%llX)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].i64, ics.rf[i->src2_reg].u64); return IA_NEXT; } uint32_t IntCode_STORE_CONTEXT_F32(IntCodeState& ics, const IntCode* i) { *((float*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].f32; - DPRINT("ctx f32 +%d = %e (%.X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].f32, ics.rf[i->src2_reg].u32); + DPRINT("ctx f32 +%d = %e (%X)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].f32, ics.rf[i->src2_reg].u32); return IA_NEXT; } uint32_t IntCode_STORE_CONTEXT_F64(IntCodeState& ics, const IntCode* i) { *((double*)(ics.context + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].f64; - DPRINT("ctx f64 +%d = %lle (%.llX)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].f64, ics.rf[i->src2_reg].u64); + DPRINT("ctx f64 +%d = %lle (%llX)\n", ics.rf[i->src1_reg].u64, ics.rf[i->src2_reg].f64, ics.rf[i->src2_reg].u64); return IA_NEXT; } uint32_t IntCode_STORE_CONTEXT_V128(IntCodeState& ics, const IntCode* i) { diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.cc b/src/alloy/backend/x64/lowering/lowering_sequences.cc index 379e438c2..15ca6d19f 100644 --- a/src/alloy/backend/x64/lowering/lowering_sequences.cc +++ b/src/alloy/backend/x64/lowering/lowering_sequences.cc @@ -12,6 +12,7 @@ #include #include #include +#include <alloy/backend/x64/lowering/tracers.h> #include #include #include @@ -30,7 +31,7 @@ namespace { #define ASSERT_INVALID_TYPE() XEASSERTALWAYS() #define ITRACE 1 -#define DTRACE 0 +#define DTRACE 1 #define SHUFPS_SWAP_DWORDS 0x1B @@ -44,28 +45,10 @@ namespace { // shuffle(vec, b00011011) -> {x,y,z,w} => {x,y,z,w} // All indices and operations must respect that. -// TODO(benvanik): emit traces/printfs/etc - void Dummy() { // } -void PrintString(void* raw_context, const char* str) { - // TODO(benvanik): generate this thunk at runtime? or a shim?
- auto thread_state = *((ThreadState**)raw_context); - fprintf(stdout, "XE[t] :%d: %s\n", thread_state->GetThreadID(), str); - fflush(stdout); -} - -void TraceContextLoad(void* raw_context, uint64_t offset, uint64_t value) { - fprintf(stdout, "%lld (%.llX) = ctx i64 +%lld\n", (int64_t)value, value, offset); - fflush(stdout); -} -void TraceContextStore(void* raw_context, uint64_t offset, uint64_t value) { - fprintf(stdout, "ctx i64 +%lld = %lld (%.llX)\n", offset, (int64_t)value, value); - fflush(stdout); -} - uint64_t LoadClock(void* raw_context) { LARGE_INTEGER counter; uint64_t time = 0; @@ -173,7 +156,7 @@ table->AddSequence(OPCODE_COMMENT, [](X64Emitter& e, Instr*& i) { auto str = (const char*)i->src1.offset; auto str_copy = xestrdupa(str); e.mov(e.rdx, (uint64_t)str_copy); - CallNative(e, PrintString); + CallNative(e, TraceString); #endif // ITRACE i = e.Advance(i); return true; @@ -591,7 +574,7 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) { #if DTRACE e.mov(e.rdx, i->src1.offset); e.mov(e.r8b, dest); - CallNative(e, TraceContextLoad); + CallNative(e, TraceContextLoadI8); #endif // DTRACE } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_IGNORE)) { Reg16 dest; @@ -601,7 +584,7 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) { #if DTRACE e.mov(e.rdx, i->src1.offset); e.mov(e.r8w, dest); - CallNative(e, TraceContextLoad); + CallNative(e, TraceContextLoadI16); #endif // DTRACE } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_IGNORE)) { Reg32 dest; @@ -611,7 +594,7 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) { #if DTRACE e.mov(e.rdx, i->src1.offset); e.mov(e.r8d, dest); - CallNative(e, TraceContextLoad); + CallNative(e, TraceContextLoadI32); #endif // DTRACE } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_IGNORE)) { Reg64 dest; @@ -621,24 +604,39 @@ table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) { #if DTRACE e.mov(e.rdx, i->src1.offset); e.mov(e.r8, dest); - CallNative(e, TraceContextLoad); + CallNative(e, TraceContextLoadI64); #endif // DTRACE } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_IGNORE)) { Xmm dest; e.BeginOp(i->dest, dest, REG_DEST); e.movss(dest, e.dword[e.rcx + i->src1.offset]); e.EndOp(dest); +#if DTRACE + e.mov(e.rdx, i->src1.offset); + e.movaps(e.xmm0, dest); + CallNative(e, TraceContextLoadF32); +#endif // DTRACE } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_IGNORE)) { Xmm dest; e.BeginOp(i->dest, dest, REG_DEST); e.movsd(dest, e.qword[e.rcx + i->src1.offset]); e.EndOp(dest); +#if DTRACE + e.mov(e.rdx, i->src1.offset); + e.movaps(e.xmm0, dest); + CallNative(e, TraceContextLoadF64); +#endif // DTRACE } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_IGNORE)) { Xmm dest; e.BeginOp(i->dest, dest, REG_DEST); // NOTE: we always know we are aligned. 
e.movaps(dest, e.ptr[e.rcx + i->src1.offset]); e.EndOp(dest); +#if DTRACE + e.mov(e.rdx, i->src1.offset); + e.movaps(e.xmm0, dest); + CallNative(e, TraceContextLoadV128); +#endif // DTRACE } else { ASSERT_INVALID_TYPE(); } @@ -655,14 +653,14 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) { #if DTRACE e.mov(e.rdx, i->src1.offset); e.mov(e.r8b, src); - CallNative(e, TraceContextStore); + CallNative(e, TraceContextStoreI8); #endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I8C)) { e.mov(e.byte[e.rcx + i->src1.offset], i->src2.value->constant.i8); #if DTRACE e.mov(e.rdx, i->src1.offset); e.mov(e.r8b, i->src2.value->constant.i8); - CallNative(e, TraceContextStore); + CallNative(e, TraceContextStoreI8); #endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16)) { Reg16 src; @@ -672,14 +670,14 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) { #if DTRACE e.mov(e.rdx, i->src1.offset); e.mov(e.r8w, src); - CallNative(e, TraceContextStore); + CallNative(e, TraceContextStoreI16); #endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16C)) { e.mov(e.word[e.rcx + i->src1.offset], i->src2.value->constant.i16); #if DTRACE e.mov(e.rdx, i->src1.offset); e.mov(e.r8w, i->src2.value->constant.i16); - CallNative(e, TraceContextStore); + CallNative(e, TraceContextStoreI16); #endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32)) { Reg32 src; @@ -689,14 +687,14 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) { #if DTRACE e.mov(e.rdx, i->src1.offset); e.mov(e.r8d, src); - CallNative(e, TraceContextStore); + CallNative(e, TraceContextStoreI32); #endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32C)) { e.mov(e.dword[e.rcx + i->src1.offset], i->src2.value->constant.i32); #if DTRACE e.mov(e.rdx, i->src1.offset); e.mov(e.r8d, i->src2.value->constant.i32); - CallNative(e, TraceContextStore); + CallNative(e, TraceContextStoreI32); #endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64)) { Reg64 src; @@ -706,38 +704,68 @@ table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) { #if DTRACE e.mov(e.rdx, i->src1.offset); e.mov(e.r8, src); - CallNative(e, TraceContextStore); + CallNative(e, TraceContextStoreI64); #endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64C)) { e.mov(e.qword[e.rcx + i->src1.offset], i->src2.value->constant.i64); #if DTRACE e.mov(e.rdx, i->src1.offset); e.mov(e.r8, i->src2.value->constant.i64); - CallNative(e, TraceContextStore); + CallNative(e, TraceContextStoreI64); #endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32)) { Xmm src; e.BeginOp(i->src2.value, src, 0); e.movss(e.dword[e.rcx + i->src1.offset], src); e.EndOp(src); +#if DTRACE + e.mov(e.rdx, i->src1.offset); + e.movss(e.xmm0, src); + CallNative(e, TraceContextStoreF32); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32C)) { e.mov(e.dword[e.rcx + i->src1.offset], i->src2.value->constant.i32); +#if DTRACE + e.mov(e.rdx, i->src1.offset); + e.movss(e.xmm0, e.dword[e.rcx + i->src1.offset]); + CallNative(e, TraceContextStoreF32); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64)) { Xmm src; e.BeginOp(i->src2.value, src, 0); e.movsd(e.qword[e.rcx + i->src1.offset], src); e.EndOp(src); +#if DTRACE + e.mov(e.rdx, i->src1.offset); + e.movsd(e.xmm0, src); + CallNative(e, TraceContextStoreF64); +#endif // 
DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64C)) { e.mov(e.qword[e.rcx + i->src1.offset], i->src2.value->constant.i64); +#if DTRACE + e.mov(e.rdx, i->src1.offset); + e.movsd(e.xmm0, e.qword[e.rcx + i->src1.offset]); + CallNative(e, TraceContextStoreF64); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128)) { Xmm src; e.BeginOp(i->src2.value, src, 0); // NOTE: we always know we are aligned. e.movaps(e.ptr[e.rcx + i->src1.offset], src); e.EndOp(src); +#if DTRACE + e.mov(e.rdx, i->src1.offset); + e.movaps(e.xmm0, src); + CallNative(e, TraceContextStoreV128); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128C)) { e.mov(e.qword[e.rcx + i->src1.offset], i->src2.value->constant.v128.low); e.mov(e.qword[e.rcx + i->src1.offset + 8], i->src2.value->constant.v128.high); +#if DTRACE + e.mov(e.rdx, i->src1.offset); + e.movups(e.xmm0, e.ptr[e.rcx + i->src1.offset]); + CallNative(e, TraceContextStoreV128); +#endif // DTRACE } else { ASSERT_INVALID_TYPE(); } @@ -792,31 +820,61 @@ table->AddSequence(OPCODE_LOAD, [](X64Emitter& e, Instr*& i) { e.BeginOp(i->dest, dest, REG_DEST); e.mov(dest, e.byte[addr]); e.EndOp(dest); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.mov(e.r8b, dest); + CallNative(e, TraceMemoryLoadI8); +#endif // DTRACE } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_IGNORE)) { Reg16 dest; e.BeginOp(i->dest, dest, REG_DEST); e.mov(dest, e.word[addr]); e.EndOp(dest); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.mov(e.r8w, dest); + CallNative(e, TraceMemoryLoadI16); +#endif // DTRACE } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_IGNORE)) { Reg32 dest; e.BeginOp(i->dest, dest, REG_DEST); e.mov(dest, e.dword[addr]); e.EndOp(dest); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.mov(e.r8d, dest); + CallNative(e, TraceMemoryLoadI32); +#endif // DTRACE } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_IGNORE)) { Reg64 dest; e.BeginOp(i->dest, dest, REG_DEST); e.mov(dest, e.qword[addr]); e.EndOp(dest); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.mov(e.r8, dest); + CallNative(e, TraceMemoryLoadI64); +#endif // DTRACE } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_IGNORE)) { Xmm dest; e.BeginOp(i->dest, dest, REG_DEST); e.movss(dest, e.dword[addr]); e.EndOp(dest); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.movss(e.xmm0, dest); + CallNative(e, TraceMemoryLoadF32); +#endif // DTRACE } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_IGNORE)) { Xmm dest; e.BeginOp(i->dest, dest, REG_DEST); e.movsd(dest, e.qword[addr]); e.EndOp(dest); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.movsd(e.xmm0, dest); + CallNative(e, TraceMemoryLoadF64); +#endif // DTRACE } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_IGNORE)) { Xmm dest; e.BeginOp(i->dest, dest, REG_DEST); @@ -824,6 +882,11 @@ table->AddSequence(OPCODE_LOAD, [](X64Emitter& e, Instr*& i) { e.movups(dest, e.ptr[addr]); e.EndOp(dest); e.db(0xCC); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.movaps(e.xmm0, dest); + CallNative(e, TraceMemoryLoadV128); +#endif // DTRACE } else { ASSERT_INVALID_TYPE(); } @@ -892,43 +955,103 @@ table->AddSequence(OPCODE_STORE, [](X64Emitter& e, Instr*& i) { e.BeginOp(i->src2.value, src, 0); e.mov(e.byte[addr], src); e.EndOp(src); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.mov(e.r8b, src); + CallNative(e, TraceMemoryStoreI8); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I8C)) { e.mov(e.byte[addr], i->src2.value->constant.i8); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.mov(e.r8b, i->src2.value->constant.i8); + CallNative(e, TraceMemoryStoreI8);
+#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16)) { Reg16 src; e.BeginOp(i->src2.value, src, 0); e.mov(e.word[addr], src); e.EndOp(src); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.mov(e.r8w, src); + CallNative(e, TraceMemoryStoreI16); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16C)) { e.mov(e.word[addr], i->src2.value->constant.i16); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.mov(e.r8w, i->src2.value->constant.i16); + CallNative(e, TraceMemoryStoreI16); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32)) { Reg32 src; e.BeginOp(i->src2.value, src, 0); e.mov(e.dword[addr], src); e.EndOp(src); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.mov(e.r8d, src); + CallNative(e, TraceMemoryStoreI32); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32C)) { e.mov(e.dword[addr], i->src2.value->constant.i32); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.mov(e.r8d, i->src2.value->constant.i32); + CallNative(e, TraceMemoryStoreI32); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64)) { Reg64 src; e.BeginOp(i->src2.value, src, 0); e.mov(e.qword[addr], src); e.EndOp(src); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.mov(e.r8, src); + CallNative(e, TraceMemoryStoreI64); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64C)) { e.mov(e.qword[addr], i->src2.value->constant.i64); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.mov(e.r8, i->src2.value->constant.i64); + CallNative(e, TraceMemoryStoreI64); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32)) { Xmm src; e.BeginOp(i->src2.value, src, 0); e.movss(e.dword[addr], src); e.EndOp(src); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.movss(e.xmm0, src); + CallNative(e, TraceMemoryStoreF32); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32C)) { e.mov(e.dword[addr], i->src2.value->constant.i32); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.movss(e.xmm0, e.ptr[addr]); + CallNative(e, TraceMemoryStoreF32); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64)) { Xmm src; e.BeginOp(i->src2.value, src, 0); e.movsd(e.qword[addr], src); e.EndOp(src); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.movsd(e.xmm0, src); + CallNative(e, TraceMemoryStoreF64); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64C)) { e.mov(e.qword[addr], i->src2.value->constant.i64); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.movsd(e.xmm0, e.ptr[addr]); + CallNative(e, TraceMemoryStoreF64); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128)) { Xmm src; e.BeginOp(i->src2.value, src, 0); @@ -936,9 +1059,19 @@ table->AddSequence(OPCODE_STORE, [](X64Emitter& e, Instr*& i) { e.movups(e.ptr[addr], src); e.EndOp(src); e.db(0xCC); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.movaps(e.xmm0, src); + CallNative(e, TraceMemoryStoreV128); +#endif // DTRACE } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128C)) { e.mov(e.ptr[addr], i->src2.value->constant.v128.low); e.mov(e.ptr[addr + 8], i->src2.value->constant.v128.high); +#if DTRACE + e.lea(e.rdx, e.ptr[addr]); + e.movups(e.xmm0, e.ptr[addr]); + CallNative(e, TraceMemoryStoreV128); +#endif // DTRACE } else { ASSERT_INVALID_TYPE(); } diff --git a/src/alloy/backend/x64/lowering/sources.gypi b/src/alloy/backend/x64/lowering/sources.gypi index 93a754180..d6cdeb1bb 100644 --- a/src/alloy/backend/x64/lowering/sources.gypi +++ 
b/src/alloy/backend/x64/lowering/sources.gypi @@ -6,5 +6,7 @@ 'lowering_table.cc', 'lowering_table.h', 'op_utils.inl', + 'tracers.cc', + 'tracers.h', ], } diff --git a/src/alloy/backend/x64/lowering/tracers.cc b/src/alloy/backend/x64/lowering/tracers.cc new file mode 100644 index 000000000..a0a2f212b --- /dev/null +++ b/src/alloy/backend/x64/lowering/tracers.cc @@ -0,0 +1,208 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include <alloy/backend/x64/lowering/tracers.h> + +#include <alloy/backend/x64/x64_emitter.h> +#include <alloy/runtime/runtime.h> +#include <alloy/runtime/thread_state.h> + +using namespace alloy; +using namespace alloy::backend::x64; +using namespace alloy::backend::x64::lowering; +using namespace alloy::runtime; + +namespace alloy { +namespace backend { +namespace x64 { +namespace lowering { + + +//#define IPRINT +//#define IFLUSH() +//#define DPRINT +//#define DFLUSH() + +#define IPRINT if (thread_state->GetThreadID() == 1) printf +#define IFLUSH() fflush(stdout) +#define DPRINT if (thread_state->GetThreadID() == 1) printf +#define DFLUSH() fflush(stdout) + + +void TraceString(void* raw_context, const char* str) { + auto thread_state = *((ThreadState**)raw_context); + IPRINT("XE[t] :%d: %s\n", thread_state->GetThreadID(), str); + IFLUSH(); +} + +void TraceContextLoadI8(void* raw_context, uint64_t offset, uint8_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("%d (%X) = ctx i8 +%d\n", (int8_t)value, value, offset); +} +void TraceContextLoadI16(void* raw_context, uint64_t offset, uint16_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("%d (%X) = ctx i16 +%d\n", (int16_t)value, value, offset); +} +void TraceContextLoadI32(void* raw_context, uint64_t offset, uint32_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("%d (%X) = ctx i32 +%d\n", (int32_t)value, value, offset); +} +void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("%lld (%llX) = ctx i64 +%d\n", (int64_t)value, value, offset); +} +void TraceContextLoadF32(void* raw_context, uint64_t offset, float value) { + auto thread_state = *((ThreadState**)raw_context); + union { + float f; + uint32_t u; + } x; + x.f = value; + DPRINT("%e (%X) = ctx f32 +%d\n", x.f, x.u, offset); +} +void TraceContextLoadF64(void* raw_context, uint64_t offset, double value) { + auto thread_state = *((ThreadState**)raw_context); + union { + double f; + uint64_t u; + } x; + x.f = value; + DPRINT("%lle (%llX) = ctx f64 +%d\n", x.f, x.u, offset); +} +void TraceContextLoadV128(void* raw_context, uint64_t offset, __m128 value) { + auto thread_state = *((ThreadState**)raw_context); + //DPRINT("%d (%.X) = ctx i8 +%d\n", (int8_t)value, value, offset); +} + +void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("ctx i8 +%d = %d (%X)\n", offset, (int8_t)value, value); +} +void TraceContextStoreI16(void* raw_context, uint64_t offset, uint16_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("ctx i16 +%d = %d (%X)\n", offset, (int16_t)value, value); +} +void TraceContextStoreI32(void* raw_context, uint64_t offset, uint32_t value) { + auto
thread_state = *((ThreadState**)raw_context); + DPRINT("ctx i32 +%d = %d (%X)\n", offset, (int32_t)value, value); +} +void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("ctx i64 +%d = %lld (%llX)\n", offset, (int64_t)value, value); +} +void TraceContextStoreF32(void* raw_context, uint64_t offset, float value) { + auto thread_state = *((ThreadState**)raw_context); + union { + float f; + uint32_t u; + } x; + x.f = value; + DPRINT("ctx f32 +%d = %e (%X)\n", offset, x.f, x.u); +} +void TraceContextStoreF64(void* raw_context, uint64_t offset, double value) { + auto thread_state = *((ThreadState**)raw_context); + union { + double f; + uint64_t u; + } x; + x.f = value; + DPRINT("ctx f64 +%d = %lle (%llX)\n", offset, x.f, x.u); +} +void TraceContextStoreV128(void* raw_context, uint64_t offset, __m128 value) { + auto thread_state = *((ThreadState**)raw_context); + /*DPRINT("ctx v128 +%d = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n", ics.rf[i->src1_reg].u64, + VECF4(ics.rf[i->src2_reg].v128,0), VECF4(ics.rf[i->src2_reg].v128,1), VECF4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3), + VECI4(ics.rf[i->src2_reg].v128,0), VECI4(ics.rf[i->src2_reg].v128,1), VECI4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3));*/ +} + +void TraceMemoryLoadI8(void* raw_context, uint64_t address, uint8_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("%d (%X) = load.i8 %.8X\n", (int8_t)value, value, address); +} +void TraceMemoryLoadI16(void* raw_context, uint64_t address, uint16_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("%d (%X) = load.i16 %.8X\n", (int16_t)value, value, address); +} +void TraceMemoryLoadI32(void* raw_context, uint64_t address, uint32_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("%d (%X) = load.i32 %.8X\n", (int32_t)value, value, address); +} +void TraceMemoryLoadI64(void* raw_context, uint64_t address, uint64_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("%lld (%llX) = load.i64 %.8X\n", (int64_t)value, value, address); +} +void TraceMemoryLoadF32(void* raw_context, uint64_t address, float value) { + auto thread_state = *((ThreadState**)raw_context); + union { + float f; + uint32_t u; + } x; + x.f = value; + DPRINT("%e (%X) = load.f32 %.8X\n", x.f, x.u, address); +} +void TraceMemoryLoadF64(void* raw_context, uint64_t address, double value) { + auto thread_state = *((ThreadState**)raw_context); + union { + double f; + uint64_t u; + } x; + x.f = value; + DPRINT("%lle (%llX) = load.f64 %.8X\n", x.f, x.u, address); +} +void TraceMemoryLoadV128(void* raw_context, uint64_t address, __m128 value) { + auto thread_state = *((ThreadState**)raw_context); + //DPRINT("%d (%.X) = load.v128 +%d\n", (int8_t)value, value, offset); +} + +void TraceMemoryStoreI8(void* raw_context, uint64_t address, uint8_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("store.i8 %.8X = %d (%X)\n", address, (int8_t)value, value); +} +void TraceMemoryStoreI16(void* raw_context, uint64_t address, uint16_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("store.i16 %.8X = %d (%X)\n", address, (int16_t)value, value); +} +void TraceMemoryStoreI32(void* raw_context, uint64_t address, uint32_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("store.i32 %.8X = %d (%X)\n", address, (int32_t)value, value); +} +void TraceMemoryStoreI64(void*
raw_context, uint64_t address, uint64_t value) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("store.i64 %.8X = %lld (%llX)\n", address, (int64_t)value, value); +} +void TraceMemoryStoreF32(void* raw_context, uint64_t address, float value) { + auto thread_state = *((ThreadState**)raw_context); + union { + float f; + uint32_t u; + } x; + x.f = value; + DPRINT("store.f32 %.8X = %e (%X)\n", address, x.f, x.u); +} +void TraceMemoryStoreF64(void* raw_context, uint64_t address, double value) { + auto thread_state = *((ThreadState**)raw_context); + union { + double f; + uint64_t u; + } x; + x.f = value; + DPRINT("store.f64 %.8X = %lle (%llX)\n", address, x.f, x.u); +} +void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value) { + auto thread_state = *((ThreadState**)raw_context); + /*DPRINT("ctx v128 +%d = [%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X]\n", ics.rf[i->src1_reg].u64, + VECF4(ics.rf[i->src2_reg].v128,0), VECF4(ics.rf[i->src2_reg].v128,1), VECF4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3), + VECI4(ics.rf[i->src2_reg].v128,0), VECI4(ics.rf[i->src2_reg].v128,1), VECI4(ics.rf[i->src2_reg].v128,2), VECF4(ics.rf[i->src2_reg].v128,3));*/ +} + + +} // namespace lowering +} // namespace x64 +} // namespace backend +} // namespace alloy diff --git a/src/alloy/backend/x64/lowering/tracers.h b/src/alloy/backend/x64/lowering/tracers.h new file mode 100644 index 000000000..eccc87de9 --- /dev/null +++ b/src/alloy/backend/x64/lowering/tracers.h @@ -0,0 +1,62 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef ALLOY_BACKEND_X64_X64_LOWERING_TRACERS_H_ +#define ALLOY_BACKEND_X64_X64_LOWERING_TRACERS_H_ + +#include <alloy/core.h> + + +namespace alloy { +namespace backend { +namespace x64 { +class X64Emitter; +namespace lowering { + +void TraceString(void* raw_context, const char* str); + +void TraceContextLoadI8(void* raw_context, uint64_t offset, uint8_t value); +void TraceContextLoadI16(void* raw_context, uint64_t offset, uint16_t value); +void TraceContextLoadI32(void* raw_context, uint64_t offset, uint32_t value); +void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value); +void TraceContextLoadF32(void* raw_context, uint64_t offset, float value); +void TraceContextLoadF64(void* raw_context, uint64_t offset, double value); +void TraceContextLoadV128(void* raw_context, uint64_t offset, __m128 value); + +void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value); +void TraceContextStoreI16(void* raw_context, uint64_t offset, uint16_t value); +void TraceContextStoreI32(void* raw_context, uint64_t offset, uint32_t value); +void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value); +void TraceContextStoreF32(void* raw_context, uint64_t offset, float value); +void TraceContextStoreF64(void* raw_context, uint64_t offset, double value); +void TraceContextStoreV128(void* raw_context, uint64_t offset, __m128 value); + +void TraceMemoryLoadI8(void* raw_context, uint64_t address, uint8_t value); +void TraceMemoryLoadI16(void* raw_context, uint64_t address, uint16_t value); +void TraceMemoryLoadI32(void* raw_context, uint64_t address, uint32_t value); +void TraceMemoryLoadI64(void* raw_context, uint64_t address, uint64_t value); +void TraceMemoryLoadF32(void* raw_context, uint64_t address, float value); +void TraceMemoryLoadF64(void* raw_context, uint64_t address, double value); +void TraceMemoryLoadV128(void* raw_context, uint64_t address, __m128 value); + +void TraceMemoryStoreI8(void* raw_context, uint64_t address, uint8_t value); +void TraceMemoryStoreI16(void* raw_context, uint64_t address, uint16_t value); +void TraceMemoryStoreI32(void* raw_context, uint64_t address, uint32_t value); +void TraceMemoryStoreI64(void* raw_context, uint64_t address, uint64_t value); +void TraceMemoryStoreF32(void* raw_context, uint64_t address, float value); +void TraceMemoryStoreF64(void* raw_context, uint64_t address, double value); +void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value); + +} // namespace lowering +} // namespace x64 +} // namespace backend +} // namespace alloy + + +#endif // ALLOY_BACKEND_X64_X64_LOWERING_TRACERS_H_ diff --git a/src/alloy/runtime/thread_state.cc b/src/alloy/runtime/thread_state.cc index 32edf177e..84add8bce 100644 --- a/src/alloy/runtime/thread_state.cc +++ b/src/alloy/runtime/thread_state.cc @@ -64,6 +64,5 @@ ThreadState* ThreadState::Get() { } uint32_t ThreadState::GetThreadID() { - XEASSERT(thread_state_); return thread_state_->thread_id_; }
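Note on the emitted trace calls: the lowering sequences above lean on the Win64 calling convention. In generated code rcx permanently holds the context pointer (whose first slot is the ThreadState*, which is why every tracer starts with *((ThreadState**)raw_context)), so each trace site only has to load rdx with the offset/address and r8 or xmm0 with the value before invoking CallNative. CallNative itself is defined elsewhere in the x64 lowering code and is not shown in this diff; the sketch below is an assumption about what such a helper minimally emits, using an xbyak-style emitter API, not the actual implementation.

// Hypothetical sketch of a CallNative-style helper; the real one may
// spill/restore additional state.
template <typename FN>
void CallNative(X64Emitter& e, FN* fn) {
  // rcx = context pointer (already live in generated code);
  // rdx/r8/xmm0 = trace arguments loaded by the call site.
  e.mov(e.rax, (uint64_t)fn);  // materialize the absolute target address
  e.call(e.rax);               // indirect call; rax, rcx, rdx, r8-r11 and
                               // xmm0-xmm5 are caller-saved under Win64, so
                               // anything needed afterwards must be reloaded
}

One caveat worth flagging: Win64 passes a third floating-point parameter in xmm2 rather than xmm0, so the movss/movsd/movaps into xmm0 at the float and vector trace sites only yield correct printed values if CallNative (or a shim) repositions the register before the call.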