Add FPR tracing (XeTraceFPR / X64Emitter::TraceFPR) and fix FPU emitters.

- global_exports: new XeTraceFPR export; XeTraceVR prints floats with %e.
- disasm_fpu: fmulx/fmulsx list FRC instead of FRB as the second source.
- x64_emit_fpu: call TraceFPR at the end of the touched FPU emitters; use
  comisd instead of cmpsd ahead of conditional jumps; fabsx uses andpd;
  fcfidx converts from a separately saved variable; fctidx/fctiwx OR 0x6000
  into MXCSR before converting; fctidx tests i.X.Rc.
- x64_emitter: TraceFPR added; TraceVR returns early unless
  FLAGS_trace_instructions is set.

Review amendments relative to the patch as originally submitted:
- fctidx/fctiwx: branch with ja/jb after comisd. comisd reports its result in
  ZF/PF/CF and clears SF/OF, so jl/jg do not reflect the comparison.
- XeTraceFPR/XeTraceVR: cast the 64-bit register index to int for "%d".
- TraceFPR: FuncBuilder6 template arguments restored to match the XeTraceFPR
  signature (state pointer + five uint64_t, returning void).

diff --git a/src/xenia/cpu/global_exports.cc b/src/xenia/cpu/global_exports.cc
index b5937f726..d7d635656 100644
--- a/src/xenia/cpu/global_exports.cc
+++ b/src/xenia/cpu/global_exports.cc
@@ -111,6 +111,42 @@ void _cdecl XeTraceBranch(
       (uint32_t)cia, (uint32_t)target_ia);
 }
 
+// Logs state->cia followed by the values of up to five FPRs.
+// fpr0 is always printed; fpr1..fpr4 are skipped when they equal UINT_MAX.
+// The index is cast to int because it is formatted with "%d".
+void _cdecl XeTraceFPR(
+    xe_ppc_state_t* state, uint64_t fpr0, uint64_t fpr1, uint64_t fpr2,
+    uint64_t fpr3, uint64_t fpr4) {
+  char buffer[2048];
+  buffer[0] = 0;
+  int offset = 0;
+
+  offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
+      "%.8X:", state->cia);
+
+  offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
+      "\nf%.2d = %e", (int)fpr0, state->f[fpr0]);
+  if (fpr1 != UINT_MAX) {
+    offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
+        "\nf%.2d = %e", (int)fpr1, state->f[fpr1]);
+  }
+  if (fpr2 != UINT_MAX) {
+    offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
+        "\nf%.2d = %e", (int)fpr2, state->f[fpr2]);
+  }
+  if (fpr3 != UINT_MAX) {
+    offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
+        "\nf%.2d = %e", (int)fpr3, state->f[fpr3]);
+  }
+  if (fpr4 != UINT_MAX) {
+    offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
+        "\nf%.2d = %e", (int)fpr4, state->f[fpr4]);
+  }
+
+  uint32_t thread_id = state->thread_state->thread_id();
+  xe_log_line("", thread_id, "XeTraceFPR", 't', buffer);
+}
+
 void _cdecl XeTraceVR(
     xe_ppc_state_t* state, uint64_t vr0, uint64_t vr1, uint64_t vr2,
     uint64_t vr3, uint64_t vr4) {
@@ -122,30 +158,30 @@ void _cdecl XeTraceVR(
       "%.8X:", state->cia);
 
   offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
-      "\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%g, %g, %g, %g]", vr0,
+      "\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%e, %e, %e, %e]", (int)vr0,
       state->v[vr0].ix, state->v[vr0].iy, state->v[vr0].iz, state->v[vr0].iw,
       state->v[vr0].x, state->v[vr0].y, state->v[vr0].z, state->v[vr0].w);
   if (vr1 != UINT_MAX) {
     offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
-        "\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%g, %g, %g, %g]", vr1,
+        "\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%e, %e, %e, %e]", (int)vr1,
         state->v[vr1].ix, state->v[vr1].iy, state->v[vr1].iz, state->v[vr1].iw,
         state->v[vr1].x, state->v[vr1].y, state->v[vr1].z, state->v[vr1].w);
   }
   if (vr2 != UINT_MAX) {
     offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
-        "\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%g, %g, %g, %g]", vr2,
+        "\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%e, %e, %e, %e]", (int)vr2,
         state->v[vr2].ix, state->v[vr2].iy, state->v[vr2].iz, state->v[vr2].iw,
         state->v[vr2].x, state->v[vr2].y, state->v[vr2].z, state->v[vr2].w);
   }
   if (vr3 != UINT_MAX) {
     offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
-        "\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%g, %g, %g, %g]", vr3,
+        "\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%e, %e, %e, %e]", (int)vr3,
         state->v[vr3].ix, state->v[vr3].iy, state->v[vr3].iz, state->v[vr3].iw,
         state->v[vr3].x, state->v[vr3].y, state->v[vr3].z, state->v[vr3].w);
   }
   if (vr4 != UINT_MAX) {
     offset += xesnprintfa(buffer + offset, XECOUNT(buffer) - offset,
-        "\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%g, %g, %g, %g]", vr4,
+        "\nvr%.3d=[%.8X, %.8X, %.8X, %.8X] [%e, %e, %e, %e]", (int)vr4,
         state->v[vr4].ix, state->v[vr4].iy, state->v[vr4].iz, state->v[vr4].iw,
         state->v[vr4].x, state->v[vr4].y, state->v[vr4].z, state->v[vr4].w);
   }
@@ -227,6 +263,7 @@ void xe::cpu::GetGlobalExports(GlobalExports* global_exports) {
   global_exports->XeTraceKernelCall = XeTraceKernelCall;
   global_exports->XeTraceUserCall = XeTraceUserCall;
   global_exports->XeTraceBranch = XeTraceBranch;
+  global_exports->XeTraceFPR = XeTraceFPR;
   global_exports->XeTraceVR = XeTraceVR;
   global_exports->XeTraceInstruction = XeTraceInstruction;
 }
diff --git a/src/xenia/cpu/global_exports.h b/src/xenia/cpu/global_exports.h
index a0aaa65cd..8349ad688 100644
--- a/src/xenia/cpu/global_exports.h
+++ b/src/xenia/cpu/global_exports.h
@@ -39,6 +39,9 @@ typedef struct {
       sdb::FunctionSymbol* fn);
   void (_cdecl *XeTraceBranch)(
       xe_ppc_state_t* state, uint64_t cia, uint64_t target_ia);
+  void (_cdecl *XeTraceFPR)(
+      xe_ppc_state_t* state, uint64_t fpr0, uint64_t fpr1, uint64_t fpr2,
+      uint64_t fpr3, uint64_t fpr4);
   void (_cdecl *XeTraceVR)(
       xe_ppc_state_t* state, uint64_t vr0, uint64_t vr1, uint64_t vr2,
       uint64_t vr3, uint64_t vr4);
diff --git a/src/xenia/cpu/ppc/disasm_fpu.cc b/src/xenia/cpu/ppc/disasm_fpu.cc
index 179bbd68d..5c977524c 100644
--- a/src/xenia/cpu/ppc/disasm_fpu.cc
+++ b/src/xenia/cpu/ppc/disasm_fpu.cc
@@ -61,7 +61,7 @@ XEDISASMR(fmulx, 0xFC000032, A )(InstrData& i, InstrDisasm& d) {
       InstrDisasm::kFP | (i.A.Rc ? InstrDisasm::kRc : 0));
   d.AddRegOperand(InstrRegister::kFPR, i.A.FRT, InstrRegister::kWrite);
   d.AddRegOperand(InstrRegister::kFPR, i.A.FRA, InstrRegister::kRead);
-  d.AddRegOperand(InstrRegister::kFPR, i.A.FRB, InstrRegister::kRead);
+  d.AddRegOperand(InstrRegister::kFPR, i.A.FRC, InstrRegister::kRead);
   return d.Finish();
 }
 
@@ -70,7 +70,7 @@ XEDISASMR(fmulsx, 0xEC000032, A )(InstrData& i, InstrDisasm& d) {
       InstrDisasm::kFP | (i.A.Rc ? InstrDisasm::kRc : 0));
   d.AddRegOperand(InstrRegister::kFPR, i.A.FRT, InstrRegister::kWrite);
   d.AddRegOperand(InstrRegister::kFPR, i.A.FRA, InstrRegister::kRead);
-  d.AddRegOperand(InstrRegister::kFPR, i.A.FRB, InstrRegister::kRead);
+  d.AddRegOperand(InstrRegister::kFPR, i.A.FRC, InstrRegister::kRead);
   return d.Finish();
 }
 
diff --git a/src/xenia/cpu/x64/x64_emit_fpu.cc b/src/xenia/cpu/x64/x64_emit_fpu.cc
index 0aed2ada0..592b67de3 100644
--- a/src/xenia/cpu/x64/x64_emit_fpu.cc
+++ b/src/xenia/cpu/x64/x64_emit_fpu.cc
@@ -53,6 +53,8 @@ XEEMITTER(faddx, 0xFC00002A, A )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.A.FRT, i.A.FRA, i.A.FRB);
+
   return 0;
 }
 
@@ -81,6 +83,8 @@ XEEMITTER(faddsx, 0xEC00002A, A )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.A.FRT, i.A.FRA, i.A.FRB);
+
   return 0;
 }
 
@@ -102,6 +106,8 @@ XEEMITTER(fdivx, 0xFC000024, A )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.A.FRT, i.A.FRA, i.A.FRB);
+
   return 0;
 }
 
@@ -130,6 +136,8 @@ XEEMITTER(fdivsx, 0xEC000024, A )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.A.FRT, i.A.FRA, i.A.FRB);
+
   return 0;
 }
 
@@ -151,6 +159,8 @@ XEEMITTER(fmulx, 0xFC000032, A )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.A.FRT, i.A.FRA, i.A.FRC);
+
   return 0;
 }
 
@@ -179,6 +189,8 @@ XEEMITTER(fmulsx, 0xEC000032, A )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.A.FRT, i.A.FRA, i.A.FRC);
+
   return 0;
 }
 
@@ -217,6 +229,8 @@ XEEMITTER(fsubx, 0xFC000028, A )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.A.FRT, i.A.FRA, i.A.FRB);
+
   return 0;
 }
 
@@ -238,6 +252,8 @@ XEEMITTER(fsubsx, 0xEC000028, A )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.A.FRT, i.A.FRA, i.A.FRB);
+
   return 0;
 }
 
@@ -250,7 +266,7 @@ XEEMITTER(fselx, 0xFC00002E, A )(X64Emitter& e, X86Compiler& c, InstrDat
   GpVar zero(c.newGpVar());
   c.mov(zero, imm(0));
   c.movq(v, zero);
-  c.cmpsd(e.fpr_value(i.A.FRA), v, 0);
+  c.comisd(e.fpr_value(i.A.FRA), v);
 
   // TODO(benvanik): find a way to do this without jumps.
   Label choose_b(c.newLabel());
@@ -273,6 +289,8 @@ XEEMITTER(fselx, 0xFC00002E, A )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.A.FRT, i.A.FRA, i.A.FRB, i.A.FRC);
+
   return 0;
 }
 
@@ -310,6 +328,8 @@ XEEMITTER(fmaddx, 0xFC00003A, A )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.A.FRT, i.A.FRA, i.A.FRB, i.A.FRC);
+
   return 0;
 }
 
@@ -361,6 +381,8 @@ XEEMITTER(fnmsubx, 0xFC00003C, A )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.A.FRT, i.A.FRA, i.A.FRB, i.A.FRC);
+
   return 0;
 }
 
@@ -394,6 +416,8 @@ XEEMITTER(fnmsubsx, 0xEC00003C, A )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.A.FRT, i.A.FRA, i.A.FRB, i.A.FRC);
+
   return 0;
 }
 
@@ -403,10 +427,11 @@ XEEMITTER(fnmsubsx, 0xEC00003C, A )(X64Emitter& e, X86Compiler& c, InstrDat
 XEEMITTER(fcfidx, 0xFC00069C, X )(X64Emitter& e, X86Compiler& c, InstrData& i) {
   // frD <- signed_int64_to_double( frB )
 
+  XmmVar frb(c.newXmmVar());
+  c.movq(frb, e.fpr_value(i.A.FRB));
+  c.save(frb);
   XmmVar v(c.newXmmVar());
-  c.movq(v, e.fpr_value(i.A.FRB));
-  c.save(v);
-  c.cvtsi2sd(v, v.m64());
+  c.cvtsi2sd(v, frb.m64());
   e.update_fpr_value(i.A.FRT, v);
 
   // TODO(benvanik): update status/control register.
@@ -419,6 +444,8 @@ XEEMITTER(fcfidx, 0xFC00069C, X )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.X.RT, i.X.RB);
+
   return 0;
 }
 
@@ -435,17 +462,27 @@ XEEMITTER(fctidx, 0xFC00065C, X )(X64Emitter& e, X86Compiler& c, InstrDat
   GpVar tmp(c.newGpVar());
   XmmVar xmm_tmp(c.newXmmVar());
 
+  // TODO(benvanik): pull from FPSCR[RN]
+  // http://www.rz.uni-karlsruhe.de/rz/docs/VTune/reference/vc148.htm
+  // Round to zero (truncate).
+  GpVar mxcsr(c.newGpVar());
+  c.save(mxcsr);
+  c.stmxcsr(mxcsr.m32());
+  c.or_(mxcsr, imm(0x6000));
+  c.save(mxcsr);
+  c.ldmxcsr(mxcsr.m32());
+
   XmmVar v(c.newXmmVar());
   c.movq(v, e.fpr_value(i.X.RB));
   // Max value: 2^63 - 1
   c.mov(tmp, imm(0x43e0000000000000));
   c.movq(xmm_tmp, tmp);
-  c.cmpsd(v, xmm_tmp, 0);
-  c.jg(over_max);
+  c.comisd(v, xmm_tmp);
+  c.ja(over_max);  // comisd sets CF/ZF/PF; jg/jl test SF/OF.
   // Min value: -2^63
   c.mov(tmp, imm(0xc3e0000000000000));
   c.movq(xmm_tmp, tmp);
-  c.cmpsd(v, xmm_tmp, 0);
-  c.jl(under_min);
+  c.comisd(v, xmm_tmp);
+  c.jb(under_min);  // CF=1: below min (also set when unordered).
   c.save(v);
   c.cvtsd2si(tmp, v.m64());
@@ -463,7 +500,7 @@ XEEMITTER(fctidx, 0xFC00065C, X )(X64Emitter& e, X86Compiler& c, InstrDat
 
   // TODO(benvanik): update status/control register.
 
-  if (i.A.Rc) {
+  if (i.X.Rc) {
     // With cr0 update.
     XEASSERTALWAYS();
     //e.update_cr_with_cond(0, v);
@@ -471,6 +508,8 @@ XEEMITTER(fctidx, 0xFC00065C, X )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.X.RT, i.X.RB);
+
   return 0;
 }
 
@@ -492,18 +531,28 @@ XEEMITTER(fctiwx, 0xFC00001C, X )(X64Emitter& e, X86Compiler& c, InstrDat
   GpVar tmp(c.newGpVar());
   XmmVar xmm_tmp(c.newXmmVar());
 
+  // TODO(benvanik): pull from FPSCR[RN]
+  // http://www.rz.uni-karlsruhe.de/rz/docs/VTune/reference/vc148.htm
+  // Round to zero (truncate).
+  GpVar mxcsr(c.newGpVar());
+  c.save(mxcsr);
+  c.stmxcsr(mxcsr.m32());
+  c.or_(mxcsr, imm(0x6000));
+  c.save(mxcsr);
+  c.ldmxcsr(mxcsr.m32());
+
   XmmVar v(c.newXmmVar());
   c.movq(v, e.fpr_value(i.X.RB));
   // Max value: 2^31 - 1
   c.mov(tmp, imm(0x41efffffffe00000));
   c.movq(xmm_tmp, tmp);
-  c.cmpsd(v, xmm_tmp, 0);
-  c.jg(over_max);
+  c.comisd(v, xmm_tmp);
+  c.ja(over_max);  // comisd sets CF/ZF/PF; jg/jl test SF/OF.
   // Min value: -2^31
   c.mov(tmp, imm(0xc1e0000000000000));
   c.movq(xmm_tmp, tmp);
-  c.cmpsd(v, xmm_tmp, 0);
-  c.jl(under_min);
+  c.comisd(v, xmm_tmp);
+  c.jb(under_min);  // CF=1: below min (also set when unordered).
   c.save(v);
   c.cvtsd2si(tmp, v.m64());
   c.movq(v, tmp);
@@ -528,6 +577,8 @@ XEEMITTER(fctiwx, 0xFC00001C, X )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.X.RT, i.X.RB);
+
   return 0;
 }
 
@@ -561,6 +612,8 @@ XEEMITTER(frspx, 0xFC000018, X )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.X.RT, i.X.RB);
+
   return 0;
 }
 
@@ -631,12 +684,12 @@ XEEMITTER(fabsx, 0xFC000210, X )(X64Emitter& e, X86Compiler& c, InstrDat
   XmmVar v(c.newXmmVar());
   c.movq(v, e.fpr_value(i.X.RB));
 
-  // XOR with 0 in the sign bit and ones everywhere else.
+  // AND with 0 in the sign bit and 1 everywhere else.
   GpVar gp_bit(c.newGpVar());
   c.mov(gp_bit, imm(0x7FFFFFFFFFFFFFFF));
   XmmVar bit(c.newXmmVar());
   c.movq(bit, gp_bit);
-  c.xorpd(v, bit);
+  c.andpd(v, bit);
   e.update_fpr_value(i.X.RT, v);
 
   if (i.X.Rc) {
@@ -647,6 +700,8 @@ XEEMITTER(fabsx, 0xFC000210, X )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.X.RT, i.X.RB);
+
   return 0;
 }
 
@@ -665,6 +720,8 @@ XEEMITTER(fmrx, 0xFC000090, X )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.X.RT, i.X.RB);
+
   return 0;
 }
 
@@ -694,6 +751,8 @@ XEEMITTER(fnegx, 0xFC000050, X )(X64Emitter& e, X86Compiler& c, InstrDat
     return 1;
   }
 
+  e.TraceFPR(i.X.RT, i.X.RB);
+
   return 0;
 }
 
diff --git a/src/xenia/cpu/x64/x64_emitter.cc b/src/xenia/cpu/x64/x64_emitter.cc
index a02b2116f..22a64ea0c 100644
--- a/src/xenia/cpu/x64/x64_emitter.cc
+++ b/src/xenia/cpu/x64/x64_emitter.cc
@@ -911,10 +911,62 @@ void X64Emitter::TraceBranch(uint32_t cia) {
   }
 }
 
+// Emits a call to the XeTraceFPR export for up to five FPR indices
+// (UINT_MAX = unused). No-op unless FLAGS_trace_instructions is set.
+void X64Emitter::TraceFPR(uint32_t fpr0, uint32_t fpr1,
+                          uint32_t fpr2, uint32_t fpr3,
+                          uint32_t fpr4) {
+  X86Compiler& c = compiler_;
+
+  if (!FLAGS_trace_instructions) {
+    return;
+  }
+
+  for (int n = 0; n < 5; n++) {
+    c.nop();
+  }
+
+  if (FLAGS_annotate_disassembly) {
+    c.comment("XeTraceFPR (+spill)");
+  }
+
+  SpillRegisters();
+
+  // TODO(benvanik): remove once fixed: https://code.google.com/p/asmjit/issues/detail?id=86
+  GpVar arg1 = c.newGpVar(kX86VarTypeGpq);
+  c.mov(arg1, imm(fpr0));
+  GpVar arg2 = c.newGpVar(kX86VarTypeGpq);
+  c.mov(arg2, imm(fpr1));
+  GpVar arg3 = c.newGpVar(kX86VarTypeGpq);
+  c.mov(arg3, imm(fpr2));
+  GpVar arg4 = c.newGpVar(kX86VarTypeGpq);
+  c.mov(arg4, imm(fpr3));
+  GpVar arg5 = c.newGpVar(kX86VarTypeGpq);
+  c.mov(arg5, imm(fpr4));
+  X86CompilerFuncCall* call = c.call(global_exports_.XeTraceFPR);
+  call->setPrototype(kX86FuncConvDefault,
+      FuncBuilder6<void, void*, uint64_t, uint64_t, uint64_t,
+                   uint64_t, uint64_t>());
+  call->setArgument(0, c.getGpArg(0));
+  call->setArgument(1, arg1);
+  call->setArgument(2, arg2);
+  call->setArgument(3, arg3);
+  call->setArgument(4, arg4);
+  call->setArgument(5, arg5);
+
+  for (int n = 0; n < 2; n++) {
+    c.nop();
+  }
+}
+
 void X64Emitter::TraceVR(uint32_t vr0, uint32_t vr1, uint32_t vr2,
                          uint32_t vr3, uint32_t vr4) {
   X86Compiler& c = compiler_;
 
+  if (!FLAGS_trace_instructions) {
+    return;
+  }
+
   for (int n = 0; n < 5; n++) {
     c.nop();
   }
diff --git a/src/xenia/cpu/x64/x64_emitter.h b/src/xenia/cpu/x64/x64_emitter.h
index 6daf9f49c..7a7505e2a 100644
--- a/src/xenia/cpu/x64/x64_emitter.h
+++ b/src/xenia/cpu/x64/x64_emitter.h
@@ -55,6 +55,9 @@ public:
   void TraceInstruction(ppc::InstrData& i);
   void TraceInvalidInstruction(ppc::InstrData& i);
   void TraceBranch(uint32_t cia);
+  void TraceFPR(uint32_t fpr0, uint32_t fpr1 = UINT_MAX,
+                uint32_t fpr2 = UINT_MAX, uint32_t fpr3 = UINT_MAX,
+                uint32_t fpr4 = UINT_MAX);
   void TraceVR(uint32_t vr0, uint32_t vr1 = UINT_MAX,
                uint32_t vr2 = UINT_MAX, uint32_t vr3 = UINT_MAX,
                uint32_t vr4 = UINT_MAX);