Added preliminary support for the Gekko CPU Performance Monitor. Fixes Harry Potter and the Prisoner of Azkaban.

This commit is contained in:
skidau 2012-04-28 20:42:45 +10:00
parent 75fbbcae40
commit cdace9d776
7 changed files with 175 additions and 25 deletions

View File

@ -181,7 +181,7 @@ union UGeckoInstruction
u32 : 11;
u32 CRBB : 5;
u32 CRBA : 5;
u32 CRBD : 5;
u32 CRBD : 5;
u32 : 6;
};
@ -235,9 +235,9 @@ union UGeckoInstruction
};
struct
{
u32 : 17;
u32 FM : 8;
u32 : 7;
u32 : 17;
u32 FM : 8;
u32 : 7;
};
// paired
@ -247,8 +247,8 @@ union UGeckoInstruction
u32 Ix : 3;
u32 Wx : 1;
u32 : 1;
u32 I : 3;
u32 W : 1;
u32 I : 3;
u32 W : 1;
u32 : 16;
};
@ -319,7 +319,7 @@ union UReg_XER
u32 Hex;
UReg_XER(u32 _hex) { Hex = _hex; }
UReg_XER() { Hex = 0; }
UReg_XER() { Hex = 0; }
};
// Machine State Register
@ -351,7 +351,7 @@ union UReg_MSR
u32 Hex;
UReg_MSR(u32 _hex) { Hex = _hex; }
UReg_MSR() { Hex = 0; }
UReg_MSR() { Hex = 0; }
};
// Floating Point Status and Control Register
@ -487,13 +487,47 @@ union UReg_SPR1
u32 Hex;
struct
{
u32 htaborg : 16;
u32 : 7;
u32 htabmask : 9;
u32 htaborg : 16;
u32 : 7;
u32 htabmask : 9;
};
};
// MMCR0 - Monitor Mode Control Register 0 format
// Overlays the raw 32-bit register value (Hex) with named bit-fields. The
// field widths add up to exactly 32 bits; the declaration order defines the
// bit layout and must not be rearranged.
// NOTE(review): the layout assumes the compiler allocates bit-fields starting
// from the least significant bit, like the other register unions in this file.
// Only PMC1SELECT, PMC2SELECT, PMC1INTCONTROL and PMCINTCONTROL are read by
// UpdatePerformanceMonitor(); the remaining descriptions follow the register
// naming in the CPU manual and should be confirmed against it.
union UReg_MMCR0
{
u32 Hex;
struct
{
u32 PMC2SELECT : 6; // Event selector for PMC2
u32 PMC1SELECT : 7; // Event selector for PMC1
u32 PMCTRIGGER : 1; // presumably: PMC2-4 start counting once PMC1 overflows - TODO confirm
u32 PMCINTCONTROL : 1; // Gates the overflow exception for PMC2, PMC3 and PMC4
u32 PMC1INTCONTROL : 1; // Gates the overflow exception for PMC1
u32 THRESHOLD : 6; // presumably: threshold value for threshold events - TODO confirm
u32 INTONBITTRANS : 1; // presumably: interrupt on time base bit transition - TODO confirm
u32 RTCSELECT : 2; // presumably: selects the time base bit to watch - TODO confirm
u32 DISCOUNT : 1; // presumably: stop counting once an interrupt is signalled - TODO confirm
u32 ENINT : 1; // presumably: master enable for monitor interrupts - TODO confirm
u32 DMR : 1; // presumably: disable counting while MSR[PM] is clear - TODO confirm
u32 DMS : 1; // presumably: disable counting while MSR[PM] is set - TODO confirm
u32 DU : 1; // presumably: disable counting in user mode - TODO confirm
u32 DP : 1; // presumably: disable counting in supervisor mode - TODO confirm
u32 DIS : 1; // presumably: disable counting unconditionally - TODO confirm
};
};
// MMCR1 - Monitor Mode Control Register 1 format
// Overlays the raw 32-bit register value (Hex) with named bit-fields. The
// unnamed 22-bit field pads the two 5-bit selectors up to the full 32 bits;
// the declaration order defines the bit layout and must not be rearranged.
union UReg_MMCR1
{
u32 Hex;
struct
{
u32 : 22; // Unnamed padding (no fields defined here)
u32 PMC4SELECT : 5; // Event selector for PMC4 (read by UpdatePerformanceMonitor)
u32 PMC3SELECT : 5; // Event selector for PMC3 (read by UpdatePerformanceMonitor)
};
};
// Write Pipe Address Register
union UReg_WPAR
@ -516,7 +550,7 @@ union UReg_DMAU
struct
{
u32 DMA_LEN_U : 5;
u32 MEM_ADDR : 27;
u32 MEM_ADDR : 27;
};
u32 Hex;
@ -533,7 +567,7 @@ union UReg_DMAL
u32 DMA_T : 1;
u32 DMA_LEN_L : 2;
u32 DMA_LD : 1;
u32 LC_ADDR : 27;
u32 LC_ADDR : 27;
};
u32 Hex;
@ -545,11 +579,11 @@ union UReg_BAT_Up
{
struct
{
u32 VP : 1;
u32 VS : 1;
u32 VP : 1;
u32 VS : 1;
u32 BL : 11; // Block length (aka block size mask)
u32 : 4;
u32 BEPI : 15;
u32 BEPI : 15;
};
u32 Hex;
@ -561,8 +595,8 @@ union UReg_BAT_Lo
{
struct
{
u32 PP : 2;
u32 : 1;
u32 PP : 2;
u32 : 1;
u32 WIMG : 4;
u32 : 10;
u32 BRPN : 15; // Physical Block Number
@ -586,7 +620,7 @@ union UReg_PTE
u64 WIMG : 4;
u64 C : 1;
u64 R : 1;
u64 : 3;
u64 : 3;
u64 RPN : 20;
};
@ -623,16 +657,16 @@ enum
// Special purpose register indices
enum
{
SPR_XER = 1,
SPR_XER = 1,
SPR_LR = 8,
SPR_CTR = 9,
SPR_DSISR = 18,
SPR_DAR = 19,
SPR_DAR = 19,
SPR_DEC = 22,
SPR_SDR = 25,
SPR_SDR = 25,
SPR_SRR0 = 26,
SPR_SRR1 = 27,
SPR_TL = 268,
SPR_TL = 268,
SPR_TU = 269,
SPR_TL_W = 284,
SPR_TU_W = 285,
@ -669,7 +703,17 @@ enum
SPR_ECID_U = 924,
SPR_ECID_M = 925,
SPR_ECID_L = 926,
SPR_L2CR = 1017
SPR_L2CR = 1017,
SPR_UMMCR0 = 936,
SPR_MMCR0 = 952,
SPR_PMC1 = 953,
SPR_PMC2 = 954,
SPR_UMMCR1 = 940,
SPR_MMCR1 = 956,
SPR_PMC3 = 957,
SPR_PMC4 = 958,
};
// Exceptions
@ -679,8 +723,9 @@ enum
#define EXCEPTION_DSI 0x00000008
#define EXCEPTION_ISI 0x00000010
#define EXCEPTION_ALIGNMENT 0x00000020
#define EXCEPTION_FPU_UNAVAILABLE 0x00000040
#define EXCEPTION_FPU_UNAVAILABLE 0x00000040
#define EXCEPTION_PROGRAM 0x00000080
#define EXCEPTION_PERFORMANCE_MONITOR 0x00000100
inline s32 SignExt16(s16 x) {return (s32)(s16)x;}
inline s32 SignExt26(u32 x) {return x & 0x2000000 ? (s32)(x | 0xFC000000) : (s32)(x);}

View File

@ -295,6 +295,11 @@ void Jit64::Cleanup()
{
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
CMP(32, M(&MMCR0), Imm32(0));
FixupBranch mmcr0 = J_CC(CC_Z);
ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, jit->js.numFloatingPointInst);
SetJumpTarget(mmcr0);
}
void Jit64::WriteExit(u32 destination, int exit_num)
@ -654,6 +659,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
WriteExceptionExit();
SetJumpTarget(noMemException);
}
if (opinfo->flags & FL_LOADSTORE)
++jit->js.numLoadStoreInst;
if (opinfo->flags & FL_USE_FPU)
++jit->js.numFloatingPointInst;
}
#if defined(_DEBUG) || defined(DEBUGFAST)

View File

@ -93,6 +93,10 @@ void Jit64::mfspr(UGeckoInstruction inst)
case SPR_DEC:
case SPR_TL:
case SPR_TU:
case SPR_PMC1:
case SPR_PMC2:
case SPR_PMC3:
case SPR_PMC4:
Default(inst);
return;
default:

View File

@ -390,6 +390,11 @@ void JitIL::Cleanup()
{
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
CMP(32, M(&MMCR0), Imm32(0));
FixupBranch mmcr0 = J_CC(CC_Z);
ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, jit->js.numFloatingPointInst);
SetJumpTarget(mmcr0);
}
void JitIL::WriteExit(u32 destination, int exit_num)
@ -666,6 +671,12 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
{
ibuild.EmitDSIExceptionCheck(ibuild.EmitIntConst(ops[i].address));
}
if (opinfo->flags & FL_LOADSTORE)
++jit->js.numLoadStoreInst;
if (opinfo->flags & FL_USE_FPU)
++jit->js.numFloatingPointInst;
}
}

View File

@ -57,6 +57,8 @@ protected:
int blockSize;
int instructionNumber;
int downcountAmount;
u32 numLoadStoreInst;
u32 numFloatingPointInst;
bool firstFPInstructionFound;
bool isLastInstruction;

View File

@ -289,6 +289,68 @@ void Stop()
Host_UpdateDisasmDialog();
}
// Advances the performance monitor counters (PMC1-PMC4) by the work done since
// the last update and flags EXCEPTION_PERFORMANCE_MONITOR when a counter whose
// interrupt control bit is set has overflowed.
//
//   cycles          - added to every counter whose event selector is 1
//   num_load_stores - added to PMC2 when MMCR0.PMC2SELECT is 11
//   num_fp_inst     - added to PMC3 when MMCR1.PMC3SELECT is 11
//
// Every other selector value (including 0, "no change") leaves the
// corresponding counter untouched.
void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst)
{
	// A counter signals overflow through its most significant bit. This has to
	// be a hexadecimal literal: the decimal value 80000000 is 0x04C4B400 and
	// would test an unrelated set of counter bits.
	const u32 PMC_OVERFLOW_BIT = 0x80000000;

	switch (MMCR0.PMC1SELECT)
	{
	case 0: // No change
		break;
	case 1: // Processor cycles
		PowerPC::ppcState.spr[SPR_PMC1] += cycles;
		break;
	default:
		break;
	}

	switch (MMCR0.PMC2SELECT)
	{
	case 0: // No change
		break;
	case 1: // Processor cycles
		PowerPC::ppcState.spr[SPR_PMC2] += cycles;
		break;
	case 11: // Number of loads and stores completed
		PowerPC::ppcState.spr[SPR_PMC2] += num_load_stores;
		break;
	default:
		break;
	}

	switch (MMCR1.PMC3SELECT)
	{
	case 0: // No change
		break;
	case 1: // Processor cycles
		PowerPC::ppcState.spr[SPR_PMC3] += cycles;
		break;
	case 11: // Number of FPU instructions completed
		PowerPC::ppcState.spr[SPR_PMC3] += num_fp_inst;
		break;
	default:
		break;
	}

	switch (MMCR1.PMC4SELECT)
	{
	case 0: // No change
		break;
	case 1: // Processor cycles
		PowerPC::ppcState.spr[SPR_PMC4] += cycles;
		break;
	default:
		break;
	}

	// PMC1 has its own interrupt control bit; PMC2-PMC4 share PMCINTCONTROL.
	const bool pmc1_overflow = MMCR0.PMC1INTCONTROL &&
		(PowerPC::ppcState.spr[SPR_PMC1] & PMC_OVERFLOW_BIT) != 0;
	const bool pmc2_overflow = MMCR0.PMCINTCONTROL &&
		(PowerPC::ppcState.spr[SPR_PMC2] & PMC_OVERFLOW_BIT) != 0;
	const bool pmc3_overflow = MMCR0.PMCINTCONTROL &&
		(PowerPC::ppcState.spr[SPR_PMC3] & PMC_OVERFLOW_BIT) != 0;
	const bool pmc4_overflow = MMCR0.PMCINTCONTROL &&
		(PowerPC::ppcState.spr[SPR_PMC4] & PMC_OVERFLOW_BIT) != 0;

	if (pmc1_overflow || pmc2_overflow || pmc3_overflow || pmc4_overflow)
		PowerPC::ppcState.Exceptions |= EXCEPTION_PERFORMANCE_MONITOR;
}
void CheckExceptions()
{
// Make sure we are checking against the latest EXI status. This is required
@ -453,6 +515,17 @@ void CheckExternalExceptions()
_dbg_assert_msg_(POWERPC, (SRR1 & 0x02) != 0, "EXTERNAL_INT unrecoverable???");
}
else if (exceptions & EXCEPTION_PERFORMANCE_MONITOR)
{
SRR0 = NPC;
SRR1 = MSR & 0x87C0FFFF;
MSR |= (MSR >> 16) & 1;
MSR &= ~0x04EF36;
NPC = 0x80000F00;
INFO_LOG(POWERPC, "EXCEPTION_PERFORMANCE_MONITOR");
Common::AtomicAnd(ppcState.Exceptions, ~EXCEPTION_PERFORMANCE_MONITOR);
}
else if (exceptions & EXCEPTION_DECREMENTER)
{
SRR0 = NPC;

View File

@ -117,12 +117,16 @@ void ExpandCR();
void OnIdle(u32 _uThreadAddr);
void OnIdleIL();
// Adds the given counts to the performance monitor counters (PMC1-PMC4)
// according to the event selectors in MMCR0/MMCR1, and flags
// EXCEPTION_PERFORMANCE_MONITOR when the overflow check on a counter with its
// interrupt control bit set succeeds.
void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst);
// Easy register access macros.
#define HID0 ((UReg_HID0&)PowerPC::ppcState.spr[SPR_HID0])
#define HID2 ((UReg_HID2&)PowerPC::ppcState.spr[SPR_HID2])
#define HID4 ((UReg_HID4&)PowerPC::ppcState.spr[SPR_HID4])
#define DMAU (*(UReg_DMAU*)&PowerPC::ppcState.spr[SPR_DMAU])
#define DMAL (*(UReg_DMAL*)&PowerPC::ppcState.spr[SPR_DMAL])
// Performance monitor control registers, viewed through their bit-field
// unions so individual fields can be read directly from the SPR array.
#define MMCR0 ((UReg_MMCR0&)PowerPC::ppcState.spr[SPR_MMCR0])
#define MMCR1 ((UReg_MMCR1&)PowerPC::ppcState.spr[SPR_MMCR1])
#define PC PowerPC::ppcState.pc
#define NPC PowerPC::ppcState.npc
#define FPSCR ((UReg_FPSCR&)PowerPC::ppcState.fpscr)