JIT: implement timer support in mfspr

Faster, of course, since we avoid the interpreter, but it also means we can
get a more accurate timer in long blocks by adding the offset from the
start of the block to the retrieved timer. I don't know if this will actually
fix any issues, but it's more correct and a nearly-free improvement.
This commit is contained in:
Fiora 2014-08-19 21:52:09 -07:00
parent e10b0d1008
commit 816d056657
3 changed files with 49 additions and 28 deletions

View File

@ -70,31 +70,6 @@ namespace SystemTimers
static u32 CPU_CORE_CLOCK = 486000000u; // 486 mhz (its not 485, stop bugging me!) static u32 CPU_CORE_CLOCK = 486000000u; // 486 mhz (its not 485, stop bugging me!)
/*
GameCube MHz
flipper <-> ARAM bus: 81 (DSP)
gekko <-> flipper bus: 162
flipper <-> 1T-SRAM bus: 324
gekko: 486
These contain some guesses:
Wii MHz
hollywood <-> GDDR3 RAM bus: ??? no idea really
broadway <-> hollywood bus: 243
hollywood <-> 1T-SRAM bus: 486
broadway: 729
*/
// Ratio of TB and Decrementer to clock cycles.
// TB clk is 1/4 of BUS clk. And it seems BUS clk is really 1/3 of CPU clk.
// So, ratio is 1 / (1/4 * 1/3 = 1/12) = 12.
// note: ZWW is ok and faster with TIMER_RATIO=8 though.
// !!! POSSIBLE STABLE PERF BOOST HACK THERE !!!
enum
{
TIMER_RATIO = 12
};
static int et_Dec; static int et_Dec;
static int et_VI; static int et_VI;
static int et_SI; static int et_SI;

View File

@ -9,6 +9,31 @@
namespace SystemTimers namespace SystemTimers
{ {
/*
GameCube MHz
flipper <-> ARAM bus: 81 (DSP)
gekko <-> flipper bus: 162
flipper <-> 1T-SRAM bus: 324
gekko: 486
These contain some guesses:
Wii MHz
hollywood <-> GDDR3 RAM bus: ??? no idea really
broadway <-> hollywood bus: 243
hollywood <-> 1T-SRAM bus: 486
broadway: 729
*/
// Ratio of TB and Decrementer to clock cycles.
// TB clk is 1/4 of BUS clk. And it seems BUS clk is really 1/3 of CPU clk.
// So, ratio is 1 / (1/4 * 1/3 = 1/12) = 12.
// note: ZWW is ok and faster with TIMER_RATIO=8 though.
// !!! POSSIBLE STABLE PERF BOOST HACK THERE !!!
enum
{
TIMER_RATIO = 12
};
u32 GetTicksPerSecond(); u32 GetTicksPerSecond();
void PreInit(); void PreInit();
void Init(); void Init();

View File

@ -166,10 +166,31 @@ void Jit64::mfspr(UGeckoInstruction inst)
int d = inst.RD; int d = inst.RD;
switch (iIndex) switch (iIndex)
{ {
case SPR_WPAR:
case SPR_DEC:
case SPR_TL: case SPR_TL:
case SPR_TU: case SPR_TU:
{
// TODO: we really only need to call GetFakeTimeBase once per JIT block; this matters because
// typical use of this instruction is to call it three times, e.g. mftbu/mftbl/mftbu/cmpw/bne
// to deal with possible timer wraparound. This makes the second two (out of three) completely
// redundant for the JIT.
u32 registersInUse = CallerSavedRegistersInUse();
u32 offset = js.downcountAmount / SystemTimers::TIMER_RATIO;
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_CallFunction((void *)&SystemTimers::GetFakeTimeBase);
ABI_PopRegistersAndAdjustStack(registersInUse, false);
// The timer can change within a long block, so add in any difference
if (offset > 0)
ADD(64, R(RAX), Imm32(offset));
MOV(64, M(&TL), R(RAX));
gpr.Lock(d);
gpr.BindToRegister(d, false);
if (iIndex == SPR_TU)
SHR(64, R(RAX), Imm8(32));
MOV(32, gpr.R(d), R(EAX));
break;
}
case SPR_WPAR:
case SPR_DEC:
case SPR_PMC1: case SPR_PMC1:
case SPR_PMC2: case SPR_PMC2:
case SPR_PMC3: case SPR_PMC3:
@ -179,9 +200,9 @@ void Jit64::mfspr(UGeckoInstruction inst)
gpr.Lock(d); gpr.Lock(d);
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex])); MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex]));
gpr.UnlockAll();
break; break;
} }
gpr.UnlockAll();
} }
void Jit64::mtmsr(UGeckoInstruction inst) void Jit64::mtmsr(UGeckoInstruction inst)