JIT: implement timer support in mfspr
Faster, of course, since we avoid the interpreter, but it also means we can get a more accurate timer in long blocks by adding the offset from the start of the block to the retrieved timer. I don't know if this will actually fix any issues, but it's more correct and a nearly-free improvement.
This commit is contained in:
parent
e10b0d1008
commit
816d056657
|
@ -70,31 +70,6 @@ namespace SystemTimers
|
||||||
|
|
||||||
static u32 CPU_CORE_CLOCK = 486000000u; // 486 mhz (its not 485, stop bugging me!)
|
static u32 CPU_CORE_CLOCK = 486000000u; // 486 mhz (its not 485, stop bugging me!)
|
||||||
|
|
||||||
/*
|
|
||||||
GameCube MHz
|
|
||||||
flipper <-> ARAM bus: 81 (DSP)
|
|
||||||
gekko <-> flipper bus: 162
|
|
||||||
flipper <-> 1T-SRAM bus: 324
|
|
||||||
gekko: 486
|
|
||||||
|
|
||||||
These contain some guesses:
|
|
||||||
Wii MHz
|
|
||||||
hollywood <-> GDDR3 RAM bus: ??? no idea really
|
|
||||||
broadway <-> hollywood bus: 243
|
|
||||||
hollywood <-> 1T-SRAM bus: 486
|
|
||||||
broadway: 729
|
|
||||||
*/
|
|
||||||
// Ratio of TB and Decrementer to clock cycles.
|
|
||||||
// TB clk is 1/4 of BUS clk. And it seems BUS clk is really 1/3 of CPU clk.
|
|
||||||
// So, ratio is 1 / (1/4 * 1/3 = 1/12) = 12.
|
|
||||||
// note: ZWW is ok and faster with TIMER_RATIO=8 though.
|
|
||||||
// !!! POSSIBLE STABLE PERF BOOST HACK THERE !!!
|
|
||||||
|
|
||||||
enum
|
|
||||||
{
|
|
||||||
TIMER_RATIO = 12
|
|
||||||
};
|
|
||||||
|
|
||||||
static int et_Dec;
|
static int et_Dec;
|
||||||
static int et_VI;
|
static int et_VI;
|
||||||
static int et_SI;
|
static int et_SI;
|
||||||
|
|
|
@ -9,6 +9,31 @@
|
||||||
namespace SystemTimers
|
namespace SystemTimers
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/*
|
||||||
|
GameCube MHz
|
||||||
|
flipper <-> ARAM bus: 81 (DSP)
|
||||||
|
gekko <-> flipper bus: 162
|
||||||
|
flipper <-> 1T-SRAM bus: 324
|
||||||
|
gekko: 486
|
||||||
|
|
||||||
|
These contain some guesses:
|
||||||
|
Wii MHz
|
||||||
|
hollywood <-> GDDR3 RAM bus: ??? no idea really
|
||||||
|
broadway <-> hollywood bus: 243
|
||||||
|
hollywood <-> 1T-SRAM bus: 486
|
||||||
|
broadway: 729
|
||||||
|
*/
|
||||||
|
// Ratio of TB and Decrementer to clock cycles.
|
||||||
|
// TB clk is 1/4 of BUS clk. And it seems BUS clk is really 1/3 of CPU clk.
|
||||||
|
// So, ratio is 1 / (1/4 * 1/3 = 1/12) = 12.
|
||||||
|
// note: ZWW is ok and faster with TIMER_RATIO=8 though.
|
||||||
|
// !!! POSSIBLE STABLE PERF BOOST HACK THERE !!!
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
TIMER_RATIO = 12
|
||||||
|
};
|
||||||
|
|
||||||
u32 GetTicksPerSecond();
|
u32 GetTicksPerSecond();
|
||||||
void PreInit();
|
void PreInit();
|
||||||
void Init();
|
void Init();
|
||||||
|
|
|
@ -166,10 +166,31 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
||||||
int d = inst.RD;
|
int d = inst.RD;
|
||||||
switch (iIndex)
|
switch (iIndex)
|
||||||
{
|
{
|
||||||
case SPR_WPAR:
|
|
||||||
case SPR_DEC:
|
|
||||||
case SPR_TL:
|
case SPR_TL:
|
||||||
case SPR_TU:
|
case SPR_TU:
|
||||||
|
{
|
||||||
|
// TODO: we really only need to call GetFakeTimeBase once per JIT block; this matters because
|
||||||
|
// typical use of this instruction is to call it three times, e.g. mftbu/mftbl/mftbu/cmpw/bne
|
||||||
|
// to deal with possible timer wraparound. This makes the second two (out of three) completely
|
||||||
|
// redundant for the JIT.
|
||||||
|
u32 registersInUse = CallerSavedRegistersInUse();
|
||||||
|
u32 offset = js.downcountAmount / SystemTimers::TIMER_RATIO;
|
||||||
|
ABI_PushRegistersAndAdjustStack(registersInUse, false);
|
||||||
|
ABI_CallFunction((void *)&SystemTimers::GetFakeTimeBase);
|
||||||
|
ABI_PopRegistersAndAdjustStack(registersInUse, false);
|
||||||
|
// The timer can change within a long block, so add in any difference
|
||||||
|
if (offset > 0)
|
||||||
|
ADD(64, R(RAX), Imm32(offset));
|
||||||
|
MOV(64, M(&TL), R(RAX));
|
||||||
|
gpr.Lock(d);
|
||||||
|
gpr.BindToRegister(d, false);
|
||||||
|
if (iIndex == SPR_TU)
|
||||||
|
SHR(64, R(RAX), Imm8(32));
|
||||||
|
MOV(32, gpr.R(d), R(EAX));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SPR_WPAR:
|
||||||
|
case SPR_DEC:
|
||||||
case SPR_PMC1:
|
case SPR_PMC1:
|
||||||
case SPR_PMC2:
|
case SPR_PMC2:
|
||||||
case SPR_PMC3:
|
case SPR_PMC3:
|
||||||
|
@ -179,9 +200,9 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
||||||
gpr.Lock(d);
|
gpr.Lock(d);
|
||||||
gpr.BindToRegister(d, false);
|
gpr.BindToRegister(d, false);
|
||||||
MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex]));
|
MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex]));
|
||||||
gpr.UnlockAll();
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
gpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::mtmsr(UGeckoInstruction inst)
|
void Jit64::mtmsr(UGeckoInstruction inst)
|
||||||
|
|
Loading…
Reference in New Issue