Improve timings for the S variants of multiply instructions on the ARM9
The behavior seems to be a quirk of the way the interlock cycle was made mandatory
This commit is contained in:
parent
4fcd52ed16
commit
789ef21c70
13
src/ARM.h
13
src/ARM.h
|
@ -272,12 +272,23 @@ public:
|
|||
|
||||
// Adds the cost of an instruction with numI internal cycles to Cycles.
// One extra internal cycle is always added to numI before the comparison below.
void AddCycles_CI(s32 numI) override
|
||||
{
|
||||
// code+internal
|
||||
// code||internal: only the larger of CodeCycles and (numI + 1) is added to Cycles, not their sum
|
||||
s32 numC = CodeCycles;
|
||||
numI += 1;
|
||||
Cycles += std::max(numC, numI);
|
||||
}
|
||||
|
||||
// Adds max(CodeCycles, numI + 1) + numL to Cycles.
// numI: internal cycles (one extra is always added); numL: forced interlock cycles added on top.
void AddCycles_CIL(s32 numI, s32 numL)
|
||||
{
|
||||
// (code||internal)+forced interlock
|
||||
// used by the S variants of multiply instructions on the ARM9
|
||||
// it seems that, rather than adding extra hardware logic to handle the memory stage of these instructions during the execute stage,
|
||||
// the core forces a two-cycle interlock, which lets the interlocked cycle execute without any special logic (plus, presumably, an extra cycle to set the flags)
|
||||
s32 numC = CodeCycles;
|
||||
numI += 1;
|
||||
Cycles += std::max(numC, numI) + numL;
|
||||
}
|
||||
|
||||
void AddCycles_CDI_LDR() override;
|
||||
void AddCycles_CDI_LDM() override;
|
||||
void AddCycles_CDI_SWP() override { AddCycles_CD_STR(); } // uses the same behavior as str
|
||||
|
|
|
@ -774,19 +774,24 @@ void A_MUL(ARM* cpu)
|
|||
if (cpu->Num==1) cpu->SetC(0);
|
||||
}
|
||||
|
||||
u32 cycles;
|
||||
if (cpu->Num == 0)
|
||||
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
|
||||
{
|
||||
if (cpu->CurInstr & (1<<20))
|
||||
((ARMv5*)cpu)->AddCycles_CIL(1, 2);
|
||||
else
|
||||
cpu->AddCycles_CI(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 cycles;
|
||||
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1;
|
||||
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2;
|
||||
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3;
|
||||
else cycles = 4;
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
}
|
||||
}
|
||||
|
||||
void A_MLA(ARM* cpu)
|
||||
{
|
||||
|
@ -804,19 +809,24 @@ void A_MLA(ARM* cpu)
|
|||
if (cpu->Num==1) cpu->SetC(0);
|
||||
}
|
||||
|
||||
u32 cycles;
|
||||
if (cpu->Num == 0)
|
||||
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
|
||||
{
|
||||
if (cpu->CurInstr & (1<<20))
|
||||
((ARMv5*)cpu)->AddCycles_CIL(1, 2);
|
||||
else
|
||||
cpu->AddCycles_CI(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 cycles;
|
||||
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
|
||||
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
|
||||
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
|
||||
else cycles = 5;
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
}
|
||||
}
|
||||
|
||||
void A_UMULL(ARM* cpu)
|
||||
{
|
||||
|
@ -834,20 +844,26 @@ void A_UMULL(ARM* cpu)
|
|||
if (cpu->Num==1) cpu->SetC(0);
|
||||
}
|
||||
|
||||
u32 cycles;
|
||||
if (cpu->Num == 0)
|
||||
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
|
||||
{
|
||||
if (cpu->CurInstr & (1<<20))
|
||||
((ARMv5*)cpu)->AddCycles_CIL(1, 2);
|
||||
else
|
||||
cpu->AddCycles_CI(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 cycles;
|
||||
if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2;
|
||||
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
|
||||
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
|
||||
else cycles = 5;
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void A_UMLAL(ARM* cpu)
|
||||
{
|
||||
u32 rm = cpu->R[cpu->CurInstr & 0xF];
|
||||
|
@ -867,20 +883,26 @@ void A_UMLAL(ARM* cpu)
|
|||
if (cpu->Num==1) cpu->SetC(0);
|
||||
}
|
||||
|
||||
u32 cycles;
|
||||
if (cpu->Num == 0)
|
||||
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
|
||||
{
|
||||
if (cpu->CurInstr & (1<<20))
|
||||
((ARMv5*)cpu)->AddCycles_CIL(1, 2);
|
||||
else
|
||||
cpu->AddCycles_CI(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 cycles;
|
||||
if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2;
|
||||
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
|
||||
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
|
||||
else cycles = 5;
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void A_SMULL(ARM* cpu)
|
||||
{
|
||||
u32 rm = cpu->R[cpu->CurInstr & 0xF];
|
||||
|
@ -897,20 +919,26 @@ void A_SMULL(ARM* cpu)
|
|||
if (cpu->Num==1) cpu->SetC(0);
|
||||
}
|
||||
|
||||
u32 cycles;
|
||||
if (cpu->Num == 0)
|
||||
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
|
||||
{
|
||||
if (cpu->CurInstr & (1<<20))
|
||||
((ARMv5*)cpu)->AddCycles_CIL(1, 2);
|
||||
else
|
||||
cpu->AddCycles_CI(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 cycles;
|
||||
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
|
||||
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
|
||||
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
|
||||
else cycles = 5;
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void A_SMLAL(ARM* cpu)
|
||||
{
|
||||
u32 rm = cpu->R[cpu->CurInstr & 0xF];
|
||||
|
@ -930,20 +958,26 @@ void A_SMLAL(ARM* cpu)
|
|||
if (cpu->Num==1) cpu->SetC(0);
|
||||
}
|
||||
|
||||
u32 cycles;
|
||||
if (cpu->Num == 0)
|
||||
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
|
||||
{
|
||||
if (cpu->CurInstr & (1<<20))
|
||||
((ARMv5*)cpu)->AddCycles_CIL(1, 2);
|
||||
else
|
||||
cpu->AddCycles_CI(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 cycles;
|
||||
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
|
||||
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
|
||||
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
|
||||
else cycles = 5;
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void A_SMLAxy(ARM* cpu)
|
||||
{
|
||||
if (cpu->Num != 0) return;
|
||||
|
@ -1461,21 +1495,22 @@ void T_MUL_REG(ARM* cpu)
|
|||
cpu->SetNZ(res & 0x80000000,
|
||||
!res);
|
||||
|
||||
s32 cycles = 0;
|
||||
if (cpu->Num == 0)
|
||||
{
|
||||
cycles += 3;
|
||||
((ARMv5*)cpu)->AddCycles_CIL(1, 2); // checkme?
|
||||
}
|
||||
else
|
||||
{
|
||||
s32 cycles = 0;
|
||||
cpu->SetC(0); // carry flag destroyed, they say. whatever that means...
|
||||
if (a & 0xFF000000) cycles += 4;
|
||||
else if (a & 0x00FF0000) cycles += 3;
|
||||
else if (a & 0x0000FF00) cycles += 2;
|
||||
else cycles += 1;
|
||||
}
|
||||
|
||||
cpu->AddCycles_CI(cycles);
|
||||
}
|
||||
}
|
||||
|
||||
void T_BIC_REG(ARM* cpu)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue