improve timings for S variants of multiply instructions on arm9

behavior seems to be a quirk of the way they made the interlock cycle mandatory
This commit is contained in:
Jaklyy 2024-07-12 22:46:22 -04:00
parent 4fcd52ed16
commit 789ef21c70
2 changed files with 70 additions and 24 deletions

View File

@ -272,12 +272,23 @@ public:
void AddCycles_CI(s32 numI) override
{
    // code||internal: the code fetch and the internal cycles overlap, so only
    // the longer of the two is charged. One extra cycle is added to the
    // requested internal count before the comparison.
    const s32 internalCycles = numI + 1;
    const s32 fetchCycles = CodeCycles;
    Cycles += std::max(fetchCycles, internalCycles);
}
void AddCycles_CIL(s32 numI, s32 numL)
{
    // (code||internal) + forced interlock.
    // Used by the S variants of the multiply instructions on the ARM9.
    // NOTE: the hardware rationale is an inference from observed timings, not
    // a documented fact. Rather than adding logic to handle the instruction's
    // memory stage during the execute stage, the core seems to force a
    // two-cycle interlock so the interlocked cycle runs without special
    // handling, presumably with an extra cycle to set the flags.
    const s32 internalCycles = numI + 1;
    const s32 fetchCycles = CodeCycles;
    const s32 overlapped = std::max(fetchCycles, internalCycles);

    // The interlock cycles are charged on top of the overlapped portion.
    Cycles += overlapped + numL;
}
void AddCycles_CDI_LDR() override;
void AddCycles_CDI_LDM() override;
void AddCycles_CDI_SWP() override { AddCycles_CD_STR(); } // uses the same behavior as str

View File

@ -774,19 +774,24 @@ void A_MUL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
{
if (cpu->CurInstr & (1<<20))
((ARMv5*)cpu)->AddCycles_CIL(1, 2);
else
cpu->AddCycles_CI(1);
}
else
{
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1;
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3;
else cycles = 4;
}
cpu->AddCycles_CI(cycles);
}
}
void A_MLA(ARM* cpu)
{
@ -804,19 +809,24 @@ void A_MLA(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
{
if (cpu->CurInstr & (1<<20))
((ARMv5*)cpu)->AddCycles_CIL(1, 2);
else
cpu->AddCycles_CI(1);
}
else
{
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
}
cpu->AddCycles_CI(cycles);
}
}
void A_UMULL(ARM* cpu)
{
@ -834,20 +844,26 @@ void A_UMULL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
{
if (cpu->CurInstr & (1<<20))
((ARMv5*)cpu)->AddCycles_CIL(1, 2);
else
cpu->AddCycles_CI(1);
}
else
{
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
else cycles = 5;
}
cpu->AddCycles_CI(cycles);
}
}
void A_UMLAL(ARM* cpu)
{
u32 rm = cpu->R[cpu->CurInstr & 0xF];
@ -867,20 +883,26 @@ void A_UMLAL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
{
if (cpu->CurInstr & (1<<20))
((ARMv5*)cpu)->AddCycles_CIL(1, 2);
else
cpu->AddCycles_CI(1);
}
else
{
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
else cycles = 5;
}
cpu->AddCycles_CI(cycles);
}
}
void A_SMULL(ARM* cpu)
{
u32 rm = cpu->R[cpu->CurInstr & 0xF];
@ -897,20 +919,26 @@ void A_SMULL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
{
if (cpu->CurInstr & (1<<20))
((ARMv5*)cpu)->AddCycles_CIL(1, 2);
else
cpu->AddCycles_CI(1);
}
else
{
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
}
cpu->AddCycles_CI(cycles);
}
}
void A_SMLAL(ARM* cpu)
{
u32 rm = cpu->R[cpu->CurInstr & 0xF];
@ -930,20 +958,26 @@ void A_SMLAL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
{
if (cpu->CurInstr & (1<<20))
((ARMv5*)cpu)->AddCycles_CIL(1, 2);
else
cpu->AddCycles_CI(1);
}
else
{
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
}
cpu->AddCycles_CI(cycles);
}
}
void A_SMLAxy(ARM* cpu)
{
if (cpu->Num != 0) return;
@ -1461,21 +1495,22 @@ void T_MUL_REG(ARM* cpu)
cpu->SetNZ(res & 0x80000000,
!res);
s32 cycles = 0;
if (cpu->Num == 0)
{
cycles += 3;
((ARMv5*)cpu)->AddCycles_CIL(1, 2); // checkme?
}
else
{
s32 cycles = 0;
cpu->SetC(0); // carry flag destroyed, they say. whatever that means...
if (a & 0xFF000000) cycles += 4;
else if (a & 0x00FF0000) cycles += 3;
else if (a & 0x0000FF00) cycles += 2;
else cycles += 1;
}
cpu->AddCycles_CI(cycles);
}
}
void T_BIC_REG(ARM* cpu)
{