lay ground work for main ram contention TAKE 2

alt title: ITS WORKING! ITS WORKING!!!
This commit is contained in:
Jaklyy 2024-12-05 13:29:32 -05:00
parent 9f04905672
commit 5698cf1862
14 changed files with 1062 additions and 569 deletions

View File

@ -194,12 +194,22 @@ void ARM::Reset()
MainRAMTimestamp = 0;
memset(&MRTrack, 0, sizeof(MRTrack));
FuncQueueFill = 0;
FuncQueueEnd = 0;
FuncQueueProg = 0;
FuncQueueActive = false;
ExecuteCycles = 0;
// zorp
JumpTo(ExceptionBase);
}
void ARMv5::Reset()
{
FuncQueue[0] = &ARMv5::StartExec;
PU_Map = PU_PrivMap;
Store = false;
@ -208,8 +218,8 @@ void ARMv5::Reset()
ILCurrReg = 16;
ILPrevReg = 16;
ICacheFillPtr = 7;
DCacheFillPtr = 7;
ICacheStreamPtr = 7;
DCacheStreamPtr = 7;
WBWritePointer = 16;
WBFillPointer = 0;
@ -313,14 +323,33 @@ void ARM::SetupCodeMem(u32 addr)
}
}
void ARMv5::JumpTo(u32 addr, bool restorecpsr)
void ARMv5::JumpTo(u32 addr, bool restorecpsr, u8 R15)
{
if (restorecpsr)
//printf("JUMP! %08X %i %i\n", addr, restorecpsr, R15);
NDS.MonitorARM9Jump(addr);
BranchRestore = restorecpsr;
BranchUpdate = R15;
BranchAddr = addr;
if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::JumpTo_2;
else JumpTo_2();
}
void ARMv5::JumpTo_2()
{
if (CP15Control & (1<<15))
{
if (BranchUpdate == 1) BranchAddr = R[15] & ~1;
else if (BranchUpdate == 2) BranchAddr = R[15] | 1;
}
else if (BranchUpdate) BranchAddr = R[15];
if (BranchRestore)
{
RestoreCPSR();
if (CPSR & 0x20) addr |= 0x1;
else addr &= ~0x1;
if (CPSR & 0x20) BranchAddr |= 0x1;
else BranchAddr &= ~0x1;
}
// aging cart debug crap
@ -329,47 +358,81 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
// jumps count as nonsequential accesses on the instruction bus on the arm9
// thus it requires waiting for the current ICache line fill to complete before continuing
if (ICacheFillPtr < 7)
if (ICacheStreamPtr < 7)
{
u64 fillend = ICacheFillTimes[6] + 1;
u64 fillend = ICacheStreamTimes[6] + 1;
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend;
ICacheFillPtr = 7;
ICacheStreamPtr = 7;
}
if (addr & 0x1)
if (BranchAddr & 0x1)
{
addr &= ~0x1;
R[15] = addr+2;
BranchAddr &= ~0x1;
R[15] = BranchAddr+2;
CPSR |= 0x20;
// two-opcodes-at-once fetch
// doesn't matter if we put garbage in the MSbs there
if (addr & 0x2)
if (BranchAddr & 0x2)
{
NextInstr[0] = CodeRead32(addr-2, true) >> 16;
NextInstr[1] = CodeRead32(addr+2, false);
CodeRead32(BranchAddr-2);
if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::JumpTo_3A;
else JumpTo_3A();
}
else
{
NextInstr[0] = CodeRead32(addr, true);
NextInstr[1] = NextInstr[0] >> 16;
}
CodeRead32(BranchAddr);
CPSR |= 0x20;
if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::JumpTo_3B;
else JumpTo_3B();
}
}
else
{
addr &= ~0x3;
R[15] = addr+4;
NextInstr[0] = CodeRead32(addr, true);
NextInstr[1] = CodeRead32(addr+4, false);
BranchAddr &= ~0x3;
R[15] = BranchAddr+4;
CPSR &= ~0x20;
}
NDS.MonitorARM9Jump(addr);
CodeRead32(BranchAddr);
if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::JumpTo_3C;
else JumpTo_3C();
}
}
void ARMv4::JumpTo(u32 addr, bool restorecpsr)
void ARMv5::JumpTo_3A()
{
    // Stage 3A of a branch: THUMB target whose address has bit 1 set
    // (JumpTo_2 issued CodeRead32(BranchAddr-2) before reaching this stage).
    // RetVal presumably carries the word produced by that read; only its
    // upper halfword is the first opcode at the branch target.
    NextInstr[0] = RetVal >> 16;

    // kick off the fetch of the following word for the second prefetch slot
    CodeRead32(BranchAddr + 2);

    // no outstanding main RAM operation: finish the pipeline refill right away
    if (MRTrack.Type == MainRAMType::Null)
    {
        JumpTo_4();
        return;
    }

    // otherwise defer the last stage through the function queue
    FuncQueue[FuncQueueFill++] = &ARMv5::JumpTo_4;
}
void ARMv5::JumpTo_3B()
{
    // Stage 3B of a branch: THUMB target with bit 1 of the address clear.
    // A single fetched word (RetVal) fills both prefetch slots: slot 0 takes
    // the value as-is, slot 1 takes slot 0 shifted down by one halfword.
    auto& firstSlot = NextInstr[0];
    firstSlot = RetVal;
    NextInstr[1] = firstSlot >> 16;
}
void ARMv5::JumpTo_3C()
{
    // Stage 3C of a branch: ARM-state target. RetVal presumably holds the word
    // read from BranchAddr by JumpTo_2; it becomes the first prefetched opcode.
    NextInstr[0] = RetVal;

    // request the next sequential opcode for the second prefetch slot
    CodeRead32(BranchAddr + 4);

    // no outstanding main RAM operation: complete the refill immediately
    if (MRTrack.Type == MainRAMType::Null)
    {
        JumpTo_4();
        return;
    }

    // otherwise let the function queue run the final stage later
    FuncQueue[FuncQueueFill++] = &ARMv5::JumpTo_4;
}
// Final stage of a branch (reached from JumpTo_3A and JumpTo_3C): stores the
// value left in RetVal into the second prefetch slot. RetVal is presumably the
// result of the CodeRead32 issued by the preceding stage.
void ARMv5::JumpTo_4()
{
NextInstr[1] = RetVal;
}
void ARMv4::JumpTo(u32 addr, bool restorecpsr, u8 R15)
{
if (restorecpsr)
{
@ -447,6 +510,11 @@ void ARM::RestoreCPSR()
UpdateMode(oldcpsr, CPSR);
}
void ARMv5::QueueUpdateMode()
{
UpdateMode(QueueMode[0], QueueMode[1], true);
}
void ARM::UpdateMode(u32 oldmode, u32 newmode, bool phony)
{
if ((oldmode & 0x1F) == (newmode & 0x1F)) return;
@ -563,6 +631,7 @@ template void ARM::TriggerIRQ<CPUExecuteMode::JIT>();
void ARMv5::PrefetchAbort()
{
abt = true;
AddCycles_C();
Log(LogLevel::Warn, "ARM9: prefetch abort (%08X)\n", R[15]);
@ -578,7 +647,8 @@ void ARMv5::PrefetchAbort()
void ARMv5::DataAbort()
{
Log(LogLevel::Warn, "ARM9: data abort (%08X)\n", R[15]);
abt = true;
Log(LogLevel::Warn, "ARM9: data abort (%08X) %08llX\n", R[15], CurInstr);
u32 oldcpsr = CPSR;
CPSR &= ~0xBF;
@ -595,6 +665,63 @@ void ARM::CheckGdbIncoming()
GdbCheckA();
}
void ARMv5::StartExec()
{
if (CPSR & 0x20) // THUMB
{
// prefetch
R[15] += 2;
CurInstr = NextInstr[0];
NextInstr[0] = NextInstr[1];
// code fetch is done during the execute stage cycle handling
if (R[15] & 0x2) NullFetch = true;
else NullFetch = false;
PC = R[15];
if (IRQ && !(CPSR & 0x80)) TriggerIRQ<CPUExecuteMode::Interpreter>();
else if (CurInstr > 0xFFFFFFFF) [[unlikely]] // handle aborted instructions
{
PrefetchAbort();
}
else [[likely]] // actually execute
{
u32 icode = (CurInstr >> 6) & 0x3FF;
ARMInterpreter::THUMBInstrTable[icode](this);
}
}
else
{
// prefetch
R[15] += 4;
CurInstr = NextInstr[0];
NextInstr[0] = NextInstr[1];
// code fetch is done during the execute stage cycle handling
NullFetch = false;
PC = R[15];
if (IRQ && !(CPSR & 0x80)) TriggerIRQ<CPUExecuteMode::Interpreter>();
else if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions
{
PrefetchAbort();
}
else if (CheckCondition(CurInstr >> 28)) [[likely]] // actually execute
{
u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0);
ARMInterpreter::ARMInstrTable[icode](this);
}
else if ((CurInstr & 0xFE000000) == 0xFA000000)
{
ARMInterpreter::A_BLX_IMM(this);
}
else if ((CurInstr & 0x0FF000F0) == 0x01200070)
{
ARMInterpreter::A_BKPT(this); // always passes regardless of condition code
}
else
AddCycles_C();
}
}
template <CPUExecuteMode mode>
void ARMv5::Execute()
{
@ -670,65 +797,81 @@ void ARMv5::Execute()
else
#endif
{
if (CPSR & 0x20) // THUMB
if constexpr (mode == CPUExecuteMode::InterpreterGDB)
GdbCheckC(); // gdb might throw a hissy fit about this change but idc
//printf("A:%i, F:%i, P:%i, E:%i, I:%08llX, P:%08X, 15:%08X\n", FuncQueueActive, FuncQueueFill, FuncQueueProg, FuncQueueEnd, CurInstr, PC, R[15]);
(this->*FuncQueue[FuncQueueProg])();
if (FuncQueueActive)
{
if constexpr (mode == CPUExecuteMode::InterpreterGDB)
GdbCheckC();
// prefetch
R[15] += 2;
CurInstr = NextInstr[0];
NextInstr[0] = NextInstr[1];
// code fetch is done during the execute stage cycle handling
if (R[15] & 0x2) NullFetch = true;
else NullFetch = false;
PC = R[15];
if (IRQ && !(CPSR & 0x80)) TriggerIRQ<mode>();
else if (CurInstr > 0xFFFFFFFF) [[unlikely]] // handle aborted instructions
if (FuncQueueFill == FuncQueueProg)
{
PrefetchAbort();
// we did not get a new addition to the queue; increment and reset ptrs
FuncQueueFill = ++FuncQueueProg;
// check if we're done with the queue, if so, reset everything
if (FuncQueueProg >= FuncQueueEnd)
{
FuncQueueFill = 0;
FuncQueueProg = 0;
FuncQueueEnd = 0;
FuncQueueActive = false;
FuncQueue[0] = &ARMv5::StartExec;
/*
Platform::FileHandle* file = Platform::OpenFile("REGLOG.bin", Platform::FileMode::Read);
Platform::FileSeek(file, iter*16*4, Platform::FileSeekOrigin::Start);
u32 Regs[16];
Platform::FileRead(Regs, 4, 16, file);
if (memcmp(Regs, R, 16*4))
{
printf("MISMATCH ON ITERATION %lli! %08llX", iter, CurInstr);
for (int i = 0; i < 16; i++)
{
printf(" %i: %08X vs %08X", i, R[i], Regs[i]);
}
printf("\n");
abt=1;
}
Platform::CloseFile(file);
iter++;*/
}
}
else [[likely]] // actually execute
else
{
u32 icode = (CurInstr >> 6) & 0x3FF;
ARMInterpreter::THUMBInstrTable[icode](this);
// we got a new addition to the list; redo the current entry
FuncQueueFill = FuncQueueProg;
}
}
else if (FuncQueueFill > 0) // check if we started the queue up
{
FuncQueueEnd = FuncQueueFill;
FuncQueueFill = 0;
FuncQueueActive = true;
}
else
{
if constexpr (mode == CPUExecuteMode::InterpreterGDB)
GdbCheckC();
// prefetch
R[15] += 4;
CurInstr = NextInstr[0];
NextInstr[0] = NextInstr[1];
// code fetch is done during the execute stage cycle handling
NullFetch = false;
PC = R[15];
if (IRQ && !(CPSR & 0x80)) TriggerIRQ<mode>();
else if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions
/*
Platform::FileHandle* file = Platform::OpenFile("REGLOG.bin", Platform::FileMode::Read);
Platform::FileSeek(file, iter*16*4, Platform::FileSeekOrigin::Start);
u32 Regs[16];
Platform::FileRead(Regs, 4, 16, file);
if (memcmp(Regs, R, 16*4))
{
PrefetchAbort();
printf("MISMATCH ON ITERATION %lli! %08llX", iter, CurInstr);
for (int i = 0; i < 16; i++)
{
printf(" %i: %08X vs %08X", i, R[i], Regs[i]);
}
printf("\n");
abt=1;
}
else if (CheckCondition(CurInstr >> 28)) [[likely]] // actually execute
{
u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0);
ARMInterpreter::ARMInstrTable[icode](this);
}
else if ((CurInstr & 0xFE000000) == 0xFA000000)
{
ARMInterpreter::A_BLX_IMM(this);
}
else if ((CurInstr & 0x0FF000F0) == 0x01200070)
{
ARMInterpreter::A_BKPT(this); // always passes regardless of condition code
}
else
AddCycles_C();
Platform::CloseFile(file);
iter++;*/
}
if (MRTrack.Type != MainRAMType::Null) break; // check if we need to resolve main ram
// TODO optimize this shit!!!
if (Halted)
@ -907,26 +1050,26 @@ template void ARMv4::Execute<CPUExecuteMode::JIT>();
void ARMv5::FillPipeline()
{
SetupCodeMem(R[15]);
/*SetupCodeMem(R[15]);
if (CPSR & 0x20)
{
if ((R[15] - 2) & 0x2)
{
NextInstr[0] = CodeRead32(R[15] - 4, false) >> 16;
NextInstr[1] = CodeRead32(R[15], false);
NextInstr[0] = CodeRead32(R[15] - 4) >> 16;
NextInstr[1] = CodeRead32(R[15]);
}
else
{
NextInstr[0] = CodeRead32(R[15] - 2, false);
NextInstr[0] = CodeRead32(R[15] - 2);
NextInstr[1] = NextInstr[0] >> 16;
}
}
else
{
NextInstr[0] = CodeRead32(R[15] - 4, false);
NextInstr[1] = CodeRead32(R[15], false);
}
NextInstr[0] = CodeRead32(R[15] - 4);
NextInstr[1] = CodeRead32(R[15]);
}*/
}
void ARMv4::FillPipeline()
@ -1160,23 +1303,37 @@ void ARMv5::CodeFetch()
{
// the value we need is cached by the bus
// in practice we can treat this as a 1 cycle fetch, with no penalties
NextInstr[1] >>= 16;
RetVal = NextInstr[1] >> 16;
NDS.ARM9Timestamp++;
if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual;
Store = false;
DataRegion = Mem9_Null;
}
else NextInstr[1] = CodeRead32(PC, false);
else
{
CodeRead32(PC);
}
if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::AddExecute;
else AddExecute();
}
void ARMv5::AddCycles_CI(s32 numX)
void ARMv5::AddExecute()
{
CodeFetch();
NDS.ARM9Timestamp += numX;
NextInstr[1] = RetVal;
NDS.ARM9Timestamp += ExecuteCycles;
}
void ARMv5::AddCycles_MW(s32 numM)
{
    // First half of the memory-stage cycle accounting. The cycle count is
    // parked in DataCycles so that AddCycles_MW_2 (which takes no arguments,
    // as required for a FuncQueue entry) can pick it up.
    // NOTE(review): callers such as AddCycles_CDI zero DataCycles right after
    // this returns; when the second half is deferred it would then read 0 -
    // confirm this is intended.
    DataCycles = numM;

    // no outstanding main RAM operation: run the second half immediately
    if (MRTrack.Type == MainRAMType::Null)
    {
        AddCycles_MW_2();
        return;
    }

    // otherwise hand the second half to the function queue
    FuncQueue[FuncQueueFill++] = &ARMv5::AddCycles_MW_2;
}
void ARMv5::AddCycles_MW_2()
{
s32 numM = DataCycles;
TimestampActual = numM + NDS.ARM9Timestamp;
numM -= 3<<NDS.ARM9ClockShift;
@ -1187,6 +1344,7 @@ void ARMv5::AddCycles_MW(s32 numM)
template <bool bitfield>
void ARMv5::HandleInterlocksExecute(u16 ilmask, u8* times)
{
/*
if ((bitfield && (ilmask & (1<<ILCurrReg))) || (!bitfield && (ilmask == ILCurrReg)))
{
u64 time = ILCurrTime - (times ? times[ILCurrReg] : 0);
@ -1214,19 +1372,20 @@ void ARMv5::HandleInterlocksExecute(u16 ilmask, u8* times)
ILPrevReg = ILCurrReg;
ILPrevTime = ILCurrTime;
ILCurrReg = 16;
ILCurrReg = 16;*/
}
template void ARMv5::HandleInterlocksExecute<true>(u16 ilmask, u8* times);
template void ARMv5::HandleInterlocksExecute<false>(u16 ilmask, u8* times);
void ARMv5::HandleInterlocksMemory(u8 reg)
{
/*
if ((reg != ILPrevReg) || (NDS.ARM9Timestamp >= ILPrevTime)) return;
u64 diff = ILPrevTime - NDS.ARM9Timestamp; // should always be 1?
NDS.ARM9Timestamp = ILPrevTime;
ITCMTimestamp += diff; // checkme
ILPrevTime = 16;
ILPrevTime = 16;*/
}
u16 ARMv4::CodeRead16(u32 addr)
@ -1265,8 +1424,10 @@ u32 ARMv4::CodeRead32(u32 addr)
return BusRead32(addr);
}
bool ARMv4::DataRead8(u32 addr, u32* val)
bool ARMv4::DataRead8(u32 addr, u8 reg)
{
u32* val = &R[reg];
if ((addr >> 24) == 0x02)
{
if (NDS.ARM7Timestamp < MainRAMTimestamp) NDS.ARM7Timestamp = MainRAMTimestamp;
@ -1284,8 +1445,9 @@ bool ARMv4::DataRead8(u32 addr, u32* val)
return true;
}
bool ARMv4::DataRead16(u32 addr, u32* val)
bool ARMv4::DataRead16(u32 addr, u8 reg)
{
u32* val = &R[reg];
addr &= ~1;
if ((addr >> 24) == 0x02)
@ -1305,8 +1467,9 @@ bool ARMv4::DataRead16(u32 addr, u32* val)
return true;
}
bool ARMv4::DataRead32(u32 addr, u32* val)
bool ARMv4::DataRead32(u32 addr, u8 reg)
{
u32* val = &R[reg];
addr &= ~3;
if ((addr >> 24) == 0x02)
@ -1326,8 +1489,9 @@ bool ARMv4::DataRead32(u32 addr, u32* val)
return true;
}
bool ARMv4::DataRead32S(u32 addr, u32* val)
bool ARMv4::DataRead32S(u32 addr, u8 reg)
{
u32* val = &R[reg];
addr &= ~3;
if ((addr >> 24) == 0x02)
@ -1347,7 +1511,7 @@ bool ARMv4::DataRead32S(u32 addr, u32* val)
return true;
}
bool ARMv4::DataWrite8(u32 addr, u8 val)
bool ARMv4::DataWrite8(u32 addr, u8 val, u8 reg)
{
if ((addr >> 24) == 0x02)
{
@ -1366,7 +1530,7 @@ bool ARMv4::DataWrite8(u32 addr, u8 val)
return true;
}
bool ARMv4::DataWrite16(u32 addr, u16 val)
bool ARMv4::DataWrite16(u32 addr, u16 val, u8 reg)
{
addr &= ~1;
@ -1387,7 +1551,7 @@ bool ARMv4::DataWrite16(u32 addr, u16 val)
return true;
}
bool ARMv4::DataWrite32(u32 addr, u32 val)
bool ARMv4::DataWrite32(u32 addr, u32 val, u8 reg)
{
addr &= ~3;
@ -1408,7 +1572,7 @@ bool ARMv4::DataWrite32(u32 addr, u32 val)
return true;
}
bool ARMv4::DataWrite32S(u32 addr, u32 val)
bool ARMv4::DataWrite32S(u32 addr, u32 val, u8 reg)
{
addr &= ~3;

143
src/ARM.h
View File

@ -53,6 +53,19 @@ enum class CPUExecuteMode : u32
#endif
};
// Tag stored in MainRAMTrackers::Type. The CPU code compares it against Null
// to decide whether a follow-up stage runs immediately or is pushed onto the
// function queue.
enum class MainRAMType : u8
{
    Null         = 0, // nothing outstanding
    ICacheStream = 1, // presumably an instruction cache line streaming in from main RAM
};
// Bookkeeping for an outstanding main RAM operation (see ARM::MRTrack).
// The whole struct is cleared with memset in ARM::Reset, so it is kept a
// plain aggregate of byte-sized fields.
struct MainRAMTrackers
{
// kind of operation in flight; MainRAMType::Null when there is none
MainRAMType Type;
// NOTE(review): meaning not visible in this diff - presumably a per-type parameter
u8 Var;
// NOTE(review): presumably how far the operation has advanced - confirm against users
u8 Progress;
};
struct GDBArgs;
class ARMJIT;
class GPU;
@ -75,7 +88,7 @@ public:
virtual void FillPipeline() = 0;
virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0;
virtual void JumpTo(u32 addr, bool restorecpsr = false, u8 R15 = 0) = 0;
void RestoreCPSR();
void Halt(u32 halt)
@ -135,14 +148,14 @@ public:
void SetupCodeMem(u32 addr);
virtual bool DataRead8(u32 addr, u32* val) = 0;
virtual bool DataRead16(u32 addr, u32* val) = 0;
virtual bool DataRead32(u32 addr, u32* val) = 0;
virtual bool DataRead32S(u32 addr, u32* val) = 0;
virtual bool DataWrite8(u32 addr, u8 val) = 0;
virtual bool DataWrite16(u32 addr, u16 val) = 0;
virtual bool DataWrite32(u32 addr, u32 val) = 0;
virtual bool DataWrite32S(u32 addr, u32 val) = 0;
virtual bool DataRead8(u32 addr, u8 reg) = 0;
virtual bool DataRead16(u32 addr, u8 reg) = 0;
virtual bool DataRead32(u32 addr, u8 reg) = 0;
virtual bool DataRead32S(u32 addr, u8 reg) = 0;
virtual bool DataWrite8(u32 addr, u8 val, u8 reg) = 0;
virtual bool DataWrite16(u32 addr, u16 val, u8 reg) = 0;
virtual bool DataWrite32(u32 addr, u32 val, u8 reg) = 0;
virtual bool DataWrite32S(u32 addr, u32 val, u8 reg) = 0;
virtual void AddCycles_C() = 0;
virtual void AddCycles_CI(s32 numI) = 0;
@ -186,6 +199,29 @@ public:
MemRegion CodeMem;
u64 MainRAMTimestamp;
MainRAMTrackers MRTrack;
u32 BranchAddr;
u8 BranchUpdate;
bool BranchRestore;
u32 QueueMode[2];
u64 RetVal;
u16 LDRRegs;
u16 LDRFailedRegs;
u16 STRRegs;
u32 FetchAddr[17];
u32 STRVal[16];
u64 iter;
u8 FuncQueueFill;
u8 FuncQueueEnd;
u8 FuncQueueProg;
u8 ExecuteCycles;
bool FuncQueueActive;
#ifdef JIT_ENABLED
u32 FastBlockLookupStart, FastBlockLookupSize;
@ -245,7 +281,7 @@ public:
void FillPipeline() override;
void JumpTo(u32 addr, bool restorecpsr = false) override;
void JumpTo(u32 addr, bool restorecpsr = false, u8 R15 = 0) override;
void PrefetchAbort();
void DataAbort();
@ -254,36 +290,42 @@ public:
void Execute();
// all code accesses are forced nonseq 32bit
u64 CodeRead32(const u32 addr, const bool branch);
void CodeRead32(const u32 addr);
bool DataRead8(u32 addr, u32* val) override;
bool DataRead16(u32 addr, u32* val) override;
bool DataRead32(u32 addr, u32* val) override;
bool DataRead32S(u32 addr, u32* val) override;
bool DataWrite8(u32 addr, u8 val) override;
bool DataWrite16(u32 addr, u16 val) override;
bool DataWrite32(u32 addr, u32 val) override;
bool DataWrite32S(u32 addr, u32 val) override;
bool DataRead8(u32 addr, u8 reg) override;
bool DataRead16(u32 addr, u8 reg) override;
bool DataRead32(u32 addr, u8 reg) override;
bool DataRead32S(u32 addr, u8 reg) override;
bool DataWrite8(u32 addr, u8 val, u8 reg) override;
bool DataWrite16(u32 addr, u16 val, u8 reg) override;
bool DataWrite32(u32 addr, u32 val, u8 reg) override;
bool DataWrite32S(u32 addr, u32 val, u8 reg) override;
void CodeFetch();
void AddCycles_C() override { CodeFetch(); }
// Cycle accounting for an instruction with no extra execute cycles:
// clears ExecuteCycles and starts the code fetch. CodeFetch in turn calls or
// queues AddExecute, which adds ExecuteCycles to the ARM9 timestamp.
void AddCycles_C() override
{
ExecuteCycles = 0;
CodeFetch();
}
void AddCycles_CI(s32 numX) override;
// Cycle accounting for an instruction with numX extra execute cycles:
// records the count in ExecuteCycles and starts the code fetch. CodeFetch in
// turn calls or queues AddExecute, which adds ExecuteCycles to the ARM9
// timestamp.
// NOTE(review): ExecuteCycles is declared as u8 while numX is s32, so values
// outside 0..255 are truncated here - confirm no caller passes such a count.
void AddCycles_CI(s32 numX) override
{
ExecuteCycles = numX;
CodeFetch();
}
void AddCycles_MW(s32 numM);
// Applies the accumulated data access cycles through AddCycles_MW, then
// resets the accumulator.
// NOTE(review): AddCycles_MW stores its argument back into DataCycles and may
// queue AddCycles_MW_2 instead of running it; in that case the reset below
// happens before AddCycles_MW_2 reads DataCycles - confirm this is intended.
void AddCycles_CDI() override
{
AddCycles_MW(DataCycles);
DataCycles = 0;
}
void AddCycles_CD() override
{
Store = true;
Store = true; // todo: queue this
AddCycles_MW(DataCycles);
DataCycles = 0;
}
template <bool bitfield>
@ -366,7 +408,7 @@ public:
* cache. The address is internally aligned to an word boundary
* @return Value of the word at addr
*/
u32 ICacheLookup(const u32 addr);
bool ICacheLookup(const u32 addr);
/**
* @brief Check if an address is within a instruction cachable
@ -604,6 +646,26 @@ public:
* @return Value of the cp15 register
*/
u32 CP15Read(const u32 id) const;
void StartExec();
void AddExecute();
void AddCycles_MW_2();
void JumpTo_2();
void JumpTo_3A();
void JumpTo_3B();
void JumpTo_3C();
void JumpTo_4();
void DAbortHandle();
void DAbortHandleS();
void DRead8_2();
void DRead16_2();
void DRead32_2();
void DRead32S_2();
void DWrite8_2();
void DWrite16_2();
void DWrite32_2();
void DWrite32S_2();
void QueueUpdateMode();
u32 CP15Control; //! CP15 Register 1: Control Register
@ -652,7 +714,7 @@ public:
* 1 - CP15_MAP_WRITEABLE
* 2 - CP15_MAP_EXECUTABLE
* 4 - CP15_MAP_DCACHEABLE
* 5 - CP15_MAP_DCACHEWRITEBACK
* 5 - CP15_MAP_BUFFERABLE
* 6 - CP15_MAP_ICACHEABLE
*/
u8 PU_UserMap[CP15_MAP_ENTRYCOUNT]; //! Memory mapping flags for User Mode
@ -665,6 +727,7 @@ public:
u64 ITCMTimestamp;
u64 TimestampActual;
void (ARMv5::*FuncQueue[31])(void);
u32 PC;
bool NullFetch;
bool Store;
@ -674,10 +737,12 @@ public:
u64 ILCurrTime;
u64 ILPrevTime;
u8 ICacheFillPtr;
u8 DCacheFillPtr;
u64 ICacheFillTimes[7];
u64 DCacheFillTimes[7];
u8 ICacheStreamPtr;
u8 DCacheStreamPtr;
u64 ICacheStreamTimes[7];
u64 DCacheStreamTimes[7];
bool abt;
u8 WBWritePointer; // which entry to attempt to write next; should always be ANDed with 0xF after incrementing
u8 WBFillPointer; // where the next entry should be added; should always be ANDed with 0xF after incrementing
@ -716,7 +781,7 @@ public:
void FillPipeline() override;
void JumpTo(u32 addr, bool restorecpsr = false) override;
void JumpTo(u32 addr, bool restorecpsr = false, u8 R15 = 0) override;
template <CPUExecuteMode mode>
void Execute();
@ -726,14 +791,14 @@ public:
u16 CodeRead16(u32 addr);
u32 CodeRead32(u32 addr);
bool DataRead8(u32 addr, u32* val) override;
bool DataRead16(u32 addr, u32* val) override;
bool DataRead32(u32 addr, u32* val) override;
bool DataRead32S(u32 addr, u32* val) override;
bool DataWrite8(u32 addr, u8 val) override;
bool DataWrite16(u32 addr, u16 val) override;
bool DataWrite32(u32 addr, u32 val) override;
bool DataWrite32S(u32 addr, u32 val) override;
bool DataRead8(u32 addr, u8 reg) override;
bool DataRead16(u32 addr, u8 reg) override;
bool DataRead32(u32 addr, u8 reg) override;
bool DataRead32S(u32 addr, u8 reg) override;
bool DataWrite8(u32 addr, u8 val, u8 reg) override;
bool DataWrite16(u32 addr, u16 val, u8 reg) override;
bool DataWrite32(u32 addr, u32 val, u8 reg) override;
bool DataWrite32S(u32 addr, u32 val, u8 reg) override;
void AddCycles_C() override;
void AddCycles_CI(s32 num) override;
void AddCycles_CDI() override;

View File

@ -50,6 +50,7 @@ void A_UNK(ARM* cpu)
cpu->R_UND[2] = oldcpsr;
cpu->R[14] = cpu->R[15] - 4;
cpu->JumpTo(cpu->ExceptionBase + 0x04);
}
@ -68,12 +69,13 @@ void T_UNK(ARM* cpu)
cpu->R_UND[2] = oldcpsr;
cpu->R[14] = cpu->R[15] - 2;
cpu->JumpTo(cpu->ExceptionBase + 0x04);
}
void A_BKPT(ARM* cpu)
{
if (cpu->Num == 1) A_UNK(cpu); // checkme
if (cpu->Num == 1) return A_UNK(cpu); // checkme
Log(LogLevel::Warn, "BKPT: "); // combine with the prefetch abort warning message
((ARMv5*)cpu)->PrefetchAbort();
@ -83,6 +85,9 @@ void A_BKPT(ARM* cpu)
void A_MSR_IMM(ARM* cpu)
{
if ((cpu->Num != 1) && (cpu->CurInstr & ((0x7<<16)|(1<<22)))) cpu->AddCycles_CI(2); // arm9 cpsr_sxc & spsr
else cpu->AddCycles_C();
u32* psr;
if (cpu->CurInstr & (1<<22))
{
@ -100,8 +105,6 @@ void A_MSR_IMM(ARM* cpu)
case 0x1A:
case 0x1B: psr = &cpu->R_UND[2]; break;
default:
if (cpu->Num != 1) cpu->AddCycles_C(); // arm 7
else cpu->AddCycles_CI(2); // arm 9
return;
}
}
@ -138,23 +141,15 @@ void A_MSR_IMM(ARM* cpu)
cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least
}
}
if (cpu->Num != 1)
{
if (cpu->CurInstr & (1<<22))
{
cpu->AddCycles_CI(2); // spsr
}
else if (cpu->CurInstr & (0x7<<16)) cpu->AddCycles_CI(2); // cpsr_sxc
else cpu->AddCycles_C();
}
else cpu->AddCycles_C();
}
void A_MSR_REG(ARM* cpu)
{
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr & 0xF);
if ((cpu->Num != 1) && (cpu->CurInstr & ((0x7<<16)|(1<<22)))) cpu->AddCycles_CI(2); // arm9 cpsr_sxc & spsr
else cpu->AddCycles_C();
u32* psr;
if (cpu->CurInstr & (1<<22))
{
@ -172,8 +167,6 @@ void A_MSR_REG(ARM* cpu)
case 0x1A:
case 0x1B: psr = &cpu->R_UND[2]; break;
default:
if (cpu->Num != 1) cpu->AddCycles_C(); // arm 7
else cpu->AddCycles_CI(2); // arm 9
return;
}
}
@ -210,17 +203,6 @@ void A_MSR_REG(ARM* cpu)
cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least
}
}
if (cpu->Num != 1)
{
if (cpu->CurInstr & (1<<22))
{
cpu->AddCycles_CI(2); // spsr
}
else if (cpu->CurInstr & (0x7<<16)) cpu->AddCycles_CI(2); // cpsr_sxc
else cpu->AddCycles_C();
}
else cpu->AddCycles_C();
}
void A_MRS(ARM* cpu)
@ -247,20 +229,19 @@ void A_MRS(ARM* cpu)
else
psr = cpu->CPSR;
if (cpu->Num != 1) // arm9
{
cpu->AddCycles_C(); // 1 X
((ARMv5*)cpu)->AddCycles_MW(2); // 2 M
}
else cpu->AddCycles_C(); // arm7
if (((cpu->CurInstr>>12) & 0xF) == 15)
{
if (cpu->Num == 1) // doesn't seem to jump on the arm9? checkme
cpu->JumpTo(psr & ~0x1); // checkme: this shouldn't be able to switch to thumb?
}
else cpu->R[(cpu->CurInstr>>12) & 0xF] = psr;
if (cpu->Num != 1) // arm9
{
cpu->AddCycles_C(); // 1 X
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(2); // 2 M
}
else cpu->AddCycles_C(); // arm7
}
@ -281,7 +262,7 @@ void A_MCR(ARM* cpu)
if (cpu->Num==0 && cp==15)
{
((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo|(op<<12), val);
((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo|(op<<12), val); // TODO: IF THIS RAISES AN EXCEPTION WE DO A DOUBLE CODE FETCH; FIX THAT
}
else if (cpu->Num==1 && cp==14)
{
@ -292,7 +273,8 @@ void A_MCR(ARM* cpu)
Log(LogLevel::Warn, "bad MCR opcode p%d, %d, reg, c%d, c%d, %d on ARM%d\n", cp, op, cn, cm, cpinfo, cpu->Num?7:9);
return A_UNK(cpu); // TODO: check what kind of exception it really is
}
// TODO: SINCE THIS DOES A CODE FETCH WE NEED TO DELAY ANY MPU UPDATES UNTIL *AFTER* THE CODE FETCH
if (cpu->Num==0) cpu->AddCycles_CI(5); // checkme
else /* ARM7 */ cpu->AddCycles_CI(1 + 1); // TODO: checkme
}
@ -315,7 +297,7 @@ void A_MRC(ARM* cpu)
else
{
// r15 updates the top 4 bits of the cpsr, done to "allow for conditional branching based on coprocessor status"
u32 flags = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo|(op<<12)) & 0xF0000000;
u32 flags = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo|(op<<12)) & 0xF0000000; // TODO: IF THIS RAISES AN EXCEPTION WE DO A DOUBLE CODE FETCH; FIX THAT
cpu->CPSR = (cpu->CPSR & ~0xF0000000) | flags;
}
}
@ -332,7 +314,6 @@ void A_MRC(ARM* cpu)
if (cpu->Num != 1)
{
cpu->AddCycles_C(); // 1 Execute cycle
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(2); // 2 Memory cycles
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -352,6 +333,7 @@ void A_SVC(ARM* cpu) // A_SWI
cpu->R_SVC[2] = oldcpsr;
cpu->R[14] = cpu->R[15] - 4;
cpu->JumpTo(cpu->ExceptionBase + 0x08);
}
@ -365,6 +347,7 @@ void T_SVC(ARM* cpu) // T_SWI
cpu->R_SVC[2] = oldcpsr;
cpu->R[14] = cpu->R[15] - 2;
cpu->JumpTo(cpu->ExceptionBase + 0x08);
}

View File

@ -926,7 +926,6 @@ void A_MUL(ARM* cpu)
{
cpu->AddCycles_C(); // 1 X
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(2); // 2 M
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -974,7 +973,6 @@ void A_MLA(ARM* cpu)
{
cpu->AddCycles_C(); // 1 X
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(2); // 2 M
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1020,7 +1018,6 @@ void A_UMULL(ARM* cpu)
{
cpu->AddCycles_CI(2);
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1073,7 +1070,6 @@ void A_UMLAL(ARM* cpu)
{
cpu->AddCycles_CI(2);
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1119,7 +1115,6 @@ void A_SMULL(ARM* cpu)
{
cpu->AddCycles_CI(2);
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1171,7 +1166,6 @@ void A_SMLAL(ARM* cpu)
{
cpu->AddCycles_CI(2);
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1219,7 +1213,6 @@ void A_SMLAxy(ARM* cpu)
(1 << ((cpu->CurInstr >> 12) & 0xF)), iltime);
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1251,7 +1244,6 @@ void A_SMLAWy(ARM* cpu)
(1 << ((cpu->CurInstr >> 12) & 0xF)), iltime);
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1279,7 +1271,6 @@ void A_SMULxy(ARM* cpu)
(1 << ((cpu->CurInstr >> 8) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1305,7 +1296,6 @@ void A_SMULWy(ARM* cpu)
(1 << ((cpu->CurInstr >> 8) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1342,7 +1332,7 @@ void A_SMLALxy(ARM* cpu)
(1 << ((cpu->CurInstr >> 12) & 0xF))/* |
(1 << ((cpu->CurInstr >> 16) & 0xF))*/, iltime);
cpu->AddCycles_C(); // 1 X
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(2); // 2 M
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1370,11 +1360,11 @@ void A_CLZ(ARM* cpu)
val |= 0x1;
}
if (((cpu->CurInstr >> 12) & 0xF) == 15) cpu->JumpTo(res & ~1);
else cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr & 0xF);
cpu->AddCycles_C();
if (((cpu->CurInstr >> 12) & 0xF) == 15) cpu->JumpTo(res & ~1);
else cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
}
void A_QADD(ARM* cpu)
@ -1398,7 +1388,6 @@ void A_QADD(ARM* cpu)
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1424,7 +1413,6 @@ void A_QSUB(ARM* cpu)
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1458,7 +1446,6 @@ void A_QDADD(ARM* cpu)
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1492,7 +1479,6 @@ void A_QDSUB(ARM* cpu)
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual;
@ -1902,6 +1888,8 @@ void T_CMP_HIREG(ARM* cpu)
CarrySub(a, b),
OverflowSub(a, b));
cpu->AddCycles_C();
if ((cpu->Num == 1) && (rd == 15))
{
u32 oldpsr = cpu->CPSR;
@ -1913,8 +1901,6 @@ void T_CMP_HIREG(ARM* cpu)
}
}
else if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << rd) | (1 << rs));
cpu->AddCycles_C();
}
void T_MOV_HIREG(ARM* cpu)

View File

@ -82,6 +82,7 @@ enum class Writeback
template<bool signextend, int size, Writeback writeback, bool multireg>
void LoadSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16 ilmask)
{
cpu->LDRFailedRegs = 0;
static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!");
ExecuteStage<multireg>(cpu, (ilmask | (1<<rn)));
@ -96,11 +97,11 @@ void LoadSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16
((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_UserMap;
}
u32 val;
u32 oldrd = cpu->R[rd];
bool dabort;
if constexpr (size == 8) dabort = !cpu->DataRead8 (addr, &val);
if constexpr (size == 16) dabort = !cpu->DataRead16(addr, &val);
if constexpr (size == 32) dabort = !cpu->DataRead32(addr, &val);
if constexpr (size == 8) dabort = !cpu->DataRead8 (addr, rd);
if constexpr (size == 16) dabort = !cpu->DataRead16(addr, rd);
if constexpr (size == 32) dabort = !cpu->DataRead32(addr, rd);
if constexpr (writeback == Writeback::Trans)
{
@ -114,21 +115,21 @@ void LoadSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16
((ARMv5*)cpu)->DataAbort();
return;
}
if ((cpu->MRTrack.Type != MainRAMType::Null) && signextend && cpu->Num == 0) printf("ARGH ME BONES");
if constexpr (size == 8 && signextend) val = (s32)(s8)val;
if constexpr (size == 8 && signextend) cpu->R[rd] = (s32)(s8)cpu->R[rd];
if constexpr (size == 16)
{
if (cpu->Num == 1)
{
val = ROR(val, ((addr&0x1)<<3)); // unaligned 16 bit loads are ROR'd on arm7
if constexpr (signextend) val = (s32)((addr&0x1) ? (s8)val : (s16)val); // sign extend like a ldrsb if we ror'd the value.
cpu->R[rd] = ROR(cpu->R[rd], ((addr&0x1)<<3)); // unaligned 16 bit loads are ROR'd on arm7
if constexpr (signextend) cpu->R[rd] = (s32)((addr&0x1) ? (s8)cpu->R[rd] : (s16)cpu->R[rd]); // sign extend like a ldrsb if we ror'd the value.
}
else if constexpr (signextend) val = (s32)(s16)val;
else if constexpr (signextend) cpu->R[rd] = (s32)(s16)cpu->R[rd];
}
if constexpr (size == 32) val = ROR(val, ((addr&0x3)<<3));
if constexpr (size == 32) cpu->R[rd] = ROR(cpu->R[rd], ((addr&0x3)<<3));
if constexpr (writeback >= Writeback::Post) addr += offset;
if constexpr (writeback != Writeback::None)
@ -139,22 +140,23 @@ void LoadSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16
}
else if (cpu->Num == 1) // arm 7
{
// note that at no point does it actually write the value it loaded to a register...
cpu->JumpTo((addr+4) & ~1);
cpu->R[rd] = oldrd; // note that at no point does it actually write the value it loaded into a register...
cpu->LDRFailedRegs = 1<<rd;
cpu->JumpTo((addr+4) & ~1); // +4 cause reasons
return;
}
}
if (rd == 15)
{
if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) val &= ~0x1;
if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual + ((size<32) || (addr&0x3)); // force an interlock
if (cpu->Num==1) cpu->R[15] &= ~0x1;
cpu->JumpTo(val);
//if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual + ((size<32) || (addr&0x3)); // force an interlock
cpu->JumpTo(cpu->R[15], false, 1);
}
else
{
cpu->R[rd] = val;
if (cpu->Num == 0)
{
((ARMv5*)cpu)->ILCurrReg = rd;
@ -188,9 +190,9 @@ void StoreSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16
((ARMv5*)cpu)->HandleInterlocksMemory(rd);
bool dabort;
if constexpr (size == 8) dabort = !cpu->DataWrite8 (addr, storeval);
if constexpr (size == 16) dabort = !cpu->DataWrite16(addr, storeval);
if constexpr (size == 32) dabort = !cpu->DataWrite32(addr, storeval);
if constexpr (size == 8) dabort = !cpu->DataWrite8 (addr, storeval, rd);
if constexpr (size == 16) dabort = !cpu->DataWrite16(addr, storeval, rd);
if constexpr (size == 32) dabort = !cpu->DataWrite32(addr, storeval, rd);
if constexpr (writeback == Writeback::Trans)
{
@ -208,11 +210,11 @@ void StoreSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16
if constexpr (writeback >= Writeback::Post) addr += offset;
if constexpr (writeback != Writeback::None)
{
if (rn != 15) [[likely]] // r15 writeback fails on arm9
if (rn != 15) [[likely]]
{
cpu->R[rn] = addr;
}
else if (cpu->Num == 1) // arm 7
else if (cpu->Num == 1) // r15 writeback fails on arm9
{
cpu->JumpTo(addr & ~1);
}
@ -349,19 +351,20 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
cpu->LDRFailedRegs = 0; \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
bool dabort = !cpu->DataRead32(offset, &cpu->R[r]); \
u32 val; dabort |= !cpu->DataRead32S(offset+4, &val); \
if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; \
bool dabort = !cpu->DataRead32(offset, r); \
u32 oldval = cpu->R[r+1]; dabort |= !cpu->DataRead32S(offset+4, r+1); \
/*if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;*/ \
cpu->AddCycles_CDI(); \
if (dabort) { \
cpu->R[r+1] = oldval; \
((ARMv5*)cpu)->DataAbort(); \
return; } \
if (r+1 == 15) { \
if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; \
cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); } /* restores cpsr presumably due to shared dna with ldm */ \
/*if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual;*/ \
cpu->JumpTo(cpu->R[15], cpu->CurInstr & (1<<22), 1); } /* restores cpsr presumably due to shared dna with ldm */ \
else { \
cpu->R[r+1] = val; \
if (cpu->Num == 0) { \
((ARMv5*)cpu)->ILCurrReg = r+1; \
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; } } \
@ -372,19 +375,20 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
cpu->LDRFailedRegs = 0; \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
bool dabort = !cpu->DataRead32(addr, &cpu->R[r]); \
u32 val; dabort |= !cpu->DataRead32S(addr+4, &val); \
if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; \
bool dabort = !cpu->DataRead32(addr, r); \
u32 oldval = cpu->R[r+1]; dabort |= !cpu->DataRead32S(addr+4, r+1); \
/*if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;*/ \
cpu->AddCycles_CDI(); \
if (dabort) { \
cpu->R[r+1] = oldval; \
((ARMv5*)cpu)->DataAbort(); \
return; } \
if (r+1 == 15) { \
if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; \
cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); } /* restores cpsr presumably due to shared dna with ldm */ \
/*if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual;*/ \
cpu->JumpTo(cpu->R[15], cpu->CurInstr & (1<<22), 1); } /* restores cpsr presumably due to shared dna with ldm */ \
else { \
cpu->R[r+1] = val; \
if (cpu->Num == 0) { \
((ARMv5*)cpu)->ILCurrReg = r+1; \
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; } } \
@ -397,10 +401,10 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
if (r&1) { A_UNK(cpu); return; } \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
((ARMv5*)cpu)->HandleInterlocksMemory(r); \
bool dabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \
bool dabort = !cpu->DataWrite32(offset, cpu->R[r], r); \
u32 storeval = cpu->R[r+1]; if (r+1 == 15) storeval+=4; \
dabort |= !cpu->DataWrite32S (offset+4, storeval); /* no, i dont understand it either */ \
if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; \
dabort |= !cpu->DataWrite32S (offset+4, storeval, r+1); \
/*if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;*/ \
cpu->AddCycles_CD(); \
if (dabort) [[unlikely]] { \
((ARMv5*)cpu)->DataAbort(); \
@ -414,10 +418,10 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
if (r&1) { A_UNK(cpu); return; } \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
((ARMv5*)cpu)->HandleInterlocksMemory(r); \
bool dabort = !cpu->DataWrite32(addr, cpu->R[r]); \
bool dabort = !cpu->DataWrite32(addr, cpu->R[r], r); \
u32 storeval = cpu->R[r+1]; if (r+1 == 15) storeval+=4; \
dabort |= !cpu->DataWrite32S (addr+4, storeval); \
if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; \
dabort |= !cpu->DataWrite32S (addr+4, storeval, r+1); \
/*if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;*/ \
cpu->AddCycles_CD(); \
if (dabort) [[unlikely]] { \
((ARMv5*)cpu)->DataAbort(); \
@ -484,29 +488,32 @@ template<bool byte>
inline void SWP(ARM* cpu)
{
ExecuteStage<false>(cpu, ((cpu->CurInstr >> 16) & 0xF));
cpu->LDRFailedRegs = 0;
u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 rm = cpu->R[cpu->CurInstr & 0xF];
if ((cpu->CurInstr & 0xF) == 15) rm += 4;
u32 rd = (cpu->CurInstr >> 12) & 0xF;
u32 rm = cpu->CurInstr & 0xF;
u32 storeval = cpu->R[rm];
if (rm == 15) storeval += 4;
u32 val;
if ((byte ? cpu->DataRead8 (base, &val)
: cpu->DataRead32(base, &val))) [[likely]]
u32 oldrd = cpu->R[rd];
if ((byte ? cpu->DataRead8 (base, rd)
: cpu->DataRead32(base, rd))) [[likely]]
{
cpu->NDS.ARM9Timestamp += cpu->DataCycles; // checkme
//cpu->NDS.ARM9Timestamp += cpu->DataCycles; // checkme
if ((byte ? cpu->DataWrite8 (base, rm)
: cpu->DataWrite32(base, rm))) [[likely]]
if ((byte ? cpu->DataWrite8 (base, storeval, rm)
: cpu->DataWrite32(base, storeval, rm))) [[likely]]
{
// rd only gets updated if both read and write succeed
u32 rd = (cpu->CurInstr >> 12) & 0xF;
if constexpr (!byte) val = ROR(val, 8*(base&0x3));
if constexpr (!byte) cpu->R[rd] = ROR(cpu->R[rd], 8*(base&0x3));
cpu->AddCycles_CDI();
if (rd != 15)
{
cpu->R[rd] = val;
if (cpu->Num == 0)
{
((ARMv5*)cpu)->ILCurrReg = rd;
@ -514,12 +521,18 @@ inline void SWP(ARM* cpu)
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + extra;
}
}
else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't seem to work on the arm 9?
else if (cpu->Num==1) // for some reason these jumps don't seem to work on the arm 9?
{
cpu->R[rd] = cpu->R[rd] & ~1;
cpu->JumpTo(cpu->R[rd], false, 1);
}
return;
}
}
// data abort handling
cpu->R[rd] = oldrd;
cpu->LDRFailedRegs = 1<<rd;
cpu->AddCycles_CDI();
((ARMv5*)cpu)->DataAbort();
}
@ -559,15 +572,15 @@ void EmptyRListLDMSTM(ARM* cpu, const u8 baseid, const u8 flags)
if (flags & load)
{
u32 pc;
cpu->DataRead32(base, &pc);
cpu->DataRead32(base, 15);
cpu->AddCycles_CDI();
cpu->JumpTo(pc, flags & restoreorthumb);
cpu->JumpTo(cpu->R[15] & ~1, flags & restoreorthumb, 1); // TODO: fix this not maintaining current mode properly
}
else
{
cpu->DataWrite32(base, cpu->R[15] + ((flags & restoreorthumb) ? 2 : 4));
cpu->DataWrite32(base, cpu->R[15] + ((flags & restoreorthumb) ? 2 : 4), 15);
cpu->AddCycles_CD();
}
@ -586,6 +599,7 @@ void EmptyRListLDMSTM(ARM* cpu, const u8 baseid, const u8 flags)
void A_LDM(ARM* cpu)
{
cpu->LDRFailedRegs = 0;
u32 baseid = (cpu->CurInstr >> 16) & 0xF;
u32 base = cpu->R[baseid];
u32 wbbase;
@ -626,48 +640,49 @@ void A_LDM(ARM* cpu)
// switch to user mode regs
if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
{
cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10, true);
//if (cpu->MRTrack.Type != MainRAMType::Null) printf("AHA, DERES THE PROBLEM\n");
}
for (int i = 0; i < 15; i++)
{
if (cpu->CurInstr & (1<<i))
{
if (preinc) base += 4;
u32 val;
dabort |= !(first ? cpu->DataRead32 (base, &val)
: cpu->DataRead32S(base, &val));
// remaining loads still occur but are not written to a reg after a data abort is raised
if (!dabort) [[likely]] cpu->R[i] = val;
u32 oldval = cpu->R[i];
dabort |= !(first ? cpu->DataRead32 (base, i)
: cpu->DataRead32S(base, i));
if (dabort) [[unlikely]] { cpu->R[i] = oldval; cpu->LDRFailedRegs |= (1<<i); }
first = false;
if (!preinc) base += 4;
}
}
u32 pc = 0;
if (cpu->CurInstr & (1<<15))
{
if (preinc) base += 4;
dabort |= !(first ? cpu->DataRead32 (base, &pc)
: cpu->DataRead32S(base, &pc));
u32 oldval = cpu->R[15];
dabort |= !(first ? cpu->DataRead32 (base, 15)
: cpu->DataRead32S(base, 15));
if (dabort) [[unlikely]] { cpu->R[15] = oldval; cpu->LDRFailedRegs |= (1<<15); }
else if (cpu->Num == 1)
cpu->R[15] &= ~0x1;
if (!preinc) base += 4;
if (cpu->Num == 1 || (((ARMv5*)cpu)->CP15Control & (1<<15)))
pc &= ~0x1;
}
if (__builtin_popcount(cpu->CurInstr & 0xFFFF) == 1) [[unlikely]] // single reg
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
cpu->AddCycles_CDI();
if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
if (cpu->Num == 0) ;//cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
else; // CHECKME: ARM7 timing behavior?
}
else
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
cpu->AddCycles_CDI();
}
@ -675,7 +690,17 @@ void A_LDM(ARM* cpu)
if (dabort) [[unlikely]]
{
if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true);
{
if (cpu->Num == 0)
{
cpu->QueueMode[0] = (cpu->CPSR&~0x1F)|0x10;
cpu->QueueMode[1] = cpu->CPSR;
if (cpu->MRTrack.Type != MainRAMType::Null) ((ARMv5*)cpu)->FuncQueue[cpu->FuncQueueFill++] = &ARMv5::QueueUpdateMode;
else ((ARMv5*)cpu)->QueueUpdateMode();
}
else cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true);
}
((ARMv5*)cpu)->DataAbort();
return;
@ -694,7 +719,7 @@ void A_LDM(ARM* cpu)
{
u32 rlist = cpu->CurInstr & 0xFFFF;
if ((!(rlist & ~(1 << baseid))) || (rlist & ~((2 << baseid) - 1)))
cpu->R[baseid] = wbbase;
{ cpu->R[baseid] = wbbase; cpu->LDRFailedRegs = 1<<baseid; }
}
}
else
@ -703,13 +728,23 @@ void A_LDM(ARM* cpu)
// switch back to previous regs
if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true);
{
if (cpu->Num == 0)
{
cpu->QueueMode[0] = (cpu->CPSR&~0x1F)|0x10;
cpu->QueueMode[1] = cpu->CPSR;
if (cpu->MRTrack.Type != MainRAMType::Null) ((ARMv5*)cpu)->FuncQueue[cpu->FuncQueueFill++] = &ARMv5::QueueUpdateMode;
else ((ARMv5*)cpu)->QueueUpdateMode();
}
else cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true);
}
// jump if pc got written
if (cpu->CurInstr & (1<<15))
{
if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // force an interlock
cpu->JumpTo(pc, cpu->CurInstr & (1<<22));
//if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // force an interlock
cpu->JumpTo(cpu->R[15], cpu->CurInstr & (1<<22), 1);
}
else if (cpu->Num == 0)
{
@ -785,8 +820,8 @@ void A_STM(ARM* cpu)
if (i == 15) val+=4;
dabort |= !(first ? cpu->DataWrite32 (base, val)
: cpu->DataWrite32S(base, val));
dabort |= !(first ? cpu->DataWrite32 (base, val, i)
: cpu->DataWrite32S(base, val, i));
first = false;
@ -799,14 +834,14 @@ void A_STM(ARM* cpu)
if (__builtin_popcount(cpu->CurInstr & 0xFFFF) == 1) [[unlikely]] // single reg
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
cpu->AddCycles_CD();
if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
if (cpu->Num == 0);// cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
else; // CHECKME: ARM7 timing behavior?
}
else
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
cpu->AddCycles_CD();
}
@ -833,8 +868,9 @@ void A_STM(ARM* cpu)
void T_LDR_PCREL(ARM* cpu)
{
ExecuteStage<false>(cpu, 15);
cpu->LDRFailedRegs = 0;
u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2);
bool dabort = !cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]);
bool dabort = !cpu->DataRead32(addr, (cpu->CurInstr >> 8) & 0x7);
cpu->AddCycles_CDI();
if (dabort) [[unlikely]] ((ARMv5*)cpu)->DataAbort();
@ -968,9 +1004,8 @@ void T_PUSH(ARM* cpu)
{
if (cpu->CurInstr & (1<<i))
{
dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[i])
: cpu->DataWrite32S(base, cpu->R[i]));
dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[i], i)
: cpu->DataWrite32S(base, cpu->R[i], i));
first = false;
base += 4;
}
@ -978,20 +1013,20 @@ void T_PUSH(ARM* cpu)
if (cpu->CurInstr & (1<<8))
{
dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[14])
: cpu->DataWrite32S(base, cpu->R[14]));
dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[14], 14)
: cpu->DataWrite32S(base, cpu->R[14], 14));
}
if (__builtin_popcount(cpu->CurInstr & 0x1FF) == 1) [[unlikely]] // single reg
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
cpu->AddCycles_CD();
if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
if (cpu->Num == 0);// cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
else; // CHECKME: ARM7 timing behavior?
}
else
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
cpu->AddCycles_CD();
}
@ -1007,6 +1042,7 @@ void T_PUSH(ARM* cpu)
void T_POP(ARM* cpu)
{
ExecuteStage<false>(cpu, 13);
cpu->LDRFailedRegs = 0;
u32 base = cpu->R[13];
bool first = true;
bool dabort = false;
@ -1021,11 +1057,10 @@ void T_POP(ARM* cpu)
{
if (cpu->CurInstr & (1<<i))
{
u32 val;
dabort |= !(first ? cpu->DataRead32 (base, &val)
: cpu->DataRead32S(base, &val));
if (!dabort) [[likely]] cpu->R[i] = val;
u32 oldval = cpu->R[i];
dabort |= !(first ? cpu->DataRead32 (base, i)
: cpu->DataRead32S(base, i));
if (dabort) [[unlikely]] { cpu->R[i] = oldval; cpu->LDRFailedRegs |= (1<<i); }
first = false;
base += 4;
@ -1034,33 +1069,35 @@ void T_POP(ARM* cpu)
if (cpu->CurInstr & (1<<8))
{
u32 pc;
dabort |= !(first ? cpu->DataRead32 (base, &pc)
: cpu->DataRead32S(base, &pc));
u32 oldval = cpu->R[15];
dabort |= !(first ? cpu->DataRead32 (base, 15)
: cpu->DataRead32S(base, 15));
if (__builtin_popcount(cpu->CurInstr & 0x1FF) == 1) [[unlikely]] // single reg
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
cpu->AddCycles_CDI();
if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
if (cpu->Num == 0);// cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
else; // CHECKME: ARM7 timing behavior?
}
else
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
cpu->AddCycles_CDI();
}
if (!dabort) [[likely]]
{
if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) pc |= 0x1;
if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // force an interlock
if (cpu->Num==1) cpu->R[15] |= 0x1;
//if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // force an interlock
cpu->JumpTo(pc);
cpu->JumpTo(cpu->R[15], false, 2);
base += 4;
}
else [[unlikely]]
{
cpu->R[15] = oldval;
cpu->LDRFailedRegs |= (1<<15);
((ARMv5*)cpu)->DataAbort();
return;
}
@ -1069,14 +1106,14 @@ void T_POP(ARM* cpu)
{
if (__builtin_popcount(cpu->CurInstr & 0x1FF) == 1) [[unlikely]] // single reg
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
cpu->AddCycles_CDI();
if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
if (cpu->Num == 0);// cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
else; // CHECKME: ARM7 timing behavior?
}
else
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
cpu->AddCycles_CDI();
}
@ -1122,9 +1159,8 @@ void T_STMIA(ARM* cpu)
{
if (cpu->CurInstr & (1<<i))
{
dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[i])
: cpu->DataWrite32S(base, cpu->R[i]));
dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[i], i)
: cpu->DataWrite32S(base, cpu->R[i], i));
first = false;
base += 4;
}
@ -1132,14 +1168,14 @@ void T_STMIA(ARM* cpu)
if (__builtin_popcount(cpu->CurInstr & 0xFF) == 1) [[unlikely]] // single reg
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
cpu->AddCycles_CD();
if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
if (cpu->Num == 0);// cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
else; // CHECKME: ARM7 timing behavior?
}
else
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
cpu->AddCycles_CD();
}
@ -1170,11 +1206,11 @@ void T_LDMIA(ARM* cpu)
{
if (cpu->CurInstr & (1<<i))
{
u32 val;
dabort |= !(first ? cpu->DataRead32 (base, &val)
: cpu->DataRead32S(base, &val));
u32 oldval = cpu->R[i];
dabort |= !(first ? cpu->DataRead32 (base, i)
: cpu->DataRead32S(base, i));
if (dabort) [[unlikely]] { cpu->R[i] = oldval; cpu->LDRFailedRegs |= (1<<i); }
if (!dabort) [[likely]] cpu->R[i] = val;
first = false;
base += 4;
}
@ -1182,14 +1218,14 @@ void T_LDMIA(ARM* cpu)
if (__builtin_popcount(cpu->CurInstr & 0xFF) == 1) [[unlikely]] // single reg
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1;
cpu->AddCycles_CDI();
if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
if (cpu->Num == 0);// cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages
else; // CHECKME: ARM7 timing behavior?
}
else
{
if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
//if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;
cpu->AddCycles_CDI();
}

View File

@ -646,17 +646,17 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept
}
else
{
nextInstr[1] = cpuv5->CodeRead32(r15, false);
//nextInstr[1] = cpuv5->CodeRead32(r15, false);
instrs[i].CodeCycles = cpu->CodeCycles;
}
}
else
{
ARMv4* cpuv4 = (ARMv4*)cpu;
if (thumb)
nextInstr[1] = cpuv4->CodeRead16(r15);
else
nextInstr[1] = cpuv4->CodeRead32(r15);
if (thumb);
//nextInstr[1] = cpuv4->CodeRead16(r15);
else;
// nextInstr[1] = cpuv4->CodeRead32(r15);
instrs[i].CodeCycles = cpu->CodeCycles;
}
instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr, LiteralOptimizations);
@ -724,7 +724,7 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept
addressRanges[numAddressRanges++] = translatedAddrRounded;
addressMasks[j] |= 1 << ((translatedAddr & 0x1FF) / 16);
JIT_DEBUGPRINT("literal loading %08x %08x %08x %08x\n", literalAddr, translatedAddr, addressMasks[j], addressRanges[j]);
cpu->DataRead32(literalAddr, &literalValues[numLiterals]);
//cpu->DataRead32(literalAddr, &literalValues[numLiterals]);
literalLoadAddrs[numLiterals++] = translatedAddr;
}
}

View File

@ -83,14 +83,14 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
// doesn't matter if we put garbage in the MSbs there
if (addr & 0x2)
{
cpu9->CodeRead32(addr-2, true) >> 16;
//cpu9->CodeRead32(addr-2, true) >> 16;
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+2, false);
//cpu9->CodeRead32(addr+2, false);
cycles += CurCPU->CodeCycles;
}
else
{
cpu9->CodeRead32(addr, true);
//cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
}
}
@ -99,9 +99,9 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
addr &= ~0x3;
newPC = addr+4;
cpu9->CodeRead32(addr, true);
//cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+4, false);
//cpu9->CodeRead32(addr+4, false);
cycles += cpu9->CodeCycles;
}

View File

@ -79,18 +79,18 @@ bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
CurCPU->R[15] = R15;
if (size == 32)
{
CurCPU->DataRead32(addr & ~0x3, &val);
//CurCPU->DataRead32(addr & ~0x3, &val);
val = melonDS::ROR(val, (addr & 0x3) << 3);
}
else if (size == 16)
{
CurCPU->DataRead16(addr & ~0x1, &val);
//CurCPU->DataRead16(addr & ~0x1, &val);
if (signExtend)
val = ((s32)val << 16) >> 16;
}
else
{
CurCPU->DataRead8(addr, &val);
// CurCPU->DataRead8(addr, &val);
if (signExtend)
val = ((s32)val << 24) >> 24;
}

View File

@ -72,14 +72,14 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
// doesn't matter if we put garbage in the MSbs there
if (addr & 0x2)
{
cpu9->CodeRead32(addr-2, true);
//cpu9->CodeRead32(addr-2, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+2, false);
//cpu9->CodeRead32(addr+2, false);
cycles += CurCPU->CodeCycles;
}
else
{
cpu9->CodeRead32(addr, true);
//cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
}
}
@ -88,9 +88,9 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
addr &= ~0x3;
newPC = addr+4;
cpu9->CodeRead32(addr, true);
//cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+4, false);
//cpu9->CodeRead32(addr+4, false);
cycles += cpu9->CodeCycles;
}

View File

@ -85,18 +85,18 @@ bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
CurCPU->R[15] = R15;
if (size == 32)
{
CurCPU->DataRead32(addr & ~0x3, &val);
//CurCPU->DataRead32(addr & ~0x3, &val);
val = melonDS::ROR(val, (addr & 0x3) << 3);
}
else if (size == 16)
{
CurCPU->DataRead16(addr & ~0x1, &val);
//CurCPU->DataRead16(addr & ~0x1, &val);
if (signExtend)
val = ((s32)val << 16) >> 16;
}
else
{
CurCPU->DataRead8(addr, &val);
//CurCPU->DataRead8(addr, &val);
if (signExtend)
val = ((s32)val << 24) >> 24;
}

File diff suppressed because it is too large Load Diff

View File

@ -151,7 +151,7 @@ constexpr u32 CP15_MAP_READABLE = 0x01;
constexpr u32 CP15_MAP_WRITEABLE = 0x02;
constexpr u32 CP15_MAP_EXECUTABLE = 0x04;
constexpr u32 CP15_MAP_DCACHEABLE = 0x10;
constexpr u32 CP15_MAP_DCACHEWRITEBACK = 0x20;
constexpr u32 CP15_MAP_BUFFERABLE = 0x20;
constexpr u32 CP15_MAP_ICACHEABLE = 0x40;
constexpr u32 CP15_MAP_ENTRYSIZE_LOG2 = CP15_REGION_BASE_GRANULARITY_LOG2;

View File

@ -453,6 +453,8 @@ void NDS::Reset()
ARM9Timestamp = 0; ARM9Target = 0;
ARM7Timestamp = 0; ARM7Target = 0;
MainRAMTimestamp = 0;
A9ContentionTS = 0;
SysTimestamp = 0;
InitTimings();
@ -889,6 +891,102 @@ void NDS::RunSystemSleep(u64 timestamp)
}
}
// Readability helpers for the MainRAMLastAccess flag (false == ARM9, true == ARM7).
// They are only meant for the main RAM contention handlers that follow and are
// #undef'd again right after them.
// xWENTLAST: test which CPU the flag currently names.
#define A9WENTLAST (!MainRAMLastAccess)
#define A7WENTLAST ( MainRAMLastAccess)
// xLAST: value to store into MainRAMLastAccess to name that CPU.
#define A9LAST false
#define A7LAST true
void NDS::MainRAMHandleARM9()
{
switch (ARM9.MRTrack.Type)
{
case MainRAMType::Null:
Platform::Log(Platform::LogLevel::Error, "NULL MAIN RAM TYPE ARM9");
break;
case MainRAMType::ICacheStream:
{
if (A9ContentionTS < MainRAMTimestamp) { A9ContentionTS = MainRAMTimestamp; return; }
//printf("ICACHEHANDLER\n");
u8* prog = &ARM9.MRTrack.Progress;
u32 addr = (ARM9.FetchAddr[16] & ~0x1F) | (*prog * 4);
u32* icache = (u32*)&ARM9.ICache[ARM9.MRTrack.Var << 5];
icache[*prog] = ARM9Read32(addr);
if ((*prog > 0) && A9WENTLAST)
{
MainRAMTimestamp += 2;
A9ContentionTS += 2;
}
else
{
MainRAMTimestamp = A9ContentionTS + 9;
A9ContentionTS += (ARM9ClockShift == 1) ? 9 : 8;
MainRAMLastAccess = A9LAST;
}
if (*prog == ARM9.ICacheStreamPtr) ARM9Timestamp = (A9ContentionTS << ARM9ClockShift) - 1;
else if (*prog > ARM9.ICacheStreamPtr) ARM9.ICacheStreamTimes[*prog-1] = (A9ContentionTS << ARM9ClockShift) - 1;
(*prog)++;
if (*prog >= 8)
{
ARM9.RetVal = icache[(ARM9.FetchAddr[16] & 0x1F) / 4];
memset(&ARM9.MRTrack, 0, sizeof(ARM9.MRTrack));
A9ContentionTS = 0;
}
break;
}
}
}
void NDS::MainRAMHandle()
{
if (!A9ContentionTS)
{
A9ContentionTS = (ARM9Timestamp + ((1<<ARM9ClockShift)-1)) >> ARM9ClockShift;
if ((ARM9.MRTrack.Type != MainRAMType::Null) && (A9ContentionTS < MainRAMTimestamp)) A9ContentionTS = MainRAMTimestamp;
}
bool A7Priority = ExMemCnt[0] & 0x8000;
if (A7Priority)
{
while (true)
{
if (A9ContentionTS < ARM7Timestamp)
{
if (ARM9.MRTrack.Type == MainRAMType::Null) { A9ContentionTS = 0; return; }
MainRAMHandleARM9();
}
else
{
if (true) return;
}
}
}
else
{
while (true)
{
if (A9ContentionTS <= ARM7Timestamp)
{
if (ARM9.MRTrack.Type == MainRAMType::Null) { A9ContentionTS = 0; return; }
MainRAMHandleARM9();
}
else
{
if (true) return;
}
}
}
}
#undef A9WENTLAST
#undef A7WENTLAST
#undef A9LAST
#undef A7LAST
template <CPUExecuteMode cpuMode>
u32 NDS::RunFrame()
{
@ -970,16 +1068,21 @@ u32 NDS::RunFrame()
ts = ARM9Timestamp - ts;
for (int i = 0; i < 7; i++)
{
ARM9.ICacheFillTimes[i] += ts;
ARM9.DCacheFillTimes[i] += ts;
ARM9.ICacheStreamTimes[i] += ts;
ARM9.DCacheStreamTimes[i] += ts;
}
ARM9.WBTimestamp += ts;
}
else
else if (ARM9.MRTrack.Type == MainRAMType::Null)
{
if (ARM9.abt) ARM9Timestamp = ARM9Target;
ARM9.Execute<cpuMode>();
}
//printf("MAIN LOOP: %lli %lli\n", ARM9Timestamp>>ARM9ClockShift, ARM7Timestamp);
MainRAMHandle();
RunTimers(0);
GPU.GPU3D.Run();
@ -987,9 +1090,11 @@ u32 NDS::RunFrame()
target = ARM9Timestamp >> ARM9ClockShift;
CurCPU = 1;
while (ARM7Timestamp < target)
while ((ARM7Timestamp < target) || (ARM9.MRTrack.Type != MainRAMType::Null))
{
ARM7Target = target; // might be changed by a reschedule
ARM7Target = (ARM9.MRTrack.Type != MainRAMType::Null) ? (ARM7Timestamp+1) : target; // might be changed by a reschedule
//printf("A7 LOOP: %lli %lli\n", ARM9Timestamp>>ARM9ClockShift, ARM7Timestamp);
if (CPUStop & CPUStop_DMA7)
{
@ -1008,6 +1113,8 @@ u32 NDS::RunFrame()
ARM7.Execute<cpuMode>();
}
MainRAMHandle();
RunTimers(1);
}

View File

@ -253,6 +253,8 @@ public: // TODO: Encapsulate the rest of these members
// no need to worry about those overflowing, they can keep going for atleast 4350 years
u64 ARM9Timestamp, ARM9Target;
u64 ARM7Timestamp, ARM7Target;
u64 MainRAMTimestamp;
u64 A9ContentionTS;
u32 ARM9ClockShift;
u32 IME[2];
@ -270,6 +272,8 @@ public: // TODO: Encapsulate the rest of these members
alignas(u32) u8 ROMSeed0[2*8];
alignas(u32) u8 ROMSeed1[2*8];
bool MainRAMLastAccess; // 0 == ARM9 | 1 == ARM7
protected:
// These BIOS arrays should be declared *before* the component objects (JIT, SPI, etc.)
// so that they're initialized before the component objects' constructors run.
@ -394,6 +398,9 @@ public: // TODO: Encapsulate the rest of these members
void LoadGBAAddon(int type);
std::unique_ptr<GBACart::CartCommon> EjectGBACart() { return GBACartSlot.EjectCart(); }
void MainRAMHandleARM9();
void MainRAMHandle();
u32 RunFrame();
bool IsRunning() const noexcept { return Running; }