This commit is contained in:
Jakly 2024-12-21 23:10:24 +00:00 committed by GitHub
commit f5049fb6cc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 7117 additions and 1404 deletions

File diff suppressed because it is too large Load Diff

750
src/ARM.h
View File

@ -21,10 +21,13 @@
#include <algorithm>
#include <optional>
#include <cstring>
#include "types.h"
#include "MemRegion.h"
#include "MemConstants.h"
#include "CP15_Constants.h"
#include "Platform.h"
#ifdef GDBSTUB_ENABLED
#include "debug/GdbStub.h"
@ -52,6 +55,50 @@ enum class CPUExecuteMode : u32
#endif
};
// Mode selector used as the template argument of WriteBufferHandle<mode>().
// NOTE(review): what each mode makes the write buffer do is not visible in this
// header - confirm against the WriteBufferHandle implementation before relying on the names.
enum class WBMode
{
Check,
Force,
SingleBurst,
WaitEntry,
};
// Kind of access recorded in MainRAMTrackers::Type. Stored as a u8.
// Null is explicitly 0; every following enumerator takes the next consecutive value,
// so the ordering relative to WriteBufferCmds (see its comment) is significant.
enum class MainRAMType : u8
{
Null = 0,
Fetch,
ICacheStream,
DCacheStream,
DMA16,
DMA32,
WriteBufferCmds, // all write buffer commands must be below this one; wb cmds are not strictly used for main ram
WBDrain,
WBWrite,
WBCheck,
WBWaitRead,
WBWaitWrite,
};
// each one represents a bit in the field
// (MR8 is the exception: it is 0, i.e. no bit set; the values can be OR-ed together)
// NOTE(review): which field these flags are stored in is not visible here - presumably MainRAMTrackers::Var; confirm.
enum FetchFlags
{
MR8 = 0x00, // tbh it only exists because it felt wrong to write nothing to the field for 8 bit reads
MR16 = 0x01,
MR32 = 0x02,
MRWrite = 0x20,
MRSequential = 0x40,
MRCodeFetch = 0x80,
};
// Small tracking record (three bytes of state); the CPU keeps one instance as MRTrack.
struct MainRAMTrackers
{
MainRAMType Type; // kind of access being tracked; MainRAMType::Null is the zero value
u8 Var; // NOTE(review): meaning not visible in this header - confirm against the users of MRTrack
u8 Progress; // NOTE(review): presumably a step counter for the tracked access - confirm
};
struct GDBArgs;
class ARMJIT;
class GPU;
@ -76,7 +123,7 @@ public:
virtual void FillPipeline() = 0;
virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0;
virtual void JumpTo(u32 addr, bool restorecpsr = false, u8 R15 = 0) = 0;
void RestoreCPSR();
void Halt(u32 halt)
@ -130,19 +177,20 @@ public:
void UpdateMode(u32 oldmode, u32 newmode, bool phony = false);
template <CPUExecuteMode mode>
void TriggerIRQ();
void SetupCodeMem(u32 addr);
virtual void DataRead8(u32 addr, u32* val) = 0;
virtual void DataRead16(u32 addr, u32* val) = 0;
virtual void DataRead32(u32 addr, u32* val) = 0;
virtual void DataRead32S(u32 addr, u32* val) = 0;
virtual void DataWrite8(u32 addr, u8 val) = 0;
virtual void DataWrite16(u32 addr, u16 val) = 0;
virtual void DataWrite32(u32 addr, u32 val) = 0;
virtual void DataWrite32S(u32 addr, u32 val) = 0;
virtual bool DataRead8(u32 addr, u8 reg) = 0;
virtual bool DataRead16(u32 addr, u8 reg) = 0;
virtual bool DataRead32(u32 addr, u8 reg) = 0;
virtual bool DataRead32S(u32 addr, u8 reg) = 0;
virtual bool DataWrite8(u32 addr, u8 val, u8 reg) = 0;
virtual bool DataWrite16(u32 addr, u16 val, u8 reg) = 0;
virtual bool DataWrite32(u32 addr, u32 val, u8 reg) = 0;
virtual bool DataWrite32S(u32 addr, u32 val, u8 reg) = 0;
virtual void AddCycles_C() = 0;
virtual void AddCycles_CI(s32 numI) = 0;
@ -171,20 +219,48 @@ public:
u32 DataRegion;
s32 DataCycles;
u32 R[16]; // heh
alignas(64) u32 R[16]; // heh
u32 CPSR;
u32 R_FIQ[8]; // holding SPSR too
u32 R_SVC[3];
u32 R_ABT[3];
u32 R_IRQ[3];
u32 R_UND[3];
u32 CurInstr;
u32 NextInstr[2];
u64 CurInstr;
u64 NextInstr[2];
u32 ExceptionBase;
MemRegion CodeMem;
MainRAMTrackers MRTrack;
u32 BranchAddr;
u8 BranchUpdate;
bool BranchRestore;
u32 QueueMode[2];
u8 ExtReg;
u8 ExtROROffs;
u64 RetVal;
u16 LDRRegs;
u16 LDRFailedRegs;
u16 STRRegs;
u32 FetchAddr[17];
u32 STRVal[16];
// debugging crud: REMOVE ME
u8 abt;
u64 iter;
u8 FuncQueueFill;
u8 FuncQueueEnd;
u8 FuncQueueProg;
u8 ExecuteCycles;
bool FuncQueueActive;
#ifdef JIT_ENABLED
u32 FastBlockLookupStart, FastBlockLookupSize;
u64* FastBlockLookup;
@ -243,7 +319,7 @@ public:
void FillPipeline() override;
void JumpTo(u32 addr, bool restorecpsr = false) override;
void JumpTo(u32 addr, bool restorecpsr = false, u8 R15 = 0) override;
void PrefetchAbort();
void DataAbort();
@ -252,119 +328,576 @@ public:
void Execute();
// all code accesses are forced nonseq 32bit
u32 CodeRead32(u32 addr, bool branch);
void CodeRead32(const u32 addr);
void DataRead8(u32 addr, u32* val) override;
void DataRead16(u32 addr, u32* val) override;
void DataRead32(u32 addr, u32* val) override;
void DataRead32S(u32 addr, u32* val) override;
void DataWrite8(u32 addr, u8 val) override;
void DataWrite16(u32 addr, u16 val) override;
void DataWrite32(u32 addr, u32 val) override;
void DataWrite32S(u32 addr, u32 val) override;
bool DataRead8(u32 addr, u8 reg) override;
bool DataRead16(u32 addr, u8 reg) override;
bool DataRead32(u32 addr, u8 reg) override;
bool DataRead32S(u32 addr, u8 reg) override;
bool DataWrite8(u32 addr, u8 val, u8 reg) override;
bool DataWrite16(u32 addr, u16 val, u8 reg) override;
bool DataWrite32(u32 addr, u32 val, u8 reg) override;
bool DataWrite32S(u32 addr, u32 val, u8 reg) override;
void CodeFetch();
// Accounts for a code-only cycle.
// NOTE(review): this body comes from a diff rendering without +/- markers. The
// numC/Cycles lines look like the previous implementation, and the
// ExecuteCycles = 0 / CodeFetch() lines look like its replacement - confirm against the real header.
void AddCycles_C() override
{
// code only. always nonseq 32-bit for ARM9.
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
Cycles += numC;
ExecuteCycles = 0;
CodeFetch();
}
// Accounts for a code cycle plus internal/execute cycles.
// NOTE(review): two signatures appear because this span comes from a diff rendering
// without +/- markers. The numI signature and the numC/Cycles lines look like the
// previous version; the numX signature with ExecuteCycles = numX / CodeFetch() looks
// like the new one - confirm against the real header.
void AddCycles_CI(s32 numI) override
void AddCycles_CI(s32 numX) override
{
// code+internal
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
Cycles += numC + numI;
ExecuteCycles = numX;
CodeFetch();
}
// Stores numM in DataCycles and appends AddCycles_MW_2 to the function queue.
void AddCycles_MW(s32 numM)
{
    this->DataCycles = numM;
    this->QueueFunction(&ARMv5::AddCycles_MW_2);
}
// Accounts for a code + data access (with internal cycle).
// NOTE(review): this body comes from a diff rendering without +/- markers. The
// numC/numD/std::max lines look like the previous implementation and the single
// AddCycles_MW(DataCycles) call looks like its replacement - confirm against the real header.
void AddCycles_CDI() override
{
// LDR/LDM cycles. ARM9 seems to skip the internal cycle there.
// TODO: ITCM data fetches shouldn't be parallelized, they say
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
s32 numD = DataCycles;
//if (DataRegion != CodeRegion)
Cycles += std::max(numC + numD - 6, std::max(numC, numD));
//else
// Cycles += numC + numD;
AddCycles_MW(DataCycles);
}
// Accounts for a code + data access.
// NOTE(review): this body comes from a diff rendering without +/- markers. The
// numC/numD/std::max lines look like the previous implementation; the lines setting
// Store and calling AddCycles_MW(DataCycles) look like its replacement - confirm
// against the real header.
void AddCycles_CD() override
{
// TODO: ITCM data fetches shouldn't be parallelized, they say
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
s32 numD = DataCycles;
//if (DataRegion != CodeRegion)
Cycles += std::max(numC + numD - 6, std::max(numC, numD));
//else
// Cycles += numC + numD;
Store = true; // todo: queue this
AddCycles_MW(DataCycles);
}
void GetCodeMemRegion(u32 addr, MemRegion* region);
// Stores the requested delay in ITCMDelay and appends DelayIfITCM_2 to the function queue.
void DelayIfITCM(s8 delay)
{
    this->ITCMDelay = delay;
    this->QueueFunction(&ARMv5::DelayIfITCM_2);
}
// Stages an interlock request (register index and delay) in ILQueueReg/ILQueueDelay
// and appends SetupInterlock_2 to the function queue.
inline void SetupInterlock(u8 reg, s8 delay = 0)
{
    this->ILQueueDelay = delay;
    this->ILQueueReg = reg;
    this->QueueFunction(&ARMv5::SetupInterlock_2);
}
/**
 * @brief Stages an execute-stage interlock check and appends
 *        HandleInterlocksExecute_2 to the function queue.
 * @tparam bitfield true: @p ilmask is already a register bitmask;
 *                  false: @p ilmask is a single register index and is converted to 1<<index.
 * @param [in] ilmask register bitmask or register index, depending on @p bitfield
 * @param [in] times  optional table copied into ILQueueTimes; when given it must point to
 *                    at least sizeof(ILQueueTimes) readable bytes. When null the table is zeroed.
 */
template <bool bitfield>
inline void HandleInterlocksExecute(u16 ilmask, u8* times = nullptr)
{
    if constexpr (bitfield) ILQueueMask = ilmask;
    else ILQueueMask = 1<<ilmask;

    // nullptr means "no per-register times": start from an all-zero table
    if (times == nullptr) memset(ILQueueTimes, 0, sizeof(ILQueueTimes));
    else memcpy(ILQueueTimes, times, sizeof(ILQueueTimes));

    QueueFunction(&ARMv5::HandleInterlocksExecute_2);
}
// Stores the delay in ILForceDelay and appends ForceInterlock_2 to the function queue.
inline void ForceInterlock(s8 delay = 0)
{
    this->ILForceDelay = delay;
    this->QueueFunction(&ARMv5::ForceInterlock_2);
}
// Stores the register index in ILQueueMemReg and appends HandleInterlocksMemory_2 to the function queue.
inline void HandleInterlocksMemory(u8 reg)
{
    this->ILQueueMemReg = reg;
    this->QueueFunction(&ARMv5::HandleInterlocksMemory_2);
}
void GetCodeMemRegion(const u32 addr, MemRegion* region);
/**
* @brief Resets the state of all CP15 registers and variables
* to power up state.
* @par Returns
* Nothing
*/
void CP15Reset();
/**
* @brief handles read and write operations to a save-state
* file.
* @param [in] file Savestate file
* @par Returns
* Nothing
*/
void CP15DoSavestate(Savestate* file);
/**
* @brief Calculates the internal state from @ref DTCMSettings
* @par Returns
* Nothing
*/
void UpdateDTCMSetting();
/**
* @brief Calculates the internal state from @ref ITCMSettings
* @par Returns
* Nothing
*/
void UpdateITCMSetting();
void UpdatePURegion(u32 n);
void UpdatePURegions(bool update_all);
/**
* @brief Calculates the internal state from the
* region protection bits of a specific region number
* @details
* This function updates the PU_####Map array in all
* parts that are occupied by this region. Updating a single
* region does not take into account the priority of the
* regions.
* @param [in] n index of the region from 0 to @ref CP15_REGION_COUNT - 1
* @par Returns
* Nothing
*/
void UpdatePURegion(const u32 n);
/**
* @brief Calculates the internal state from all region
* protection bits.
* @details
* This function updates the internal state in order from the
* least to the most prioritized regions, so that the
* priority of the regions matches the internal state
* @par Returns
* Nothing
*/
void UpdatePURegions(const bool update_all);
u32 RandomLineIndex();
void ICacheLookup(u32 addr);
void ICacheInvalidateByAddr(u32 addr);
/**
* @brief Performs an instruction cache lookup
* @details
* If cache lookups are not disabled in
* @ref CP15BISTTestStateRegister, a hit returns the
* cached data; otherwise the result of a memory
* access is returned instead.
* If the cache lookup results in a cache miss and linefill is
* not disabled in @ref CP15BISTTestStateRegister, all data
* needed to fill the entire cache line is fetched directly
* from the ITCM or bus
* @param [in] addr Address of the memory to be retrieved from
* cache. The address is internally aligned to a word boundary
* @return NOTE(review): this used to read "Value of the word at addr",
* but the declaration returns bool - confirm what the flag signals
*/
bool ICacheLookup(const u32 addr);
/**
* @brief Check if an address is within a instruction cachable
* region
* @details
* Checks the address by looking up the PU_map flags for
* the address and returns the status of the instruction
* cache enable flag
*
* @param [in] addr Address. May be unaligned.
* @retval true If the address points to a region, that is
* enabled for instruction fetches to be cached.
*/
inline bool IsAddressICachable(const u32 addr) const;
/**
* @brief Invalidates all instruction cache lines
* @details
* Clears the @ref CACHE_FLAG_VALID of each cache line in the
* instruction cache. All other flags and values are kept.
* @par Returns
* Nothing
*/
void ICacheInvalidateAll();
template <WBMode mode> bool WriteBufferHandle();
template <int next> void WriteBufferCheck();
void WriteBufferWrite(u32 val, u8 flag, u32 addr = 0);
void WriteBufferDrain();
void CP15Write(u32 id, u32 val);
u32 CP15Read(u32 id) const;
/**
* @brief Invalidates the instruction cacheline containing
* the data of an address.
* @details
* Searches the cacheline containing the data of an address, and
* if found clears the @ref CACHE_FLAG_VALID of this cache line.
* Nothing is done if the address is not present in the cache.
* @param [in] addr Memory address of the data in the cache line
* @par Returns
* Nothing
*/
void ICacheInvalidateByAddr(const u32 addr);
u32 CP15Control;
/**
* @brief Invalidates an instruction cache line
* @details
* Clears the @ref CACHE_FLAG_VALID of the cacheline given by
* set and index within the set. Nothing is done if the cache
* line does not exist.
* @param [in] cacheSet index of the internal cache set from
* 0 to @ref ICACHE_SETS - 1
* @param [in] cacheLine index of the line within the cache set
* from 0 to @ref ICACHE_LINESPERSET - 1
* @par Returns
* Nothing
*/
void ICacheInvalidateBySetAndWay(const u8 cacheSet, const u8 cacheLine);
u32 RNGSeed;
u32 TraceProcessID;
/**
* @brief Performs a data cache lookup
* @details
* If cache lookups are not disabled in
* @ref CP15BISTTestStateRegister, a hit returns the
* cached data; otherwise the result of a memory
* access is returned instead.
* If the cache lookup results in a cache miss and linefill is
* not disabled in @ref CP15BISTTestStateRegister, all data
* needed to fill the entire cache line is fetched directly
* from the ITCM, DTCM or bus
* @param [in] addr Address of the memory to be retrieved from
* cache. The address is internally aligned to a word boundary
* @return NOTE(review): this used to read "Value of the word at addr",
* but the declaration returns bool - confirm what the flag signals
*/
bool DCacheLookup(const u32 addr);
u32 DTCMSetting, ITCMSetting;
/**
* @brief Updates a word in the data cache if present
* @param [in] addr Memory address which is written
* @param [in] val Word value to be written
* @retval true, if the data was written into the cache and
* does not need to be updated until cache is
* cleaned
* false, to write through
*/
bool DCacheWrite32(const u32 addr, const u32 val);
/**
* @brief Updates a half-word in the data cache if present
* @param [in] addr Memory address which is written
* @param [in] val Half-Word value to be written
* @retval true, if the data was written into the cache and
* does not need to be updated until cache is
* cleaned
* false, to write through
*/
bool DCacheWrite16(const u32 addr, const u16 val);
/**
* @brief Updates a byte in the data cache if present
* @param [in] addr Memory address which is written
* @param [in] val Byte value to be written
* @retval true, if the data was written into the cache and
* does not need to be updated until cache is
* cleaned
* false, to write through
*/
bool DCacheWrite8(const u32 addr, const u8 val);
/**
* @brief Check if an address is within a data cachable region
* @details
* Checks the address by looking up the PU_map flags for
* the address and returns the status of the data cache enable
* flag
*
* @param [in] addr Address. May be unaligned.
* @retval true If the address points to a region, that is
* enabled for data accesses to be cached.
*/
inline bool IsAddressDCachable(const u32 addr) const;
/**
* @brief Invalidates all data cache lines
* @details
* NOTE(review): the previous text here was a copy of the
* DCacheInvalidateByAddr description, but this function takes no
* address. Presumably it clears the @ref CACHE_FLAG_VALID of every
* line in the data cache, as ICacheInvalidateAll is documented to do
* for the instruction cache - confirm against the implementation.
* @par Returns
* Nothing
*/
void DCacheInvalidateAll();
/**
* @brief Invalidates the data cacheline containing the data of
* an address.
* @details
* Searches the cacheline containing the data of an address, and
* if found clears the @ref CACHE_FLAG_VALID of this cache line.
* Nothing is done if the address is not present in the cache.
* @param [in] addr Memory address of the data in the cache line
* @par Returns
* Nothing
*/
void DCacheInvalidateByAddr(const u32 addr);
/**
* @brief Invalidates a data cache line
* @details
* Clears the @ref CACHE_FLAG_VALID of the cacheline given by
* set and index within the set. Nothing is done if the cache
* line does not exist.
* @param [in] cacheSet index of the internal cache set from
* 0 to @ref DCACHE_SETS - 1
* @param [in] cacheLine index of the line within the cache set
* from 0 to @ref DCACHE_LINESPERSET - 1
* @par Returns
* Nothing
*/
void DCacheInvalidateBySetAndWay(const u8 cacheSet, const u8 cacheLine);
/**
* @brief Cleans the entire data cache
* @details
* If write-back is enabled in conjunction with the data cache
* the dirty flags in tags are set if the corresponding cache
* line is written to.
* A clean writes back the parts of the cache line
* that are marked dirty and adds the required cycles to the
* @ref DataCycles member.
* @par Returns
* Nothing
*/
void DCacheClearAll();
/**
* @brief Cleans a data cache line
* @details
* If write-back is enabled in conjunction with the data cache
* the dirty flags in tags are set if the corresponding cache
* line is written to.
* A clean writes back the parts of the cache line
* that are marked dirty and adds the required cycles to the
* @ref DataCycles member.
* @param [in] addr Memory address of the data in the cache line
* @par Returns
* Nothing
*/
void DCacheClearByAddr(const u32 addr);
/**
* @brief Cleans a data cache line
* @details
* If write-back is enabled in conjunction with the data cache
* the dirty flags in tags are set if the corresponding cache
* line is written to.
* A clean writes back the parts of the cache line
* that are marked dirty and adds the required cycles to the
* @ref DataCycles member.
* @param [in] cacheSet index of the internal cache set from
* 0 to @ref DCACHE_SETS - 1
* @param [in] cacheLine index of the line within the cache set
* from 0 to @ref DCACHE_LINESPERSET - 1
* @par Returns
* Nothing
*/
void DCacheClearByASetAndWay(const u8 cacheSet, const u8 cacheLine);
/**
* @brief Handles MCR operations writing to cp15 registers
* @details
* This function updates the internal state of the emulator when
* a cp15 register is written, or triggers the corresponding action
* like flushing caches.
*
* @param [in] id the operation id to be performed, consisting of
* (from lower to higher nibble) opcode2, intermediate register,
* register and opcode1. Most write operations just take the first 3
* into account.
* @param [in] val value to be written to the cp15 register
* @par Returns
* Nothing
*/
void CP15Write(const u32 id, const u32 val);
/**
* @brief handles MRC operations reading from cp15 registers
* @details
* This function accumulates the register states from the internal
* emulator state. It does not modify the internal state of the
* emulator or cp15.
* @param [in] id the operation id to be performed, consisting of
* (from lower to higher nibble) opcode2, intermediate register,
* register and opcode1. Most read operations just take the first 3
* into account.
* @return Value of the cp15 register
*/
u32 CP15Read(const u32 id) const;
void QueueFunction(void (ARMv5::*QueueEntry)(void));
// Queue Functions
void StartExecARM();
void StartExecTHUMB();
void AddExecute();
void AddCycles_MW_2();
void DelayIfITCM_2();
void JumpTo_2();
void JumpTo_3A();
void JumpTo_3B();
void JumpTo_3C();
void JumpTo_4();
void CodeRead32_2();
void CodeRead32_3();
void CodeRead32_4();
void ICacheLookup_2();
void DAbortHandle();
void DCacheFin8();
void DRead8_2();
void DRead8_3();
void DRead8_4();
void DRead8_5();
void DCacheFin16();
void DRead16_2();
void DRead16_3();
void DRead16_4();
void DRead16_5();
void DCacheFin32();
void DRead32_2();
void DRead32_3();
void DRead32_4();
void DRead32_5();
void DRead32S_2();
void DRead32S_3();
void DRead32S_4();
void DRead32S_5A();
void DRead32S_5B();
void DWrite8_2();
void DWrite8_3();
void DWrite8_4();
void DWrite8_5();
void DWrite16_2();
void DWrite16_3();
void DWrite16_4();
void DWrite16_5();
void DWrite32_2();
void DWrite32_3();
void DWrite32_4();
void DWrite32_5();
void DWrite32S_2();
void DWrite32S_3();
void DWrite32S_4();
void DWrite32S_5A();
void DWrite32S_5B();
void WBCheck_2();
void ICachePrefetch_2();
void DCacheLookup_2();
void DCacheLookup_3();
void DCClearAddr_2();
void DCClearSetWay_2();
void DCClearInvalidateAddr_2();
void DCClearInvalidateSetWay_2();
void SetupInterlock_2();
void HandleInterlocksExecute_2();
void HandleInterlocksMemory_2();
void ForceInterlock_2();
void QueueUpdateMode() { UpdateMode(QueueMode[0], QueueMode[1], true); }
void SignExtend8() { R[ExtReg] = (s8)R[ExtReg]; }
void SignExtend16() { R[ExtReg] = (s16)R[ExtReg]; }
void ROR32() { R[ExtReg] = ROR(R[ExtReg], ExtROROffs); }
u32 CP15Control; //! CP15 Register 1: Control Register
u32 RNGSeed; //! Global cache line fill seed. Used for pseudo random replacement strategy with the instruction and data cache
u32 DTCMSetting; //! CP15 Register 9 Intermediate 1 Opcode2 0: Data Tightly-Coupled Memory register
u32 ITCMSetting; //! CP15 Register 9 Intermediate 1 Opcode2 1: Instruction Tightly-Coupled Memory register
u32 DCacheLockDown; //! CP15 Register 9 Intermediate 0 Opcode2 0: Data Cache Lockdown Register
u32 ICacheLockDown; //! CP15 Register 9 Intermediate 0 Opcode2 1: Instruction Cache Lockdown Register
u32 CacheDebugRegisterIndex; //! CP15: Cache Debug Index Register
u32 CP15TraceProcessId; //! CP15: Trace Process Id Register
u32 CP15BISTTestStateRegister; //! CP15: BIST Test State Register
// for aarch64 JIT they need to go up here
// to be addressable by a 12-bit immediate
u32 ITCMSize;
u32 DTCMBase, DTCMMask;
s32 RegionCodeCycles;
u32 ITCMSize; //! Internal: Size of the memory ITCM is mapped to. @ref ITCM data repeats every @ref ITCMPhysicalSize within
u32 DTCMBase; //! Internal: DTCMBase Address. The DTCM can be accessed if the address & ~ @ref DTCMMask is equal to this base address
u32 DTCMMask; //! Internal: DTCM Address Mask used in conjunction with @ref DTCMBase to check for DTCM access
s32 RegionCodeCycles; //! Internal: Cached amount of cycles to fetch instruction from the current code region.
u8 ITCM[ITCMPhysicalSize];
u8* DTCM;
alignas(u32) u8 ITCM[ITCMPhysicalSize]; //! Content of the ITCM
u8* DTCM; //! Content of the DTCM
u8 ICache[0x2000];
u32 ICacheTags[64*4];
u8 ICacheCount[64];
alignas(u32) u8 ICache[ICACHE_SIZE]; //! Instruction Cache Content organized in @ref ICACHE_LINESPERSET times @ref ICACHE_SETS times @ref ICACHE_LINELENGTH bytes
u32 ICacheTags[ICACHE_LINESPERSET*ICACHE_SETS]; //! Instruction Cache Tags organized in @ref ICACHE_LINESPERSET times @ref ICACHE_SETS Tags
u8 ICacheCount; //! Global instruction line fill counter. Used for round-robin replacement strategy with the instruction cache
u32 PU_CodeCacheable;
u32 PU_DataCacheable;
u32 PU_DataCacheWrite;
alignas(u32) u8 DCache[DCACHE_SIZE]; //! Data Cache Content organized in @ref DCACHE_LINESPERSET times @ref DCACHE_SETS times @ref DCACHE_LINELENGTH bytes
u32 DCacheTags[DCACHE_LINESPERSET*DCACHE_SETS]; //! Data Cache Tags organized in @ref DCACHE_LINESPERSET times @ref DCACHE_SETS Tags
u8 DCacheCount; //! Global data line fill counter. Used for round-robin replacement strategy with the data cache
u32 PU_CodeRW;
u32 PU_DataRW;
u32 PU_CodeCacheable; //! CP15 Register 2 Opcode2 1: Code Cachable Bits
u32 PU_DataCacheable; //! CP15 Register 2 Opcode2 0: Data Cachable Bits
u32 PU_WriteBufferability; //! CP15 Register 3 Opcode2 0: Write Buffer Control Register
u32 PU_Region[8];
u32 PU_CodeRW; //! CP15 Register 5 Opcode2 3: Code Access Permission register
u32 PU_DataRW; //! CP15 Register 5 Opcode2 2: Data Access Permission register
u32 PU_Region[CP15_REGION_COUNT]; //! CP15 Register 6 Opcode2 0..7: Protection Region Base and Size Register
// 0=dataR 1=dataW 2=codeR 4=datacache 5=datawrite 6=codecache
u8 PU_PrivMap[0x100000];
u8 PU_UserMap[0x100000];
// games operate under system mode, generally
//#define PU_Map PU_PrivMap
u8* PU_Map;
u8 PU_PrivMap[CP15_MAP_ENTRYCOUNT]; /**
* Memory mapping flags for Privileged Modes
* Bits:
* 0 - CP15_MAP_READABLE
* 1 - CP15_MAP_WRITEABLE
* 2 - CP15_MAP_EXECUTABLE
* 4 - CP15_MAP_DCACHEABLE
* 5 - CP15_MAP_BUFFERABLE
* 6 - CP15_MAP_ICACHEABLE
*/
u8 PU_UserMap[CP15_MAP_ENTRYCOUNT]; //! Memory mapping flags for User Mode
u8* PU_Map; //! Current valid Region Mapping (is either @ref PU_PrivMap or PU_UserMap)
// code/16N/32N/32S
u8 MemTimings[0x100000][4];
u8* CurICacheLine;
u8 MemTimings[0x40000][3];
bool (*GetMemRegion)(u32 addr, bool write, MemRegion* region);
alignas(64) void (ARMv5::*DelayedQueue)(void); // adding more than one new entry to the queue while it's already active does not work. so uh. we use this to work around that. it's less than ideal...
void (ARMv5::*StartExec)(void);
void (ARMv5::*FuncQueue[32])(void);
u64 ITCMTimestamp;
u64 TimestampMemory;
u32 PC;
bool NullFetch;
bool Store;
s8 ITCMDelay;
u32 QueuedDCacheLine;
u32 CP15Queue;
u8 ILCurrReg;
u8 ILPrevReg;
u64 ILCurrTime;
u64 ILPrevTime;
u8 ILQueueReg;
s8 ILQueueDelay;
u8 ILQueueMemReg;
u8 ILQueueTimes[16];
u16 ILQueueMask;
u8 ICacheStreamPtr;
u8 DCacheStreamPtr;
u64 ICacheStreamTimes[7];
u64 DCacheStreamTimes[7];
s8 ILForceDelay;
u8 WBWritePointer; // which entry to attempt to write next; should always be ANDed with 0xF after incrementing
u8 WBFillPointer; // where the next entry should be added; should always be ANDed with 0xF after incrementing
u8 WBWriting; // whether the buffer is actively trying to perform a write
u32 WBCurAddr; // address the write buffer is currently writing to
u64 WBCurVal; // current value being written; 0-31: val | 61-63: flag; 0 = byte ns; 1 = halfword ns; 2 = word ns; 3 = word s; 4 = address (invalid in this variable)
u32 WBAddrQueued[40];
u32 storeaddr[16]; // temp until i figure out why using the fifo address entries directly didn't work
u64 WBValQueued[40];
u64 WriteBufferFifo[16]; // 0-31: val | 61-63: flag; 0 = byte ns; 1 = halfword ns; 2 = word ns; 3 = word s; 4 = address
u64 WBTimestamp; // current timestamp
//u64 WBMainRAMDelay; // timestamp used to emulate the delay before the next main ram write can begin
u64 WBDelay; // timestamp in bus cycles use for the delay before next write to the write buffer can occur (seems to be a 1 cycle delay after a write to it)
u32 WBLastRegion; // the last region written to by the write buffer
u64 WBReleaseTS; // the timestamp on which the write buffer relinquished control of the bus back
u64 WBInitialTS; // what cycle the entry was first sent in
#ifdef GDBSTUB_ENABLED
u32 ReadMem(u32 addr, int size) override;
@ -384,36 +917,59 @@ class ARMv4 : public ARM
{
public:
ARMv4(melonDS::NDS& nds, std::optional<GDBArgs> gdb, bool jit);
void Reset() override;
void FillPipeline() override;
void JumpTo(u32 addr, bool restorecpsr = false) override;
void JumpTo(u32 addr, bool restorecpsr = false, u8 R15 = 0) override;
template <CPUExecuteMode mode>
void Execute();
u16 CodeRead16(u32 addr)
{
return BusRead16(addr);
}
alignas(64) void (ARMv4::*StartExec)(void);
void (ARMv4::*FuncQueue[32])(void);
bool Nonseq;
u32 CodeRead32(u32 addr)
{
return BusRead32(addr);
}
void CodeRead16(u32 addr);
void CodeRead32(u32 addr);
void DataRead8(u32 addr, u32* val) override;
void DataRead16(u32 addr, u32* val) override;
void DataRead32(u32 addr, u32* val) override;
void DataRead32S(u32 addr, u32* val) override;
void DataWrite8(u32 addr, u8 val) override;
void DataWrite16(u32 addr, u16 val) override;
void DataWrite32(u32 addr, u32 val) override;
void DataWrite32S(u32 addr, u32 val) override;
bool DataRead8(u32 addr, u8 reg) override;
bool DataRead16(u32 addr, u8 reg) override;
bool DataRead32(u32 addr, u8 reg) override;
bool DataRead32S(u32 addr, u8 reg) override;
bool DataWrite8(u32 addr, u8 val, u8 reg) override;
bool DataWrite16(u32 addr, u16 val, u8 reg) override;
bool DataWrite32(u32 addr, u32 val, u8 reg) override;
bool DataWrite32S(u32 addr, u32 val, u8 reg) override;
void AddCycles_C() override;
void AddCycles_CI(s32 num) override;
void AddCycles_CDI() override;
void AddCycles_CD() override;
void QueueFunction(void (ARMv4::*QueueEntry)(void));
void StartExecARM();
void StartExecTHUMB();
// Queue entry: copies RetVal into the second prefetch slot, NextInstr[1].
void UpdateNextInstr1()
{
    NextInstr[1] = RetVal;
}
void JumpTo_2();
void JumpTo_3A();
void JumpTo_3B();
void DRead8_2();
void DRead16_2();
void DRead32_2();
void DRead32S_2();
void DWrite8_2();
void DWrite16_2();
void DWrite32_2();
void DWrite32S_2();
void AddExecute();
void AddExtraCycle();
void QueueUpdateMode() { UpdateMode(QueueMode[0], QueueMode[1], true); }
// Queue entry: sign-extends the low 8 bits of R[ExtReg], unless the bit for
// ExtReg is set in LDRFailedRegs, in which case the register is left untouched.
void SignExtend8()
{
    if (LDRFailedRegs & 1<<ExtReg) return;
    R[ExtReg] = (s8)R[ExtReg];
}
// Queue entry: sign-extends the low 16 bits of R[ExtReg], unless the bit for
// ExtReg is set in LDRFailedRegs, in which case the register is left untouched.
void SignExtend16()
{
    if (LDRFailedRegs & 1<<ExtReg) return;
    R[ExtReg] = (s16)R[ExtReg];
}
// Queue entry: rotates R[ExtReg] right by ExtROROffs via ROR(), unless the bit for
// ExtReg is set in LDRFailedRegs, in which case the register is left untouched.
void ROR32()
{
    if (LDRFailedRegs & 1<<ExtReg) return;
    R[ExtReg] = ROR(R[ExtReg], ExtROROffs);
}
protected:
u8 BusRead8(u32 addr) override;
u16 BusRead16(u32 addr) override;

View File

@ -36,6 +36,7 @@ namespace melonDS::ARMInterpreter
// Handler for undefined ARM-state instructions: accounts for the code cycle and
// logs the offending opcode and its address (R15 - 8) before raising the exception.
void A_UNK(ARM* cpu)
{
cpu->AddCycles_C();
// CurInstr is 64 bits wide, but %08X consumes an unsigned int: narrow it explicitly so
// the varargs call is well-defined and the following address argument is read correctly.
Log(LogLevel::Warn, "undefined ARM%d instruction %08X @ %08X\n", cpu->Num?7:9, static_cast<u32>(cpu->CurInstr), cpu->R[15]-8);
#ifdef GDBSTUB_ENABLED
cpu->GdbStub.Enter(cpu->GdbStub.IsConnected(), Gdb::TgtStatus::FaultInsn, cpu->R[15]-8);
@ -49,11 +50,13 @@ void A_UNK(ARM* cpu)
cpu->R_UND[2] = oldcpsr;
cpu->R[14] = cpu->R[15] - 4;
cpu->JumpTo(cpu->ExceptionBase + 0x04);
}
// Handler for undefined THUMB-state instructions: accounts for the code cycle and
// logs the offending opcode and its address (R15 - 4) before raising the exception.
void T_UNK(ARM* cpu)
{
cpu->AddCycles_C();
// CurInstr is 64 bits wide, but %04X consumes an unsigned int: narrow it explicitly so
// the varargs call is well-defined and the following address argument is read correctly.
Log(LogLevel::Warn, "undefined THUMB%d instruction %04X @ %08X\n", cpu->Num?7:9, static_cast<u32>(cpu->CurInstr), cpu->R[15]-4);
#ifdef GDBSTUB_ENABLED
cpu->GdbStub.Enter(cpu->GdbStub.IsConnected(), Gdb::TgtStatus::FaultInsn, cpu->R[15]-4);
@ -66,13 +69,25 @@ void T_UNK(ARM* cpu)
cpu->R_UND[2] = oldcpsr;
cpu->R[14] = cpu->R[15] - 2;
cpu->JumpTo(cpu->ExceptionBase + 0x04);
}
// BKPT handler. CPU number 1 treats the opcode as undefined (A_UNK); otherwise the
// CPU is handled as an ARMv5 and a prefetch abort is raised after logging a "BKPT: " prefix.
void A_BKPT(ARM* cpu)
{
    if (cpu->Num != 1)
    {
        Log(LogLevel::Warn, "BKPT: "); // combine with the prefetch abort warning message
        static_cast<ARMv5*>(cpu)->PrefetchAbort();
        return;
    }

    // checkme
    A_UNK(cpu);
}
void A_MSR_IMM(ARM* cpu)
{
if ((cpu->Num != 1) && (cpu->CurInstr & ((0x7<<16)|(1<<22)))) cpu->AddCycles_CI(2); // arm9 cpsr_sxc & spsr
else cpu->AddCycles_C();
u32* psr;
if (cpu->CurInstr & (1<<22))
{
@ -90,7 +105,6 @@ void A_MSR_IMM(ARM* cpu)
case 0x1A:
case 0x1B: psr = &cpu->R_UND[2]; break;
default:
cpu->AddCycles_C();
return;
}
}
@ -101,12 +115,9 @@ void A_MSR_IMM(ARM* cpu)
u32 mask = 0;
if (cpu->CurInstr & (1<<16)) mask |= 0x000000FF;
if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00;
if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000;
if (cpu->CurInstr & (1<<19)) mask |= 0xFF000000;
if (!(cpu->CurInstr & (1<<22)))
mask &= 0xFFFFFFDF;
//if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; // unused by arm 7 & 9
//if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; // unused by arm 7 & 9
if (cpu->CurInstr & (1<<19)) mask |= ((cpu->Num==1) ? 0xF0000000 : 0xF8000000);
if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00;
@ -121,11 +132,29 @@ void A_MSR_IMM(ARM* cpu)
if (!(cpu->CurInstr & (1<<22)))
cpu->UpdateMode(oldpsr, cpu->CPSR);
cpu->AddCycles_C();
// The PSR write left bit 0x20 (the T bit) set in CPSR: switch to THUMB execution on
// CPU 0, or refuse the change on the other CPU.
if (cpu->CPSR & 0x20) [[unlikely]]
{
if (cpu->Num == 0)
{
cpu->R[15] += 2; // pc should actually increment by 4 one more time after switching to thumb mode without a pipeline flush, this gets the same effect.
((ARMv5*)cpu)->StartExec = &ARMv5::StartExecTHUMB;
// only patch the head of the function queue when no tracked access is pending
if (cpu->MRTrack.Type == MainRAMType::Null) ((ARMv5*)cpu)->FuncQueue[0] = ((ARMv5*)cpu)->StartExec;
}
else
{
// this is the immediate form of MSR; the message previously said "MSR REG" (copied from A_MSR_REG)
Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR IMM T bit change on ARM7\n");
cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least
}
}
}
void A_MSR_REG(ARM* cpu)
{
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr & 0xF);
if ((cpu->Num != 1) && (cpu->CurInstr & ((0x7<<16)|(1<<22)))) cpu->AddCycles_CI(2); // arm9 cpsr_sxc & spsr
else cpu->AddCycles_C();
u32* psr;
if (cpu->CurInstr & (1<<22))
{
@ -143,7 +172,6 @@ void A_MSR_REG(ARM* cpu)
case 0x1A:
case 0x1B: psr = &cpu->R_UND[2]; break;
default:
cpu->AddCycles_C();
return;
}
}
@ -154,12 +182,9 @@ void A_MSR_REG(ARM* cpu)
u32 mask = 0;
if (cpu->CurInstr & (1<<16)) mask |= 0x000000FF;
if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00;
if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000;
if (cpu->CurInstr & (1<<19)) mask |= 0xFF000000;
if (!(cpu->CurInstr & (1<<22)))
mask &= 0xFFFFFFDF;
//if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; // unused by arm 7 & 9
//if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; // unused by arm 7 & 9
if (cpu->CurInstr & (1<<19)) mask |= ((cpu->Num==1) ? 0xF0000000 : 0xF8000000);
if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00;
@ -174,7 +199,20 @@ void A_MSR_REG(ARM* cpu)
if (!(cpu->CurInstr & (1<<22)))
cpu->UpdateMode(oldpsr, cpu->CPSR);
cpu->AddCycles_C();
if (cpu->CPSR & 0x20) [[unlikely]]
{
if (cpu->Num == 0)
{
cpu->R[15] += 2; // pc should actually increment by 4 one more time after switching to thumb mode without a pipeline flush, this gets the same effect.
((ARMv5*)cpu)->StartExec = &ARMv5::StartExecTHUMB;
if (cpu->MRTrack.Type == MainRAMType::Null) ((ARMv5*)cpu)->FuncQueue[0] = ((ARMv5*)cpu)->StartExec;
}
else
{
Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n");
cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least
}
}
}
void A_MRS(ARM* cpu)
@ -201,8 +239,19 @@ void A_MRS(ARM* cpu)
else
psr = cpu->CPSR;
cpu->R[(cpu->CurInstr>>12) & 0xF] = psr;
cpu->AddCycles_C();
if (cpu->Num != 1) // arm9
{
cpu->AddCycles_CI(2); // 1 X
((ARMv5*)cpu)->AddCycles_MW(2); // 2 M
}
else cpu->AddCycles_C(); // arm7
if (((cpu->CurInstr>>12) & 0xF) == 15)
{
if (cpu->Num == 1) // doesn't seem to jump on the arm9? checkme
cpu->JumpTo(psr & ~0x1); // checkme: this shouldn't be able to switch to thumb?
}
else cpu->R[(cpu->CurInstr>>12) & 0xF] = psr;
}
@ -212,14 +261,18 @@ void A_MCR(ARM* cpu)
return A_UNK(cpu);
u32 cp = (cpu->CurInstr >> 8) & 0xF;
//u32 op = (cpu->CurInstr >> 21) & 0x7;
u32 op = (cpu->CurInstr >> 21) & 0x7;
u32 cn = (cpu->CurInstr >> 16) & 0xF;
u32 cm = cpu->CurInstr & 0xF;
u32 cpinfo = (cpu->CurInstr >> 5) & 0x7;
u32 val = cpu->R[(cpu->CurInstr>>12)&0xF];
if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4;
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr>>12)&0xF);
if (cpu->Num==0 && cp==15)
{
((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo, cpu->R[(cpu->CurInstr>>12)&0xF]);
((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo|(op<<12), val); // TODO: IF THIS RAISES AN EXCEPTION WE DO A DOUBLE CODE FETCH; FIX THAT
}
else if (cpu->Num==1 && cp==14)
{
@ -227,11 +280,13 @@ void A_MCR(ARM* cpu)
}
else
{
Log(LogLevel::Warn, "bad MCR opcode p%d,%d,%d,%d on ARM%d\n", cp, cn, cm, cpinfo, cpu->Num?7:9);
Log(LogLevel::Warn, "bad MCR opcode p%d, %d, reg, c%d, c%d, %d on ARM%d\n", cp, op, cn, cm, cpinfo, cpu->Num?7:9);
return A_UNK(cpu); // TODO: check what kind of exception it really is
}
cpu->AddCycles_CI(1 + 1); // TODO: checkme
// TODO: SINCE THIS DOES A CODE FETCH WE NEED TO DELAY ANY MPU UPDATES UNTIL *AFTER* THE CODE FETCH
if (cpu->Num==0) cpu->AddCycles_CI(5); // checkme
else /* ARM7 */ cpu->AddCycles_CI(1 + 1); // TODO: checkme
}
void A_MRC(ARM* cpu)
@ -240,14 +295,21 @@ void A_MRC(ARM* cpu)
return A_UNK(cpu);
u32 cp = (cpu->CurInstr >> 8) & 0xF;
//u32 op = (cpu->CurInstr >> 21) & 0x7;
u32 op = (cpu->CurInstr >> 21) & 0x7;
u32 cn = (cpu->CurInstr >> 16) & 0xF;
u32 cm = cpu->CurInstr & 0xF;
u32 cpinfo = (cpu->CurInstr >> 5) & 0x7;
u32 rd = (cpu->CurInstr>>12) & 0xF;
if (cpu->Num==0 && cp==15)
{
cpu->R[(cpu->CurInstr>>12)&0xF] = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo);
if (rd != 15) cpu->R[rd] = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo|(op<<12));
else
{
// r15 updates the top 4 bits of the cpsr, done to "allow for conditional branching based on coprocessor status"
u32 flags = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo|(op<<12)) & 0xF0000000; // TODO: IF THIS RAISES AN EXCEPTION WE DO A DOUBLE CODE FETCH; FIX THAT
cpu->CPSR = (cpu->CPSR & ~0xF0000000) | flags;
}
}
else if (cpu->Num==1 && cp==14)
{
@ -255,17 +317,24 @@ void A_MRC(ARM* cpu)
}
else
{
Log(LogLevel::Warn, "bad MRC opcode p%d,%d,%d,%d on ARM%d\n", cp, cn, cm, cpinfo, cpu->Num?7:9);
Log(LogLevel::Warn, "bad MRC opcode p%d, %d, reg, c%d, c%d, %d on ARM%d\n", cp, op, cn, cm, cpinfo, cpu->Num?7:9);
return A_UNK(cpu); // TODO: check what kind of exception it really is
}
cpu->AddCycles_CI(2 + 1); // TODO: checkme
if (cpu->Num != 1)
{
cpu->AddCycles_CI(2); // 1 Execute cycle
((ARMv5*)cpu)->AddCycles_MW(2); // 2 Memory cycles
((ARMv5*)cpu)->SetupInterlock((cpu->CurInstr >> 12) & 0xF);
}
else cpu->AddCycles_CI(2 + 1); // TODO: checkme
}
void A_SVC(ARM* cpu)
void A_SVC(ARM* cpu) // A_SWI
{
cpu->AddCycles_C();
u32 oldcpsr = cpu->CPSR;
cpu->CPSR &= ~0xBF;
cpu->CPSR |= 0x93;
@ -273,11 +342,13 @@ void A_SVC(ARM* cpu)
cpu->R_SVC[2] = oldcpsr;
cpu->R[14] = cpu->R[15] - 4;
cpu->JumpTo(cpu->ExceptionBase + 0x08);
}
void T_SVC(ARM* cpu)
void T_SVC(ARM* cpu) // T_SWI
{
cpu->AddCycles_C();
u32 oldcpsr = cpu->CPSR;
cpu->CPSR &= ~0xBF;
cpu->CPSR |= 0x93;
@ -285,6 +356,7 @@ void T_SVC(ARM* cpu)
cpu->R_SVC[2] = oldcpsr;
cpu->R[14] = cpu->R[15] - 2;
cpu->JumpTo(cpu->ExceptionBase + 0x08);
}

View File

@ -36,6 +36,7 @@ void A_MRS(ARM* cpu);
void A_MCR(ARM* cpu);
void A_MRC(ARM* cpu);
void A_SVC(ARM* cpu);
void A_BKPT(ARM* cpu);
void T_SVC(ARM* cpu);

File diff suppressed because it is too large Load Diff

View File

@ -27,12 +27,14 @@ using Platform::LogLevel;
// ARM-state B: unconditional branch to R15 plus a signed immediate offset.
void A_B(ARM* cpu)
{
// Cycle accounting is done before the jump is taken.
cpu->AddCycles_C();
// Shifting left by 8 moves bit 23 of the instruction into the sign position; the
// arithmetic shift right by 6 then yields the sign-extended 24-bit field times 4.
s32 offset = (s32)(cpu->CurInstr << 8) >> 6;
cpu->JumpTo(cpu->R[15] + offset);
}
void A_BL(ARM* cpu)
{
cpu->AddCycles_C();
s32 offset = (s32)(cpu->CurInstr << 8) >> 6;
cpu->R[14] = cpu->R[15] - 4;
cpu->JumpTo(cpu->R[15] + offset);
@ -40,6 +42,7 @@ void A_BL(ARM* cpu)
void A_BLX_IMM(ARM* cpu)
{
cpu->AddCycles_C();
s32 offset = (s32)(cpu->CurInstr << 8) >> 6;
if (cpu->CurInstr & 0x01000000) offset += 2;
cpu->R[14] = cpu->R[15] - 4;
@ -48,11 +51,15 @@ void A_BLX_IMM(ARM* cpu)
// ARM-state BX: jump to the address held in the register selected by bits 0-3 of the instruction.
void A_BX(ARM* cpu)
{
// Num == 0 is the ARM9 (an ARMv5 instance); only that core performs the interlock
// check on the source register before the branch.
// NOTE(review): presumably this stalls until a pending load into that register completes - confirm in ARMv5.
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr&0xF);
cpu->AddCycles_C();
// The raw register value is handed to JumpTo unchanged.
cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]);
}
void A_BLX_REG(ARM* cpu)
{
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr&0xF);
cpu->AddCycles_C();
u32 lr = cpu->R[15] - 4;
cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]);
cpu->R[14] = lr;
@ -62,22 +69,25 @@ void A_BLX_REG(ARM* cpu)
void T_BCOND(ARM* cpu)
{
cpu->AddCycles_C();
if (cpu->CheckCondition((cpu->CurInstr >> 8) & 0xF))
{
s32 offset = (s32)(cpu->CurInstr << 24) >> 23;
cpu->JumpTo(cpu->R[15] + offset + 1);
}
else
cpu->AddCycles_C();
}
// Thumb-state BX: jump to the address held in the register selected by bits 3-6 of the instruction.
void T_BX(ARM* cpu)
{
// Only the ARM9 (Num == 0, an ARMv5 instance) performs the interlock check on the source register.
// NOTE(review): presumably this stalls until a pending load into that register completes - confirm in ARMv5.
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0xF);
cpu->AddCycles_C();
// The raw register value is handed to JumpTo unchanged.
cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]);
}
void T_BLX_REG(ARM* cpu)
{
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0xF);
cpu->AddCycles_C();
if (cpu->Num==1)
{
Log(LogLevel::Warn, "!! THUMB BLX_REG ON ARM7\n");
@ -91,6 +101,7 @@ void T_BLX_REG(ARM* cpu)
// Thumb-state B: unconditional branch to R15 plus a signed 11-bit immediate offset.
void T_B(ARM* cpu)
{
cpu->AddCycles_C();
// Shifting the 11-bit field left by 21 puts its top bit in the sign position; the
// arithmetic shift right by 20 then yields the sign-extended field times 2.
s32 offset = (s32)((cpu->CurInstr & 0x7FF) << 21) >> 20;
// NOTE(review): the +1 sets bit 0 of the target, presumably so JumpTo stays in Thumb state - confirm in JumpTo.
cpu->JumpTo(cpu->R[15] + offset + 1);
}
@ -104,6 +115,10 @@ void T_BL_LONG_1(ARM* cpu)
void T_BL_LONG_2(ARM* cpu)
{
if ((cpu->CurInstr & 0x1801) == 0x0801) // "BLX" with bit 0 set is an undefined instruction.
return T_UNK(cpu); // TODO: Check ARM7 for exceptions
cpu->AddCycles_C();
s32 offset = (cpu->CurInstr & 0x7FF) << 1;
u32 pc = cpu->R[14] + offset;
cpu->R[14] = (cpu->R[15] - 2) | 1;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,136 @@
#ifndef ARMINTERPRETER_MULTIPLYSUPERLLE_H
#define ARMINTERPRETER_MULTIPLYSUPERLLE_H
#include "types.h"
using namespace melonDS;
/*
Copyright (c) 2024 zaydlang
This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software.
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
// code taken from: (also features a few alternative implementations that could maybe be worth looking at?)
// https://github.com/calc84maniac/multiplication-algorithm/blob/master/impl_opt.h
// based on research that can be found here: https://bmchtech.github.io/post/multiply/
// the code in this file is dedicated to handling the calculation of the carry flag for multiplication (S variant) instructions on the ARM7TDMI.
// Takes a multiplier between -0x01000000 and 0x00FFFFFF, cycles between 0 and 2
// Runs a Booth-recoded carry-save accumulation over `multiplier` in groups of four
// 2-bit steps and returns bit 31 of the final carry word.
//   multiplicand - first operand (its bit 0 is forced to 1 internally)
//   multiplier   - second operand, consumed from the low bits upwards
//   accumulator  - initial value of the running output/sum
// NOTE(review): `shift` starts at 29 and drops by 2 per step, so a fourth pass of the
// outer loop would reach a shift count of -1. Three passes sign-extend from bit 24,
// which covers exactly the documented multiplier range -0x01000000..0x00FFFFFF;
// callers appear to be expected to stay inside that range - confirm.
static inline bool booths_multiplication32_opt(u32 multiplicand, u32 multiplier, u32 accumulator) {
// Set the low bit of the multiplicand to cause negation to invert the upper bits, this bit can't propagate to bit 31
multiplicand |= 1;
// Optimized first iteration
// booth is 0 or 0xFFFFFFFF (-1) depending on bit 0 of the multiplier
u32 booth = (s32)(multiplier << 31) >> 31;
u32 carry = booth * multiplicand;
// Pre-populate accumulator for output
u32 output = accumulator;
u32 sum = output + carry;
int shift = 29;
do {
for (int i = 0; i < 4; i++, shift -= 2) {
// Get next booth factor (-2 to 2, shifted left by 30-shift)
// next_booth is the multiplier sign-extended from bit (31 - shift)
u32 next_booth = (s32)(multiplier << shift) >> shift;
u32 factor = next_booth - booth;
booth = next_booth;
// Get scaled value of booth addend
u32 addend = multiplicand * factor;
// Combine the addend with the CSA
// Not performing any masking seems to work because the lower carries can't propagate to bit 31
output ^= carry ^ addend;
sum += addend;
carry = sum - output;
}
// Stop once the sign-extended prefix processed so far equals the whole multiplier
} while (booth != multiplier);
return carry >> 31;
}
// Takes a multiplicand shifted right by 6 and a multiplier shifted right by 26 (zero or sign extended)
// Evaluates only the last booth steps of a 64-bit multiply-accumulate and returns
// bit 31 of (sum ^ output), which the final comment below identifies as carry bit 63.
//   multiplicand - caller passes the operand already shifted right by 6
//   multiplier   - caller passes the operand already shifted right by 26
//   accum_hi     - upper 32 bits of the 64-bit accumulator
static inline bool booths_multiplication64_opt(u32 multiplicand, u32 multiplier, u32 accum_hi) {
// Skipping the first 14 iterations seems to work because the lower carries can't propagate to bit 63
// This means only magic bits 62-61 are needed (which requires decoding 3 booth chunks),
// and only the last two booth iterations are needed
// Set the low bit of the multiplicand to cause negation to invert the upper bits
multiplicand |= 1;
// Pre-populate magic bit 61 for carry
u32 carry = ~accum_hi & UINT32_C(0x20000000);
// Pre-populate magic bits 63-60 for output (with carry magic pre-added in)
u32 output = accum_hi - UINT32_C(0x08000000);
// Get factors from the top 3 booth chunks
// booth0/1/2 are the multiplier sign-extended from bits 4, 2 and 0 respectively
u32 booth0 = (s32)(multiplier << 27) >> 27;
u32 booth1 = (s32)(multiplier << 29) >> 29;
u32 booth2 = (s32)(multiplier << 31) >> 31;
u32 factor0 = multiplier - booth0;
u32 factor1 = booth0 - booth1;
u32 factor2 = booth1 - booth2;
// Get scaled value of the 3rd top booth addend
u32 addend = multiplicand * factor2;
// Finalize bits 61-60 of output magic using its sign
output -= addend & UINT32_C(0x10000000);
// Get scaled value of the 2nd top booth addend
addend = multiplicand * factor1;
// Finalize bits 63-62 of output magic using its sign
output -= addend & UINT32_C(0x40000000);
// Get the carry from the CSA in bit 61 and propagate it to bit 62, which is not processed in this iteration
u32 sum = output + (addend & UINT32_C(0x20000000));
// Subtract out the carry magic to get the actual output magic
output -= carry;
// Get scaled value of the 1st top booth addend
addend = multiplicand * factor0;
// Add to bit 62 and propagate the carry
sum += addend & UINT32_C(0x40000000);
// Cancel out the output magic bit 63 to get the carry bit 63
return (sum ^ output) >> 31;
}
// also for MLAS and MUL (thumb ver.)
inline bool MULSCarry(s32 rm, s32 rs, u32 rn, bool lastcycle)
{
    // Carry flag for a 32-bit flag-setting multiply.
    // With lastcycle set, the result is true exactly when rs >> 30 equals -2;
    // otherwise the 32-bit booth helper computes it from all three operands.
    return lastcycle ? ((rs >> 30) == -2)
                     : booths_multiplication32_opt(rm, rs, rn);
}
// also for UMLALS
inline bool UMULLSCarry(u64 rd, u32 rm, u32 rs, bool lastcycle)
{
    // Carry flag for an unsigned long flag-setting multiply.
    // Without lastcycle, only the low word of rd feeds the 32-bit booth helper.
    if (!lastcycle)
        return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF);

    // With lastcycle, the 64-bit helper takes the operands pre-shifted (logical
    // shifts, since both are unsigned) plus the high word of rd.
    return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32);
}
// also for SMLALS
inline bool SMULLSCarry(u64 rd, s32 rm, s32 rs, bool lastcycle)
{
    // Carry flag for a signed long flag-setting multiply.
    // Without lastcycle, only the low word of rd feeds the 32-bit booth helper.
    if (!lastcycle)
        return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF);

    // With lastcycle, the 64-bit helper takes the operands pre-shifted; rm and rs
    // are signed here, so the shifts happen on the signed values before they are
    // converted to the helper's unsigned parameters.
    return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32);
}
#endif

View File

@ -51,10 +51,10 @@ namespace melonDS
using Platform::Log;
using Platform::LogLevel;
static_assert(offsetof(ARM, CPSR) == ARM_CPSR_offset, "");
/*static_assert(offsetof(ARM, CPSR) == ARM_CPSR_offset, "");
static_assert(offsetof(ARM, Cycles) == ARM_Cycles_offset, "");
static_assert(offsetof(ARM, StopExecution) == ARM_StopExecution_offset, "");
*/
#define JIT_DEBUGPRINT(msg, ...)
//#define JIT_DEBUGPRINT(msg, ...) Platform::Log(Platform::LogLevel::Debug, msg, ## __VA_ARGS__)
@ -586,7 +586,7 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept
u32 numWriteAddrs = 0, writeAddrsTranslated = 0;
cpu->FillPipeline();
u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]};
u32 nextInstr[2] = {(u32)cpu->NextInstr[0], (u32)cpu->NextInstr[1]};
u32 nextInstrAddr[2] = {blockAddr, r15};
JIT_DEBUGPRINT("start block %x %08x (%x)\n", blockAddr, cpu->CPSR, localAddr);
@ -644,17 +644,17 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept
}
else
{
nextInstr[1] = cpuv5->CodeRead32(r15, false);
//nextInstr[1] = cpuv5->CodeRead32(r15, false);
instrs[i].CodeCycles = cpu->CodeCycles;
}
}
else
{
ARMv4* cpuv4 = (ARMv4*)cpu;
if (thumb)
nextInstr[1] = cpuv4->CodeRead16(r15);
else
nextInstr[1] = cpuv4->CodeRead32(r15);
if (thumb);
//nextInstr[1] = cpuv4->CodeRead16(r15);
else;
// nextInstr[1] = cpuv4->CodeRead32(r15);
instrs[i].CodeCycles = cpu->CodeCycles;
}
instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr, LiteralOptimizations);
@ -722,7 +722,7 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept
addressRanges[numAddressRanges++] = translatedAddrRounded;
addressMasks[j] |= 1 << ((translatedAddr & 0x1FF) / 16);
JIT_DEBUGPRINT("literal loading %08x %08x %08x %08x\n", literalAddr, translatedAddr, addressMasks[j], addressRanges[j]);
cpu->DataRead32(literalAddr, &literalValues[numLiterals]);
//cpu->DataRead32(literalAddr, &literalValues[numLiterals]);
literalLoadAddrs[numLiterals++] = translatedAddr;
}
}

View File

@ -83,14 +83,14 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
// doesn't matter if we put garbage in the MSbs there
if (addr & 0x2)
{
cpu9->CodeRead32(addr-2, true) >> 16;
//cpu9->CodeRead32(addr-2, true) >> 16;
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+2, false);
//cpu9->CodeRead32(addr+2, false);
cycles += CurCPU->CodeCycles;
}
else
{
cpu9->CodeRead32(addr, true);
//cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
}
}
@ -99,9 +99,9 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
addr &= ~0x3;
newPC = addr+4;
cpu9->CodeRead32(addr, true);
//cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+4, false);
//cpu9->CodeRead32(addr+4, false);
cycles += cpu9->CodeCycles;
}

View File

@ -79,18 +79,18 @@ bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
CurCPU->R[15] = R15;
if (size == 32)
{
CurCPU->DataRead32(addr & ~0x3, &val);
//CurCPU->DataRead32(addr & ~0x3, &val);
val = melonDS::ROR(val, (addr & 0x3) << 3);
}
else if (size == 16)
{
CurCPU->DataRead16(addr & ~0x1, &val);
//CurCPU->DataRead16(addr & ~0x1, &val);
if (signExtend)
val = ((s32)val << 16) >> 16;
}
else
{
CurCPU->DataRead8(addr, &val);
// CurCPU->DataRead8(addr, &val);
if (signExtend)
val = ((s32)val << 24) >> 24;
}

View File

@ -72,14 +72,14 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
// doesn't matter if we put garbage in the MSbs there
if (addr & 0x2)
{
cpu9->CodeRead32(addr-2, true);
//cpu9->CodeRead32(addr-2, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+2, false);
//cpu9->CodeRead32(addr+2, false);
cycles += CurCPU->CodeCycles;
}
else
{
cpu9->CodeRead32(addr, true);
//cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
}
}
@ -88,9 +88,9 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
addr &= ~0x3;
newPC = addr+4;
cpu9->CodeRead32(addr, true);
//cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+4, false);
//cpu9->CodeRead32(addr+4, false);
cycles += cpu9->CodeCycles;
}

View File

@ -85,18 +85,18 @@ bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
CurCPU->R[15] = R15;
if (size == 32)
{
CurCPU->DataRead32(addr & ~0x3, &val);
//CurCPU->DataRead32(addr & ~0x3, &val);
val = melonDS::ROR(val, (addr & 0x3) << 3);
}
else if (size == 16)
{
CurCPU->DataRead16(addr & ~0x1, &val);
//CurCPU->DataRead16(addr & ~0x1, &val);
if (signExtend)
val = ((s32)val << 16) >> 16;
}
else
{
CurCPU->DataRead8(addr, &val);
//CurCPU->DataRead8(addr, &val);
if (signExtend)
val = ((s32)val << 24) >> 24;
}

View File

@ -194,6 +194,7 @@ const u32 A_BX = A_BranchAlways | A_Read0 | ak(ak_BX);
const u32 A_BLX_REG = A_BranchAlways | A_Link | A_Read0 | ak(ak_BLX_REG);
const u32 A_UNK = A_BranchAlways | A_Link | ak(ak_UNK);
const u32 A_BKPT = A_BranchAlways | A_Link | ak(ak_UNK);
const u32 A_MSR_IMM = ak(ak_MSR_IMM);
const u32 A_MSR_REG = A_Read0 | ak(ak_MSR_REG);
const u32 A_MRS = A_Write12 | ak(ak_MRS);

View File

@ -130,7 +130,7 @@ INSTRFUNC_PROTO(ARMInstrTable[4096]) =
// 0001 0010 0000
A_MSR_REG, A_BX, A_UNK, A_BLX_REG,
A_UNK, A_QSUB, A_UNK, A_UNK,
A_UNK, A_QSUB, A_UNK, A_BKPT,
A_SMLAWy, A_UNK, A_SMULWy, A_STRH_REG,
A_SMLAWy, A_LDRD_REG, A_SMULWy, A_STRD_REG,

File diff suppressed because it is too large Load Diff

170
src/CP15_Constants.h Normal file
View File

@ -0,0 +1,170 @@
/*
Copyright 2016-2023 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef MELONDS_CP15CONSTANTS_H
#define MELONDS_CP15CONSTANTS_H
#include "types.h"
namespace melonDS
{
// Compile-time constants describing the ARM9 CP15 coprocessor as modelled by the
// emulator: cache geometry, register bit layouts, protection-region encoding and
// the flag bits stored in the internal protection-unit maps.

/* ICACHE Layout constants */
constexpr u32 ICACHE_SIZE_LOG2 = 13;
constexpr u32 ICACHE_SIZE = 1 << ICACHE_SIZE_LOG2;
constexpr u32 ICACHE_SETS_LOG2 = 2;
constexpr u32 ICACHE_SETS = 1 << ICACHE_SETS_LOG2;
constexpr u32 ICACHE_LINELENGTH_ENCODED = 2;
constexpr u32 ICACHE_LINELENGTH_LOG2 = ICACHE_LINELENGTH_ENCODED + 3;
constexpr u32 ICACHE_LINELENGTH = 8 * (1 << ICACHE_LINELENGTH_ENCODED);
constexpr u32 ICACHE_LINESPERSET = ICACHE_SIZE / (ICACHE_SETS * ICACHE_LINELENGTH);
/* DCACHE Layout constants */
constexpr u32 DCACHE_SIZE_LOG2 = 12;
constexpr u32 DCACHE_SIZE = 1 << DCACHE_SIZE_LOG2;
constexpr u32 DCACHE_SETS_LOG2 = 2;
constexpr u32 DCACHE_SETS = 1 << DCACHE_SETS_LOG2;
constexpr u32 DCACHE_LINELENGTH_ENCODED = 2;
constexpr u32 DCACHE_LINELENGTH_LOG2 = DCACHE_LINELENGTH_ENCODED + 3;
constexpr u32 DCACHE_LINELENGTH = 8 * (1 << DCACHE_LINELENGTH_ENCODED);
constexpr u32 DCACHE_LINESPERSET = DCACHE_SIZE / (DCACHE_SETS * DCACHE_LINELENGTH);
/* CP15 Cache Data TAGs */
constexpr u32 CACHE_FLAG_VALID = (1 << 4);
constexpr u32 CACHE_FLAG_DIRTY_LOWERHALF = (1 << 2);
constexpr u32 CACHE_FLAG_DIRTY_UPPERHALF = (1 << 3);
constexpr u32 CACHE_FLAG_DIRTY_MASK = (3 << 2);
constexpr u32 CACHE_FLAG_SET_MASK = (3 << 0);
constexpr u32 CACHE_FLAG_MASK = 0x1F;
/* CP15 Cache Type Register */
constexpr u32 CACHE_TR_LOCKDOWN_TYPE_B = (7 << 25);
constexpr u32 CACHE_TR_NONUNIFIED = (1 << 24);
/* CP15 I/DCache LockDown registers */
// Unsigned literal so the shift into bit 31 never goes through a signed int.
constexpr u32 CACHE_LOCKUP_L = (1u << 31);
/* CP15 Main ID register */
constexpr u32 CP15_MAINID_IMPLEMENTOR_ARM = (0x41 << 24);
constexpr u32 CP15_MAINID_IMPLEMENTOR_DEC = (0x44 << 24);
constexpr u32 CP15_MAINID_IMPLEMENTOR_MOTOROLA = (0x4D << 24);
constexpr u32 CP15_MAINID_IMPLEMENTOR_MARVELL = (0x56 << 24);
constexpr u32 CP15_MAINID_IMPLEMENTOR_INTEL = (0x69 << 24);
constexpr u32 CP15_MAINID_VARIANT_0 = (0 << 20);
constexpr u32 CP15_MAINID_ARCH_v4 = (1 << 16);
constexpr u32 CP15_MAINID_ARCH_v4T = (2 << 16);
constexpr u32 CP15_MAINID_ARCH_v5 = (3 << 16);
constexpr u32 CP15_MAINID_ARCH_v5T = (4 << 16);
constexpr u32 CP15_MAINID_ARCH_v5TE = (5 << 16);
constexpr u32 CP15_MAINID_ARCH_v5TEJ = (6 << 16);
constexpr u32 CP15_MAINID_ARCH_v6 = (7 << 16);
constexpr u32 CP15_MAINID_IMPLEMENTATION_946 = (0x946 << 4);
constexpr u32 CP15_MAINID_REVISION_0 = (0 << 0);
constexpr u32 CP15_MAINID_REVISION_1 = (1 << 0);
/* CP15 TCM Size Register */
constexpr u32 CP15_TCMSIZE_DTCM_16KB = (5 << 18);
constexpr u32 CP15_TCMSIZE_ITCM_32KB = (6 << 6);
/* CP15 Cache and Write Buffer Control Register */
constexpr u32 CP15_CACHE_CR_ROUNDROBIN = (1 << 14);
constexpr u32 CP15_CACHE_CR_ICACHEENABLE = (1 << 12);
constexpr u32 CP15_CACHE_CR_DCACHEENABLE = (1 << 2);
constexpr u32 CP15_CACHE_CR_WRITEBUFFERENABLE = (1 << 3);
/* CP15 TCM Control Register */
constexpr u32 CP15_TCM_CR_DTCM_ENABLE = (1 << 16);
constexpr u32 CP15_TCM_CR_ITCM_ENABLE = (1 << 18);
constexpr u32 CP15_TCM_CR_DTCM_LOADMODE = (1 << 17); // TODO
constexpr u32 CP15_TCM_CR_ITCM_LOADMODE = (1 << 19); // TODO
/* CP15 DTCM Settings Register */
constexpr u32 CP15_DTCM_SIZE_BASE = 0x200;
constexpr u32 CP15_DTCM_SIZE_MASK = 0x3E;
constexpr u32 CP15_DTCM_SIZE_POS = 1;
constexpr u32 CP15_DTCM_SIZE_MIN = 0b00011;
constexpr u32 CP15_DTCM_SIZE_MAX = 0b10111;
constexpr u32 CP15_DTCM_BASE_MASK = 0xFFFFF000;
/* CP15 ITCM Settings Register */
constexpr u32 CP15_ITCM_SIZE_BASE = 0x200;
constexpr u32 CP15_ITCM_SIZE_MASK = 0x3E;
constexpr u32 CP15_ITCM_SIZE_POS = 1;
constexpr u32 CP15_ITCM_SIZE_MIN = 0b00011;
constexpr u32 CP15_ITCM_SIZE_MAX = 0b10111;
constexpr u32 CP15_ITCM_BASE_MASK = 0x00000000;
/* CP15 Control Register */
constexpr u32 CP15_CR_MPUENABLE = (1 << 0);
constexpr u32 CP15_CR_BIGENDIAN = (1 << 7);
constexpr u32 CP15_CR_HIGHEXCEPTIONBASE = (1 << 13);
constexpr u32 CP15_CR_DISABLE_THUMBBIT = (1 << 15);
constexpr u32 CP15_CR_CHANGEABLE_MASK = CP15_CR_MPUENABLE | CP15_CR_BIGENDIAN | CP15_CACHE_CR_DCACHEENABLE
| CP15_CACHE_CR_ICACHEENABLE | CP15_CR_HIGHEXCEPTIONBASE
| CP15_TCM_CR_DTCM_ENABLE | CP15_TCM_CR_ITCM_ENABLE
| CP15_TCM_CR_DTCM_LOADMODE | CP15_TCM_CR_ITCM_LOADMODE
| CP15_CACHE_CR_ROUNDROBIN | CP15_CR_DISABLE_THUMBBIT;
/* Note: ARM946E-S Technical reference manual, Chapter 6.5.2 "You cannot directly enable or disable the write buffer"
CP15_CACHE_CR_WRITEBUFFERENABLE is always set on the cp15
*/
/* CP15 Internal Exception base value */
constexpr u32 CP15_EXCEPTIONBASE_HIGH = 0xFFFF0000;
constexpr u32 CP15_EXCEPTIONBASE_LOW = 0x00000000;
/* CP15 BIST Test State register */
constexpr u32 CP15_BIST_TR_DISABLE_ICACHE_STREAMING = (1 << 11);
constexpr u32 CP15_BIST_TR_DISABLE_DCACHE_STREAMING = (1 << 12);
constexpr u32 CP15_BIST_TR_DISABLE_ICACHE_LINEFILL = (1 << 9);
constexpr u32 CP15_BIST_TR_DISABLE_DCACHE_LINEFILL = (1 << 10);
/* CP15 Region Base and Size Register */
constexpr u32 CP15_REGION_COUNT = 8;
constexpr u32 CP15_REGION_ENABLE = (1 << 0);
constexpr u32 CP15_REGION_SIZE_MASK = (0x1F << 1);
constexpr u32 CP15_REGION_BASE_GRANULARITY_LOG2 = 12;
constexpr u32 CP15_REGION_BASE_GRANULARITY = (1 << CP15_REGION_BASE_GRANULARITY_LOG2);
// Keeps the bits at and above the 4 KB granularity (0xFFFFF000). The mask must be
// derived from the granularity itself, not from its log2, or it would let low
// register bits (size/enable field) leak into the base address.
constexpr u32 CP15_REGION_BASE_MASK = ~(CP15_REGION_BASE_GRANULARITY-1);
/* CP15 Region access mask registers */
constexpr u32 CP15_REGIONACCESS_BITS_PER_REGION = 4;
constexpr u32 CP15_REGIONACCESS_REGIONMASK = (1 << CP15_REGIONACCESS_BITS_PER_REGION) - 1;
/* Flags in the melonDS internal PU_PrivMap and PU_UserMap */
constexpr u32 CP15_MAP_NOACCESS = 0x00;
constexpr u32 CP15_MAP_READABLE = 0x01;
constexpr u32 CP15_MAP_WRITEABLE = 0x02;
constexpr u32 CP15_MAP_EXECUTABLE = 0x04;
constexpr u32 CP15_MAP_DCACHEABLE = 0x10;
constexpr u32 CP15_MAP_BUFFERABLE = 0x20;
constexpr u32 CP15_MAP_ICACHEABLE = 0x40;
constexpr u32 CP15_MAP_ENTRYSIZE_LOG2 = CP15_REGION_BASE_GRANULARITY_LOG2;
constexpr u32 CP15_MAP_ENTRYSIZE = (1 << CP15_MAP_ENTRYSIZE_LOG2);
constexpr u32 CP15_MAP_ENTRYCOUNT = 1 << (32 - CP15_MAP_ENTRYSIZE_LOG2);
/* Internal Timing Constants */
constexpr u32 BUSCYCLES_N16 = 0;
constexpr u32 BUSCYCLES_S16 = 1;
constexpr u32 BUSCYCLES_N32 = 2;
constexpr u32 BUSCYCLES_S32 = 3;
constexpr u32 BUSCYCLES_MAP_GRANULARITY_LOG2 = CP15_REGION_BASE_GRANULARITY_LOG2;
}
#endif // MELONDS_CP15CONSTANTS_H

View File

@ -21,6 +21,7 @@
#include "DSi.h"
#include "DMA.h"
#include "GPU.h"
#include "ARM.h"
#include "GPU3D.h"
#include "DMA_Timings.h"
#include "Platform.h"
@ -80,6 +81,7 @@ void DMA::Reset()
Running = false;
Executing = false;
InProgress = false;
DMAQueued = false;
MRAMBurstCount = 0;
MRAMBurstTable = DMATiming::MRAMDummy;
}
@ -137,14 +139,17 @@ void DMA::WriteCnt(u32 val)
case 0x01000000: SrcAddrInc = 0; break;
case 0x01800000: SrcAddrInc = 1; break;
}
u32 oldstartmode = StartMode;
if (CPU == 0)
StartMode = (Cnt >> 27) & 0x7;
else
StartMode = ((Cnt >> 28) & 0x3) | 0x10;
if ((StartMode & 0x7) == 0)
Start();
{
NDS.DMAsQueued[NDS.DMAQueuePtr++] = (CPU*4)+Num;
if (!(NDS.SchedListMask & (1<<Event_DMA))) NDS.ScheduleEvent(Event_DMA, false, 1, 0, 0);
}
else if (StartMode == 0x07)
NDS.GPU.GPU3D.CheckFIFODMA();
@ -155,7 +160,22 @@ void DMA::WriteCnt(u32 val)
void DMA::Start()
{
if (Running) return;
if (Running)
{
if (CPU ? StartMode == 0x12 : StartMode == 0x05)
{
DMAQueued = true;
}
else
{
DMAQueued = false;
}
return;
}
else
{
DMAQueued = false;
}
if (!InProgress)
{
@ -187,16 +207,32 @@ void DMA::Start()
// TODO eventually: not stop if we're running code in ITCM
Running = 2;
Running = 3;
// safety measure
MRAMBurstTable = DMATiming::MRAMDummy;
InProgress = true;
NDS.StopCPU(CPU, 1<<Num);
if (CPU == 0)
{
u64 ts;
/*if (StartMode == 0x00)
{
ts = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
}
else*/ ts = NDS.SysTimestamp << NDS.ARM9ClockShift;
if (NDS.DMA9Timestamp < ts) NDS.DMA9Timestamp = ts;
}
if (Num == 0) NDS.DMAs[(CPU*4)+1].ResetBurst();
if (Num <= 1) NDS.DMAs[(CPU*4)+2].ResetBurst();
if (Num <= 2) NDS.DMAs[(CPU*4)+3].ResetBurst();
}
u32 DMA::UnitTimings9_16(bool burststart)
u32 DMA::UnitTimings9_16(int burststart)
{
u32 src_id = CurSrcAddr >> 14;
u32 dst_id = CurDstAddr >> 14;
@ -209,15 +245,17 @@ u32 DMA::UnitTimings9_16(bool burststart)
src_s = NDS.ARM9MemTimings[src_id][5];
dst_n = NDS.ARM9MemTimings[dst_id][4];
dst_s = NDS.ARM9MemTimings[dst_id][5];
if (src_rgn == Mem9_MainRAM)
/*if (src_rgn == Mem9_MainRAM)
{
if (dst_rgn == Mem9_MainRAM)
return 16;
{
return (burststart == 2) ? 11 : 16;
}
if (SrcAddrInc > 0)
{
if (burststart || MRAMBurstTable[MRAMBurstCount] == 0)
if ((burststart == 2) || MRAMBurstTable[MRAMBurstCount] == 0)
{
MRAMBurstCount = 0;
@ -239,14 +277,14 @@ u32 DMA::UnitTimings9_16(bool burststart)
{
// TODO: not quite right for GBA slot
return (((CurSrcAddr & 0x1F) == 0x1E) ? 7 : 8) +
(burststart ? dst_n : dst_s);
((burststart == 2) ? dst_n : dst_s);
}
}
else if (dst_rgn == Mem9_MainRAM)
{
if (DstAddrInc > 0)
{
if (burststart || MRAMBurstTable[MRAMBurstCount] == 0)
if ((burststart == 2) || MRAMBurstTable[MRAMBurstCount] == 0)
{
MRAMBurstCount = 0;
@ -266,23 +304,26 @@ u32 DMA::UnitTimings9_16(bool burststart)
}
else
{
return (burststart ? src_n : src_s) + 7;
return ((burststart == 2) ? src_n : src_s) + 7;
}
}
else if (src_rgn & dst_rgn)
else*/ if (src_rgn & dst_rgn)
{
return src_n + dst_n + 1;
if (burststart != 1)
return src_n + dst_n + (src_n == 1 || burststart <= 0);
else
return src_n + dst_n + (src_n != 1);
}
else
{
if (burststart)
return src_n + dst_n;
if (burststart == 2)
return src_n + dst_n + (src_n == 1);
else
return src_s + dst_s;
}
}
u32 DMA::UnitTimings9_32(bool burststart)
u32 DMA::UnitTimings9_32(int burststart)
{
u32 src_id = CurSrcAddr >> 14;
u32 dst_id = CurDstAddr >> 14;
@ -296,14 +337,14 @@ u32 DMA::UnitTimings9_32(bool burststart)
dst_n = NDS.ARM9MemTimings[dst_id][6];
dst_s = NDS.ARM9MemTimings[dst_id][7];
if (src_rgn == Mem9_MainRAM)
/*if (src_rgn == Mem9_MainRAM)
{
if (dst_rgn == Mem9_MainRAM)
return 18;
return (burststart == 2) ? 13 : 18;
if (SrcAddrInc > 0)
{
if (burststart || MRAMBurstTable[MRAMBurstCount] == 0)
if ((burststart == 2) || MRAMBurstTable[MRAMBurstCount] == 0)
{
MRAMBurstCount = 0;
@ -327,14 +368,14 @@ u32 DMA::UnitTimings9_32(bool burststart)
{
// TODO: not quite right for GBA slot
return (((CurSrcAddr & 0x1F) == 0x1C) ? (dst_n==2 ? 7:8) : 9) +
(burststart ? dst_n : dst_s);
((burststart == 2) ? dst_n : dst_s);
}
}
else if (dst_rgn == Mem9_MainRAM)
{
if (DstAddrInc > 0)
{
if (burststart || MRAMBurstTable[MRAMBurstCount] == 0)
if ((burststart == 2) || MRAMBurstTable[MRAMBurstCount] == 0)
{
MRAMBurstCount = 0;
@ -356,17 +397,20 @@ u32 DMA::UnitTimings9_32(bool burststart)
}
else
{
return (burststart ? src_n : src_s) + 8;
return ((burststart == 2) ? src_n : src_s) + 8;
}
}
else if (src_rgn & dst_rgn)
else*/ if (src_rgn & dst_rgn)
{
return src_n + dst_n + 1;
if (burststart != 1)
return src_n + dst_n + (src_n == 1 || burststart <= 0);
else
return src_n + dst_n + (src_n != 1);
}
else
{
if (burststart)
return src_n + dst_n;
if (burststart == 2)
return src_n + dst_n + (src_n == 1);
else
return src_s + dst_s;
}
@ -374,7 +418,7 @@ u32 DMA::UnitTimings9_32(bool burststart)
// TODO: the ARM7 ones don't take into account that the two wifi regions have different timings
u32 DMA::UnitTimings7_16(bool burststart)
u32 DMA::UnitTimings7_16(int burststart)
{
u32 src_id = CurSrcAddr >> 15;
u32 dst_id = CurDstAddr >> 15;
@ -388,7 +432,7 @@ u32 DMA::UnitTimings7_16(bool burststart)
dst_n = NDS.ARM7MemTimings[dst_id][0];
dst_s = NDS.ARM7MemTimings[dst_id][1];
if (src_rgn == Mem7_MainRAM)
/*if (src_rgn == Mem7_MainRAM)
{
if (dst_rgn == Mem7_MainRAM)
return 16;
@ -447,20 +491,23 @@ u32 DMA::UnitTimings7_16(bool burststart)
return (burststart ? src_n : src_s) + 7;
}
}
else if (src_rgn & dst_rgn)
else*/ if (src_rgn & dst_rgn)
{
return src_n + dst_n + 1;
if (burststart != 1)
return src_n + dst_n + (src_n == 1 || burststart <= 0);
else
return src_n + dst_n + (src_n != 1);
}
else
{
if (burststart)
return src_n + dst_n;
if (burststart == 2)
return src_n + dst_n + (src_n == 1);
else
return src_s + dst_s;
}
}
u32 DMA::UnitTimings7_32(bool burststart)
u32 DMA::UnitTimings7_32(int burststart)
{
u32 src_id = CurSrcAddr >> 15;
u32 dst_id = CurDstAddr >> 15;
@ -474,7 +521,7 @@ u32 DMA::UnitTimings7_32(bool burststart)
dst_n = NDS.ARM7MemTimings[dst_id][2];
dst_s = NDS.ARM7MemTimings[dst_id][3];
if (src_rgn == Mem7_MainRAM)
/*if (src_rgn == Mem7_MainRAM)
{
if (dst_rgn == Mem7_MainRAM)
return 18;
@ -537,14 +584,17 @@ u32 DMA::UnitTimings7_32(bool burststart)
return (burststart ? src_n : src_s) + 8;
}
}
else if (src_rgn & dst_rgn)
else*/ if (src_rgn & dst_rgn)
{
return src_n + dst_n + 1;
if (burststart != 1)
return src_n + dst_n + (src_n == 1 || burststart <= 0);
else
return src_n + dst_n + (src_n != 1);
}
else
{
if (burststart)
return src_n + dst_n;
if (burststart == 2)
return src_n + dst_n + (src_n == 1);
else
return src_s + dst_s;
}
@ -552,20 +602,30 @@ u32 DMA::UnitTimings7_32(bool burststart)
void DMA::Run9()
{
if (NDS.ARM9Timestamp >= NDS.ARM9Target) return;
//NDS.DMA9Timestamp = std::max(NDS.DMA9Timestamp, NDS.SysTimestamp << NDS.ARM9ClockShift);
//NDS.DMA9Timestamp = (NDS.DMA9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
if (NDS.DMA9Timestamp-1 >= NDS.ARM9Target) return;
Executing = true;
// add NS penalty for first accesses in burst
bool burststart = (Running == 2);
Running = 1;
int burststart = Running-1;
if (!(Cnt & (1<<26)))
{
while (IterCount > 0 && !Stall)
{
NDS.ARM9Timestamp += (UnitTimings9_16(burststart) << NDS.ARM9ClockShift);
burststart = false;
u32 rgn = NDS.ARM9Regions[CurSrcAddr>>14] | NDS.ARM9Regions[CurDstAddr>>14];
if (rgn & Mem9_MainRAM)
{
NDS.ARM9.MRTrack.Type = MainRAMType::DMA16;
NDS.ARM9.MRTrack.Var = Num;
return;
}
NDS.DMA9Timestamp += (UnitTimings9_16(burststart) << NDS.ARM9ClockShift);
burststart -= 1;
NDS.ARM9Write16(CurDstAddr, NDS.ARM9Read16(CurSrcAddr));
@ -574,15 +634,23 @@ void DMA::Run9()
IterCount--;
RemCount--;
if (NDS.ARM9Timestamp >= NDS.ARM9Target) break;
if (NDS.DMA9Timestamp-1 >= NDS.ARM9Target) break;
}
}
else
{
while (IterCount > 0 && !Stall)
{
NDS.ARM9Timestamp += (UnitTimings9_32(burststart) << NDS.ARM9ClockShift);
burststart = false;
u32 rgn = NDS.ARM9Regions[CurSrcAddr>>14] | NDS.ARM9Regions[CurDstAddr>>14];
if (rgn & Mem9_MainRAM)
{
NDS.ARM9.MRTrack.Type = MainRAMType::DMA32;
NDS.ARM9.MRTrack.Var = Num;
return;
}
NDS.DMA9Timestamp += (UnitTimings9_32(burststart) << NDS.ARM9ClockShift);
burststart -= 1;
NDS.ARM9Write32(CurDstAddr, NDS.ARM9Read32(CurSrcAddr));
@ -591,10 +659,15 @@ void DMA::Run9()
IterCount--;
RemCount--;
if (NDS.ARM9Timestamp >= NDS.ARM9Target) break;
if (NDS.DMA9Timestamp-1 >= NDS.ARM9Target) break;
}
}
NDS.DMA9Timestamp -= 1;
if (burststart <= 0) Running = 1;
else Running = 2;
Executing = false;
Stall = false;
@ -621,6 +694,8 @@ void DMA::Run9()
Running = 0;
InProgress = false;
NDS.ResumeCPU(0, 1<<Num);
if (DMAQueued) Start();
}
void DMA::Run7()
@ -630,13 +705,20 @@ void DMA::Run7()
Executing = true;
// add NS penalty for first accesses in burst
bool burststart = (Running == 2);
Running = 1;
int burststart = Running - 1;
if (!(Cnt & (1<<26)))
{
while (IterCount > 0 && !Stall)
{
u32 rgn = NDS.ARM7Regions[CurSrcAddr>>15] | NDS.ARM7Regions[CurDstAddr>>15];
if (rgn & Mem7_MainRAM)
{
NDS.ARM7.MRTrack.Type = MainRAMType::DMA16;
NDS.ARM7.MRTrack.Var = Num+4;
return;
}
NDS.ARM7Timestamp += UnitTimings7_16(burststart);
burststart = false;
@ -654,6 +736,14 @@ void DMA::Run7()
{
while (IterCount > 0 && !Stall)
{
u32 rgn = NDS.ARM7Regions[CurSrcAddr>>15] | NDS.ARM7Regions[CurDstAddr>>15];
if (rgn & Mem7_MainRAM)
{
NDS.ARM7.MRTrack.Type = MainRAMType::DMA32;
NDS.ARM7.MRTrack.Var = Num+4;
return;
}
NDS.ARM7Timestamp += UnitTimings7_32(burststart);
burststart = false;
@ -668,6 +758,9 @@ void DMA::Run7()
}
}
if (burststart <= 0) Running = 1;
else Running = 2;
Executing = false;
Stall = false;
@ -691,6 +784,8 @@ void DMA::Run7()
Running = 0;
InProgress = false;
NDS.ResumeCPU(1, 1<<Num);
if (DMAQueued) Start();
}
void DMA::Run()

View File

@ -40,10 +40,10 @@ public:
void WriteCnt(u32 val);
void Start();
u32 UnitTimings9_16(bool burststart);
u32 UnitTimings9_32(bool burststart);
u32 UnitTimings7_16(bool burststart);
u32 UnitTimings7_32(bool burststart);
u32 UnitTimings9_16(int burststart);
u32 UnitTimings9_32(int burststart);
u32 UnitTimings7_16(int burststart);
u32 UnitTimings7_32(int burststart);
void Run();
void Run9();
@ -73,30 +73,34 @@ public:
if (Executing) Stall = true;
}
// Marks an active transfer so that its burst state is reset.
// Does nothing while the channel is idle (Running == 0); otherwise Running
// is forced to 3.
// NOTE(review): Run7 derives `burststart` from `Running - 1`, so 3 presumably
// makes the next run re-apply the start-of-burst penalty — confirm.
void ResetBurst()
{
    if (Running == 0)
        return;

    Running = 3;
}
u32 SrcAddr {};
u32 DstAddr {};
u32 Cnt {};
private:
melonDS::NDS& NDS;
u32 CPU {};
u32 Num {};
u32 StartMode {};
u32 CurSrcAddr {};
u32 CurDstAddr {};
u32 RemCount {};
u32 IterCount {};
s32 SrcAddrInc {};
s32 DstAddrInc {};
u32 CountMask {};
u32 Running {};
bool InProgress {};
u32 Num {};
u32 StartMode {};
bool Executing {};
bool Stall {};
private:
melonDS::NDS& NDS;
u32 CPU {};
// Value-initialized like every other member of this class so it can never be
// read while indeterminate; Run9()/Run7() test it (`if (DMAQueued) Start();`)
// when a transfer finishes.
bool DMAQueued {};
u32 CountMask {};
bool IsGXFIFODMA {};
u32 MRAMBurstCount {};

View File

@ -48,7 +48,7 @@ extern const std::array<u8, 256> MRAMDummy = {0};
extern const std::array<u8, 256> MRAMRead16Bursts[] =
{
// main RAM to regular 16bit or 32bit bus (similar)
{7, 3, 2, 2, 2, 2, 2, 2, 2, 2,
{6, 3, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@ -60,7 +60,7 @@ extern const std::array<u8, 256> MRAMRead16Bursts[] =
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2,
7, 3, 2, 2, 2, 2, 2, 2, 2, 2,
6, 3, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@ -72,7 +72,7 @@ extern const std::array<u8, 256> MRAMRead16Bursts[] =
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2,
7, 3,
6, 3,
0},
// main RAM to GBA/wifi, seq=4
{8, 6, 5, 5, 5, 5, 5, 5, 5, 5,
@ -181,7 +181,7 @@ extern const std::array<u8, 256> MRAMRead32Bursts[] =
extern const std::array<u8, 256> MRAMWrite16Bursts[] =
{
// regular 16bit or 32bit bus to main RAM (similar)
{8, 2, 2, 2, 2, 2, 2, 2, 2, 2,
{5, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@ -212,7 +212,7 @@ extern const std::array<u8, 256> MRAMWrite16Bursts[] =
extern const std::array<u8, 256> MRAMWrite32Bursts[4] =
{
// regular 16bit bus to main RAM
{9, 4, 4, 4, 4, 4, 4, 4, 4, 4,
{6, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
@ -220,7 +220,7 @@ extern const std::array<u8, 256> MRAMWrite32Bursts[4] =
4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0},
// regular 32bit bus to main RAM
{9, 3, 3, 3, 3, 3, 3, 3, 3, 3,
{6, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

View File

@ -129,6 +129,9 @@ void DSi::Reset()
//ARM9.CP15Write(0x911, 0x00000020);
//ARM9.CP15Write(0x100, ARM9.CP15Read(0x100) | 0x00050000);
NDS::Reset();
ExMemCnt[0] = 0xEC8C; // checkme: bit 10 should be explicitly set?
ExMemCnt[1] = 0xEC8C;
// The SOUNDBIAS register does nothing on DSi
SPU.SetApplyBias(false);
@ -160,6 +163,7 @@ void DSi::Reset()
SCFG_Clock9 = 0x0187; // CHECKME
SCFG_Clock7 = 0x0187;
SCFG_EXT[0] = 0x8307F100;
SetVRAMTimings(true);
SCFG_EXT[1] = 0x93FFFB06;
SCFG_MC = 0x0010 | (~((u32)(NDSCartSlot.GetCart() != nullptr))&1);//0x0011;
SCFG_RST = 0;
@ -233,6 +237,7 @@ void DSi::DoSavestateExtra(Savestate* file)
Set_SCFG_Clock9(SCFG_Clock9);
Set_SCFG_MC(SCFG_MC);
DSP.SetRstLine(SCFG_RST & 0x0001);
SetVRAMTimings(SCFG_EXT[0] & (1<<13));
MBK[0][8] = 0;
MBK[1][8] = 0;
@ -711,6 +716,7 @@ void DSi::SoftReset()
SCFG_Clock9 = 0x0187; // CHECKME
SCFG_Clock7 = 0x0187;
SCFG_EXT[0] = 0x8307F100;
SetVRAMTimings(true);
SCFG_EXT[1] = 0x93FFFB06;
SCFG_MC = 0x0010;//0x0011;
// TODO: is this actually reset?
@ -1274,6 +1280,19 @@ void DSi::Set_SCFG_Clock9(u16 val)
{
ARM9Timestamp >>= ARM9ClockShift;
ARM9Target >>= ARM9ClockShift;
for (int i = 0; i < 7; i++)
{
ARM9.ICacheStreamTimes[i] >>= ARM9ClockShift;
ARM9.DCacheStreamTimes[i] >>= ARM9ClockShift;
}
ARM9.TimestampMemory >>= ARM9ClockShift;
ARM9.ITCMTimestamp >>= ARM9ClockShift;
ARM9.WBTimestamp >>= ARM9ClockShift;
ARM9.WBDelay >>= ARM9ClockShift;
ARM9.WBReleaseTS >>= ARM9ClockShift;
ARM9.WBInitialTS >>= ARM9ClockShift;
ARM9.ILCurrTime >>= ARM9ClockShift;
ARM9.ILPrevTime >>= ARM9ClockShift;
Log(LogLevel::Debug, "CLOCK9=%04X\n", val);
SCFG_Clock9 = val & 0x0187;
@ -1283,7 +1302,21 @@ void DSi::Set_SCFG_Clock9(u16 val)
ARM9Timestamp <<= ARM9ClockShift;
ARM9Target <<= ARM9ClockShift;
ARM9.UpdateRegionTimings(0x00000, 0x100000);
for (int i = 0; i < 7; i++)
{
ARM9.ICacheStreamTimes[i] <<= ARM9ClockShift;
ARM9.DCacheStreamTimes[i] <<= ARM9ClockShift;
}
ARM9.TimestampMemory <<= ARM9ClockShift;
ARM9.ITCMTimestamp <<= ARM9ClockShift;
ARM9.WBTimestamp <<= ARM9ClockShift;
ARM9.WBDelay <<= ARM9ClockShift;
ARM9.WBReleaseTS <<= ARM9ClockShift;
ARM9.WBInitialTS <<= ARM9ClockShift;
ARM9.ILCurrTime <<= ARM9ClockShift;
ARM9.ILPrevTime <<= ARM9ClockShift;
ARM9.UpdateRegionTimings(0x00000, 0x40000);
}
void DSi::Set_SCFG_MC(u32 val)
@ -1301,6 +1334,14 @@ void DSi::Set_SCFG_MC(u32 val)
}
}
// Re-applies the ARM9 region timings for the VRAM range 0x06000-0x07000.
// extrabuswidth: true selects the value 32 ("dsi vram"), false selects 16
// ("ds vram") for the fourth argument of SetARM9RegionTimings; the two
// trailing arguments are 1 in both cases.
// NOTE(review): the fourth argument is presumably the bus width in bits — confirm
// against SetARM9RegionTimings.
void DSi::SetVRAMTimings(bool extrabuswidth)
{
    const int width = extrabuswidth ? 32  // dsi vram
                                    : 16; // ds vram

    SetARM9RegionTimings(0x06000, 0x07000, Mem9_VRAM, width, 1, 1);
}
u8 DSi::ARM9Read8(u32 addr)
{
@ -1723,7 +1764,7 @@ void DSi::ARM9Write32(u32 addr, u32 val)
return NDS::ARM9Write32(addr, val);
}
bool DSi::ARM9GetMemRegion(u32 addr, bool write, MemRegion* region)
bool DSi::ARM9GetMemRegion(const u32 addr, const bool write, MemRegion* region)
{
assert(ConsoleType == 1);
switch (addr & 0xFF000000)
@ -2539,12 +2580,19 @@ void DSi::ARM9IOWrite32(u32 addr, u32 val)
u32 oldram = (SCFG_EXT[0] >> 14) & 0x3;
u32 newram = (val >> 14) & 0x3;
u32 oldvram = (SCFG_EXT[0] & (1<<13));
u32 newvram = (val & (1<<13));
SCFG_EXT[0] &= ~0x8007F19F;
SCFG_EXT[0] |= (val & 0x8007F19F);
SCFG_EXT[1] &= ~0x0000F080;
SCFG_EXT[1] |= (val & 0x0000F080);
Log(LogLevel::Debug, "SCFG_EXT = %08X / %08X (val9 %08X)\n", SCFG_EXT[0], SCFG_EXT[1], val);
/*switch ((SCFG_EXT[0] >> 14) & 0x3)
if (oldvram != newvram)
SetVRAMTimings(newvram);
switch ((SCFG_EXT[0] >> 14) & 0x3)
{
case 0:
case 1:
@ -2557,7 +2605,7 @@ void DSi::ARM9IOWrite32(u32 addr, u32 val)
NDS::MainRAMMask = 0xFFFFFF;
printf("RAM: 16MB\n");
break;
}*/
}
// HAX!!
// a change to the RAM size setting is supposed to apply immediately (it does so on hardware)
// however, doing so will cause DS-mode app startup to break, because the change happens while the ARM7
@ -3069,6 +3117,7 @@ void DSi::ARM7IOWrite32(u32 addr, u32 val)
SCFG_EXT[0] |= (val & 0x03000000);
SCFG_EXT[1] &= ~0x93FF0F07;
SCFG_EXT[1] |= (val & 0x93FF0F07);
if (!(val & (1<<24))) { ExMemCnt[0] &= ~(1<<10); ExMemCnt[1] &= ~(1<<10); } // bit 10 of exmemcnt is cleared when disabling second card slot access
Log(LogLevel::Debug, "SCFG_EXT = %08X / %08X (val7 %08X)\n", SCFG_EXT[0], SCFG_EXT[1], val);
return;
case 0x04004010:

View File

@ -96,6 +96,7 @@ public:
void MapNWRAM_B(u32 num, u8 val);
void MapNWRAM_C(u32 num, u8 val);
void MapNWRAMRange(u32 cpu, u32 num, u32 val);
void SetVRAMTimings(bool extrabuswidth);
u8 ARM9Read8(u32 addr) override;
u16 ARM9Read16(u32 addr) override;
@ -104,7 +105,7 @@ public:
void ARM9Write16(u32 addr, u16 val) override;
void ARM9Write32(u32 addr, u32 val) override;
bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region) override;
bool ARM9GetMemRegion(const u32 addr, const bool write, MemRegion* region) override;
u8 ARM7Read8(u32 addr) override;
u16 ARM7Read16(u32 addr) override;

View File

@ -2378,13 +2378,13 @@ void GPU3D::Run() noexcept
if (!GeometryEnabled || FlushRequest ||
(CmdPIPE.IsEmpty() && !(GXStat & (1<<27))))
{
Timestamp = NDS.ARM9Timestamp >> NDS.ARM9ClockShift;
Timestamp = std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) >> NDS.ARM9ClockShift;
return;
}
s32 cycles = (NDS.ARM9Timestamp >> NDS.ARM9ClockShift) - Timestamp;
s32 cycles = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) >> NDS.ARM9ClockShift) - Timestamp;
CycleCount -= cycles;
Timestamp = NDS.ARM9Timestamp >> NDS.ARM9ClockShift;
Timestamp = std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) >> NDS.ARM9ClockShift;
if (CycleCount <= 0)
{

File diff suppressed because it is too large Load Diff

View File

@ -64,6 +64,7 @@ enum
Event_SPITransfer,
Event_Div,
Event_Sqrt,
Event_DMA,
// DSi
Event_DSi_SDMMCTransfer,
@ -196,6 +197,8 @@ enum
Mem9_VRAM = 0x00000100,
Mem9_GBAROM = 0x00020000,
Mem9_GBARAM = 0x00040000,
Mem9_DCache = 0x40000000,
Mem9_Null = 0x80000000,
Mem7_BIOS = 0x00000001,
Mem7_MainRAM = 0x00000002,
@ -241,7 +244,8 @@ public: // TODO: Encapsulate the rest of these members
int ConsoleType;
int CurCPU;
u32 SchedListMask;
SchedEvent SchedList[Event_MAX] {};
u8 ARM9MemTimings[0x40000][8];
u32 ARM9Regions[0x40000];
@ -253,8 +257,11 @@ public: // TODO: Encapsulate the rest of these members
bool LagFrameFlag;
// no need to worry about those overflowing, they can keep going for at least 4350 years
u64 ARM9Timestamp, ARM9Target;
u64 ARM9Timestamp, DMA9Timestamp, ARM9Target;
u64 ARM7Timestamp, ARM7Target;
u64 MainRAMTimestamp, MainRAMBurstStart;
u64 A9ContentionTS; bool ConTSLock;
u64 SysTimestamp;
u32 ARM9ClockShift;
u32 IME[2];
@ -272,11 +279,18 @@ public: // TODO: Encapsulate the rest of these members
alignas(u32) u8 ROMSeed0[2*8];
alignas(u32) u8 ROMSeed1[2*8];
u32 DMAReadHold[2];
u8 DMAsQueued[8];
u8 DMAQueuePtr;
bool MainRAMBork; // if a main ram read burst starts in the last 6 bytes of a 32 byte block, and then crosses the 32 byte boundary, the burst forcibly restarts
bool MainRAMLastAccess; // 0 == ARM9 | 1 == ARM7
bool DMALastWasMainRAM;
protected:
// These BIOS arrays should be declared *before* the component objects (JIT, SPI, etc.)
// so that they're initialized before the component objects' constructors run.
std::array<u8, ARM9BIOSSize> ARM9BIOS;
std::array<u8, ARM7BIOSSize> ARM7BIOS;
alignas(u32) std::array<u8, ARM9BIOSSize> ARM9BIOS;
alignas(u32) std::array<u8, ARM7BIOSSize> ARM7BIOS;
bool ARM9BIOSNative;
bool ARM7BIOSNative;
public: // TODO: Encapsulate the rest of these members
@ -311,6 +325,11 @@ public: // TODO: Encapsulate the rest of these members
GBACart::GBACartSlot GBACartSlot;
melonDS::GPU GPU;
melonDS::AREngine AREngine;
DMA DMAs[8];
#ifdef JIT_ENABLED
// Returns the current value of EnableJIT.
bool IsJITEnabled()
{
    return EnableJIT;
}
#endif
const u32 ARM7WRAMSize = 0x10000;
u8* ARM7WRAM;
@ -390,6 +409,10 @@ public: // TODO: Encapsulate the rest of these members
std::unique_ptr<GBACart::CartCommon> EjectGBACart() { return GBACartSlot.EjectCart(); }
void MainRAMHandleARM9();
void MainRAMHandleARM7();
bool MainRAMHandle();
u32 RunFrame();
bool IsRunning() const noexcept { return Running; }
@ -409,6 +432,7 @@ public: // TODO: Encapsulate the rest of these members
void UnregisterEventFuncs(u32 id);
void ScheduleEvent(u32 id, bool periodic, s32 delay, u32 funcid, u32 param);
void CancelEvent(u32 id);
void RunEventManual(u32 id);
void debug(u32 p);
@ -447,7 +471,7 @@ public: // TODO: Encapsulate the rest of these members
virtual void ARM9Write16(u32 addr, u16 val);
virtual void ARM9Write32(u32 addr, u32 val);
virtual bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region);
virtual bool ARM9GetMemRegion(const u32 addr, const bool write, MemRegion* region);
virtual u8 ARM7Read8(u32 addr);
virtual u16 ARM7Read16(u32 addr);
@ -488,8 +512,6 @@ public: // TODO: Encapsulate the rest of these members
private:
void InitTimings();
u32 SchedListMask;
u64 SysTimestamp;
u8 WRAMCnt;
u8 PostFlag9;
u8 PostFlag7;
@ -497,7 +519,6 @@ private:
u16 WifiWaitCnt;
u8 TimerCheckMask[2];
u64 TimerTimestamp[2];
DMA DMAs[8];
u32 DMA9Fill[4];
u16 IPCSync9, IPCSync7;
u16 IPCFIFOCnt9, IPCFIFOCnt7;
@ -525,6 +546,7 @@ private:
void HandleTimerOverflow(u32 tid);
u16 TimerGetCounter(u32 timer);
void TimerStart(u32 id, u16 cnt);
void QueueDMAs(u32 param);
void StartDiv();
void DivDone(u32 param);
void SqrtDone(u32 param);

View File

@ -1799,6 +1799,7 @@ void NDSCartSlot::ResetCart() noexcept
TransferDir = 0;
memset(TransferCmd.data(), 0, sizeof(TransferCmd));
TransferCmd[0] = 0xFF;
ROMTransferTime = -1;
if (Cart) Cart->Reset();
}
@ -1835,6 +1836,12 @@ void NDSCartSlot::ROMPrepareData(u32 param) noexcept
NDS.CheckDMAs(0, 0x05);
}
// Returns ROMCnt after first calling NDS.RunEventManual(Event_ROMTransfer).
// NOTE(review): presumably this runs a pending ROM-transfer event early so the
// returned value reflects the transfer state at the time of the read —
// confirm against NDS::RunEventManual.
u32 NDSCartSlot::GetROMCnt() noexcept
{
NDS.RunEventManual(Event_ROMTransfer);
return ROMCnt;
}
void NDSCartSlot::WriteROMCnt(u32 val) noexcept
{
u32 xferstart = (val & ~ROMCnt) & (1<<31);
@ -1921,7 +1928,15 @@ void NDSCartSlot::WriteROMCnt(u32 val) noexcept
if (datasize == 0)
NDS.ScheduleEvent(Event_ROMTransfer, false, xfercycle*cmddelay, ROMTransfer_End, 0);
else
{
NDS.ScheduleEvent(Event_ROMTransfer, false, xfercycle*(cmddelay+4), ROMTransfer_PrepareData, 0);
u64 curts;
if (NDS.ExMemCnt[0] & (1<<11)) curts = NDS.ARM7Timestamp;
else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<<NDS.ARM9ClockShift)-1)) >> NDS.ARM9ClockShift;
ROMTransferTime = (xfercycle*(cmddelay+8)) + curts;
}
}
void NDSCartSlot::AdvanceROMTransfer() noexcept
@ -1934,11 +1949,17 @@ void NDSCartSlot::AdvanceROMTransfer() noexcept
u32 delay = 4;
if (!(ROMCnt & (1<<30)))
{
if (!(TransferPos & 0x1FF))
if (!((TransferPos+4) & 0x1FF))
delay += ((ROMCnt >> 16) & 0x3F);
}
u64 curts;
if (NDS.ExMemCnt[0] & (1<<11)) curts = NDS.ARM7Timestamp;
else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<<NDS.ARM9ClockShift)-1)) >> NDS.ARM9ClockShift;
NDS.ScheduleEvent(Event_ROMTransfer, false, ROMTransferTime-curts, ROMTransfer_PrepareData, 0);
NDS.ScheduleEvent(Event_ROMTransfer, false, xfercycle*delay, ROMTransfer_PrepareData, 0);
ROMTransferTime = (xfercycle*delay) + std::max(curts, ROMTransferTime);
}
else
ROMEndTransfer(0);
@ -1947,6 +1968,8 @@ void NDSCartSlot::AdvanceROMTransfer() noexcept
u32 NDSCartSlot::ReadROMData() noexcept
{
if (ROMCnt & (1<<30)) return 0;
NDS.RunEventManual(Event_ROMTransfer);
if (ROMCnt & (1<<23))
{
@ -1959,6 +1982,8 @@ u32 NDSCartSlot::ReadROMData() noexcept
void NDSCartSlot::WriteROMData(u32 val) noexcept
{
if (!(ROMCnt & (1<<30))) return;
NDS.RunEventManual(Event_ROMTransfer);
ROMData = val;

View File

@ -414,9 +414,11 @@ public:
// Returns byte `index` of ROMCommand. No bounds check is performed here.
[[nodiscard]] u8 GetROMCommand(u8 index) const noexcept { return ROMCommand[index]; }
// Stores `val` into byte `index` of ROMCommand. No bounds check is performed here.
void SetROMCommand(u8 index, u8 val) noexcept { ROMCommand[index] = val; }
[[nodiscard]] u32 GetROMCnt() const noexcept { return ROMCnt; }
[[nodiscard]] u32 GetROMCnt() noexcept;
// Returns the current value of SPICnt; unlike GetROMCnt above it has no side effects.
[[nodiscard]] u16 GetSPICnt() const noexcept { return SPICnt; }
// Overwrites SPICnt with `val` as-is (no masking is applied here).
void SetSPICnt(u16 val) noexcept { SPICnt = val; }
private:
friend class CartCommon;
melonDS::NDS& NDS;
@ -441,6 +443,7 @@ private:
u64 Key2_X = 0;
u64 Key2_Y = 0;
u64 ROMTransferTime;
void Key1_Encrypt(u32* data) const noexcept;
void Key1_Decrypt(u32* data) const noexcept;

View File

@ -24,7 +24,7 @@
#include <stdio.h>
#include "types.h"
#define SAVESTATE_MAJOR 12
#define SAVESTATE_MAJOR 13
#define SAVESTATE_MINOR 1
namespace melonDS