/*
    Copyright 2016-2024 melonDS team

    This file is part of melonDS.

    melonDS is free software: you can redistribute it and/or modify it under
    the terms of the GNU General Public License as published by the Free
    Software Foundation, either version 3 of the License, or (at your option)
    any later version.

    melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with melonDS. If not, see http://www.gnu.org/licenses/.
*/

#include <stdio.h>
#include <string.h>
#if defined(__x86_64__)
#include <emmintrin.h>
#elif defined(__ARM_NEON)
#include <arm_neon.h>
#endif
#include "NDS.h"
#include "DSi.h"
#include "ARM.h"
#include "Platform.h"
#include "ARMJIT_Memory.h"
#include "ARMJIT.h"
#include "CP15_Constants.h"

namespace melonDS
{
using Platform::Log;
using Platform::LogLevel;

// access timing for cached regions
// this would be an average between cache hits and cache misses
// this was measured to be close to hardware average
// a value of 1 would represent a perfect cache, but that causes
// games to run too fast, causing a number of issues
const int kDataCacheTiming = 3;//2;
const int kCodeCacheTiming = 3;//5;

void ARMv5::CP15Reset()
{
    CP15Control = 0x2078; // dunno

    RNGSeed = 44203;

    // Memory Regions Protection
    PU_CodeRW = 0;
    PU_DataRW = 0;

    memset(PU_Region, 0, CP15_REGION_COUNT*sizeof(*PU_Region));

    // TCM-Settings
    DTCMSetting = 0;
    ITCMSetting = 0;

    memset(ITCM, 0, ITCMPhysicalSize);
    memset(DTCM, 0, DTCMPhysicalSize);

    // Cache Settings
    PU_CodeCacheable = 0;
    PU_DataCacheable = 0;
    PU_WriteBufferability = 0;

    ICacheLockDown = 0;
    DCacheLockDown = 0;

    memset(ICache, 0, ICACHE_SIZE);
    ICacheInvalidateAll();
    ICacheCount = 0;

    memset(DCache, 0, DCACHE_SIZE);
    DCacheInvalidateAll();
    DCacheCount = 0;

    // Debug / Misc Registers
    CacheDebugRegisterIndex = 0;
    CP15BISTTestStateRegister = 0;
    CP15TraceProcessId = 0;

    // And now update the internal state
    UpdateDTCMSetting();
    UpdateITCMSetting();
    UpdatePURegions(true);
}

void ARMv5::CP15DoSavestate(Savestate* file)
{
    file->Section("CP15");

    file->Var32(&CP15Control);

    file->Var32(&DTCMSetting);
    file->Var32(&ITCMSetting);

    file->VarArray(ITCM, ITCMPhysicalSize);
    file->VarArray(DTCM, DTCMPhysicalSize);

    file->VarArray(ICache, sizeof(ICache));
    file->VarArray(ICacheTags, sizeof(ICacheTags));
    file->Var8(&ICacheCount);

    file->VarArray(DCache, sizeof(DCache));
    file->VarArray(DCacheTags, sizeof(DCacheTags));
    file->Var8(&DCacheCount);

    file->Var32(&DCacheLockDown);
    file->Var32(&ICacheLockDown);
    file->Var32(&CacheDebugRegisterIndex);
    file->Var32(&CP15TraceProcessId);
    file->Var32(&CP15BISTTestStateRegister);

    file->Var32(&PU_CodeCacheable);
    file->Var32(&PU_DataCacheable);
    file->Var32(&PU_WriteBufferability);

    file->Var32(&PU_CodeRW);
    file->Var32(&PU_DataRW);

    file->VarArray(PU_Region, CP15_REGION_COUNT*sizeof(u32));

    if (!file->Saving)
    {
        UpdateDTCMSetting();
        UpdateITCMSetting();
        UpdatePURegions(true);
    }
}

void ARMv5::UpdateDTCMSetting()
{
    u32 newDTCMBase;
    u32 newDTCMMask;
    u32 newDTCMSize;

    if (CP15Control & CP15_TCM_CR_DTCM_ENABLE)
    {
        newDTCMSize = CP15_DTCM_SIZE_BASE << ((DTCMSetting & CP15_DTCM_SIZE_MASK) >> CP15_DTCM_SIZE_POS);
        if (newDTCMSize < (CP15_DTCM_SIZE_BASE << CP15_DTCM_SIZE_MIN))
            newDTCMSize = CP15_DTCM_SIZE_BASE << CP15_DTCM_SIZE_MIN;

        newDTCMMask = CP15_DTCM_BASE_MASK & ~(newDTCMSize-1);
        newDTCMBase = DTCMSetting & newDTCMMask;
    }
    else
    {
        // DTCM disabled
        newDTCMSize = 0;
        newDTCMBase = 0xFFFFFFFF;
        newDTCMMask = 0;
    }

    if (newDTCMBase != DTCMBase || newDTCMMask != DTCMMask)
    {
        NDS.JIT.Memory.RemapDTCM(newDTCMBase, newDTCMSize);
        DTCMBase = newDTCMBase;
        DTCMMask = newDTCMMask;
    }
}

void ARMv5::UpdateITCMSetting()
{
    if (CP15Control & CP15_TCM_CR_ITCM_ENABLE)
    {
        ITCMSize = CP15_ITCM_SIZE_BASE << ((ITCMSetting & CP15_ITCM_SIZE_MASK) >> CP15_ITCM_SIZE_POS);
        #ifdef JIT_ENABLED
        FastBlockLookupSize = 0;
        #endif
    }
    else
    {
        ITCMSize = 0;
    }
}

// covers updates to a specific PU region's cache/etc settings
// (not to the region range/enabled status)
void ARMv5::UpdatePURegion(const u32 n)
{
    if (!(CP15Control & CP15_CR_MPUENABLE))
        return;

    if (n >= CP15_REGION_COUNT)
        return;

    u32 coderw = (PU_CodeRW >> (CP15_REGIONACCESS_BITS_PER_REGION * n)) & CP15_REGIONACCESS_REGIONMASK;
    u32 datarw = (PU_DataRW >> (CP15_REGIONACCESS_BITS_PER_REGION * n)) & CP15_REGIONACCESS_REGIONMASK;

    bool codecache, datacache, datawrite;

    // datacache/datawrite
    // 0/0: goes directly to memory
    // 0/1: goes to write buffer
    // 1/0: goes to write buffer and cache
    // 1/1: goes to cache

    if (CP15Control & CP15_CACHE_CR_ICACHEENABLE)
        codecache = (PU_CodeCacheable >> n) & 0x1;
    else
        codecache = false;

    if (CP15Control & CP15_CACHE_CR_DCACHEENABLE)
    {
        datacache = (PU_DataCacheable >> n) & 0x1;
    }
    else
    {
        datacache = false;
    }

    datawrite = (PU_WriteBufferability >> n) & 0x1;

    u32 rgn = PU_Region[n];
    if (!(rgn & CP15_REGION_ENABLE))
    {
        return;
    }

    // notes:
    // * the minimum size of a PU region is 4KiB (12 bits)
    // * the region covers 2^(size+1) bytes, but the map works in 4KiB entries (the 12 LSBs of the
    //   address space are ignored), so the exponent we need is size + 1 - 12, i.e. size - 11
    // * PU regions are aligned based on their size
    u32 size = std::max((int)((rgn>>1) & 0x1F) - 11, 0); // obtain the size, subtract 11 and clamp to a min of 0.
    u32 start = ((rgn >> 12) >> size) << size; // determine the start offset, and use shifts to force alignment with a multiple of the size.
    u32 end = start + (1<<size); // add 1 left shifted by size to start to determine end point
    // don't need to bounds check the end point because the forced alignment inherently prevents it from breaking
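
    // Illustrative example (values chosen for illustration only, not taken from any game):
    // a region register of 0x02000025 has a size field of ((rgn>>1) & 0x1F) = 18, so the region
    // spans 2^(18+1) = 512 KiB. In 4 KiB map entries that is 2^(18-11) = 128 entries; the base
    // 0x02000000 >> 12 = 0x2000 is already a multiple of 128, so start = 0x2000 and end = 0x2080,
    // covering map addresses 0x02000000-0x0207FFFF.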

    u8 usermask = CP15_MAP_NOACCESS;
    u8 privmask = CP15_MAP_NOACCESS;

    switch (datarw)
    {
    case 0: break;
    case 1: privmask |= CP15_MAP_READABLE | CP15_MAP_WRITEABLE; break;
    case 2: privmask |= CP15_MAP_READABLE | CP15_MAP_WRITEABLE; usermask |= CP15_MAP_READABLE; break;
    case 3: privmask |= CP15_MAP_READABLE | CP15_MAP_WRITEABLE; usermask |= CP15_MAP_READABLE | CP15_MAP_WRITEABLE; break;
    case 5: privmask |= CP15_MAP_READABLE; break;
    case 6: privmask |= CP15_MAP_READABLE; usermask |= CP15_MAP_READABLE; break;
    default: Log(LogLevel::Warn, "!! BAD DATARW VALUE %d\n", datarw & ((1 << CP15_REGIONACCESS_BITS_PER_REGION)-1));
    }

    switch (coderw)
    {
    case 0: break;
    case 1: privmask |= CP15_MAP_EXECUTABLE; break;
    case 2: privmask |= CP15_MAP_EXECUTABLE; usermask |= CP15_MAP_EXECUTABLE; break;
    case 3: privmask |= CP15_MAP_EXECUTABLE; usermask |= CP15_MAP_EXECUTABLE; break;
    case 5: privmask |= CP15_MAP_EXECUTABLE; break;
    case 6: privmask |= CP15_MAP_EXECUTABLE; usermask |= CP15_MAP_EXECUTABLE; break;
    default: Log(LogLevel::Warn, "!! BAD CODERW VALUE %d\n", coderw & ((1 << CP15_REGIONACCESS_BITS_PER_REGION)-1));
    }

    if (datacache)
    {
        privmask |= 0x10;
        usermask |= 0x10;
    }

    if (datawrite & 0x1)
    {
        privmask |= 0x20;
        usermask |= 0x20;
    }

    if (codecache)
    {
        privmask |= CP15_MAP_ICACHEABLE;
        usermask |= CP15_MAP_ICACHEABLE;
    }

    Log(
        LogLevel::Debug,
        "PU region %d: %08X-%08X, user=%02X priv=%02X, %08X/%08X\n",
        n,
        start << CP15_MAP_ENTRYSIZE_LOG2,
        (end << CP15_MAP_ENTRYSIZE_LOG2) - 1,
        usermask,
        privmask,
        PU_DataRW,
        PU_CodeRW
    );

    for (u32 i = start; i < end; i++)
    {
        PU_UserMap[i] = usermask;
        PU_PrivMap[i] = privmask;
    }
}

void ARMv5::UpdatePURegions(const bool update_all)
{
    if (!(CP15Control & CP15_CR_MPUENABLE))
    {
        // PU disabled

        u8 mask = CP15_MAP_READABLE | CP15_MAP_WRITEABLE | CP15_MAP_EXECUTABLE;

        memset(PU_UserMap, mask, CP15_MAP_ENTRYCOUNT);
        memset(PU_PrivMap, mask, CP15_MAP_ENTRYCOUNT);

        return;
    }

    if (update_all)
    {
        memset(PU_UserMap, CP15_MAP_NOACCESS, CP15_MAP_ENTRYCOUNT);
        memset(PU_PrivMap, CP15_MAP_NOACCESS, CP15_MAP_ENTRYCOUNT);
    }

    for (int n = 0; n < CP15_REGION_COUNT; n++)
    {
        UpdatePURegion(n);
    }

    // TODO: throw exception if the region we're running in has become non-executable, I guess
}

void ARMv5::UpdateRegionTimings(u32 addrstart, u32 addrend)
{
    for (u32 i = addrstart; i < addrend; i++)
    {
        u8* bustimings = NDS.ARM9MemTimings[i];

        if (NDS.ARM9ClockShift == 1)
        {
            MemTimings[i][0] = (bustimings[0] << NDS.ARM9ClockShift) - 1;
            MemTimings[i][1] = (bustimings[2] << NDS.ARM9ClockShift) - 1;
            MemTimings[i][2] = bustimings[3] << NDS.ARM9ClockShift; // sequentials technically should probably be -1 as well?
            // but it doesn't really matter as long as i also dont force align the start of sequential accesses, now does it?
        }
        else
        {
            if (NDS.ARM9Regions[i] != Mem9_MainRAM)
            {
                // 133MHz clock has 1 less bus cycle penalty on ns accesses
                MemTimings[i][0] = ((bustimings[0] - 1) << NDS.ARM9ClockShift) - 1;
                MemTimings[i][1] = ((bustimings[2] - 1) << NDS.ARM9ClockShift) - 1;
            }
            else
            {
                // we handle the different timings for main ram in the read/write functions (they're slightly more complicated...)
                MemTimings[i][0] = (bustimings[0] << NDS.ARM9ClockShift) - 1;
                MemTimings[i][1] = (bustimings[2] << NDS.ARM9ClockShift) - 1;
            }
            MemTimings[i][2] = bustimings[3] << NDS.ARM9ClockShift;
        }
    }
}


u32 ARMv5::RandomLineIndex()
{
    // lame RNG, but good enough for this purpose
    u32 s = RNGSeed;
    RNGSeed ^= (s*17);
    RNGSeed ^= (s*7);

    return (RNGSeed >> 17) & 0x3;
}
|
|
|
|
bool ARMv5::ICacheLookup(const u32 addr)
|
|
{
|
|
const u32 tag = (addr & ~(ICACHE_LINELENGTH - 1));
|
|
const u32 id = ((addr >> ICACHE_LINELENGTH_LOG2) & (ICACHE_LINESPERSET-1)) << ICACHE_SETS_LOG2;
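// tag keeps addr with the within-line offset bits cleared; id is the index of the first of the
// ICACHE_SETS tag entries belonging to this address's set (tags are stored as line-index * ICACHE_SETS + way).
// For example, with the 32-byte lines implied by the 0x1F masks used elsewhere in this file,
// addr 0x02004D64 gives tag 0x02004D60. (Worked example for illustration only.)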
|
|
|
|
#if defined(__x86_64__)
|
|
// we use sse here to greatly speed up checking for valid sets vs the fallback for loop
|
|
|
|
__m128i tags; memcpy(&tags, &ICacheTags[id], 16); // load the tags for all 4 sets, one for each 32 bits
|
|
__m128i mask = _mm_set1_epi32(~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)); // load copies of the mask into each 32 bits
|
|
__m128i cmp = _mm_set1_epi32(tag | CACHE_FLAG_VALID); // load the tag we're checking for into each 32 bit
|
|
tags = _mm_and_si128(tags, mask); // mask out the bits we dont want to check for
|
|
cmp = _mm_cmpeq_epi32(tags, cmp); // compare to see if any bits match; sets all bits of each value to either 0 or 1 depending on the result
|
|
u32 set = _mm_movemask_ps(_mm_castsi128_ps(cmp)); // move the "sign bits" of each field into the low 4 bits of a 32 bit integer
|
|
|
|
if (!set) goto miss; // check if none of them were a match
|
|
else set = __builtin_ctz(set); // count trailing zeros and right shift to figure out which set had a match
|
|
|
|
{
|
|
#elif defined(__ARM_NEON)
|
|
uint32x4_t tags = { ICacheTags[id+0], ICacheTags[id+1], ICacheTags[id+2], ICacheTags[id+3] }; // load tags
|
|
uint32x4_t mask = { ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK) }; // load mask
|
|
uint32x4_t cmp = { tag | CACHE_FLAG_VALID,
|
|
tag | CACHE_FLAG_VALID,
|
|
tag | CACHE_FLAG_VALID,
|
|
tag | CACHE_FLAG_VALID }; // load tag and flag we're checking for
|
|
tags = vandq_u32(tags, mask); // mask out bits we dont wanna check for
|
|
cmp = vceqq_u32(tags, cmp);
|
|
uint16x4_t res = vmovn_u32(cmp);
|
|
u64 set; memcpy(&set, &res, 4);
|
|
|
|
if (!set) goto miss;
|
|
else set = __builtin_ctz(set) >> 4;
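// note: each narrowed compare lane is 16 bits wide, so the bit index returned by
// __builtin_ctz is divided by 16 (>> 4) to turn it into a lane (way) index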
|
|
|
|
{
|
|
#else
|
|
// fallback for loop; slow
|
|
for (int set = 0; set < ICACHE_SETS; set++)
|
|
{
|
|
if ((ICacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == (tag | CACHE_FLAG_VALID))
|
|
#endif
|
|
{
|
|
u32 *cacheLine = (u32 *)&ICache[(id+set) << ICACHE_LINELENGTH_LOG2];
|
|
|
|
if (ICacheStreamPtr >= 7)
|
|
{
|
|
if (NDS.ARM9Timestamp < ITCMTimestamp) NDS.ARM9Timestamp = ITCMTimestamp; // does this apply to streamed fetches?
|
|
NDS.ARM9Timestamp++;
|
|
}
|
|
else
|
|
{
|
|
u64 nextfill = ICacheStreamTimes[ICacheStreamPtr++];
|
|
if (NDS.ARM9Timestamp < nextfill)
|
|
{
|
|
NDS.ARM9Timestamp = nextfill;
|
|
}
|
|
else
|
|
{
|
|
u64 fillend = ICacheStreamTimes[6] + 2;
|
|
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend;
|
|
else // checkme
|
|
{
|
|
if (NDS.ARM9Timestamp < ITCMTimestamp) NDS.ARM9Timestamp = ITCMTimestamp;
|
|
NDS.ARM9Timestamp++;
|
|
}
|
|
ICacheStreamPtr = 7;
|
|
}
|
|
}
|
|
if (NDS.ARM9Timestamp < TimestampMemory) NDS.ARM9Timestamp = TimestampMemory;
|
|
DataRegion = Mem9_Null;
|
|
Store = false;
|
|
|
|
RetVal = cacheLine[(addr & (ICACHE_LINELENGTH -1)) / 4];
|
|
QueueFunction(DelayedQueue);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// cache miss
|
|
miss:
|
|
// We do not fill the cacheline if it is disabled in the
|
|
// BIST test State register (See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register")
|
|
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_ICACHE_LINEFILL) [[unlikely]]
|
|
return false;
|
|
|
|
WriteBufferDrain();
|
|
FetchAddr[16] = addr;
|
|
QueueFunction(&ARMv5::ICacheLookup_2);
|
|
return true;
|
|
}
|
|
|
|
void ARMv5::ICacheLookup_2()
|
|
{
|
|
u32 addr = FetchAddr[16];
|
|
const u32 tag = (addr & ~(ICACHE_LINELENGTH - 1));
|
|
const u32 id = ((addr >> ICACHE_LINELENGTH_LOG2) & (ICACHE_LINESPERSET-1)) << ICACHE_SETS_LOG2;
|
|
|
|
u32 line;
|
|
|
|
if (CP15Control & CP15_CACHE_CR_ROUNDROBIN) [[likely]]
|
|
{
|
|
line = ICacheCount;
|
|
ICacheCount = (line+1) & (ICACHE_SETS-1);
|
|
}
|
|
else
|
|
{
|
|
line = RandomLineIndex();
|
|
}
|
|
|
|
if (ICacheLockDown)
|
|
{
|
|
if (ICacheLockDown & CACHE_LOCKUP_L) [[unlikely]]
|
|
{
|
|
// load into locked up cache
|
|
// into the selected set
|
|
line = ICacheLockDown & (ICACHE_SETS-1);
|
|
} else
|
|
{
|
|
u8 minSet = ICacheLockDown & (ICACHE_SETS-1);
|
|
line = line | minSet;
|
|
}
|
|
}
|
|
|
|
line += id;
|
|
|
|
u32* ptr = (u32 *)&ICache[line << ICACHE_LINELENGTH_LOG2];
|
|
|
|
// bus reads can only overlap with dcache streaming by 6 cycles
|
|
if (DCacheStreamPtr < 7)
|
|
{
|
|
u64 time = DCacheStreamTimes[6] - 6; // checkme: minus 6?
|
|
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
|
|
}
|
|
|
|
ICacheTags[line] = tag | (line & (ICACHE_SETS-1)) | CACHE_FLAG_VALID;
|
|
|
|
// timing logic
|
|
NDS.ARM9Timestamp = NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1);
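// note: + binds tighter than &, so the line above computes (Timestamp + (period-1)) & ~(period-1),
// i.e. it rounds the timestamp up to the next bus-clock edge, where period = 1 << ARM9ClockShift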
|
|
|
|
if (NDS.ARM9Regions[addr>>14] == Mem9_MainRAM)
|
|
{
|
|
MRTrack.Type = MainRAMType::ICacheStream;
|
|
MRTrack.Var = line;
|
|
FetchAddr[16] = addr & ~3;
|
|
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_ICACHE_STREAMING) [[unlikely]]
|
|
ICacheStreamPtr = 7;
|
|
else ICacheStreamPtr = (addr & 0x1F) / 4;
|
|
}
|
|
else
|
|
{
|
|
for (int i = 0; i < ICACHE_LINELENGTH; i+=sizeof(u32))
|
|
ptr[i/4] = NDS.ARM9Read32(tag+i);
|
|
|
|
if (((NDS.ARM9Timestamp <= WBReleaseTS) && (NDS.ARM9Regions[addr>>14] == WBLastRegion)) // check write buffer
|
|
|| (Store && (NDS.ARM9Regions[addr>>14] == DataRegion))) //check the actual store
|
|
NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift;
|
|
|
|
// Disabled ICACHE Streaming:
|
|
// Wait until the entire cache line is filled before continuing with execution
|
|
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_ICACHE_STREAMING) [[unlikely]]
|
|
{
|
|
NDS.ARM9Timestamp += MemTimings[tag >> 14][1] + (MemTimings[tag >> 14][2] * ((DCACHE_LINELENGTH / 4) - 1));
|
|
if (NDS.ARM9Timestamp < TimestampMemory) NDS.ARM9Timestamp = TimestampMemory; // this should never trigger in practice
|
|
}
|
|
else // ICache Streaming logic
|
|
{
|
|
u8 ns = MemTimings[addr>>14][1];
|
|
u8 seq = MemTimings[addr>>14][2];
|
|
|
|
u8 linepos = (addr & 0x1F) / 4; // technically this is one too low, but we want that actually
|
|
|
|
u64 cycles = ns + (seq * linepos);
|
|
NDS.ARM9Timestamp = cycles += NDS.ARM9Timestamp;
|
|
|
|
ICacheStreamPtr = linepos;
|
|
for (int i = linepos; i < 7; i++)
|
|
{
|
|
cycles += seq;
|
|
ICacheStreamTimes[i] = cycles;
|
|
}
|
|
}
|
|
RetVal = ptr[(addr & (ICACHE_LINELENGTH-1)) / 4];
|
|
}
|
|
Store = false;
|
|
DataRegion = Mem9_Null;
|
|
QueueFunction(DelayedQueue);
|
|
}
|
|
|
|
void ARMv5::ICacheInvalidateByAddr(const u32 addr)
|
|
{
|
|
const u32 tag = (addr & ~(ICACHE_LINELENGTH - 1)) | CACHE_FLAG_VALID;
|
|
const u32 id = ((addr >> ICACHE_LINELENGTH_LOG2) & (ICACHE_LINESPERSET-1)) << ICACHE_SETS_LOG2;
|
|
|
|
for (int set = 0; set < ICACHE_SETS; set++)
|
|
{
|
|
if ((ICacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
|
{
|
|
ICacheTags[id+set] &= ~CACHE_FLAG_VALID;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
void ARMv5::ICacheInvalidateBySetAndWay(const u8 cacheSet, const u8 cacheLine)
|
|
{
|
|
if (cacheSet >= ICACHE_SETS)
|
|
return;
|
|
if (cacheLine >= ICACHE_LINESPERSET)
|
|
return;
|
|
|
|
u32 idx = (cacheLine << ICACHE_SETS_LOG2) + cacheSet;
|
|
ICacheTags[idx] &= ~CACHE_FLAG_VALID;
|
|
}
|
|
|
|
|
|
void ARMv5::ICacheInvalidateAll()
|
|
{
|
|
#pragma GCC ivdep
|
|
for (int i = 0; i < ICACHE_SIZE / ICACHE_LINELENGTH; i++)
|
|
ICacheTags[i] &= ~CACHE_FLAG_VALID;
|
|
}
|
|
|
|
bool ARMv5::IsAddressICachable(const u32 addr) const
|
|
{
|
|
return PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_ICACHEABLE;
|
|
}
|
|
|
|
bool ARMv5::DCacheLookup(const u32 addr)
|
|
{
|
|
//Log(LogLevel::Debug,"DCache load @ %08x\n", addr);
|
|
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1));
|
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
|
|
|
#if defined(__x86_64__)
|
|
// we use sse here to greatly speed up checking for valid sets vs the fallback for loop
|
|
|
|
__m128i tags; memcpy(&tags, &DCacheTags[id], 16); // load the tags for all 4 sets, one for each 32 bits
|
|
__m128i mask = _mm_set1_epi32(~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)); // load copies of the mask into each 32 bits
|
|
__m128i cmp = _mm_set1_epi32(tag | CACHE_FLAG_VALID); // load the tag we're checking for into each 32 bit
|
|
tags = _mm_and_si128(tags, mask); // mask out the bits we dont want to check for
|
|
cmp = _mm_cmpeq_epi32(tags, cmp); // compare to see if any bits match; sets all bits of each value to either 0 or 1 depending on the result
|
|
u32 set = _mm_movemask_ps(_mm_castsi128_ps(cmp)); // move the "sign bits" of each field into the low 4 bits of a 32 bit integer
|
|
|
|
if (!set) goto miss; // check if none of them were a match
|
|
else set = __builtin_ctz(set); // count trailing zeros and right shift to figure out which set had a match
|
|
|
|
{
|
|
#elif defined(__ARM_NEON)
|
|
uint32x4_t tags = { DCacheTags[id+0], DCacheTags[id+1], DCacheTags[id+2], DCacheTags[id+3] }; // load tags
|
|
uint32x4_t mask = { ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK) }; // load mask
|
|
uint32x4_t cmp = { tag | CACHE_FLAG_VALID,
|
|
tag | CACHE_FLAG_VALID,
|
|
tag | CACHE_FLAG_VALID,
|
|
tag | CACHE_FLAG_VALID }; // load tag and flag we're checking for
|
|
tags = vandq_u32(tags, mask); // mask out bits we dont wanna check for
|
|
cmp = vceqq_u32(tags, cmp);
|
|
uint16x4_t res = vmovn_u32(cmp);
|
|
u64 set; memcpy(&set, &res, 4);
|
|
|
|
if (!set) goto miss;
|
|
else set = __builtin_ctz(set) >> 4;
|
|
|
|
{
|
|
#else
|
|
// fallback for loop; slow
|
|
for (int set = 0; set < DCACHE_SETS; set++)
|
|
{
|
|
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == (tag | CACHE_FLAG_VALID))
|
|
#endif
|
|
{
|
|
u32 *cacheLine = (u32 *)&DCache[(id+set) << DCACHE_LINELENGTH_LOG2];
|
|
|
|
if (DCacheStreamPtr >= 7)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
}
|
|
else
|
|
{
|
|
u64 nextfill = DCacheStreamTimes[DCacheStreamPtr++];
|
|
//if (NDS.ARM9Timestamp < nextfill) // can this ever really fail?
|
|
{
|
|
DataCycles = nextfill - NDS.ARM9Timestamp;
|
|
if (DataCycles > (3<<NDS.ARM9ClockShift)) DataCycles = 3<<NDS.ARM9ClockShift;
|
|
NDS.ARM9Timestamp = nextfill;
|
|
}
|
|
/*else
|
|
{
|
|
u64 fillend = DCacheStreamTimes[6] + 2;
|
|
if (NDS.ARM9Timestamp < fillend) DataCycles = fillend - NDS.ARM9Timestamp;
|
|
else DataCycles = 1;
|
|
DCacheStreamPtr = 7;
|
|
}*/
|
|
}
|
|
DataRegion = Mem9_DCache;
|
|
//Log(LogLevel::Debug, "DCache hit at %08lx returned %08x from set %i, line %i\n", addr, cacheLine[(addr & (DCACHE_LINELENGTH -1)) >> 2], set, id>>2);
|
|
RetVal = cacheLine[(addr & (DCACHE_LINELENGTH -1)) >> 2];
|
|
(this->*DelayedQueue)();
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// bus reads can only overlap with icache streaming by 6 cycles
|
|
// checkme: does cache trigger this?
|
|
if (ICacheStreamPtr < 7)
|
|
{
|
|
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
|
|
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
|
|
}
|
|
|
|
// cache miss
|
|
miss:
|
|
// We do not fill the cacheline if it is disabled in the
|
|
// BIST test State register (See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register")
|
|
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_LINEFILL) [[unlikely]]
|
|
return false;
|
|
WriteBufferDrain(); // checkme?
|
|
|
|
FetchAddr[16] = addr;
|
|
QueueFunction(&ARMv5::DCacheLookup_2);
|
|
return true;
|
|
}
|
|
|
|
void ARMv5::DCacheLookup_2()
|
|
{
|
|
u32 addr = FetchAddr[16];
|
|
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1));
|
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
|
u32 line;
|
|
|
|
if (CP15Control & CP15_CACHE_CR_ROUNDROBIN) [[likely]]
|
|
{
|
|
line = DCacheCount;
|
|
DCacheCount = (line+1) & (DCACHE_SETS-1);
|
|
}
|
|
else
|
|
{
|
|
line = RandomLineIndex();
|
|
}
|
|
|
|
if (DCacheLockDown)
|
|
{
|
|
if (DCacheLockDown & CACHE_LOCKUP_L) [[unlikely]]
|
|
{
|
|
// load into locked up cache
|
|
// into the selected set
|
|
line = DCacheLockDown & (DCACHE_SETS-1);
|
|
} else
|
|
{
|
|
u8 minSet = DCacheLockDown & (DCACHE_SETS-1);
|
|
line = line | minSet;
|
|
}
|
|
}
|
|
line += id;
|
|
|
|
#if !DISABLE_CACHEWRITEBACK
|
|
// Before we fill the cacheline, we need to write back dirty content
|
|
// Datacycles will be incremented by the required cycles to do so
|
|
DCacheClearByASetAndWay(line & (DCACHE_SETS-1), line >> DCACHE_SETS_LOG2);
|
|
#endif
|
|
|
|
QueuedDCacheLine = line;
|
|
QueueFunction(&ARMv5::DCacheLookup_3);
|
|
}
|
|
|
|
void ARMv5::DCacheLookup_3()
|
|
{
|
|
u32 addr = FetchAddr[16];
|
|
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1));
|
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
|
u32 line = QueuedDCacheLine;
|
|
u32* ptr = (u32 *)&DCache[line << DCACHE_LINELENGTH_LOG2];
|
|
DCacheTags[line] = tag | (line & (DCACHE_SETS-1)) | CACHE_FLAG_VALID;
|
|
|
|
// timing logic
|
|
|
|
if (NDS.ARM9Regions[addr>>14] == Mem9_MainRAM)
|
|
{
|
|
MRTrack.Type = MainRAMType::DCacheStream;
|
|
MRTrack.Var = line;
|
|
|
|
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_STREAMING) [[unlikely]]
|
|
DCacheStreamPtr = 7;
|
|
else DCacheStreamPtr = (addr & 0x1F) / 4;
|
|
|
|
QueueFunction(DelayedQueue);
|
|
}
|
|
else
|
|
{
|
|
for (int i = 0; i < DCACHE_LINELENGTH; i+=sizeof(u32))
|
|
{
|
|
ptr[i >> 2] = BusRead32(tag+i);
|
|
}
|
|
// Disabled DCACHE Streaming:
|
|
// Wait until the entire cache line is filled before continuing with execution
|
|
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_STREAMING) [[unlikely]]
|
|
{
|
|
NDS.ARM9Timestamp = NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
NDS.ARM9Timestamp += MemTimings[tag >> 14][1] + (MemTimings[tag >> 14][2] * ((DCACHE_LINELENGTH / 4) - 1));
|
|
DataCycles = MemTimings[tag>>14][2];
|
|
|
|
DataRegion = NDS.ARM9Regions[addr>>14];
|
|
if (((NDS.ARM9Timestamp <= WBReleaseTS) && (NDS.ARM9Regions[addr>>14] == WBLastRegion)) // check write buffer
|
|
|| (Store && (NDS.ARM9Regions[addr>>14] == DataRegion))) //check the actual store
|
|
NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift;
|
|
}
|
|
else // DCache Streaming logic
|
|
{
|
|
DataRegion = NDS.ARM9Regions[addr>>14];
|
|
if ((NDS.ARM9Timestamp <= WBReleaseTS) && (DataRegion == WBLastRegion)) // check write buffer
|
|
NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift;
|
|
|
|
NDS.ARM9Timestamp = NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
u8 ns = MemTimings[addr>>14][1];
|
|
u8 seq = MemTimings[addr>>14][2];
|
|
|
|
u8 linepos = (addr & 0x1F) >> 2; // technically this is one too low, but we want that actually
|
|
|
|
u64 cycles = ns + (seq * linepos);
|
|
DataCycles = 3<<NDS.ARM9ClockShift;
|
|
NDS.ARM9Timestamp += DataCycles;
|
|
cycles = NDS.ARM9Timestamp;
|
|
|
|
DCacheStreamPtr = linepos;
|
|
for (int i = linepos; i < 7; i++)
|
|
{
|
|
cycles += seq;
|
|
DCacheStreamTimes[i] = cycles;
|
|
}
|
|
}
|
|
RetVal = ptr[(addr & (DCACHE_LINELENGTH-1)) >> 2];
|
|
(this->*DelayedQueue)();
|
|
}
|
|
}
|
|
|
|
bool ARMv5::DCacheWrite32(const u32 addr, const u32 val)
|
|
{
|
|
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1)) | CACHE_FLAG_VALID;
|
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
|
|
|
//Log(LogLevel::Debug, "Cache write 32: %08lx <= %08lx\n", addr, val);
|
|
|
|
#if defined(__x86_64__)
|
|
// we use sse here to greatly speed up checking for valid sets vs the fallback for loop
|
|
|
|
__m128i tags; memcpy(&tags, &DCacheTags[id], 16); // load the tags for all 4 sets, one for each 32 bits
|
|
__m128i mask = _mm_set1_epi32(~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)); // load copies of the mask into each 32 bits
|
|
__m128i cmp = _mm_set1_epi32(tag); // load the tag we're checking for into each 32 bit
|
|
tags = _mm_and_si128(tags, mask); // mask out the bits we dont want to check for
|
|
cmp = _mm_cmpeq_epi32(tags, cmp); // compare to see if any bits match; sets all bits of each value to either 0 or 1 depending on the result
|
|
u32 set = _mm_movemask_ps(_mm_castsi128_ps(cmp)); // move the "sign bits" of each field into the low 4 bits of a 32 bit integer
|
|
|
|
if (!set) return false; // check if none of them were a match
|
|
else set = __builtin_ctz(set); // count trailing zeros and right shift to figure out which set had a match
|
|
|
|
{
|
|
#elif defined(__ARM_NEON)
|
|
uint32x4_t tags = { DCacheTags[id+0], DCacheTags[id+1], DCacheTags[id+2], DCacheTags[id+3] }; // load tags
|
|
uint32x4_t mask = { ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK) }; // load mask
|
|
uint32x4_t cmp = { tag, tag, tag, tag }; // load tag and flag we're checking for
|
|
tags = vandq_u32(tags, mask); // mask out bits we dont wanna check for
|
|
cmp = vceqq_u32(tags, cmp);
|
|
uint16x4_t res = vmovn_u32(cmp);
|
|
u64 set; memcpy(&set, &res, 4);
|
|
|
|
if (!set) return false;
|
|
else set = __builtin_ctz(set) >> 4;
|
|
|
|
{
|
|
#else
|
|
// fallback for loop; slow
|
|
for (int set = 0; set < DCACHE_SETS; set++)
|
|
{
|
|
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
|
#endif
|
|
{
|
|
u32 *cacheLine = (u32 *)&DCache[(id+set) << DCACHE_LINELENGTH_LOG2];
|
|
cacheLine[(addr & (DCACHE_LINELENGTH-1)) >> 2] = val;
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
DataRegion = Mem9_DCache;
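// With write-back enabled, dirtiness is tracked per half of the cache line: the test below on
// addr & (DCACHE_LINELENGTH / 2) (bit 4 with the 32-byte lines used here) picks the upper or
// lower half flag, so only the touched half needs writing back later.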
|
|
#if !DISABLE_CACHEWRITEBACK
|
|
if (PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_BUFFERABLE)
|
|
{
|
|
if (addr & (DCACHE_LINELENGTH / 2))
|
|
{
|
|
DCacheTags[id+set] |= CACHE_FLAG_DIRTY_UPPERHALF;
|
|
}
|
|
else
|
|
{
|
|
DCacheTags[id+set] |= CACHE_FLAG_DIRTY_LOWERHALF;
|
|
}
|
|
// just mark dirty and abort the data write through the bus
|
|
return true;
|
|
}
|
|
#endif
|
|
return false;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool ARMv5::DCacheWrite16(const u32 addr, const u16 val)
|
|
{
|
|
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1)) | CACHE_FLAG_VALID;
|
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
|
//Log(LogLevel::Debug, "Cache write 16: %08lx <= %04x\n", addr, val);
|
|
|
|
#if defined(__x86_64__)
|
|
// we use sse here to greatly speed up checking for valid sets vs the fallback for loop
|
|
|
|
__m128i tags; memcpy(&tags, &DCacheTags[id], 16); // load the tags for all 4 sets, one for each 32 bits
|
|
__m128i mask = _mm_set1_epi32(~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)); // load copies of the mask into each 32 bits
|
|
__m128i cmp = _mm_set1_epi32(tag); // load the tag we're checking for into each 32 bit
|
|
tags = _mm_and_si128(tags, mask); // mask out the bits we dont want to check for
|
|
cmp = _mm_cmpeq_epi32(tags, cmp); // compare to see if any bits match; sets all bits of each value to either 0 or 1 depending on the result
|
|
u32 set = _mm_movemask_ps(_mm_castsi128_ps(cmp)); // move the "sign bits" of each field into the low 4 bits of a 32 bit integer
|
|
|
|
if (!set) return false; // check if none of them were a match
|
|
else set = __builtin_ctz(set); // count trailing zeros and right shift to figure out which set had a match
|
|
|
|
{
|
|
#elif defined(__ARM_NEON)
|
|
uint32x4_t tags = { DCacheTags[id+0], DCacheTags[id+1], DCacheTags[id+2], DCacheTags[id+3] }; // load tags
|
|
uint32x4_t mask = { ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK) }; // load mask
|
|
uint32x4_t cmp = { tag, tag, tag, tag }; // load tag and flag we're checking for
|
|
tags = vandq_u32(tags, mask); // mask out bits we dont wanna check for
|
|
cmp = vceqq_u32(tags, cmp);
|
|
uint16x4_t res = vmovn_u32(cmp);
|
|
u64 set; memcpy(&set, &res, 4);
|
|
|
|
if (!set) return false;
|
|
else set = __builtin_ctz(set) >> 4;
|
|
|
|
{
|
|
#else
|
|
// fallback for loop; slow
|
|
for (int set = 0; set < DCACHE_SETS; set++)
|
|
{
|
|
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
|
#endif
|
|
{
|
|
u16 *cacheLine = (u16 *)&DCache[(id+set) << DCACHE_LINELENGTH_LOG2];
|
|
cacheLine[(addr & (DCACHE_LINELENGTH-1)) >> 1] = val;
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
DataRegion = Mem9_DCache;
|
|
#if !DISABLE_CACHEWRITEBACK
|
|
if (PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_BUFFERABLE)
|
|
{
|
|
if (addr & (DCACHE_LINELENGTH / 2))
|
|
{
|
|
DCacheTags[id+set] |= CACHE_FLAG_DIRTY_UPPERHALF;
|
|
}
|
|
else
|
|
{
|
|
DCacheTags[id+set] |= CACHE_FLAG_DIRTY_LOWERHALF;
|
|
}
|
|
// just mark dirty and abort the data write through the bus
|
|
return true;
|
|
}
|
|
#endif
|
|
return false;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool ARMv5::DCacheWrite8(const u32 addr, const u8 val)
|
|
{
|
|
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1)) | CACHE_FLAG_VALID;
|
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
|
|
|
//Log(LogLevel::Debug, "Cache write 8: %08lx <= %02x\n", addr, val);
|
|
|
|
#if defined(__x86_64__)
|
|
// we use sse here to greatly speed up checking for valid sets vs the fallback for loop
|
|
|
|
__m128i tags; memcpy(&tags, &DCacheTags[id], 16); // load the tags for all 4 sets, one for each 32 bits
|
|
__m128i mask = _mm_set1_epi32(~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)); // load copies of the mask into each 32 bits
|
|
__m128i cmp = _mm_set1_epi32(tag); // load the tag we're checking for into each 32 bit
|
|
tags = _mm_and_si128(tags, mask); // mask out the bits we dont want to check for
|
|
cmp = _mm_cmpeq_epi32(tags, cmp); // compare to see if any bits match; sets all bits of each value to either 0 or 1 depending on the result
|
|
u32 set = _mm_movemask_ps(_mm_castsi128_ps(cmp)); // move the "sign bits" of each field into the low 4 bits of a 32 bit integer
|
|
|
|
if (!set) return false; // check if none of them were a match
|
|
else set = __builtin_ctz(set); // count trailing zeros and right shift to figure out which set had a match
|
|
|
|
{
|
|
#elif defined(__ARM_NEON)
|
|
uint32x4_t tags = { DCacheTags[id+0], DCacheTags[id+1], DCacheTags[id+2], DCacheTags[id+3] }; // load tags
|
|
uint32x4_t mask = { ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK) }; // load mask
|
|
uint32x4_t cmp = { tag, tag, tag, tag }; // load tag and flag we're checking for
|
|
tags = vandq_u32(tags, mask); // mask out bits we dont wanna check for
|
|
cmp = vceqq_u32(tags, cmp);
|
|
uint16x4_t res = vmovn_u32(cmp);
|
|
u64 set; memcpy(&set, &res, 4);
|
|
|
|
if (!set) return false;
|
|
else set = __builtin_ctz(set) >> 4;
|
|
|
|
{
|
|
#else
|
|
// fallback for loop; slow
|
|
for (int set = 0; set < DCACHE_SETS; set++)
|
|
{
|
|
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
|
#endif
|
|
{
|
|
u8 *cacheLine = &DCache[(id+set) << DCACHE_LINELENGTH_LOG2];
|
|
cacheLine[addr & (DCACHE_LINELENGTH-1)] = val;
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
DataRegion = Mem9_DCache;
|
|
#if !DISABLE_CACHEWRITEBACK
|
|
if (PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_BUFFERABLE)
|
|
{
|
|
if (addr & (DCACHE_LINELENGTH / 2))
|
|
{
|
|
DCacheTags[id+set] |= CACHE_FLAG_DIRTY_UPPERHALF;
|
|
}
|
|
else
|
|
{
|
|
DCacheTags[id+set] |= CACHE_FLAG_DIRTY_LOWERHALF;
|
|
}
|
|
|
|
// just mark dirty and abort the data write through the bus
|
|
return true;
|
|
}
|
|
#endif
|
|
return false;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void ARMv5::DCacheInvalidateByAddr(const u32 addr)
|
|
{
|
|
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1)) | CACHE_FLAG_VALID;
|
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
|
|
|
#if defined(__x86_64__)
|
|
// we use sse here to greatly speed up checking for valid sets vs the fallback for loop
|
|
|
|
__m128i tags; memcpy(&tags, &DCacheTags[id], 16); // load the tags for all 4 sets, one for each 32 bits
|
|
__m128i mask = _mm_set1_epi32(~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)); // load copies of the mask into each 32 bits
|
|
__m128i cmp = _mm_set1_epi32(tag); // load the tag we're checking for into each 32 bit
|
|
tags = _mm_and_si128(tags, mask); // mask out the bits we dont want to check for
|
|
cmp = _mm_cmpeq_epi32(tags, cmp); // compare to see if any bits match; sets all bits of each value to either 0 or 1 depending on the result
|
|
u32 set = _mm_movemask_ps(_mm_castsi128_ps(cmp)); // move the "sign bits" of each field into the low 4 bits of a 32 bit integer
|
|
|
|
if (!set) return; // check if none of them were a match
|
|
else set = __builtin_ctz(set); // count trailing zeros and right shift to figure out which set had a match
|
|
|
|
{
|
|
#elif defined(__ARM_NEON)
|
|
uint32x4_t tags = { DCacheTags[id+0], DCacheTags[id+1], DCacheTags[id+2], DCacheTags[id+3] }; // load tags
|
|
uint32x4_t mask = { ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK),
|
|
~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK) }; // load mask
|
|
uint32x4_t cmp = { tag, tag, tag, tag }; // load tag and flag we're checking for
|
|
tags = vandq_u32(tags, mask); // mask out bits we dont wanna check for
|
|
cmp = vceqq_u32(tags, cmp);
|
|
uint16x4_t res = vmovn_u32(cmp);
|
|
u64 set; memcpy(&set, &res, 4);
|
|
|
|
if (!set) return;
|
|
else set = __builtin_ctz(set) >> 4;
|
|
|
|
{
|
|
#else
|
|
// fallback for loop; slow
|
|
for (int set = 0; set < DCACHE_SETS; set++)
|
|
{
|
|
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
|
#endif
|
|
{
|
|
//Log(LogLevel::Debug,"DCache invalidated %08lx\n", addr & ~(ICACHE_LINELENGTH-1));
|
|
DCacheTags[id+set] &= ~CACHE_FLAG_VALID;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
void ARMv5::DCacheInvalidateBySetAndWay(const u8 cacheSet, const u8 cacheLine)
|
|
{
|
|
if (cacheSet >= DCACHE_SETS)
|
|
return;
|
|
if (cacheLine >= DCACHE_LINESPERSET)
|
|
return;
|
|
|
|
u32 idx = (cacheLine << DCACHE_SETS_LOG2) + cacheSet;
|
|
DCacheTags[idx] &= ~CACHE_FLAG_VALID;
|
|
}
|
|
|
|
|
|
void ARMv5::DCacheInvalidateAll()
|
|
{
|
|
#pragma GCC ivdep
|
|
for (int i = 0; i < DCACHE_SIZE / DCACHE_LINELENGTH; i++)
|
|
DCacheTags[i] &= ~CACHE_FLAG_VALID;
|
|
}
|
|
|
|
void ARMv5::DCacheClearAll()
|
|
{
|
|
#if !DISABLE_CACHEWRITEBACK
|
|
for (int set = 0; set < DCACHE_SETS; set++)
|
|
for (int line = 0; line < DCACHE_LINESPERSET; line++) // line indices run 0..DCACHE_LINESPERSET-1
|
|
DCacheClearByASetAndWay(set, line);
|
|
#endif
|
|
}
|
|
|
|
void ARMv5::DCacheClearByAddr(const u32 addr)
|
|
{
|
|
#if !DISABLE_CACHEWRITEBACK
|
|
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1)) | CACHE_FLAG_VALID;
|
|
const u32 id = ((addr >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1)) << DCACHE_SETS_LOG2;
|
|
|
|
for (int set = 0; set < DCACHE_SETS; set++)
|
|
{
|
|
if ((DCacheTags[id+set] & ~(CACHE_FLAG_DIRTY_MASK | CACHE_FLAG_SET_MASK)) == tag)
|
|
{
|
|
DCacheClearByASetAndWay(set, id >> DCACHE_SETS_LOG2);
|
|
return;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void ARMv5::DCacheClearByASetAndWay(const u8 cacheSet, const u8 cacheLine)
|
|
{
|
|
#if !DISABLE_CACHEWRITEBACK
|
|
const u32 index = cacheSet | (cacheLine << DCACHE_SETS_LOG2);
|
|
|
|
// Only write back if valid
|
|
if (!(DCacheTags[index] & CACHE_FLAG_VALID))
|
|
return;
|
|
|
|
const u32 tag = DCacheTags[index] & ~CACHE_FLAG_MASK;
|
|
u32* ptr = (u32 *)&DCache[index << DCACHE_LINELENGTH_LOG2];
|
|
|
|
if (DCacheTags[index] & CACHE_FLAG_DIRTY_LOWERHALF)
|
|
{
|
|
if (WBDelay > NDS.ARM9Timestamp) NDS.ARM9Timestamp = WBDelay;
|
|
|
|
WriteBufferWrite(tag, 4);
|
|
WriteBufferWrite(ptr[0], 2, tag+0x00);
|
|
WriteBufferWrite(ptr[1], 3, tag+0x04);
|
|
WriteBufferWrite(ptr[2], 3, tag+0x08);
|
|
WriteBufferWrite(ptr[3], 3, tag+0x0C);
|
|
//NDS.ARM9Timestamp += 4; //DataCycles += 5; CHECKME: does this function like a write does but with mcr?
|
|
}
|
|
if (DCacheTags[index] & CACHE_FLAG_DIRTY_UPPERHALF) // todo: check how this behaves when both fields need to be written
|
|
{
|
|
if (WBDelay > NDS.ARM9Timestamp) NDS.ARM9Timestamp = WBDelay;
|
|
|
|
if (DCacheTags[index] & CACHE_FLAG_DIRTY_LOWERHALF)
|
|
{
|
|
WriteBufferWrite(ptr[4], 3, tag+0x10);
|
|
}
|
|
else
|
|
{
|
|
WriteBufferWrite(tag+0x10, 4);
|
|
WriteBufferWrite(ptr[4], 2, tag+0x10);
|
|
}
|
|
WriteBufferWrite(ptr[5], 3, tag+0x14);
|
|
WriteBufferWrite(ptr[6], 3, tag+0x18);
|
|
WriteBufferWrite(ptr[7], 3, tag+0x1C);
|
|
//NDS.ARM9Timestamp += 4;
|
|
}
|
|
DCacheTags[index] &= ~(CACHE_FLAG_DIRTY_LOWERHALF | CACHE_FLAG_DIRTY_UPPERHALF);
|
|
#endif
|
|
}
|
|
|
|
bool ARMv5::IsAddressDCachable(const u32 addr) const
|
|
{
|
|
return PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_DCACHEABLE;
|
|
}
|
|
|
|
#define A9WENTLAST (!NDS.MainRAMLastAccess)
|
|
#define A7WENTLAST ( NDS.MainRAMLastAccess)
|
|
#define A9LAST false
|
|
#define A7LAST true
|
|
#define A9PRIORITY !(NDS.ExMemCnt[0] & 0x8000)
|
|
#define A7PRIORITY (NDS.ExMemCnt[0] & 0x8000)
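
// Write buffer FIFO entries encode the access type in bits 61-63 of the 64-bit entry
// (as used by WriteBufferWrite and the switch in WriteBufferHandle below):
//   0 = byte write, 1 = halfword write, 2 = word write,
//   3 = word write that continues a burst (cache line writeback),
//   4 = address marker entry rather than data.
// (summary inferred from the code in this file, not from separate documentation)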
|
|
|
|
template <WBMode mode>
|
|
bool ARMv5::WriteBufferHandle()
|
|
{
|
|
while (true)
|
|
{
|
|
if (WBWriting)
|
|
{
|
|
if ((mode == WBMode::Check) && ((NDS.A9ContentionTS << NDS.ARM9ClockShift) > NDS.ARM9Timestamp)) return true;
|
|
// look up timings
|
|
// TODO: handle interrupted bursts?
|
|
u32 cycles;
|
|
switch (WBCurVal >> 61)
|
|
{
|
|
case 0:
|
|
{
|
|
if (NDS.ARM9Regions[WBCurAddr>>14] == Mem9_MainRAM)
|
|
{
|
|
if (A7PRIORITY) { if (NDS.A9ContentionTS >= NDS.ARM7Timestamp) return false; }
|
|
else { if (NDS.A9ContentionTS > NDS.ARM7Timestamp) return false; }
|
|
if (NDS.A9ContentionTS < NDS.MainRAMTimestamp) { NDS.A9ContentionTS = NDS.MainRAMTimestamp; if (A7PRIORITY) return false; }
|
|
cycles = 4;
|
|
NDS.MainRAMTimestamp = NDS.A9ContentionTS + 9;
|
|
NDS.MainRAMLastAccess = A9LAST;
|
|
}
|
|
else cycles = (MemTimings[WBCurAddr>>14][0] - 5) >> NDS.ARM9ClockShift; // todo: twl timings
|
|
break;
|
|
}
|
|
case 1:
|
|
{
|
|
if (NDS.ARM9Regions[WBCurAddr>>14] == Mem9_MainRAM)
|
|
{
|
|
if (A7PRIORITY) { if (NDS.A9ContentionTS >= NDS.ARM7Timestamp) return false; }
|
|
else { if (NDS.A9ContentionTS > NDS.ARM7Timestamp) return false; }
|
|
if (NDS.A9ContentionTS < NDS.MainRAMTimestamp) { NDS.A9ContentionTS = NDS.MainRAMTimestamp; if (A7PRIORITY) return false; }
|
|
NDS.MainRAMTimestamp = NDS.A9ContentionTS + 8;
|
|
cycles = 3;
|
|
NDS.MainRAMLastAccess = A9LAST;
|
|
}
|
|
else cycles = (MemTimings[WBCurAddr>>14][0] - 5) >> NDS.ARM9ClockShift; // todo: twl timings
|
|
break;
|
|
}
|
|
case 3:
|
|
{
|
|
if (NDS.ARM9Regions[WBCurAddr>>14] == Mem9_MainRAM)
|
|
{
|
|
if (A7PRIORITY) { if (NDS.A9ContentionTS >= NDS.ARM7Timestamp) return false; }
|
|
else { if (NDS.A9ContentionTS > NDS.ARM7Timestamp) return false; }
|
|
if (A9WENTLAST)
|
|
{
|
|
NDS.MainRAMTimestamp += 2;
|
|
cycles = 2;
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
cycles = MemTimings[WBCurAddr>>14][2] >> NDS.ARM9ClockShift;
|
|
break;
|
|
}
|
|
}
|
|
case 2:
|
|
{
|
|
if (NDS.ARM9Regions[WBCurAddr>>14] == Mem9_MainRAM)
|
|
{
|
|
if (A7PRIORITY) { if (NDS.A9ContentionTS >= NDS.ARM7Timestamp) return false; }
|
|
else { if (NDS.A9ContentionTS > NDS.ARM7Timestamp) return false; }
|
|
if (NDS.A9ContentionTS < NDS.MainRAMTimestamp) { NDS.A9ContentionTS = NDS.MainRAMTimestamp; if (A7PRIORITY) return false; }
|
|
NDS.MainRAMTimestamp = NDS.A9ContentionTS + 9;
|
|
cycles = 4;
|
|
NDS.MainRAMLastAccess = A9LAST;
|
|
}
|
|
else cycles = (MemTimings[WBCurAddr>>14][1] - 5) >> NDS.ARM9ClockShift; // todo: twl timings
|
|
break;
|
|
}
|
|
}
|
|
|
|
NDS.A9ContentionTS += cycles;
|
|
WBReleaseTS = (NDS.A9ContentionTS << NDS.ARM9ClockShift) - 1;
|
|
if (NDS.ARM9Regions[WBCurAddr>>14] != Mem9_MainRAM && ((WBCurVal >> 61) != 3))
|
|
{
|
|
NDS.A9ContentionTS += 1;
|
|
WBTimestamp = WBReleaseTS + 2; // todo: twl timings
|
|
}
|
|
else
|
|
{
|
|
WBTimestamp = WBReleaseTS;
|
|
}
|
|
if (WBWritePointer != 16 && (WriteBufferFifo[WBWritePointer] >> 61) != 3) WBInitialTS = WBTimestamp;
|
|
|
|
switch (WBCurVal >> 61)
|
|
{
|
|
case 0: // byte
|
|
BusWrite8 (WBCurAddr, WBCurVal);
|
|
break;
|
|
case 1: // halfword
|
|
BusWrite16(WBCurAddr, WBCurVal);
|
|
break;
|
|
case 2: // word
|
|
case 3:
|
|
BusWrite32(WBCurAddr, WBCurVal);
|
|
break;
|
|
default: // invalid
|
|
Platform::Log(Platform::LogLevel::Warn, "WHY ARE WE TRYING TO WRITE NONSENSE VIA THE WRITE BUFFER! PANIC!!! Flag: %i\n", (u8)(WBCurVal >> 61));
|
|
break;
|
|
}
|
|
|
|
WBLastRegion = NDS.ARM9Regions[WBCurAddr>>14];
|
|
WBWriting = false;
|
|
if ((mode == WBMode::SingleBurst) && ((WriteBufferFifo[WBWritePointer] >> 61) != 3)) return true;
|
|
}
|
|
|
|
// check if write buffer is empty
|
|
if (WBWritePointer == 16) return true;
|
|
|
|
// attempt to drain write buffer
|
|
if ((WriteBufferFifo[WBWritePointer] >> 61) != 4) // not an address
|
|
{
|
|
if (WBInitialTS > NDS.ARM9Timestamp)
|
|
{
|
|
if (mode == WBMode::Check) return true;
|
|
else NDS.ARM9Timestamp = WBInitialTS;
|
|
}
|
|
|
|
//if ((WriteBufferFifo[WBWritePointer] >> 61) == 3) WBCurAddr+=4; // TODO
|
|
//if (storeaddr[WBWritePointer] != WBCurAddr) printf("MISMATCH: %08X %08X\n", storeaddr[WBWritePointer], WBCurAddr);
|
|
|
|
WBCurAddr = storeaddr[WBWritePointer];
|
|
WBCurVal = WriteBufferFifo[WBWritePointer];
|
|
WBWriting = true;
|
|
}
|
|
else
|
|
{
|
|
//WBCurAddr = (u32)WriteBufferFifo[WBWritePointer]; // TODO
|
|
}
|
|
|
|
WBWritePointer = (WBWritePointer + 1) & 0xF;
|
|
if (WBWritePointer == WBFillPointer)
|
|
{
|
|
WBWritePointer = 16;
|
|
WBFillPointer = 0;
|
|
}
|
|
if ((mode == WBMode::WaitEntry) && (WBWritePointer != WBFillPointer)) return true;
|
|
}
|
|
}
|
|
template bool ARMv5::WriteBufferHandle<WBMode::Check>();
|
|
template bool ARMv5::WriteBufferHandle<WBMode::Force>();
|
|
template bool ARMv5::WriteBufferHandle<WBMode::SingleBurst>();
|
|
template bool ARMv5::WriteBufferHandle<WBMode::WaitEntry>();
|
|
|
|
#undef A9WENTLAST
|
|
#undef A7WENTLAST
|
|
#undef A9LAST
|
|
#undef A7LAST
|
|
#undef A9PRIORITY
|
|
#undef A7PRIORITY
|
|
|
|
template <int next>
|
|
void ARMv5::WriteBufferCheck()
|
|
{
|
|
if ((WBWritePointer != 16) || WBWriting)
|
|
{
|
|
if constexpr (next == 0)
|
|
{
|
|
MRTrack.Type = MainRAMType::WBCheck;
|
|
}
|
|
else if constexpr (next == 2)
|
|
{
|
|
MRTrack.Type = MainRAMType::WBWaitWrite;
|
|
}
|
|
else
|
|
{
|
|
MRTrack.Type = MainRAMType::WBWaitRead;
|
|
}
|
|
}
|
|
/*
|
|
while (!WriteBufferHandle<0>()); // loop until we've cleared out all writeable entries
|
|
|
|
if constexpr (next == 1 || next == 3) // check if the next write is occuring
|
|
{
|
|
if (NDS.ARM9Timestamp >= WBInitialTS)// + (NDS.ARM9Regions[WBCurAddr>>14] == Mem9_MainRAM)))// || ((NDS.ARM9Regions[WBCurAddr>>14] == Mem9_MainRAM) && WBWriting))
|
|
{
|
|
u64 tsold = NDS.ARM9Timestamp;
|
|
while(!WriteBufferHandle<2>());
|
|
|
|
//if constexpr (next == 3) NDS.ARM9Timestamp = std::max(tsold, NDS.ARM9Timestamp - (2<<NDS.ARM9ClockShift));
|
|
}
|
|
}
|
|
else if constexpr (next == 2)
|
|
{
|
|
//if (NDS.ARM9Timestamp >= WBInitialTS)
|
|
while(!WriteBufferHandle<2>());
|
|
}*/
|
|
}
|
|
template void ARMv5::WriteBufferCheck<3>();
|
|
template void ARMv5::WriteBufferCheck<2>();
|
|
template void ARMv5::WriteBufferCheck<1>();
|
|
template void ARMv5::WriteBufferCheck<0>();
|
|
|
|
void ARMv5::WriteBufferWrite(u32 val, u8 flag, u32 addr)
|
|
{
|
|
MRTrack.Type = MainRAMType::WBWrite;
|
|
WBAddrQueued[MRTrack.Var] = addr;
|
|
WBValQueued[MRTrack.Var++] = val | (u64)flag << 61;
|
|
/*switch (flag)
|
|
{
|
|
case 0: // byte
|
|
BusWrite8 (addr, val);
|
|
break;
|
|
case 1: // halfword
|
|
BusWrite16(addr, val);
|
|
break;
|
|
case 2: // word
|
|
case 3:
|
|
BusWrite32(addr, val);
|
|
break;
|
|
default: // invalid
|
|
//Platform::Log(Platform::LogLevel::Warn, "WHY ARE WE TRYING TO WRITE NONSENSE VIA THE WRITE BUFFER! PANIC!!! Flag: %i\n", (u8)(WBCurVal >> 61));
|
|
break;
|
|
}*/
|
|
/*WriteBufferCheck<0>();
|
|
|
|
if (WBFillPointer == WBWritePointer) // if the write buffer is full then we stall the cpu until room is made
|
|
WriteBufferHandle<1>();
|
|
else if (WBWritePointer == 16) // indicates empty write buffer
|
|
{
|
|
WBWritePointer = 0;
|
|
if (!WBWriting)
|
|
{
|
|
u64 ts = ((NDS.ARM9Regions[addr>>14] == Mem9_MainRAM) ? std::max(MainRAMTimestamp, (NDS.ARM9Timestamp + 1)) : (NDS.ARM9Timestamp + 1));
|
|
|
|
if (!WBWriting && (WBTimestamp < ((ts + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1))))
|
|
WBTimestamp = (ts + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
WBInitialTS = WBTimestamp;
|
|
}
|
|
}
|
|
|
|
WriteBufferFifo[WBFillPointer] = val | (u64)flag << 61;
|
|
storeaddr[WBFillPointer] = addr;
|
|
WBFillPointer = (WBFillPointer + 1) & 0xF;*/
|
|
}
|
|
|
|
void ARMv5::WriteBufferDrain()
|
|
{
|
|
if ((WBWritePointer != 16) || WBWriting)
|
|
MRTrack.Type = MainRAMType::WBDrain;
|
|
//while (!WriteBufferHandle<1>()); // loop until drained fully
|
|
}
|
|
|
|
void ARMv5::CP15Write(u32 id, u32 val)
|
|
{
|
|
//if(id!=0x704)printf("CP15 write op %03X %08X %08X\n", id, val, R[15]);
|
|
|
|
switch (id & 0xFFF)
|
|
{
|
|
case 0x100:
|
|
{
|
|
u32 old = CP15Control;
|
|
CP15Control = (CP15Control & ~CP15_CR_CHANGEABLE_MASK) | (val & CP15_CR_CHANGEABLE_MASK);
|
|
//Log(LogLevel::Debug, "CP15Control = %08X (%08X->%08X)\n", CP15Control, old, val);
|
|
UpdateDTCMSetting();
|
|
UpdateITCMSetting();
|
|
u32 changedBits = old ^ CP15Control;
|
|
if (changedBits & (CP15_CR_MPUENABLE | CP15_CACHE_CR_ICACHEENABLE| CP15_CACHE_CR_DCACHEENABLE))
|
|
{
|
|
UpdatePURegions(changedBits & CP15_CR_MPUENABLE);
|
|
}
|
|
if (val & CP15_CR_BIGENDIAN) Log(LogLevel::Warn, "!!!! ARM9 BIG ENDIAN MODE. VERY BAD. SHIT GONNA ASPLODE NOW\n");
|
|
if (val & CP15_CR_HIGHEXCEPTIONBASE) ExceptionBase = CP15_EXCEPTIONBASE_HIGH;
|
|
else ExceptionBase = CP15_EXCEPTIONBASE_LOW;
|
|
}
|
|
return;
|
|
|
|
case 0x200: // data cacheable
|
|
{
|
|
u32 diff = PU_DataCacheable ^ val;
|
|
PU_DataCacheable = val;
|
|
#if 0
|
|
// This code just updates the PU_Map entries of the given region
|
|
// this works fine, if the regions do not overlap
|
|
// If overlapping and the least priority region cachable bit
|
|
// would change, this results in wrong map entries. On HW the changed
|
|
// cachable bit would not be applied because of a higher priority
|
|
// region overwriting them.
|
|
//
|
|
// Writing to the cachable bits is sparse, so we
|
|
// should just take the long but correct update via all regions
|
|
// so the permission priority is correct
|
|
|
|
for (u32 i = 0; i < CP15_REGION_COUNT; i++)
|
|
{
|
|
if (diff & (1<<i)) UpdatePURegion(i);
|
|
}
|
|
#else
|
|
if (diff) UpdatePURegions(true);
|
|
#endif
|
|
}
|
|
return;
|
|
|
|
case 0x201: // code cacheable
|
|
{
|
|
u32 diff = PU_CodeCacheable ^ val;
|
|
PU_CodeCacheable = val;
|
|
#if 0
|
|
// This code just updates the PU_Map entries of the given region
|
|
// this works fine, if the regions do not overlap
|
|
// If overlapping and the least priority region cachable bit
|
|
// would change, this results in wrong map entries. On HW the changed
|
|
// cachable bit would not be applied because of a higher priority
|
|
// region overwriting them.
|
|
//
|
|
// Writing to the cachable bits is sparse, so we
|
|
// should just take the long but correct update via all regions
|
|
// so the permission priority is correct
|
|
|
|
for (u32 i = 0; i < CP15_REGION_COUNT; i++)
|
|
{
|
|
if (diff & (1<<i)) UpdatePURegion(i);
|
|
}
|
|
#else
|
|
if (diff) UpdatePURegions(true);
|
|
#endif
|
|
}
|
|
return;
|
|
|
|
|
|
case 0x300: // write-buffer
|
|
{
|
|
u32 diff = PU_WriteBufferability ^ val;
|
|
PU_WriteBufferability = val;
|
|
#if 0
|
|
// This code just updates the PU_Map entries of the given region
|
|
// this works fine, if the regions do not overlap
|
|
// If overlapping and the least priority region write buffer
|
|
// would change, this results in wrong map entries. On HW the changed
|
|
// write buffer would not be applied because of a higher priority
|
|
// region overwriting them.
|
|
//
|
|
// Writing to the write buffer bits is sparse, so we
|
|
// should just take the long but correct update via all regions
|
|
// so the permission priority is correct
|
|
|
|
for (u32 i = 0; i < CP15_REGION_COUNT; i++)
|
|
{
|
|
if (diff & (1<<i)) UpdatePURegion(i);
|
|
}
|
|
#else
|
|
if (diff) UpdatePURegions(true);
|
|
#endif
|
|
}
|
|
return;
|
|
|
|
|
|
case 0x500: // legacy data permissions
|
|
{
|
|
u32 old = PU_DataRW;
|
|
PU_DataRW = 0;
|
|
#pragma GCC ivdep
|
|
#pragma GCC unroll 8
|
|
for (int i = 0; i < CP15_REGION_COUNT; i++)
|
|
PU_DataRW |= (val >> (i * 2) & 3) << (i * CP15_REGIONACCESS_BITS_PER_REGION);
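// the legacy register packs one 2-bit access value per region; the loop above widens each into
// the CP15_REGIONACCESS_BITS_PER_REGION-wide slots used internally (and by the extended register
// at 0x502). e.g. assuming 4-bit slots, a legacy value of 0x0000000C (region 1 = 3) becomes
// PU_DataRW = 0x00000030. (illustrative example only)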
|
|
|
|
#if 0
|
|
// This code just updates the PU_Map entries of the given region
|
|
// this works fine, if the regions do not overlap
|
|
// If overlapping and the least priority region access permission
|
|
// would change, this results in wrong map entries. On HW the changed
|
|
// access permissions would not be applied because of a higher priority
|
|
// region overwriting them.
|
|
//
|
|
// Writing to the data permission bits is sparse, so we
|
|
// should just take the long but correct update via all regions
|
|
// so the permission priority is correct
|
|
|
|
u32 diff = old ^ PU_DataRW;
|
|
for (u32 i = 0; i < CP15_REGION_COUNT; i++)
|
|
{
|
|
if (diff & (CP15_REGIONACCESS_REGIONMASK<<(i*CP15_REGIONACCESS_BITS_PER_REGION))) UpdatePURegion(i);
|
|
}
|
|
#else
|
|
u32 diff = old ^ PU_DataRW;
|
|
if (diff) UpdatePURegions(true);
|
|
#endif
|
|
}
|
|
return;
|
|
|
|
case 0x501: // legacy code permissions
|
|
{
|
|
u32 old = PU_CodeRW;
|
|
PU_CodeRW = 0;
|
|
#pragma GCC ivdep
|
|
#pragma GCC unroll 8
|
|
for (int i = 0; i < CP15_REGION_COUNT; i++)
|
|
PU_CodeRW |= (val >> (i * 2) & 3) << (i * CP15_REGIONACCESS_BITS_PER_REGION);
|
|
|
|
#if 0
|
|
// This code just updates the PU_Map entries of the given region
|
|
// this works fine, if the regions do not overlap
|
|
// If overlapping and the least priority region access permission
|
|
// would change, this results in wrong map entries, because it
|
|
// would on HW be overridden by the higher priority region
|
|
//
|
|
// Writing to the data permission bits is sparse, so we
|
|
// should just take the long but correct update via all regions
|
|
// so the permission priority is correct
|
|
|
|
u32 diff = old ^ PU_CodeRW;
|
|
for (u32 i = 0; i < CP15_REGION_COUNT; i++)
|
|
{
|
|
if (diff & (CP15_REGIONACCESS_REGIONMASK<<(i*CP15_REGIONACCESS_BITS_PER_REGION))) UpdatePURegion(i);
|
|
}
|
|
#else
|
|
u32 diff = old ^ PU_CodeRW;
|
|
if (diff) UpdatePURegions(true);
|
|
#endif
|
|
}
|
|
return;
|
|
|
|
case 0x502: // data permissions
|
|
{
|
|
u32 diff = PU_DataRW ^ val;
|
|
PU_DataRW = val;
|
|
#if 0
|
|
// This code just updates the PU_Map entries of the given region
|
|
// this works fine, if the regions do not overlap
|
|
// If overlapping and the least priority region access permission
|
|
// would change, this results in wrong map entries, because it
|
|
// would on HW be overridden by the higher priority region
|
|
//
|
|
// Writing to the data permission bits is sparse, so we
|
|
// should just take the long but correct update via all regions
|
|
// so the permission priority is correct
|
|
for (u32 i = 0; i < CP15_REGION_COUNT; i++)
|
|
{
|
|
if (diff & (CP15_REGIONACCESS_REGIONMASK<<(i*CP15_REGIONACCESS_BITS_PER_REGION))) UpdatePURegion(i);
|
|
}
|
|
#else
|
|
if (diff) UpdatePURegions(true);
|
|
#endif
|
|
}
|
|
return;
|
|
|
|
case 0x503: // code permissions
|
|
{
|
|
u32 diff = PU_CodeRW ^ val;
|
|
PU_CodeRW = val;
|
|
#if 0
|
|
// This code just updates the PU_Map entries of the given region
|
|
// this works fine, if the regions do not overlap
|
|
// If overlapping and the least priority region access permission
|
|
// would change, this results in wrong map entries, because it
|
|
// would on HW be overridden by the higher priority region
|
|
//
|
|
// Writing to the data permission bits is sparse, so we
|
|
// should just take the long but correct update via all regions
|
|
// so the permission priority is correct
|
|
for (u32 i = 0; i < CP15_REGION_COUNT; i++)
|
|
{
|
|
if (diff & (CP15_REGIONACCESS_REGIONMASK<<(i*CP15_REGIONACCESS_BITS_PER_REGION))) UpdatePURegion(i);
|
|
}
|
|
#else
|
|
if (diff) UpdatePURegions(true);
|
|
#endif
|
|
}
|
|
return;
|
|
|
|
|
|
case 0x600:
|
|
case 0x601:
|
|
case 0x610:
|
|
case 0x611:
|
|
case 0x620:
|
|
case 0x621:
|
|
case 0x630:
|
|
case 0x631:
|
|
case 0x640:
|
|
case 0x641:
|
|
case 0x650:
|
|
case 0x651:
|
|
case 0x660:
|
|
case 0x661:
|
|
case 0x670:
|
|
case 0x671:
|
|
{
|
|
char log_output[1024];
|
|
u32 old = PU_Region[(id >> 4) & 0xF];
|
|
PU_Region[(id >> 4) & 0xF] = val & ~(0x3F<<6);
|
|
u32 diff = old ^ PU_Region[(id >> 4) & 0xF];
|
|
|
|
std::snprintf(log_output,
|
|
sizeof(log_output),
|
|
"PU: region %d = %08X : %s, start: %08X size: %02X\n",
|
|
(id >> 4) & 0xF,
|
|
val,
|
|
val & 1 ? "enabled" : "disabled",
|
|
val & 0xFFFFF000,
|
|
(val & 0x3E) >> 1
|
|
);
|
|
// TODO: smarter region update for this?
|
|
if (diff) UpdatePURegions(true);
|
|
return;
|
|
}
|
|
|
|
|
|
case 0x704:
|
|
case 0x782:
|
|
//WriteBufferDrain(); // checkme
|
|
Halt(1);
|
|
return;
|
|
|
|
|
|
case 0x750:
|
|
// Can be executed in user and priv mode
|
|
ICacheInvalidateAll();
|
|
return;
|
|
case 0x751:
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
}
|
|
ICacheInvalidateByAddr(val);
|
|
//Halt(255);
|
|
return;
|
|
/*case 0x752:
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
} else
|
|
{
|
|
// Cache invalidate by line number and set number
|
|
u8 cacheSet = val >> (32 - ICACHE_SETS_LOG2) & (ICACHE_SETS -1);
|
|
u8 cacheLine = (val >> ICACHE_LINELENGTH_LOG2) & (ICACHE_LINESPERSET -1);
|
|
ICacheInvalidateBySetAndWay(cacheSet, cacheLine);
|
|
}
|
|
//Halt(255);
|
|
return;
|
|
*/
|
|
|
|
case 0x760:
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
}
|
|
DCacheInvalidateAll();
|
|
//printf("inval data cache %08X\n", val);
|
|
return;
|
|
case 0x761:
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
}
|
|
DCacheInvalidateByAddr(val);
|
|
//printf("inval data cache SI\n");
|
|
return;
|
|
/*case 0x762:
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
} else
|
|
{
|
|
// Cache invalidate by line number and set number
|
|
u8 cacheSet = val >> (32 - DCACHE_SETS_LOG2) & (DCACHE_SETS -1);
|
|
u8 cacheLine = (val >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET -1);
|
|
DCacheInvalidateBySetAndWay(cacheSet, cacheLine);
|
|
}
|
|
return;
|
|
*/
|
|
/*case 0x770:
|
|
// invalidate both caches
|
|
// can be called from user and privileged
|
|
ICacheInvalidateAll();
|
|
DCacheInvalidateAll();
|
|
break;
|
|
*/
|
|
/*case 0x7A0:
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
}
|
|
//Log(LogLevel::Debug,"clean data cache\n");
|
|
DCacheClearAll();
|
|
return;*/
|
|
case 0x7A1:
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
}
|
|
//Log(LogLevel::Debug,"clean data cache MVA\n");=
|
|
CP15Queue = val;
|
|
QueueFunction(&ARMv5::DCClearAddr_2);
|
|
return;
|
|
case 0x7A2:
|
|
//Log(LogLevel::Debug,"clean data cache SET/WAY\n");
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
} else
|
|
{
|
|
// Cache clean by set and line number
|
|
CP15Queue = val;
|
|
QueueFunction(&ARMv5::DCClearSetWay_2);
|
|
}
|
|
return;
|
|
case 0x7A3:
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
}
|
|
// Test and clean (optional)
|
|
// Is not present on the NDS/DSi
|
|
return;
|
|
|
|
case 0x7A4:
|
|
// Can be used in user and privileged mode
|
|
// Drain Write Buffer: Stall until all write back completed
|
|
QueueFunction(&ARMv5::WriteBufferDrain);
|
|
return;
|
|
|
|
case 0x7D1:
|
|
//Log(LogLevel::Debug,"Prefetch instruction cache MVA\n");
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
}
|
|
// we force a fill by looking up the value from cache
|
|
// if it wasn't cached yet, it will be loaded into cache
|
|
// low bits are set to 0x1C to trick cache streaming
|
|
CP15Queue = val;
|
|
QueueFunction(&ARMv5::ICachePrefetch_2);
|
|
return;
|
|
|
|
/*case 0x7E0:
|
|
//Log(LogLevel::Debug,"clean & invalidate data cache\n");
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
}
|
|
DCacheClearAll();
|
|
DCacheInvalidateAll();
|
|
return;*/
|
|
case 0x7E1:
|
|
//Log(LogLevel::Debug,"clean & invalidate data cache MVA\n");
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
}
|
|
CP15Queue = val;
|
|
QueueFunction(&ARMv5::DCClearInvalidateAddr_2);
|
|
return;
|
|
case 0x7E2:
|
|
//Log(LogLevel::Debug,"clean & invalidate data cache SET/WAY\n");
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
} else
|
|
{
|
|
// Cache clean and invalidate by set and line number
|
|
CP15Queue = val;
|
|
QueueFunction(&ARMv5::DCClearInvalidateSetWay_2);
|
|
}
|
|
return;
|
|
|
|
case 0x900:
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
}
|
|
// Cache Lockdown - Format B
|
|
// Bit 31: Lock bit
|
|
// Bit 0..Way-1: locked ways
|
|
// The Cache is 4 way associative
|
|
// But all bits are r/w
|
|
DCacheLockDown = val;
|
|
Log(LogLevel::Debug,"DCacheLockDown\n");
|
|
return;
|
|
case 0x901:
|
|
// requires priv mode or causes UNKNOWN INSTRUCTION exception
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
}
|
|
// Cache Lockdown - Format B
|
|
// Bit 31: Lock bit
|
|
// Bit 0..Way-1: locked ways
|
|
// The Cache is 4 way associative
|
|
// But all bits are r/w
|
|
ICacheLockDown = val;
|
|
Log(LogLevel::Debug,"ICacheLockDown\n");
|
|
return;
|
|
|
|
case 0x910:
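// DTCM region register: base address in the upper bits, size exponent in
// bits 5:1 (size = 512 << N bytes, per GBATEK); UpdateDTCMSetting()
// recomputes the DTCM base and mask from this value.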
|
|
DTCMSetting = val & (CP15_DTCM_BASE_MASK | CP15_DTCM_SIZE_MASK);
|
|
UpdateDTCMSetting();
|
|
return;
|
|
|
|
case 0x911:
|
|
ITCMSetting = val & (CP15_ITCM_BASE_MASK | CP15_ITCM_SIZE_MASK);
|
|
UpdateITCMSetting();
|
|
return;
|
|
|
|
case 0xD01:
|
|
case 0xD11:
|
|
CP15TraceProcessId = val;
|
|
return;
|
|
|
|
case 0xF00:
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
} else
|
|
{
|
|
if (((id >> 12) & 0x0f) == 0x03)
|
|
CacheDebugRegisterIndex = val;
|
|
else if (((id >> 12) & 0x0f) == 0x00)
|
|
CP15BISTTestStateRegister = val;
|
|
else
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
}
|
|
|
|
}
|
|
return;
|
|
|
|
case 0xF10:
|
|
// instruction cache Tag register
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
} else
|
|
{
|
|
uint8_t segment = (CacheDebugRegisterIndex >> (32-ICACHE_SETS_LOG2)) & (ICACHE_SETS-1);
|
|
uint8_t wordAddress = (CacheDebugRegisterIndex & (ICACHE_LINELENGTH-1)) >> 2;
|
|
uint8_t index = (CacheDebugRegisterIndex >> ICACHE_LINELENGTH_LOG2) & (ICACHE_LINESPERSET-1);
|
|
ICacheTags[(index << ICACHE_SETS_LOG2) + segment] = val;
}
return;

case 0xF20:
|
|
// data cache Tag register
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
} else
|
|
{
|
|
uint8_t segment = (CacheDebugRegisterIndex >> (32-DCACHE_SETS_LOG2)) & (DCACHE_SETS-1);
|
|
uint8_t wordAddress = (CacheDebugRegisterIndex & (DCACHE_LINELENGTH-1)) >> 2;
|
|
uint8_t index = (CacheDebugRegisterIndex >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1);
|
|
DCacheTags[(index << DCACHE_SETS_LOG2) + segment] = val;
}
return;

case 0xF30:
|
|
//printf("cache debug instruction cache %08X\n", val);
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
} else
|
|
{
|
|
uint8_t segment = (CacheDebugRegisterIndex >> (32-ICACHE_SETS_LOG2)) & (ICACHE_SETS-1);
|
|
uint8_t wordAddress = (CacheDebugRegisterIndex & (ICACHE_LINELENGTH-1)) >> 2;
|
|
uint8_t index = (CacheDebugRegisterIndex >> ICACHE_LINELENGTH_LOG2) & (ICACHE_LINESPERSET-1);
|
|
*(u32 *)&ICache[(((index << ICACHE_SETS_LOG2) + segment) << ICACHE_LINELENGTH_LOG2) + wordAddress*4] = val;
|
|
}
|
|
return;
|
|
|
|
case 0xF40:
|
|
//printf("cache debug data cache %08X\n", val);
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return ARMInterpreter::A_UNK(this);
|
|
} else
|
|
{
|
|
uint8_t segment = (CacheDebugRegisterIndex >> (32-DCACHE_SETS_LOG2)) & (DCACHE_SETS-1);
|
|
uint8_t wordAddress = (CacheDebugRegisterIndex & (DCACHE_LINELENGTH-1)) >> 2;
|
|
uint8_t index = (CacheDebugRegisterIndex >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1);
|
|
*(u32 *)&DCache[(((index << DCACHE_SETS_LOG2) + segment) << DCACHE_LINELENGTH_LOG2) + wordAddress*4] = val;
|
|
}
|
|
return;
|
|
|
|
}
|
|
|
|
Log(LogLevel::Debug, "unknown CP15 write op %04X %08X\n", id, val);
|
|
}
|
|
|
|
u32 ARMv5::CP15Read(const u32 id) const
|
|
{
|
|
//printf("CP15 read op %03X %08X\n", id, NDS::ARM9->R[15]);
|
|
|
|
switch (id & 0xFFF)
|
|
{
|
|
case 0x000: // CPU ID
|
|
case 0x003:
|
|
case 0x004:
|
|
case 0x005:
|
|
case 0x006:
|
|
case 0x007:
|
|
return CP15_MAINID_IMPLEMENTOR_ARM | CP15_MAINID_VARIANT_0 | CP15_MAINID_ARCH_v5TE | CP15_MAINID_IMPLEMENTATION_946 | CP15_MAINID_REVISION_1;
|
|
|
|
case 0x001: // cache type
|
|
return CACHE_TR_LOCKDOWN_TYPE_B | CACHE_TR_NONUNIFIED
|
|
| (DCACHE_LINELENGTH_ENCODED << 12) | (DCACHE_SETS_LOG2 << 15) | ((DCACHE_SIZE_LOG2 - 9) << 18)
|
|
| (ICACHE_LINELENGTH_ENCODED << 0) | (ICACHE_SETS_LOG2 << 3) | ((ICACHE_SIZE_LOG2 - 9) << 6);
|
|
|
|
case 0x002: // TCM size
|
|
return CP15_TCMSIZE_ITCM_32KB | CP15_TCMSIZE_DTCM_16KB;
|
|
|
|
|
|
case 0x100: // control reg
|
|
return CP15Control;
|
|
|
|
|
|
case 0x200:
|
|
return PU_DataCacheable;
|
|
case 0x201:
|
|
return PU_CodeCacheable;
|
|
case 0x300:
|
|
return PU_WriteBufferability;
|
|
|
|
|
|
case 0x500:
|
|
{
|
|
// this format has 2 bits per region, but we store 4 per region
|
|
// so we reduce and consolidate the bits
|
|
// 0x502 returns all 4 bits per region
|
|
u32 ret = 0;
|
|
#pragma GCC ivdep
|
|
#pragma GCC unroll 8
|
|
for (int i = 0; i < CP15_REGION_COUNT; i++)
|
|
ret |= (PU_DataRW >> (i * CP15_REGIONACCESS_BITS_PER_REGION) & CP15_REGIONACCESS_REGIONMASK) << (i*2);
|
|
return ret;
|
|
}
|
|
case 0x501:
|
|
{
|
|
// this format has 2 bits per region, but we store 4 per region
|
|
// so we reduce and consolidate the bits
|
|
// 0x503 returns all 4 bits per region
|
|
u32 ret = 0;
|
|
#pragma GCC unroll 8
|
|
for (int i = 0; i < CP15_REGION_COUNT; i++)
|
|
ret |= (PU_CodeRW >> (i * CP15_REGIONACCESS_BITS_PER_REGION) & CP15_REGIONACCESS_REGIONMASK) << (i*2);
|
|
return ret;
|
|
}
|
|
case 0x502:
|
|
return PU_DataRW;
|
|
case 0x503:
|
|
return PU_CodeRW;
|
|
|
|
|
|
case 0x600:
|
|
case 0x601:
|
|
case 0x610:
|
|
case 0x611:
|
|
case 0x620:
|
|
case 0x621:
|
|
case 0x630:
|
|
case 0x631:
|
|
case 0x640:
|
|
case 0x641:
|
|
case 0x650:
|
|
case 0x651:
|
|
case 0x660:
|
|
case 0x661:
|
|
case 0x670:
|
|
case 0x671:
|
|
return PU_Region[(id >> 4) & 0xF];
|
|
|
|
case 0x7A6:
|
|
// read Cache Dirty Bit (optional)
|
|
// it is not present on the NDS/DSi
|
|
return 0;
|
|
|
|
case 0x900:
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return 0;
|
|
} else
|
|
return DCacheLockDown;
|
|
case 0x901:
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return 0;
|
|
} else
|
|
return ICacheLockDown;
|
|
|
|
case 0x910:
|
|
return DTCMSetting;
|
|
case 0x911:
|
|
return ITCMSetting;
|
|
|
|
case 0xD01: // see the ARM946E-S Rev 1 Technical Reference Manual, chapter 2.3.13
|
|
case 0xD11: // backwards compatible read/write of the same register
|
|
return CP15TraceProcessId;
|
|
|
|
case 0xF00:
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return 0;
|
|
} else
|
|
{
|
|
if (((id >> 12) & 0x0f) == 0x03)
|
|
return CacheDebugRegisterIndex;
|
|
if (((id >> 12) & 0x0f) == 0x00)
|
|
return CP15BISTTestStateRegister;
|
|
}
|
|
case 0xF10:
|
|
// instruction cache Tag register
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return 0;
|
|
} else
|
|
{
|
|
uint8_t segment = (CacheDebugRegisterIndex >> (32-ICACHE_SETS_LOG2)) & (ICACHE_SETS-1);
|
|
uint8_t wordAddress = (CacheDebugRegisterIndex & (ICACHE_LINELENGTH-1)) >> 2;
|
|
uint8_t index = (CacheDebugRegisterIndex >> ICACHE_LINELENGTH_LOG2) & (ICACHE_LINESPERSET-1);
|
|
Log(LogLevel::Debug, "Read ICache Tag %08lx -> %08lx\n", CacheDebugRegisterIndex, ICacheTags[(index << ICACHE_SETS_LOG2) + segment]);
|
|
return ICacheTags[(index << ICACHE_SETS_LOG2) + segment];
|
|
}
|
|
case 0xF20:
|
|
// data cache Tag register
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return 0;
|
|
} else
|
|
{
|
|
uint8_t segment = (CacheDebugRegisterIndex >> (32-DCACHE_SETS_LOG2)) & (DCACHE_SETS-1);
|
|
uint8_t wordAddress = (CacheDebugRegisterIndex & (DCACHE_LINELENGTH-1)) >> 2;
|
|
uint8_t index = (CacheDebugRegisterIndex >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1);
|
|
Log(LogLevel::Debug, "Read DCache Tag %08lx (%u, %02x, %u) -> %08lx\n", CacheDebugRegisterIndex, segment, index, wordAddress, DCacheTags[(index << DCACHE_SETS_LOG2) + segment]);
|
|
return DCacheTags[(index << DCACHE_SETS_LOG2) + segment];
|
|
}
|
|
case 0xF30:
|
|
if (PU_Map != PU_PrivMap)
|
|
{
|
|
return 0;
|
|
} else
|
|
{
|
|
uint8_t segment = (CacheDebugRegisterIndex >> (32-ICACHE_SETS_LOG2)) & (ICACHE_SETS-1);
|
|
uint8_t wordAddress = (CacheDebugRegisterIndex & (ICACHE_LINELENGTH-1)) >> 2;
|
|
uint8_t index = (CacheDebugRegisterIndex >> ICACHE_LINELENGTH_LOG2) & (ICACHE_LINESPERSET-1);
|
|
return *(u32 *)&ICache[(((index << ICACHE_SETS_LOG2) + segment) << ICACHE_LINELENGTH_LOG2) + wordAddress*4];
|
|
}
|
|
case 0xF40:
|
|
{
|
|
uint8_t segment = (CacheDebugRegisterIndex >> (32-DCACHE_SETS_LOG2)) & (DCACHE_SETS-1);
|
|
uint8_t wordAddress = (CacheDebugRegisterIndex & (DCACHE_LINELENGTH-1)) >> 2;
|
|
uint8_t index = (CacheDebugRegisterIndex >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET-1);
|
|
return *(u32 *)&DCache[(((index << DCACHE_SETS_LOG2) + segment) << DCACHE_LINELENGTH_LOG2) + wordAddress*4];
|
|
}
|
|
}
|
|
|
|
Log(LogLevel::Debug, "unknown CP15 read op %04X\n", id);
|
|
return 0;
|
|
}
|
|
void ARMv5::ICachePrefetch_2()
|
|
{
|
|
u32 val = CP15Queue;
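// force a line fill by doing a normal cache lookup (see case 0x7D1);
// the low bits are set to 0x1C, presumably so the access lands on the
// last word of the 32 byte line and no further streaming is triggered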
|
|
ICacheLookup((val & ~0x03) | 0x1C);
|
|
}
|
|
|
|
void ARMv5::DCClearAddr_2()
|
|
{
|
|
u32 val = CP15Queue;
|
|
DCacheClearByAddr(val);
|
|
}
|
|
|
|
void ARMv5::DCClearSetWay_2()
|
|
{
|
|
u32 val = CP15Queue;
|
|
u8 cacheSet = val >> (32 - DCACHE_SETS_LOG2) & (DCACHE_SETS -1);
|
|
u8 cacheLine = (val >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET -1);
|
|
DCacheClearByASetAndWay(cacheSet, cacheLine);
|
|
}
|
|
|
|
void ARMv5::DCClearInvalidateAddr_2()
|
|
{
|
|
u32 val = CP15Queue;
|
|
DCacheClearByAddr(val);
|
|
DCacheInvalidateByAddr(val);
|
|
}
|
|
|
|
void ARMv5::DCClearInvalidateSetWay_2()
|
|
{
|
|
u32 val = CP15Queue;
|
|
u8 cacheSet = val >> (32 - DCACHE_SETS_LOG2) & (DCACHE_SETS -1);
|
|
u8 cacheLine = (val >> DCACHE_LINELENGTH_LOG2) & (DCACHE_LINESPERSET -1);
|
|
DCacheClearByASetAndWay(cacheSet, cacheLine);
|
|
DCacheInvalidateBySetAndWay(cacheSet, cacheLine);
|
|
}
|
|
|
|
// TCM accesses are handled here.
// PU permission checks are done up front via PU_Map in each access function.
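// CodeRead32 only performs the PU check and the ITCM/ICache fast paths
// directly; the external bus access is deferred via QueueFunction so that
// main RAM fetches can be tracked and timed through MRTrack.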
|
|
|
|
void ARMv5::CodeRead32(u32 addr)
|
|
{
|
|
// prefetch abort
|
|
// the actual exception is not raised until the aborted instruction is executed
|
|
if (!(PU_Map[addr>>12] & CP15_MAP_EXECUTABLE)) [[unlikely]]
|
|
{
|
|
NDS.ARM9Timestamp += 1;
|
|
if (NDS.ARM9Timestamp < TimestampMemory) NDS.ARM9Timestamp = TimestampMemory;
|
|
DataRegion = Mem9_Null;
|
|
Store = false;
|
|
RetVal = ((u64)1<<63);
|
|
QueueFunction(DelayedQueue);
|
|
return;
|
|
}
|
|
|
|
if (addr < ITCMSize)
|
|
{
|
|
if (NDS.ARM9Timestamp < ITCMTimestamp) NDS.ARM9Timestamp = ITCMTimestamp;
|
|
NDS.ARM9Timestamp += 1;
|
|
if (NDS.ARM9Timestamp < TimestampMemory) NDS.ARM9Timestamp = TimestampMemory;
|
|
DataRegion = Mem9_Null;
|
|
Store = false;
|
|
RetVal = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
|
|
QueueFunction(DelayedQueue);
|
|
return;
|
|
}
|
|
|
|
#if !DISABLE_ICACHE
|
|
#ifdef JIT_ENABLED
|
|
//if (!NDS.IsJITEnabled())
|
|
#endif
|
|
{
|
|
if (IsAddressICachable(addr))
|
|
{
|
|
if (ICacheLookup(addr)) return;
|
|
}
}
#endif
|
|
|
|
// bus reads can only overlap with dcache streaming by 6 cycles
|
|
if (DCacheStreamPtr < 7)
|
|
{
|
|
u64 time = DCacheStreamTimes[6] - 6; // checkme: minus 6?
|
|
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
|
|
}
|
|
|
|
if (PU_Map[addr>>12] & 0x30)
|
|
WriteBufferDrain();
|
|
else
|
|
WriteBufferCheck<3>();
|
|
|
|
FetchAddr[16] = addr;
|
|
QueueFunction(&ARMv5::CodeRead32_2);
|
|
}
|
|
|
|
void ARMv5::CodeRead32_2()
|
|
{
|
|
u32 addr = FetchAddr[16];
|
|
|
|
NDS.ARM9Timestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
u8 cycles = MemTimings[addr >> 14][1];
|
|
|
|
if ((addr >> 24) == 0x02)
|
|
{
|
|
FetchAddr[16] = addr;
|
|
MRTrack.Type = MainRAMType::Fetch;
|
|
MRTrack.Var = MRCodeFetch | MR32;
|
|
}
|
|
else
|
|
{
|
|
if (((NDS.ARM9Timestamp <= WBReleaseTS) && (NDS.ARM9Regions[addr>>14] == WBLastRegion)) // check write buffer
|
|
|| (Store && (NDS.ARM9Regions[addr>>14] == DataRegion))) //check the actual store
|
|
NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift;
|
|
|
|
NDS.ARM9Timestamp += cycles;
|
|
|
|
if (WBTimestamp < ((NDS.ARM9Timestamp - (3<<NDS.ARM9ClockShift) + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1)))
|
|
WBTimestamp = (NDS.ARM9Timestamp - (3<<NDS.ARM9ClockShift) + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
RetVal = BusRead32(addr);
|
|
}
|
|
|
|
Store = false;
|
|
DataRegion = Mem9_Null;
|
|
QueueFunction(DelayedQueue);
|
|
return;
|
|
}
|
|
|
|
|
|
void ARMv5::DAbortHandle()
|
|
{
|
|
if (DCacheStreamPtr < 7)
|
|
{
|
|
u64 fillend = DCacheStreamTimes[6] + 1;
|
|
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles?
|
|
DCacheStreamPtr = 7;
|
|
}
|
|
|
|
NDS.ARM9Timestamp += DataCycles = 1;
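// the aborted access is modelled as taking a single data cycle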
|
|
}
|
|
|
|
void ARMv5::DCacheFin8()
|
|
{
|
|
u8 reg = __builtin_ctz(LDRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
|
|
|
|
*val = (RetVal >> (8 * (addr & 3))) & 0xff;
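// (RetVal is the full 32-bit word returned by the cache lookup; only the
// addressed byte is kept)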
|
|
}
|
|
|
|
bool ARMv5::DataRead8(u32 addr, u8 reg)
|
|
{
|
|
// Data Aborts
|
|
// Exception is handled in the actual instruction implementation
|
|
if (!(PU_Map[addr>>12] & CP15_MAP_READABLE)) [[unlikely]]
|
|
{
|
|
QueueFunction(&ARMv5::DAbortHandle);
|
|
return false;
|
|
}
|
|
|
|
FetchAddr[reg] = addr;
|
|
LDRRegs = 1<<reg;
|
|
|
|
QueueFunction(&ARMv5::DRead8_2);
|
|
return true;
|
|
}
|
|
|
|
void ARMv5::DRead8_2()
|
|
{
|
|
u8 reg = __builtin_ctz(LDRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
|
|
|
|
if (DCacheStreamPtr < 7)
|
|
{
|
|
u64 fillend = DCacheStreamTimes[6] + 1;
|
|
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles?
|
|
DCacheStreamPtr = 7;
|
|
}
|
|
|
|
if (addr < ITCMSize)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
ITCMTimestamp = NDS.ARM9Timestamp;
|
|
DataRegion = Mem9_ITCM;
|
|
*val = *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)];
|
|
return;
|
|
}
|
|
if ((addr & DTCMMask) == DTCMBase)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
DataRegion = Mem9_DTCM;
|
|
*val = *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)];
|
|
return;
|
|
}
|
|
|
|
#if !DISABLE_DCACHE
|
|
#ifdef JIT_ENABLED
|
|
//if (!NDS.IsJITEnabled())
|
|
#endif
|
|
{
|
|
if (IsAddressDCachable(addr))
|
|
{
|
|
DelayedQueue = &ARMv5::DCacheFin8;
|
|
if (DCacheLookup(addr)) return;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// bus reads can only overlap with icache streaming by 6 cycles
|
|
// checkme: does dcache trigger this?
|
|
if (ICacheStreamPtr < 7)
|
|
{
|
|
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
|
|
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
|
|
}
|
|
|
|
if (PU_Map[addr>>12] & 0x30)
|
|
WriteBufferDrain();
|
|
else
|
|
WriteBufferCheck<1>();
|
|
|
|
QueueFunction(&ARMv5::DRead8_3);
|
|
}
|
|
|
|
void ARMv5::DRead8_3()
|
|
{
|
|
u8 reg = __builtin_ctz(LDRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
|
|
|
|
NDS.ARM9Timestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
if ((addr >> 24) == 0x02)
|
|
{
|
|
MRTrack.Type = MainRAMType::Fetch;
|
|
MRTrack.Var = MR8;
|
|
MRTrack.Progress = reg;
|
|
}
|
|
else
|
|
{
|
|
NDS.ARM9Timestamp += MemTimings[addr >> 14][0];
|
|
DataCycles = 3<<NDS.ARM9ClockShift;
|
|
DataRegion = NDS.ARM9Regions[addr>>14];
|
|
if ((NDS.ARM9Timestamp <= WBReleaseTS) && (DataRegion == WBLastRegion)) // check write buffer
|
|
NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift;
|
|
|
|
if (WBTimestamp < ((NDS.ARM9Timestamp - (3<<NDS.ARM9ClockShift) + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1)))
|
|
WBTimestamp = (NDS.ARM9Timestamp - (3<<NDS.ARM9ClockShift) + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
*val = BusRead8(addr);
|
|
}
|
|
}
|
|
|
|
void ARMv5::DCacheFin16()
|
|
{
|
|
u8 reg = __builtin_ctz(LDRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
|
|
|
|
*val = (RetVal >> (8 * (addr & 2))) & 0xffff;
|
|
}
|
|
|
|
bool ARMv5::DataRead16(u32 addr, u8 reg)
|
|
{
|
|
// Data Aborts
|
|
// Exception is handled in the actual instruction implementation
|
|
if (!(PU_Map[addr>>12] & CP15_MAP_READABLE)) [[unlikely]]
|
|
{
|
|
QueueFunction(&ARMv5::DAbortHandle);
|
|
return false;
|
|
}
|
|
|
|
FetchAddr[reg] = addr;
|
|
LDRRegs = 1<<reg;
|
|
|
|
QueueFunction(&ARMv5::DRead16_2);
|
|
return true;
|
|
}
|
|
|
|
void ARMv5::DRead16_2()
|
|
{
|
|
u8 reg = __builtin_ctz(LDRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
|
|
|
|
if (DCacheStreamPtr < 7)
|
|
{
|
|
u64 fillend = DCacheStreamTimes[6] + 1;
|
|
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles?
|
|
DCacheStreamPtr = 7;
|
|
}
|
|
|
|
addr &= ~1;
|
|
|
|
if (addr < ITCMSize)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
ITCMTimestamp = NDS.ARM9Timestamp + DataCycles;
|
|
DataRegion = Mem9_ITCM;
|
|
*val = *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)];
|
|
return;
|
|
}
|
|
if ((addr & DTCMMask) == DTCMBase)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
DataRegion = Mem9_DTCM;
|
|
*val = *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)];
|
|
return;
|
|
}
|
|
|
|
#if !DISABLE_DCACHE
|
|
#ifdef JIT_ENABLED
|
|
//if (!NDS.IsJITEnabled())
|
|
#endif
|
|
{
|
|
if (IsAddressDCachable(addr))
|
|
{
|
|
DelayedQueue = &ARMv5::DCacheFin16;
|
|
if (DCacheLookup(addr)) return;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// bus reads can only overlap with icache streaming by 6 cycles
|
|
// checkme: does cache trigger this?
|
|
if (ICacheStreamPtr < 7)
|
|
{
|
|
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
|
|
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
|
|
}
|
|
|
|
if (PU_Map[addr>>12] & 0x30)
|
|
WriteBufferDrain();
|
|
else
|
|
WriteBufferCheck<1>();
|
|
|
|
QueueFunction(&ARMv5::DRead16_3);
|
|
}
|
|
|
|
void ARMv5::DRead16_3()
|
|
{
|
|
u8 reg = __builtin_ctz(LDRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
|
|
|
|
NDS.ARM9Timestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
if ((addr >> 24) == 0x02)
|
|
{
|
|
MRTrack.Type = MainRAMType::Fetch;
|
|
MRTrack.Var = MR16;
|
|
MRTrack.Progress = reg;
|
|
}
|
|
else
|
|
{
|
|
NDS.ARM9Timestamp += MemTimings[addr >> 14][0];
|
|
DataCycles = 3<<NDS.ARM9ClockShift;
|
|
DataRegion = NDS.ARM9Regions[addr>>14];
|
|
if ((NDS.ARM9Timestamp <= WBReleaseTS) && (DataRegion == WBLastRegion)) // check write buffer
|
|
NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift;
|
|
|
|
if (WBTimestamp < ((NDS.ARM9Timestamp - (3<<NDS.ARM9ClockShift) + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1)))
|
|
WBTimestamp = (NDS.ARM9Timestamp - (3<<NDS.ARM9ClockShift) + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
*val = BusRead16(addr);
|
|
}
|
|
}
|
|
|
|
void ARMv5::DCacheFin32()
|
|
{
|
|
u8 reg = __builtin_ctz(LDRRegs);
|
|
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
|
|
*val = RetVal;
|
|
LDRRegs &= ~1<<reg;
|
|
}
|
|
|
|
bool ARMv5::DataRead32(u32 addr, u8 reg)
|
|
{
|
|
// Data Aborts
|
|
// Exception is handled in the actual instruction implementation
|
|
if (!(PU_Map[addr>>12] & CP15_MAP_READABLE)) [[unlikely]]
|
|
{
|
|
QueueFunction(&ARMv5::DAbortHandle);
|
|
return false;
|
|
}
|
|
|
|
FetchAddr[reg] = addr;
|
|
LDRRegs = 1<<reg;
|
|
|
|
QueueFunction(&ARMv5::DRead32_2);
|
|
return true;
|
|
}
|
|
|
|
void ARMv5::DRead32_2()
|
|
{
|
|
u8 reg = __builtin_ctz(LDRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
|
|
|
|
if (DCacheStreamPtr < 7)
|
|
{
|
|
u64 fillend = DCacheStreamTimes[6] + 1;
|
|
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles?
|
|
DCacheStreamPtr = 7;
|
|
}
|
|
|
|
addr &= ~3;
|
|
|
|
if (addr < ITCMSize)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
ITCMTimestamp = NDS.ARM9Timestamp + DataCycles;
|
|
DataRegion = Mem9_ITCM;
|
|
*val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
|
|
LDRRegs &= ~1<<reg;
|
|
return;
|
|
}
|
|
if ((addr & DTCMMask) == DTCMBase)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
DataRegion = Mem9_DTCM;
|
|
*val = *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)];
|
|
LDRRegs &= ~1<<reg;
|
|
return;
|
|
}
|
|
|
|
#if !DISABLE_DCACHE
|
|
#ifdef JIT_ENABLED
|
|
//if (!NDS.IsJITEnabled())
|
|
#endif
|
|
{
|
|
if (IsAddressDCachable(addr))
|
|
{
|
|
DelayedQueue = &ARMv5::DCacheFin32;
|
|
if (DCacheLookup(addr)) return;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// bus reads can only overlap with icache streaming by 6 cycles
|
|
// checkme: does cache trigger this?
|
|
if (ICacheStreamPtr < 7)
|
|
{
|
|
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
|
|
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
|
|
}
|
|
|
|
if (PU_Map[addr>>12] & 0x30)
|
|
WriteBufferDrain();
|
|
else
|
|
WriteBufferCheck<1>();
|
|
|
|
QueueFunction(&ARMv5::DRead32_3);
|
|
}
|
|
|
|
void ARMv5::DRead32_3()
|
|
{
|
|
u8 reg = __builtin_ctz(LDRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
|
|
|
|
NDS.ARM9Timestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
if ((addr >> 24) == 0x02)
|
|
{
|
|
MRTrack.Type = MainRAMType::Fetch;
|
|
MRTrack.Var = MR32;
|
|
MRTrack.Progress = reg;
|
|
}
|
|
else
|
|
{
|
|
NDS.ARM9Timestamp += MemTimings[addr >> 14][1];
|
|
DataCycles = 3<<NDS.ARM9ClockShift;
|
|
DataRegion = NDS.ARM9Regions[addr>>14];
|
|
if ((NDS.ARM9Timestamp <= WBReleaseTS) && (DataRegion == WBLastRegion)) // check write buffer
|
|
NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift;
|
|
|
|
if (WBTimestamp < ((NDS.ARM9Timestamp - (3<<NDS.ARM9ClockShift) + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1)))
|
|
WBTimestamp = (NDS.ARM9Timestamp - (3<<NDS.ARM9ClockShift) + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
*val = BusRead32(addr);
|
|
}
|
|
LDRRegs &= ~1<<reg;
|
|
}
|
|
|
|
bool ARMv5::DataRead32S(u32 addr, u8 reg)
|
|
{
|
|
// Data Aborts
|
|
// Exception is handled in the actual instruction implementation
|
|
if (!(PU_Map[addr>>12] & CP15_MAP_READABLE)) [[unlikely]]
|
|
{
|
|
QueueFunction(&ARMv5::DAbortHandle);
|
|
return false;
|
|
}
|
|
|
|
FetchAddr[reg] = addr;
|
|
LDRRegs |= 1<<reg;
|
|
|
|
QueueFunction(&ARMv5::DRead32S_2);
|
|
return true;
|
|
}
|
|
|
|
void ARMv5::DRead32S_2()
|
|
{
|
|
u8 reg = __builtin_ctz(LDRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
|
|
|
|
addr &= ~3;
|
|
|
|
if (addr < ITCMSize)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
// we update the timestamp during the actual function, as a sequential itcm access can only occur during instructions with strange itcm wait cycles
|
|
DataRegion = Mem9_ITCM;
|
|
*val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
|
|
LDRRegs &= ~1<<reg;
|
|
return;
|
|
}
|
|
if ((addr & DTCMMask) == DTCMBase)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
DataRegion = Mem9_DTCM;
|
|
*val = *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)];
|
|
LDRRegs &= ~1<<reg;
|
|
return;
|
|
}
|
|
|
|
#if !DISABLE_DCACHE
|
|
#ifdef JIT_ENABLED
|
|
//if (!NDS.IsJITEnabled())
|
|
#endif
|
|
{
|
|
if (IsAddressDCachable(addr))
|
|
{
|
|
DelayedQueue = &ARMv5::DCacheFin32;
|
|
if (DCacheLookup(addr)) return;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// bus reads can only overlap with icache streaming by 6 cycles
|
|
// checkme: does cache trigger this?
|
|
if (ICacheStreamPtr < 7)
|
|
{
|
|
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
|
|
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
|
|
}
|
|
|
|
if (PU_Map[addr>>12] & 0x30) // checkme
|
|
WriteBufferDrain();
|
|
else
|
|
WriteBufferCheck<1>();
|
|
|
|
QueueFunction(&ARMv5::DRead32S_3);
|
|
}
|
|
|
|
void ARMv5::DRead32S_3()
|
|
{
|
|
u8 reg = __builtin_ctz(LDRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
|
|
|
|
// bursts cannot cross a 1kb boundary
|
|
if (addr & 0x3FF) // s
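// a non-zero offset inside the current 1KB window means the access can
// continue the burst (sequential); at a 1KB boundary the burst has to
// restart as a non-sequential access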
|
|
{
|
|
if ((addr >> 24) == 0x02)
|
|
{
|
|
MRTrack.Type = MainRAMType::Fetch;
|
|
MRTrack.Var = MR32 | MRSequential;
|
|
MRTrack.Progress = reg;
|
|
}
|
|
else
|
|
{
|
|
NDS.ARM9Timestamp += MemTimings[addr>>14][2];
|
|
DataCycles = MemTimings[addr>>14][2];
|
|
DataRegion = NDS.ARM9Regions[addr>>14];
|
|
if ((NDS.ARM9Timestamp <= WBReleaseTS) && (DataRegion == WBLastRegion)) // check write buffer
|
|
NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift;
|
|
|
|
if (WBTimestamp < ((NDS.ARM9Timestamp - (3<<NDS.ARM9ClockShift) + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1)))
|
|
WBTimestamp = (NDS.ARM9Timestamp - (3<<NDS.ARM9ClockShift) + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
*val = BusRead32(addr);
|
|
}
|
|
}
|
|
else // ns
|
|
{
|
|
NDS.ARM9Timestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
if ((addr >> 24) == 0x02)
|
|
{
|
|
MRTrack.Type = MainRAMType::Fetch;
|
|
MRTrack.Var = MR32;
|
|
MRTrack.Progress = reg;
|
|
}
|
|
else
|
|
{
|
|
NDS.ARM9Timestamp += MemTimings[addr>>14][1];
|
|
DataCycles = 3<<NDS.ARM9ClockShift;
|
|
DataRegion = NDS.ARM9Regions[addr>>14];
|
|
if ((NDS.ARM9Timestamp <= WBReleaseTS) && (DataRegion == WBLastRegion)) // check write buffer
|
|
NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift;
|
|
|
|
if (WBTimestamp < ((NDS.ARM9Timestamp - (3<<NDS.ARM9ClockShift) + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1)))
|
|
WBTimestamp = (NDS.ARM9Timestamp - (3<<NDS.ARM9ClockShift) + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
*val = BusRead32(addr);
|
|
}
|
|
}
|
|
LDRRegs &= ~1<<reg;
|
|
}
|
|
|
|
bool ARMv5::DataWrite8(u32 addr, u8 val, u8 reg)
|
|
{
|
|
// Data Aborts
|
|
// Exception is handled in the actual instruction implementation
|
|
if (!(PU_Map[addr>>12] & CP15_MAP_WRITEABLE)) [[unlikely]]
|
|
{
|
|
QueueFunction(&ARMv5::DAbortHandle);
|
|
return false;
|
|
}
|
|
|
|
FetchAddr[reg] = addr;
|
|
STRRegs = 1<<reg;
|
|
STRVal[reg] = val;
|
|
|
|
QueueFunction(&ARMv5::DWrite8_2);
|
|
return true;
|
|
}
|
|
|
|
void ARMv5::DWrite8_2()
|
|
{
|
|
u8 reg = __builtin_ctz(STRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u8 val = STRVal[reg];
|
|
|
|
if (DCacheStreamPtr < 7)
|
|
{
|
|
u64 fillend = DCacheStreamTimes[6] + 1;
|
|
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles?
|
|
DCacheStreamPtr = 7;
|
|
}
|
|
|
|
if (addr < ITCMSize)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
// does not stall (for some reason?)
|
|
DataRegion = Mem9_ITCM;
|
|
*(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
|
|
#ifdef JIT_ENABLED
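// ITCM writes may overwrite code, so invalidate any JIT blocks covering
// this address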
|
|
NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
|
|
#endif
|
|
return;
|
|
}
|
|
if ((addr & DTCMMask) == DTCMBase)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
DataRegion = Mem9_DTCM;
|
|
*(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val;
|
|
return;
|
|
}
|
|
|
|
#if !DISABLE_DCACHE
|
|
#ifdef JIT_ENABLED
|
|
//if (!NDS.IsJITEnabled())
|
|
#endif
|
|
{
|
|
if (IsAddressDCachable(addr))
|
|
{
|
|
if (DCacheWrite8(addr, val))
|
|
return;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// bus reads can only overlap with icache streaming by 6 cycles
|
|
// checkme: does cache trigger this?
|
|
if (ICacheStreamPtr < 7)
|
|
{
|
|
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
|
|
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
|
|
}
|
|
|
|
if (!(PU_Map[addr>>12] & (0x30)))
|
|
{
|
|
WriteBufferCheck<2>();
|
|
QueueFunction(&ARMv5::DWrite8_3);
|
|
}
|
|
else
|
|
{
|
|
if (WBDelay > NDS.ARM9Timestamp) NDS.ARM9Timestamp = WBDelay;
|
|
|
|
WriteBufferWrite(addr, 4);
|
|
WriteBufferWrite(val, 0, addr);
|
|
}
|
|
}
|
|
|
|
void ARMv5::DWrite8_3()
|
|
{
|
|
u8 reg = __builtin_ctz(STRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u8 val = STRVal[reg];
|
|
|
|
NDS.ARM9Timestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
if ((addr >> 24) == 0x02)
|
|
{
|
|
MRTrack.Type = MainRAMType::Fetch;
|
|
MRTrack.Var = MRWrite | MR8;
|
|
MRTrack.Progress = reg;
|
|
}
|
|
else
|
|
{
|
|
NDS.ARM9Timestamp += MemTimings[addr >> 14][0];
|
|
DataCycles = 3<<NDS.ARM9ClockShift;
|
|
DataRegion = NDS.ARM9Regions[addr>>14];
|
|
|
|
if (WBTimestamp < ((NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1)))
|
|
WBTimestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
BusWrite8(addr, val);
|
|
}
|
|
}
|
|
|
|
bool ARMv5::DataWrite16(u32 addr, u16 val, u8 reg)
|
|
{
|
|
// Data Aborts
|
|
// Exception is handled in the actual instruction implementation
|
|
if (!(PU_Map[addr>>12] & CP15_MAP_WRITEABLE)) [[unlikely]]
|
|
{
|
|
QueueFunction(&ARMv5::DAbortHandle);
|
|
return false;
|
|
}
|
|
|
|
FetchAddr[reg] = addr;
|
|
STRRegs = 1<<reg;
|
|
STRVal[reg] = val;
|
|
|
|
QueueFunction(&ARMv5::DWrite16_2);
|
|
return true;
|
|
}
|
|
|
|
void ARMv5::DWrite16_2()
|
|
{
|
|
u8 reg = __builtin_ctz(STRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u16 val = STRVal[reg];
|
|
|
|
if (DCacheStreamPtr < 7)
|
|
{
|
|
u64 fillend = DCacheStreamTimes[6] + 1;
|
|
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles?
|
|
DCacheStreamPtr = 7;
|
|
}
|
|
|
|
addr &= ~1;
|
|
|
|
if (addr < ITCMSize)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
// does not stall (for some reason?)
|
|
DataRegion = Mem9_ITCM;
|
|
*(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
|
|
#ifdef JIT_ENABLED
|
|
NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
|
|
#endif
|
|
return;
|
|
}
|
|
if ((addr & DTCMMask) == DTCMBase)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
DataRegion = Mem9_DTCM;
|
|
*(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val;
|
|
return;
|
|
}
|
|
|
|
#if !DISABLE_DCACHE
|
|
#ifdef JIT_ENABLED
|
|
//if (!NDS.IsJITEnabled())
|
|
#endif
|
|
{
|
|
if (IsAddressDCachable(addr))
|
|
{
|
|
if (DCacheWrite16(addr, val))
|
|
return;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// bus reads can only overlap with icache streaming by 6 cycles
|
|
// checkme: does cache trigger this?
|
|
if (ICacheStreamPtr < 7)
|
|
{
|
|
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
|
|
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
|
|
}
|
|
|
|
if (!(PU_Map[addr>>12] & 0x30))
|
|
{
|
|
WriteBufferCheck<2>();
|
|
QueueFunction(&ARMv5::DWrite16_3);
|
|
}
|
|
else
|
|
{
|
|
if (WBDelay > NDS.ARM9Timestamp) NDS.ARM9Timestamp = WBDelay;
|
|
|
|
WriteBufferWrite(addr, 4);
|
|
WriteBufferWrite(val, 1, addr);
|
|
}
|
|
}
|
|
|
|
void ARMv5::DWrite16_3()
|
|
{
|
|
u8 reg = __builtin_ctz(STRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u16 val = STRVal[reg];
|
|
|
|
NDS.ARM9Timestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
if ((addr >> 24) == 0x02)
|
|
{
|
|
MRTrack.Type = MainRAMType::Fetch;
|
|
MRTrack.Var = MRWrite | MR16;
|
|
MRTrack.Progress = reg;
|
|
}
|
|
else
|
|
{
|
|
NDS.ARM9Timestamp += MemTimings[addr >> 14][0];
|
|
DataCycles = NDS.ARM9ClockShift;
|
|
DataRegion = NDS.ARM9Regions[addr>>14];
|
|
|
|
if (WBTimestamp < ((NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1)))
|
|
WBTimestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
BusWrite16(addr, val);
|
|
}
|
|
}
|
|
|
|
bool ARMv5::DataWrite32(u32 addr, u32 val, u8 reg)
|
|
{
|
|
// Data Aborts
|
|
// Exception is handled in the actual instruction implementation
|
|
if (!(PU_Map[addr>>12] & CP15_MAP_WRITEABLE)) [[unlikely]]
|
|
{
|
|
QueueFunction(&ARMv5::DAbortHandle);
|
|
return false;
|
|
}
|
|
|
|
FetchAddr[reg] = addr;
|
|
STRRegs = 1<<reg;
|
|
STRVal[reg] = val;
|
|
|
|
QueueFunction(&ARMv5::DWrite32_2);
|
|
return true;
|
|
}
|
|
|
|
void ARMv5::DWrite32_2()
|
|
{
|
|
u8 reg = __builtin_ctz(STRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 val = STRVal[reg];
|
|
|
|
if (DCacheStreamPtr < 7)
|
|
{
|
|
u64 fillend = DCacheStreamTimes[6] + 1;
|
|
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles?
|
|
DCacheStreamPtr = 7;
|
|
}
|
|
|
|
addr &= ~3;
|
|
|
|
if (addr < ITCMSize)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
// does not stall (for some reason?)
|
|
DataRegion = Mem9_ITCM;
|
|
*(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
|
|
#ifdef JIT_ENABLED
|
|
NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
|
|
#endif
|
|
STRRegs &= ~1<<reg;
|
|
return;
|
|
}
|
|
if ((addr & DTCMMask) == DTCMBase)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
DataRegion = Mem9_DTCM;
|
|
*(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val;
|
|
STRRegs &= ~1<<reg;
|
|
return;
|
|
}
|
|
|
|
#if !DISABLE_DCACHE
|
|
#ifdef JIT_ENABLED
|
|
//if (!NDS.IsJITEnabled())
|
|
#endif
|
|
{
|
|
if (IsAddressDCachable(addr))
|
|
{
|
|
if (DCacheWrite32(addr, val))
|
|
{
|
|
STRRegs &= ~1<<reg;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// bus reads can only overlap with icache streaming by 6 cycles
|
|
// checkme: does cache trigger this?
|
|
if (ICacheStreamPtr < 7)
|
|
{
|
|
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
|
|
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
|
|
}
|
|
|
|
if (!(PU_Map[addr>>12] & 0x30))
|
|
{
|
|
WriteBufferCheck<2>();
|
|
QueueFunction(&ARMv5::DWrite32_3);
|
|
}
|
|
else
|
|
{
|
|
if (WBDelay > NDS.ARM9Timestamp) NDS.ARM9Timestamp = WBDelay;
|
|
|
|
WriteBufferWrite(addr, 4);
|
|
WriteBufferWrite(val, 2, addr);
|
|
STRRegs &= ~1<<reg;
|
|
}
|
|
}
|
|
|
|
void ARMv5::DWrite32_3()
|
|
{
|
|
u8 reg = __builtin_ctz(STRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 val = STRVal[reg];
|
|
|
|
NDS.ARM9Timestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
if ((addr >> 24) == 0x02)
|
|
{
|
|
MRTrack.Type = MainRAMType::Fetch;
|
|
MRTrack.Var = MRWrite | MR32;
|
|
MRTrack.Progress = reg;
|
|
}
|
|
else
|
|
{
|
|
NDS.ARM9Timestamp += MemTimings[addr >> 14][1];
|
|
DataCycles = 3<<NDS.ARM9ClockShift;
|
|
DataRegion = NDS.ARM9Regions[addr>>14];
|
|
|
|
if (WBTimestamp < ((NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1)))
|
|
WBTimestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
BusWrite32(addr, val);
|
|
}
|
|
STRRegs &= ~1<<reg;
|
|
}
|
|
|
|
bool ARMv5::DataWrite32S(u32 addr, u32 val, u8 reg)
|
|
{
|
|
// Data Aborts
|
|
// Exception is handled in the actual instruction implementation
|
|
if (!(PU_Map[addr>>12] & CP15_MAP_WRITEABLE)) [[unlikely]]
|
|
{
|
|
QueueFunction(&ARMv5::DAbortHandle);
|
|
return false;
|
|
}
|
|
|
|
FetchAddr[reg] = addr;
|
|
STRRegs |= 1<<reg;
|
|
STRVal[reg] = val;
|
|
|
|
QueueFunction(&ARMv5::DWrite32S_2);
|
|
return true;
|
|
}
|
|
|
|
void ARMv5::DWrite32S_2()
|
|
{
|
|
u8 reg = __builtin_ctz(STRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 val = STRVal[reg];
|
|
|
|
addr &= ~3;
|
|
|
|
if (addr < ITCMSize)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
// we update the timestamp during the actual function, as a sequential itcm access can only occur during instructions with strange itcm wait cycles
|
|
DataRegion = Mem9_ITCM;
|
|
*(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
|
|
#ifdef JIT_ENABLED
|
|
NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
|
|
#endif
|
|
STRRegs &= ~1<<reg;
|
|
return;
|
|
}
|
|
if ((addr & DTCMMask) == DTCMBase)
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = 1;
|
|
DataRegion = Mem9_DTCM;
|
|
*(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val;
|
|
STRRegs &= ~1<<reg;
|
|
return;
|
|
}
|
|
|
|
#if !DISABLE_DCACHE
|
|
#ifdef JIT_ENABLED
|
|
//if (!NDS.IsJITEnabled())
|
|
#endif
|
|
{
|
|
if (IsAddressDCachable(addr))
|
|
{
|
|
if (DCacheWrite32(addr, val))
|
|
{
|
|
STRRegs &= ~1<<reg;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// bus reads can only overlap with icache streaming by 6 cycles
|
|
// checkme: does cache trigger this?
|
|
if (ICacheStreamPtr < 7)
|
|
{
|
|
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
|
|
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
|
|
}
|
|
|
|
if (!(PU_Map[addr>>12] & 0x30)) // non-bufferable
|
|
{
|
|
WriteBufferCheck<2>();
|
|
QueueFunction(&ARMv5::DWrite32S_3);
|
|
}
|
|
else
|
|
{
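// sequential store: the address was already queued by the first store of
// the burst, so presumably only the data word needs to be pushed here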
|
|
WriteBufferWrite(val, 3, addr);
|
|
STRRegs &= ~1<<reg;
|
|
}
|
|
}
|
|
|
|
void ARMv5::DWrite32S_3()
|
|
{
|
|
u8 reg = __builtin_ctz(STRRegs);
|
|
u32 addr = FetchAddr[reg];
|
|
u32 val = STRVal[reg];
|
|
|
|
// bursts cannot cross a 1kb boundary
|
|
if (addr & 0x3FF) // s
|
|
{
|
|
if ((addr >> 24) == 0x02)
|
|
{
|
|
MRTrack.Type = MainRAMType::Fetch;
|
|
MRTrack.Var = MRWrite | MR32 | MRSequential;
|
|
MRTrack.Progress = reg;
|
|
}
|
|
else
|
|
{
|
|
NDS.ARM9Timestamp += DataCycles = MemTimings[addr>>14][2];
|
|
DataRegion = NDS.ARM9Regions[addr>>14];
|
|
|
|
if (WBTimestamp < ((NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1)))
|
|
WBTimestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
BusWrite32(addr, val);
|
|
}
|
|
}
|
|
else // ns
|
|
{
|
|
NDS.ARM9Timestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
|
|
if ((addr >> 24) == 0x02)
|
|
{
|
|
MRTrack.Type = MainRAMType::Fetch;
|
|
MRTrack.Var = MRWrite | MR32;
|
|
MRTrack.Progress = reg;
|
|
}
|
|
else
|
|
{
|
|
NDS.ARM9Timestamp += MemTimings[addr>>14][1];
|
|
DataCycles = 3 << NDS.ARM9ClockShift;
|
|
DataRegion = NDS.ARM9Regions[addr>>14];
|
|
|
|
if (WBTimestamp < ((NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1)))
|
|
WBTimestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
|
|
BusWrite32(addr, val);
|
|
}
|
|
}
|
|
STRRegs &= ~1<<reg;
|
|
}
|
|
|
|
void ARMv5::GetCodeMemRegion(const u32 addr, MemRegion* region)
|
|
{
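// note: this fills the CodeMem member rather than the region argument,
// which is currently unused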
|
|
NDS.ARM9GetMemRegion(addr, false, &CodeMem);
|
|
}
|
|
|
|
}
|