- new memory access timing, should be more accurate. this fixes some games for me.

- this includes basic cache emulation, but that's disabled for now for performance reasons
- got rid of the fast fetch execute menu option
(switching between the two should no longer be needed and I don't think we should have things like that be options)
This commit is contained in:
nitsuja 2009-09-20 20:11:16 +00:00
parent f78ce4e966
commit cb1d7d2f23
20 changed files with 825 additions and 405 deletions

View File

@ -44,6 +44,7 @@
#include "addons.h"
#include "mic.h"
#include "movie.h"
#include "MMU_timing.h"
#ifdef DO_ASSERT_UNALIGNED
#define ASSERT_UNALIGNED(x) assert(x)
@ -163,6 +164,7 @@ void mmu_log_debug_ARM7(u32 adr, const char *fmt, ...)
MMU_struct MMU;
MMU_struct_new MMU_new;
MMU_struct_timing MMU_timing;
u8 * MMU_struct::MMU_MEM[2][256] = {
//arm9
@ -252,17 +254,18 @@ u32 MMU_struct::MMU_MASK[2][256] = {
}
};
CACHE_ALIGN
TWaitState MMU_struct::MMU_WAIT16[2][16] = {
{ 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 1, 1, 1, 1, 1 }, //arm9
{ 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 1, 1, 1, 1, 1 }, //arm7
};
CACHE_ALIGN
TWaitState MMU_struct::MMU_WAIT32[2][16] = {
{ 1, 1, 1, 1, 1, 2, 2, 1, 8, 8, 5, 1, 1, 1, 1, 1 }, //arm9
{ 1, 1, 1, 1, 1, 1, 1, 1, 8, 8, 5, 1, 1, 1, 1, 1 }, //arm7
};
// this logic was moved to MMU_timing.h
//CACHE_ALIGN
//TWaitState MMU_struct::MMU_WAIT16[2][16] = {
// { 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 1, 1, 1, 1, 1 }, //arm9
// { 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 1, 1, 1, 1, 1 }, //arm7
//};
//
//CACHE_ALIGN
//TWaitState MMU_struct::MMU_WAIT32[2][16] = {
// { 1, 1, 1, 1, 1, 2, 2, 1, 8, 8, 5, 1, 1, 1, 1, 1 }, //arm9
// { 1, 1, 1, 1, 1, 1, 1, 1, 8, 8, 5, 1, 1, 1, 1, 1 }, //arm7
//};
//////////////////////////////////////////////////////////////
@ -965,6 +968,13 @@ void MMU_Reset()
MMU.dscard[ARMCPU_ARM7].address = 0;
MMU.dscard[ARMCPU_ARM7].transfer_count = 0;
MMU.dscard[ARMCPU_ARM7].mode = CardMode_Normal;
MMU_timing.arm7codeFetch.Reset();
MMU_timing.arm7dataFetch.Reset();
MMU_timing.arm9codeFetch.Reset();
MMU_timing.arm9dataFetch.Reset();
MMU_timing.arm9codeCache.Reset();
MMU_timing.arm9dataCache.Reset();
}
void MMU_setRom(u8 * rom, u32 mask)

View File

@ -120,9 +120,6 @@ struct MMU_struct
u8 ARM9_RW_MODE;
static CACHE_ALIGN TWaitState MMU_WAIT16[2][16];
static CACHE_ALIGN TWaitState MMU_WAIT32[2][16];
u32 DTCMRegion;
u32 ITCMRegion;

395
desmume/src/MMU_timing.h Normal file
View File

@ -0,0 +1,395 @@
/* Copyright (C) 2006 yopyop
yopyop156@ifrance.com
yopyop156.ifrance.com
Copyright (C) 2007 shash
Copyright (C) 2007-2009 DeSmuME team
This file is part of DeSmuME
DeSmuME is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
DeSmuME is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DeSmuME; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
// this file is split from MMU.h for the purpose of avoiding ridiculous recompile times
// when changing it, because practically everything includes MMU.h.
#ifndef MMUTIMING_H
#define MMUTIMING_H
#include <algorithm>
#include "MMU.h"
#include "cp15.h"
#include "readwrite.h"
////////////////////////////////////////////////////////////////
// MEMORY TIMING ACCURACY CONFIGURATION
//
// the more of these are enabled,
// the more accurate memory access timing _should_ become.
// they should be listed roughly in order of most to least important.
// it's reasonable to disable some of these as a speed hack.
// obviously, these defines don't cover all the variables or features needed,
// and in particular, DMA or code+data access bus contention is still missing.
// makes non-sequential accesses slower than sequential ones.
#define ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS
// enables emulation of code fetch waits.
#define ACCOUNT_FOR_CODE_FETCH_CYCLES
// makes access to DTCM (arm9 only) fast.
#define ACCOUNT_FOR_DATA_TCM_SPEED
// enables simulation of cache hits and cache misses.
// currently disabled for a few FPS of emulator speedup.
//#define ENABLE_CACHE_CONTROLLER_EMULATION
//
////////////////////////////////////////////////////////////////
// direction of a memory access; used as a template argument by the timing code below.
enum MMU_ACCESS_DIRECTION
{
	MMU_AD_READ  = 0,	// the access reads from memory
	MMU_AD_WRITE = 1	// the access writes to memory
};
// note that we don't actually emulate the cache contents here,
// only enough to guess what would be a cache hit or a cache miss.
// this doesn't really get used unless ENABLE_CACHE_CONTROLLER_EMULATION is defined.
template<int SIZESHIFT, int ASSOCIATIVESHIFT, int BLOCKSIZESHIFT>
class CacheController
{
public:
template<MMU_ACCESS_DIRECTION DIR>
FORCEINLINE bool Cached(u32 addr)
{
u32 blockMasked = addr & BLOCKMASK;
if(blockMasked == m_cacheCache)
return true;
else
return this->CachedInternal<DIR>(addr, blockMasked);
}
void Reset()
{
for(int blockIndex = 0; blockIndex < NUMBLOCKS; blockIndex++)
m_blocks[blockIndex].Reset();
m_cacheCache = ~0;
}
CacheController()
{
Reset();
}
void savestate(EMUFILE* os, int version)
{
write32le(m_cacheCache, os);
for(int i = 0; i < NUMBLOCKS; i++)
{
for(int j = 0; j < ASSOCIATIVITY; j++)
write32le(m_blocks[i].tag[j],os);
write32le(m_blocks[i].nextWay,os);
}
}
bool loadstate(EMUFILE* is, int version)
{
read32le(&m_cacheCache, is);
for(int i = 0; i < NUMBLOCKS; i++)
{
for(int j = 0; j < ASSOCIATIVITY; j++)
read32le(&m_blocks[i].tag[j],is);
read32le(&m_blocks[i].nextWay,is);
}
return true;
}
private:
template<MMU_ACCESS_DIRECTION DIR>
bool CachedInternal(u32 addr, u32 blockMasked)
{
u32 blockIndex = blockMasked >> BLOCKSIZESHIFT;
CacheBlock& block = m_blocks[blockIndex];
addr &= TAGMASK;
for(int way = 0; way < ASSOCIATIVITY; way++)
if(addr == block.tag[way])
{
// found it, already allocated
m_cacheCache = blockMasked;
return true;
}
if(DIR == MMU_AD_READ)
{
// TODO: support other allocation orders?
block.tag[block.nextWay++] = addr;
block.nextWay %= ASSOCIATIVITY;
m_cacheCache = blockMasked;
}
return false;
}
enum { SIZE = 1 << SIZESHIFT };
enum { ASSOCIATIVITY = 1 << ASSOCIATIVESHIFT };
enum { BLOCKSIZE = 1 << BLOCKSIZESHIFT };
enum { TAGSHIFT = SIZESHIFT - ASSOCIATIVESHIFT };
enum { TAGMASK = (u32)(~0 << TAGSHIFT) };
enum { BLOCKMASK = ((u32)~0 >> (32 - TAGSHIFT)) & (u32)(~0 << BLOCKSIZESHIFT) };
enum { WORDSIZE = sizeof(u32) };
enum { WORDSPERBLOCK = (1 << BLOCKSIZESHIFT) / WORDSIZE };
enum { DATAPERWORD = WORDSIZE * ASSOCIATIVITY };
enum { DATAPERBLOCK = DATAPERWORD * WORDSPERBLOCK };
enum { NUMBLOCKS = SIZE / DATAPERBLOCK };
struct CacheBlock
{
u32 tag [ASSOCIATIVITY];
u32 nextWay;
void Reset()
{
nextWay = 0;
for(int way = 0; way < ASSOCIATIVITY; way++)
tag[way] = 0;
}
};
u32 m_cacheCache; // optimization
CacheBlock m_blocks [NUMBLOCKS];
};
// forward declaration: FetchAccessUnit::Fetch below calls this,
// but the definition comes later in this file, after MMU_struct_timing.
template<int PROCNUM, MMU_ACCESS_TYPE AT, int READSIZE, MMU_ACCESS_DIRECTION DIRECTION>
FORCEINLINE u32 _MMU_accesstime(u32 addr, bool sequential);
// remembers the address of the previous access made through it,
// so that consecutive (sequential) accesses can be told apart from non-sequential ones.
// one instance exists per processor (PROCNUM) and access type (AT).
template<int PROCNUM, MMU_ACCESS_TYPE AT>
class FetchAccessUnit
{
public:
	// returns the cycle count of one READSIZE-bit access at address.
	// an access counts as sequential when it starts exactly
	// READSIZE/8 bytes after the previous access made through this unit.
	template<int READSIZE, MMU_ACCESS_DIRECTION DIRECTION>
	FORCEINLINE u32 Fetch(u32 address)
	{
#ifndef ACCOUNT_FOR_CODE_FETCH_CYCLES
		// code fetch waits are not being emulated: every fetch costs 1 cycle.
		if(AT == MMU_AT_CODE)
			return 1;
#endif

#ifdef ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS
		const bool followsPrevious = (address == (m_lastAddress + (READSIZE>>3)));
#else
		// without this option every access is treated as sequential.
		const bool followsPrevious = true;
#endif

		const u32 cycles = _MMU_accesstime<PROCNUM, AT, READSIZE, DIRECTION>(address, followsPrevious);

#ifdef ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS
		m_lastAddress = address;
#endif

		return cycles;
	}

	// forgets the previous access.
	void Reset()
	{
		m_lastAddress = ~0;
	}

	FetchAccessUnit()
	{
		this->Reset();
	}

	// writes the last accessed address. version is currently unused.
	void savestate(EMUFILE* os, int version)
	{
		write32le(m_lastAddress,os);
	}

	// reads the last accessed address back. always returns true. version is currently unused.
	bool loadstate(EMUFILE* is, int version)
	{
		read32le(&m_lastAddress,is);
		return true;
	}

private:
	u32 m_lastAddress; // address passed to the most recent Fetch (~0 after Reset)
};
// all of the state used by the memory access timing code in this file:
// the two arm9 cache models and one fetch unit per processor and access type.
struct MMU_struct_timing
{
// technically part of the cp15, but I didn't want the dereferencing penalty.
// these template values correspond with the value of armcp15->cacheType.
// (the arguments are log2 of: total size, associativity, block size)
CacheController<13,2,5> arm9codeCache; // 8192 bytes, 4-way associative, 32-byte blocks
CacheController<12,2,5> arm9dataCache; // 4096 bytes, 4-way associative, 32-byte blocks
// technically part of armcpu_t, but that struct isn't templated on PROCNUM
// (first template argument: 0 for the arm9 units, 1 for the arm7 units)
FetchAccessUnit<0,MMU_AT_CODE> arm9codeFetch;
FetchAccessUnit<0,MMU_AT_DATA> arm9dataFetch;
FetchAccessUnit<1,MMU_AT_CODE> arm7codeFetch;
FetchAccessUnit<1,MMU_AT_DATA> arm7dataFetch;
// compile-time selection of the fetch unit belonging to PROCNUM.
// only declared here; explicit specializations for PROCNUM 0 and 1 follow the struct.
template<int PROCNUM> FORCEINLINE FetchAccessUnit<PROCNUM,MMU_AT_CODE>& armCodeFetch();
template<int PROCNUM> FORCEINLINE FetchAccessUnit<PROCNUM,MMU_AT_DATA>& armDataFetch();
};
// PROCNUM 0 selects the arm9 code fetch unit.
template<>
FORCEINLINE FetchAccessUnit<0,MMU_AT_CODE>& MMU_struct_timing::armCodeFetch<0>()
{
	return arm9codeFetch;
}

// PROCNUM 1 selects the arm7 code fetch unit.
template<>
FORCEINLINE FetchAccessUnit<1,MMU_AT_CODE>& MMU_struct_timing::armCodeFetch<1>()
{
	return arm7codeFetch;
}

// PROCNUM 0 selects the arm9 data fetch unit.
template<>
FORCEINLINE FetchAccessUnit<0,MMU_AT_DATA>& MMU_struct_timing::armDataFetch<0>()
{
	return arm9dataFetch;
}

// PROCNUM 1 selects the arm7 data fetch unit.
template<>
FORCEINLINE FetchAccessUnit<1,MMU_AT_DATA>& MMU_struct_timing::armDataFetch<1>()
{
	return arm7dataFetch;
}

// the single global instance of the timing state.
extern MMU_struct_timing MMU_timing;
// calculates the time a single memory access takes,
// in units of cycles of the current processor.
// this function replaces what used to be MMU_WAIT16 and MMU_WAIT32.
//
// template parameters:
//   PROCNUM   - processor making the access (compared against ARMCPU_ARM9 / ARMCPU_ARM7)
//   AT        - MMU_AT_CODE or MMU_AT_DATA
//   READSIZE  - width of the access in bits
//   DIRECTION - MMU_AD_READ or MMU_AD_WRITE
// parameters:
//   addr       - address being accessed
//   sequential - true if the caller considers this access to directly follow the previous one
//                (FetchAccessUnit::Fetch passes address == last address + READSIZE/8)
// returns the number of cycles the access takes.
// with ENABLE_CACHE_CONTROLLER_EMULATION defined this calls CacheController::Cached,
// which can allocate a cache block, so the call has side effects.
template<int PROCNUM, MMU_ACCESS_TYPE AT, int READSIZE, MMU_ACCESS_DIRECTION DIRECTION>
FORCEINLINE u32 _MMU_accesstime(u32 addr, bool sequential)
{
static const int MC = 1; // cached or tcm memory speed
static const int M32 = (PROCNUM==ARMCPU_ARM9) ? 2 : 1; // access through 32-bit bus
static const int M16 = M32 * ((READSIZE>16) ? 2 : 1); // access through 16-bit bus (twice M32 when wider than 16 bits)
static const int MSLW = M16 * 8; // this needs tuning
// arm9 code fetches below 0x02000000 are treated as ITCM accesses.
if(PROCNUM==ARMCPU_ARM9 && AT == MMU_AT_CODE && addr < 0x02000000)
return MC; // ITCM
#ifdef ACCOUNT_FOR_DATA_TCM_SPEED
// arm9 data access whose 16KB-aligned base (low 14 bits cleared) equals MMU.DTCMRegion.
if(PROCNUM==ARMCPU_ARM9 && AT==MMU_AT_DATA && (addr&(~0x3FFF)) == MMU.DTCMRegion)
return MC; // DTCM
#endif
// for now, assume the cache is always enabled for all of main memory
if(PROCNUM==ARMCPU_ARM9 && (addr & 0x0F000000) == 0x02000000)
{
#ifdef ENABLE_CACHE_CONTROLLER_EMULATION
// ask the cache model matching the access type whether this is a hit.
bool cached = false;
if(AT==MMU_AT_CODE)
cached = MMU_timing.arm9codeCache.Cached<DIRECTION>(addr);
if(AT==MMU_AT_DATA)
cached = MMU_timing.arm9dataCache.Cached<DIRECTION>(addr);
if(cached)
return MC;
// cache miss: sequential data accesses cost M16,
// everything else costs M16 times 5 (read) or 4 (write).
if(sequential && AT==MMU_AT_DATA)
return M16;
return M16 * ((DIRECTION == MMU_AD_READ) ? 5 : 4);
#elif defined(ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS)
// this is the closest approximation I could find
// to the with-cache-controller timing
// that doesn't do any actual caching logic.
return sequential ? MC : M16;
#endif
// if neither of the two options above is defined, main memory falls through to the table below.
}
// base cost per memory region, indexed by the top byte of the address (addr >> 24, 0..255).
static const TWaitState MMU_WAIT[16*16] = {
// ITCM, ITCM, MAIN, SWI, REG, VMEM, LCD, OAM, ROM, ROM, RAM, U, U, U, U, BIOS
#define X MC, MC, M16, M32, M32, M16, M16, M32, MSLW, MSLW, MSLW, M32,M32,M32,M32, M32,
// duplicate it 16 times (this was somehow faster than using a mask of 0xF)
X X X X X X X X X X X X X X X X
#undef X
};
u32 c = MMU_WAIT[(addr >> 24)];
#ifdef ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS
// non-sequential accesses pay an extra 6 cycles on the arm9 and 1 cycle on the arm7.
if(!sequential)
{
// NOTE(review): the ITCM early return above only applies to code fetches, so arm9 *data* accesses
// below 0x02000000 still reach this point and receive the penalty - confirm that this is intended.
//if(c != MC || PROCNUM==ARMCPU_ARM7) // check not needed anymore because ITCM/DTCM return earlier
{
c += (PROCNUM==ARMCPU_ARM9) ? 3*2 : 1;
}
}
#endif
return c;
}
// returns the cycle time of one READSIZE-bit data access at addr in the MEM stage.
// the result is meant to be fed into the memCycles argument of MMU_aluMemCycles.
// addr is rounded down to a multiple of the access size before it is timed.
// this may have side effects, so don't call it more than necessary.
template<int PROCNUM, int READSIZE, MMU_ACCESS_DIRECTION DIRECTION>
FORCEINLINE u32 MMU_memAccessCycles(u32 addr)
{
	const u32 alignedAddr = addr & (~((READSIZE>>3)-1));
	FetchAccessUnit<PROCNUM,MMU_AT_DATA>& dataUnit = MMU_timing.armDataFetch<PROCNUM>();
	return dataUnit.template Fetch<READSIZE,DIRECTION>(alignedAddr);
}
// returns the cycle time of one READSIZE-bit code fetch at addr in the FETCH stage.
// the result is meant to be fed into the fetchCycles argument of MMU_fetchExecuteCycles.
// addr is rounded down to a multiple of the fetch size before it is timed.
// this may have side effects, so don't call it more than necessary.
template<int PROCNUM, int READSIZE>
FORCEINLINE u32 MMU_codeFetchCycles(u32 addr)
{
	const u32 alignedAddr = addr & (~((READSIZE>>3)-1));
	FetchAccessUnit<PROCNUM,MMU_AT_CODE>& codeUnit = MMU_timing.armCodeFetch<PROCNUM>();
	return codeUnit.template Fetch<READSIZE,MMU_AD_READ>(alignedAddr);
}
// combines the ALU cycle time with the summed memory access cycle times
// of one instruction into the cycle contribution of its EXECUTE part (ALU + MEM).
// this function might belong more in armcpu, but I don't think it matters.
template<int PROCNUM>
FORCEINLINE u32 MMU_aluMemCycles(u32 aluCycles, u32 memCycles)
{
	if(PROCNUM != ARMCPU_ARM9)
	{
		// 3-stage pipeline: ALU and MEM belong to the same stage,
		// so they happen one after the other and the counts simply add up.
		return aluCycles + memCycles;
	}

	// 5-stage pipeline: ALU and MEM are separate stages.
	// adding the two together would ignore the parallelism of the pipeline
	// and make the emulated system unnaturally slow,
	// so the throughput is approximated by the larger of the two.
	return (aluCycles < memCycles) ? memCycles : aluCycles;
}
// convenience wrapper for instructions that make exactly one memory access:
// times the READSIZE-bit access at addr with MMU_memAccessCycles
// and combines it with aluCycles using MMU_aluMemCycles (= EXECUTE cycles).
// this may have side effects, so don't call it more than necessary.
template<int PROCNUM, int READSIZE, MMU_ACCESS_DIRECTION DIRECTION>
FORCEINLINE u32 MMU_aluMemAccessCycles(u32 aluCycles, u32 addr)
{
	return MMU_aluMemCycles<PROCNUM>(
		aluCycles,
		MMU_memAccessCycles<PROCNUM,READSIZE,DIRECTION>(addr));
}
// combines executeCycles (the ALU+MEM cycles of an instruction)
// with fetchCycles (the cycle time of the FETCH stage)
// into the total cycle contribution of FETCH + EXECUTE.
// this function might belong more in armcpu, but I don't think it matters.
template<int PROCNUM>
FORCEINLINE u32 MMU_fetchExecuteCycles(u32 executeCycles, u32 fetchCycles)
{
#ifndef ACCOUNT_FOR_CODE_FETCH_CYCLES
	// code fetch waits are not being emulated: only the execute time counts.
	return executeCycles;
#else
	// fetch and execute are separate pipeline stages on both the arm7 and the arm9,
	// so the throughput is again approximated by the larger of the two.
	// TODO: add an option to support conflict between MEM and FETCH cycles
	//       if they're both using the same data bus.
	//       in the case of a conflict this should be:
	//       return std::max(aluCycles, memCycles + fetchCycles);
	return (executeCycles < fetchCycles) ? fetchCycles : executeCycles;
#endif
}
#endif

View File

@ -25,7 +25,7 @@ libdesmume_a_SOURCES = \
path.h \
readwrite.cpp readwrite.h \
wifi.cpp wifi.h \
MMU.cpp MMU.h NDSSystem.cpp NDSSystem.h registers.h \
MMU.cpp MMU.h MMU_timing.h NDSSystem.cpp NDSSystem.h registers.h \
OGLRender.cpp OGLRender.h \
ROMReader.cpp ROMReader.h \
render3D.cpp render3D.h \

View File

@ -431,7 +431,6 @@ extern struct TCommonSettings {
, spuAdpcmCache(false)
, gfx3d_flushMode(0)
, manualBackupType(0)
, armFastFetchExecute(false)
{
strcpy(ARM9BIOS, "biosnds9.bin");
strcpy(ARM7BIOS, "biosnds7.bin");
@ -472,8 +471,6 @@ extern struct TCommonSettings {
//this is the user's choice of manual backup type, for cases when the autodetection can't be trusted
int manualBackupType;
bool armFastFetchExecute;
bool spu_muteChannels[16];
struct _ShowGpu {

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,7 @@
#include "debug.h"
#include "Disassembler.h"
#include "NDSSystem.h"
#include "MMU_timing.h"
template<u32> static u32 armcpu_prefetch();
@ -391,7 +392,7 @@ FORCEINLINE static u32 armcpu_prefetch()
armcpu->R[15] = curInstruction + 8;
#endif
return MMU.MMU_WAIT32[PROCNUM][(curInstruction>>24)&0xF];
return MMU_codeFetchCycles<PROCNUM,32>(curInstruction);
}
u32 curInstruction = armcpu->next_instruction;
@ -413,18 +414,16 @@ FORCEINLINE static u32 armcpu_prefetch()
armcpu->R[15] = curInstruction + 4;
#endif
#if 0
if(PROCNUM==0)
{
// arm9 fetches 2 instructions at a time in thumb mode
if(!(curInstruction == armcpu->instruct_adr + 2 && (curInstruction & 2)))
return MMU.MMU_WAIT32[PROCNUM][(curInstruction>>24)&0xF];
return MMU_codeFetchCycles<PROCNUM,32>(curInstruction);
else
return 0;
}
#endif
return MMU.MMU_WAIT16[PROCNUM][(curInstruction>>24)&0xF];
return MMU_codeFetchCycles<PROCNUM,16>(curInstruction);
}
#if 0 /* not used */
@ -531,7 +530,7 @@ u32 armcpu_exec()
cFetch = armcpu_prefetch(&ARMPROC);
if (ARMPROC.stalled) {
return CommonSettings.armFastFetchExecute ? std::max(cFetch, cExecute) : (cFetch + cExecute);
return MMU_fetchExecuteCycles<PROCNUM>(cExecute, cFetch);
}
#endif
@ -565,7 +564,7 @@ u32 armcpu_exec()
#else
cFetch = armcpu_prefetch<PROCNUM>();
#endif
return CommonSettings.armFastFetchExecute ? std::max(cFetch, cExecute) : (cFetch + cExecute);
return MMU_fetchExecuteCycles<PROCNUM>(cExecute, cFetch);
}
if(PROCNUM==0)
@ -590,7 +589,7 @@ u32 armcpu_exec()
#else
cFetch = armcpu_prefetch<PROCNUM>();
#endif
return CommonSettings.armFastFetchExecute ? std::max(cFetch, cExecute) : (cFetch + cExecute);
return MMU_fetchExecuteCycles<PROCNUM>(cExecute, cFetch);
}
//these templates needed to be instantiated manually

View File

@ -259,6 +259,7 @@
729BECB80D9D57AF00ED561B /* mc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mc.h; path = ../mc.h; sourceTree = SOURCE_ROOT; };
729BECB90D9D57AF00ED561B /* mem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mem.h; path = ../mem.h; sourceTree = SOURCE_ROOT; };
729BECBB0D9D57AF00ED561B /* MMU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MMU.h; path = ../MMU.h; sourceTree = SOURCE_ROOT; };
729BECBC0D9D57AF00ED561B /* MMU_timing.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MMU_timing.h; path = ../MMU_timing.h; sourceTree = SOURCE_ROOT; };
729BECBD0D9D57AF00ED561B /* NDSSystem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = NDSSystem.h; path = ../NDSSystem.h; sourceTree = SOURCE_ROOT; };
729BECBE0D9D57AF00ED561B /* registers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = registers.h; path = ../registers.h; sourceTree = SOURCE_ROOT; };
729BECC00D9D57AF00ED561B /* render3D.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = render3D.h; path = ../render3D.h; sourceTree = SOURCE_ROOT; };
@ -491,6 +492,7 @@
729BECB80D9D57AF00ED561B /* mc.h */,
729BECB90D9D57AF00ED561B /* mem.h */,
729BECBB0D9D57AF00ED561B /* MMU.h */,
729BECBC0D9D57AF00ED561B /* MMU_timing.h */,
729BECBD0D9D57AF00ED561B /* NDSSystem.h */,
95D5FE4F1007BC4500882BAE /* guitarGrip.cpp */,
729BECBE0D9D57AF00ED561B /* registers.h */,

View File

@ -63,13 +63,14 @@ static INLINE u32 T1ReadLong_guaranteedAligned(u8* const mem, const u32 addr)
}
static INLINE u32 T1ReadLong(u8* const mem, const u32 addr)
static INLINE u32 T1ReadLong(u8* const mem, u32 addr)
{
addr &= ~3;
#ifdef WORDS_BIGENDIAN
return (mem[addr + 3] << 24 | mem[addr + 2] << 16 |
mem[addr + 1] << 8 | mem[addr]);
#else
return *((u32 *)mem + (addr>>2));
return *(u32*)(mem + addr);
#endif
}

View File

@ -41,6 +41,7 @@
#include "gfx3d.h"
#include "movie.h"
#include "mic.h"
#include "MMU_timing.h"
#include "path.h"
@ -265,11 +266,18 @@ SFORMAT SF_MOVIE[]={
static void mmu_savestate(EMUFILE* os)
{
//version
write32le(2,os);
u32 version = 3;
write32le(version,os);
//newer savefile system:
MMU_new.backupDevice.save_state(os);
MMU_timing.arm9codeFetch.savestate(os, version);
MMU_timing.arm9dataFetch.savestate(os, version);
MMU_timing.arm7codeFetch.savestate(os, version);
MMU_timing.arm7dataFetch.savestate(os, version);
MMU_timing.arm9codeCache.savestate(os, version);
MMU_timing.arm9dataCache.savestate(os, version);
}
SFORMAT SF_WIFI[]={
@ -411,13 +419,24 @@ static bool mmu_loadstate(EMUFILE* is, int size)
delete[] temp;
if(is->fail()) return false;
}
else if(version == 2)
{
//newer savefile system:
MMU_new.backupDevice.load_state(is);
}
if(version < 2)
return true;
//newer savefile system:
bool ok = MMU_new.backupDevice.load_state(is);
if(version < 3)
return ok;
ok &= MMU_timing.arm9codeFetch.loadstate(is, version);
ok &= MMU_timing.arm9dataFetch.loadstate(is, version);
ok &= MMU_timing.arm7codeFetch.loadstate(is, version);
ok &= MMU_timing.arm7dataFetch.loadstate(is, version);
ok &= MMU_timing.arm9codeCache.loadstate(is, version);
ok &= MMU_timing.arm9dataCache.loadstate(is, version);
return ok;
}
static void cp15_saveone(armcp15_t *cp15, EMUFILE* os)

View File

@ -29,6 +29,7 @@
#include "MMU.h"
#include "NDSSystem.h"
#include "thumb_instructions.h"
#include "MMU_timing.h"
#include <assert.h>
#define cpu (&ARMPROC)
@ -547,7 +548,7 @@ TEMPLATE static u32 FASTCALL OP_LDR_PCREL(const u32 i)
cpu->R[REG_NUM(cpu->instruction, 8)] = READ32(cpu->mem_if->data, adr);
return 3 + MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,32,MMU_AD_READ>(3, adr);
}
TEMPLATE static u32 FASTCALL OP_STR_REG_OFF(const u32 i)
@ -555,7 +556,7 @@ TEMPLATE static u32 FASTCALL OP_STR_REG_OFF(const u32 i)
u32 adr = cpu->R[REG_NUM(i, 6)] + cpu->R[REG_NUM(i, 3)];
WRITE32(cpu->mem_if->data, adr, cpu->R[REG_NUM(i, 0)]);
return 2 + MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,32,MMU_AD_WRITE>(2, adr);
}
TEMPLATE static u32 FASTCALL OP_STRH_REG_OFF(const u32 i)
@ -563,7 +564,7 @@ TEMPLATE static u32 FASTCALL OP_STRH_REG_OFF(const u32 i)
u32 adr = cpu->R[REG_NUM(i, 3)] + cpu->R[REG_NUM(i, 6)];
WRITE16(cpu->mem_if->data, adr, ((u16)cpu->R[REG_NUM(i, 0)]));
return 2 + MMU.MMU_WAIT16[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,16,MMU_AD_WRITE>(2, adr);
}
TEMPLATE static u32 FASTCALL OP_STRB_REG_OFF(const u32 i)
@ -571,7 +572,7 @@ TEMPLATE static u32 FASTCALL OP_STRB_REG_OFF(const u32 i)
u32 adr = cpu->R[REG_NUM(i, 3)] + cpu->R[REG_NUM(i, 6)];
WRITE8(cpu->mem_if->data, adr, ((u8)cpu->R[REG_NUM(i, 0)]));
return 2 + MMU.MMU_WAIT16[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,8,MMU_AD_WRITE>(2, adr);
}
TEMPLATE static u32 FASTCALL OP_LDRSB_REG_OFF(const u32 i)
@ -579,7 +580,7 @@ TEMPLATE static u32 FASTCALL OP_LDRSB_REG_OFF(const u32 i)
u32 adr = cpu->R[REG_NUM(i, 3)] + cpu->R[REG_NUM(i, 6)];
cpu->R[REG_NUM(i, 0)] = (s32)((s8)READ8(cpu->mem_if->data, adr));
return 3 + MMU.MMU_WAIT16[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,8,MMU_AD_READ>(3, adr);
}
TEMPLATE static u32 FASTCALL OP_LDR_REG_OFF(const u32 i)
@ -591,7 +592,7 @@ TEMPLATE static u32 FASTCALL OP_LDR_REG_OFF(const u32 i)
tempValue = (tempValue>>adr) | (tempValue<<(32-adr));
cpu->R[REG_NUM(i, 0)] = tempValue;
return 3 + MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,32,MMU_AD_READ>(3, adr);
}
TEMPLATE static u32 FASTCALL OP_LDRH_REG_OFF(const u32 i)
@ -599,7 +600,7 @@ TEMPLATE static u32 FASTCALL OP_LDRH_REG_OFF(const u32 i)
u32 adr = cpu->R[REG_NUM(i, 3)] + cpu->R[REG_NUM(i, 6)];
cpu->R[REG_NUM(i, 0)] = (u32)READ16(cpu->mem_if->data, adr);
return 3 + MMU.MMU_WAIT16[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,16,MMU_AD_READ>(3, adr);
}
TEMPLATE static u32 FASTCALL OP_LDRB_REG_OFF(const u32 i)
@ -607,7 +608,7 @@ TEMPLATE static u32 FASTCALL OP_LDRB_REG_OFF(const u32 i)
u32 adr = cpu->R[REG_NUM(i, 3)] + cpu->R[REG_NUM(i, 6)];
cpu->R[REG_NUM(i, 0)] = (u32)READ8(cpu->mem_if->data, adr);
return 3 + MMU.MMU_WAIT16[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,8,MMU_AD_READ>(3, adr);
}
TEMPLATE static u32 FASTCALL OP_LDRSH_REG_OFF(const u32 i)
@ -615,7 +616,7 @@ TEMPLATE static u32 FASTCALL OP_LDRSH_REG_OFF(const u32 i)
u32 adr = cpu->R[REG_NUM(i, 3)] + cpu->R[REG_NUM(i, 6)];
cpu->R[REG_NUM(i, 0)] = (s32)((s16)READ16(cpu->mem_if->data, adr));
return 3 + MMU.MMU_WAIT16[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,16,MMU_AD_READ>(3, adr);
}
TEMPLATE static u32 FASTCALL OP_STR_IMM_OFF(const u32 i)
@ -623,7 +624,7 @@ TEMPLATE static u32 FASTCALL OP_STR_IMM_OFF(const u32 i)
u32 adr = cpu->R[REG_NUM(i, 3)] + ((i>>4)&0x7C);
WRITE32(cpu->mem_if->data, adr, cpu->R[REG_NUM(i, 0)]);
return 2 + MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,32,MMU_AD_WRITE>(2, adr);
}
TEMPLATE static u32 FASTCALL OP_LDR_IMM_OFF(const u32 i)
@ -634,7 +635,7 @@ TEMPLATE static u32 FASTCALL OP_LDR_IMM_OFF(const u32 i)
tempValue = (tempValue>>adr) | (tempValue<<(32-adr));
cpu->R[REG_NUM(i, 0)] = tempValue;
return 3 + MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,32,MMU_AD_READ>(3, adr);
}
TEMPLATE static u32 FASTCALL OP_STRB_IMM_OFF(const u32 i)
@ -642,7 +643,7 @@ TEMPLATE static u32 FASTCALL OP_STRB_IMM_OFF(const u32 i)
u32 adr = cpu->R[REG_NUM(i, 3)] + ((i>>6)&0x1F);
WRITE8(cpu->mem_if->data, adr, (u8)cpu->R[REG_NUM(i, 0)]);
return 2 + MMU.MMU_WAIT16[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,8,MMU_AD_WRITE>(2, adr);
}
TEMPLATE static u32 FASTCALL OP_LDRB_IMM_OFF(const u32 i)
@ -650,7 +651,7 @@ TEMPLATE static u32 FASTCALL OP_LDRB_IMM_OFF(const u32 i)
u32 adr = cpu->R[REG_NUM(i, 3)] + ((i>>6)&0x1F);
cpu->R[REG_NUM(i, 0)] = READ8(cpu->mem_if->data, adr);
return 3 + MMU.MMU_WAIT16[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,8,MMU_AD_READ>(3, adr);
}
TEMPLATE static u32 FASTCALL OP_STRH_IMM_OFF(const u32 i)
@ -658,7 +659,7 @@ TEMPLATE static u32 FASTCALL OP_STRH_IMM_OFF(const u32 i)
u32 adr = cpu->R[REG_NUM(i, 3)] + ((i>>5)&0x3E);
WRITE16(cpu->mem_if->data, adr, (u16)cpu->R[REG_NUM(i, 0)]);
return 2 + MMU.MMU_WAIT16[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,16,MMU_AD_WRITE>(2, adr);
}
TEMPLATE static u32 FASTCALL OP_LDRH_IMM_OFF(const u32 i)
@ -666,7 +667,7 @@ TEMPLATE static u32 FASTCALL OP_LDRH_IMM_OFF(const u32 i)
u32 adr = cpu->R[REG_NUM(i, 3)] + ((i>>5)&0x3E);
cpu->R[REG_NUM(i, 0)] = READ16(cpu->mem_if->data, adr);
return 3 + MMU.MMU_WAIT16[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,16,MMU_AD_READ>(3, adr);
}
TEMPLATE static u32 FASTCALL OP_STR_SPREL(const u32 i)
@ -674,7 +675,7 @@ TEMPLATE static u32 FASTCALL OP_STR_SPREL(const u32 i)
u32 adr = cpu->R[13] + ((i&0xFF)<<2);
WRITE32(cpu->mem_if->data, adr, cpu->R[REG_NUM(i, 8)]);
return 2 + MMU.MMU_WAIT16[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,16,MMU_AD_WRITE>(2, adr);
}
TEMPLATE static u32 FASTCALL OP_LDR_SPREL(const u32 i)
@ -682,7 +683,7 @@ TEMPLATE static u32 FASTCALL OP_LDR_SPREL(const u32 i)
u32 adr = cpu->R[13] + ((i&0xFF)<<2);
cpu->R[REG_NUM(i, 8)] = READ32(cpu->mem_if->data, adr);
return 3 + MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
return MMU_aluMemAccessCycles<PROCNUM,32,MMU_AD_READ>(3, adr);
}
TEMPLATE static u32 FASTCALL OP_ADD_2PC(const u32 i)
@ -722,12 +723,12 @@ TEMPLATE static u32 FASTCALL OP_PUSH(const u32 i)
if(BIT_N(i, 7-j))
{
WRITE32(cpu->mem_if->data, adr, cpu->R[7-j]);
c += MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
c += MMU_memAccessCycles<PROCNUM,32,MMU_AD_WRITE>(adr);
adr -= 4;
}
cpu->R[13] = adr + 4;
return c + 3;
return MMU_aluMemCycles<PROCNUM>(3, c);
}
TEMPLATE static u32 FASTCALL OP_PUSH_LR(const u32 i)
@ -736,19 +737,19 @@ TEMPLATE static u32 FASTCALL OP_PUSH_LR(const u32 i)
u32 c = 0, j;
WRITE32(cpu->mem_if->data, adr, cpu->R[14]);
c += MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
c += MMU_memAccessCycles<PROCNUM,32,MMU_AD_WRITE>(adr);
adr -= 4;
for(j = 0; j<8; ++j)
if(BIT_N(i, 7-j))
{
WRITE32(cpu->mem_if->data, adr, cpu->R[7-j]);
c += MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
c += MMU_memAccessCycles<PROCNUM,32,MMU_AD_WRITE>(adr);
adr -= 4;
}
cpu->R[13] = adr + 4;
return c + 4;
return MMU_aluMemCycles<PROCNUM>(4, c);
}
TEMPLATE static u32 FASTCALL OP_POP(const u32 i)
@ -760,12 +761,12 @@ TEMPLATE static u32 FASTCALL OP_POP(const u32 i)
if(BIT_N(i, j))
{
cpu->R[j] = READ32(cpu->mem_if->data, adr);
c += MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
c += MMU_memAccessCycles<PROCNUM,32,MMU_AD_READ>(adr);
adr += 4;
}
cpu->R[13] = adr;
return c + 2;
return MMU_aluMemCycles<PROCNUM>(2, c);
}
TEMPLATE static u32 FASTCALL OP_POP_PC(const u32 i)
@ -778,12 +779,12 @@ TEMPLATE static u32 FASTCALL OP_POP_PC(const u32 i)
if(BIT_N(i, j))
{
cpu->R[j] = READ32(cpu->mem_if->data, adr);
c += MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
c += MMU_memAccessCycles<PROCNUM,32,MMU_AD_READ>(adr);
adr += 4;
}
v = READ32(cpu->mem_if->data, adr);
c += MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
c += MMU_memAccessCycles<PROCNUM,32,MMU_AD_READ>(adr);
cpu->R[15] = v & 0xFFFFFFFE;
cpu->next_instruction = v & 0xFFFFFFFE;
if(PROCNUM==0)
@ -791,7 +792,7 @@ TEMPLATE static u32 FASTCALL OP_POP_PC(const u32 i)
adr += 4;
cpu->R[13] = adr;
return c + 5;
return MMU_aluMemCycles<PROCNUM>(5, c);
}
TEMPLATE static u32 FASTCALL OP_BKPT_THUMB(const u32 i)
@ -808,11 +809,11 @@ TEMPLATE static u32 FASTCALL OP_STMIA_THUMB(const u32 i)
if(BIT_N(i, j))
{
WRITE32(cpu->mem_if->data, adr, cpu->R[j]);
c += MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
c += MMU_memAccessCycles<PROCNUM,32,MMU_AD_WRITE>(adr);
adr += 4;
}
cpu->R[REG_NUM(i, 8)] = adr;
return c + 2;
return MMU_aluMemCycles<PROCNUM>(2, c);
}
TEMPLATE static u32 FASTCALL OP_LDMIA_THUMB(const u32 i)
@ -825,7 +826,7 @@ TEMPLATE static u32 FASTCALL OP_LDMIA_THUMB(const u32 i)
if(BIT_N(i, j))
{
cpu->R[j] = READ32(cpu->mem_if->data, adr);
c += MMU.MMU_WAIT32[PROCNUM][(adr>>24)&0xF];
c += MMU_memAccessCycles<PROCNUM,32,MMU_AD_READ>(adr);
adr += 4;
}
@ -833,7 +834,7 @@ TEMPLATE static u32 FASTCALL OP_LDMIA_THUMB(const u32 i)
if(!BIT_N(i, regIndex))
cpu->R[regIndex] = adr;
return c + 3;
return MMU_aluMemCycles<PROCNUM>(3, c);
}
TEMPLATE static u32 FASTCALL OP_B_COND(const u32 i)

View File

@ -1378,6 +1378,10 @@
RelativePath="..\MMU.h"
>
</File>
<File
RelativePath="..\MMU_timing.h"
>
</File>
<File
RelativePath="..\movie.cpp"
>

View File

@ -804,6 +804,10 @@
RelativePath="..\MMU.h"
>
</File>
<File
RelativePath="..\MMU_timing.h"
>
</File>
<File
RelativePath="..\movie.cpp"
>

View File

@ -308,6 +308,7 @@
<ClInclude Include="..\memorystream.h" />
<ClInclude Include="..\mic.h" />
<ClInclude Include="..\MMU.h" />
<ClInclude Include="..\MMU_timing.h" />
<ClInclude Include="..\movie.h" />
<ClInclude Include="..\NDSSystem.h" />
<ClInclude Include="..\OGLRender.h" />

View File

@ -388,6 +388,9 @@
<ClInclude Include="..\MMU.h">
<Filter>Core</Filter>
</ClInclude>
<ClInclude Include="..\MMU_timing.h">
<Filter>Core</Filter>
</ClInclude>
<ClInclude Include="..\movie.h">
<Filter>Core</Filter>
</ClInclude>

View File

@ -559,6 +559,8 @@
RelativePath="..\MMU.cpp"/>
<File
RelativePath="..\MMU.h"/>
<File
RelativePath="..\MMU_timing.h"/>
<File
RelativePath="..\movie.cpp"/>
<File

View File

@ -805,6 +805,10 @@
RelativePath="..\MMU.h"
>
</File>
<File
RelativePath="..\MMU_timing.h"
>
</File>
<File
RelativePath="..\movie.cpp"
>

View File

@ -1934,7 +1934,6 @@ int _main()
CommonSettings.showGpu.main = GetPrivateProfileInt("Display", "MainGpu", 1, IniName) != 0;
CommonSettings.showGpu.sub = GetPrivateProfileInt("Display", "SubGpu", 1, IniName) != 0;
lostFocusPause = GetPrivateProfileBool("Focus", "BackgroundPause", false, IniName);
CommonSettings.armFastFetchExecute = GetPrivateProfileBool("Emulation", "FetchExecute", false, IniName);
//Get Ram-Watch values
RWSaveWindowPos = GetPrivateProfileBool("RamWatch", "SaveWindowPos", false, IniName);
@ -3039,8 +3038,6 @@ LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM
//Gray the recent ROM menu item if there are no recent ROMs
DesEnableMenuItem(mainMenu, ID_FILE_RECENTROM, RecentRoms.size()>0);
DesEnableMenuItem(mainMenu, IDC_FASTFETCHEXECUTE, movieMode == MOVIEMODE_INACTIVE);
//Updated Checked menu items
//Pause
@ -3116,7 +3113,6 @@ LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM
MainWindow->checkMenu(IDC_STATEREWINDING, staterewindingenabled == 1 );
MainWindow->checkMenu(IDC_BACKGROUNDPAUSE, lostFocusPause);
MainWindow->checkMenu(IDC_FASTFETCHEXECUTE, CommonSettings.armFastFetchExecute);
//Save type
const int savelist[] = {IDC_SAVETYPE1,IDC_SAVETYPE2,IDC_SAVETYPE3,IDC_SAVETYPE4,IDC_SAVETYPE5,IDC_SAVETYPE6,IDC_SAVETYPE7};
@ -4118,11 +4114,6 @@ LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM
WritePrivateProfileInt("Focus", "BackgroundPause", (int)lostFocusPause, IniName);
return 0;
case IDC_FASTFETCHEXECUTE:
CommonSettings.armFastFetchExecute = !CommonSettings.armFastFetchExecute;
WritePrivateProfileInt("Emulation", "FetchExecute", (int)CommonSettings.armFastFetchExecute, IniName);
return 0;
case IDC_SAVETYPE1: backup_setManualBackupType(0); return 0;
case IDC_SAVETYPE2: backup_setManualBackupType(1); return 0;
case IDC_SAVETYPE3: backup_setManualBackupType(2); return 0;

View File

@ -781,8 +781,6 @@
#define IDC_HKCOMBO 60077
#define IDD_KEYCUSTOM 60078
#define IDM_HOTKEY_CONFIG 60079
#define IDC_FASTFETCHEXECUTE 60080
#define IDM_RENDER_HQ2XS 60081
#define IDM_RENDER_LQ2X 60082
#define IDM_RENDER_LQ2XS 60083

Binary file not shown.