(MMU_timings.h) Style nits/cleanups

Authored by twinaphex on 2016-08-16 17:42:12 +02:00; committed by zeromus
parent a5ab52c3fb
commit 09879832b3
1 changed file with 117 additions and 127 deletions


@@ -29,39 +29,37 @@
#include "debug.h"
#include "NDSSystem.h"
////////////////////////////////////////////////////////////////
// MEMORY TIMING ACCURACY CONFIGURATION
//
// the more of these are enabled,
// the more accurate memory access timing _should_ become.
// they should be listed roughly in order of most to least important.
// it's reasonable to disable some of these as a speed hack.
// obviously, these defines don't cover all the variables or features needed,
// and in particular, DMA or code+data access bus contention is still missing.
/*
* MEMORY TIMING ACCURACY CONFIGURATION
*
* the more of these are enabled,
* the more accurate memory access timing _should_ become.
* they should be listed roughly in order of most to least important.
* it's reasonable to disable some of these as a speed hack.
* obviously, these defines don't cover all the variables or features needed,
* and in particular, DMA or code+data access bus contention is still missing. */
//disable this to prevent the advanced timing logic from ever running at all
/* disable this to prevent the advanced timing logic from ever running at all */
#define ENABLE_ADVANCED_TIMING
#ifdef ENABLE_ADVANCED_TIMING
// makes non-sequential accesses slower than sequential ones.
/* makes non-sequential accesses slower than sequential ones. */
#define ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS
//(SOMETIMES THIS IS A BIG SPEED HIT!)
/* (SOMETIMES THIS IS A BIG SPEED HIT!) */
// enables emulation of code fetch waits.
/* enables emulation of code fetch waits. */
#define ACCOUNT_FOR_CODE_FETCH_CYCLES
// makes access to DTCM (arm9 only) fast.
/* makes access to DTCM (arm9 only) fast. */
#define ACCOUNT_FOR_DATA_TCM_SPEED
// enables simulation of cache hits and cache misses.
/* enables simulation of cache hits and cache misses. */
#define ENABLE_CACHE_CONTROLLER_EMULATION
#endif //ENABLE_ADVANCED_TIMING
#endif /* ENABLE_ADVANCED_TIMING */
//
////////////////////////////////////////////////////////////////
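Since each define above is independent, a plausible speed-hack build (an illustrative selection, not one shipped in this header) keeps the cheap corrections while dropping the one flagged as a big speed hit and the per-access cache simulation:

/* illustrative speed-hack configuration; the define names are the real
 * ones above, but this particular selection is only an example */
#define ENABLE_ADVANCED_TIMING
#ifdef ENABLE_ADVANCED_TIMING
/* #define ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS */   /* the flagged big speed hit */
#define ACCOUNT_FOR_CODE_FETCH_CYCLES
#define ACCOUNT_FOR_DATA_TCM_SPEED
/* #define ENABLE_CACHE_CONTROLLER_EMULATION */   /* per-access cache simulation */
#endif /* ENABLE_ADVANCED_TIMING */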
FORCEINLINE bool USE_TIMING() {
FORCEINLINE bool USE_TIMING(void)
{
#ifdef ENABLE_ADVANCED_TIMING
return CommonSettings.advanced_timing;
#else
@@ -69,16 +67,16 @@ FORCEINLINE bool USE_TIMING() {
#endif
}
enum MMU_ACCESS_DIRECTION
{
MMU_AD_READ, MMU_AD_WRITE
MMU_AD_READ = 0,
MMU_AD_WRITE
};
// note that we don't actually emulate the cache contents here,
// only enough to guess what would be a cache hit or a cache miss.
// this doesn't really get used unless ENABLE_CACHE_CONTROLLER_EMULATION is defined.
/* note that we don't actually emulate the cache contents here,
* only enough to guess what would be a cache hit or a cache miss.
* this doesn't really get used unless ENABLE_CACHE_CONTROLLER_EMULATION is defined. */
template<int SIZESHIFT, int ASSOCIATIVESHIFT, int BLOCKSIZESHIFT>
class CacheController
{
@@ -89,13 +87,13 @@ public:
u32 blockMasked = addr & BLOCKMASK;
if(blockMasked == m_cacheCache)
return true;
else
return this->CachedInternal<DIR>(addr, blockMasked);
return this->CachedInternal<DIR>(addr, blockMasked);
}
void Reset()
{
for(int blockIndex = 0; blockIndex < NUMBLOCKS; blockIndex++)
unsigned blockIndex;
for(blockIndex = 0; blockIndex < NUMBLOCKS; blockIndex++)
m_blocks[blockIndex].Reset();
m_cacheCache = ~0;
}
@@ -106,20 +104,24 @@ public:
void savestate(EMUFILE* os, int version)
{
unsigned i;
write32le(m_cacheCache, os);
for(int i = 0; i < NUMBLOCKS; i++)
for(i = 0; i < NUMBLOCKS; i++)
{
for(int j = 0; j < ASSOCIATIVITY; j++)
unsigned j;
for(j = 0; j < ASSOCIATIVITY; j++)
write32le(m_blocks[i].tag[j],os);
write32le(m_blocks[i].nextWay,os);
}
}
bool loadstate(EMUFILE* is, int version)
{
unsigned i;
read32le(&m_cacheCache, is);
for(int i = 0; i < NUMBLOCKS; i++)
for(i = 0; i < NUMBLOCKS; i++)
{
for(int j = 0; j < ASSOCIATIVITY; j++)
unsigned j;
for(j = 0; j < ASSOCIATIVITY; j++)
read32le(&m_blocks[i].tag[j],is);
read32le(&m_blocks[i].nextWay,is);
}
@@ -130,20 +132,24 @@ private:
template<MMU_ACCESS_DIRECTION DIR>
bool CachedInternal(u32 addr, u32 blockMasked)
{
unsigned way;
u32 blockIndex = blockMasked >> BLOCKSIZESHIFT;
CacheBlock& block = m_blocks[blockIndex];
addr &= TAGMASK;
for(int way = 0; way < ASSOCIATIVITY; way++)
if(addr == block.tag[way])
{
// found it, already allocated
m_cacheCache = blockMasked;
return true;
}
for(way = 0; way < ASSOCIATIVITY; way++)
{
if(addr != block.tag[way])
continue;
/* found it, already allocated */
m_cacheCache = blockMasked;
return true;
}
if(DIR == MMU_AD_READ)
{
// TODO: support other allocation orders?
/* TODO: support other allocation orders? */
block.tag[block.nextWay++] = addr;
block.nextWay %= ASSOCIATIVITY;
m_cacheCache = blockMasked;
@@ -170,13 +176,14 @@ private:
void Reset()
{
unsigned way;
nextWay = 0;
for(int way = 0; way < ASSOCIATIVITY; way++)
for(way = 0; way < ASSOCIATIVITY; way++)
tag[way] = 0;
}
};
u32 m_cacheCache; // optimization
u32 m_cacheCache; /* optimization */
CacheBlock m_blocks [NUMBLOCKS];
};
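To ground the three template parameters: the instantiations below are commented as 8192 bytes, 4-way associative, 32-byte blocks, which pins down the obvious derivations (size = 1 << SIZESHIFT, ways = 1 << ASSOCIATIVESHIFT, block size = 1 << BLOCKSIZESHIFT, set count = 1 << (SIZESHIFT - ASSOCIATIVESHIFT - BLOCKSIZESHIFT)). A standalone sketch checking that arithmetic for the arm9 code cache, assuming exactly those derivations:

#include <cstdio>

/* standalone check of the geometry implied by CacheController<13,2,5>
 * (the arm9 code cache below); the derivations are assumptions made
 * consistent with the "8192 bytes, 4-way, 32-byte blocks" comments */
int main(void)
{
	const int SIZESHIFT = 13, ASSOCIATIVESHIFT = 2, BLOCKSIZESHIFT = 5;
	printf("cache size    : %d bytes\n", 1 << SIZESHIFT);         /* 8192 */
	printf("associativity : %d ways\n",  1 << ASSOCIATIVESHIFT);  /* 4 */
	printf("block size    : %d bytes\n", 1 << BLOCKSIZESHIFT);    /* 32 */
	printf("sets          : %d\n",
	       1 << (SIZESHIFT - ASSOCIATIVESHIFT - BLOCKSIZESHIFT)); /* 64 */
	return 0;
}

Note also what CachedInternal implies about policy: only reads allocate (a write miss never fills a way), and block.nextWay replaces ways round-robin, i.e. FIFO rather than LRU replacement.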
@@ -200,9 +207,7 @@ public:
#endif
if(AT == MMU_AT_CODE && !prohibit)
{
return 1;
}
u32 time = _MMU_accesstime<PROCNUM, AT, READSIZE, DIRECTION,TIMING>(address,
#ifdef ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS
@@ -248,12 +253,12 @@ private:
struct MMU_struct_timing
{
// technically part of the cp15, but I didn't want the dereferencing penalty.
// these template values correspond with the value of armcp15->cacheType.
CacheController<13,2,5> arm9codeCache; // 8192 bytes, 4-way associative, 32-byte blocks
CacheController<12,2,5> arm9dataCache; // 4096 bytes, 4-way associative, 32-byte blocks
/* technically part of the cp15, but I didn't want the dereferencing penalty.
* these template values correspond with the value of armcp15->cacheType. */
CacheController<13,2,5> arm9codeCache; /* 8192 bytes, 4-way associative, 32-byte blocks */
CacheController<12,2,5> arm9dataCache; /* 4096 bytes, 4-way associative, 32-byte blocks */
// technically part of armcpu_t, but that struct isn't templated on PROCNUM
/* technically part of armcpu_t, but that struct isn't templated on PROCNUM */
FetchAccessUnit<0,MMU_AT_CODE> arm9codeFetch;
FetchAccessUnit<0,MMU_AT_DATA> arm9dataFetch;
FetchAccessUnit<1,MMU_AT_CODE> arm7codeFetch;
@@ -270,19 +275,17 @@ template<> FORCEINLINE FetchAccessUnit<1,MMU_AT_DATA>& MMU_struct_timing::armDat
extern MMU_struct_timing MMU_timing;
// calculates the time a single memory access takes,
// in units of cycles of the current processor.
// this function replaces what used to be MMU_WAIT16 and MMU_WAIT32.
// this may have side effects, so don't call it more than necessary.
/* calculates the time a single memory access takes,
* in units of cycles of the current processor.
* this function replaces what used to be MMU_WAIT16 and MMU_WAIT32.
* this may have side effects, so don't call it more than necessary. */
template<int PROCNUM, MMU_ACCESS_TYPE AT, int READSIZE, MMU_ACCESS_DIRECTION DIRECTION, bool TIMING>
FORCEINLINE u32 _MMU_accesstime(u32 addr, bool sequential)
{
static const int MC = 1; // cached or tcm memory speed
static const int M32 = (PROCNUM==ARMCPU_ARM9) ? 2 : 1; // access through 32-bit bus
static const int M16 = M32 * ((READSIZE>16) ? 2 : 1); // access through 16-bit bus
static const int MSLW = M16 * 8; // this needs tuning
static const int MC = 1; /* cached or tcm memory speed */
static const int M32 = (PROCNUM==ARMCPU_ARM9) ? 2 : 1; /* access through 32-bit bus */
static const int M16 = M32 * ((READSIZE>16) ? 2 : 1); /* access through 16-bit bus */
static const int MSLW = M16 * 8; /* this needs tuning */
if(PROCNUM==ARMCPU_ARM9 && AT == MMU_AT_CODE && addr < 0x02000000)
return MC; // ITCM
@@ -292,7 +295,7 @@ FORCEINLINE u32 _MMU_accesstime(u32 addr, bool sequential)
return MC; // DTCM
#endif
// for now, assume the cache is always enabled for all of main memory
/* for now, assume the cache is always enabled for all of main memory */
if(AT != MMU_AT_DMA && TIMING && PROCNUM==ARMCPU_ARM9 && (addr & 0x0F000000) == 0x02000000)
{
#ifdef ENABLE_CACHE_CONTROLLER_EMULATION
@@ -305,15 +308,15 @@ FORCEINLINE u32 _MMU_accesstime(u32 addr, bool sequential)
return MC;
u32 c;
if(sequential && AT==MMU_AT_DATA)
c = M16; // bonus for sequential data access
c = M16; /* bonus for sequential data access */
else if(DIRECTION == MMU_AD_READ)
c = M16 * 5;
else
c = M16 * 2; // should be 4, but write buffer isn't emulated yet.
c = M16 * 2; /* should be 4, but write buffer isn't emulated yet. */
if(DIRECTION == MMU_AD_READ)
{
// cache miss while reading means it has to fill a whole cache line
// by reading 32 bytes...
/* cache miss while reading means it has to fill a whole cache line
* by reading 32 bytes... */
c += 8 * M32*2;
}
@@ -326,9 +329,9 @@ FORCEINLINE u32 _MMU_accesstime(u32 addr, bool sequential)
return c;
#elif defined(ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS)
// this is the closest approximation I could find
// to the with-cache-controller timing
// that doesn't do any actual caching logic.
/* this is the closest approximation I could find
* to the with-cache-controller timing
* that doesn't do any actual caching logic. */
return sequential ? MC : M16;
#endif
}
@@ -347,29 +350,22 @@ FORCEINLINE u32 _MMU_accesstime(u32 addr, bool sequential)
if(TIMING && !sequential)
{
//if(c != MC || PROCNUM==ARMCPU_ARM7) // check not needed anymore because ITCM/DTCM return earlier
{
c += (PROCNUM==ARMCPU_ARM9) ? 3*2 : 1;
}
c += (PROCNUM==ARMCPU_ARM9) ? 3*2 : 1;
}
#endif
return c;
}
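To make the constants concrete, here is the cost of an ARM9 32-bit data access to main memory under ENABLE_CACHE_CONTROLLER_EMULATION, worked directly from the code above (ignoring whatever falls in the lines this diff elides):

/* worked example: ARM9, READSIZE = 32, main memory (0x02xxxxxx region):
 *   M32 = 2, M16 = M32 * 2 = 4            (READSIZE > 16)
 *   cache hit                 -> MC = 1 cycle
 *   non-sequential read miss  -> M16*5 + 8*M32*2 = 20 + 32 = 52 cycles
 *   sequential read miss      -> M16   + 8*M32*2 =  4 + 32 = 36 cycles
 *   write miss                -> M16*2 = 8 cycles (per the comment, this
 *                                should be M16*4 = 16 once the write
 *                                buffer is emulated)
 * the ARM7 never takes this path: the branch requires PROCNUM==ARMCPU_ARM9. */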
// calculates the cycle time of a single memory access in the MEM stage.
// to be used to calculate the memCycles argument for MMU_aluMemCycles.
// this may have side effects, so don't call it more than necessary.
/* calculates the cycle time of a single memory access in the MEM stage.
* to be used to calculate the memCycles argument for MMU_aluMemCycles.
* this may have side effects, so don't call it more than necessary. */
template<int PROCNUM, int READSIZE, MMU_ACCESS_DIRECTION DIRECTION, bool TIMING>
FORCEINLINE u32 MMU_memAccessCycles(u32 addr)
{
if(TIMING)
return MMU_timing.armDataFetch<PROCNUM>().template Fetch<READSIZE,DIRECTION,true>((addr)&(~((READSIZE>>3)-1)));
else
return MMU_timing.armDataFetch<PROCNUM>().template Fetch<READSIZE,DIRECTION,false>((addr)&(~((READSIZE>>3)-1)));
return MMU_timing.armDataFetch<PROCNUM>().template Fetch<READSIZE,DIRECTION,false>((addr)&(~((READSIZE>>3)-1)));
}
template<int PROCNUM, int READSIZE, MMU_ACCESS_DIRECTION DIRECTION>
@@ -377,48 +373,44 @@ FORCEINLINE u32 MMU_memAccessCycles(u32 addr)
{
if(USE_TIMING())
return MMU_memAccessCycles<PROCNUM,READSIZE,DIRECTION,true>(addr);
else
return MMU_memAccessCycles<PROCNUM,READSIZE,DIRECTION,false>(addr);
return MMU_memAccessCycles<PROCNUM,READSIZE,DIRECTION,false>(addr);
}
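The wrapper above is the dispatch idiom used throughout this header: one runtime check of USE_TIMING() selects between two instantiations of a bool-templated worker, so the TIMING=false path compiles with the timing bookkeeping optimized out entirely. A minimal self-contained sketch of the idiom (work and advanced_timing are hypothetical stand-ins for FetchAccessUnit::Fetch and CommonSettings.advanced_timing):

#include <cstdio>

/* minimal sketch of the runtime-to-compile-time dispatch idiom;
 * the names here are illustrative stand-ins, not from this header */
static bool advanced_timing = true;

template<bool TIMING>
static unsigned work(unsigned addr)
{
	if (TIMING)                /* constant per instantiation, folds away */
		return 2 + (addr & 1); /* stand-in for the advanced timing model */
	return 1;                  /* flat-cost fast path, no bookkeeping */
}

static unsigned dispatch(unsigned addr)
{
	if (advanced_timing)
		return work<true>(addr);
	return work<false>(addr);
}

int main(void)
{
	printf("%u %u\n", dispatch(0x02000000u), dispatch(0x02000001u));
	return 0;
}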
// calculates the cycle time of a single code fetch in the FETCH stage
// to be used to calculate the fetchCycles argument for MMU_fetchExecuteCycles.
// this may have side effects, so don't call it more than necessary.
/* calculates the cycle time of a single code fetch in the FETCH stage
* to be used to calculate the fetchCycles argument for MMU_fetchExecuteCycles.
* this may have side effects, so don't call it more than necessary. */
template<int PROCNUM, int READSIZE>
FORCEINLINE u32 MMU_codeFetchCycles(u32 addr)
{
if(USE_TIMING())
return MMU_timing.armCodeFetch<PROCNUM>().template Fetch<READSIZE,MMU_AD_READ,true>((addr)&(~((READSIZE>>3)-1)));
else
return MMU_timing.armCodeFetch<PROCNUM>().template Fetch<READSIZE,MMU_AD_READ,false>((addr)&(~((READSIZE>>3)-1)));
return MMU_timing.armCodeFetch<PROCNUM>().template Fetch<READSIZE,MMU_AD_READ,false>((addr)&(~((READSIZE>>3)-1)));
}
// calculates the cycle contribution of ALU + MEM stages (= EXECUTE)
// given ALU cycle time and the summation of multiple memory access cycle times.
// this function might belong more in armcpu, but I don't think it matters.
/* calculates the cycle contribution of ALU + MEM stages (= EXECUTE)
* given ALU cycle time and the summation of multiple memory access cycle times.
* this function might belong more in armcpu, but I don't think it matters. */
template<int PROCNUM>
FORCEINLINE u32 MMU_aluMemCycles(u32 aluCycles, u32 memCycles)
{
if(PROCNUM==ARMCPU_ARM9)
{
// ALU and MEM are different stages of the 5-stage pipeline.
// we approximate the pipeline throughput using max,
// since simply adding the cycles of each instruction together
// fails to take into account the parallelism of the arm pipeline
// and would make the emulated system unnaturally slow.
return std::max(aluCycles, memCycles);
}
else
{
// ALU and MEM are part of the same stage of the 3-stage pipeline,
// thus they occur in sequence and we can simply add the counts together.
return aluCycles + memCycles;
}
if(PROCNUM==ARMCPU_ARM9)
{
/* ALU and MEM are different stages of the 5-stage pipeline.
* we approximate the pipeline throughput using max,
* since simply adding the cycles of each instruction together
* fails to take into account the parallelism of the arm pipeline
* and would make the emulated system unnaturally slow. */
return std::max(aluCycles, memCycles);
}
/* ALU and MEM are part of the same stage of the 3-stage pipeline,
* thus they occur in sequence and we can simply add the counts together. */
return aluCycles + memCycles;
}
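A quick numeric check of the two models: with aluCycles = 2 and memCycles = 5, the ARM9 path returns max(2, 5) = 5 because ALU and MEM occupy different pipeline stages and overlap across instructions, while the ARM7 path returns 2 + 5 = 7 because both happen in one stage and must serialize.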
// calculates the cycle contribution of ALU + MEM stages (= EXECUTE)
// given ALU cycle time and the description of a single memory access.
// this may have side effects, so don't call it more than necessary.
/* calculates the cycle contribution of ALU + MEM stages (= EXECUTE)
* given ALU cycle time and the description of a single memory access.
* this may have side effects, so don't call it more than necessary. */
template<int PROCNUM, int READSIZE, MMU_ACCESS_DIRECTION DIRECTION>
FORCEINLINE u32 MMU_aluMemAccessCycles(u32 aluCycles, u32 addr)
{
@@ -429,30 +421,28 @@ FORCEINLINE u32 MMU_aluMemAccessCycles(u32 aluCycles, u32 addr)
return MMU_aluMemCycles<PROCNUM>(aluCycles, memCycles);
}
// calculates the cycle contribution of FETCH + EXECUTE stages
// given executeCycles = the combined ALU+MEM cycles
// and fetchCycles = the cycle time of the FETCH stage
// this function might belong more in armcpu, but I don't think it matters.
/* calculates the cycle contribution of FETCH + EXECUTE stages
* given executeCycles = the combined ALU+MEM cycles
* and fetchCycles = the cycle time of the FETCH stage
* this function might belong more in armcpu, but I don't think it matters. */
template<int PROCNUM>
FORCEINLINE u32 MMU_fetchExecuteCycles(u32 executeCycles, u32 fetchCycles)
{
#ifdef ACCOUNT_FOR_CODE_FETCH_CYCLES
const bool allow = true;
#else
const bool allow = false;
#endif
#ifdef ACCOUNT_FOR_CODE_FETCH_CYCLES
if(USE_TIMING())
{
/* execute and fetch are different stages of the pipeline for both arm7 and arm9.
* again, we approximate the pipeline throughput using max. */
return std::max(executeCycles, fetchCycles);
/* TODO: add an option to support conflict between MEM and FETCH cycles
* if they're both using the same data bus.
* in the case of a conflict this should be:
* return std::max(aluCycles, memCycles + fetchCycles);
*/
}
#endif
if(USE_TIMING() && allow)
{
// execute and fetch are different stages of the pipeline for both arm7 and arm9.
// again, we approximate the pipeline throughput using max.
return std::max(executeCycles, fetchCycles);
// TODO: add an option to support conflict between MEM and FETCH cycles
// if they're both using the same data bus.
// in the case of a conflict this should be:
// return std::max(aluCycles, memCycles + fetchCycles);
}
return executeCycles;
return executeCycles;
}
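As a worked instance of the same overlap argument: executeCycles = 3 with fetchCycles = 5 returns max(3, 5) = 5, the fetch completely hiding execution. If the TODO's bus-conflict option existed, a fetch contending with a data access on the same bus would instead contribute memCycles + fetchCycles inside the max, making the contention stall visible in the cycle count.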