From 602da31591ec1f6461315470567b045e456e4e72 Mon Sep 17 00:00:00 2001
From: zeromus
Date: Tue, 16 Aug 2016 14:16:59 -0500
Subject: [PATCH] Revert "(MMU_timings.h) Style nits/cleanups"

This reverts commit 04c8b0915e85da85a8f08a7b2ad6814d95b6618f.
---
 desmume/src/MMU_timing.h | 244 ++++++++++++++++++++-------------------
 1 file changed, 127 insertions(+), 117 deletions(-)

diff --git a/desmume/src/MMU_timing.h b/desmume/src/MMU_timing.h
index 9952d27e4..a9520c10f 100644
--- a/desmume/src/MMU_timing.h
+++ b/desmume/src/MMU_timing.h
@@ -29,37 +29,39 @@
 #include "debug.h"
 #include "NDSSystem.h"
 
-/*
- * MEMORY TIMING ACCURACY CONFIGURATION
- *
- * the more of these are enabled,
- * the more accurate memory access timing _should_ become.
- * they should be listed roughly in order of most to least important.
- * it's reasonable to disable some of these as a speed hack.
- * obviously, these defines don't cover all the variables or features needed,
- * and in particular, DMA or code+data access bus contention is still missing. */
+////////////////////////////////////////////////////////////////
+// MEMORY TIMING ACCURACY CONFIGURATION
+//
+// the more of these are enabled,
+// the more accurate memory access timing _should_ become.
+// they should be listed roughly in order of most to least important.
+// it's reasonable to disable some of these as a speed hack.
+// obviously, these defines don't cover all the variables or features needed,
+// and in particular, DMA or code+data access bus contention is still missing.
 
-/* disable this to prevent the advanced timing logic from ever running at all */
+	//disable this to prevent the advanced timing logic from ever running at all
 #define ENABLE_ADVANCED_TIMING
 
 #ifdef ENABLE_ADVANCED_TIMING
-/* makes non-sequential accesses slower than sequential ones. */
+	// makes non-sequential accesses slower than sequential ones.
 #define ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS
-/* (SOMETIMES THIS IS A BIG SPEED HIT!) */
+	//(SOMETIMES THIS IS A BIG SPEED HIT!)
 
-/* enables emulation of code fetch waits. */
+	// enables emulation of code fetch waits.
#define ACCOUNT_FOR_CODE_FETCH_CYCLES
 
-/* makes access to DTCM (arm9 only) fast. */
+	// makes access to DTCM (arm9 only) fast.
 #define ACCOUNT_FOR_DATA_TCM_SPEED
 
-/* enables simulation of cache hits and cache misses. */
+	// enables simulation of cache hits and cache misses.
 #define ENABLE_CACHE_CONTROLLER_EMULATION
 
-#endif /* ENABLE_ADVANCED_TIMING */
+#endif //ENABLE_ADVANCED_TIMING
 
-FORCEINLINE bool USE_TIMING(void)
-{
+//
+////////////////////////////////////////////////////////////////
+
+FORCEINLINE bool USE_TIMING() {
 #ifdef ENABLE_ADVANCED_TIMING
 	return CommonSettings.advanced_timing;
 #else
@@ -67,16 +69,16 @@ FORCEINLINE bool USE_TIMING(void)
 #endif
 }
 
+
 enum MMU_ACCESS_DIRECTION
 {
-	MMU_AD_READ = 0,
-	MMU_AD_WRITE
+	MMU_AD_READ, MMU_AD_WRITE
 };
 
-/* note that we don't actually emulate the cache contents here,
- * only enough to guess what would be a cache hit or a cache miss.
- * this doesn't really get used unless ENABLE_CACHE_CONTROLLER_EMULATION is defined. */
+// note that we don't actually emulate the cache contents here,
+// only enough to guess what would be a cache hit or a cache miss.
+// this doesn't really get used unless ENABLE_CACHE_CONTROLLER_EMULATION is defined.
 template<int SIZESHIFT, int ASSOCIATIVITYSHIFT, int BLOCKSIZESHIFT>
 class CacheController
 {
@@ -87,13 +89,13 @@ public:
 		u32 blockMasked = addr & BLOCKMASK;
 		if(blockMasked == m_cacheCache)
 			return true;
-		return this->CachedInternal<DIR>(addr, blockMasked);
+		else
+			return this->CachedInternal<DIR>(addr, blockMasked);
 	}
 
 	void Reset()
 	{
-		unsigned blockIndex;
-		for(blockIndex = 0; blockIndex < NUMBLOCKS; blockIndex++)
+		for(int blockIndex = 0; blockIndex < NUMBLOCKS; blockIndex++)
 			m_blocks[blockIndex].Reset();
 		m_cacheCache = ~0;
 	}
@@ -104,24 +106,20 @@ public:
 
 	void savestate(EMUFILE* os, int version)
 	{
-		unsigned i;
 		write32le(m_cacheCache, os);
-		for(i = 0; i < NUMBLOCKS; i++)
+		for(int i = 0; i < NUMBLOCKS; i++)
 		{
-			unsigned j;
-			for(j = 0; j < ASSOCIATIVITY; j++)
+			for(int j = 0; j < ASSOCIATIVITY; j++)
 				write32le(m_blocks[i].tag[j],os);
 			write32le(m_blocks[i].nextWay,os);
 		}
 	}
 
 	bool loadstate(EMUFILE* is, int version)
 	{
-		unsigned i;
 		read32le(&m_cacheCache, is);
-		for(i = 0; i < NUMBLOCKS; i++)
+		for(int i = 0; i < NUMBLOCKS; i++)
 		{
-			unsigned j;
-			for(j = 0; j < ASSOCIATIVITY; j++)
+			for(int j = 0; j < ASSOCIATIVITY; j++)
 				read32le(&m_blocks[i].tag[j],is);
 			read32le(&m_blocks[i].nextWay,is);
 		}
@@ -132,24 +130,20 @@ private:
 	template<MMU_ACCESS_DIRECTION DIR>
 	bool CachedInternal(u32 addr, u32 blockMasked)
 	{
-		unsigned way;
 		u32 blockIndex = blockMasked >> BLOCKSIZESHIFT;
 		CacheBlock& block = m_blocks[blockIndex];
 		addr &= TAGMASK;
-		for(way = 0; way < ASSOCIATIVITY; way++)
-		{
-			if(addr != block.tag[way])
-				continue;
-
-			/* found it, already allocated */
-			m_cacheCache = blockMasked;
-			return true;
-		}
-
+		for(int way = 0; way < ASSOCIATIVITY; way++)
+			if(addr == block.tag[way])
+			{
+				// found it, already allocated
+				m_cacheCache = blockMasked;
+				return true;
+			}
 		if(DIR == MMU_AD_READ)
 		{
-			/* TODO: support other allocation orders? */
+			// TODO: support other allocation orders?
 			block.tag[block.nextWay++] = addr;
 			block.nextWay %= ASSOCIATIVITY;
 			m_cacheCache = blockMasked;
@@ -176,14 +170,13 @@ private:
 
 		void Reset()
 		{
-			unsigned way;
 			nextWay = 0;
-			for(way = 0; way < ASSOCIATIVITY; way++)
+			for(int way = 0; way < ASSOCIATIVITY; way++)
 				tag[way] = 0;
 		}
 	};
 
-	u32 m_cacheCache; /* optimization */
+	u32 m_cacheCache; // optimization
 
 	CacheBlock m_blocks [NUMBLOCKS];
 };
@@ -207,7 +200,9 @@ public:
 #endif
 
 		if(AT == MMU_AT_CODE && !prohibit)
+		{
 			return 1;
+		}
 
 		u32 time = _MMU_accesstime<PROCNUM, AT, READSIZE, DIRECTION, TIMING>(address,
 #ifdef ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS
@@ -253,12 +248,12 @@ private:
 
 struct MMU_struct_timing
 {
-	/* technically part of the cp15, but I didn't want the dereferencing penalty.
-	 * these template values correspond with the value of armcp15->cacheType. */
-	CacheController<13,2,5> arm9codeCache; /* 8192 bytes, 4-way associative, 32-byte blocks */
-	CacheController<12,2,5> arm9dataCache; /* 4096 bytes, 4-way associative, 32-byte blocks */
+	// technically part of the cp15, but I didn't want the dereferencing penalty.
+	// these template values correspond with the value of armcp15->cacheType.
+	CacheController<13,2,5> arm9codeCache; // 8192 bytes, 4-way associative, 32-byte blocks
+	CacheController<12,2,5> arm9dataCache; // 4096 bytes, 4-way associative, 32-byte blocks
 
-	/* technically part of armcpu_t, but that struct isn't templated on PROCNUM */
+	// technically part of armcpu_t, but that struct isn't templated on PROCNUM
 	FetchAccessUnit<0,MMU_AT_CODE> arm9codeFetch;
 	FetchAccessUnit<0,MMU_AT_DATA> arm9dataFetch;
 	FetchAccessUnit<1,MMU_AT_CODE> arm7codeFetch;
@@ -275,17 +270,19 @@ template<> FORCEINLINE FetchAccessUnit<1,MMU_AT_DATA>& MMU_struct_timing::armDat
 
 extern MMU_struct_timing MMU_timing;
 
-/* calculates the time a single memory access takes,
- * in units of cycles of the current processor.
- * this function replaces what used to be MMU_WAIT16 and MMU_WAIT32.
- * this may have side effects, so don't call it more than necessary. */
+
+
+// calculates the time a single memory access takes,
+// in units of cycles of the current processor.
+// this function replaces what used to be MMU_WAIT16 and MMU_WAIT32.
+// this may have side effects, so don't call it more than necessary.
 template<int PROCNUM, MMU_ACCESS_TYPE AT, int READSIZE, MMU_ACCESS_DIRECTION DIRECTION, bool TIMING>
 FORCEINLINE u32 _MMU_accesstime(u32 addr, bool sequential)
 {
-	static const int MC = 1; /* cached or tcm memory speed */
-	static const int M32 = (PROCNUM==ARMCPU_ARM9) ? 2 : 1; /* access through 32-bit bus */
-	static const int M16 = M32 * ((READSIZE>16) ? 2 : 1); /* access through 16-bit bus */
-	static const int MSLW = M16 * 8; /* this needs tuning */
+	static const int MC = 1; // cached or tcm memory speed
+	static const int M32 = (PROCNUM==ARMCPU_ARM9) ? 2 : 1; // access through 32-bit bus
+	static const int M16 = M32 * ((READSIZE>16) ? 2 : 1); // access through 16-bit bus
+	static const int MSLW = M16 * 8; // this needs tuning
 
 	if(PROCNUM==ARMCPU_ARM9 && AT == MMU_AT_CODE && addr < 0x02000000)
 		return MC; // ITCM
@@ -295,7 +292,7 @@ FORCEINLINE u32 _MMU_accesstime(u32 addr, bool sequential)
 		return MC; // DTCM
 #endif
 
-	/* for now, assume the cache is always enabled for all of main memory */
+	// for now, assume the cache is always enabled for all of main memory
 	if(AT != MMU_AT_DMA && TIMING && PROCNUM==ARMCPU_ARM9 && (addr & 0x0F000000) == 0x02000000)
 	{
 #ifdef ENABLE_CACHE_CONTROLLER_EMULATION
@@ -308,15 +305,15 @@ FORCEINLINE u32 _MMU_accesstime(u32 addr, bool sequential)
 			return MC;
 		u32 c;
 		if(sequential && AT==MMU_AT_DATA)
-			c = M16; /* bonus for sequential data access */
+			c = M16; // bonus for sequential data access
 		else if(DIRECTION == MMU_AD_READ)
 			c = M16 * 5;
 		else
-			c = M16 * 2; /* should be 4, but write buffer isn't emulated yet. */
+			c = M16 * 2; // should be 4, but write buffer isn't emulated yet.
 		if(DIRECTION == MMU_AD_READ)
 		{
-			/* cache miss while reading means it has to fill a whole cache line
-			 * by reading 32 bytes... */
+			// cache miss while reading means it has to fill a whole cache line
+			// by reading 32 bytes...
 			c += 8 * M32*2;
 		}
 
@@ -329,9 +326,9 @@ FORCEINLINE u32 _MMU_accesstime(u32 addr, bool sequential)
 		return c;
 
 #elif defined(ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS)
-		/* this is the closest approximation I could find
-		 * to the with-cache-controller timing
-		 * that doesn't do any actual caching logic. */
+		// this is the closest approximation I could find
+		// to the with-cache-controller timing
+		// that doesn't do any actual caching logic.
 		return sequential ? MC : M16;
 #endif
 	}
 
@@ -350,22 +347,29 @@ FORCEINLINE u32 _MMU_accesstime(u32 addr, bool sequential)
 	if(TIMING && !sequential)
 	{
 		//if(c != MC || PROCNUM==ARMCPU_ARM7) // check not needed anymore because ITCM/DTCM return earlier
-		c += (PROCNUM==ARMCPU_ARM9) ? 3*2 : 1;
+		{
+			c += (PROCNUM==ARMCPU_ARM9) ? 3*2 : 1;
+		}
 	}
 #endif
 
 	return c;
 }
 
-/* calculates the cycle time of a single memory access in the MEM stage.
- * to be used to calculate the memCycles argument for MMU_aluMemCycles.
- * this may have side effects, so don't call it more than necessary. */
+
+
+
+
+// calculates the cycle time of a single memory access in the MEM stage.
+// to be used to calculate the memCycles argument for MMU_aluMemCycles.
+// this may have side effects, so don't call it more than necessary.
 template<int PROCNUM, int READSIZE, MMU_ACCESS_DIRECTION DIRECTION, bool TIMING>
 FORCEINLINE u32 MMU_memAccessCycles(u32 addr)
 {
 	if(TIMING)
 		return MMU_timing.armDataFetch<PROCNUM>().template Fetch<READSIZE, DIRECTION, true>((addr)&(~((READSIZE>>3)-1)));
-	return MMU_timing.armDataFetch<PROCNUM>().template Fetch<READSIZE, DIRECTION, false>((addr)&(~((READSIZE>>3)-1)));
+	else
+		return MMU_timing.armDataFetch<PROCNUM>().template Fetch<READSIZE, DIRECTION, false>((addr)&(~((READSIZE>>3)-1)));
 }
 
 template<int PROCNUM, int READSIZE, MMU_ACCESS_DIRECTION DIRECTION>
@@ -373,44 +377,48 @@ FORCEINLINE u32 MMU_memAccessCycles(u32 addr)
 {
 	if(USE_TIMING())
 		return MMU_memAccessCycles<PROCNUM,READSIZE,DIRECTION,true>(addr);
-	return MMU_memAccessCycles<PROCNUM,READSIZE,DIRECTION,false>(addr);
+	else
+		return MMU_memAccessCycles<PROCNUM,READSIZE,DIRECTION,false>(addr);
 }
 
-/* calculates the cycle time of a single code fetch in the FETCH stage
- * to be used to calculate the fetchCycles argument for MMU_fetchExecuteCycles.
- * this may have side effects, so don't call it more than necessary. */
+// calculates the cycle time of a single code fetch in the FETCH stage
+// to be used to calculate the fetchCycles argument for MMU_fetchExecuteCycles.
+// this may have side effects, so don't call it more than necessary.
 template<int PROCNUM, int READSIZE>
 FORCEINLINE u32 MMU_codeFetchCycles(u32 addr)
 {
 	if(USE_TIMING())
 		return MMU_timing.armCodeFetch<PROCNUM>().template Fetch<READSIZE, MMU_AD_READ, true>((addr)&(~((READSIZE>>3)-1)));
-	return MMU_timing.armCodeFetch<PROCNUM>().template Fetch<READSIZE, MMU_AD_READ, false>((addr)&(~((READSIZE>>3)-1)));
+	else
+		return MMU_timing.armCodeFetch<PROCNUM>().template Fetch<READSIZE, MMU_AD_READ, false>((addr)&(~((READSIZE>>3)-1)));
 }
 
-/* calculates the cycle contribution of ALU + MEM stages (= EXECUTE)
- * given ALU cycle time and the summation of multiple memory access cycle times.
- * this function might belong more in armcpu, but I don't think it matters. */
+// calculates the cycle contribution of ALU + MEM stages (= EXECUTE)
+// given ALU cycle time and the summation of multiple memory access cycle times.
+// this function might belong more in armcpu, but I don't think it matters.
 template<int PROCNUM>
 FORCEINLINE u32 MMU_aluMemCycles(u32 aluCycles, u32 memCycles)
 {
-	if(PROCNUM==ARMCPU_ARM9)
-	{
-		/* ALU and MEM are different stages of the 5-stage pipeline.
-		 * we approximate the pipeline throughput using max,
-		 * since simply adding the cycles of each instruction together
-		 * fails to take into account the parallelism of the arm pipeline
-		 * and would make the emulated system unnaturally slow. */
-		return std::max(aluCycles, memCycles);
-	}
-
-	/* ALU and MEM are part of the same stage of the 3-stage pipeline,
-	 * thus they occur in sequence and we can simply add the counts together. */
-	return aluCycles + memCycles;
+	if(PROCNUM==ARMCPU_ARM9)
+	{
+		// ALU and MEM are different stages of the 5-stage pipeline.
+		// we approximate the pipeline throughput using max,
+		// since simply adding the cycles of each instruction together
+		// fails to take into account the parallelism of the arm pipeline
+		// and would make the emulated system unnaturally slow.
+		return std::max(aluCycles, memCycles);
+	}
+	else
+	{
+		// ALU and MEM are part of the same stage of the 3-stage pipeline,
+		// thus they occur in sequence and we can simply add the counts together.
+		return aluCycles + memCycles;
+	}
 }
 
-/* calculates the cycle contribution of ALU + MEM stages (= EXECUTE)
- * given ALU cycle time and the description of a single memory access.
- * this may have side effects, so don't call it more than necessary. */
+// calculates the cycle contribution of ALU + MEM stages (= EXECUTE)
+// given ALU cycle time and the description of a single memory access.
+// this may have side effects, so don't call it more than necessary.
 template<int PROCNUM, int READSIZE, MMU_ACCESS_DIRECTION DIRECTION>
 FORCEINLINE u32 MMU_aluMemAccessCycles(u32 aluCycles, u32 addr)
 {
@@ -421,28 +429,30 @@ FORCEINLINE u32 MMU_aluMemAccessCycles(u32 aluCycles, u32 addr)
 	return MMU_aluMemCycles<PROCNUM>(aluCycles, memCycles);
 }
 
-/* calculates the cycle contribution of FETCH + EXECUTE stages
- * given executeCycles = the combined ALU+MEM cycles
- * and fetchCycles = the cycle time of the FETCH stage
- * this function might belong more in armcpu, but I don't think it matters. */
+// calculates the cycle contribution of FETCH + EXECUTE stages
+// given executeCycles = the combined ALU+MEM cycles
+// and fetchCycles = the cycle time of the FETCH stage
+// this function might belong more in armcpu, but I don't think it matters.
 template<int PROCNUM>
 FORCEINLINE u32 MMU_fetchExecuteCycles(u32 executeCycles, u32 fetchCycles)
 {
-#ifdef ACCOUNT_FOR_CODE_FETCH_CYCLES
-	if(USE_TIMING())
-	{
-		/* execute and fetch are different stages of the pipeline for both arm7 and arm9.
-		 * again, we approximate the pipeline throughput using max. */
-		return std::max(executeCycles, fetchCycles);
-		/* TODO: add an option to support conflict between MEM and FETCH cycles
-		 * if they're both using the same data bus.
-		 * in the case of a conflict this should be:
-		 * return std::max(aluCycles, memCycles + fetchCycles);
-		 */
-	}
-#endif
+	#ifdef ACCOUNT_FOR_CODE_FETCH_CYCLES
+	const bool allow = true;
+	#else
+	const bool allow = false;
+	#endif
 
-	return executeCycles;
+	if(USE_TIMING() && allow)
+	{
+		// execute and fetch are different stages of the pipeline for both arm7 and arm9.
+		// again, we approximate the pipeline throughput using max.
+		return std::max(executeCycles, fetchCycles);
+		// TODO: add an option to support conflict between MEM and FETCH cycles
+		// if they're both using the same data bus.
+		// in the case of a conflict this should be:
+		// return std::max(aluCycles, memCycles + fetchCycles);
+	}
+	return executeCycles;
 }
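
A few notes on the code this patch restores. The CacheController never stores cached data: it keeps only tags plus a per-set round-robin fill pointer, which is just enough bookkeeping to classify each access as a hit or a miss for timing purposes. Below is a minimal standalone sketch of that tag-only idea, assuming the CacheController<12,2,5> geometry (4096 bytes, 4-way associative, 32-byte blocks); the names TagOnlyCache and access are illustrative, not DeSmuME's, and the set/tag split is the conventional one rather than the exact TAGMASK/BLOCKMASK arithmetic in the header:

#include <cstdint>
#include <cstring>

// Tag-only set-associative cache model: predicts hit/miss without storing
// any cached data. Geometry mirrors CacheController<12,2,5> above:
// 4096-byte cache, 4-way associative, 32-byte blocks.
struct TagOnlyCache
{
	static const int SIZESHIFT  = 12;        // log2(total bytes)
	static const int ASSOCSHIFT = 2;         // log2(ways)
	static const int BLOCKSHIFT = 5;         // log2(bytes per block)
	static const int WAYS = 1 << ASSOCSHIFT;
	static const int SETS = 1 << (SIZESHIFT - ASSOCSHIFT - BLOCKSHIFT);

	uint32_t tag[SETS][WAYS];
	uint32_t nextWay[SETS];                  // round-robin fill pointer per set

	void reset() { std::memset(this, 0, sizeof(*this)); }

	// returns true on a predicted hit; a read miss allocates a way
	// round-robin, like the restored "TODO: support other allocation orders?"
	bool access(uint32_t addr, bool isRead)
	{
		uint32_t block = addr >> BLOCKSHIFT;     // block-aligned address
		uint32_t set = block & (SETS - 1);
		for (int w = 0; w < WAYS; w++)
			if (tag[set][w] == block)            // full block number used as tag
				return true;                     // found, already allocated
		if (isRead)
		{
			tag[set][nextWay[set]] = block;      // fill on read miss only
			nextWay[set] = (nextWay[set] + 1) % WAYS;
		}
		return false;
	}
};

Like the restored Reset(), clearing tags to zero means a block whose tag is genuinely zero looks already cached after a reset; presumably that inaccuracy is tolerable because the model only guesses timing, never supplies data.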
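The bus-cost constants in _MMU_accesstime compose simply. For an ARM9 32-bit access, M32 = 2 and M16 = 4, so the miss path visible in the hunks above yields M16*5 + 8*M32*2 = 52 cycles for a non-sequential read (20 for the access plus 32 for the 8-word line fill), M16 + 32 = 36 for a sequential data read, and M16*2 = 8 for a write. The sketch below restates only that visible arithmetic; the lines elided between hunks (MSLW, the non-sequential penalty, etc.) may adjust the final figure:

#include <cstdio>

// restates the visible miss-cost arithmetic of _MMU_accesstime's
// ARM9 main-memory branch (the path taken when the cache model misses).
unsigned missCost(bool sequentialData, bool isRead, int readsize)
{
	const unsigned M32 = 2;                             // ARM9: 32-bit bus access
	const unsigned M16 = M32 * ((readsize > 16) ? 2 : 1);
	unsigned c;
	if (sequentialData)
		c = M16;                                        // bonus for sequential data access
	else if (isRead)
		c = M16 * 5;
	else
		c = M16 * 2;                                    // write buffer not emulated
	if (isRead)
		c += 8 * M32 * 2;                               // fill a 32-byte line: 8 words
	return c;
}

int main()
{
	std::printf("%u\n", missCost(false, true, 32));   // 52: non-sequential 32-bit read
	std::printf("%u\n", missCost(true, true, 32));    // 36: sequential 32-bit data read
	std::printf("%u\n", missCost(false, false, 32));  // 8:  non-sequential 32-bit write
}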
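Finally, the max-versus-add split in MMU_aluMemCycles (and the max in MMU_fetchExecuteCycles) is the heart of the pipeline model: on the ARM9's 5-stage pipeline, ALU and MEM are separate stages that overlap across neighboring instructions, so the slower one hides the faster; on the ARM7's 3-stage pipeline they share a stage and serialize. A toy demonstration of that cost-merging rule (standalone sketch, not DeSmuME code):

#include <algorithm>
#include <cstdio>

// overlapping pipeline stages cost max(a, b); serialized work costs a + b.
unsigned aluMemCycles(bool arm9, unsigned aluCycles, unsigned memCycles)
{
	return arm9 ? std::max(aluCycles, memCycles)   // 5-stage: stages overlap
	            : aluCycles + memCycles;           // 3-stage: same stage, adds up
}

int main()
{
	// a load costing 1 ALU cycle and 3 MEM cycles:
	std::printf("ARM9: %u\n", aluMemCycles(true, 1, 3));   // 3, MEM hides ALU
	std::printf("ARM7: %u\n", aluMemCycles(false, 1, 3));  // 4, they serialize
}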