From e2fcf5eadb8cd84946c34f797a5a60581b14d7a6 Mon Sep 17 00:00:00 2001 From: zeromus Date: Sun, 21 Oct 2012 19:44:16 +0000 Subject: [PATCH] support WRAMCNT memory mapping, and do some other code cleanup and documentation --- desmume/src/MMU.cpp | 152 +++++++++++++++++++++++++++++--------- desmume/src/MMU.h | 4 +- desmume/src/NDSSystem.cpp | 44 ++++++----- desmume/src/SPU.cpp | 2 +- desmume/src/saves.cpp | 10 ++- 5 files changed, 157 insertions(+), 55 deletions(-) diff --git a/desmume/src/MMU.cpp b/desmume/src/MMU.cpp index d101bd3f6..d64839164 100644 --- a/desmume/src/MMU.cpp +++ b/desmume/src/MMU.cpp @@ -311,16 +311,85 @@ static const TVramBankInfo vram_bank_info[VRAM_BANKS] = { //in order to play nicely with the MMU address and mask tables #define LCDC_HACKY_LOCATION 0x06000000 +#define ARM7_HACKY_IWRAM_LOCATION 0x03800000 +#define ARM7_HACKY_SIWRAM_LOCATION 0x03000000 + //maps an ARM9 BG/OBJ or LCDC address into an LCDC address, and informs the caller of whether it isn't mapped //TODO - in cases where this does some mapping work, we could bypass the logic at the end of the _read* and _write* routines //this is a good optimization to consider +//NOTE - this whole approach is probably fundamentally wrong. +//according to dasShiny research, its possible to map multiple banks to the same addresses. something more sophisticated would be needed. +//however, it hasnt proven necessary yet for any known test case. template static FORCEINLINE u32 MMU_LCDmap(u32 addr, bool& unmapped, bool& restricted) { unmapped = false; restricted = false; //this will track whether 8bit writes are allowed - //in case the address is entirely outside of the interesting ranges + //handle SIWRAM and non-shared IWRAM in here too, since it is quite similar to vram. + //in fact it is probably implemented with the same pieces of hardware. + //its sort of like arm7 non-shared IWRAM is lowest priority, and then SIWRAM goes on top. 
+ //however, we implement it differently than vram in the emulator for historical reasons. + //instead of keeping a page map like we do for vram, we just have a list of all possible page maps (there are only 4 each for arm9 and arm7) + if(addr >= 0x03000000 && addr < 0x04000000) + { + //blocks 0,1,2,3 are arm7 non-shared IWRAM, blocks 4,5 are SIWRAM, and block 8 is unmapped zeroes + int iwram_block_16k; + int iwram_offset = addr & 0x3FFF; + addr &= 0x00FFFFFF; + if(PROCNUM == ARMCPU_ARM7) + { + static const int arm7_siwram_blocks[2][4][4] = + { + { + {0,1,2,3}, //WRAMCNT = 0 -> map to IWRAM + {4,4,4,4}, //WRAMCNT = 1 -> map to SIWRAM block 0 + {5,5,5,5}, //WRAMCNT = 2 -> map to SIWRAM block 1 + {4,5,4,5}, //WRAMCNT = 3 -> map to SIWRAM blocks 0,1 + }, + //high region; always maps to non-shared IWRAM + { + {0,1,2,3}, {0,1,2,3}, {0,1,2,3}, {0,1,2,3} + } + }; + int region = (addr >> 23)&1; + int block = (addr >> 14)&3; + assert(region<2); + assert(block<4); + iwram_block_16k = arm7_siwram_blocks[region][MMU.WRAMCNT][block]; + } //PROCNUM == ARMCPU_ARM7 + else + { + //PROCNUM == ARMCPU_ARM9 + static const int arm9_siwram_blocks[4][4] = + { + {4,5,4,5}, //WRAMCNT = 0 -> map to SIWRAM blocks 0,1 + {5,5,5,5}, //WRAMCNT = 1 -> map to SIWRAM block 1 + {4,4,4,4}, //WRAMCNT = 2 -> map to SIWRAM block 0 + {8,8,8,8}, //WRAMCNT = 3 -> unmapped + }; + int block = (addr >> 14)&3; + assert(block<4); + iwram_block_16k = arm9_siwram_blocks[MMU.WRAMCNT][block]; + } + + switch(iwram_block_16k>>2) + { + case 0: //arm7 non-shared IWRAM + return ARM7_HACKY_IWRAM_LOCATION + (iwram_block_16k<<14) + iwram_offset; + case 1: //SIWRAM + return ARM7_HACKY_SIWRAM_LOCATION + ((iwram_block_16k&3)<<14) + iwram_offset; + case 2: //zeroes + CASE2: + unmapped = true; + return 0; + default: + assert(false); //how did this happen? 
+ goto CASE2; + } + } + + //in case the address is entirely outside of the interesting VRAM ranges if(addr < 0x06000000) return addr; if(addr >= 0x07000000) return addr; @@ -343,6 +412,7 @@ static FORCEINLINE u32 MMU_LCDmap(u32 addr, bool& unmapped, bool& restricted) restricted = true; //handle LCD memory mirroring + //TODO - this is gross! this should be renovated if the vram mapping is ever done in a more sophisticated way taking into account dasShiny research if(addr>=0x068A4000) addr = 0x06800000 + //(addr%0xA4000); //yuck!! is this even how it mirrors? but we have to keep from overrunning the buffer somehow @@ -713,9 +783,13 @@ void MMU_VRAM_unmap_all() static inline void MMU_VRAMmapControl(u8 block, u8 VRAMBankCnt) { - //dont handle wram mappings in here - if(block == 7) { - //wram + //handle WRAM, first of all + if(block == 7) + { + MMU.WRAMCNT = VRAMBankCnt & 3; + //copy new value into WRAMSTAT + //TODO - block user writes to WRAMSTAT + T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x241, MMU.WRAMCNT); return; } @@ -2331,12 +2405,7 @@ void FASTCALL _MMU_ARM9_write08(u32 adr, u8 val) MMU.AUX_SPI_CNT &= ~0x80; //remove busy flag return; - case REG_WRAMCNT: - /* Update WRAMSTAT at the ARM7 side */ - T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x241, val); - break; - - case REG_POWCNT1: writereg_POWCNT1(8,adr,val); break; + case REG_POWCNT1: writereg_POWCNT1(8,adr,val); break; case REG_DISPA_DISP3DCNT: writereg_DISP3DCNT(8,adr,val); return; case REG_DISPA_DISP3DCNT+1: writereg_DISP3DCNT(8,adr,val); return; @@ -2358,10 +2427,12 @@ void FASTCALL _MMU_ARM9_write08(u32 adr, u8 val) case REG_VRAMCNTE: case REG_VRAMCNTF: case REG_VRAMCNTG: + case REG_WRAMCNT: case REG_VRAMCNTH: case REG_VRAMCNTI: MMU_VRAMmapControl(adr-REG_VRAMCNTA, val); break; + case REG_DISPA_DISPMMEMFIFO: { DISP_FIFOsend(val); @@ -2736,14 +2807,7 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val) case REG_VRAMCNTA: case REG_VRAMCNTC: case REG_VRAMCNTE: - MMU_VRAMmapControl(adr-REG_VRAMCNTA, val & 
0xFF); - MMU_VRAMmapControl(adr-REG_VRAMCNTA+1, val >> 8); - break; case REG_VRAMCNTG: - MMU_VRAMmapControl(adr-REG_VRAMCNTA, val & 0xFF); - /* Update WRAMSTAT at the ARM7 side */ - T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x241, val >> 8); - break; case REG_VRAMCNTH: MMU_VRAMmapControl(adr-REG_VRAMCNTA, val & 0xFF); MMU_VRAMmapControl(adr-REG_VRAMCNTA+1, val >> 8); @@ -3159,17 +3223,11 @@ void FASTCALL _MMU_ARM9_write32(u32 adr, u32 val) return; case REG_VRAMCNTA: - MMU_VRAMmapControl(adr-REG_VRAMCNTA, val & 0xFF); - MMU_VRAMmapControl(adr-REG_VRAMCNTA+1, (val >> 8) & 0xFF); - MMU_VRAMmapControl(adr-REG_VRAMCNTA+2, (val >> 16) & 0xFF); - MMU_VRAMmapControl(adr-REG_VRAMCNTA+3, (val >> 24) & 0xFF); - break; case REG_VRAMCNTE: MMU_VRAMmapControl(adr-REG_VRAMCNTA, val & 0xFF); MMU_VRAMmapControl(adr-REG_VRAMCNTA+1, (val >> 8) & 0xFF); MMU_VRAMmapControl(adr-REG_VRAMCNTA+2, (val >> 16) & 0xFF); - /* Update WRAMSTAT at the ARM7 side */ - T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x241, (val >> 24) & 0xFF); + MMU_VRAMmapControl(adr-REG_VRAMCNTA+3, (val >> 24) & 0xFF); break; case REG_VRAMCNTH: MMU_VRAMmapControl(adr-REG_VRAMCNTA, val & 0xFF); @@ -3337,6 +3395,9 @@ u8 FASTCALL _MMU_ARM9_read08(u32 adr) case REG_IF+2: return (MMU.gen_IF()>>16); case REG_IF+3: return (MMU.gen_IF()>>24); + case REG_WRAMCNT: + return MMU.WRAMCNT; + case REG_DISPA_DISPSTAT: break; case REG_DISPA_DISPSTAT+1: @@ -3456,6 +3517,10 @@ u16 FASTCALL _MMU_ARM9_read16(u32 adr) // ============================================= 3D end case REG_IME : return (u16)MMU.reg_IME[ARMCPU_ARM9]; + + //WRAMCNT is readable but VRAMCNT is not, so just return WRAM's value + case REG_VRAMCNTG: + return MMU.WRAMCNT << 8; case REG_IE : return (u16)MMU.reg_IE[ARMCPU_ARM9]; @@ -3539,7 +3604,12 @@ u32 FASTCALL _MMU_ARM9_read32(u32 adr) case REG_DISPA_DISPSTAT: break; - case REG_DISPx_VCOUNT: return nds.VCount; + case REG_DISPx_VCOUNT: + return nds.VCount; + + //WRAMCNT is readable but VRAMCNT is not, so just return 
WRAM's value + case REG_VRAMCNTE: + return MMU.WRAMCNT << 24; //despite these being 16bit regs, //Dolphin Island Underwater Adventures uses this amidst seemingly reasonable divs so we're going to emulate it. @@ -4211,11 +4281,11 @@ void FASTCALL _MMU_ARM7_write32(u32 adr, u32 val) return; } - if ((adr>=0x04000400)&&(adr<0x04000520)) - { - SPU_WriteLong(adr, val); - return; - } + if ((adr>=0x04000400)&&(adr<0x04000520)) + { + SPU_WriteLong(adr, val); + return; + } if((adr>>24)==4) { @@ -4326,10 +4396,10 @@ u8 FASTCALL _MMU_ARM7_read08(u32 adr) else return addon.read08(ARMCPU_ARM7,adr); } - if ((adr>=0x04000400)&&(adr<0x04000520)) - { - return SPU_ReadByte(adr); - } + if ((adr>=0x04000400)&&(adr<0x04000520)) + { + return SPU_ReadByte(adr); + } if (adr == REG_RTC) return (u8)rtcRead(); @@ -4348,6 +4418,8 @@ u8 FASTCALL _MMU_ARM7_read08(u32 adr) case REG_DISPx_VCOUNT: return nds.VCount&0xFF; case REG_DISPx_VCOUNT+1: return (nds.VCount>>8)&0xFF; + + case REG_WRAMSTAT: return MMU.WRAMCNT; } return MMU.MMU_MEM[ARMCPU_ARM7][adr>>20][adr&MMU.MMU_MASK[ARMCPU_ARM7][adr>>20]]; @@ -4357,7 +4429,7 @@ u8 FASTCALL _MMU_ARM7_read08(u32 adr) adr = MMU_LCDmap(adr,unmapped, restricted); if(unmapped) return 0; - return MMU.MMU_MEM[ARMCPU_ARM7][adr>>20][adr&MMU.MMU_MASK[ARMCPU_ARM7][adr>>20]]; + return MMU.MMU_MEM[ARMCPU_ARM7][adr>>20][adr&MMU.MMU_MASK[ARMCPU_ARM7][adr>>20]]; } //================================================= MMU ARM7 read 16 u16 FASTCALL _MMU_ARM7_read16(u32 adr) @@ -4424,6 +4496,11 @@ u16 FASTCALL _MMU_ARM7_read16(u32 adr) case REG_TM3CNTL : return read_timer(ARMCPU_ARM7,(adr&0xF)>>2); + case REG_VRAMSTAT: + //make sure WRAMSTAT is stashed and then fallthrough to return the value from memory. i know, gross. 
+ T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x241, MMU.WRAMCNT); + break; + case REG_AUXSPICNT: return MMU.AUX_SPI_CNT; @@ -4519,7 +4596,12 @@ u32 FASTCALL _MMU_ARM7_read32(u32 adr) case REG_GCDATAIN: return MMU_readFromGC(); + case REG_VRAMSTAT: + //make sure WRAMSTAT is stashed and then fallthrough return the value from memory. i know, gross. + T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x241, MMU.WRAMCNT); + break; } + return T1ReadLong_guaranteedAligned(MMU.MMU_MEM[ARMCPU_ARM7][adr>>20], adr & MMU.MMU_MASK[ARMCPU_ARM7][adr>>20]); } diff --git a/desmume/src/MMU.h b/desmume/src/MMU.h index d0f3c5d6e..85d56f1e1 100644 --- a/desmume/src/MMU.h +++ b/desmume/src/MMU.h @@ -366,7 +366,7 @@ struct MMU_struct u32 LCD_VRAM_ADDR[10]; u8 LCDCenable[10]; - //Shared ram + //32KB of shared WRAM - can be switched between ARM7 & ARM9 in two blocks u8 SWIRAM[0x8000]; //Card rom & ram @@ -420,6 +420,8 @@ struct MMU_struct u16 AUX_SPI_CNT; u16 AUX_SPI_CMD; + u8 WRAMCNT; + u64 gfx3dCycles; u8 powerMan_CntReg; diff --git a/desmume/src/NDSSystem.cpp b/desmume/src/NDSSystem.cpp index f8e438719..7fcbf0b43 100644 --- a/desmume/src/NDSSystem.cpp +++ b/desmume/src/NDSSystem.cpp @@ -2197,8 +2197,6 @@ void NDS_Reset() { singleStep = false; nds_debug_continuing[0] = nds_debug_continuing[1] = false; - u32 src = 0; - u32 dst = 0; bool fw_success = false; FILE* inf = NULL; NDS_header * header = NDS_getROMHeader(); @@ -2428,16 +2426,21 @@ void NDS_Reset() } firmware = new CFIRMWARE(); fw_success = firmware->load(); + if (NDS_ARM7.BIOS_loaded && NDS_ARM9.BIOS_loaded && CommonSettings.BootFromFirmware && fw_success) { - // Copy secure area to memory if needed + //Copy secure area to memory if needed. + //could we get a comment about what's going on here? + //how does this stuff get copied before anything ever even runs? + //does it get mapped straight to the rom somehow? + //This code could be made more clear too. 
if ((header->ARM9src >= 0x4000) && (header->ARM9src < 0x8000)) { - src = header->ARM9src; - dst = header->ARM9cpy; + u32 src = header->ARM9src; + u32 dst = header->ARM9cpy; u32 size = (0x8000 - src) >> 2; - //INFO("Copy secure area from 0x%08X to 0x%08X (size %i/0x%08X)\n", src, dst, size, size); + for (u32 i = 0; i < size; i++) { _MMU_write32(dst, T1ReadLong(MMU.CART_ROM, src)); @@ -2445,6 +2448,7 @@ void NDS_Reset() } } + //TODO someone describe why here if (firmware->patched) { armcpu_init(&NDS_ARM7, 0x00000008); @@ -2452,23 +2456,22 @@ void NDS_Reset() } else { - //INFO("Booting at ARM9: 0x%08X, ARM7: 0x%08X\n", firmware->ARM9bootAddr, firmware->ARM7bootAddr); - // need for firmware - //armcpu_init(&NDS_ARM7, 0x00000008); - //armcpu_init(&NDS_ARM9, 0xFFFF0008); + //set the cpus to an initial state with their respective firmware program entrypoints armcpu_init(&NDS_ARM7, firmware->ARM7bootAddr); armcpu_init(&NDS_ARM9, firmware->ARM9bootAddr); } - // REG_POSTFLG - MMU.ARM9_REG[0x300] = 0; - MMU.ARM7_REG[0x300] = 0; + //set REG_POSTFLG to the value indicating pre-firmware status + MMU.ARM9_REG[0x300] = 0; + MMU.ARM7_REG[0x300] = 0; } else { - src = header->ARM9src; - dst = header->ARM9cpy; + //fake firmware boot-up process + //copy the arm9 program to the address specified by rom header + u32 src = header->ARM9src; + u32 dst = header->ARM9cpy; for(u32 i = 0; i < (header->ARM9binSize>>2); ++i) { _MMU_write32(dst, T1ReadLong(MMU.CART_ROM, src)); @@ -2476,9 +2479,9 @@ void NDS_Reset() src += 4; } + //copy the arm7 program to the address specified by rom header src = header->ARM7src; dst = header->ARM7cpy; - for(u32 i = 0; i < (header->ARM7binSize>>2); ++i) { _MMU_write32(dst, T1ReadLong(MMU.CART_ROM, src)); @@ -2486,13 +2489,20 @@ void NDS_Reset() src += 4; } + //set the cpus to an initial state with their respective programs entrypoints armcpu_init(&NDS_ARM7, header->ARM7exe); armcpu_init(&NDS_ARM9, header->ARM9exe); + + //TODO reading REG_WRAMSTAT ( + //according 
to smea, this is initialized to 3. who does this? we're doing it here because we're not sure if the firmware depends on it + //but it must've been done by the time the game boots, unless it was libnds doing it. + _MMU_write08(REG_WRAMCNT,3); - // REG_POSTFLG + //set REG_POSTFLG to the value indicating post-firmware status MMU.ARM9_REG[0x300] = 1; MMU.ARM7_REG[0x300] = 1; } + // only ARM9 have co-processor reconstruct(&cp15); cp15.reset(&NDS_ARM9); diff --git a/desmume/src/SPU.cpp b/desmume/src/SPU.cpp index f2921b104..715b77e9f 100644 --- a/desmume/src/SPU.cpp +++ b/desmume/src/SPU.cpp @@ -62,7 +62,7 @@ int SPU_currentCoreNum = SNDCORE_DUMMY; static int volume = 100; -static int buffersize = 0; +static size_t buffersize = 0; static ESynchMode synchmode = ESynchMode_DualSynchAsynch; static ESynchMethod synchmethod = ESynchMethod_N; diff --git a/desmume/src/saves.cpp b/desmume/src/saves.cpp index a3a3e78d3..f0266fb52 100644 --- a/desmume/src/saves.cpp +++ b/desmume/src/saves.cpp @@ -216,6 +216,8 @@ SFORMAT SF_MMU[]={ { "MASX", 1, 2, &MMU.AUX_SPI_CNT}, { "MASC", 1, 2, &MMU.AUX_SPI_CMD}, + { "MWRA", 1, 2, &MMU.WRAMCNT}, + { "MDV1", 4, 1, &MMU.divRunning}, { "MDV2", 8, 1, &MMU.divResult}, { "MDV3", 8, 1, &MMU.divMod}, @@ -272,7 +274,7 @@ SFORMAT SF_MOVIE[]={ static void mmu_savestate(EMUFILE* os) { - u32 version = 6; + u32 version = 7; write32le(version,os); //version 2: @@ -455,6 +457,12 @@ static bool mmu_loadstate(EMUFILE* is, int size) MMU_new.dsi_tsc.load_state(is); + if(version < 7) + { + //recover WRAMCNT from the stashed WRAMSTAT memory location + MMU.WRAMCNT = MMU.MMU_MEM[ARMCPU_ARM7][0x40][0x241]; + } + return ok; }