diff --git a/desmume/src/MMU.cpp b/desmume/src/MMU.cpp index 48ac06e24..6e618f367 100644 --- a/desmume/src/MMU.cpp +++ b/desmume/src/MMU.cpp @@ -1,7 +1,4 @@ /* Copyright (C) 2006 yopyop - yopyop156@ifrance.com - yopyop156.ifrance.com - Copyright (C) 2007 shash Copyright (C) 2007-2009 DeSmuME team @@ -935,12 +932,10 @@ void MMU_Reset() MMU.divRunning = 0; MMU.divResult = 0; MMU.divMod = 0; - MMU.divCnt = 0; MMU.divCycles = 0; MMU.sqrtRunning = 0; MMU.sqrtResult = 0; - MMU.sqrtCnt = 0; MMU.sqrtCycles = 0; MMU.SPI_CNT = 0; @@ -1026,31 +1021,36 @@ char txt[80]; static void execsqrt() { u32 ret; - u16 cnt = T1ReadWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2B0); + u8 mode = MMU_new.sqrt.mode; + MMU_new.sqrt.busy = 1; - if (cnt&1) { + if (mode) { u64 v = T1ReadQuad(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2B8); ret = (u32)isqrt(v); } else { u32 v = T1ReadLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2B8); ret = (u32)isqrt(v); } + + //clear the result while the sqrt unit is busy + //todo - is this right? is it reasonable? T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2B4, 0); - T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2B0, cnt | 0x8000); MMU.sqrtCycles = nds_timer + 26; MMU.sqrtResult = ret; - MMU.sqrtCnt = (cnt & 0x7FFF); MMU.sqrtRunning = TRUE; NDS_Reschedule(); } static void execdiv() { - u16 cnt = T1ReadWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x280); + s64 num,den; s64 res,mod; + u8 mode = MMU_new.div.mode; + MMU_new.div.busy = 1; + MMU_new.div.div0 = 0; - switch(cnt&3) + switch(mode) { case 0: num = (s64) (s32) T1ReadLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x290); @@ -1075,14 +1075,12 @@ static void execdiv() { { res = ((num < 0) ? 
1 : -1); mod = num; - cnt |= 0x4000; - cnt &= 0x7FFF; + MMU_new.div.div0 = 1; } else { res = num / den; mod = num % den; - cnt &= 0x3FFF; } DIVLOG("DIV %08X%08X / %08X%08X = %08X%08X\r\n", (u32)(num>>32), (u32)num, @@ -1093,11 +1091,9 @@ static void execdiv() { T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2A4, 0); T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2A8, 0); T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2AC, 0); - T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x280, ((cnt & 0xBFFF) | 0x8000)); MMU.divResult = res; MMU.divMod = mod; - MMU.divCnt = (cnt & 0x7FFF); MMU.divRunning = TRUE; NDS_Reschedule(); } @@ -2141,6 +2137,11 @@ void FASTCALL _MMU_ARM9_write08(u32 adr, u8 val) switch(adr) { + case REG_SQRTCNT: printf("ERROR 8bit SQRTCNT WRITE\n"); return; + case REG_SQRTCNT+1: printf("ERROR 8bit SQRTCNT WRITE\n"); return; + case REG_SQRTCNT+2: printf("ERROR 8bit SQRTCNT WRITE\n"); return; + case REG_SQRTCNT+3: printf("ERROR 8bit SQRTCNT WRITE\n"); return; + case REG_DISPA_DISP3DCNT: { u32 &disp3dcnt = MainScreen.gpu->dispx_st->dispA_DISP3DCNT.val; @@ -2406,40 +2407,50 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val) } // Alpha test reference value - Parameters:1 - case 0x04000340: + case eng_3D_ALPHA_TEST_REF: { ((u16 *)(MMU.MMU_MEM[ARMCPU_ARM9][0x40]))[0x340>>1] = val; gfx3d_glAlphaFunc(val); return; } // Clear background color setup - Parameters:2 - case 0x04000350: + case eng_3D_CLEAR_COLOR: { ((u16 *)(MMU.MMU_MEM[ARMCPU_ARM9][0x40]))[0x350>>1] = val; gfx3d_glClearColor(val); return; } // Clear background depth setup - Parameters:2 - case 0x04000354: + case eng_3D_CLEAR_DEPTH: { ((u16 *)(MMU.MMU_MEM[ARMCPU_ARM9][0x40]))[0x354>>1] = val; gfx3d_glClearDepth(val); return; } // Fog Color - Parameters:4b - case 0x04000358: + case eng_3D_FOG_COLOR: { ((u16 *)(MMU.MMU_MEM[ARMCPU_ARM9][0x40]))[0x358>>1] = val; gfx3d_glFogColor(val); return; } - case 0x0400035C: + case eng_3D_FOG_OFFSET: { ((u32 *)(MMU.MMU_MEM[ARMCPU_ARM9][0x40]))[0x35C>>1] = val; 
gfx3d_glFogOffset(val); return; } + case REG_DIVCNT: + MMU_new.div.write16(val); + execdiv(); + return; + + case REG_SQRTCNT: + MMU_new.sqrt.write16(val); + execsqrt(); + return; + case REG_DISPA_BLDCNT: GPU_setBLDCNT(MainScreen.gpu,val) ; break ; @@ -2924,6 +2935,10 @@ void FASTCALL _MMU_ARM9_write32(u32 adr, u32 val) switch(adr) { + case REG_SQRTCNT: printf("ERROR 32bit SQRTCNT WRITE\n"); return; + case REG_DIVCNT: printf("ERROR 32bit DIVCNT WRITE\n"); return; + + case eng_3D_GXSTAT: MMU_new.gxstat.write32(val); break; @@ -3239,6 +3254,15 @@ u8 FASTCALL _MMU_ARM9_read08(u32 adr) switch(adr) { + case REG_SQRTCNT: printf("ERROR 8bit SQRTCNT READ\n"); return 0; + case REG_SQRTCNT+1: printf("ERROR 8bit SQRTCNT READ\n"); return 0; + case REG_SQRTCNT+2: printf("ERROR 8bit SQRTCNT READ\n"); return 0; + case REG_SQRTCNT+3: printf("ERROR 8bit SQRTCNT READ\n"); return 0; + case REG_DIVCNT: printf("ERROR 8bit DIVCNT READ\n"); return 0; + case REG_DIVCNT+1: printf("ERROR 8bit DIVCNT READ\n"); return 0; + case REG_DIVCNT+2: printf("ERROR 8bit DIVCNT READ\n"); return 0; + case REG_DIVCNT+3: printf("ERROR 8bit DIVCNT READ\n"); return 0; + case eng_3D_GXSTAT: return MMU_new.gxstat.read(8,adr); } @@ -3271,8 +3295,9 @@ u16 FASTCALL _MMU_ARM9_read16(u32 adr) // Address is an IO register switch(adr) { - case eng_3D_GXSTAT: - return MMU_new.gxstat.read(16,adr); + case REG_SQRTCNT: return MMU_new.sqrt.read16(); + case REG_DIVCNT: return MMU_new.div.read16(); + case eng_3D_GXSTAT: return MMU_new.gxstat.read(16,adr); // ============================================= 3D case eng_3D_RAM_COUNT: @@ -3352,50 +3377,53 @@ u32 FASTCALL _MMU_ARM9_read32(u32 adr) switch(adr) { - case 0x04000640: - case 0x04000644: - case 0x04000648: - case 0x0400064C: - case 0x04000650: - case 0x04000654: - case 0x04000658: - case 0x0400065C: - case 0x04000660: - case 0x04000664: - case 0x04000668: - case 0x0400066C: - case 0x04000670: - case 0x04000674: - case 0x04000678: - case 0x0400067C: + case REG_SQRTCNT: 
printf("ERROR 32bit SQRTCNT READ\n"); return 0; + case REG_DIVCNT: printf("ERROR 32bit DIVCNT READ\n"); return 0; + + case eng_3D_CLIPMTX_RESULT: + case eng_3D_CLIPMTX_RESULT+4: + case eng_3D_CLIPMTX_RESULT+8: + case eng_3D_CLIPMTX_RESULT+12: + case eng_3D_CLIPMTX_RESULT+16: + case eng_3D_CLIPMTX_RESULT+20: + case eng_3D_CLIPMTX_RESULT+24: + case eng_3D_CLIPMTX_RESULT+28: + case eng_3D_CLIPMTX_RESULT+32: + case eng_3D_CLIPMTX_RESULT+36: + case eng_3D_CLIPMTX_RESULT+40: + case eng_3D_CLIPMTX_RESULT+44: + case eng_3D_CLIPMTX_RESULT+48: + case eng_3D_CLIPMTX_RESULT+52: + case eng_3D_CLIPMTX_RESULT+56: + case eng_3D_CLIPMTX_RESULT+60: { //LOG("4000640h..67Fh - CLIPMTX_RESULT - Read Current Clip Coordinates Matrix (R)"); return gfx3d_GetClipMatrix ((adr-0x04000640)/4); } - case 0x04000680: - case 0x04000684: - case 0x04000688: - case 0x0400068C: - case 0x04000690: - case 0x04000694: - case 0x04000698: - case 0x0400069C: - case 0x040006A0: + case eng_3D_VECMTX_RESULT: + case eng_3D_VECMTX_RESULT+4: + case eng_3D_VECMTX_RESULT+8: + case eng_3D_VECMTX_RESULT+12: + case eng_3D_VECMTX_RESULT+16: + case eng_3D_VECMTX_RESULT+20: + case eng_3D_VECMTX_RESULT+24: + case eng_3D_VECMTX_RESULT+28: + case eng_3D_VECMTX_RESULT+32: { //LOG("4000680h..6A3h - VECMTX_RESULT - Read Current Directional Vector Matrix (R)"); return gfx3d_GetDirectionalMatrix ((adr-0x04000680)/4); } - case 0x4000604: + case eng_3D_RAM_COUNT: { return (gfx3d_GetNumPolys()) | ((gfx3d_GetNumVertex()) << 16); //LOG ("read32 - RAM_COUNT -> 0x%X", ((u32 *)(MMU.MMU_MEM[ARMCPU_ARM9][(adr>>20)&0xFF]))[(adr&MMU.MMU_MASK[ARMCPU_ARM9][(adr>>20)&0xFF])>>2]); } - case 0x04000620: - case 0x04000624: - case 0x04000628: - case 0x0400062C: + case eng_3D_POS_RESULT: + case eng_3D_POS_RESULT+4: + case eng_3D_POS_RESULT+8: + case eng_3D_POS_RESULT+12: { return gfx3d_glGetPosRes((adr & 0xF) >> 2); } @@ -3420,18 +3448,8 @@ u32 FASTCALL _MMU_ARM9_read32(u32 adr) u32 val = T1ReadWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], (adr + 2) & 0xFFF); 
return MMU.timer[ARMCPU_ARM9][(adr&0xF)>>2] | (val<<16); } - /* - case 0x04000640 : // TODO (clear): again, ??? - LOG("read proj\r\n"); - return 0; - case 0x04000680 : - LOG("read roat\r\n"); - return 0; - case 0x04000620 : - LOG("point res\r\n"); - return 0; - */ - case REG_GCDATAIN: + + case REG_GCDATAIN: return MMU_readFromGC(); } return T1ReadLong_guaranteedAligned(MMU.MMU_MEM[ARMCPU_ARM9][0x40], adr & MMU.MMU_MASK[ARMCPU_ARM9][(adr >> 20)]); diff --git a/desmume/src/MMU.h b/desmume/src/MMU.h index a4a63af51..bb7367362 100644 --- a/desmume/src/MMU.h +++ b/desmume/src/MMU.h @@ -1,7 +1,4 @@ /* Copyright (C) 2006 yopyop - yopyop156@ifrance.com - yopyop156.ifrance.com - Copyright (C) 2007 shash Copyright (C) 2007-2009 DeSmuME team @@ -30,6 +27,7 @@ #include "registers.h" #include "mc.h" #include "bits.h" +#include "readwrite.h" #ifdef HAVE_LUA #include "lua-engine.h" #endif @@ -77,7 +75,9 @@ enum EDMADestinationUpdate EDMADestinationUpdate_IncrementReload = 3, }; - +//TODO +//n.b. this may be a bad idea, for complex registers like the dma control register. +//we need to know exactly what part was written to, instead of assuming all 32bits were written. class TRegister_32 { public: @@ -131,6 +131,60 @@ struct TGXSTAT : public TRegister_32 void triggerDma(EDMAMode mode); +class DivController +{ +public: + DivController() + : mode(0), busy(0), div0(0) /* div0 feeds bit 14 of read16() and is written by savestate(), so it must start out defined */ + {} + void exec(); + u8 mode, busy, div0; + u16 read16() { return mode|(busy<<15)|(div0<<14); } + void write16(u16 val) { + mode = val&3; + //todo - do we clear the div0 flag here or is that strictly done by the divider unit? 
+ } + void savestate(EMUFILE* os) + { + write8le(&mode,os); + write8le(&busy,os); + write8le(&div0,os); + } + bool loadstate(EMUFILE* is, int version) + { + int ret = 1; + ret &= read8le(&mode,is); + ret &= read8le(&busy,is); + ret &= read8le(&div0,is); + return ret==1; + } +}; + +class SqrtController +{ +public: + SqrtController() + : mode(0), busy(0) + {} + void exec(); + u8 mode, busy; + u16 read16() { return mode|(busy<<15); } + void write16(u16 val) { mode = val&1; } + void savestate(EMUFILE* os) + { + write8le(&mode,os); + write8le(&busy,os); + } + bool loadstate(EMUFILE* is, int version) + { + int ret=1; + ret &= read8le(&mode,is); + ret &= read8le(&busy,is); + return ret==1; + } +}; + + class DmaController { public: @@ -334,12 +388,10 @@ struct MMU_struct BOOL divRunning; s64 divResult; s64 divMod; - u32 divCnt; u64 divCycles; BOOL sqrtRunning; u32 sqrtResult; - u32 sqrtCnt; u64 sqrtCycles; u16 SPI_CNT; @@ -365,6 +417,8 @@ struct MMU_struct_new BackupDevice backupDevice; DmaController dma[2][4]; TGXSTAT gxstat; + SqrtController sqrt; + DivController div; void write_dma(const int proc, const int size, const u32 adr, const u32 val); u32 read_dma(const int proc, const int size, const u32 adr); diff --git a/desmume/src/NDSSystem.cpp b/desmume/src/NDSSystem.cpp index 37c72d694..fe8d140a3 100644 --- a/desmume/src/NDSSystem.cpp +++ b/desmume/src/NDSSystem.cpp @@ -1777,11 +1777,11 @@ struct TSequenceItem_divider : public TSequenceItem void exec() { IF_DEVELOPER(DEBUG_statistics.sequencerExecutionCounters[2]++); + MMU_new.div.busy = 0; T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2A0, (u32)MMU.divResult); T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2A4, (u32)(MMU.divResult >> 32)); T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2A8, (u32)MMU.divMod); T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2AC, (u32)(MMU.divMod >> 32)); - T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x280, MMU.divCnt); MMU.divRunning = FALSE; } @@ -1804,8 +1804,8 @@ struct 
TSequenceItem_sqrtunit : public TSequenceItem FORCEINLINE void exec() { IF_DEVELOPER(DEBUG_statistics.sequencerExecutionCounters[3]++); + MMU_new.sqrt.busy = 0; T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2B4, MMU.sqrtResult); - T1WriteLong(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x2B0, MMU.sqrtCnt); MMU.sqrtRunning = FALSE; } @@ -2254,7 +2254,7 @@ bool nds_loadstate(EMUFILE* is, int size) //#define LOG_ARM9 //#define LOG_ARM7 -//static bool dolog = true; +//bool dolog = true; FORCEINLINE void arm9log() { @@ -2267,8 +2267,8 @@ FORCEINLINE void arm9log() else des_arm_instructions_set[INDEX(NDS_ARM9.instruction)](NDS_ARM9.instruct_adr, NDS_ARM9.instruction, dasmbuf); - printf("%05d %12lld 9:%08X %08X %-30s R00:%08X R01:%08X R02:%08X R03:%08X R04:%08X R05:%08X R06:%08X R07:%08X R08:%08X R09:%08X R10:%08X R11:%08X R12:%08X R13:%08X R14:%08X R15:%08X\n", - currFrameCounter, nds_timer, + printf("%05d:%03d %12lld 9:%08X %08X %-30s R00:%08X R01:%08X R02:%08X R03:%08X R04:%08X R05:%08X R06:%08X R07:%08X R08:%08X R09:%08X R10:%08X R11:%08X R12:%08X R13:%08X R14:%08X R15:%08X\n", + currFrameCounter, nds.VCount, nds_timer, NDS_ARM9.instruct_adr,NDS_ARM9.instruction, dasmbuf, NDS_ARM9.R[0], NDS_ARM9.R[1], NDS_ARM9.R[2], NDS_ARM9.R[3], NDS_ARM9.R[4], NDS_ARM9.R[5], NDS_ARM9.R[6], NDS_ARM9.R[7], NDS_ARM9.R[8], NDS_ARM9.R[9], NDS_ARM9.R[10], NDS_ARM9.R[11], NDS_ARM9.R[12], NDS_ARM9.R[13], NDS_ARM9.R[14], NDS_ARM9.R[15]); @@ -2287,8 +2287,8 @@ FORCEINLINE void arm7log() else des_arm_instructions_set[INDEX(NDS_ARM7.instruction)](NDS_ARM7.instruct_adr, NDS_ARM7.instruction, dasmbuf); - printf("%05d %12lld 7:%08X %08X %-30s R00:%08X R01:%08X R02:%08X R03:%08X R04:%08X R05:%08X R06:%08X R07:%08X R08:%08X R09:%08X R10:%08X R11:%08X R12:%08X R13:%08X R14:%08X R15:%08X\n", - currFrameCounter, nds_timer, + printf("%05d:%03d %12lld 7:%08X %08X %-30s R00:%08X R01:%08X R02:%08X R03:%08X R04:%08X R05:%08X R06:%08X R07:%08X R08:%08X R09:%08X R10:%08X R11:%08X R12:%08X R13:%08X R14:%08X R15:%08X\n", + 
currFrameCounter, nds.VCount, nds_timer, NDS_ARM7.instruct_adr,NDS_ARM7.instruction, dasmbuf, NDS_ARM7.R[0], NDS_ARM7.R[1], NDS_ARM7.R[2], NDS_ARM7.R[3], NDS_ARM7.R[4], NDS_ARM7.R[5], NDS_ARM7.R[6], NDS_ARM7.R[7], NDS_ARM7.R[8], NDS_ARM7.R[9], NDS_ARM7.R[10], NDS_ARM7.R[11], NDS_ARM7.R[12], NDS_ARM7.R[13], NDS_ARM7.R[14], NDS_ARM7.R[15]); diff --git a/desmume/src/readwrite.h b/desmume/src/readwrite.h index b5b684bf8..e95e16b2e 100644 --- a/desmume/src/readwrite.h +++ b/desmume/src/readwrite.h @@ -28,6 +28,7 @@ //well. just for the sake of consistency int write8le(u8 b, EMUFILE *fp); +inline int write8le(u8* b, EMUFILE *fp) { return write8le(*b,fp); } int write16le(u16 b, EMUFILE* os); int write32le(u32 b, EMUFILE* os); int write64le(u64 b, EMUFILE* os); diff --git a/desmume/src/saves.cpp b/desmume/src/saves.cpp index ab93259fe..c36dec417 100644 --- a/desmume/src/saves.cpp +++ b/desmume/src/saves.cpp @@ -205,12 +205,10 @@ SFORMAT SF_MMU[]={ { "MDV1", 4, 1, &MMU.divRunning}, { "MDV2", 8, 1, &MMU.divResult}, { "MDV3", 8, 1, &MMU.divMod}, - { "MDV4", 4, 1, &MMU.divCnt}, { "MDV5", 8, 1, &MMU.divCycles}, { "MSQ1", 4, 1, &MMU.sqrtRunning}, { "MSQ2", 4, 1, &MMU.sqrtResult}, - { "MSQ3", 4, 1, &MMU.sqrtCnt}, { "MSQ4", 8, 1, &MMU.sqrtCycles}, //begin memory chips @@ -254,7 +252,7 @@ SFORMAT SF_MOVIE[]={ static void mmu_savestate(EMUFILE* os) { - u32 version = 3; + u32 version = 4; write32le(version,os); //version 2: @@ -272,6 +270,10 @@ static void mmu_savestate(EMUFILE* os) MMU_timing.arm7dataFetch.savestate(os, version); MMU_timing.arm9codeCache.savestate(os, version); MMU_timing.arm9dataCache.savestate(os, version); + + //version 4: + MMU_new.sqrt.savestate(os); + MMU_new.div.savestate(os); } SFORMAT SF_WIFI[]={ @@ -414,13 +416,11 @@ static bool mmu_loadstate(EMUFILE* is, int size) if(is->fail()) return false; } - if(version < 2) - return true; + if(version < 2) return true; bool ok = MMU_new.backupDevice.load_state(is); - if(version < 3) - return true; + if(version < 3) 
return true; ok &= MMU_new.gxstat.loadstate(is); @@ -435,6 +435,11 @@ static bool mmu_loadstate(EMUFILE* is, int size) ok &= MMU_timing.arm9codeCache.loadstate(is, version); ok &= MMU_timing.arm9dataCache.loadstate(is, version); + if(version < 4) return true; + + ok &= MMU_new.sqrt.loadstate(is,version); + ok &= MMU_new.div.loadstate(is,version); + return ok; }