From 6cf200b1e1ab92213130a0a31a4e5918f2ba956f Mon Sep 17 00:00:00 2001 From: zeromus Date: Tue, 22 Sep 2009 03:02:39 +0000 Subject: [PATCH] fix glitches in dma and poor code in command unpacking which were causing some slowdown. clean up ioregview refresh triggering code and add a note about how to cut compile time of gpu.cpp in half. --- desmume/src/GPU.cpp | 8 ++- desmume/src/MMU.cpp | 23 +++--- desmume/src/MMU_timing.h | 1 + desmume/src/NDSSystem.cpp | 13 ++-- desmume/src/driver.h | 7 ++ desmume/src/gfx3d.cpp | 115 +++++++++++++----------------- desmume/src/windows/IORegView.cpp | 3 + desmume/src/windows/main.cpp | 7 ++ 8 files changed, 92 insertions(+), 85 deletions(-) diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index 2359eccab..c38e618da 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -42,6 +42,9 @@ //#define FORCEINLINE //#define SSE2_NOINTRIN +//compilation speed hack (cuts time exactly in half by cutting out permutations) +//#define DISABLE_MOSAIC + extern BOOL click; NDS_Screen MainScreen; NDS_Screen SubScreen; @@ -2133,9 +2136,12 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l) //useful for debugging individual layers //if(gpu->core == 1 || i16 != 2) continue; +#ifndef DISABLE_MOSAIC if(gpu->curr_mosaic_enabled) gpu->modeRender(i16); - else gpu->modeRender(i16); + else +#endif + gpu->modeRender(i16); } //layer enabled } } diff --git a/desmume/src/MMU.cpp b/desmume/src/MMU.cpp index 9a2d3be4a..f3ce67408 100644 --- a/desmume/src/MMU.cpp +++ b/desmume/src/MMU.cpp @@ -47,10 +47,6 @@ #include "readwrite.h" #include "MMU_timing.h" -#ifdef WIN32 -#include "windows/IORegView.h" -#endif - #ifdef DO_ASSERT_UNALIGNED #define ASSERT_UNALIGNED(x) assert(x) #else @@ -767,6 +763,7 @@ static inline void MMU_VRAMmapControl(u8 block, u8 VRAMBankCnt) MMU_VRAMmapRefreshBank(i); //printf(vramConfiguration.describe().c_str()); + //printf("vram remapped at vcount=%d\n",nds.VCount); //if texInfo changed, trigger notifications if(memcmp(&oldTexInfo,&MMU.texInfo,sizeof(MMU_struct::TextureInfo))) @@ -1815,7 +1812,7 @@ u32 TGXSTAT::read32() ret |= gxfifo_irq; //user's irq flags - //printf("Returning gxstat read: %08X\n",ret); + //printf("vc=%03d Returning gxstat read: %08X\n",nds.VCount,ret); return ret; } @@ -1995,10 +1992,7 @@ if(_startmode==0 && wordcount==1) { if(!doNotStart) doSchedule(); - //todo - make a driver stub for this so that we dont have to conditionalize it everywhere -#ifdef WIN32 - RefreshAllIORegViews(); -#endif + driver->DEBUG_UpdateIORegView(BaseDriver::EDEBUG_IOREG_DMA); } void DmaController::exec() @@ -2050,7 +2044,7 @@ start: if(triggered) { - //if(procnum==0) printf("%08lld trig type %d dma#%d with words %d at src:%08X dst:%08X gxf:%d",nds_timer,startmode,chan,wordcount,saddr,daddr,gxFIFO.size); + //if(procnum==0) printf("vc=%03d %08lld trig type %d dma#%d w/words %d at src:%08X dst:%08X gxf:%d",nds.VCount,nds_timer,startmode,chan,wordcount,saddr,daddr,gxFIFO.size); if(saddr ==0x023BCCEC && wordcount==118) { int zzz=9; } @@ -2064,9 +2058,7 @@ start: } } -#ifdef WIN32 - RefreshAllIORegViews(); -#endif + driver->DEBUG_UpdateIORegView(BaseDriver::EDEBUG_IOREG_DMA); } void DmaController::doCopy() @@ -2162,6 +2154,7 @@ void triggerDma(EDMAMode mode) void DmaController::tryTrigger(EDMAMode mode) { if(startmode != mode) return; + if(!enable) return; //hmm dont trigger it if its already running! //but paused things need triggers to continue @@ -2696,13 +2689,13 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val) #endif if(val & (1<<15)) { - //LOG("Main core on top\n"); + //printf("Main core on top (vcount=%d)\n",nds.VCount); MainScreen.offset = 0; SubScreen.offset = 192; } else { - //LOG("Main core on bottom\n"); + //printf("Main core on bottom (vcount=%d)\n",nds.VCount); MainScreen.offset = 192; SubScreen.offset = 0; } diff --git a/desmume/src/MMU_timing.h b/desmume/src/MMU_timing.h index cb704d5e0..4f405a8b3 100644 --- a/desmume/src/MMU_timing.h +++ b/desmume/src/MMU_timing.h @@ -44,6 +44,7 @@ // makes non-sequential accesses slower than sequential ones. #define ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS + //(SOMETIMES THIS IS A BIG SPEED HIT!) // enables emulation of code fetch waits. #define ACCOUNT_FOR_CODE_FETCH_CYCLES diff --git a/desmume/src/NDSSystem.cpp b/desmume/src/NDSSystem.cpp index 483e41506..f30c4826a 100644 --- a/desmume/src/NDSSystem.cpp +++ b/desmume/src/NDSSystem.cpp @@ -1953,9 +1953,12 @@ static void execHardware_hstart_vcount() u16 vmatch = T1ReadWord(MMU.ARM9_REG, 4); if(nds.VCount==((vmatch>>8)|((vmatch<<1)&(1<<8)))) { + //arm9 vmatch T1WriteWord(MMU.ARM9_REG, 4, T1ReadWord(MMU.ARM9_REG, 4) | 4); - if(T1ReadWord(MMU.ARM9_REG, 4) & 32) + if(T1ReadWord(MMU.ARM9_REG, 4) & 32) { + //printf("VMATCH FIRING! vc=%03d\n",nds.VCount); NDS_makeARM9Int(2); + } } else T1WriteWord(MMU.ARM9_REG, 4, T1ReadWord(MMU.ARM9_REG, 4) & 0xFFFB); @@ -1963,6 +1966,7 @@ static void execHardware_hstart_vcount() vmatch = T1ReadWord(MMU.ARM7_REG, 4); if(nds.VCount==((vmatch>>8)|((vmatch<<1)&(1<<8)))) { + //arm7 vmatch T1WriteWord(MMU.ARM7_REG, 4, T1ReadWord(MMU.ARM7_REG, 4) | 4); if(T1ReadWord(MMU.ARM7_REG, 4) & 32) NDS_makeARM7Int(2); @@ -2153,7 +2157,7 @@ bool nds_loadstate(EMUFILE* is, int size) //#define LOG_ARM9 //#define LOG_ARM7 -//static bool dolog = false; +//static bool dolog = true; FORCEINLINE void arm9log() { @@ -2311,8 +2315,8 @@ void NDS_exec(s32 nb) #ifndef NDEBUG //what we find here is dependent on the timing constants above - if(nds_timer>next && (nds_timer-next)>22) - printf("curious. please report: over by %d\n",(int)(nds_timer-next)); + //if(nds_timer>next && (nds_timer-next)>22) + // printf("curious. please report: over by %d\n",(int)(nds_timer-next)); #endif //if we were waiting for an irq, don't wait too long: @@ -2347,6 +2351,7 @@ void execHardware_interrupts() if ( armcpu_irqException(&NDS_ARM9)) #endif { + //printf("ARM9 interrupt! flags: %08X ; mask: %08X ; result: %08X\n",MMU.reg_IF[0],MMU.reg_IE[0],MMU.reg_IF[0]&MMU.reg_IE[0]); //nds.ARM9Cycle = nds.cycles; } } diff --git a/desmume/src/driver.h b/desmume/src/driver.h index a6741065b..ec8c8e9f2 100644 --- a/desmume/src/driver.h +++ b/desmume/src/driver.h @@ -53,6 +53,13 @@ public: virtual bool EMU_IsFastForwarding() { return false; } virtual bool EMU_HasEmulationStarted() { return true; } virtual bool EMU_IsAtFrameBoundary() { return true; } + + enum eDebug_IOReg + { + EDEBUG_IOREG_DMA + }; + + virtual void DEBUG_UpdateIORegView(eDebug_IOReg category) { } }; extern BaseDriver* driver; diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index a504a6635..de8a44fe5 100644 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -96,39 +96,18 @@ public: { reset(); } - void test() { - printf("test 1: 0x00412321\n"); - receive(0x00412321); receive(1); receive(2); receive(3); receive(0); - //21: 1 - //23: 2 - //23: 3 - //41: 4 (a dud parameter) - printf("test 2: 0x00002321\n"); - receive(0x00002321); receive(1); receive(2); receive(3); - //21: 1 - //23: 2 - //23: 3 - printf("test 3: 0x11111111\n"); - receive(0x11111111); - receive(1); - //11: 1 (a dud parameter) - printf("test 4: 0x00000011, 0x29111111\n"); - receive(0x00000011); receive(0x29111111); receive(1); - //11: 17 - //11: 688984337 - //11: 1 - printf("test 5: 0x00004127\n"); - receive(0x00004127); receive(1); receive(0);//final one should be a dud - } void reset() { countdown = 0; - commandsPending = std::queue(); - countdowns = std::queue(); + commandCursor = 4; + for(int i=0;i<4;i++) { + commandsPending[i].command = 0; + commandsPending[i].countdown = 0; + } } //todo - things in here other than the very first thing involving GFX3D_NOP_NOARG_HACK I am not too sure about. void receive(u32 val) { bool hack = false; - if(commandsPending.size()>0 && (commandsPending.front() == 0x15 || commandsPending.front() == GFX3D_NOP_NOARG_HACK || commandsPending.front() == 0x11 || commandsPending.front() == 0x41) && val != 0) { + if(size()>0 && (front().command == 0x15 || front().command == GFX3D_NOP_NOARG_HACK || front().command == 0x11 || front().command == 0x41) && val != 0) { //apparently a zero is swallowed in this case but if another value is sent //processing will continue //if(commandsPending.front() == GFX3D_NOP_NOARG_HACK) @@ -136,7 +115,7 @@ public: //else { //printf("gxf: sending hack %02X: (dummy=0)\n", commandsPending.front()); - GFX_FIFOsend(commandsPending.front(),0); + GFX_FIFOsend(front().command,0); } hack = true; goto hackTrigger; @@ -147,21 +126,20 @@ public: //if(commandsPending.front() == GFX3D_NOP_NOARG_HACK) //{} //else - GFX_FIFOsend(commandsPending.front(),val); + GFX_FIFOsend(front().command,val); hackTrigger: countdown--; while(countdown==0) { - commandsPending.pop(); - countdowns.pop(); + dequeue(); trigger: //dont set hack to false if you jumped from below! it needs to be true for when you jump down from above. //oh my what a mess. - if(countdowns.empty()) break; - countdown = countdowns.front(); + if(size()==0) break; + countdown = front().countdown; if(!countdown) { - if(commandsPending.front() != INVALID_COMMAND /*&& commandsPending.front() != GFX3D_NOP_NOARG_HACK*/) { + if(front().command != INVALID_COMMAND /*&& commandsPending.front() != GFX3D_NOP_NOARG_HACK*/) { //printf("[%06d]gxf: sending %02X: (dummy=0)\n", currFrameCounter,commandsPending.front()); - GFX_FIFOsend(commandsPending.front(),0); + GFX_FIFOsend(front().command,0); } } } @@ -182,11 +160,11 @@ decode: u8 cmd = commands[i]; u8 type = commandTypes[i]; if(type == INVALID_COMMAND) { - commandsPending.push(INVALID_COMMAND); + commandsPending[i].command = INVALID_COMMAND; } else { if(type == UNDEFINED_COMMAND) - commandsPending.push(GFX3D_NOP_NOARG_HACK); //enqueue a single undefined command we know how to handle - else commandsPending.push(cmd); + commandsPending[i].command = GFX3D_NOP_NOARG_HACK; //enqueue a single undefined command we know how to handle + else commandsPending[i].command = cmd; } if(type == UNDEFINED_COMMAND || type == 0x00) { //these are valid commands with no parameters. they might need special handling @@ -199,35 +177,44 @@ decode: } } if(safe) { - countdowns.push(0); + commandsPending[i].countdown = 0; } else { //we need to receive a dummy parameter in this case - countdowns.push(1); + commandsPending[i].countdown = 1; } } else if(type != INVALID_COMMAND) { - countdowns.push(type); - } else countdowns.push(0); + commandsPending[i].countdown = type; + } else commandsPending[i].countdown = 0; } - countdown = countdowns.front(); + commandCursor = 0; + countdown = front().countdown; if(countdown==0) goto trigger; } } - std::queue commandsPending; - std::queue countdowns; + + struct CommandItem { + u8 command, countdown; + } commandsPending[4]; + + u32 commandCursor; u8 countdown; +private: + void dequeue() { commandCursor++; } + CommandItem& front() { return commandsPending[commandCursor]; } + u32 size() { return 4-commandCursor; } +public: + void savestate(EMUFILE *f) { + //TODO - next time we invalidate savestates, simplify this format. write32le(0,f); //version - std::queue temp = commandsPending; - write32le(temp.size(),f); - while(!temp.empty()) { write8le(temp.front(),f); temp.pop(); } - temp = countdowns; - write32le(temp.size(),f); - while(!temp.empty()) { write8le(temp.front(),f); temp.pop(); } - + write32le(size(),f); + for(u32 i=commandCursor;i<4;i++) write8le(commandsPending[i].command,f); + write32le(0,f); + for(u32 i=commandCursor;i<4;i++) write8le(commandsPending[i].countdown,f); write8le(countdown,f); } @@ -237,14 +224,14 @@ decode: if(read32le(&version,f) != 1) return false; if(version != 0) return false; - assert(commandsPending.size()==0); - assert(countdowns.size()==0); - - u32 temp; - read32le(&temp,f); - for(u32 i=0;i TIORegViewList; static TIORegViewList liveIORegViews; +bool anyLiveIORegViews = false; void RefreshAllIORegViews() { @@ -303,6 +304,7 @@ CIORegView::CIORegView() , yoff(0) { liveIORegViews.push_back(this); + anyLiveIORegViews = true; } CIORegView::~CIORegView() @@ -311,6 +313,7 @@ CIORegView::~CIORegView() UnregWndClass("DeSmuME_IORegView"); //TODO - is this thread safe? which thread do these calls come from liveIORegViews.erase(std::find(liveIORegViews.begin(),liveIORegViews.end(),this)); + if(liveIORegViews.size()==0) anyLiveIORegViews = false; } /*--------------------------------------------------------------------------*/ diff --git a/desmume/src/windows/main.cpp b/desmume/src/windows/main.cpp index f9392e772..7d162c032 100644 --- a/desmume/src/windows/main.cpp +++ b/desmume/src/windows/main.cpp @@ -1819,6 +1819,13 @@ class WinDriver : public BaseDriver return ESTEP_DONE; } + + virtual void DEBUG_UpdateIORegView(eDebug_IOReg category) + { + extern bool anyLiveIORegViews; + if(anyLiveIORegViews) + RefreshAllIORegViews(); + } }; std::string GetPrivateProfileStdString(LPCSTR lpAppName,LPCSTR lpKeyName,LPCSTR lpDefault)