Fix glitches in DMA and poor code in command unpacking, which were causing some slowdown. Clean up the IORegView refresh-triggering code and add a note about how to cut the compile time of gpu.cpp in half.

This commit is contained in:
zeromus 2009-09-22 03:02:39 +00:00
parent 14c011d8de
commit 6cf200b1e1
8 changed files with 92 additions and 85 deletions

View File

@ -42,6 +42,9 @@
//#define FORCEINLINE //#define FORCEINLINE
//#define SSE2_NOINTRIN //#define SSE2_NOINTRIN
//compilation speed hack (cuts time exactly in half by cutting out permutations)
//#define DISABLE_MOSAIC
extern BOOL click; extern BOOL click;
NDS_Screen MainScreen; NDS_Screen MainScreen;
NDS_Screen SubScreen; NDS_Screen SubScreen;
@ -2133,9 +2136,12 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
//useful for debugging individual layers //useful for debugging individual layers
//if(gpu->core == 1 || i16 != 2) continue; //if(gpu->core == 1 || i16 != 2) continue;
#ifndef DISABLE_MOSAIC
if(gpu->curr_mosaic_enabled) if(gpu->curr_mosaic_enabled)
gpu->modeRender<true>(i16); gpu->modeRender<true>(i16);
else gpu->modeRender<false>(i16); else
#endif
gpu->modeRender<false>(i16);
} //layer enabled } //layer enabled
} }
} }

View File

@ -47,10 +47,6 @@
#include "readwrite.h" #include "readwrite.h"
#include "MMU_timing.h" #include "MMU_timing.h"
#ifdef WIN32
#include "windows/IORegView.h"
#endif
#ifdef DO_ASSERT_UNALIGNED #ifdef DO_ASSERT_UNALIGNED
#define ASSERT_UNALIGNED(x) assert(x) #define ASSERT_UNALIGNED(x) assert(x)
#else #else
@ -767,6 +763,7 @@ static inline void MMU_VRAMmapControl(u8 block, u8 VRAMBankCnt)
MMU_VRAMmapRefreshBank(i); MMU_VRAMmapRefreshBank(i);
//printf(vramConfiguration.describe().c_str()); //printf(vramConfiguration.describe().c_str());
//printf("vram remapped at vcount=%d\n",nds.VCount);
//if texInfo changed, trigger notifications //if texInfo changed, trigger notifications
if(memcmp(&oldTexInfo,&MMU.texInfo,sizeof(MMU_struct::TextureInfo))) if(memcmp(&oldTexInfo,&MMU.texInfo,sizeof(MMU_struct::TextureInfo)))
@ -1815,7 +1812,7 @@ u32 TGXSTAT::read32()
ret |= gxfifo_irq; //user's irq flags ret |= gxfifo_irq; //user's irq flags
//printf("Returning gxstat read: %08X\n",ret); //printf("vc=%03d Returning gxstat read: %08X\n",nds.VCount,ret);
return ret; return ret;
} }
@ -1995,10 +1992,7 @@ if(_startmode==0 && wordcount==1) {
if(!doNotStart) if(!doNotStart)
doSchedule(); doSchedule();
//todo - make a driver stub for this so that we dont have to conditionalize it everywhere driver->DEBUG_UpdateIORegView(BaseDriver::EDEBUG_IOREG_DMA);
#ifdef WIN32
RefreshAllIORegViews();
#endif
} }
void DmaController::exec() void DmaController::exec()
@ -2050,7 +2044,7 @@ start:
if(triggered) if(triggered)
{ {
//if(procnum==0) printf("%08lld trig type %d dma#%d with words %d at src:%08X dst:%08X gxf:%d",nds_timer,startmode,chan,wordcount,saddr,daddr,gxFIFO.size); //if(procnum==0) printf("vc=%03d %08lld trig type %d dma#%d w/words %d at src:%08X dst:%08X gxf:%d",nds.VCount,nds_timer,startmode,chan,wordcount,saddr,daddr,gxFIFO.size);
if(saddr ==0x023BCCEC && wordcount==118) { if(saddr ==0x023BCCEC && wordcount==118) {
int zzz=9; int zzz=9;
} }
@ -2064,9 +2058,7 @@ start:
} }
} }
#ifdef WIN32 driver->DEBUG_UpdateIORegView(BaseDriver::EDEBUG_IOREG_DMA);
RefreshAllIORegViews();
#endif
} }
void DmaController::doCopy() void DmaController::doCopy()
@ -2162,6 +2154,7 @@ void triggerDma(EDMAMode mode)
void DmaController::tryTrigger(EDMAMode mode) void DmaController::tryTrigger(EDMAMode mode)
{ {
if(startmode != mode) return; if(startmode != mode) return;
if(!enable) return;
//hmm dont trigger it if its already running! //hmm dont trigger it if its already running!
//but paused things need triggers to continue //but paused things need triggers to continue
@ -2696,13 +2689,13 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val)
#endif #endif
if(val & (1<<15)) if(val & (1<<15))
{ {
//LOG("Main core on top\n"); //printf("Main core on top (vcount=%d)\n",nds.VCount);
MainScreen.offset = 0; MainScreen.offset = 0;
SubScreen.offset = 192; SubScreen.offset = 192;
} }
else else
{ {
//LOG("Main core on bottom\n"); //printf("Main core on bottom (vcount=%d)\n",nds.VCount);
MainScreen.offset = 192; MainScreen.offset = 192;
SubScreen.offset = 0; SubScreen.offset = 0;
} }

View File

@ -44,6 +44,7 @@
// makes non-sequential accesses slower than sequential ones. // makes non-sequential accesses slower than sequential ones.
#define ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS #define ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS
//(SOMETIMES THIS IS A BIG SPEED HIT!)
// enables emulation of code fetch waits. // enables emulation of code fetch waits.
#define ACCOUNT_FOR_CODE_FETCH_CYCLES #define ACCOUNT_FOR_CODE_FETCH_CYCLES

View File

@ -1953,16 +1953,20 @@ static void execHardware_hstart_vcount()
u16 vmatch = T1ReadWord(MMU.ARM9_REG, 4); u16 vmatch = T1ReadWord(MMU.ARM9_REG, 4);
if(nds.VCount==((vmatch>>8)|((vmatch<<1)&(1<<8)))) if(nds.VCount==((vmatch>>8)|((vmatch<<1)&(1<<8))))
{ {
//arm9 vmatch
T1WriteWord(MMU.ARM9_REG, 4, T1ReadWord(MMU.ARM9_REG, 4) | 4); T1WriteWord(MMU.ARM9_REG, 4, T1ReadWord(MMU.ARM9_REG, 4) | 4);
if(T1ReadWord(MMU.ARM9_REG, 4) & 32) if(T1ReadWord(MMU.ARM9_REG, 4) & 32) {
//printf("VMATCH FIRING! vc=%03d\n",nds.VCount);
NDS_makeARM9Int(2); NDS_makeARM9Int(2);
} }
}
else else
T1WriteWord(MMU.ARM9_REG, 4, T1ReadWord(MMU.ARM9_REG, 4) & 0xFFFB); T1WriteWord(MMU.ARM9_REG, 4, T1ReadWord(MMU.ARM9_REG, 4) & 0xFFFB);
vmatch = T1ReadWord(MMU.ARM7_REG, 4); vmatch = T1ReadWord(MMU.ARM7_REG, 4);
if(nds.VCount==((vmatch>>8)|((vmatch<<1)&(1<<8)))) if(nds.VCount==((vmatch>>8)|((vmatch<<1)&(1<<8))))
{ {
//arm7 vmatch
T1WriteWord(MMU.ARM7_REG, 4, T1ReadWord(MMU.ARM7_REG, 4) | 4); T1WriteWord(MMU.ARM7_REG, 4, T1ReadWord(MMU.ARM7_REG, 4) | 4);
if(T1ReadWord(MMU.ARM7_REG, 4) & 32) if(T1ReadWord(MMU.ARM7_REG, 4) & 32)
NDS_makeARM7Int(2); NDS_makeARM7Int(2);
@ -2153,7 +2157,7 @@ bool nds_loadstate(EMUFILE* is, int size)
//#define LOG_ARM9 //#define LOG_ARM9
//#define LOG_ARM7 //#define LOG_ARM7
//static bool dolog = false; //static bool dolog = true;
FORCEINLINE void arm9log() FORCEINLINE void arm9log()
{ {
@ -2311,8 +2315,8 @@ void NDS_exec(s32 nb)
#ifndef NDEBUG #ifndef NDEBUG
//what we find here is dependent on the timing constants above //what we find here is dependent on the timing constants above
if(nds_timer>next && (nds_timer-next)>22) //if(nds_timer>next && (nds_timer-next)>22)
printf("curious. please report: over by %d\n",(int)(nds_timer-next)); // printf("curious. please report: over by %d\n",(int)(nds_timer-next));
#endif #endif
//if we were waiting for an irq, don't wait too long: //if we were waiting for an irq, don't wait too long:
@ -2347,6 +2351,7 @@ void execHardware_interrupts()
if ( armcpu_irqException(&NDS_ARM9)) if ( armcpu_irqException(&NDS_ARM9))
#endif #endif
{ {
//printf("ARM9 interrupt! flags: %08X ; mask: %08X ; result: %08X\n",MMU.reg_IF[0],MMU.reg_IE[0],MMU.reg_IF[0]&MMU.reg_IE[0]);
//nds.ARM9Cycle = nds.cycles; //nds.ARM9Cycle = nds.cycles;
} }
} }

View File

@ -53,6 +53,13 @@ public:
virtual bool EMU_IsFastForwarding() { return false; } virtual bool EMU_IsFastForwarding() { return false; }
virtual bool EMU_HasEmulationStarted() { return true; } virtual bool EMU_HasEmulationStarted() { return true; }
virtual bool EMU_IsAtFrameBoundary() { return true; } virtual bool EMU_IsAtFrameBoundary() { return true; }
//categories of IO register activity that can be reported to DEBUG_UpdateIORegView()
enum eDebug_IOReg
{
//the only category defined so far; the DMA controller code passes it after touching DMA state
EDEBUG_IOREG_DMA
};
//debug hook invoked by the core when IO registers of the given category may have changed.
//this default implementation does nothing; a frontend driver can override it to refresh its viewer.
virtual void DEBUG_UpdateIORegView(eDebug_IOReg category) { }
}; };
extern BaseDriver* driver; extern BaseDriver* driver;

View File

@ -96,39 +96,18 @@ public:
{ {
reset(); reset();
} }
void test() {
printf("test 1: 0x00412321\n");
receive(0x00412321); receive(1); receive(2); receive(3); receive(0);
//21: 1
//23: 2
//23: 3
//41: 4 (a dud parameter)
printf("test 2: 0x00002321\n");
receive(0x00002321); receive(1); receive(2); receive(3);
//21: 1
//23: 2
//23: 3
printf("test 3: 0x11111111\n");
receive(0x11111111);
receive(1);
//11: 1 (a dud parameter)
printf("test 4: 0x00000011, 0x29111111\n");
receive(0x00000011); receive(0x29111111); receive(1);
//11: 17
//11: 688984337
//11: 1
printf("test 5: 0x00004127\n");
receive(0x00004127); receive(1); receive(0);//final one should be a dud
}
void reset() { void reset() {
countdown = 0; countdown = 0;
commandsPending = std::queue<u8>(); commandCursor = 4;
countdowns = std::queue<u8>(); for(int i=0;i<4;i++) {
commandsPending[i].command = 0;
commandsPending[i].countdown = 0;
}
} }
//todo - things in here other than the very first thing involving GFX3D_NOP_NOARG_HACK I am not too sure about. //todo - things in here other than the very first thing involving GFX3D_NOP_NOARG_HACK I am not too sure about.
void receive(u32 val) { void receive(u32 val) {
bool hack = false; bool hack = false;
if(commandsPending.size()>0 && (commandsPending.front() == 0x15 || commandsPending.front() == GFX3D_NOP_NOARG_HACK || commandsPending.front() == 0x11 || commandsPending.front() == 0x41) && val != 0) { if(size()>0 && (front().command == 0x15 || front().command == GFX3D_NOP_NOARG_HACK || front().command == 0x11 || front().command == 0x41) && val != 0) {
//apparently a zero is swallowed in this case but if another value is sent //apparently a zero is swallowed in this case but if another value is sent
//processing will continue //processing will continue
//if(commandsPending.front() == GFX3D_NOP_NOARG_HACK) //if(commandsPending.front() == GFX3D_NOP_NOARG_HACK)
@ -136,7 +115,7 @@ public:
//else //else
{ {
//printf("gxf: sending hack %02X: (dummy=0)\n", commandsPending.front()); //printf("gxf: sending hack %02X: (dummy=0)\n", commandsPending.front());
GFX_FIFOsend(commandsPending.front(),0); GFX_FIFOsend(front().command,0);
} }
hack = true; hack = true;
goto hackTrigger; goto hackTrigger;
@ -147,21 +126,20 @@ public:
//if(commandsPending.front() == GFX3D_NOP_NOARG_HACK) //if(commandsPending.front() == GFX3D_NOP_NOARG_HACK)
//{} //{}
//else //else
GFX_FIFOsend(commandsPending.front(),val); GFX_FIFOsend(front().command,val);
hackTrigger: hackTrigger:
countdown--; countdown--;
while(countdown==0) { while(countdown==0) {
commandsPending.pop(); dequeue();
countdowns.pop();
trigger: trigger:
//dont set hack to false if you jumped from below! it needs to be true for when you jump down from above. //dont set hack to false if you jumped from below! it needs to be true for when you jump down from above.
//oh my what a mess. //oh my what a mess.
if(countdowns.empty()) break; if(size()==0) break;
countdown = countdowns.front(); countdown = front().countdown;
if(!countdown) { if(!countdown) {
if(commandsPending.front() != INVALID_COMMAND /*&& commandsPending.front() != GFX3D_NOP_NOARG_HACK*/) { if(front().command != INVALID_COMMAND /*&& commandsPending.front() != GFX3D_NOP_NOARG_HACK*/) {
//printf("[%06d]gxf: sending %02X: (dummy=0)\n", currFrameCounter,commandsPending.front()); //printf("[%06d]gxf: sending %02X: (dummy=0)\n", currFrameCounter,commandsPending.front());
GFX_FIFOsend(commandsPending.front(),0); GFX_FIFOsend(front().command,0);
} }
} }
} }
@ -182,11 +160,11 @@ decode:
u8 cmd = commands[i]; u8 cmd = commands[i];
u8 type = commandTypes[i]; u8 type = commandTypes[i];
if(type == INVALID_COMMAND) { if(type == INVALID_COMMAND) {
commandsPending.push(INVALID_COMMAND); commandsPending[i].command = INVALID_COMMAND;
} else { } else {
if(type == UNDEFINED_COMMAND) if(type == UNDEFINED_COMMAND)
commandsPending.push(GFX3D_NOP_NOARG_HACK); //enqueue a single undefined command we know how to handle commandsPending[i].command = GFX3D_NOP_NOARG_HACK; //enqueue a single undefined command we know how to handle
else commandsPending.push(cmd); else commandsPending[i].command = cmd;
} }
if(type == UNDEFINED_COMMAND || type == 0x00) { if(type == UNDEFINED_COMMAND || type == 0x00) {
//these are valid commands with no parameters. they might need special handling //these are valid commands with no parameters. they might need special handling
@ -199,35 +177,44 @@ decode:
} }
} }
if(safe) { if(safe) {
countdowns.push(0); commandsPending[i].countdown = 0;
} else { } else {
//we need to receive a dummy parameter in this case //we need to receive a dummy parameter in this case
countdowns.push(1); commandsPending[i].countdown = 1;
} }
} else if(type != INVALID_COMMAND) { } else if(type != INVALID_COMMAND) {
countdowns.push(type); commandsPending[i].countdown = type;
} else countdowns.push(0); } else commandsPending[i].countdown = 0;
} }
countdown = countdowns.front(); commandCursor = 0;
countdown = front().countdown;
if(countdown==0) if(countdown==0)
goto trigger; goto trigger;
} }
} }
std::queue<u8> commandsPending;
std::queue<u8> countdowns; struct CommandItem {
u8 command, countdown;
} commandsPending[4];
u32 commandCursor;
u8 countdown; u8 countdown;
private:
//drops the front pending command by advancing the cursor; the array slot itself is left untouched
void dequeue() { commandCursor++; }
//returns the pending command at the cursor.
//NOTE(review): no bounds check here -- commandsPending has 4 entries, so this is only valid while size()>0
CommandItem& front() { return commandsPending[commandCursor]; }
//number of pending commands remaining: slots from commandCursor up to the end of the 4-entry array
u32 size() { return 4-commandCursor; }
public:
void savestate(EMUFILE *f) void savestate(EMUFILE *f)
{ {
//TODO - next time we invalidate savestates, simplify this format.
write32le(0,f); //version write32le(0,f); //version
std::queue<u8> temp = commandsPending; write32le(size(),f);
write32le(temp.size(),f); for(u32 i=commandCursor;i<4;i++) write8le(commandsPending[i].command,f);
while(!temp.empty()) { write8le(temp.front(),f); temp.pop(); } write32le(0,f);
temp = countdowns; for(u32 i=commandCursor;i<4;i++) write8le(commandsPending[i].countdown,f);
write32le(temp.size(),f);
while(!temp.empty()) { write8le(temp.front(),f); temp.pop(); }
write8le(countdown,f); write8le(countdown,f);
} }
@ -237,14 +224,14 @@ decode:
if(read32le(&version,f) != 1) return false; if(read32le(&version,f) != 1) return false;
if(version != 0) return false; if(version != 0) return false;
assert(commandsPending.size()==0); u32 tempsize;
assert(countdowns.size()==0); read32le(&tempsize,f);
commandCursor = 4-tempsize;
u32 temp; for(u32 i=0;i<commandCursor;i++) commandsPending[i].command = 0;
read32le(&temp,f); for(u32 i=commandCursor;i<4;i++) read8le(&commandsPending[i].command,f);
for(u32 i=0;i<temp;i++) { u8 temp8; read8le(&temp8,f); commandsPending.push(temp8); } read32le(&tempsize,f);
read32le(&temp,f); for(u32 i=0;i<commandCursor;i++) commandsPending[i].countdown = 0;
for(u32 i=0;i<temp;i++) { u8 temp8; read8le(&temp8,f); countdowns.push(temp8); } for(u32 i=commandCursor;i<4;i++) read8le(&commandsPending[i].countdown,f);
read8le(&countdown,f); read8le(&countdown,f);
@ -363,7 +350,6 @@ static u32 clInd = 0;
static u32 clInd2 = 0; static u32 clInd2 = 0;
BOOL isSwapBuffers = FALSE; BOOL isSwapBuffers = FALSE;
bool isVBlank = false;
static u32 BTind = 0; static u32 BTind = 0;
static u32 PTind = 0; static u32 PTind = 0;
@ -553,7 +539,6 @@ void gfx3d_reset()
clInd2 = 0; clInd2 = 0;
isSwapBuffers = FALSE; isSwapBuffers = FALSE;
isVBlank = false;
GFX_PIPEclear(); GFX_PIPEclear();
GFX_FIFOclear(); GFX_FIFOclear();
@ -1713,6 +1698,8 @@ void gfx3d_execute3D()
if(GFX_PIPErecv(&cmd, &param)) if(GFX_PIPErecv(&cmd, &param))
{ {
//if (isSwapBuffers) printf("Executing while swapbuffers is pending: %d:%08X\n",cmd,param);
//since we did anything at all, incur a pipeline motion cost. //since we did anything at all, incur a pipeline motion cost.
//also, we can't let gxfifo sequencer stall until the fifo is empty. //also, we can't let gxfifo sequencer stall until the fifo is empty.
//see... //see...
@ -1746,6 +1733,7 @@ void gfx3d_execute3D()
void gfx3d_glFlush(u32 v) void gfx3d_glFlush(u32 v)
{ {
//printf("-------------FLUSH------------- (vcount=%d\n",nds.VCount);
gfx3d.sortmode = BIT0(v); gfx3d.sortmode = BIT0(v);
gfx3d.wbuffer = BIT1(v); gfx3d.wbuffer = BIT1(v);
#if 0 #if 0
@ -1871,7 +1859,6 @@ static void gfx3d_doFlush()
void gfx3d_VBlankSignal() void gfx3d_VBlankSignal()
{ {
isVBlank = true;
if (isSwapBuffers) if (isSwapBuffers)
{ {
#ifndef FLUSHMODE_HACK #ifndef FLUSHMODE_HACK
@ -1884,8 +1871,6 @@ void gfx3d_VBlankSignal()
void gfx3d_VBlankEndSignal(bool skipFrame) void gfx3d_VBlankEndSignal(bool skipFrame)
{ {
isVBlank = false;
if (!drawPending) return; if (!drawPending) return;
drawPending = FALSE; drawPending = FALSE;

View File

@ -282,6 +282,7 @@ static const int kYMargin = 1;
typedef std::vector<CIORegView*> TIORegViewList; typedef std::vector<CIORegView*> TIORegViewList;
static TIORegViewList liveIORegViews; static TIORegViewList liveIORegViews;
bool anyLiveIORegViews = false;
void RefreshAllIORegViews() void RefreshAllIORegViews()
{ {
@ -303,6 +304,7 @@ CIORegView::CIORegView()
, yoff(0) , yoff(0)
{ {
liveIORegViews.push_back(this); liveIORegViews.push_back(this);
anyLiveIORegViews = true;
} }
CIORegView::~CIORegView() CIORegView::~CIORegView()
@ -311,6 +313,7 @@ CIORegView::~CIORegView()
UnregWndClass("DeSmuME_IORegView"); UnregWndClass("DeSmuME_IORegView");
//TODO - is this thread safe? which thread do these calls come from //TODO - is this thread safe? which thread do these calls come from
liveIORegViews.erase(std::find(liveIORegViews.begin(),liveIORegViews.end(),this)); liveIORegViews.erase(std::find(liveIORegViews.begin(),liveIORegViews.end(),this));
if(liveIORegViews.size()==0) anyLiveIORegViews = false;
} }
/*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/

View File

@ -1819,6 +1819,13 @@ class WinDriver : public BaseDriver
return ESTEP_DONE; return ESTEP_DONE;
} }
//windows implementation of the IO register debug hook: refreshes every open IORegView window.
//the category argument is currently ignored; all views are refreshed regardless of it.
virtual void DEBUG_UpdateIORegView(eDebug_IOReg category)
{
//flag maintained by the IORegView code; checking it first skips the refresh call when no viewer is open
extern bool anyLiveIORegViews;
if(anyLiveIORegViews)
RefreshAllIORegViews();
}
}; };
std::string GetPrivateProfileStdString(LPCSTR lpAppName,LPCSTR lpKeyName,LPCSTR lpDefault) std::string GetPrivateProfileStdString(LPCSTR lpAppName,LPCSTR lpKeyName,LPCSTR lpDefault)