Fix glitches in DMA and poor code in command unpacking, which were causing some slowdown. Clean up the IORegView refresh-triggering code and add a note about how to cut the compile time of gpu.cpp in half.

This commit is contained in:
zeromus 2009-09-22 03:02:39 +00:00
parent 14c011d8de
commit 6cf200b1e1
8 changed files with 92 additions and 85 deletions

View File

@ -42,6 +42,9 @@
//#define FORCEINLINE
//#define SSE2_NOINTRIN
//compilation speed hack (cuts time exactly in half by cutting out permutations)
//#define DISABLE_MOSAIC
extern BOOL click;
NDS_Screen MainScreen;
NDS_Screen SubScreen;
@ -2133,9 +2136,12 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
//useful for debugging individual layers
//if(gpu->core == 1 || i16 != 2) continue;
#ifndef DISABLE_MOSAIC
if(gpu->curr_mosaic_enabled)
gpu->modeRender<true>(i16);
else gpu->modeRender<false>(i16);
else
#endif
gpu->modeRender<false>(i16);
} //layer enabled
}
}

View File

@ -47,10 +47,6 @@
#include "readwrite.h"
#include "MMU_timing.h"
#ifdef WIN32
#include "windows/IORegView.h"
#endif
#ifdef DO_ASSERT_UNALIGNED
#define ASSERT_UNALIGNED(x) assert(x)
#else
@ -767,6 +763,7 @@ static inline void MMU_VRAMmapControl(u8 block, u8 VRAMBankCnt)
MMU_VRAMmapRefreshBank(i);
//printf(vramConfiguration.describe().c_str());
//printf("vram remapped at vcount=%d\n",nds.VCount);
//if texInfo changed, trigger notifications
if(memcmp(&oldTexInfo,&MMU.texInfo,sizeof(MMU_struct::TextureInfo)))
@ -1815,7 +1812,7 @@ u32 TGXSTAT::read32()
ret |= gxfifo_irq; //user's irq flags
//printf("Returning gxstat read: %08X\n",ret);
//printf("vc=%03d Returning gxstat read: %08X\n",nds.VCount,ret);
return ret;
}
@ -1995,10 +1992,7 @@ if(_startmode==0 && wordcount==1) {
if(!doNotStart)
doSchedule();
//todo - make a driver stub for this so that we dont have to conditionalize it everywhere
#ifdef WIN32
RefreshAllIORegViews();
#endif
driver->DEBUG_UpdateIORegView(BaseDriver::EDEBUG_IOREG_DMA);
}
void DmaController::exec()
@ -2050,7 +2044,7 @@ start:
if(triggered)
{
//if(procnum==0) printf("%08lld trig type %d dma#%d with words %d at src:%08X dst:%08X gxf:%d",nds_timer,startmode,chan,wordcount,saddr,daddr,gxFIFO.size);
//if(procnum==0) printf("vc=%03d %08lld trig type %d dma#%d w/words %d at src:%08X dst:%08X gxf:%d",nds.VCount,nds_timer,startmode,chan,wordcount,saddr,daddr,gxFIFO.size);
if(saddr ==0x023BCCEC && wordcount==118) {
int zzz=9;
}
@ -2064,9 +2058,7 @@ start:
}
}
#ifdef WIN32
RefreshAllIORegViews();
#endif
driver->DEBUG_UpdateIORegView(BaseDriver::EDEBUG_IOREG_DMA);
}
void DmaController::doCopy()
@ -2162,6 +2154,7 @@ void triggerDma(EDMAMode mode)
void DmaController::tryTrigger(EDMAMode mode)
{
if(startmode != mode) return;
if(!enable) return;
//hmm dont trigger it if its already running!
//but paused things need triggers to continue
@ -2696,13 +2689,13 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val)
#endif
if(val & (1<<15))
{
//LOG("Main core on top\n");
//printf("Main core on top (vcount=%d)\n",nds.VCount);
MainScreen.offset = 0;
SubScreen.offset = 192;
}
else
{
//LOG("Main core on bottom\n");
//printf("Main core on bottom (vcount=%d)\n",nds.VCount);
MainScreen.offset = 192;
SubScreen.offset = 0;
}

View File

@ -44,6 +44,7 @@
// makes non-sequential accesses slower than sequential ones.
#define ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS
//(SOMETIMES THIS IS A BIG SPEED HIT!)
// enables emulation of code fetch waits.
#define ACCOUNT_FOR_CODE_FETCH_CYCLES

View File

@ -1953,9 +1953,12 @@ static void execHardware_hstart_vcount()
u16 vmatch = T1ReadWord(MMU.ARM9_REG, 4);
if(nds.VCount==((vmatch>>8)|((vmatch<<1)&(1<<8))))
{
//arm9 vmatch
T1WriteWord(MMU.ARM9_REG, 4, T1ReadWord(MMU.ARM9_REG, 4) | 4);
if(T1ReadWord(MMU.ARM9_REG, 4) & 32)
if(T1ReadWord(MMU.ARM9_REG, 4) & 32) {
//printf("VMATCH FIRING! vc=%03d\n",nds.VCount);
NDS_makeARM9Int(2);
}
}
else
T1WriteWord(MMU.ARM9_REG, 4, T1ReadWord(MMU.ARM9_REG, 4) & 0xFFFB);
@ -1963,6 +1966,7 @@ static void execHardware_hstart_vcount()
vmatch = T1ReadWord(MMU.ARM7_REG, 4);
if(nds.VCount==((vmatch>>8)|((vmatch<<1)&(1<<8))))
{
//arm7 vmatch
T1WriteWord(MMU.ARM7_REG, 4, T1ReadWord(MMU.ARM7_REG, 4) | 4);
if(T1ReadWord(MMU.ARM7_REG, 4) & 32)
NDS_makeARM7Int(2);
@ -2153,7 +2157,7 @@ bool nds_loadstate(EMUFILE* is, int size)
//#define LOG_ARM9
//#define LOG_ARM7
//static bool dolog = false;
//static bool dolog = true;
FORCEINLINE void arm9log()
{
@ -2311,8 +2315,8 @@ void NDS_exec(s32 nb)
#ifndef NDEBUG
//what we find here is dependent on the timing constants above
if(nds_timer>next && (nds_timer-next)>22)
printf("curious. please report: over by %d\n",(int)(nds_timer-next));
//if(nds_timer>next && (nds_timer-next)>22)
// printf("curious. please report: over by %d\n",(int)(nds_timer-next));
#endif
//if we were waiting for an irq, don't wait too long:
@ -2347,6 +2351,7 @@ void execHardware_interrupts()
if ( armcpu_irqException(&NDS_ARM9))
#endif
{
//printf("ARM9 interrupt! flags: %08X ; mask: %08X ; result: %08X\n",MMU.reg_IF[0],MMU.reg_IE[0],MMU.reg_IF[0]&MMU.reg_IE[0]);
//nds.ARM9Cycle = nds.cycles;
}
}

View File

@ -53,6 +53,13 @@ public:
virtual bool EMU_IsFastForwarding() { return false; }
virtual bool EMU_HasEmulationStarted() { return true; }
virtual bool EMU_IsAtFrameBoundary() { return true; }
//categories of I/O register activity; one of these is passed to DEBUG_UpdateIORegView
//so that a driver can tell what kind of state may have changed.
enum eDebug_IOReg
{
EDEBUG_IOREG_DMA //passed by the DMA controller code; currently the only category
};
//debug hook invoked by the emulation core when I/O register state in the given
//category may have changed. the default does nothing, so core code can call it
//unconditionally; a driver that has an I/O register viewer overrides it.
virtual void DEBUG_UpdateIORegView(eDebug_IOReg category) { }
};
extern BaseDriver* driver;

View File

@ -96,39 +96,18 @@ public:
{
reset();
}
void test() {
printf("test 1: 0x00412321\n");
receive(0x00412321); receive(1); receive(2); receive(3); receive(0);
//21: 1
//23: 2
//23: 3
//41: 4 (a dud parameter)
printf("test 2: 0x00002321\n");
receive(0x00002321); receive(1); receive(2); receive(3);
//21: 1
//23: 2
//23: 3
printf("test 3: 0x11111111\n");
receive(0x11111111);
receive(1);
//11: 1 (a dud parameter)
printf("test 4: 0x00000011, 0x29111111\n");
receive(0x00000011); receive(0x29111111); receive(1);
//11: 17
//11: 688984337
//11: 1
printf("test 5: 0x00004127\n");
receive(0x00004127); receive(1); receive(0);//final one should be a dud
}
void reset() {
countdown = 0;
commandsPending = std::queue<u8>();
countdowns = std::queue<u8>();
commandCursor = 4;
for(int i=0;i<4;i++) {
commandsPending[i].command = 0;
commandsPending[i].countdown = 0;
}
}
//todo - things in here other than the very first thing involving GFX3D_NOP_NOARG_HACK I am not too sure about.
void receive(u32 val) {
bool hack = false;
if(commandsPending.size()>0 && (commandsPending.front() == 0x15 || commandsPending.front() == GFX3D_NOP_NOARG_HACK || commandsPending.front() == 0x11 || commandsPending.front() == 0x41) && val != 0) {
if(size()>0 && (front().command == 0x15 || front().command == GFX3D_NOP_NOARG_HACK || front().command == 0x11 || front().command == 0x41) && val != 0) {
//apparently a zero is swallowed in this case but if another value is sent
//processing will continue
//if(commandsPending.front() == GFX3D_NOP_NOARG_HACK)
@ -136,7 +115,7 @@ public:
//else
{
//printf("gxf: sending hack %02X: (dummy=0)\n", commandsPending.front());
GFX_FIFOsend(commandsPending.front(),0);
GFX_FIFOsend(front().command,0);
}
hack = true;
goto hackTrigger;
@ -147,21 +126,20 @@ public:
//if(commandsPending.front() == GFX3D_NOP_NOARG_HACK)
//{}
//else
GFX_FIFOsend(commandsPending.front(),val);
GFX_FIFOsend(front().command,val);
hackTrigger:
countdown--;
while(countdown==0) {
commandsPending.pop();
countdowns.pop();
dequeue();
trigger:
//dont set hack to false if you jumped from below! it needs to be true for when you jump down from above.
//oh my what a mess.
if(countdowns.empty()) break;
countdown = countdowns.front();
if(size()==0) break;
countdown = front().countdown;
if(!countdown) {
if(commandsPending.front() != INVALID_COMMAND /*&& commandsPending.front() != GFX3D_NOP_NOARG_HACK*/) {
if(front().command != INVALID_COMMAND /*&& commandsPending.front() != GFX3D_NOP_NOARG_HACK*/) {
//printf("[%06d]gxf: sending %02X: (dummy=0)\n", currFrameCounter,commandsPending.front());
GFX_FIFOsend(commandsPending.front(),0);
GFX_FIFOsend(front().command,0);
}
}
}
@ -182,11 +160,11 @@ decode:
u8 cmd = commands[i];
u8 type = commandTypes[i];
if(type == INVALID_COMMAND) {
commandsPending.push(INVALID_COMMAND);
commandsPending[i].command = INVALID_COMMAND;
} else {
if(type == UNDEFINED_COMMAND)
commandsPending.push(GFX3D_NOP_NOARG_HACK); //enqueue a single undefined command we know how to handle
else commandsPending.push(cmd);
commandsPending[i].command = GFX3D_NOP_NOARG_HACK; //enqueue a single undefined command we know how to handle
else commandsPending[i].command = cmd;
}
if(type == UNDEFINED_COMMAND || type == 0x00) {
//these are valid commands with no parameters. they might need special handling
@ -199,35 +177,44 @@ decode:
}
}
if(safe) {
countdowns.push(0);
commandsPending[i].countdown = 0;
} else {
//we need to receive a dummy parameter in this case
countdowns.push(1);
commandsPending[i].countdown = 1;
}
} else if(type != INVALID_COMMAND) {
countdowns.push(type);
} else countdowns.push(0);
commandsPending[i].countdown = type;
} else commandsPending[i].countdown = 0;
}
countdown = countdowns.front();
commandCursor = 0;
countdown = front().countdown;
if(countdown==0)
goto trigger;
}
}
std::queue<u8> commandsPending;
std::queue<u8> countdowns;
//one decoded command slot: the command byte to send, and the initial countdown
//for it (loaded into the working 'countdown' below when the slot reaches the front;
//presumably the number of parameter words still expected -- TODO confirm).
struct CommandItem {
u8 command, countdown;
} commandsPending[4];
//index of the front slot in commandsPending. slots from here through index 3 are
//pending; a value of 4 means nothing is pending (this is what reset() sets).
u32 commandCursor;
//working countdown for the front slot; receive() decrements it and moves on to
//the next slot once it reaches zero.
u8 countdown;
private:
//discard the front slot by stepping the cursor forward
void dequeue()
{
	commandCursor += 1;
}
//the slot currently at the cursor. no bounds check is performed here.
CommandItem& front()
{
	CommandItem& item = commandsPending[commandCursor];
	return item;
}
//number of slots from the cursor through the last of the 4 slots
u32 size()
{
	const u32 pending = 4-commandCursor;
	return pending;
}
public:
void savestate(EMUFILE *f)
{
//TODO - next time we invalidate savestates, simplify this format.
write32le(0,f); //version
std::queue<u8> temp = commandsPending;
write32le(temp.size(),f);
while(!temp.empty()) { write8le(temp.front(),f); temp.pop(); }
temp = countdowns;
write32le(temp.size(),f);
while(!temp.empty()) { write8le(temp.front(),f); temp.pop(); }
write32le(size(),f);
for(u32 i=commandCursor;i<4;i++) write8le(commandsPending[i].command,f);
write32le(0,f);
for(u32 i=commandCursor;i<4;i++) write8le(commandsPending[i].countdown,f);
write8le(countdown,f);
}
@ -237,14 +224,14 @@ decode:
if(read32le(&version,f) != 1) return false;
if(version != 0) return false;
assert(commandsPending.size()==0);
assert(countdowns.size()==0);
u32 temp;
read32le(&temp,f);
for(u32 i=0;i<temp;i++) { u8 temp8; read8le(&temp8,f); commandsPending.push(temp8); }
read32le(&temp,f);
for(u32 i=0;i<temp;i++) { u8 temp8; read8le(&temp8,f); countdowns.push(temp8); }
u32 tempsize;
read32le(&tempsize,f);
commandCursor = 4-tempsize;
for(u32 i=0;i<commandCursor;i++) commandsPending[i].command = 0;
for(u32 i=commandCursor;i<4;i++) read8le(&commandsPending[i].command,f);
read32le(&tempsize,f);
for(u32 i=0;i<commandCursor;i++) commandsPending[i].countdown = 0;
for(u32 i=commandCursor;i<4;i++) read8le(&commandsPending[i].countdown,f);
read8le(&countdown,f);
@ -363,7 +350,6 @@ static u32 clInd = 0;
static u32 clInd2 = 0;
BOOL isSwapBuffers = FALSE;
bool isVBlank = false;
static u32 BTind = 0;
static u32 PTind = 0;
@ -553,7 +539,6 @@ void gfx3d_reset()
clInd2 = 0;
isSwapBuffers = FALSE;
isVBlank = false;
GFX_PIPEclear();
GFX_FIFOclear();
@ -1713,6 +1698,8 @@ void gfx3d_execute3D()
if(GFX_PIPErecv(&cmd, &param))
{
//if (isSwapBuffers) printf("Executing while swapbuffers is pending: %d:%08X\n",cmd,param);
//since we did anything at all, incur a pipeline motion cost.
//also, we can't let gxfifo sequencer stall until the fifo is empty.
//see...
@ -1746,6 +1733,7 @@ void gfx3d_execute3D()
void gfx3d_glFlush(u32 v)
{
//printf("-------------FLUSH------------- (vcount=%d\n",nds.VCount);
gfx3d.sortmode = BIT0(v);
gfx3d.wbuffer = BIT1(v);
#if 0
@ -1871,7 +1859,6 @@ static void gfx3d_doFlush()
void gfx3d_VBlankSignal()
{
isVBlank = true;
if (isSwapBuffers)
{
#ifndef FLUSHMODE_HACK
@ -1884,8 +1871,6 @@ void gfx3d_VBlankSignal()
void gfx3d_VBlankEndSignal(bool skipFrame)
{
isVBlank = false;
if (!drawPending) return;
drawPending = FALSE;

View File

@ -282,6 +282,7 @@ static const int kYMargin = 1;
typedef std::vector<CIORegView*> TIORegViewList;
static TIORegViewList liveIORegViews;
bool anyLiveIORegViews = false;
void RefreshAllIORegViews()
{
@ -303,6 +304,7 @@ CIORegView::CIORegView()
, yoff(0)
{
liveIORegViews.push_back(this);
anyLiveIORegViews = true;
}
CIORegView::~CIORegView()
@ -311,6 +313,7 @@ CIORegView::~CIORegView()
UnregWndClass("DeSmuME_IORegView");
//TODO - is this thread safe? which thread do these calls come from
liveIORegViews.erase(std::find(liveIORegViews.begin(),liveIORegViews.end(),this));
if(liveIORegViews.size()==0) anyLiveIORegViews = false;
}
/*--------------------------------------------------------------------------*/

View File

@ -1819,6 +1819,13 @@ class WinDriver : public BaseDriver
return ESTEP_DONE;
}
//windows implementation of the I/O register debug hook: refreshes every open
//IORegView window. 'category' is not currently used to filter the refresh.
virtual void DEBUG_UpdateIORegView(eDebug_IOReg category)
{
	//flag maintained by the IORegView code; true while at least one view exists
	extern bool anyLiveIORegViews;

	//skip the refresh call entirely when no view windows are open
	if(!anyLiveIORegViews)
		return;

	RefreshAllIORegViews();
}
};
std::string GetPrivateProfileStdString(LPCSTR lpAppName,LPCSTR lpKeyName,LPCSTR lpDefault)