Fix glitches in DMA and poor code in command unpacking which were causing some slowdown. Clean up ioregview refresh triggering code and add a note about how to cut compile time of gpu.cpp in half.
This commit is contained in:
parent
14c011d8de
commit
6cf200b1e1
|
@ -42,6 +42,9 @@
|
|||
//#define FORCEINLINE
|
||||
//#define SSE2_NOINTRIN
|
||||
|
||||
//compilation speed hack (cuts time exactly in half by cutting out permutations)
|
||||
//#define DISABLE_MOSAIC
|
||||
|
||||
extern BOOL click;
|
||||
NDS_Screen MainScreen;
|
||||
NDS_Screen SubScreen;
|
||||
|
@ -2133,9 +2136,12 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
|
|||
//useful for debugging individual layers
|
||||
//if(gpu->core == 1 || i16 != 2) continue;
|
||||
|
||||
#ifndef DISABLE_MOSAIC
|
||||
if(gpu->curr_mosaic_enabled)
|
||||
gpu->modeRender<true>(i16);
|
||||
else gpu->modeRender<false>(i16);
|
||||
else
|
||||
#endif
|
||||
gpu->modeRender<false>(i16);
|
||||
} //layer enabled
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,10 +47,6 @@
|
|||
#include "readwrite.h"
|
||||
#include "MMU_timing.h"
|
||||
|
||||
#ifdef WIN32
|
||||
#include "windows/IORegView.h"
|
||||
#endif
|
||||
|
||||
#ifdef DO_ASSERT_UNALIGNED
|
||||
#define ASSERT_UNALIGNED(x) assert(x)
|
||||
#else
|
||||
|
@ -767,6 +763,7 @@ static inline void MMU_VRAMmapControl(u8 block, u8 VRAMBankCnt)
|
|||
MMU_VRAMmapRefreshBank(i);
|
||||
|
||||
//printf(vramConfiguration.describe().c_str());
|
||||
//printf("vram remapped at vcount=%d\n",nds.VCount);
|
||||
|
||||
//if texInfo changed, trigger notifications
|
||||
if(memcmp(&oldTexInfo,&MMU.texInfo,sizeof(MMU_struct::TextureInfo)))
|
||||
|
@ -1815,7 +1812,7 @@ u32 TGXSTAT::read32()
|
|||
|
||||
ret |= gxfifo_irq; //user's irq flags
|
||||
|
||||
//printf("Returning gxstat read: %08X\n",ret);
|
||||
//printf("vc=%03d Returning gxstat read: %08X\n",nds.VCount,ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1995,10 +1992,7 @@ if(_startmode==0 && wordcount==1) {
|
|||
if(!doNotStart)
|
||||
doSchedule();
|
||||
|
||||
//todo - make a driver stub for this so that we dont have to conditionalize it everywhere
|
||||
#ifdef WIN32
|
||||
RefreshAllIORegViews();
|
||||
#endif
|
||||
driver->DEBUG_UpdateIORegView(BaseDriver::EDEBUG_IOREG_DMA);
|
||||
}
|
||||
|
||||
void DmaController::exec()
|
||||
|
@ -2050,7 +2044,7 @@ start:
|
|||
|
||||
if(triggered)
|
||||
{
|
||||
//if(procnum==0) printf("%08lld trig type %d dma#%d with words %d at src:%08X dst:%08X gxf:%d",nds_timer,startmode,chan,wordcount,saddr,daddr,gxFIFO.size);
|
||||
//if(procnum==0) printf("vc=%03d %08lld trig type %d dma#%d w/words %d at src:%08X dst:%08X gxf:%d",nds.VCount,nds_timer,startmode,chan,wordcount,saddr,daddr,gxFIFO.size);
|
||||
if(saddr ==0x023BCCEC && wordcount==118) {
|
||||
int zzz=9;
|
||||
}
|
||||
|
@ -2064,9 +2058,7 @@ start:
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef WIN32
|
||||
RefreshAllIORegViews();
|
||||
#endif
|
||||
driver->DEBUG_UpdateIORegView(BaseDriver::EDEBUG_IOREG_DMA);
|
||||
}
|
||||
|
||||
void DmaController::doCopy()
|
||||
|
@ -2162,6 +2154,7 @@ void triggerDma(EDMAMode mode)
|
|||
void DmaController::tryTrigger(EDMAMode mode)
|
||||
{
|
||||
if(startmode != mode) return;
|
||||
if(!enable) return;
|
||||
|
||||
//hmm dont trigger it if its already running!
|
||||
//but paused things need triggers to continue
|
||||
|
@ -2696,13 +2689,13 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val)
|
|||
#endif
|
||||
if(val & (1<<15))
|
||||
{
|
||||
//LOG("Main core on top\n");
|
||||
//printf("Main core on top (vcount=%d)\n",nds.VCount);
|
||||
MainScreen.offset = 0;
|
||||
SubScreen.offset = 192;
|
||||
}
|
||||
else
|
||||
{
|
||||
//LOG("Main core on bottom\n");
|
||||
//printf("Main core on bottom (vcount=%d)\n",nds.VCount);
|
||||
MainScreen.offset = 192;
|
||||
SubScreen.offset = 0;
|
||||
}
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
|
||||
// makes non-sequential accesses slower than sequential ones.
|
||||
#define ACCOUNT_FOR_NON_SEQUENTIAL_ACCESS
|
||||
//(SOMETIMES THIS IS A BIG SPEED HIT!)
|
||||
|
||||
// enables emulation of code fetch waits.
|
||||
#define ACCOUNT_FOR_CODE_FETCH_CYCLES
|
||||
|
|
|
@ -1953,9 +1953,12 @@ static void execHardware_hstart_vcount()
|
|||
u16 vmatch = T1ReadWord(MMU.ARM9_REG, 4);
|
||||
if(nds.VCount==((vmatch>>8)|((vmatch<<1)&(1<<8))))
|
||||
{
|
||||
//arm9 vmatch
|
||||
T1WriteWord(MMU.ARM9_REG, 4, T1ReadWord(MMU.ARM9_REG, 4) | 4);
|
||||
if(T1ReadWord(MMU.ARM9_REG, 4) & 32)
|
||||
if(T1ReadWord(MMU.ARM9_REG, 4) & 32) {
|
||||
//printf("VMATCH FIRING! vc=%03d\n",nds.VCount);
|
||||
NDS_makeARM9Int(2);
|
||||
}
|
||||
}
|
||||
else
|
||||
T1WriteWord(MMU.ARM9_REG, 4, T1ReadWord(MMU.ARM9_REG, 4) & 0xFFFB);
|
||||
|
@ -1963,6 +1966,7 @@ static void execHardware_hstart_vcount()
|
|||
vmatch = T1ReadWord(MMU.ARM7_REG, 4);
|
||||
if(nds.VCount==((vmatch>>8)|((vmatch<<1)&(1<<8))))
|
||||
{
|
||||
//arm7 vmatch
|
||||
T1WriteWord(MMU.ARM7_REG, 4, T1ReadWord(MMU.ARM7_REG, 4) | 4);
|
||||
if(T1ReadWord(MMU.ARM7_REG, 4) & 32)
|
||||
NDS_makeARM7Int(2);
|
||||
|
@ -2153,7 +2157,7 @@ bool nds_loadstate(EMUFILE* is, int size)
|
|||
|
||||
//#define LOG_ARM9
|
||||
//#define LOG_ARM7
|
||||
//static bool dolog = false;
|
||||
//static bool dolog = true;
|
||||
|
||||
FORCEINLINE void arm9log()
|
||||
{
|
||||
|
@ -2311,8 +2315,8 @@ void NDS_exec(s32 nb)
|
|||
|
||||
#ifndef NDEBUG
|
||||
//what we find here is dependent on the timing constants above
|
||||
if(nds_timer>next && (nds_timer-next)>22)
|
||||
printf("curious. please report: over by %d\n",(int)(nds_timer-next));
|
||||
//if(nds_timer>next && (nds_timer-next)>22)
|
||||
// printf("curious. please report: over by %d\n",(int)(nds_timer-next));
|
||||
#endif
|
||||
|
||||
//if we were waiting for an irq, don't wait too long:
|
||||
|
@ -2347,6 +2351,7 @@ void execHardware_interrupts()
|
|||
if ( armcpu_irqException(&NDS_ARM9))
|
||||
#endif
|
||||
{
|
||||
//printf("ARM9 interrupt! flags: %08X ; mask: %08X ; result: %08X\n",MMU.reg_IF[0],MMU.reg_IE[0],MMU.reg_IF[0]&MMU.reg_IE[0]);
|
||||
//nds.ARM9Cycle = nds.cycles;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,6 +53,13 @@ public:
|
|||
virtual bool EMU_IsFastForwarding() { return false; }
|
||||
virtual bool EMU_HasEmulationStarted() { return true; }
|
||||
virtual bool EMU_IsAtFrameBoundary() { return true; }
|
||||
|
||||
//categories of IO register activity that the core can report to the frontend's
//IO register viewer. only DMA is defined so far.
enum eDebug_IOReg
|
||||
{
|
||||
EDEBUG_IOREG_DMA
|
||||
};
|
||||

||||
//driver stub: called by the core when registers in the given category have changed.
//the base implementation does nothing, so ports without an IO register viewer
//need no conditional compilation at the call sites.
virtual void DEBUG_UpdateIORegView(eDebug_IOReg category) { }
|
||||
};
|
||||
extern BaseDriver* driver;
|
||||
|
||||
|
|
|
@ -96,39 +96,18 @@ public:
|
|||
{
|
||||
reset();
|
||||
}
|
||||
void test() {
|
||||
printf("test 1: 0x00412321\n");
|
||||
receive(0x00412321); receive(1); receive(2); receive(3); receive(0);
|
||||
//21: 1
|
||||
//23: 2
|
||||
//23: 3
|
||||
//41: 4 (a dud parameter)
|
||||
printf("test 2: 0x00002321\n");
|
||||
receive(0x00002321); receive(1); receive(2); receive(3);
|
||||
//21: 1
|
||||
//23: 2
|
||||
//23: 3
|
||||
printf("test 3: 0x11111111\n");
|
||||
receive(0x11111111);
|
||||
receive(1);
|
||||
//11: 1 (a dud parameter)
|
||||
printf("test 4: 0x00000011, 0x29111111\n");
|
||||
receive(0x00000011); receive(0x29111111); receive(1);
|
||||
//11: 17
|
||||
//11: 688984337
|
||||
//11: 1
|
||||
printf("test 5: 0x00004127\n");
|
||||
receive(0x00004127); receive(1); receive(0);//final one should be a dud
|
||||
}
|
||||
void reset() {
|
||||
countdown = 0;
|
||||
commandsPending = std::queue<u8>();
|
||||
countdowns = std::queue<u8>();
|
||||
commandCursor = 4;
|
||||
for(int i=0;i<4;i++) {
|
||||
commandsPending[i].command = 0;
|
||||
commandsPending[i].countdown = 0;
|
||||
}
|
||||
}
|
||||
//todo - things in here other than the very first thing involving GFX3D_NOP_NOARG_HACK I am not too sure about.
|
||||
void receive(u32 val) {
|
||||
bool hack = false;
|
||||
if(commandsPending.size()>0 && (commandsPending.front() == 0x15 || commandsPending.front() == GFX3D_NOP_NOARG_HACK || commandsPending.front() == 0x11 || commandsPending.front() == 0x41) && val != 0) {
|
||||
if(size()>0 && (front().command == 0x15 || front().command == GFX3D_NOP_NOARG_HACK || front().command == 0x11 || front().command == 0x41) && val != 0) {
|
||||
//apparently a zero is swallowed in this case but if another value is sent
|
||||
//processing will continue
|
||||
//if(commandsPending.front() == GFX3D_NOP_NOARG_HACK)
|
||||
|
@ -136,7 +115,7 @@ public:
|
|||
//else
|
||||
{
|
||||
//printf("gxf: sending hack %02X: (dummy=0)\n", commandsPending.front());
|
||||
GFX_FIFOsend(commandsPending.front(),0);
|
||||
GFX_FIFOsend(front().command,0);
|
||||
}
|
||||
hack = true;
|
||||
goto hackTrigger;
|
||||
|
@ -147,21 +126,20 @@ public:
|
|||
//if(commandsPending.front() == GFX3D_NOP_NOARG_HACK)
|
||||
//{}
|
||||
//else
|
||||
GFX_FIFOsend(commandsPending.front(),val);
|
||||
GFX_FIFOsend(front().command,val);
|
||||
hackTrigger:
|
||||
countdown--;
|
||||
while(countdown==0) {
|
||||
commandsPending.pop();
|
||||
countdowns.pop();
|
||||
dequeue();
|
||||
trigger:
|
||||
//dont set hack to false if you jumped from below! it needs to be true for when you jump down from above.
|
||||
//oh my what a mess.
|
||||
if(countdowns.empty()) break;
|
||||
countdown = countdowns.front();
|
||||
if(size()==0) break;
|
||||
countdown = front().countdown;
|
||||
if(!countdown) {
|
||||
if(commandsPending.front() != INVALID_COMMAND /*&& commandsPending.front() != GFX3D_NOP_NOARG_HACK*/) {
|
||||
if(front().command != INVALID_COMMAND /*&& commandsPending.front() != GFX3D_NOP_NOARG_HACK*/) {
|
||||
//printf("[%06d]gxf: sending %02X: (dummy=0)\n", currFrameCounter,commandsPending.front());
|
||||
GFX_FIFOsend(commandsPending.front(),0);
|
||||
GFX_FIFOsend(front().command,0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -182,11 +160,11 @@ decode:
|
|||
u8 cmd = commands[i];
|
||||
u8 type = commandTypes[i];
|
||||
if(type == INVALID_COMMAND) {
|
||||
commandsPending.push(INVALID_COMMAND);
|
||||
commandsPending[i].command = INVALID_COMMAND;
|
||||
} else {
|
||||
if(type == UNDEFINED_COMMAND)
|
||||
commandsPending.push(GFX3D_NOP_NOARG_HACK); //enqueue a single undefined command we know how to handle
|
||||
else commandsPending.push(cmd);
|
||||
commandsPending[i].command = GFX3D_NOP_NOARG_HACK; //enqueue a single undefined command we know how to handle
|
||||
else commandsPending[i].command = cmd;
|
||||
}
|
||||
if(type == UNDEFINED_COMMAND || type == 0x00) {
|
||||
//these are valid commands with no parameters. they might need special handling
|
||||
|
@ -199,35 +177,44 @@ decode:
|
|||
}
|
||||
}
|
||||
if(safe) {
|
||||
countdowns.push(0);
|
||||
commandsPending[i].countdown = 0;
|
||||
} else {
|
||||
//we need to receive a dummy parameter in this case
|
||||
countdowns.push(1);
|
||||
commandsPending[i].countdown = 1;
|
||||
}
|
||||
} else if(type != INVALID_COMMAND) {
|
||||
countdowns.push(type);
|
||||
} else countdowns.push(0);
|
||||
commandsPending[i].countdown = type;
|
||||
} else commandsPending[i].countdown = 0;
|
||||
}
|
||||
|
||||
countdown = countdowns.front();
|
||||
commandCursor = 0;
|
||||
countdown = front().countdown;
|
||||
if(countdown==0)
|
||||
goto trigger;
|
||||
}
|
||||
}
|
||||
std::queue<u8> commandsPending;
|
||||
std::queue<u8> countdowns;
|
||||
|
||||
//one unpacked command byte together with the countdown value assigned to it at decode time.
//commandsPending is a fixed 4-slot array that is consumed front to back via commandCursor.
struct CommandItem {
|
||||
u8 command, countdown;
|
||||
} commandsPending[4];
|
||||

||||
//index of the current front slot in commandsPending; 4 means nothing is pending
u32 commandCursor;
|
||||
//working countdown for the front command (loaded from front().countdown)
u8 countdown;
|
||||

||||
private:
|
||||
//drops the front item by advancing the cursor; does not clear the slot
void dequeue() { commandCursor++; }
|
||||
//returns the front item. performs no bounds check: only valid while size()>0
CommandItem& front() { return commandsPending[commandCursor]; }
|
||||
//number of items still pending (0..4 while commandCursor stays within 0..4)
u32 size() { return 4-commandCursor; }
|
||||
public:
|
||||
|
||||
void savestate(EMUFILE *f)
|
||||
{
|
||||
//TODO - next time we invalidate savestates, simplify this format.
|
||||
write32le(0,f); //version
|
||||
std::queue<u8> temp = commandsPending;
|
||||
write32le(temp.size(),f);
|
||||
while(!temp.empty()) { write8le(temp.front(),f); temp.pop(); }
|
||||
temp = countdowns;
|
||||
write32le(temp.size(),f);
|
||||
while(!temp.empty()) { write8le(temp.front(),f); temp.pop(); }
|
||||
|
||||
write32le(size(),f);
|
||||
for(u32 i=commandCursor;i<4;i++) write8le(commandsPending[i].command,f);
|
||||
write32le(0,f);
|
||||
for(u32 i=commandCursor;i<4;i++) write8le(commandsPending[i].countdown,f);
|
||||
write8le(countdown,f);
|
||||
}
|
||||
|
||||
|
@ -237,14 +224,14 @@ decode:
|
|||
if(read32le(&version,f) != 1) return false;
|
||||
if(version != 0) return false;
|
||||
|
||||
assert(commandsPending.size()==0);
|
||||
assert(countdowns.size()==0);
|
||||
|
||||
u32 temp;
|
||||
read32le(&temp,f);
|
||||
for(u32 i=0;i<temp;i++) { u8 temp8; read8le(&temp8,f); commandsPending.push(temp8); }
|
||||
read32le(&temp,f);
|
||||
for(u32 i=0;i<temp;i++) { u8 temp8; read8le(&temp8,f); countdowns.push(temp8); }
|
||||
u32 tempsize;
|
||||
read32le(&tempsize,f);
|
||||
commandCursor = 4-tempsize;
|
||||
for(u32 i=0;i<commandCursor;i++) commandsPending[i].command = 0;
|
||||
for(u32 i=commandCursor;i<4;i++) read8le(&commandsPending[i].command,f);
|
||||
read32le(&tempsize,f);
|
||||
for(u32 i=0;i<commandCursor;i++) commandsPending[i].countdown = 0;
|
||||
for(u32 i=commandCursor;i<4;i++) read8le(&commandsPending[i].countdown,f);
|
||||
|
||||
read8le(&countdown,f);
|
||||
|
||||
|
@ -363,7 +350,6 @@ static u32 clInd = 0;
|
|||
|
||||
static u32 clInd2 = 0;
|
||||
BOOL isSwapBuffers = FALSE;
|
||||
bool isVBlank = false;
|
||||
|
||||
static u32 BTind = 0;
|
||||
static u32 PTind = 0;
|
||||
|
@ -553,7 +539,6 @@ void gfx3d_reset()
|
|||
|
||||
clInd2 = 0;
|
||||
isSwapBuffers = FALSE;
|
||||
isVBlank = false;
|
||||
|
||||
GFX_PIPEclear();
|
||||
GFX_FIFOclear();
|
||||
|
@ -1713,6 +1698,8 @@ void gfx3d_execute3D()
|
|||
|
||||
if(GFX_PIPErecv(&cmd, ¶m))
|
||||
{
|
||||
//if (isSwapBuffers) printf("Executing while swapbuffers is pending: %d:%08X\n",cmd,param);
|
||||
|
||||
//since we did anything at all, incur a pipeline motion cost.
|
||||
//also, we can't let gxfifo sequencer stall until the fifo is empty.
|
||||
//see...
|
||||
|
@ -1746,6 +1733,7 @@ void gfx3d_execute3D()
|
|||
|
||||
void gfx3d_glFlush(u32 v)
|
||||
{
|
||||
//printf("-------------FLUSH------------- (vcount=%d\n",nds.VCount);
|
||||
gfx3d.sortmode = BIT0(v);
|
||||
gfx3d.wbuffer = BIT1(v);
|
||||
#if 0
|
||||
|
@ -1871,7 +1859,6 @@ static void gfx3d_doFlush()
|
|||
|
||||
void gfx3d_VBlankSignal()
|
||||
{
|
||||
isVBlank = true;
|
||||
if (isSwapBuffers)
|
||||
{
|
||||
#ifndef FLUSHMODE_HACK
|
||||
|
@ -1884,8 +1871,6 @@ void gfx3d_VBlankSignal()
|
|||
|
||||
void gfx3d_VBlankEndSignal(bool skipFrame)
|
||||
{
|
||||
isVBlank = false;
|
||||
|
||||
if (!drawPending) return;
|
||||
drawPending = FALSE;
|
||||
|
||||
|
|
|
@ -282,6 +282,7 @@ static const int kYMargin = 1;
|
|||
|
||||
typedef std::vector<CIORegView*> TIORegViewList;
|
||||
static TIORegViewList liveIORegViews;
|
||||
bool anyLiveIORegViews = false;
|
||||
|
||||
void RefreshAllIORegViews()
|
||||
{
|
||||
|
@ -303,6 +304,7 @@ CIORegView::CIORegView()
|
|||
, yoff(0)
|
||||
{
|
||||
liveIORegViews.push_back(this);
|
||||
anyLiveIORegViews = true;
|
||||
}
|
||||
|
||||
CIORegView::~CIORegView()
|
||||
|
@ -311,6 +313,7 @@ CIORegView::~CIORegView()
|
|||
UnregWndClass("DeSmuME_IORegView");
|
||||
//TODO - is this thread safe? which thread do these calls come from
|
||||
liveIORegViews.erase(std::find(liveIORegViews.begin(),liveIORegViews.end(),this));
|
||||
if(liveIORegViews.size()==0) anyLiveIORegViews = false;
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
|
|
@ -1819,6 +1819,13 @@ class WinDriver : public BaseDriver
|
|||
|
||||
return ESTEP_DONE;
|
||||
}
|
||||
|
||||
//windows implementation of the IO register view notification.
//the category argument is currently ignored: every view is refreshed,
//and the refresh is skipped entirely when no view is live.
virtual void DEBUG_UpdateIORegView(eDebug_IOReg category)
|
||||
{
|
||||
//flag declared locally via extern; it is checked before calling RefreshAllIORegViews()
extern bool anyLiveIORegViews;
|
||||
if(anyLiveIORegViews)
|
||||
RefreshAllIORegViews();
|
||||
}
|
||||
};
|
||||
|
||||
std::string GetPrivateProfileStdString(LPCSTR lpAppName,LPCSTR lpKeyName,LPCSTR lpDefault)
|
||||
|
|
Loading…
Reference in New Issue