A grab bag of optimizations, good for about 4 fps on my system.

This commit is contained in:
zeromus 2009-07-17 08:33:35 +00:00
parent dbe2226498
commit ecf5b68df6
4 changed files with 22 additions and 21 deletions

View File

@ -573,12 +573,14 @@ static void GPU_InitFadeColors()
} }
} }
static CACHE_ALIGN GPU GPU_main, GPU_sub;
GPU * GPU_Init(u8 l) GPU * GPU_Init(u8 l)
{ {
GPU * g; GPU * g;
if ((g = (GPU *) malloc(sizeof(GPU))) == NULL) if(l==0) g = &GPU_main;
return NULL; else g = &GPU_sub;
GPU_Reset(g, l); GPU_Reset(g, l);
GPU_InitFadeColors(); GPU_InitFadeColors();
@ -591,8 +593,6 @@ GPU * GPU_Init(u8 l)
g->setFinalColor3d_funcNum = 0; g->setFinalColor3d_funcNum = 0;
g->setFinalColorSpr = _master_setFinalOBJColor<None,false>; g->setFinalColorSpr = _master_setFinalOBJColor<None,false>;
return g; return g;
} }
@ -1133,9 +1133,8 @@ template<bool BACKDROP> FORCEINLINE void GPU::setFinalColorBG(u16 color, const u
//This is probably the best place to enforce it, since almost every single color that comes in here //This is probably the best place to enforce it, since almost every single color that comes in here
//will be pulled from a palette that needs the top bit stripped off anyway. //will be pulled from a palette that needs the top bit stripped off anyway.
//assert((color&0x8000)==0); //assert((color&0x8000)==0);
color &= 0x7FFF; if(!BACKDROP) color &= 0x7FFF; //but for the backdrop we can easily guarantee earlier that there's no bit here
//if someone disagrees with these, they could be reimplemented as a function pointer easily
bool draw=true; bool draw=true;
switch(setFinalColorBck_funcNum) switch(setFinalColorBck_funcNum)
{ {
@ -1149,10 +1148,10 @@ template<bool BACKDROP> FORCEINLINE void GPU::setFinalColorBG(u16 color, const u
case 0x7: draw=setFinalBGColorSpecialDecreaseWnd<BACKDROP>(color,x); break; case 0x7: draw=setFinalBGColorSpecialDecreaseWnd<BACKDROP>(color,x); break;
}; };
if(draw) if(BACKDROP || draw) //backdrop must always be drawn
{ {
T2WriteWord(currDst, x<<1, color | 0x8000); T2WriteWord(currDst, x<<1, color | 0x8000);
bgPixels[x] = currBgNum; if(!BACKDROP) bgPixels[x] = currBgNum; //let's do this in the backdrop drawing loop, should be faster
} }
} }
@ -2359,7 +2358,6 @@ void GPU_set_DISPCAPCNT(u32 val)
} }
// #define BRIGHT_TABLES // #define BRIGHT_TABLES
static void GPU_ligne_layer(NDS_Screen * screen, u16 l) static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
{ {
GPU * gpu = screen->gpu; GPU * gpu = screen->gpu;
@ -2381,6 +2379,7 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
for(int x=0;x<256;x++) { for(int x=0;x<256;x++) {
gpu->__setFinalColorBck<false,true>(backdrop_color,x,1); gpu->__setFinalColorBck<false,true>(backdrop_color,x,1);
} }
memset(gpu->bgPixels,5,256);
//this check isn't really helpful. it just slows us down in the cases where we need the most speed //this check isn't really helpful. it just slows us down in the cases where we need the most speed
//if (!gpu->LayersEnable[0] && !gpu->LayersEnable[1] && !gpu->LayersEnable[2] && !gpu->LayersEnable[3] && !gpu->LayersEnable[4]) return; //if (!gpu->LayersEnable[0] && !gpu->LayersEnable[1] && !gpu->LayersEnable[2] && !gpu->LayersEnable[3] && !gpu->LayersEnable[4]) return;
@ -2395,7 +2394,7 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
for (int i=0; i<NB_PRIORITIES; i++) { for (int i=0; i<NB_PRIORITIES; i++) {
gpu->itemsForPriority[i].nbPixelsX = 0; gpu->itemsForPriority[i].nbPixelsX = 0;
} }
// for all the pixels in the line // for all the pixels in the line
if (gpu->LayersEnable[4]) if (gpu->LayersEnable[4])
{ {

View File

@ -726,7 +726,7 @@ struct GPU
u8 MasterBrightMode; u8 MasterBrightMode;
u32 MasterBrightFactor; u32 MasterBrightFactor;
u8 bgPixels[1024]; //yes indeed, this is oversized. map debug tools try to write to it CACHE_ALIGN u8 bgPixels[1024]; //yes indeed, this is oversized. map debug tools try to write to it
u32 currLine; u32 currLine;
u8 currBgNum; u8 currBgNum;

View File

@ -495,22 +495,22 @@ FORCEINLINE void _MMU_write32(const int PROCNUM, const MMU_ACCESS_TYPE AT, const
#endif #endif
template<int PROCNUM, MMU_ACCESS_TYPE AT> template<int PROCNUM, MMU_ACCESS_TYPE AT>
u8 _MMU_read08(u32 addr) { return _MMU_read08(PROCNUM, AT, addr); } FORCEINLINE u8 _MMU_read08(u32 addr) { return _MMU_read08(PROCNUM, AT, addr); }
template<int PROCNUM, MMU_ACCESS_TYPE AT> template<int PROCNUM, MMU_ACCESS_TYPE AT>
u16 _MMU_read16(u32 addr) { return _MMU_read16(PROCNUM, AT, addr); } FORCEINLINE u16 _MMU_read16(u32 addr) { return _MMU_read16(PROCNUM, AT, addr); }
template<int PROCNUM, MMU_ACCESS_TYPE AT> template<int PROCNUM, MMU_ACCESS_TYPE AT>
u32 _MMU_read32(u32 addr) { return _MMU_read32(PROCNUM, AT, addr); } FORCEINLINE u32 _MMU_read32(u32 addr) { return _MMU_read32(PROCNUM, AT, addr); }
template<int PROCNUM, MMU_ACCESS_TYPE AT> template<int PROCNUM, MMU_ACCESS_TYPE AT>
void _MMU_write08(u32 addr, u8 val) { _MMU_write08(PROCNUM, AT, addr, val); } FORCEINLINE void _MMU_write08(u32 addr, u8 val) { _MMU_write08(PROCNUM, AT, addr, val); }
template<int PROCNUM, MMU_ACCESS_TYPE AT> template<int PROCNUM, MMU_ACCESS_TYPE AT>
void _MMU_write16(u32 addr, u16 val) { _MMU_write16(PROCNUM, AT, addr, val); } FORCEINLINE void _MMU_write16(u32 addr, u16 val) { _MMU_write16(PROCNUM, AT, addr, val); }
template<int PROCNUM, MMU_ACCESS_TYPE AT> template<int PROCNUM, MMU_ACCESS_TYPE AT>
void _MMU_write32(u32 addr, u32 val) { _MMU_write32(PROCNUM, AT, addr, val); } FORCEINLINE void _MMU_write32(u32 addr, u32 val) { _MMU_write32(PROCNUM, AT, addr, val); }
void FASTCALL MMU_DumpMemBlock(u8 proc, u32 address, u32 size, u8 *buffer); void FASTCALL MMU_DumpMemBlock(u8 proc, u32 address, u32 size, u8 *buffer);

View File

@ -33,7 +33,7 @@
template<u32> static u32 armcpu_prefetch(); template<u32> static u32 armcpu_prefetch();
inline u32 armcpu_prefetch(armcpu_t *armcpu) { FORCEINLINE u32 armcpu_prefetch(armcpu_t *armcpu) {
if(armcpu->proc_ID==0) return armcpu_prefetch<0>(); if(armcpu->proc_ID==0) return armcpu_prefetch<0>();
else return armcpu_prefetch<1>(); else return armcpu_prefetch<1>();
} }
@ -363,8 +363,7 @@ u32 armcpu_switchMode(armcpu_t *armcpu, u8 mode)
} }
template<u32 PROCNUM> template<u32 PROCNUM>
static u32 FORCEINLINE static u32 armcpu_prefetch()
armcpu_prefetch()
{ {
armcpu_t* const armcpu = &ARMPROC; armcpu_t* const armcpu = &ARMPROC;
#ifdef GDB_STUB #ifdef GDB_STUB
@ -521,7 +520,10 @@ u32 armcpu_exec()
if(ARMPROC.CPSR.bits.T == 0) if(ARMPROC.CPSR.bits.T == 0)
{ {
if((TEST_COND(CONDITION(ARMPROC.instruction), CODE(ARMPROC.instruction), ARMPROC.CPSR))) if(
CONDITION(ARMPROC.instruction) == 0x0E //fast path for unconditional instructions
|| (TEST_COND(CONDITION(ARMPROC.instruction), CODE(ARMPROC.instruction), ARMPROC.CPSR)) //handles any condition
)
{ {
if(PROCNUM==0) { if(PROCNUM==0) {
#ifdef WANTASMLISTING #ifdef WANTASMLISTING