A grab bag of optimizations, good for about 4 fps on my system.

This commit is contained in:
zeromus 2009-07-17 08:33:35 +00:00
parent dbe2226498
commit ecf5b68df6
4 changed files with 22 additions and 21 deletions

View File

@ -573,12 +573,14 @@ static void GPU_InitFadeColors()
}
}
static CACHE_ALIGN GPU GPU_main, GPU_sub;
GPU * GPU_Init(u8 l)
{
GPU * g;
if ((g = (GPU *) malloc(sizeof(GPU))) == NULL)
return NULL;
if(l==0) g = &GPU_main;
else g = &GPU_sub;
GPU_Reset(g, l);
GPU_InitFadeColors();
@ -591,8 +593,6 @@ GPU * GPU_Init(u8 l)
g->setFinalColor3d_funcNum = 0;
g->setFinalColorSpr = _master_setFinalOBJColor<None,false>;
return g;
}
@ -1133,9 +1133,8 @@ template<bool BACKDROP> FORCEINLINE void GPU::setFinalColorBG(u16 color, const u
//This is probably the best place to enforce it, since almost every single color that comes in here
//will be pulled from a palette that needs the top bit stripped off anyway.
//assert((color&0x8000)==0);
color &= 0x7FFF;
if(!BACKDROP) color &= 0x7FFF; //but for the backdrop we can easily guarantee earlier that theres no bit here
//if someone disagrees with these, they could be reimplemented as a function pointer easily
bool draw=true;
switch(setFinalColorBck_funcNum)
{
@ -1149,10 +1148,10 @@ template<bool BACKDROP> FORCEINLINE void GPU::setFinalColorBG(u16 color, const u
case 0x7: draw=setFinalBGColorSpecialDecreaseWnd<BACKDROP>(color,x); break;
};
if(draw)
if(BACKDROP || draw) //backdrop must always be drawn
{
T2WriteWord(currDst, x<<1, color | 0x8000);
bgPixels[x] = currBgNum;
if(!BACKDROP) bgPixels[x] = currBgNum; //lets do this in the backdrop drawing loop, should be faster
}
}
@ -2359,7 +2358,6 @@ void GPU_set_DISPCAPCNT(u32 val)
}
// #define BRIGHT_TABLES
static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
{
GPU * gpu = screen->gpu;
@ -2381,6 +2379,7 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
for(int x=0;x<256;x++) {
gpu->__setFinalColorBck<false,true>(backdrop_color,x,1);
}
memset(gpu->bgPixels,5,256);
//this check isn't really helpful; it just slows us down in the cases where we need the most speed
//if (!gpu->LayersEnable[0] && !gpu->LayersEnable[1] && !gpu->LayersEnable[2] && !gpu->LayersEnable[3] && !gpu->LayersEnable[4]) return;
@ -2395,7 +2394,7 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
for (int i=0; i<NB_PRIORITIES; i++) {
gpu->itemsForPriority[i].nbPixelsX = 0;
}
// for all the pixels in the line
if (gpu->LayersEnable[4])
{

View File

@ -726,7 +726,7 @@ struct GPU
u8 MasterBrightMode;
u32 MasterBrightFactor;
u8 bgPixels[1024]; //yes indeed, this is oversized. map debug tools try to write to it
CACHE_ALIGN u8 bgPixels[1024]; //yes indeed, this is oversized. map debug tools try to write to it
u32 currLine;
u8 currBgNum;

View File

@ -495,22 +495,22 @@ FORCEINLINE void _MMU_write32(const int PROCNUM, const MMU_ACCESS_TYPE AT, const
#endif
template<int PROCNUM, MMU_ACCESS_TYPE AT>
u8 _MMU_read08(u32 addr) { return _MMU_read08(PROCNUM, AT, addr); }
FORCEINLINE u8 _MMU_read08(u32 addr) { return _MMU_read08(PROCNUM, AT, addr); }
template<int PROCNUM, MMU_ACCESS_TYPE AT>
u16 _MMU_read16(u32 addr) { return _MMU_read16(PROCNUM, AT, addr); }
FORCEINLINE u16 _MMU_read16(u32 addr) { return _MMU_read16(PROCNUM, AT, addr); }
template<int PROCNUM, MMU_ACCESS_TYPE AT>
u32 _MMU_read32(u32 addr) { return _MMU_read32(PROCNUM, AT, addr); }
FORCEINLINE u32 _MMU_read32(u32 addr) { return _MMU_read32(PROCNUM, AT, addr); }
template<int PROCNUM, MMU_ACCESS_TYPE AT>
void _MMU_write08(u32 addr, u8 val) { _MMU_write08(PROCNUM, AT, addr, val); }
FORCEINLINE void _MMU_write08(u32 addr, u8 val) { _MMU_write08(PROCNUM, AT, addr, val); }
template<int PROCNUM, MMU_ACCESS_TYPE AT>
void _MMU_write16(u32 addr, u16 val) { _MMU_write16(PROCNUM, AT, addr, val); }
FORCEINLINE void _MMU_write16(u32 addr, u16 val) { _MMU_write16(PROCNUM, AT, addr, val); }
template<int PROCNUM, MMU_ACCESS_TYPE AT>
void _MMU_write32(u32 addr, u32 val) { _MMU_write32(PROCNUM, AT, addr, val); }
FORCEINLINE void _MMU_write32(u32 addr, u32 val) { _MMU_write32(PROCNUM, AT, addr, val); }
void FASTCALL MMU_DumpMemBlock(u8 proc, u32 address, u32 size, u8 *buffer);

View File

@ -33,7 +33,7 @@
template<u32> static u32 armcpu_prefetch();
inline u32 armcpu_prefetch(armcpu_t *armcpu) {
FORCEINLINE u32 armcpu_prefetch(armcpu_t *armcpu) {
if(armcpu->proc_ID==0) return armcpu_prefetch<0>();
else return armcpu_prefetch<1>();
}
@ -363,8 +363,7 @@ u32 armcpu_switchMode(armcpu_t *armcpu, u8 mode)
}
template<u32 PROCNUM>
static u32
armcpu_prefetch()
FORCEINLINE static u32 armcpu_prefetch()
{
armcpu_t* const armcpu = &ARMPROC;
#ifdef GDB_STUB
@ -521,7 +520,10 @@ u32 armcpu_exec()
if(ARMPROC.CPSR.bits.T == 0)
{
if((TEST_COND(CONDITION(ARMPROC.instruction), CODE(ARMPROC.instruction), ARMPROC.CPSR)))
if(
CONDITION(ARMPROC.instruction) == 0x0E //fast path for unconditional instructions
|| (TEST_COND(CONDITION(ARMPROC.instruction), CODE(ARMPROC.instruction), ARMPROC.CPSR)) //handles any condition
)
{
if(PROCNUM==0) {
#ifdef WANTASMLISTING