A grab bag of optimizations, good for a gain of about 4 fps on my system
This commit is contained in:
parent
dbe2226498
commit
ecf5b68df6
|
@ -573,12 +573,14 @@ static void GPU_InitFadeColors()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static CACHE_ALIGN GPU GPU_main, GPU_sub;
|
||||||
|
|
||||||
GPU * GPU_Init(u8 l)
|
GPU * GPU_Init(u8 l)
|
||||||
{
|
{
|
||||||
GPU * g;
|
GPU * g;
|
||||||
|
|
||||||
if ((g = (GPU *) malloc(sizeof(GPU))) == NULL)
|
if(l==0) g = &GPU_main;
|
||||||
return NULL;
|
else g = &GPU_sub;
|
||||||
|
|
||||||
GPU_Reset(g, l);
|
GPU_Reset(g, l);
|
||||||
GPU_InitFadeColors();
|
GPU_InitFadeColors();
|
||||||
|
@ -591,8 +593,6 @@ GPU * GPU_Init(u8 l)
|
||||||
g->setFinalColor3d_funcNum = 0;
|
g->setFinalColor3d_funcNum = 0;
|
||||||
g->setFinalColorSpr = _master_setFinalOBJColor<None,false>;
|
g->setFinalColorSpr = _master_setFinalOBJColor<None,false>;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1133,9 +1133,8 @@ template<bool BACKDROP> FORCEINLINE void GPU::setFinalColorBG(u16 color, const u
|
||||||
//This is probably the best place to enforce it, since almost every single color that comes in here
|
//This is probably the best place to enforce it, since almost every single color that comes in here
|
||||||
//will be pulled from a palette that needs the top bit stripped off anyway.
|
//will be pulled from a palette that needs the top bit stripped off anyway.
|
||||||
//assert((color&0x8000)==0);
|
//assert((color&0x8000)==0);
|
||||||
color &= 0x7FFF;
|
if(!BACKDROP) color &= 0x7FFF; //but for the backdrop we can easily guarantee earlier that theres no bit here
|
||||||
|
|
||||||
//if someone disagrees with these, they could be reimplemented as a function pointer easily
|
|
||||||
bool draw=true;
|
bool draw=true;
|
||||||
switch(setFinalColorBck_funcNum)
|
switch(setFinalColorBck_funcNum)
|
||||||
{
|
{
|
||||||
|
@ -1149,10 +1148,10 @@ template<bool BACKDROP> FORCEINLINE void GPU::setFinalColorBG(u16 color, const u
|
||||||
case 0x7: draw=setFinalBGColorSpecialDecreaseWnd<BACKDROP>(color,x); break;
|
case 0x7: draw=setFinalBGColorSpecialDecreaseWnd<BACKDROP>(color,x); break;
|
||||||
};
|
};
|
||||||
|
|
||||||
if(draw)
|
if(BACKDROP || draw) //backdrop must always be drawn
|
||||||
{
|
{
|
||||||
T2WriteWord(currDst, x<<1, color | 0x8000);
|
T2WriteWord(currDst, x<<1, color | 0x8000);
|
||||||
bgPixels[x] = currBgNum;
|
if(!BACKDROP) bgPixels[x] = currBgNum; //lets do this in the backdrop drawing loop, should be faster
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2359,7 +2358,6 @@ void GPU_set_DISPCAPCNT(u32 val)
|
||||||
}
|
}
|
||||||
// #define BRIGHT_TABLES
|
// #define BRIGHT_TABLES
|
||||||
|
|
||||||
|
|
||||||
static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
|
static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
|
||||||
{
|
{
|
||||||
GPU * gpu = screen->gpu;
|
GPU * gpu = screen->gpu;
|
||||||
|
@ -2381,6 +2379,7 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
|
||||||
for(int x=0;x<256;x++) {
|
for(int x=0;x<256;x++) {
|
||||||
gpu->__setFinalColorBck<false,true>(backdrop_color,x,1);
|
gpu->__setFinalColorBck<false,true>(backdrop_color,x,1);
|
||||||
}
|
}
|
||||||
|
memset(gpu->bgPixels,5,256);
|
||||||
|
|
||||||
//this check isn't really helpful. It just slows us down in the cases where we need the most speed
|
//this check isn't really helpful. It just slows us down in the cases where we need the most speed
|
||||||
//if (!gpu->LayersEnable[0] && !gpu->LayersEnable[1] && !gpu->LayersEnable[2] && !gpu->LayersEnable[3] && !gpu->LayersEnable[4]) return;
|
//if (!gpu->LayersEnable[0] && !gpu->LayersEnable[1] && !gpu->LayersEnable[2] && !gpu->LayersEnable[3] && !gpu->LayersEnable[4]) return;
|
||||||
|
|
|
@ -726,7 +726,7 @@ struct GPU
|
||||||
u8 MasterBrightMode;
|
u8 MasterBrightMode;
|
||||||
u32 MasterBrightFactor;
|
u32 MasterBrightFactor;
|
||||||
|
|
||||||
u8 bgPixels[1024]; //yes indeed, this is oversized. map debug tools try to write to it
|
CACHE_ALIGN u8 bgPixels[1024]; //yes indeed, this is oversized. map debug tools try to write to it
|
||||||
|
|
||||||
u32 currLine;
|
u32 currLine;
|
||||||
u8 currBgNum;
|
u8 currBgNum;
|
||||||
|
|
|
@ -495,22 +495,22 @@ FORCEINLINE void _MMU_write32(const int PROCNUM, const MMU_ACCESS_TYPE AT, const
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<int PROCNUM, MMU_ACCESS_TYPE AT>
|
template<int PROCNUM, MMU_ACCESS_TYPE AT>
|
||||||
u8 _MMU_read08(u32 addr) { return _MMU_read08(PROCNUM, AT, addr); }
|
FORCEINLINE u8 _MMU_read08(u32 addr) { return _MMU_read08(PROCNUM, AT, addr); }
|
||||||
|
|
||||||
template<int PROCNUM, MMU_ACCESS_TYPE AT>
|
template<int PROCNUM, MMU_ACCESS_TYPE AT>
|
||||||
u16 _MMU_read16(u32 addr) { return _MMU_read16(PROCNUM, AT, addr); }
|
FORCEINLINE u16 _MMU_read16(u32 addr) { return _MMU_read16(PROCNUM, AT, addr); }
|
||||||
|
|
||||||
template<int PROCNUM, MMU_ACCESS_TYPE AT>
|
template<int PROCNUM, MMU_ACCESS_TYPE AT>
|
||||||
u32 _MMU_read32(u32 addr) { return _MMU_read32(PROCNUM, AT, addr); }
|
FORCEINLINE u32 _MMU_read32(u32 addr) { return _MMU_read32(PROCNUM, AT, addr); }
|
||||||
|
|
||||||
template<int PROCNUM, MMU_ACCESS_TYPE AT>
|
template<int PROCNUM, MMU_ACCESS_TYPE AT>
|
||||||
void _MMU_write08(u32 addr, u8 val) { _MMU_write08(PROCNUM, AT, addr, val); }
|
FORCEINLINE void _MMU_write08(u32 addr, u8 val) { _MMU_write08(PROCNUM, AT, addr, val); }
|
||||||
|
|
||||||
template<int PROCNUM, MMU_ACCESS_TYPE AT>
|
template<int PROCNUM, MMU_ACCESS_TYPE AT>
|
||||||
void _MMU_write16(u32 addr, u16 val) { _MMU_write16(PROCNUM, AT, addr, val); }
|
FORCEINLINE void _MMU_write16(u32 addr, u16 val) { _MMU_write16(PROCNUM, AT, addr, val); }
|
||||||
|
|
||||||
template<int PROCNUM, MMU_ACCESS_TYPE AT>
|
template<int PROCNUM, MMU_ACCESS_TYPE AT>
|
||||||
void _MMU_write32(u32 addr, u32 val) { _MMU_write32(PROCNUM, AT, addr, val); }
|
FORCEINLINE void _MMU_write32(u32 addr, u32 val) { _MMU_write32(PROCNUM, AT, addr, val); }
|
||||||
|
|
||||||
void FASTCALL MMU_DumpMemBlock(u8 proc, u32 address, u32 size, u8 *buffer);
|
void FASTCALL MMU_DumpMemBlock(u8 proc, u32 address, u32 size, u8 *buffer);
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@
|
||||||
|
|
||||||
template<u32> static u32 armcpu_prefetch();
|
template<u32> static u32 armcpu_prefetch();
|
||||||
|
|
||||||
inline u32 armcpu_prefetch(armcpu_t *armcpu) {
|
FORCEINLINE u32 armcpu_prefetch(armcpu_t *armcpu) {
|
||||||
if(armcpu->proc_ID==0) return armcpu_prefetch<0>();
|
if(armcpu->proc_ID==0) return armcpu_prefetch<0>();
|
||||||
else return armcpu_prefetch<1>();
|
else return armcpu_prefetch<1>();
|
||||||
}
|
}
|
||||||
|
@ -363,8 +363,7 @@ u32 armcpu_switchMode(armcpu_t *armcpu, u8 mode)
|
||||||
}
|
}
|
||||||
|
|
||||||
template<u32 PROCNUM>
|
template<u32 PROCNUM>
|
||||||
static u32
|
FORCEINLINE static u32 armcpu_prefetch()
|
||||||
armcpu_prefetch()
|
|
||||||
{
|
{
|
||||||
armcpu_t* const armcpu = &ARMPROC;
|
armcpu_t* const armcpu = &ARMPROC;
|
||||||
#ifdef GDB_STUB
|
#ifdef GDB_STUB
|
||||||
|
@ -521,7 +520,10 @@ u32 armcpu_exec()
|
||||||
|
|
||||||
if(ARMPROC.CPSR.bits.T == 0)
|
if(ARMPROC.CPSR.bits.T == 0)
|
||||||
{
|
{
|
||||||
if((TEST_COND(CONDITION(ARMPROC.instruction), CODE(ARMPROC.instruction), ARMPROC.CPSR)))
|
if(
|
||||||
|
CONDITION(ARMPROC.instruction) == 0x0E //fast path for unconditional instructions
|
||||||
|
|| (TEST_COND(CONDITION(ARMPROC.instruction), CODE(ARMPROC.instruction), ARMPROC.CPSR)) //handles any condition
|
||||||
|
)
|
||||||
{
|
{
|
||||||
if(PROCNUM==0) {
|
if(PROCNUM==0) {
|
||||||
#ifdef WANTASMLISTING
|
#ifdef WANTASMLISTING
|
||||||
|
|
Loading…
Reference in New Issue