Fixed sprite window inefficiencies. Big speedup; major speedup in SPP. I do not know of a good sprite window test case, so I had to test it by turning on the sprite window for SPP in a hacky way; keep your eyes peeled for problems. But even if there are problems, the old way of handling the sprite window was unacceptable.

This commit is contained in:
zeromus 2008-12-29 07:26:15 +00:00
parent b4a3551b1c
commit 9e04b4eceb
4 changed files with 175 additions and 159 deletions

View File

@ -43,10 +43,9 @@
- Added texture caching (speedup 3D core) [CrazyMax]
- Render shadow volumes [zeromus, luigi__]
- Toon shading system [luigi__]
- carry w=1 from vertex() through pipeline (this will be necessary for software 3d rendering) [zeromus]
- Carry w=1 from vertex() through pipeline (this will be necessary for software 3d rendering) [zeromus]
- Fix clear depth (ex. Castlevania now doesn't flip) [luigi__]
- Make GE matrix mult and load commands clear out unused rows and cols to identity correctly [zeromus]
- Setup to track polycount better, but still worthless, not accounting for clipping and culling [zeromus]
- Fix errors in matrix operations regarding projection mode and pos-vector mode [zeromus]
- Fix error in command unpacking which caused some display lists to totally blow up [zeromus]
- Convert alpha and material values from [0,31], [0,7] etc ranges to opengl [0,maxint] ranges in a more precise way [zeromus]
@ -55,6 +54,7 @@
- Move lighting model to software instead of using opengl for more precision [zeromus]
- Fix a bug in texture transformation mode 1 [zeromus]
- Add 3d layer h-scrolling [zeromus]
- Removed some serious inefficiencies from sprite window code [zeromus]
Mac OS X port:
- Fixed: Filenames and paths with unicode characters now work. [Jeff]
- Fixed: Load state from file button works again. [Jeff]

View File

@ -77,7 +77,9 @@ NDS_Screen SubScreen;
//#define DEBUG_TRI
u8 GPU_screen[4*256*192];
CACHE_ALIGN u8 GPU_screen[4*256*192];
CACHE_ALIGN u8 sprWin[256];
OSDCLASS *osd = NULL;
OSDCLASS *osdA = NULL;
@ -235,7 +237,11 @@ static void GPU_resortBGs(GPU *gpu)
struct _DISPCNT * cnt = &gpu->dispx_st->dispx_DISPCNT.bits;
itemsForPriority_t * item;
memset(gpu->sprWin,0, 256*192);
//zero 29-dec-2008 - this really doesnt make sense to me.
//i changed the sprwin to be line by line,
//and resetting it here is pointless since line rendering is instantaneous
//and completely produces and consumes sprwin after which the contents of this buffer are useless
//memset(gpu->sprWin,0, 256*192);
// we don't need to check for windows here...
// if we tick boxes, invisible layers become invisible & vice versa
@ -481,11 +487,12 @@ static INLINE void renderline_checkWindows(const GPU *gpu, u8 bgnum, u16 x, u16
}
}
//if(true) //sprwin test hack
if (gpu->WINOBJ_ENABLED)
{
// it is in winOBJ, do we display ?
// low priority
if (gpu->sprWin[y][x])
if (sprWin[x])
{
*draw = (gpu->WINOBJ >> bgnum)&1;
*effect = (gpu->WINOBJ_SPECIAL);
@ -506,8 +513,13 @@ static INLINE void renderline_checkWindows(const GPU *gpu, u8 bgnum, u16 x, u16
static BOOL setFinalColorSpecialNone (const GPU *gpu, u32 passing, u8 bgnum, u8 *dst, u16 color, u16 x, u16 y)
{
T2WriteWord(dst, passing, color);
//sprwin test hack - use this code
//BOOL windowDraw = TRUE, windowEffect = TRUE;
//renderline_checkWindows(gpu,bgnum,x,y, &windowDraw, &windowEffect);
//if(windowDraw) T2WriteWord(dst, passing, color);
//return 1;
T2WriteWord(dst, passing, color);
return 1;
}
@ -1260,7 +1272,7 @@ INLINE void render_sprite_Win (GPU * gpu, u16 l, u8 * src,
u16 x1;
if (col256) {
for(i = 0; i < lg; i++, sprX++,x+=xdir)
gpu->sprWin[l][sprX] = (src[x])?1:0;
sprWin[sprX] = (src[x])?1:0;
} else {
for(i = 0; i < lg; i++, ++sprX, x+=xdir)
{
@ -1268,7 +1280,7 @@ INLINE void render_sprite_Win (GPU * gpu, u16 l, u8 * src,
palette = src[(x1&0x3) + ((x1&0xFFFC)<<3)];
if (x & 1) palette_entry = palette >> 4;
else palette_entry = palette & 0xF;
gpu->sprWin[l][sprX] = (palette_entry)?1:0;
sprWin[sprX] = (palette_entry)?1:0;
}
}
}
@ -1588,7 +1600,9 @@ void sprite1D(GPU * gpu, u16 l, u8 * dst, u8 * prioTab)
else
pal = (u16*)(ARM9Mem.ARM9_VMEM + 0x200 + gpu->core *0x400);
//sprwin test hack - to enable, only draw win and not sprite
render_sprite_256 (gpu, l, dst, src, pal, prioTab, prio, lg, sprX, x, xdir, spriteInfo->Mode == 1);
//render_sprite_Win (gpu, l, src, spriteInfo->Depth, lg, sprX, x, xdir);
continue;
}
@ -1598,7 +1612,10 @@ void sprite1D(GPU * gpu, u16 l, u8 * dst, u8 * prioTab)
pal = (u16*)(ARM9Mem.ARM9_VMEM + 0x200 + gpu->core * 0x400);
pal += (spriteInfo->PaletteIndex<<4);
//sprwin test hack - to enable, only draw win and not sprite
render_sprite_16 (gpu, l, dst, src, pal, prioTab, prio, lg, sprX, x, xdir, spriteInfo->Mode == 1);
//render_sprite_Win (gpu, l, src, spriteInfo->Depth, lg, sprX, x, xdir);
}
}
@ -2120,7 +2137,7 @@ static INLINE void GPU_ligne_layer(NDS_Screen * screen, u16 l)
// init background color & priorities
memset(sprPrio,0xFF,256);
memset(&gpu->sprWin[l],0,256);
memset(sprWin,0,256);
// init pixels priorities
for (int i=0; i<NB_PRIORITIES; i++) {

View File

@ -599,7 +599,6 @@ struct _GPU
DISPCAPCNT dispCapCnt;
BOOL LayersEnable[5];
itemsForPriority_t itemsForPriority[NB_PRIORITIES];
u8 sprWin[192][256];
#define BGBmpBB BG_bmp_ram
#define BGChBB BG_tile_ram
@ -685,7 +684,7 @@ static void REG_DISPx_pack_test(GPU * gpu)
}
*/
extern u8 GPU_screen[4*256*192];
CACHE_ALIGN extern u8 GPU_screen[4*256*192];
GPU * GPU_Init(u8 l);

View File

@ -1,149 +1,149 @@
/* Copyright (C) 2006 yopyop
yopyop156@ifrance.com
yopyop156.ifrance.com
Copyright (C) 2006-2008 DeSmuME team
This file is part of DeSmuME
DeSmuME is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
DeSmuME is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DeSmuME; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "GPU_osd.h"
#include "mem.h"
#include <string.h> //mem funcs
#include <stdarg.h> //va_start, etc
#include "debug.h"
#include "softrender.h"
#include "softrender_v3sysfont.h"
#include "softrender_desmumefont.h"
using namespace softrender;
extern u8 GPU_screen[4*256*192];
image screenshell;
OSDCLASS::OSDCLASS(u8 core)
{
memset(screen, 0, sizeof(screen));
memset(name,0,7);
//memset(line, 0, sizeof(line));
memset(timer, 0, sizeof(timer));
memset(color, 0, sizeof(color));
old_msg = new char[512];
memset(old_msg, 0, 512);
current_color = 0x8F;
mode=core;
offset=0;
/* Copyright (C) 2006 yopyop
yopyop156@ifrance.com
yopyop156.ifrance.com
Copyright (C) 2006-2008 DeSmuME team
This file is part of DeSmuME
DeSmuME is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
DeSmuME is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DeSmuME; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "GPU_osd.h"
#include "GPU.h"
#include "mem.h"
#include <string.h> //mem funcs
#include <stdarg.h> //va_start, etc
#include "debug.h"
#include "softrender.h"
#include "softrender_v3sysfont.h"
#include "softrender_desmumefont.h"
using namespace softrender;
image screenshell;
/**
 * Constructs an on-screen-display object.
 *
 * Zero-fills the internal buffers, allocates the 512-byte old_msg buffer,
 * picks a display name from `core` and points the global `screenshell`
 * image descriptor at this object's `screen` buffer.
 *
 * @param core 0 selects the name "Core A", 1 selects "Core B"; any other
 *             value selects "Main" and forces `mode` to 255.
 */
OSDCLASS::OSDCLASS(u8 core)
{
	memset(screen, 0, sizeof(screen));
	memset(name,0,7);
	//memset(line, 0, sizeof(line));
	memset(timer, 0, sizeof(timer));
	memset(color, 0, sizeof(color));

	old_msg = new char[512];
	memset(old_msg, 0, 512);

	current_color = 0x8F;
	mode=core;
	offset=0;
	startline=0;
	lastline=0;
	needUpdate = false;

	// name was zero-filled above, so copying only the visible characters
	// leaves the string terminated.
	if (core==0)
		memcpy(name,"Core A",6);
	else
	if (core==1)
		memcpy(name,"Core B",6);
	else
	{
		// "Main" is 4 characters (5 bytes with its terminator); copying 6
		// bytes read past the end of the string literal.
		memcpy(name,"Main",4);
		mode=255;
	}

	// Describe the OSD buffer as a 256x384, 15bpp software-render target.
	screenshell.shell = true;
	screenshell.data = screen;
	screenshell.bpp = 15;
	screenshell.width = 256;
	screenshell.height = 384;
	screenshell.pitch = 256;
	screenshell.cx1 = 0;
	screenshell.cx2 = 256-1;
	screenshell.cy1 = 0;
	screenshell.cy2 = 384-1;

	LOG("OSD_Init (%s)\n",name);
}
// Logs the shutdown of this OSD instance, then frees the old_msg buffer
// that was obtained with new[].
OSDCLASS::~OSDCLASS()
{
	LOG("OSD_Deinit (%s)\n", this->name);

	delete[] this->old_msg;
}
// Stores the offset that update() applies to the destination pointer
// when mode is not 255.
void OSDCLASS::setOffset(u16 ofs)
{
	this->offset = ofs;
}
void OSDCLASS::clear()
{
memset(screen, 0, sizeof(screen));
memset(line, 0, sizeof(line));
memset(timer, 0, sizeof(timer));
needUpdate=false;
}
// Records the colour value to be used as the current colour.
void OSDCLASS::setColor(u16 col)
{
	this->current_color = col;
}
void OSDCLASS::update() // don't optimized
{
if (!needUpdate) return; // don't update if buffer empty (speed up)
u16 *dst=(u16*)GPU_screen;
if (mode!=255)
dst+=offset*512;
for (int i=0; i<256*192; i++)
{
if(screen[i]&0x8000)
T2WriteWord((u8*)dst,(i << 1), screen[i] );
}
}
// Stub: accepts a printf-style format and arguments but does nothing.
void OSDCLASS::addLine(const char *fmt, ...)
{
	(void)fmt; // intentionally unused
}
/**
 * Formats a printf-style message and renders it into the OSD buffer at a
 * fixed position, then flags the OSD for update().
 *
 * @param x   horizontal position passed to the text renderer
 * @param y   vertical position passed to the text renderer
 * @param fmt printf-style format string, followed by its arguments
 */
void OSDCLASS::addFixed(u16 x, u16 y, const char *fmt, ...)
{
	va_list list;
	char msg[1024];

	// Start from an empty string so a formatting failure cannot leave
	// indeterminate bytes in msg.
	msg[0] = '\0';

	va_start(list,fmt);
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
	_vsnprintf(msg,sizeof(msg)-1,fmt,list);
#else
	vsnprintf(msg,sizeof(msg)-1,fmt,list);
#endif
	va_end(list);

	// _vsnprintf does not write a terminator when the output is truncated,
	// so terminate explicitly before the buffer is read as a C string.
	msg[sizeof(msg)-1] = '\0';

	// NOTE(review): nothing visible here ever writes to old_msg after the
	// constructor zero-fills it, so this check appears to filter out only
	// empty messages - confirm whether it was meant to cache the last text.
	if (strcmp(msg, old_msg) == 0) return;

	render51.PrintString<DesmumeFont>(1,x,y,render51.MakeColor(128,0,0),msg,&screenshell);

	needUpdate = true;
}
lastline=0;
needUpdate = false;
if (core==0)
memcpy(name,"Core A",6);
else
if (core==1)
memcpy(name,"Core B",6);
else
{
memcpy(name,"Main",6);
mode=255;
}
screenshell.shell = true;
screenshell.data = screen;
screenshell.bpp = 15;
screenshell.width = 256;
screenshell.height = 384;
screenshell.pitch = 256;
screenshell.cx1 = 0;
screenshell.cx2 = 256-1;
screenshell.cy1 = 0;
screenshell.cy2 = 384-1;
LOG("OSD_Init (%s)\n",name);
}
// Logs the shutdown of this OSD instance, then frees the old_msg buffer
// that was obtained with new[].
OSDCLASS::~OSDCLASS()
{
	LOG("OSD_Deinit (%s)\n", this->name);

	delete[] this->old_msg;
}
// Stores the offset that update() applies to the destination pointer
// when mode is not 255.
void OSDCLASS::setOffset(u16 ofs)
{
	this->offset = ofs;
}
void OSDCLASS::clear()
{
memset(screen, 0, sizeof(screen));
memset(line, 0, sizeof(line));
memset(timer, 0, sizeof(timer));
needUpdate=false;
}
// Records the colour value to be used as the current colour.
void OSDCLASS::setColor(u16 col)
{
	this->current_color = col;
}
void OSDCLASS::update() // don't optimized
{
if (!needUpdate) return; // don't update if buffer empty (speed up)
u16 *dst=(u16*)GPU_screen;
if (mode!=255)
dst+=offset*512;
for (int i=0; i<256*192; i++)
{
if(screen[i]&0x8000)
T2WriteWord((u8*)dst,(i << 1), screen[i] );
}
}
// Stub: accepts a printf-style format and arguments but does nothing.
void OSDCLASS::addLine(const char *fmt, ...)
{
	(void)fmt; // intentionally unused
}
/**
 * Formats a printf-style message and renders it into the OSD buffer at a
 * fixed position, then flags the OSD for update().
 *
 * @param x   horizontal position passed to the text renderer
 * @param y   vertical position passed to the text renderer
 * @param fmt printf-style format string, followed by its arguments
 */
void OSDCLASS::addFixed(u16 x, u16 y, const char *fmt, ...)
{
	va_list list;
	char msg[1024];

	// Start from an empty string so a formatting failure cannot leave
	// indeterminate bytes in msg.
	msg[0] = '\0';

	va_start(list,fmt);
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
	_vsnprintf(msg,sizeof(msg)-1,fmt,list);
#else
	vsnprintf(msg,sizeof(msg)-1,fmt,list);
#endif
	va_end(list);

	// _vsnprintf does not write a terminator when the output is truncated,
	// so terminate explicitly before the buffer is read as a C string.
	msg[sizeof(msg)-1] = '\0';

	// NOTE(review): nothing visible here ever writes to old_msg after the
	// constructor zero-fills it, so this check appears to filter out only
	// empty messages - confirm whether it was meant to cache the last text.
	if (strcmp(msg, old_msg) == 0) return;

	render51.PrintString<DesmumeFont>(1,x,y,render51.MakeColor(128,0,0),msg,&screenshell);

	needUpdate = true;
}