Fixed sprite window inefficiencies. Big speedup overall; major speedup in SPP. I do not know of a good sprite window test case, so I had to test it by turning on the sprite window for SPP in a hacky way, so keep your eyes peeled for problems. But even if there are problems, the old way of handling the sprite window was unacceptable.

This commit is contained in:
zeromus 2008-12-29 07:26:15 +00:00
parent b4a3551b1c
commit 9e04b4eceb
4 changed files with 175 additions and 159 deletions

View File

@ -43,10 +43,9 @@
- Added texture caching (speedup 3D core) [CrazyMax] - Added texture caching (speedup 3D core) [CrazyMax]
- Render shadow volumes [zeromus, luigi__] - Render shadow volumes [zeromus, luigi__]
- Toon shading system [luigi__] - Toon shading system [luigi__]
- carry w=1 from vertex() through pipeline (this will be necessary for software 3d rendering) [zeromus] - Carry w=1 from vertex() through pipeline (this will be necessary for software 3d rendering) [zeromus]
- Fix clear depth (ex. Castlevania now doesnt flip) [lugi__] - Fix clear depth (ex. Castlevania now doesnt flip) [lugi__]
- Make GE matrix mult and load commands clear out unused rows and cols to identity correctly [zeromus] - Make GE matrix mult and load commands clear out unused rows and cols to identity correctly [zeromus]
- Setup to track polycount better, but still worthless, not accounting for clipping and culling [zeromus]
- Fix errors in matrix operations regarding projection mode and pos-vector mode [zeromus] - Fix errors in matrix operations regarding projection mode and pos-vector mode [zeromus]
- Fix error in command unpacking which caused some display lists to totally blow up [zeromus] - Fix error in command unpacking which caused some display lists to totally blow up [zeromus]
- Convert alpha and material values from [0,31], [0,7] etc ranges to opengl [0,maxint] ranges in a more precise way [zeromus] - Convert alpha and material values from [0,31], [0,7] etc ranges to opengl [0,maxint] ranges in a more precise way [zeromus]
@ -55,6 +54,7 @@
- Move lighting model to software instead of using opengl for more precision [zeromus] - Move lighting model to software instead of using opengl for more precision [zeromus]
- Fix a bug in texture transformation mode 1 [zeromus] - Fix a bug in texture transformation mode 1 [zeromus]
- Add 3d layer h-scrolling [zeromus] - Add 3d layer h-scrolling [zeromus]
- Removed some serious inefficiencies from sprite window code [zeromus]
Mac OS X port: Mac OS X port:
- Fixed: Filenames and paths with unicode characters now work. [Jeff] - Fixed: Filenames and paths with unicode characters now work. [Jeff]
- Fixed: Load state from file button works again. [Jeff] - Fixed: Load state from file button works again. [Jeff]

View File

@ -77,7 +77,9 @@ NDS_Screen SubScreen;
//#define DEBUG_TRI //#define DEBUG_TRI
u8 GPU_screen[4*256*192]; CACHE_ALIGN u8 GPU_screen[4*256*192];
CACHE_ALIGN u8 sprWin[256];
OSDCLASS *osd = NULL; OSDCLASS *osd = NULL;
OSDCLASS *osdA = NULL; OSDCLASS *osdA = NULL;
@ -235,7 +237,11 @@ static void GPU_resortBGs(GPU *gpu)
struct _DISPCNT * cnt = &gpu->dispx_st->dispx_DISPCNT.bits; struct _DISPCNT * cnt = &gpu->dispx_st->dispx_DISPCNT.bits;
itemsForPriority_t * item; itemsForPriority_t * item;
memset(gpu->sprWin,0, 256*192); //zero 29-dec-2008 - this really doesnt make sense to me.
//i changed the sprwin to be line by line,
//and resetting it here is pointless since line rendering is instantaneous
//and completely produces and consumes sprwin after which the contents of this buffer are useless
//memset(gpu->sprWin,0, 256*192);
// we don't need to check for windows here... // we don't need to check for windows here...
// if we tick boxes, invisible layers become invisible & vice versa // if we tick boxes, invisible layers become invisible & vice versa
@ -481,11 +487,12 @@ static INLINE void renderline_checkWindows(const GPU *gpu, u8 bgnum, u16 x, u16
} }
} }
//if(true) //sprwin test hack
if (gpu->WINOBJ_ENABLED) if (gpu->WINOBJ_ENABLED)
{ {
// it is in winOBJ, do we display ? // it is in winOBJ, do we display ?
// low priority // low priority
if (gpu->sprWin[y][x]) if (sprWin[x])
{ {
*draw = (gpu->WINOBJ >> bgnum)&1; *draw = (gpu->WINOBJ >> bgnum)&1;
*effect = (gpu->WINOBJ_SPECIAL); *effect = (gpu->WINOBJ_SPECIAL);
@ -506,8 +513,13 @@ static INLINE void renderline_checkWindows(const GPU *gpu, u8 bgnum, u16 x, u16
static BOOL setFinalColorSpecialNone (const GPU *gpu, u32 passing, u8 bgnum, u8 *dst, u16 color, u16 x, u16 y) static BOOL setFinalColorSpecialNone (const GPU *gpu, u32 passing, u8 bgnum, u8 *dst, u16 color, u16 x, u16 y)
{ {
T2WriteWord(dst, passing, color); //sprwin test hack - use this code
//BOOL windowDraw = TRUE, windowEffect = TRUE;
//renderline_checkWindows(gpu,bgnum,x,y, &windowDraw, &windowEffect);
//if(windowDraw) T2WriteWord(dst, passing, color);
//return 1;
T2WriteWord(dst, passing, color);
return 1; return 1;
} }
@ -1260,7 +1272,7 @@ INLINE void render_sprite_Win (GPU * gpu, u16 l, u8 * src,
u16 x1; u16 x1;
if (col256) { if (col256) {
for(i = 0; i < lg; i++, sprX++,x+=xdir) for(i = 0; i < lg; i++, sprX++,x+=xdir)
gpu->sprWin[l][sprX] = (src[x])?1:0; sprWin[sprX] = (src[x])?1:0;
} else { } else {
for(i = 0; i < lg; i++, ++sprX, x+=xdir) for(i = 0; i < lg; i++, ++sprX, x+=xdir)
{ {
@ -1268,7 +1280,7 @@ INLINE void render_sprite_Win (GPU * gpu, u16 l, u8 * src,
palette = src[(x1&0x3) + ((x1&0xFFFC)<<3)]; palette = src[(x1&0x3) + ((x1&0xFFFC)<<3)];
if (x & 1) palette_entry = palette >> 4; if (x & 1) palette_entry = palette >> 4;
else palette_entry = palette & 0xF; else palette_entry = palette & 0xF;
gpu->sprWin[l][sprX] = (palette_entry)?1:0; sprWin[sprX] = (palette_entry)?1:0;
} }
} }
} }
@ -1588,7 +1600,9 @@ void sprite1D(GPU * gpu, u16 l, u8 * dst, u8 * prioTab)
else else
pal = (u16*)(ARM9Mem.ARM9_VMEM + 0x200 + gpu->core *0x400); pal = (u16*)(ARM9Mem.ARM9_VMEM + 0x200 + gpu->core *0x400);
//sprwin test hack - to enable, only draw win and not sprite
render_sprite_256 (gpu, l, dst, src, pal, prioTab, prio, lg, sprX, x, xdir, spriteInfo->Mode == 1); render_sprite_256 (gpu, l, dst, src, pal, prioTab, prio, lg, sprX, x, xdir, spriteInfo->Mode == 1);
//render_sprite_Win (gpu, l, src, spriteInfo->Depth, lg, sprX, x, xdir);
continue; continue;
} }
@ -1598,7 +1612,10 @@ void sprite1D(GPU * gpu, u16 l, u8 * dst, u8 * prioTab)
pal = (u16*)(ARM9Mem.ARM9_VMEM + 0x200 + gpu->core * 0x400); pal = (u16*)(ARM9Mem.ARM9_VMEM + 0x200 + gpu->core * 0x400);
pal += (spriteInfo->PaletteIndex<<4); pal += (spriteInfo->PaletteIndex<<4);
//sprwin test hack - to enable, only draw win and not sprite
render_sprite_16 (gpu, l, dst, src, pal, prioTab, prio, lg, sprX, x, xdir, spriteInfo->Mode == 1); render_sprite_16 (gpu, l, dst, src, pal, prioTab, prio, lg, sprX, x, xdir, spriteInfo->Mode == 1);
//render_sprite_Win (gpu, l, src, spriteInfo->Depth, lg, sprX, x, xdir);
} }
} }
@ -2120,7 +2137,7 @@ static INLINE void GPU_ligne_layer(NDS_Screen * screen, u16 l)
// init background color & priorities // init background color & priorities
memset(sprPrio,0xFF,256); memset(sprPrio,0xFF,256);
memset(&gpu->sprWin[l],0,256); memset(sprWin,0,256);
// init pixels priorities // init pixels priorities
for (int i=0; i<NB_PRIORITIES; i++) { for (int i=0; i<NB_PRIORITIES; i++) {

View File

@ -599,7 +599,6 @@ struct _GPU
DISPCAPCNT dispCapCnt; DISPCAPCNT dispCapCnt;
BOOL LayersEnable[5]; BOOL LayersEnable[5];
itemsForPriority_t itemsForPriority[NB_PRIORITIES]; itemsForPriority_t itemsForPriority[NB_PRIORITIES];
u8 sprWin[192][256];
#define BGBmpBB BG_bmp_ram #define BGBmpBB BG_bmp_ram
#define BGChBB BG_tile_ram #define BGChBB BG_tile_ram
@ -685,7 +684,7 @@ static void REG_DISPx_pack_test(GPU * gpu)
} }
*/ */
extern u8 GPU_screen[4*256*192]; CACHE_ALIGN extern u8 GPU_screen[4*256*192];
GPU * GPU_Init(u8 l); GPU * GPU_Init(u8 l);

View File

@ -1,149 +1,149 @@
/* Copyright (C) 2006 yopyop /* Copyright (C) 2006 yopyop
yopyop156@ifrance.com yopyop156@ifrance.com
yopyop156.ifrance.com yopyop156.ifrance.com
Copyright (C) 2006-2008 DeSmuME team Copyright (C) 2006-2008 DeSmuME team
This file is part of DeSmuME This file is part of DeSmuME
DeSmuME is free software; you can redistribute it and/or modify DeSmuME is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
DeSmuME is distributed in the hope that it will be useful, DeSmuME is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with DeSmuME; if not, write to the Free Software along with DeSmuME; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/ */
#include "GPU_osd.h" #include "GPU_osd.h"
#include "mem.h" #include "GPU.h"
#include <string.h> //mem funcs #include "mem.h"
#include <stdarg.h> //va_start, etc #include <string.h> //mem funcs
#include "debug.h" #include <stdarg.h> //va_start, etc
#include "debug.h"
#include "softrender.h"
#include "softrender.h"
#include "softrender_v3sysfont.h"
#include "softrender_desmumefont.h" #include "softrender_v3sysfont.h"
#include "softrender_desmumefont.h"
using namespace softrender;
using namespace softrender;
extern u8 GPU_screen[4*256*192];
image screenshell; image screenshell;
OSDCLASS::OSDCLASS(u8 core) OSDCLASS::OSDCLASS(u8 core)
{ {
memset(screen, 0, sizeof(screen)); memset(screen, 0, sizeof(screen));
memset(name,0,7); memset(name,0,7);
//memset(line, 0, sizeof(line)); //memset(line, 0, sizeof(line));
memset(timer, 0, sizeof(timer)); memset(timer, 0, sizeof(timer));
memset(color, 0, sizeof(color)); memset(color, 0, sizeof(color));
old_msg = new char[512]; old_msg = new char[512];
memset(old_msg, 0, 512); memset(old_msg, 0, 512);
current_color = 0x8F; current_color = 0x8F;
mode=core; mode=core;
offset=0; offset=0;
startline=0; startline=0;
lastline=0; lastline=0;
needUpdate = false; needUpdate = false;
if (core==0) if (core==0)
memcpy(name,"Core A",6); memcpy(name,"Core A",6);
else else
if (core==1) if (core==1)
memcpy(name,"Core B",6); memcpy(name,"Core B",6);
else else
{ {
memcpy(name,"Main",6); memcpy(name,"Main",6);
mode=255; mode=255;
} }
screenshell.shell = true; screenshell.shell = true;
screenshell.data = screen; screenshell.data = screen;
screenshell.bpp = 15; screenshell.bpp = 15;
screenshell.width = 256; screenshell.width = 256;
screenshell.height = 384; screenshell.height = 384;
screenshell.pitch = 256; screenshell.pitch = 256;
screenshell.cx1 = 0; screenshell.cx1 = 0;
screenshell.cx2 = 256-1; screenshell.cx2 = 256-1;
screenshell.cy1 = 0; screenshell.cy1 = 0;
screenshell.cy2 = 384-1; screenshell.cy2 = 384-1;
LOG("OSD_Init (%s)\n",name); LOG("OSD_Init (%s)\n",name);
} }
OSDCLASS::~OSDCLASS() OSDCLASS::~OSDCLASS()
{ {
LOG("OSD_Deinit (%s)\n",name); LOG("OSD_Deinit (%s)\n",name);
delete[] old_msg; delete[] old_msg;
} }
void OSDCLASS::setOffset(u16 ofs) void OSDCLASS::setOffset(u16 ofs)
{ {
offset=ofs; offset=ofs;
} }
void OSDCLASS::clear() void OSDCLASS::clear()
{ {
memset(screen, 0, sizeof(screen)); memset(screen, 0, sizeof(screen));
memset(line, 0, sizeof(line)); memset(line, 0, sizeof(line));
memset(timer, 0, sizeof(timer)); memset(timer, 0, sizeof(timer));
needUpdate=false; needUpdate=false;
} }
void OSDCLASS::setColor(u16 col) void OSDCLASS::setColor(u16 col)
{ {
current_color = col; current_color = col;
} }
void OSDCLASS::update() // don't optimized void OSDCLASS::update() // don't optimized
{ {
if (!needUpdate) return; // don't update if buffer empty (speed up) if (!needUpdate) return; // don't update if buffer empty (speed up)
u16 *dst=(u16*)GPU_screen; u16 *dst=(u16*)GPU_screen;
if (mode!=255) if (mode!=255)
dst+=offset*512; dst+=offset*512;
for (int i=0; i<256*192; i++) for (int i=0; i<256*192; i++)
{ {
if(screen[i]&0x8000) if(screen[i]&0x8000)
T2WriteWord((u8*)dst,(i << 1), screen[i] ); T2WriteWord((u8*)dst,(i << 1), screen[i] );
} }
} }
void OSDCLASS::addLine(const char *fmt, ...) void OSDCLASS::addLine(const char *fmt, ...)
{ {
} }
void OSDCLASS::addFixed(u16 x, u16 y, const char *fmt, ...) void OSDCLASS::addFixed(u16 x, u16 y, const char *fmt, ...)
{ {
va_list list; va_list list;
char msg[1024]; char msg[1024];
// memset(msg,0,1024); // memset(msg,0,1024);
va_start(list,fmt); va_start(list,fmt);
#if defined(_MSC_VER) || defined(__INTEL_COMPILER) #if defined(_MSC_VER) || defined(__INTEL_COMPILER)
_vsnprintf(msg,1023,fmt,list); _vsnprintf(msg,1023,fmt,list);
#else #else
vsnprintf(msg,1023,fmt,list); vsnprintf(msg,1023,fmt,list);
#endif #endif
va_end(list); va_end(list);
if (strcmp(msg, old_msg) == 0) return; if (strcmp(msg, old_msg) == 0) return;
render51.PrintString<DesmumeFont>(1,x,y,render51.MakeColor(128,0,0),msg,&screenshell); render51.PrintString<DesmumeFont>(1,x,y,render51.MakeColor(128,0,0),msg,&screenshell);
needUpdate = true; needUpdate = true;
} }