- Be smarter about manually inlining functions. Greatly reduces the generated code size, and fixes making optimized builds on MSVC. (Regression from r5248.)
- This change may affect performance. This will need additional testing.
This commit is contained in:
rogerman 2015-08-26 01:27:24 +00:00
parent c36c379e1f
commit 2967cd2c62
2 changed files with 55 additions and 79 deletions

View File

@ -566,7 +566,7 @@ void GPU_addBack(GPU *gpu, const size_t num)
/*****************************************************************************/ /*****************************************************************************/
template<int WIN_NUM> template<int WIN_NUM>
FORCEINLINE u8 GPU::withinRect(const size_t x) const u8 GPU::withinRect(const size_t x) const
{ {
return curr_win[WIN_NUM][x]; return curr_win[WIN_NUM][x];
} }
@ -575,7 +575,7 @@ FORCEINLINE u8 GPU::withinRect(const size_t x) const
// Now assumes that *draw and *effect are different from 0 when called, so we can avoid // Now assumes that *draw and *effect are different from 0 when called, so we can avoid
// setting some values twice // setting some values twice
FORCEINLINE void GPU::renderline_checkWindows(const size_t srcX, bool &draw, bool &effect) const void GPU::renderline_checkWindows(const size_t srcX, bool &draw, bool &effect) const
{ {
// Check if win0 if enabled, and only check if it is // Check if win0 if enabled, and only check if it is
// howevever, this has already been taken care of by the window precalculation // howevever, this has already been taken care of by the window precalculation
@ -878,16 +878,8 @@ FORCEINLINE void GPU::__setFinalColorBck(const u16 color, const size_t srcX, con
return ___setFinalColorBck<MOSAIC, BACKDROP, false, 0>(color, srcX, opaque); return ___setFinalColorBck<MOSAIC, BACKDROP, false, 0>(color, srcX, opaque);
} }
//this was forced inline because most of the time it just falls through to setFinalColorBck() and the function call template<bool BACKDROP, bool USECUSTOMVRAM, int FUNCNUM>
//overhead was ridiculous and terrible FORCEINLINE void GPU::____setFinalColorBck(const u16 color, const size_t srcX)
template<bool MOSAIC, bool BACKDROP, bool USECUSTOMVRAM, int FUNCNUM>
FORCEINLINE void GPU::___setFinalColorBck(u16 color, const size_t srcX, const bool opaque)
{
//due to this early out, we will get incorrect behavior in cases where
//we enable mosaic in the middle of a frame. this is deemed unlikely.
if (!MOSAIC)
{
if (opaque)
{ {
u16 *dstLine = this->currDst; u16 *dstLine = this->currDst;
u8 *bgLine = this->bgPixels; u8 *bgLine = this->bgPixels;
@ -923,6 +915,20 @@ FORCEINLINE void GPU::___setFinalColorBck(u16 color, const size_t srcX, const bo
} }
} }
//this was forced inline because most of the time it just falls through to setFinalColorBck() and the function call
//overhead was ridiculous and terrible
template<bool MOSAIC, bool BACKDROP, bool USECUSTOMVRAM, int FUNCNUM>
FORCEINLINE void GPU::___setFinalColorBck(u16 color, const size_t srcX, const bool opaque)
{
//due to this early out, we will get incorrect behavior in cases where
//we enable mosaic in the middle of a frame. this is deemed unlikely.
if (!MOSAIC)
{
if (opaque)
{
this->____setFinalColorBck<BACKDROP, USECUSTOMVRAM, FUNCNUM>(color, srcX);
}
return; return;
} }
@ -945,38 +951,7 @@ FORCEINLINE void GPU::___setFinalColorBck(u16 color, const size_t srcX, const bo
if (color != 0xFFFF) if (color != 0xFFFF)
{ {
u16 *dstLine = currDst; this->____setFinalColorBck<BACKDROP, USECUSTOMVRAM, FUNCNUM>(color, srcX);
u8 *bgLine = bgPixels;
if (this->isCustomRenderingNeeded)
{
for (size_t line = 0; line < _gpuDstLineCount[this->currLine]; line++)
{
const u16 *srcLine = (USECUSTOMVRAM) ? _gpuCustomVRAM + (this->vramBlockBGIndex * _gpuVRAMBlockOffset) + ((_gpuDstLineIndex[this->currLine] + line) * _displayInfo.customWidth) : NULL;
for (size_t p = 0; p < _gpuDstPitchCount[srcX]; p++)
{
const size_t dstX = _gpuDstPitchIndex[srcX] + p;
setFinalColorBG<BACKDROP,FUNCNUM>(srcX,
dstX,
dstLine,
bgLine,
(USECUSTOMVRAM) ? srcLine[dstX] : color);
}
dstLine += _displayInfo.customWidth;
bgLine += _displayInfo.customWidth;
}
}
else
{
setFinalColorBG<BACKDROP,FUNCNUM>(srcX,
srcX,
dstLine,
bgLine,
color);
}
} }
} }
@ -1018,7 +993,7 @@ static void mosaicSpriteLinePixel(GPU *gpu, const size_t x, u16 l, u16 *dst, u8
if(!objColor.opaque) prioTab[x] = 0xFF; if(!objColor.opaque) prioTab[x] = 0xFF;
} }
FORCEINLINE static void mosaicSpriteLine(GPU *gpu, u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab) static void mosaicSpriteLine(GPU *gpu, u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab)
{ {
//don't even try this unless the mosaic is effective //don't even try this unless the mosaic is effective
if (gpu->mosaicLookup.widthValue != 0 || gpu->mosaicLookup.heightValue != 0) if (gpu->mosaicLookup.widthValue != 0 || gpu->mosaicLookup.heightValue != 0)
@ -1064,7 +1039,7 @@ void lineLarge8bpp(GPU *gpu)
/*****************************************************************************/ /*****************************************************************************/
// render a text background to the combined pixelbuffer // render a text background to the combined pixelbuffer
template<bool MOSAIC> template<bool MOSAIC>
INLINE void renderline_textBG(GPU *gpu, u16 XBG, u16 YBG, u16 LG) void renderline_textBG(GPU *gpu, u16 XBG, u16 YBG, u16 LG)
{ {
const u8 num = gpu->currBgNum; const u8 num = gpu->currBgNum;
struct _BGxCNT *bgCnt = &(gpu->dispx_st)->dispx_BGxCNT[num].bits; struct _BGxCNT *bgCnt = &(gpu->dispx_st)->dispx_BGxCNT[num].bits;
@ -1261,7 +1236,7 @@ FORCEINLINE void rot_BMP_map(GPU *gpu, const s32 auxX, const s32 auxY, const int
typedef void (*rot_fun)(GPU *gpu, const s32 auxX, const s32 auxY, const int lg, const u32 map, const u32 tile, const u16 *pal, const size_t i); typedef void (*rot_fun)(GPU *gpu, const s32 auxX, const s32 auxY, const int lg, const u32 map, const u32 tile, const u16 *pal, const size_t i);
template<rot_fun fun, bool WRAP> template<rot_fun fun, bool WRAP>
FORCEINLINE void rot_scale_op(GPU *gpu, const BGxPARMS &param, const u16 LG, const s32 wh, const s32 ht, const u32 map, const u32 tile, const u16 *pal) void rot_scale_op(GPU *gpu, const BGxPARMS &param, const u16 LG, const s32 wh, const s32 ht, const u32 map, const u32 tile, const u16 *pal)
{ {
ROTOCOORD x, y; ROTOCOORD x, y;
x.val = param.BGxX; x.val = param.BGxX;
@ -1303,7 +1278,7 @@ FORCEINLINE void rot_scale_op(GPU *gpu, const BGxPARMS &param, const u16 LG, con
} }
template<rot_fun fun> template<rot_fun fun>
FORCEINLINE void apply_rot_fun(GPU *gpu, const BGxPARMS &param, const u16 LG, const u32 map, const u32 tile, const u16 *pal) void apply_rot_fun(GPU *gpu, const BGxPARMS &param, const u16 LG, const u32 map, const u32 tile, const u16 *pal)
{ {
struct _BGxCNT *bgCnt = &(gpu->dispx_st)->dispx_BGxCNT[gpu->currBgNum].bits; struct _BGxCNT *bgCnt = &(gpu->dispx_st)->dispx_BGxCNT[gpu->currBgNum].bits;
s32 wh = gpu->BGSize[gpu->currBgNum][0]; s32 wh = gpu->BGSize[gpu->currBgNum][0];
@ -1317,7 +1292,7 @@ FORCEINLINE void apply_rot_fun(GPU *gpu, const BGxPARMS &param, const u16 LG, co
template<bool MOSAIC> template<bool MOSAIC>
FORCEINLINE void rotBG2(GPU *gpu, const BGxPARMS &param, const u16 LG) void rotBG2(GPU *gpu, const BGxPARMS &param, const u16 LG)
{ {
const size_t num = gpu->currBgNum; const size_t num = gpu->currBgNum;
const u16 *pal = (u16 *)(MMU.ARM9_VMEM + gpu->core * ADDRESS_STEP_1KB); const u16 *pal = (u16 *)(MMU.ARM9_VMEM + gpu->core * ADDRESS_STEP_1KB);
@ -1326,7 +1301,7 @@ FORCEINLINE void rotBG2(GPU *gpu, const BGxPARMS &param, const u16 LG)
} }
template<bool MOSAIC> template<bool MOSAIC>
FORCEINLINE void extRotBG2(GPU *gpu, const BGxPARMS &param, const u16 LG) void extRotBG2(GPU *gpu, const BGxPARMS &param, const u16 LG)
{ {
const size_t num = gpu->currBgNum; const size_t num = gpu->currBgNum;
struct _DISPCNT *dispCnt = &(gpu->dispx_st)->dispx_DISPCNT.bits; struct _DISPCNT *dispCnt = &(gpu->dispx_st)->dispx_DISPCNT.bits;
@ -1444,7 +1419,7 @@ void lineExtRot(GPU *gpu)
/* if i understand it correct, and it fixes some sprite problems in chameleon shot */ /* if i understand it correct, and it fixes some sprite problems in chameleon shot */
/* we have a 15 bit color, and should use the pal entry bits as alpha ?*/ /* we have a 15 bit color, and should use the pal entry bits as alpha ?*/
/* http://nocash.emubase.de/gbatek.htm#dsvideoobjs */ /* http://nocash.emubase.de/gbatek.htm#dsvideoobjs */
INLINE void render_sprite_BMP(GPU *gpu, const u8 spriteNum, const u16 l, u16 *dst, const u32 srcadr, u8 *dst_alpha, u8 *typeTab, u8 *prioTab, const u8 prio, const size_t lg, size_t sprX, size_t x, const s32 xdir, const u8 alpha) void render_sprite_BMP(GPU *gpu, const u8 spriteNum, const u16 l, u16 *dst, const u32 srcadr, u8 *dst_alpha, u8 *typeTab, u8 *prioTab, const u8 prio, const size_t lg, size_t sprX, size_t x, const s32 xdir, const u8 alpha)
{ {
for (size_t i = 0; i < lg; i++, ++sprX, x += xdir) for (size_t i = 0; i < lg; i++, ++sprX, x += xdir)
{ {
@ -1462,7 +1437,7 @@ INLINE void render_sprite_BMP(GPU *gpu, const u8 spriteNum, const u16 l, u16 *ds
} }
} }
INLINE void render_sprite_256(GPU *gpu, const u8 spriteNum, const u16 l, u16 *dst, const u32 srcadr, const u16 *pal, u8 *dst_alpha, u8 *typeTab, u8 *prioTab, const u8 prio, const size_t lg, size_t sprX, size_t x, const s32 xdir, const u8 alpha) void render_sprite_256(GPU *gpu, const u8 spriteNum, const u16 l, u16 *dst, const u32 srcadr, const u16 *pal, u8 *dst_alpha, u8 *typeTab, u8 *prioTab, const u8 prio, const size_t lg, size_t sprX, size_t x, const s32 xdir, const u8 alpha)
{ {
for (size_t i = 0; i < lg; i++, ++sprX, x += xdir) for (size_t i = 0; i < lg; i++, ++sprX, x += xdir)
{ {
@ -1483,7 +1458,7 @@ INLINE void render_sprite_256(GPU *gpu, const u8 spriteNum, const u16 l, u16 *ds
} }
} }
INLINE void render_sprite_16(GPU *gpu, const u16 l, u16 *dst, const u32 srcadr, const u16 *pal, u8 *dst_alpha, u8 *typeTab, u8 *prioTab, const u8 prio, const size_t lg, size_t sprX, size_t x, const s32 xdir, const u8 alpha) void render_sprite_16(GPU *gpu, const u16 l, u16 *dst, const u32 srcadr, const u16 *pal, u8 *dst_alpha, u8 *typeTab, u8 *prioTab, const u8 prio, const size_t lg, size_t sprX, size_t x, const s32 xdir, const u8 alpha)
{ {
for (size_t i = 0; i < lg; i++, ++sprX, x += xdir) for (size_t i = 0; i < lg; i++, ++sprX, x += xdir)
{ {
@ -1505,7 +1480,7 @@ INLINE void render_sprite_16(GPU *gpu, const u16 l, u16 *dst, const u32 srcadr,
} }
} }
INLINE void render_sprite_Win(const u8 *src, const bool col256, const size_t lg, size_t sprX, size_t x, const s32 xdir) void render_sprite_Win(const u8 *src, const bool col256, const size_t lg, size_t sprX, size_t x, const s32 xdir)
{ {
if (col256) if (col256)
{ {
@ -1535,7 +1510,7 @@ INLINE void render_sprite_Win(const u8 *src, const bool col256, const size_t lg,
} }
// return val means if the sprite is to be drawn or not // return val means if the sprite is to be drawn or not
FORCEINLINE bool compute_sprite_vars(const OAMAttributes &spriteInfo, const u16 l, bool compute_sprite_vars(const OAMAttributes &spriteInfo, const u16 l,
SpriteSize &sprSize, s32 &sprX, s32 &sprY, s32 &x, s32 &y, s32 &lg, s32 &xdir) SpriteSize &sprSize, s32 &sprX, s32 &sprY, s32 &x, s32 &y, s32 &lg, s32 &xdir)
{ {
x = 0; x = 0;
@ -3004,7 +2979,7 @@ static void GPU_RenderLine_DispCapture(const u16 l)
} }
} }
static INLINE void GPU_RenderLine_MasterBrightness(const GPUMasterBrightMode mode, const u32 factor, u16 *dstLine, const size_t dstLineWidth, const size_t dstLineCount) static void GPU_RenderLine_MasterBrightness(const GPUMasterBrightMode mode, const u32 factor, u16 *dstLine, const size_t dstLineWidth, const size_t dstLineCount)
{ {
//isn't it odd that we can set uselessly high factors here? //isn't it odd that we can set uselessly high factors here?
//factors above 16 change nothing. curious. //factors above 16 change nothing. curious.
@ -3098,7 +3073,7 @@ static INLINE void GPU_RenderLine_MasterBrightness(const GPUMasterBrightMode mod
} }
template<size_t WIN_NUM> template<size_t WIN_NUM>
FORCEINLINE void GPU::setup_windows() void GPU::setup_windows()
{ {
const u8 y = currLine; const u8 y = currLine;
const u16 startY = (WIN_NUM == 0) ? WIN0V0 : WIN1V0; const u16 startY = (WIN_NUM == 0) ? WIN0V0 : WIN1V0;

View File

@ -866,6 +866,7 @@ struct GPU
void setFinalColorSpr(const size_t srcX, const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, const u16 src, const u8 alpha, const u8 type); void setFinalColorSpr(const size_t srcX, const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, const u16 src, const u8 alpha, const u8 type);
template<bool BACKDROP, int FUNCNUM> void setFinalColorBG(const size_t srcX, const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, u16 src); template<bool BACKDROP, int FUNCNUM> void setFinalColorBG(const size_t srcX, const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, u16 src);
template<bool BACKDROP, bool USECUSTOMVRAM, int FUNCNUM> FORCEINLINE void ____setFinalColorBck(const u16 color, const size_t srcX);
template<bool MOSAIC, bool BACKDROP> FORCEINLINE void __setFinalColorBck(u16 color, const size_t srcX, const bool opaque); template<bool MOSAIC, bool BACKDROP> FORCEINLINE void __setFinalColorBck(u16 color, const size_t srcX, const bool opaque);
template<bool MOSAIC, bool BACKDROP, bool USECUSTOMVRAM, int FUNCNUM> FORCEINLINE void ___setFinalColorBck(u16 color, const size_t srcX, const bool opaque); template<bool MOSAIC, bool BACKDROP, bool USECUSTOMVRAM, int FUNCNUM> FORCEINLINE void ___setFinalColorBck(u16 color, const size_t srcX, const bool opaque);