From 690eed9e262ae794715ec915d4cc44db844a27b9 Mon Sep 17 00:00:00 2001 From: RSDuck <rsduck@users.noreply.github.com> Date: Mon, 16 Nov 2020 18:33:58 +0100 Subject: [PATCH] GPU2D: don't an indirect call in tight loops --- src/GPU2D.cpp | 68 ++++++++++++++++++++++++++++++++++----------------- src/GPU2D.h | 12 +++++---- 2 files changed, 53 insertions(+), 27 deletions(-) diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index 7774c650..27aa6083 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -228,9 +228,6 @@ void GPU2D::SetFramebuffer(u32* buf) void GPU2D::SetRenderSettings(bool accel) { Accelerated = accel; - - if (Accelerated) DrawPixel = DrawPixel_Accel; - else DrawPixel = DrawPixel_Normal; } @@ -1330,10 +1327,36 @@ void GPU2D::CalculateWindowMask(u32 line) #define DoDrawBG(type, line, num) \ - { if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) DrawBG_##type<true>(line, num); else DrawBG_##type<false>(line, num); } + { \ + if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) \ + { \ + if (Accelerated) DrawBG_##type<true, DrawPixel_Accel>(line, num); \ + else DrawBG_##type<true, DrawPixel_Normal>(line, num); \ + } \ + else \ + { \ + if (Accelerated) DrawBG_##type<false, DrawPixel_Accel>(line, num); \ + else DrawBG_##type<false, DrawPixel_Normal>(line, num); \ + } \ + } #define DoDrawBG_Large(line) \ - { if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) DrawBG_Large<true>(line); else DrawBG_Large<false>(line); } + do \ + { \ + if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) \ + { \ + if (Accelerated) DrawBG_Large<true, DrawPixel_Accel>(line); \ + else DrawBG_Large<true, DrawPixel_Normal>(line); \ + } \ + else \ + { \ + if (Accelerated) DrawBG_Large<false, DrawPixel_Accel>(line); \ + else DrawBG_Large<false, DrawPixel_Normal>(line); \ + } \ + } while (false) + +#define DoInterleaveSprites(prio) \ + if (Accelerated) InterleaveSprites<DrawPixel_Accel>(prio); else InterleaveSprites<DrawPixel_Normal>(prio); template<u32 bgmode> void GPU2D::DrawScanlineBGMode(u32 line) @@ -1382,7 +1405,7 @@ void GPU2D::DrawScanlineBGMode(u32 line) } } if ((DispCnt & 0x1000) && NumSprites) - InterleaveSprites(0x40000 | (i<<16)); + DoInterleaveSprites(0x40000 | (i<<16)); } } @@ -1394,7 +1417,7 @@ void GPU2D::DrawScanlineBGMode6(u32 line) { if (DispCnt & 0x0400) { - DoDrawBG_Large(line) + DoDrawBG_Large(line); } } if ((BGCnt[0] & 0x3) == i) @@ -1406,7 +1429,7 @@ void GPU2D::DrawScanlineBGMode6(u32 line) } } if ((DispCnt & 0x1000) && NumSprites) - InterleaveSprites(0x40000 | (i<<16)); + DoInterleaveSprites(0x40000 | (i<<16)) } } @@ -1434,7 +1457,7 @@ void GPU2D::DrawScanlineBGMode7(u32 line) } } if ((DispCnt & 0x1000) && NumSprites) - InterleaveSprites(0x40000 | (i<<16)); + DoInterleaveSprites(0x40000 | (i<<16)) } } @@ -1674,7 +1697,7 @@ void GPU2D::DrawBG_3D() } } -template<bool mosaic> +template<bool mosaic, GPU2D::DrawPixel drawPixel> void GPU2D::DrawBG_Text(u32 line, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -1774,7 +1797,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) color = GPU::ReadVRAM_BG<u8>(pixelsaddr + tilexoff); if (color) - DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); + drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); } xoff++; @@ -1827,7 +1850,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) } if (color) - DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); + drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); } xoff++; @@ -1835,7 +1858,7 @@ void GPU2D::DrawBG_Text(u32 line, u32 bgnum) } } -template<bool mosaic> +template<bool mosaic, GPU2D::DrawPixel drawPixel> void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -1920,7 +1943,7 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) color = GPU::ReadVRAM_BG<u8>(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff); if (color) - DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); + drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); } } @@ -1932,7 +1955,7 @@ void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) BGYRefInternal[bgnum-2] += rotD; } -template<bool mosaic> +template<bool mosaic, GPU2D::DrawPixel drawPixel> void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; @@ -2015,7 +2038,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) color = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)); if (color & 0x8000) - DrawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum); + drawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum); } } @@ -2054,7 +2077,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)); if (color) - DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); + drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); } } @@ -2136,7 +2159,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) color = GPU::ReadVRAM_BG<u8>(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff); if (color) - DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); + drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); } } @@ -2149,7 +2172,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) BGYRefInternal[bgnum-2] += rotD; } -template<bool mosaic> +template<bool mosaic, GPU2D::DrawPixel drawPixel> void GPU2D::DrawBG_Large(u32 line) // BG is always BG2 { u16 bgcnt = BGCnt[2]; @@ -2231,7 +2254,7 @@ void GPU2D::DrawBG_Large(u32 line) // BG is always BG2 color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)); if (color) - DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); + drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); } } @@ -2274,6 +2297,7 @@ void GPU2D::ApplySpriteMosaicX() } } +template <GPU2D::DrawPixel drawPixel> void GPU2D::InterleaveSprites(u32 prio) { u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; @@ -2297,7 +2321,7 @@ void GPU2D::InterleaveSprites(u32 prio) else color = extpal[pixel & 0xFFF]; - DrawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); + drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); } } else @@ -2317,7 +2341,7 @@ void GPU2D::InterleaveSprites(u32 prio) else color = pal[pixel & 0xFF]; - DrawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); + drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); } } } diff --git a/src/GPU2D.h b/src/GPU2D.h index 521adf01..469d6a24 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -147,15 +147,17 @@ private: static void DrawPixel_Normal(u32* dst, u16 color, u32 flag); static void DrawPixel_Accel(u32* dst, u16 color, u32 flag); - void (*DrawPixel)(u32* dst, u16 color, u32 flag); + + typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag); void DrawBG_3D(); - template<bool mosaic> void DrawBG_Text(u32 line, u32 bgnum); - template<bool mosaic> void DrawBG_Affine(u32 line, u32 bgnum); - template<bool mosaic> void DrawBG_Extended(u32 line, u32 bgnum); - template<bool mosaic> void DrawBG_Large(u32 line); + template<bool mosaic, DrawPixel drawPixel> void DrawBG_Text(u32 line, u32 bgnum); + template<bool mosaic, DrawPixel drawPixel> void DrawBG_Affine(u32 line, u32 bgnum); + template<bool mosaic, DrawPixel drawPixel> void DrawBG_Extended(u32 line, u32 bgnum); + template<bool mosaic, DrawPixel drawPixel> void DrawBG_Large(u32 line); void ApplySpriteMosaicX(); + template<DrawPixel drawPixel> void InterleaveSprites(u32 prio); template<bool window> void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos); template<bool window> void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos);