From 54e4d914575be4ad901fa451a91305777c23fdf6 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 16 Aug 2016 21:05:55 -0700 Subject: [PATCH] GBA Video: Optimize mode 0 rendering --- CHANGES | 1 + src/gba/renderers/software-bg.c | 2 +- src/gba/renderers/software-mode0.c | 134 ++++++++++----------------- src/gba/renderers/software-private.h | 29 +++--- 4 files changed, 65 insertions(+), 101 deletions(-) diff --git a/CHANGES b/CHANGES index 15c39e47f..80ef8f455 100644 --- a/CHANGES +++ b/CHANGES @@ -57,6 +57,7 @@ Misc: - GBA BIOS: Use custom ArcTan, not relying on OS - PSP2: Sync files per descriptor - GBA Savedata: Add realistic timing for EEPROM + - GBA Video: Optimize mode 0 rendering 0.4.1: (2016-07-11) Bugfixes: diff --git a/src/gba/renderers/software-bg.c b/src/gba/renderers/software-bg.c index 56a79933b..dafc0dff5 100644 --- a/src/gba/renderers/software-bg.c +++ b/src/gba/renderers/software-bg.c @@ -45,7 +45,7 @@ } \ MOSAIC(COORD) \ if (pixelData) { \ - COMPOSITE_256_ ## OBJWIN (BLEND); \ + COMPOSITE_256_ ## OBJWIN (BLEND, 0); \ } \ } diff --git a/src/gba/renderers/software-mode0.c b/src/gba/renderers/software-mode0.c index 21e5bf650..704ef3d61 100644 --- a/src/gba/renderers/software-mode0.c +++ b/src/gba/renderers/software-mode0.c @@ -28,12 +28,12 @@ if (!GBA_TEXT_MAP_HFLIP(mapData)) { \ tileData >>= 4 * mod8; \ for (; outX < end; ++outX, ++pixel) { \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 0); \ } \ } else { \ for (outX = end - 1; outX >= renderer->start; --outX) { \ uint32_t* pixel = &renderer->row[outX]; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 0); \ } \ } @@ -50,7 +50,7 @@ pixel = &renderer->row[outX]; \ } \ for (; outX < renderer->end; ++outX, ++pixel) { \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 0); \ } \ } else { \ tileData >>= 4 * (0x8 - mod8); \ @@ -61,7 +61,7 @@ outX = renderer->end - 1; \ pixel = &renderer->row[outX]; \ for (; outX > end; --outX, --pixel) { \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 0); \ } \ /* Needed for consistency checks */ \ if (VIDEO_CHECKS) { \ @@ -128,7 +128,7 @@ mosaicWait = mosaicH; \ } \ --mosaicWait; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 0); \ ++pixel; \ } \ x = 0; \ @@ -147,44 +147,26 @@ LOAD_32(tileData, charBase, vram); \ if (tileData) { \ if (!GBA_TEXT_MAP_HFLIP(mapData)) { \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - ++pixel; \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 0); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 1); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 2); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 3); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 4); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 5); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 6); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 7); \ } else { \ - pixel += 7; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN); \ - pixel += 8; \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 7); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 6); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 5); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 4); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 3); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 2); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 1); \ + BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, 0); \ } \ - } else { \ - pixel += 8; \ } \ + pixel += 8; \ } #define DRAW_BACKGROUND_MODE_0_TILE_SUFFIX_256(BLEND, OBJWIN) \ @@ -198,7 +180,7 @@ tileData >>= 8 * shift; \ shift = 0; \ for (; outX < end2; ++outX, ++pixel) { \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ } \ } \ } \ @@ -207,7 +189,7 @@ LOAD_32(tileData, charBase + 4, vram); \ tileData >>= 8 * shift; \ for (; outX < end; ++outX, ++pixel) { \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ } \ } \ } else { \ @@ -218,7 +200,7 @@ if (end2 > start) { \ LOAD_32(tileData, charBase, vram); \ for (; outX >= end2; --outX, --pixel) { \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ } \ charBase += 4; \ } \ @@ -227,7 +209,7 @@ if (LIKELY(charBase < 0x10000)) { \ LOAD_32(tileData, charBase, vram); \ for (; outX >= renderer->start; --outX, --pixel) { \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ } \ } \ outX = end; \ @@ -245,14 +227,14 @@ if (end > 0) { \ LOAD_32(tileData, charBase, vram); \ for (; outX < renderer->end - end; ++outX, ++pixel) { \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ } \ charBase += 4; \ } \ \ LOAD_32(tileData, charBase, vram); \ for (; outX < renderer->end; ++outX, ++pixel) { \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ } \ } else { \ int shift = (8 - mod8) & 0x3; \ @@ -263,7 +245,7 @@ LOAD_32(tileData, charBase, vram); \ tileData >>= 8 * shift; \ for (; outX >= start + 4; --outX, --pixel) { \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ } \ shift = 0; \ } \ @@ -271,7 +253,7 @@ LOAD_32(tileData, charBase + 4, vram); \ tileData >>= 8 * shift; \ for (; outX >= start; --outX, --pixel) { \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ } \ /* Needed for consistency checks */ \ if (VIDEO_CHECKS) { \ @@ -291,53 +273,35 @@ if (!GBA_TEXT_MAP_HFLIP(mapData)) { \ LOAD_32(tileData, charBase, vram); \ if (tileData) { \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - ++pixel; \ - } else { \ - pixel += 4; \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 1); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 2); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 3); \ } \ + pixel += 4; \ LOAD_32(tileData, charBase + 4, vram); \ if (tileData) { \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - ++pixel; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - ++pixel; \ - } else { \ - pixel += 4; \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 1); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 2); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 3); \ } \ + pixel += 4; \ } else { \ LOAD_32(tileData, charBase + 4, vram); \ if (tileData) { \ - pixel += 3; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 3); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 2); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 1); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ } \ pixel += 4; \ LOAD_32(tileData, charBase, vram); \ if (tileData) { \ - pixel += 3; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ - --pixel; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 3); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 2); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 1); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ } \ pixel += 4; \ } \ @@ -377,7 +341,7 @@ } \ tileData |= tileData << 8; \ --mosaicWait; \ - BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN); \ + BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, 0); \ ++pixel; \ } \ } diff --git a/src/gba/renderers/software-private.h b/src/gba/renderers/software-private.h index 98783d801..6933e3c28 100644 --- a/src/gba/renderers/software-private.h +++ b/src/gba/renderers/software-private.h @@ -34,7 +34,7 @@ static unsigned _mix(int weightA, unsigned colorA, int weightB, unsigned colorB) // We stash the priority on the top bits so we can do a one-operator comparison -// The lower the number, the higher the priority, and sprites take precendence over backgrounds +// The lower the number, the higher the priority, and sprites take precedence over backgrounds // We want to do special processing if the color pixel is target 1, however static inline void _compositeBlendObjwin(struct GBAVideoSoftwareRenderer* renderer, uint32_t* pixel, uint32_t color, uint32_t current) { @@ -83,45 +83,44 @@ static inline void _compositeNoBlendNoObjwin(struct GBAVideoSoftwareRenderer* re *pixel = color; } -#define COMPOSITE_16_OBJWIN(BLEND) \ +#define COMPOSITE_16_OBJWIN(BLEND, IDX) \ if (objwinForceEnable || (!(current & FLAG_OBJWIN)) == objwinOnly) { \ unsigned color = (current & FLAG_OBJWIN) ? objwinPalette[paletteData | pixelData] : palette[pixelData]; \ unsigned mergedFlags = flags; \ if (current & FLAG_OBJWIN) { \ mergedFlags = objwinFlags; \ } \ - _composite ## BLEND ## Objwin(renderer, pixel, color | mergedFlags, current); \ + _composite ## BLEND ## Objwin(renderer, &pixel[IDX], color | mergedFlags, current); \ } -#define COMPOSITE_16_NO_OBJWIN(BLEND) \ - _composite ## BLEND ## NoObjwin(renderer, pixel, palette[pixelData] | flags, current); +#define COMPOSITE_16_NO_OBJWIN(BLEND, IDX) \ + _composite ## BLEND ## NoObjwin(renderer, &pixel[IDX], palette[pixelData] | flags, current); -#define COMPOSITE_256_OBJWIN(BLEND) \ +#define COMPOSITE_256_OBJWIN(BLEND, IDX) \ if (objwinForceEnable || (!(current & FLAG_OBJWIN)) == objwinOnly) { \ unsigned color = (current & FLAG_OBJWIN) ? objwinPalette[pixelData] : palette[pixelData]; \ unsigned mergedFlags = flags; \ if (current & FLAG_OBJWIN) { \ mergedFlags = objwinFlags; \ } \ - _composite ## BLEND ## Objwin(renderer, pixel, color | mergedFlags, current); \ + _composite ## BLEND ## Objwin(renderer, &pixel[IDX], color | mergedFlags, current); \ } -#define COMPOSITE_256_NO_OBJWIN(BLEND) \ - COMPOSITE_16_NO_OBJWIN(BLEND) +#define COMPOSITE_256_NO_OBJWIN COMPOSITE_16_NO_OBJWIN -#define BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN) \ +#define BACKGROUND_DRAW_PIXEL_16(BLEND, OBJWIN, IDX) \ pixelData = tileData & 0xF; \ - current = *pixel; \ + current = pixel[IDX]; \ if (pixelData && IS_WRITABLE(current)) { \ - COMPOSITE_16_ ## OBJWIN (BLEND); \ + COMPOSITE_16_ ## OBJWIN (BLEND, IDX); \ } \ tileData >>= 4; -#define BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN) \ +#define BACKGROUND_DRAW_PIXEL_256(BLEND, OBJWIN, IDX) \ pixelData = tileData & 0xFF; \ - current = *pixel; \ + current = pixel[IDX]; \ if (pixelData && IS_WRITABLE(current)) { \ - COMPOSITE_256_ ## OBJWIN (BLEND); \ + COMPOSITE_256_ ## OBJWIN (BLEND, IDX); \ } \ tileData >>= 8;