From 871c21fb6cf9041b4ac7c7f7074e0b2eb77484fe Mon Sep 17 00:00:00 2001
From: Vicki Pfau <vi@endrift.com>
Date: Wed, 2 Aug 2017 14:47:23 -0700
Subject: [PATCH] GB Video: Further SGB work, support for ATTR_BLK

---
 CHANGES                                       |   2 +-
 include/mgba/core/interface.h                 |  22 ++
 include/mgba/internal/gb/renderers/software.h |   5 +
 include/mgba/internal/gb/video.h              |   6 +-
 src/gb/renderers/software.c                   | 266 ++++++++++++------
 src/gb/video.c                                |  95 ++++++-
 src/gba/renderers/software-bg.c               |   2 +
 src/gba/renderers/video-software.c            |  17 +-
 src/platform/python/_builder.h                |   3 +-
 src/platform/sdl/gl-sdl.c                     |   8 +-
 src/platform/sdl/gles2-sdl.c                  |   2 +-
 11 files changed, 307 insertions(+), 121 deletions(-)

diff --git a/CHANGES b/CHANGES
index 54f8fb1f7..83f89025d 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,7 +4,7 @@ Features:
  - Game Boy Camera support
  - Qt: Set default Game Boy colors
  - Game Boy Printer support
- - Super Game Boy borders
+ - Super Game Boy support
 Bugfixes:
  - GB Audio: Make audio unsigned with bias (fixes mgba.io/i/749)
  - Python: Fix importing .gb or .gba before .core
diff --git a/include/mgba/core/interface.h b/include/mgba/core/interface.h
index b3bbad782..053b5b1d1 100644
--- a/include/mgba/core/interface.h
+++ b/include/mgba/core/interface.h
@@ -35,6 +35,28 @@ typedef uint32_t color_t;
 #define M_RGB8_TO_BGR5(X) ((((X) & 0xF8) >> 3) | (((X) & 0xF800) >> 6) | (((X) & 0xF80000) >> 9))
 #define M_RGB8_TO_RGB5(X) ((((X) & 0xF8) << 7) | (((X) & 0xF800) >> 6) | (((X) & 0xF80000) >> 19))
 
+#ifndef PYCPARSE
+static inline color_t mColorFrom555(uint16_t value) { // Converts a 15-bit color (three 5-bit fields) into the color_t layout selected at build time
+#ifdef COLOR_16_BIT
+#ifdef COLOR_5_6_5
+	color_t color = 0;
+	color |= (value & 0x001F) << 11; // low 5-bit field -> bits 11-15
+	color |= (value & 0x03E0) << 1; // middle 5-bit field -> bits 6-10; bit 5 stays clear
+	color |= (value & 0x7C00) >> 10; // high 5-bit field -> bits 0-4
+#else
+	color_t color = value; // 16-bit build without COLOR_5_6_5: the value is used unchanged
+#endif
+#else
+	color_t color = 0;
+	color |= (value << 3) & 0xF8; // low 5-bit field -> top 5 bits of byte 0
+	color |= (value << 6) & 0xF800; // middle 5-bit field -> top 5 bits of byte 1
+	color |= (value << 9) & 0xF80000; // high 5-bit field -> top 5 bits of byte 2
+	color |= (color >> 5) & 0x070707; // copy each byte's top 3 bits into its low 3 bits so 0x1F expands to 0xFF
+#endif
+	return color;
+}
+#endif
+
 struct blip_t;
 
 enum mColorFormat {
diff --git a/include/mgba/internal/gb/renderers/software.h b/include/mgba/internal/gb/renderers/software.h
index 875973774..4ee36d850 100644
--- a/include/mgba/internal/gb/renderers/software.h
+++ b/include/mgba/internal/gb/renderers/software.h
@@ -23,6 +23,7 @@ struct GBVideoSoftwareRenderer {
 	uint8_t row[GB_VIDEO_HORIZONTAL_PIXELS + 8];
 
 	color_t palette[128];
+	uint8_t lookup[64];
 
 	uint32_t* temporaryBuffer;
 
@@ -37,6 +38,10 @@ struct GBVideoSoftwareRenderer {
 
 	int sgbTransfer;
 	uint8_t sgbPacket[16];
+	uint8_t sgbCommandHeader;
+	int sgbPacketId;
+	int sgbDataSets;
+	uint8_t sgbPartialDataSet[15];
 };
 
 void GBVideoSoftwareRendererCreate(struct GBVideoSoftwareRenderer*);
diff --git a/include/mgba/internal/gb/video.h b/include/mgba/internal/gb/video.h
index 539ce2253..f9648168e 100644
--- a/include/mgba/internal/gb/video.h
+++ b/include/mgba/internal/gb/video.h
@@ -34,7 +34,8 @@ enum {
 
 	SGB_SIZE_CHAR_RAM = 0x2000,
 	SGB_SIZE_MAP_RAM = 0x1000,
-	SGB_SIZE_PAL_RAM = 0x1000
+	SGB_SIZE_PAL_RAM = 0x1000,
+	SGB_SIZE_ATF_RAM = 0x1000
 };
 
 DECL_BITFIELD(GBObjAttributes, uint8_t);
@@ -89,6 +90,8 @@ struct GBVideoRenderer {
 	uint8_t* sgbMapRam;
 	uint8_t* sgbPalRam;
 	int sgbRenderMode;
+	uint8_t* sgbAttributes;
+	uint8_t* sgbAttributeFiles;
 
 	bool disableBG;
 	bool disableOBJ;
@@ -140,6 +143,7 @@ struct GBVideo {
 	bool bcpIncrement;
 	int ocpIndex;
 	bool ocpIncrement;
+	uint8_t sgbCommandHeader;
 
 	uint16_t dmgPalette[4];
 	uint16_t palette[64];
diff --git a/src/gb/renderers/software.c b/src/gb/renderers/software.c
index f2235c946..b76228a53 100644
--- a/src/gb/renderers/software.c
+++ b/src/gb/renderers/software.c
@@ -45,7 +45,7 @@ static void _clearScreen(struct GBVideoSoftwareRenderer* renderer) {
 }
 
 static void _regenerateSGBBorder(struct GBVideoSoftwareRenderer* renderer) {
-	int  i;
+	int i;
 	for (i = 0; i < 0x40; ++i) {
 		uint16_t color;
 		LOAD_16LE(color, 0x800 + i * 2, renderer->d.sgbMapRam);
@@ -54,23 +54,24 @@ static void _regenerateSGBBorder(struct GBVideoSoftwareRenderer* renderer) {
 	int x, y;
 	for (y = 0; y < 224; ++y) {
 		for (x = 0; x < 256; x += 8) {
-			uint16_t mapData;
-			LOAD_16LE(mapData, (x >> 2) + (y & ~7) * 8, renderer->d.sgbMapRam);
-			if (UNLIKELY(SGBBgAttributesGetTile(mapData) > 0x100)) {
+			if (x >= 48 && x < 208 && y >= 40 && y < 104) {
 				continue;
 			}
+			uint16_t mapData;
+			LOAD_16LE(mapData, (x >> 2) + (y & ~7) * 8, renderer->d.sgbMapRam);
+			if (UNLIKELY(SGBBgAttributesGetTile(mapData) >= 0x100)) {
+				continue;
+			}
+
 			int localY = y & 0x7;
 			if (SGBBgAttributesIsYFlip(mapData)) {
-				localY = 7 - y;
+				localY = 7 - localY;
 			}
 			uint8_t tileData[4];
 			tileData[0] = renderer->d.sgbCharRam[(SGBBgAttributesGetTile(mapData) * 16 + localY) * 2 + 0x00];
 			tileData[1] = renderer->d.sgbCharRam[(SGBBgAttributesGetTile(mapData) * 16 + localY) * 2 + 0x01];
 			tileData[2] = renderer->d.sgbCharRam[(SGBBgAttributesGetTile(mapData) * 16 + localY) * 2 + 0x10];
 			tileData[3] = renderer->d.sgbCharRam[(SGBBgAttributesGetTile(mapData) * 16 + localY) * 2 + 0x11];
-			if (!(tileData[0] | tileData[1] | tileData[2] | tileData[3])) {
-				continue;
-			}
 
 			size_t base = y * renderer->outputBufferStride + x;
 			int p = SGBBgAttributesGetPalette(mapData) * 0x10;
@@ -97,6 +98,53 @@ static void _regenerateSGBBorder(struct GBVideoSoftwareRenderer* renderer) {
 	}
 }
 
+static inline void _setAttribute(uint8_t* sgbAttributes, unsigned x, unsigned y, int palette) { // Stores a 2-bit palette number for cell (x, y) in a packed map: four cells per byte, five bytes per row
+	int p = sgbAttributes[(x >> 2) + 5 * y]; // fetch the byte that holds this cell
+	p &= ~(3 << (2 * (3 - (x & 3)))); // clear the cell's 2-bit field; x & 3 == 0 is the most significant pair
+	p |= palette << (2 * (3 - (x & 3))); // palette is not masked here, so callers must pass 0-3
+	sgbAttributes[(x >> 2) + 5 * y] = p;
+}
+
+static void _parseAttrBlock(struct GBVideoSoftwareRenderer* renderer, int start) { // Applies one 6-byte ATTR_BLK data set to the attribute map; start is its offset in sgbPacket, negative if it began in the previous packet
+	uint8_t block[6];
+	if (start < 0) {
+		memcpy(block, renderer->sgbPartialDataSet, -start); // the first -start bytes were saved from the previous packet
+		memcpy(&block[-start], renderer->sgbPacket, 6 + start); // the remainder sits at the front of the current packet
+	} else {
+		memcpy(block, &renderer->sgbPacket[start], 6);
+	}
+	unsigned x0 = block[2]; // bytes 2-5 are the rectangle corners: x0, y0, x1, y1
+	unsigned x1 = block[4];
+	unsigned y0 = block[3];
+	unsigned y1 = block[5];
+	unsigned x, y;
+	int pIn = block[1] & 3; // byte 1 packs three 2-bit palette numbers: inside, perimeter, outside
+	int pPerim = (block[1] >> 2) & 3;
+	int pOut = (block[1] >> 4) & 3;
+
+	for (y = 0; y < GB_VIDEO_VERTICAL_PIXELS / 8; ++y) { // visit every 8x8-pixel cell of the screen
+		for (x = 0; x < GB_VIDEO_HORIZONTAL_PIXELS / 8; ++x) {
+			if (y > y0 && y < y1 && x > x0 && x < x1) { // strictly inside the rectangle
+				if (block[0] & 1) { // control bit 0 enables recoloring the inside
+					_setAttribute(renderer->d.sgbAttributes, x, y, pIn);
+				}
+			} else if (y < y0 || y > y1 || x < x0 || x > x1) { // strictly outside the rectangle
+				if (block[0] & 4) { // control bit 2 enables recoloring the outside
+					_setAttribute(renderer->d.sgbAttributes, x, y, pOut);
+				}
+			} else { // neither inside nor outside: the cell lies on the rectangle's border
+				if (block[0] & 2) { // control bit 1 gives the border its own palette
+					_setAttribute(renderer->d.sgbAttributes, x, y, pPerim);
+				} else if (block[0] & 1) { // otherwise the border takes the inside palette when only that is enabled
+					_setAttribute(renderer->d.sgbAttributes, x, y, pIn);
+				} else if (block[0] & 4) { // or the outside palette when only that is enabled
+					_setAttribute(renderer->d.sgbAttributes, x, y, pOut);
+				}
+			}
+		}
+	}
+}
+
 void GBVideoSoftwareRendererCreate(struct GBVideoSoftwareRenderer* renderer) {
 	renderer->d.init = GBVideoSoftwareRendererInit;
 	renderer->d.deinit = GBVideoSoftwareRendererDeinit;
@@ -128,6 +176,14 @@ static void GBVideoSoftwareRendererInit(struct GBVideoRenderer* renderer, enum G
 	softwareRenderer->wx = 0;
 	softwareRenderer->model = model;
 	softwareRenderer->sgbTransfer = 0;
+	softwareRenderer->sgbCommandHeader = 0;
+	int i;
+	for (i = 0; i < 64; ++i) {
+		softwareRenderer->lookup[i] = i;
+		softwareRenderer->lookup[i] = i;
+		softwareRenderer->lookup[i] = i;
+		softwareRenderer->lookup[i] = i;
+	}
 }
 
 static void GBVideoSoftwareRendererDeinit(struct GBVideoRenderer* renderer) {
@@ -153,6 +209,24 @@ static uint8_t GBVideoSoftwareRendererWriteVideoRegister(struct GBVideoRenderer*
 	case REG_WX:
 		softwareRenderer->wx = value;
 		break;
+	case REG_BGP:
+		softwareRenderer->lookup[0] = value & 3;
+		softwareRenderer->lookup[1] = (value >> 2) & 3;
+		softwareRenderer->lookup[2] = (value >> 4) & 3;
+		softwareRenderer->lookup[3] = (value >> 6) & 3;
+		break;
+	case REG_OBP0:
+		softwareRenderer->lookup[0x20 + 0] = value & 3;
+		softwareRenderer->lookup[0x20 + 1] = (value >> 2) & 3;
+		softwareRenderer->lookup[0x20 + 2] = (value >> 4) & 3;
+		softwareRenderer->lookup[0x20 + 3] = (value >> 6) & 3;
+		break;
+	case REG_OBP1:
+		softwareRenderer->lookup[0x24 + 0] = value & 3;
+		softwareRenderer->lookup[0x24 + 1] = (value >> 2) & 3;
+		softwareRenderer->lookup[0x24 + 2] = (value >> 4) & 3;
+		softwareRenderer->lookup[0x24 + 3] = (value >> 6) & 3;
+		break;
 	}
 	return value;
 }
@@ -160,26 +234,48 @@ static uint8_t GBVideoSoftwareRendererWriteVideoRegister(struct GBVideoRenderer*
 static void GBVideoSoftwareRendererWriteSGBPacket(struct GBVideoRenderer* renderer, uint8_t* data) {
 	struct GBVideoSoftwareRenderer* softwareRenderer = (struct GBVideoSoftwareRenderer*) renderer;
 	memcpy(softwareRenderer->sgbPacket, data, sizeof(softwareRenderer->sgbPacket));
+	int i;
+	if (!(softwareRenderer->sgbCommandHeader & 7)) { // low 3 bits at zero: no packets pending, so this packet begins a new command
+		softwareRenderer->sgbCommandHeader = data[0]; // latch the header byte; it is reused for the command's later packets
+		softwareRenderer->sgbPacketId = 0;
+		softwareRenderer->sgbTransfer = 0;
+	}
+	--softwareRenderer->sgbCommandHeader; // consume one packet from the count held in the low 3 bits
+	++softwareRenderer->sgbPacketId; // 1 for the first packet of a command
+	int set;
+	switch (softwareRenderer->sgbCommandHeader >> 3) { // the upper 5 bits select the command
+	case SGB_PAL_SET:
+		softwareRenderer->sgbPacket[1] = data[9]; // place byte 9 where the SGB_ATTR_SET case below reads its argument
+		if (!(data[9] & 0x80)) { // only continue into the attribute file load when bit 7 is set
+			break;
+		}
+		// Fall through
+	case SGB_ATTR_SET:
+		set = softwareRenderer->sgbPacket[1] & 0x3F; // low 6 bits pick the attribute file
+		if (set <= 0x2C) { // file numbers above 0x2C are ignored
+			memcpy(renderer->sgbAttributes, &renderer->sgbAttributeFiles[set * 90], 90); // each attribute file is 90 bytes
+		}
+		break;
+	case SGB_ATTR_BLK:
+		if (softwareRenderer->sgbPacketId == 1) {
+			softwareRenderer->sgbDataSets = softwareRenderer->sgbPacket[1]; // byte 1 of the first packet is the data set count
+			i = 2; // data sets begin right after the header and count bytes
+		} else {
+			i = (9 - softwareRenderer->sgbPacketId) % 3 * -2; // 0, -2 or -4: negative means that many bytes of the next data set were carried over
+		}
+		for (; i <= 10 && softwareRenderer->sgbDataSets; i += 6, --softwareRenderer->sgbDataSets) { // i <= 10 keeps the whole 6-byte data set inside the 16-byte packet
+			_parseAttrBlock(softwareRenderer, i);
+		}
+		if (i < 16 && softwareRenderer->sgbDataSets) { // a data set straddles the packet boundary
+			memcpy(softwareRenderer->sgbPartialDataSet, &softwareRenderer->sgbPacket[i], 16 - i); // keep its leading bytes for the next packet
+		}
+		break;
+	}
 }
 
 static void GBVideoSoftwareRendererWritePalette(struct GBVideoRenderer* renderer, int index, uint16_t value) {
 	struct GBVideoSoftwareRenderer* softwareRenderer = (struct GBVideoSoftwareRenderer*) renderer;
-#ifdef COLOR_16_BIT
-#ifdef COLOR_5_6_5
-	color_t color = 0;
-	color |= (value & 0x001F) << 11;
-	color |= (value & 0x03E0) << 1;
-	color |= (value & 0x7C00) >> 10;
-#else
-	color_t color = value;
-#endif
-#else
-	color_t color = 0;
-	color |= (value << 3) & 0xF8;
-	color |= (value << 6) & 0xF800;
-	color |= (value << 9) & 0xF80000;
-	color |= (color >> 5) & 0x070707;
-#endif
+	color_t color = mColorFrom555(value);
 	softwareRenderer->palette[index] = color;
 	if (renderer->cache) {
 		mTileCacheWritePalette(renderer->cache, index << 1);
@@ -233,6 +329,10 @@ static void GBVideoSoftwareRendererDrawRange(struct GBVideoRenderer* renderer, i
 			GBVideoSoftwareRendererDrawObj(softwareRenderer, &obj[i], startX, endX, y);
 		}
 	}
+}
+
+static void GBVideoSoftwareRendererFinishScanline(struct GBVideoRenderer* renderer, int y) {
+	struct GBVideoSoftwareRenderer* softwareRenderer = (struct GBVideoSoftwareRenderer*) renderer;
 	size_t sgbOffset = 0;
 	if (softwareRenderer->model == GB_MODEL_SGB) {
 		sgbOffset = softwareRenderer->outputBufferStride * 40 + 48;
@@ -241,24 +341,28 @@ static void GBVideoSoftwareRendererDrawRange(struct GBVideoRenderer* renderer, i
 	int x;
 	switch (softwareRenderer->d.sgbRenderMode) {
 	case 0:
-		for (x = startX; x + 7 < (endX & ~7); x += 8) {
-			row[x] = softwareRenderer->palette[softwareRenderer->row[x] & 0x7F];
-			row[x + 1] = softwareRenderer->palette[softwareRenderer->row[x + 1] & 0x7F];
-			row[x + 2] = softwareRenderer->palette[softwareRenderer->row[x + 2] & 0x7F];
-			row[x + 3] = softwareRenderer->palette[softwareRenderer->row[x + 3] & 0x7F];
-			row[x + 4] = softwareRenderer->palette[softwareRenderer->row[x + 4] & 0x7F];
-			row[x + 5] = softwareRenderer->palette[softwareRenderer->row[x + 5] & 0x7F];
-			row[x + 6] = softwareRenderer->palette[softwareRenderer->row[x + 6] & 0x7F];
-			row[x + 7] = softwareRenderer->palette[softwareRenderer->row[x + 7] & 0x7F];
-		}
-		for (; x < endX; ++x) {
-			row[x] = softwareRenderer->palette[softwareRenderer->row[x] & 0x7F];
+		for (x = 0; x < GB_VIDEO_HORIZONTAL_PIXELS; x += 8) {
+			int p = 0;
+			if (softwareRenderer->model == GB_MODEL_SGB) {
+				p = softwareRenderer->d.sgbAttributes[(x >> 5) + 5 * (y >> 3)];
+				p >>= 6 - ((x / 4) & 0x6);
+				p &= 3;
+				p <<= 2;
+			}
+			row[x + 0] = softwareRenderer->palette[p | softwareRenderer->lookup[softwareRenderer->row[x] & 0x7F]];
+			row[x + 1] = softwareRenderer->palette[p | softwareRenderer->lookup[softwareRenderer->row[x + 1] & 0x7F]];
+			row[x + 2] = softwareRenderer->palette[p | softwareRenderer->lookup[softwareRenderer->row[x + 2] & 0x7F]];
+			row[x + 3] = softwareRenderer->palette[p | softwareRenderer->lookup[softwareRenderer->row[x + 3] & 0x7F]];
+			row[x + 4] = softwareRenderer->palette[p | softwareRenderer->lookup[softwareRenderer->row[x + 4] & 0x7F]];
+			row[x + 5] = softwareRenderer->palette[p | softwareRenderer->lookup[softwareRenderer->row[x + 5] & 0x7F]];
+			row[x + 6] = softwareRenderer->palette[p | softwareRenderer->lookup[softwareRenderer->row[x + 6] & 0x7F]];
+			row[x + 7] = softwareRenderer->palette[p | softwareRenderer->lookup[softwareRenderer->row[x + 7] & 0x7F]];
 		}
 		break;
 	case 1:
-		return;
+		break;
 	case 2:
-		for (x = startX; x + 7 < (endX & ~7); x += 8) {
+		for (x = 0; x < GB_VIDEO_HORIZONTAL_PIXELS; x += 8) {
 			row[x] = 0;
 			row[x + 1] = 0;
 			row[x + 2] = 0;
@@ -268,12 +372,9 @@ static void GBVideoSoftwareRendererDrawRange(struct GBVideoRenderer* renderer, i
 			row[x + 6] = 0;
 			row[x + 7] = 0;
 		}
-		for (; x < endX; ++x) {
-			row[x] = 0;
-		}
-		return;
+		break;
 	case 3:
-		for (x = startX; x + 7 < (endX & ~7); x += 8) {
+		for (x = 0; x < GB_VIDEO_HORIZONTAL_PIXELS; x += 8) {
 			row[x] = softwareRenderer->palette[0];
 			row[x + 1] = softwareRenderer->palette[0];
 			row[x + 2] = softwareRenderer->palette[0];
@@ -283,21 +384,19 @@ static void GBVideoSoftwareRendererDrawRange(struct GBVideoRenderer* renderer, i
 			row[x + 6] = softwareRenderer->palette[0];
 			row[x + 7] = softwareRenderer->palette[0];
 		}
-		for (; x < endX; ++x) {
-			row[x] = softwareRenderer->palette[0];
-		}
-		return;
+		break;
 	}
-}
 
-static void GBVideoSoftwareRendererFinishScanline(struct GBVideoRenderer* renderer, int y) {
-	struct GBVideoSoftwareRenderer* softwareRenderer = (struct GBVideoSoftwareRenderer*) renderer;
 	if (GBRegisterLCDCIsWindow(softwareRenderer->lcdc) && softwareRenderer->wy <= y && softwareRenderer->wx - 7 < GB_VIDEO_HORIZONTAL_PIXELS) {
 		++softwareRenderer->currentWy;
 	}
 	if (softwareRenderer->sgbTransfer == 1) {
+		size_t offset = 2 * ((y & 7) + (y >> 3) * GB_VIDEO_HORIZONTAL_PIXELS);
+		if (offset >= 0x1000) {
+			return;
+		}
 		uint8_t* buffer = NULL;
-		switch (softwareRenderer->sgbPacket[0] >> 3) {
+		switch (softwareRenderer->sgbCommandHeader >> 3) {
 		case SGB_PAL_TRN:
 			buffer = renderer->sgbPalRam;
 			break;
@@ -307,38 +406,38 @@ static void GBVideoSoftwareRendererFinishScanline(struct GBVideoRenderer* render
 		case SGB_PCT_TRN:
 			buffer = renderer->sgbMapRam;
 			break;
+		case SGB_ATTR_TRN:
+			buffer = renderer->sgbAttributeFiles;
+			break;
 		default:
 			break;
 		}
 		if (buffer) {
-			size_t offset = 2 * ((y & 7) + (y >> 3) * GB_VIDEO_HORIZONTAL_PIXELS);
-			if (offset < 0x1000) {
-				int i;
-				for (i = 0; i < GB_VIDEO_HORIZONTAL_PIXELS; i += 8) {
-					if (UNLIKELY(offset + (i << 1) + 1 >= 0x1000)) {
-						break;
-					}
-					uint8_t hi = 0;
-					uint8_t lo = 0;
-					hi |= (softwareRenderer->row[i + 0] & 0x2) << 6;
-					lo |= (softwareRenderer->row[i + 0] & 0x1) << 7;
-					hi |= (softwareRenderer->row[i + 1] & 0x2) << 5;
-					lo |= (softwareRenderer->row[i + 1] & 0x1) << 6;
-					hi |= (softwareRenderer->row[i + 2] & 0x2) << 4;
-					lo |= (softwareRenderer->row[i + 2] & 0x1) << 5;
-					hi |= (softwareRenderer->row[i + 3] & 0x2) << 3;
-					lo |= (softwareRenderer->row[i + 3] & 0x1) << 4;
-					hi |= (softwareRenderer->row[i + 4] & 0x2) << 2;
-					lo |= (softwareRenderer->row[i + 4] & 0x1) << 3;
-					hi |= (softwareRenderer->row[i + 5] & 0x2) << 1;
-					lo |= (softwareRenderer->row[i + 5] & 0x1) << 2;
-					hi |= (softwareRenderer->row[i + 6] & 0x2) << 0;
-					lo |= (softwareRenderer->row[i + 6] & 0x1) << 1;
-					hi |= (softwareRenderer->row[i + 7] & 0x2) >> 1;
-					lo |= (softwareRenderer->row[i + 7] & 0x1) >> 0;
-					buffer[offset + (i << 1) + 0] = lo;
-					buffer[offset + (i << 1) + 1] = hi;
+			int i;
+			for (i = 0; i < GB_VIDEO_HORIZONTAL_PIXELS; i += 8) {
+				if (UNLIKELY(offset + (i << 1) + 1 >= 0x1000)) {
+					break;
 				}
+				uint8_t hi = 0;
+				uint8_t lo = 0;
+				hi |= (softwareRenderer->row[i + 0] & 0x2) << 6;
+				lo |= (softwareRenderer->row[i + 0] & 0x1) << 7;
+				hi |= (softwareRenderer->row[i + 1] & 0x2) << 5;
+				lo |= (softwareRenderer->row[i + 1] & 0x1) << 6;
+				hi |= (softwareRenderer->row[i + 2] & 0x2) << 4;
+				lo |= (softwareRenderer->row[i + 2] & 0x1) << 5;
+				hi |= (softwareRenderer->row[i + 3] & 0x2) << 3;
+				lo |= (softwareRenderer->row[i + 3] & 0x1) << 4;
+				hi |= (softwareRenderer->row[i + 4] & 0x2) << 2;
+				lo |= (softwareRenderer->row[i + 4] & 0x1) << 3;
+				hi |= (softwareRenderer->row[i + 5] & 0x2) << 1;
+				lo |= (softwareRenderer->row[i + 5] & 0x1) << 2;
+				hi |= (softwareRenderer->row[i + 6] & 0x2) << 0;
+				lo |= (softwareRenderer->row[i + 6] & 0x1) << 1;
+				hi |= (softwareRenderer->row[i + 7] & 0x2) >> 1;
+				lo |= (softwareRenderer->row[i + 7] & 0x1) >> 0;
+				buffer[offset + (i << 1) + 0] = lo;
+				buffer[offset + (i << 1) + 1] = hi;
 			}
 		}
 	}
@@ -355,12 +454,8 @@ static void GBVideoSoftwareRendererFinishFrame(struct GBVideoRenderer* renderer)
 		_clearScreen(softwareRenderer);
 	}
 	if (softwareRenderer->model == GB_MODEL_SGB) {
-		switch (softwareRenderer->sgbPacket[0] >> 3) {
+		switch (softwareRenderer->sgbCommandHeader >> 3) {
 		case SGB_PAL_SET:
-			if (softwareRenderer->sgbPacket[9] & 0x40) {
-				renderer->sgbRenderMode = 0;
-			}
-			break;
 		case SGB_ATTR_SET:
 			if (softwareRenderer->sgbPacket[1] & 0x40) {
 				renderer->sgbRenderMode = 0;
@@ -373,10 +468,11 @@ static void GBVideoSoftwareRendererFinishFrame(struct GBVideoRenderer* renderer)
 				// Make sure every buffer sees this if we're multibuffering
 				_regenerateSGBBorder(softwareRenderer);
 			}
+			// Fall through
+		case SGB_ATTR_TRN:
 			++softwareRenderer->sgbTransfer;
 			if (softwareRenderer->sgbTransfer == 5) {
-				softwareRenderer->sgbTransfer = 0;
-				softwareRenderer->sgbPacket[0] = 0;
+				softwareRenderer->sgbCommandHeader = 0;
 			}
 		default:
 			break;
diff --git a/src/gb/video.c b/src/gb/video.c
index 61d38c0fd..840689e7f 100644
--- a/src/gb/video.c
+++ b/src/gb/video.c
@@ -75,6 +75,8 @@ void GBVideoInit(struct GBVideo* video) {
 	video->renderer->sgbCharRam = NULL;
 	video->renderer->sgbMapRam = NULL;
 	video->renderer->sgbPalRam = NULL;
+	video->renderer->sgbAttributes = NULL;
+	video->renderer->sgbAttributeFiles = NULL;
 }
 
 void GBVideoReset(struct GBVideo* video) {
@@ -100,10 +102,40 @@ void GBVideoReset(struct GBVideo* video) {
 		video->renderer->sgbCharRam = anonymousMemoryMap(SGB_SIZE_CHAR_RAM);
 		video->renderer->sgbMapRam = anonymousMemoryMap(SGB_SIZE_MAP_RAM);
 		video->renderer->sgbPalRam = anonymousMemoryMap(SGB_SIZE_PAL_RAM);
+		video->renderer->sgbAttributeFiles = anonymousMemoryMap(SGB_SIZE_ATF_RAM);
+		video->renderer->sgbAttributes = malloc(90 * 45);
+		memset(video->renderer->sgbAttributes, 0, 90 * 45);
+		video->sgbCommandHeader = 0;
 	}
 
+	video->palette[0] = video->dmgPalette[0];
+	video->palette[1] = video->dmgPalette[1];
+	video->palette[2] = video->dmgPalette[2];
+	video->palette[3] = video->dmgPalette[3];
+	video->palette[8 * 4 + 0] = video->dmgPalette[0];
+	video->palette[8 * 4 + 1] = video->dmgPalette[1];
+	video->palette[8 * 4 + 2] = video->dmgPalette[2];
+	video->palette[8 * 4 + 3] = video->dmgPalette[3];
+	video->palette[9 * 4 + 0] = video->dmgPalette[0];
+	video->palette[9 * 4 + 1] = video->dmgPalette[1];
+	video->palette[9 * 4 + 2] = video->dmgPalette[2];
+	video->palette[9 * 4 + 3] = video->dmgPalette[3];
+
 	video->renderer->deinit(video->renderer);
 	video->renderer->init(video->renderer, video->p->model);
+
+	video->renderer->writePalette(video->renderer, 0, video->palette[0]);
+	video->renderer->writePalette(video->renderer, 1, video->palette[1]);
+	video->renderer->writePalette(video->renderer, 2, video->palette[2]);
+	video->renderer->writePalette(video->renderer, 3, video->palette[3]);
+	video->renderer->writePalette(video->renderer, 8 * 4 + 0, video->palette[8 * 4 + 0]);
+	video->renderer->writePalette(video->renderer, 8 * 4 + 1, video->palette[8 * 4 + 1]);
+	video->renderer->writePalette(video->renderer, 8 * 4 + 2, video->palette[8 * 4 + 2]);
+	video->renderer->writePalette(video->renderer, 8 * 4 + 3, video->palette[8 * 4 + 3]);
+	video->renderer->writePalette(video->renderer, 9 * 4 + 0, video->palette[9 * 4 + 0]);
+	video->renderer->writePalette(video->renderer, 9 * 4 + 1, video->palette[9 * 4 + 1]);
+	video->renderer->writePalette(video->renderer, 9 * 4 + 2, video->palette[9 * 4 + 2]);
+	video->renderer->writePalette(video->renderer, 9 * 4 + 3, video->palette[9 * 4 + 3]);
 }
 
 void GBVideoDeinit(struct GBVideo* video) {
@@ -121,6 +153,14 @@ void GBVideoDeinit(struct GBVideo* video) {
 		mappedMemoryFree(video->renderer->sgbPalRam, SGB_SIZE_PAL_RAM);
 		video->renderer->sgbPalRam = NULL;
 	}
+	if (video->renderer->sgbAttributeFiles) {
+		mappedMemoryFree(video->renderer->sgbAttributeFiles, SGB_SIZE_ATF_RAM);
+		video->renderer->sgbAttributeFiles = NULL;
+	}
+	if (video->renderer->sgbAttributes) {
+		free(video->renderer->sgbAttributes);
+		video->renderer->sgbAttributes = NULL;
+	}
 }
 
 void GBVideoAssociateRenderer(struct GBVideo* video, struct GBVideoRenderer* renderer) {
@@ -130,6 +170,8 @@ void GBVideoAssociateRenderer(struct GBVideo* video, struct GBVideoRenderer* ren
 	renderer->sgbCharRam = video->renderer->sgbCharRam;
 	renderer->sgbMapRam = video->renderer->sgbMapRam;
 	renderer->sgbPalRam = video->renderer->sgbPalRam;
+	renderer->sgbAttributeFiles = video->renderer->sgbAttributeFiles;
+	renderer->sgbAttributes = video->renderer->sgbAttributes;
 	video->renderer = renderer;
 	renderer->vram = video->vram;
 	video->renderer->init(video->renderer, video->p->model);
@@ -419,7 +461,7 @@ void GBVideoWriteLYC(struct GBVideo* video, uint8_t value) {
 }
 
 void GBVideoWritePalette(struct GBVideo* video, uint16_t address, uint8_t value) {
-	if (video->p->model < GB_MODEL_CGB) {
+	if (video->p->model < GB_MODEL_SGB) {
 		switch (address) {
 		case REG_BGP:
 			video->palette[0] = video->dmgPalette[value & 3];
@@ -452,6 +494,8 @@ void GBVideoWritePalette(struct GBVideo* video, uint16_t address, uint8_t value)
 			video->renderer->writePalette(video->renderer, 9 * 4 + 3, video->palette[9 * 4 + 3]);
 			break;
 		}
+	} else if (video->p->model == GB_MODEL_SGB) {
+		video->renderer->writeVideoRegister(video->renderer, address, value);
 	} else {
 		switch (address) {
 		case REG_BCPD:
@@ -506,7 +550,12 @@ void GBVideoSetPalette(struct GBVideo* video, unsigned index, uint32_t color) {
 }
 
 void GBVideoWriteSGBPacket(struct GBVideo* video, uint8_t* data) {
-	switch (data[0] >> 3) {
+	int i;
+	if (!(video->sgbCommandHeader & 7)) {
+		video->sgbCommandHeader = data[0];
+	}
+	--video->sgbCommandHeader;
+	switch (video->sgbCommandHeader >> 3) {
 	case SGB_PAL01:
 		video->palette[0] = data[1] | (data[2] << 8);
 		video->palette[1] = data[3] | (data[4] << 8);
@@ -516,13 +565,17 @@ void GBVideoWriteSGBPacket(struct GBVideo* video, uint8_t* data) {
 		video->palette[17] = data[9] | (data[10] << 8);
 		video->palette[18] = data[11] | (data[12] << 8);
 		video->palette[19] = data[13] | (data[14] << 8);
+
 		video->renderer->writePalette(video->renderer, 0, video->palette[0]);
 		video->renderer->writePalette(video->renderer, 1, video->palette[1]);
 		video->renderer->writePalette(video->renderer, 2, video->palette[2]);
 		video->renderer->writePalette(video->renderer, 3, video->palette[3]);
+		video->renderer->writePalette(video->renderer, 16, video->palette[0]);
 		video->renderer->writePalette(video->renderer, 17, video->palette[17]);
 		video->renderer->writePalette(video->renderer, 18, video->palette[18]);
 		video->renderer->writePalette(video->renderer, 19, video->palette[19]);
+		video->renderer->writePalette(video->renderer, 32, video->palette[0]);
+		video->renderer->writePalette(video->renderer, 48, video->palette[0]);
 		break;
 	case SGB_PAL23:
 		video->palette[32] = data[1] | (data[2] << 8);
@@ -533,7 +586,6 @@ void GBVideoWriteSGBPacket(struct GBVideo* video, uint8_t* data) {
 		video->palette[49] = data[9] | (data[10] << 8);
 		video->palette[50] = data[11] | (data[12] << 8);
 		video->palette[51] = data[13] | (data[14] << 8);
-		video->renderer->writePalette(video->renderer, 32, video->palette[32]);
 		video->renderer->writePalette(video->renderer, 33, video->palette[33]);
 		video->renderer->writePalette(video->renderer, 34, video->palette[34]);
 		video->renderer->writePalette(video->renderer, 35, video->palette[35]);
@@ -547,6 +599,7 @@ void GBVideoWriteSGBPacket(struct GBVideo* video, uint8_t* data) {
 		video->palette[2] = data[5] | (data[6] << 8);
 		video->palette[3] = data[7] | (data[8] << 8);
 
+		video->palette[48] = data[1] | (data[2] << 8);
 		video->palette[49] = data[9] | (data[10] << 8);
 		video->palette[50] = data[11] | (data[12] << 8);
 		video->palette[51] = data[13] | (data[14] << 8);
@@ -554,6 +607,9 @@ void GBVideoWriteSGBPacket(struct GBVideo* video, uint8_t* data) {
 		video->renderer->writePalette(video->renderer, 1, video->palette[1]);
 		video->renderer->writePalette(video->renderer, 2, video->palette[2]);
 		video->renderer->writePalette(video->renderer, 3, video->palette[3]);
+		video->renderer->writePalette(video->renderer, 16, video->palette[0]);
+		video->renderer->writePalette(video->renderer, 32, video->palette[0]);
+		video->renderer->writePalette(video->renderer, 48, video->palette[0]);
 		video->renderer->writePalette(video->renderer, 49, video->palette[49]);
 		video->renderer->writePalette(video->renderer, 50, video->palette[50]);
 		video->renderer->writePalette(video->renderer, 51, video->palette[51]);
@@ -567,7 +623,6 @@ void GBVideoWriteSGBPacket(struct GBVideo* video, uint8_t* data) {
 		video->palette[33] = data[9] | (data[10] << 8);
 		video->palette[34] = data[11] | (data[12] << 8);
 		video->palette[35] = data[13] | (data[14] << 8);
-		video->renderer->writePalette(video->renderer, 16, video->palette[16]);
 		video->renderer->writePalette(video->renderer, 17, video->palette[17]);
 		video->renderer->writePalette(video->renderer, 18, video->palette[18]);
 		video->renderer->writePalette(video->renderer, 19, video->palette[19]);
@@ -575,19 +630,35 @@ void GBVideoWriteSGBPacket(struct GBVideo* video, uint8_t* data) {
 		video->renderer->writePalette(video->renderer, 34, video->palette[34]);
 		video->renderer->writePalette(video->renderer, 35, video->palette[35]);
 		break;
+	case SGB_PAL_SET:
+		for (i = 0; i < 4; ++i) {
+			uint16_t entry = (data[2 + (i * 2)] << 8) | data[1 + (i * 2)];
+			if (entry >= 0x200) {
+				mLOG(GB, STUB, "Unimplemented SGB palette overflow: %03X", entry);
+				continue;
+			}
+			LOAD_16LE(video->palette[i * 4 + 0], entry * 8 + 0, video->renderer->sgbPalRam);
+			video->renderer->writePalette(video->renderer, i * 4 + 0, video->palette[0]);
+			LOAD_16LE(video->palette[i * 4 + 1], entry * 8 + 2, video->renderer->sgbPalRam);
+			video->renderer->writePalette(video->renderer, i * 4 + 1, video->palette[i * 4 + 1]);
+			LOAD_16LE(video->palette[i * 4 + 2], entry * 8 + 4, video->renderer->sgbPalRam);
+			video->renderer->writePalette(video->renderer, i * 4 + 2, video->palette[i * 4 + 2]);
+			LOAD_16LE(video->palette[i * 4 + 3], entry * 8 + 6, video->renderer->sgbPalRam);
+			video->renderer->writePalette(video->renderer, i * 4 + 3, video->palette[i * 4 + 3]);
+		}
+		break;
+	case SGB_ATTR_BLK:
+	case SGB_PAL_TRN:
+	case SGB_CHR_TRN:
+	case SGB_PCT_TRN:
+	case SGB_ATTR_TRN:
+	case SGB_ATTR_SET:
+		break;
 	case SGB_MLT_REG:
 		return;
 	case SGB_MASK_EN:
 		video->renderer->sgbRenderMode = data[1] & 0x3;
 		break;
-	case SGB_PAL_TRN:
-	case SGB_CHR_TRN:
-	case SGB_PCT_TRN:
-		break;
-	case SGB_PAL_SET:
-	case SGB_ATTR_SET:
-		mLOG(GB, STUB, "Unimplemented SGB command: %02X", data[0] >> 3);
-		break;
 	default:
 		mLOG(GB, STUB, "Unimplemented SGB command: %02X", data[0] >> 3);
 		return;
diff --git a/src/gba/renderers/software-bg.c b/src/gba/renderers/software-bg.c
index f22a3a589..93c30a87e 100644
--- a/src/gba/renderers/software-bg.c
+++ b/src/gba/renderers/software-bg.c
@@ -5,6 +5,7 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #include "gba/renderers/software-private.h"
 
+#include <mgba/core/interface.h>
 #include <mgba/internal/gba/gba.h>
 
 #define MODE_2_COORD_OVERFLOW \
@@ -101,6 +102,7 @@ void GBAVideoSoftwareRendererDrawBackgroundMode3(struct GBAVideoSoftwareRenderer
 
 		if (!mosaicWait) {
 			LOAD_16(color, ((localX >> 8) + (localY >> 8) * VIDEO_HORIZONTAL_PIXELS) << 1, renderer->d.vram);
+			color = mColorFrom555(color);
 #ifndef COLOR_16_BIT
 			unsigned color32;
 			color32 = 0;
diff --git a/src/gba/renderers/video-software.c b/src/gba/renderers/video-software.c
index 70dac8811..9898fa9ae 100644
--- a/src/gba/renderers/video-software.c
+++ b/src/gba/renderers/video-software.c
@@ -359,22 +359,7 @@ static void GBAVideoSoftwareRendererWriteOAM(struct GBAVideoRenderer* renderer,
 
 static void GBAVideoSoftwareRendererWritePalette(struct GBAVideoRenderer* renderer, uint32_t address, uint16_t value) {
 	struct GBAVideoSoftwareRenderer* softwareRenderer = (struct GBAVideoSoftwareRenderer*) renderer;
-#ifdef COLOR_16_BIT
-#ifdef COLOR_5_6_5
-	unsigned color = 0;
-	color |= (value & 0x001F) << 11;
-	color |= (value & 0x03E0) << 1;
-	color |= (value & 0x7C00) >> 10;
-#else
-	unsigned color = value;
-#endif
-#else
-	unsigned color = 0;
-	color |= (value << 3) & 0xF8;
-	color |= (value << 6) & 0xF800;
-	color |= (value << 9) & 0xF80000;
-	color |= (color >> 5) & 0x070707;
-#endif
+	color_t color = mColorFrom555(value);
 	softwareRenderer->normalPalette[address >> 1] = color;
 	if (softwareRenderer->blendEffect == BLEND_BRIGHTEN) {
 		softwareRenderer->variantPalette[address >> 1] = _brighten(color, softwareRenderer->bldy);
diff --git a/src/platform/python/_builder.h b/src/platform/python/_builder.h
index 19842c402..2dc4f7b5a 100644
--- a/src/platform/python/_builder.h
+++ b/src/platform/python/_builder.h
@@ -14,6 +14,8 @@
 #define CXX_GUARD_START
 #define CXX_GUARD_END
 
+#define PYCPARSE
+
 typedef int... time_t;
 typedef int... off_t;
 typedef ... va_list;
@@ -24,7 +26,6 @@ typedef ...* png_unknown_chunkp;
 void free(void*);
 
 #include <limits.h>
-#undef const
 
 #include "flags.h"
 
diff --git a/src/platform/sdl/gl-sdl.c b/src/platform/sdl/gl-sdl.c
index d4fd0ff01..5504673d1 100644
--- a/src/platform/sdl/gl-sdl.c
+++ b/src/platform/sdl/gl-sdl.c
@@ -33,7 +33,7 @@ void mSDLGLCreate(struct mSDLRenderer* renderer) {
 bool mSDLGLInit(struct mSDLRenderer* renderer) {
 	mSDLGLCommonInit(renderer);
 
-	size_t size = toPow2(renderer->width) * renderer->height * BYTES_PER_PIXEL;
+	size_t size = toPow2(renderer->width) * toPow2(renderer->height) * BYTES_PER_PIXEL;
 	renderer->outputBuffer = malloc(size);
 	memset(renderer->outputBuffer, 0, size);
 	renderer->core->setVideoBuffer(renderer->core, renderer->outputBuffer, toPow2(renderer->width));
@@ -67,9 +67,9 @@ void mSDLGLRunloop(struct mSDLRenderer* renderer, void* user) {
 				renderer->player.windowUpdated = 0;
 			}
 #endif
-			if (renderer->width != v->width || renderer->height != v->height) {
-				renderer->gl.d.setDimensions(&renderer->gl.d, renderer->width, renderer->height);
-			}
+		}
+		if (renderer->width != v->width || renderer->height != v->height) {
+			v->setDimensions(v, renderer->width, renderer->height);
 		}
 
 		if (mCoreSyncWaitFrameStart(&context->impl->sync)) {
diff --git a/src/platform/sdl/gles2-sdl.c b/src/platform/sdl/gles2-sdl.c
index b80ab33c5..81daa0f90 100644
--- a/src/platform/sdl/gles2-sdl.c
+++ b/src/platform/sdl/gles2-sdl.c
@@ -98,7 +98,7 @@ bool mSDLGLES2Init(struct mSDLRenderer* renderer) {
 	mSDLGLCommonInit(renderer);
 #endif
 
-	size_t size = toPow2(renderer->width) * renderer->height * BYTES_PER_PIXEL;
+	size_t size = toPow2(renderer->width) * toPow2(renderer->height) * BYTES_PER_PIXEL;
 #ifndef __APPLE__
 	renderer->outputBuffer = memalign(16, size);
 #else