diff --git a/pcsx2/GS/GSLocalMemory.cpp b/pcsx2/GS/GSLocalMemory.cpp index 3a0ce7929a..5e1efbd96e 100644 --- a/pcsx2/GS/GSLocalMemory.cpp +++ b/pcsx2/GS/GSLocalMemory.cpp @@ -815,8 +815,8 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const u8* src, int len, GIFRegB if (TRXREG.RRW == 0) return; - int l = (int)TRXPOS.DSAX; - int r = l + (int)TRXREG.RRW; + const int l = (int)TRXPOS.DSAX; + const int r = l + (int)TRXREG.RRW; // finish the incomplete row first @@ -828,12 +828,32 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const u8* src, int len, GIFRegB len -= n; } - int la = (l + (bsx - 1)) & ~(bsx - 1); - int ra = r & ~(bsx - 1); + const int la = (l + (bsx - 1)) & ~(bsx - 1); + const int ra = r & ~(bsx - 1); // Round up to the nearest byte (NFL 2K5 does r = 1, l = 0 bpp =4, causing divide by zero) - int srcpitch = (((r - l) * trbpp) + 7) >> 3; + const int srcpitch = (((r - l) * trbpp) + 7) >> 3; int h = len / srcpitch; + // Slow path for odd width 4bpp, the fast path expects everything to be perfectly aligned and great, + // but things get hairy with 4bpp pixels and odd widths since the lowest size we can address is 8bits, it goes out of sync. + // Although I call this a slow path, it's probably faster than modifying the data alignment every other line. + // GT3 demo, Jak 2 Japanese subtitles, and the BG Dark Alliance minimap do this. + if (trbpp == 4 && (TRXREG.RRW & 0x1)) + { + int count = 0; + const int t = TRXPOS.DSAY; + const int b = t + (int)TRXREG.RRH; + for (int y = t; y < b; y++) + { + for (int x = l; x < r; x++) + { + WritePixel4(x, y, src[count >> 1] >> ((count & 1) << 2), BITBLTBUF.DBP, BITBLTBUF.DBW); + count++; + } + } + return; + } + if (ra - la >= bsx && h > 0) // "transfer width" >= "block width" && there is at least one full row { const u8* s = &src[-l * trbpp >> 3];