GS: Add slow path for odd width 4bpp host->local transfers

This commit is contained in:
refractionpcsx2 2022-08-09 21:55:12 +01:00
parent 8b375d66fa
commit cd2d7c91c3
1 changed files with 25 additions and 5 deletions

View File

@ -815,8 +815,8 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const u8* src, int len, GIFRegB
if (TRXREG.RRW == 0)
return;
int l = (int)TRXPOS.DSAX;
int r = l + (int)TRXREG.RRW;
const int l = (int)TRXPOS.DSAX;
const int r = l + (int)TRXREG.RRW;
// finish the incomplete row first
@ -828,12 +828,32 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const u8* src, int len, GIFRegB
len -= n;
}
int la = (l + (bsx - 1)) & ~(bsx - 1);
int ra = r & ~(bsx - 1);
const int la = (l + (bsx - 1)) & ~(bsx - 1);
const int ra = r & ~(bsx - 1);
// Round up to the nearest byte (NFL 2K5 does r = 1, l = 0 bpp =4, causing divide by zero)
int srcpitch = (((r - l) * trbpp) + 7) >> 3;
const int srcpitch = (((r - l) * trbpp) + 7) >> 3;
int h = len / srcpitch;
// Slow path for odd width 4bpp, the fast path expects everything to be perfectly aligned and great,
// but things get hairy with 4bpp pixels and odd widths since the lowest size we can address is 8bits, it goes out of sync.
// Although I call this a slow path, it's probably faster than modifying the data alignment every other line.
// GT3 demo, Jak 2 Japanese subtitles, and the BG Dark Alliance minimap do this.
if (trbpp == 4 && (TRXREG.RRW & 0x1))
{
int count = 0;
const int t = TRXPOS.DSAY;
const int b = t + (int)TRXREG.RRH;
for (int y = t; y < b; y++)
{
for (int x = l; x < r; x++)
{
WritePixel4(x, y, src[count >> 1] >> ((count & 1) << 2), BITBLTBUF.DBP, BITBLTBUF.DBW);
count++;
}
}
return;
}
if (ra - la >= bsx && h > 0) // "transfer width" >= "block width" && there is at least one full row
{
const u8* s = &src[-l * trbpp >> 3];