mirror of https://github.com/PCSX2/pcsx2.git
GS: Add slow path for odd width 4bpp host->local transfers
This commit is contained in:
parent
8b375d66fa
commit
cd2d7c91c3
|
@ -815,8 +815,8 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const u8* src, int len, GIFRegB
|
||||||
if (TRXREG.RRW == 0)
|
if (TRXREG.RRW == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
int l = (int)TRXPOS.DSAX;
|
const int l = (int)TRXPOS.DSAX;
|
||||||
int r = l + (int)TRXREG.RRW;
|
const int r = l + (int)TRXREG.RRW;
|
||||||
|
|
||||||
// finish the incomplete row first
|
// finish the incomplete row first
|
||||||
|
|
||||||
|
@ -828,12 +828,32 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const u8* src, int len, GIFRegB
|
||||||
len -= n;
|
len -= n;
|
||||||
}
|
}
|
||||||
|
|
||||||
int la = (l + (bsx - 1)) & ~(bsx - 1);
|
const int la = (l + (bsx - 1)) & ~(bsx - 1);
|
||||||
int ra = r & ~(bsx - 1);
|
const int ra = r & ~(bsx - 1);
|
||||||
// Round up to the nearest byte (NFL 2K5 does r = 1, l = 0 bpp =4, causing divide by zero)
|
// Round up to the nearest byte (NFL 2K5 does r = 1, l = 0 bpp =4, causing divide by zero)
|
||||||
int srcpitch = (((r - l) * trbpp) + 7) >> 3;
|
const int srcpitch = (((r - l) * trbpp) + 7) >> 3;
|
||||||
int h = len / srcpitch;
|
int h = len / srcpitch;
|
||||||
|
|
||||||
|
// Slow path for odd width 4bpp, the fast path expects everything to be perfectly aligned and great,
|
||||||
|
// but things get hairy with 4bpp pixels and odd widths since the lowest size we can address is 8bits, it goes out of sync.
|
||||||
|
// Although I call this a slow path, it's probably faster than modifying the data alignment every other line.
|
||||||
|
// GT3 demo, Jak 2 Japanese subtitles, and the BG Dark Alliance minimap do this.
|
||||||
|
if (trbpp == 4 && (TRXREG.RRW & 0x1))
|
||||||
|
{
|
||||||
|
int count = 0;
|
||||||
|
const int t = TRXPOS.DSAY;
|
||||||
|
const int b = t + (int)TRXREG.RRH;
|
||||||
|
for (int y = t; y < b; y++)
|
||||||
|
{
|
||||||
|
for (int x = l; x < r; x++)
|
||||||
|
{
|
||||||
|
WritePixel4(x, y, src[count >> 1] >> ((count & 1) << 2), BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (ra - la >= bsx && h > 0) // "transfer width" >= "block width" && there is at least one full row
|
if (ra - la >= bsx && h > 0) // "transfer width" >= "block width" && there is at least one full row
|
||||||
{
|
{
|
||||||
const u8* s = &src[-l * trbpp >> 3];
|
const u8* s = &src[-l * trbpp >> 3];
|
||||||
|
|
Loading…
Reference in New Issue