mirror of https://github.com/PCSX2/pcsx2.git
GS: Faster GSOffset::PAHelper
This commit is contained in:
parent
b901c6af71
commit
59fd815c3d
|
@ -197,14 +197,13 @@ template <int n>
|
||||||
void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
|
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
|
||||||
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV);
|
auto pa = off.paMulti(m_mem->m_vm32, TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||||
|
|
||||||
int x = TEXCLUT.COU << 4;
|
|
||||||
uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4);
|
uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4);
|
||||||
|
|
||||||
for (int i = 0; i < n; x++, i++)
|
for (int i = 0; i < n; i++)
|
||||||
{
|
{
|
||||||
uint32 c = m_mem->m_vm32[pa.value(x)];
|
uint32 c = *pa.value(i);
|
||||||
|
|
||||||
clut[i] = (uint16)(c & 0xffff);
|
clut[i] = (uint16)(c & 0xffff);
|
||||||
clut[i + 256] = (uint16)(c >> 16);
|
clut[i + 256] = (uint16)(c >> 16);
|
||||||
|
@ -215,14 +214,13 @@ template <int n>
|
||||||
void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
|
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
|
||||||
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV);
|
auto pa = off.paMulti(m_mem->m_vm16, TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||||
|
|
||||||
int x = TEXCLUT.COU << 4;
|
|
||||||
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
||||||
|
|
||||||
for (int i = 0; i < n; x++, i++)
|
for (int i = 0; i < n; i++)
|
||||||
{
|
{
|
||||||
clut[i] = m_mem->m_vm16[pa.value(x)];
|
clut[i] = *pa.value(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -230,14 +228,13 @@ template <int n>
|
||||||
void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
|
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
|
||||||
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COV);
|
auto pa = off.paMulti(m_mem->m_vm16, TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||||
|
|
||||||
int x = TEXCLUT.COU << 4;
|
|
||||||
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
||||||
|
|
||||||
for (int i = 0; i < n; x++, i++)
|
for (int i = 0; i < n; i++)
|
||||||
{
|
{
|
||||||
clut[i] = m_mem->m_vm16[pa.value(x)];
|
clut[i] = *pa.value(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1065,6 +1065,63 @@ void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const uint8* src, int len, G
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Helper for WriteImageX and ReadImageX
|
||||||
|
/// `len` is in pixels, unlike WriteImageX/ReadImageX where it's bytes
|
||||||
|
/// `xinc` is the amount to increment `x` by per iteration
|
||||||
|
/// Calls `paGetter` on a starting (x, y) to get some sort of pixel address helper for each line,
|
||||||
|
/// then `fn` on the helper and an x offset once for every `xinc` pixels along that line
|
||||||
|
template <typename PAGetter, typename Fn>
|
||||||
|
static void readWriteHelperImpl(int& tx, int& ty, int len, int xinc, int sx, int w, PAGetter&& paGetter, Fn&& fn)
|
||||||
|
{
|
||||||
|
int y = ty;
|
||||||
|
int ex = sx + w;
|
||||||
|
int remX = ex - tx;
|
||||||
|
|
||||||
|
ASSERT(remX >= 0);
|
||||||
|
|
||||||
|
auto pa = paGetter(tx, y);
|
||||||
|
|
||||||
|
while (len > 0)
|
||||||
|
{
|
||||||
|
int stop = std::min(remX, len);
|
||||||
|
len -= stop;
|
||||||
|
remX -= stop;
|
||||||
|
|
||||||
|
for (int x = 0; x < stop; x += xinc)
|
||||||
|
fn(pa, x);
|
||||||
|
|
||||||
|
if (remX == 0)
|
||||||
|
{
|
||||||
|
y++;
|
||||||
|
remX = w;
|
||||||
|
pa = paGetter(sx, y);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tx = ex - remX;
|
||||||
|
ty = y;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper for WriteImageX and ReadImageX
|
||||||
|
/// `len` is in pixels, unlike WriteImageX/ReadImageX where it's bytes
|
||||||
|
/// `xinc` is the amount to increment `x` by per iteration
|
||||||
|
/// Calls `fn` with a `PAHelper` representing the current line and an int representing the x offset in that line
|
||||||
|
template <typename Fn>
|
||||||
|
static void readWriteHelper(int& tx, int& ty, int len, int xinc, int sx, int w, const GSOffset& off, Fn&& fn)
|
||||||
|
{
|
||||||
|
readWriteHelperImpl(tx, ty, len, xinc, sx, w, [&](int x, int y){ return off.paMulti(x, y); }, std::forward<Fn>(fn));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper for WriteImageX and ReadImageX
|
||||||
|
/// `len` is in pixels, unlike WriteImageX/ReadImageX where it's bytes
|
||||||
|
/// `xinc` is the amount to increment `x` by per iteration
|
||||||
|
/// Calls `fn` with a `PAPtrHelper` representing the current line and an int representing the x offset in that line
|
||||||
|
template <typename VM, typename Fn>
|
||||||
|
static void readWriteHelper(VM* vm, int& tx, int& ty, int len, int xinc, int sx, int w, const GSOffset& off, Fn&& fn)
|
||||||
|
{
|
||||||
|
readWriteHelperImpl(tx, ty, len, xinc, sx, w, [&](int x, int y){ return off.paMulti(vm, x, y); }, std::forward<Fn>(fn));
|
||||||
|
}
|
||||||
|
|
||||||
void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
||||||
{
|
{
|
||||||
if (len <= 0)
|
if (len <= 0)
|
||||||
|
@ -1077,27 +1134,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
uint32 bp = BITBLTBUF.DBP;
|
uint32 bp = BITBLTBUF.DBP;
|
||||||
uint32 bw = BITBLTBUF.DBW;
|
uint32 bw = BITBLTBUF.DBW;
|
||||||
|
|
||||||
int x = tx;
|
int sx = TRXPOS.DSAX;
|
||||||
int y = ty;
|
int w = TRXREG.RRW;
|
||||||
int sx = (int)TRXPOS.DSAX;
|
|
||||||
int ex = sx + (int)TRXREG.RRW;
|
|
||||||
|
|
||||||
auto copy = [&](int len, const GSOffset& off, auto&& fn)
|
|
||||||
{
|
|
||||||
GSOffset::PAHelper pa = off.paMulti(y);
|
|
||||||
|
|
||||||
for (; len > 0; len--)
|
|
||||||
{
|
|
||||||
fn(pa);
|
|
||||||
x++;
|
|
||||||
if (x >= ex)
|
|
||||||
{
|
|
||||||
y++;
|
|
||||||
x = sx;
|
|
||||||
pa = off.paMulti(y);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
GSOffset off = GetOffset(bp, bw, BITBLTBUF.DPSM);
|
GSOffset off = GetOffset(bp, bw, BITBLTBUF.DPSM);
|
||||||
|
|
||||||
|
@ -1105,16 +1143,16 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
{
|
{
|
||||||
case PSM_PSMCT32:
|
case PSM_PSMCT32:
|
||||||
case PSM_PSMZ32:
|
case PSM_PSMZ32:
|
||||||
copy(len / 4, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm32, tx, ty, len / 4, 1, sx, w, off.assertSizesMatch(swizzle32), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
WritePixel32(pa.value(x), *pd);
|
*pa.value(x) = *pd;
|
||||||
pd++;
|
pd++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSM_PSMCT24:
|
case PSM_PSMCT24:
|
||||||
case PSM_PSMZ24:
|
case PSM_PSMZ24:
|
||||||
copy(len / 3, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm32, tx, ty, len / 3, 1, sx, w, off.assertSizesMatch(swizzle32), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
WritePixel24(pa.value(x), *(uint32*)pb);
|
WritePixel24(pa.value(x), *(uint32*)pb);
|
||||||
pb += 3;
|
pb += 3;
|
||||||
|
@ -1125,32 +1163,32 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
case PSM_PSMCT16S:
|
case PSM_PSMCT16S:
|
||||||
case PSM_PSMZ16:
|
case PSM_PSMZ16:
|
||||||
case PSM_PSMZ16S:
|
case PSM_PSMZ16S:
|
||||||
copy(len / 2, off.assertSizesMatch(swizzle16), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm16, tx, ty, len / 2, 1, sx, w, off.assertSizesMatch(swizzle16), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
WritePixel16(pa.value(x), *pw);
|
*pa.value(x) = *pw;
|
||||||
pw++;
|
pw++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSM_PSMT8:
|
case PSM_PSMT8:
|
||||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm8, tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
WritePixel8(pa.value(x), *pb);
|
*pa.value(x) = *pb;
|
||||||
pb++;
|
pb++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSM_PSMT4:
|
case PSM_PSMT4:
|
||||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa, int x)
|
||||||
{
|
{
|
||||||
WritePixel4(pa.value(x++), *pb & 0xf);
|
WritePixel4(pa.value(x), *pb & 0xf);
|
||||||
WritePixel4(pa.value(x), *pb >> 4);
|
WritePixel4(pa.value(x + 1), *pb >> 4);
|
||||||
pb++;
|
pb++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSM_PSMT8H:
|
case PSM_PSMT8H:
|
||||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm32, tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
WritePixel8H(pa.value(x), *pb);
|
WritePixel8H(pa.value(x), *pb);
|
||||||
pb++;
|
pb++;
|
||||||
|
@ -1158,26 +1196,23 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSM_PSMT4HL:
|
case PSM_PSMT4HL:
|
||||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm32, tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
WritePixel4HL(pa.value(x++), *pb & 0xf);
|
WritePixel4HL(pa.value(x), *pb & 0xf);
|
||||||
WritePixel4HL(pa.value(x), *pb >> 4);
|
WritePixel4HL(pa.value(x + 1), *pb >> 4);
|
||||||
pb++;
|
pb++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSM_PSMT4HH:
|
case PSM_PSMT4HH:
|
||||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm32, tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
WritePixel4HH(pa.value(x++), *pb & 0xf);
|
WritePixel4HH(pa.value(x), *pb & 0xf);
|
||||||
WritePixel4HH(pa.value(x), *pb >> 4);
|
WritePixel4HH(pa.value(x + 1), *pb >> 4);
|
||||||
pb++;
|
pb++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
tx = x;
|
|
||||||
ty = y;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@ -1193,29 +1228,9 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
|
|
||||||
uint32 bp = BITBLTBUF.SBP;
|
uint32 bp = BITBLTBUF.SBP;
|
||||||
uint32 bw = BITBLTBUF.SBW;
|
uint32 bw = BITBLTBUF.SBW;
|
||||||
psm_t* RESTRICT psm = &m_psm[BITBLTBUF.SPSM];
|
|
||||||
|
|
||||||
int x = tx;
|
int sx = TRXPOS.SSAX;
|
||||||
int y = ty;
|
int w = TRXREG.RRW;
|
||||||
int sx = (int)TRXPOS.SSAX;
|
|
||||||
int ex = sx + (int)TRXREG.RRW;
|
|
||||||
|
|
||||||
auto copy = [&](int len, const GSOffset& off, auto&& fn)
|
|
||||||
{
|
|
||||||
GSOffset::PAHelper pa = off.paMulti(y);
|
|
||||||
|
|
||||||
for (; len > 0; len--)
|
|
||||||
{
|
|
||||||
fn(pa);
|
|
||||||
x++;
|
|
||||||
if (x >= ex)
|
|
||||||
{
|
|
||||||
y++;
|
|
||||||
x = sx;
|
|
||||||
pa = off.paMulti(y);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
GSOffset off = GetOffset(bp, bw, BITBLTBUF.SPSM);
|
GSOffset off = GetOffset(bp, bw, BITBLTBUF.SPSM);
|
||||||
|
|
||||||
|
@ -1228,50 +1243,57 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
{
|
{
|
||||||
// MGS1 intro, fade effect between two scenes (airplane outside-inside transition)
|
// MGS1 intro, fade effect between two scenes (airplane outside-inside transition)
|
||||||
|
|
||||||
|
int x = tx;
|
||||||
|
int y = ty;
|
||||||
|
int ex = sx + w;
|
||||||
|
|
||||||
len /= 4;
|
len /= 4;
|
||||||
|
|
||||||
GSOffset::PAHelper pa = off.assertSizesMatch(swizzle32).paMulti(y);
|
GSOffset::PAPtrHelper pa = off.assertSizesMatch(swizzle32).paMulti(m_vm32, 0, y);
|
||||||
|
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
for (; len > 0 && x < ex && (x & 7); len--, x++, pd++)
|
for (; len > 0 && x < ex && (x & 7); len--, x++, pd++)
|
||||||
{
|
{
|
||||||
*pd = m_vm32[pa.value(x)];
|
*pd = *pa.value(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
// aligned to a column
|
// aligned to a column
|
||||||
|
|
||||||
for (int ex8 = ex - 8; len >= 8 && x <= ex8; len -= 8, x += 8, pd += 8)
|
for (int ex8 = ex - 8; len >= 8 && x <= ex8; len -= 8, x += 8, pd += 8)
|
||||||
{
|
{
|
||||||
uint32* ps = m_vm32 + pa.value(x);
|
uint32* ps = pa.value(x);
|
||||||
|
|
||||||
GSVector4i::store<false>(&pd[0], GSVector4i::load(ps + 0, ps + 4));
|
GSVector4i::store<false>(&pd[0], GSVector4i::load(ps + 0, ps + 4));
|
||||||
GSVector4i::store<false>(&pd[4], GSVector4i::load(ps + 8, ps + 12));
|
GSVector4i::store<false>(&pd[4], GSVector4i::load(ps + 8, ps + 12));
|
||||||
|
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
ASSERT(pd[i] == m_vm32[pa.value(x + i)]);
|
ASSERT(pd[i] == *pa.value(x + i));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; len > 0 && x < ex; len--, x++, pd++)
|
for (; len > 0 && x < ex; len--, x++, pd++)
|
||||||
{
|
{
|
||||||
*pd = m_vm32[pa.value(x)];
|
*pd = *pa.value(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (x == ex)
|
if (x == ex)
|
||||||
{
|
{
|
||||||
y++;
|
y++;
|
||||||
x = sx;
|
x = sx;
|
||||||
pa = off.assertSizesMatch(swizzle32).paMulti(y);
|
pa = off.assertSizesMatch(swizzle32).paMulti(m_vm32, 0, y);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tx = x;
|
||||||
|
ty = y;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSM_PSMCT24:
|
case PSM_PSMCT24:
|
||||||
case PSM_PSMZ24:
|
case PSM_PSMZ24:
|
||||||
copy(len / 3, off.assertSizesMatch(swizzle32), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm32, tx, ty, len / 3, 1, sx, w, off.assertSizesMatch(swizzle32), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
uint32 c = m_vm32[pa.value(x)];
|
uint32 c = *pa.value(x);
|
||||||
pb[0] = (uint8)(c);
|
pb[0] = (uint8)(c);
|
||||||
pb[1] = (uint8)(c >> 8);
|
pb[1] = (uint8)(c >> 8);
|
||||||
pb[2] = (uint8)(c >> 16);
|
pb[2] = (uint8)(c >> 16);
|
||||||
|
@ -1283,62 +1305,58 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
case PSM_PSMCT16S:
|
case PSM_PSMCT16S:
|
||||||
case PSM_PSMZ16:
|
case PSM_PSMZ16:
|
||||||
case PSM_PSMZ16S:
|
case PSM_PSMZ16S:
|
||||||
copy(len / 2, off.assertSizesMatch(swizzle16), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm16, tx, ty, len / 2, 1, sx, w, off.assertSizesMatch(swizzle16), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
*pw = m_vm16[pa.value(x)];
|
*pw = *pa.value(x);
|
||||||
pw++;
|
pw++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSM_PSMT8:
|
case PSM_PSMT8:
|
||||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm8, tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
*pb = m_vm8[pa.value(x)];
|
*pb = *pa.value(x);
|
||||||
pb++;
|
pb++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSM_PSMT4:
|
case PSM_PSMT4:
|
||||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa, int x)
|
||||||
{
|
{
|
||||||
uint8 low = ReadPixel4(pa.value(x++));
|
uint8 low = ReadPixel4(pa.value(x));
|
||||||
uint8 high = ReadPixel4(pa.value(x));
|
uint8 high = ReadPixel4(pa.value(x + 1));
|
||||||
*pb = low | (high << 4);
|
*pb = low | (high << 4);
|
||||||
pb++;
|
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSM_PSMT8H:
|
case PSM_PSMT8H:
|
||||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm32, tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
*pb = (uint8)(m_vm32[pa.value(x)] >> 24);
|
*pb = (uint8)(*pa.value(x) >> 24);
|
||||||
pb++;
|
pb++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSM_PSMT4HL:
|
case PSM_PSMT4HL:
|
||||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm32, tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
uint32 c0 = m_vm32[pa.value(x++)] >> 24 & 0x0f;
|
uint32 c0 = *pa.value(x) >> 24 & 0x0f;
|
||||||
uint32 c1 = m_vm32[pa.value(x)] >> 20 & 0xf0;
|
uint32 c1 = *pa.value(x + 1) >> 20 & 0xf0;
|
||||||
*pb = (uint8)(c0 | c1);
|
*pb = (uint8)(c0 | c1);
|
||||||
pb++;
|
pb++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSM_PSMT4HH:
|
case PSM_PSMT4HH:
|
||||||
copy(len, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](GSOffset::PAHelper& pa)
|
readWriteHelper(m_vm32, tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
uint32 c0 = m_vm32[pa.value(x++)] >> 28 & 0x0f;
|
uint32 c0 = *pa.value(x) >> 28 & 0x0f;
|
||||||
uint32 c1 = m_vm32[pa.value(x)] >> 24 & 0xf0;
|
uint32 c1 = *pa.value(x + 1) >> 24 & 0xf0;
|
||||||
*pb = (uint8)(c0 | c1);
|
*pb = (uint8)(c0 | c1);
|
||||||
pb++;
|
pb++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
tx = x;
|
|
||||||
ty = y;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////
|
///////////////////
|
||||||
|
|
|
@ -206,41 +206,80 @@ public:
|
||||||
fn(bn.value());
|
fn(bn.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Calculate the pixel address at the given y position with x of 0
|
||||||
|
int pixelAddressZeroX(int y) const
|
||||||
|
{
|
||||||
|
int base = m_bp << (m_pageShiftX + m_pageShiftY - 5); // Offset from base pointer
|
||||||
|
base += ((y & ~m_pageMask.y) * m_bwPg) << m_pageShiftX; // Offset from pages in y direction
|
||||||
|
// TODO: Old GSOffset masked here but is that useful? Probably should mask at end or not at all...
|
||||||
|
base &= (MAX_PAGES << (m_pageShiftX + m_pageShiftY)) - 1; // Mask
|
||||||
|
base += m_pixelSwizzleCol[y & m_pageMask.y]; // Add offset from y within page
|
||||||
|
return base;
|
||||||
|
}
|
||||||
|
|
||||||
/// Helper class for efficiently getting the addresses of multiple pixels in a line (along the x axis)
|
/// Helper class for efficiently getting the addresses of multiple pixels in a line (along the x axis)
|
||||||
class PAHelper
|
class PAHelper
|
||||||
{
|
{
|
||||||
/// Pixel swizzle array
|
/// Pixel swizzle array
|
||||||
const GSPixelRowOffsetTable* m_pixelSwizzleRow;
|
const int* m_pixelSwizzleRow;
|
||||||
int m_base;
|
int m_base;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
PAHelper() = default;
|
PAHelper() = default;
|
||||||
PAHelper(const GSOffset& off, int y)
|
PAHelper(const GSOffset& off, int x, int y)
|
||||||
{
|
{
|
||||||
m_pixelSwizzleRow = off.m_pixelSwizzleRow[y & off.m_pixelRowMask];
|
m_pixelSwizzleRow = off.m_pixelSwizzleRow[y & off.m_pixelRowMask]->value + x;
|
||||||
m_base = off.m_bp << (off.m_pageShiftX + off.m_pageShiftY - 5);
|
m_base = off.pixelAddressZeroX(y);
|
||||||
m_base += ((y & ~off.m_pageMask.y) * off.m_bwPg) << off.m_pageShiftX;
|
|
||||||
m_base &= (MAX_PAGES << (off.m_pageShiftX + off.m_pageShiftY)) - 1;
|
|
||||||
m_base += off.m_pixelSwizzleCol[y & off.m_pageMask.y];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get current pixel address
|
/// Get the pixel address for the given x offset from the one used to create the PAHelper
|
||||||
uint32 value(size_t x) const
|
uint32 value(int x) const
|
||||||
{
|
{
|
||||||
return m_base + (*m_pixelSwizzleRow)[x];
|
return m_base + m_pixelSwizzleRow[x];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Helper class for efficiently getting the addresses of multiple pixels in a line (along the x axis)
|
||||||
|
/// Slightly more efficient than PAHelper by pre-adding the base offset to the VM pointer
|
||||||
|
template <typename VM>
|
||||||
|
class PAPtrHelper
|
||||||
|
{
|
||||||
|
/// Pixel swizzle array
|
||||||
|
const int* m_pixelSwizzleRow;
|
||||||
|
VM* m_base;
|
||||||
|
|
||||||
|
public:
|
||||||
|
PAPtrHelper() = default;
|
||||||
|
PAPtrHelper(const GSOffset& off, VM* vm, int x, int y)
|
||||||
|
{
|
||||||
|
m_pixelSwizzleRow = off.m_pixelSwizzleRow[y & off.m_pixelRowMask]->value + x;
|
||||||
|
m_base = &vm[off.pixelAddressZeroX(y)];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get pixel reference for the given x offset from the one used to create the PAPtrHelper
|
||||||
|
VM* value(int x) const
|
||||||
|
{
|
||||||
|
return m_base + m_pixelSwizzleRow[x];
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Get the address of the given pixel
|
/// Get the address of the given pixel
|
||||||
uint32 pa(int x, int y) const
|
uint32 pa(int x, int y) const
|
||||||
{
|
{
|
||||||
return PAHelper(*this, y).value(x);
|
return PAHelper(*this, 0, y).value(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get a helper class for efficiently calculating multiple pixel addresses in a line (along the x axis)
|
/// Get a helper class for efficiently calculating multiple pixel addresses in a line (along the x axis)
|
||||||
PAHelper paMulti(int y) const
|
PAHelper paMulti(int x, int y) const
|
||||||
{
|
{
|
||||||
return PAHelper(*this, y);
|
return PAHelper(*this, x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get a helper class for efficiently getting pointers into `vm` for multiple pixels in a line (along the x axis)
|
||||||
|
template <typename VM>
|
||||||
|
PAPtrHelper<VM> paMulti(VM* vm, int x, int y) const
|
||||||
|
{
|
||||||
|
return PAPtrHelper(*this, vm, x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Loop over the pixels in the given rectangle
|
/// Loop over the pixels in the given rectangle
|
||||||
|
@ -252,10 +291,10 @@ public:
|
||||||
|
|
||||||
for (int y = r.top; y < r.bottom; y++, px = reinterpret_cast<Src*>(reinterpret_cast<uint8*>(px) + pitch))
|
for (int y = r.top; y < r.bottom; y++, px = reinterpret_cast<Src*>(reinterpret_cast<uint8*>(px) + pitch))
|
||||||
{
|
{
|
||||||
PAHelper pa = paMulti(y);
|
PAPtrHelper<VM> pa = paMulti(vm, 0, y);
|
||||||
for (int x = r.left; x < r.right; x++)
|
for (int x = r.left; x < r.right; x++)
|
||||||
{
|
{
|
||||||
fn(vm + pa.value(x), px + x);
|
fn(pa.value(x), px + x);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -739,9 +778,14 @@ public:
|
||||||
m_vm32[addr] = c;
|
m_vm32[addr] = c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__forceinline static void WritePixel24(uint32* addr, uint32 c)
|
||||||
|
{
|
||||||
|
*addr = (*addr & 0xff000000) | (c & 0x00ffffff);
|
||||||
|
}
|
||||||
|
|
||||||
__forceinline void WritePixel24(uint32 addr, uint32 c)
|
__forceinline void WritePixel24(uint32 addr, uint32 c)
|
||||||
{
|
{
|
||||||
m_vm32[addr] = (m_vm32[addr] & 0xff000000) | (c & 0x00ffffff);
|
WritePixel24(m_vm32 + addr, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void WritePixel16(uint32 addr, uint32 c)
|
__forceinline void WritePixel16(uint32 addr, uint32 c)
|
||||||
|
@ -762,19 +806,34 @@ public:
|
||||||
m_vm8[addr] = (uint8)((m_vm8[addr] & (0xf0 >> shift)) | ((c & 0x0f) << shift));
|
m_vm8[addr] = (uint8)((m_vm8[addr] & (0xf0 >> shift)) | ((c & 0x0f) << shift));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__forceinline static void WritePixel8H(uint32* addr, uint32 c)
|
||||||
|
{
|
||||||
|
*addr = (*addr & 0x00ffffff) | (c << 24);
|
||||||
|
}
|
||||||
|
|
||||||
__forceinline void WritePixel8H(uint32 addr, uint32 c)
|
__forceinline void WritePixel8H(uint32 addr, uint32 c)
|
||||||
{
|
{
|
||||||
m_vm32[addr] = (m_vm32[addr] & 0x00ffffff) | (c << 24);
|
WritePixel8H(m_vm32 + addr, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline static void WritePixel4HL(uint32* addr, uint32 c)
|
||||||
|
{
|
||||||
|
*addr = (*addr & 0xf0ffffff) | ((c & 0x0f) << 24);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void WritePixel4HL(uint32 addr, uint32 c)
|
__forceinline void WritePixel4HL(uint32 addr, uint32 c)
|
||||||
{
|
{
|
||||||
m_vm32[addr] = (m_vm32[addr] & 0xf0ffffff) | ((c & 0x0f) << 24);
|
WritePixel4HL(m_vm32 + addr, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline static void WritePixel4HH(uint32* addr, uint32 c)
|
||||||
|
{
|
||||||
|
*addr = (*addr & 0x0fffffff) | ((c & 0x0f) << 28);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void WritePixel4HH(uint32 addr, uint32 c)
|
__forceinline void WritePixel4HH(uint32 addr, uint32 c)
|
||||||
{
|
{
|
||||||
m_vm32[addr] = (m_vm32[addr] & 0x0fffffff) | ((c & 0x0f) << 28);
|
WritePixel4HH(m_vm32 + addr, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void WriteFrame16(uint32 addr, uint32 c)
|
__forceinline void WriteFrame16(uint32 addr, uint32 c)
|
||||||
|
|
|
@ -1657,66 +1657,86 @@ void GSState::Move()
|
||||||
GSOffset spo = m_mem.GetOffset(sbp, sbw, m_env.BITBLTBUF.SPSM);
|
GSOffset spo = m_mem.GetOffset(sbp, sbw, m_env.BITBLTBUF.SPSM);
|
||||||
GSOffset dpo = m_mem.GetOffset(dbp, dbw, m_env.BITBLTBUF.DPSM);
|
GSOffset dpo = m_mem.GetOffset(dbp, dbw, m_env.BITBLTBUF.DPSM);
|
||||||
|
|
||||||
auto copy = [&](const GSOffset& dpo, const GSOffset& spo, auto&& pxCopyFn)
|
auto genericCopy = [=](const GSOffset& dpo, const GSOffset& spo, auto&& getPAHelper, auto&& pxCopyFn)
|
||||||
{
|
{
|
||||||
|
int _sy = sy, _dy = dy; // Faster with local copied variables, compiler optimizations are dumb
|
||||||
if (xinc > 0)
|
if (xinc > 0)
|
||||||
{
|
{
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
for (int y = 0; y < h; y++, _sy += yinc, _dy += yinc)
|
||||||
{
|
{
|
||||||
GSOffset::PAHelper s = spo.paMulti(sy);
|
auto s = getPAHelper(spo, sx, _sy);
|
||||||
GSOffset::PAHelper d = dpo.paMulti(dy);
|
auto d = getPAHelper(dpo, dx, _dy);
|
||||||
|
|
||||||
for (int x = 0; x < w; x++)
|
for (int x = 0; x < w; x++)
|
||||||
{
|
{
|
||||||
pxCopyFn(d.value(dx + x), s.value(sx + x));
|
pxCopyFn(d, s, x);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
|
for (int y = 0; y < h; y++, _sy += yinc, _dy += yinc)
|
||||||
{
|
{
|
||||||
GSOffset::PAHelper s = spo.paMulti(sy);
|
auto s = getPAHelper(spo, sx, _sy);
|
||||||
GSOffset::PAHelper d = dpo.paMulti(dy);
|
auto d = getPAHelper(dpo, dx, _dy);
|
||||||
|
|
||||||
for (int x = 0; x < w; x++)
|
for (int x = 0; x < w; x++)
|
||||||
{
|
{
|
||||||
pxCopyFn(d.value(dx - x), s.value(sx - x));
|
pxCopyFn(d, s, -x);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
auto copy = [=](const GSOffset& dpo, const GSOffset& spo, auto&& pxCopyFn)
|
||||||
|
{
|
||||||
|
genericCopy(dpo, spo,
|
||||||
|
[](const GSOffset& o, int x, int y) { return o.paMulti(x, y); },
|
||||||
|
[=](const GSOffset::PAHelper& d, const GSOffset::PAHelper& s, int x)
|
||||||
|
{
|
||||||
|
return pxCopyFn(d.value(x), s.value(x));
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
auto copyFast = [=](auto* vm, const GSOffset& dpo, const GSOffset& spo, auto&& pxCopyFn)
|
||||||
|
{
|
||||||
|
genericCopy(dpo, spo,
|
||||||
|
[=](const GSOffset& o, int x, int y) { return o.paMulti(vm, x, y); },
|
||||||
|
[=](const auto& d, const auto& s, int x)
|
||||||
|
{
|
||||||
|
return pxCopyFn(d.value(x), s.value(x));
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
if (spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
|
if (spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
|
||||||
{
|
{
|
||||||
if (spsm.trbpp == 32)
|
if (spsm.trbpp == 32)
|
||||||
{
|
{
|
||||||
copy(dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [&](uint32 doff, uint32 soff)
|
copyFast(m_mem.m_vm32, dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [](uint32* d, uint32* s)
|
||||||
{
|
{
|
||||||
m_mem.m_vm32[doff] = m_mem.m_vm32[soff];
|
*d = *s;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else if (spsm.trbpp == 24)
|
else if (spsm.trbpp == 24)
|
||||||
{
|
{
|
||||||
copy(dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [&](uint32 doff, uint32 soff)
|
copyFast(m_mem.m_vm32, dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [](uint32* d, uint32* s)
|
||||||
{
|
{
|
||||||
uint32& d = m_mem.m_vm32[doff];
|
*d = (*d & 0xff000000) | (*s & 0x00ffffff);
|
||||||
d = (d & 0xff000000) | (m_mem.m_vm32[soff] & 0x00ffffff);
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else // if(spsm.trbpp == 16)
|
else // if(spsm.trbpp == 16)
|
||||||
{
|
{
|
||||||
copy(dpo.assertSizesMatch(GSLocalMemory::swizzle16), spo.assertSizesMatch(GSLocalMemory::swizzle16), [&](uint32 doff, uint32 soff)
|
copyFast(m_mem.m_vm16, dpo.assertSizesMatch(GSLocalMemory::swizzle16), spo.assertSizesMatch(GSLocalMemory::swizzle16), [](uint16* d, uint16* s)
|
||||||
{
|
{
|
||||||
m_mem.m_vm16[doff] = m_mem.m_vm16[soff];
|
*d = *s;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
|
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
|
||||||
{
|
{
|
||||||
copy(GSOffset::fromKnownPSM(dbp, dbw, PSM_PSMT8), GSOffset::fromKnownPSM(sbp, sbw, PSM_PSMT8), [&](uint32 doff, uint32 soff)
|
copyFast(m_mem.m_vm8, GSOffset::fromKnownPSM(dbp, dbw, PSM_PSMT8), GSOffset::fromKnownPSM(sbp, sbw, PSM_PSMT8), [](uint8* d, uint8* s)
|
||||||
{
|
{
|
||||||
m_mem.m_vm8[doff] = m_mem.m_vm8[soff];
|
*d = *s;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
|
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
|
||||||
|
|
|
@ -907,23 +907,23 @@ void GSRendererHW::SwSpriteRender()
|
||||||
|
|
||||||
for (int y = 0; y < h; y++, ++sy, ++dy)
|
for (int y = 0; y < h; y++, ++sy, ++dy)
|
||||||
{
|
{
|
||||||
GSOffset::PAHelper spa = texture_mapping_enabled ? spo.paMulti(sy) : GSOffset::PAHelper();
|
auto spa = texture_mapping_enabled ? spo.paMulti(m_mem.m_vm32, sx, sy) : GSOffset::PAPtrHelper<uint32>();
|
||||||
GSOffset::PAHelper dpa = dpo.paMulti(dy);
|
auto dpa = dpo.paMulti(m_mem.m_vm32, dx, dy);
|
||||||
|
|
||||||
ASSERT(w % 2 == 0);
|
ASSERT(w % 2 == 0);
|
||||||
|
|
||||||
for (int x = 0; x < w; x += 2)
|
for (int x = 0; x < w; x += 2)
|
||||||
{
|
{
|
||||||
uint32 di = dpa.value(dx + x);
|
uint32* di = dpa.value(x);
|
||||||
ASSERT(di + 1 == dpa.value(dx + x + 1)); // Destination pixel pair is adjacent in memory
|
ASSERT(di + 1 == dpa.value(x + 1)); // Destination pixel pair is adjacent in memory (compare addresses, not pixel values)
|
||||||
|
|
||||||
GSVector4i sc;
|
GSVector4i sc;
|
||||||
if (texture_mapping_enabled)
|
if (texture_mapping_enabled)
|
||||||
{
|
{
|
||||||
uint32 si = spa.value(sx + x);
|
uint32* si = spa.value(x);
|
||||||
// Read 2 source pixel colors
|
// Read 2 source pixel colors
|
||||||
ASSERT((si + 1) == spa.value(sx + x + 1)); // Source pixel pair is adjacent in memory
|
ASSERT((si + 1) == spa.value(x + 1)); // Source pixel pair is adjacent in memory (compare addresses, not pixel values)
|
||||||
sc = GSVector4i::loadl(&m_mem.m_vm32[si]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
|
sc = GSVector4i::loadl(si).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
|
||||||
|
|
||||||
// Apply TFX
|
// Apply TFX
|
||||||
ASSERT(tex0_tfx == 0 || tex0_tfx == 1);
|
ASSERT(tex0_tfx == 0 || tex0_tfx == 1);
|
||||||
|
@ -944,7 +944,7 @@ void GSRendererHW::SwSpriteRender()
|
||||||
if (alpha_blending_enabled || fb_mask_enabled)
|
if (alpha_blending_enabled || fb_mask_enabled)
|
||||||
{
|
{
|
||||||
// Read 2 destination pixel colors
|
// Read 2 destination pixel colors
|
||||||
dc0 = GSVector4i::loadl(&m_mem.m_vm32[di]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
|
dc0 = GSVector4i::loadl(di).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
|
||||||
}
|
}
|
||||||
|
|
||||||
if (alpha_blending_enabled)
|
if (alpha_blending_enabled)
|
||||||
|
@ -999,7 +999,7 @@ void GSRendererHW::SwSpriteRender()
|
||||||
|
|
||||||
// Store 2 pixel colors
|
// Store 2 pixel colors
|
||||||
dc = dc.pu16(GSVector4i::zero()); // 0x0000000000000000AABBGGRRaabbggrr
|
dc = dc.pu16(GSVector4i::zero()); // 0x0000000000000000AABBGGRRaabbggrr
|
||||||
GSVector4i::storel(&m_mem.m_vm32[di], dc);
|
GSVector4i::storel(di, dc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1823,11 +1823,11 @@ void GSRendererHW::OI_GsMemClear()
|
||||||
// Based on WritePixel32
|
// Based on WritePixel32
|
||||||
for (int y = r.top; y < r.bottom; y++)
|
for (int y = r.top; y < r.bottom; y++)
|
||||||
{
|
{
|
||||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(y);
|
auto pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(m_mem.m_vm32, 0, y);
|
||||||
|
|
||||||
for (int x = r.left; x < r.right; x++)
|
for (int x = r.left; x < r.right; x++)
|
||||||
{
|
{
|
||||||
m_mem.m_vm32[pa.value(x)] = 0; // Here the constant color
|
*pa.value(x) = 0; // Here the constant color
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1836,11 +1836,11 @@ void GSRendererHW::OI_GsMemClear()
|
||||||
// Based on WritePixel24
|
// Based on WritePixel24
|
||||||
for (int y = r.top; y < r.bottom; y++)
|
for (int y = r.top; y < r.bottom; y++)
|
||||||
{
|
{
|
||||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(y);
|
auto pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(m_mem.m_vm32, 0, y);
|
||||||
|
|
||||||
for (int x = r.left; x < r.right; x++)
|
for (int x = r.left; x < r.right; x++)
|
||||||
{
|
{
|
||||||
m_mem.m_vm32[pa.value(x)] &= 0xff000000; // Clear the color
|
*pa.value(x) &= 0xff000000; // Clear the color
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1851,11 +1851,11 @@ void GSRendererHW::OI_GsMemClear()
|
||||||
// Based on WritePixel16
|
// Based on WritePixel16
|
||||||
for(int y = r.top; y < r.bottom; y++)
|
for(int y = r.top; y < r.bottom; y++)
|
||||||
{
|
{
|
||||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(y);
|
auto pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(m_mem.m_vm16, 0, y);
|
||||||
|
|
||||||
for(int x = r.left; x < r.right; x++)
|
for(int x = r.left; x < r.right; x++)
|
||||||
{
|
{
|
||||||
m_mem.m_vm16[pa.value(x)] = 0; // Here the constant color
|
*pa.value(x) = 0; // Here the constant color
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -2942,11 +2942,11 @@ void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, uint32 c
|
||||||
|
|
||||||
for (int y = r.y; y < r.w; y++)
|
for (int y = r.y; y < r.w; y++)
|
||||||
{
|
{
|
||||||
GSOffset::PAHelper pa = off.paMulti(y);
|
auto pa = off.paMulti(vm, 0, y);
|
||||||
|
|
||||||
for (int x = r.x; x < r.z; x++)
|
for (int x = r.x; x < r.z; x++)
|
||||||
{
|
{
|
||||||
T& d = vm[pa.value(x)];
|
T& d = *pa.value(x);
|
||||||
d = (T)(!masked ? c : (c | (d & m)));
|
d = (T)(!masked ? c : (c | (d & m)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2992,9 +2992,11 @@ void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const G
|
||||||
|
|
||||||
for (int y = r.y; y < r.w; y += 8)
|
for (int y = r.y; y < r.w; y += 8)
|
||||||
{
|
{
|
||||||
|
auto pa = off.paMulti(vm, 0, y);
|
||||||
|
|
||||||
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
|
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
|
||||||
{
|
{
|
||||||
GSVector4i* RESTRICT p = (GSVector4i*)&vm[off.pa(x, y)];
|
GSVector4i* RESTRICT p = (GSVector4i*)pa.value(x);
|
||||||
|
|
||||||
for (int i = 0; i < 16; i += 4)
|
for (int i = 0; i < 16; i += 4)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue