mirror of https://github.com/PCSX2/pcsx2.git
GS: Remove GSOffset::PAPtrHelper
Not compatible with the real GS's Z addressing, RIP
This commit is contained in:
parent
13880354cf
commit
2a6f2939be
|
@ -256,13 +256,14 @@ template <int n>
|
|||
void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||
{
|
||||
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSMCT32);
|
||||
auto pa = off.paMulti(m_mem->vm32(), TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||
|
||||
u32* vm = m_mem->vm32();
|
||||
u16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4);
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
u32 c = *pa.value(i);
|
||||
u32 c = vm[pa.value(i)];
|
||||
|
||||
clut[i] = (u16)(c & 0xffff);
|
||||
clut[i + 256] = (u16)(c >> 16);
|
||||
|
@ -273,13 +274,14 @@ template <int n>
|
|||
void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||
{
|
||||
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSMCT16);
|
||||
auto pa = off.paMulti(m_mem->vm16(), TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||
|
||||
u16* vm = m_mem->vm16();
|
||||
u16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
clut[i] = *pa.value(i);
|
||||
clut[i] = vm[pa.value(i)];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -287,13 +289,14 @@ template <int n>
|
|||
void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||
{
|
||||
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSMCT16S);
|
||||
auto pa = off.paMulti(m_mem->vm16(), TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||
|
||||
u16* vm = m_mem->vm16();
|
||||
u16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
clut[i] = *pa.value(i);
|
||||
clut[i] = vm[pa.value(i)];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -250,30 +250,6 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
/// Helper class for efficiently getting the addresses of multiple pixels in a line (along the x axis)
|
||||
/// Slightly more efficient than PAHelper by pre-adding the base offset to the VM pointer
|
||||
template <typename VM>
|
||||
class PAPtrHelper
|
||||
{
|
||||
/// Pixel swizzle array
|
||||
const int* m_pixelSwizzleRow;
|
||||
VM* m_base;
|
||||
|
||||
public:
|
||||
PAPtrHelper() = default;
|
||||
PAPtrHelper(const GSOffset& off, VM* vm, int x, int y)
|
||||
{
|
||||
m_pixelSwizzleRow = off.m_pixelSwizzleRow[y & off.m_pixelRowMask]->value + x;
|
||||
m_base = &vm[off.pixelAddressZeroX(y)];
|
||||
}
|
||||
|
||||
/// Get pixel reference for the given x offset from the one used to create the PAPtrHelper
|
||||
VM* value(int x) const
|
||||
{
|
||||
return m_base + m_pixelSwizzleRow[x];
|
||||
}
|
||||
};
|
||||
|
||||
/// Get the address of the given pixel
|
||||
u32 pa(int x, int y) const
|
||||
{
|
||||
|
@ -286,13 +262,6 @@ public:
|
|||
return PAHelper(*this, x, y);
|
||||
}
|
||||
|
||||
/// Get a helper class for efficiently calculating multiple pixel addresses in a line (along the x axis)
|
||||
template <typename VM>
|
||||
PAPtrHelper<VM> paMulti(VM* vm, int x, int y) const
|
||||
{
|
||||
return PAPtrHelper(*this, vm, x, y);
|
||||
}
|
||||
|
||||
/// Loop over the pixels in the given rectangle
|
||||
/// Fn should be void(*)(VM*, Src*)
|
||||
template <typename VM, typename Src, typename Fn>
|
||||
|
@ -302,10 +271,10 @@ public:
|
|||
|
||||
for (int y = r.top; y < r.bottom; y++, px = reinterpret_cast<Src*>(reinterpret_cast<u8*>(px) + pitch))
|
||||
{
|
||||
PAPtrHelper<VM> pa = paMulti(vm, 0, y);
|
||||
PAHelper pa = paMulti(0, y);
|
||||
for (int x = r.left; x < r.right; x++)
|
||||
{
|
||||
fn(pa.value(x), px + x);
|
||||
fn(&vm[pa.value(x)], px + x);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -782,10 +782,10 @@ void GSLocalMemoryFunctions::WriteImage24Z(GSLocalMemory& mem, int& tx, int& ty,
|
|||
/// Helper for WriteImageX and ReadImageX
|
||||
/// `len` is in pixels, unlike WriteImageX/ReadImageX where it's bytes
|
||||
/// `xinc` is the amount to increment `x` by per iteration
|
||||
/// Calls `paGetter` on a starting (x, y) to get some sort of pixel address helper for each line,
|
||||
/// Creates a GSOffset::PAHelper on a starting (x, y) to get the base address for each line,
|
||||
/// then calls `fn` with the helper and an x offset once for every `xinc` pixels along that line
|
||||
template <typename PAGetter, typename Fn>
|
||||
static void readWriteHelperImpl(int& tx, int& ty, int len, int xinc, int sx, int w, PAGetter&& paGetter, Fn&& fn)
|
||||
template <typename Fn>
|
||||
static void readWriteHelper(int& tx, int& ty, int len, int xinc, int sx, int w, const GSOffset& off, Fn&& fn)
|
||||
{
|
||||
int y = ty;
|
||||
int ex = sx + w;
|
||||
|
@ -793,7 +793,7 @@ static void readWriteHelperImpl(int& tx, int& ty, int len, int xinc, int sx, int
|
|||
|
||||
ASSERT(remX >= 0);
|
||||
|
||||
auto pa = paGetter(tx, y);
|
||||
GSOffset::PAHelper pa = off.paMulti(tx, y);
|
||||
|
||||
while (len > 0)
|
||||
{
|
||||
|
@ -808,7 +808,7 @@ static void readWriteHelperImpl(int& tx, int& ty, int len, int xinc, int sx, int
|
|||
{
|
||||
y++;
|
||||
remX = w;
|
||||
pa = paGetter(sx, y);
|
||||
pa = off.paMulti(sx, y);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -816,26 +816,6 @@ static void readWriteHelperImpl(int& tx, int& ty, int len, int xinc, int sx, int
|
|||
ty = y;
|
||||
}
|
||||
|
||||
/// Helper for WriteImageX and ReadImageX
|
||||
/// `len` is in pixels, unlike WriteImageX/ReadImageX where it's bytes
|
||||
/// `xinc` is the amount to increment `x` by per iteration
|
||||
/// Calls `fn` with a `PAHelper` representing the current line and an int representing the x offset in that line
|
||||
template <typename Fn>
|
||||
static void readWriteHelper(int& tx, int& ty, int len, int xinc, int sx, int w, const GSOffset& off, Fn&& fn)
|
||||
{
|
||||
readWriteHelperImpl(tx, ty, len, xinc, sx, w, [&](int x, int y){ return off.paMulti(x, y); }, std::forward<Fn>(fn));
|
||||
}
|
||||
|
||||
/// Helper for WriteImageX and ReadImageX
|
||||
/// `len` is in pixels, unlike WriteImageX/ReadImageX where it's bytes
|
||||
/// `xinc` is the amount to increment `x` by per iteration
|
||||
/// Calls `fn` with a `PAPtrHelper` representing the current line and an int representing the x offset in that line
|
||||
template <typename VM, typename Fn>
|
||||
static void readWriteHelper(VM* vm, int& tx, int& ty, int len, int xinc, int sx, int w, const GSOffset& off, Fn&& fn)
|
||||
{
|
||||
readWriteHelperImpl(tx, ty, len, xinc, sx, w, [&](int x, int y){ return off.paMulti(vm, x, y); }, std::forward<Fn>(fn));
|
||||
}
|
||||
|
||||
void GSLocalMemoryFunctions::WriteImageX(GSLocalMemory& mem, int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
||||
{
|
||||
if (len <= 0)
|
||||
|
@ -844,6 +824,9 @@ void GSLocalMemoryFunctions::WriteImageX(GSLocalMemory& mem, int& tx, int& ty, c
|
|||
const u8* pb = (u8*)src;
|
||||
const u16* pw = (u16*)src;
|
||||
const u32* pd = (u32*)src;
|
||||
u8* vm8 = mem.vm8();
|
||||
u16* vm16 = mem.vm16();
|
||||
u32* vm32 = mem.vm32();
|
||||
|
||||
u32 bp = BITBLTBUF.DBP;
|
||||
u32 bw = BITBLTBUF.DBW;
|
||||
|
@ -857,18 +840,18 @@ void GSLocalMemoryFunctions::WriteImageX(GSLocalMemory& mem, int& tx, int& ty, c
|
|||
{
|
||||
case PSMCT32:
|
||||
case PSMZ32:
|
||||
readWriteHelper(mem.vm32(), tx, ty, len / 4, 1, sx, w, off.assertSizesMatch(GSLocalMemory::swizzle32), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len / 4, 1, sx, w, off.assertSizesMatch(GSLocalMemory::swizzle32), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
*pa.value(x) = *pd;
|
||||
vm32[pa.value(x)] = *pd;
|
||||
pd++;
|
||||
});
|
||||
break;
|
||||
|
||||
case PSMCT24:
|
||||
case PSMZ24:
|
||||
readWriteHelper(mem.vm32(), tx, ty, len / 3, 1, sx, w, off.assertSizesMatch(GSLocalMemory::swizzle32), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len / 3, 1, sx, w, off.assertSizesMatch(GSLocalMemory::swizzle32), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
mem.WritePixel24(pa.value(x), *(u32*)pb);
|
||||
mem.WritePixel24(&vm32[pa.value(x)], *(u32*)pb);
|
||||
pb += 3;
|
||||
});
|
||||
break;
|
||||
|
@ -877,17 +860,17 @@ void GSLocalMemoryFunctions::WriteImageX(GSLocalMemory& mem, int& tx, int& ty, c
|
|||
case PSMCT16S:
|
||||
case PSMZ16:
|
||||
case PSMZ16S:
|
||||
readWriteHelper(mem.vm16(), tx, ty, len / 2, 1, sx, w, off.assertSizesMatch(GSLocalMemory::swizzle16), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len / 2, 1, sx, w, off.assertSizesMatch(GSLocalMemory::swizzle16), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
*pa.value(x) = *pw;
|
||||
vm16[pa.value(x)] = *pw;
|
||||
pw++;
|
||||
});
|
||||
break;
|
||||
|
||||
case PSMT8:
|
||||
readWriteHelper(mem.m_vm8, tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT8), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT8), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
*pa.value(x) = *pb;
|
||||
vm8[pa.value(x)] = *pb;
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
@ -902,27 +885,27 @@ void GSLocalMemoryFunctions::WriteImageX(GSLocalMemory& mem, int& tx, int& ty, c
|
|||
break;
|
||||
|
||||
case PSMT8H:
|
||||
readWriteHelper(mem.vm32(), tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT8H), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT8H), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
mem.WritePixel8H(pa.value(x), *pb);
|
||||
mem.WritePixel8H(&vm32[pa.value(x)], *pb);
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
||||
case PSMT4HL:
|
||||
readWriteHelper(mem.vm32(), tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT4HL), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT4HL), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
mem.WritePixel4HL(pa.value(x), *pb & 0xf);
|
||||
mem.WritePixel4HL(pa.value(x + 1), *pb >> 4);
|
||||
mem.WritePixel4HL(&vm32[pa.value(x)], *pb & 0xf);
|
||||
mem.WritePixel4HL(&vm32[pa.value(x + 1)], *pb >> 4);
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
||||
case PSMT4HH:
|
||||
readWriteHelper(mem.vm32(), tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT4HH), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT4HH), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
mem.WritePixel4HH(pa.value(x), *pb & 0xf);
|
||||
mem.WritePixel4HH(pa.value(x + 1), *pb >> 4);
|
||||
mem.WritePixel4HH(&vm32[pa.value(x)], *pb & 0xf);
|
||||
mem.WritePixel4HH(&vm32[pa.value(x + 1)], *pb >> 4);
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
@ -939,6 +922,9 @@ void GSLocalMemoryFunctions::ReadImageX(const GSLocalMemory& mem, int& tx, int&
|
|||
u8* RESTRICT pb = (u8*)dst;
|
||||
u16* RESTRICT pw = (u16*)dst;
|
||||
u32* RESTRICT pd = (u32*)dst;
|
||||
const u32* vm32 = mem.vm32();
|
||||
const u16* vm16 = mem.vm16();
|
||||
const u8* vm8 = mem.vm8();
|
||||
|
||||
u32 bp = BITBLTBUF.SBP;
|
||||
u32 bw = BITBLTBUF.SBW;
|
||||
|
@ -963,38 +949,39 @@ void GSLocalMemoryFunctions::ReadImageX(const GSLocalMemory& mem, int& tx, int&
|
|||
|
||||
len /= 4;
|
||||
|
||||
GSOffset::PAPtrHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(mem.vm32(), 0, y);
|
||||
u32* vm = mem.vm32();
|
||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(0, y);
|
||||
|
||||
while (len > 0)
|
||||
{
|
||||
for (; len > 0 && x < ex && (x & 7); len--, x++, pd++)
|
||||
{
|
||||
*pd = *pa.value(x);
|
||||
*pd = vm[pa.value(x)];
|
||||
}
|
||||
|
||||
// aligned to a column
|
||||
|
||||
for (int ex8 = ex - 8; len >= 8 && x <= ex8; len -= 8, x += 8, pd += 8)
|
||||
{
|
||||
u32* ps = pa.value(x);
|
||||
u32* ps = &vm[pa.value(x)];
|
||||
|
||||
GSVector4i::store<false>(&pd[0], GSVector4i::load(ps + 0, ps + 4));
|
||||
GSVector4i::store<false>(&pd[4], GSVector4i::load(ps + 8, ps + 12));
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
ASSERT(pd[i] == *pa.value(x + i));
|
||||
ASSERT(pd[i] == vm[pa.value(x + i)]);
|
||||
}
|
||||
|
||||
for (; len > 0 && x < ex; len--, x++, pd++)
|
||||
{
|
||||
*pd = *pa.value(x);
|
||||
*pd = vm[pa.value(x)];
|
||||
}
|
||||
|
||||
if (x == ex)
|
||||
{
|
||||
y++;
|
||||
x = sx;
|
||||
pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(mem.vm32(), 0, y);
|
||||
pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(0, y);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1005,9 +992,9 @@ void GSLocalMemoryFunctions::ReadImageX(const GSLocalMemory& mem, int& tx, int&
|
|||
|
||||
case PSMCT24:
|
||||
case PSMZ24:
|
||||
readWriteHelper(mem.vm32(), tx, ty, len / 3, 1, sx, w, off.assertSizesMatch(GSLocalMemory::swizzle32), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len / 3, 1, sx, w, off.assertSizesMatch(GSLocalMemory::swizzle32), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
u32 c = *pa.value(x);
|
||||
u32 c = vm32[pa.value(x)];
|
||||
pb[0] = (u8)(c);
|
||||
pb[1] = (u8)(c >> 8);
|
||||
pb[2] = (u8)(c >> 16);
|
||||
|
@ -1018,17 +1005,17 @@ void GSLocalMemoryFunctions::ReadImageX(const GSLocalMemory& mem, int& tx, int&
|
|||
case PSMCT16S:
|
||||
case PSMZ16:
|
||||
case PSMZ16S:
|
||||
readWriteHelper(mem.vm16(), tx, ty, len / 2, 1, sx, w, off.assertSizesMatch(GSLocalMemory::swizzle16), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len / 2, 1, sx, w, off.assertSizesMatch(GSLocalMemory::swizzle16), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
*pw = *pa.value(x);
|
||||
*pw = vm16[pa.value(x)];
|
||||
pw++;
|
||||
});
|
||||
break;
|
||||
|
||||
case PSMT8:
|
||||
readWriteHelper(mem.m_vm8, tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT8), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT8), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
*pb = *pa.value(x);
|
||||
*pb = vm8[pa.value(x)];
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
@ -1043,28 +1030,28 @@ void GSLocalMemoryFunctions::ReadImageX(const GSLocalMemory& mem, int& tx, int&
|
|||
break;
|
||||
|
||||
case PSMT8H:
|
||||
readWriteHelper(mem.vm32(), tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT8H), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT8H), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
*pb = (u8)(*pa.value(x) >> 24);
|
||||
*pb = (u8)(vm32[pa.value(x)] >> 24);
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
||||
case PSMT4HL:
|
||||
readWriteHelper(mem.vm32(), tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT4HL), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT4HL), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
u32 c0 = *pa.value(x) >> 24 & 0x0f;
|
||||
u32 c1 = *pa.value(x + 1) >> 20 & 0xf0;
|
||||
u32 c0 = vm32[pa.value(x)] >> 24 & 0x0f;
|
||||
u32 c1 = vm32[pa.value(x + 1)] >> 20 & 0xf0;
|
||||
*pb = (u8)(c0 | c1);
|
||||
pb++;
|
||||
});
|
||||
break;
|
||||
|
||||
case PSMT4HH:
|
||||
readWriteHelper(mem.vm32(), tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT4HH), [&](auto& pa, int x)
|
||||
readWriteHelper(tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSMT4HH), [&](GSOffset::PAHelper& pa, int x)
|
||||
{
|
||||
u32 c0 = *pa.value(x) >> 28 & 0x0f;
|
||||
u32 c1 = *pa.value(x + 1) >> 24 & 0xf0;
|
||||
u32 c0 = vm32[pa.value(x)] >> 28 & 0x0f;
|
||||
u32 c1 = vm32[pa.value(x + 1)] >> 24 & 0xf0;
|
||||
*pb = (u8)(c0 | c1);
|
||||
pb++;
|
||||
});
|
||||
|
|
|
@ -2052,7 +2052,7 @@ void GSState::Move()
|
|||
m_draw_transfers.push_back(new_transfer);
|
||||
}
|
||||
|
||||
auto genericCopy = [=](const GSOffset& dpo, const GSOffset& spo, auto&& getPAHelper, auto&& pxCopyFn)
|
||||
auto copy = [=](const GSOffset& dpo, const GSOffset& spo, auto&& pxCopyFn)
|
||||
{
|
||||
int _sy = sy, _dy = dy; // Faster with local copied variables, compiler optimizations are dumb
|
||||
if (xinc > 0)
|
||||
|
@ -2083,21 +2083,21 @@ void GSState::Move()
|
|||
|
||||
for (int y = starty; y != endy; y+= y_inc, _sy += y_inc, _dy += y_inc)
|
||||
{
|
||||
auto s = getPAHelper(spo, 0, _sy);
|
||||
auto d = getPAHelper(dpo, 0, _dy);
|
||||
GSOffset::PAHelper s = spo.paMulti(0, _sy);
|
||||
GSOffset::PAHelper d = dpo.paMulti(0, _dy);
|
||||
|
||||
if (((sx < dx) && ((xpage + page_width) > dx)))
|
||||
{
|
||||
for (int x = w - 1; x >= 0; x--)
|
||||
{
|
||||
pxCopyFn(d, s, (dx + x) & 2047, (sx + x) & 2047);
|
||||
pxCopyFn(d.value((dx + x) & 2047), s.value((sx + x) & 2047));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
pxCopyFn(d, s, (dx + x) & 2047, (sx + x) & 2047);
|
||||
pxCopyFn(d.value((dx + x) & 2047), s.value((sx + x) & 2047));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2106,12 +2106,12 @@ void GSState::Move()
|
|||
{
|
||||
for (int y = 0; y < h; y++, _sy += yinc, _dy += yinc)
|
||||
{
|
||||
auto s = getPAHelper(spo, 0, _sy);
|
||||
auto d = getPAHelper(dpo, 0, _dy);
|
||||
GSOffset::PAHelper s = spo.paMulti(0, _sy);
|
||||
GSOffset::PAHelper d = dpo.paMulti(0, _dy);
|
||||
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
pxCopyFn(d, s, (dx + x) & 2047, (sx + x) & 2047);
|
||||
pxCopyFn(d.value((dx + x) & 2047), s.value((sx + x) & 2047));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2120,66 +2120,50 @@ void GSState::Move()
|
|||
{
|
||||
for (int y = 0; y < h; y++, _sy += yinc, _dy += yinc)
|
||||
{
|
||||
auto s = getPAHelper(spo, 0, _sy);
|
||||
auto d = getPAHelper(dpo, 0, _dy);
|
||||
GSOffset::PAHelper s = spo.paMulti(0, _sy);
|
||||
GSOffset::PAHelper d = dpo.paMulti(0, _dy);
|
||||
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
pxCopyFn(d, s, (dx - x) & 2047, (sx - x) & 2047);
|
||||
pxCopyFn(d.value((dx - x) & 2047), s.value((sx - x) & 2047));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
auto copy = [=](const GSOffset& dpo, const GSOffset& spo, auto&& pxCopyFn)
|
||||
{
|
||||
genericCopy(dpo, spo,
|
||||
[](const GSOffset& o, int x, int y) { return o.paMulti(x, y); },
|
||||
[=](const GSOffset::PAHelper& d, const GSOffset::PAHelper& s, int dx, int sx)
|
||||
{
|
||||
return pxCopyFn(d.value(dx), s.value(sx));
|
||||
});
|
||||
};
|
||||
|
||||
auto copyFast = [=](auto* vm, const GSOffset& dpo, const GSOffset& spo, auto&& pxCopyFn)
|
||||
{
|
||||
genericCopy(dpo, spo,
|
||||
[=](const GSOffset& o, int x, int y) { return o.paMulti(vm, x, y); },
|
||||
[=](const auto& d, const auto& s, int dx, int sx)
|
||||
{
|
||||
return pxCopyFn(d.value(dx), s.value(sx));
|
||||
});
|
||||
};
|
||||
|
||||
if (spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
|
||||
{
|
||||
if (spsm.trbpp == 32)
|
||||
{
|
||||
copyFast(m_mem.vm32(), dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [](u32* d, u32* s)
|
||||
u32* vm = m_mem.vm32();
|
||||
copy(dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [vm](u32 doff, u32 soff)
|
||||
{
|
||||
*d = *s;
|
||||
vm[doff] = vm[soff];
|
||||
});
|
||||
}
|
||||
else if (spsm.trbpp == 24)
|
||||
{
|
||||
copyFast(m_mem.vm32(), dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [](u32* d, u32* s)
|
||||
u32* vm = m_mem.vm32();
|
||||
copy(dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [vm](u32 doff, u32 soff)
|
||||
{
|
||||
*d = (*d & 0xff000000) | (*s & 0x00ffffff);
|
||||
vm[doff] = (vm[doff] & 0xff000000) | (vm[soff] & 0x00ffffff);
|
||||
});
|
||||
}
|
||||
else // if(spsm.trbpp == 16)
|
||||
{
|
||||
copyFast(m_mem.vm16(), dpo.assertSizesMatch(GSLocalMemory::swizzle16), spo.assertSizesMatch(GSLocalMemory::swizzle16), [](u16* d, u16* s)
|
||||
u16* vm = m_mem.vm16();
|
||||
copy(dpo.assertSizesMatch(GSLocalMemory::swizzle16), spo.assertSizesMatch(GSLocalMemory::swizzle16), [vm](u32 doff, u32 soff)
|
||||
{
|
||||
*d = *s;
|
||||
vm[doff] = vm[soff];
|
||||
});
|
||||
}
|
||||
}
|
||||
else if (m_env.BITBLTBUF.SPSM == PSMT8 && m_env.BITBLTBUF.DPSM == PSMT8)
|
||||
{
|
||||
copyFast(m_mem.m_vm8, GSOffset::fromKnownPSM(dbp, dbw, PSMT8), GSOffset::fromKnownPSM(sbp, sbw, PSMT8), [](u8* d, u8* s)
|
||||
u8* vm = m_mem.m_vm8;
|
||||
copy(GSOffset::fromKnownPSM(dbp, dbw, PSMT8), GSOffset::fromKnownPSM(sbp, sbw, PSMT8), [vm](u32 doff, u32 soff)
|
||||
{
|
||||
*d = *s;
|
||||
vm[doff] = vm[soff];
|
||||
});
|
||||
}
|
||||
else if (m_env.BITBLTBUF.SPSM == PSMT4 && m_env.BITBLTBUF.DPSM == PSMT4)
|
||||
|
|
|
@ -1399,22 +1399,23 @@ void GSRendererHW::SwSpriteRender()
|
|||
|
||||
for (int y = 0; y < h; y++, ++sy, ++dy)
|
||||
{
|
||||
const auto& spa = spo.paMulti(m_mem.vm32(), sx, sy);
|
||||
const auto& dpa = dpo.paMulti(m_mem.vm32(), dx, dy);
|
||||
u32* vm = m_mem.vm32();
|
||||
const GSOffset::PAHelper spa = spo.paMulti(sx, sy);
|
||||
const GSOffset::PAHelper dpa = dpo.paMulti(dx, dy);
|
||||
|
||||
ASSERT(w % 2 == 0);
|
||||
|
||||
for (int x = 0; x < w; x += 2)
|
||||
{
|
||||
u32* di = dpa.value(x);
|
||||
ASSERT(di + 1 == dpa.value(x + 1)); // Destination pixel pair is adjacent in memory
|
||||
u32* di = &vm[dpa.value(x)];
|
||||
ASSERT(di + 1 == &vm[dpa.value(x + 1)]); // Destination pixel pair is adjacent in memory
|
||||
|
||||
GSVector4i sc = {};
|
||||
if (texture_mapping_enabled)
|
||||
{
|
||||
const u32* si = spa.value(x);
|
||||
const u32* si = &vm[spa.value(x)];
|
||||
// Read 2 source pixel colors
|
||||
ASSERT(si + 1 == spa.value(x + 1)); // Source pixel pair is adjacent in memory
|
||||
ASSERT(si + 1 == &vm[spa.value(x + 1)]); // Source pixel pair is adjacent in memory
|
||||
sc = GSVector4i::loadl(si).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
|
||||
|
||||
// Apply TFX
|
||||
|
@ -1444,10 +1445,11 @@ void GSRendererHW::SwSpriteRender()
|
|||
// Blending
|
||||
const GSVector4i A = alpha_a == 0 ? sc : alpha_a == 1 ? dc0 : GSVector4i::zero();
|
||||
const GSVector4i B = alpha_b == 0 ? sc : alpha_b == 1 ? dc0 : GSVector4i::zero();
|
||||
const GSVector4i C = alpha_c == 2 ? GSVector4i(alpha_fix).xxxx().ps32() : (alpha_c == 0 ? sc : dc0).yyww() // 0x00AA00BB00AA00BB00aa00bb00aa00bb
|
||||
.srl32(16) // 0x000000AA000000AA000000aa000000aa
|
||||
.ps32() // 0x00AA00AA00aa00aa00AA00AA00aa00aa
|
||||
.xxyy(); // 0x00AA00AA00AA00AA00aa00aa00aa00aa
|
||||
const GSVector4i C = alpha_c == 2 ? GSVector4i(alpha_fix).xxxx().ps32()
|
||||
: (alpha_c == 0 ? sc : dc0).yyww() // 0x00AA00BB00AA00BB00aa00bb00aa00bb
|
||||
.srl32(16) // 0x000000AA000000AA000000aa000000aa
|
||||
.ps32() // 0x00AA00AA00aa00aa00AA00AA00aa00aa
|
||||
.xxyy(); // 0x00AA00AA00AA00AA00aa00aa00aa00aa
|
||||
const GSVector4i D = alpha_d == 0 ? sc : alpha_d == 1 ? dc0 : GSVector4i::zero();
|
||||
dc = A.sub16(B).mul16l(C).sra16(7).add16(D); // (((A - B) * C) >> 7) + D, must use sra16 due to signed 16 bit values.
|
||||
// dc alpha channels (dc.u16[3], dc.u16[7]) dirty
|
||||
|
@ -5962,24 +5964,26 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
|
|||
if (format == 0)
|
||||
{
|
||||
// Based on WritePixel32
|
||||
u32* vm = m_mem.vm32();
|
||||
for (int y = top; y < bottom; y++)
|
||||
{
|
||||
auto pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(m_mem.vm32(), 0, y);
|
||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(0, y);
|
||||
|
||||
for (int x = left; x < right; x++)
|
||||
*pa.value(x) = vert_color;
|
||||
vm[pa.value(x)] = vert_color;
|
||||
}
|
||||
}
|
||||
else if (format == 1)
|
||||
{
|
||||
// Based on WritePixel24
|
||||
u32* vm = m_mem.vm32();
|
||||
const u32 write_color = vert_color & 0xffffffu;
|
||||
for (int y = top; y < bottom; y++)
|
||||
{
|
||||
auto pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(m_mem.vm32(), 0, y);
|
||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(0, y);
|
||||
|
||||
for (int x = left; x < right; x++)
|
||||
*pa.value(x) = (*pa.value(x) & 0xff000000u) | write_color;
|
||||
vm[pa.value(x)] = (vm[pa.value(x)] & 0xff000000u) | write_color;
|
||||
}
|
||||
}
|
||||
else if (format == 2)
|
||||
|
@ -5987,12 +5991,13 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
|
|||
const u16 converted_color = ((vert_color >> 16) & 0x8000) | ((vert_color >> 9) & 0x7C00) | ((vert_color >> 6) & 0x7E0) | ((vert_color >> 3) & 0x1F);
|
||||
|
||||
// Based on WritePixel16
|
||||
u16* vm = m_mem.vm16();
|
||||
for (int y = top; y < bottom; y++)
|
||||
{
|
||||
auto pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(m_mem.vm16(), 0, y);
|
||||
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(0, y);
|
||||
|
||||
for (int x = left; x < right; x++)
|
||||
*pa.value(x) = converted_color;
|
||||
vm[pa.value(x)] = converted_color;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1749,11 +1749,11 @@ __ri static void FillRect(const GSOffset& off, const GSVector4i& r, u32 c, u32 m
|
|||
|
||||
for (int y = r.y; y < r.w; y++)
|
||||
{
|
||||
auto pa = off.paMulti(vm, 0, y);
|
||||
GSOffset::PAHelper pa = off.paMulti(0, y);
|
||||
|
||||
for (int x = r.x; x < r.z; x++)
|
||||
{
|
||||
T& d = *pa.value(x);
|
||||
T& d = vm[pa.value(x)];
|
||||
d = (T)(!masked ? c : (c | (d & m)));
|
||||
}
|
||||
}
|
||||
|
@ -1799,11 +1799,11 @@ __ri static void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVec
|
|||
|
||||
for (int y = r.y; y < r.w; y += 8)
|
||||
{
|
||||
auto pa = off.paMulti(vm, 0, y);
|
||||
GSOffset::PAHelper pa = off.paMulti(0, y);
|
||||
|
||||
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
|
||||
{
|
||||
GSVector4i* RESTRICT p = (GSVector4i*)pa.value(x);
|
||||
GSVector4i* RESTRICT p = (GSVector4i*)&vm[pa.value(x)];
|
||||
|
||||
for (int i = 0; i < 16; i += 4)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue