mirror of https://github.com/PCSX2/pcsx2.git
GS: Remove special casing alignment on AVX+
Extra code, unneeded as AVX+ has fast unaligned loads
This commit is contained in:
parent
bdc7dc2cd8
commit
4139da82b9
|
@ -42,50 +42,10 @@ public:
|
|||
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
GSVector8i v0, v1;
|
||||
GSVector8i v0 = GSVector8i::load<false>(s0).acbd();
|
||||
GSVector8i v1 = GSVector8i::load<false>(s1).acbd();
|
||||
|
||||
if (alignment == 32)
|
||||
{
|
||||
v0 = GSVector8i::load<true>(s0).acbd();
|
||||
v1 = GSVector8i::load<true>(s1).acbd();
|
||||
|
||||
GSVector8i::sw64(v0, v1);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (alignment == 16)
|
||||
{
|
||||
v0 = GSVector8i::load(&s0[0], &s0[16]).acbd();
|
||||
v1 = GSVector8i::load(&s1[0], &s1[16]).acbd();
|
||||
|
||||
GSVector8i::sw64(v0, v1);
|
||||
}
|
||||
else
|
||||
{
|
||||
//v0 = GSVector8i::load(&s0[0], &s0[16], &s0[8], &s0[24]);
|
||||
//v1 = GSVector8i::load(&s1[0], &s1[16], &s1[8], &s1[24]);
|
||||
|
||||
GSVector4i v4 = GSVector4i::load(&s0[0], &s1[0]);
|
||||
GSVector4i v5 = GSVector4i::load(&s0[8], &s1[8]);
|
||||
GSVector4i v6 = GSVector4i::load(&s0[16], &s1[16]);
|
||||
GSVector4i v7 = GSVector4i::load(&s0[24], &s1[24]);
|
||||
|
||||
if (mask == 0xffffffff)
|
||||
{
|
||||
// just write them out directly
|
||||
|
||||
((GSVector4i*)dst)[i * 4 + 0] = v4;
|
||||
((GSVector4i*)dst)[i * 4 + 1] = v5;
|
||||
((GSVector4i*)dst)[i * 4 + 2] = v6;
|
||||
((GSVector4i*)dst)[i * 4 + 3] = v7;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
v0 = GSVector8i::cast(v4).insert<1>(v5);
|
||||
v1 = GSVector8i::cast(v6).insert<1>(v7);
|
||||
}
|
||||
}
|
||||
GSVector8i::sw64(v0, v1);
|
||||
|
||||
if (mask == 0xffffffff)
|
||||
{
|
||||
|
@ -112,6 +72,17 @@ public:
|
|||
|
||||
GSVector4i v0, v1, v2, v3;
|
||||
|
||||
#if FAST_UNALIGNED
|
||||
|
||||
v0 = GSVector4i::load<false>(&s0[0]);
|
||||
v1 = GSVector4i::load<false>(&s0[16]);
|
||||
v2 = GSVector4i::load<false>(&s1[0]);
|
||||
v3 = GSVector4i::load<false>(&s1[16]);
|
||||
|
||||
GSVector4i::sw64(v0, v2, v1, v3);
|
||||
|
||||
#else
|
||||
|
||||
if (alignment != 0)
|
||||
{
|
||||
v0 = GSVector4i::load<true>(&s0[0]);
|
||||
|
@ -129,6 +100,8 @@ public:
|
|||
v3 = GSVector4i::load(&s0[24], &s1[24]);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if (mask == 0xffffffff)
|
||||
{
|
||||
((GSVector4i*)dst)[i * 4 + 0] = v0;
|
||||
|
@ -169,31 +142,11 @@ public:
|
|||
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
GSVector8i v0, v1;
|
||||
GSVector8i v0 = GSVector8i::load<false>(s0);
|
||||
GSVector8i v1 = GSVector8i::load<false>(s1);
|
||||
|
||||
if (alignment == 32)
|
||||
{
|
||||
v0 = GSVector8i::load<true>(s0);
|
||||
v1 = GSVector8i::load<true>(s1);
|
||||
|
||||
GSVector8i::sw128(v0, v1);
|
||||
GSVector8i::sw16(v0, v1);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (alignment == 16)
|
||||
{
|
||||
v0 = GSVector8i::load(&s0[0], &s1[0]);
|
||||
v1 = GSVector8i::load(&s0[16], &s1[16]);
|
||||
}
|
||||
else
|
||||
{
|
||||
v0 = GSVector8i::load(&s0[0], &s0[8], &s1[0], &s1[8]);
|
||||
v1 = GSVector8i::load(&s0[16], &s0[24], &s1[16], &s1[24]);
|
||||
}
|
||||
|
||||
GSVector8i::sw16(v0, v1);
|
||||
}
|
||||
GSVector8i::sw128(v0, v1);
|
||||
GSVector8i::sw16(v0, v1);
|
||||
|
||||
v0 = v0.acbd();
|
||||
v1 = v1.acbd();
|
||||
|
@ -205,6 +158,18 @@ public:
|
|||
|
||||
GSVector4i v0, v1, v2, v3;
|
||||
|
||||
#if FAST_UNALIGNED
|
||||
|
||||
v0 = GSVector4i::load<false>(&s0[0]);
|
||||
v1 = GSVector4i::load<false>(&s0[16]);
|
||||
v2 = GSVector4i::load<false>(&s1[0]);
|
||||
v3 = GSVector4i::load<false>(&s1[16]);
|
||||
|
||||
GSVector4i::sw16(v0, v1, v2, v3);
|
||||
GSVector4i::sw64(v0, v1, v2, v3);
|
||||
|
||||
#else
|
||||
|
||||
if (alignment != 0)
|
||||
{
|
||||
v0 = GSVector4i::load<true>(&s0[0]);
|
||||
|
@ -225,6 +190,8 @@ public:
|
|||
GSVector4i::sw64(v0, v1, v2, v3);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
((GSVector4i*)dst)[i * 4 + 0] = v0;
|
||||
((GSVector4i*)dst)[i * 4 + 1] = v2;
|
||||
((GSVector4i*)dst)[i * 4 + 2] = v1;
|
||||
|
@ -240,10 +207,10 @@ public:
|
|||
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
GSVector4i v4 = GSVector4i::load<alignment != 0>(&src[srcpitch * 0]);
|
||||
GSVector4i v5 = GSVector4i::load<alignment != 0>(&src[srcpitch * 1]);
|
||||
GSVector4i v6 = GSVector4i::load<alignment != 0>(&src[srcpitch * 2]);
|
||||
GSVector4i v7 = GSVector4i::load<alignment != 0>(&src[srcpitch * 3]);
|
||||
GSVector4i v4 = GSVector4i::load<false>(&src[srcpitch * 0]);
|
||||
GSVector4i v5 = GSVector4i::load<false>(&src[srcpitch * 1]);
|
||||
GSVector4i v6 = GSVector4i::load<false>(&src[srcpitch * 2]);
|
||||
GSVector4i v7 = GSVector4i::load<false>(&src[srcpitch * 3]);
|
||||
|
||||
GSVector8i v0(v4, v5);
|
||||
GSVector8i v1(v6, v7);
|
||||
|
|
|
@ -689,6 +689,9 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const u8* sr
|
|||
|
||||
if (h2 > 0)
|
||||
{
|
||||
#if FAST_UNALIGNED
|
||||
WriteImageColumn<psm, bsx, bsy, 0>(l, r, y, h2, src, srcpitch, BITBLTBUF);
|
||||
#else
|
||||
size_t addr = (size_t)&src[l * trbpp >> 3];
|
||||
|
||||
if ((addr & 31) == 0 && (srcpitch & 31) == 0)
|
||||
|
@ -703,6 +706,7 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const u8* sr
|
|||
{
|
||||
WriteImageColumn<psm, bsx, bsy, 0>(l, r, y, h2, src, srcpitch, BITBLTBUF);
|
||||
}
|
||||
#endif
|
||||
|
||||
src += srcpitch * h2;
|
||||
y += h2;
|
||||
|
@ -839,6 +843,9 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const u8* src, int len, GIFRegB
|
|||
|
||||
if (h2 > 0)
|
||||
{
|
||||
#if FAST_UNALIGNED
|
||||
WriteImageBlock<psm, bsx, bsy, 0>(la, ra, ty, h2, s, srcpitch, BITBLTBUF);
|
||||
#else
|
||||
size_t addr = (size_t)&s[la * trbpp >> 3];
|
||||
|
||||
if ((addr & 31) == 0 && (srcpitch & 31) == 0)
|
||||
|
@ -853,6 +860,7 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const u8* src, int len, GIFRegB
|
|||
{
|
||||
WriteImageBlock<psm, bsx, bsy, 0>(la, ra, ty, h2, s, srcpitch, BITBLTBUF);
|
||||
}
|
||||
#endif
|
||||
|
||||
s += srcpitch * h2;
|
||||
ty += h2;
|
||||
|
|
|
@ -35,3 +35,11 @@
|
|||
#elif _M_SSE < 0x401
|
||||
#error PCSX2 requires compiling for at least SSE 4.1
|
||||
#endif
|
||||
|
||||
// Starting with AVX, processors have fast unaligned loads
|
||||
// Reduce code duplication by not compiling separate aligned and unaligned versions
|
||||
#if _M_SSE >= 0x500
|
||||
#define FAST_UNALIGNED 1
|
||||
#else
|
||||
#define FAST_UNALIGNED 0
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue