GS: Add class for swizzle calculations

This commit is contained in:
TellowKrinkle 2021-02-12 22:08:57 -06:00 committed by refractionpcsx2
parent 74f3bb2aa9
commit 951604475b
6 changed files with 143 additions and 75 deletions

View File

@ -220,7 +220,9 @@ option(USE_PGO_OPTIMIZE "Enable PGO optimization (use profile)")
# Note1: Builtin strcmp/memcmp was proved to be slower on Mesa than stdlib version.
# Note2: float operation SSE is impacted by the PCSX2 SSE configuration. In particular, flush to zero denormal.
if(NOT MSVC)
if(MSVC)
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/Zc:externConstexpr>")
else()
add_compile_options(-pipe -fvisibility=hidden -pthread -fno-builtin-strcmp -fno-builtin-memcmp -mfpmath=sse -fno-operator-names)
endif()

View File

@ -73,6 +73,58 @@ struct GSPixelOffset4
uint32 fbp, zbp, fpsm, zpsm, bw;
};
/// Describes one pixel layout (page size, block size and the two swizzle tables) and
/// converts pixel coordinates into block numbers and pixel addresses for that layout.
///
/// All arithmetic is driven by shift amounts derived from the page and block dimensions,
/// so a single implementation serves every layout.
class GSSwizzleInfo
{
	/// Table for storing swizzling of blocks within a page
	const GSBlockSwizzleTable* m_blockSwizzle;
	/// Table for storing swizzling of pixels within a page, flattened from [32][PageHeight][PageWidth]
	const uint32* m_pixelSwizzle;
	GSVector2i m_pageMask;  ///< Mask for getting the offset of a pixel that's within a page (may also be used as page dimensions - 1)
	GSVector2i m_blockMask; ///< Mask for getting the offset of a pixel that's within a block (may also be used as block dimensions - 1)
	uint8 m_pageShiftX;  ///< Amount to rshift an x coordinate by to get the page column (log2 of page width)
	uint8 m_pageShiftY;  ///< Amount to rshift a y coordinate by to get the page row (log2 of page height)
	uint8 m_blockShiftX; ///< Amount to rshift an x coordinate by to get the block column (log2 of block width)
	uint8 m_blockShiftY; ///< Amount to rshift a y coordinate by to get the block row (log2 of block height)

	/// Integer log2, rounded down; yields 0 for inputs 0 and 1.
	/// Kept as a single recursive return so it stays usable in any constexpr context.
	static constexpr uint8 ilog2(uint32 i) { return i < 2 ? 0 : 1 + ilog2(i >> 1); }

public:
	/// Leaves every member uninitialized; such an object must be assigned before bn()/pa() are used.
	GSSwizzleInfo() = default;

	/// @tparam PageWidth  Width of page in pixels (deduced from pxSwizzle; power of 2, at least 64)
	/// @tparam PageHeight Height of page in pixels (deduced from pxSwizzle; power of 2, at least 32)
	/// @param blockSize    Size of block in pixels; both components are expected to be powers of 2,
	///                     since they are turned into shift amounts with ilog2
	/// @param blockSwizzle Table for storing swizzling of blocks within a page
	/// @param pxSwizzle    Table for storing swizzling of pixels within a page, one [PageHeight][PageWidth] plane per bp & 0x1f
	template <int PageWidth, int PageHeight>
	constexpr GSSwizzleInfo(GSVector2i blockSize, const GSBlockSwizzleTable* blockSwizzle, const uint32 (&pxSwizzle)[32][PageHeight][PageWidth])
		: m_blockSwizzle(blockSwizzle)
		, m_pixelSwizzle(&pxSwizzle[0][0][0])
		, m_pageMask{PageWidth - 1, PageHeight - 1}
		, m_blockMask{blockSize.x - 1, blockSize.y - 1}
		, m_pageShiftX(ilog2(PageWidth)), m_pageShiftY(ilog2(PageHeight))
		, m_blockShiftX(ilog2(blockSize.x)), m_blockShiftY(ilog2(blockSize.y))
	{
		static_assert(1 << ilog2(PageWidth) == PageWidth, "PageWidth must be a power of 2");
		static_assert(1 << ilog2(PageHeight) == PageHeight, "PageHeight must be a power of 2");
		// bn() and pa() shift by (m_pageShiftX - 6) and (m_pageShiftY - 5); smaller pages would
		// turn those into negative shift counts, which is undefined behaviour.
		static_assert(PageWidth >= 64, "PageWidth must be at least 64");
		static_assert(PageHeight >= 32, "PageHeight must be at least 32");
	}

	/// Get the block number of the given pixel
	/// @param x,y Pixel coordinates
	/// @param bp  Base block number
	/// @param bw  Buffer width; used unshifted for 64-pixel-wide pages, so presumably in units of 64 pixels
	uint32 bn(int x, int y, uint32 bp, uint32 bw) const
	{
		// Shifting by (pageShift - 5) and clearing the low five bits is (page index * 32),
		// i.e. the number of blocks in the whole pages that precede this one.
		const uint32 pagesPerRow = bw >> (m_pageShiftX - 6);
		const uint32 rowBlocks = static_cast<uint32>((y >> (m_pageShiftY - 5)) & ~0x1f) * pagesPerRow;
		const uint32 colBlocks = static_cast<uint32>((x >> (m_pageShiftX - 5)) & ~0x1f);
		return bp + rowBlocks + colBlocks + m_blockSwizzle->lookup(x >> m_blockShiftX, y >> m_blockShiftY);
	}

	/// Get the address of the given pixel
	/// @param x,y Pixel coordinates
	/// @param bp  Base block number (bp >> 5 selects the page, bp & 0x1f the pixel-swizzle plane)
	/// @param bw  Buffer width; used unshifted for 64-pixel-wide pages, so presumably in units of 64 pixels
	uint32 pa(int x, int y, uint32 bp, uint32 bw) const
	{
		// log2 of the number of pixels in one page
		const int shift = m_pageShiftX + m_pageShiftY;
		// Page index, wrapped so that it always stays below MAX_PAGES
		const uint32 page = ((bp >> 5) + (y >> m_pageShiftY) * (bw >> (m_pageShiftX - 6)) + (x >> m_pageShiftX)) % MAX_PAGES;
		// equivalent of pageOffset[bp & 0x1f][y & pageMaskY][x & pageMaskX]
		const uint32 offsetIdx = ((bp & 0x1f) << shift) + ((y & m_pageMask.y) << m_pageShiftX) + (x & m_pageMask.x);
		return (page << shift) + m_pixelSwizzle[offsetIdx];
	}
};
class GSLocalMemory : public GSAlignedClass<32>
{
public:
@ -151,6 +203,17 @@ protected:
static short blockOffset8[256];
static short blockOffset4[256];
public:
// One swizzle descriptor per layout. Constructor arguments are: block size in pixels {width, height},
// the block swizzle table, and the per-page pixel offset table (its array bounds supply the page size).
static constexpr GSSwizzleInfo swizzle32{{8, 8}, &blockTable32, pageOffset32};
static constexpr GSSwizzleInfo swizzle32Z{{8, 8}, &blockTable32Z, pageOffset32Z};
static constexpr GSSwizzleInfo swizzle16{{16, 8}, &blockTable16, pageOffset16};
static constexpr GSSwizzleInfo swizzle16S{{16, 8}, &blockTable16S, pageOffset16S};
static constexpr GSSwizzleInfo swizzle16Z{{16, 8}, &blockTable16Z, pageOffset16Z};
static constexpr GSSwizzleInfo swizzle16SZ{{16, 8}, &blockTable16SZ, pageOffset16SZ};
static constexpr GSSwizzleInfo swizzle8{{16, 16}, &blockTable8, pageOffset8};
static constexpr GSSwizzleInfo swizzle4{{32, 16}, &blockTable4, pageOffset4};
protected:
__forceinline static uint32 Expand24To32(uint32 c, const GIFRegTEXA& TEXA)
{
return (((!TEXA.AEM | (c & 0xffffff)) ? TEXA.TA0 : 0) << 24) | (c & 0xffffff);
@ -188,46 +251,46 @@ public:
// Block number of pixel (x, y), computed from base block `bp`, buffer width `bw` and blockTable32.
// NOTE(review): two returns in a row — presumably the removed and the added line of a diff whose
// +/- markers were lost. As written only the first executes; confirm that the swizzle32.bn()
// form is the one meant to remain.
static uint32 BlockNumber32(int x, int y, uint32 bp, uint32 bw)
{
return bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable32[(y >> 3) & 3][(x >> 3) & 7];
return swizzle32.bn(x, y, bp, bw);
}
// Block number of pixel (x, y), computed from base block `bp`, buffer width `bw` and blockTable16.
// NOTE(review): two returns in a row — presumably the removed and the added line of a diff whose
// +/- markers were lost. As written only the first executes; confirm that the swizzle16.bn()
// form is the one meant to remain.
static uint32 BlockNumber16(int x, int y, uint32 bp, uint32 bw)
{
return bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable16[(y >> 3) & 7][(x >> 4) & 3];
return swizzle16.bn(x, y, bp, bw);
}
// Block number of pixel (x, y), computed from base block `bp`, buffer width `bw` and blockTable16S.
// NOTE(review): two returns in a row — presumably the removed and the added line of a diff whose
// +/- markers were lost. As written only the first executes; confirm that the swizzle16S.bn()
// form is the one meant to remain.
static uint32 BlockNumber16S(int x, int y, uint32 bp, uint32 bw)
{
return bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable16S[(y >> 3) & 7][(x >> 4) & 3];
return swizzle16S.bn(x, y, bp, bw);
}
// Block number of pixel (x, y), computed from base block `bp`, half the buffer width (`bw >> 1`)
// and blockTable8.
// NOTE(review): two returns in a row — presumably the removed and the added line of a diff whose
// +/- markers were lost. As written only the first executes; confirm that the swizzle8.bn()
// form is the one meant to remain.
static uint32 BlockNumber8(int x, int y, uint32 bp, uint32 bw)
{
// ASSERT((bw & 1) == 0); // allowed for mipmap levels
return bp + ((y >> 1) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f) + blockTable8[(y >> 4) & 3][(x >> 4) & 7];
return swizzle8.bn(x, y, bp, bw);
}
// Block number of pixel (x, y), computed from base block `bp`, half the buffer width (`bw >> 1`)
// and blockTable4.
// NOTE(review): two returns in a row — presumably the removed and the added line of a diff whose
// +/- markers were lost. As written only the first executes; confirm that the swizzle4.bn()
// form is the one meant to remain.
static uint32 BlockNumber4(int x, int y, uint32 bp, uint32 bw)
{
// ASSERT((bw & 1) == 0); // allowed for mipmap levels
return bp + ((y >> 2) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f) + blockTable4[(y >> 4) & 7][(x >> 5) & 3];
return swizzle4.bn(x, y, bp, bw);
}
// Block number of pixel (x, y), computed from base block `bp`, buffer width `bw` and blockTable32Z.
// NOTE(review): two returns in a row — presumably the removed and the added line of a diff whose
// +/- markers were lost. As written only the first executes; confirm that the swizzle32Z.bn()
// form is the one meant to remain.
static uint32 BlockNumber32Z(int x, int y, uint32 bp, uint32 bw)
{
return bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable32Z[(y >> 3) & 3][(x >> 3) & 7];
return swizzle32Z.bn(x, y, bp, bw);
}
// Block number of pixel (x, y), computed from base block `bp`, buffer width `bw` and blockTable16Z.
// NOTE(review): two returns in a row — presumably the removed and the added line of a diff whose
// +/- markers were lost. As written only the first executes; confirm that the swizzle16Z.bn()
// form is the one meant to remain.
static uint32 BlockNumber16Z(int x, int y, uint32 bp, uint32 bw)
{
return bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable16Z[(y >> 3) & 7][(x >> 4) & 3];
return swizzle16Z.bn(x, y, bp, bw);
}
// Block number of pixel (x, y), computed from base block `bp`, buffer width `bw` and blockTable16SZ.
// NOTE(review): two returns in a row — presumably the removed and the added line of a diff whose
// +/- markers were lost. As written only the first executes; confirm that the swizzle16SZ.bn()
// form is the one meant to remain.
static uint32 BlockNumber16SZ(int x, int y, uint32 bp, uint32 bw)
{
return bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f) + blockTable16SZ[(y >> 3) & 7][(x >> 4) & 3];
return swizzle16SZ.bn(x, y, bp, bw);
}
uint8* BlockPtr(uint32 bp) const
@ -317,70 +380,46 @@ public:
// Address of pixel (x, y): the page index (wrapped with % MAX_PAGES) shifted left by 11, plus the
// in-page offset read from pageOffset32.
// NOTE(review): the body returns twice — presumably the removed lines and the added line of a diff
// whose +/- markers were lost. As written the swizzle32.pa() call is unreachable; confirm that it
// is the form meant to remain.
static __forceinline uint32 PixelAddress32(int x, int y, uint32 bp, uint32 bw)
{
uint32 page = ((bp >> 5) + (y >> 5) * bw + (x >> 6)) % MAX_PAGES;
uint32 word = (page << 11) + pageOffset32[bp & 0x1f][y & 0x1f][x & 0x3f];
return word;
return swizzle32.pa(x, y, bp, bw);
}
// Address of pixel (x, y): the page index (wrapped with % MAX_PAGES) shifted left by 12, plus the
// in-page offset read from pageOffset16.
// NOTE(review): the body returns twice — presumably the removed lines and the added line of a diff
// whose +/- markers were lost. As written the swizzle16.pa() call is unreachable; confirm that it
// is the form meant to remain.
static __forceinline uint32 PixelAddress16(int x, int y, uint32 bp, uint32 bw)
{
uint32 page = ((bp >> 5) + (y >> 6) * bw + (x >> 6)) % MAX_PAGES;
uint32 word = (page << 12) + pageOffset16[bp & 0x1f][y & 0x3f][x & 0x3f];
return word;
return swizzle16.pa(x, y, bp, bw);
}
// Address of pixel (x, y): the page index (wrapped with % MAX_PAGES) shifted left by 12, plus the
// in-page offset read from pageOffset16S.
// NOTE(review): the body returns twice — presumably the removed lines and the added line of a diff
// whose +/- markers were lost. As written the swizzle16S.pa() call is unreachable; confirm that it
// is the form meant to remain.
static __forceinline uint32 PixelAddress16S(int x, int y, uint32 bp, uint32 bw)
{
uint32 page = ((bp >> 5) + (y >> 6) * bw + (x >> 6)) % MAX_PAGES;
uint32 word = (page << 12) + pageOffset16S[bp & 0x1f][y & 0x3f][x & 0x3f];
return word;
return swizzle16S.pa(x, y, bp, bw);
}
// Address of pixel (x, y): the page index (wrapped with % MAX_PAGES, using half the buffer width)
// shifted left by 13, plus the in-page offset read from pageOffset8.
// NOTE(review): the body returns twice — presumably the removed lines and the added line of a diff
// whose +/- markers were lost. As written the swizzle8.pa() call is unreachable; confirm that it
// is the form meant to remain.
static __forceinline uint32 PixelAddress8(int x, int y, uint32 bp, uint32 bw)
{
// ASSERT((bw & 1) == 0); // allowed for mipmap levels
uint32 page = ((bp >> 5) + (y >> 6) * (bw >> 1) + (x >> 7)) % MAX_PAGES;
uint32 word = (page << 13) + pageOffset8[bp & 0x1f][y & 0x3f][x & 0x7f];
return word;
return swizzle8.pa(x, y, bp, bw);
}
// Address of pixel (x, y): the page index (wrapped with % MAX_PAGES, using half the buffer width)
// shifted left by 14, plus the in-page offset read from pageOffset4.
// NOTE(review): the body returns twice — presumably the removed lines and the added line of a diff
// whose +/- markers were lost. As written the swizzle4.pa() call is unreachable; confirm that it
// is the form meant to remain.
static __forceinline uint32 PixelAddress4(int x, int y, uint32 bp, uint32 bw)
{
// ASSERT((bw & 1) == 0); // allowed for mipmap levels
uint32 page = ((bp >> 5) + (y >> 7) * (bw >> 1) + (x >> 7)) % MAX_PAGES;
uint32 word = (page << 14) + pageOffset4[bp & 0x1f][y & 0x7f][x & 0x7f];
return word;
return swizzle4.pa(x, y, bp, bw);
}
// Address of pixel (x, y): the page index (wrapped with % MAX_PAGES) shifted left by 11, plus the
// in-page offset read from pageOffset32Z.
// NOTE(review): the body returns twice — presumably the removed lines and the added line of a diff
// whose +/- markers were lost. As written the swizzle32Z.pa() call is unreachable; confirm that it
// is the form meant to remain.
static __forceinline uint32 PixelAddress32Z(int x, int y, uint32 bp, uint32 bw)
{
uint32 page = ((bp >> 5) + (y >> 5) * bw + (x >> 6)) % MAX_PAGES;
uint32 word = (page << 11) + pageOffset32Z[bp & 0x1f][y & 0x1f][x & 0x3f];
return word;
return swizzle32Z.pa(x, y, bp, bw);
}
// Address of pixel (x, y): the page index (wrapped with % MAX_PAGES) shifted left by 12, plus the
// in-page offset read from pageOffset16Z.
// NOTE(review): the body returns twice — presumably the removed lines and the added line of a diff
// whose +/- markers were lost. As written the swizzle16Z.pa() call is unreachable; confirm that it
// is the form meant to remain.
static __forceinline uint32 PixelAddress16Z(int x, int y, uint32 bp, uint32 bw)
{
uint32 page = ((bp >> 5) + (y >> 6) * bw + (x >> 6)) % MAX_PAGES;
uint32 word = (page << 12) + pageOffset16Z[bp & 0x1f][y & 0x3f][x & 0x3f];
return word;
return swizzle16Z.pa(x, y, bp, bw);
}
// Address of pixel (x, y): the page index (wrapped with % MAX_PAGES) shifted left by 12, plus the
// in-page offset read from pageOffset16SZ.
// NOTE(review): the body returns twice — presumably the removed lines and the added line of a diff
// whose +/- markers were lost. As written the swizzle16SZ.pa() call is unreachable; confirm that it
// is the form meant to remain.
static __forceinline uint32 PixelAddress16SZ(int x, int y, uint32 bp, uint32 bw)
{
uint32 page = ((bp >> 5) + (y >> 6) * bw + (x >> 6)) % MAX_PAGES;
uint32 word = (page << 12) + pageOffset16SZ[bp & 0x1f][y & 0x3f][x & 0x3f];
return word;
return swizzle16SZ.pa(x, y, bp, bw);
}
// pixel R/W

View File

@ -19,7 +19,18 @@
#include "GSTables.h"
#include "GS_types.h"
const uint8 blockTable32[4][8] =
/// Expands a Height x Width block swizzle table into the fixed 8x8 GSBlockSwizzleTable by
/// repeating (tiling) the source in both directions.
///
/// @tparam Width  Number of columns in the source table (deduced); must divide 8
/// @tparam Height Number of rows in the source table (deduced); must divide 8
/// @param arr     Source table, indexed [row][column]
/// @return        8x8 table in which entry [y][x] equals arr[y % Height][x % Width]
template <int Width, int Height>
static constexpr GSBlockSwizzleTable makeSwizzleTable(const uint8 (&arr)[Height][Width])
{
	// A dimension that does not divide 8 would be silently truncated or tiled unevenly,
	// and the result would no longer repeat with period 8.
	static_assert(8 % Width == 0, "Width must evenly divide 8");
	static_assert(8 % Height == 0, "Height must evenly divide 8");

	GSBlockSwizzleTable table = {};
	for (int y = 0; y < 8; y++)
	{
		for (int x = 0; x < 8; x++)
		{
			table.value[y][x] = arr[y % Height][x % Width];
		}
	}
	return table;
}
static constexpr uint8 _blockTable32[4][8] =
{
{ 0, 1, 4, 5, 16, 17, 20, 21},
{ 2, 3, 6, 7, 18, 19, 22, 23},
@ -27,7 +38,7 @@ const uint8 blockTable32[4][8] =
{ 10, 11, 14, 15, 26, 27, 30, 31}
};
const uint8 blockTable32Z[4][8] =
static constexpr uint8 _blockTable32Z[4][8] =
{
{ 24, 25, 28, 29, 8, 9, 12, 13},
{ 26, 27, 30, 31, 10, 11, 14, 15},
@ -35,7 +46,7 @@ const uint8 blockTable32Z[4][8] =
{ 18, 19, 22, 23, 2, 3, 6, 7}
};
const uint8 blockTable16[8][4] =
static constexpr uint8 _blockTable16[8][4] =
{
{ 0, 2, 8, 10 },
{ 1, 3, 9, 11 },
@ -47,7 +58,7 @@ const uint8 blockTable16[8][4] =
{ 21, 23, 29, 31 }
};
const uint8 blockTable16S[8][4] =
static constexpr uint8 _blockTable16S[8][4] =
{
{ 0, 2, 16, 18 },
{ 1, 3, 17, 19 },
@ -59,7 +70,7 @@ const uint8 blockTable16S[8][4] =
{ 13, 15, 29, 31 }
};
const uint8 blockTable16Z[8][4] =
static constexpr uint8 _blockTable16Z[8][4] =
{
{ 24, 26, 16, 18 },
{ 25, 27, 17, 19 },
@ -71,7 +82,7 @@ const uint8 blockTable16Z[8][4] =
{ 13, 15, 5, 7 }
};
const uint8 blockTable16SZ[8][4] =
static constexpr uint8 _blockTable16SZ[8][4] =
{
{ 24, 26, 8, 10 },
{ 25, 27, 9, 11 },
@ -83,7 +94,7 @@ const uint8 blockTable16SZ[8][4] =
{ 21, 23, 5, 7 }
};
const uint8 blockTable8[4][8] =
static constexpr uint8 _blockTable8[4][8] =
{
{ 0, 1, 4, 5, 16, 17, 20, 21},
{ 2, 3, 6, 7, 18, 19, 22, 23},
@ -91,7 +102,7 @@ const uint8 blockTable8[4][8] =
{ 10, 11, 14, 15, 26, 27, 30, 31}
};
const uint8 blockTable4[8][4] =
static constexpr uint8 _blockTable4[8][4] =
{
{ 0, 2, 8, 10 },
{ 1, 3, 9, 11 },
@ -103,7 +114,16 @@ const uint8 blockTable4[8][4] =
{ 21, 23, 29, 31 }
};
const uint8 columnTable32[8][8] =
// 8x8 expansions of the _blockTable* arrays, built at compile time by makeSwizzleTable().
// These names are also declared `extern const GSBlockSwizzleTable` elsewhere in this change,
// so the constexpr definitions need external linkage — presumably the reason
// /Zc:externConstexpr is added to the MSVC build; confirm.
constexpr GSBlockSwizzleTable blockTable32 = makeSwizzleTable(_blockTable32);
constexpr GSBlockSwizzleTable blockTable32Z = makeSwizzleTable(_blockTable32Z);
constexpr GSBlockSwizzleTable blockTable16 = makeSwizzleTable(_blockTable16);
constexpr GSBlockSwizzleTable blockTable16S = makeSwizzleTable(_blockTable16S);
constexpr GSBlockSwizzleTable blockTable16Z = makeSwizzleTable(_blockTable16Z);
constexpr GSBlockSwizzleTable blockTable16SZ = makeSwizzleTable(_blockTable16SZ);
constexpr GSBlockSwizzleTable blockTable8 = makeSwizzleTable(_blockTable8);
constexpr GSBlockSwizzleTable blockTable4 = makeSwizzleTable(_blockTable4);
constexpr uint8 columnTable32[8][8] =
{
{ 0, 1, 4, 5, 8, 9, 12, 13 },
{ 2, 3, 6, 7, 10, 11, 14, 15 },
@ -115,7 +135,7 @@ const uint8 columnTable32[8][8] =
{ 50, 51, 54, 55, 58, 59, 62, 63 },
};
const uint8 columnTable16[8][16] =
constexpr uint8 columnTable16[8][16] =
{
{ 0, 2, 8, 10, 16, 18, 24, 26,
1, 3, 9, 11, 17, 19, 25, 27 },
@ -135,7 +155,7 @@ const uint8 columnTable16[8][16] =
101, 103, 109, 111, 117, 119, 125, 127 },
};
const uint8 columnTable8[16][16] =
constexpr uint8 columnTable8[16][16] =
{
{ 0, 4, 16, 20, 32, 36, 48, 52, // column 0
2, 6, 18, 22, 34, 38, 50, 54 },
@ -171,7 +191,7 @@ const uint8 columnTable8[16][16] =
203, 207, 219, 223, 235, 239, 251, 255 },
};
const uint16 columnTable4[16][32] =
constexpr uint16 columnTable4[16][32] =
{
{ 0, 8, 32, 40, 64, 72, 96, 104, // column 0
2, 10, 34, 42, 66, 74, 98, 106,
@ -239,7 +259,7 @@ const uint16 columnTable4[16][32] =
407, 415, 439, 447, 471, 479, 503, 511 },
};
const uint8 clutTableT32I8[128] =
constexpr uint8 clutTableT32I8[128] =
{
0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
64, 65, 68, 69, 72, 73, 76, 77, 66, 67, 70, 71, 74, 75, 78, 79,
@ -251,13 +271,13 @@ const uint8 clutTableT32I8[128] =
112, 113, 116, 117, 120, 121, 124, 125, 114, 115, 118, 119, 122, 123, 126, 127
};
const uint8 clutTableT32I4[16] =
constexpr uint8 clutTableT32I4[16] =
{
0, 1, 4, 5, 8, 9, 12, 13,
2, 3, 6, 7, 10, 11, 14, 15
};
const uint8 clutTableT16I8[32] =
constexpr uint8 clutTableT16I8[32] =
{
0, 2, 8, 10, 16, 18, 24, 26,
4, 6, 12, 14, 20, 22, 28, 30,
@ -265,7 +285,7 @@ const uint8 clutTableT16I8[32] =
5, 7, 13, 15, 21, 23, 29, 31
};
const uint8 clutTableT16I4[16] =
constexpr uint8 clutTableT16I4[16] =
{
0, 2, 8, 10, 16, 18, 24, 26,
4, 6, 12, 14, 20, 22, 28, 30

View File

@ -17,14 +17,26 @@
#include "GS_types.h"
extern const uint8 blockTable32[4][8];
extern const uint8 blockTable32Z[4][8];
extern const uint8 blockTable16[8][4];
extern const uint8 blockTable16S[8][4];
extern const uint8 blockTable16Z[8][4];
extern const uint8 blockTable16SZ[8][4];
extern const uint8 blockTable8[4][8];
extern const uint8 blockTable4[8][4];
/// Lookup table describing how blocks are swizzled within a page
struct alignas(64) GSBlockSwizzleTable
{
	// Source layouts come as either 4x8 or 8x4. Storing them in a common 8x8 table costs
	// twice the space but lets one lookup routine serve both shapes.
	uint8 value[8][8];

	/// Entry for block column `x`, row `y`; both coordinates wrap around every 8 blocks.
	constexpr uint8 lookup(int x, int y) const
	{
		const int row = y & 7;
		const int col = x & 7;
		return value[row][col];
	}
};
// Block swizzle tables, one per layout. Each is a GSBlockSwizzleTable, so entries are read
// through lookup(x, y) rather than by indexing a raw array.
extern const GSBlockSwizzleTable blockTable32;
extern const GSBlockSwizzleTable blockTable32Z;
extern const GSBlockSwizzleTable blockTable16;
extern const GSBlockSwizzleTable blockTable16S;
extern const GSBlockSwizzleTable blockTable16Z;
extern const GSBlockSwizzleTable blockTable16SZ;
extern const GSBlockSwizzleTable blockTable8;
extern const GSBlockSwizzleTable blockTable4;
extern const uint8 columnTable32[8][8];
extern const uint8 columnTable16[8][16];
extern const uint8 columnTable8[16][16];

View File

@ -53,20 +53,14 @@ public:
struct { T v[2]; };
};
GSVector2T()
GSVector2T() = default;
constexpr GSVector2T(T x): x(x), y(x)
{
}
GSVector2T(T x)
constexpr GSVector2T(T x, T y): x(x), y(y)
{
this->x = x;
this->y = x;
}
GSVector2T(T x, T y)
{
this->x = x;
this->y = y;
}
bool operator==(const GSVector2T& v) const

View File

@ -46,6 +46,7 @@
<PrecompiledHeaderFile>PrecompiledHeader.h</PrecompiledHeaderFile>
<ForcedIncludeFiles>PrecompiledHeader.h;%(ForcedIncludeFiles)</ForcedIncludeFiles>
<EnableEnhancedInstructionSet>NoExtensions</EnableEnhancedInstructionSet>
<AdditionalOptions>/Zc:externConstexpr %(AdditionalOptions)</AdditionalOptions>
<PreprocessorDefinitions>WIN32_LEAN_AND_MEAN;LZMA_API_STATIC;BUILD_DX=1;SPU2X_PORTAUDIO;DIRECTINPUT_VERSION=0x0800;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="$(Configuration.Contains(Debug))">PCSX2_DEBUG;PCSX2_DEVBUILD;_SECURE_SCL_=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="$(Configuration.Contains(Devel))">PCSX2_DEVEL;PCSX2_DEVBUILD;NDEBUG;_SECURE_SCL_=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>