GS: GSLocalMemory Multi-ISA support

This commit is contained in:
TellowKrinkle 2021-12-18 01:15:56 -06:00 committed by TellowKrinkle
parent d56e6427a9
commit b69c270c31
13 changed files with 1421 additions and 1363 deletions

View File

@ -665,6 +665,7 @@ set(pcsx2GSSources
GS/GSDrawingContext.cpp
GS/GSDump.cpp
GS/GSLocalMemory.cpp
GS/GSLocalMemoryMultiISA.cpp
GS/GSLzma.cpp
GS/GSPerfMon.cpp
GS/GSPng.cpp

File diff suppressed because it is too large Load Diff

View File

@ -19,6 +19,7 @@
#include "GSVector.h"
#include "GSBlock.h"
#include "GSClut.h"
#include "MultiISA.h"
#include <array>
#include <unordered_map>
@ -434,8 +435,14 @@ inline u32 GSSwizzleInfo::pa(int x, int y, u32 bp, u32 bw) const
return GSOffset(*this, bp, bw, 0).pa(x, y);
}
// Forward declaration: the MULTI_ISA_DEF declarations below refer to GSLocalMemory
// before the class itself is defined.
class GSLocalMemory;
// NOTE(review): MULTI_ISA_DEF is not defined in this view (presumably it comes from MultiISA.h
// and emits the wrapped declaration for each compiled ISA variant) -- confirm against that header.
MULTI_ISA_DEF(class GSLocalMemoryFunctions;)
MULTI_ISA_DEF(void GSLocalMemoryPopulateFunctions(GSLocalMemory& mem);)
class GSLocalMemory : public GSAlignedClass<32>
{
MULTI_ISA_FRIEND(GSLocalMemoryFunctions)
public:
typedef u32 (*pixelAddress)(int x, int y, u32 bp, u32 bw);
typedef void (GSLocalMemory::*writePixel)(int x, int y, u32 c, u32 bp, u32 bw);
@ -447,10 +454,10 @@ public:
typedef u32(GSLocalMemory::*PixelAddr)(int x, int y, u32 bp, u32 bw) const;
typedef u32 (GSLocalMemory::*readPixelAddr)(u32 addr) const;
typedef u32 (GSLocalMemory::*readTexelAddr)(u32 addr, const GIFRegTEXA& TEXA) const;
typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, u8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const;
typedef void (GSLocalMemory::*readTexture)(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
typedef void (GSLocalMemory::*readTextureBlock)(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
// Image-transfer and texture-read callbacks declared as plain function pointers: the
// GSLocalMemory instance is passed explicitly as the first argument `mem` instead of the
// callback being invoked through a pointer-to-member (callers use `fn(mem, ...)`).
// readImage and readTextureBlock take `const GSLocalMemory&`; writeImage and readTexture
// take a mutable reference.
typedef void (*writeImage)(GSLocalMemory& mem, int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
typedef void (*readImage)(const GSLocalMemory& mem, int& tx, int& ty, u8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
typedef void (*readTexture)(GSLocalMemory& mem, const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
typedef void (*readTextureBlock)(const GSLocalMemory& mem, u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
struct alignas(128) psm_t
{
@ -473,6 +480,7 @@ public:
};
static psm_t m_psm[64];
static readImage m_readImageX;
static const int m_vmsize = 1024 * 1024 * 4;
@ -1108,81 +1116,11 @@ public:
return ReadTexel16(PixelAddress16SZ(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
//
// Member-function convenience wrapper: forwards every argument unchanged to the function
// pointer held in the static m_readImageX, supplying *this as its `mem` argument.
// NOTE(review): m_readImageX must have been assigned before the first call; where that
// happens is not visible here (presumably GSLocalMemoryPopulateFunctions) -- confirm.
__forceinline void ReadImageX(int& tx, int& ty, u8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const
{
m_readImageX(*this, tx, ty, dst, len, BITBLTBUF, TRXPOS, TRXREG);
}
template <int psm, int bsx, int bsy, int alignment>
void WriteImageColumn(int l, int r, int y, int h, const u8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF);
template <int psm, int bsx, int bsy, int alignment>
void WriteImageBlock(int l, int r, int y, int h, const u8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF);
template <int psm, int bsx, int bsy>
void WriteImageLeftRight(int l, int r, int y, int h, const u8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF);
template <int psm, int bsx, int bsy, int trbpp>
void WriteImageTopBottom(int l, int r, int y, int h, const u8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF);
template <int psm, int bsx, int bsy, int trbpp>
void WriteImage(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
void WriteImage24(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
void WriteImage8H(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
void WriteImage4HL(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
void WriteImage4HH(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
void WriteImage24Z(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
void WriteImageX(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
// TODO: ReadImage32/24/...
void ReadImageX(int& tx, int& ty, u8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const;
// * => 32
void ReadTexture32(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture24(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture16(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8H(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HL(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HH(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureBlock32(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock24(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock16(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock8(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock8H(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4HL(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4HH(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
#if _M_SSE == 0x501
void ReadTexture8HSW(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8HHSW(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureBlock8HSW(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock8HHSW(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
#endif
// pal ? 8 : 32
void ReadTexture8P(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4P(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8HP(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureBlock8P(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4P(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock8HP(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4HLP(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4HHP(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
//
template <typename T>
void ReadTexture(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA);
//

File diff suppressed because it is too large Load Diff

View File

@ -1769,7 +1769,7 @@ void GSState::FlushWrite()
const GSLocalMemory::writeImage wi = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wi;
(m_mem.*wi)(m_tr.x, m_tr.y, &m_tr.buff[m_tr.start], len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG);
wi(m_mem, m_tr.x, m_tr.y, &m_tr.buff[m_tr.start], len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG);
m_tr.start += len;
@ -2029,7 +2029,7 @@ void GSState::Write(const u8* mem, int len)
ExpandTarget(m_env.BITBLTBUF, r);
InvalidateVideoMem(blit, r, true);
(m_mem.*psm.wi)(m_tr.x, m_tr.y, mem, m_tr.total, blit, m_env.TRXPOS, m_env.TRXREG);
psm.wi(m_mem, m_tr.x, m_tr.y, mem, m_tr.total, blit, m_env.TRXPOS, m_env.TRXREG);
m_tr.start = m_tr.end = m_tr.total;

View File

@ -2685,7 +2685,7 @@ void GSTextureCache::Source::Flush(u32 count, int layer)
if ((r > tr).mask() & 0xff00)
{
(mem.*rtx)(off, r, buff, pitch, m_TEXA);
rtx(mem, off, r, buff, pitch, m_TEXA);
m_texture->Update(r.rintersect(tr), buff, pitch, layer);
}
@ -2695,13 +2695,13 @@ void GSTextureCache::Source::Flush(u32 count, int layer)
if (m_texture->Map(m, &r, layer))
{
(mem.*rtx)(off, r, m.bits, m.pitch, m_TEXA);
rtx(mem, off, r, m.bits, m.pitch, m_TEXA);
m_texture->Unmap();
}
else
{
(mem.*rtx)(off, r, buff, pitch, m_TEXA);
rtx(mem, off, r, buff, pitch, m_TEXA);
m_texture->Update(r, buff, pitch, layer);
}
@ -3433,7 +3433,7 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo
const GSLocalMemory::readTexture rtx = psm.rtxP;
// Use temp buffer for expanding, since we may not need to update.
(mem.*rtx)(off, block_rect, temp, pitch, TEXA);
rtx(mem, off, block_rect, temp, pitch, TEXA);
// Hash the expanded texture.
u8* ptr = temp;
@ -3499,7 +3499,7 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
GSTexture::GSMap map;
if (rect.eq(block_rect) && tex->Map(map, &rect, level))
{
(mem.*rtx)(off, block_rect, map.bits, map.pitch, TEXA);
rtx(mem, off, block_rect, map.bits, map.pitch, TEXA);
tex->Unmap();
}
else
@ -3508,7 +3508,7 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
pitch = Common::AlignUpPow2(pitch, 32);
u8* buff = m_temp;
(mem.*rtx)(off, block_rect, buff, pitch, TEXA);
rtx(mem, off, block_rect, buff, pitch, TEXA);
tex->Update(rect, buff, pitch, level);
}
}

View File

@ -615,7 +615,7 @@ void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash
// use per-texture buffer so we can compress the texture asynchronously and not block the GS thread
// must be 32 byte aligned for ReadTexture().
AlignedBuffer<u8, 32> buffer(pitch * static_cast<u32>(read_height));
(mem.*psm.rtx)(mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM), block_rect, buffer.GetPtr(), pitch, TEXA);
psm.rtx(mem, mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM), block_rect, buffer.GetPtr(), pitch, TEXA);
// okay, now we can actually dump it
QueueWorkerThreadItem([filename = std::move(filename), tw, th, pitch, buffer = std::move(buffer)]() {

View File

@ -172,7 +172,7 @@ GSTexture* GSRendererSW::GetOutput(int i, int& y_offset)
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[DISPFB.PSM];
// Top left rect
(m_mem.*psm.rtx)(m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), r.ralign<Align_Outside>(psm.bs), m_output, pitch, m_env.TEXA);
psm.rtx(m_mem, m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), r.ralign<Align_Outside>(psm.bs), m_output, pitch, m_env.TEXA);
int top = (h_wrap) ? ((r.bottom - r.top) * pitch) : 0;
int left = (w_wrap) ? (r.right - r.left) * (GSLocalMemory::m_psm[DISPFB.PSM].bpp / 8) : 0;
@ -181,18 +181,18 @@ GSTexture* GSRendererSW::GetOutput(int i, int& y_offset)
// Top right rect
if (w_wrap)
(m_mem.*psm.rtx)(m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), rw.ralign<Align_Outside>(psm.bs), &m_output[left], pitch, m_env.TEXA);
psm.rtx(m_mem, m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), rw.ralign<Align_Outside>(psm.bs), &m_output[left], pitch, m_env.TEXA);
// Bottom left rect
if (h_wrap)
(m_mem.*psm.rtx)(m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), rh.ralign<Align_Outside>(psm.bs), &m_output[top], pitch, m_env.TEXA);
psm.rtx(m_mem, m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), rh.ralign<Align_Outside>(psm.bs), &m_output[top], pitch, m_env.TEXA);
// Bottom right rect
if (h_wrap && w_wrap)
{
// Needs also rw with the start/end height of rh, fills in the bottom right rect which will be missing if both overflow.
const GSVector4i rwh(rw.left, rh.top, rw.right, rh.bottom);
(m_mem.*psm.rtx)(m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), rwh.ralign<Align_Outside>(psm.bs), &m_output[top + left], pitch, m_env.TEXA);
psm.rtx(m_mem, m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), rwh.ralign<Align_Outside>(psm.bs), &m_output[top + left], pitch, m_env.TEXA);
}
m_texture[i]->Update(out_r, m_output, pitch);

View File

@ -279,7 +279,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
{
m_valid[row] |= col;
(mem.*rtxbP)(block, &dst[bn.blkX() << shift], pitch, m_TEXA);
rtxbP(mem, block, &dst[bn.blkX() << shift], pitch, m_TEXA);
blocks++;
}
@ -301,7 +301,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
{
m_valid[row] |= col;
(mem.*rtxbP)(block, &dst[bn.blkX() << shift], pitch, m_TEXA);
rtxbP(mem, block, &dst[bn.blkX() << shift], pitch, m_TEXA);
blocks++;
}

View File

@ -448,6 +448,7 @@
<ClCompile Include="GS\Renderers\Common\GSFunctionMap.cpp" />
<ClCompile Include="GS\Renderers\HW\GSHwHack.cpp" />
<ClCompile Include="GS\GSLocalMemory.cpp" />
<ClCompile Include="GS\GSLocalMemoryMultiISA.cpp" />
<ClCompile Include="GS\GSLzma.cpp" />
<ClCompile Include="GS\GSPerfMon.cpp" />
<ClCompile Include="GS\GSPng.cpp" />

View File

@ -1496,6 +1496,9 @@
<ClCompile Include="GS\GSLocalMemory.cpp">
<Filter>System\Ps2\GS</Filter>
</ClCompile>
<ClCompile Include="GS\GSLocalMemoryMultiISA.cpp">
<Filter>System\Ps2\GS</Filter>
</ClCompile>
<ClCompile Include="GS\GSPerfMon.cpp">
<Filter>System\Ps2\GS</Filter>
</ClCompile>

View File

@ -301,6 +301,7 @@
<ClCompile Include="GS\Renderers\Common\GSFunctionMap.cpp" />
<ClCompile Include="GS\Renderers\HW\GSHwHack.cpp" />
<ClCompile Include="GS\GSLocalMemory.cpp" />
<ClCompile Include="GS\GSLocalMemoryMultiISA.cpp" />
<ClCompile Include="GS\GSLzma.cpp" />
<ClCompile Include="GS\GSPerfMon.cpp" />
<ClCompile Include="GS\GSPng.cpp" />

View File

@ -998,6 +998,9 @@
<ClCompile Include="GS\GSLocalMemory.cpp">
<Filter>System\Ps2\GS</Filter>
</ClCompile>
<ClCompile Include="GS\GSLocalMemoryMultiISA.cpp">
<Filter>System\Ps2\GS</Filter>
</ClCompile>
<ClCompile Include="GS\GSPerfMon.cpp">
<Filter>System\Ps2\GS</Filter>
</ClCompile>