GS: Switch to new non-cached GSOffset

This commit is contained in:
TellowKrinkle 2021-02-23 19:37:35 -06:00 committed by refractionpcsx2
parent d9defb19f9
commit 8eb50c3517
18 changed files with 450 additions and 693 deletions

View File

@ -196,16 +196,14 @@ void GSClut::WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& T
template <int n>
void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
GSOffset* off = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
uint32* RESTRICT s = &m_mem->m_vm32[off->pixel.row[TEXCLUT.COV]];
int* RESTRICT col = &off->pixel.col[0][TEXCLUT.COU << 4];
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4);
for (int i = 0; i < n; i++)
for (int i = 0; i < n; pa.incX(), i++)
{
uint32 c = s[col[i]];
uint32 c = m_mem->m_vm32[pa.value()];
clut[i] = (uint16)(c & 0xffff);
clut[i + 256] = (uint16)(c >> 16);
@ -215,32 +213,28 @@ void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCL
template <int n>
void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
GSOffset* off = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
uint16* RESTRICT s = &m_mem->m_vm16[off->pixel.row[TEXCLUT.COV]];
int* RESTRICT col = &off->pixel.col[0][TEXCLUT.COU << 4];
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for (int i = 0; i < n; i++)
for (int i = 0; i < n; pa.incX(), i++)
{
clut[i] = s[col[i]];
clut[i] = m_mem->m_vm16[pa.value()];
}
}
template <int n>
void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
GSOffset* off = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
uint16* RESTRICT s = &m_mem->m_vm16[off->pixel.row[TEXCLUT.COV]];
int* RESTRICT col = &off->pixel.col[0][TEXCLUT.COU << 4];
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
GSOffset::PAHelper pa = off.paMulti(TEXCLUT.COU << 4, TEXCLUT.COV);
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for (int i = 0; i < n; i++)
for (int i = 0; i < n; pa.incX(), i++)
{
clut[i] = s[col[i]];
clut[i] = m_mem->m_vm16[pa.value()];
}
}

View File

@ -48,9 +48,9 @@ public:
struct
{
GSOffset* fb;
GSOffset* zb;
GSOffset* tex;
GSOffset fb;
GSOffset zb;
GSOffset tex;
GSPixelOffset* fzb;
GSPixelOffset4* fzb4;
} offset;

View File

@ -35,6 +35,28 @@
#define FOREACH_BLOCK_END }}
template <typename Fn>
static void foreachBlock(const GSOffset& off, GSLocalMemory* mem, const GSVector4i& r, uint8* dst, int dstpitch, int bpp, Fn&& fn)
{
ASSERT(off.isBlockAligned(r));
GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
int right = r.right >> off.blockShiftX();
int bottom = r.bottom >> off.blockShiftY();
int offset = dstpitch << off.blockShiftY();
int xAdd = (1 << off.blockShiftX()) * (bpp / 8);
for (; bn.blkY() < bottom; bn.nextBlockY(), dst += offset)
{
for (int x = 0; bn.blkX() < right; bn.nextBlockX(), x += xAdd)
{
const uint8* src = mem->BlockPtr(bn.value());
uint8* read_dst = dst + x;
fn(read_dst, src);
}
}
}
//
uint32 GSLocalMemory::pageOffset32[32][32][64];
@ -487,22 +509,9 @@ GSLocalMemory::~GSLocalMemory()
}
}
GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
GSOffset GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
{
uint32 hash = bp | (bw << 14) | (psm << 20);
auto i = m_omap.find(hash);
if (i != m_omap.end())
{
return i->second;
}
GSOffset* off = new GSOffset(bp, bw, psm);
m_omap[hash] = off;
return off;
return GSOffset(m_psm[psm].info, bp, bw, psm);
}
GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
@ -629,19 +638,18 @@ std::vector<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
int tw = std::max<int>(1 << TEX0.TW, bs.x);
int th = std::max<int>(1 << TEX0.TH, bs.y);
const GSOffset* off = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
GSOffset off = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
GSOffset::BNHelper bn = off.bnMulti(0, 0);
std::unordered_map<uint32, std::unordered_set<uint32>> tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks)
for (int y = 0; y < th; y += bs.y)
for (; bn.blkY() < (th >> off.blockShiftY()); bn.nextBlockY())
{
uint32 base = off->block.row[y >> 3];
for (int x = 0, i = y << 7; x < tw; x += bs.x, i += bs.x)
for (; bn.blkX() < (tw >> off.blockShiftX()); bn.nextBlockX())
{
uint32 page = ((base + off->block.col[x >> 3]) >> 5) % MAX_PAGES;
uint32 page = (bn.value() >> 5) % MAX_PAGES;
tmp[page].insert(i >> 3); // ((y << 7) | x) >> 3
tmp[page].insert((bn.blkY() << 7) + bn.blkX());
}
}
@ -1703,42 +1711,38 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
///////////////////
void GSLocalMemory::ReadTexture32(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture32(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 8, 8, 32)
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadBlock32(src, read_dst, dstpitch);
}
FOREACH_BLOCK_END
});
}
void GSLocalMemory::ReadTexture24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
if (TEXA.AEM)
{
FOREACH_BLOCK_START(r, 8, 8, 32)
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadAndExpandBlock24<true>(src, read_dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
});
}
else
{
FOREACH_BLOCK_START(r, 8, 8, 32)
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadAndExpandBlock24<false>(src, read_dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
});
}
}
void GSLocalMemory::ReadTextureGPU24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 16, 8, 16)
foreachBlock(off, this, r, dst, dstpitch, 16, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadBlock16(src, read_dst, dstpitch);
}
FOREACH_BLOCK_END
});
// Convert packed RGB scanline to 32 bits RGBA
ASSERT(dstpitch >= r.width() * 4);
@ -1753,79 +1757,72 @@ void GSLocalMemory::ReadTextureGPU24(const GSOffset* RESTRICT off, const GSVecto
}
}
void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture16(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
if (TEXA.AEM)
{
FOREACH_BLOCK_START(r, 16, 8, 32)
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadAndExpandBlock16<true>(src, read_dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
});
}
else
{
FOREACH_BLOCK_START(r, 16, 8, 32)
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadAndExpandBlock16<false>(src, read_dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
});
}
}
void GSLocalMemory::ReadTexture8(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture8(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
FOREACH_BLOCK_START(r, 16, 16, 32)
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadAndExpandBlock8_32(src, read_dst, dstpitch, pal);
}
FOREACH_BLOCK_END
});
}
void GSLocalMemory::ReadTexture4(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture4(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
const uint64* pal = m_clut;
FOREACH_BLOCK_START(r, 32, 16, 32)
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadAndExpandBlock4_32(src, read_dst, dstpitch, pal);
}
FOREACH_BLOCK_END
});
}
void GSLocalMemory::ReadTexture8H(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture8H(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
FOREACH_BLOCK_START(r, 8, 8, 32)
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadAndExpandBlock8H_32(src, read_dst, dstpitch, pal);
}
FOREACH_BLOCK_END
});
}
void GSLocalMemory::ReadTexture4HL(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture4HL(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
FOREACH_BLOCK_START(r, 8, 8, 32)
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadAndExpandBlock4HL_32(src, read_dst, dstpitch, pal);
}
FOREACH_BLOCK_END
});
}
void GSLocalMemory::ReadTexture4HH(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture4HH(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
FOREACH_BLOCK_START(r, 8, 8, 32)
foreachBlock(off, this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadAndExpandBlock4HH_32(src, read_dst, dstpitch, pal);
}
FOREACH_BLOCK_END
});
}
///////////////////
@ -1902,9 +1899,9 @@ void GSLocalMemory::ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, con
///////////////////
void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
const psm_t& psm = m_psm[off->psm];
const psm_t& psm = m_psm[off.psm()];
readTexel rt = psm.rt;
readTexture rtx = psm.rtx;
@ -1913,9 +1910,9 @@ void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i&
{
GIFRegTEX0 TEX0;
TEX0.TBP0 = off->bp;
TEX0.TBW = off->bw;
TEX0.PSM = off->psm;
TEX0.TBP0 = off.bp();
TEX0.TBW = off.bw();
TEX0.PSM = off.psm();
GSVector4i cr = r.ralign<Align_Inside>(psm.bs);
@ -1981,49 +1978,44 @@ void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i&
// 32/8
void GSLocalMemory::ReadTexture8P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 16, 16, 8)
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadBlock8(src, read_dst, dstpitch);
}
FOREACH_BLOCK_END
});
}
void GSLocalMemory::ReadTexture4P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 32, 16, 8)
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadBlock4P(src, read_dst, dstpitch);
}
FOREACH_BLOCK_END
});
}
void GSLocalMemory::ReadTexture8HP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 8, 8, 8)
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadBlock8HP(src, read_dst, dstpitch);
}
FOREACH_BLOCK_END
});
}
void GSLocalMemory::ReadTexture4HLP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 8, 8, 8)
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadBlock4HLP(src, read_dst, dstpitch);
}
FOREACH_BLOCK_END
});
}
void GSLocalMemory::ReadTexture4HHP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 8, 8, 8)
foreachBlock(off, this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
{
GSBlock::ReadBlock4HHP(src, read_dst, dstpitch);
}
FOREACH_BLOCK_END
});
}
//
@ -2103,14 +2095,14 @@ void GSLocalMemory::SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32
namespace
{
/// Helper for GSOffsetNew::pageLooperForRect
/// Helper for GSOffset::pageLooperForRect
struct alignas(16) TextureAligned
{
	// Block-aligned outer rect: the smallest block-aligned rectangle that contains the original
	int ox1;
	int oy1;
	int ox2;
	int oy2;

	// Page-aligned inner rect: the largest page-aligned rectangle that fits inside the original
	int ix1;
	int iy1;
	int ix2;
	int iy2;
};
/// Helper for GSOffsetNew::pageLooperForRect
/// Helper for GSOffset::pageLooperForRect
TextureAligned align(const GSVector4i& rect, const GSVector2i& blockMask, const GSVector2i& pageMask, int blockShiftX, int blockShiftY)
{
GSVector4i outer = rect.ralign_presub<Align_Outside>(blockMask);
@ -2149,7 +2141,7 @@ namespace
} // namespace
GSOffsetNew::PageLooper GSOffsetNew::pageLooperForRect(const GSVector4i& rect) const
GSOffset::PageLooper GSOffset::pageLooperForRect(const GSVector4i& rect) const
{
// Plan:
// - Split texture into tiles on page lines
@ -2264,6 +2256,7 @@ GSOffsetNew::PageLooper GSOffsetNew::pageLooperForRect(const GSVector4i& rect) c
return out;
}
/*
GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm)
{
hash = _bp | (_bw << 14) | (_psm << 20);
@ -2414,3 +2407,4 @@ void* GSOffset::GetPagesAsBits(const GSVector4i& rect, void* pages)
return pages;
}
*/

View File

@ -21,38 +21,6 @@
#include "GSBlock.h"
#include "GSClut.h"
/// Table-based offset description for one (bp, bw, psm) buffer configuration.
/// Addresses are formed by summing a per-row entry and a per-column entry from the
/// lookup tables below, rather than being computed per access.
class GSOffset : public GSAlignedClass<32>
{
public:
// Block-number lookup: a row entry plus a column entry gives the block number
struct alignas(32) Block
{
short row[256]; // yn (n = 0 8 16 ...)
short* col; // blockOffset*
};
// Pixel-address lookup: a row entry plus a column entry gives the element index into local memory
struct alignas(32) Pixel
{
int row[4096]; // yn (n = 0 1 2 ...) NOTE: this wraps around above 2048, only transfers should address the upper half (dark cloud 2 inventing)
int* col[8]; // rowOffset*
};
// Packed identity of this offset; `hash` overlays the bp/bw/psm bitfields
// NOTE(review): relies on the compiler laying out bitfields low-bit-first so that hash == bp | (bw << 14) | (psm << 20) — confirm
union { uint32 hash; struct { uint32 bp:14, bw:6, psm:6; }; };
Block block;
Pixel pixel;
std::array<uint32*, 256> pages_as_bit; // texture page coverage based on the texture size. Lazy allocated
GSOffset(uint32 bp, uint32 bw, uint32 psm);
virtual ~GSOffset();
// NOTE(review): presumably the terminator value for the page list returned by GetPages — confirm against the implementation
enum { EOP = 0xffffffff };
// Page queries for a rect or a texture register; implementations are not visible here
uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL);
void* GetPagesAsBits(const GSVector4i& rect, void* pages);
uint32* GetPagesAsBits(const GIFRegTEX0& TEX0);
};
struct GSPixelOffset
{
// 16 bit offsets (m_vm16[...])
@ -75,7 +43,8 @@ struct GSPixelOffset4
class GSSwizzleInfo;
class GSOffsetNew {
class GSOffset
{
/// Table for storing swizzling of blocks within a page
const GSBlockSwizzleTable* m_blockSwizzle;
/// Table for storing swizzling of pixels within a page (size: uint32[PageHeight][PageWidth])
@ -88,12 +57,12 @@ class GSOffsetNew {
uint8 m_blockShiftY; ///< Amount to rshift y value by to get block y offset
int m_bp; ///< Offset's base pointer (same measurement as GS)
int m_bwPg; ///< Offset's buffer width in pages (not equal to bw in GS for 8 and 4-bit textures)
int m_psm; ///< Offset's pixel storage mode (just for storage, not used by any of the GSOffsetNew algorithms)
int m_psm; ///< Offset's pixel storage mode (just for storage, not used by any of the GSOffset algorithms)
public:
GSOffsetNew() = default;
constexpr GSOffsetNew(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm);
GSOffset() = default;
constexpr GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm);
/// Help the optimizer by using this method instead of GSLocalMemory::GetOffset when the PSM is known
constexpr static GSOffsetNew fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm);
constexpr static GSOffset fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm);
uint32 bp() const { return m_bp; }
uint32 bw() const { return m_bwPg << (m_pageShiftX - 6); }
@ -104,7 +73,7 @@ public:
/// Helper class for efficiently getting the numbers of multiple blocks in a scanning pattern (increment x then y)
class BNHelper
{
const GSBlockSwizzleTable* m_blockSwizzle; ///< Block swizzle table from GSOffsetNew
const GSBlockSwizzleTable* m_blockSwizzle; ///< Block swizzle table from GSOffset
int m_baseBP; ///< bp for start of current row (to return to the origin x when advancing y)
int m_bp; ///< bp for current position
int m_baseBlkX; ///< x of origin in blocks (to return to the origin x when advancing y)
@ -114,7 +83,7 @@ public:
int m_pageMaskY; ///< mask for y value of block coordinate to get position within page (to detect page crossing)
int m_addY; ///< Amount to add to bp to advance one page in y direction
public:
BNHelper(const GSOffsetNew& off, int x, int y)
BNHelper(const GSOffset& off, int x, int y)
{
m_blockSwizzle = off.m_blockSwizzle;
int yAmt = ((y >> (off.m_pageShiftY - 5)) & ~0x1f) * off.m_bwPg;
@ -209,7 +178,7 @@ public:
int m_shift; ///< Amount to lshift page number to get element offset for the start of that page
public:
PAHelper() = default;
PAHelper(const GSOffsetNew& off, int x, int y)
PAHelper(const GSOffset& off, int x, int y)
{
m_pixelSwizzle = off.m_pixelSwizzle + ((y & off.m_pageMask.y) << off.m_pageShiftX);
m_pageBase = (off.m_bp >> 5) + (y >> off.m_pageShiftY) * off.m_bwPg;
@ -264,7 +233,7 @@ public:
}
/// Helper class for looping over the pages in a rect
/// Create with GSOffsetNew::pageLooperForRect
/// Create with GSOffset::pageLooperForRect
class PageLooper
{
int firstRowPgXStart, firstRowPgXEnd; ///< Offset of start/end pages of the first line from x=0 page (only line for textures that don't cross page boundaries)
@ -274,7 +243,7 @@ public:
int yInc; ///< Amount to add to bp when increasing y by one page
int yCnt; ///< Number of pages the rect covers in the y direction
friend class GSOffsetNew;
friend class GSOffset;
public:
/// Loop over pages, fn can return `false` to break the loop
@ -333,7 +302,7 @@ public:
class GSSwizzleInfo
{
friend class GSOffsetNew;
friend class GSOffset;
/// Table for storing swizzling of blocks within a page
const GSBlockSwizzleTable* m_blockSwizzle;
/// Table for storing swizzling of pixels within a page
@ -367,31 +336,31 @@ public:
/// Get the block number of the given pixel
uint32 bn(int x, int y, uint32 bp, uint32 bw) const
{
return GSOffsetNew(*this, bp, bw, 0).bn(x, y);
return GSOffset(*this, bp, bw, 0).bn(x, y);
}
/// Get the address of the given pixel
uint32 pa(int x, int y, uint32 bp, uint32 bw) const
{
return GSOffsetNew(*this, bp, bw, 0).pa(x, y);
return GSOffset(*this, bp, bw, 0).pa(x, y);
}
/// Loop over all the pages in the given rect, calling `fn` on each
template <typename Fn>
void loopPages(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
{
GSOffsetNew(*this, bp, bw, 0).loopPages(rect, std::forward<Fn>(fn));
GSOffset(*this, bp, bw, 0).loopPages(rect, std::forward<Fn>(fn));
}
/// Loop over all the blocks in the given rect, calling `fn` on each
template <typename Fn>
void loopBlocks(const GSVector4i& rect, uint32 bp, uint32 bw, Fn&& fn) const
{
GSOffsetNew(*this, bp, bw, 0).loopBlocks(rect, std::forward<Fn>(fn));
GSOffset(*this, bp, bw, 0).loopBlocks(rect, std::forward<Fn>(fn));
}
};
constexpr inline GSOffsetNew::GSOffsetNew(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm)
constexpr inline GSOffset::GSOffset(const GSSwizzleInfo& swz, uint32 bp, uint32 bw, uint32 psm)
: m_blockSwizzle(swz.m_blockSwizzle)
, m_pixelSwizzle(swz.m_pixelSwizzle + ((bp & 0x1f) << (swz.m_pageShiftX + swz.m_pageShiftY)))
, m_pageMask(swz.m_pageMask), m_blockMask(swz.m_blockMask)
@ -417,7 +386,7 @@ public:
typedef uint32 (GSLocalMemory::*readTexelAddr)(uint32 addr, const GIFRegTEXA& TEXA) const;
typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const;
typedef void (GSLocalMemory::*readTexture)(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
typedef void (GSLocalMemory::*readTexture)(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
typedef void (GSLocalMemory::*readTextureBlock)(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
struct alignas(128) psm_t
@ -520,7 +489,7 @@ public:
GSLocalMemory();
virtual ~GSLocalMemory();
GSOffset* GetOffset(uint32 bp, uint32 bw, uint32 psm);
GSOffset GetOffset(uint32 bp, uint32 bw, uint32 psm);
GSPixelOffset* GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
std::vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
@ -985,75 +954,35 @@ public:
WriteFrame16(PixelAddress16SZ(x, y, bp, bw), c);
}
__forceinline void WritePixel32(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r)
void WritePixel32(uint8* RESTRICT src, uint32 pitch, const GSOffset& off, const GSVector4i& r)
{
src -= r.left * sizeof(uint32);
for (int y = r.top; y < r.bottom; y++, src += pitch)
{
uint32* RESTRICT s = (uint32*)src;
uint32* RESTRICT d = &m_vm32[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
for (int x = r.left; x < r.right; x++)
{
d[col[x]] = s[x];
}
}
off.loopPixels(r, m_vm32, (uint32*)src, pitch, [&](uint32* dst, uint32* src) { *dst = *src; });
}
__forceinline void WritePixel24(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r)
void WritePixel24(uint8* RESTRICT src, uint32 pitch, const GSOffset& off, const GSVector4i& r)
{
src -= r.left * sizeof(uint32);
for (int y = r.top; y < r.bottom; y++, src += pitch)
off.loopPixels(r, m_vm32, (uint32*)src, pitch,
[&](uint32* dst, uint32* src)
{
uint32* RESTRICT s = (uint32*)src;
uint32* RESTRICT d = &m_vm32[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
for (int x = r.left; x < r.right; x++)
{
d[col[x]] = (d[col[x]] & 0xff000000) | (s[x] & 0x00ffffff);
}
}
*dst = (*dst & 0xff000000) | (*src & 0x00ffffff);
});
}
__forceinline void WritePixel16(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r)
void WritePixel16(uint8* RESTRICT src, uint32 pitch, const GSOffset& off, const GSVector4i& r)
{
src -= r.left * sizeof(uint16);
for (int y = r.top; y < r.bottom; y++, src += pitch)
{
uint16* RESTRICT s = (uint16*)src;
uint16* RESTRICT d = &m_vm16[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
for (int x = r.left; x < r.right; x++)
{
d[col[x]] = s[x];
}
}
off.loopPixels(r, m_vm16, (uint16*)src, pitch, [&](uint16* dst, uint16* src) { *dst = *src; });
}
__forceinline void WriteFrame16(uint8* RESTRICT src, uint32 pitch, GSOffset* off, const GSVector4i& r)
void WriteFrame16(uint8* RESTRICT src, uint32 pitch, const GSOffset& off, const GSVector4i& r)
{
src -= r.left * sizeof(uint32);
for (int y = r.top; y < r.bottom; y++, src += pitch)
off.loopPixels(r, m_vm16, (uint32*)src, pitch,
[&](uint16* dst, uint32* src)
{
uint32* RESTRICT s = (uint32*)src;
uint16* RESTRICT d = &m_vm16[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
uint32 rb = *src & 0x00f800f8;
uint32 ga = *src & 0x8000f800;
for (int x = r.left; x < r.right; x++)
{
uint32 rb = s[x] & 0x00f800f8;
uint32 ga = s[x] & 0x8000f800;
d[col[x]] = (uint16)((ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3));
}
}
*dst = (uint16)((ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3));
});
}
__forceinline uint32 ReadTexel32(uint32 addr, const GIFRegTEXA& TEXA) const
@ -1191,17 +1120,17 @@ public:
// * => 32
void ReadTexture32(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureGPU24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture16(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8H(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HL(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HH(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture32(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture16(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8H(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HL(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HH(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
@ -1214,11 +1143,11 @@ public:
// pal ? 8 : 32
void ReadTexture8P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4P(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8HP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HLP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HHP(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
@ -1229,31 +1158,31 @@ public:
//
template <typename T>
void ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
//
void SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h);
};
constexpr inline GSOffsetNew GSOffsetNew::fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm)
constexpr inline GSOffset GSOffset::fromKnownPSM(uint32 bp, uint32 bw, GS_PSM psm)
{
switch (psm)
{
case PSM_PSMCT32: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMCT24: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMCT16: return GSOffsetNew(GSLocalMemory::swizzle16, bp, bw, psm);
case PSM_PSMCT16S: return GSOffsetNew(GSLocalMemory::swizzle16S, bp, bw, psm);
case PSM_PSGPU24: return GSOffsetNew(GSLocalMemory::swizzle16, bp, bw, psm);
case PSM_PSMT8: return GSOffsetNew(GSLocalMemory::swizzle8, bp, bw, psm);
case PSM_PSMT4: return GSOffsetNew(GSLocalMemory::swizzle4, bp, bw, psm);
case PSM_PSMT8H: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMT4HL: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMT4HH: return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMZ32: return GSOffsetNew(GSLocalMemory::swizzle32Z, bp, bw, psm);
case PSM_PSMZ24: return GSOffsetNew(GSLocalMemory::swizzle32Z, bp, bw, psm);
case PSM_PSMZ16: return GSOffsetNew(GSLocalMemory::swizzle16Z, bp, bw, psm);
case PSM_PSMZ16S: return GSOffsetNew(GSLocalMemory::swizzle16SZ, bp, bw, psm);
case PSM_PSMCT32: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMCT24: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMCT16: return GSOffset(GSLocalMemory::swizzle16, bp, bw, psm);
case PSM_PSMCT16S: return GSOffset(GSLocalMemory::swizzle16S, bp, bw, psm);
case PSM_PSGPU24: return GSOffset(GSLocalMemory::swizzle16, bp, bw, psm);
case PSM_PSMT8: return GSOffset(GSLocalMemory::swizzle8, bp, bw, psm);
case PSM_PSMT4: return GSOffset(GSLocalMemory::swizzle4, bp, bw, psm);
case PSM_PSMT8H: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMT4HL: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMT4HH: return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
case PSM_PSMZ32: return GSOffset(GSLocalMemory::swizzle32Z, bp, bw, psm);
case PSM_PSMZ24: return GSOffset(GSLocalMemory::swizzle32Z, bp, bw, psm);
case PSM_PSMZ16: return GSOffset(GSLocalMemory::swizzle16Z, bp, bw, psm);
case PSM_PSMZ16S: return GSOffset(GSLocalMemory::swizzle16SZ, bp, bw, psm);
}
return GSOffsetNew(GSLocalMemory::swizzle32, bp, bw, psm);
return GSOffset(GSLocalMemory::swizzle32, bp, bw, psm);
}

View File

@ -1650,182 +1650,88 @@ void GSState::Move()
// TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format)
GSOffset* RESTRICT spo = m_mem.GetOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM);
GSOffset* RESTRICT dpo = m_mem.GetOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM);
GSOffset spo = m_mem.GetOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM);
GSOffset dpo = m_mem.GetOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM);
auto copy = [&](auto&& pxCopyFn)
{
if (xinc > 0)
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
GSOffset::PAHelper s = spo.paMulti(sx, sy);
GSOffset::PAHelper d = dpo.paMulti(dx, dy);
for (int x = 0; x < w; x++)
{
pxCopyFn(d.value(), s.value());
s.incX();
d.incX();
}
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
GSOffset::PAHelper s = spo.paMulti(sx, sy);
GSOffset::PAHelper d = dpo.paMulti(dx, dy);
for (int x = 0; x < w; x++)
{
pxCopyFn(d.value(), s.value());
s.decX();
d.decX();
}
}
}
};
if (spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
{
int* RESTRICT scol = &spo->pixel.col[0][sx];
int* RESTRICT dcol = &dpo->pixel.col[0][dx];
if (spsm.trbpp == 32)
{
if (xinc > 0)
copy([&](uint32 doff, uint32 soff)
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
for (int x = 0; x < w; x++)
d[dcol[x]] = s[scol[x]];
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
for (int x = 0; x > -w; x--)
d[dcol[x]] = s[scol[x]];
}
}
m_mem.m_vm32[doff] = m_mem.m_vm32[soff];
});
}
else if (spsm.trbpp == 24)
{
if (xinc > 0)
copy([&](uint32 doff, uint32 soff)
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
for (int x = 0; x < w; x++)
d[dcol[x]] = (d[dcol[x]] & 0xff000000) | (s[scol[x]] & 0x00ffffff);
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
for (int x = 0; x > -w; x--)
d[dcol[x]] = (d[dcol[x]] & 0xff000000) | (s[scol[x]] & 0x00ffffff);
}
}
uint32& d = m_mem.m_vm32[doff];
d = (d & 0xff000000) | (m_mem.m_vm32[soff] & 0x00ffffff);
});
}
else // if(spsm.trbpp == 16)
{
if (xinc > 0)
copy([&](uint32 doff, uint32 soff)
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint16* RESTRICT s = &m_mem.m_vm16[spo->pixel.row[sy]];
uint16* RESTRICT d = &m_mem.m_vm16[dpo->pixel.row[dy]];
for (int x = 0; x < w; x++)
d[dcol[x]] = s[scol[x]];
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint16* RESTRICT s = &m_mem.m_vm16[spo->pixel.row[sy]];
uint16* RESTRICT d = &m_mem.m_vm16[dpo->pixel.row[dy]];
for (int x = 0; x > -w; x--)
d[dcol[x]] = s[scol[x]];
}
}
m_mem.m_vm16[doff] = m_mem.m_vm16[soff];
});
}
}
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
{
if (xinc > 0)
copy([&](uint32 doff, uint32 soff)
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint8* RESTRICT s = &m_mem.m_vm8[spo->pixel.row[sy]];
uint8* RESTRICT d = &m_mem.m_vm8[dpo->pixel.row[dy]];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for (int x = 0; x < w; x++)
d[dcol[x]] = s[scol[x]];
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint8* RESTRICT s = &m_mem.m_vm8[spo->pixel.row[sy]];
uint8* RESTRICT d = &m_mem.m_vm8[dpo->pixel.row[dy]];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for (int x = 0; x > -w; x--)
d[dcol[x]] = s[scol[x]];
}
}
m_mem.m_vm8[doff] = m_mem.m_vm8[soff];
});
}
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
{
if (xinc > 0)
copy([&](uint32 doff, uint32 soff)
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32 sbase = spo->pixel.row[sy];
uint32 dbase = dpo->pixel.row[dy];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for (int x = 0; x < w; x++)
m_mem.WritePixel4(dbase + dcol[x], m_mem.ReadPixel4(sbase + scol[x]));
}
m_mem.WritePixel4(doff, m_mem.ReadPixel4(soff));
});
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
copy([&](uint32 doff, uint32 soff)
{
uint32 sbase = spo->pixel.row[sy];
uint32 dbase = dpo->pixel.row[dy];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for (int x = 0; x > -w; x--)
m_mem.WritePixel4(dbase + dcol[x], m_mem.ReadPixel4(sbase + scol[x]));
}
}
}
else
{
if (xinc > 0)
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32 sbase = spo->pixel.row[sy];
uint32 dbase = dpo->pixel.row[dy];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for (int x = 0; x < w; x++)
(m_mem.*dpsm.wpa)(dbase + dcol[x], (m_mem.*spsm.rpa)(sbase + scol[x]));
}
}
else
{
for (int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32 sbase = spo->pixel.row[sy];
uint32 dbase = dpo->pixel.row[dy];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for (int x = 0; x > -w; x--)
(m_mem.*dpsm.wpa)(dbase + dcol[x], (m_mem.*spsm.rpa)(sbase + scol[x]));
}
}
(m_mem.*dpsm.wpa)(doff, (m_mem.*spsm.rpa)(soff));
});
}
}

View File

@ -79,7 +79,7 @@ void GSTextureCache11::Read(Target* t, const GSVector4i& r)
{
// TODO: block level write
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
GSOffset off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
switch (TEX0.PSM)
{
@ -124,7 +124,7 @@ void GSTextureCache11::Read(Source* t, const GSVector4i& r)
if (offscreen->Map(m, &r_offscreen))
{
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
GSOffset off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);

View File

@ -885,11 +885,8 @@ void GSRendererHW::SwSpriteRender()
InvalidateLocalMem(bitbltbuf, GSVector4i(sx, sy, sx + w, sy + h));
InvalidateVideoMem(bitbltbuf, GSVector4i(dx, dy, dx + w, dy + h));
GSOffset* RESTRICT spo = texture_mapping_enabled ? m_mem.GetOffset(bitbltbuf.SBP, bitbltbuf.SBW, bitbltbuf.SPSM) : nullptr;
GSOffset* RESTRICT dpo = m_mem.GetOffset(bitbltbuf.DBP, bitbltbuf.DBW, bitbltbuf.DPSM);
const int* RESTRICT scol = texture_mapping_enabled ? &spo->pixel.col[0][sx] : nullptr;
int* RESTRICT dcol = &dpo->pixel.col[0][dx];
GSOffset spo = texture_mapping_enabled ? m_mem.GetOffset(bitbltbuf.SBP, bitbltbuf.SBW, bitbltbuf.SPSM) : GSOffset();
GSOffset dpo = m_mem.GetOffset(bitbltbuf.DBP, bitbltbuf.DBW, bitbltbuf.DPSM);
const bool alpha_blending_enabled = PRIM->ABE;
@ -910,19 +907,27 @@ void GSRendererHW::SwSpriteRender()
for (int y = 0; y < h; y++, ++sy, ++dy)
{
const uint32* RESTRICT s = texture_mapping_enabled ? &m_mem.m_vm32[spo->pixel.row[sy]] : nullptr;
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
GSOffset::PAHelper spa = texture_mapping_enabled ? spo.paMulti(sx, sy) : GSOffset::PAHelper();
GSOffset::PAHelper dpa = dpo.paMulti(dx, dy);
ASSERT(w % 2 == 0);
for (int x = 0; x < w; x += 2)
{
uint32 di = dpa.value();
dpa.incX();
ASSERT(di + 1 == dpa.value()); // Destination pixel pair is adjacent in memory
dpa.incX();
GSVector4i sc;
if (texture_mapping_enabled)
{
uint32 si = spa.value();
spa.incX();
// Read 2 source pixel colors
ASSERT((scol[x] + 1) == scol[x + 1]); // Source pixel pair is adjacent in memory
sc = GSVector4i::loadl(&s[scol[x]]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
ASSERT((si + 1) == spa.value()); // Source pixel pair is adjacent in memory
spa.incX();
sc = GSVector4i::loadl(&m_mem.m_vm32[si]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
// Apply TFX
ASSERT(tex0_tfx == 0 || tex0_tfx == 1);
@ -943,8 +948,7 @@ void GSRendererHW::SwSpriteRender()
if (alpha_blending_enabled || fb_mask_enabled)
{
// Read 2 destination pixel colors
ASSERT((dcol[x] + 1) == dcol[x + 1]); // Destination pixel pair is adjacent in memory
dc0 = GSVector4i::loadl(&d[dcol[x]]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
dc0 = GSVector4i::loadl(&m_mem.m_vm32[di]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
}
if (alpha_blending_enabled)
@ -999,8 +1003,7 @@ void GSRendererHW::SwSpriteRender()
// Store 2 pixel colors
dc = dc.pu16(GSVector4i::zero()); // 0x0000000000000000AABBGGRRaabbggrr
ASSERT((dcol[x] + 1) == dcol[x + 1]); // Destination pixel pair is adjacent in memory
GSVector4i::storel(&d[dcol[x]], dc);
GSVector4i::storel(&m_mem.m_vm32[di], dc);
}
}
}
@ -1806,7 +1809,7 @@ void GSRendererHW::OI_GsMemClear()
// Limit it further to a full screen 0 write
if ((m_vertex.next == 2) && m_vt.m_min.c.eq(GSVector4i(0)))
{
GSOffset* off = m_context->offset.fb;
const GSOffset& off = m_context->offset.fb;
const GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in));
// Limit the hack to a single fullscreen clear. Some games might use several columns to clear a screen
// but hopefully it will be enough.
@ -1824,12 +1827,11 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel32
for (int y = r.top; y < r.bottom; y++)
{
uint32* RESTRICT d = &m_mem.m_vm32[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
GSOffset::PAHelper pa = off.paMulti(r.left, y);
for (int x = r.left; x < r.right; x++)
for (; pa.x() < r.right; pa.incX())
{
d[col[x]] = 0; // Here the constant color
m_mem.m_vm32[pa.value()] = 0; // Here the constant color
}
}
}
@ -1838,12 +1840,11 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel24
for (int y = r.top; y < r.bottom; y++)
{
uint32* RESTRICT d = &m_mem.m_vm32[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
GSOffset::PAHelper pa = off.paMulti(r.left, y);
for (int x = r.left; x < r.right; x++)
for (; pa.x() < r.right; pa.incX())
{
d[col[x]] &= 0xff000000; // Clear the color
m_mem.m_vm32[pa.value()] &= 0xff000000; // Clear the color
}
}
}
@ -1854,12 +1855,11 @@ void GSRendererHW::OI_GsMemClear()
// Based on WritePixel16
for(int y = r.top; y < r.bottom; y++)
{
uint32* RESTRICT d = &m_mem.m_vm16[off->pixel.row[y]];
int* RESTRICT col = off->pixel.col[0];
GSOffset::PAHelper pa = off.paMulti(r.left, y);
for(int x = r.left; x < r.right; x++)
{
d[col[x]] = 0; // Here the constant color
m_mem.m_vm16[pa.value()] = 0; // Here the constant color
}
}
#endif

View File

@ -797,14 +797,11 @@ void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp)
// Goal: invalidate data sent to the GPU when the source (GS memory) is modified
// Called each time you want to write to the GS memory
void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, bool target)
void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& rect, bool target)
{
if (!off)
return; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549.
uint32 bp = off->bp;
uint32 bw = off->bw;
uint32 psm = off->psm;
uint32 bp = off.bp();
uint32 bw = off.bw();
uint32 psm = off.psm();
if (!target)
{
@ -871,18 +868,12 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
}
}
GSVector4i r;
uint32* pages = (uint32*)m_temp;
off->GetPages(rect, pages, &r);
bool found = false;
for (const uint32* p = pages; *p != GSOffset::EOP; p++)
{
uint32 page = *p;
GSVector4i r = rect.ralign<Align_Outside>((bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs);
off.loopPages(rect, [&](uint32 page)
{
auto& list = m_src.m_map[page];
for (auto i = list.begin(); i != list.end();)
{
@ -937,7 +928,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
}
}
}
}
});
if (!target)
return;
@ -1045,10 +1036,10 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
// Goal: retrieve the data from the GPU to the GS memory.
// Called each time you want to read from the GS memory
void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r)
void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r)
{
uint32 bp = off->bp;
uint32 psm = off->psm;
uint32 bp = off.bp();
uint32 psm = off.psm();
//uint32 bw = off->bw;
// No depth handling please.
@ -1777,8 +1768,7 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR
m_p2t = r->m_mem.GetPage2TileMap(m_TEX0);
}
GSOffset* off = m_renderer->m_context->offset.tex;
m_pages_as_bit = off->GetPagesAsBits(m_TEX0);
m_pages = m_renderer->m_context->offset.tex.pageLooperForRect(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
}
}
@ -1808,23 +1798,23 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer)
m_complete = true; // lame, but better than nothing
}
const GSOffset* off = m_renderer->m_context->offset.tex;
const GSOffset& off = m_renderer->m_context->offset.tex;
GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
uint32 blocks = 0;
if (m_repeating)
{
for (int y = r.top; y < r.bottom; y += bs.y)
for (int y = r.top; y < r.bottom; y += bs.y, bn.nextBlockY())
{
uint32 base = off->block.row[y >> 3u];
for (int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
for (int x = r.left; x < r.right; bn.nextBlockX(), x += bs.x)
{
uint32 block = base + off->block.col[x >> 3u];
int i = (bn.blkY() << 7) + bn.blkX();
uint32 block = bn.valueNoWrap();
if (block < MAX_BLOCKS || m_wrap_gs_mem)
{
uint32 addr = (i >> 3u) % MAX_BLOCKS;
uint32 addr = i % MAX_BLOCKS;
uint32 row = addr >> 5u;
uint32 col = 1 << (addr & 31u);
@ -1843,13 +1833,11 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer)
}
else
{
for (int y = r.top; y < r.bottom; y += bs.y)
for (int y = r.top; y < r.bottom; y += bs.y, bn.nextBlockY())
{
uint32 base = off->block.row[y >> 3u];
for (int x = r.left; x < r.right; x += bs.x)
for (int x = r.left; x < r.right; x += bs.x, bn.nextBlockX())
{
uint32 block = base + off->block.col[x >> 3u];
uint32 block = bn.valueNoWrap();
if (block < MAX_BLOCKS || m_wrap_gs_mem)
{
@ -1951,7 +1939,7 @@ void GSTextureCache::Source::Flush(uint32 count, int layer)
GSLocalMemory& mem = m_renderer->m_mem;
const GSOffset* off = m_renderer->m_context->offset.tex;
const GSOffset& off = m_renderer->m_context->offset.tex;
GSLocalMemory::readTexture rtx = psm.rtx;
@ -2079,7 +2067,7 @@ void GSTextureCache::Target::Update()
GSTexture* t = m_renderer->m_dev->CreateTexture(w, h);
const GSOffset* off = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
GSOffset off = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
GSTexture::GSMap m;
@ -2130,7 +2118,7 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect)
// GSTextureCache::SourceMap
void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* off)
void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, const GSOffset& off)
{
m_surfaces.insert(s);
@ -2147,26 +2135,10 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset*
}
// The source pointer will be stored/duplicated in all m_map[array of pages]
for (size_t i = 0; i < countof(m_pages); i++)
s->m_pages.loopPages([this, s](uint32 page)
{
if (uint32 p = s->m_pages_as_bit[i])
{
auto* m = &m_map[i << 5];
auto* e = &s->m_erase_it[i << 5];
unsigned long j;
while (_BitScanForward(&j, p))
{
// FIXME: this statement could be optimized to a single ASM instruction (instead of 4)
// Either BTR (AKA bit test and reset). Depends on the previous instruction.
// Or BLSR (AKA Reset Lowest Set Bit). No dependency but require BMI1 (basically a recent CPU)
p ^= 1U << j;
e[j] = m[j].InsertFront(s);
}
}
}
s->m_erase_it[page] = m_map[page].InsertFront(s);
});
}
void GSTextureCache::SourceMap::RemoveAll()
@ -2197,26 +2169,10 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s)
}
else
{
for (size_t i = 0; i < countof(m_pages); i++)
s->m_pages.loopPages([this, s](uint32 page)
{
if (uint32 p = s->m_pages_as_bit[i])
{
auto* m = &m_map[i << 5];
const auto* e = &s->m_erase_it[i << 5];
unsigned long j;
while (_BitScanForward(&j, p))
{
// FIXME: this statement could be optimized to a single ASM instruction (instead of 4)
// Either BTR (AKA bit test and reset). Depends on the previous instruction.
// Or BLSR (AKA Reset Lowest Set Bit). No dependency but require BMI1 (basically a recent CPU)
p ^= 1U << j;
m[j].EraseIndex(e[j]);
}
}
}
m_map[page].EraseIndex(s->m_erase_it[page]);
});
}
delete s;

View File

@ -125,7 +125,7 @@ public:
GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value
// Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase
std::array<uint16, MAX_PAGES> m_erase_it;
uint32* m_pages_as_bit;
GSOffset::PageLooper m_pages;
public:
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container = false);
@ -189,7 +189,7 @@ public:
memset(m_pages, 0, sizeof(m_pages));
}
void Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* off);
void Add(Source* s, const GIFRegTEX0& TEX0, const GSOffset& off);
void RemoveAll();
void RemovePartial();
void RemoveAt(Source* s);
@ -249,8 +249,8 @@ public:
void InvalidateVideoMemType(int type, uint32 bp);
void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
void InvalidateVideoMem(GSOffset* off, const GSVector4i& r, bool target = true);
void InvalidateLocalMem(GSOffset* off, const GSVector4i& r);
void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true);
void InvalidateLocalMem(const GSOffset& off, const GSVector4i& r);
void IncAge();
bool UserHacks_HalfPixelOffset;

View File

@ -78,7 +78,7 @@ void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r)
{
// TODO: block level write
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
GSOffset off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
switch (TEX0.PSM)
{
@ -125,7 +125,7 @@ void GSTextureCacheOGL::Read(Source* t, const GSVector4i& r)
if (offscreen->Map(m, &r_offscreen))
{
GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
GSOffset off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);

View File

@ -2811,31 +2811,28 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
if (m != 0xffffffff)
{
const int* zbr = m_global.zbr;
const int* zbc = m_global.zbc;
uint32 z = v.t.u32[3]; // (uint32)v.p.z;
if (m_global.sel.zpsm != 2)
{
if (m == 0)
{
DrawRectT<uint32, false>(zbr, zbc, r, z, m);
DrawRectT<uint32, false>(m_global.zbo, r, z, m);
}
else
{
DrawRectT<uint32, true>(zbr, zbc, r, z, m);
DrawRectT<uint32, true>(m_global.zbo, r, z, m);
}
}
else
{
if ((m & 0xffff) == 0)
{
DrawRectT<uint16, false>(zbr, zbc, r, z, m);
DrawRectT<uint16, false>(m_global.zbo, r, z, m);
}
else
{
DrawRectT<uint16, true>(zbr, zbc, r, z, m);
DrawRectT<uint16, true>(m_global.zbo, r, z, m);
}
}
}
@ -2848,9 +2845,6 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
if (m != 0xffffffff)
{
const int* fbr = m_global.fbr;
const int* fbc = m_global.fbc;
uint32 c = (GSVector4i(v.c) >> 7).rgba32();
if (m_global.sel.fba)
@ -2862,11 +2856,11 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
{
if (m == 0)
{
DrawRectT<uint32, false>(fbr, fbc, r, c, m);
DrawRectT<uint32, false>(m_global.fbo, r, c, m);
}
else
{
DrawRectT<uint32, true>(fbr, fbc, r, c, m);
DrawRectT<uint32, true>(m_global.fbo, r, c, m);
}
}
else
@ -2875,18 +2869,18 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
if ((m & 0xffff) == 0)
{
DrawRectT<uint16, false>(fbr, fbc, r, c, m);
DrawRectT<uint16, false>(m_global.fbo, r, c, m);
}
else
{
DrawRectT<uint16, true>(fbr, fbc, r, c, m);
DrawRectT<uint16, true>(m_global.fbo, r, c, m);
}
}
}
}
template <class T, bool masked>
void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m)
void GSDrawScanline::DrawRectT(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m)
{
if (m == 0xffffffff)
return;
@ -2921,25 +2915,25 @@ void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col,
if (!br.rempty())
{
FillRect<T, masked>(row, col, GSVector4i(r.x, r.y, r.z, br.y), c, m);
FillRect<T, masked>(row, col, GSVector4i(r.x, br.w, r.z, r.w), c, m);
FillRect<T, masked>(off, GSVector4i(r.x, r.y, r.z, br.y), c, m);
FillRect<T, masked>(off, GSVector4i(r.x, br.w, r.z, r.w), c, m);
if (r.x < br.x || br.z < r.z)
{
FillRect<T, masked>(row, col, GSVector4i(r.x, br.y, br.x, br.w), c, m);
FillRect<T, masked>(row, col, GSVector4i(br.z, br.y, r.z, br.w), c, m);
FillRect<T, masked>(off, GSVector4i(r.x, br.y, br.x, br.w), c, m);
FillRect<T, masked>(off, GSVector4i(br.z, br.y, r.z, br.w), c, m);
}
FillBlock<T, masked>(row, col, br, color, mask);
FillBlock<T, masked>(off, br, color, mask);
}
else
{
FillRect<T, masked>(row, col, r, c, m);
FillRect<T, masked>(off, r, c, m);
}
}
template <class T, bool masked>
void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m)
void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m)
{
if (r.x >= r.z)
return;
@ -2948,11 +2942,12 @@ void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col,
for (int y = r.y; y < r.w; y++)
{
T* RESTRICT d = &vm[row[y]];
GSOffset::PAHelper pa = off.paMulti(r.x, y);
for (int x = r.x; x < r.z; x++)
for (; pa.x() < r.z; pa.incX())
{
d[col[x]] = (T)(!masked ? c : (c | (d[col[x]] & m)));
T& d = vm[pa.value()];
d = (T)(!masked ? c : (c | (d & m)));
}
}
}
@ -2960,7 +2955,7 @@ void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col,
#if _M_SSE >= 0x501
template <class T, bool masked>
void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m)
void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m)
{
if (r.x >= r.z)
return;
@ -2969,11 +2964,9 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
for (int y = r.y; y < r.w; y += 8)
{
T* RESTRICT d = &vm[row[y]];
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
{
GSVector8i* RESTRICT p = (GSVector8i*)&d[col[x]];
GSVector8i* RESTRICT p = (GSVector8i*)&vm[off.pa(x, y)];
p[0] = !masked ? c : (c | (p[0] & m));
p[1] = !masked ? c : (c | (p[1] & m));
@ -2990,7 +2983,7 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
#else
template <class T, bool masked>
void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
{
if (r.x >= r.z)
return;
@ -2999,11 +2992,9 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
for (int y = r.y; y < r.w; y += 8)
{
T* RESTRICT d = &vm[row[y]];
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
{
GSVector4i* RESTRICT p = (GSVector4i*)&d[col[x]];
GSVector4i* RESTRICT p = (GSVector4i*)&vm[off.pa(x, y)];
for (int i = 0; i < 16; i += 4)
{

View File

@ -38,20 +38,20 @@ protected:
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map;
template <class T, bool masked>
void DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
void DrawRectT(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m);
template <class T, bool masked>
__forceinline void FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
__forceinline void FillRect(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m);
#if _M_SSE >= 0x501
template <class T, bool masked>
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m);
__forceinline void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m);
#else
template <class T, bool masked>
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
__forceinline void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
#endif

View File

@ -409,17 +409,20 @@ void GSRendererSW::Draw()
// GSScanlineGlobalData& gd = sd->global;
uint32* fb_pages = NULL;
uint32* zb_pages = NULL;
GSOffset::PageLooper* fb_pages = NULL;
GSOffset::PageLooper* zb_pages = NULL;
GSOffset::PageLooper _fb_pages, _zb_pages;
if (sd->global.sel.fb)
{
fb_pages = m_context->offset.fb->GetPages(r);
_fb_pages = m_context->offset.fb.pageLooperForRect(r);
fb_pages = &_fb_pages;
}
if (sd->global.sel.zb)
{
zb_pages = m_context->offset.zb->GetPages(r);
_zb_pages = m_context->offset.zb.pageLooperForRect(r);
zb_pages = &_zb_pages;
}
// check if there is an overlap between this and previous targets
@ -438,7 +441,7 @@ void GSRendererSW::Draw()
// addref source and target pages
sd->UsePages(fb_pages, m_context->offset.fb->psm, zb_pages, m_context->offset.zb->psm);
sd->UsePages(fb_pages, m_context->offset.fb.psm(), zb_pages, m_context->offset.zb.psm());
//
@ -641,26 +644,26 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
fflush(s_fp);
}
GSOffset* off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
off->GetPages(r, m_tmp_pages);
GSOffset off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
GSOffset::PageLooper pages = off.pageLooperForRect(r);
// check if the changing pages either used as a texture or a target
if (!m_rl->IsSynced())
{
for (uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++)
pages.loopPagesWithBreak([&](uint32 page)
{
if (m_fzb_pages[*p] | m_tex_pages[*p])
if (m_fzb_pages[page] | m_tex_pages[page])
{
Sync(6);
break;
}
return false;
}
return true;
});
}
m_tc->InvalidatePages(m_tmp_pages, off->psm); // if texture update runs on a thread and Sync(5) happens then this must come later
m_tc->InvalidatePages(pages, off.psm()); // if texture update runs on a thread and Sync(5) happens then this must come later
}
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
@ -673,77 +676,92 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
if (!m_rl->IsSynced())
{
GSOffset* off = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
GSOffset off = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
GSOffset::PageLooper pages = off.pageLooperForRect(r);
off->GetPages(r, m_tmp_pages);
for (uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++)
pages.loopPagesWithBreak([&](uint32 page)
{
if (m_fzb_pages[*p])
if (m_fzb_pages[page])
{
Sync(7);
break;
}
return false;
}
return true;
});
}
}
void GSRendererSW::UsePages(const uint32* pages, const int type)
void GSRendererSW::UsePages(const GSOffset::PageLooper& pages, const int type)
{
for (const uint32* p = pages; *p != GSOffset::EOP; p++)
pages.loopPages([=](uint32 page)
{
switch (type)
{
case 0:
ASSERT((m_fzb_pages[*p] & 0xFFFF) < USHRT_MAX);
m_fzb_pages[*p] += 1;
ASSERT((m_fzb_pages[page] & 0xFFFF) < USHRT_MAX);
m_fzb_pages[page] += 1;
break;
case 1:
ASSERT((m_fzb_pages[*p] >> 16) < USHRT_MAX);
m_fzb_pages[*p] += 0x10000;
ASSERT((m_fzb_pages[page] >> 16) < USHRT_MAX);
m_fzb_pages[page] += 0x10000;
break;
case 2:
ASSERT(m_tex_pages[*p] < USHRT_MAX);
m_tex_pages[*p] += 1;
ASSERT(m_tex_pages[page] < USHRT_MAX);
m_tex_pages[page] += 1;
break;
default:
break;
}
}
});
}
void GSRendererSW::ReleasePages(const uint32* pages, const int type)
void GSRendererSW::ReleasePages(const GSOffset::PageLooper& pages, const int type)
{
for (const uint32* p = pages; *p != GSOffset::EOP; p++)
pages.loopPages([=](uint32 page)
{
switch (type)
{
case 0:
ASSERT((m_fzb_pages[*p] & 0xFFFF) > 0);
m_fzb_pages[*p] -= 1;
ASSERT((m_fzb_pages[page] & 0xFFFF) > 0);
m_fzb_pages[page] -= 1;
break;
case 1:
ASSERT((m_fzb_pages[*p] >> 16) > 0);
m_fzb_pages[*p] -= 0x10000;
ASSERT((m_fzb_pages[page] >> 16) > 0);
m_fzb_pages[page] -= 0x10000;
break;
case 2:
ASSERT(m_tex_pages[*p] > 0);
m_tex_pages[*p] -= 1;
ASSERT(m_tex_pages[page] > 0);
m_tex_pages[page] -= 1;
break;
default:
break;
}
}
});
}
bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r)
bool GSRendererSW::CheckTargetPages(const GSOffset::PageLooper* fb_pages, const GSOffset::PageLooper* zb_pages, const GSVector4i& r)
{
bool synced = m_rl->IsSynced();
bool fb = fb_pages != NULL;
bool zb = zb_pages != NULL;
GSOffset::PageLooper _fb_pages, _zb_pages;
// Fills in whichever page list the caller did not supply: the looper is built
// for rect r into the function-local _fb_pages / _zb_pages storage and the
// corresponding pointer is redirected to it. Pointers that are already set are
// left untouched, so calling this more than once is harmless.
auto requirePages = [&]
{
	if (!fb_pages)
	{
		_fb_pages = m_context->offset.fb.pageLooperForRect(r);
		fb_pages = &_fb_pages;
	}

	if (!zb_pages)
	{
		_zb_pages = m_context->offset.zb.pageLooperForRect(r);
		zb_pages = &_zb_pages;
	}
};
bool res = false;
if (m_fzb != m_context->offset.fzb4)
@ -753,17 +771,14 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
m_fzb = m_context->offset.fzb4;
m_fzb_bbox = r;
if (fb_pages == NULL) fb_pages = m_context->offset.fb->GetPages(r);
if (zb_pages == NULL) zb_pages = m_context->offset.zb->GetPages(r);
memset(m_fzb_cur_pages, 0, sizeof(m_fzb_cur_pages));
uint32 used = 0;
for (const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
{
uint32 i = *p;
requirePages();
fb_pages->loopPages([&](uint32 i)
{
uint32 row = i >> 5;
uint32 col = 1 << (i & 31);
@ -771,12 +786,10 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
used |= m_fzb_pages[i];
used |= m_tex_pages[i];
}
});
for (const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
zb_pages->loopPages([&](uint32 i)
{
uint32 i = *p;
uint32 row = i >> 5;
uint32 col = 1 << (i & 31);
@ -784,7 +797,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
used |= m_fzb_pages[i];
used |= m_tex_pages[i];
}
});
if (!synced)
{
@ -816,15 +829,12 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
{
// drawing area is larger than previous time, check new parts only to avoid false positives (m_fzb_cur_pages guards)
if (fb_pages == NULL) fb_pages = m_context->offset.fb->GetPages(r);
if (zb_pages == NULL) zb_pages = m_context->offset.zb->GetPages(r);
requirePages();
uint32 used = 0;
for (const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
fb_pages->loopPages([&](uint32 i)
{
uint32 i = *p;
uint32 row = i >> 5;
uint32 col = 1 << (i & 31);
@ -834,12 +844,10 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
used |= m_fzb_pages[i];
}
}
});
for (const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
zb_pages->loopPages([&](uint32 i)
{
uint32 i = *p;
uint32 row = i >> 5;
uint32 col = 1 << (i & 31);
@ -849,7 +857,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
used |= m_fzb_pages[i];
}
}
});
if (!synced)
{
@ -873,9 +881,9 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
if (fb && !res)
{
for (const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
fb_pages->loopPagesWithBreak([&](uint32 page)
{
if (m_fzb_pages[*p] & 0xffff0000)
if (m_fzb_pages[page] & 0xffff0000)
{
if (LOG)
{
@ -885,16 +893,17 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
res = true;
break;
}
return false;
}
return true;
});
}
if (zb && !res)
{
for (const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
zb_pages->loopPagesWithBreak([&](uint32 page)
{
if (m_fzb_pages[*p] & 0x0000ffff)
if (m_fzb_pages[page] & 0x0000ffff)
{
if (LOG)
{
@ -904,15 +913,13 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
res = true;
break;
return false;
}
return true;
});
}
}
}
}
}
if (!fb && fb_pages != NULL) delete[] fb_pages;
if (!zb && zb_pages != NULL) delete[] zb_pages;
return res;
}
@ -923,19 +930,22 @@ bool GSRendererSW::CheckSourcePages(SharedData* sd)
// NOTE(review): this span is diff output whose +/- markers were stripped, so removed and added
// lines sit side by side and the text is not compilable as shown. Where two near-identical
// statements are adjacent, the first appears to be the removed form — TODO confirm.
{
// Visit the texture slots of sd in order, stopping at the first entry whose t is NULL.
for (size_t i = 0; sd->m_tex[i].t != NULL; i++)
{
// Pointer-style form: GetPages is handed the rectangle and the m_tmp_pages array.
sd->m_tex[i].t->m_offset->GetPages(sd->m_tex[i].r, m_tmp_pages);
// Value-style form: a PageLooper is built for the same rectangle instead.
GSOffset::PageLooper pages = sd->m_tex[i].t->m_offset.pageLooperForRect(sd->m_tex[i].r);
uint32* pages = m_tmp_pages; // sd->m_tex[i].t->m_pages.n;
// Array walk terminated by the GSOffset::EOP sentinel (counterpart of the lambda call below).
for (const uint32* p = pages; *p != GSOffset::EOP; p++)
// ret carries the lambda's finding out to the enclosing function, since a return inside the
// lambda only leaves the lambda.
bool ret = false;
// NOTE(review): the lambda parameter is also named `pages`, shadowing the PageLooper above;
// inside the lambda `pages` is a single page index.
pages.loopPagesWithBreak([&](uint32 pages)
{
// TODO: 8H 4HL 4HH texture at the same place as the render target (24 bit, or 32-bit where the alpha channel is masked, Valkyrie Profile 2)
if (m_fzb_pages[*p]) // currently being drawn to? => sync
if (m_fzb_pages[pages]) // currently being drawn to? => sync
{
ret = true;
// Presumably a false return tells loopPagesWithBreak to stop iterating — TODO confirm.
return false;
}
return true;
});
// Propagate a hit found by the lambda as this function's result.
if (ret)
return true;
}
}
}
}
@ -954,10 +964,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
// NOTE(review): diff residue — removed and added assignments are interleaved here with their
// +/- markers stripped, and the enclosing function is only partly visible.
gd.vm = m_mem.m_vm8;
// Pointer-style form: row and column tables are read through fb-> / zb-> (pixel.row, pixel.col[0]).
gd.fbr = context->offset.fb->pixel.row;
gd.zbr = context->offset.zb->pixel.row;
gd.fbc = context->offset.fb->pixel.col[0];
gd.zbc = context->offset.zb->pixel.col[0];
// The offsets themselves are stored rather than their tables; presumably these two lines are
// the added replacements for the four assignments above — TODO confirm.
gd.fbo = context->offset.fb;
gd.zbo = context->offset.zb;
// Row and column tables taken from the fzb4 offset.
gd.fzbr = context->offset.fzb4->row;
gd.fzbc = context->offset.fzb4->col;
@ -1423,8 +1431,6 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
GSRendererSW::SharedData::SharedData(GSRendererSW* parent)
: m_parent(parent)
, m_fb_pages(NULL)
, m_zb_pages(NULL)
, m_fpsm(0)
, m_zpsm(0)
, m_using_pages(false)
@ -1460,7 +1466,7 @@ GSRendererSW::SharedData::~SharedData()
//static TransactionScope::Lock s_lock;
void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm)
void GSRendererSW::SharedData::UsePages(const GSOffset::PageLooper* fb_pages, int fpsm, const GSOffset::PageLooper* zb_pages, int zpsm)
{
if (m_using_pages)
return;
@ -1468,24 +1474,26 @@ void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const
// Body fragment of SharedData::UsePages. NOTE(review): this is diff output with the +/- markers
// stripped; each adjacent pair of near-identical statements appears to be removed line followed
// by added line — TODO confirm. The text is not compilable as shown.
{
//TransactionScope scope(s_lock);
// Frame buffer pages are registered with the parent under type 0.
// NOTE(review): the shorter guard drops the `fb_pages != NULL` test while the call below it
// dereferences fb_pages; the stores further down still null-check the pointer. Confirm callers
// never pass NULL when global.sel.fb is set.
if (global.sel.fb && fb_pages != NULL)
if (global.sel.fb)
{
m_parent->UsePages(fb_pages, 0);
m_parent->UsePages(*fb_pages, 0);
}
// Z buffer pages are registered under type 1; the same null-check remark applies to zb_pages.
if (global.sel.zb && zb_pages != NULL)
if (global.sel.zb)
{
m_parent->UsePages(zb_pages, 1);
m_parent->UsePages(*zb_pages, 1);
}
// Every bound texture (slots up to the first NULL t) is registered under type 2.
for (size_t i = 0; m_tex[i].t != NULL; i++)
{
m_parent->UsePages(m_tex[i].t->m_pages.n, 2);
m_parent->UsePages(m_tex[i].t->m_pages, 2);
}
}
// Pointer form: the incoming pointers are stored as-is.
m_fb_pages = fb_pages;
m_zb_pages = zb_pages;
// Value form: the pointed-to PageLooper is copied, and only when the pointer is non-null.
if (fb_pages)
m_fb_pages = *fb_pages;
if (zb_pages)
m_zb_pages = *zb_pages;
// Remember the pixel storage modes passed by the caller.
m_fpsm = fpsm;
m_zpsm = zpsm;
@ -1512,16 +1520,10 @@ void GSRendererSW::SharedData::ReleasePages()
// Tail fragment of SharedData::ReleasePages (its beginning is elided by the hunk header above).
// NOTE(review): diff output with +/- markers stripped; removed and added lines are interleaved.
// Release every bound texture's pages from the parent under type 2.
for (size_t i = 0; m_tex[i].t != NULL; i++)
{
m_parent->ReleasePages(m_tex[i].t->m_pages.n, 2);
m_parent->ReleasePages(m_tex[i].t->m_pages, 2);
}
}
// Frees and clears the pointer-typed page arrays. Presumably these four lines belong to the
// removed side, since the PageLooper-typed members are values — TODO confirm.
delete[] m_fb_pages;
delete[] m_zb_pages;
m_fb_pages = NULL;
m_zb_pages = NULL;
// Clear the flag that UsePages tests for its early return.
m_using_pages = false;
}

View File

@ -35,8 +35,8 @@ class GSRendererSW : public GSRenderer
// Data members of GSRendererSW::SharedData (class header elided by the hunk header above).
// NOTE(review): diff output with +/- markers stripped — m_fb_pages / m_zb_pages appear twice
// because the removed pointer declarations sit next to the added PageLooper declarations.
public:
GSRendererSW* m_parent;
// Pointer-typed page lists.
const uint32* m_fb_pages;
const uint32* m_zb_pages;
// Value-typed replacements for the two pointers above.
GSOffset::PageLooper m_fb_pages;
GSOffset::PageLooper m_zb_pages;
// Storage modes recorded by UsePages from its fpsm / zpsm arguments.
int m_fpsm;
int m_zpsm;
// Set while pages are held; UsePages returns early when it is true and ReleasePages clears it.
bool m_using_pages;
@ -52,7 +52,7 @@ class GSRendererSW : public GSRenderer
SharedData(GSRendererSW* parent);
virtual ~SharedData();
void UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm);
void UsePages(const GSOffset::PageLooper* fb_pages, int fpsm, const GSOffset::PageLooper* zb_pages, int zpsm);
void ReleasePages();
void SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level);
@ -76,7 +76,6 @@ protected:
uint32 m_fzb_cur_pages[16];
std::atomic<uint32> m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
std::atomic<uint16> m_tex_pages[512];
uint32 m_tmp_pages[512 + 1];
void Reset();
void VSync(int field);
@ -90,10 +89,10 @@ protected:
// Protected member declarations of GSRendererSW. NOTE(review): diff output with +/- markers
// stripped — UsePages, ReleasePages and CheckTargetPages are each listed in a uint32-pointer
// form and a GSOffset::PageLooper form; presumably pointer form = removed, PageLooper = added.
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
// `type` is passed as 0 (frame buffer), 1 (z buffer) or 2 (texture) by SharedData::UsePages.
void UsePages(const uint32* pages, const int type);
void ReleasePages(const uint32* pages, const int type);
void UsePages(const GSOffset::PageLooper& pages, const int type);
void ReleasePages(const GSOffset::PageLooper& pages, const int type);
// The page arguments are pointers in both forms, so they may be null at the call site.
bool CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r);
bool CheckTargetPages(const GSOffset::PageLooper* fb_pages, const GSOffset::PageLooper* zb_pages, const GSVector4i& r);
bool CheckSourcePages(SharedData* sd);
bool GetScanlineGlobalData(SharedData* data);

View File

@ -127,10 +127,8 @@ struct alignas(32) GSScanlineGlobalData // per batch variables, this is like a p
// Member fragment of GSScanlineGlobalData (struct header is in the hunk header above).
// NOTE(review): diff output with +/- markers stripped; removed and added members are interleaved.
uint32* clut;
GSVector4i* dimx;
// Frame / z buffer row and column lookup tables (pointer members).
const int* fbr;
const int* zbr;
const int* fbc;
const int* zbc;
// Frame / z buffer offsets held by value; presumably the added replacements for the four
// table pointers above — TODO confirm.
GSOffset fbo;
GSOffset zbo;
const GSVector2i* fzbr;
const GSVector2i* fzbc;

View File

@ -62,21 +62,18 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
// Tail fragment of GSTextureCacheSW::Lookup. NOTE(review): diff output with +/- markers
// stripped — the EOP-terminated array walk and the loopPages lambda are two forms of the same
// loop, which is why both a `}` and a `});` closer appear.
m_textures.insert(t);
for (const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++)
t->m_pages.loopPages([&](uint32 page)
{
const uint32 page = *p;
// Insert the texture at the front of this page's list and keep the value InsertFront returns
// so IncAge can later pass it to EraseIndex.
t->m_erase_it[page] = m_map[page].InsertFront(t);
}
});
return t;
}
// Head of GSTextureCacheSW::InvalidatePages; the rest of the body continues past a hunk header.
// NOTE(review): diff output with +/- markers stripped — the signature and the outer loop each
// appear in a uint32-pointer form followed by a GSOffset::PageLooper form.
void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm)
void GSTextureCacheSW::InvalidatePages(const GSOffset::PageLooper& pages, uint32 psm)
{
// Pointer form walks an array terminated by GSOffset::EOP; PageLooper form runs a lambda per page.
for (const uint32* p = pages; *p != GSOffset::EOP; p++)
pages.loopPages([&](uint32 page)
{
const uint32 page = *p;
// Examine every texture registered for this page.
for (Texture* t : m_map[page])
{
// Only textures for which HasSharedBits(psm, t->m_sharedbits) holds are processed further.
if (GSUtil::HasSharedBits(psm, t->m_sharedbits))
@ -98,7 +95,7 @@ void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm)
t->m_complete = false;
}
}
}
});
}
void GSTextureCacheSW::RemoveAll()
@ -124,11 +121,10 @@ void GSTextureCacheSW::IncAge()
// Fragment of GSTextureCacheSW::IncAge: removal of one texture. NOTE(review): diff output with
// +/- markers stripped — the array walk and the loopPages lambda are two forms of the same loop.
{
// erase() hands back the iterator used to continue the enclosing traversal.
i = m_textures.erase(i);
for (const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++)
t->m_pages.loopPages([&](uint32 page)
{
const uint32 page = *p;
// Remove the texture from this page's list using the index stored in m_erase_it.
m_map[page].EraseIndex(t->m_erase_it[page]);
}
});
// The texture is deleted only after it has been unlinked from every page list.
delete t;
}
@ -162,9 +158,7 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
// Fragment of the GSTextureCacheSW::Texture constructor. NOTE(review): diff output with +/-
// markers stripped; the m_pages.n / m_pages.bm lines and the pageLooperForRect line are
// alternative forms of the same initialisation.
m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM);
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
// Pointer form: page list via GetPages plus a bitmap copied from GetPagesAsBits.
m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
memcpy(m_pages.bm, m_offset->GetPagesAsBits(TEX0), sizeof(m_pages.bm));
// Value form: one PageLooper for the rectangle (0, 0, 1 << TW, 1 << TH).
m_pages = m_offset.pageLooperForRect(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower
@ -176,8 +170,6 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
GSTextureCacheSW::Texture::~Texture()
{
delete[] m_pages.n;
if (m_buff)
{
_aligned_free(m_buff);
@ -223,7 +215,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
GSLocalMemory& mem = m_state->m_mem;
const GSOffset* RESTRICT off = m_offset;
GSOffset off = m_offset;
uint32 blocks = 0;
@ -235,22 +227,20 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
// Fragment of GSTextureCacheSW::Texture::Update: loop setup and the start of the m_repeating
// branch. NOTE(review): diff output with +/- markers stripped — statements using r, bs, x, y
// and off-> are interleaved with statements using off. and bn; not compilable as shown.
int block_pitch = pitch * bs.y;
r = r.srl32(3);
// Helper-based form: loop bounds are derived by shifting with off.blockShiftX() / blockShiftY().
shift += off.blockShiftX();
int bottom = r.bottom >> off.blockShiftY();
int right = r.right >> off.blockShiftX();
// Table-based form: the block size is divided by 8 and shift grows by 3.
bs.x >>= 3;
bs.y >>= 3;
shift += 3;
// Block-number helper positioned at the rectangle's left/top.
GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
if (m_repeating)
{
// Row loop; dst advances by one block row (block_pitch) per iteration in both forms.
for (int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
for (; bn.blkY() < bottom; bn.nextBlockY(), dst += block_pitch)
{
uint32 base = off->block.row[y];
// NOTE(review): the helper-based inner loop has no initializer; presumably nextBlockY()
// resets the X position for the next row — TODO confirm.
for (int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
for (; bn.blkX() < right; bn.nextBlockX())
{
uint32 block = (base + off->block.col[x]) % MAX_BLOCKS;
// In this branch the bit index i is built from the block coordinates (Y << 7) + X.
int i = (bn.blkY() << 7) + bn.blkX();
uint32 block = bn.value();
// Split i into a 32-bit word index (row) and a bit mask within that word (col).
uint32 row = i >> 5;
uint32 col = 1 << (i & 31);
@ -259,7 +249,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
{
m_valid[row] |= col;
(mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA);
(mem.*rtxbP)(block, &dst[bn.blkX() << shift], pitch, m_TEXA);
blocks++;
}
@ -268,13 +258,11 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
}
else
{
for (int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
for (; bn.blkY() < bottom; bn.nextBlockY(), dst += block_pitch)
{
uint32 base = off->block.row[y];
for (int x = r.left; x < r.right; x += bs.x)
for (; bn.blkX() < right; bn.nextBlockX())
{
uint32 block = (base + off->block.col[x]) % MAX_BLOCKS;
uint32 block = bn.value();
uint32 row = block >> 5;
uint32 col = 1 << (block & 31);
@ -283,7 +271,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
{
m_valid[row] |= col;
(mem.*rtxbP)(block, &dst[x << shift], pitch, m_TEXA);
(mem.*rtxbP)(block, &dst[bn.blkX() << shift], pitch, m_TEXA);
blocks++;
}

View File

@ -25,7 +25,8 @@ public:
// Member fragment of the texture class (class header elided by the hunk header above).
// NOTE(review): diff output with +/- markers stripped — m_offset is listed as a pointer and
// then by value; presumably the pointer form is the removed line.
{
public:
GSState* m_state;
GSOffset* m_offset;
GSOffset m_offset;
// Page set assigned from m_offset.pageLooperForRect(...) in the constructor.
GSOffset::PageLooper m_pages;
GIFRegTEX0 m_TEX0;
GIFRegTEXA m_TEXA;
void* m_buff;
@ -36,7 +37,6 @@ public:
std::vector<GSVector2i>* m_p2t;
uint32 m_valid[MAX_PAGES];
std::array<uint16, MAX_PAGES> m_erase_it;
struct { uint32 bm[16]; const uint32* n; } m_pages;
const uint32* RESTRICT m_sharedbits;
// m_valid
@ -61,7 +61,7 @@ public:
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
void InvalidatePages(const uint32* pages, uint32 psm);
void InvalidatePages(const GSOffset::PageLooper& pages, uint32 psm);
void RemoveAll();
void IncAge();

View File

@ -21,7 +21,7 @@
#include "GSLocalMemory.h"
GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
// NOTE(review): diff output with +/- markers stripped — the signature appears twice, first
// returning GSOffset* and then returning GSOffset by value; presumably the pointer form is the
// removed line. As shown, the body ignores bp, bw and psm and unconditionally calls abort().
GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
GSOffset GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
{
abort();
}