mirror of https://github.com/PCSX2/pcsx2.git
GSdx: SPS fixed, some code cleanup and optimization. The PS2 logo is still broken in HW mode; I'll check it later.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5062 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
872301fbeb
commit
2eec75c2ae
|
@ -35,9 +35,9 @@ GPUDrawScanline::~GPUDrawScanline()
|
|||
{
|
||||
}
|
||||
|
||||
void GPUDrawScanline::BeginDraw(const void* param)
|
||||
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||
{
|
||||
memcpy(&m_global, param, sizeof(m_global));
|
||||
memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global));
|
||||
|
||||
if(m_global.sel.tme && m_global.sel.twin)
|
||||
{
|
||||
|
@ -83,7 +83,7 @@ void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
|
|||
|
||||
#ifndef ENABLE_JIT_RASTERIZER
|
||||
|
||||
void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
|
||||
void GPUDrawScanline::SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan)
|
||||
{
|
||||
GPUScanlineSelector sel = m_global.sel;
|
||||
|
||||
|
@ -93,7 +93,7 @@ void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& ds
|
|||
{
|
||||
if(sel.sprite)
|
||||
{
|
||||
GSVector4i t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001();
|
||||
GSVector4i t = (GSVector4i(vertex.t) >> 8) - GSVector4i::x00000001();
|
||||
|
||||
t = t.ps32(t);
|
||||
t = t.upl16(t);
|
||||
|
|
|
@ -29,6 +29,25 @@
|
|||
|
||||
class GPUDrawScanline : public IDrawScanline
|
||||
{
|
||||
public:
|
||||
// Rasterizer work item for the GPU scanline drawer: a GSRasterizerData that
// also carries the scanline global state. GPUDrawScanline::BeginDraw() casts
// the incoming GSRasterizerData to this type and copies `global` into m_global.
class SharedData : public GSRasterizerData
|
||||
{
|
||||
public:
|
||||
// Global scanline state that travels with this work item.
GPUScanlineGlobalData global;
|
||||
|
||||
public:
|
||||
// Starts with no CLUT buffer attached.
SharedData()
|
||||
{
|
||||
// NULL marks "nothing to free" for the destructor below.
global.clut = NULL;
|
||||
}
|
||||
|
||||
// Frees the CLUT buffer, if one was attached, when the work item is destroyed.
virtual ~SharedData()
|
||||
{
|
||||
// NOTE(review): presumably global.clut is allocated with _aligned_malloc
// by whoever fills in `global` - confirm against the caller.
if(global.clut) _aligned_free(global.clut);
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
GPUScanlineGlobalData m_global;
|
||||
GPUScanlineLocalData m_local;
|
||||
|
||||
|
@ -41,12 +60,12 @@ public:
|
|||
|
||||
// IDrawScanline
|
||||
|
||||
void BeginDraw(const void* param);
|
||||
void BeginDraw(const GSRasterizerData* data);
|
||||
void EndDraw(uint64 frame, uint64 ticks, int pixels);
|
||||
|
||||
#ifndef ENABLE_JIT_RASTERIZER
|
||||
|
||||
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan);
|
||||
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
|
|
|
@ -69,9 +69,11 @@ GSTexture* GPURendererSW::GetOutput()
|
|||
|
||||
void GPURendererSW::Draw()
|
||||
{
|
||||
shared_ptr<GSRasterizerData> data(new GPURasterizerData());
|
||||
GPUDrawScanline::SharedData* sd = new GPUDrawScanline::SharedData();
|
||||
|
||||
GPUScanlineGlobalData& gd = *(GPUScanlineGlobalData*)data->param;
|
||||
shared_ptr<GSRasterizerData> data(sd);
|
||||
|
||||
GPUScanlineGlobalData& gd = sd->global;
|
||||
|
||||
const GPUDrawingEnvironment& env = m_env;
|
||||
|
||||
|
@ -169,7 +171,7 @@ void GPURendererSW::Draw()
|
|||
|
||||
m_perfmon.Put(GSPerfMon::Draw, 1);
|
||||
m_perfmon.Put(GSPerfMon::Prim, prims);
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, data->pixels);
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, m_rl->GetPixels());
|
||||
}
|
||||
|
||||
void GPURendererSW::VertexKick()
|
||||
|
|
|
@ -26,28 +26,6 @@
|
|||
|
||||
class GPURendererSW : public GPURendererT<GSVertexSW>
|
||||
{
|
||||
class GPURasterizerData : public GSRasterizerData
|
||||
{
|
||||
public:
|
||||
GPURasterizerData()
|
||||
{
|
||||
GPUScanlineGlobalData* gd = (GPUScanlineGlobalData*)_aligned_malloc(sizeof(GPUScanlineGlobalData), 32);
|
||||
|
||||
gd->clut = NULL;
|
||||
|
||||
param = gd;
|
||||
}
|
||||
|
||||
virtual ~GPURasterizerData()
|
||||
{
|
||||
GPUScanlineGlobalData* gd = (GPUScanlineGlobalData*)param;
|
||||
|
||||
if(gd->clut) _aligned_free(gd->clut);
|
||||
|
||||
_aligned_free(gd);
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
IRasterizer* m_rl;
|
||||
GSTexture* m_texture;
|
||||
|
|
|
@ -1021,7 +1021,6 @@ REG128_(GIFPacked, XYZF2)
|
|||
uint16 _PAD1;
|
||||
uint16 Y;
|
||||
uint16 _PAD2;
|
||||
|
||||
uint32 _PAD3:4;
|
||||
uint32 Z:24;
|
||||
uint32 _PAD4:4;
|
||||
|
@ -1097,19 +1096,24 @@ __aligned(struct, 32) GIFPath
|
|||
GSVector4i::store<true>(&tag, v);
|
||||
reg = 0;
|
||||
regs = v.uph8(v >> 4) & 0x0f0f0f0f;
|
||||
nreg = tag.NREG;
|
||||
nreg = tag.NREG ? tag.NREG : 16;
|
||||
nloop = tag.NLOOP;
|
||||
adonly = nreg == 1 && regs.u8[0] == GIF_REG_A_D;
|
||||
adonly = regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1;
|
||||
}
|
||||
|
||||
__forceinline uint8 GetReg()
|
||||
{
|
||||
return regs.u8[reg]; // GET_GIF_REG(tag, reg);
|
||||
return regs.u8[reg];
|
||||
}
|
||||
|
||||
// Returns the byte of `regs` at position `index`, without touching the
// current `reg` cursor. No bounds check is performed on `index`.
__forceinline uint8 GetReg(uint32 index)
|
||||
{
|
||||
return regs.u8[index];
|
||||
}
|
||||
|
||||
__forceinline bool StepReg()
|
||||
{
|
||||
if((++reg & 0xf) == nreg)
|
||||
if(++reg == nreg)
|
||||
{
|
||||
reg = 0;
|
||||
|
||||
|
|
|
@ -43,16 +43,31 @@ class GSBlock
|
|||
|
||||
public:
|
||||
template<int i, bool aligned, uint32 mask> __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
||||
{
|
||||
GSVector4i v0, v1, v2, v3;
|
||||
|
||||
if(aligned)
|
||||
{
|
||||
const GSVector4i* s0 = (const GSVector4i*)&src[srcpitch * 0];
|
||||
const GSVector4i* s1 = (const GSVector4i*)&src[srcpitch * 1];
|
||||
|
||||
GSVector4i v0 = GSVector4i::load<aligned>(&s0[0]);
|
||||
GSVector4i v1 = GSVector4i::load<aligned>(&s0[1]);
|
||||
GSVector4i v2 = GSVector4i::load<aligned>(&s1[0]);
|
||||
GSVector4i v3 = GSVector4i::load<aligned>(&s1[1]);
|
||||
v0 = GSVector4i::load<aligned>(&s0[0]);
|
||||
v1 = GSVector4i::load<aligned>(&s0[1]);
|
||||
v2 = GSVector4i::load<aligned>(&s1[0]);
|
||||
v3 = GSVector4i::load<aligned>(&s1[1]);
|
||||
|
||||
GSVector4i::sw64(v0, v2, v1, v3);
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint8* s0 = &src[srcpitch * 0];
|
||||
const uint8* s1 = &src[srcpitch * 1];
|
||||
|
||||
v0 = GSVector4i::load(&s0[0], &s1[0]);
|
||||
v1 = GSVector4i::load(&s0[8], &s1[8]);
|
||||
v2 = GSVector4i::load(&s0[16], &s1[16]);
|
||||
v3 = GSVector4i::load(&s0[24], &s1[24]);
|
||||
}
|
||||
|
||||
if(mask == 0xffffffff)
|
||||
{
|
||||
|
@ -263,15 +278,27 @@ public:
|
|||
}
|
||||
|
||||
template<int i, bool aligned> __forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
||||
{
|
||||
GSVector4i v0, v1, v2, v3;
|
||||
|
||||
if(aligned)
|
||||
{
|
||||
const GSVector4i* s = (const GSVector4i*)src;
|
||||
|
||||
GSVector4i v0 = s[i * 4 + 0];
|
||||
GSVector4i v1 = s[i * 4 + 1];
|
||||
GSVector4i v2 = s[i * 4 + 2];
|
||||
GSVector4i v3 = s[i * 4 + 3];
|
||||
v0 = s[i * 4 + 0];
|
||||
v1 = s[i * 4 + 1];
|
||||
v2 = s[i * 4 + 2];
|
||||
v3 = s[i * 4 + 3];
|
||||
|
||||
GSVector4i::sw64(v0, v1, v2, v3);
|
||||
}
|
||||
else
|
||||
{
|
||||
v0 = GSVector4i::load(&src[i * 64 + 0], &src[i * 64 + 16]);
|
||||
v1 = GSVector4i::load(&src[i * 64 + 32], &src[i * 64 + 48]);
|
||||
v2 = GSVector4i::load(&src[i * 64 + 8], &src[i * 64 + 24]);
|
||||
v3 = GSVector4i::load(&src[i * 64 + 40], &src[i * 64 + 56]);
|
||||
}
|
||||
|
||||
GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];
|
||||
GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];
|
||||
|
|
|
@ -36,9 +36,9 @@ GSDrawScanline::~GSDrawScanline()
|
|||
{
|
||||
}
|
||||
|
||||
void GSDrawScanline::BeginDraw(const void* param)
|
||||
void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||
{
|
||||
memcpy(&m_global, param, sizeof(m_global));
|
||||
memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global));
|
||||
|
||||
if(m_global.sel.mmin && m_global.sel.lcm)
|
||||
{
|
||||
|
@ -102,6 +102,8 @@ void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
|
|||
|
||||
#ifndef ENABLE_JIT_RASTERIZER
|
||||
|
||||
// FIXME: something's not right with the sky in burnout 3
|
||||
|
||||
void GSDrawScanline::SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan)
|
||||
{
|
||||
GSScanlineSelector sel = m_global.sel;
|
||||
|
|
|
@ -29,6 +29,14 @@
|
|||
|
||||
class GSDrawScanline : public IDrawScanline
|
||||
{
|
||||
public:
|
||||
// Rasterizer work item for the GS scanline drawer: a GSRasterizerData that
// also carries the scanline global state. GSDrawScanline::BeginDraw() casts
// the incoming GSRasterizerData to this type and copies `global` into m_global.
class SharedData : public GSRasterizerData
|
||||
{
|
||||
public:
|
||||
// Global scanline state that travels with this work item.
GSScanlineGlobalData global;
|
||||
};
|
||||
|
||||
protected:
|
||||
GSScanlineGlobalData m_global;
|
||||
GSScanlineLocalData m_local;
|
||||
|
||||
|
@ -50,14 +58,14 @@ public:
|
|||
|
||||
// IDrawScanline
|
||||
|
||||
void BeginDraw(const void* param);
|
||||
void BeginDraw(const GSRasterizerData* data);
|
||||
void EndDraw(uint64 frame, uint64 ticks, int pixels);
|
||||
|
||||
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
|
||||
#ifndef ENABLE_JIT_RASTERIZER
|
||||
|
||||
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan);
|
||||
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
|
||||
|
|
|
@ -1301,13 +1301,13 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
{
|
||||
if(len <= 0) return;
|
||||
|
||||
uint8* pb = (uint8*)dst;
|
||||
uint16* pw = (uint16*)dst;
|
||||
uint32* pd = (uint32*)dst;
|
||||
uint8* RESTRICT pb = (uint8*)dst;
|
||||
uint16* RESTRICT pw = (uint16*)dst;
|
||||
uint32* RESTRICT pd = (uint32*)dst;
|
||||
|
||||
uint32 bp = BITBLTBUF.SBP;
|
||||
uint32 bw = BITBLTBUF.SBW;
|
||||
psm_t* psm = &m_psm[BITBLTBUF.SPSM];
|
||||
psm_t* RESTRICT psm = &m_psm[BITBLTBUF.SPSM];
|
||||
|
||||
int x = tx;
|
||||
int y = ty;
|
||||
|
@ -1319,16 +1319,26 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
case PSM_PSMCT32:
|
||||
case PSM_PSMZ32:
|
||||
|
||||
// MGS1 intro, fade effect between two scenes (airplane outside-inside transition)
|
||||
|
||||
len /= 4;
|
||||
|
||||
while(len > 0)
|
||||
{
|
||||
uint32 addr = psm->pa(0, y, bp, bw);
|
||||
int* offset = psm->rowOffset[y & 7];
|
||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
|
||||
|
||||
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pd += 4)
|
||||
{
|
||||
pd[0] = ps[offset[x + 0]];
|
||||
pd[1] = ps[offset[x + 1]];
|
||||
pd[2] = ps[offset[x + 2]];
|
||||
pd[3] = ps[offset[x + 3]];
|
||||
}
|
||||
|
||||
for(; len > 0 && x < ex; len--, x++, pd++)
|
||||
{
|
||||
*pd = ReadPixel32(addr + offset[x]);
|
||||
*pd = ps[offset[x]];
|
||||
}
|
||||
|
||||
if(x == ex) {x = sx; y++;}
|
||||
|
@ -1343,16 +1353,16 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
|
||||
while(len > 0)
|
||||
{
|
||||
uint32 addr = psm->pa(0, y, bp, bw);
|
||||
int* offset = psm->rowOffset[y & 7];
|
||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
|
||||
|
||||
for(; len > 0 && x < ex; len--, x++, pb += 3)
|
||||
{
|
||||
uint32 c = ReadPixel32(addr + offset[x]);
|
||||
uint32 c = ps[offset[x]];
|
||||
|
||||
pb[0] = ((uint8*)&c)[0];
|
||||
pb[1] = ((uint8*)&c)[1];
|
||||
pb[2] = ((uint8*)&c)[2];
|
||||
pb[0] = (uint8)(c);
|
||||
pb[1] = (uint8)(c >> 8);
|
||||
pb[2] = (uint8)(c >> 16);
|
||||
}
|
||||
|
||||
if(x == ex) {x = sx; y++;}
|
||||
|
@ -1369,12 +1379,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
|
||||
while(len > 0)
|
||||
{
|
||||
uint32 addr = psm->pa(0, y, bp, bw);
|
||||
int* offset = psm->rowOffset[y & 7];
|
||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||
uint16* RESTRICT ps = &m_vm16[psm->pa(0, y, bp, bw)];
|
||||
|
||||
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pw += 4)
|
||||
{
|
||||
pw[0] = ps[offset[x + 0]];
|
||||
pw[1] = ps[offset[x + 1]];
|
||||
pw[2] = ps[offset[x + 2]];
|
||||
pw[3] = ps[offset[x + 3]];
|
||||
}
|
||||
|
||||
for(; len > 0 && x < ex; len--, x++, pw++)
|
||||
{
|
||||
*pw = ReadPixel16(addr + offset[x]);
|
||||
*pw = ps[offset[x]];
|
||||
}
|
||||
|
||||
if(x == ex) {x = sx; y++;}
|
||||
|
@ -1386,12 +1404,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
|
||||
while(len > 0)
|
||||
{
|
||||
uint32 addr = psm->pa(0, y, bp, bw);
|
||||
int* offset = psm->rowOffset[y & 7];
|
||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||
uint8* RESTRICT ps = &m_vm8[psm->pa(0, y, bp, bw)];
|
||||
|
||||
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
|
||||
{
|
||||
pb[0] = ps[offset[x + 0]];
|
||||
pb[1] = ps[offset[x + 1]];
|
||||
pb[2] = ps[offset[x + 2]];
|
||||
pb[3] = ps[offset[x + 3]];
|
||||
}
|
||||
|
||||
for(; len > 0 && x < ex; len--, x++, pb++)
|
||||
{
|
||||
*pb = ReadPixel8(addr + offset[x]);
|
||||
*pb = ps[offset[x]];
|
||||
}
|
||||
|
||||
if(x == ex) {x = sx; y++;}
|
||||
|
@ -1404,7 +1430,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
while(len > 0)
|
||||
{
|
||||
uint32 addr = psm->pa(0, y, bp, bw);
|
||||
int* offset = psm->rowOffset[y & 7];
|
||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||
|
||||
for(; len > 0 && x < ex; len--, x += 2, pb++)
|
||||
{
|
||||
|
@ -1420,12 +1446,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
|
||||
while(len > 0)
|
||||
{
|
||||
uint32 addr = psm->pa(0, y, bp, bw);
|
||||
int* offset = psm->rowOffset[y & 7];
|
||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
|
||||
|
||||
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
|
||||
{
|
||||
pb[0] = (uint8)(ps[offset[x + 0]] >> 24);
|
||||
pb[1] = (uint8)(ps[offset[x + 1]] >> 24);
|
||||
pb[2] = (uint8)(ps[offset[x + 2]] >> 24);
|
||||
pb[3] = (uint8)(ps[offset[x + 3]] >> 24);
|
||||
}
|
||||
|
||||
for(; len > 0 && x < ex; len--, x++, pb++)
|
||||
{
|
||||
*pb = ReadPixel8H(addr + offset[x]);
|
||||
*pb = (uint8)(ps[offset[x]] >> 24);
|
||||
}
|
||||
|
||||
if(x == ex) {x = sx; y++;}
|
||||
|
@ -1437,12 +1471,15 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
|
||||
while(len > 0)
|
||||
{
|
||||
uint32 addr = psm->pa(0, y, bp, bw);
|
||||
int* offset = psm->rowOffset[y & 7];
|
||||
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
|
||||
|
||||
for(; len > 0 && x < ex; len--, x += 2, pb++)
|
||||
{
|
||||
*pb = ReadPixel4HL(addr + offset[x + 0]) | (ReadPixel4HL(addr + offset[x + 1]) << 4);
|
||||
uint32 c0 = (ps[offset[x + 0]] >> 24) & 0x0f;
|
||||
uint32 c1 = (ps[offset[x + 1]] >> 20) & 0xf0;
|
||||
|
||||
*pb = (uint8)(c0 | c1);
|
||||
}
|
||||
|
||||
if(x == ex) {x = sx; y++;}
|
||||
|
@ -1454,12 +1491,15 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
|||
|
||||
while(len > 0)
|
||||
{
|
||||
uint32 addr = psm->pa(0, y, bp, bw);
|
||||
int* offset = psm->rowOffset[y & 7];
|
||||
int* RESTRICT offset = psm->rowOffset[y & 7];
|
||||
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
|
||||
|
||||
for(; len > 0 && x < ex; len--, x += 2, pb++)
|
||||
{
|
||||
*pb = ReadPixel4HH(addr + offset[x + 0]) | (ReadPixel4HH(addr + offset[x + 1]) << 4);
|
||||
uint32 c0 = (ps[offset[x + 0]] >> 28) & 0x0f;
|
||||
uint32 c1 = (ps[offset[x + 1]] >> 24) & 0xf0;
|
||||
|
||||
*pb = (uint8)(c0 | c1);
|
||||
}
|
||||
|
||||
if(x == ex) {x = sx; y++;}
|
||||
|
|
|
@ -35,6 +35,7 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe
|
|||
, m_id(id)
|
||||
, m_threads(threads)
|
||||
, m_perfmon(perfmon)
|
||||
, m_pixels(0)
|
||||
{
|
||||
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
|
||||
m_edge.count = 0;
|
||||
|
@ -98,16 +99,28 @@ int GSRasterizer::FindMyNextScanline(int top) const
|
|||
|
||||
void GSRasterizer::Queue(shared_ptr<GSRasterizerData> data)
|
||||
{
|
||||
Draw(data);
|
||||
Draw(data.get());
|
||||
}
|
||||
|
||||
void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
|
||||
// Returns the current value of the m_pixels counter.
// reset: when true, the counter is set back to zero after its value is read.
int GSRasterizer::GetPixels(bool reset)
|
||||
{
|
||||
// Read the value first so the reset below does not affect the result.
int pixels = m_pixels;
|
||||
|
||||
if(reset)
|
||||
{
|
||||
m_pixels = 0;
|
||||
}
|
||||
|
||||
return pixels;
|
||||
}
|
||||
|
||||
void GSRasterizer::Draw(GSRasterizerData* data)
|
||||
{
|
||||
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
|
||||
|
||||
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
|
||||
|
||||
m_ds->BeginDraw(data->param);
|
||||
m_ds->BeginDraw(data);
|
||||
|
||||
const GSVertexSW* vertex = data->vertex;
|
||||
const GSVertexSW* vertex_end = data->vertex + data->vertex_count;
|
||||
|
@ -123,8 +136,6 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
|
|||
m_fscissor_x = GSVector4(data->scissor).xzxz();
|
||||
m_fscissor_y = GSVector4(data->scissor).ywyw();
|
||||
|
||||
m_pixels = 0;
|
||||
|
||||
uint64 start = __rdtsc();
|
||||
|
||||
switch(data->primclass)
|
||||
|
@ -193,9 +204,6 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
|
|||
|
||||
uint64 ticks = __rdtsc() - start;
|
||||
|
||||
_InterlockedExchangeAdd(&data->ticks, (long)ticks);
|
||||
_InterlockedExchangeAdd(&data->pixels, m_pixels);
|
||||
|
||||
m_ds->EndDraw(data->frame, ticks, m_pixels);
|
||||
}
|
||||
|
||||
|
@ -907,6 +915,18 @@ void GSRasterizerList::Sync()
|
|||
m_sync_count++;
|
||||
}
|
||||
|
||||
// Returns the sum of GetPixels(reset) over every worker in m_workers.
// reset: forwarded unchanged to each worker.
// NOTE(review): no locking is visible in this function; presumably callers
// invoke Sync() before reading the counters - confirm.
int GSRasterizerList::GetPixels(bool reset)
|
||||
{
|
||||
int pixels = 0;
|
||||
|
||||
for(size_t i = 0; i < m_workers.size(); i++)
|
||||
{
|
||||
pixels += m_workers[i]->GetPixels(reset);
|
||||
}
|
||||
|
||||
return pixels;
|
||||
}
|
||||
|
||||
void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item)
|
||||
{
|
||||
if(item->solidrect)
|
||||
|
@ -945,6 +965,11 @@ GSRasterizerList::GSWorker::~GSWorker()
|
|||
delete m_r;
|
||||
}
|
||||
|
||||
// Forwards to the GetPixels() of the rasterizer held in m_r and returns its
// result; `reset` is passed through unchanged.
int GSRasterizerList::GSWorker::GetPixels(bool reset)
|
||||
{
|
||||
return m_r->GetPixels(reset);
|
||||
}
|
||||
|
||||
void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item)
|
||||
{
|
||||
GSVector4i r = item->bbox.rintersect(item->scissor);
|
||||
|
@ -957,5 +982,5 @@ void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item)
|
|||
|
||||
void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
|
||||
{
|
||||
m_r->Draw(item);
|
||||
m_r->Draw(item.get());
|
||||
}
|
||||
|
|
|
@ -42,12 +42,6 @@ public:
|
|||
bool solidrect;
|
||||
bool syncpoint;
|
||||
uint64 frame;
|
||||
void* param;
|
||||
|
||||
// drawing stats
|
||||
|
||||
volatile long ticks;
|
||||
volatile long pixels;
|
||||
|
||||
GSRasterizerData()
|
||||
: scissor(GSVector4i::zero())
|
||||
|
@ -61,17 +55,12 @@ public:
|
|||
, solidrect(false)
|
||||
, syncpoint(false)
|
||||
, frame(0)
|
||||
, param(NULL)
|
||||
, ticks(0)
|
||||
, pixels(0)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~GSRasterizerData()
|
||||
{
|
||||
if(buff != NULL) _aligned_free(buff);
|
||||
|
||||
// derived class should free param and its members
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -92,7 +81,7 @@ public:
|
|||
IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {}
|
||||
virtual ~IDrawScanline() {}
|
||||
|
||||
virtual void BeginDraw(const void* param) = 0;
|
||||
virtual void BeginDraw(const GSRasterizerData* data) = 0;
|
||||
virtual void EndDraw(uint64 frame, uint64 ticks, int pixels) = 0;
|
||||
|
||||
#ifdef ENABLE_JIT_RASTERIZER
|
||||
|
@ -121,6 +110,7 @@ public:
|
|||
|
||||
virtual void Queue(shared_ptr<GSRasterizerData> data) = 0;
|
||||
virtual void Sync() = 0;
|
||||
virtual int GetPixels(bool reset = true) = 0;
|
||||
};
|
||||
|
||||
__aligned(class, 32) GSRasterizer : public IRasterizer
|
||||
|
@ -160,12 +150,13 @@ public:
|
|||
__forceinline bool IsOneOfMyScanlines(int top, int bottom) const;
|
||||
__forceinline int FindMyNextScanline(int top) const;
|
||||
|
||||
void Draw(shared_ptr<GSRasterizerData> data);
|
||||
void Draw(GSRasterizerData* data);
|
||||
|
||||
// IRasterizer
|
||||
|
||||
void Queue(shared_ptr<GSRasterizerData> data);
|
||||
void Sync() {}
|
||||
int GetPixels(bool reset);
|
||||
};
|
||||
|
||||
class GSRasterizerList
|
||||
|
@ -181,6 +172,8 @@ protected:
|
|||
GSWorker(GSRasterizer* r);
|
||||
virtual ~GSWorker();
|
||||
|
||||
int GetPixels(bool reset);
|
||||
|
||||
// GSJobQueue
|
||||
|
||||
void Push(const shared_ptr<GSRasterizerData>& item);
|
||||
|
@ -227,4 +220,5 @@ public:
|
|||
|
||||
void Queue(shared_ptr<GSRasterizerData> data);
|
||||
void Sync();
|
||||
int GetPixels(bool reset);
|
||||
};
|
||||
|
|
|
@ -80,7 +80,7 @@ void GSRendererDX9::ConvertVertex(size_t dst_index, size_t src_index)
|
|||
{
|
||||
if(fst)
|
||||
{
|
||||
t = GSVector4(GSVector4i::load(s->UV.u32[0]).upl16());
|
||||
t = GSVector4(GSVector4i::load(s->UV).upl16());
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -88,7 +88,7 @@ void GSRendererDX9::ConvertVertex(size_t dst_index, size_t src_index)
|
|||
}
|
||||
}
|
||||
|
||||
t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG.u32[1])));
|
||||
t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));
|
||||
|
||||
d->p = p;
|
||||
d->t = t;
|
||||
|
|
|
@ -31,7 +31,6 @@ GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCac
|
|||
, m_reset(false)
|
||||
, m_upscale_multiplier(1)
|
||||
{
|
||||
m_nativeres = !!theApp.GetConfig("nativeres", 0);
|
||||
m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1);
|
||||
m_userhacks_skipdraw = theApp.GetConfig("UserHacks_SkipDraw", 0);
|
||||
|
||||
|
@ -52,7 +51,10 @@ GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCac
|
|||
m_height = 512 * m_upscale_multiplier; // 448 is also common, but this is not always detected right.
|
||||
}
|
||||
}
|
||||
else m_upscale_multiplier = 1;
|
||||
else
|
||||
{
|
||||
m_upscale_multiplier = 1;
|
||||
}
|
||||
}
|
||||
|
||||
GSRendererHW::~GSRendererHW()
|
||||
|
@ -173,8 +175,6 @@ void GSRendererHW::Draw()
|
|||
{
|
||||
if(m_dev->IsLost()) return;
|
||||
|
||||
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
|
||||
|
||||
#ifndef DISABLE_CRC_HACKS
|
||||
|
||||
if(GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) return;
|
||||
|
@ -459,8 +459,10 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
|||
|
||||
t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);
|
||||
|
||||
memcpy(&m_vertex.buff[m_vertex.stride * 2], &m_vertex.buff[m_vertex.stride * (m_vertex.next - 2)], m_vertex.stride);
|
||||
memcpy(&m_vertex.buff[m_vertex.stride * 3], &m_vertex.buff[m_vertex.stride * (m_vertex.next - 1)], m_vertex.stride);
|
||||
size_t stride = m_vertex.stride;
|
||||
|
||||
memcpy(&m_vertex.buff[stride * 2], &m_vertex.buff[stride * (m_vertex.next - 2)], stride);
|
||||
memcpy(&m_vertex.buff[stride * 3], &m_vertex.buff[stride * (m_vertex.next - 1)], stride);
|
||||
|
||||
m_index.buff[0] = 0;
|
||||
m_index.buff[1] = 1;
|
||||
|
|
|
@ -33,7 +33,6 @@ private:
|
|||
int m_height;
|
||||
int m_skip;
|
||||
bool m_reset;
|
||||
bool m_nativeres;
|
||||
int m_upscale_multiplier;
|
||||
int m_userhacks_skipdraw;
|
||||
|
||||
|
|
|
@ -30,14 +30,7 @@ GSRendererSW::GSRendererSW(int threads)
|
|||
{
|
||||
InitConvertVertex(GSRendererSW);
|
||||
|
||||
m_ci[GS_POINTLIST] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_POINTLIST>;
|
||||
m_ci[GS_LINELIST] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_LINELIST>;
|
||||
m_ci[GS_LINESTRIP] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_LINESTRIP>;
|
||||
m_ci[GS_TRIANGLELIST] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_TRIANGLELIST>;
|
||||
m_ci[GS_TRIANGLESTRIP] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_TRIANGLESTRIP>;
|
||||
m_ci[GS_TRIANGLEFAN] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_TRIANGLEFAN>;
|
||||
m_ci[GS_SPRITE] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_SPRITE>;
|
||||
m_ci[GS_INVALID] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_INVALID>;
|
||||
m_nativeres = true; // ignore ini, sw is always native
|
||||
|
||||
m_tc = new GSTextureCacheSW(this);
|
||||
|
||||
|
@ -165,10 +158,12 @@ void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
|
|||
GSVertex* s = (GSVertex*)((GSVertexSW*)m_vertex.buff + src_index);
|
||||
GSVertexSW* d = (GSVertexSW*)m_vertex.buff + dst_index;
|
||||
|
||||
ASSERT(d->_pad.u32[0] != 0x12345678);
|
||||
|
||||
uint32 z = s->XYZ.Z;
|
||||
|
||||
GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - (GSVector4i)m_context->XYOFFSET;
|
||||
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
|
||||
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
|
||||
|
||||
GSVector4 p, t, c;
|
||||
|
||||
|
@ -178,7 +173,7 @@ void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
|
|||
{
|
||||
if(fst)
|
||||
{
|
||||
t = GSVector4(GSVector4i::load(s->UV.u32[0]).upl16() << (16 - 4));
|
||||
t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -193,138 +188,41 @@ void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
|
|||
d->c = c;
|
||||
d->t = t;
|
||||
|
||||
#ifdef _DEBUG
|
||||
d->_pad.u32[0] = 0x12345678; // means trouble if this has already been set, should only convert each vertex once
|
||||
#endif
|
||||
|
||||
if(prim == GS_SPRITE)
|
||||
{
|
||||
d->t.u32[3] = z;
|
||||
}
|
||||
}
|
||||
|
||||
template<uint32 prim>
|
||||
size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count)
|
||||
{
|
||||
//
|
||||
memcpy(dst, src, sizeof(uint32) * count); return count;
|
||||
|
||||
// TODO: IsQuad
|
||||
|
||||
GSVector4 scissor = m_context->scissor.ex;
|
||||
|
||||
const GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;
|
||||
const uint32* src_end = src + count;
|
||||
uint32* dst_base = dst;
|
||||
|
||||
while(src < src_end)
|
||||
{
|
||||
GSVector4 pmin, pmax;
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
pmin = v[src[0]].p;
|
||||
pmax = v[src[0]].p;
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
case GS_SPRITE:
|
||||
pmin = v[src[0]].p.min(v[src[1]].p);
|
||||
pmax = v[src[0]].p.max(v[src[1]].p);
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
pmin = v[src[0]].p.min(v[src[1]].p).min(v[src[2]].p);
|
||||
pmax = v[src[0]].p.max(v[src[1]].p).max(v[src[2]].p);
|
||||
break;
|
||||
}
|
||||
|
||||
GSVector4 test = GSVector4::zero(); // (pmax < scissor) | (pmin > scissor.zwxy());
|
||||
/*
|
||||
GSVector4 tmp;
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
// are in line or just two of them are the same (cross product == 0)
|
||||
tmp = (v[src[1]].p - v[src[0]].p) * (v[src[2]].p - v[src[0]].p).yxwz();
|
||||
test |= tmp == tmp.yxwz();
|
||||
break;
|
||||
}
|
||||
*/
|
||||
switch(prim)
|
||||
{
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
case GS_SPRITE:
|
||||
test |= pmin.ceil() == pmax.ceil();
|
||||
break;
|
||||
}
|
||||
|
||||
bool pass = test.xyxy().allfalse();
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
if(pass) {dst[0] = src[0]; dst++;}
|
||||
src++;
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
case GS_SPRITE:
|
||||
if(pass) {dst[0] = src[0]; dst[1] = src[1]; dst += 2;}
|
||||
src += 2;
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
if(pass) {dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst += 3;}
|
||||
src += 3;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return dst - dst_base;
|
||||
}
|
||||
|
||||
void GSRendererSW::UpdateVertexKick()
|
||||
{
|
||||
GSRenderer::UpdateVertexKick();
|
||||
|
||||
m_cif = m_ci[PRIM->PRIM];
|
||||
}
|
||||
|
||||
void GSRendererSW::Draw()
|
||||
{
|
||||
const GSDrawingContext* context = m_context;
|
||||
SharedData* sd = new SharedData(this);
|
||||
|
||||
shared_ptr<GSRasterizerData> data(new GSRasterizerData2(this));
|
||||
shared_ptr<GSRasterizerData> data(sd);
|
||||
|
||||
data->primclass = GSUtil::GetPrimClass(PRIM->PRIM);
|
||||
if(!GetScanlineGlobalData(sd)) return;
|
||||
|
||||
//
|
||||
|
||||
data->primclass = m_vt->m_primclass;
|
||||
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32);
|
||||
data->vertex = (GSVertexSW*)data->buff;
|
||||
data->vertex_count = m_vertex.next;
|
||||
data->index = (uint32*)(data->buff + sizeof(GSVertexSW) * m_vertex.next);
|
||||
data->index_count = (this->*m_cif)(data->index, m_index.buff, m_index.tail);
|
||||
|
||||
m_index.tail = data->index_count;
|
||||
|
||||
if(data->index_count == 0) return;
|
||||
|
||||
// TODO: merge these
|
||||
data->index_count = m_index.tail;
|
||||
|
||||
memcpy(data->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next);
|
||||
|
||||
m_vt->Update(data->vertex, data->index, data->index_count, data->primclass);
|
||||
memcpy(data->index, m_index.buff, sizeof(uint32) * m_index.tail);
|
||||
|
||||
//
|
||||
|
||||
GSRasterizerData2* data2 = (GSRasterizerData2*)data.get();
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
if(!GetScanlineGlobalData(data2)) return;
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
|
||||
GSScanlineGlobalData& gd = sd->global;
|
||||
|
||||
GSVector4i scissor = GSVector4i(context->scissor.in);
|
||||
GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil()));
|
||||
|
@ -333,7 +231,7 @@ void GSRendererSW::Draw()
|
|||
|
||||
data->scissor = scissor;
|
||||
data->bbox = bbox;
|
||||
data->solidrect = gd->sel.IsSolidRect();
|
||||
data->solidrect = gd.sel.IsSolidRect();
|
||||
data->frame = m_perfmon.GetFrame();
|
||||
|
||||
//
|
||||
|
@ -343,25 +241,25 @@ void GSRendererSW::Draw()
|
|||
|
||||
GSVector4i r = bbox.rintersect(scissor);
|
||||
|
||||
if(gd->sel.fwrite)
|
||||
if(gd.sel.fwrite)
|
||||
{
|
||||
fb_pages = m_context->offset.fb->GetPages(r);
|
||||
fb_pages = context->offset.fb->GetPages(r);
|
||||
|
||||
m_tc->InvalidatePages(fb_pages, m_context->offset.fb->psm);
|
||||
m_tc->InvalidatePages(fb_pages, context->offset.fb->psm);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
if(gd.sel.zwrite)
|
||||
{
|
||||
zb_pages = m_context->offset.zb->GetPages(r);
|
||||
zb_pages = context->offset.zb->GetPages(r);
|
||||
|
||||
m_tc->InvalidatePages(zb_pages, m_context->offset.zb->psm);
|
||||
m_tc->InvalidatePages(zb_pages, context->offset.zb->psm);
|
||||
}
|
||||
|
||||
// set data->syncpoint
|
||||
|
||||
if(m_fzb != m_context->offset.fzb)
|
||||
if(m_fzb != context->offset.fzb)
|
||||
{
|
||||
m_fzb = m_context->offset.fzb;
|
||||
m_fzb = context->offset.fzb;
|
||||
|
||||
data->syncpoint = true;
|
||||
}
|
||||
|
@ -371,7 +269,7 @@ void GSRendererSW::Draw()
|
|||
|
||||
if(!data->syncpoint)
|
||||
{
|
||||
if(gd->sel.fwrite)
|
||||
if(gd.sel.fwrite)
|
||||
{
|
||||
for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
|
@ -387,7 +285,7 @@ void GSRendererSW::Draw()
|
|||
|
||||
if(!data->syncpoint)
|
||||
{
|
||||
if(gd->sel.zwrite)
|
||||
if(gd.sel.zwrite)
|
||||
{
|
||||
for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
|
@ -403,7 +301,7 @@ void GSRendererSW::Draw()
|
|||
|
||||
//
|
||||
|
||||
data2->UseTargetPages(fb_pages, zb_pages);
|
||||
sd->UseTargetPages(fb_pages, zb_pages);
|
||||
|
||||
//
|
||||
|
||||
|
@ -484,6 +382,8 @@ void GSRendererSW::Sync(int reason)
|
|||
GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync);
|
||||
|
||||
m_rl->Sync();
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, m_rl->GetPixels());
|
||||
}
|
||||
|
||||
void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
|
||||
|
@ -592,9 +492,9 @@ void GSRendererSW::ReleasePages(const uint32* pages, int type)
|
|||
|
||||
#include "GSTextureSW.h"
|
||||
|
||||
bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
||||
bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||
{
|
||||
GSScanlineGlobalData& gd = *(GSScanlineGlobalData*)data2->param;
|
||||
GSScanlineGlobalData& gd = data->global;
|
||||
|
||||
const GSDrawingEnvironment& env = m_env;
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
@ -710,7 +610,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
|
||||
if(t == NULL) {ASSERT(0); return false;}
|
||||
|
||||
data2->UseSourcePages(t, 0);
|
||||
data->UseSourcePages(t, 0);
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
|
@ -863,7 +763,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
|
||||
if(t == NULL) {ASSERT(0); return false;}
|
||||
|
||||
data2->UseSourcePages(t, i);
|
||||
data->UseSourcePages(t, i);
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
|
@ -908,19 +808,19 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
{
|
||||
// skip per pixel division if q is constant
|
||||
|
||||
GSVertexSW* RESTRICT v = data2->vertex;
|
||||
GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;// data->vertex;
|
||||
|
||||
if(m_vt->m_eq.q)
|
||||
{
|
||||
gd.sel.fst = 1;
|
||||
|
||||
const GSVector4& t = v[data2->index[0]].t;
|
||||
const GSVector4& t = v[m_index.buff[0]].t; // v[data->index[0]].t;
|
||||
|
||||
if(t.z != 1.0f)
|
||||
{
|
||||
GSVector4 w = t.zzzz().rcpnr();
|
||||
|
||||
for(int i = 0, j = data2->vertex_count; i < j; i++)
|
||||
for(int i = 0, j = m_vertex.next/*data->vertex_count*/; i < j; i++)
|
||||
{
|
||||
GSVector4 t = v[i].t;
|
||||
|
||||
|
@ -932,7 +832,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
{
|
||||
gd.sel.fst = 1;
|
||||
|
||||
for(int i = 0, j = data2->vertex_count; i < j; i += 2)
|
||||
for(int i = 0, j = m_vertex.next/*data->vertex_count*/; i < j; i += 2)
|
||||
{
|
||||
GSVector4 t0 = v[i + 0].t;
|
||||
GSVector4 t1 = v[i + 1].t;
|
||||
|
@ -953,9 +853,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
|
||||
GSVector4 half(0x8000, 0x8000);
|
||||
|
||||
GSVertexSW* RESTRICT v = data2->vertex;
|
||||
GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;// data->vertex;
|
||||
|
||||
for(int i = 0, j = data2->vertex_count; i < j; i++)
|
||||
for(int i = 0, j = m_vertex.next/*data->vertex_count*/; i < j; i++)
|
||||
{
|
||||
GSVector4 t = v[i].t;
|
||||
|
||||
|
@ -1117,36 +1017,30 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
return true;
|
||||
}
|
||||
|
||||
GSRendererSW::GSRasterizerData2::GSRasterizerData2(GSRendererSW* parent)
|
||||
GSRendererSW::SharedData::SharedData(GSRendererSW* parent)
|
||||
: m_parent(parent)
|
||||
, m_fb_pages(NULL)
|
||||
, m_zb_pages(NULL)
|
||||
, m_using_pages(false)
|
||||
{
|
||||
memset(m_tex_pages, 0, sizeof(m_tex_pages));
|
||||
m_tex_pages[0] = NULL;
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32);
|
||||
global.sel.key = 0;
|
||||
|
||||
gd->sel.key = 0;
|
||||
|
||||
gd->clut = NULL;
|
||||
gd->dimx = NULL;
|
||||
|
||||
param = gd;
|
||||
global.clut = NULL;
|
||||
global.dimx = NULL;
|
||||
}
|
||||
|
||||
GSRendererSW::GSRasterizerData2::~GSRasterizerData2()
|
||||
GSRendererSW::SharedData::~SharedData()
|
||||
{
|
||||
if(m_using_pages)
|
||||
{
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
|
||||
|
||||
if(gd->sel.fwrite)
|
||||
if(global.sel.fwrite)
|
||||
{
|
||||
m_parent->ReleasePages(m_fb_pages, 0);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
if(global.sel.zwrite)
|
||||
{
|
||||
m_parent->ReleasePages(m_zb_pages, 1);
|
||||
}
|
||||
|
@ -1160,31 +1054,23 @@ GSRendererSW::GSRasterizerData2::~GSRasterizerData2()
|
|||
m_parent->ReleasePages(m_tex_pages[i], 2);
|
||||
}
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
|
||||
|
||||
if(gd->clut) _aligned_free(gd->clut);
|
||||
if(gd->dimx) _aligned_free(gd->dimx);
|
||||
|
||||
_aligned_free(gd);
|
||||
|
||||
m_parent->m_perfmon.Put(GSPerfMon::Fillrate, pixels);
|
||||
if(global.clut) _aligned_free(global.clut);
|
||||
if(global.dimx) _aligned_free(global.dimx);
|
||||
}
|
||||
|
||||
void GSRendererSW::GSRasterizerData2::UseTargetPages(const uint32* fb_pages, const uint32* zb_pages)
|
||||
void GSRendererSW::SharedData::UseTargetPages(const uint32* fb_pages, const uint32* zb_pages)
|
||||
{
|
||||
if(m_using_pages) return;
|
||||
|
||||
m_fb_pages = fb_pages;
|
||||
m_zb_pages = zb_pages;
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
|
||||
|
||||
if(gd->sel.fwrite)
|
||||
if(global.sel.fwrite)
|
||||
{
|
||||
m_parent->UsePages(fb_pages, 0);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
if(global.sel.zwrite)
|
||||
{
|
||||
m_parent->UsePages(zb_pages, 1);
|
||||
}
|
||||
|
@ -1192,11 +1078,12 @@ void GSRendererSW::GSRasterizerData2::UseTargetPages(const uint32* fb_pages, con
|
|||
m_using_pages = true;
|
||||
}
|
||||
|
||||
void GSRendererSW::GSRasterizerData2::UseSourcePages(GSTextureCacheSW::Texture* t, int level)
|
||||
void GSRendererSW::SharedData::UseSourcePages(GSTextureCacheSW::Texture* t, int level)
|
||||
{
|
||||
ASSERT(m_tex_pages[level] == NULL);
|
||||
|
||||
m_tex_pages[level] = t->m_pages.n;
|
||||
m_tex_pages[level + 1] = NULL;
|
||||
|
||||
m_parent->UsePages(t->m_pages.n, 2);
|
||||
}
|
||||
|
|
|
@ -27,17 +27,17 @@
|
|||
|
||||
class GSRendererSW : public GSRenderer
|
||||
{
|
||||
class GSRasterizerData2 : public GSRasterizerData
|
||||
class SharedData : public GSDrawScanline::SharedData
|
||||
{
|
||||
GSRendererSW* m_parent;
|
||||
const uint32* m_fb_pages;
|
||||
const uint32* m_zb_pages;
|
||||
const uint32* m_tex_pages[7];
|
||||
const uint32* m_tex_pages[7 + 1]; // NULL terminated
|
||||
bool m_using_pages;
|
||||
|
||||
public:
|
||||
GSRasterizerData2(GSRendererSW* parent);
|
||||
virtual ~GSRasterizerData2();
|
||||
SharedData(GSRendererSW* parent);
|
||||
virtual ~SharedData();
|
||||
|
||||
void UseTargetPages(const uint32* fb_pages, const uint32* zb_pages);
|
||||
void UseSourcePages(GSTextureCacheSW::Texture* t, int level);
|
||||
|
@ -67,20 +67,11 @@ protected:
|
|||
void UsePages(const uint32* pages, int type);
|
||||
void ReleasePages(const uint32* pages, int type);
|
||||
|
||||
bool GetScanlineGlobalData(GSRasterizerData2* data2);
|
||||
|
||||
typedef size_t (GSState::*ConvertIndexPtr)(uint32* RESTRICT dst, const uint32* RESTRICT src, int count);
|
||||
|
||||
ConvertIndexPtr m_ci[8], m_cif;
|
||||
bool GetScanlineGlobalData(SharedData* data);
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(size_t dst_index, size_t src_index);
|
||||
|
||||
template<uint32 prim>
|
||||
size_t ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count);
|
||||
|
||||
void UpdateVertexKick();
|
||||
|
||||
public:
|
||||
GSRendererSW(int threads);
|
||||
virtual ~GSRendererSW();
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include "stdafx.h"
|
||||
#include "GSState.h"
|
||||
#include "GSdx.h"
|
||||
|
||||
//#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering
|
||||
//#define Offset_UV // Fixes / breaks various titles
|
||||
|
@ -36,6 +37,8 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
|
|||
, m_frameskip(0)
|
||||
, m_vt(vt)
|
||||
{
|
||||
m_nativeres = !!theApp.GetConfig("nativeres", 0);
|
||||
|
||||
memset(&m_v, 0, sizeof(m_v));
|
||||
m_q = 1.0f;
|
||||
memset(&m_vertex, 0, sizeof(m_vertex));
|
||||
|
@ -44,19 +47,10 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
|
|||
ASSERT(vertex_stride >= sizeof(GSVertex));
|
||||
|
||||
m_vertex.stride = vertex_stride;
|
||||
m_vertex.tmp = (uint8*)_aligned_malloc(vertex_stride * 2, 32);
|
||||
m_vertex.tmp = (uint8*)_aligned_malloc(m_vertex.stride * 2, 32);
|
||||
|
||||
GrowVertexBuffer();
|
||||
|
||||
m_vk[GS_POINTLIST] = (VertexKickPtr)&GSState::VertexKick<GS_POINTLIST>;
|
||||
m_vk[GS_LINELIST] = (VertexKickPtr)&GSState::VertexKick<GS_LINELIST>;
|
||||
m_vk[GS_LINESTRIP] = (VertexKickPtr)&GSState::VertexKick<GS_LINESTRIP>;
|
||||
m_vk[GS_TRIANGLELIST] = (VertexKickPtr)&GSState::VertexKick<GS_TRIANGLELIST>;
|
||||
m_vk[GS_TRIANGLESTRIP] = (VertexKickPtr)&GSState::VertexKick<GS_TRIANGLESTRIP>;
|
||||
m_vk[GS_TRIANGLEFAN] = (VertexKickPtr)&GSState::VertexKick<GS_TRIANGLEFAN>;
|
||||
m_vk[GS_SPRITE] = (VertexKickPtr)&GSState::VertexKick<GS_SPRITE>;
|
||||
m_vk[GS_INVALID] = (VertexKickPtr)&GSState::VertexKick<GS_INVALID>;
|
||||
|
||||
memset(m_cv, 0, sizeof(m_cv));
|
||||
|
||||
m_sssize = 0;
|
||||
|
@ -99,8 +93,9 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
|
|||
m_sssize += sizeof(m_v.RGBAQ);
|
||||
m_sssize += sizeof(m_v.ST);
|
||||
m_sssize += sizeof(m_v.UV);
|
||||
m_sssize += sizeof(m_v.FOG);
|
||||
m_sssize += sizeof(m_v.XYZ);
|
||||
m_sssize += sizeof(m_v.FOG); // obsolete
|
||||
m_sssize += sizeof(GIFReg); // obsolete
|
||||
|
||||
m_sssize += sizeof(m_tr.x);
|
||||
m_sssize += sizeof(m_tr.y);
|
||||
|
@ -189,24 +184,26 @@ void GSState::SetFrameSkip(int skip)
|
|||
}
|
||||
else
|
||||
{
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2<GS_INVALID, 0>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2<GS_INVALID, 0>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerXYZF2<GS_INVALID, 1>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerXYZ2<GS_INVALID, 1>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<1>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerXYZF3;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerXYZ3;
|
||||
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2<GS_INVALID, 0>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2<GS_INVALID, 0>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF2<GS_INVALID, 1>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ2<GS_INVALID, 1>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE;
|
||||
|
||||
UpdateVertexKick();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -239,18 +236,33 @@ void GSState::ResetHandlers()
|
|||
m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_UV] = &GSState::GIFPackedRegHandlerUV;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_TEX0_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerTEX0<0>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_TEX0_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerTEX0<1>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<1>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerXYZF3;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerXYZ3;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP;
|
||||
|
||||
#define SetHandlerXYZ(P) \
|
||||
m_fpGIFPackedRegHandlerXYZ[P][0] = &GSState::GIFPackedRegHandlerXYZF2<P, 0>; \
|
||||
m_fpGIFPackedRegHandlerXYZ[P][1] = &GSState::GIFPackedRegHandlerXYZF2<P, 1>; \
|
||||
m_fpGIFPackedRegHandlerXYZ[P][2] = &GSState::GIFPackedRegHandlerXYZ2<P, 0>; \
|
||||
m_fpGIFPackedRegHandlerXYZ[P][3] = &GSState::GIFPackedRegHandlerXYZ2<P, 1>; \
|
||||
m_fpGIFRegHandlerXYZ[P][0] = &GSState::GIFRegHandlerXYZF2<P, 0>; \
|
||||
m_fpGIFRegHandlerXYZ[P][1] = &GSState::GIFRegHandlerXYZF2<P, 1>; \
|
||||
m_fpGIFRegHandlerXYZ[P][2] = &GSState::GIFRegHandlerXYZ2<P, 0>; \
|
||||
m_fpGIFRegHandlerXYZ[P][3] = &GSState::GIFRegHandlerXYZ2<P, 1>; \
|
||||
|
||||
SetHandlerXYZ(GS_POINTLIST);
|
||||
SetHandlerXYZ(GS_LINELIST);
|
||||
SetHandlerXYZ(GS_LINESTRIP);
|
||||
SetHandlerXYZ(GS_TRIANGLELIST);
|
||||
SetHandlerXYZ(GS_TRIANGLESTRIP);
|
||||
SetHandlerXYZ(GS_TRIANGLEFAN);
|
||||
SetHandlerXYZ(GS_SPRITE);
|
||||
SetHandlerXYZ(GS_INVALID);
|
||||
|
||||
for(size_t i = 0; i < countof(m_fpGIFRegHandlers); i++)
|
||||
{
|
||||
m_fpGIFRegHandlers[i] = &GSState::GIFRegHandlerNull;
|
||||
|
@ -260,15 +272,11 @@ void GSState::ResetHandlers()
|
|||
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_1] = &GSState::GIFRegHandlerTEX0<0>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_2] = &GSState::GIFRegHandlerTEX0<1>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_1] = &GSState::GIFRegHandlerCLAMP<0>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_2] = &GSState::GIFRegHandlerCLAMP<1>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_FOG] = &GSState::GIFRegHandlerFOG;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_NOP] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_1] = &GSState::GIFRegHandlerTEX1<0>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_2] = &GSState::GIFRegHandlerTEX1<1>;
|
||||
|
@ -417,12 +425,12 @@ float GSState::GetFPS()
|
|||
|
||||
// GIFPackedRegHandler*
|
||||
|
||||
__forceinline void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r)
|
||||
void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r)
|
||||
{
|
||||
// ASSERT(0);
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r)
|
||||
void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r)
|
||||
{
|
||||
#if _M_SSE >= 0x301
|
||||
|
||||
|
@ -449,7 +457,7 @@ __forceinline void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT
|
|||
m_v.RGBAQ.Q = m_q;
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
|
||||
void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
|
||||
{
|
||||
#if defined(_M_AMD64)
|
||||
|
||||
|
@ -476,19 +484,11 @@ __forceinline void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT
|
|||
#endif
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r)
|
||||
void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r)
|
||||
{
|
||||
#if _M_SSE >= 0x200
|
||||
|
||||
GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff();
|
||||
m_v.UV.u32[0] = (uint32)GSVector4i::store(v.ps32(v));
|
||||
|
||||
#else
|
||||
|
||||
m_v.UV.U = r->UV.U;
|
||||
m_v.UV.V = r->UV.V;
|
||||
|
||||
#endif
|
||||
m_v.UV = (uint32)GSVector4i::store(v.ps32(v));
|
||||
|
||||
#ifdef Offset_UV
|
||||
m_v.UV.U = min((uint16)m_v.UV.U, (uint16)(m_v.UV.U - 4U));
|
||||
|
@ -496,36 +496,66 @@ __forceinline void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r
|
|||
#endif
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
|
||||
template<uint32 prim, uint32 adc>
|
||||
void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
|
||||
{
|
||||
if(adc)
|
||||
{
|
||||
// not sure what the difference is between this and XYZF2 with ADC bit set
|
||||
|
||||
//printf("XYZF3 X %d Y %d Z %d F %d ADC %d\n", r->XYZF2.X, r->XYZF2.Y, r->XYZF2.Z, r->XYZF2.F, r->XYZF2.ADC);
|
||||
}
|
||||
|
||||
/*
|
||||
m_v.XYZ.X = r->XYZF2.X;
|
||||
m_v.XYZ.Y = r->XYZF2.Y;
|
||||
m_v.XYZ.Z = r->XYZF2.Z;
|
||||
m_v.FOG.F = r->XYZF2.F;
|
||||
m_v.FOG = r->XYZF2.F;
|
||||
*/
|
||||
GSVector4i xy = GSVector4i::loadl(&r->u64[0]);
|
||||
GSVector4i zf = GSVector4i::loadl(&r->u64[1]);
|
||||
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::loadl(&m_v.UV));
|
||||
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
|
||||
|
||||
(this->*m_vkf)(r->XYZF2.Skip());
|
||||
m_v.m[1] = xy.upl32(zf);
|
||||
|
||||
VertexKick<prim>(adc ? 1 : r->XYZF2.Skip());
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
|
||||
template<uint32 prim, uint32 adc>
|
||||
void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
|
||||
{
|
||||
if(adc)
|
||||
{
|
||||
// not sure what the difference is between this and XYZ2 with ADC bit set
|
||||
|
||||
//printf("XYZ3 X %d Y %d Z %d ADC %d\n", r->XYZ2.X, r->XYZ2.Y, r->XYZ2.Z, r->XYZ2.ADC);
|
||||
}
|
||||
/*
|
||||
m_v.XYZ.X = r->XYZ2.X;
|
||||
m_v.XYZ.Y = r->XYZ2.Y;
|
||||
m_v.XYZ.Z = r->XYZ2.Z;
|
||||
*/
|
||||
GSVector4i xy = GSVector4i::loadl(&r->u64[0]);
|
||||
GSVector4i z = GSVector4i::loadl(&r->u64[1]);
|
||||
GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
|
||||
|
||||
(this->*m_vkf)(r->XYZ2.Skip());
|
||||
m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV));
|
||||
|
||||
VertexKick<prim>(adc ? 1 : r->XYZ2.Skip());
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r)
|
||||
void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r)
|
||||
{
|
||||
m_v.FOG.F = r->FOG.F;
|
||||
m_v.FOG = r->FOG.F;
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r)
|
||||
void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r)
|
||||
{
|
||||
(this->*m_fpGIFRegHandlers[r->A_D.ADDR])(&r->r);
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r)
|
||||
void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -571,12 +601,12 @@ void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r)
|
|||
ApplyPRIM(r->PRIM);
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
|
||||
void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
|
||||
{
|
||||
m_v.RGBAQ = (GSVector4i)r->RGBAQ;
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)
|
||||
void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)
|
||||
{
|
||||
m_v.ST = (GSVector4i)r->ST;
|
||||
|
||||
|
@ -587,16 +617,17 @@ __forceinline void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)
|
|||
#endif
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r)
|
||||
void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r)
|
||||
{
|
||||
m_v.UV.u32[0] = r->UV.u32[0] & 0x3fff3fff;
|
||||
m_v.UV = r->UV.u32[0] & 0x3fff3fff;
|
||||
|
||||
#ifdef Offset_UV
|
||||
m_v.UV.U = min((uint16)m_v.UV.U, (uint16)(m_v._UV.U - 4U));
|
||||
m_v.UV.V = min((uint16)m_v.UV.V, (uint16)(m_v._UV.V - 4U));
|
||||
m_v.UV.U = min((uint16)m_v.UV.U, (uint16)(m_v.UV.U - 4U));
|
||||
m_v.UV.V = min((uint16)m_v.UV.V, (uint16)(m_v.UV.V - 4U));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<uint32 prim, uint32 adc>
|
||||
void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
|
||||
{
|
||||
/*
|
||||
|
@ -605,21 +636,33 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
|
|||
m_v.XYZ.Z = r->XYZF.Z;
|
||||
m_v.FOG.F = r->XYZF.F;
|
||||
*/
|
||||
|
||||
/*
|
||||
m_v.XYZ.u32[0] = r->XYZF.u32[0];
|
||||
m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff;
|
||||
m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000;
|
||||
m_v.FOG = r->XYZF.u32[1] >> 24;
|
||||
*/
|
||||
|
||||
(this->*m_vkf)(0);
|
||||
GSVector4i xyzf = GSVector4i::loadl(&r->XYZF);
|
||||
GSVector4i xyz = xyzf & (GSVector4i::xffffffff().upl32(GSVector4i::x00ffffff()));
|
||||
GSVector4i uvf = GSVector4i::loadl(&m_v.UV).upl32(xyzf.srl32(24).srl<4>());
|
||||
|
||||
m_v.m[1] = xyz.upl64(uvf);
|
||||
|
||||
VertexKick<prim>(adc);
|
||||
}
|
||||
|
||||
template<uint32 prim, uint32 adc>
|
||||
void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r)
|
||||
{
|
||||
m_v.XYZ = (GSVector4i)r->XYZ;
|
||||
// m_v.XYZ = (GSVector4i)r->XYZ;
|
||||
|
||||
(this->*m_vkf)(0);
|
||||
m_v.m[1] = GSVector4i::load(&r->XYZ, &m_v.UV);
|
||||
|
||||
VertexKick<prim>(adc);
|
||||
}
|
||||
|
||||
void GSState::ApplyTEX0(int i, GIFRegTEX0& TEX0)
|
||||
template<int i> void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
|
||||
{
|
||||
// even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing
|
||||
|
||||
|
@ -674,7 +717,7 @@ template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
|
|||
if(TEX0.TW > 10) TEX0.TW = 10;
|
||||
if(TEX0.TH > 10) TEX0.TH = 10;
|
||||
|
||||
ApplyTEX0(i, TEX0);
|
||||
ApplyTEX0<i>(TEX0);
|
||||
|
||||
if(m_env.CTXT[i].TEX1.MTBA)
|
||||
{
|
||||
|
@ -730,29 +773,7 @@ template<int i> void GSState::GIFRegHandlerCLAMP(const GIFReg* RESTRICT r)
|
|||
|
||||
void GSState::GIFRegHandlerFOG(const GIFReg* RESTRICT r)
|
||||
{
|
||||
m_v.FOG.u32[1] = r->FOG.u32[1];
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerXYZF3(const GIFReg* RESTRICT r)
|
||||
{
|
||||
/*
|
||||
m_v.XYZ.X = r->XYZF.X;
|
||||
m_v.XYZ.Y = r->XYZF.Y;
|
||||
m_v.XYZ.Z = r->XYZF.Z;
|
||||
m_v.FOG.F = r->XYZF.F;
|
||||
*/
|
||||
m_v.XYZ.u32[0] = r->XYZF.u32[0];
|
||||
m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff;
|
||||
m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000;
|
||||
|
||||
(this->*m_vkf)(1);
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerXYZ3(const GIFReg* RESTRICT r)
|
||||
{
|
||||
m_v.XYZ = (GSVector4i)r->XYZ;
|
||||
|
||||
(this->*m_vkf)(1);
|
||||
m_v.FOG = r->FOG.F;
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r)
|
||||
|
@ -785,7 +806,7 @@ template<int i> void GSState::GIFRegHandlerTEX2(const GIFReg* RESTRICT r)
|
|||
|
||||
TEX0.u64 = (m_env.CTXT[i].TEX0.u64 & ~mask) | (r->u64 & mask);
|
||||
|
||||
ApplyTEX0(i, TEX0);
|
||||
ApplyTEX0<i>(TEX0);
|
||||
}
|
||||
|
||||
template<int i> void GSState::GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r)
|
||||
|
@ -1211,12 +1232,45 @@ void GSState::FlushPrim()
|
|||
{
|
||||
if(m_index.tail > 0)
|
||||
{
|
||||
if(0)
|
||||
{
|
||||
uint8* buff = new uint8[m_vertex.next];
|
||||
|
||||
memset(buff, 0, m_vertex.next);
|
||||
|
||||
for(size_t i = 0; i < m_index.tail; i++)
|
||||
{
|
||||
ASSERT(m_index.buff[i] < m_vertex.next);
|
||||
|
||||
buff[m_index.buff[i]] = 1;
|
||||
}
|
||||
|
||||
size_t count = 0;
|
||||
|
||||
for(size_t i = 0; i < m_vertex.next; i++)
|
||||
{
|
||||
if(buff[i] == 0)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
if(count > 0)
|
||||
{
|
||||
printf("unref %lld %d/%d\n", m_perfmon.GetFrame(), count, m_vertex.next);
|
||||
}
|
||||
|
||||
delete [] buff;
|
||||
}
|
||||
|
||||
uint8* buff = m_vertex.tmp;
|
||||
|
||||
size_t stride = m_vertex.stride;
|
||||
size_t head = m_vertex.head;
|
||||
size_t tail = m_vertex.tail;
|
||||
|
||||
if(tail > head)
|
||||
{
|
||||
switch(PRIM->PRIM)
|
||||
{
|
||||
case GS_LINESTRIP:
|
||||
|
@ -1239,19 +1293,25 @@ void GSState::FlushPrim()
|
|||
default:
|
||||
__assume(0);
|
||||
}
|
||||
}
|
||||
|
||||
if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3)
|
||||
{
|
||||
// FIXME: berserk fpsm = 27 (8H)
|
||||
|
||||
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
|
||||
|
||||
Draw();
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Draw, 1);
|
||||
m_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM));
|
||||
}
|
||||
|
||||
m_vertex.head = 0;
|
||||
m_vertex.tail = 0;
|
||||
|
||||
if(tail > head)
|
||||
{
|
||||
switch(PRIM->PRIM)
|
||||
{
|
||||
case GS_LINESTRIP:
|
||||
|
@ -1271,16 +1331,17 @@ void GSState::FlushPrim()
|
|||
default:
|
||||
__assume(0);
|
||||
}
|
||||
}
|
||||
|
||||
m_vertex.next = m_vertex.tail;
|
||||
m_index.tail = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_vertex.tail = 0;
|
||||
}
|
||||
|
||||
m_vertex.head = 0;
|
||||
m_vertex.tail = 0;
|
||||
m_vertex.next = 0;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -1644,23 +1705,60 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
|||
}
|
||||
else
|
||||
{
|
||||
uint32 total;
|
||||
|
||||
switch(path.tag.FLG)
|
||||
{
|
||||
case GIF_FLG_PACKED:
|
||||
|
||||
// first try a shortcut for a very common case
|
||||
// get to the start of the loop
|
||||
|
||||
if(path.adonly && size >= path.nloop)
|
||||
if(path.reg != 0)
|
||||
{
|
||||
size -= path.nloop;
|
||||
do
|
||||
{
|
||||
(this->*m_fpGIFPackedRegHandlers[path.GetReg()])((GIFPackedReg*)mem);
|
||||
|
||||
mem += sizeof(GIFPackedReg);
|
||||
size--;
|
||||
}
|
||||
while(path.StepReg() && size > 0 && path.reg != 0);
|
||||
}
|
||||
|
||||
// all data available? usually is
|
||||
|
||||
total = path.nloop * path.nreg;
|
||||
|
||||
if(size >= total)
|
||||
{
|
||||
size -= total;
|
||||
|
||||
if(path.adonly)
|
||||
{
|
||||
do
|
||||
{
|
||||
(this->*m_fpGIFRegHandlers[((GIFPackedReg*)mem)->A_D.ADDR])(&((GIFPackedReg*)mem)->r);
|
||||
|
||||
mem += sizeof(GIFPackedReg);
|
||||
}
|
||||
while(--path.nloop > 0);
|
||||
while(--total > 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32 reg = 0;
|
||||
|
||||
do
|
||||
{
|
||||
(this->*m_fpGIFPackedRegHandlers[path.GetReg(reg++)])((GIFPackedReg*)mem);
|
||||
|
||||
mem += sizeof(GIFPackedReg);
|
||||
|
||||
reg = reg & ((int)(reg - path.nreg) >> 31); // resets reg back to 0 when it becomes equal to path.nreg
|
||||
}
|
||||
while(--total > 0);
|
||||
}
|
||||
|
||||
path.nloop = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1678,6 +1776,8 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
|||
|
||||
case GIF_FLG_REGLIST:
|
||||
|
||||
// TODO: do it similar to packed operation
|
||||
|
||||
size *= 2;
|
||||
|
||||
do
|
||||
|
@ -1848,8 +1948,9 @@ int GSState::Freeze(GSFreezeData* fd, bool sizeonly)
|
|||
WriteState(data, &m_v.RGBAQ);
|
||||
WriteState(data, &m_v.ST);
|
||||
WriteState(data, &m_v.UV);
|
||||
WriteState(data, &m_v.XYZ);
|
||||
WriteState(data, &m_v.FOG);
|
||||
WriteState(data, &m_v.XYZ);
|
||||
data += sizeof(GIFReg); // obsolete
|
||||
WriteState(data, &m_tr.x);
|
||||
WriteState(data, &m_tr.y);
|
||||
WriteState(data, m_mem.m_vm8, m_mem.m_vmsize);
|
||||
|
@ -1942,8 +2043,9 @@ int GSState::Defrost(const GSFreezeData* fd)
|
|||
ReadState(&m_v.RGBAQ, data);
|
||||
ReadState(&m_v.ST, data);
|
||||
ReadState(&m_v.UV, data);
|
||||
ReadState(&m_v.XYZ, data);
|
||||
ReadState(&m_v.FOG, data);
|
||||
ReadState(&m_v.XYZ, data);
|
||||
data += sizeof(GIFReg); // obsolete
|
||||
ReadState(&m_tr.x, data);
|
||||
ReadState(&m_tr.y, data);
|
||||
ReadState(m_mem.m_vm8, data, m_mem.m_vmsize);
|
||||
|
@ -1994,8 +2096,19 @@ void GSState::SetGameCRC(uint32 crc, int options)
|
|||
|
||||
void GSState::UpdateVertexKick()
|
||||
{
|
||||
m_vkf = m_vk[PRIM->PRIM];
|
||||
m_cvf = m_cv[PRIM->PRIM][PRIM->TME][PRIM->FST];
|
||||
uint32 prim = PRIM->PRIM;
|
||||
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = m_fpGIFPackedRegHandlerXYZ[prim][0];
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = m_fpGIFPackedRegHandlerXYZ[prim][1];
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = m_fpGIFPackedRegHandlerXYZ[prim][2];
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = m_fpGIFPackedRegHandlerXYZ[prim][3];
|
||||
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = m_fpGIFRegHandlerXYZ[prim][0];
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = m_fpGIFRegHandlerXYZ[prim][1];
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = m_fpGIFRegHandlerXYZ[prim][2];
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = m_fpGIFRegHandlerXYZ[prim][3];
|
||||
|
||||
m_cvf = m_cv[prim][PRIM->TME][PRIM->FST];
|
||||
}
|
||||
|
||||
void GSState::GrowVertexBuffer()
|
||||
|
@ -2024,20 +2137,34 @@ void GSState::GrowVertexBuffer()
|
|||
m_index.buff = index;
|
||||
}
|
||||
|
||||
static uint32 s_tmp[4];
|
||||
static size_t s_tmp_i = 0;
|
||||
static GSVector4i s_tmp_zw_sign = GSVector4i::x80000000().sll<8>();
|
||||
static GSVector4i s_zw_sign = GSVector4i::x80000000().sll<8>();
|
||||
|
||||
template<uint32 prim>
|
||||
void GSState::VertexKick(uint32 skip)
|
||||
__forceinline void GSState::VertexKick(uint32 skip)
|
||||
{
|
||||
s_tmp[s_tmp_i++ & 3] = m_v.XYZ.u32[0];
|
||||
|
||||
size_t head = m_vertex.head;
|
||||
size_t tail = m_vertex.tail;
|
||||
size_t next = m_vertex.next;
|
||||
size_t xy_tail = m_vertex.xy_tail;
|
||||
|
||||
*(GSVertex*)&m_vertex.buff[m_vertex.stride * tail] = m_v;
|
||||
// callers should write XYZUVF to m_v.m[1] in one piece to have this load store-forwarded, either by the cpu or the compiler when this function is inlined
|
||||
|
||||
GSVector4i v0(m_v.m[0]);
|
||||
GSVector4i v1(m_v.m[1]);
|
||||
|
||||
GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[m_vertex.stride * tail];
|
||||
|
||||
tailptr[0] = v0;
|
||||
tailptr[1] = v1;
|
||||
|
||||
m_vertex.xy[xy_tail & 3] = GSVector4(v1.upl32(v1.add16(GSVector4i::x000f()).srl16(4)).upl16());
|
||||
|
||||
#ifdef _DEBUG
|
||||
memset(&tailptr[2], 0, m_vertex.stride - sizeof(GSVertex));
|
||||
#endif
|
||||
|
||||
m_vertex.tail = ++tail;
|
||||
m_vertex.xy_tail = ++xy_tail;
|
||||
|
||||
size_t n = 0;
|
||||
|
||||
|
@ -2053,8 +2180,6 @@ void GSState::VertexKick(uint32 skip)
|
|||
case GS_INVALID: n = 1; break;
|
||||
}
|
||||
|
||||
m_vertex.tail = ++tail;
|
||||
|
||||
size_t m = tail - head;
|
||||
|
||||
if(m < n)
|
||||
|
@ -2062,54 +2187,68 @@ void GSState::VertexKick(uint32 skip)
|
|||
return;
|
||||
}
|
||||
|
||||
if(skip == 0)
|
||||
if(skip == 0 && (prim != GS_TRIANGLEFAN || m <= 4)) // m_vertex.xy only knows about the last 4 vertices, head could be far behind for fan
|
||||
{
|
||||
int p0 = (int)s_tmp[(s_tmp_i + 1) & 3];
|
||||
int p1 = (int)s_tmp[(s_tmp_i + 2) & 3];
|
||||
int p2 = (int)s_tmp[(s_tmp_i + 3) & 3];
|
||||
int p3 = (int)s_tmp[(s_tmp_i - m) & 3];
|
||||
GSVector4 v0, v1, v2, v3;
|
||||
|
||||
GSVector4i p(p0, p1, p2, p3);
|
||||
v0 = m_vertex.xy[(xy_tail + 1) & 3]; // T-3
|
||||
v1 = m_vertex.xy[(xy_tail + 2) & 3]; // T-2
|
||||
v2 = m_vertex.xy[(xy_tail + 3) & 3]; // T-1
|
||||
v3 = m_vertex.xy[(xy_tail - m) & 3]; // H
|
||||
|
||||
GSVector4i v0, v1, v2, v3;
|
||||
|
||||
v1 = p.upl16();
|
||||
v3 = p.uph16();
|
||||
|
||||
v0 = v1.xyxy();
|
||||
v1 = v1.zwzw();
|
||||
v2 = v3.xyxy();
|
||||
v3 = v3.zwzw();
|
||||
|
||||
GSVector4i s = m_context->scissor.dx10;
|
||||
GSVector4i sm = s_tmp_zw_sign;
|
||||
|
||||
GSVector4 cross;
|
||||
GSVector4 pmin, pmax, cross;
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
skip = ((v2 - s) ^ sm).mask() & 0x8888;
|
||||
pmin = v2;
|
||||
pmax = v2;
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
case GS_SPRITE:
|
||||
skip = (((v1 - s) ^ sm) & ((v2 - s) ^ sm)).mask() & 0x8888;
|
||||
skip |= p1 == p2;
|
||||
pmin = v2.min(v1);
|
||||
pmax = v2.max(v1);
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
skip = (((v0 - s) ^ sm) & ((v1 - s) ^ sm) & ((v2 - s) ^ sm)).mask() & 0x8888;
|
||||
cross = (GSVector4(v1) - GSVector4(v0)) * (GSVector4(v2) - GSVector4(v0)).yxyx();
|
||||
skip |= (cross == cross.yxyx()).mask();
|
||||
pmin = v2.min(v1.min(v0));
|
||||
pmax = v2.max(v1.max(v0));
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
if(m > 4) break; // s_tmp only knows about the last 4 vertices, head could be far behind
|
||||
skip = (((v1 - s) ^ sm) & ((v2 - s) ^ sm) & ((v3 - s) ^ sm)).mask() & 0x8888;
|
||||
cross = (GSVector4(v1) - GSVector4(v3)) * (GSVector4(v2) - GSVector4(v3)).yxyx();
|
||||
skip |= (cross == cross.yxyx()).mask();
|
||||
pmin = v2.min(v1.min(v3));
|
||||
pmax = v2.max(v1.max(v3));
|
||||
break;
|
||||
}
|
||||
|
||||
GSVector4 scissor = m_context->scissor.dx9;
|
||||
|
||||
GSVector4 test = pmax < scissor | pmin > scissor.zwxy();
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
case GS_SPRITE:
|
||||
test |= m_nativeres ? (pmin == pmax).zwzw() : pmin == pmax;
|
||||
break;
|
||||
}
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
cross = (v2 - v1) * (v2 - v0).yxwz();
|
||||
test |= cross == cross.yxwz();
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
cross = (v2 - v1) * (v2 - v3).yxwz();
|
||||
test |= cross == cross.yxwz();
|
||||
break;
|
||||
}
|
||||
|
||||
skip |= test.mask() & 3;
|
||||
}
|
||||
|
||||
if(skip != 0)
|
||||
|
@ -2171,7 +2310,7 @@ void GSState::VertexKick(uint32 skip)
|
|||
m_vertex.next = head + 2;
|
||||
m_index.tail += 2;
|
||||
if(head + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
|
||||
if(head + 1 >= next) (this->*m_cvf)(head + 1, src_index + 1);
|
||||
/*if(head + 1 >= next)*/ (this->*m_cvf)(head + 1, src_index + 1); // this is always a new vertex
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
buff[0] = head + 0;
|
||||
|
@ -2194,10 +2333,10 @@ void GSState::VertexKick(uint32 skip)
|
|||
m_index.tail += 3;
|
||||
if(src_index + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
|
||||
if(src_index + 1 >= next) (this->*m_cvf)(head + 1, src_index + 1);
|
||||
if(src_index + 2 >= next) (this->*m_cvf)(head + 2, src_index + 2);
|
||||
/*if(src_index + 2 >= next)*/ (this->*m_cvf)(head + 2, src_index + 2); // this is always a new vertex
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
// TODO: remove gaps
|
||||
// TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare)
|
||||
buff[0] = head + 0;
|
||||
buff[1] = tail - 2;
|
||||
buff[2] = tail - 1;
|
||||
|
@ -2205,7 +2344,7 @@ void GSState::VertexKick(uint32 skip)
|
|||
m_index.tail += 3;
|
||||
if(head >= next) (this->*m_cvf)(head, head);
|
||||
if(tail - 2 >= next) (this->*m_cvf)(tail - 2, tail - 2);
|
||||
if(tail - 1 >= next) (this->*m_cvf)(tail - 1, tail - 1);
|
||||
/*if(tail - 1 >= next)*/ (this->*m_cvf)(tail - 1, tail - 1); // this is always a new vertex
|
||||
break;
|
||||
case GS_SPRITE:
|
||||
buff[0] = head + 0;
|
||||
|
@ -2341,7 +2480,21 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
|
|||
}
|
||||
}
|
||||
|
||||
r = vr.rintersect(tr);
|
||||
vr = vr.rintersect(tr);
|
||||
|
||||
if(vr.rempty())
|
||||
{
|
||||
// NOTE: this can happen when texcoords are all outside the texture or clamping area is zero, but we can't
|
||||
// let the texture cache update nothing, the sampler will still need a single texel from the border somewhere
|
||||
// examples:
|
||||
// - ICO opening menu (texture looks like the miniature silhouette of everything except the sky)
|
||||
// - THPS (no visible problems)
|
||||
// - NFSMW (strange rectangles on screen, might be unrelated)
|
||||
|
||||
vr = (vr + GSVector4i(-1, +1).xxyy()).rintersect(tr);
|
||||
}
|
||||
|
||||
r = vr;
|
||||
}
|
||||
|
||||
void GSState::GetAlphaMinMax()
|
||||
|
|
|
@ -42,13 +42,14 @@ class GSState : public GSAlignedClass<32>
|
|||
typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* RESTRICT r);
|
||||
|
||||
GIFPackedRegHandler m_fpGIFPackedRegHandlers[16];
|
||||
GIFPackedRegHandler m_fpGIFPackedRegHandlerXYZ[8][4];
|
||||
|
||||
void GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r);
|
||||
void GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r);
|
||||
void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r);
|
||||
void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r);
|
||||
void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
|
||||
void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
|
||||
template<uint32 prim, uint32 adc> void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
|
||||
template<uint32 prim, uint32 adc> void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
|
||||
void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r);
|
||||
void GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r);
|
||||
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r);
|
||||
|
@ -56,8 +57,9 @@ class GSState : public GSAlignedClass<32>
|
|||
typedef void (GSState::*GIFRegHandler)(const GIFReg* RESTRICT r);
|
||||
|
||||
GIFRegHandler m_fpGIFRegHandlers[256];
|
||||
GIFRegHandler m_fpGIFRegHandlerXYZ[8][4];
|
||||
|
||||
void ApplyTEX0(int i, GIFRegTEX0& TEX0);
|
||||
template<int i> void ApplyTEX0(GIFRegTEX0& TEX0);
|
||||
void ApplyPRIM(const GIFRegPRIM& PRIM);
|
||||
|
||||
void GIFRegHandlerNull(const GIFReg* RESTRICT r);
|
||||
|
@ -65,13 +67,11 @@ class GSState : public GSAlignedClass<32>
|
|||
void GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r);
|
||||
void GIFRegHandlerST(const GIFReg* RESTRICT r);
|
||||
void GIFRegHandlerUV(const GIFReg* RESTRICT r);
|
||||
void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
|
||||
void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
|
||||
template<uint32 prim, uint32 adc> void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
|
||||
template<uint32 prim, uint32 adc> void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
|
||||
template<int i> void GIFRegHandlerTEX0(const GIFReg* RESTRICT r);
|
||||
template<int i> void GIFRegHandlerCLAMP(const GIFReg* RESTRICT r);
|
||||
void GIFRegHandlerFOG(const GIFReg* RESTRICT r);
|
||||
void GIFRegHandlerXYZF3(const GIFReg* RESTRICT r);
|
||||
void GIFRegHandlerXYZ3(const GIFReg* RESTRICT r);
|
||||
void GIFRegHandlerNOP(const GIFReg* RESTRICT r);
|
||||
template<int i> void GIFRegHandlerTEX1(const GIFReg* RESTRICT r);
|
||||
template<int i> void GIFRegHandlerTEX2(const GIFReg* RESTRICT r);
|
||||
|
@ -131,13 +131,25 @@ protected:
|
|||
|
||||
GSVertex m_v;
|
||||
float m_q;
|
||||
struct {uint8* buff; size_t head, tail, next, maxcount, stride, n; uint8* tmp;} m_vertex; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
|
||||
struct {uint32* buff; size_t tail;} m_index;
|
||||
|
||||
typedef void (GSState::*VertexKickPtr)(uint32 skip);
|
||||
struct
|
||||
{
|
||||
uint8* buff;
|
||||
size_t stride;
|
||||
size_t head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
|
||||
GSVector4 xy[4];
|
||||
size_t xy_tail;
|
||||
uint8* tmp;
|
||||
} m_vertex;
|
||||
|
||||
struct
|
||||
{
|
||||
uint32* buff;
|
||||
size_t tail;
|
||||
} m_index;
|
||||
|
||||
typedef void (GSState::*ConvertVertexPtr)(size_t dst_index, size_t src_index);
|
||||
|
||||
VertexKickPtr m_vk[8], m_vkf;
|
||||
ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST]
|
||||
|
||||
#define InitConvertVertex2(T, P) \
|
||||
|
@ -186,6 +198,7 @@ public:
|
|||
bool m_framelimit;
|
||||
CRC::Game m_game;
|
||||
GSDump m_dump;
|
||||
bool m_nativeres;
|
||||
|
||||
public:
|
||||
GSState(GSVertexTrace* vt, size_t vertex_stride);
|
||||
|
|
|
@ -270,22 +270,29 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
|||
|
||||
uint8* dst = (uint8*)m_buff + pitch * r.top;
|
||||
|
||||
int block_pitch = pitch * bs.y;
|
||||
|
||||
r = r.srl32(3);
|
||||
|
||||
bs.x >>= 3;
|
||||
bs.y >>= 3;
|
||||
|
||||
shift += 3;
|
||||
|
||||
if(m_repeating)
|
||||
{
|
||||
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
|
||||
for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
uint32 base = o->block.row[y];
|
||||
|
||||
for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
|
||||
{
|
||||
uint32 block = base + o->block.col[x >> 3];
|
||||
uint32 block = base + o->block.col[x];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
uint32 addr = i >> 3;
|
||||
|
||||
uint32 row = addr >> 5;
|
||||
uint32 col = 1 << (addr & 31);
|
||||
uint32 row = i >> 5;
|
||||
uint32 col = 1 << (i & 31);
|
||||
|
||||
if((m_valid[row] & col) == 0)
|
||||
{
|
||||
|
@ -301,13 +308,13 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
|||
}
|
||||
else
|
||||
{
|
||||
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
|
||||
for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
|
||||
{
|
||||
uint32 base = o->block.row[y >> 3];
|
||||
uint32 base = o->block.row[y];
|
||||
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
{
|
||||
uint32 block = base + o->block.col[x >> 3];
|
||||
uint32 block = base + o->block.col[x];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
|
|
|
@ -37,7 +37,7 @@ __aligned(struct, 32) GSVertex
|
|||
GIFRegST ST;
|
||||
GIFRegRGBAQ RGBAQ;
|
||||
GIFRegXYZ XYZ;
|
||||
union {GIFRegUV UV; GIFRegFOG FOG;}; // UV.u32[0] | FOG.u32[1]
|
||||
uint32 UV, FOG;
|
||||
};
|
||||
|
||||
__m128i m[2];
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
__aligned(struct, 32) GSVertexSW
|
||||
{
|
||||
GSVector4 p, t, c;
|
||||
GSVector4 p, t, c, _pad;
|
||||
|
||||
__forceinline GSVertexSW() {}
|
||||
__forceinline GSVertexSW(const GSVertexSW& v) {*this = v;}
|
||||
|
|
|
@ -618,7 +618,7 @@ VS_OUTPUT vs_main(VS_INPUT input)
|
|||
}
|
||||
|
||||
output.c = input.c;
|
||||
output.t.z = input.f.a;
|
||||
output.t.z = input.f.r;
|
||||
|
||||
return output;
|
||||
}
|
||||
|
@ -765,7 +765,7 @@ VS_OUTPUT vs_main(VS_INPUT input)
|
|||
}
|
||||
|
||||
output.c = input.c;
|
||||
output.t.z = input.f.a;
|
||||
output.t.z = input.f.b;
|
||||
|
||||
return output;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue