GSdx: sps fixed, some code clean up and optimization, ps2 logo still broken in hw mode, I'll check it later

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5062 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2012-01-08 17:10:00 +00:00
parent 872301fbeb
commit 2eec75c2ae
22 changed files with 654 additions and 503 deletions

View File

@ -35,9 +35,9 @@ GPUDrawScanline::~GPUDrawScanline()
{
}
void GPUDrawScanline::BeginDraw(const void* param)
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
{
memcpy(&m_global, param, sizeof(m_global));
memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global));
if(m_global.sel.tme && m_global.sel.twin)
{
@ -83,7 +83,7 @@ void GPUDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
#ifndef ENABLE_JIT_RASTERIZER
void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
void GPUDrawScanline::SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan)
{
GPUScanlineSelector sel = m_global.sel;
@ -93,7 +93,7 @@ void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& ds
{
if(sel.sprite)
{
GSVector4i t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001();
GSVector4i t = (GSVector4i(vertex.t) >> 8) - GSVector4i::x00000001();
t = t.ps32(t);
t = t.upl16(t);

View File

@ -29,6 +29,25 @@
class GPUDrawScanline : public IDrawScanline
{
public:
// Per-draw payload for the GPU software scanline drawer: the generic
// GSRasterizerData fields plus the scanline global state held by value.
// GPUDrawScanline::BeginDraw() memcpy's `global` into its own m_global, so
// the clut pointer is shared with that copy while this object is alive.
class SharedData : public GSRasterizerData
{
public:
	GPUScanlineGlobalData global;

	// Only clut is initialised here; the remaining fields of `global` are
	// left for the code that builds the draw to fill in.
	SharedData()
	{
		global.clut = NULL;
	}

	// Releases the clut buffer if one was attached to this draw.
	virtual ~SharedData()
	{
		if(global.clut != NULL)
		{
			_aligned_free(global.clut);
		}
	}
};
protected:
GPUScanlineGlobalData m_global;
GPUScanlineLocalData m_local;
@ -41,12 +60,12 @@ public:
// IDrawScanline
void BeginDraw(const void* param);
void BeginDraw(const GSRasterizerData* data);
void EndDraw(uint64 frame, uint64 ticks, int pixels);
#ifndef ENABLE_JIT_RASTERIZER
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
void DrawRect(const GSVector4i& r, const GSVertexSW& v);

View File

@ -69,9 +69,11 @@ GSTexture* GPURendererSW::GetOutput()
void GPURendererSW::Draw()
{
shared_ptr<GSRasterizerData> data(new GPURasterizerData());
GPUDrawScanline::SharedData* sd = new GPUDrawScanline::SharedData();
GPUScanlineGlobalData& gd = *(GPUScanlineGlobalData*)data->param;
shared_ptr<GSRasterizerData> data(sd);
GPUScanlineGlobalData& gd = sd->global;
const GPUDrawingEnvironment& env = m_env;
@ -169,7 +171,7 @@ void GPURendererSW::Draw()
m_perfmon.Put(GSPerfMon::Draw, 1);
m_perfmon.Put(GSPerfMon::Prim, prims);
m_perfmon.Put(GSPerfMon::Fillrate, data->pixels);
m_perfmon.Put(GSPerfMon::Fillrate, m_rl->GetPixels());
}
void GPURendererSW::VertexKick()

View File

@ -26,28 +26,6 @@
class GPURendererSW : public GPURendererT<GSVertexSW>
{
class GPURasterizerData : public GSRasterizerData
{
public:
GPURasterizerData()
{
GPUScanlineGlobalData* gd = (GPUScanlineGlobalData*)_aligned_malloc(sizeof(GPUScanlineGlobalData), 32);
gd->clut = NULL;
param = gd;
}
virtual ~GPURasterizerData()
{
GPUScanlineGlobalData* gd = (GPUScanlineGlobalData*)param;
if(gd->clut) _aligned_free(gd->clut);
_aligned_free(gd);
}
};
protected:
IRasterizer* m_rl;
GSTexture* m_texture;

View File

@ -1021,7 +1021,6 @@ REG128_(GIFPacked, XYZF2)
uint16 _PAD1;
uint16 Y;
uint16 _PAD2;
uint32 _PAD3:4;
uint32 Z:24;
uint32 _PAD4:4;
@ -1097,19 +1096,24 @@ __aligned(struct, 32) GIFPath
GSVector4i::store<true>(&tag, v);
reg = 0;
regs = v.uph8(v >> 4) & 0x0f0f0f0f;
nreg = tag.NREG;
nreg = tag.NREG ? tag.NREG : 16;
nloop = tag.NLOOP;
adonly = nreg == 1 && regs.u8[0] == GIF_REG_A_D;
adonly = regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1;
}
__forceinline uint8 GetReg()
{
return regs.u8[reg]; // GET_GIF_REG(tag, reg);
return regs.u8[reg];
}
// Returns the register id stored in byte lane `index` of `regs`. Unlike the
// parameterless overload, this does not read the current `reg` cursor.
// No bounds check is performed on `index`.
__forceinline uint8 GetReg(uint32 index)
{
	const uint8 id = regs.u8[index];

	return id;
}
__forceinline bool StepReg()
{
if((++reg & 0xf) == nreg)
if(++reg == nreg)
{
reg = 0;

View File

@ -44,15 +44,30 @@ class GSBlock
public:
template<int i, bool aligned, uint32 mask> __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{
const GSVector4i* s0 = (const GSVector4i*)&src[srcpitch * 0];
const GSVector4i* s1 = (const GSVector4i*)&src[srcpitch * 1];
GSVector4i v0, v1, v2, v3;
GSVector4i v0 = GSVector4i::load<aligned>(&s0[0]);
GSVector4i v1 = GSVector4i::load<aligned>(&s0[1]);
GSVector4i v2 = GSVector4i::load<aligned>(&s1[0]);
GSVector4i v3 = GSVector4i::load<aligned>(&s1[1]);
if(aligned)
{
const GSVector4i* s0 = (const GSVector4i*)&src[srcpitch * 0];
const GSVector4i* s1 = (const GSVector4i*)&src[srcpitch * 1];
GSVector4i::sw64(v0, v2, v1, v3);
v0 = GSVector4i::load<aligned>(&s0[0]);
v1 = GSVector4i::load<aligned>(&s0[1]);
v2 = GSVector4i::load<aligned>(&s1[0]);
v3 = GSVector4i::load<aligned>(&s1[1]);
GSVector4i::sw64(v0, v2, v1, v3);
}
else
{
const uint8* s0 = &src[srcpitch * 0];
const uint8* s1 = &src[srcpitch * 1];
v0 = GSVector4i::load(&s0[0], &s1[0]);
v1 = GSVector4i::load(&s0[8], &s1[8]);
v2 = GSVector4i::load(&s0[16], &s1[16]);
v3 = GSVector4i::load(&s0[24], &s1[24]);
}
if(mask == 0xffffffff)
{
@ -264,14 +279,26 @@ public:
template<int i, bool aligned> __forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
{
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0, v1, v2, v3;
GSVector4i v0 = s[i * 4 + 0];
GSVector4i v1 = s[i * 4 + 1];
GSVector4i v2 = s[i * 4 + 2];
GSVector4i v3 = s[i * 4 + 3];
if(aligned)
{
const GSVector4i* s = (const GSVector4i*)src;
v0 = s[i * 4 + 0];
v1 = s[i * 4 + 1];
v2 = s[i * 4 + 2];
v3 = s[i * 4 + 3];
GSVector4i::sw64(v0, v1, v2, v3);
GSVector4i::sw64(v0, v1, v2, v3);
}
else
{
v0 = GSVector4i::load(&src[i * 64 + 0], &src[i * 64 + 16]);
v1 = GSVector4i::load(&src[i * 64 + 32], &src[i * 64 + 48]);
v2 = GSVector4i::load(&src[i * 64 + 8], &src[i * 64 + 24]);
v3 = GSVector4i::load(&src[i * 64 + 40], &src[i * 64 + 56]);
}
GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];
GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];

View File

@ -36,9 +36,9 @@ GSDrawScanline::~GSDrawScanline()
{
}
void GSDrawScanline::BeginDraw(const void* param)
void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
{
memcpy(&m_global, param, sizeof(m_global));
memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global));
if(m_global.sel.mmin && m_global.sel.lcm)
{
@ -102,6 +102,8 @@ void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
#ifndef ENABLE_JIT_RASTERIZER
// FIXME: something's not right with the sky in burnout 3
void GSDrawScanline::SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan)
{
GSScanlineSelector sel = m_global.sel;

View File

@ -29,6 +29,14 @@
class GSDrawScanline : public IDrawScanline
{
public:
// Per-draw payload for the GS software scanline drawer: the generic
// GSRasterizerData fields plus the scanline global state held by value.
class SharedData : public GSRasterizerData
{
public:
// Copied byte-wise into GSDrawScanline::m_global by BeginDraw().
GSScanlineGlobalData global;
};
protected:
GSScanlineGlobalData m_global;
GSScanlineLocalData m_local;
@ -50,14 +58,14 @@ public:
// IDrawScanline
void BeginDraw(const void* param);
void BeginDraw(const GSRasterizerData* data);
void EndDraw(uint64 frame, uint64 ticks, int pixels);
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
#ifndef ENABLE_JIT_RASTERIZER
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);

View File

@ -1301,13 +1301,13 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
{
if(len <= 0) return;
uint8* pb = (uint8*)dst;
uint16* pw = (uint16*)dst;
uint32* pd = (uint32*)dst;
uint8* RESTRICT pb = (uint8*)dst;
uint16* RESTRICT pw = (uint16*)dst;
uint32* RESTRICT pd = (uint32*)dst;
uint32 bp = BITBLTBUF.SBP;
uint32 bw = BITBLTBUF.SBW;
psm_t* psm = &m_psm[BITBLTBUF.SPSM];
psm_t* RESTRICT psm = &m_psm[BITBLTBUF.SPSM];
int x = tx;
int y = ty;
@ -1319,16 +1319,26 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
case PSM_PSMCT32:
case PSM_PSMZ32:
// MGS1 intro, fade effect between two scenes (airplane outside-inside transition)
len /= 4;
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pd += 4)
{
pd[0] = ps[offset[x + 0]];
pd[1] = ps[offset[x + 1]];
pd[2] = ps[offset[x + 2]];
pd[3] = ps[offset[x + 3]];
}
for(; len > 0 && x < ex; len--, x++, pd++)
{
*pd = ReadPixel32(addr + offset[x]);
*pd = ps[offset[x]];
}
if(x == ex) {x = sx; y++;}
@ -1343,16 +1353,16 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(; len > 0 && x < ex; len--, x++, pb += 3)
{
uint32 c = ReadPixel32(addr + offset[x]);
uint32 c = ps[offset[x]];
pb[0] = ((uint8*)&c)[0];
pb[1] = ((uint8*)&c)[1];
pb[2] = ((uint8*)&c)[2];
pb[0] = (uint8)(c);
pb[1] = (uint8)(c >> 8);
pb[2] = (uint8)(c >> 16);
}
if(x == ex) {x = sx; y++;}
@ -1369,12 +1379,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
uint16* RESTRICT ps = &m_vm16[psm->pa(0, y, bp, bw)];
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pw += 4)
{
pw[0] = ps[offset[x + 0]];
pw[1] = ps[offset[x + 1]];
pw[2] = ps[offset[x + 2]];
pw[3] = ps[offset[x + 3]];
}
for(; len > 0 && x < ex; len--, x++, pw++)
{
*pw = ReadPixel16(addr + offset[x]);
*pw = ps[offset[x]];
}
if(x == ex) {x = sx; y++;}
@ -1386,12 +1404,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
uint8* RESTRICT ps = &m_vm8[psm->pa(0, y, bp, bw)];
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
{
pb[0] = ps[offset[x + 0]];
pb[1] = ps[offset[x + 1]];
pb[2] = ps[offset[x + 2]];
pb[3] = ps[offset[x + 3]];
}
for(; len > 0 && x < ex; len--, x++, pb++)
{
*pb = ReadPixel8(addr + offset[x]);
*pb = ps[offset[x]];
}
if(x == ex) {x = sx; y++;}
@ -1404,7 +1430,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
for(; len > 0 && x < ex; len--, x += 2, pb++)
{
@ -1420,12 +1446,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4)
{
pb[0] = (uint8)(ps[offset[x + 0]] >> 24);
pb[1] = (uint8)(ps[offset[x + 1]] >> 24);
pb[2] = (uint8)(ps[offset[x + 2]] >> 24);
pb[3] = (uint8)(ps[offset[x + 3]] >> 24);
}
for(; len > 0 && x < ex; len--, x++, pb++)
{
*pb = ReadPixel8H(addr + offset[x]);
*pb = (uint8)(ps[offset[x]] >> 24);
}
if(x == ex) {x = sx; y++;}
@ -1437,12 +1471,15 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(; len > 0 && x < ex; len--, x += 2, pb++)
{
*pb = ReadPixel4HL(addr + offset[x + 0]) | (ReadPixel4HL(addr + offset[x + 1]) << 4);
uint32 c0 = (ps[offset[x + 0]] >> 24) & 0x0f;
uint32 c1 = (ps[offset[x + 1]] >> 20) & 0xf0;
*pb = (uint8)(c0 | c1);
}
if(x == ex) {x = sx; y++;}
@ -1454,12 +1491,15 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
while(len > 0)
{
uint32 addr = psm->pa(0, y, bp, bw);
int* offset = psm->rowOffset[y & 7];
int* RESTRICT offset = psm->rowOffset[y & 7];
uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)];
for(; len > 0 && x < ex; len--, x += 2, pb++)
{
*pb = ReadPixel4HH(addr + offset[x + 0]) | (ReadPixel4HH(addr + offset[x + 1]) << 4);
uint32 c0 = (ps[offset[x + 0]] >> 28) & 0x0f;
uint32 c1 = (ps[offset[x + 1]] >> 24) & 0xf0;
*pb = (uint8)(c0 | c1);
}
if(x == ex) {x = sx; y++;}

View File

@ -35,6 +35,7 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe
, m_id(id)
, m_threads(threads)
, m_perfmon(perfmon)
, m_pixels(0)
{
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
m_edge.count = 0;
@ -98,16 +99,28 @@ int GSRasterizer::FindMyNextScanline(int top) const
void GSRasterizer::Queue(shared_ptr<GSRasterizerData> data)
{
Draw(data);
Draw(data.get());
}
void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
int GSRasterizer::GetPixels(bool reset)
{
int pixels = m_pixels;
if(reset)
{
m_pixels = 0;
}
return pixels;
}
void GSRasterizer::Draw(GSRasterizerData* data)
{
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
m_ds->BeginDraw(data->param);
m_ds->BeginDraw(data);
const GSVertexSW* vertex = data->vertex;
const GSVertexSW* vertex_end = data->vertex + data->vertex_count;
@ -123,8 +136,6 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
m_fscissor_x = GSVector4(data->scissor).xzxz();
m_fscissor_y = GSVector4(data->scissor).ywyw();
m_pixels = 0;
uint64 start = __rdtsc();
switch(data->primclass)
@ -193,9 +204,6 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
uint64 ticks = __rdtsc() - start;
_InterlockedExchangeAdd(&data->ticks, (long)ticks);
_InterlockedExchangeAdd(&data->pixels, m_pixels);
m_ds->EndDraw(data->frame, ticks, m_pixels);
}
@ -907,6 +915,18 @@ void GSRasterizerList::Sync()
m_sync_count++;
}
// Sums the pixel counters of all workers, visiting them in index order and
// forwarding `reset` to each one.
int GSRasterizerList::GetPixels(bool reset)
{
	int sum = 0;

	size_t idx = 0;

	while(idx < m_workers.size())
	{
		sum += m_workers[idx]->GetPixels(reset);

		++idx;
	}

	return sum;
}
void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item)
{
if(item->solidrect)
@ -945,6 +965,11 @@ GSRasterizerList::GSWorker::~GSWorker()
delete m_r;
}
// Forwards to the rasterizer owned by this worker (m_r).
int GSRasterizerList::GSWorker::GetPixels(bool reset)
{
	const int pixels = m_r->GetPixels(reset);

	return pixels;
}
void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item)
{
GSVector4i r = item->bbox.rintersect(item->scissor);
@ -957,5 +982,5 @@ void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item)
void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
{
m_r->Draw(item);
m_r->Draw(item.get());
}

View File

@ -42,12 +42,6 @@ public:
bool solidrect;
bool syncpoint;
uint64 frame;
void* param;
// drawing stats
volatile long ticks;
volatile long pixels;
GSRasterizerData()
: scissor(GSVector4i::zero())
@ -61,17 +55,12 @@ public:
, solidrect(false)
, syncpoint(false)
, frame(0)
, param(NULL)
, ticks(0)
, pixels(0)
{
}
virtual ~GSRasterizerData()
{
if(buff != NULL) _aligned_free(buff);
// derived class should free param and its members
}
};
@ -92,7 +81,7 @@ public:
IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {}
virtual ~IDrawScanline() {}
virtual void BeginDraw(const void* param) = 0;
virtual void BeginDraw(const GSRasterizerData* data) = 0;
virtual void EndDraw(uint64 frame, uint64 ticks, int pixels) = 0;
#ifdef ENABLE_JIT_RASTERIZER
@ -121,6 +110,7 @@ public:
virtual void Queue(shared_ptr<GSRasterizerData> data) = 0;
virtual void Sync() = 0;
virtual int GetPixels(bool reset = true) = 0;
};
__aligned(class, 32) GSRasterizer : public IRasterizer
@ -160,12 +150,13 @@ public:
__forceinline bool IsOneOfMyScanlines(int top, int bottom) const;
__forceinline int FindMyNextScanline(int top) const;
void Draw(shared_ptr<GSRasterizerData> data);
void Draw(GSRasterizerData* data);
// IRasterizer
void Queue(shared_ptr<GSRasterizerData> data);
void Sync() {}
int GetPixels(bool reset);
};
class GSRasterizerList
@ -181,6 +172,8 @@ protected:
GSWorker(GSRasterizer* r);
virtual ~GSWorker();
int GetPixels(bool reset);
// GSJobQueue
void Push(const shared_ptr<GSRasterizerData>& item);
@ -227,4 +220,5 @@ public:
void Queue(shared_ptr<GSRasterizerData> data);
void Sync();
int GetPixels(bool reset);
};

View File

@ -80,7 +80,7 @@ void GSRendererDX9::ConvertVertex(size_t dst_index, size_t src_index)
{
if(fst)
{
t = GSVector4(GSVector4i::load(s->UV.u32[0]).upl16());
t = GSVector4(GSVector4i::load(s->UV).upl16());
}
else
{
@ -88,7 +88,7 @@ void GSRendererDX9::ConvertVertex(size_t dst_index, size_t src_index)
}
}
t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG.u32[1])));
t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));
d->p = p;
d->t = t;

View File

@ -31,7 +31,6 @@ GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCac
, m_reset(false)
, m_upscale_multiplier(1)
{
m_nativeres = !!theApp.GetConfig("nativeres", 0);
m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1);
m_userhacks_skipdraw = theApp.GetConfig("UserHacks_SkipDraw", 0);
@ -52,7 +51,10 @@ GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCac
m_height = 512 * m_upscale_multiplier; // 448 is also common, but this is not always detected right.
}
}
else m_upscale_multiplier = 1;
else
{
m_upscale_multiplier = 1;
}
}
GSRendererHW::~GSRendererHW()
@ -173,8 +175,6 @@ void GSRendererHW::Draw()
{
if(m_dev->IsLost()) return;
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
#ifndef DISABLE_CRC_HACKS
if(GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) return;
@ -459,8 +459,10 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);
memcpy(&m_vertex.buff[m_vertex.stride * 2], &m_vertex.buff[m_vertex.stride * (m_vertex.next - 2)], m_vertex.stride);
memcpy(&m_vertex.buff[m_vertex.stride * 3], &m_vertex.buff[m_vertex.stride * (m_vertex.next - 1)], m_vertex.stride);
size_t stride = m_vertex.stride;
memcpy(&m_vertex.buff[stride * 2], &m_vertex.buff[stride * (m_vertex.next - 2)], stride);
memcpy(&m_vertex.buff[stride * 3], &m_vertex.buff[stride * (m_vertex.next - 1)], stride);
m_index.buff[0] = 0;
m_index.buff[1] = 1;

View File

@ -33,7 +33,6 @@ private:
int m_height;
int m_skip;
bool m_reset;
bool m_nativeres;
int m_upscale_multiplier;
int m_userhacks_skipdraw;

View File

@ -30,14 +30,7 @@ GSRendererSW::GSRendererSW(int threads)
{
InitConvertVertex(GSRendererSW);
m_ci[GS_POINTLIST] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_POINTLIST>;
m_ci[GS_LINELIST] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_LINELIST>;
m_ci[GS_LINESTRIP] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_LINESTRIP>;
m_ci[GS_TRIANGLELIST] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_TRIANGLELIST>;
m_ci[GS_TRIANGLESTRIP] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_TRIANGLESTRIP>;
m_ci[GS_TRIANGLEFAN] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_TRIANGLEFAN>;
m_ci[GS_SPRITE] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_SPRITE>;
m_ci[GS_INVALID] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<GS_INVALID>;
m_nativeres = true; // ignore ini, sw is always native
m_tc = new GSTextureCacheSW(this);
@ -165,10 +158,12 @@ void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
GSVertex* s = (GSVertex*)((GSVertexSW*)m_vertex.buff + src_index);
GSVertexSW* d = (GSVertexSW*)m_vertex.buff + dst_index;
ASSERT(d->_pad.u32[0] != 0x12345678);
uint32 z = s->XYZ.Z;
GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - (GSVector4i)m_context->XYOFFSET;
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
GSVector4 p, t, c;
@ -178,7 +173,7 @@ void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
{
if(fst)
{
t = GSVector4(GSVector4i::load(s->UV.u32[0]).upl16() << (16 - 4));
t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4));
}
else
{
@ -193,138 +188,41 @@ void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
d->c = c;
d->t = t;
#ifdef _DEBUG
d->_pad.u32[0] = 0x12345678; // means trouble if this has already been set, should only convert each vertex once
#endif
if(prim == GS_SPRITE)
{
d->t.u32[3] = z;
}
}
// Copies `count` indices from `src` to `dst` and returns the number of
// indices written.
//
// As written, the function is a plain memcpy: the first statement copies
// everything and returns `count`, so the per-primitive filtering loop below
// is unreachable. The notes on that loop describe what it would do if the
// early return were removed.
//
// prim  - primitive type the template is instantiated for (GS_* constant)
// dst   - destination index buffer
// src   - source index buffer, `count` entries
// count - number of indices in `src`
template<uint32 prim>
size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count)
{
//
// Early out: everything after this line is dead code.
memcpy(dst, src, sizeof(uint32) * count); return count;
// TODO: IsQuad
// NOTE(review): `scissor` is only referenced by the commented-out test further down.
GSVector4 scissor = m_context->scissor.ex;
const GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;
const uint32* src_end = src + count;
uint32* dst_base = dst;
// NOTE(review): none of the switches below has a case for GS_INVALID, so for
// that instantiation pmin/pmax stay unset and `src` never advances; confirm
// before re-enabling this loop.
while(src < src_end)
{
// Component-wise min/max of the positions of the primitive's vertices.
GSVector4 pmin, pmax;
switch(prim)
{
case GS_POINTLIST:
pmin = v[src[0]].p;
pmax = v[src[0]].p;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin = v[src[0]].p.min(v[src[1]].p);
pmax = v[src[0]].p.max(v[src[1]].p);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin = v[src[0]].p.min(v[src[1]].p).min(v[src[2]].p);
pmax = v[src[0]].p.max(v[src[1]].p).max(v[src[2]].p);
break;
}
// Rejection mask starts out empty; the scissor comparison is disabled.
GSVector4 test = GSVector4::zero(); // (pmax < scissor) | (pmin > scissor.zwxy());
/*
GSVector4 tmp;
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
// are in line or just two of them are the same (cross product == 0)
tmp = (v[src[1]].p - v[src[0]].p) * (v[src[2]].p - v[src[0]].p).yxwz();
test |= tmp == tmp.yxwz();
break;
}
*/
// For triangles and sprites, flag lanes where the rounded-up min and max
// coincide (presumably an extent too small to cover a pixel; verify).
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
test |= pmin.ceil() == pmax.ceil();
break;
}
// Only the x and y lanes of the mask take part in the decision.
bool pass = test.xyxy().allfalse();
// Emit the primitive's indices when it passes, then step to the next
// primitive (1, 2 or 3 indices depending on the primitive type).
switch(prim)
{
case GS_POINTLIST:
if(pass) {dst[0] = src[0]; dst++;}
src++;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
if(pass) {dst[0] = src[0]; dst[1] = src[1]; dst += 2;}
src += 2;
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
if(pass) {dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst += 3;}
src += 3;
break;
}
}
// Number of indices actually written to dst.
return dst - dst_base;
}
void GSRendererSW::UpdateVertexKick()
{
GSRenderer::UpdateVertexKick();
m_cif = m_ci[PRIM->PRIM];
}
void GSRendererSW::Draw()
{
const GSDrawingContext* context = m_context;
SharedData* sd = new SharedData(this);
shared_ptr<GSRasterizerData> data(new GSRasterizerData2(this));
shared_ptr<GSRasterizerData> data(sd);
data->primclass = GSUtil::GetPrimClass(PRIM->PRIM);
if(!GetScanlineGlobalData(sd)) return;
//
data->primclass = m_vt->m_primclass;
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32);
data->vertex = (GSVertexSW*)data->buff;
data->vertex_count = m_vertex.next;
data->index = (uint32*)(data->buff + sizeof(GSVertexSW) * m_vertex.next);
data->index_count = (this->*m_cif)(data->index, m_index.buff, m_index.tail);
m_index.tail = data->index_count;
if(data->index_count == 0) return;
// TODO: merge these
data->index_count = m_index.tail;
memcpy(data->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next);
m_vt->Update(data->vertex, data->index, data->index_count, data->primclass);
memcpy(data->index, m_index.buff, sizeof(uint32) * m_index.tail);
//
GSRasterizerData2* data2 = (GSRasterizerData2*)data.get();
const GSDrawingContext* context = m_context;
if(!GetScanlineGlobalData(data2)) return;
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
GSScanlineGlobalData& gd = sd->global;
GSVector4i scissor = GSVector4i(context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil()));
@ -333,7 +231,7 @@ void GSRendererSW::Draw()
data->scissor = scissor;
data->bbox = bbox;
data->solidrect = gd->sel.IsSolidRect();
data->solidrect = gd.sel.IsSolidRect();
data->frame = m_perfmon.GetFrame();
//
@ -343,25 +241,25 @@ void GSRendererSW::Draw()
GSVector4i r = bbox.rintersect(scissor);
if(gd->sel.fwrite)
if(gd.sel.fwrite)
{
fb_pages = m_context->offset.fb->GetPages(r);
fb_pages = context->offset.fb->GetPages(r);
m_tc->InvalidatePages(fb_pages, m_context->offset.fb->psm);
m_tc->InvalidatePages(fb_pages, context->offset.fb->psm);
}
if(gd->sel.zwrite)
if(gd.sel.zwrite)
{
zb_pages = m_context->offset.zb->GetPages(r);
zb_pages = context->offset.zb->GetPages(r);
m_tc->InvalidatePages(zb_pages, m_context->offset.zb->psm);
m_tc->InvalidatePages(zb_pages, context->offset.zb->psm);
}
// set data->syncpoint
if(m_fzb != m_context->offset.fzb)
if(m_fzb != context->offset.fzb)
{
m_fzb = m_context->offset.fzb;
m_fzb = context->offset.fzb;
data->syncpoint = true;
}
@ -371,7 +269,7 @@ void GSRendererSW::Draw()
if(!data->syncpoint)
{
if(gd->sel.fwrite)
if(gd.sel.fwrite)
{
for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
{
@ -387,7 +285,7 @@ void GSRendererSW::Draw()
if(!data->syncpoint)
{
if(gd->sel.zwrite)
if(gd.sel.zwrite)
{
for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
{
@ -403,7 +301,7 @@ void GSRendererSW::Draw()
//
data2->UseTargetPages(fb_pages, zb_pages);
sd->UseTargetPages(fb_pages, zb_pages);
//
@ -484,6 +382,8 @@ void GSRendererSW::Sync(int reason)
GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync);
m_rl->Sync();
m_perfmon.Put(GSPerfMon::Fillrate, m_rl->GetPixels());
}
void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
@ -592,9 +492,9 @@ void GSRendererSW::ReleasePages(const uint32* pages, int type)
#include "GSTextureSW.h"
bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
{
GSScanlineGlobalData& gd = *(GSScanlineGlobalData*)data2->param;
GSScanlineGlobalData& gd = data->global;
const GSDrawingEnvironment& env = m_env;
const GSDrawingContext* context = m_context;
@ -710,7 +610,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
if(t == NULL) {ASSERT(0); return false;}
data2->UseSourcePages(t, 0);
data->UseSourcePages(t, 0);
GSVector4i r;
@ -863,7 +763,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
if(t == NULL) {ASSERT(0); return false;}
data2->UseSourcePages(t, i);
data->UseSourcePages(t, i);
GSVector4i r;
@ -908,19 +808,19 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
{
// skip per pixel division if q is constant
GSVertexSW* RESTRICT v = data2->vertex;
GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;// data->vertex;
if(m_vt->m_eq.q)
{
gd.sel.fst = 1;
const GSVector4& t = v[data2->index[0]].t;
const GSVector4& t = v[m_index.buff[0]].t; // v[data->index[0]].t;
if(t.z != 1.0f)
{
GSVector4 w = t.zzzz().rcpnr();
for(int i = 0, j = data2->vertex_count; i < j; i++)
for(int i = 0, j = m_vertex.next/*data->vertex_count*/; i < j; i++)
{
GSVector4 t = v[i].t;
@ -932,7 +832,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
{
gd.sel.fst = 1;
for(int i = 0, j = data2->vertex_count; i < j; i += 2)
for(int i = 0, j = m_vertex.next/*data->vertex_count*/; i < j; i += 2)
{
GSVector4 t0 = v[i + 0].t;
GSVector4 t1 = v[i + 1].t;
@ -953,9 +853,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
GSVector4 half(0x8000, 0x8000);
GSVertexSW* RESTRICT v = data2->vertex;
GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;// data->vertex;
for(int i = 0, j = data2->vertex_count; i < j; i++)
for(int i = 0, j = m_vertex.next/*data->vertex_count*/; i < j; i++)
{
GSVector4 t = v[i].t;
@ -1117,36 +1017,30 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
return true;
}
GSRendererSW::GSRasterizerData2::GSRasterizerData2(GSRendererSW* parent)
GSRendererSW::SharedData::SharedData(GSRendererSW* parent)
: m_parent(parent)
, m_fb_pages(NULL)
, m_zb_pages(NULL)
, m_using_pages(false)
{
memset(m_tex_pages, 0, sizeof(m_tex_pages));
m_tex_pages[0] = NULL;
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32);
global.sel.key = 0;
gd->sel.key = 0;
gd->clut = NULL;
gd->dimx = NULL;
param = gd;
global.clut = NULL;
global.dimx = NULL;
}
GSRendererSW::GSRasterizerData2::~GSRasterizerData2()
GSRendererSW::SharedData::~SharedData()
{
if(m_using_pages)
{
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
if(gd->sel.fwrite)
if(global.sel.fwrite)
{
m_parent->ReleasePages(m_fb_pages, 0);
}
if(gd->sel.zwrite)
if(global.sel.zwrite)
{
m_parent->ReleasePages(m_zb_pages, 1);
}
@ -1160,31 +1054,23 @@ GSRendererSW::GSRasterizerData2::~GSRasterizerData2()
m_parent->ReleasePages(m_tex_pages[i], 2);
}
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
if(gd->clut) _aligned_free(gd->clut);
if(gd->dimx) _aligned_free(gd->dimx);
_aligned_free(gd);
m_parent->m_perfmon.Put(GSPerfMon::Fillrate, pixels);
if(global.clut) _aligned_free(global.clut);
if(global.dimx) _aligned_free(global.dimx);
}
void GSRendererSW::GSRasterizerData2::UseTargetPages(const uint32* fb_pages, const uint32* zb_pages)
void GSRendererSW::SharedData::UseTargetPages(const uint32* fb_pages, const uint32* zb_pages)
{
if(m_using_pages) return;
m_fb_pages = fb_pages;
m_zb_pages = zb_pages;
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)param;
if(gd->sel.fwrite)
if(global.sel.fwrite)
{
m_parent->UsePages(fb_pages, 0);
}
if(gd->sel.zwrite)
if(global.sel.zwrite)
{
m_parent->UsePages(zb_pages, 1);
}
@ -1192,11 +1078,12 @@ void GSRendererSW::GSRasterizerData2::UseTargetPages(const uint32* fb_pages, con
m_using_pages = true;
}
void GSRendererSW::GSRasterizerData2::UseSourcePages(GSTextureCacheSW::Texture* t, int level)
void GSRendererSW::SharedData::UseSourcePages(GSTextureCacheSW::Texture* t, int level)
{
ASSERT(m_tex_pages[level] == NULL);
m_tex_pages[level] = t->m_pages.n;
m_tex_pages[level + 1] = NULL;
m_parent->UsePages(t->m_pages.n, 2);
}

View File

@ -27,17 +27,17 @@
class GSRendererSW : public GSRenderer
{
class GSRasterizerData2 : public GSRasterizerData
class SharedData : public GSDrawScanline::SharedData
{
GSRendererSW* m_parent;
const uint32* m_fb_pages;
const uint32* m_zb_pages;
const uint32* m_tex_pages[7];
const uint32* m_tex_pages[7 + 1]; // NULL terminated
bool m_using_pages;
public:
GSRasterizerData2(GSRendererSW* parent);
virtual ~GSRasterizerData2();
SharedData(GSRendererSW* parent);
virtual ~SharedData();
void UseTargetPages(const uint32* fb_pages, const uint32* zb_pages);
void UseSourcePages(GSTextureCacheSW::Texture* t, int level);
@ -67,20 +67,11 @@ protected:
void UsePages(const uint32* pages, int type);
void ReleasePages(const uint32* pages, int type);
bool GetScanlineGlobalData(GSRasterizerData2* data2);
typedef size_t (GSState::*ConvertIndexPtr)(uint32* RESTRICT dst, const uint32* RESTRICT src, int count);
ConvertIndexPtr m_ci[8], m_cif;
bool GetScanlineGlobalData(SharedData* data);
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
template<uint32 prim>
size_t ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count);
void UpdateVertexKick();
public:
GSRendererSW(int threads);
virtual ~GSRendererSW();

View File

@ -21,6 +21,7 @@
#include "stdafx.h"
#include "GSState.h"
#include "GSdx.h"
//#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering
//#define Offset_UV // Fixes / breaks various titles
@ -36,6 +37,8 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
, m_frameskip(0)
, m_vt(vt)
{
m_nativeres = !!theApp.GetConfig("nativeres", 0);
memset(&m_v, 0, sizeof(m_v));
m_q = 1.0f;
memset(&m_vertex, 0, sizeof(m_vertex));
@ -44,19 +47,10 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
ASSERT(vertex_stride >= sizeof(GSVertex));
m_vertex.stride = vertex_stride;
m_vertex.tmp = (uint8*)_aligned_malloc(vertex_stride * 2, 32);
m_vertex.tmp = (uint8*)_aligned_malloc(m_vertex.stride * 2, 32);
GrowVertexBuffer();
m_vk[GS_POINTLIST] = (VertexKickPtr)&GSState::VertexKick<GS_POINTLIST>;
m_vk[GS_LINELIST] = (VertexKickPtr)&GSState::VertexKick<GS_LINELIST>;
m_vk[GS_LINESTRIP] = (VertexKickPtr)&GSState::VertexKick<GS_LINESTRIP>;
m_vk[GS_TRIANGLELIST] = (VertexKickPtr)&GSState::VertexKick<GS_TRIANGLELIST>;
m_vk[GS_TRIANGLESTRIP] = (VertexKickPtr)&GSState::VertexKick<GS_TRIANGLESTRIP>;
m_vk[GS_TRIANGLEFAN] = (VertexKickPtr)&GSState::VertexKick<GS_TRIANGLEFAN>;
m_vk[GS_SPRITE] = (VertexKickPtr)&GSState::VertexKick<GS_SPRITE>;
m_vk[GS_INVALID] = (VertexKickPtr)&GSState::VertexKick<GS_INVALID>;
memset(m_cv, 0, sizeof(m_cv));
m_sssize = 0;
@ -99,8 +93,9 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
m_sssize += sizeof(m_v.RGBAQ);
m_sssize += sizeof(m_v.ST);
m_sssize += sizeof(m_v.UV);
m_sssize += sizeof(m_v.FOG);
m_sssize += sizeof(m_v.XYZ);
m_sssize += sizeof(m_v.FOG); // obsolete
m_sssize += sizeof(GIFReg); // obsolete
m_sssize += sizeof(m_tr.x);
m_sssize += sizeof(m_tr.y);
@ -189,24 +184,26 @@ void GSState::SetFrameSkip(int skip)
}
else
{
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2;
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2;
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2<GS_INVALID, 0>;
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2<GS_INVALID, 0>;
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerXYZF2<GS_INVALID, 1>;
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerXYZ2<GS_INVALID, 1>;
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<1>;
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerXYZF3;
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerXYZ3;
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2<GS_INVALID, 0>;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2<GS_INVALID, 0>;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF2<GS_INVALID, 1>;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ2<GS_INVALID, 1>;
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT;
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE;
UpdateVertexKick();
}
}
@ -239,18 +236,33 @@ void GSState::ResetHandlers()
m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA;
m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ;
m_fpGIFPackedRegHandlers[GIF_REG_UV] = &GSState::GIFPackedRegHandlerUV;
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2;
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2;
m_fpGIFPackedRegHandlers[GIF_REG_TEX0_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerTEX0<0>;
m_fpGIFPackedRegHandlers[GIF_REG_TEX0_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerTEX0<1>;
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<1>;
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerXYZF3;
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerXYZ3;
m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D;
m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP;
// Fills row P of the per-primitive XYZ handler tables with the handler
// specializations for primitive type P.
// Column layout, identical for the packed and the A+D table:
//   [0] XYZF2<P, 0>   [1] XYZF2<P, 1>   [2] XYZ2<P, 0>   [3] XYZ2<P, 1>
// The last body line must NOT end with a backslash: a trailing continuation
// splices whatever line follows into the macro body, which would swallow the
// first SetHandlerXYZ(...) invocation placed directly after the definition.
#define SetHandlerXYZ(P) \
	m_fpGIFPackedRegHandlerXYZ[P][0] = &GSState::GIFPackedRegHandlerXYZF2<P, 0>; \
	m_fpGIFPackedRegHandlerXYZ[P][1] = &GSState::GIFPackedRegHandlerXYZF2<P, 1>; \
	m_fpGIFPackedRegHandlerXYZ[P][2] = &GSState::GIFPackedRegHandlerXYZ2<P, 0>; \
	m_fpGIFPackedRegHandlerXYZ[P][3] = &GSState::GIFPackedRegHandlerXYZ2<P, 1>; \
	m_fpGIFRegHandlerXYZ[P][0] = &GSState::GIFRegHandlerXYZF2<P, 0>; \
	m_fpGIFRegHandlerXYZ[P][1] = &GSState::GIFRegHandlerXYZF2<P, 1>; \
	m_fpGIFRegHandlerXYZ[P][2] = &GSState::GIFRegHandlerXYZ2<P, 0>; \
	m_fpGIFRegHandlerXYZ[P][3] = &GSState::GIFRegHandlerXYZ2<P, 1>;
SetHandlerXYZ(GS_POINTLIST);
SetHandlerXYZ(GS_LINELIST);
SetHandlerXYZ(GS_LINESTRIP);
SetHandlerXYZ(GS_TRIANGLELIST);
SetHandlerXYZ(GS_TRIANGLESTRIP);
SetHandlerXYZ(GS_TRIANGLEFAN);
SetHandlerXYZ(GS_SPRITE);
SetHandlerXYZ(GS_INVALID);
for(size_t i = 0; i < countof(m_fpGIFRegHandlers); i++)
{
m_fpGIFRegHandlers[i] = &GSState::GIFRegHandlerNull;
@ -260,15 +272,11 @@ void GSState::ResetHandlers()
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2;
m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_1] = &GSState::GIFRegHandlerTEX0<0>;
m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_2] = &GSState::GIFRegHandlerTEX0<1>;
m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_1] = &GSState::GIFRegHandlerCLAMP<0>;
m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_2] = &GSState::GIFRegHandlerCLAMP<1>;
m_fpGIFRegHandlers[GIF_A_D_REG_FOG] = &GSState::GIFRegHandlerFOG;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3;
m_fpGIFRegHandlers[GIF_A_D_REG_NOP] = &GSState::GIFRegHandlerNOP;
m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_1] = &GSState::GIFRegHandlerTEX1<0>;
m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_2] = &GSState::GIFRegHandlerTEX1<1>;
@ -417,12 +425,12 @@ float GSState::GetFPS()
// GIFPackedRegHandler*
// Packed-register handler that does nothing with its argument; the body holds
// only a commented-out assert.
// NOTE(review): the signature is listed twice, with and without __forceinline -
// presumably the old and new form of the same line; confirm.
__forceinline void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r)
void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r)
{
// ASSERT(0);
}
__forceinline void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r)
void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r)
{
#if _M_SSE >= 0x301
@ -449,7 +457,7 @@ __forceinline void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT
m_v.RGBAQ.Q = m_q;
}
__forceinline void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
{
#if defined(_M_AMD64)
@ -476,19 +484,11 @@ __forceinline void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT
#endif
}
__forceinline void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r)
void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r)
{
#if _M_SSE >= 0x200
GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff();
m_v.UV.u32[0] = (uint32)GSVector4i::store(v.ps32(v));
#else
m_v.UV.U = r->UV.U;
m_v.UV.V = r->UV.V;
#endif
m_v.UV = (uint32)GSVector4i::store(v.ps32(v));
#ifdef Offset_UV
m_v.UV.U = min((uint16)m_v.UV.U, (uint16)(m_v.UV.U - 4U));
@ -496,36 +496,66 @@ __forceinline void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r
#endif
}
// Packed XYZF2/XYZF3 handler: rebuilds the second 128-bit half of the current
// vertex (m_v.m[1]) from the packed register and kicks the vertex for primitive
// type `prim`.
// Template parameters:
//   prim - primitive type forwarded to VertexKick<prim>
//   adc  - when non-zero the vertex is kicked with skip = 1, otherwise the skip
//          value comes from r->XYZF2.Skip()
// NOTE(review): both a __forceinline non-template signature and a template
// signature are listed, as are two kick calls (through m_vkf and through
// VertexKick<prim>) - presumably old and new forms; confirm which are current.
__forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
template<uint32 prim, uint32 adc>
void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
{
if(adc)
{
// not sure what the difference is between this and XYZF2 with ADC bit set
//printf("XYZF3 X %d Y %d Z %d F %d ADC %d\n", r->XYZF2.X, r->XYZF2.Y, r->XYZF2.Z, r->XYZF2.F, r->XYZF2.ADC);
}
// scalar description of the fields the vector code below extracts
/*
m_v.XYZ.X = r->XYZF2.X;
m_v.XYZ.Y = r->XYZF2.Y;
m_v.XYZ.Z = r->XYZF2.Z;
m_v.FOG.F = r->XYZF2.F;
m_v.FOG = r->XYZF2.F;
*/
// load the low and high 64 bits of the packed register
GSVector4i xy = GSVector4i::loadl(&r->u64[0]);
GSVector4i zf = GSVector4i::loadl(&r->u64[1]);
// combine the X/Y data with the UV value already held in m_v
// (presumably upl16/upl32 interleave 16/32-bit lanes - confirm against GSVector4i)
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::loadl(&m_v.UV));
// shift out the low 4 bits, then mask with 0x00ffffff / 0x000000ff
// (presumably a 24-bit Z and an 8-bit F lane - confirm)
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
(this->*m_vkf)(r->XYZF2.Skip());
// m_v.m[1] is written in one piece before the kick
m_v.m[1] = xy.upl32(zf);
VertexKick<prim>(adc ? 1 : r->XYZF2.Skip());
}
// Packed XYZ2/XYZ3 handler: rebuilds the second 128-bit half of the current
// vertex (m_v.m[1]) from the packed register and kicks the vertex for primitive
// type `prim`. Unlike the XYZF variant no fog value is extracted here.
// Template parameters:
//   prim - primitive type forwarded to VertexKick<prim>
//   adc  - when non-zero the vertex is kicked with skip = 1, otherwise the skip
//          value comes from r->XYZ2.Skip()
// NOTE(review): both a __forceinline non-template signature and a template
// signature are listed, as are two kick calls (through m_vkf and through
// VertexKick<prim>) - presumably old and new forms; confirm which are current.
__forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
template<uint32 prim, uint32 adc>
void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
{
if(adc)
{
// not sure what the difference is between this and XYZ2 with ADC bit set
//printf("XYZ3 X %d Y %d Z %d ADC %d\n", r->XYZ2.X, r->XYZ2.Y, r->XYZ2.Z, r->XYZ2.ADC);
}
// scalar description of the fields the vector code below extracts
/*
m_v.XYZ.X = r->XYZ2.X;
m_v.XYZ.Y = r->XYZ2.Y;
m_v.XYZ.Z = r->XYZ2.Z;
*/
// load the low and high 64 bits of the packed register
GSVector4i xy = GSVector4i::loadl(&r->u64[0]);
GSVector4i z = GSVector4i::loadl(&r->u64[1]);
// merge the X/Y data with Z (presumably lane interleaves - confirm against GSVector4i)
GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
(this->*m_vkf)(r->XYZ2.Skip());
// low half: the XYZ data built above, high half: the 64 bits loaded from &m_v.UV;
// m_v.m[1] is written in one piece before the kick
m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV));
VertexKick<prim>(adc ? 1 : r->XYZ2.Skip());
}
// Packed FOG handler: copies the F field of the packed FOG register into the
// fog value of the current vertex state m_v.
// NOTE(review): two signatures and two assignment forms (m_v.FOG.F and m_v.FOG)
// are listed; GSVertex::FOG is shown elsewhere in this listing as a plain
// uint32, so presumably only the second assignment still applies - confirm.
__forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r)
void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r)
{
m_v.FOG.F = r->FOG.F;
m_v.FOG = r->FOG.F;
}
// Packed A+D handler: forwards the embedded register data (&r->r) to the entry
// of m_fpGIFRegHandlers selected by the address field r->A_D.ADDR.
// NOTE(review): the signature is listed twice, with and without __forceinline -
// presumably the old and new form of the same line; confirm.
__forceinline void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r)
void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r)
{
(this->*m_fpGIFRegHandlers[r->A_D.ADDR])(&r->r);
}
// Packed NOP handler: empty body, the register contents are not read.
// NOTE(review): the signature is listed twice, with and without __forceinline -
// presumably the old and new form of the same line; confirm.
__forceinline void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r)
void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r)
{
}
@ -571,12 +601,12 @@ void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r)
ApplyPRIM(r->PRIM);
}
// A+D RGBAQ handler: copies the whole RGBAQ register into the current vertex
// state (m_v.RGBAQ) via a GSVector4i conversion.
// NOTE(review): the signature is listed twice, with and without __forceinline -
// presumably the old and new form of the same line; confirm.
__forceinline void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
{
m_v.RGBAQ = (GSVector4i)r->RGBAQ;
}
__forceinline void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)
void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)
{
m_v.ST = (GSVector4i)r->ST;
@ -587,16 +617,17 @@ __forceinline void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)
#endif
}
// A+D UV handler: stores the first 32 bits of the UV register into the current
// vertex state, masked with 0x3fff3fff (the low 14 bits of each 16-bit half).
// When Offset_UV is defined, U and V are additionally replaced by
// min(value, value - 4) evaluated in uint16 arithmetic.
// NOTE(review): two signatures, two store forms (m_v.UV.u32[0] and m_v.UV) and
// two variants of the Offset_UV lines (m_v._UV and m_v.UV) are listed -
// presumably old and new forms; confirm which are current.
// NOTE(review): GSVertex::UV is shown elsewhere in this listing as a plain
// uint32, so the .U / .V member accesses would presumably fail to compile if
// Offset_UV were defined - verify.
__forceinline void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r)
void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r)
{
m_v.UV.u32[0] = r->UV.u32[0] & 0x3fff3fff;
m_v.UV = r->UV.u32[0] & 0x3fff3fff;
#ifdef Offset_UV
m_v.UV.U = min((uint16)m_v.UV.U, (uint16)(m_v._UV.U - 4U));
m_v.UV.V = min((uint16)m_v.UV.V, (uint16)(m_v._UV.V - 4U));
m_v.UV.U = min((uint16)m_v.UV.U, (uint16)(m_v.UV.U - 4U));
m_v.UV.V = min((uint16)m_v.UV.V, (uint16)(m_v.UV.V - 4U));
#endif
}
template<uint32 prim, uint32 adc>
void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
{
/*
@ -605,21 +636,33 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
m_v.XYZ.Z = r->XYZF.Z;
m_v.FOG.F = r->XYZF.F;
*/
/*
m_v.XYZ.u32[0] = r->XYZF.u32[0];
m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff;
m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000;
m_v.FOG = r->XYZF.u32[1] >> 24;
*/
(this->*m_vkf)(0);
GSVector4i xyzf = GSVector4i::loadl(&r->XYZF);
GSVector4i xyz = xyzf & (GSVector4i::xffffffff().upl32(GSVector4i::x00ffffff()));
GSVector4i uvf = GSVector4i::loadl(&m_v.UV).upl32(xyzf.srl32(24).srl<4>());
m_v.m[1] = xyz.upl64(uvf);
VertexKick<prim>(adc);
}
// A+D XYZ2/XYZ3 handler: writes the XYZ register together with the value
// currently at &m_v.UV into m_v.m[1] in a single store, then kicks the vertex
// for primitive type `prim`, passing the template parameter `adc` as the skip
// argument.
// NOTE(review): an assignment to m_v.XYZ appears both live and commented out,
// and two kick calls are listed (through m_vkf and through VertexKick<prim>) -
// presumably old and new forms; confirm which are current.
template<uint32 prim, uint32 adc>
void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r)
{
m_v.XYZ = (GSVector4i)r->XYZ;
// m_v.XYZ = (GSVector4i)r->XYZ;
(this->*m_vkf)(0);
// two-pointer load: first argument is the XYZ register, second is &m_v.UV
m_v.m[1] = GSVector4i::load(&r->XYZ, &m_v.UV);
VertexKick<prim>(adc);
}
void GSState::ApplyTEX0(int i, GIFRegTEX0& TEX0)
template<int i> void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
{
// even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing
@ -674,7 +717,7 @@ template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
if(TEX0.TW > 10) TEX0.TW = 10;
if(TEX0.TH > 10) TEX0.TH = 10;
ApplyTEX0(i, TEX0);
ApplyTEX0<i>(TEX0);
if(m_env.CTXT[i].TEX1.MTBA)
{
@ -730,29 +773,7 @@ template<int i> void GSState::GIFRegHandlerCLAMP(const GIFReg* RESTRICT r)
void GSState::GIFRegHandlerFOG(const GIFReg* RESTRICT r)
{
m_v.FOG.u32[1] = r->FOG.u32[1];
}
void GSState::GIFRegHandlerXYZF3(const GIFReg* RESTRICT r)
{
/*
m_v.XYZ.X = r->XYZF.X;
m_v.XYZ.Y = r->XYZF.Y;
m_v.XYZ.Z = r->XYZF.Z;
m_v.FOG.F = r->XYZF.F;
*/
m_v.XYZ.u32[0] = r->XYZF.u32[0];
m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff;
m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000;
(this->*m_vkf)(1);
}
void GSState::GIFRegHandlerXYZ3(const GIFReg* RESTRICT r)
{
m_v.XYZ = (GSVector4i)r->XYZ;
(this->*m_vkf)(1);
m_v.FOG = r->FOG.F;
}
void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r)
@ -785,7 +806,7 @@ template<int i> void GSState::GIFRegHandlerTEX2(const GIFReg* RESTRICT r)
TEX0.u64 = (m_env.CTXT[i].TEX0.u64 & ~mask) | (r->u64 & mask);
ApplyTEX0(i, TEX0);
ApplyTEX0<i>(TEX0);
}
template<int i> void GSState::GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r)
@ -1211,76 +1232,116 @@ void GSState::FlushPrim()
{
if(m_index.tail > 0)
{
if(0)
{
uint8* buff = new uint8[m_vertex.next];
memset(buff, 0, m_vertex.next);
for(size_t i = 0; i < m_index.tail; i++)
{
ASSERT(m_index.buff[i] < m_vertex.next);
buff[m_index.buff[i]] = 1;
}
size_t count = 0;
for(size_t i = 0; i < m_vertex.next; i++)
{
if(buff[i] == 0)
{
count++;
}
}
if(count > 0)
{
printf("unref %lld %d/%d\n", m_perfmon.GetFrame(), count, m_vertex.next);
}
delete [] buff;
}
uint8* buff = m_vertex.tmp;
size_t stride = m_vertex.stride;
size_t head = m_vertex.head;
size_t tail = m_vertex.tail;
switch(PRIM->PRIM)
if(tail > head)
{
case GS_LINESTRIP:
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
break;
case GS_TRIANGLESTRIP:
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (head + 1)], stride);
break;
case GS_TRIANGLEFAN:
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (tail - 1)], stride);
break;
case GS_POINTLIST:
case GS_LINELIST:
case GS_TRIANGLELIST:
case GS_SPRITE:
case GS_INVALID:
break;
default:
__assume(0);
switch(PRIM->PRIM)
{
case GS_LINESTRIP:
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
break;
case GS_TRIANGLESTRIP:
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (head + 1)], stride);
break;
case GS_TRIANGLEFAN:
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (tail - 1)], stride);
break;
case GS_POINTLIST:
case GS_LINELIST:
case GS_TRIANGLELIST:
case GS_SPRITE:
case GS_INVALID:
break;
default:
__assume(0);
}
}
if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3)
{
// FIXME: berserk fpsm = 27 (8H)
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
Draw();
m_perfmon.Put(GSPerfMon::Draw, 1);
m_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM));
}
m_vertex.head = 0;
m_vertex.tail = 0;
switch(PRIM->PRIM)
if(tail > head)
{
case GS_LINESTRIP:
if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;}
break;
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;}
if(tail > head + 1) {memcpy(&m_vertex.buff[stride * 1], &buff[stride * 1], stride); m_vertex.tail++;}
break;
case GS_POINTLIST:
case GS_LINELIST:
case GS_TRIANGLELIST:
case GS_SPRITE:
case GS_INVALID:
break;
default:
__assume(0);
switch(PRIM->PRIM)
{
case GS_LINESTRIP:
if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;}
break;
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;}
if(tail > head + 1) {memcpy(&m_vertex.buff[stride * 1], &buff[stride * 1], stride); m_vertex.tail++;}
break;
case GS_POINTLIST:
case GS_LINELIST:
case GS_TRIANGLELIST:
case GS_SPRITE:
case GS_INVALID:
break;
default:
__assume(0);
}
}
m_vertex.next = m_vertex.tail;
m_index.tail = 0;
}
else
{
m_vertex.head = 0;
m_vertex.tail = 0;
m_vertex.next = 0;
}
m_vertex.head = 0;
m_vertex.next = 0;
}
//
@ -1644,23 +1705,60 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
}
else
{
uint32 total;
switch(path.tag.FLG)
{
case GIF_FLG_PACKED:
// first try a shortcut for a very common case
// get to the start of the loop
if(path.adonly && size >= path.nloop)
if(path.reg != 0)
{
size -= path.nloop;
do
{
(this->*m_fpGIFRegHandlers[((GIFPackedReg*)mem)->A_D.ADDR])(&((GIFPackedReg*)mem)->r);
(this->*m_fpGIFPackedRegHandlers[path.GetReg()])((GIFPackedReg*)mem);
mem += sizeof(GIFPackedReg);
size--;
}
while(--path.nloop > 0);
while(path.StepReg() && size > 0 && path.reg != 0);
}
// all data available? usually is
total = path.nloop * path.nreg;
if(size >= total)
{
size -= total;
if(path.adonly)
{
do
{
(this->*m_fpGIFRegHandlers[((GIFPackedReg*)mem)->A_D.ADDR])(&((GIFPackedReg*)mem)->r);
mem += sizeof(GIFPackedReg);
}
while(--total > 0);
}
else
{
uint32 reg = 0;
do
{
(this->*m_fpGIFPackedRegHandlers[path.GetReg(reg++)])((GIFPackedReg*)mem);
mem += sizeof(GIFPackedReg);
reg = reg & ((int)(reg - path.nreg) >> 31); // resets reg back to 0 when it becomes equal to path.nreg
}
while(--total > 0);
}
path.nloop = 0;
}
else
{
@ -1678,6 +1776,8 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
case GIF_FLG_REGLIST:
// TODO: do it similar to packed operation
size *= 2;
do
@ -1848,8 +1948,9 @@ int GSState::Freeze(GSFreezeData* fd, bool sizeonly)
WriteState(data, &m_v.RGBAQ);
WriteState(data, &m_v.ST);
WriteState(data, &m_v.UV);
WriteState(data, &m_v.XYZ);
WriteState(data, &m_v.FOG);
WriteState(data, &m_v.XYZ);
data += sizeof(GIFReg); // obsolete
WriteState(data, &m_tr.x);
WriteState(data, &m_tr.y);
WriteState(data, m_mem.m_vm8, m_mem.m_vmsize);
@ -1942,8 +2043,9 @@ int GSState::Defrost(const GSFreezeData* fd)
ReadState(&m_v.RGBAQ, data);
ReadState(&m_v.ST, data);
ReadState(&m_v.UV, data);
ReadState(&m_v.XYZ, data);
ReadState(&m_v.FOG, data);
ReadState(&m_v.XYZ, data);
data += sizeof(GIFReg); // obsolete
ReadState(&m_tr.x, data);
ReadState(&m_tr.y, data);
ReadState(m_mem.m_vm8, data, m_mem.m_vmsize);
@ -1994,8 +2096,19 @@ void GSState::SetGameCRC(uint32 crc, int options)
// Re-selects the function pointers that depend on the current PRIM register:
// - the XYZF2/XYZF3/XYZ2/XYZ3 entries of both handler tables are pointed at the
//   row of m_fpGIFPackedRegHandlerXYZ / m_fpGIFRegHandlerXYZ for PRIM->PRIM
//   (columns 0..3 = XYZF2, XYZF3, XYZ2, XYZ3),
// - m_cvf is taken from m_cv[PRIM][TME][FST].
// NOTE(review): the first two assignments (m_vkf from m_vk, and m_cvf indexed
// with PRIM->PRIM) look like the older form of this function; m_cvf is
// assigned again on the last line - confirm which lines are current.
void GSState::UpdateVertexKick()
{
m_vkf = m_vk[PRIM->PRIM];
m_cvf = m_cv[PRIM->PRIM][PRIM->TME][PRIM->FST];
uint32 prim = PRIM->PRIM;
// packed-format handlers for the current primitive type
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = m_fpGIFPackedRegHandlerXYZ[prim][0];
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = m_fpGIFPackedRegHandlerXYZ[prim][1];
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = m_fpGIFPackedRegHandlerXYZ[prim][2];
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = m_fpGIFPackedRegHandlerXYZ[prim][3];
// A+D handlers for the current primitive type
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = m_fpGIFRegHandlerXYZ[prim][0];
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = m_fpGIFRegHandlerXYZ[prim][1];
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = m_fpGIFRegHandlerXYZ[prim][2];
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = m_fpGIFRegHandlerXYZ[prim][3];
// vertex converter for the current primitive / texturing mode
m_cvf = m_cv[prim][PRIM->TME][PRIM->FST];
}
void GSState::GrowVertexBuffer()
@ -2024,20 +2137,34 @@ void GSState::GrowVertexBuffer()
m_index.buff = index;
}
static uint32 s_tmp[4];
static size_t s_tmp_i = 0;
static GSVector4i s_tmp_zw_sign = GSVector4i::x80000000().sll<8>();
static GSVector4i s_zw_sign = GSVector4i::x80000000().sll<8>();
template<uint32 prim>
void GSState::VertexKick(uint32 skip)
__forceinline void GSState::VertexKick(uint32 skip)
{
s_tmp[s_tmp_i++ & 3] = m_v.XYZ.u32[0];
size_t head = m_vertex.head;
size_t tail = m_vertex.tail;
size_t next = m_vertex.next;
*(GSVertex*)&m_vertex.buff[m_vertex.stride * tail] = m_v;
size_t xy_tail = m_vertex.xy_tail;
// callers should write XYZUVF to m_v.m[1] in one piece to have this load store-forwarded, either by the cpu or the compiler when this function is inlined
GSVector4i v0(m_v.m[0]);
GSVector4i v1(m_v.m[1]);
GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[m_vertex.stride * tail];
tailptr[0] = v0;
tailptr[1] = v1;
m_vertex.xy[xy_tail & 3] = GSVector4(v1.upl32(v1.add16(GSVector4i::x000f()).srl16(4)).upl16());
#ifdef _DEBUG
memset(&tailptr[2], 0, m_vertex.stride - sizeof(GSVertex));
#endif
m_vertex.tail = ++tail;
m_vertex.xy_tail = ++xy_tail;
size_t n = 0;
@ -2053,8 +2180,6 @@ void GSState::VertexKick(uint32 skip)
case GS_INVALID: n = 1; break;
}
m_vertex.tail = ++tail;
size_t m = tail - head;
if(m < n)
@ -2062,54 +2187,68 @@ void GSState::VertexKick(uint32 skip)
return;
}
if(skip == 0)
if(skip == 0 && (prim != GS_TRIANGLEFAN || m <= 4)) // m_vertex.xy only knows about the last 4 vertices, head could be far behind for fan
{
int p0 = (int)s_tmp[(s_tmp_i + 1) & 3];
int p1 = (int)s_tmp[(s_tmp_i + 2) & 3];
int p2 = (int)s_tmp[(s_tmp_i + 3) & 3];
int p3 = (int)s_tmp[(s_tmp_i - m) & 3];
GSVector4 v0, v1, v2, v3;
GSVector4i p(p0, p1, p2, p3);
GSVector4i v0, v1, v2, v3;
v0 = m_vertex.xy[(xy_tail + 1) & 3]; // T-3
v1 = m_vertex.xy[(xy_tail + 2) & 3]; // T-2
v2 = m_vertex.xy[(xy_tail + 3) & 3]; // T-1
v3 = m_vertex.xy[(xy_tail - m) & 3]; // H
v1 = p.upl16();
v3 = p.uph16();
v0 = v1.xyxy();
v1 = v1.zwzw();
v2 = v3.xyxy();
v3 = v3.zwzw();
GSVector4i s = m_context->scissor.dx10;
GSVector4i sm = s_tmp_zw_sign;
GSVector4 cross;
GSVector4 pmin, pmax, cross;
switch(prim)
{
case GS_POINTLIST:
skip = ((v2 - s) ^ sm).mask() & 0x8888;
pmin = v2;
pmax = v2;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
skip = (((v1 - s) ^ sm) & ((v2 - s) ^ sm)).mask() & 0x8888;
skip |= p1 == p2;
pmin = v2.min(v1);
pmax = v2.max(v1);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
skip = (((v0 - s) ^ sm) & ((v1 - s) ^ sm) & ((v2 - s) ^ sm)).mask() & 0x8888;
cross = (GSVector4(v1) - GSVector4(v0)) * (GSVector4(v2) - GSVector4(v0)).yxyx();
skip |= (cross == cross.yxyx()).mask();
pmin = v2.min(v1.min(v0));
pmax = v2.max(v1.max(v0));
break;
case GS_TRIANGLEFAN:
if(m > 4) break; // s_tmp only knows about the last 4 vertices, head could be far behind
skip = (((v1 - s) ^ sm) & ((v2 - s) ^ sm) & ((v3 - s) ^ sm)).mask() & 0x8888;
cross = (GSVector4(v1) - GSVector4(v3)) * (GSVector4(v2) - GSVector4(v3)).yxyx();
skip |= (cross == cross.yxyx()).mask();
pmin = v2.min(v1.min(v3));
pmax = v2.max(v1.max(v3));
break;
}
GSVector4 scissor = m_context->scissor.dx9;
GSVector4 test = pmax < scissor | pmin > scissor.zwxy();
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
test |= m_nativeres ? (pmin == pmax).zwzw() : pmin == pmax;
break;
}
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
cross = (v2 - v1) * (v2 - v0).yxwz();
test |= cross == cross.yxwz();
break;
case GS_TRIANGLEFAN:
cross = (v2 - v1) * (v2 - v3).yxwz();
test |= cross == cross.yxwz();
break;
}
skip |= test.mask() & 3;
}
if(skip != 0)
@ -2171,7 +2310,7 @@ void GSState::VertexKick(uint32 skip)
m_vertex.next = head + 2;
m_index.tail += 2;
if(head + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
if(head + 1 >= next) (this->*m_cvf)(head + 1, src_index + 1);
/*if(head + 1 >= next)*/ (this->*m_cvf)(head + 1, src_index + 1); // this is always a new vertex
break;
case GS_TRIANGLELIST:
buff[0] = head + 0;
@ -2194,10 +2333,10 @@ void GSState::VertexKick(uint32 skip)
m_index.tail += 3;
if(src_index + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
if(src_index + 1 >= next) (this->*m_cvf)(head + 1, src_index + 1);
if(src_index + 2 >= next) (this->*m_cvf)(head + 2, src_index + 2);
/*if(src_index + 2 >= next)*/ (this->*m_cvf)(head + 2, src_index + 2); // this is always a new vertex
break;
case GS_TRIANGLEFAN:
// TODO: remove gaps
// TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare)
buff[0] = head + 0;
buff[1] = tail - 2;
buff[2] = tail - 1;
@ -2205,7 +2344,7 @@ void GSState::VertexKick(uint32 skip)
m_index.tail += 3;
if(head >= next) (this->*m_cvf)(head, head);
if(tail - 2 >= next) (this->*m_cvf)(tail - 2, tail - 2);
if(tail - 1 >= next) (this->*m_cvf)(tail - 1, tail - 1);
/*if(tail - 1 >= next)*/ (this->*m_cvf)(tail - 1, tail - 1); // this is always a new vertex
break;
case GS_SPRITE:
buff[0] = head + 0;
@ -2341,7 +2480,21 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
}
}
r = vr.rintersect(tr);
vr = vr.rintersect(tr);
if(vr.rempty())
{
// NOTE: this can happen when texcoords are all outside the texture or clamping area is zero, but we can't
// let the texture cache update nothing, the sampler will still need a single texel from the border somewhere
// examples:
// - ICO opening menu (texture looks like the miniature silhouette of everything except the sky)
// - THPS (no visible problems)
// - NFSMW (strange rectangles on screen, might be unrelated)
vr = (vr + GSVector4i(-1, +1).xxyy()).rintersect(tr);
}
r = vr;
}
void GSState::GetAlphaMinMax()

View File

@ -42,13 +42,14 @@ class GSState : public GSAlignedClass<32>
typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* RESTRICT r);
GIFPackedRegHandler m_fpGIFPackedRegHandlers[16];
GIFPackedRegHandler m_fpGIFPackedRegHandlerXYZ[8][4];
void GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
template<uint32 prim, uint32 adc> void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
template<uint32 prim, uint32 adc> void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r);
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r);
@ -56,8 +57,9 @@ class GSState : public GSAlignedClass<32>
typedef void (GSState::*GIFRegHandler)(const GIFReg* RESTRICT r);
GIFRegHandler m_fpGIFRegHandlers[256];
GIFRegHandler m_fpGIFRegHandlerXYZ[8][4];
void ApplyTEX0(int i, GIFRegTEX0& TEX0);
template<int i> void ApplyTEX0(GIFRegTEX0& TEX0);
void ApplyPRIM(const GIFRegPRIM& PRIM);
void GIFRegHandlerNull(const GIFReg* RESTRICT r);
@ -65,13 +67,11 @@ class GSState : public GSAlignedClass<32>
void GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r);
void GIFRegHandlerST(const GIFReg* RESTRICT r);
void GIFRegHandlerUV(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
template<uint32 prim, uint32 adc> void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
template<uint32 prim, uint32 adc> void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX0(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerCLAMP(const GIFReg* RESTRICT r);
void GIFRegHandlerFOG(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZF3(const GIFReg* RESTRICT r);
void GIFRegHandlerXYZ3(const GIFReg* RESTRICT r);
void GIFRegHandlerNOP(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX1(const GIFReg* RESTRICT r);
template<int i> void GIFRegHandlerTEX2(const GIFReg* RESTRICT r);
@ -131,13 +131,25 @@ protected:
GSVertex m_v;
float m_q;
struct {uint8* buff; size_t head, tail, next, maxcount, stride, n; uint8* tmp;} m_vertex; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
struct {uint32* buff; size_t tail;} m_index;
// Vertex queue state.
struct
{
// vertex storage, addressed as buff[stride * index]
uint8* buff;
// size of one stored vertex in bytes
size_t stride;
size_t head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
// positions of the most recent vertices, written at xy[xy_tail & 3] by VertexKick
// (only the last 4 are kept)
GSVector4 xy[4];
// running counter used to index xy; incremented once per kicked vertex
size_t xy_tail;
// scratch space for two vertices (allocated as stride * 2); FlushPrim uses it to
// carry strip/fan vertices over a flush
uint8* tmp;
} m_vertex;
// Index queue state.
struct
{
// vertex indices of the queued primitives
uint32* buff;
// number of indices currently stored in buff
size_t tail;
} m_index;
typedef void (GSState::*VertexKickPtr)(uint32 skip);
typedef void (GSState::*ConvertVertexPtr)(size_t dst_index, size_t src_index);
VertexKickPtr m_vk[8], m_vkf;
ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST]
#define InitConvertVertex2(T, P) \
@ -186,6 +198,7 @@ public:
bool m_framelimit;
CRC::Game m_game;
GSDump m_dump;
bool m_nativeres;
public:
GSState(GSVertexTrace* vt, size_t vertex_stride);

View File

@ -270,22 +270,29 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
uint8* dst = (uint8*)m_buff + pitch * r.top;
int block_pitch = pitch * bs.y;
r = r.srl32(3);
bs.x >>= 3;
bs.y >>= 3;
shift += 3;
if(m_repeating)
{
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
{
uint32 base = o->block.row[y >> 3];
uint32 base = o->block.row[y];
for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
uint32 block = base + o->block.col[x];
if(block < MAX_BLOCKS)
{
uint32 addr = i >> 3;
uint32 row = addr >> 5;
uint32 col = 1 << (addr & 31);
uint32 row = i >> 5;
uint32 col = 1 << (i & 31);
if((m_valid[row] & col) == 0)
{
@ -301,13 +308,13 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
}
else
{
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch)
{
uint32 base = o->block.row[y >> 3];
uint32 base = o->block.row[y];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + o->block.col[x >> 3];
uint32 block = base + o->block.col[x];
if(block < MAX_BLOCKS)
{

View File

@ -37,7 +37,7 @@ __aligned(struct, 32) GSVertex
GIFRegST ST;
GIFRegRGBAQ RGBAQ;
GIFRegXYZ XYZ;
union {GIFRegUV UV; GIFRegFOG FOG;}; // UV.u32[0] | FOG.u32[1]
uint32 UV, FOG;
};
__m128i m[2];

View File

@ -25,7 +25,7 @@
__aligned(struct, 32) GSVertexSW
{
GSVector4 p, t, c;
GSVector4 p, t, c, _pad;
__forceinline GSVertexSW() {}
__forceinline GSVertexSW(const GSVertexSW& v) {*this = v;}

View File

@ -618,7 +618,7 @@ VS_OUTPUT vs_main(VS_INPUT input)
}
output.c = input.c;
output.t.z = input.f.a;
output.t.z = input.f.r;
return output;
}
@ -765,7 +765,7 @@ VS_OUTPUT vs_main(VS_INPUT input)
}
output.c = input.c;
output.t.z = input.f.a;
output.t.z = input.f.b;
return output;
}