mirror of https://github.com/PCSX2/pcsx2.git
GSdx: broken frame skipping should be fixed, and a few random sw renderer optimizations.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5077 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
6a86a6520d
commit
9b8c753ead
|
@ -1111,9 +1111,9 @@ __aligned(struct, 32) GIFPath
|
|||
nreg = tag.NREG ? tag.NREG : 16;
|
||||
regs = v.uph8(v >> 4) & GSVector4i::x0f(nreg);
|
||||
nloop = tag.NLOOP;
|
||||
type = TYPE_UNKNOWN;
|
||||
if(regs.u32[0] == 0x00040102 && nreg == 3) type = TYPE_STQRGBAXYZF2;
|
||||
else if(regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1) type = TYPE_ADONLY;
|
||||
else type = TYPE_UNKNOWN;
|
||||
}
|
||||
|
||||
__forceinline uint8 GetReg()
|
||||
|
|
|
@ -932,11 +932,6 @@ GSRasterizerList::~GSRasterizerList()
|
|||
|
||||
void GSRasterizerList::Queue(shared_ptr<GSRasterizerData> data)
|
||||
{
|
||||
if(data->syncpoint)
|
||||
{
|
||||
Sync();
|
||||
}
|
||||
|
||||
GSVector4i r = data->bbox.rintersect(data->scissor);
|
||||
|
||||
ASSERT(r.top >= 0 && r.top < 2048 && r.bottom >= 0 && r.bottom < 2048);
|
||||
|
|
|
@ -39,7 +39,6 @@ public:
|
|||
int vertex_count;
|
||||
uint32* index;
|
||||
int index_count;
|
||||
bool syncpoint;
|
||||
uint64 frame;
|
||||
|
||||
GSRasterizerData()
|
||||
|
@ -51,7 +50,6 @@ public:
|
|||
, vertex_count(0)
|
||||
, index(NULL)
|
||||
, index_count(0)
|
||||
, syncpoint(false)
|
||||
, frame(0)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -43,6 +43,17 @@ GSRendererSW::GSRendererSW(int threads)
|
|||
|
||||
memset(m_fzb_pages, 0, sizeof(m_fzb_pages));
|
||||
memset(m_tex_pages, 0, sizeof(m_tex_pages));
|
||||
|
||||
#define InitCVB(P) \
|
||||
m_cvb[P][0][0] = &GSRendererSW::ConvertVertexBuffer<P, 0, 0>; \
|
||||
m_cvb[P][0][1] = &GSRendererSW::ConvertVertexBuffer<P, 0, 1>; \
|
||||
m_cvb[P][1][0] = &GSRendererSW::ConvertVertexBuffer<P, 1, 0>; \
|
||||
m_cvb[P][1][1] = &GSRendererSW::ConvertVertexBuffer<P, 1, 1>; \
|
||||
|
||||
InitCVB(GS_POINT_CLASS);
|
||||
InitCVB(GS_LINE_CLASS);
|
||||
InitCVB(GS_TRIANGLE_CLASS);
|
||||
InitCVB(GS_SPRITE_CLASS);
|
||||
}
|
||||
|
||||
GSRendererSW::~GSRendererSW()
|
||||
|
@ -72,7 +83,7 @@ void GSRendererSW::VSync(int field)
|
|||
{
|
||||
Sync(0); // IncAge might delete a cached texture in use
|
||||
|
||||
if(LOG)
|
||||
if(0) if(LOG)
|
||||
{
|
||||
fprintf(s_fp, "%lld\n", m_perfmon.GetFrame());
|
||||
|
||||
|
@ -230,6 +241,74 @@ GSTexture* GSRendererSW::GetOutput(int i)
|
|||
return m_texture[i];
|
||||
}
|
||||
|
||||
template<uint32 primclass, uint32 tme, uint32 fst>
|
||||
void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
|
||||
{
|
||||
GSVector4i o = (GSVector4i)m_context->XYOFFSET;
|
||||
GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0);
|
||||
|
||||
for(size_t i = 0; i < m_vertex.next; i++, src++, dst++)
|
||||
{
|
||||
GSVector4 stcq = GSVector4::load<true>(&src->m[0]); // s t rgba q
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i xyzuvf(src->m[1]);
|
||||
|
||||
GSVector4i xy = xyzuvf.upl16() - o;
|
||||
GSVector4i zf = xyzuvf.ywww().min_u32(GSVector4i::xffffff00());
|
||||
|
||||
#else
|
||||
|
||||
uint32 z = src->XYZ.Z;
|
||||
|
||||
GSVector4i xy = GSVector4i::load((int)src->XYZ.u32[0]).upl16() - o;
|
||||
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), src->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
|
||||
|
||||
#endif
|
||||
|
||||
dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
||||
dst->c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7);
|
||||
|
||||
GSVector4 t;
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(fst)
|
||||
{
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
t = GSVector4(xyzuvf.uph16() << (16 - 4));
|
||||
|
||||
#else
|
||||
|
||||
t = GSVector4(GSVector4i::load(src->UV).upl16() << (16 - 4));
|
||||
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
t = stcq.xyww() * tsize;
|
||||
}
|
||||
}
|
||||
|
||||
if(primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
t = t.insert<1, 3>(GSVector4::cast(xyzuvf));
|
||||
|
||||
#else
|
||||
|
||||
t = t.insert<0, 3>(GSVector4::cast(GSVector4i::load(z)));
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
dst->t = t;
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererSW::Draw()
|
||||
{
|
||||
SharedData* sd = new SharedData(this);
|
||||
|
@ -243,62 +322,10 @@ void GSRendererSW::Draw()
|
|||
sd->index = (uint32*)(sd->buff + sizeof(GSVertexSW) * m_vertex.next);
|
||||
sd->index_count = m_index.tail;
|
||||
|
||||
{
|
||||
// TODO: template, JIT
|
||||
|
||||
GSVertex* RESTRICT s = m_vertex.buff;
|
||||
GSVertexSW* RESTRICT d = sd->vertex;
|
||||
|
||||
GSVector4i o = (GSVector4i)m_context->XYOFFSET;
|
||||
GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0);
|
||||
|
||||
for(size_t i = 0; i < m_vertex.next; i++, s++, d++)
|
||||
{
|
||||
// TODO: load xyzuvf in one piece
|
||||
|
||||
uint32 z = s->XYZ.Z;
|
||||
|
||||
GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - o;
|
||||
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
|
||||
|
||||
GSVector4 p, t, c;
|
||||
|
||||
p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
||||
|
||||
GSVector4 stcq = GSVector4::load<true>(&s->m[0]); // s t rgba q
|
||||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
if(PRIM->FST)
|
||||
{
|
||||
t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4));
|
||||
}
|
||||
else
|
||||
{
|
||||
t = stcq.xyww() * tsize;
|
||||
}
|
||||
}
|
||||
|
||||
c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7);
|
||||
|
||||
d->p = p;
|
||||
d->c = c;
|
||||
d->t = t;
|
||||
|
||||
if(sd->primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
d->t.u32[3] = z; // TODO: store this to the 4th unused GSVector4?
|
||||
}
|
||||
}
|
||||
}
|
||||
(this->*m_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST])(sd->vertex, m_vertex.buff, m_vertex.next);
|
||||
|
||||
memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail);
|
||||
|
||||
// TODO: delay texture update, do it later along with the syncing on the dispatcher thread, then this thread does not have to wait and can continue assembling more jobs
|
||||
// TODO: if(any texture page is used as a target) GSRasterizerData::syncpoint = true;
|
||||
// TODO: virtual void GSRasterizerData::Update() {texture[all levels]->Update();}, call it from the dispatcher thread before sending to workers
|
||||
// TODO: m_tc->InvalidatePages must be called after texture->Update, move that inside GSRasterizerData::Update too
|
||||
|
||||
if(!GetScanlineGlobalData(sd)) return;
|
||||
|
||||
//
|
||||
|
@ -316,33 +343,7 @@ void GSRendererSW::Draw()
|
|||
sd->bbox = bbox;
|
||||
sd->frame = m_perfmon.GetFrame();
|
||||
|
||||
//
|
||||
|
||||
uint32* fb_pages = NULL;
|
||||
uint32* zb_pages = NULL;
|
||||
|
||||
GSVector4i r = bbox.rintersect(scissor);
|
||||
|
||||
if(gd.sel.fwrite)
|
||||
{
|
||||
fb_pages = context->offset.fb->GetPages(r);
|
||||
|
||||
m_tc->InvalidatePages(fb_pages, context->offset.fb->psm);
|
||||
}
|
||||
|
||||
if(gd.sel.zwrite)
|
||||
{
|
||||
zb_pages = context->offset.zb->GetPages(r);
|
||||
|
||||
m_tc->InvalidatePages(zb_pages, context->offset.zb->psm);
|
||||
}
|
||||
|
||||
if(CheckTargetPages(fb_pages, zb_pages, r))
|
||||
{
|
||||
sd->syncpoint = true;
|
||||
}
|
||||
|
||||
sd->UseTargetPages(fb_pages, zb_pages);
|
||||
CheckDependencies(sd);
|
||||
|
||||
if(LOG) {fprintf(s_fp, "queue %05x %d %05x %d %05x %d %dx%d | %d %d %d\n",
|
||||
m_context->FRAME.Block(), m_context->FRAME.PSM,
|
||||
|
@ -505,19 +506,6 @@ void GSRendererSW::UsePages(const uint32* pages, int type)
|
|||
}
|
||||
else
|
||||
{
|
||||
if(!m_rl->IsSynced())
|
||||
{
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
if(m_fzb_pages[*p]) // currently being drawn to? => sync
|
||||
{
|
||||
Sync(7);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
ASSERT(m_tex_pages[*p] < SHRT_MAX);
|
||||
|
@ -549,6 +537,71 @@ void GSRendererSW::ReleasePages(const uint32* pages, int type)
|
|||
}
|
||||
}
|
||||
|
||||
// Works out whether the job described by `sd` has to wait for earlier jobs, then
// takes references on the pages it touches.
//
// The sequence is order-sensitive and is kept exactly:
//   1. collect the frame/z buffer pages covered by bbox & scissor (only for enabled writes)
//   2. CheckTargetPages, then CheckSourcePages
//   3. addref the target pages
//   4. Sync(7) if a source texture overlaps a page being drawn to, then addref/update the sources
//   5. invalidate the written pages in the texture cache
//   6. Sync(8) if only the targets overlapped
void GSRendererSW::CheckDependencies(SharedData* sd)
{
	GSVector4i r = sd->bbox.rintersect(sd->scissor);

	// NULL means the corresponding buffer is not written by this job

	uint32* fb_pages = sd->global.sel.fwrite ? m_context->offset.fb->GetPages(r) : NULL;
	uint32* zb_pages = sd->global.sel.zwrite ? m_context->offset.zb->GetPages(r) : NULL;

	// overlap between this job's targets and previous targets

	const bool targets_overlap = CheckTargetPages(fb_pages, zb_pages, r);

	// texture located inside a target that is currently in use

	const bool source_syncpoint = CheckSourcePages(sd);

	// when the source check fires, Sync(7) below runs and the later Sync(8) is skipped

	const bool target_syncpoint = targets_overlap && !source_syncpoint;

	// addref target pages

	sd->UseTargetPages(fb_pages, zb_pages);

	// addref texture pages and update previously invalidated parts

	if(source_syncpoint)
	{
		Sync(7);
	}

	sd->UseSourcePages();

	if(sd->global.sel.fwrite)
	{
		m_tc->InvalidatePages(fb_pages, m_context->offset.fb->psm);
	}

	if(sd->global.sel.zwrite)
	{
		m_tc->InvalidatePages(zb_pages, m_context->offset.zb->psm);
	}

	if(target_syncpoint)
	{
		Sync(8);
	}
}
|
||||
|
||||
bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r)
|
||||
{
|
||||
bool synced = m_rl->IsSynced();
|
||||
|
@ -699,6 +752,31 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
|
|||
return false;
|
||||
}
|
||||
|
||||
// Returns true when any page covered by one of the job's source textures has a
// non-zero entry in m_fzb_pages (the original comment reads: "currently being
// drawn to? => sync"). Always returns false when the rasterizer list reports
// that it is already synced.
//
// Uses m_tmp_pages as scratch space for the page list of each texture level.
bool GSRendererSW::CheckSourcePages(SharedData* sd)
{
	if(m_rl->IsSynced())
	{
		return false;
	}

	// m_tex is terminated by an entry whose texture pointer is NULL

	for(size_t level = 0; sd->m_tex[level].t != NULL; level++)
	{
		sd->m_tex[level].t->m_offset->GetPages(sd->m_tex[level].r, m_tmp_pages);

		uint32* first = m_tmp_pages; // sd->m_tex[level].t->m_pages.n;

		const uint32* page = first;

		while(*page != GSOffset::EOP)
		{
			// TODO: 8H 4HL 4HH texture at the same place as the render target (24 bit, or 32-bit where the alpha channel is masked, Valkyrie Profile 2)

			if(m_fzb_pages[*page])
			{
				return true;
			}

			page++;
		}
	}

	return false;
}
|
||||
|
||||
#include "GSTextureSW.h"
|
||||
|
||||
bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||
|
@ -819,29 +897,12 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
|
||||
if(t == NULL) {ASSERT(0); return false;}
|
||||
|
||||
data->UseSourcePages(t, 0);
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, gd.sel.ltf);
|
||||
|
||||
if(!t->Update(r)) {ASSERT(0); return false;}
|
||||
data->SetSource(t, r, 0);
|
||||
|
||||
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
||||
string s;
|
||||
|
||||
if(s_save && s_n >= s_saven)
|
||||
{
|
||||
s = format("c:\\temp1\\_%05d_f%lld_tex32_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM);
|
||||
|
||||
t->Save(s);
|
||||
}
|
||||
}
|
||||
|
||||
gd.tex[0] = t->m_buff;
|
||||
gd.sel.tw = t->m_tw - 3;
|
||||
|
||||
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0)
|
||||
|
@ -972,38 +1033,11 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
|
||||
if(t == NULL) {ASSERT(0); return false;}
|
||||
|
||||
data->UseSourcePages(t, i);
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, gd.sel.ltf);
|
||||
|
||||
if(!t->Update(r)) {ASSERT(0); return false;}
|
||||
|
||||
gd.tex[i] = t->m_buff;
|
||||
|
||||
if(0)
|
||||
//if(context->TEX0.TH > context->TEX0.TW)
|
||||
//if(s_n >= s_saven && s_n < s_saven + 3)
|
||||
//if(context->TEX0.TBP0 >= 0x2b80 && context->TEX0.TBW == 2 && context->TEX0.PSM == PSM_PSMT4)
|
||||
{
|
||||
t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i));
|
||||
/*
|
||||
GIFRegTEX0 TEX0 = MIP_TEX0;
|
||||
TEX0.TBP0 = context->TEX0.TBP0;
|
||||
do
|
||||
{
|
||||
TEX0.TBP0++;
|
||||
const GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA, r, gd.sel.tw + 3);
|
||||
if(t == NULL) {ASSERT(0); return false;}
|
||||
t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, TEX0.TBP0, i));
|
||||
}
|
||||
while(TEX0.TBP0 < 0x3fff);
|
||||
*/
|
||||
|
||||
int i = 0;
|
||||
}
|
||||
|
||||
data->SetSource(t, r, i);
|
||||
}
|
||||
|
||||
s_counter++;
|
||||
|
@ -1232,7 +1266,7 @@ GSRendererSW::SharedData::SharedData(GSRendererSW* parent)
|
|||
, m_zb_pages(NULL)
|
||||
, m_using_pages(false)
|
||||
{
|
||||
m_tex_pages[0] = NULL;
|
||||
m_tex[0].t = NULL;
|
||||
|
||||
global.sel.key = 0;
|
||||
|
||||
|
@ -1258,9 +1292,9 @@ GSRendererSW::SharedData::~SharedData()
|
|||
delete [] m_fb_pages;
|
||||
delete [] m_zb_pages;
|
||||
|
||||
for(size_t i = 0; i < countof(m_tex_pages) && m_tex_pages[i] != NULL; i++)
|
||||
for(size_t i = 0; m_tex[i].t != NULL; i++)
|
||||
{
|
||||
m_parent->ReleasePages(m_tex_pages[i], 2);
|
||||
m_parent->ReleasePages(m_tex[i].t->m_pages.n, 2);
|
||||
}
|
||||
|
||||
if(global.clut) _aligned_free(global.clut);
|
||||
|
@ -1287,14 +1321,41 @@ void GSRendererSW::SharedData::UseTargetPages(const uint32* fb_pages, const uint
|
|||
m_using_pages = true;
|
||||
}
|
||||
|
||||
void GSRendererSW::SharedData::UseSourcePages(GSTextureCacheSW::Texture* t, int level)
|
||||
void GSRendererSW::SharedData::SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level)
|
||||
{
|
||||
ASSERT(m_tex_pages[level] == NULL);
|
||||
ASSERT(m_tex[level].t == NULL);
|
||||
|
||||
const uint32* pages = t->m_pages.n;
|
||||
m_tex[level].t = t;
|
||||
m_tex[level].r = r;
|
||||
|
||||
m_tex_pages[level] = pages;
|
||||
m_tex_pages[level + 1] = NULL;
|
||||
|
||||
m_parent->UsePages(pages, 2);
|
||||
m_tex[level + 1].t = NULL;
|
||||
}
|
||||
|
||||
void GSRendererSW::SharedData::UseSourcePages()
|
||||
{
|
||||
for(size_t i = 0; m_tex[i].t != NULL; i++)
|
||||
{
|
||||
m_parent->UsePages(m_tex[i].t->m_pages.n, 2);
|
||||
|
||||
m_tex[i].t->Update(m_tex[i].r); // TODO: check return value, false (out-of-memory) then disable texturing
|
||||
|
||||
global.tex[i] = m_tex[i].t->m_buff;
|
||||
|
||||
// TODO
|
||||
/*
|
||||
if(s_dump)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
||||
string s;
|
||||
|
||||
if(s_save && s_n >= s_saven)
|
||||
{
|
||||
s = format("c:\\temp1\\_%05d_f%lld_tex%d_%05x_%d.bmp", s_n, frame, i, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM);
|
||||
|
||||
sd->m_tex[i].t->Save(s);
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,20 +29,30 @@ class GSRendererSW : public GSRenderer
|
|||
{
|
||||
class SharedData : public GSDrawScanline::SharedData
|
||||
{
|
||||
public:
|
||||
GSRendererSW* m_parent;
|
||||
const uint32* m_fb_pages;
|
||||
const uint32* m_zb_pages;
|
||||
const uint32* m_tex_pages[7 + 1]; // NULL terminated
|
||||
bool m_using_pages;
|
||||
__aligned(struct, 16) {GSVector4i r; GSTextureCacheSW::Texture* t;} m_tex[7 + 1]; // NULL terminated
|
||||
|
||||
public:
|
||||
SharedData(GSRendererSW* parent);
|
||||
virtual ~SharedData();
|
||||
|
||||
void UseTargetPages(const uint32* fb_pages, const uint32* zb_pages);
|
||||
void UseSourcePages(GSTextureCacheSW::Texture* t, int level);
|
||||
|
||||
void SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level);
|
||||
void UseSourcePages();
|
||||
};
|
||||
|
||||
typedef void (GSRendererSW::*ConvertVertexBufferPtr)(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
||||
|
||||
ConvertVertexBufferPtr m_cvb[4][2][2];
|
||||
|
||||
template<uint32 primclass, uint32 tme, uint32 fst>
|
||||
void ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
||||
|
||||
protected:
|
||||
IRasterizer* m_rl;
|
||||
GSTextureCacheSW* m_tc;
|
||||
|
@ -67,7 +77,10 @@ protected:
|
|||
|
||||
void UsePages(const uint32* pages, int type);
|
||||
void ReleasePages(const uint32* pages, int type);
|
||||
|
||||
void CheckDependencies(SharedData* sd);
|
||||
bool CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r);
|
||||
bool CheckSourcePages(SharedData* sd);
|
||||
|
||||
bool GetScanlineGlobalData(SharedData* data);
|
||||
|
||||
|
|
|
@ -40,10 +40,11 @@ GSState::GSState()
|
|||
m_nativeres = !!theApp.GetConfig("nativeres", 0);
|
||||
|
||||
memset(&m_v, 0, sizeof(m_v));
|
||||
m_q = 1.0f;
|
||||
memset(&m_vertex, 0, sizeof(m_vertex));
|
||||
memset(&m_index, 0, sizeof(m_index));
|
||||
|
||||
m_v.RGBAQ.Q = 1.0f;
|
||||
|
||||
GrowVertexBuffer();
|
||||
|
||||
m_sssize = 0;
|
||||
|
@ -94,7 +95,7 @@ GSState::GSState()
|
|||
m_sssize += sizeof(m_tr.y);
|
||||
m_sssize += m_mem.m_vmsize;
|
||||
m_sssize += (sizeof(m_path[0].tag) + sizeof(m_path[0].reg)) * countof(m_path);
|
||||
m_sssize += sizeof(m_q);
|
||||
m_sssize += sizeof(float); // obsolite
|
||||
|
||||
PRIM = &m_env.PRIM;
|
||||
// CSR->rREV = 0x20;
|
||||
|
@ -156,44 +157,18 @@ void GSState::SetFrameSkip(int skip)
|
|||
{
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerNOP;
|
||||
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerNOP;
|
||||
|
||||
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = &GSState::GIFPackedRegHandlerNOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2<GS_INVALID, 0>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2<GS_INVALID, 0>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerXYZF2<GS_INVALID, 1>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerXYZ2<GS_INVALID, 1>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<1>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
|
||||
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2<GS_INVALID, 0>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2<GS_INVALID, 0>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF2<GS_INVALID, 1>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ2<GS_INVALID, 1>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE;
|
||||
|
||||
UpdateVertexKick();
|
||||
}
|
||||
}
|
||||
|
@ -442,22 +417,13 @@ void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r)
|
|||
|
||||
m_v.RGBAQ.u32[0] = (uint32)GSVector4i::store(v);
|
||||
|
||||
#elif _M_SSE >= 0x200
|
||||
#else
|
||||
|
||||
GSVector4i v = GSVector4i::load<false>(r) & GSVector4i::x000000ff();
|
||||
|
||||
m_v.RGBAQ.u32[0] = v.rgba32();
|
||||
|
||||
#else
|
||||
|
||||
m_v.RGBAQ.R = r->RGBA.R;
|
||||
m_v.RGBAQ.G = r->RGBA.G;
|
||||
m_v.RGBAQ.B = r->RGBA.B;
|
||||
m_v.RGBAQ.A = r->RGBA.A;
|
||||
|
||||
#endif
|
||||
|
||||
m_v.RGBAQ.Q = m_q;
|
||||
}
|
||||
|
||||
void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
|
||||
|
@ -466,19 +432,14 @@ void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
|
|||
|
||||
m_v.ST.u64 = r->u64[0];
|
||||
|
||||
#elif _M_SSE >= 0x200
|
||||
#else
|
||||
|
||||
GSVector4i v = GSVector4i::loadl(r);
|
||||
GSVector4i::storel(&m_v.ST.u64, v);
|
||||
|
||||
#else
|
||||
|
||||
m_v.ST.S = r->STQ.S;
|
||||
m_v.ST.T = r->STQ.T;
|
||||
|
||||
#endif
|
||||
|
||||
m_q = r->STQ.Q;
|
||||
m_v.RGBAQ.Q = r->STQ.Q;
|
||||
|
||||
#ifdef Offset_ST
|
||||
GIFRegTEX0 TEX0 = m_context->TEX0;
|
||||
|
@ -562,21 +523,23 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, ui
|
|||
GSVector4i q = GSVector4i::loadl(&r[0].u64[1]);
|
||||
GSVector4i rgba = (GSVector4i::load<false>(&r[1]) & GSVector4i::x000000ff()).ps32().pu16();
|
||||
|
||||
m_v.m[0] = st.upl64(rgba.upl32(q));
|
||||
m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one
|
||||
|
||||
GSVector4i xy = GSVector4i::loadl(&r[2].u64[0]);
|
||||
GSVector4i zf = GSVector4i::loadl(&r[2].u64[1]);
|
||||
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::loadl(&m_v.UV));
|
||||
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
|
||||
|
||||
m_v.m[1] = xy.upl32(zf);
|
||||
m_v.m[1] = xy.upl32(zf); // TODO: only store the last one
|
||||
|
||||
VertexKick<prim>(r[2].XYZF2.Skip());
|
||||
|
||||
r += 3;
|
||||
}
|
||||
}
|
||||
|
||||
m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
|
||||
// Packed-register handler that deliberately ignores its input.
// SetFrameSkip stores it in the GIF_REG_STQRGBAXYZF2 slot of
// m_fpGIFPackedRegHandlersC alongside the other NOP handlers
// (presumably while frames are being skipped - confirm against SetFrameSkip).
void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size)
{
	// intentionally empty
}
|
||||
|
||||
// GIFRegHandler*
|
||||
|
@ -719,8 +682,10 @@ template<int i> void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
|
|||
BITBLTBUF.SBW = 1;
|
||||
BITBLTBUF.SPSM = TEX0.CSM;
|
||||
|
||||
GSVector4i r = GSVector4i::zero();
|
||||
GSVector4i r;
|
||||
|
||||
r.left = 0;
|
||||
r.top = 0;
|
||||
r.right = GSLocalMemory::m_psm[TEX0.CPSM].pgs.x;
|
||||
r.bottom = GSLocalMemory::m_psm[TEX0.CPSM].pgs.y;
|
||||
|
||||
|
@ -1644,7 +1609,7 @@ void GSState::SoftReset(uint32 mask)
|
|||
|
||||
m_env.TRXDIR.XDIR = 3; //-1 ; set it to invalid value
|
||||
|
||||
m_q = 1;
|
||||
m_v.RGBAQ.Q = 1.0f;
|
||||
}
|
||||
|
||||
void GSState::ReadFIFO(uint8* mem, int size)
|
||||
|
@ -1687,7 +1652,7 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
|||
|
||||
if(path.nloop > 0) // eeuser 7.2.2. GIFtag: "... when NLOOP is 0, the GIF does not output anything, and values other than the EOP field are disregarded."
|
||||
{
|
||||
m_q = 1.0f;
|
||||
m_v.RGBAQ.Q = 1.0f;
|
||||
|
||||
// ASSERT(!(path.tag.PRE && path.tag.FLG == GIF_FLG_REGLIST)); // kingdom hearts
|
||||
|
||||
|
@ -1980,7 +1945,7 @@ int GSState::Freeze(GSFreezeData* fd, bool sizeonly)
|
|||
WriteState(data, &m_path[i].reg);
|
||||
}
|
||||
|
||||
WriteState(data, &m_q);
|
||||
data += sizeof(float); // obsolite
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -2076,7 +2041,7 @@ int GSState::Defrost(const GSFreezeData* fd)
|
|||
m_path[i].SetTag(&m_path[i].tag); // expand regs
|
||||
}
|
||||
|
||||
ReadState(&m_q, data);
|
||||
data += sizeof(float); // obsolite
|
||||
|
||||
PRIM = !m_env.PRMODECONT.AC ? (GIFRegPRIM*)&m_env.PRMODE : &m_env.PRIM;
|
||||
|
||||
|
|
|
@ -65,6 +65,7 @@ class GSState : public GSAlignedClass<32>
|
|||
GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZF2[8];
|
||||
|
||||
template<uint32 prim> void GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, uint32 size);
|
||||
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size);
|
||||
|
||||
template<int i> void ApplyTEX0(GIFRegTEX0& TEX0);
|
||||
void ApplyPRIM(const GIFRegPRIM& PRIM);
|
||||
|
@ -137,7 +138,6 @@ protected:
|
|||
bool IsBadFrame(int& skip, int UserHacks_SkipDraw);
|
||||
|
||||
GSVertex m_v;
|
||||
float m_q;
|
||||
GSVector4 m_scissor;
|
||||
uint32 m_ofxy;
|
||||
|
||||
|
|
|
@ -178,6 +178,11 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
|
|||
m_TEX0 = TEX0;
|
||||
m_TEXA = TEXA;
|
||||
|
||||
if(m_tw == 0)
|
||||
{
|
||||
m_tw = std::max<int>(m_TEX0.TW, GSLocalMemory::m_psm[m_TEX0.PSM].pal == 0 ? 3 : 5); // makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff
|
||||
}
|
||||
|
||||
memset(m_valid, 0, sizeof(m_valid));
|
||||
memset(m_pages.bm, 0, sizeof(m_pages.bm));
|
||||
|
||||
|
@ -239,17 +244,6 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
|||
|
||||
if(m_buff == NULL)
|
||||
{
|
||||
uint32 tw0 = std::max<int>(m_TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff
|
||||
|
||||
if(m_tw == 0)
|
||||
{
|
||||
m_tw = tw0;
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT(m_tw >= tw0);
|
||||
}
|
||||
|
||||
uint32 pitch = (1 << m_tw) << shift;
|
||||
|
||||
m_buff = _aligned_malloc(pitch * th * 4, 32);
|
||||
|
|
|
@ -2915,6 +2915,11 @@ public:
|
|||
return GSVector4(aligned ? _mm_load_ps((const float*)p) : _mm_loadu_ps((const float*)p));
|
||||
}
|
||||
|
||||
// Stores the four floats of v to p using _mm_stream_ps (non-temporal store).
// NOTE(review): per Intel's documentation _mm_stream_ps expects a 16-byte
// aligned address - callers are assumed to guarantee that; confirm at call sites.
__forceinline static void storent(void* p, const GSVector4& v)
{
	float* dst = (float*)p;

	_mm_stream_ps(dst, v.m);
}
|
||||
|
||||
__forceinline static void storel(void* p, const GSVector4& v)
|
||||
{
|
||||
_mm_store_sd((double*)p, _mm_castps_pd(v.m));
|
||||
|
|
Loading…
Reference in New Issue