GSdx: broken frame skipping should be fixed, and a few random sw renderer optimizations.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5077 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2012-01-21 04:44:04 +00:00
parent 6a86a6520d
commit 9b8c753ead
9 changed files with 258 additions and 227 deletions

View File

@ -1111,9 +1111,9 @@ __aligned(struct, 32) GIFPath
nreg = tag.NREG ? tag.NREG : 16;
regs = v.uph8(v >> 4) & GSVector4i::x0f(nreg);
nloop = tag.NLOOP;
type = TYPE_UNKNOWN;
if(regs.u32[0] == 0x00040102 && nreg == 3) type = TYPE_STQRGBAXYZF2;
else if(regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1) type = TYPE_ADONLY;
else type = TYPE_UNKNOWN;
}
__forceinline uint8 GetReg()

View File

@ -932,11 +932,6 @@ GSRasterizerList::~GSRasterizerList()
void GSRasterizerList::Queue(shared_ptr<GSRasterizerData> data)
{
if(data->syncpoint)
{
Sync();
}
GSVector4i r = data->bbox.rintersect(data->scissor);
ASSERT(r.top >= 0 && r.top < 2048 && r.bottom >= 0 && r.bottom < 2048);

View File

@ -39,7 +39,6 @@ public:
int vertex_count;
uint32* index;
int index_count;
bool syncpoint;
uint64 frame;
GSRasterizerData()
@ -51,7 +50,6 @@ public:
, vertex_count(0)
, index(NULL)
, index_count(0)
, syncpoint(false)
, frame(0)
{
}

View File

@ -43,6 +43,17 @@ GSRendererSW::GSRendererSW(int threads)
memset(m_fzb_pages, 0, sizeof(m_fzb_pages));
memset(m_tex_pages, 0, sizeof(m_tex_pages));
// Fill the vertex converter dispatch table: one ConvertVertexBuffer instantiation
// per (primitive class, tme, fst) combination, indexed as m_cvb[primclass][tme][fst].
// NOTE: the last line of the macro must NOT end with a line continuation, otherwise
// the first InitCVB(...) invocation below is spliced into the macro body instead of
// being expanded, and the entries of that primitive class stay uninitialized.
#define InitCVB(P) \
	m_cvb[P][0][0] = &GSRendererSW::ConvertVertexBuffer<P, 0, 0>; \
	m_cvb[P][0][1] = &GSRendererSW::ConvertVertexBuffer<P, 0, 1>; \
	m_cvb[P][1][0] = &GSRendererSW::ConvertVertexBuffer<P, 1, 0>; \
	m_cvb[P][1][1] = &GSRendererSW::ConvertVertexBuffer<P, 1, 1>;

InitCVB(GS_POINT_CLASS);
InitCVB(GS_LINE_CLASS);
InitCVB(GS_TRIANGLE_CLASS);
InitCVB(GS_SPRITE_CLASS);
}
GSRendererSW::~GSRendererSW()
@ -72,7 +83,7 @@ void GSRendererSW::VSync(int field)
{
Sync(0); // IncAge might delete a cached texture in use
if(LOG)
if(0) if(LOG)
{
fprintf(s_fp, "%lld\n", m_perfmon.GetFrame());
@ -230,6 +241,74 @@ GSTexture* GSRendererSW::GetOutput(int i)
return m_texture[i];
}
// Converts `count` vertices from the GS layout (GSVertex) into the layout used by
// the software rasterizer (GSVertexSW).
//
// The template parameters are compile-time constants, so the per-vertex branches on
// them can be resolved by the compiler:
//   primclass - primitive class of the batch; for GS_SPRITE_CLASS an extra value
//               (the raw z in the non-SSE4.1 path) is stored in the 4th lane of dst->t
//   tme       - non-zero: texture coordinates are generated
//   fst       - non-zero: texture coordinates come from UV, zero: from S/T/Q scaled by tsize
//
// dst   - output buffer, at least `count` elements
// src   - input buffer, at least `count` elements
// count - number of vertices to convert
//
// NOTE(review): when tme == 0 the local `t` is never fully written before it is
// stored to dst->t; consumers presumably ignore it in that case - confirm.
template<uint32 primclass, uint32 tme, uint32 fst>
void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
{
	GSVector4i o = (GSVector4i)m_context->XYOFFSET;
	GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0);

	// honour the caller supplied element count instead of reading m_vertex.next directly

	for(size_t i = 0; i < count; i++, src++, dst++)
	{
		GSVector4 stcq = GSVector4::load<true>(&src->m[0]); // s t rgba q

		#if _M_SSE >= 0x401

		// second half of the vertex loaded in one piece

		GSVector4i xyzuvf(src->m[1]);

		GSVector4i xy = xyzuvf.upl16() - o;
		GSVector4i zf = xyzuvf.ywww().min_u32(GSVector4i::xffffff00());

		#else

		uint32 z = src->XYZ.Z;

		GSVector4i xy = GSVector4i::load((int)src->XYZ.u32[0]).upl16() - o;
		GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), src->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later

		#endif

		// zf is converted as signed int, m_x4f800000 is added back where the sign bit was set

		dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
		dst->c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7);

		GSVector4 t;

		if(tme)
		{
			if(fst)
			{
				#if _M_SSE >= 0x401

				t = GSVector4(xyzuvf.uph16() << (16 - 4));

				#else

				t = GSVector4(GSVector4i::load(src->UV).upl16() << (16 - 4));

				#endif
			}
			else
			{
				t = stcq.xyww() * tsize;
			}
		}

		if(primclass == GS_SPRITE_CLASS)
		{
			#if _M_SSE >= 0x401

			t = t.insert<1, 3>(GSVector4::cast(xyzuvf));

			#else

			t = t.insert<0, 3>(GSVector4::cast(GSVector4i::load(z)));

			#endif
		}

		dst->t = t;
	}
}
void GSRendererSW::Draw()
{
SharedData* sd = new SharedData(this);
@ -243,62 +322,10 @@ void GSRendererSW::Draw()
sd->index = (uint32*)(sd->buff + sizeof(GSVertexSW) * m_vertex.next);
sd->index_count = m_index.tail;
{
// TODO: template, JIT
GSVertex* RESTRICT s = m_vertex.buff;
GSVertexSW* RESTRICT d = sd->vertex;
GSVector4i o = (GSVector4i)m_context->XYOFFSET;
GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0);
for(size_t i = 0; i < m_vertex.next; i++, s++, d++)
{
// TODO: load xyzuvf in one piece
uint32 z = s->XYZ.Z;
GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - o;
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
GSVector4 p, t, c;
p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
GSVector4 stcq = GSVector4::load<true>(&s->m[0]); // s t rgba q
if(PRIM->TME)
{
if(PRIM->FST)
{
t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4));
}
else
{
t = stcq.xyww() * tsize;
}
}
c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7);
d->p = p;
d->c = c;
d->t = t;
if(sd->primclass == GS_SPRITE_CLASS)
{
d->t.u32[3] = z; // TODO: store this to the 4th unused GSVector4?
}
}
}
(this->*m_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST])(sd->vertex, m_vertex.buff, m_vertex.next);
memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail);
// TODO: delay texture update, do it later along with the syncing on the dispatcher thread, then this thread does not have to wait and can continue assembling more jobs
// TODO: if(any texture page is used as a target) GSRasterizerData::syncpoint = true;
// TODO: virtual void GSRasterizerData::Update() {texture[all levels]->Update();}, call it from the dispatcher thread before sending to workers
// TODO: m_tc->InvalidatePages must be called after texture->Update, move that inside GSRasterizerData::Update too
if(!GetScanlineGlobalData(sd)) return;
//
@ -316,33 +343,7 @@ void GSRendererSW::Draw()
sd->bbox = bbox;
sd->frame = m_perfmon.GetFrame();
//
uint32* fb_pages = NULL;
uint32* zb_pages = NULL;
GSVector4i r = bbox.rintersect(scissor);
if(gd.sel.fwrite)
{
fb_pages = context->offset.fb->GetPages(r);
m_tc->InvalidatePages(fb_pages, context->offset.fb->psm);
}
if(gd.sel.zwrite)
{
zb_pages = context->offset.zb->GetPages(r);
m_tc->InvalidatePages(zb_pages, context->offset.zb->psm);
}
if(CheckTargetPages(fb_pages, zb_pages, r))
{
sd->syncpoint = true;
}
sd->UseTargetPages(fb_pages, zb_pages);
CheckDependencies(sd);
if(LOG) {fprintf(s_fp, "queue %05x %d %05x %d %05x %d %dx%d | %d %d %d\n",
m_context->FRAME.Block(), m_context->FRAME.PSM,
@ -505,19 +506,6 @@ void GSRendererSW::UsePages(const uint32* pages, int type)
}
else
{
if(!m_rl->IsSynced())
{
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
if(m_fzb_pages[*p]) // currently being drawn to? => sync
{
Sync(7);
break;
}
}
}
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(m_tex_pages[*p] < SHRT_MAX);
@ -549,6 +537,71 @@ void GSRendererSW::ReleasePages(const uint32* pages, int type)
}
}
// Resolves the page dependencies of a draw job before it is queued:
// collects the frame/z buffer pages it writes, checks them and its textures
// against the pages currently in use, addrefs both, invalidates the written
// pages in the texture cache and syncs where one of the checks asked for it.
//
// sd - the job being prepared; its bbox, scissor, global.sel and texture list are read
void GSRendererSW::CheckDependencies(SharedData* sd)
{
	const GSVector4i r = sd->bbox.rintersect(sd->scissor);

	const bool fwrite = sd->global.sel.fwrite != 0;
	const bool zwrite = sd->global.sel.zwrite != 0;

	// page lists stay NULL for buffers that are not written

	uint32* fb_pages = fwrite ? m_context->offset.fb->GetPages(r) : NULL;
	uint32* zb_pages = zwrite ? m_context->offset.zb->GetPages(r) : NULL;

	// check if there is an overlap between this and previous targets

	bool target_syncpoint = CheckTargetPages(fb_pages, zb_pages, r);

	// check if the texture is not part of a target currently in use
	// (both checks are always evaluated, in this order)

	const bool source_syncpoint = CheckSourcePages(sd);

	if(source_syncpoint)
	{
		// a source sync replaces the target sync, presumably because Sync(7) below already covers it

		target_syncpoint = false;
	}

	// addref target pages

	sd->UseTargetPages(fb_pages, zb_pages);

	// addref texture pages and update previously invalidated parts

	if(source_syncpoint)
	{
		Sync(7);
	}

	sd->UseSourcePages();

	// the pages written by this job are no longer valid in the texture cache

	if(fwrite)
	{
		m_tc->InvalidatePages(fb_pages, m_context->offset.fb->psm);
	}

	if(zwrite)
	{
		m_tc->InvalidatePages(zb_pages, m_context->offset.zb->psm);
	}

	if(target_syncpoint)
	{
		Sync(8);
	}
}
bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r)
{
bool synced = m_rl->IsSynced();
@ -699,6 +752,31 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag
return false;
}
// Returns true if any page covered by one of the job's textures (m_tex[].t with
// its rectangle m_tex[].r) has a non-zero entry in m_fzb_pages, i.e. the caller
// has to sync before sampling from it. Always false while the rasterizer list
// reports that it is already synced.
bool GSRendererSW::CheckSourcePages(SharedData* sd)
{
	if(m_rl->IsSynced())
	{
		return false; // nothing pending, nothing can overlap
	}

	// the texture list is NULL terminated

	for(size_t level = 0; sd->m_tex[level].t != NULL; level++)
	{
		// only the pages of the used rectangle are checked (alternative: sd->m_tex[level].t->m_pages.n)

		sd->m_tex[level].t->m_offset->GetPages(sd->m_tex[level].r, m_tmp_pages);

		for(const uint32* p = m_tmp_pages; *p != GSOffset::EOP; p++)
		{
			// TODO: 8H 4HL 4HH texture at the same place as the render target (24 bit, or 32-bit where the alpha channel is masked, Valkyrie Profile 2)

			if(m_fzb_pages[*p]) // currently being drawn to? => sync
			{
				return true;
			}
		}
	}

	return false;
}
#include "GSTextureSW.h"
bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
@ -819,29 +897,12 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
if(t == NULL) {ASSERT(0); return false;}
data->UseSourcePages(t, 0);
GSVector4i r;
GetTextureMinMax(r, context->TEX0, context->CLAMP, gd.sel.ltf);
if(!t->Update(r)) {ASSERT(0); return false;}
data->SetSource(t, r, 0);
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp1\\_%05d_f%lld_tex32_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM);
t->Save(s);
}
}
gd.tex[0] = t->m_buff;
gd.sel.tw = t->m_tw - 3;
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0)
@ -972,38 +1033,11 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
if(t == NULL) {ASSERT(0); return false;}
data->UseSourcePages(t, i);
GSVector4i r;
GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, gd.sel.ltf);
if(!t->Update(r)) {ASSERT(0); return false;}
gd.tex[i] = t->m_buff;
if(0)
//if(context->TEX0.TH > context->TEX0.TW)
//if(s_n >= s_saven && s_n < s_saven + 3)
//if(context->TEX0.TBP0 >= 0x2b80 && context->TEX0.TBW == 2 && context->TEX0.PSM == PSM_PSMT4)
{
t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i));
/*
GIFRegTEX0 TEX0 = MIP_TEX0;
TEX0.TBP0 = context->TEX0.TBP0;
do
{
TEX0.TBP0++;
const GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA, r, gd.sel.tw + 3);
if(t == NULL) {ASSERT(0); return false;}
t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, TEX0.TBP0, i));
}
while(TEX0.TBP0 < 0x3fff);
*/
int i = 0;
}
data->SetSource(t, r, i);
}
s_counter++;
@ -1232,7 +1266,7 @@ GSRendererSW::SharedData::SharedData(GSRendererSW* parent)
, m_zb_pages(NULL)
, m_using_pages(false)
{
m_tex_pages[0] = NULL;
m_tex[0].t = NULL;
global.sel.key = 0;
@ -1258,9 +1292,9 @@ GSRendererSW::SharedData::~SharedData()
delete [] m_fb_pages;
delete [] m_zb_pages;
for(size_t i = 0; i < countof(m_tex_pages) && m_tex_pages[i] != NULL; i++)
for(size_t i = 0; m_tex[i].t != NULL; i++)
{
m_parent->ReleasePages(m_tex_pages[i], 2);
m_parent->ReleasePages(m_tex[i].t->m_pages.n, 2);
}
if(global.clut) _aligned_free(global.clut);
@ -1287,14 +1321,41 @@ void GSRendererSW::SharedData::UseTargetPages(const uint32* fb_pages, const uint
m_using_pages = true;
}
void GSRendererSW::SharedData::UseSourcePages(GSTextureCacheSW::Texture* t, int level)
void GSRendererSW::SharedData::SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level)
{
ASSERT(m_tex_pages[level] == NULL);
ASSERT(m_tex[level].t == NULL);
const uint32* pages = t->m_pages.n;
m_tex[level].t = t;
m_tex[level].r = r;
m_tex_pages[level] = pages;
m_tex_pages[level + 1] = NULL;
m_parent->UsePages(pages, 2);
m_tex[level + 1].t = NULL;
}
void GSRendererSW::SharedData::UseSourcePages()
{
for(size_t i = 0; m_tex[i].t != NULL; i++)
{
m_parent->UsePages(m_tex[i].t->m_pages.n, 2);
m_tex[i].t->Update(m_tex[i].r); // TODO: check return value, false (out-of-memory) then disable texturing
global.tex[i] = m_tex[i].t->m_buff;
// TODO
/*
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp1\\_%05d_f%lld_tex%d_%05x_%d.bmp", s_n, frame, i, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM);
sd->m_tex[i].t->Save(s);
}
}
*/
}
}

View File

@ -29,20 +29,30 @@ class GSRendererSW : public GSRenderer
{
class SharedData : public GSDrawScanline::SharedData
{
public:
GSRendererSW* m_parent;
const uint32* m_fb_pages;
const uint32* m_zb_pages;
const uint32* m_tex_pages[7 + 1]; // NULL terminated
bool m_using_pages;
__aligned(struct, 16) {GSVector4i r; GSTextureCacheSW::Texture* t;} m_tex[7 + 1]; // NULL terminated
public:
SharedData(GSRendererSW* parent);
virtual ~SharedData();
void UseTargetPages(const uint32* fb_pages, const uint32* zb_pages);
void UseSourcePages(GSTextureCacheSW::Texture* t, int level);
void SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level);
void UseSourcePages();
};
typedef void (GSRendererSW::*ConvertVertexBufferPtr)(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
ConvertVertexBufferPtr m_cvb[4][2][2];
template<uint32 primclass, uint32 tme, uint32 fst>
void ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
protected:
IRasterizer* m_rl;
GSTextureCacheSW* m_tc;
@ -67,7 +77,10 @@ protected:
void UsePages(const uint32* pages, int type);
void ReleasePages(const uint32* pages, int type);
void CheckDependencies(SharedData* sd);
bool CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r);
bool CheckSourcePages(SharedData* sd);
bool GetScanlineGlobalData(SharedData* data);

View File

@ -40,10 +40,11 @@ GSState::GSState()
m_nativeres = !!theApp.GetConfig("nativeres", 0);
memset(&m_v, 0, sizeof(m_v));
m_q = 1.0f;
memset(&m_vertex, 0, sizeof(m_vertex));
memset(&m_index, 0, sizeof(m_index));
m_v.RGBAQ.Q = 1.0f;
GrowVertexBuffer();
m_sssize = 0;
@ -94,7 +95,7 @@ GSState::GSState()
m_sssize += sizeof(m_tr.y);
m_sssize += m_mem.m_vmsize;
m_sssize += (sizeof(m_path[0].tag) + sizeof(m_path[0].reg)) * countof(m_path);
m_sssize += sizeof(m_q);
m_sssize += sizeof(float); // obsolete
PRIM = &m_env.PRIM;
// CSR->rREV = 0x20;
@ -156,44 +157,18 @@ void GSState::SetFrameSkip(int skip)
{
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerNOP;
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerNOP;
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerNOP;
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = &GSState::GIFPackedRegHandlerNOP;
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerNOP;
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerNOP;
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerNOP;
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerNOP;
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerNOP;
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerNOP;
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerNOP;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerNOP;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerNOP;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerNOP;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerNOP;
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerNOP;
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerNOP;
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = &GSState::GIFPackedRegHandlerNOP;
}
else
{
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2<GS_INVALID, 0>;
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2<GS_INVALID, 0>;
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerXYZF2<GS_INVALID, 1>;
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerXYZ2<GS_INVALID, 1>;
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<1>;
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2<GS_INVALID, 0>;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2<GS_INVALID, 0>;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF2<GS_INVALID, 1>;
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ2<GS_INVALID, 1>;
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT;
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE;
UpdateVertexKick();
}
}
@ -442,22 +417,13 @@ void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r)
m_v.RGBAQ.u32[0] = (uint32)GSVector4i::store(v);
#elif _M_SSE >= 0x200
#else
GSVector4i v = GSVector4i::load<false>(r) & GSVector4i::x000000ff();
m_v.RGBAQ.u32[0] = v.rgba32();
#else
m_v.RGBAQ.R = r->RGBA.R;
m_v.RGBAQ.G = r->RGBA.G;
m_v.RGBAQ.B = r->RGBA.B;
m_v.RGBAQ.A = r->RGBA.A;
#endif
m_v.RGBAQ.Q = m_q;
}
void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
@ -466,19 +432,14 @@ void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
m_v.ST.u64 = r->u64[0];
#elif _M_SSE >= 0x200
#else
GSVector4i v = GSVector4i::loadl(r);
GSVector4i::storel(&m_v.ST.u64, v);
#else
m_v.ST.S = r->STQ.S;
m_v.ST.T = r->STQ.T;
#endif
m_q = r->STQ.Q;
m_v.RGBAQ.Q = r->STQ.Q;
#ifdef Offset_ST
GIFRegTEX0 TEX0 = m_context->TEX0;
@ -562,21 +523,23 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, ui
GSVector4i q = GSVector4i::loadl(&r[0].u64[1]);
GSVector4i rgba = (GSVector4i::load<false>(&r[1]) & GSVector4i::x000000ff()).ps32().pu16();
m_v.m[0] = st.upl64(rgba.upl32(q));
m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one
GSVector4i xy = GSVector4i::loadl(&r[2].u64[0]);
GSVector4i zf = GSVector4i::loadl(&r[2].u64[1]);
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::loadl(&m_v.UV));
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
m_v.m[1] = xy.upl32(zf);
m_v.m[1] = xy.upl32(zf); // TODO: only store the last one
VertexKick<prim>(r[2].XYZF2.Skip());
r += 3;
}
}
m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size)
{
}
// GIFRegHandler*
@ -719,8 +682,10 @@ template<int i> void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = TEX0.CSM;
GSVector4i r = GSVector4i::zero();
GSVector4i r;
r.left = 0;
r.top = 0;
r.right = GSLocalMemory::m_psm[TEX0.CPSM].pgs.x;
r.bottom = GSLocalMemory::m_psm[TEX0.CPSM].pgs.y;
@ -1644,7 +1609,7 @@ void GSState::SoftReset(uint32 mask)
m_env.TRXDIR.XDIR = 3; //-1 ; set it to invalid value
m_q = 1;
m_v.RGBAQ.Q = 1.0f;
}
void GSState::ReadFIFO(uint8* mem, int size)
@ -1687,7 +1652,7 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
if(path.nloop > 0) // eeuser 7.2.2. GIFtag: "... when NLOOP is 0, the GIF does not output anything, and values other than the EOP field are disregarded."
{
m_q = 1.0f;
m_v.RGBAQ.Q = 1.0f;
// ASSERT(!(path.tag.PRE && path.tag.FLG == GIF_FLG_REGLIST)); // kingdom hearts
@ -1980,7 +1945,7 @@ int GSState::Freeze(GSFreezeData* fd, bool sizeonly)
WriteState(data, &m_path[i].reg);
}
WriteState(data, &m_q);
data += sizeof(float); // obsolete
return 0;
}
@ -2076,7 +2041,7 @@ int GSState::Defrost(const GSFreezeData* fd)
m_path[i].SetTag(&m_path[i].tag); // expand regs
}
ReadState(&m_q, data);
data += sizeof(float); // obsolete
PRIM = !m_env.PRMODECONT.AC ? (GIFRegPRIM*)&m_env.PRMODE : &m_env.PRIM;

View File

@ -65,6 +65,7 @@ class GSState : public GSAlignedClass<32>
GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZF2[8];
template<uint32 prim> void GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, uint32 size);
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size);
template<int i> void ApplyTEX0(GIFRegTEX0& TEX0);
void ApplyPRIM(const GIFRegPRIM& PRIM);
@ -137,7 +138,6 @@ protected:
bool IsBadFrame(int& skip, int UserHacks_SkipDraw);
GSVertex m_v;
float m_q;
GSVector4 m_scissor;
uint32 m_ofxy;

View File

@ -178,6 +178,11 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
m_TEX0 = TEX0;
m_TEXA = TEXA;
if(m_tw == 0)
{
m_tw = std::max<int>(m_TEX0.TW, GSLocalMemory::m_psm[m_TEX0.PSM].pal == 0 ? 3 : 5); // makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff
}
memset(m_valid, 0, sizeof(m_valid));
memset(m_pages.bm, 0, sizeof(m_pages.bm));
@ -239,17 +244,6 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
if(m_buff == NULL)
{
uint32 tw0 = std::max<int>(m_TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff
if(m_tw == 0)
{
m_tw = tw0;
}
else
{
ASSERT(m_tw >= tw0);
}
uint32 pitch = (1 << m_tw) << shift;
m_buff = _aligned_malloc(pitch * th * 4, 32);

View File

@ -2915,6 +2915,11 @@ public:
return GSVector4(aligned ? _mm_load_ps((const float*)p) : _mm_loadu_ps((const float*)p));
}
// Stores the four floats of v to p with _mm_stream_ps (non-temporal store hint).
// p must satisfy the alignment requirement of _mm_stream_ps (16 bytes).
__forceinline static void storent(void* p, const GSVector4& v)
{
	float* dst = (float*)p;

	_mm_stream_ps(dst, v.m);
}
__forceinline static void storel(void* p, const GSVector4& v)
{
_mm_store_sd((double*)p, _mm_castps_pd(v.m));