From 645188e0e86d7204ee76d277301bf7f1bc87c16c Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 17 Aug 2010 04:38:49 +0000 Subject: [PATCH] GSdx: Minor optimization and some code simplifications relating to VertexKick/DrawingKick and the Packed register handlers. I also added preliminary work for a switch-based packed register dispatcher (WIP, doesn't support frameskipping yet). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3649 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GS.h | 149 ++++++----- plugins/GSdx/GSRenderer.h | 5 +- plugins/GSdx/GSRendererDX11.cpp | 178 ++++++------- plugins/GSdx/GSRendererDX11.h | 5 +- plugins/GSdx/GSRendererDX9.cpp | 190 +++++++------- plugins/GSdx/GSRendererDX9.h | 5 +- plugins/GSdx/GSRendererNull.h | 7 +- plugins/GSdx/GSRendererSW.cpp | 154 +++++------ plugins/GSdx/GSRendererSW.h | 6 +- plugins/GSdx/GSState.cpp | 436 +++++++++++++++++--------------- plugins/GSdx/GSState.h | 80 ++---- 11 files changed, 619 insertions(+), 596 deletions(-) diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index 0475e377ed..59eab56486 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -264,11 +264,10 @@ union name \ #define REG_SET_END }; REG64_(GSReg, BGCOLOR) - uint32 R:8; - uint32 G:8; - uint32 B:8; - uint32 _PAD1:8; - uint32 _PAD2:32; + uint8 R; + uint8 G; + uint8 B; + uint8 _PAD1[5]; REG_END REG64_(GSReg, BUSDIR) @@ -402,8 +401,8 @@ union REG_END REG64_(GSReg, SIGLBLID) - uint32 SIGID:32; - uint32 LBLID:32; + uint32 SIGID; + uint32 LBLID; REG_END REG64_(GSReg, SMODE1) @@ -518,7 +517,7 @@ REG128(GIFTag) uint32 PRIM:11; uint32 FLG:2; // enum GIF_FLG uint32 NREG:4; - uint64 REGS:64; + uint64 REGS; REG_END // GIFReg @@ -529,8 +528,8 @@ REG64_(GIFReg, ALPHA) uint32 C:2; uint32 D:2; uint32 _PAD1:24; - uint32 FIX:8; - uint32 _PAD2:24; + uint8 FIX; + uint8 _PAD2[3]; REG_END2 // opaque => output will be Cs/As __forceinline bool IsOpaque() const {return (A == B || C == 2 && FIX == 0) && D == 0 || (A == 0 && B == D && C == 2 && FIX 
== 0x80);} @@ -630,22 +629,19 @@ REG64_(GIFReg, FBA) REG_END REG64_(GIFReg, FINISH) - uint32 _PAD1:32; - uint32 _PAD2:32; + uint32 _PAD1[2]; REG_END REG64_(GIFReg, FOG) - uint32 _PAD1:32; - uint32 _PAD2:24; - uint32 F:8; + uint8 _PAD1[4+3]; + uint8 F:8; REG_END REG64_(GIFReg, FOGCOL) - uint32 FCR:8; - uint32 FCG:8; - uint32 FCB:8; - uint32 _PAD1:8; - uint32 _PAD2:32; + uint8 FCR; + uint8 FCG; + uint8 FCB; + uint8 _PAD1[5]; REG_END REG64_(GIFReg, FRAME) @@ -655,19 +651,19 @@ REG64_(GIFReg, FRAME) uint32 _PAD2:2; uint32 PSM:6; uint32 _PAD3:2; - uint32 FBMSK:32; + uint32 FBMSK; REG_END2 uint32 Block() const {return FBP << 5;} REG_END2 REG64_(GIFReg, HWREG) - uint32 DATA_LOWER:32; - uint32 DATA_UPPER:32; + uint32 DATA_LOWER; + uint32 DATA_UPPER; REG_END REG64_(GIFReg, LABEL) - uint32 ID:32; - uint32 IDMSK:32; + uint32 ID; + uint32 IDMSK; REG_END REG64_(GIFReg, MIPTBP1) @@ -691,8 +687,7 @@ REG64_(GIFReg, MIPTBP2) REG_END REG64_(GIFReg, NOP) - uint32 _PAD1:32; - uint32 _PAD2:32; + uint32 _PAD[2]; REG_END REG64_(GIFReg, PABE) @@ -736,10 +731,10 @@ REG64_(GIFReg, PRMODECONT) REG_END REG64_(GIFReg, RGBAQ) - uint32 R:8; - uint32 G:8; - uint32 B:8; - uint32 A:8; + uint8 R; + uint8 G; + uint8 B; + uint8 A; float Q; REG_END @@ -761,8 +756,8 @@ REG64_(GIFReg, SCISSOR) REG_END REG64_(GIFReg, SIGNAL) - uint32 ID:32; - uint32 IDMSK:32; + uint32 ID; + uint32 IDMSK; REG_END REG64_(GIFReg, ST) @@ -848,12 +843,12 @@ REG64_(GIFReg, TEX2) REG_END REG64_(GIFReg, TEXA) - uint32 TA0:8; - uint32 _PAD1:7; - uint32 AEM:1; - uint32 _PAD2:16; - uint32 TA1:8; - uint32 _PAD3:24; + uint8 TA0; + uint8 _PAD1:7; + uint8 AEM:1; + uint16 _PAD2; + uint8 TA1:8; + uint8 _PAD3[3]; REG_END REG64_(GIFReg, TEXCLUT) @@ -898,11 +893,11 @@ REG_END // GSState::GIFPackedRegHandlerUV and GSState::GIFRegHandlerUV will make sure that the _PAD1/2 bits are set to zero REG64_(GIFReg, UV) - uint32 U:16; + uint16 U; // uint32 _PAD1:2; - uint32 V:16; + uint16 V; // uint32 _PAD2:2; - uint32 _PAD3:32; + uint32 _PAD3; REG_END 
// GSState::GIFRegHandlerXYOFFSET will make sure that the _PAD1/2 bits are set to zero @@ -913,14 +908,14 @@ REG64_(GIFReg, XYOFFSET) REG_END REG64_(GIFReg, XYZ) - uint32 X:16; - uint32 Y:16; - uint32 Z:32; + uint16 X; + uint16 Y; + uint32 Z; REG_END REG64_(GIFReg, XYZF) - uint32 X:16; - uint32 Y:16; + uint16 X; + uint16 Y; uint32 Z:24; uint32 F:8; REG_END @@ -986,20 +981,18 @@ REG_SET_END REG128_(GIFPacked, PRIM) uint32 PRIM:11; uint32 _PAD1:21; - uint32 _PAD2:32; - uint32 _PAD3:32; - uint32 _PAD4:32; + uint32 _PAD2[3]; REG_END REG128_(GIFPacked, RGBA) - uint32 R:8; - uint32 _PAD1:24; - uint32 G:8; - uint32 _PAD2:24; - uint32 B:8; - uint32 _PAD3:24; - uint32 A:8; - uint32 _PAD4:24; + uint8 R; + uint8 _PAD1[3]; + uint8 G; + uint8 _PAD2[3]; + uint8 B; + uint8 _PAD3[3]; + uint8 A; + uint8 _PAD4[3]; REG_END REG128_(GIFPacked, STQ) @@ -1019,10 +1012,11 @@ REG128_(GIFPacked, UV) REG_END REG128_(GIFPacked, XYZF2) - uint32 X:16; - uint32 _PAD1:16; - uint32 Y:16; - uint32 _PAD2:16; + uint16 X; + uint16 _PAD1; + uint16 Y; + uint16 _PAD2; + uint32 _PAD3:4; uint32 Z:24; uint32 _PAD4:4; @@ -1034,37 +1028,36 @@ REG128_(GIFPacked, XYZF2) REG_END REG128_(GIFPacked, XYZ2) - uint32 X:16; - uint32 _PAD1:16; - uint32 Y:16; - uint32 _PAD2:16; - uint32 Z:32; + uint16 X; + uint16 _PAD1; + uint16 Y; + uint16 _PAD2; + uint32 Z; uint32 _PAD3:15; uint32 ADC:1; uint32 _PAD4:16; REG_END REG128_(GIFPacked, FOG) - uint32 _PAD1:32; - uint32 _PAD2:32; - uint32 _PAD3:32; + uint32 _PAD1; + uint32 _PAD2; + uint32 _PAD3; uint32 _PAD4:4; uint32 F:8; uint32 _PAD5:20; REG_END REG128_(GIFPacked, A_D) - uint64 DATA:64; - uint32 ADDR:8; // enum GIF_A_D_REG - uint32 _PAD1:24; - uint32 _PAD2:32; + uint64 DATA; + uint8 ADDR:8; // enum GIF_A_D_REG + uint8 _PAD1[3+4]; REG_END REG128_(GIFPacked, NOP) - uint32 _PAD1:32; - uint32 _PAD2:32; - uint32 _PAD3:32; - uint32 _PAD4:32; + uint32 _PAD1; + uint32 _PAD2; + uint32 _PAD3; + uint32 _PAD4; REG_END REG128_SET(GIFPackedReg) diff --git a/plugins/GSdx/GSRenderer.h 
b/plugins/GSdx/GSRenderer.h index 90d8b277bd..5caf328024 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -158,7 +158,8 @@ protected: m_maxcount -= 100; } - template __forceinline Vertex* DrawingKick(bool skip, int& count) + // Returns a pointer to the drawing vertex. Can return NULL! + template __fi Vertex* BaseDrawingKick(int& count) { switch(prim) { @@ -232,7 +233,7 @@ protected: __assume(0); } - return !skip ? v : NULL; + return v; } virtual void Draw() = 0; diff --git a/plugins/GSdx/GSRendererDX11.cpp b/plugins/GSdx/GSRendererDX11.cpp index 8bf8c902ba..a97eaa3d43 100644 --- a/plugins/GSdx/GSRendererDX11.cpp +++ b/plugins/GSdx/GSRendererDX11.cpp @@ -38,9 +38,11 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev) return true; } -template -void GSRendererDX11::VertexKick(bool skip) +void GSRendererDX11::DoVertexKick() { + const bool tme = PRIM->TME; + const bool fst = PRIM->FST; + GSVertexHW11& dst = m_vl.AddTail(); dst.vi[0] = m_v.vi[0]; @@ -108,94 +110,98 @@ void GSRendererDX11::VertexKick(bool skip) GSVector4::storel(&dst.ST, m_v.GetUV()); } #endif +} - int count = 0; +template< uint32 prim > +void GSRendererDX11::DrawingKick( bool skip ) +{ + int count; - if(GSVertexHW11* v = DrawingKick(skip, count)) + GSVertexHW11* v = BaseDrawingKick(count); + if (skip || !v) return; + + GSVector4i scissor = m_context->scissor.dx10; + + GSVector4i pmin, pmax; + + #if _M_SSE >= 0x401 + + GSVector4i v0, v1, v2; + + switch(prim) { - GSVector4i scissor = m_context->scissor.dx10; - - GSVector4i pmin, pmax; - - #if _M_SSE >= 0x401 - - GSVector4i v0, v1, v2; - - switch(prim) - { - case GS_POINTLIST: - v0 = GSVector4i::load((int)v[0].p.xy).upl16(); - pmin = v0; - pmax = v0; - break; - case GS_LINELIST: - case GS_LINESTRIP: - case GS_SPRITE: - v0 = GSVector4i::load((int)v[0].p.xy); - v1 = GSVector4i::load((int)v[1].p.xy); - pmin = v0.min_u16(v1).upl16(); - pmax = v0.max_u16(v1).upl16(); - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case 
GS_TRIANGLEFAN: - v0 = GSVector4i::load((int)v[0].p.xy); - v1 = GSVector4i::load((int)v[1].p.xy); - v2 = GSVector4i::load((int)v[2].p.xy); - pmin = v0.min_u16(v1).min_u16(v2).upl16(); - pmax = v0.max_u16(v1).max_u16(v2).upl16(); - break; - } - - #else - - switch(prim) - { - case GS_POINTLIST: - pmin.x = v[0].p.x; - pmin.y = v[0].p.y; - pmax.x = v[0].p.x; - pmax.y = v[0].p.y; - break; - case GS_LINELIST: - case GS_LINESTRIP: - case GS_SPRITE: - pmin.x = std::min(v[0].p.x, v[1].p.x); - pmin.y = std::min(v[0].p.y, v[1].p.y); - pmax.x = std::max(v[0].p.x, v[1].p.x); - pmax.y = std::max(v[0].p.y, v[1].p.y); - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - pmin.x = std::min(std::min(v[0].p.x, v[1].p.x), v[2].p.x); - pmin.y = std::min(std::min(v[0].p.y, v[1].p.y), v[2].p.y); - pmax.x = std::max(std::max(v[0].p.x, v[1].p.x), v[2].p.x); - pmax.y = std::max(std::max(v[0].p.y, v[1].p.y), v[2].p.y); - break; - } - - #endif - - GSVector4i test = (pmax < scissor) | (pmin > scissor.zwxy()); - - switch(prim) - { - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - case GS_SPRITE: - test |= pmin == pmax; - break; - } - - if(test.mask() & 0xff) - { - return; - } - - m_count += count; + case GS_POINTLIST: + v0 = GSVector4i::load((int)v[0].p.xy).upl16(); + pmin = v0; + pmax = v0; + break; + case GS_LINELIST: + case GS_LINESTRIP: + case GS_SPRITE: + v0 = GSVector4i::load((int)v[0].p.xy); + v1 = GSVector4i::load((int)v[1].p.xy); + pmin = v0.min_u16(v1).upl16(); + pmax = v0.max_u16(v1).upl16(); + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + v0 = GSVector4i::load((int)v[0].p.xy); + v1 = GSVector4i::load((int)v[1].p.xy); + v2 = GSVector4i::load((int)v[2].p.xy); + pmin = v0.min_u16(v1).min_u16(v2).upl16(); + pmax = v0.max_u16(v1).max_u16(v2).upl16(); + break; } + + #else + + switch(prim) + { + case GS_POINTLIST: + pmin.x = v[0].p.x; + pmin.y = v[0].p.y; + pmax.x = v[0].p.x; + pmax.y = v[0].p.y; + 
break; + case GS_LINELIST: + case GS_LINESTRIP: + case GS_SPRITE: + pmin.x = std::min(v[0].p.x, v[1].p.x); + pmin.y = std::min(v[0].p.y, v[1].p.y); + pmax.x = std::max(v[0].p.x, v[1].p.x); + pmax.y = std::max(v[0].p.y, v[1].p.y); + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + pmin.x = std::min(std::min(v[0].p.x, v[1].p.x), v[2].p.x); + pmin.y = std::min(std::min(v[0].p.y, v[1].p.y), v[2].p.y); + pmax.x = std::max(std::max(v[0].p.x, v[1].p.x), v[2].p.x); + pmax.y = std::max(std::max(v[0].p.y, v[1].p.y), v[2].p.y); + break; + } + + #endif + + GSVector4i test = (pmax < scissor) | (pmin > scissor.zwxy()); + + switch(prim) + { + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + case GS_SPRITE: + test |= pmin == pmax; + break; + } + + if(test.mask() & 0xff) + { + return; + } + + m_count += count; } void GSRendererDX11::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) diff --git a/plugins/GSdx/GSRendererDX11.h b/plugins/GSdx/GSRendererDX11.h index fc7f8a0337..81c1c54663 100644 --- a/plugins/GSdx/GSRendererDX11.h +++ b/plugins/GSdx/GSRendererDX11.h @@ -36,5 +36,8 @@ public: bool CreateDevice(GSDevice* dev); - template void VertexKick(bool skip); + template + void DrawingKick( bool skip ); + + void DoVertexKick(); }; diff --git a/plugins/GSdx/GSRendererDX9.cpp b/plugins/GSdx/GSRendererDX9.cpp index 06b816fe5a..2d03de03bc 100644 --- a/plugins/GSdx/GSRendererDX9.cpp +++ b/plugins/GSdx/GSRendererDX9.cpp @@ -57,20 +57,22 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev) return true; } -template -void GSRendererDX9::VertexKick(bool skip) +void GSRendererDX9::DoVertexKick() { - GSVertexHW9 v; + const bool tme = PRIM->TME; + const bool fst = PRIM->FST; - v.p = GSVector4(((GSVector4i)m_v.XYZ).upl16()); + GSVertexHW9& dst = m_vl.AddTail(); + + dst.p = GSVector4(((GSVector4i)m_v.XYZ).upl16()); if(tme && !fst) { - v.p = v.p.xyxy(GSVector4((float)m_v.XYZ.Z, m_v.RGBAQ.Q)); + dst.p = 
dst.p.xyxy(GSVector4((float)m_v.XYZ.Z, m_v.RGBAQ.Q)); } else { - v.p = v.p.xyxy(GSVector4::load((float)m_v.XYZ.Z)); + dst.p = dst.p.xyxy(GSVector4::load((float)m_v.XYZ.Z)); } int Uadjust = 0; @@ -80,7 +82,7 @@ void GSRendererDX9::VertexKick(bool skip) { if(fst) { - v.t = m_v.GetUV(); + dst.t = m_v.GetUV(); #ifdef USE_UPSCALE_HACKS @@ -125,107 +127,107 @@ void GSRendererDX9::VertexKick(bool skip) else if (Vdiff <= 1) { Vadjust = 1; } } } + + dst.t.x -= (float) Uadjust; + dst.t.y -= (float) Vadjust; + #endif } else { - v.t = GSVector4::loadl(&m_v.ST); + dst.t = GSVector4::loadl(&m_v.ST); } } - GSVertexHW9& dst = m_vl.AddTail(); - - dst = v; - -#ifdef USE_UPSCALE_HACKS - if(tme && fst) - { - dst.t.x -= (float) Uadjust; - dst.t.y -= (float) Vadjust; - } -#endif - dst.c0 = m_v.RGBAQ.u32[0]; dst.c1 = m_v.FOG.u32[1]; +} - int count = 0; +template< uint32 prim > +void GSRendererDX9::DrawingKick( bool skip ) +{ + int count; - if(GSVertexHW9* v = DrawingKick(skip, count)) + // BaseDrawingKick can never return NULL here because the DrawingKick function + // tables route to DrawingKickNull for GS_INVLALID prim types (and that's the only + // condition where this function would return NULL). 
+ + GSVertexHW9* v = BaseDrawingKick(count); + if (skip || !v) return; + + GSVector4 scissor = m_context->scissor.dx9; + + GSVector4 pmin, pmax; + + switch(prim) { - GSVector4 scissor = m_context->scissor.dx9; - - GSVector4 pmin, pmax; - - switch(prim) - { - case GS_POINTLIST: - pmin = v[0].p; - pmax = v[0].p; - break; - case GS_LINELIST: - case GS_LINESTRIP: - case GS_SPRITE: - pmin = v[0].p.min(v[1].p); - pmax = v[0].p.max(v[1].p); - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - pmin = v[0].p.min(v[1].p).min(v[2].p); - pmax = v[0].p.max(v[1].p).max(v[2].p); - break; - } - - GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); - - switch(prim) - { - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - case GS_SPRITE: - test |= pmin == pmax; - break; - } - - if(test.mask() & 3) - { - return; - } - - switch(prim) - { - case GS_POINTLIST: - break; - case GS_LINELIST: - case GS_LINESTRIP: - if(PRIM->IIP == 0) {v[0].c0 = v[1].c0;} - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - if(PRIM->IIP == 0) {v[0].c0 = v[1].c0 = v[2].c0;} - break; - case GS_SPRITE: - if(PRIM->IIP == 0) {v[0].c0 = v[1].c0;} - v[0].p.z = v[1].p.z; - v[0].p.w = v[1].p.w; - v[0].c1 = v[1].c1; - v[2] = v[1]; - v[3] = v[1]; - v[1].p.y = v[0].p.y; - v[1].t.y = v[0].t.y; - v[2].p.x = v[0].p.x; - v[2].t.x = v[0].t.x; - v[4] = v[1]; - v[5] = v[2]; - count += 4; - break; - } - - m_count += count; + case GS_POINTLIST: + pmin = v[0].p; + pmax = v[0].p; + break; + case GS_LINELIST: + case GS_LINESTRIP: + case GS_SPRITE: + pmin = v[0].p.min(v[1].p); + pmax = v[0].p.max(v[1].p); + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + pmin = v[0].p.min(v[1].p).min(v[2].p); + pmax = v[0].p.max(v[1].p).max(v[2].p); + break; } + + GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); + + switch(prim) + { + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + case 
GS_SPRITE: + test |= pmin == pmax; + break; + } + + if(test.mask() & 3) + { + return; + } + + switch(prim) + { + case GS_POINTLIST: + break; + case GS_LINELIST: + case GS_LINESTRIP: + if(PRIM->IIP == 0) {v[0].c0 = v[1].c0;} + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + if(PRIM->IIP == 0) {v[0].c0 = v[1].c0 = v[2].c0;} + break; + case GS_SPRITE: + if(PRIM->IIP == 0) {v[0].c0 = v[1].c0;} + v[0].p.z = v[1].p.z; + v[0].p.w = v[1].p.w; + v[0].c1 = v[1].c1; + v[2] = v[1]; + v[3] = v[1]; + v[1].p.y = v[0].p.y; + v[1].t.y = v[0].t.y; + v[2].p.x = v[0].p.x; + v[2].t.x = v[0].t.x; + v[4] = v[1]; + v[5] = v[2]; + count += 4; + break; + } + + m_count += count; } void GSRendererDX9::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) diff --git a/plugins/GSdx/GSRendererDX9.h b/plugins/GSdx/GSRendererDX9.h index f4d8eb73a8..4483c2c9d9 100644 --- a/plugins/GSdx/GSRendererDX9.h +++ b/plugins/GSdx/GSRendererDX9.h @@ -43,5 +43,8 @@ public: bool CreateDevice(GSDevice* dev); - template void VertexKick(bool skip); + template + void DrawingKick( bool skip ); + + void DoVertexKick(); }; diff --git a/plugins/GSdx/GSRendererNull.h b/plugins/GSdx/GSRendererNull.h index 5d54ebbf35..845965e5fd 100644 --- a/plugins/GSdx/GSRendererNull.h +++ b/plugins/GSdx/GSRendererNull.h @@ -45,7 +45,8 @@ public: virtual ~GSRendererNull() {} - template void VertexKick(bool skip) - { - } + template + void DrawingKick( bool skip ) {} + + void DoVertexKick() {} }; diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index f367d1e12b..80ee90df54 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -427,22 +427,24 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) } } -template -void GSRendererSW::VertexKick(bool skip) +void GSRendererSW::DoVertexKick() { - const GSDrawingContext* context = m_context; + const bool tme = PRIM->TME; + const bool fst = PRIM->FST; + + const 
GSDrawingContext& context = *m_context; GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]); xy = xy.insert16<3>(m_v.FOG.F); xy = xy.upl16(); - xy -= context->XYOFFSET; + xy -= context.XYOFFSET; - GSVertexSW v; + GSVertexSW& dst = m_vl.AddTail(); - v.p = GSVector4(xy) * g_pos_scale; + dst.p = GSVector4(xy) * g_pos_scale; - v.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7); + dst.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7); if(tme) { @@ -450,31 +452,37 @@ void GSRendererSW::VertexKick(bool skip) if(fst) { - v.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4)); + dst.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4)); q = 1.0f; } else { - v.t = GSVector4(m_v.ST.S, m_v.ST.T); - v.t *= GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH); + dst.t = GSVector4(m_v.ST.S, m_v.ST.T); + dst.t *= GSVector4(0x10000 << context.TEX0.TW, 0x10000 << context.TEX0.TH); q = m_v.RGBAQ.Q; } - v.t = v.t.xyxy(GSVector4::load(q)); + dst.t = dst.t.xyxy(GSVector4::load(q)); } - GSVertexSW& dst = m_vl.AddTail(); - - dst = v; - dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the uint32 => float => uint32 conversion +} - int count = 0; - if(GSVertexSW* v = DrawingKick(skip, count)) - { -if(!m_dump) +template< uint32 prim > +void GSRendererSW::DrawingKick( bool skip ) { + int count; + + // BaseDrawingKick can never return NULL here because the DrawingKick function + // tables route to DrawingKickNull for GS_INVLALID prim types (and that's the only + // condition where this function would return NULL). 
+ + GSVertexSW* v = BaseDrawingKick(count); + if (skip || !v) return; + + if(!m_dump) + { GSVector4 pmin, pmax; switch(prim) @@ -497,7 +505,7 @@ if(!m_dump) break; } - GSVector4 scissor = context->scissor.ex; + GSVector4 scissor = m_context->scissor.ex; GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); @@ -526,72 +534,72 @@ if(!m_dump) { return; } -} + } + + switch(prim) + { + case GS_POINTLIST: + break; + case GS_LINELIST: + case GS_LINESTRIP: + if(PRIM->IIP == 0) {v[0].c = v[1].c;} + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;} + break; + case GS_SPRITE: + break; + } + + if(m_count < 30 && m_count >= 3) + { + GSVertexSW* v = &m_vertices[m_count - 3]; + + int tl = 0; + int br = 0; + + bool isquad = false; + switch(prim) { - case GS_POINTLIST: - break; - case GS_LINELIST: - case GS_LINESTRIP: - if(PRIM->IIP == 0) {v[0].c = v[1].c;} - break; - case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: - if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;} - break; - case GS_SPRITE: + case GS_TRIANGLELIST: + isquad = GSVertexSW::IsQuad(v, tl, br); break; } - if(m_count < 30 && m_count >= 3) + if(isquad) { - GSVertexSW* v = &m_vertices[m_count - 3]; + m_count -= 3; - int tl = 0; - int br = 0; - - bool isquad = false; - - switch(prim) + if(m_count > 0) { - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - case GS_TRIANGLELIST: - isquad = GSVertexSW::IsQuad(v, tl, br); - break; - } - - if(isquad) - { - m_count -= 3; - - if(m_count > 0) - { - tl += m_count; - br += m_count; - - Flush(); - } - - if(tl != 0) m_vertices[0] = m_vertices[tl]; - if(br != 1) m_vertices[1] = m_vertices[br]; - - m_count = 2; - - uint32 tmp = PRIM->PRIM; - PRIM->PRIM = GS_SPRITE; + tl += m_count; + br += m_count; Flush(); - - PRIM->PRIM = tmp; - - m_perfmon.Put(GSPerfMon::Quad, 1); - - return; } - } - m_count += count; + if(tl != 0) m_vertices[0] = m_vertices[tl]; + if(br != 1) 
m_vertices[1] = m_vertices[br]; + + m_count = 2; + + uint32 tmp = PRIM->PRIM; + PRIM->PRIM = GS_SPRITE; + + Flush(); + + PRIM->PRIM = tmp; + + m_perfmon.Put(GSPerfMon::Quad, 1); + + return; + } } + + m_count += count; } diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index f6b09b623c..15a3fe07a1 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -47,8 +47,10 @@ public: GSRendererSW(); virtual ~GSRendererSW(); - template - void VertexKick(bool skip); + template + void DrawingKick( bool skip ); + + void DoVertexKick(); void InvalidateTextureCache() { diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index a6c8d773e0..6268ecf97f 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -37,7 +37,6 @@ GSState::GSState() , m_vprim(1) , m_version(6) , m_frameskip(0) - , m_vkf(NULL) { m_sssize = 0; @@ -122,15 +121,68 @@ void GSState::SetMultithreaded( bool isMT ) m_mt = isMT; if( isMT ) { - m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerNull; - m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerNull; - m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerNull; + m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerNull; + m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerNull; + m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerNull; } else { - m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerSIGNAL; - m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerFINISH; - m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerLABEL; + m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerSIGNAL; + m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerFINISH; + m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerLABEL; + } +} + +void GSState::SetFrameSkip(int skip) +{ + if(m_frameskip == skip) return; + m_frameskip = skip; + + if(skip) + { + #if 
!UsePackedRegSwitch + m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerNOP; + m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerNOP; + m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerNOP; + m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = &GSState::GIFPackedRegHandlerNOP; + m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerNOP; + m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerNOP; + m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerNOP; + #endif + + m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerNOP; + m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerNOP; + m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerNOP; + m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerNOP; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerNOP; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerNOP; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerNOP; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerNOP; + m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerNOP; + m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerNOP; + } + else + { + #if !UsePackedRegSwitch + m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2; + m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2; + m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<0>; + m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<1>; + m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG; + m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZF3; + m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZ3; + #endif + + m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = 
&GSState::GIFRegHandlerPRIM; + m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ; + m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST; + m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3; + m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT; + m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE; } } @@ -151,84 +203,86 @@ void GSState::Reset() void GSState::ResetHandlers() { + #if !UsePackedRegSwitch for(int i = 0; i < countof(m_fpGIFPackedRegHandlers); i++) { m_fpGIFPackedRegHandlers[i] = &GSState::GIFPackedRegHandlerNull; } - m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = &GSState::GIFPackedRegHandlerPRIM; - m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA; - m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ; - m_fpGIFPackedRegHandlers[GIF_REG_UV] = &GSState::GIFPackedRegHandlerUV; - m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2; - m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2; - m_fpGIFPackedRegHandlers[GIF_REG_TEX0_1] = &GSState::GIFPackedRegHandlerTEX0<0>; - m_fpGIFPackedRegHandlers[GIF_REG_TEX0_2] = &GSState::GIFPackedRegHandlerTEX0<1>; - m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerCLAMP<0>; - m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = &GSState::GIFPackedRegHandlerCLAMP<1>; - m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG; - m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerXYZF3; - m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerXYZ3; - m_fpGIFPackedRegHandlers[GIF_REG_A_D] = 
&GSState::GIFPackedRegHandlerA_D; - m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP; + m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = (GIFPackedRegHandler)&GSState::GIFRegHandlerPRIM; + m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA; + m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ; + m_fpGIFPackedRegHandlers[GIF_REG_UV] = &GSState::GIFPackedRegHandlerUV; + m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2; + m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2; + m_fpGIFPackedRegHandlers[GIF_REG_TEX0_1] = (GIFPackedRegHandler)&GSState::GIFRegHandlerTEX0<0>; + m_fpGIFPackedRegHandlers[GIF_REG_TEX0_2] = (GIFPackedRegHandler)&GSState::GIFRegHandlerTEX0<1>; + m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<0>; + m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)&GSState::GIFRegHandlerCLAMP<1>; + m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG; + m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZF3; + m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = (GIFPackedRegHandler)&GSState::GIFRegHandlerXYZ3; + m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D; + m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP; + #endif for(int i = 0; i < countof(m_fpGIFRegHandlers); i++) { m_fpGIFRegHandlers[i] = &GSState::GIFRegHandlerNull; } - m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM; - m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ; - m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST; - m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2; - m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_1] = 
&GSState::GIFRegHandlerTEX0<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_2] = &GSState::GIFRegHandlerTEX0<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_1] = &GSState::GIFRegHandlerCLAMP<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_2] = &GSState::GIFRegHandlerCLAMP<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_FOG] = &GSState::GIFRegHandlerFOG; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3; - m_fpGIFRegHandlers[GIF_A_D_REG_NOP] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_1] = &GSState::GIFRegHandlerTEX1<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_2] = &GSState::GIFRegHandlerTEX1<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_1] = &GSState::GIFRegHandlerTEX2<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_2] = &GSState::GIFRegHandlerTEX2<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_1] = &GSState::GIFRegHandlerXYOFFSET<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_2] = &GSState::GIFRegHandlerXYOFFSET<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT; - m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE; - m_fpGIFRegHandlers[GIF_A_D_REG_TEXCLUT] = &GSState::GIFRegHandlerTEXCLUT; - m_fpGIFRegHandlers[GIF_A_D_REG_SCANMSK] = &GSState::GIFRegHandlerSCANMSK; - m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_1] = &GSState::GIFRegHandlerMIPTBP1<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_2] = &GSState::GIFRegHandlerMIPTBP1<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_1] = &GSState::GIFRegHandlerMIPTBP2<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_2] = &GSState::GIFRegHandlerMIPTBP2<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_TEXA] = &GSState::GIFRegHandlerTEXA; - m_fpGIFRegHandlers[GIF_A_D_REG_FOGCOL] = &GSState::GIFRegHandlerFOGCOL; - m_fpGIFRegHandlers[GIF_A_D_REG_TEXFLUSH] = &GSState::GIFRegHandlerTEXFLUSH; - m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_1] = &GSState::GIFRegHandlerSCISSOR<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_2] = 
&GSState::GIFRegHandlerSCISSOR<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_1] = &GSState::GIFRegHandlerALPHA<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_2] = &GSState::GIFRegHandlerALPHA<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_DIMX] = &GSState::GIFRegHandlerDIMX; - m_fpGIFRegHandlers[GIF_A_D_REG_DTHE] = &GSState::GIFRegHandlerDTHE; - m_fpGIFRegHandlers[GIF_A_D_REG_COLCLAMP] = &GSState::GIFRegHandlerCOLCLAMP; - m_fpGIFRegHandlers[GIF_A_D_REG_TEST_1] = &GSState::GIFRegHandlerTEST<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_TEST_2] = &GSState::GIFRegHandlerTEST<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_PABE] = &GSState::GIFRegHandlerPABE; - m_fpGIFRegHandlers[GIF_A_D_REG_FBA_1] = &GSState::GIFRegHandlerFBA<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_FBA_2] = &GSState::GIFRegHandlerFBA<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_1] = &GSState::GIFRegHandlerFRAME<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_2] = &GSState::GIFRegHandlerFRAME<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_1] = &GSState::GIFRegHandlerZBUF<0>; - m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_2] = &GSState::GIFRegHandlerZBUF<1>; - m_fpGIFRegHandlers[GIF_A_D_REG_BITBLTBUF] = &GSState::GIFRegHandlerBITBLTBUF; - m_fpGIFRegHandlers[GIF_A_D_REG_TRXPOS] = &GSState::GIFRegHandlerTRXPOS; - m_fpGIFRegHandlers[GIF_A_D_REG_TRXREG] = &GSState::GIFRegHandlerTRXREG; - m_fpGIFRegHandlers[GIF_A_D_REG_TRXDIR] = &GSState::GIFRegHandlerTRXDIR; - m_fpGIFRegHandlers[GIF_A_D_REG_HWREG] = &GSState::GIFRegHandlerHWREG; + m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM; + m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ; + m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST; + m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2; + m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_1] = &GSState::GIFRegHandlerTEX0<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_2] = 
&GSState::GIFRegHandlerTEX0<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_1] = &GSState::GIFRegHandlerCLAMP<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_2] = &GSState::GIFRegHandlerCLAMP<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_FOG] = &GSState::GIFRegHandlerFOG; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3; + m_fpGIFRegHandlers[GIF_A_D_REG_NOP] = &GSState::GIFRegHandlerNOP; + m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_1] = &GSState::GIFRegHandlerTEX1<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_2] = &GSState::GIFRegHandlerTEX1<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_1] = &GSState::GIFRegHandlerTEX2<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_2] = &GSState::GIFRegHandlerTEX2<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_1] = &GSState::GIFRegHandlerXYOFFSET<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_2] = &GSState::GIFRegHandlerXYOFFSET<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT; + m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE; + m_fpGIFRegHandlers[GIF_A_D_REG_TEXCLUT] = &GSState::GIFRegHandlerTEXCLUT; + m_fpGIFRegHandlers[GIF_A_D_REG_SCANMSK] = &GSState::GIFRegHandlerSCANMSK; + m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_1] = &GSState::GIFRegHandlerMIPTBP1<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_2] = &GSState::GIFRegHandlerMIPTBP1<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_1] = &GSState::GIFRegHandlerMIPTBP2<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_2] = &GSState::GIFRegHandlerMIPTBP2<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_TEXA] = &GSState::GIFRegHandlerTEXA; + m_fpGIFRegHandlers[GIF_A_D_REG_FOGCOL] = &GSState::GIFRegHandlerFOGCOL; + m_fpGIFRegHandlers[GIF_A_D_REG_TEXFLUSH] = &GSState::GIFRegHandlerTEXFLUSH; + m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_1] = &GSState::GIFRegHandlerSCISSOR<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_2] = &GSState::GIFRegHandlerSCISSOR<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_1] = 
&GSState::GIFRegHandlerALPHA<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_2] = &GSState::GIFRegHandlerALPHA<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_DIMX] = &GSState::GIFRegHandlerDIMX; + m_fpGIFRegHandlers[GIF_A_D_REG_DTHE] = &GSState::GIFRegHandlerDTHE; + m_fpGIFRegHandlers[GIF_A_D_REG_COLCLAMP] = &GSState::GIFRegHandlerCOLCLAMP; + m_fpGIFRegHandlers[GIF_A_D_REG_TEST_1] = &GSState::GIFRegHandlerTEST<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_TEST_2] = &GSState::GIFRegHandlerTEST<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_PABE] = &GSState::GIFRegHandlerPABE; + m_fpGIFRegHandlers[GIF_A_D_REG_FBA_1] = &GSState::GIFRegHandlerFBA<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_FBA_2] = &GSState::GIFRegHandlerFBA<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_1] = &GSState::GIFRegHandlerFRAME<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_2] = &GSState::GIFRegHandlerFRAME<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_1] = &GSState::GIFRegHandlerZBUF<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_2] = &GSState::GIFRegHandlerZBUF<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_BITBLTBUF] = &GSState::GIFRegHandlerBITBLTBUF; + m_fpGIFRegHandlers[GIF_A_D_REG_TRXPOS] = &GSState::GIFRegHandlerTRXPOS; + m_fpGIFRegHandlers[GIF_A_D_REG_TRXREG] = &GSState::GIFRegHandlerTRXREG; + m_fpGIFRegHandlers[GIF_A_D_REG_TRXDIR] = &GSState::GIFRegHandlerTRXDIR; + m_fpGIFRegHandlers[GIF_A_D_REG_HWREG] = &GSState::GIFRegHandlerHWREG; SetMultithreaded( m_mt ); } @@ -326,14 +380,7 @@ void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* r) // ASSERT(0); } -void GSState::GIFPackedRegHandlerPRIM(const GIFPackedReg* r) -{ - // ASSERT(r->r.PRIM.PRIM < 7); - - GIFRegHandlerPRIM(&r->r); -} - -void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r) +void __fi GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r) { #if _M_SSE >= 0x301 @@ -358,7 +405,7 @@ void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* r) m_v.RGBAQ.Q = m_q; } -void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* r) +void __fi GSState::GIFPackedRegHandlerSTQ(const 
GIFPackedReg* r) { #if defined(_M_AMD64) @@ -379,7 +426,7 @@ void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* r) m_q = r->STQ.Q; } -void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r) +void __fi GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r) { #if _M_SSE >= 0x200 @@ -394,7 +441,7 @@ void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* r) #endif } -void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r) +void __fi GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r) { m_v.XYZ.X = r->XYZF2.X; m_v.XYZ.Y = r->XYZF2.Y; @@ -404,7 +451,7 @@ void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* r) VertexKick(r->XYZF2.ADC); } -void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r) +void __fi GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r) { m_v.XYZ.X = r->XYZ2.X; m_v.XYZ.Y = r->XYZ2.Y; @@ -413,32 +460,12 @@ void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* r) VertexKick(r->XYZ2.ADC); } -template void GSState::GIFPackedRegHandlerTEX0(const GIFPackedReg* r) -{ - GIFRegHandlerTEX0((GIFReg*)&r->u64[0]); -} - -template void GSState::GIFPackedRegHandlerCLAMP(const GIFPackedReg* r) -{ - GIFRegHandlerCLAMP((GIFReg*)&r->u64[0]); -} - -void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* r) +void __fi GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* r) { m_v.FOG.F = r->FOG.F; } -void GSState::GIFPackedRegHandlerXYZF3(const GIFPackedReg* r) -{ - GIFRegHandlerXYZF3((GIFReg*)&r->u64[0]); -} - -void GSState::GIFPackedRegHandlerXYZ3(const GIFPackedReg* r) -{ - GIFRegHandlerXYZ3((GIFReg*)&r->u64[0]); -} - -void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* r) +void __fi GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* r) { (this->*m_fpGIFRegHandlers[r->A_D.ADDR])(&r->r); } @@ -454,13 +481,13 @@ void GSState::GIFRegHandlerNull(const GIFReg* r) // ASSERT(0); } -void GSState::GIFRegHandlerPRIM(const GIFReg* r) +__forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim) { // ASSERT(r->PRIM.PRIM < 7); - 
if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(r->PRIM.PRIM)) + if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim.PRIM)) { - if((m_env.PRIM.u32[0] ^ r->PRIM.u32[0]) & 0x7f8) // all fields except PRIM + if((m_env.PRIM.u32[0] ^ prim.u32[0]) & 0x7f8) // all fields except PRIM { Flush(); } @@ -470,16 +497,19 @@ void GSState::GIFRegHandlerPRIM(const GIFReg* r) Flush(); } - m_env.PRIM = (GSVector4i)r->PRIM; - m_env.PRMODE._PRIM = r->PRIM.PRIM; + m_env.PRIM = (GSVector4i)prim; + m_env.PRMODE._PRIM = prim.PRIM; m_context = &m_env.CTXT[PRIM->CTXT]; - UpdateVertexKick(); - ResetPrim(); } +void GSState::GIFRegHandlerPRIM(const GIFReg* r) +{ + ApplyPRIM(r->PRIM); +} + void GSState::GIFRegHandlerRGBAQ(const GIFReg* r) { m_v.RGBAQ = (GSVector4i)r->RGBAQ; @@ -495,7 +525,7 @@ void GSState::GIFRegHandlerUV(const GIFReg* r) m_v.UV.u32[0] = r->UV.u32[0] & 0x3fff3fff; } -void GSState::GIFRegHandlerXYZF2(const GIFReg* r) +__fi void GSState::GIFRegHandlerXYZF2(const GIFReg* r) { /* m_v.XYZ.X = r->XYZF.X; @@ -510,14 +540,14 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* r) VertexKick(false); } -void GSState::GIFRegHandlerXYZ2(const GIFReg* r) +__fi void GSState::GIFRegHandlerXYZ2(const GIFReg* r) { m_v.XYZ = (GSVector4i)r->XYZ; VertexKick(false); } -__forceinline void GSState::ApplyTEX0( uint i, GIFRegTEX0& TEX0 ) +__fi void GSState::ApplyTEX0( uint i, GIFRegTEX0& TEX0 ) { // even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing @@ -540,7 +570,7 @@ __forceinline void GSState::ApplyTEX0( uint i, GIFRegTEX0& TEX0 ) m_env.CTXT[i].offset.tex = m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); } - m_env.CTXT[i].TEX0 = TEX0; + m_env.CTXT[i].TEX0 = (GSVector4i)TEX0; if(wt) { @@ -548,7 +578,7 @@ __forceinline void GSState::ApplyTEX0( uint i, GIFRegTEX0& TEX0 ) } } -template void GSState::GIFRegHandlerTEX0(const GIFReg* r) +template __fi void GSState::GIFRegHandlerTEX0(const GIFReg* r) { 
GIFRegTEX0 TEX0 = r->TEX0; @@ -558,7 +588,7 @@ template void GSState::GIFRegHandlerTEX0(const GIFReg* r) ApplyTEX0( i, TEX0 ); } -template void GSState::GIFRegHandlerCLAMP(const GIFReg* r) +template __fi void GSState::GIFRegHandlerCLAMP(const GIFReg* r) { if(PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP) { @@ -573,7 +603,7 @@ void GSState::GIFRegHandlerFOG(const GIFReg* r) m_v.FOG = (GSVector4i)r->FOG; } -void GSState::GIFRegHandlerXYZF3(const GIFReg* r) +__fi void GSState::GIFRegHandlerXYZF3(const GIFReg* r) { /* m_v.XYZ.X = r->XYZF.X; @@ -588,7 +618,7 @@ void GSState::GIFRegHandlerXYZF3(const GIFReg* r) VertexKick(true); } -void GSState::GIFRegHandlerXYZ3(const GIFReg* r) +__fi void GSState::GIFRegHandlerXYZ3(const GIFReg* r) { m_v.XYZ = (GSVector4i)r->XYZ; @@ -599,7 +629,7 @@ void GSState::GIFRegHandlerNOP(const GIFReg* r) { } -template void GSState::GIFRegHandlerTEX1(const GIFReg* r) +template __fi void GSState::GIFRegHandlerTEX1(const GIFReg* r) { if(PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1) { @@ -609,7 +639,7 @@ template void GSState::GIFRegHandlerTEX1(const GIFReg* r) m_env.CTXT[i].TEX1 = (GSVector4i)r->TEX1; } -template void GSState::GIFRegHandlerTEX2(const GIFReg* r) +template __fi void GSState::GIFRegHandlerTEX2(const GIFReg* r) { // m_env.CTXT[i].TEX2 = r->TEX2; // not used @@ -626,7 +656,7 @@ template void GSState::GIFRegHandlerTEX2(const GIFReg* r) ApplyTEX0(i, TEX0); } -template void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r) +template __fi void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r) { GSVector4i o = (GSVector4i)r->XYOFFSET & GSVector4i::x0000ffff(); @@ -640,7 +670,7 @@ template void GSState::GIFRegHandlerXYOFFSET(const GIFReg* r) m_env.CTXT[i].UpdateScissor(); } -void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r) +__fi void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r) { if(r->PRMODECONT != m_env.PRMODECONT) { @@ -654,11 +684,9 @@ void GSState::GIFRegHandlerPRMODECONT(const GIFReg* r) // if(PRIM->PRIM == 7) 
printf("Invalid PRMODECONT/PRIM\n"); m_context = &m_env.CTXT[PRIM->CTXT]; - - UpdateVertexKick(); } -void GSState::GIFRegHandlerPRMODE(const GIFReg* r) +__fi void GSState::GIFRegHandlerPRMODE(const GIFReg* r) { if(!m_env.PRMODECONT.AC) { @@ -670,11 +698,9 @@ void GSState::GIFRegHandlerPRMODE(const GIFReg* r) m_env.PRMODE._PRIM = _PRIM; m_context = &m_env.CTXT[PRIM->CTXT]; - - UpdateVertexKick(); } -void GSState::GIFRegHandlerTEXCLUT(const GIFReg* r) +__fi void GSState::GIFRegHandlerTEXCLUT(const GIFReg* r) { if(r->TEXCLUT != m_env.TEXCLUT) { @@ -704,7 +730,7 @@ template void GSState::GIFRegHandlerMIPTBP1(const GIFReg* r) m_env.CTXT[i].MIPTBP1 = (GSVector4i)r->MIPTBP1; } -template void GSState::GIFRegHandlerMIPTBP2(const GIFReg* r) +template __fi void GSState::GIFRegHandlerMIPTBP2(const GIFReg* r) { if(PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2) { @@ -741,7 +767,7 @@ void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* r) // InvalidateTextureCache(); } -template void GSState::GIFRegHandlerSCISSOR(const GIFReg* r) +template __fi void GSState::GIFRegHandlerSCISSOR(const GIFReg* r) { if(PRIM->CTXT == i && r->SCISSOR != m_env.CTXT[i].SCISSOR) { @@ -753,7 +779,7 @@ template void GSState::GIFRegHandlerSCISSOR(const GIFReg* r) m_env.CTXT[i].UpdateScissor(); } -template void GSState::GIFRegHandlerALPHA(const GIFReg* r) +template __fi void GSState::GIFRegHandlerALPHA(const GIFReg* r) { ASSERT(r->ALPHA.A != 3); ASSERT(r->ALPHA.B != 3); @@ -1426,6 +1452,9 @@ template void GSState::Transfer<3>(const uint8* mem, uint32 size); template void GSState::Transfer(const uint8* mem, uint32 size) { + // [TODO] make me into a template parameter... I think. 
--air + static const bool FrameSkipIt = false; + GSPerfMonAutoTimer pmat(m_perfmon); const uint8* start = mem; @@ -1447,11 +1476,11 @@ template void GSState::Transfer(const uint8* mem, uint32 size) // ASSERT(!(path.tag.PRE && path.tag.FLG == GIF_FLG_REGLIST)); // kingdom hearts - if(path.tag.PRE && path.tag.FLG == GIF_FLG_PACKED) + if(path.tag.PRE && (path.tag.FLG == GIF_FLG_PACKED) && !FrameSkipIt) { - GIFReg r; + GIFRegPRIM r; r.u64 = path.tag.PRIM; - GIFRegHandlerPRIM(&r); + ApplyPRIM(r); } } } @@ -1481,6 +1510,11 @@ template void GSState::Transfer(const uint8* mem, uint32 size) { uint32 reg = path.GetReg(); + #if 0 + // I assume this was some sort of debugging code? Why intercept and perform + // special handling for the first three entries in the table, and then do + // a LUT for the rest? Either do a switch for the whole table (best idea) + // or do a LUT for the whole table. switch(reg) { case GIF_REG_RGBA: @@ -1496,6 +1530,69 @@ template void GSState::Transfer(const uint8* mem, uint32 size) (this->*m_fpGIFPackedRegHandlers[reg])((GIFPackedReg*)mem); break; } + #endif + + #if UsePackedRegSwitch + // This is a switch statement version of the LUT above. Since there are only + // 16 entries, this is almost certainly ideal, since the compiler can inline + // all the handlers, and PGO will further optimize the switch dispatcher. + + if (FrameSkipIt) + { + // When skipping frames it looks like we only need to bother with the A_D handler + // and the TEX handlers. (and I'm thinking the TEX handlers might not be necessary + // if the PCSX2 side of the frameskipper is smart enough anyway). + switch(reg) + { + case GIF_REG_A_D: GIFPackedRegHandlerA_D ((GIFPackedReg*)mem); break; + case GIF_REG_TEX0_1: GIFRegHandlerTEX0<0> ((GIFReg*)mem); break; + case GIF_REG_TEX0_2: GIFRegHandlerTEX0<1> ((GIFReg*)mem); break; + + // Should RGBA/STQ/UV be NOPs when skipping frames? 
I think so, but maybe the original + // switch() (above) was some hack to enable them in frameskipping mode. --air + + case GIF_REG_RGBA: //GIFPackedRegHandlerRGBA ((GIFPackedReg*)mem); break; + case GIF_REG_STQ: //GIFPackedRegHandlerSTQ ((GIFPackedReg*)mem); break; + case GIF_REG_UV: //GIFPackedRegHandlerUV ((GIFPackedReg*)mem); break; + + case GIF_REG_XYZF2: //GIFPackedRegHandlerXYZF2((GIFPackedReg*)mem); break; + case GIF_REG_XYZ2: //GIFPackedRegHandlerXYZ2 ((GIFPackedReg*)mem); break; + case GIF_REG_CLAMP_1: //GIFRegHandlerCLAMP<0> ((GIFReg*)mem); break; + case GIF_REG_CLAMP_2: //GIFRegHandlerCLAMP<1> ((GIFReg*)mem); break; + case GIF_REG_FOG: //GIFPackedRegHandlerFOG ((GIFPackedReg*)mem); break; + case GIF_REG_XYZF3: //GIFRegHandlerXYZF3 ((GIFReg*)mem); break; + case GIF_REG_XYZ3: //GIFRegHandlerXYZ3 ((GIFReg*)mem); break; + case GIF_REG_NOP: break; + } + } + else + { + switch(reg) + { + case GIF_REG_RGBA: GIFPackedRegHandlerRGBA ((GIFPackedReg*)mem); break; + case GIF_REG_STQ: GIFPackedRegHandlerSTQ ((GIFPackedReg*)mem); break; + case GIF_REG_UV: GIFPackedRegHandlerUV ((GIFPackedReg*)mem); break; + case GIF_REG_XYZF2: GIFPackedRegHandlerXYZF2((GIFPackedReg*)mem); break; + case GIF_REG_XYZ2: GIFPackedRegHandlerXYZ2 ((GIFPackedReg*)mem); break; + case GIF_REG_TEX0_1: GIFRegHandlerTEX0<0> ((GIFReg*)mem); break; + case GIF_REG_TEX0_2: GIFRegHandlerTEX0<1> ((GIFReg*)mem); break; + case GIF_REG_CLAMP_1: GIFRegHandlerCLAMP<0> ((GIFReg*)mem); break; + case GIF_REG_CLAMP_2: GIFRegHandlerCLAMP<1> ((GIFReg*)mem); break; + case GIF_REG_FOG: GIFPackedRegHandlerFOG ((GIFPackedReg*)mem); break; + case GIF_REG_XYZF3: GIFRegHandlerXYZF3 ((GIFReg*)mem); break; + case GIF_REG_XYZ3: GIFRegHandlerXYZ3 ((GIFReg*)mem); break; + case GIF_REG_A_D: GIFPackedRegHandlerA_D ((GIFPackedReg*)mem); break; + case GIF_REG_NOP: break; + } + } + #else + + // This is the original LUT implementation of the packed reg dispatcher. 
+ // Simple and clean, but the switch system above is probably more efficient. + + (this->*m_fpGIFPackedRegHandlers[reg])((GIFPackedReg*)mem); + + #endif mem += sizeof(GIFPackedReg); size--; @@ -1791,8 +1888,6 @@ int GSState::Defrost(const GSFreezeData* fd) m_context = &m_env.CTXT[PRIM->CTXT]; - UpdateVertexKick(); - m_env.UpdateDIMX(); for(int i = 0; i < 2; i++) @@ -1817,65 +1912,6 @@ void GSState::SetGameCRC(uint32 crc, int options) m_game = CRC::Lookup(crc); } -void GSState::SetFrameSkip(int skip) -{ - if(m_frameskip != skip) - { - m_frameskip = skip; - - if(skip) - { - m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_UV] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerNOP; - - m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerNOP; - 
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerNOP; - } - else - { - m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = &GSState::GIFPackedRegHandlerPRIM; - m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA; - m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ; - m_fpGIFPackedRegHandlers[GIF_REG_UV] = &GSState::GIFPackedRegHandlerUV; - m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2; - m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2; - m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerCLAMP<0>; - m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = &GSState::GIFPackedRegHandlerCLAMP<1>; - m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG; - m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerXYZF3; - m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerXYZ3; - - m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM; - m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ; - m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST; - m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3; - m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT; - m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE; - } - } -} - // GSTransferBuffer GSState::GSTransferBuffer::GSTransferBuffer() diff --git a/plugins/GSdx/GSState.h b/plugins/GSdx/GSState.h index 51e28da939..a8875f02bd 100644 --- a/plugins/GSdx/GSState.h +++ b/plugins/GSdx/GSState.h @@ -36,24 +36,25 @@ 
#include "GSAlignedClass.h" #include "GSDump.h" +// Set this to 1 to enable a switch statement instead of a LUT for the packed register handler +// in the GifTransfer code. Switch statement is probably faster, but it isn't fully implemented +// yet (not properly supporting frameskipping). +#define UsePackedRegSwitch 0 + class GSState : public GSAlignedClass<16> { +#if !UsePackedRegSwitch typedef void (GSState::*GIFPackedRegHandler)(const GIFPackedReg* r); - GIFPackedRegHandler m_fpGIFPackedRegHandlers[16]; +#endif void GIFPackedRegHandlerNull(const GIFPackedReg* r); - void GIFPackedRegHandlerPRIM(const GIFPackedReg* r); void GIFPackedRegHandlerRGBA(const GIFPackedReg* r); void GIFPackedRegHandlerSTQ(const GIFPackedReg* r); void GIFPackedRegHandlerUV(const GIFPackedReg* r); void GIFPackedRegHandlerXYZF2(const GIFPackedReg* r); void GIFPackedRegHandlerXYZ2(const GIFPackedReg* r); - template void GIFPackedRegHandlerTEX0(const GIFPackedReg* r); - template void GIFPackedRegHandlerCLAMP(const GIFPackedReg* r); void GIFPackedRegHandlerFOG(const GIFPackedReg* r); - void GIFPackedRegHandlerXYZF3(const GIFPackedReg* r); - void GIFPackedRegHandlerXYZ3(const GIFPackedReg* r); void GIFPackedRegHandlerA_D(const GIFPackedReg* r); void GIFPackedRegHandlerNOP(const GIFPackedReg* r); @@ -62,6 +63,7 @@ class GSState : public GSAlignedClass<16> GIFRegHandler m_fpGIFRegHandlers[256]; void ApplyTEX0( uint i, GIFRegTEX0& TEX0 ); + void ApplyPRIM(const GIFRegPRIM& PRIM); void GIFRegHandlerNull(const GIFReg* r); void GIFRegHandlerPRIM(const GIFReg* r); @@ -134,67 +136,33 @@ class GSState : public GSAlignedClass<16> protected: bool IsBadFrame(int& skip, int UserHacks_SkipDraw); - typedef void (GSState::*VertexKickPtr)(bool skip); + typedef void (GSState::*DrawingKickPtr)(bool skip); - VertexKickPtr m_vk[8][2][2]; - VertexKickPtr m_vkf; + DrawingKickPtr m_dk[8]; template void InitVertexKick() { - m_vk[GS_POINTLIST][0][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_POINTLIST][0][1] = 
(VertexKickPtr)&T::VertexKick; - m_vk[GS_POINTLIST][1][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_POINTLIST][1][1] = (VertexKickPtr)&T::VertexKick; - - m_vk[GS_LINELIST][0][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_LINELIST][0][1] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_LINELIST][1][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_LINELIST][1][1] = (VertexKickPtr)&T::VertexKick; - - m_vk[GS_LINESTRIP][0][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_LINESTRIP][0][1] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_LINESTRIP][1][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_LINESTRIP][1][1] = (VertexKickPtr)&T::VertexKick; - - m_vk[GS_TRIANGLELIST][0][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_TRIANGLELIST][0][1] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_TRIANGLELIST][1][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_TRIANGLELIST][1][1] = (VertexKickPtr)&T::VertexKick; - - m_vk[GS_TRIANGLESTRIP][0][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_TRIANGLESTRIP][0][1] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_TRIANGLESTRIP][1][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_TRIANGLESTRIP][1][1] = (VertexKickPtr)&T::VertexKick; - - m_vk[GS_TRIANGLEFAN][0][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_TRIANGLEFAN][0][1] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_TRIANGLEFAN][1][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_TRIANGLEFAN][1][1] = (VertexKickPtr)&T::VertexKick; - - m_vk[GS_SPRITE][0][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_SPRITE][0][1] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_SPRITE][1][0] = (VertexKickPtr)&T::VertexKick; - m_vk[GS_SPRITE][1][1] = (VertexKickPtr)&T::VertexKick; - - m_vk[GS_INVALID][0][0] = &GSState::VertexKickNull; - m_vk[GS_INVALID][0][1] = &GSState::VertexKickNull; - m_vk[GS_INVALID][1][0] = &GSState::VertexKickNull; - m_vk[GS_INVALID][1][1] = &GSState::VertexKickNull; + m_dk[GS_POINTLIST] = (DrawingKickPtr)&T::DrawingKick; + m_dk[GS_LINELIST] = (DrawingKickPtr)&T::DrawingKick; + m_dk[GS_LINESTRIP] = 
(DrawingKickPtr)&T::DrawingKick; + m_dk[GS_TRIANGLELIST] = (DrawingKickPtr)&T::DrawingKick; + m_dk[GS_TRIANGLESTRIP] = (DrawingKickPtr)&T::DrawingKick; + m_dk[GS_TRIANGLEFAN] = (DrawingKickPtr)&T::DrawingKick; + m_dk[GS_SPRITE] = (DrawingKickPtr)&T::DrawingKick; + m_dk[GS_INVALID] = &GSState::DrawingKickNull; } - void UpdateVertexKick() - { - m_vkf = m_vk[PRIM->PRIM][PRIM->TME][PRIM->FST]; - } - - void VertexKickNull(bool skip) + void DrawingKickNull(bool skip) { ASSERT(0); } - void VertexKick(bool skip) + virtual void DoVertexKick()=0; + + __fi void VertexKick(bool skip) { - (this->*m_vkf)(skip); + DoVertexKick(); + (this->*m_dk[PRIM->PRIM])(skip); } public: