diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index f5d928bd43..443d0c2232 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -1103,13 +1103,13 @@ __aligned(struct, 32) GIFPath enum {TYPE_UNKNOWN, TYPE_ADONLY, TYPE_STQRGBAXYZF2}; - void SetTag(const void* mem) + __forceinline void SetTag(const void* mem) { GSVector4i v = GSVector4i::load(mem); GSVector4i::store(&tag, v); reg = 0; - regs = v.uph8(v >> 4) & 0x0f0f0f0f; nreg = tag.NREG ? tag.NREG : 16; + regs = v.uph8(v >> 4) & GSVector4i::x0f(nreg); nloop = tag.NLOOP; type = TYPE_UNKNOWN; if(regs.u32[0] == 0x00040102 && nreg == 3) type = TYPE_STQRGBAXYZF2; diff --git a/plugins/GSdx/GSDevice11.cpp b/plugins/GSdx/GSDevice11.cpp index f2f1fff651..d2a8899828 100644 --- a/plugins/GSdx/GSDevice11.cpp +++ b/plugins/GSdx/GSDevice11.cpp @@ -720,6 +720,18 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert } void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t count) +{ + void* ptr = NULL; + + if(IAMapVertexBuffer(&ptr, stride, count)) + { + GSVector4i::storent(ptr, vertex, count * stride); + + IAUnmapVertexBuffer(); + } +} + +bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count) { ASSERT(m_vertex.count == 0); @@ -747,7 +759,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou hr = m_dev->CreateBuffer(&bd, NULL, &m_vb); - if(FAILED(hr)) return; + if(FAILED(hr)) return false; } D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE; @@ -761,17 +773,24 @@ void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou D3D11_MAPPED_SUBRESOURCE m; - if(SUCCEEDED(m_ctx->Map(m_vb, 0, type, 0, &m))) + if(FAILED(m_ctx->Map(m_vb, 0, type, 0, &m))) { - GSVector4i::storent((uint8*)m.pData + m_vertex.start * stride, vertex, count * stride); - - m_ctx->Unmap(m_vb, 0); + return false; } + *vertex = (uint8*)m.pData + m_vertex.start * stride; + m_vertex.count = count; m_vertex.stride = stride; - IASetVertexBuffer(m_vb, stride); + return true; +} + +void GSDevice11::IAUnmapVertexBuffer() +{ + m_ctx->Unmap(m_vb, 0); + + IASetVertexBuffer(m_vb, m_vertex.stride); } void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride) diff --git a/plugins/GSdx/GSDevice11.h b/plugins/GSdx/GSDevice11.h index f46cf7eafe..203e9e848d 100644 --- a/plugins/GSdx/GSDevice11.h +++ b/plugins/GSdx/GSDevice11.h @@ -170,6 +170,8 @@ public: void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true); void IASetVertexBuffer(const void* vertex, size_t stride, size_t count); + bool IAMapVertexBuffer(void** vertex, size_t stride, size_t count); + void IAUnmapVertexBuffer(); void IASetVertexBuffer(ID3D11Buffer* vb, size_t stride); void IASetIndexBuffer(const void* index, size_t count); void IASetIndexBuffer(ID3D11Buffer* ib); @@ -190,7 +192,6 @@ public: void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL); void OMSetRenderTargets(const GSVector2i& rtsize, ID3D11UnorderedAccessView** uav, int count, const GSVector4i* scissor = NULL); - void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim); void SetupVS(VSSelector sel, const VSConstantBuffer* cb); void SetupGS(GSSelector sel); void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel); diff --git a/plugins/GSdx/GSDevice9.cpp b/plugins/GSdx/GSDevice9.cpp index 3a8c875f50..b912b90198 100644 --- a/plugins/GSdx/GSDevice9.cpp +++ b/plugins/GSdx/GSDevice9.cpp @@ -911,6 +911,18 @@ void GSDevice9::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* verti } void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t count) +{ + void* ptr = NULL; + + if(IAMapVertexBuffer(&ptr, stride, count)) + { + GSVector4i::storent(ptr, vertex, count * stride); + + IAUnmapVertexBuffer(); + } +} + +bool GSDevice9::IAMapVertexBuffer(void** vertex, size_t stride, size_t count) { ASSERT(m_vertex.count == 0); @@ -930,7 +942,7 @@ void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t coun hr = m_dev->CreateVertexBuffer(m_vertex.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL); - if(FAILED(hr)) return; + if(FAILED(hr)) return false; } uint32 flags = D3DLOCK_NOOVERWRITE; @@ -942,19 +954,22 @@ void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t coun flags = D3DLOCK_DISCARD; } - void* ptr = NULL; - - if(SUCCEEDED(m_vb->Lock(m_vertex.start * stride, count * stride, &ptr, flags))) + if(FAILED(m_vb->Lock(m_vertex.start * stride, count * stride, vertex, flags))) { - GSVector4i::storent(ptr, vertex, count * stride); - - m_vb->Unlock(); + return false; } m_vertex.count = count; m_vertex.stride = stride; - IASetVertexBuffer(m_vb, stride); + return true; +} + +void GSDevice9::IAUnmapVertexBuffer() +{ + m_vb->Unlock(); + + IASetVertexBuffer(m_vb, m_vertex.stride); } void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride) diff --git a/plugins/GSdx/GSDevice9.h b/plugins/GSdx/GSDevice9.h index 3e46c94214..319e063d0f 100644 --- a/plugins/GSdx/GSDevice9.h +++ b/plugins/GSdx/GSDevice9.h @@ -196,6 +196,8 @@ public: void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true); void IASetVertexBuffer(const void* vertex, size_t stride, size_t count); + bool IAMapVertexBuffer(void** vertex, size_t stride, size_t count); + void IAUnmapVertexBuffer(); void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride); void IASetIndexBuffer(const void* index, size_t count); void IASetIndexBuffer(IDirect3DIndexBuffer9* ib); @@ -216,7 +218,6 @@ public: HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il); HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DPixelShader9** ps); - void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim); void SetupVS(VSSelector sel, const VSConstantBuffer* cb); void SetupGS(GSSelector sel) {} void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel); diff --git a/plugins/GSdx/GSDeviceDX.h b/plugins/GSdx/GSDeviceDX.h index 9d2f954472..4ce845665b 100644 --- a/plugins/GSdx/GSDeviceDX.h +++ b/plugins/GSdx/GSDeviceDX.h @@ -279,7 +279,6 @@ public: bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode); void GetFeatureLevel(D3D_FEATURE_LEVEL& level) const {level = m_shader.level;} - virtual void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) = 0; virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0; virtual void SetupGS(GSSelector sel) = 0; virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0; diff --git a/plugins/GSdx/GSRenderer.cpp b/plugins/GSdx/GSRenderer.cpp index 151de0b311..8840f81b90 100644 --- a/plugins/GSdx/GSRenderer.cpp +++ b/plugins/GSdx/GSRenderer.cpp @@ -22,9 +22,8 @@ #include "stdafx.h" #include "GSRenderer.h" -GSRenderer::GSRenderer(GSVertexTrace* vt, size_t vertex_stride) - : GSState(vt, vertex_stride) - , m_dev(NULL) +GSRenderer::GSRenderer() + : m_dev(NULL) , m_shader(0) { m_GStitleInfoBuffer[0] = 0; diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index e78013bf4f..de83f32801 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -58,7 +58,7 @@ public: int s_saven; public: - GSRenderer(GSVertexTrace* vt, size_t vertex_stride); + GSRenderer(); virtual ~GSRenderer(); virtual bool CreateWnd(const string& title, int w, int h); diff --git a/plugins/GSdx/GSRendererCS.cpp b/plugins/GSdx/GSRendererCS.cpp index 1c864d09fb..7772084201 100644 --- a/plugins/GSdx/GSRendererCS.cpp +++ b/plugins/GSdx/GSRendererCS.cpp @@ -23,12 +23,10 @@ #include "GSRendererCS.h" GSRendererCS::GSRendererCS() - : GSRenderer(new GSVertexTraceDX11(this), sizeof(GSVertexHW11)) + : GSRenderer() { m_nativeres = true; - InitConvertVertex(GSRendererCS); - memset(m_vm_valid, 0, sizeof(m_vm_valid)); } @@ -233,26 +231,6 @@ GSTexture* GSRendererCS::GetOutput(int i) return NULL; } -template -void GSRendererCS::ConvertVertex(size_t dst_index, size_t src_index) -{ - GSVertex* s = (GSVertex*)((GSVertexHW11*)m_vertex.buff + src_index); - GSVertexHW11* d = (GSVertexHW11*)m_vertex.buff + dst_index; - - GSVector4i v0 = ((GSVector4i*)s)[0]; - GSVector4i v1 = ((GSVector4i*)s)[1]; - - if(tme && fst) - { - // TODO: modify VertexTrace to read uv from v1.u16[0], v1.u16[1], then this step is not needed - - v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st - } - - ((GSVector4i*)d)[0] = v0; - ((GSVector4i*)d)[1] = v1; -} - void GSRendererCS::Draw() { GSDrawingEnvironment& env = m_env; @@ -260,7 +238,7 @@ void GSRendererCS::Draw() GSVector2i rtsize(2048, 2048); GSVector4i scissor = GSVector4i(context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); - GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil())); + GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil())); GSVector4i r = bbox.rintersect(scissor); uint32 fm = context->FRAME.FBMSK; @@ -286,7 +264,7 @@ void GSRendererCS::Draw() GSVector4i r; - GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt->IsLinear()); + GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear()); // TODO: unswizzle pages of r to a texture, check m_vm_valid, bit not set cpu->gpu, set gpu->gpu @@ -317,7 +295,7 @@ void GSRendererCS::Draw() D3D11_PRIMITIVE_TOPOLOGY topology; - switch(m_vt->m_primclass) + switch(m_vt.m_primclass) { case GS_POINT_CLASS: topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; @@ -333,7 +311,7 @@ void GSRendererCS::Draw() __assume(0); } - dev->IASetVertexBuffer(m_vertex.buff, sizeof(GSVertexHW11), m_vertex.next); + dev->IASetVertexBuffer(m_vertex.buff, sizeof(GSVertex), m_vertex.next); dev->IASetIndexBuffer(m_index.buff, m_index.tail); dev->IASetPrimitiveTopology(topology); @@ -407,7 +385,7 @@ void GSRendererCS::Draw() GSSelector gs_sel; gs_sel.iip = PRIM->IIP; - gs_sel.prim = m_vt->m_primclass; + gs_sel.prim = m_vt.m_primclass; CComPtr gs; diff --git a/plugins/GSdx/GSRendererCS.h b/plugins/GSdx/GSRendererCS.h index f51b44bb10..13684c9e60 100644 --- a/plugins/GSdx/GSRendererCS.h +++ b/plugins/GSdx/GSRendererCS.h @@ -120,9 +120,6 @@ class GSRendererCS : public GSRenderer bool GetOffsetBuffer(OffsetBuffer** fzbo); protected: - template - void ConvertVertex(size_t dst_index, size_t src_index); - bool CreateDevice(GSDevice* dev); void VSync(int field); GSTexture* GetOutput(int i); diff --git a/plugins/GSdx/GSRendererDX.cpp b/plugins/GSdx/GSRendererDX.cpp index e8a941c1d3..7a485950f3 100644 --- a/plugins/GSdx/GSRendererDX.cpp +++ b/plugins/GSdx/GSRendererDX.cpp @@ -23,10 +23,9 @@ #include "GSRendererDX.h" #include "GSDeviceDX.h" -GSRendererDX::GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter) - : GSRendererHW(vt, vertex_stride, tc) +GSRendererDX::GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter) + : GSRendererHW(tc) , m_pixelcenter(pixelcenter) - , m_topology(-1) { m_logz = !!theApp.GetConfig("logz", 0); m_fba = !!theApp.GetConfig("fba", 1); @@ -61,7 +60,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); GSVector4 o = GSVector4(-1.0f, 1.0f); - GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); + GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); GSVector4 dst = src * 2.0f + o.xxxx(); GSVertexPT1 vertices[] = @@ -111,7 +110,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc if(!IsOpaque()) { - om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt->m_primclass == GS_LINE_CLASS; + om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; om_bsel.a = context->ALPHA.A; om_bsel.b = context->ALPHA.B; @@ -154,11 +153,11 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc { if(context->ZBUF.PSM == PSM_PSMZ24) { - if(m_vt->m_max.p.z > 0xffffff) + if(m_vt.m_max.p.z > 0xffffff) { - ASSERT(m_vt->m_min.p.z > 0xffffff); + ASSERT(m_vt.m_min.p.z > 0xffffff); // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. - if (m_vt->m_min.p.z > 0xffffff) + if (m_vt.m_min.p.z > 0xffffff) { vs_sel.bppz = 1; om_dssel.ztst = ZTST_ALWAYS; @@ -167,11 +166,11 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc } else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) { - if(m_vt->m_max.p.z > 0xffff) + if(m_vt.m_max.p.z > 0xffff) { - ASSERT(m_vt->m_min.p.z > 0xffff); // sfex capcom logo + ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo // Fixme : Same as above, I guess. - if (m_vt->m_min.p.z > 0xffff) + if (m_vt.m_min.p.z > 0xffff) { vs_sel.bppz = 2; om_dssel.ztst = ZTST_ALWAYS; @@ -213,7 +212,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc GSDeviceDX::GSSelector gs_sel; gs_sel.iip = PRIM->IIP; - gs_sel.prim = m_vt->m_primclass; + gs_sel.prim = m_vt.m_primclass; // ps @@ -281,7 +280,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc ps_sel.aem = env.TEXA.AEM; ps_sel.tfx = context->TEX0.TFX; ps_sel.tcc = context->TEX0.TCC; - ps_sel.ltf = m_filter == 2 ? m_vt->IsLinear() : m_filter; + ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter; ps_sel.rt = tex->m_target; int w = tex->m_texture->GetWidth(); @@ -331,7 +330,6 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc uint8 afix = context->ALPHA.FIX; dev->SetupOM(om_dssel, om_bsel, afix); - dev->SetupIA(m_vertex.buff, m_vertex.next, m_index.buff, m_index.tail, m_topology); dev->SetupVS(vs_sel, &vs_cb); dev->SetupGS(gs_sel); dev->SetupPS(ps_sel, &ps_cb, ps_ssel); diff --git a/plugins/GSdx/GSRendererDX.h b/plugins/GSdx/GSRendererDX.h index b693d89315..8db7f50e3a 100644 --- a/plugins/GSdx/GSRendererDX.h +++ b/plugins/GSdx/GSRendererDX.h @@ -32,13 +32,11 @@ class GSRendererDX : public GSRendererHW bool UserHacks_AlphaHack; protected: - int m_topology; - virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); virtual void UpdateFBA(GSTexture* rt) {} public: - GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0)); + GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0)); virtual ~GSRendererDX(); }; diff --git a/plugins/GSdx/GSRendererDX11.cpp b/plugins/GSdx/GSRendererDX11.cpp index d9b3c2c6d9..39805eff2d 100644 --- a/plugins/GSdx/GSRendererDX11.cpp +++ b/plugins/GSdx/GSRendererDX11.cpp @@ -25,9 +25,8 @@ #include "resource.h" GSRendererDX11::GSRendererDX11() - : GSRendererDX(new GSVertexTraceDX11(this), sizeof(GSVertexHW11), new GSTextureCache11(this), GSVector2(-0.5f, -0.5f)) + : GSRendererDX(new GSTextureCache11(this), GSVector2(-0.5f, -0.5f)) { - InitConvertVertex(GSRendererDX11); } bool GSRendererDX11::CreateDevice(GSDevice* dev) @@ -38,43 +37,40 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev) return true; } -template -void GSRendererDX11::ConvertVertex(size_t dst_index, size_t src_index) -{ - GSVertex* s = (GSVertex*)((GSVertexHW11*)m_vertex.buff + src_index); - GSVertexHW11* d = (GSVertexHW11*)m_vertex.buff + dst_index; - - GSVector4i v0 = ((GSVector4i*)s)[0]; - GSVector4i v1 = ((GSVector4i*)s)[1]; - - if(tme && fst) - { - // TODO: modify VertexTrace and the shaders to read uv from v1.u16[0], v1.u16[1], then this step is not needed - - v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st - } - - ((GSVector4i*)d)[0] = v0; - ((GSVector4i*)d)[1] = v1; -} - void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) { - switch(m_vt->m_primclass) + GSDevice11* dev = (GSDevice11*)m_dev; + + void* ptr = NULL; + + if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next)) + { + GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next); + + dev->IAUnmapVertexBuffer(); + } + + dev->IASetIndexBuffer(m_index.buff, m_index.tail); + + D3D11_PRIMITIVE_TOPOLOGY t; + + switch(m_vt.m_primclass) { case GS_POINT_CLASS: - m_topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; + t = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; break; case GS_LINE_CLASS: case GS_SPRITE_CLASS: - m_topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; + t = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; break; case GS_TRIANGLE_CLASS: - m_topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + t = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; default: __assume(0); } + + dev->IASetPrimitiveTopology(t); __super::DrawPrims(rt, ds, tex); } diff --git a/plugins/GSdx/GSRendererDX11.h b/plugins/GSdx/GSRendererDX11.h index 30451710c9..aac0358635 100644 --- a/plugins/GSdx/GSRendererDX11.h +++ b/plugins/GSdx/GSRendererDX11.h @@ -28,15 +28,8 @@ class GSRendererDX11 : public GSRendererDX { protected: - template - void ConvertVertex(size_t dst_index, size_t src_index); void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); - int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;} - int GetPosY(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.y;} - uint32 GetColor(const void* vertex) const {return ((const GSVertexHW11*)vertex)->c0;} - void SetColor(void* vertex, uint32 c) const {((GSVertexHW11*)vertex)->c0 = c;} - public: GSRendererDX11(); virtual ~GSRendererDX11() {} diff --git a/plugins/GSdx/GSRendererDX9.cpp b/plugins/GSdx/GSRendererDX9.cpp index a0dfa10af2..74a2221029 100644 --- a/plugins/GSdx/GSRendererDX9.cpp +++ b/plugins/GSdx/GSRendererDX9.cpp @@ -25,9 +25,8 @@ #include "resource.h" GSRendererDX9::GSRendererDX9() - : GSRendererDX(new GSVertexTraceDX9(this), sizeof(GSVertexHW9), new GSTextureCache9(this)) + : GSRendererDX(new GSTextureCache9(this)) { - InitConvertVertex(GSRendererDX9); } bool GSRendererDX9::CreateDevice(GSDevice* dev) @@ -57,56 +56,21 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev) return true; } -template -void GSRendererDX9::ConvertVertex(size_t dst_index, size_t src_index) -{ - GSVertex* s = (GSVertex*)((GSVertexHW9*)m_vertex.buff + src_index); - GSVertexHW9* d = (GSVertexHW9*)m_vertex.buff + dst_index; - - GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16()); - - if(tme && !fst) - { - p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q)); - } - else - { - p = p.xyxy(GSVector4::load((float)s->XYZ.Z)); - } - - GSVector4 t = GSVector4::zero(); - - if(tme) - { - if(fst) - { - t = GSVector4(GSVector4i::load(s->UV).upl16()); - } - else - { - t = GSVector4::loadl(&s->ST); - } - } - - t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG))); - - d->p = p; - d->t = t; -} - void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) { - switch(m_vt->m_primclass) + D3DPRIMITIVETYPE topology; + + switch(m_vt.m_primclass) { case GS_POINT_CLASS: - m_topology = D3DPT_POINTLIST; + topology = D3DPT_POINTLIST; break; case GS_LINE_CLASS: - m_topology = D3DPT_LINELIST; + topology = D3DPT_LINELIST; if(PRIM->IIP == 0) { @@ -122,7 +86,7 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour case GS_TRIANGLE_CLASS: - m_topology = D3DPT_TRIANGLELIST; + topology = D3DPT_TRIANGLELIST; if(PRIM->IIP == 0) { @@ -138,7 +102,7 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour case GS_SPRITE_CLASS: - m_topology = D3DPT_TRIANGLELIST; + topology = D3DPT_TRIANGLELIST; // each sprite converted to quad needs twice the space @@ -154,29 +118,35 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour size_t count = m_vertex.next; int i = (int)count * 2 - 4; - GSVertexHW9* s = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * count] - 2; - GSVertexHW9* q = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * (count * 2)] - 4; - uint32* RESTRICT index = &m_index.buff[count * 3] - 6; + GSVertex* s = &m_vertex.buff[count - 2]; + GSVertex* q = &m_vertex.buff[count * 2 - 4]; + uint32* RESTRICT index = &m_index.buff[count * 3 - 6]; for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6) { - GSVertexHW9 v0 = s[0]; - GSVertexHW9 v1 = s[1]; + GSVertex v0 = s[0]; + GSVertex v1 = s[1]; - v0.p = v0.p.xyzw(v1.p); // z, q - v0.t = v0.t.xyzw(v1.t); // c, f + v0.RGBAQ = v1.RGBAQ; + v0.XYZ.Z = v1.XYZ.Z; + v0.FOG = v1.FOG; q[0] = v0; q[3] = v1; - // swap x, s + // swap x, s, u - GSVector4 p = v0.p.insert<0, 0>(v1.p); - GSVector4 t = v0.t.insert<0, 0>(v1.t); - v1.p = v1.p.insert<0, 0>(v0.p); - v1.t = v1.t.insert<0, 0>(v0.t); - v0.p = p; - v0.t = t; + uint16 x = v0.XYZ.X; + v0.XYZ.X = v1.XYZ.X; + v1.XYZ.X = x; + + float s = v0.ST.S; + v0.ST.S = v1.ST.S; + v1.ST.S = s; + + uint16 u = v0.U; + v0.U = v1.U; + v1.U = u; q[1] = v0; q[2] = v1; @@ -199,7 +169,56 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour __assume(0); } - (*(GSDevice9*)m_dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO + GSDevice9* dev = (GSDevice9*)m_dev; + + (*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO + + void* ptr = NULL; + + if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next)) + { + GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff; + GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr; + + for(int i = 0; i < m_vertex.next; i++, s++, d++) + { + GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16()); + + if(PRIM->TME && !PRIM->FST) + { + p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q)); + } + else + { + p = p.xyxy(GSVector4::load((float)s->XYZ.Z)); + } + + GSVector4 t = GSVector4::zero(); + + if(PRIM->TME) + { + if(PRIM->FST) + { + t = GSVector4(GSVector4i::load(s->UV).upl16()); + } + else + { + t = GSVector4::loadl(&s->ST); + } + } + + t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG))); + + d->p = p; + d->t = t; + } + + dev->IAUnmapVertexBuffer(); + } + + dev->IASetIndexBuffer(m_index.buff, m_index.tail); + + dev->IASetPrimitiveTopology(topology); __super::DrawPrims(rt, ds, tex); } @@ -220,7 +239,7 @@ void GSRendererDX9::UpdateFBA(GSTexture* rt) GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight()); GSVector4 o = GSVector4(-1.0f, 1.0f); - GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); + GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); GSVector4 dst = src * 2.0f + o.xxxx(); GSVertexPT1 vertices[] = diff --git a/plugins/GSdx/GSRendererDX9.h b/plugins/GSdx/GSRendererDX9.h index f70a14bfe0..9122fb95b1 100644 --- a/plugins/GSdx/GSRendererDX9.h +++ b/plugins/GSdx/GSRendererDX9.h @@ -34,17 +34,9 @@ protected: Direct3DBlendState9 bs; } m_fba; - template - void ConvertVertex(size_t dst_index, size_t src_index); - void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); void UpdateFBA(GSTexture* rt); - int GetPosX(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.x;} - int GetPosY(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.y;} - uint32 GetColor(const void* vertex) const {return ((const GSVertexHW9*)vertex)->t.u32[2];} - void SetColor(void* vertex, uint32 c) const {((GSVertexHW9*)vertex)->t.u32[2] = c;} - public: GSRendererDX9(); virtual ~GSRendererDX9() {} diff --git a/plugins/GSdx/GSRendererHW.cpp b/plugins/GSdx/GSRendererHW.cpp index 07ef77abc5..c773999d5c 100644 --- a/plugins/GSdx/GSRendererHW.cpp +++ b/plugins/GSdx/GSRendererHW.cpp @@ -22,9 +22,8 @@ #include "stdafx.h" #include "GSRendererHW.h" -GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc) - : GSRenderer(vt, vertex_stride) - , m_tc(tc) +GSRendererHW::GSRendererHW(GSTextureCache* tc) + : m_tc(tc) , m_width(1024) , m_height(1024) , m_skip(0) @@ -211,7 +210,7 @@ void GSRendererHW::Draw() GSVector4i r; - GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt->IsLinear()); + GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear()); tex = m_tc->LookupSource(context->TEX0, env.TEXA, r); @@ -298,7 +297,7 @@ void GSRendererHW::Draw() // - GSVector4i r = GSVector4i(m_vt->m_min.p.xyxy(m_vt->m_max.p)).rintersect(GSVector4i(context->scissor.in)); + GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in)); if(fm != 0xffffffff) { @@ -410,14 +409,14 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source if(lines == 0) { - if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2)) + if(m_vt.m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2)) { lines = m_vertex.next / 2; } } else { - if(m_vt->m_primclass == GS_POINT_CLASS) + if(m_vt.m_primclass == GS_POINT_CLASS) { if(m_vertex.next >= 16 * 512) { @@ -428,14 +427,14 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source int ox = m_context->XYOFFSET.OFX; int oy = m_context->XYOFFSET.OFY; - const uint8* RESTRICT v = m_vertex.buff; + const GSVertex* RESTRICT v = m_vertex.buff; - for(int i = (int)m_vertex.next; i >= 0; i--, v += m_vertex.stride) + for(int i = (int)m_vertex.next; i >= 0; i--, v++) { - int x = (GetPosX(v) - ox) >> 4; - int y = (GetPosY(v) - oy) >> 4; + int x = (v->XYZ.X - ox) >> 4; + int y = (v->XYZ.Y - oy) >> 4; - video[(y << 8) + (y << 7) + (y << 6) + x] = GetColor(v); + video[(y << 8) + (y << 7) + (y << 6) + x] = v->RGBAQ.u32[0]; } return false; @@ -445,7 +444,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source lines = 0; } } - else if(m_vt->m_primclass == GS_LINE_CLASS) + else if(m_vt.m_primclass == GS_LINE_CLASS) { if(m_vertex.next == lines * 2) { @@ -458,10 +457,8 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4); - size_t stride = m_vertex.stride; - - memcpy(&m_vertex.buff[stride * 2], &m_vertex.buff[stride * (m_vertex.next - 2)], stride); - memcpy(&m_vertex.buff[stride * 3], &m_vertex.buff[stride * (m_vertex.next - 1)], stride); + m_vertex.buff[2] = m_vertex.buff[m_vertex.next - 2]; + m_vertex.buff[3] = m_vertex.buff[m_vertex.next - 1]; m_index.buff[0] = 0; m_index.buff[1] = 1; @@ -473,7 +470,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source m_vertex.head = m_vertex.tail = m_vertex.next = 4; m_index.tail = 6; - m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS); + m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS); } else { @@ -505,11 +502,11 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S { // missing red channel fix (looks alright in pcsx2 r5000+) - uint8* RESTRICT v = m_vertex.buff; + GSVertex* RESTRICT v = m_vertex.buff; - for(int i = (int)m_vertex.next; i >= 0; i--, v += m_vertex.stride) + for(int i = (int)m_vertex.next; i >= 0; i--, v++) { - uint32 c = GetColor(v); + uint32 c = v->RGBAQ.u32[0]; uint32 r = (c >> 0) & 0xff; uint32 g = (c >> 8) & 0xff; @@ -517,11 +514,11 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S if(r == 0 && g != 0 && b != 0) { - SetColor(v, (c & 0xffffff00) | ((g + b + 1) >> 1)); + v->RGBAQ.u32[0] = (c & 0xffffff00) | ((g + b + 1) >> 1); } } - m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt->m_primclass); + m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt.m_primclass); return true; } @@ -701,7 +698,7 @@ bool GSRendererHW::OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTex } else if(PRIM->TME) { - if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_max.p.z == 0)) + if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_max.p.z == 0)) { m_dev->ClearDepth(ds, 0); } @@ -757,7 +754,7 @@ bool GSRendererHW::OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureC } else if(PRIM->TME) { - if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0)) + if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_min.p.z == 0)) { m_dev->ClearDepth(ds, 0); } @@ -783,7 +780,7 @@ bool GSRendererHW::OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureC } else if(PRIM->TME) { - if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0)) + if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_min.p.z == 0)) { m_dev->ClearDepth(ds, 0); } @@ -797,7 +794,7 @@ bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCac uint32 FBP = m_context->FRAME.Block(); uint32 FPSM = m_context->FRAME.PSM; - if (FPSM == PSM_PSMCT32 && FBP == 0x01c00 && !m_context->TEST.ATE && m_vt->m_max.p.z == m_vt->m_min.p.z) + if (FPSM == PSM_PSMCT32 && FBP == 0x01c00 && !m_context->TEST.ATE && m_vt.m_eq.z) { m_context->TEST.ZTST = ZTST_ALWAYS; //m_dev->ClearDepth(ds, 0); @@ -809,7 +806,7 @@ bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCac bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) { - if(m_vt->m_primclass == GS_POINT_CLASS && !PRIM->TME) + if(m_vt.m_primclass == GS_POINT_CLASS && !PRIM->TME) { uint32 FBP = m_context->FRAME.Block(); uint32 FBW = m_context->FRAME.FBW; @@ -818,16 +815,16 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa { if(m_vertex.next == 16) { - uint8* RESTRICT v = m_vertex.buff; + GSVertex* RESTRICT v = m_vertex.buff; - for(int i = 0; i < 16; i++, v += m_vertex.stride) + for(int i = 0; i < 16; i++, v++) { - uint32 c = GetColor(v); + uint32 c = v->RGBAQ.u32[0]; uint32 a = c >> 24; c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff); - SetColor(v, c); + v->RGBAQ.u32[0] = c; m_mem.WritePixel32(i & 7, i >> 3, c, FBP, FBW); } @@ -838,16 +835,16 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa } else if(m_vertex.next == 256) { - uint8* RESTRICT v = m_vertex.buff; + GSVertex* RESTRICT v = m_vertex.buff; - for(int i = 0; i < 256; i++, v += m_vertex.stride) + for(int i = 0; i < 256; i++, v++) { - uint32 c = GetColor(v); + uint32 c = v->RGBAQ.u32[0]; uint32 a = c >> 24; c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff); - SetColor(v, c); + v->RGBAQ.u32[0] = c; m_mem.WritePixel32(i & 15, i >> 4, c, FBP, FBW); } diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index 6aed9f469a..2b1befe516 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -126,11 +126,6 @@ private: } m_hacks; - virtual int GetPosX(const void* vertex) const = 0; - virtual int GetPosY(const void* vertex) const = 0; - virtual uint32 GetColor(const void* vertex) const = 0; - virtual void SetColor(void* vertex, uint32 c) const = 0; - #pragma endregion protected: @@ -139,7 +134,7 @@ protected: virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0; public: - GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc); + GSRendererHW(GSTextureCache* tc); virtual ~GSRendererHW(); void SetGameCRC(uint32 crc, int options); diff --git a/plugins/GSdx/GSRendererNull.h b/plugins/GSdx/GSRendererNull.h index 7db828a0c8..b9b06a415a 100644 --- a/plugins/GSdx/GSRendererNull.h +++ b/plugins/GSdx/GSRendererNull.h @@ -32,11 +32,6 @@ class GSRendererNull : public GSRenderer }; protected: - template - void ConvertVertex(size_t dst_index, size_t src_index) - { - } - void Draw() { } @@ -48,8 +43,7 @@ protected: public: GSRendererNull() - : GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertex)) + : GSRenderer() { - InitConvertVertex(GSRendererNull); } }; diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index 393395299e..eca864ac19 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -29,11 +29,8 @@ static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL; const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f); GSRendererSW::GSRendererSW(int threads) - : GSRenderer(new GSVertexTraceSW(this), sizeof(GSVertexSW)) - , m_fzb(NULL) + : m_fzb(NULL) { - InitConvertVertex(GSRendererSW); - m_nativeres = true; // ignore ini, sw is always native m_tc = new GSTextureCacheSW(this); @@ -233,73 +230,67 @@ GSTexture* GSRendererSW::GetOutput(int i) return m_texture[i]; } -template -void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index) -{ - GSVertex* s = (GSVertex*)((GSVertexSW*)m_vertex.buff + src_index); - GSVertexSW* d = (GSVertexSW*)m_vertex.buff + dst_index; - - ASSERT(d->_pad.u32[0] != 0x12345678); - - uint32 z = s->XYZ.Z; - - GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - (GSVector4i)m_context->XYOFFSET; - GSVector4i zf = GSVector4i((int)std::min(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later - - GSVector4 p, t, c; - - p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; - - if(tme) - { - if(fst) - { - t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4)); - } - else - { - t = GSVector4(s->ST.S, s->ST.T) * GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH); - t = t.xyxy(GSVector4::load(s->RGBAQ.Q)); - } - } - - c = GSVector4::rgba32(s->RGBAQ.u32[0], 7); - - d->p = p; - d->c = c; - d->t = t; - - #ifdef _DEBUG - d->_pad.u32[0] = 0x12345678; // means trouble if this has already been set, should only convert each vertex once - #endif - - if(prim == GS_SPRITE) - { - d->t.u32[3] = z; - } -} - void GSRendererSW::Draw() { SharedData* sd = new SharedData(this); shared_ptr data(sd); - sd->primclass = m_vt->m_primclass; + sd->primclass = m_vt.m_primclass; sd->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32); sd->vertex = (GSVertexSW*)sd->buff; sd->vertex_count = m_vertex.next; sd->index = (uint32*)(sd->buff + sizeof(GSVertexSW) * m_vertex.next); sd->index_count = m_index.tail; - memcpy(sd->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next); - memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail); - - for(size_t i = 0; i < m_index.tail; i++) { - ASSERT(((GSVertexSW*)m_vertex.buff + m_index.buff[i])->_pad.u32[0] == 0x12345678); + // TODO: template, JIT + + GSVertex* RESTRICT s = m_vertex.buff; + GSVertexSW* RESTRICT d = sd->vertex; + + GSVector4i o = (GSVector4i)m_context->XYOFFSET; + GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH); + + for(size_t i = 0; i < m_vertex.next; i++, s++, d++) + { + uint32 z = s->XYZ.Z; + + GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - o; + GSVector4i zf = GSVector4i((int)std::min(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later + + GSVector4 p, t, c; + + p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; + + if(PRIM->TME) + { + if(PRIM->FST) + { + t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4)); + } + else + { + t = GSVector4(s->ST.S, s->ST.T) * tsize; + t = t.xyxy(GSVector4::load(s->RGBAQ.Q)); + } + } + + c = GSVector4::rgba32(s->RGBAQ.u32[0], 7); + + d->p = p; + d->c = c; + d->t = t; + + if(sd->primclass == GS_SPRITE_CLASS) + { + d->t.u32[3] = z; + } + } } + memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail); + // TODO: delay texture update, do it later along with the syncing on the dispatcher thread, then this thread does not have to wait and can continue assembling more jobs // TODO: if(any texture page is used as a target) GSRasterizerData::syncpoint = true; // TODO: virtual void GSRasterizerData::Update() {texture[all levels]->Update();}, call it from the dispatcher thread before sending to workers @@ -314,7 +305,7 @@ void GSRendererSW::Draw() GSScanlineGlobalData& gd = sd->global; GSVector4i scissor = GSVector4i(context->scissor.in); - GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil())); + GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil())); scissor.z = std::min(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour @@ -713,7 +704,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) const GSDrawingEnvironment& env = m_env; const GSDrawingContext* context = m_context; - const GS_PRIM_CLASS primclass = m_vt->m_primclass; + const GS_PRIM_CLASS primclass = m_vt.m_primclass; gd.vm = m_mem.m_vm8; @@ -790,7 +781,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) { gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt; - if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt->m_eq.rgba != 0xffff) + if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff) { gd.sel.iip = PRIM->IIP; } @@ -800,7 +791,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.sel.tfx = context->TEX0.TFX; gd.sel.tcc = context->TEX0.TCC; gd.sel.fst = PRIM->FST; - gd.sel.ltf = m_vt->IsLinear(); + gd.sel.ltf = m_vt.IsLinear(); if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) { @@ -814,7 +805,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.sel.wms = context->CLAMP.WMS; gd.sel.wmt = context->CLAMP.WMT; - if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt->m_eq.rgba == 0xffff && m_vt->m_min.c.eq(GSVector4i(128))) + if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128))) { // modulate does not do anything when vertex color is 0x80 @@ -833,7 +824,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) if(!t->Update(r)) {ASSERT(0); return false;} - if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt->m_lod.x > 0) + if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0) { uint64 frame = m_perfmon.GetFrame(); @@ -850,7 +841,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.tex[0] = t->m_buff; gd.sel.tw = t->m_tw - 3; - if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt->m_lod.y > 0) + if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0) { // TEX1.MMIN // 000 p @@ -860,13 +851,13 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) // 100 l round // 101 l tri - if(m_vt->m_lod.x > 0) + if(m_vt.m_lod.x > 0) { gd.sel.ltf = context->TEX1.MMIN >> 2; } else { - // TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt->m_lod.x <= 0 && m_vt->m_lod.y > 0 + // TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt.m_lod.x <= 0 && m_vt.m_lod.y > 0 } gd.sel.mmin = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri @@ -875,9 +866,9 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) int mxl = (std::min((int)context->TEX1.MXL, 6) << 16); int k = context->TEX1.K << 12; - if((int)m_vt->m_lod.x >= (int)context->TEX1.MXL) + if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL) { - k = (int)m_vt->m_lod.x << 16; // set lod to max level + k = (int)m_vt.m_lod.x << 16; // set lod to max level gd.sel.lcm = 1; // lod is constant gd.sel.mmin = 1; // tri-linear is meaningless @@ -891,7 +882,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) if(gd.sel.fst) { ASSERT(gd.sel.lcm == 1); - ASSERT(((m_vt->m_min.t.uph(m_vt->m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu) + ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu) gd.sel.lcm = 1; } @@ -920,8 +911,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) GIFRegTEX0 MIP_TEX0 = context->TEX0; GIFRegCLAMP MIP_CLAMP = context->CLAMP; - GSVector4 tmin = m_vt->m_min.t; - GSVector4 tmax = m_vt->m_max.t; + GSVector4 tmin = m_vt.m_min.t; + GSVector4 tmax = m_vt.m_max.t; static int s_counter = 0; @@ -971,8 +962,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) MIP_CLAMP.MAXU >>= 1; MIP_CLAMP.MAXV >>= 1; - m_vt->m_min.t *= 0.5f; - m_vt->m_max.t *= 0.5f; + m_vt.m_min.t *= 0.5f; + m_vt.m_max.t *= 0.5f; GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3); @@ -1014,8 +1005,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) s_counter++; - m_vt->m_min.t = tmin; - m_vt->m_max.t = tmax; + m_vt.m_min.t = tmin; + m_vt.m_max.t = tmax; } else { @@ -1025,7 +1016,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) GSVertexSW* RESTRICT v = data->vertex; - if(m_vt->m_eq.q) + if(m_vt.m_eq.q) { gd.sel.fst = 1; @@ -1202,7 +1193,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) { gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt; gd.sel.ztst = ztest ? context->TEST.ZTST : ZTST_ALWAYS; - gd.sel.zoverflow = GSVector4i(m_vt->m_max.p).z == 0x80000000; + gd.sel.zoverflow = GSVector4i(m_vt.m_max.p).z == 0x80000000; } gd.fm = GSVector4i(fm); diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index 8c9914530a..96c3f25d5a 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -71,9 +71,6 @@ protected: bool GetScanlineGlobalData(SharedData* data); - template - void ConvertVertex(size_t dst_index, size_t src_index); - public: GSRendererSW(int threads); virtual ~GSRendererSW(); diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 4817fdf40a..7db0cbc3d4 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -26,7 +26,7 @@ //#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering //#define Offset_UV // Fixes / breaks various titles -GSState::GSState(GSVertexTrace* vt, size_t vertex_stride) +GSState::GSState() : m_version(6) , m_mt(false) , m_irq(NULL) @@ -35,7 +35,7 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride) , m_crc(0) , m_options(0) , m_frameskip(0) - , m_vt(vt) + , m_vt(this) { m_nativeres = !!theApp.GetConfig("nativeres", 0); @@ -44,15 +44,8 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride) memset(&m_vertex, 0, sizeof(m_vertex)); memset(&m_index, 0, sizeof(m_index)); - ASSERT(vertex_stride >= sizeof(GSVertex)); - - m_vertex.stride = vertex_stride; - m_vertex.tmp = (uint8*)_aligned_malloc(m_vertex.stride * 2, 32); - GrowVertexBuffer(); - memset(m_cv, 0, sizeof(m_cv)); - m_sssize = 0; m_sssize += sizeof(m_version); @@ -114,8 +107,6 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride) GSState::~GSState() { - _aligned_free(m_vertex.tmp); - if(m_vertex.buff) _aligned_free(m_vertex.buff); if(m_index.buff) _aligned_free(m_index.buff); } @@ -211,7 +202,7 @@ void GSState::Reset() { printf("GS reset\n"); - memset(m_mem.m_vm8, 0, m_mem.m_vmsize); + // FIXME: memset(m_mem.m_vm8, 0, m_mem.m_vmsize); // bios logo not shown cut in half after reset, missing graphics in GoW after first FMV memset(&m_path[0], 0, sizeof(m_path[0]) * countof(m_path)); memset(&m_v, 0, sizeof(m_v)); @@ -1274,40 +1265,8 @@ void GSState::FlushPrim() { if(m_index.tail > 0) { - if(0) - { - uint8* buff = new uint8[m_vertex.next]; + GSVertex buff[2]; - memset(buff, 0, m_vertex.next); - - for(size_t i = 0; i < m_index.tail; i++) - { - ASSERT(m_index.buff[i] < m_vertex.next); - - buff[m_index.buff[i]] = 1; - } - - size_t count = 0; - - for(size_t i = 0; i < m_vertex.next; i++) - { - if(buff[i] == 0) - { - count++; - } - } - - if(count > 0) - { - printf("unref %lld %d/%d\n", m_perfmon.GetFrame(), count, m_vertex.next); - } - - delete [] buff; - } - - uint8* buff = m_vertex.tmp; - - size_t stride = m_vertex.stride; size_t head = m_vertex.head; size_t tail = m_vertex.tail; size_t next = m_vertex.next; @@ -1326,11 +1285,11 @@ void GSState::FlushPrim() case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: unused = tail - head; - memcpy(buff, &m_vertex.buff[stride * head], stride * unused); + memcpy(buff, &m_vertex.buff[head], sizeof(GSVertex) * unused); break; case GS_TRIANGLEFAN: - memcpy(buff, &m_vertex.buff[stride * head], stride); unused = 1; - if(tail - 1 > head) {memcpy(&buff[stride], &m_vertex.buff[stride * (tail - 1)], stride); unused = 2;} + buff[0] = m_vertex.buff[head]; unused = 1; + if(tail - 1 > head) {buff[1] = m_vertex.buff[tail - 1]; unused = 2;} break; case GS_INVALID: break; @@ -1345,7 +1304,7 @@ void GSState::FlushPrim() { // FIXME: berserk fpsm = 27 (8H) - m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM)); + m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM)); Draw(); @@ -1359,7 +1318,7 @@ void GSState::FlushPrim() if(unused > 0) { - memcpy(m_vertex.buff, buff, stride * unused); + memcpy(m_vertex.buff, buff, sizeof(GSVertex) * unused); m_vertex.tail = unused; m_vertex.next = next > head ? next - head : 0; @@ -2182,20 +2141,18 @@ void GSState::UpdateVertexKick() m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = m_fpGIFRegHandlerXYZ[prim][3]; m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = m_fpGIFPackedRegHandlerSTQRGBAXYZF2[prim]; - - m_cvf = m_cv[prim][PRIM->TME][PRIM->FST]; } void GSState::GrowVertexBuffer() { int maxcount = std::max(m_vertex.maxcount * 3 / 2, 10000); - uint8* vertex = (uint8*)_aligned_malloc(m_vertex.stride * maxcount, 16); + GSVertex* vertex = (GSVertex*)_aligned_malloc(sizeof(GSVertex) * maxcount, 16); uint32* index = (uint32*)_aligned_malloc(sizeof(uint32) * maxcount * 3, 16); // worst case is slightly less than vertex number * 3 if(m_vertex.buff != NULL) { - memcpy(vertex, m_vertex.buff, m_vertex.stride * m_vertex.tail); + memcpy(vertex, m_vertex.buff, sizeof(GSVertex) * m_vertex.tail); _aligned_free(m_vertex.buff); } @@ -2227,17 +2184,13 @@ __forceinline void GSState::VertexKick(uint32 skip) GSVector4i v0(m_v.m[0]); GSVector4i v1(m_v.m[1]); - GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[m_vertex.stride * tail]; + GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[tail]; tailptr[0] = v0; tailptr[1] = v1; m_vertex.xy[xy_tail & 3] = GSVector4(v1.upl32(v1.sub16(GSVector4i::load(m_ofxy)).sra16(4)).upl16()); // zw not sign extended, only useful for eq tests - #ifdef _DEBUG - memset(&tailptr[2], 0, m_vertex.stride - sizeof(GSVertex)); - #endif - m_vertex.tail = ++tail; m_vertex.xy_tail = ++xy_tail; @@ -2353,8 +2306,6 @@ __forceinline void GSState::VertexKick(uint32 skip) uint32* RESTRICT buff = &m_index.buff[m_index.tail]; - size_t src_index = head; - switch(prim) { case GS_POINTLIST: @@ -2362,7 +2313,6 @@ __forceinline void GSState::VertexKick(uint32 skip) m_vertex.head = head + 1; m_vertex.next = head + 1; m_index.tail += 1; - (this->*m_cvf)(head, head); break; case GS_LINELIST: buff[0] = head + 0; @@ -2370,18 +2320,20 @@ __forceinline void GSState::VertexKick(uint32 skip) m_vertex.head = head + 2; m_vertex.next = head + 2; m_index.tail += 2; - (this->*m_cvf)(head + 0, head + 0); - (this->*m_cvf)(head + 1, head + 1); break; case GS_LINESTRIP: - if(next < head) {head = next; m_vertex.tail = next + 2;} + if(next < head) + { + m_vertex.buff[next + 0] = m_vertex.buff[head + 0]; + m_vertex.buff[next + 1] = m_vertex.buff[head + 1]; + head = next; + m_vertex.tail = next + 2; + } buff[0] = head + 0; buff[1] = head + 1; m_vertex.head = head + 1; m_vertex.next = head + 2; m_index.tail += 2; - if(head + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0); - /*if(head + 1 >= next)*/ (this->*m_cvf)(head + 1, src_index + 1); // this is always a new vertex break; case GS_TRIANGLELIST: buff[0] = head + 0; @@ -2390,21 +2342,22 @@ __forceinline void GSState::VertexKick(uint32 skip) m_vertex.head = head + 3; m_vertex.next = head + 3; m_index.tail += 3; - (this->*m_cvf)(head + 0, head + 0); - (this->*m_cvf)(head + 1, head + 1); - (this->*m_cvf)(head + 2, head + 2); break; case GS_TRIANGLESTRIP: - if(next < head) {head = next; m_vertex.tail = next + 3;} + if(next < head) + { + m_vertex.buff[next + 0] = m_vertex.buff[head + 0]; + m_vertex.buff[next + 1] = m_vertex.buff[head + 1]; + m_vertex.buff[next + 2] = m_vertex.buff[head + 2]; + head = next; + m_vertex.tail = next + 3; + } buff[0] = head + 0; buff[1] = head + 1; buff[2] = head + 2; m_vertex.head = head + 1; m_vertex.next = head + 3; m_index.tail += 3; - if(src_index + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0); - if(src_index + 1 >= next) (this->*m_cvf)(head + 1, src_index + 1); - /*if(src_index + 2 >= next)*/ (this->*m_cvf)(head + 2, src_index + 2); // this is always a new vertex break; case GS_TRIANGLEFAN: // TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare) @@ -2413,9 +2366,6 @@ __forceinline void GSState::VertexKick(uint32 skip) buff[2] = tail - 1; m_vertex.next = tail; m_index.tail += 3; - if(head >= next) (this->*m_cvf)(head, head); - if(tail - 2 >= next) (this->*m_cvf)(tail - 2, tail - 2); - /*if(tail - 1 >= next)*/ (this->*m_cvf)(tail - 1, tail - 1); // this is always a new vertex break; case GS_SPRITE: buff[0] = head + 0; @@ -2423,10 +2373,8 @@ __forceinline void GSState::VertexKick(uint32 skip) m_vertex.head = head + 2; m_vertex.next = head + 2; m_index.tail += 2; - (this->*m_cvf)(head + 0, head + 0); - (this->*m_cvf)(head + 1, head + 1); break; - case GS_INVALID: + case GS_INVALID: m_vertex.tail = head; break; default: @@ -2492,7 +2440,7 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR if(wms + wmt < 6) { - GSVector4 st = m_vt->m_min.t.xyxy(m_vt->m_max.t); + GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t); if(linear) { @@ -2570,7 +2518,7 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR void GSState::GetAlphaMinMax() { - if(m_vt->m_alpha.valid) + if(m_vt.m_alpha.valid) { return; } @@ -2578,7 +2526,7 @@ void GSState::GetAlphaMinMax() const GSDrawingEnvironment& env = m_env; const GSDrawingContext* context = m_context; - GSVector4i a = m_vt->m_min.c.uph32(m_vt->m_max.c).zzww(); + GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww(); if(PRIM->TME && context->TEX0.TCC) { @@ -2630,9 +2578,9 @@ void GSState::GetAlphaMinMax() } } - m_vt->m_alpha.min = a.x; - m_vt->m_alpha.max = a.z; - m_vt->m_alpha.valid = true; + m_vt.m_alpha.min = a.x; + m_vt.m_alpha.max = a.z; + m_vt.m_alpha.valid = true; } bool GSState::TryAlphaTest(uint32& fm, uint32& zm) @@ -2649,8 +2597,8 @@ bool GSState::TryAlphaTest(uint32& fm, uint32& zm) { GetAlphaMinMax(); - int amin = m_vt->m_alpha.min; - int amax = m_vt->m_alpha.max; + int amin = m_vt.m_alpha.min; + int amax = m_vt.m_alpha.max; int aref = context->TEST.AREF; @@ -2734,8 +2682,8 @@ bool GSState::IsOpaque() { GetAlphaMinMax(); - amin = m_vt->m_alpha.min; - amax = m_vt->m_alpha.max; + amin = m_vt.m_alpha.min; + amax = m_vt.m_alpha.max; } else if(context->ALPHA.C == 1) { diff --git a/plugins/GSdx/GSState.h b/plugins/GSdx/GSState.h index 52a8dbe81e..3cc03c3a0c 100644 --- a/plugins/GSdx/GSState.h +++ b/plugins/GSdx/GSState.h @@ -143,12 +143,10 @@ protected: struct { - uint8* buff; - size_t stride; + GSVertex* buff; size_t head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1 GSVector4 xy[4]; size_t xy_tail; - uint8* tmp; } m_vertex; struct @@ -157,26 +155,6 @@ protected: size_t tail; } m_index; - typedef void (GSState::*ConvertVertexPtr)(size_t dst_index, size_t src_index); - - ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST] - - #define InitConvertVertex2(T, P) \ - m_cv[P][0][0] = (ConvertVertexPtr)&T::ConvertVertex; \ - m_cv[P][0][1] = (ConvertVertexPtr)&T::ConvertVertex; \ - m_cv[P][1][0] = (ConvertVertexPtr)&T::ConvertVertex; \ - m_cv[P][1][1] = (ConvertVertexPtr)&T::ConvertVertex; \ - - #define InitConvertVertex(T) \ - InitConvertVertex2(T, GS_POINTLIST) \ - InitConvertVertex2(T, GS_LINELIST) \ - InitConvertVertex2(T, GS_LINESTRIP) \ - InitConvertVertex2(T, GS_TRIANGLELIST) \ - InitConvertVertex2(T, GS_TRIANGLESTRIP) \ - InitConvertVertex2(T, GS_TRIANGLEFAN) \ - InitConvertVertex2(T, GS_SPRITE) \ - InitConvertVertex2(T, GS_INVALID) \ - void UpdateContext(); void UpdateScissor(); @@ -189,7 +167,7 @@ protected: // following functions need m_vt to be initialized - GSVertexTrace* m_vt; + GSVertexTrace m_vt; void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear); void GetAlphaMinMax(); @@ -213,7 +191,7 @@ public: bool m_nativeres; public: - GSState(GSVertexTrace* vt, size_t vertex_stride); + GSState(); virtual ~GSState(); void ResetHandlers(); diff --git a/plugins/GSdx/GSTextureFX11.cpp b/plugins/GSdx/GSTextureFX11.cpp index 354efd530f..5e7171b294 100644 --- a/plugins/GSdx/GSTextureFX11.cpp +++ b/plugins/GSdx/GSTextureFX11.cpp @@ -82,13 +82,6 @@ bool GSDevice11::CreateTextureFX() return true; } -void GSDevice11::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) -{ - IASetVertexBuffer(vertex, sizeof(GSVertexHW11), vertex_count); - IASetIndexBuffer(index, index_count); - IASetPrimitiveTopology((D3D11_PRIMITIVE_TOPOLOGY)prim); -} - void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb) { hash_map::const_iterator i = m_vs.find(sel); @@ -118,6 +111,7 @@ void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb) {"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0}, {"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0}, {"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0}, + {"TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24, D3D11_INPUT_PER_VERTEX_DATA, 0}, {"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0}, }; diff --git a/plugins/GSdx/GSTextureFX9.cpp b/plugins/GSdx/GSTextureFX9.cpp index aff4a33d90..6173334121 100644 --- a/plugins/GSdx/GSTextureFX9.cpp +++ b/plugins/GSdx/GSTextureFX9.cpp @@ -61,13 +61,6 @@ GSTexture* GSDevice9::CreateMskFix(uint32 size, uint32 msk, uint32 fix) return t; } -void GSDevice9::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) -{ - IASetVertexBuffer(vertex, sizeof(GSVertexHW9), vertex_count); - IASetIndexBuffer(index, index_count); - IASetPrimitiveTopology((D3DPRIMITIVETYPE)prim); -} - void GSDevice9::SetupVS(VSSelector sel, const VSConstantBuffer* cb) { hash_map::const_iterator i = m_vs.find(sel); diff --git a/plugins/GSdx/GSVector.cpp b/plugins/GSdx/GSVector.cpp index 47e724966c..7a824cb9ad 100644 --- a/plugins/GSdx/GSVector.cpp +++ b/plugins/GSdx/GSVector.cpp @@ -22,6 +22,46 @@ #include "stdafx.h" #include "GSVector.h" +const GSVector4i GSVector4i::m_xff[16] = +{ + GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), + GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), + GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), + GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), +}; + +const GSVector4i GSVector4i::m_x0f[16] = +{ + GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f), +}; + const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f); const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f); const GSVector4 GSVector4::m_half(0.5f); diff --git a/plugins/GSdx/GSVector.h b/plugins/GSdx/GSVector.h index 98afea5d9f..f066a11016 100644 --- a/plugins/GSdx/GSVector.h +++ b/plugins/GSdx/GSVector.h @@ -79,6 +79,9 @@ class GSVector4; __aligned(class, 16) GSVector4i { + static const GSVector4i m_xff[16]; + static const GSVector4i m_x0f[16]; + public: union { @@ -2343,6 +2346,9 @@ public: __forceinline static GSVector4i xfff8(const GSVector4i& v) {return xffffffff(v).sll16( 3);} __forceinline static GSVector4i xfffc(const GSVector4i& v) {return xffffffff(v).sll16( 2);} __forceinline static GSVector4i xfffe(const GSVector4i& v) {return xffffffff(v).sll16( 1);} + + __forceinline static GSVector4i xff(int n) {return m_xff[n];} + __forceinline static GSVector4i x0f(int n) {return m_x0f[n];} }; __aligned(class, 16) GSVector4 diff --git a/plugins/GSdx/GSVertex.h b/plugins/GSdx/GSVertex.h index 22a9e3c560..1976a82669 100644 --- a/plugins/GSdx/GSVertex.h +++ b/plugins/GSdx/GSVertex.h @@ -37,7 +37,8 @@ __aligned(struct, 32) GSVertex GIFRegST ST; GIFRegRGBAQ RGBAQ; GIFRegXYZ XYZ; - uint32 UV, FOG; + union {uint32 UV; struct {uint16 U, V;};}; + uint32 FOG; }; __m128i m[2]; diff --git a/plugins/GSdx/GSVertexHW.h b/plugins/GSdx/GSVertexHW.h index 6262e28804..0f39edb836 100644 --- a/plugins/GSdx/GSVertexHW.h +++ b/plugins/GSdx/GSVertexHW.h @@ -37,45 +37,4 @@ __aligned(struct, 32) GSVertexHW9 GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;} }; -__aligned(union, 32) GSVertexHW11 -{ - struct - { - union - { - struct {float x, y;} t; - GIFRegST ST; - }; - - union - { - union {struct {uint8 r, g, b, a; float q;}; uint32 c0;}; - GIFRegRGBAQ RGBAQ; - }; - - union - { - struct {union {struct {uint16 x, y;}; uint32 xy;}; uint32 z;} p; - GIFRegXYZ XYZ; - }; - - union - { - struct {uint32 _pad; union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;};}; - GIFRegFOG FOG; - }; - }; - - GSVertexHW11& operator = (GSVertexHW11& v) - { - GSVector4i* RESTRICT src = (GSVector4i*)&v; - GSVector4i* RESTRICT dst = (GSVector4i*)this; - - dst[0] = src[0]; - dst[1] = src[1]; - - return *this; - } -}; - #pragma pack(pop) diff --git a/plugins/GSdx/GSVertexTrace.cpp b/plugins/GSdx/GSVertexTrace.cpp index 413d5799aa..bfb0857c48 100644 --- a/plugins/GSdx/GSVertexTrace.cpp +++ b/plugins/GSdx/GSVertexTrace.cpp @@ -29,10 +29,38 @@ const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX); GSVertexTrace::GSVertexTrace(const GSState* state) : m_state(state) { + #define InitUpdate3(P, IIP, TME, FST, COLOR) \ + m_fmm[COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax; + + #define InitUpdate2(P, IIP, TME) \ + InitUpdate3(P, IIP, TME, 0, 0) \ + InitUpdate3(P, IIP, TME, 0, 1) \ + InitUpdate3(P, IIP, TME, 1, 0) \ + InitUpdate3(P, IIP, TME, 1, 1) \ + + #define InitUpdate(P) \ + InitUpdate2(P, 0, 0) \ + InitUpdate2(P, 0, 1) \ + InitUpdate2(P, 1, 0) \ + InitUpdate2(P, 1, 1) \ + + InitUpdate(GS_POINT_CLASS); + InitUpdate(GS_LINE_CLASS); + InitUpdate(GS_TRIANGLE_CLASS); + InitUpdate(GS_SPRITE_CLASS); } void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass) { + m_primclass = primclass; + + uint32 iip = m_state->PRIM->IIP; + uint32 tme = m_state->PRIM->TME; + uint32 fst = m_state->PRIM->FST; + uint32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC); + + (this->*m_fmm[color][fst][tme][iip][primclass])(vertex, index, count); + m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); m_alpha.valid = false; @@ -82,90 +110,108 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, G } } -uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass) +template +void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count) { - m_primclass = primclass; - - uint32 hash = m_primclass | (m_state->PRIM->IIP << 2) | (m_state->PRIM->TME << 3) | (m_state->PRIM->FST << 4); - - if(!(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC)) - { - hash |= 1 << 5; - } - - return hash; -} - -GSVertexTraceSW::GSVertexTraceSW(const GSState* state) - : GSVertexTrace(state) - , m_map("VertexTraceSW", NULL) -{ -} - -void GSVertexTraceSW::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass) -{ - m_map[Hash(primclass)](count, vertex, index, m_min, m_max); - - GSVertexTrace::Update(vertex, index, count, primclass); -} - -GSVertexTraceDX9::GSVertexTraceDX9(const GSState* state) - : GSVertexTrace(state) - , m_map("VertexTraceHW9", NULL) -{ -} - -void GSVertexTraceDX9::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass) -{ - m_map[Hash(primclass)](count, vertex, index, m_min, m_max); - const GSDrawingContext* context = m_state->m_context; - GSVector4 o(context->XYOFFSET); - GSVector4 s(1.0f / 16, 1.0f / 16, 1.0f, 1.0f); + bool sprite = primclass == GS_SPRITE_CLASS; - m_min.p = (m_min.p - o) * s; - m_max.p = (m_max.p - o) * s; + int n = 1; - if(m_state->PRIM->TME) + switch(primclass) { - if(m_state->PRIM->FST) - { - s = GSVector4(1 << (16 - 4), 1).xxyy(); - } - else - { - s = GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH, 1, 1); - } - - m_min.t *= s; - m_max.t *= s; + case GS_POINT_CLASS: + n = 1; + break; + case GS_LINE_CLASS: + case GS_SPRITE_CLASS: + n = 2; + break; + case GS_TRIANGLE_CLASS: + n = 3; + break; } - GSVertexTrace::Update(vertex, index, count, primclass); -} + GSVector4 pmin = s_minmax.xxxx(); + GSVector4 pmax = s_minmax.yyyy(); + GSVector4 tmin = s_minmax.xxxx(); + GSVector4 tmax = s_minmax.yyyy(); + GSVector4i cmin = GSVector4i::xffffffff(); + GSVector4i cmax = GSVector4i::zero(); -GSVertexTraceDX11::GSVertexTraceDX11(const GSState* state) - : GSVertexTrace(state) - , m_map("VertexTraceHW11", NULL) -{ -} + const GSVertex* RESTRICT v = (GSVertex*)vertex; -void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass) -{ - m_map[Hash(primclass)](count, vertex, index, m_min, m_max); + for(int i = 0; i < count; i += n) + { + GSVector4 q; + GSVector4i f; - const GSDrawingContext* context = m_state->m_context; + if(sprite) + { + if(tme && !fst) + { + q = GSVector4::load(&v[index[i + 1]]).wwww(); + } + + f = GSVector4i(v[index[i + 1]].m[1]).wwww(); + } + + for(int j = 0; j < n; j++) + { + GSVector4i c(v[index[i + j]].m[0]); + + if(color && (iip || j == n - 1)) // TODO: unroll, to avoid j == n - 1 + { + cmin = cmin.min_u8(c); + cmax = cmax.max_u8(c); + } + + if(tme) + { + if(!fst) + { + GSVector4 stq = GSVector4::cast(c); + + GSVector4 q2 = !sprite ? stq.wwww() : q; + + stq = (stq.xyww() * q2.rcpnr()).xyww(q2); + + tmin = tmin.min(stq); + tmax = tmax.max(stq); + } + else + { + GSVector4i uv(v[index[i + j]].m[1]); + + GSVector4 st = GSVector4(uv.uph16()).xyxy(); + + tmin = tmin.min(st); + tmax = tmax.max(st); + } + } + + GSVector4i xyzf(v[index[i + j]].m[1]); + + GSVector4i xy = xyzf.upl16(); + GSVector4i z = xyzf.yyyy().srl32(1); + + GSVector4 p = GSVector4(xy.upl64(z.upl32(!sprite ? xyzf.wwww() : f))); + + pmin = pmin.min(p); + pmax = pmax.max(p); + } + } GSVector4 o(context->XYOFFSET); GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f); - m_min.p = (m_min.p - o) * s; - m_max.p = (m_max.p - o) * s; + m_min.p = (pmin - o) * s; + m_max.p = (pmax - o) * s; - if(m_state->PRIM->TME) + if(tme) { - if(m_state->PRIM->FST) + if(fst) { s = GSVector4(1 << (16 - 4), 1).xxyy(); } @@ -174,10 +220,13 @@ void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int coun s = GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH, 1, 1); } - m_min.t *= s; - m_max.t *= s; + m_min.t = tmin * s; + m_max.t = tmax * s; } - GSVertexTrace::Update(vertex, index, count, primclass); + if(color) + { + m_min.c = cmin.zzzz().u8to32(); + m_max.c = cmax.zzzz().u8to32(); + } } - diff --git a/plugins/GSdx/GSVertexTrace.h b/plugins/GSdx/GSVertexTrace.h index 4c0a5653b0..a5734fafc7 100644 --- a/plugins/GSdx/GSVertexTrace.h +++ b/plugins/GSdx/GSVertexTrace.h @@ -38,12 +38,15 @@ public: protected: const GSState* m_state; - uint32 Hash(GS_PRIM_CLASS primclass); - - typedef void (*VertexTracePtr)(int count, const void* vertex, const uint32* index, Vertex& min, Vertex& max); - static const GSVector4 s_minmax; + typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count); + + FindMinMaxPtr m_fmm[2][2][2][2][4]; + + template + void FindMinMax(const void* vertex, const uint32* index, int count); + public: GS_PRIM_CLASS m_primclass; @@ -69,55 +72,7 @@ public: GSVertexTrace(const GSState* state); virtual ~GSVertexTrace() {} - virtual void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass); + void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass); bool IsLinear() const {return m_filter.linear;} }; - -__aligned(class, 32) GSVertexTraceSW : public GSVertexTrace -{ - class CG : public GSCodeGenerator - { - public: - CG(const void* param, uint32 key, void* code, size_t maxsize); - }; - - GSCodeGeneratorFunctionMap m_map; - -public: - GSVertexTraceSW(const GSState* state); - - void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass); -}; - -__aligned(class, 32) GSVertexTraceDX9 : public GSVertexTrace -{ - class CG : public GSCodeGenerator - { - public: - CG(const void* param, uint32 key, void* code, size_t maxsize); - }; - - GSCodeGeneratorFunctionMap m_map; - -public: - GSVertexTraceDX9(const GSState* state); - - void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass); -}; - -__aligned(class, 32) GSVertexTraceDX11 : public GSVertexTrace -{ - class CG : public GSCodeGenerator - { - public: - CG(const void* param, uint32 key, void* code, size_t maxsize); - }; - - GSCodeGeneratorFunctionMap m_map; - -public: - GSVertexTraceDX11(const GSState* state); - - void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass); -}; diff --git a/plugins/GSdx/GSVertexTrace.x64.avx.cpp b/plugins/GSdx/GSVertexTrace.x64.avx.cpp deleted file mode 100644 index 880e5644e4..0000000000 --- a/plugins/GSdx/GSVertexTrace.x64.avx.cpp +++ /dev/null @@ -1,496 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSVertexTrace.h" - -#if _M_SSE >= 0x500 && (defined(_M_AMD64) || defined(_WIN64)) - -using namespace Xbyak; - -GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) -{ - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 fst = (key >> 4) & 1; - uint32 color = (key >> 5) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - } - - sub(rsp, 8 + 2 * 16); - - vmovdqa(ptr[rsp + 0], xmm6); - vmovdqa(ptr[rsp + 16], xmm7); - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - mov(rax, (size_t)&s_minmax); - - vbroadcastss(xmm4, ptr[rax + 0]); - vbroadcastss(xmm5, ptr[rax + 4]); - - if(color) - { - // min.c = FLT_MAX; - // max.c = -FLT_MAX; - - vmovaps(xmm2, xmm4); - vmovaps(xmm3, xmm5); - } - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - vmovaps(xmm6, xmm4); - vmovaps(xmm7, xmm5); - } - - // for(int i = 0; i < count; i += step) { - - align(16); - - L("loop"); - - if(tme && !fst && primclass == GS_SPRITE_CLASS) - { - vmovaps(xmm1, ptr[rdx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]); - vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - } - - for(int j = 0; j < n; j++) - { - if(color && (iip || j == n - 1)) - { - // min.c = min.c.minv(v[i + j].c); - // max.c = max.c.maxv(v[i + j].c); - - vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]); - - vminps(xmm2, xmm0); - vmaxps(xmm3, xmm0); - } - - // min.p = min.p.minv(v[i + j].p); - // max.p = max.p.maxv(v[i + j].p); - - vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]); - - vminps(xmm4, xmm0); - vmaxps(xmm5, xmm0); - - if(tme) - { - // min.t = min.t.minv(v[i + j].t); - // max.t = max.t.maxv(v[i + j].t); - - vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]); - - if(!fst) - { - if(primclass != GS_SPRITE_CLASS) - { - vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - } - - vdivps(xmm0, xmm1); - vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0)); - } - - vminps(xmm6, xmm0); - vmaxps(xmm7, xmm0); - } - } - - add(rdx, n * sizeof(GSVertexSW)); - sub(ecx, n); - - jg("loop"); - - // } - - if(color) - { - vcvttps2dq(xmm2, xmm2); - vpsrld(xmm2, 7); - vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2); - - vcvttps2dq(xmm3, xmm3); - vpsrld(xmm3, 7); - vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3); - } - - vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4); - vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5); - - if(tme) - { - vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6); - vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7); - } - - vmovdqa(xmm6, ptr[rsp + 0]); - vmovdqa(xmm7, ptr[rsp + 16]); - - add(rsp, 8 + 2 * 16); - - ret(); -} - -GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) -{ - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 fst = (key >> 4) & 1; - uint32 color = (key >> 5) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - case GS_SPRITE_CLASS: - n = 6; - break; - } - - sub(rsp, 8 + 2 * 16); - - vmovdqa(ptr[rsp + 0], xmm6); - vmovdqa(ptr[rsp + 16], xmm7); - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - mov(rax, (size_t)&s_minmax); - - vbroadcastss(xmm4, ptr[rax + 0]); - vbroadcastss(xmm5, ptr[rax + 4]); - - if(color) - { - // min.c = 0xffffffff; - // max.c = 0; - - vpcmpeqd(xmm2, xmm2); - vpxor(xmm3, xmm3); - } - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - vmovaps(xmm6, xmm4); - vmovaps(xmm7, xmm5); - } - - // for(int i = 0; i < count; i += step) { - - align(16); - - L("loop"); - - if(tme && !fst && primclass == GS_SPRITE_CLASS) - { - vmovaps(xmm1, ptr[rdx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]); - vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - } - - for(int j = 0; j < n; j++) - { - // min.p = min.p.minv(v[i + j].p); - // max.p = max.p.maxv(v[i + j].p); - - vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]); - - vminps(xmm4, xmm0); - vmaxps(xmm5, xmm0); - - if(tme && !fst && primclass != GS_SPRITE_CLASS) - { - vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - } - - if(color && (iip || j == n - 1) || tme) - { - vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]); - } - - if(color && (iip || j == n - 1)) - { - // min.c = min.c.min_u8(v[i + j].c); - // max.c = max.c.min_u8(v[i + j].c); - - vpminub(xmm2, xmm0); - vpmaxub(xmm3, xmm0); - } - - if(tme) - { - vshufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral - - if(!fst) - { - // t /= p.wwww(); - - vdivps(xmm0, xmm1); - } - - // min.t = min.t.minv(v[i + j].t); - // max.t = max.t.maxv(v[i + j].t); - - vminps(xmm6, xmm0); - vmaxps(xmm7, xmm0); - } - } - - add(rdx, n * sizeof(GSVertexHW9)); - sub(ecx, n); - - jg("loop"); - - // } - - if(color) - { - // m_min.c = cmin.zzzz().u8to32(); - // m_max.c = cmax.zzzz().u8to32(); - - vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2)); - vpmovzxbd(xmm2, xmm2); - - vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2)); - vpmovzxbd(xmm3, xmm3); - - vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2); - vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3); - } - - // m_min.p = pmin; - // m_max.p = pmax; - - vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4); - vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5); - - if(tme) - { - // m_min.t = tmin.xyww(pmin); - // m_max.t = tmax.xyww(pmax); - - vshufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0)); - vshufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0)); - - vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6); - vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7); - } - - vmovdqa(xmm6, ptr[rsp + 0]); - vmovdqa(xmm7, ptr[rsp + 16]); - - add(rsp, 8 + 2 * 16); - - ret(); -} - -GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) -{ - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 fst = (key >> 4) & 1; - uint32 color = (key >> 5) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - } - - sub(rsp, 8 + 2 * 16); - - vmovdqa(ptr[rsp + 0], xmm6); - vmovdqa(ptr[rsp + 16], xmm7); - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - mov(rax, (size_t)&s_minmax); - - vbroadcastss(xmm4, ptr[rax + 0]); - vbroadcastss(xmm5, ptr[rax + 4]); - - if(color) - { - // min.c = 0xffffffff; - // max.c = 0; - - vpcmpeqd(xmm2, xmm2); - vpxor(xmm3, xmm3); - } - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - vmovaps(xmm6, xmm4); - vmovaps(xmm7, xmm5); - } - - // for(int i = 0; i < count; i += step) { - - align(16); - - L("loop"); - - for(int j = 0; j < n; j++) - { - if(color && (iip || j == n - 1) || tme) - { - vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW11)]); - } - - if(color && (iip || j == n - 1)) - { - vpminub(xmm2, xmm0); - vpmaxub(xmm3, xmm0); - } - - if(tme) - { - if(!fst) - { - vmovaps(xmm1, xmm0); - } - - vshufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral - - if(!fst) - { - vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - vdivps(xmm0, xmm1); - vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q - } - - vminps(xmm6, xmm0); - vmaxps(xmm7, xmm0); - } - - vmovdqa(xmm0, ptr[rdx + j * sizeof(GSVertexHW11) + 16]); - vpmovzxwd(xmm1, xmm0); - - vpsrld(xmm0, 1); - vpunpcklqdq(xmm1, xmm0); - vcvtdq2ps(xmm1, xmm1); - - vminps(xmm4, xmm1); - vmaxps(xmm5, xmm1); - } - - add(rdx, n * sizeof(GSVertexHW11)); - sub(ecx, n); - - jg("loop"); - - // } - - if(color) - { - // m_min.c = cmin.zzzz().u8to32(); - // m_max.c = cmax.zzzz().u8to32(); - - vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2)); - vpmovzxbd(xmm2, xmm2); - - vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2)); - vpmovzxbd(xmm3, xmm3); - - vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2); - vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3); - } - - // m_min.p = pmin.xyww(); - // m_max.p = pmax.xyww(); - - vshufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0)); - vshufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0)); - - vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4); - vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5); - - if(tme) - { - // m_min.t = tmin; - // m_max.t = tmax; - - vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6); - vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7); - } - - vmovdqa(xmm6, ptr[rsp + 0]); - vmovdqa(xmm7, ptr[rsp + 16]); - - add(rsp, 8 + 2 * 16); - - ret(); -} - -#endif \ No newline at end of file diff --git a/plugins/GSdx/GSVertexTrace.x64.cpp b/plugins/GSdx/GSVertexTrace.x64.cpp deleted file mode 100644 index 8dfc6db296..0000000000 --- a/plugins/GSdx/GSVertexTrace.x64.cpp +++ /dev/null @@ -1,543 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSVertexTrace.h" - -#if _M_SSE < 0x500 && (defined(_M_AMD64) || defined(_WIN64)) - -using namespace Xbyak; - -GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) -{ - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 fst = (key >> 4) & 1; - uint32 color = (key >> 5) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - } - - sub(rsp, 8 + 2 * 16); - - movdqa(ptr[rsp + 0], xmm6); - movdqa(ptr[rsp + 16], xmm7); - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - mov(rax, (size_t)&s_minmax); - - movss(xmm4, ptr[rax + 0]); - movss(xmm5, ptr[rax + 4]); - - shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0)); - - if(color) - { - // min.c = FLT_MAX; - // max.c = -FLT_MAX; - - movaps(xmm2, xmm4); - movaps(xmm3, xmm5); - } - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - movaps(xmm6, xmm4); - movaps(xmm7, xmm5); - } - - // for(int i = 0; i < count; i += step) { - - align(16); - - L("loop"); - - if(tme && !fst && primclass == GS_SPRITE_CLASS) - { - movaps(xmm1, ptr[rdx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]); - shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - } - - for(int j = 0; j < n; j++) - { - if(color && (iip || j == n - 1)) - { - // min.c = min.c.minv(v[i + j].c); - // max.c = max.c.maxv(v[i + j].c); - - movaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]); - - minps(xmm2, xmm0); - maxps(xmm3, xmm0); - } - - // min.p = min.p.minv(v[i + j].p); - // max.p = max.p.maxv(v[i + j].p); - - movaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]); - - minps(xmm4, xmm0); - maxps(xmm5, xmm0); - - if(tme) - { - // min.t = min.t.minv(v[i + j].t); - // max.t = max.t.maxv(v[i + j].t); - - movaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]); - - if(!fst) - { - if(primclass != GS_SPRITE_CLASS) - { - movaps(xmm1, xmm0); - shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - } - - divps(xmm0, xmm1); - shufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0)); - } - - minps(xmm6, xmm0); - maxps(xmm7, xmm0); - } - } - - add(rdx, n * sizeof(GSVertexSW)); - sub(rcx, n); - - jg("loop"); - - // } - - if(color) - { - cvttps2dq(xmm2, xmm2); - psrld(xmm2, 7); - movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2); - - cvttps2dq(xmm3, xmm3); - psrld(xmm3, 7); - movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3); - } - - movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4); - movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5); - - if(tme) - { - movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6); - movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7); - } - - movdqa(xmm6, ptr[rsp + 0]); - movdqa(xmm7, ptr[rsp + 16]); - - add(rsp, 8 + 2 * 16); - - ret(); -} - -GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) -{ - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 fst = (key >> 4) & 1; - uint32 color = (key >> 5) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - case GS_SPRITE_CLASS: - n = 6; - break; - } - - sub(rsp, 8 + 2 * 16); - - movdqa(ptr[rsp + 0], xmm6); - movdqa(ptr[rsp + 16], xmm7); - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - mov(rax, (size_t)&s_minmax); - - movss(xmm4, ptr[rax + 0]); - movss(xmm5, ptr[rax + 16]); - - shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0)); - - if(color) - { - // min.c = 0xffffffff; - // max.c = 0; - - pcmpeqd(xmm2, xmm2); - pxor(xmm3, xmm3); - } - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - movaps(xmm6, xmm4); - movaps(xmm7, xmm5); - } - - // for(int i = 0; i < count; i += step) { - - align(16); - - L("loop"); - - if(tme && !fst && primclass == GS_SPRITE_CLASS) - { - movaps(xmm1, ptr[rdx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]); - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - } - - for(int j = 0; j < n; j++) - { - // min.p = min.p.minv(v[i + j].p); - // max.p = max.p.maxv(v[i + j].p); - - movaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]); - - minps(xmm4, xmm0); - maxps(xmm5, xmm0); - - if(tme && !fst && primclass != GS_SPRITE_CLASS) - { - movaps(xmm1, xmm0); - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - } - - if(color && (iip || j == n - 1) || tme) - { - movaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]); - } - - if(color && (iip || j == n - 1)) - { - // min.c = min.c.min_u8(v[i + j].c); - // max.c = max.c.min_u8(v[i + j].c); - - pminub(xmm2, xmm0); - pmaxub(xmm3, xmm0); - } - - if(tme) - { - shufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral - - if(!fst) - { - // t /= p.wwww(); - - divps(xmm0, xmm1); - } - - // min.t = min.t.minv(v[i + j].t); - // max.t = max.t.maxv(v[i + j].t); - - minps(xmm6, xmm0); - maxps(xmm7, xmm0); - } - } - - add(rdx, n * sizeof(GSVertexHW9)); - sub(ecx, n); - - jg("loop"); - - // } - - if(color) - { - // m_min.c = cmin.zzzz().u8to32(); - // m_max.c = cmax.zzzz().u8to32(); - - if(m_cpu.has(util::Cpu::tSSE41)) - { - pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2)); - pmovzxbd(xmm2, xmm2); - - pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2)); - pmovzxbd(xmm3, xmm3); - } - else - { - pxor(xmm0, xmm0); - - punpckhbw(xmm2, xmm0); - punpcklwd(xmm2, xmm0); - - punpckhbw(xmm3, xmm0); - punpcklwd(xmm3, xmm0); - } - - movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2); - movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3); - } - - // m_min.p = pmin; - // m_max.p = pmax; - - movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4); - movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5); - - if(tme) - { - // m_min.t = tmin.xyww(pmin); - // m_max.t = tmax.xyww(pmax); - - shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0)); - shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0)); - - movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6); - movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7); - } - - movdqa(xmm6, ptr[rsp + 0]); - movdqa(xmm7, ptr[rsp + 16]); - - add(rsp, 8 + 2 * 16); - - ret(); -} - -GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) -{ - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 fst = (key >> 4) & 1; - uint32 color = (key >> 5) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - } - - sub(rsp, 8 + 2 * 16); - - movdqa(ptr[rsp + 0], xmm6); - movdqa(ptr[rsp + 16], xmm7); - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - mov(rax, (size_t)&s_minmax); - - movss(xmm4, ptr[rax + 0]); - movss(xmm5, ptr[rax + 16]); - - shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0)); - - if(color) - { - // min.c = 0xffffffff; - // max.c = 0; - - pcmpeqd(xmm2, xmm2); - pxor(xmm3, xmm3); - } - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - movaps(xmm6, xmm4); - movaps(xmm7, xmm5); - } - - // for(int i = 0; i < count; i += step) { - - align(16); - - L("loop"); - - for(int j = 0; j < n; j++) - { - if(color && (iip || j == n - 1) || tme) - { - movaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW11)]); - } - - if(color && (iip || j == n - 1)) - { - pminub(xmm2, xmm0); - pmaxub(xmm3, xmm0); - } - - if(tme) - { - if(!fst) - { - movaps(xmm1, xmm0); - } - - shufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral - - if(!fst) - { - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - divps(xmm0, xmm1); - shufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q - } - - minps(xmm6, xmm0); - maxps(xmm7, xmm0); - } - - movdqa(xmm0, ptr[rdx + j * sizeof(GSVertexHW11) + 16]); - - if(m_cpu.has(util::Cpu::tSSE41)) - { - pmovzxwd(xmm1, xmm0); - } - else - { - movdqa(xmm1, xmm0); - punpcklwd(xmm1, xmm1); - psrld(xmm1, 16); - } - - psrld(xmm0, 1); - punpcklqdq(xmm1, xmm0); - cvtdq2ps(xmm1, xmm1); - - minps(xmm4, xmm1); - maxps(xmm5, xmm1); - } - - add(rdx, n * sizeof(GSVertexHW11)); - sub(ecx, n); - - jg("loop"); - - // } - - if(color) - { - // m_min.c = cmin.zzzz().u8to32(); - // m_max.c = cmax.zzzz().u8to32(); - - if(m_cpu.has(util::Cpu::tSSE41)) - { - pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2)); - pmovzxbd(xmm2, xmm2); - - pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2)); - pmovzxbd(xmm3, xmm3); - } - else - { - pxor(xmm0, xmm0); - - punpckhbw(xmm2, xmm0); - punpcklwd(xmm2, xmm0); - - punpckhbw(xmm3, xmm0); - punpcklwd(xmm3, xmm0); - } - - movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2); - movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3); - } - - // m_min.p = pmin.xyww(); - // m_max.p = pmax.xyww(); - - shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0)); - shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0)); - - movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4); - movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5); - - if(tme) - { - // m_min.t = tmin; - // m_max.t = tmax; - - movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6); - movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7); - } - - movdqa(xmm6, ptr[rsp + 0]); - movdqa(xmm7, ptr[rsp + 16]); - - add(rsp, 8 + 2 * 16); - - ret(); -} - -#endif diff --git a/plugins/GSdx/GSVertexTrace.x86.avx.cpp b/plugins/GSdx/GSVertexTrace.x86.avx.cpp deleted file mode 100644 index 560680285f..0000000000 --- a/plugins/GSdx/GSVertexTrace.x86.avx.cpp +++ /dev/null @@ -1,513 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSVertexTrace.h" - -#if _M_SSE >= 0x500 && !(defined(_M_AMD64) || defined(_WIN64)) - -using namespace Xbyak; - -static const int _args = 4; -static const int _count = _args + 4; // rcx -static const int _vertex = _args + 8; // rdx -static const int _index = _args + 12; // r8 -static const int _min = _args + 16; // r9 -static const int _max = _args + 20; // _args + 4 - -GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) -{ - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 fst = (key >> 4) & 1; - uint32 color = (key >> 5) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - } - - push(ebx); - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - vbroadcastss(xmm4, ptr[&s_minmax.x]); - vbroadcastss(xmm5, ptr[&s_minmax.y]); - - if(color) - { - // min.c = FLT_MAX; - // max.c = -FLT_MAX; - - vmovaps(xmm2, xmm4); - vmovaps(xmm3, xmm5); - } - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - vmovaps(xmm6, xmm4); - vmovaps(xmm7, xmm5); - } - - // for(int i = 0; i < count; i += step) { - - mov(edx, dword[esp + _vertex]); - mov(ebx, dword[esp + _index]); - mov(ecx, dword[esp + _count]); - - align(16); - - L("loop"); - - if(tme && !fst && primclass == GS_SPRITE_CLASS) - { - mov(eax, ptr[ebx + 1 * sizeof(uint32)]); - shl(eax, 6); // * sizeof(GSVertexSW) - - vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]); - vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - } - - for(int j = 0; j < n; j++) - { - mov(eax, ptr[ebx + j * sizeof(uint32)]); - shl(eax, 6); // * sizeof(GSVertexSW) - - if(color && (iip || j == n - 1)) - { - // min.c = min.c.minv(v[i + j].c); - // max.c = max.c.maxv(v[i + j].c); - - vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]); - - vminps(xmm2, xmm0); - vmaxps(xmm3, xmm0); - } - - // min.p = min.p.minv(v[i + j].p); - // max.p = max.p.maxv(v[i + j].p); - - vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]); - - vminps(xmm4, xmm0); - vmaxps(xmm5, xmm0); - - if(tme) - { - // min.t = min.t.minv(v[i + j].t); - // max.t = max.t.maxv(v[i + j].t); - - vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]); - - if(!fst) - { - if(primclass != GS_SPRITE_CLASS) - { - vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - } - - vdivps(xmm0, xmm1); - vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0)); - } - - vminps(xmm6, xmm0); - vmaxps(xmm7, xmm0); - } - } - - add(ebx, n * sizeof(uint32)); - sub(ecx, n); - - jg("loop"); - - // } - - mov(eax, dword[esp + _min]); - mov(edx, dword[esp + _max]); - - if(color) - { - vcvttps2dq(xmm2, xmm2); - vpsrld(xmm2, 7); - vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2); - - vcvttps2dq(xmm3, xmm3); - vpsrld(xmm3, 7); - vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3); - } - - vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4); - vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5); - - if(tme) - { - vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6); - vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); - } - - pop(ebx); - - ret(); -} - -GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) -{ - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 fst = (key >> 4) & 1; - uint32 color = (key >> 5) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_SPRITE_CLASS: - case GS_LINE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - } - - push(ebx); - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - vbroadcastss(xmm4, ptr[&s_minmax.x]); - vbroadcastss(xmm5, ptr[&s_minmax.y]); - - if(color) - { - // min.c = 0xffffffff; - // max.c = 0; - - vpcmpeqd(xmm2, xmm2); - vpxor(xmm3, xmm3); - } - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - vmovaps(xmm6, xmm4); - vmovaps(xmm7, xmm5); - } - - // for(int i = 0; i < count; i += step) { - - mov(edx, dword[esp + _vertex]); - mov(ebx, dword[esp + _index]); - mov(ecx, dword[esp + _count]); - - align(16); - - L("loop"); - - if(tme && !fst && primclass == GS_SPRITE_CLASS) - { - mov(eax, ptr[ebx + 1 * sizeof(uint32)]); - shl(eax, 5); // * sizeof(GSVertexHW9) - - vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]); - vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - } - - for(int j = 0; j < n; j++) - { - mov(eax, ptr[ebx + j * sizeof(uint32)]); - shl(eax, 5); // * sizeof(GSVertexHW9) - - // min.p = min.p.minv(v[i + j].p); - // max.p = max.p.maxv(v[i + j].p); - - vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]); - - vminps(xmm4, xmm0); - vmaxps(xmm5, xmm0); - - if(tme && !fst && primclass != GS_SPRITE_CLASS) - { - vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - } - - if(color && (iip || j == n - 1) || tme) - { - vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]); - } - - if(color && (iip || j == n - 1)) - { - // min.c = min.c.min_u8(v[i + j].c); - // max.c = max.c.min_u8(v[i + j].c); - - vpminub(xmm2, xmm0); - vpmaxub(xmm3, xmm0); - } - - if(tme) - { - vshufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral - - if(!fst) - { - // t /= p.wwww(); - - vdivps(xmm0, xmm1); - } - - // min.t = min.t.minv(v[i + j].t); - // max.t = max.t.maxv(v[i + j].t); - - vminps(xmm6, xmm0); - vmaxps(xmm7, xmm0); - } - } - - add(ebx, n * sizeof(uint32)); - sub(ecx, n); - - jg("loop"); - - // } - - mov(eax, dword[esp + _min]); - mov(edx, dword[esp + _max]); - - if(color) - { - // m_min.c = cmin.zzzz().u8to32(); - // m_max.c = cmax.zzzz().u8to32(); - - vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2)); - vpmovzxbd(xmm2, xmm2); - - vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2)); - vpmovzxbd(xmm3, xmm3); - - vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2); - vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3); - } - - // m_min.p = pmin; - // m_max.p = pmax; - - vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4); - vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5); - - if(tme) - { - // m_min.t = tmin.xyww(pmin); - // m_max.t = tmax.xyww(pmax); - - vshufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0)); - vshufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0)); - - vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6); - vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); - } - - pop(ebx); - - ret(); -} - -GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) -{ - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 fst = (key >> 4) & 1; - uint32 color = (key >> 5) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - } - - push(ebx); - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - vbroadcastss(xmm4, ptr[&s_minmax.x]); - vbroadcastss(xmm5, ptr[&s_minmax.y]); - - if(color) - { - // min.c = 0xffffffff; - // max.c = 0; - - vpcmpeqd(xmm2, xmm2); - vpxor(xmm3, xmm3); - } - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - vmovaps(xmm6, xmm4); - vmovaps(xmm7, xmm5); - } - - // for(int i = 0; i < count; i += step) { - - mov(edx, dword[esp + _vertex]); - mov(ebx, dword[esp + _index]); - mov(ecx, dword[esp + _count]); - - align(16); - - L("loop"); - - for(int j = 0; j < n; j++) - { - mov(eax, ptr[ebx + j * sizeof(uint32)]); - shl(eax, 5); // * sizeof(GSVertexHW11) - - if(color && (iip || j == n - 1) || tme) - { - vmovaps(xmm0, ptr[edx + eax]); - } - - if(color && (iip || j == n - 1)) - { - vpminub(xmm2, xmm0); - vpmaxub(xmm3, xmm0); - } - - if(tme) - { - if(!fst) - { - vmovaps(xmm1, xmm0); - } - - vshufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral - - if(!fst) - { - vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - vdivps(xmm0, xmm1); - vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q - } - - vminps(xmm6, xmm0); - vmaxps(xmm7, xmm0); - } - - vmovdqa(xmm0, ptr[edx + eax + 16]); - vpmovzxwd(xmm1, xmm0); - - vpsrld(xmm0, 1); - vpunpcklqdq(xmm1, xmm0); - vcvtdq2ps(xmm1, xmm1); - - vminps(xmm4, xmm1); - vmaxps(xmm5, xmm1); - } - - add(ebx, n * sizeof(uint32)); - sub(ecx, n); - - jg("loop"); - - // } - - mov(eax, dword[esp + _min]); - mov(edx, dword[esp + _max]); - - if(color) - { - // m_min.c = cmin.zzzz().u8to32(); - // m_max.c = cmax.zzzz().u8to32(); - - vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2)); - vpmovzxbd(xmm2, xmm2); - - vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2)); - vpmovzxbd(xmm3, xmm3); - - vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2); - vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3); - } - - // m_min.p = pmin.xyww(); - // m_max.p = pmax.xyww(); - - vshufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0)); - vshufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0)); - - vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4); - vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5); - - if(tme) - { - // m_min.t = tmin; - // m_max.t = tmax; - - vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6); - vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); - } - - pop(ebx); - - ret(); -} - -#endif diff --git a/plugins/GSdx/GSVertexTrace.x86.cpp b/plugins/GSdx/GSVertexTrace.x86.cpp deleted file mode 100644 index 58d4df2daa..0000000000 --- a/plugins/GSdx/GSVertexTrace.x86.cpp +++ /dev/null @@ -1,562 +0,0 @@ -/* - * Copyright (C) 2007-2009 Gabest - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#include "stdafx.h" -#include "GSVertexTrace.h" - -#if _M_SSE < 0x500 && !(defined(_M_AMD64) || defined(_WIN64)) - -using namespace Xbyak; - -static const int _args = 4; -static const int _count = _args + 4; // rcx -static const int _vertex = _args + 8; // rdx -static const int _index = _args + 12; // r8 -static const int _min = _args + 16; // r9 -static const int _max = _args + 20; // _args + 4 - -GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) -{ - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 fst = (key >> 4) & 1; - uint32 color = (key >> 5) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - } - - push(ebx); - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - movss(xmm4, ptr[&s_minmax.x]); - movss(xmm5, ptr[&s_minmax.y]); - - shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0)); - - if(color) - { - // min.c = FLT_MAX; - // max.c = -FLT_MAX; - - movaps(xmm2, xmm4); - movaps(xmm3, xmm5); - } - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - movaps(xmm6, xmm4); - movaps(xmm7, xmm5); - } - - // for(int i = 0; i < count; i += step) { - - mov(edx, dword[esp + _vertex]); - mov(ebx, dword[esp + _index]); - mov(ecx, dword[esp + _count]); - - align(16); - - L("loop"); - - if(tme && !fst && primclass == GS_SPRITE_CLASS) - { - mov(eax, ptr[ebx + 1 * sizeof(uint32)]); - shl(eax, 6); // * sizeof(GSVertexSW) - - movaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]); - shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - } - - for(int j = 0; j < n; j++) - { - mov(eax, ptr[ebx + j * sizeof(uint32)]); - shl(eax, 6); // * sizeof(GSVertexSW) - - if(color && (iip || j == n - 1)) - { - // min.c = min.c.minv(v[i + j].c); - // max.c = max.c.maxv(v[i + j].c); - - movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]); - - minps(xmm2, xmm0); - maxps(xmm3, xmm0); - } - - // min.p = min.p.minv(v[i + j].p); - // max.p = max.p.maxv(v[i + j].p); - - movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]); - - minps(xmm4, xmm0); - maxps(xmm5, xmm0); - - if(tme) - { - // min.t = min.t.minv(v[i + j].t); - // max.t = max.t.maxv(v[i + j].t); - - movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]); - - if(!fst) - { - if(primclass != GS_SPRITE_CLASS) - { - movaps(xmm1, xmm0); - shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - } - - divps(xmm0, xmm1); - shufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0)); - } - - minps(xmm6, xmm0); - maxps(xmm7, xmm0); - } - } - - add(ebx, n * sizeof(uint32)); - sub(ecx, n); - - jg("loop"); - - // } - - mov(eax, dword[esp + _min]); - mov(edx, dword[esp + _max]); - - if(color) - { - cvttps2dq(xmm2, xmm2); - psrld(xmm2, 7); - movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2); - - cvttps2dq(xmm3, xmm3); - psrld(xmm3, 7); - movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3); - } - - movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4); - movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5); - - if(tme) - { - movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6); - movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); - } - - pop(ebx); - - ret(); -} - -GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) -{ - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 fst = (key >> 4) & 1; - uint32 color = (key >> 5) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - case GS_SPRITE_CLASS: - n = 6; - break; - } - - push(ebx); - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - movss(xmm4, ptr[&s_minmax.x]); - movss(xmm5, ptr[&s_minmax.y]); - - shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0)); - - if(color) - { - // min.c = 0xffffffff; - // max.c = 0; - - pcmpeqd(xmm2, xmm2); - pxor(xmm3, xmm3); - } - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - movaps(xmm6, xmm4); - movaps(xmm7, xmm5); - } - - // for(int i = 0; i < count; i += step) { - - mov(edx, dword[esp + _vertex]); - mov(ebx, dword[esp + _index]); - mov(ecx, dword[esp + _count]); - - align(16); - - L("loop"); - - if(tme && !fst && primclass == GS_SPRITE_CLASS) - { - mov(eax, ptr[ebx + 1 * sizeof(uint32)]); - shl(eax, 5); // * sizeof(GSVertexHW9) - - movaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]); - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - } - - for(int j = 0; j < n; j++) - { - mov(eax, ptr[ebx + j * sizeof(uint32)]); - shl(eax, 5); // * sizeof(GSVertexHW9) - - // min.p = min.p.minv(v[i + j].p); - // max.p = max.p.maxv(v[i + j].p); - - movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]); - - minps(xmm4, xmm0); - maxps(xmm5, xmm0); - - if(tme && !fst && primclass != GS_SPRITE_CLASS) - { - movaps(xmm1, xmm0); - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - } - - if(color && (iip || j == n - 1) || tme) - { - movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]); - } - - if(color && (iip || j == n - 1)) - { - // min.c = min.c.min_u8(v[i + j].c); - // max.c = max.c.min_u8(v[i + j].c); - - pminub(xmm2, xmm0); - pmaxub(xmm3, xmm0); - } - - if(tme) - { - shufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral - - if(!fst) - { - // t /= p.wwww(); - - divps(xmm0, xmm1); - } - - // min.t = min.t.minv(v[i + j].t); - // max.t = max.t.maxv(v[i + j].t); - - minps(xmm6, xmm0); - maxps(xmm7, xmm0); - } - } - - add(ebx, n * sizeof(uint32)); - sub(ecx, n); - - jg("loop"); - - // } - - mov(eax, dword[esp + _min]); - mov(edx, dword[esp + _max]); - - if(color) - { - // m_min.c = cmin.zzzz().u8to32(); - // m_max.c = cmax.zzzz().u8to32(); - - if(m_cpu.has(util::Cpu::tSSE41)) - { - pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2)); - pmovzxbd(xmm2, xmm2); - - pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2)); - pmovzxbd(xmm3, xmm3); - } - else - { - pxor(xmm0, xmm0); - - punpckhbw(xmm2, xmm0); - punpcklwd(xmm2, xmm0); - - punpckhbw(xmm3, xmm0); - punpcklwd(xmm3, xmm0); - } - - movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2); - movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3); - } - - // m_min.p = pmin; - // m_max.p = pmax; - - movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4); - movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5); - - if(tme) - { - // m_min.t = tmin.xyww(pmin); - // m_max.t = tmax.xyww(pmax); - - shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0)); - shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0)); - - movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6); - movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); - } - - pop(ebx); - - ret(); -} - -GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) - : GSCodeGenerator(code, maxsize) -{ - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 fst = (key >> 4) & 1; - uint32 color = (key >> 5) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - } - - push(ebx); - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - movss(xmm4, ptr[&s_minmax.x]); - movss(xmm5, ptr[&s_minmax.y]); - - shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); - shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0)); - - if(color) - { - // min.c = 0xffffffff; - // max.c = 0; - - pcmpeqd(xmm2, xmm2); - pxor(xmm3, xmm3); - } - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - movaps(xmm6, xmm4); - movaps(xmm7, xmm5); - } - - // for(int i = 0; i < count; i += step) { - - mov(edx, dword[esp + _vertex]); - mov(ebx, dword[esp + _index]); - mov(ecx, dword[esp + _count]); - - align(16); - - L("loop"); - - for(int j = 0; j < n; j++) - { - mov(eax, ptr[ebx + j * sizeof(uint32)]); - shl(eax, 5); // * sizeof(GSVertexHW11) - - if(color && (iip || j == n - 1) || tme) - { - movaps(xmm0, ptr[edx + eax]); - } - - if(color && (iip || j == n - 1)) - { - pminub(xmm2, xmm0); - pmaxub(xmm3, xmm0); - } - - if(tme) - { - if(!fst) - { - movaps(xmm1, xmm0); - } - - shufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral - - if(!fst) - { - shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - divps(xmm0, xmm1); - shufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q - } - - minps(xmm6, xmm0); - maxps(xmm7, xmm0); - } - - movdqa(xmm0, ptr[edx + eax + 16]); - - if(m_cpu.has(util::Cpu::tSSE41)) - { - pmovzxwd(xmm1, xmm0); - } - else - { - movdqa(xmm1, xmm0); - punpcklwd(xmm1, xmm1); - psrld(xmm1, 16); - } - - psrld(xmm0, 1); - punpcklqdq(xmm1, xmm0); - cvtdq2ps(xmm1, xmm1); - - minps(xmm4, xmm1); - maxps(xmm5, xmm1); - } - - add(ebx, n * sizeof(uint32)); - sub(ecx, n); - - jg("loop"); - - // } - - mov(eax, dword[esp + _min]); - mov(edx, dword[esp + _max]); - - if(color) - { - // m_min.c = cmin.zzzz().u8to32(); - // m_max.c = cmax.zzzz().u8to32(); - - if(m_cpu.has(util::Cpu::tSSE41)) - { - pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2)); - pmovzxbd(xmm2, xmm2); - - pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2)); - pmovzxbd(xmm3, xmm3); - } - else - { - pxor(xmm0, xmm0); - - punpckhbw(xmm2, xmm0); - punpcklwd(xmm2, xmm0); - - punpckhbw(xmm3, xmm0); - punpcklwd(xmm3, xmm0); - } - - movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2); - movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3); - } - - // m_min.p = pmin.xyww(); - // m_max.p = pmax.xyww(); - - shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0)); - shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0)); - - movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4); - movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5); - - if(tme) - { - // m_min.t = tmin; - // m_max.t = tmax; - - movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6); - movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); - } - - pop(ebx); - - ret(); -} - -#endif \ No newline at end of file diff --git a/plugins/GSdx/GSdx.vcxproj b/plugins/GSdx/GSdx.vcxproj index b33203905f..fb69ea004e 100644 --- a/plugins/GSdx/GSdx.vcxproj +++ b/plugins/GSdx/GSdx.vcxproj @@ -618,62 +618,6 @@ - - true - true - true - true - true - true - true - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - true - true - true - true - true - true - true - - - true - true - true - true - true - true - true - true - true - true - Create diff --git a/plugins/GSdx/GSdx.vcxproj.filters b/plugins/GSdx/GSdx.vcxproj.filters index 53417912be..a2bc719e26 100644 --- a/plugins/GSdx/GSdx.vcxproj.filters +++ b/plugins/GSdx/GSdx.vcxproj.filters @@ -288,18 +288,6 @@ Source Files - - Source Files - - - Source Files - - - Source Files - - - Source Files - Source Files diff --git a/plugins/GSdx/GSdx_vs2008.vcproj b/plugins/GSdx/GSdx_vs2008.vcproj index 67186e4337..30da187049 100644 --- a/plugins/GSdx/GSdx_vs2008.vcproj +++ b/plugins/GSdx/GSdx_vs2008.vcproj @@ -1244,110 +1244,6 @@ RelativePath=".\GSVertexTrace.cpp" > - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/plugins/GSdx/res/tfx.fx b/plugins/GSdx/res/tfx.fx index 3ce182dcea..2372bbf1d4 100644 --- a/plugins/GSdx/res/tfx.fx +++ b/plugins/GSdx/res/tfx.fx @@ -40,11 +40,12 @@ struct VS_INPUT { + float2 st : TEXCOORD0; + float4 c : COLOR0; + float q : TEXCOORD1; uint2 p : POSITION0; uint z : POSITION1; - float2 t : TEXCOORD0; - float q : TEXCOORD1; - float4 c : COLOR0; + uint2 uv : TEXCOORD2; float4 f : COLOR1; }; @@ -602,12 +603,12 @@ VS_OUTPUT vs_main(VS_INPUT input) { if(VS_FST) { - output.t.xy = input.t * TextureScale; + output.t.xy = input.uv * TextureScale; output.t.w = 1.0f; } else { - output.t.xy = input.t; + output.t.xy = input.st; output.t.w = input.q; } }