From 6c58f3039a7b93c99cfc77f169fa12fed0352e9c Mon Sep 17 00:00:00 2001 From: gabest11 Date: Tue, 26 May 2009 03:40:31 +0000 Subject: [PATCH] GSdx: bit more work on the vertex buffer, and broken ffxii fmv fixed again. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1266 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSDevice10.cpp | 97 +++++++++---- plugins/GSdx/GSDevice10.h | 11 +- plugins/GSdx/GSDevice9.cpp | 133 ++++++++++------- plugins/GSdx/GSDevice9.h | 10 +- plugins/GSdx/GSRendererHW.h | 12 +- plugins/GSdx/GSRendererHW10.cpp | 8 +- plugins/GSdx/GSRendererHW9.cpp | 35 +---- plugins/GSdx/GSRendererSW.cpp | 168 ++++++++++++++++++++++ plugins/GSdx/GSRendererSW.h | 168 +--------------------- plugins/GSdx/GSSetupPrimCodeGenerator.cpp | 2 - plugins/GSdx/GSState.cpp | 2 +- plugins/GSdx/GSTextureFX10.cpp | 63 +------- plugins/GSdx/GSTextureFX9.cpp | 57 +------- plugins/GSdx/GSTextureFX9.h | 6 - plugins/GSdx/GSVector.h | 39 ++++- 15 files changed, 398 insertions(+), 413 deletions(-) diff --git a/plugins/GSdx/GSDevice10.cpp b/plugins/GSdx/GSDevice10.cpp index f492eef496..bf22deb23d 100644 --- a/plugins/GSdx/GSDevice10.cpp +++ b/plugins/GSdx/GSDevice10.cpp @@ -44,6 +44,11 @@ GSDevice10::GSDevice10() { memset(m_ps_srv, 0, sizeof(m_ps_srv)); memset(m_ps_ss, 0, sizeof(m_ps_ss)); + + m_vertices.stride = 0; + m_vertices.start = 0; + m_vertices.count = 0; + m_vertices.limit = 0; } GSDevice10::~GSDevice10() @@ -115,16 +120,6 @@ bool GSDevice10::Create(HWND hWnd, bool vsync) hr = CompileShader(IDR_CONVERT10_FX, format("ps_main%d", i), NULL, &m_convert.ps[i]); } - memset(&bd, 0, sizeof(bd)); - - bd.Usage = D3D10_USAGE_DEFAULT; - bd.BindFlags = D3D10_BIND_VERTEX_BUFFER; - bd.CPUAccessFlags = 0; - bd.MiscFlags = 0; - bd.ByteWidth = 4 * sizeof(GSVertexPT1); - - hr = m_dev->CreateBuffer(&bd, NULL, &m_convert.vb); - memset(&dsd, 0, sizeof(dsd)); dsd.DepthEnable = false; @@ -258,6 +253,11 @@ void GSDevice10::BeginScene() { } +void GSDevice10::DrawPrimitive() +{ + m_dev->Draw(m_vertices.count, m_vertices.start); +} + void GSDevice10::EndScene() { PSSetShaderResources(NULL, NULL); @@ -265,6 +265,9 @@ void GSDevice10::EndScene() // not clearing the rt/ds gives a little fps boost in complex games (5-10%) // OMSetRenderTargets(NULL, NULL); + + m_vertices.start += m_vertices.count; + m_vertices.count = 0; } void GSDevice10::ClearRenderTarget(GSTexture* t, const GSVector4& c) @@ -439,11 +442,7 @@ void GSDevice10::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sr.z, sr.w)}, }; - D3D10_BOX box = {0, 0, 0, sizeof(vertices), 1, 1}; - - m_dev->UpdateSubresource(m_convert.vb, 0, &box, vertices, 0, 0); - - IASetVertexBuffer(m_convert.vb, sizeof(vertices[0])); + IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices)); IASetInputLayout(m_convert.il); IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); @@ -467,7 +466,7 @@ void GSDevice10::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, // - DrawPrimitive(countof(vertices)); + DrawPrimitive(); // @@ -508,6 +507,61 @@ void GSDevice10::DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool line StretchRect(st, sr, dt, dr, m_interlace.ps[shader], m_interlace.cb, linear); } +void GSDevice10::IASetVertexBuffer(const void* vertices, size_t stride, size_t count) +{ + ASSERT(m_vertices.count == 0); + + if(count > m_vertices.limit) + { + m_vertices.vb_old = m_vertices.vb; + m_vertices.vb = NULL; + m_vertices.start = 0; + m_vertices.count = 0; + m_vertices.limit = max(count * 3 / 2, 10000); + } + + if(m_vertices.vb == NULL) + { + D3D10_BUFFER_DESC bd; + + memset(&bd, 0, sizeof(bd)); + + bd.Usage = D3D10_USAGE_DYNAMIC; + bd.ByteWidth = m_vertices.limit * stride; + bd.BindFlags = D3D10_BIND_VERTEX_BUFFER; + bd.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE; + + HRESULT hr; + + hr = m_dev->CreateBuffer(&bd, NULL, &m_vertices.vb); + + if(FAILED(hr)) return; + } + + D3D10_MAP type = D3D10_MAP_WRITE_NO_OVERWRITE; + + if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride) + { + m_vertices.start = 0; + + type = D3D10_MAP_WRITE_DISCARD; + } + + void* v = NULL; + + if(SUCCEEDED(m_vertices.vb->Map(type, 0, &v))) + { + GSVector4i::storent((uint8*)v + m_vertices.start * stride, vertices, count * stride); + + m_vertices.vb->Unmap(); + } + + m_vertices.count = count; + m_vertices.stride = stride; + + IASetVertexBuffer(m_vertices.vb, stride); +} + void GSDevice10::IASetVertexBuffer(ID3D10Buffer* vb, size_t stride) { if(m_vb != vb || m_vb_stride != stride) @@ -688,11 +742,6 @@ void GSDevice10::OMSetRenderTargets(GSTexture* rt, GSTexture* ds) } } -void GSDevice10::DrawPrimitive(uint32 count, uint32 start) -{ - m_dev->Draw(count, start); -} - HRESULT GSDevice10::CompileShader(uint32 id, const string& entry, D3D10_SHADER_MACRO* macro, ID3D10VertexShader** ps, D3D10_INPUT_ELEMENT_DESC* layout, int count, ID3D10InputLayout** il) { HRESULT hr; @@ -703,7 +752,7 @@ HRESULT GSDevice10::CompileShader(uint32 id, const string& entry, D3D10_SHADER_M if(error) { - TRACE(_T("%s\n"), CString((LPCSTR)error->GetBufferPointer())); + printf("%s\n", (const char*)error->GetBufferPointer()); } if(FAILED(hr)) @@ -738,7 +787,7 @@ HRESULT GSDevice10::CompileShader(uint32 id, const string& entry, D3D10_SHADER_M if(error) { - TRACE(_T("%s\n"), CString((LPCSTR)error->GetBufferPointer())); + printf("%s\n", (const char*)error->GetBufferPointer()); } if(FAILED(hr)) @@ -766,7 +815,7 @@ HRESULT GSDevice10::CompileShader(uint32 id, const string& entry, D3D10_SHADER_M if(error) { - TRACE(_T("%s\n"), CString((LPCSTR)error->GetBufferPointer())); + printf("%s\n", (const char*)error->GetBufferPointer()); } if(FAILED(hr)) diff --git a/plugins/GSdx/GSDevice10.h b/plugins/GSdx/GSDevice10.h index cd5266acc7..38793b42b6 100644 --- a/plugins/GSdx/GSDevice10.h +++ b/plugins/GSdx/GSDevice10.h @@ -58,14 +58,18 @@ private: CComPtr m_dev; CComPtr m_swapchain; - CComPtr m_font; + + struct + { + CComPtr vb, vb_old; + size_t stride, start, count, limit; + } m_vertices; public: // TODO CComPtr m_rs; struct { - CComPtr vb; CComPtr il; CComPtr vs; CComPtr ps[7]; @@ -98,6 +102,7 @@ public: void Flip(); void BeginScene(); + void DrawPrimitive(); void EndScene(); void ClearRenderTarget(GSTexture* t, const GSVector4& c); @@ -116,6 +121,7 @@ public: void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D10PixelShader* ps, ID3D10Buffer* ps_cb, bool linear = true); void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D10PixelShader* ps, ID3D10Buffer* ps_cb, ID3D10BlendState* bs, bool linear = true); + void IASetVertexBuffer(const void* vertices, size_t stride, size_t count); void IASetVertexBuffer(ID3D10Buffer* vb, size_t stride); void IASetInputLayout(ID3D10InputLayout* layout); void IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY topology); @@ -128,7 +134,6 @@ public: void OMSetDepthStencilState(ID3D10DepthStencilState* dss, uint8 sref); void OMSetBlendState(ID3D10BlendState* bs, float bf); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds); - void DrawPrimitive(uint32 count, uint32 start = 0); ID3D10Device* operator->() {return m_dev;} operator ID3D10Device*() {return m_dev;} diff --git a/plugins/GSdx/GSDevice9.cpp b/plugins/GSdx/GSDevice9.cpp index 0da5457c6d..e84029c303 100644 --- a/plugins/GSdx/GSDevice9.cpp +++ b/plugins/GSdx/GSDevice9.cpp @@ -48,6 +48,11 @@ GSDevice9::GSDevice9() memset(&m_ddcaps, 0, sizeof(m_ddcaps)); memset(&m_d3dcaps, 0, sizeof(m_d3dcaps)); memset(m_ps_srvs, 0, sizeof(m_ps_srvs)); + + m_vertices.stride = 0; + m_vertices.start = 0; + m_vertices.count = 0; + m_vertices.limit = 0; } GSDevice9::~GSDevice9() @@ -143,8 +148,6 @@ bool GSDevice9::Create(HWND hWnd, bool vsync) CompileShader(IDR_CONVERT9_FX, format("ps_main%d", i), NULL, &m_convert.ps[i]); } - m_dev->CreateVertexBuffer(4 * sizeof(GSVertexPT1), D3DUSAGE_DYNAMIC, 0, D3DPOOL_DEFAULT, &m_convert.vb, NULL); - m_convert.dss.DepthEnable = false; m_convert.dss.StencilEnable = false; @@ -364,9 +367,39 @@ void GSDevice9::BeginScene() m_dev->BeginScene(); } +void GSDevice9::DrawPrimitive() +{ + int prims = 0; + + switch(m_topology) + { + case D3DPT_TRIANGLELIST: + prims = m_vertices.count / 3; + break; + case D3DPT_LINELIST: + prims = m_vertices.count / 2; + break; + case D3DPT_POINTLIST: + prims = m_vertices.count; + break; + case D3DPT_TRIANGLESTRIP: + case D3DPT_TRIANGLEFAN: + prims = m_vertices.count - 2; + break; + case D3DPT_LINESTRIP: + prims = m_vertices.count - 1; + break; + } + + m_dev->DrawPrimitive(m_topology, m_vertices.start, prims); +} + void GSDevice9::EndScene() { m_dev->EndScene(); + + m_vertices.start += m_vertices.count; + m_vertices.count = 0; } void GSDevice9::ClearRenderTarget(GSTexture* t, const GSVector4& c) @@ -550,16 +583,7 @@ void GSDevice9::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, c vertices[i].p.y += 1.0f / ds.y; } - void* buff = NULL; - - if(SUCCEEDED(m_convert.vb->Lock(0, 0, &buff, D3DLOCK_DISCARD))) - { - memcpy(buff, vertices, sizeof(vertices)); - - m_convert.vb->Unlock(); - } - - IASetVertexBuffer(m_convert.vb, sizeof(vertices[0])); + IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices)); IASetInputLayout(m_convert.il); IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP); @@ -579,7 +603,7 @@ void GSDevice9::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, c // - DrawPrimitive(countof(vertices)); + DrawPrimitive(); // @@ -620,6 +644,52 @@ void GSDevice9::DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linea StretchRect(st, sr, dt, dr, m_interlace.ps[shader], (const float*)&cb, 1, linear); } +void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t count) +{ + ASSERT(m_vertices.count == 0); + + if(count > m_vertices.limit) + { + m_vertices.vb_old = m_vertices.vb; + m_vertices.vb = NULL; + m_vertices.start = 0; + m_vertices.count = 0; + m_vertices.limit = max(count * 3 / 2, 10000); + } + + if(m_vertices.vb == NULL) + { + HRESULT hr; + + hr = m_dev->CreateVertexBuffer(m_vertices.limit * stride, D3DUSAGE_DYNAMIC, 0, D3DPOOL_DEFAULT, &m_vertices.vb, NULL); + + if(FAILED(hr)) return; + } + + uint32 flags = D3DLOCK_NOOVERWRITE; + + if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride) + { + m_vertices.start = 0; + + flags = D3DLOCK_DISCARD; + } + + void* v = NULL; + + if(SUCCEEDED(m_vertices.vb->Lock(m_vertices.start * stride, count * stride, &v, flags))) + { + GSVector4i::storent(v, vertices, count * stride); + + m_vertices.vb->Unlock(); + } + + m_vertices.count = count; + m_vertices.stride = stride; + + IASetVertexBuffer(m_vertices.vb, stride); +} + void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride) { if(m_vb != vb || m_vb_stride != stride) @@ -846,33 +916,6 @@ void GSDevice9::OMSetRenderTargets(GSTexture* rt, GSTexture* ds) } } -void GSDevice9::DrawPrimitive(uint32 count, uint32 start) -{ - int prims = 0; - - switch(m_topology) - { - case D3DPT_TRIANGLELIST: - prims = count / 3; - break; - case D3DPT_LINELIST: - prims = count / 2; - break; - case D3DPT_POINTLIST: - prims = count; - break; - case D3DPT_TRIANGLESTRIP: - case D3DPT_TRIANGLEFAN: - prims = count - 2; - break; - case D3DPT_LINESTRIP: - prims = count - 1; - break; - } - - m_dev->DrawPrimitive(m_topology, start, prims); -} - // FIXME: D3DXCompileShaderFromResource of d3dx9 v37 (march 2008) calls GetFullPathName on id for some reason and then crashes static HRESULT LoadShader(uint32 id, LPCSTR& data, uint32& size) @@ -934,9 +977,7 @@ HRESULT GSDevice9::CompileShader(uint32 id, const string& entry, const D3DXMACRO } else if(error) { - LPCSTR msg = (LPCSTR)error->GetBufferPointer(); - - TRACE(_T("%s\n"), CString(msg)); + printf("%s\n", (const char*)error->GetBufferPointer()); } ASSERT(SUCCEEDED(hr)); @@ -993,14 +1034,10 @@ HRESULT GSDevice9::CompileShader(uint32 id, const string& entry, const D3DXMACRO if(SUCCEEDED(hr)) { hr = m_dev->CreatePixelShader((DWORD*)shader->GetBufferPointer(), ps); - - ASSERT(SUCCEEDED(hr)); } else if(error) { - LPCSTR msg = (LPCSTR)error->GetBufferPointer(); - - TRACE(_T("%s\n"), CString(msg)); + printf("%s\n", (const char*)error->GetBufferPointer()); } ASSERT(SUCCEEDED(hr)); diff --git a/plugins/GSdx/GSDevice9.h b/plugins/GSdx/GSDevice9.h index dc13b3d894..a9a1123341 100644 --- a/plugins/GSdx/GSDevice9.h +++ b/plugins/GSdx/GSDevice9.h @@ -96,13 +96,18 @@ private: CComPtr m_dev; CComPtr m_swapchain; + struct + { + CComPtr vb, vb_old; + size_t stride, start, count, limit; + } m_vertices; + public: // TODO D3DPRESENT_PARAMETERS m_pp; CComPtr m_font; struct { - CComPtr vb; CComPtr il; CComPtr vs; CComPtr ps[7]; @@ -134,6 +139,7 @@ public: void Flip(); void BeginScene(); + void DrawPrimitive(); void EndScene(); void ClearRenderTarget(GSTexture* t, const GSVector4& c); @@ -154,6 +160,7 @@ public: void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, bool linear = true); void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true); + void IASetVertexBuffer(const void* vertices, size_t stride, size_t count); void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride); void IASetInputLayout(IDirect3DVertexDeclaration9* layout); void IASetPrimitiveTopology(D3DPRIMITIVETYPE topology); @@ -165,7 +172,6 @@ public: void OMSetDepthStencilState(Direct3DDepthStencilState9* dss, uint32 sref); void OMSetBlendState(Direct3DBlendState9* bs, uint32 bf); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds); - void DrawPrimitive(uint32 count, uint32 start = 0); IDirect3DDevice9* operator->() {return m_dev;} operator IDirect3DDevice9*() {return m_dev;} diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index 1fcde06e94..98bcb59d65 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -361,7 +361,7 @@ protected: int prim = PRIM->PRIM; - if(!OverrideInput(prim, rt->m_texture, ds->m_texture, tex ? tex->m_texture : NULL)) + if(!OverrideInput(prim, rt->m_texture, ds->m_texture, tex)) { return; } @@ -396,9 +396,9 @@ protected: m_tc->InvalidateTextures(context->FRAME, context->ZBUF); } - virtual void Draw(int prim, GSTexture* rt, GSTexture* ds, typename GSTextureCache::GSCachedTexture* tex) = 0; + virtual void Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex) = 0; - virtual bool OverrideInput(int& prim, GSTexture* rt, GSTexture* ds, GSTexture* t) + virtual bool OverrideInput(int& prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* t) { #pragma region ffxii pal video conversion @@ -437,9 +437,11 @@ protected: ok = false; - t = m_dev->CreateTexture(512, 512); + m_dev->Recycle(t->m_texture); - t->Update(GSVector4i(0, 0, 448, 512), video, 512 * 4); + t->m_texture = m_dev->CreateTexture(512, 512); + + t->m_texture->Update(GSVector4i(0, 0, 448, 512), video, 512 * 4); m_vertices[0] = m_vertices[0]; m_vertices[1] = m_vertices[1]; diff --git a/plugins/GSdx/GSRendererHW10.cpp b/plugins/GSdx/GSRendererHW10.cpp index f26c67cfd1..5f085798a1 100644 --- a/plugins/GSdx/GSRendererHW10.cpp +++ b/plugins/GSdx/GSRendererHW10.cpp @@ -561,11 +561,7 @@ void GSRendererHW10::SetupDATE(GSTexture* rt, GSTexture* ds) {GSVector4(mm.z, -mm.w, 0.5f, 1.0f), GSVector2(uv.z, uv.w)}, }; - D3D10_BOX box = {0, 0, 0, sizeof(vertices), 1, 1}; - - (*dev)->UpdateSubresource(dev->m_convert.vb, 0, &box, vertices, 0, 0); - - dev->IASetVertexBuffer(dev->m_convert.vb, sizeof(vertices[0])); + dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices)); dev->IASetInputLayout(dev->m_convert.il); dev->IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); @@ -589,7 +585,7 @@ void GSRendererHW10::SetupDATE(GSTexture* rt, GSTexture* ds) // set - dev->DrawPrimitive(countof(vertices)); + dev->DrawPrimitive(); // diff --git a/plugins/GSdx/GSRendererHW9.cpp b/plugins/GSdx/GSRendererHW9.cpp index 4d366ea251..851a02b50d 100644 --- a/plugins/GSdx/GSRendererHW9.cpp +++ b/plugins/GSdx/GSRendererHW9.cpp @@ -536,16 +536,7 @@ void GSRendererHW9::SetupDATE(GSTexture* rt, GSTexture* ds) {GSVector4(mm.z, -mm.w, 0.5f, 1.0f), GSVector2(uv.z, uv.w)}, }; - void* buff = NULL; - - if(SUCCEEDED(dev->m_convert.vb->Lock(0, 0, &buff, D3DLOCK_DISCARD))) - { - memcpy(buff, vertices, sizeof(vertices)); - - dev->m_convert.vb->Unlock(); - } - - dev->IASetVertexBuffer(dev->m_convert.vb, sizeof(vertices[0])); + dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices)); dev->IASetInputLayout(dev->m_convert.il); dev->IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP); @@ -565,7 +556,7 @@ void GSRendererHW9::SetupDATE(GSTexture* rt, GSTexture* ds) // - dev->DrawPrimitive(countof(vertices)); + dev->DrawPrimitive(); // @@ -579,11 +570,6 @@ void GSRendererHW9::UpdateFBA(GSTexture* rt) { GSDevice9* dev = (GSDevice9*)m_dev; - int w = rt->GetWidth(); - int h = rt->GetHeight(); - - GSVector4 mm = GSVector4(-1, -1, 1, 1); - dev->BeginScene(); // om @@ -593,6 +579,8 @@ void GSRendererHW9::UpdateFBA(GSTexture* rt) // ia + GSVector4 mm = GSVector4(-1, -1, 1, 1); + GSVertexPT1 vertices[] = { {GSVector4(mm.x, -mm.y, 0.5f, 1.0f), GSVector2(0, 0)}, @@ -601,16 +589,7 @@ void GSRendererHW9::UpdateFBA(GSTexture* rt) {GSVector4(mm.z, -mm.w, 0.5f, 1.0f), GSVector2(0, 0)}, }; - void* buff = NULL; - - if(SUCCEEDED(dev->m_convert.vb->Lock(0, 0, &buff, D3DLOCK_DISCARD))) - { - memcpy(buff, vertices, sizeof(vertices)); - - dev->m_convert.vb->Unlock(); - } - - dev->IASetVertexBuffer(dev->m_convert.vb, sizeof(vertices[0])); + dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices)); dev->IASetInputLayout(dev->m_convert.il); dev->IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP); @@ -624,11 +603,11 @@ void GSRendererHW9::UpdateFBA(GSTexture* rt) // rs - dev->RSSet(w, h); + dev->RSSet(rt->GetWidth(), rt->GetHeight()); // - dev->DrawPrimitive(countof(vertices)); + dev->DrawPrimitive(); // diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index 778cb3e4c9..2b78701155 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -793,3 +793,171 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) } } +template +void GSRendererSW::VertexKick(bool skip) +{ + const GSDrawingContext* context = m_context; + + GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]); + + xy = xy.insert16<3>(m_v.FOG.F); + xy = xy.upl16(); + xy -= context->XYOFFSET; + + GSVertexSW v; + + v.p = GSVector4(xy) * g_pos_scale; + + v.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7); + + if(tme) + { + float q; + + if(fst) + { + v.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4)); + q = 1.0f; + } + else + { + v.t = GSVector4(m_v.ST.S, m_v.ST.T); + v.t *= GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH); + q = m_v.RGBAQ.Q; + } + + v.t = v.t.xyxy(GSVector4::load(q)); + } + + GSVertexSW& dst = m_vl.AddTail(); + + dst = v; + + dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the uint32 => float => uint32 conversion + + int count = 0; + + if(GSVertexSW* v = DrawingKick(skip, count)) + { +if(!m_dump) +{ + GSVector4 pmin, pmax; + + switch(prim) + { + case GS_POINTLIST: + pmin = v[0].p; + pmax = v[0].p; + break; + case GS_LINELIST: + case GS_LINESTRIP: + case GS_SPRITE: + pmin = v[0].p.minv(v[1].p); + pmax = v[0].p.maxv(v[1].p); + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + pmin = v[0].p.minv(v[1].p).minv(v[2].p); + pmax = v[0].p.maxv(v[1].p).maxv(v[2].p); + break; + } + + GSVector4 scissor = context->scissor.ex; + + GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); + + switch(prim) + { + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + case GS_SPRITE: + test |= pmin.ceil() == pmax.ceil(); + break; + } + + switch(prim) + { + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + // are in line or just two of them are the same (cross product == 0) + GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz(); + test |= tmp == tmp.yxwz(); + break; + } + + if(test.mask() & 3) + { + return; + } +} + switch(prim) + { + case GS_POINTLIST: + break; + case GS_LINELIST: + case GS_LINESTRIP: + if(PRIM->IIP == 0) {v[0].c = v[1].c;} + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;} + break; + case GS_SPRITE: + break; + } + + if(m_count < 30 && m_count >= 3) + { + GSVertexSW* v = &m_vertices[m_count - 3]; + + int tl = 0; + int br = 0; + + bool isquad = false; + + switch(prim) + { + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + case GS_TRIANGLELIST: + isquad = GSVertexSW::IsQuad(v, tl, br); + break; + } + + if(isquad) + { + m_count -= 3; + + if(m_count > 0) + { + tl += m_count; + br += m_count; + + Flush(); + } + + if(tl != 0) m_vertices[0] = m_vertices[tl]; + if(br != 1) m_vertices[1] = m_vertices[br]; + + m_count = 2; + + uint32 tmp = PRIM->PRIM; + PRIM->PRIM = GS_SPRITE; + + Flush(); + + PRIM->PRIM = tmp; + + m_perfmon.Put(GSPerfMon::Quad, 1); + + return; + } + } + + m_count += count; + } +} diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index 367f999d79..a3e083d799 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -54,171 +54,5 @@ public: virtual ~GSRendererSW(); template - void VertexKick(bool skip) - { - const GSDrawingContext* context = m_context; - - GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]); - - xy = xy.insert16<3>(m_v.FOG.F); - xy = xy.upl16(); - xy -= context->XYOFFSET; - - GSVertexSW v; - - v.p = GSVector4(xy) * g_pos_scale; - - v.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7); - - if(tme) - { - float q; - - if(fst) - { - v.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4)); - q = 1.0f; - } - else - { - v.t = GSVector4(m_v.ST.S, m_v.ST.T); - v.t *= GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH); - q = m_v.RGBAQ.Q; - } - - v.t = v.t.xyxy(GSVector4::load(q)); - } - - GSVertexSW& dst = m_vl.AddTail(); - - dst = v; - - dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the uint32 => float => uint32 conversion - - int count = 0; - - if(GSVertexSW* v = DrawingKick(skip, count)) - { -if(!m_dump) -{ - GSVector4 pmin, pmax; - - switch(prim) - { - case GS_POINTLIST: - pmin = v[0].p; - pmax = v[0].p; - break; - case GS_LINELIST: - case GS_LINESTRIP: - case GS_SPRITE: - pmin = v[0].p.minv(v[1].p); - pmax = v[0].p.maxv(v[1].p); - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - pmin = v[0].p.minv(v[1].p).minv(v[2].p); - pmax = v[0].p.maxv(v[1].p).maxv(v[2].p); - break; - } - - GSVector4 scissor = context->scissor.ex; - - GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); - - switch(prim) - { - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - case GS_SPRITE: - test |= pmin.ceil() == pmax.ceil(); - break; - } - - switch(prim) - { - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - // are in line or just two of them are the same (cross product == 0) - GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz(); - test |= tmp == tmp.yxwz(); - break; - } - - if(test.mask() & 3) - { - return; - } -} - switch(prim) - { - case GS_POINTLIST: - break; - case GS_LINELIST: - case GS_LINESTRIP: - if(PRIM->IIP == 0) {v[0].c = v[1].c;} - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;} - break; - case GS_SPRITE: - break; - } - - if(m_count < 30 && m_count >= 3) - { - GSVertexSW* v = &m_vertices[m_count - 3]; - - int tl = 0; - int br = 0; - - bool isquad = false; - - switch(prim) - { - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - case GS_TRIANGLELIST: - isquad = GSVertexSW::IsQuad(v, tl, br); - break; - } - - if(isquad) - { - m_count -= 3; - - if(m_count > 0) - { - tl += m_count; - br += m_count; - - Flush(); - } - - if(tl != 0) m_vertices[0] = m_vertices[tl]; - if(br != 1) m_vertices[1] = m_vertices[br]; - - m_count = 2; - - uint32 tmp = PRIM->PRIM; - PRIM->PRIM = GS_SPRITE; - - Flush(); - - PRIM->PRIM = tmp; - - m_perfmon.Put(GSPerfMon::Quad, 1); - - return; - } - } - - m_count += count; - } - } + void VertexKick(bool skip); }; diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.cpp index 57f204f855..3efc752076 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.cpp @@ -44,8 +44,6 @@ GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, u void GSSetupPrimCodeGenerator::Generate() { - const int params = 0; - if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip) { for(int i = 0; i < 5; i++) diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index ab2f4ea660..65f7a6cf77 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -1213,7 +1213,7 @@ template void GSState::Transfer(uint8* mem, uint32 size) { GIFReg r; r.u64 = path.tag.PRIM; - (this->*m_fpGIFRegHandlers[GIF_A_D_REG_PRIM])(&r); + GIFRegHandlerPRIM(&r); } } } diff --git a/plugins/GSdx/GSTextureFX10.cpp b/plugins/GSdx/GSTextureFX10.cpp index 925ddd0b9b..d03cf0ac0d 100644 --- a/plugins/GSdx/GSTextureFX10.cpp +++ b/plugins/GSdx/GSTextureFX10.cpp @@ -25,9 +25,6 @@ GSTextureFX10::GSTextureFX10() : m_dev(NULL) - , m_vb_max(0) - , m_vb_start(0) - , m_vb_count(0) { memset(&m_vs_cb_cache, 0, sizeof(m_vs_cb_cache)); memset(&m_ps_cb_cache, 0, sizeof(m_ps_cb_cache)); @@ -94,63 +91,7 @@ bool GSTextureFX10::Create(GSDevice10* dev) bool GSTextureFX10::SetupIA(const GSVertexHW10* vertices, int count, D3D10_PRIMITIVE_TOPOLOGY prim) { - HRESULT hr; - - if(max(count * 3 / 2, 10000) > m_vb_max) - { - m_vb_old = m_vb; - m_vb = NULL; - m_vb_max = max(count * 2, 10000); - m_vb_start = 0; - m_vb_count = 0; - } - - if(!m_vb) - { - D3D10_BUFFER_DESC bd; - - memset(&bd, 0, sizeof(bd)); - - bd.Usage = D3D10_USAGE_DYNAMIC; - bd.ByteWidth = m_vb_max * sizeof(vertices[0]); - bd.BindFlags = D3D10_BIND_VERTEX_BUFFER; - bd.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE; - - hr = (*m_dev)->CreateBuffer(&bd, NULL, &m_vb); - - if(FAILED(hr)) return false; - } - - GSVertexHW10* v = NULL; - - int next = m_vb_start + m_vb_count; - - if(next + count <= m_vb_max) - { - if(SUCCEEDED(m_vb->Map(D3D10_MAP_WRITE_NO_OVERWRITE, 0, (void**)&v))) - { - memcpy(&v[next], vertices, count * sizeof(vertices[0])); - - m_vb->Unmap(); - } - - m_vb_start = next; - m_vb_count = count; - } - else - { - if(SUCCEEDED(m_vb->Map(D3D10_MAP_WRITE_DISCARD, 0, (void**)&v))) - { - memcpy(v, vertices, count * sizeof(vertices[0])); - - m_vb->Unmap(); - } - - m_vb_start = 0; - m_vb_count = count; - } - - m_dev->IASetVertexBuffer(m_vb, sizeof(vertices[0])); + m_dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), count); m_dev->IASetInputLayout(m_il); m_dev->IASetPrimitiveTopology(prim); @@ -589,5 +530,5 @@ void GSTextureFX10::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, void GSTextureFX10::Draw() { - m_dev->DrawPrimitive(m_vb_count, m_vb_start); + m_dev->DrawPrimitive(); } diff --git a/plugins/GSdx/GSTextureFX9.cpp b/plugins/GSdx/GSTextureFX9.cpp index 433b8565a7..db8c34c670 100644 --- a/plugins/GSdx/GSTextureFX9.cpp +++ b/plugins/GSdx/GSTextureFX9.cpp @@ -25,9 +25,6 @@ GSTextureFX9::GSTextureFX9() : m_dev(NULL) - , m_vb_max(0) - , m_vb_start(0) - , m_vb_count(0) { } @@ -89,57 +86,7 @@ GSTexture* GSTextureFX9::CreateMskFix(uint32 size, uint32 msk, uint32 fix) bool GSTextureFX9::SetupIA(const GSVertexHW9* vertices, int count, D3DPRIMITIVETYPE prim) { - HRESULT hr; - - if(max(count * 3 / 2, 10000) > m_vb_max) - { - m_vb_old = m_vb; - m_vb = NULL; - m_vb_max = max(count * 2, 10000); - m_vb_start = 0; - m_vb_count = 0; - } - - if(!m_vb) - { - hr = (*m_dev)->CreateVertexBuffer(m_vb_max * sizeof(vertices[0]), D3DUSAGE_DYNAMIC, 0, D3DPOOL_DEFAULT, &m_vb, NULL); - - if(FAILED(hr)) return false; - } - - GSVertexHW9* v = NULL; - - int next = m_vb_start + m_vb_count; - int size = count * sizeof(vertices[0]); - - if(next + count <= m_vb_max) - { - int offset = next * sizeof(vertices[0]); - - if(SUCCEEDED(m_vb->Lock(offset, size, (void**)&v, D3DLOCK_NOOVERWRITE))) - { - memcpy(v, vertices, size); - - m_vb->Unlock(); - } - - m_vb_start = next; - m_vb_count = count; - } - else - { - if(SUCCEEDED(m_vb->Lock(0, size, (void**)&v, D3DLOCK_DISCARD))) - { - memcpy(v, vertices, size); - - m_vb->Unlock(); - } - - m_vb_start = 0; - m_vb_count = count; - } - - m_dev->IASetVertexBuffer(m_vb, sizeof(vertices[0])); + m_dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), count); m_dev->IASetInputLayout(m_il); m_dev->IASetPrimitiveTopology(prim); @@ -526,5 +473,5 @@ void GSTextureFX9::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, void GSTextureFX9::Draw() { - m_dev->DrawPrimitive(m_vb_count, m_vb_start); + m_dev->DrawPrimitive(); } diff --git a/plugins/GSdx/GSTextureFX9.h b/plugins/GSdx/GSTextureFX9.h index 831f72e1f6..65bb7bbfbf 100644 --- a/plugins/GSdx/GSTextureFX9.h +++ b/plugins/GSdx/GSTextureFX9.h @@ -155,12 +155,6 @@ private: hash_map m_ps_ss; hash_map m_om_dss; hash_map m_om_bs; - - CComPtr m_vb, m_vb_old; - int m_vb_max; - int m_vb_start; - int m_vb_count; - hash_map m_mskfix; GSTexture* CreateMskFix(uint32 size, uint32 msk, uint32 fix); diff --git a/plugins/GSdx/GSVector.h b/plugins/GSdx/GSVector.h index cd28cebad4..cfd2bc0203 100644 --- a/plugins/GSdx/GSVector.h +++ b/plugins/GSdx/GSVector.h @@ -1599,14 +1599,18 @@ public: #endif - #if _M_SSE >= 0x401 - static GSVector4i loadnt(const void* p) { - return GSVector4i(_mm_stream_load_si128((__m128i*)p)); - } + #if _M_SSE >= 0x401 - #endif + return GSVector4i(_mm_stream_load_si128((__m128i*)p)); + + #else + + return GSVector4i(_mm_load_si128((__m128i*)p)); + + #endif + } static GSVector4i loadl(const void* p) { @@ -1696,6 +1700,31 @@ public: #endif + static void storent(void* RESTRICT dst, const void* RESTRICT src, size_t size) + { + const GSVector4i* s = (const GSVector4i*)src; + GSVector4i* d = (GSVector4i*)dst; + + if(size == 0) return; + + size_t i = 0; + size_t j = size >> 6; + + for(; i < j; i++, s += 4, d += 4) + { + storent(&d[0], s[0]); + storent(&d[1], s[1]); + storent(&d[2], s[2]); + storent(&d[3], s[3]); + } + + size &= 63; + + if(size == 0) return; + + memcpy(d, s, size); + } + __forceinline static void transpose(GSVector4i& a, GSVector4i& b, GSVector4i& c, GSVector4i& d) { _MM_TRANSPOSE4_SI128(a.m, b.m, c.m, d.m);