GSdx: bit more work on the vertex buffer, and broken ffxii fmv fixed again.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1266 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-05-26 03:40:31 +00:00
parent 22535ad886
commit 6c58f3039a
15 changed files with 398 additions and 413 deletions

View File

@ -44,6 +44,11 @@ GSDevice10::GSDevice10()
{ {
memset(m_ps_srv, 0, sizeof(m_ps_srv)); memset(m_ps_srv, 0, sizeof(m_ps_srv));
memset(m_ps_ss, 0, sizeof(m_ps_ss)); memset(m_ps_ss, 0, sizeof(m_ps_ss));
m_vertices.stride = 0;
m_vertices.start = 0;
m_vertices.count = 0;
m_vertices.limit = 0;
} }
GSDevice10::~GSDevice10() GSDevice10::~GSDevice10()
@ -115,16 +120,6 @@ bool GSDevice10::Create(HWND hWnd, bool vsync)
hr = CompileShader(IDR_CONVERT10_FX, format("ps_main%d", i), NULL, &m_convert.ps[i]); hr = CompileShader(IDR_CONVERT10_FX, format("ps_main%d", i), NULL, &m_convert.ps[i]);
} }
memset(&bd, 0, sizeof(bd));
bd.Usage = D3D10_USAGE_DEFAULT;
bd.BindFlags = D3D10_BIND_VERTEX_BUFFER;
bd.CPUAccessFlags = 0;
bd.MiscFlags = 0;
bd.ByteWidth = 4 * sizeof(GSVertexPT1);
hr = m_dev->CreateBuffer(&bd, NULL, &m_convert.vb);
memset(&dsd, 0, sizeof(dsd)); memset(&dsd, 0, sizeof(dsd));
dsd.DepthEnable = false; dsd.DepthEnable = false;
@ -258,6 +253,11 @@ void GSDevice10::BeginScene()
{ {
} }
void GSDevice10::DrawPrimitive()
{
m_dev->Draw(m_vertices.count, m_vertices.start);
}
void GSDevice10::EndScene() void GSDevice10::EndScene()
{ {
PSSetShaderResources(NULL, NULL); PSSetShaderResources(NULL, NULL);
@ -265,6 +265,9 @@ void GSDevice10::EndScene()
// not clearing the rt/ds gives a little fps boost in complex games (5-10%) // not clearing the rt/ds gives a little fps boost in complex games (5-10%)
// OMSetRenderTargets(NULL, NULL); // OMSetRenderTargets(NULL, NULL);
m_vertices.start += m_vertices.count;
m_vertices.count = 0;
} }
void GSDevice10::ClearRenderTarget(GSTexture* t, const GSVector4& c) void GSDevice10::ClearRenderTarget(GSTexture* t, const GSVector4& c)
@ -439,11 +442,7 @@ void GSDevice10::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
{GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sr.z, sr.w)}, {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sr.z, sr.w)},
}; };
D3D10_BOX box = {0, 0, 0, sizeof(vertices), 1, 1}; IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
m_dev->UpdateSubresource(m_convert.vb, 0, &box, vertices, 0, 0);
IASetVertexBuffer(m_convert.vb, sizeof(vertices[0]));
IASetInputLayout(m_convert.il); IASetInputLayout(m_convert.il);
IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
@ -467,7 +466,7 @@ void GSDevice10::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
// //
DrawPrimitive(countof(vertices)); DrawPrimitive();
// //
@ -508,6 +507,61 @@ void GSDevice10::DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool line
StretchRect(st, sr, dt, dr, m_interlace.ps[shader], m_interlace.cb, linear); StretchRect(st, sr, dt, dr, m_interlace.ps[shader], m_interlace.cb, linear);
} }
// Copies `count` vertices of `stride` bytes each from `vertices` into the
// device-owned dynamic vertex buffer (m_vertices.vb) and binds that buffer.
//
// The buffer is (re)created on demand. Data is appended after the previously
// used range with D3D10_MAP_WRITE_NO_OVERWRITE while it fits; otherwise the
// write restarts at offset 0 with D3D10_MAP_WRITE_DISCARD.
//
// m_vertices.limit is the capacity expressed in vertices of m_vertices.stride.
// The byte size of the buffer is fixed at creation time, so when the stride
// changes the limit is rescaled; this keeps limit * stride from ever exceeding
// the allocated size (previously a larger stride with count <= limit would
// write past the end of the buffer).
//
// On CreateBuffer failure the function returns without uploading or binding.
void GSDevice10::IASetVertexBuffer(const void* vertices, size_t stride, size_t count)
{
	ASSERT(m_vertices.count == 0);

	bool discard = false;

	if(stride != m_vertices.stride)
	{
		// Re-express the existing capacity in units of the new stride (rounded down).
		m_vertices.limit = stride > 0 ? m_vertices.limit * m_vertices.stride / stride : 0;
		m_vertices.stride = stride;

		// Offsets computed with the old stride are meaningless now, start over.
		m_vertices.start = 0;

		discard = true;
	}

	if(count > m_vertices.limit)
	{
		// Keep the previous buffer referenced in vb_old while a bigger one replaces it.
		m_vertices.vb_old = m_vertices.vb;
		m_vertices.vb = NULL;
		m_vertices.start = 0;
		m_vertices.count = 0;
		m_vertices.limit = max(count * 3 / 2, 10000);
	}

	if(m_vertices.vb == NULL)
	{
		D3D10_BUFFER_DESC bd;

		memset(&bd, 0, sizeof(bd));

		bd.Usage = D3D10_USAGE_DYNAMIC;
		bd.ByteWidth = m_vertices.limit * stride;
		bd.BindFlags = D3D10_BIND_VERTEX_BUFFER;
		bd.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE;

		HRESULT hr;

		hr = m_dev->CreateBuffer(&bd, NULL, &m_vertices.vb);

		if(FAILED(hr)) return;
	}

	if(m_vertices.start + count > m_vertices.limit)
	{
		// Not enough room behind the last batch, wrap around.
		m_vertices.start = 0;

		discard = true;
	}

	D3D10_MAP type = discard ? D3D10_MAP_WRITE_DISCARD : D3D10_MAP_WRITE_NO_OVERWRITE;

	void* v = NULL;

	if(SUCCEEDED(m_vertices.vb->Map(type, 0, &v)))
	{
		GSVector4i::storent((uint8*)v + m_vertices.start * stride, vertices, count * stride);

		m_vertices.vb->Unmap();
	}

	m_vertices.count = count;

	IASetVertexBuffer(m_vertices.vb, stride);
}
void GSDevice10::IASetVertexBuffer(ID3D10Buffer* vb, size_t stride) void GSDevice10::IASetVertexBuffer(ID3D10Buffer* vb, size_t stride)
{ {
if(m_vb != vb || m_vb_stride != stride) if(m_vb != vb || m_vb_stride != stride)
@ -688,11 +742,6 @@ void GSDevice10::OMSetRenderTargets(GSTexture* rt, GSTexture* ds)
} }
} }
// Issues a non-indexed draw of `count` vertices beginning at vertex `start`
// by forwarding both values unchanged to the device's Draw().
void GSDevice10::DrawPrimitive(uint32 count, uint32 start)
{
m_dev->Draw(count, start);
}
HRESULT GSDevice10::CompileShader(uint32 id, const string& entry, D3D10_SHADER_MACRO* macro, ID3D10VertexShader** ps, D3D10_INPUT_ELEMENT_DESC* layout, int count, ID3D10InputLayout** il) HRESULT GSDevice10::CompileShader(uint32 id, const string& entry, D3D10_SHADER_MACRO* macro, ID3D10VertexShader** ps, D3D10_INPUT_ELEMENT_DESC* layout, int count, ID3D10InputLayout** il)
{ {
HRESULT hr; HRESULT hr;
@ -703,7 +752,7 @@ HRESULT GSDevice10::CompileShader(uint32 id, const string& entry, D3D10_SHADER_M
if(error) if(error)
{ {
TRACE(_T("%s\n"), CString((LPCSTR)error->GetBufferPointer())); printf("%s\n", (const char*)error->GetBufferPointer());
} }
if(FAILED(hr)) if(FAILED(hr))
@ -738,7 +787,7 @@ HRESULT GSDevice10::CompileShader(uint32 id, const string& entry, D3D10_SHADER_M
if(error) if(error)
{ {
TRACE(_T("%s\n"), CString((LPCSTR)error->GetBufferPointer())); printf("%s\n", (const char*)error->GetBufferPointer());
} }
if(FAILED(hr)) if(FAILED(hr))
@ -766,7 +815,7 @@ HRESULT GSDevice10::CompileShader(uint32 id, const string& entry, D3D10_SHADER_M
if(error) if(error)
{ {
TRACE(_T("%s\n"), CString((LPCSTR)error->GetBufferPointer())); printf("%s\n", (const char*)error->GetBufferPointer());
} }
if(FAILED(hr)) if(FAILED(hr))

View File

@ -58,14 +58,18 @@ private:
CComPtr<ID3D10Device> m_dev; CComPtr<ID3D10Device> m_dev;
CComPtr<IDXGISwapChain> m_swapchain; CComPtr<IDXGISwapChain> m_swapchain;
CComPtr<ID3DX10Font> m_font;
// State of the device-owned dynamic vertex buffer that
// IASetVertexBuffer(vertices, stride, count) fills and DrawPrimitive() draws from.
struct
{
// vb: buffer currently written to and bound.
// vb_old: receives the previous buffer when a larger one has to be allocated.
CComPtr<ID3D10Buffer> vb, vb_old;
// stride: size in bytes of one vertex of the most recent upload.
// start: index of the first vertex of the current batch (advanced by count in EndScene).
// count: number of vertices in the current batch (reset to 0 in EndScene).
// limit: number of vertices the buffer was sized for when it was created.
size_t stride, start, count, limit;
} m_vertices;
public: // TODO public: // TODO
CComPtr<ID3D10RasterizerState> m_rs; CComPtr<ID3D10RasterizerState> m_rs;
struct struct
{ {
CComPtr<ID3D10Buffer> vb;
CComPtr<ID3D10InputLayout> il; CComPtr<ID3D10InputLayout> il;
CComPtr<ID3D10VertexShader> vs; CComPtr<ID3D10VertexShader> vs;
CComPtr<ID3D10PixelShader> ps[7]; CComPtr<ID3D10PixelShader> ps[7];
@ -98,6 +102,7 @@ public:
void Flip(); void Flip();
void BeginScene(); void BeginScene();
void DrawPrimitive();
void EndScene(); void EndScene();
void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, const GSVector4& c);
@ -116,6 +121,7 @@ public:
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D10PixelShader* ps, ID3D10Buffer* ps_cb, bool linear = true); void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D10PixelShader* ps, ID3D10Buffer* ps_cb, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D10PixelShader* ps, ID3D10Buffer* ps_cb, ID3D10BlendState* bs, bool linear = true); void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D10PixelShader* ps, ID3D10Buffer* ps_cb, ID3D10BlendState* bs, bool linear = true);
void IASetVertexBuffer(const void* vertices, size_t stride, size_t count);
void IASetVertexBuffer(ID3D10Buffer* vb, size_t stride); void IASetVertexBuffer(ID3D10Buffer* vb, size_t stride);
void IASetInputLayout(ID3D10InputLayout* layout); void IASetInputLayout(ID3D10InputLayout* layout);
void IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY topology); void IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY topology);
@ -128,7 +134,6 @@ public:
void OMSetDepthStencilState(ID3D10DepthStencilState* dss, uint8 sref); void OMSetDepthStencilState(ID3D10DepthStencilState* dss, uint8 sref);
void OMSetBlendState(ID3D10BlendState* bs, float bf); void OMSetBlendState(ID3D10BlendState* bs, float bf);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds);
void DrawPrimitive(uint32 count, uint32 start = 0);
ID3D10Device* operator->() {return m_dev;} ID3D10Device* operator->() {return m_dev;}
operator ID3D10Device*() {return m_dev;} operator ID3D10Device*() {return m_dev;}

View File

@ -48,6 +48,11 @@ GSDevice9::GSDevice9()
memset(&m_ddcaps, 0, sizeof(m_ddcaps)); memset(&m_ddcaps, 0, sizeof(m_ddcaps));
memset(&m_d3dcaps, 0, sizeof(m_d3dcaps)); memset(&m_d3dcaps, 0, sizeof(m_d3dcaps));
memset(m_ps_srvs, 0, sizeof(m_ps_srvs)); memset(m_ps_srvs, 0, sizeof(m_ps_srvs));
m_vertices.stride = 0;
m_vertices.start = 0;
m_vertices.count = 0;
m_vertices.limit = 0;
} }
GSDevice9::~GSDevice9() GSDevice9::~GSDevice9()
@ -143,8 +148,6 @@ bool GSDevice9::Create(HWND hWnd, bool vsync)
CompileShader(IDR_CONVERT9_FX, format("ps_main%d", i), NULL, &m_convert.ps[i]); CompileShader(IDR_CONVERT9_FX, format("ps_main%d", i), NULL, &m_convert.ps[i]);
} }
m_dev->CreateVertexBuffer(4 * sizeof(GSVertexPT1), D3DUSAGE_DYNAMIC, 0, D3DPOOL_DEFAULT, &m_convert.vb, NULL);
m_convert.dss.DepthEnable = false; m_convert.dss.DepthEnable = false;
m_convert.dss.StencilEnable = false; m_convert.dss.StencilEnable = false;
@ -364,9 +367,39 @@ void GSDevice9::BeginScene()
m_dev->BeginScene(); m_dev->BeginScene();
} }
// Draws the vertex range currently described by m_vertices (start/count)
// using the topology stored in m_topology.
//
// Direct3D 9 expects a primitive count rather than a vertex count, so the
// vertex count is converted according to the topology. The conversion is done
// in a signed type: m_vertices.count is unsigned and "count - 2" / "count - 1"
// would otherwise wrap around for strips or fans with too few vertices.
// Nothing is drawn when the batch does not contain at least one primitive
// or the topology is not one of the handled values.
void GSDevice9::DrawPrimitive()
{
	const int count = (int)m_vertices.count;

	int prims = 0;

	switch(m_topology)
	{
	case D3DPT_TRIANGLELIST:
		prims = count / 3;
		break;
	case D3DPT_LINELIST:
		prims = count / 2;
		break;
	case D3DPT_POINTLIST:
		prims = count;
		break;
	case D3DPT_TRIANGLESTRIP:
	case D3DPT_TRIANGLEFAN:
		prims = count - 2;
		break;
	case D3DPT_LINESTRIP:
		prims = count - 1;
		break;
	}

	if(prims <= 0)
	{
		return;
	}

	m_dev->DrawPrimitive(m_topology, m_vertices.start, prims);
}
void GSDevice9::EndScene() void GSDevice9::EndScene()
{ {
m_dev->EndScene(); m_dev->EndScene();
m_vertices.start += m_vertices.count;
m_vertices.count = 0;
} }
void GSDevice9::ClearRenderTarget(GSTexture* t, const GSVector4& c) void GSDevice9::ClearRenderTarget(GSTexture* t, const GSVector4& c)
@ -550,16 +583,7 @@ void GSDevice9::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, c
vertices[i].p.y += 1.0f / ds.y; vertices[i].p.y += 1.0f / ds.y;
} }
void* buff = NULL; IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
if(SUCCEEDED(m_convert.vb->Lock(0, 0, &buff, D3DLOCK_DISCARD)))
{
memcpy(buff, vertices, sizeof(vertices));
m_convert.vb->Unlock();
}
IASetVertexBuffer(m_convert.vb, sizeof(vertices[0]));
IASetInputLayout(m_convert.il); IASetInputLayout(m_convert.il);
IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP); IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP);
@ -579,7 +603,7 @@ void GSDevice9::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, c
// //
DrawPrimitive(countof(vertices)); DrawPrimitive();
// //
@ -620,6 +644,52 @@ void GSDevice9::DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linea
StretchRect(st, sr, dt, dr, m_interlace.ps[shader], (const float*)&cb, 1, linear); StretchRect(st, sr, dt, dr, m_interlace.ps[shader], (const float*)&cb, 1, linear);
} }
// Copies `count` vertices of `stride` bytes each from `vertices` into the
// device-owned dynamic vertex buffer (m_vertices.vb) and binds that buffer.
//
// The buffer is (re)created on demand. Data is appended after the previously
// used range with D3DLOCK_NOOVERWRITE while it fits; otherwise the write
// restarts at offset 0 with D3DLOCK_DISCARD.
//
// m_vertices.limit is the capacity expressed in vertices of m_vertices.stride.
// The byte size of the buffer is fixed at creation time, so when the stride
// changes the limit is rescaled; this keeps limit * stride from ever exceeding
// the allocated size (previously a larger stride with count <= limit would
// lock and write past the end of the buffer).
//
// On CreateVertexBuffer failure the function returns without uploading or binding.
void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t count)
{
	ASSERT(m_vertices.count == 0);

	bool discard = false;

	if(stride != m_vertices.stride)
	{
		// Re-express the existing capacity in units of the new stride (rounded down).
		m_vertices.limit = stride > 0 ? m_vertices.limit * m_vertices.stride / stride : 0;
		m_vertices.stride = stride;

		// Offsets computed with the old stride are meaningless now, start over.
		m_vertices.start = 0;

		discard = true;
	}

	if(count > m_vertices.limit)
	{
		// Keep the previous buffer referenced in vb_old while a bigger one replaces it.
		m_vertices.vb_old = m_vertices.vb;
		m_vertices.vb = NULL;
		m_vertices.start = 0;
		m_vertices.count = 0;
		m_vertices.limit = max(count * 3 / 2, 10000);
	}

	if(m_vertices.vb == NULL)
	{
		HRESULT hr;

		hr = m_dev->CreateVertexBuffer(m_vertices.limit * stride, D3DUSAGE_DYNAMIC, 0, D3DPOOL_DEFAULT, &m_vertices.vb, NULL);

		if(FAILED(hr)) return;
	}

	if(m_vertices.start + count > m_vertices.limit)
	{
		// Not enough room behind the last batch, wrap around.
		m_vertices.start = 0;

		discard = true;
	}

	uint32 flags = discard ? D3DLOCK_DISCARD : D3DLOCK_NOOVERWRITE;

	void* v = NULL;

	if(SUCCEEDED(m_vertices.vb->Lock(m_vertices.start * stride, count * stride, &v, flags)))
	{
		GSVector4i::storent(v, vertices, count * stride);

		m_vertices.vb->Unlock();
	}

	m_vertices.count = count;

	IASetVertexBuffer(m_vertices.vb, stride);
}
void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride) void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride)
{ {
if(m_vb != vb || m_vb_stride != stride) if(m_vb != vb || m_vb_stride != stride)
@ -846,33 +916,6 @@ void GSDevice9::OMSetRenderTargets(GSTexture* rt, GSTexture* ds)
} }
} }
// Draws `count` vertices beginning at vertex `start` using the topology
// stored in m_topology.
//
// Direct3D 9 expects a primitive count rather than a vertex count, so `count`
// is converted according to the topology. The conversion is done in a signed
// type: `count` is unsigned and "count - 2" / "count - 1" would otherwise wrap
// around for strips or fans with too few vertices. Nothing is drawn when the
// range does not contain at least one primitive or the topology is not one of
// the handled values.
void GSDevice9::DrawPrimitive(uint32 count, uint32 start)
{
	const int vertices = (int)count;

	int prims = 0;

	switch(m_topology)
	{
	case D3DPT_TRIANGLELIST:
		prims = vertices / 3;
		break;
	case D3DPT_LINELIST:
		prims = vertices / 2;
		break;
	case D3DPT_POINTLIST:
		prims = vertices;
		break;
	case D3DPT_TRIANGLESTRIP:
	case D3DPT_TRIANGLEFAN:
		prims = vertices - 2;
		break;
	case D3DPT_LINESTRIP:
		prims = vertices - 1;
		break;
	}

	if(prims <= 0)
	{
		return;
	}

	m_dev->DrawPrimitive(m_topology, start, prims);
}
// FIXME: D3DXCompileShaderFromResource of d3dx9 v37 (march 2008) calls GetFullPathName on id for some reason and then crashes // FIXME: D3DXCompileShaderFromResource of d3dx9 v37 (march 2008) calls GetFullPathName on id for some reason and then crashes
static HRESULT LoadShader(uint32 id, LPCSTR& data, uint32& size) static HRESULT LoadShader(uint32 id, LPCSTR& data, uint32& size)
@ -934,9 +977,7 @@ HRESULT GSDevice9::CompileShader(uint32 id, const string& entry, const D3DXMACRO
} }
else if(error) else if(error)
{ {
LPCSTR msg = (LPCSTR)error->GetBufferPointer(); printf("%s\n", (const char*)error->GetBufferPointer());
TRACE(_T("%s\n"), CString(msg));
} }
ASSERT(SUCCEEDED(hr)); ASSERT(SUCCEEDED(hr));
@ -993,14 +1034,10 @@ HRESULT GSDevice9::CompileShader(uint32 id, const string& entry, const D3DXMACRO
if(SUCCEEDED(hr)) if(SUCCEEDED(hr))
{ {
hr = m_dev->CreatePixelShader((DWORD*)shader->GetBufferPointer(), ps); hr = m_dev->CreatePixelShader((DWORD*)shader->GetBufferPointer(), ps);
ASSERT(SUCCEEDED(hr));
} }
else if(error) else if(error)
{ {
LPCSTR msg = (LPCSTR)error->GetBufferPointer(); printf("%s\n", (const char*)error->GetBufferPointer());
TRACE(_T("%s\n"), CString(msg));
} }
ASSERT(SUCCEEDED(hr)); ASSERT(SUCCEEDED(hr));

View File

@ -96,13 +96,18 @@ private:
CComPtr<IDirect3DDevice9> m_dev; CComPtr<IDirect3DDevice9> m_dev;
CComPtr<IDirect3DSwapChain9> m_swapchain; CComPtr<IDirect3DSwapChain9> m_swapchain;
// State of the device-owned dynamic vertex buffer that
// IASetVertexBuffer(vertices, stride, count) fills and DrawPrimitive() draws from.
struct
{
// vb: buffer currently written to and bound.
// vb_old: receives the previous buffer when a larger one has to be allocated.
CComPtr<IDirect3DVertexBuffer9> vb, vb_old;
// stride: size in bytes of one vertex of the most recent upload.
// start: index of the first vertex of the current batch (advanced by count in EndScene).
// count: number of vertices in the current batch (reset to 0 in EndScene).
// limit: number of vertices the buffer was sized for when it was created.
size_t stride, start, count, limit;
} m_vertices;
public: // TODO public: // TODO
D3DPRESENT_PARAMETERS m_pp; D3DPRESENT_PARAMETERS m_pp;
CComPtr<ID3DXFont> m_font; CComPtr<ID3DXFont> m_font;
struct struct
{ {
CComPtr<IDirect3DVertexBuffer9> vb;
CComPtr<IDirect3DVertexDeclaration9> il; CComPtr<IDirect3DVertexDeclaration9> il;
CComPtr<IDirect3DVertexShader9> vs; CComPtr<IDirect3DVertexShader9> vs;
CComPtr<IDirect3DPixelShader9> ps[7]; CComPtr<IDirect3DPixelShader9> ps[7];
@ -134,6 +139,7 @@ public:
void Flip(); void Flip();
void BeginScene(); void BeginScene();
void DrawPrimitive();
void EndScene(); void EndScene();
void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, const GSVector4& c);
@ -154,6 +160,7 @@ public:
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, bool linear = true); void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true); void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true);
void IASetVertexBuffer(const void* vertices, size_t stride, size_t count);
void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride); void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride);
void IASetInputLayout(IDirect3DVertexDeclaration9* layout); void IASetInputLayout(IDirect3DVertexDeclaration9* layout);
void IASetPrimitiveTopology(D3DPRIMITIVETYPE topology); void IASetPrimitiveTopology(D3DPRIMITIVETYPE topology);
@ -165,7 +172,6 @@ public:
void OMSetDepthStencilState(Direct3DDepthStencilState9* dss, uint32 sref); void OMSetDepthStencilState(Direct3DDepthStencilState9* dss, uint32 sref);
void OMSetBlendState(Direct3DBlendState9* bs, uint32 bf); void OMSetBlendState(Direct3DBlendState9* bs, uint32 bf);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds);
void DrawPrimitive(uint32 count, uint32 start = 0);
IDirect3DDevice9* operator->() {return m_dev;} IDirect3DDevice9* operator->() {return m_dev;}
operator IDirect3DDevice9*() {return m_dev;} operator IDirect3DDevice9*() {return m_dev;}

View File

@ -361,7 +361,7 @@ protected:
int prim = PRIM->PRIM; int prim = PRIM->PRIM;
if(!OverrideInput(prim, rt->m_texture, ds->m_texture, tex ? tex->m_texture : NULL)) if(!OverrideInput(prim, rt->m_texture, ds->m_texture, tex))
{ {
return; return;
} }
@ -396,9 +396,9 @@ protected:
m_tc->InvalidateTextures(context->FRAME, context->ZBUF); m_tc->InvalidateTextures(context->FRAME, context->ZBUF);
} }
virtual void Draw(int prim, GSTexture* rt, GSTexture* ds, typename GSTextureCache::GSCachedTexture* tex) = 0; virtual void Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex) = 0;
virtual bool OverrideInput(int& prim, GSTexture* rt, GSTexture* ds, GSTexture* t) virtual bool OverrideInput(int& prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* t)
{ {
#pragma region ffxii pal video conversion #pragma region ffxii pal video conversion
@ -437,9 +437,11 @@ protected:
ok = false; ok = false;
t = m_dev->CreateTexture(512, 512); m_dev->Recycle(t->m_texture);
t->Update(GSVector4i(0, 0, 448, 512), video, 512 * 4); t->m_texture = m_dev->CreateTexture(512, 512);
t->m_texture->Update(GSVector4i(0, 0, 448, 512), video, 512 * 4);
m_vertices[0] = m_vertices[0]; m_vertices[0] = m_vertices[0];
m_vertices[1] = m_vertices[1]; m_vertices[1] = m_vertices[1];

View File

@ -561,11 +561,7 @@ void GSRendererHW10::SetupDATE(GSTexture* rt, GSTexture* ds)
{GSVector4(mm.z, -mm.w, 0.5f, 1.0f), GSVector2(uv.z, uv.w)}, {GSVector4(mm.z, -mm.w, 0.5f, 1.0f), GSVector2(uv.z, uv.w)},
}; };
D3D10_BOX box = {0, 0, 0, sizeof(vertices), 1, 1}; dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
(*dev)->UpdateSubresource(dev->m_convert.vb, 0, &box, vertices, 0, 0);
dev->IASetVertexBuffer(dev->m_convert.vb, sizeof(vertices[0]));
dev->IASetInputLayout(dev->m_convert.il); dev->IASetInputLayout(dev->m_convert.il);
dev->IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); dev->IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
@ -589,7 +585,7 @@ void GSRendererHW10::SetupDATE(GSTexture* rt, GSTexture* ds)
// set // set
dev->DrawPrimitive(countof(vertices)); dev->DrawPrimitive();
// //

View File

@ -536,16 +536,7 @@ void GSRendererHW9::SetupDATE(GSTexture* rt, GSTexture* ds)
{GSVector4(mm.z, -mm.w, 0.5f, 1.0f), GSVector2(uv.z, uv.w)}, {GSVector4(mm.z, -mm.w, 0.5f, 1.0f), GSVector2(uv.z, uv.w)},
}; };
void* buff = NULL; dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
if(SUCCEEDED(dev->m_convert.vb->Lock(0, 0, &buff, D3DLOCK_DISCARD)))
{
memcpy(buff, vertices, sizeof(vertices));
dev->m_convert.vb->Unlock();
}
dev->IASetVertexBuffer(dev->m_convert.vb, sizeof(vertices[0]));
dev->IASetInputLayout(dev->m_convert.il); dev->IASetInputLayout(dev->m_convert.il);
dev->IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP); dev->IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP);
@ -565,7 +556,7 @@ void GSRendererHW9::SetupDATE(GSTexture* rt, GSTexture* ds)
// //
dev->DrawPrimitive(countof(vertices)); dev->DrawPrimitive();
// //
@ -579,11 +570,6 @@ void GSRendererHW9::UpdateFBA(GSTexture* rt)
{ {
GSDevice9* dev = (GSDevice9*)m_dev; GSDevice9* dev = (GSDevice9*)m_dev;
int w = rt->GetWidth();
int h = rt->GetHeight();
GSVector4 mm = GSVector4(-1, -1, 1, 1);
dev->BeginScene(); dev->BeginScene();
// om // om
@ -593,6 +579,8 @@ void GSRendererHW9::UpdateFBA(GSTexture* rt)
// ia // ia
GSVector4 mm = GSVector4(-1, -1, 1, 1);
GSVertexPT1 vertices[] = GSVertexPT1 vertices[] =
{ {
{GSVector4(mm.x, -mm.y, 0.5f, 1.0f), GSVector2(0, 0)}, {GSVector4(mm.x, -mm.y, 0.5f, 1.0f), GSVector2(0, 0)},
@ -601,16 +589,7 @@ void GSRendererHW9::UpdateFBA(GSTexture* rt)
{GSVector4(mm.z, -mm.w, 0.5f, 1.0f), GSVector2(0, 0)}, {GSVector4(mm.z, -mm.w, 0.5f, 1.0f), GSVector2(0, 0)},
}; };
void* buff = NULL; dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), countof(vertices));
if(SUCCEEDED(dev->m_convert.vb->Lock(0, 0, &buff, D3DLOCK_DISCARD)))
{
memcpy(buff, vertices, sizeof(vertices));
dev->m_convert.vb->Unlock();
}
dev->IASetVertexBuffer(dev->m_convert.vb, sizeof(vertices[0]));
dev->IASetInputLayout(dev->m_convert.il); dev->IASetInputLayout(dev->m_convert.il);
dev->IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP); dev->IASetPrimitiveTopology(D3DPT_TRIANGLESTRIP);
@ -624,11 +603,11 @@ void GSRendererHW9::UpdateFBA(GSTexture* rt)
// rs // rs
dev->RSSet(w, h); dev->RSSet(rt->GetWidth(), rt->GetHeight());
// //
dev->DrawPrimitive(countof(vertices)); dev->DrawPrimitive();
// //

View File

@ -793,3 +793,171 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
} }
} }
// Builds a GSVertexSW from the current vertex registers (m_v), appends it to
// the vertex queue (m_vl) and, when DrawingKick<prim>() hands back a completed
// primitive, culls it, fixes up its colors and accounts for it in m_count.
//
// Template parameters:
//   prim - GS primitive type; selects the per-primitive cases in the switches below.
//   tme  - when non-zero, texture coordinates are computed for the vertex.
//   fst  - when non-zero, texture coordinates come from m_v.UV, otherwise from m_v.ST and RGBAQ.Q.
// Parameter:
//   skip - forwarded unchanged to DrawingKick<prim>().
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererSW::VertexKick(bool skip)
{
const GSDrawingContext* context = m_context;
// position: take the low 32 bits of XYZ, put FOG.F into 16-bit lane 3,
// widen the low 16-bit lanes to 32 bits and subtract the context's XYOFFSET
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]);
xy = xy.insert16<3>(m_v.FOG.F);
xy = xy.upl16();
xy -= context->XYOFFSET;
GSVertexSW v;
v.p = GSVector4(xy) * g_pos_scale;
// color: the four bytes of RGBAQ.u32[0] widened to 32 bits and shifted left by 7
v.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7);
if(tme)
{
float q;
if(fst)
{
// UV path: widen the 16-bit UV values, shift left by 12, q is fixed at 1
v.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
q = 1.0f;
}
else
{
// ST path: scale S/T by 0x10000 << TW / TH of the context's TEX0, q comes from RGBAQ.Q
v.t = GSVector4(m_v.ST.S, m_v.ST.T);
v.t *= GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
q = m_v.RGBAQ.Q;
}
// combine the two coordinates with q into v.t
v.t = v.t.xyxy(GSVector4::load(q));
}
// queue the vertex; z is written separately, clamped to 0xffffff00
GSVertexSW& dst = m_vl.AddTail();
dst = v;
dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the uint32 => float => uint32 conversion
int count = 0;
// a non-NULL result points at the vertices of a completed primitive; count is filled in by DrawingKick
if(GSVertexSW* v = DrawingKick<prim>(skip, count))
{
// culling is bypassed while m_dump is set
if(!m_dump)
{
// component-wise bounding box of the primitive's vertices
GSVector4 pmin, pmax;
switch(prim)
{
case GS_POINTLIST:
pmin = v[0].p;
pmax = v[0].p;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin = v[0].p.minv(v[1].p);
pmax = v[0].p.maxv(v[1].p);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin = v[0].p.minv(v[1].p).minv(v[2].p);
pmax = v[0].p.maxv(v[1].p).maxv(v[2].p);
break;
}
// flag lanes where the box lies outside the context's scissor rectangle
// NOTE(review): presumably scissor.ex is laid out as (min.x, min.y, max.x, max.y) - confirm
GSVector4 scissor = context->scissor.ex;
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
// flag lanes where min and max round up to the same value
test |= pmin.ceil() == pmax.ceil();
break;
}
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
// are in line or just two of them are the same (cross product == 0)
GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz();
test |= tmp == tmp.yxwz();
break;
}
// drop the primitive if either of the first two lanes was flagged
// (it is not added to m_count)
if(test.mask() & 3)
{
return;
}
}
// when PRIM->IIP is 0 every vertex of the primitive gets the color of its last vertex
switch(prim)
{
case GS_POINTLIST:
break;
case GS_LINELIST:
case GS_LINESTRIP:
if(PRIM->IIP == 0) {v[0].c = v[1].c;}
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;}
break;
case GS_SPRITE:
break;
}
// quad detection, only attempted while 3 <= m_count < 30
if(m_count < 30 && m_count >= 3)
{
// v now points three vertices before the current end of m_vertices (shadows the outer v)
GSVertexSW* v = &m_vertices[m_count - 3];
int tl = 0;
int br = 0;
bool isquad = false;
switch(prim)
{
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_TRIANGLELIST:
// NOTE(review): IsQuad presumably reports the indices of the top-left and bottom-right vertices in tl/br - confirm
isquad = GSVertexSW::IsQuad(v, tl, br);
break;
}
if(isquad)
{
m_count -= 3;
if(m_count > 0)
{
// make tl/br relative to m_vertices, then flush what was queued before the quad
tl += m_count;
br += m_count;
Flush();
}
// move the two chosen vertices to slots 0 and 1 and draw them as a two-vertex batch
if(tl != 0) m_vertices[0] = m_vertices[tl];
if(br != 1) m_vertices[1] = m_vertices[br];
m_count = 2;
// temporarily switch the primitive type to GS_SPRITE for this flush, then restore it
uint32 tmp = PRIM->PRIM;
PRIM->PRIM = GS_SPRITE;
Flush();
PRIM->PRIM = tmp;
m_perfmon.Put(GSPerfMon::Quad, 1);
return;
}
}
// keep the primitive: account for its vertices
m_count += count;
}
}

View File

@ -54,171 +54,5 @@ public:
virtual ~GSRendererSW(); virtual ~GSRendererSW();
template<uint32 prim, uint32 tme, uint32 fst> template<uint32 prim, uint32 tme, uint32 fst>
void VertexKick(bool skip) void VertexKick(bool skip);
{
const GSDrawingContext* context = m_context;
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]);
xy = xy.insert16<3>(m_v.FOG.F);
xy = xy.upl16();
xy -= context->XYOFFSET;
GSVertexSW v;
v.p = GSVector4(xy) * g_pos_scale;
v.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7);
if(tme)
{
float q;
if(fst)
{
v.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
q = 1.0f;
}
else
{
v.t = GSVector4(m_v.ST.S, m_v.ST.T);
v.t *= GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
q = m_v.RGBAQ.Q;
}
v.t = v.t.xyxy(GSVector4::load(q));
}
GSVertexSW& dst = m_vl.AddTail();
dst = v;
dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the uint32 => float => uint32 conversion
int count = 0;
if(GSVertexSW* v = DrawingKick<prim>(skip, count))
{
if(!m_dump)
{
GSVector4 pmin, pmax;
switch(prim)
{
case GS_POINTLIST:
pmin = v[0].p;
pmax = v[0].p;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin = v[0].p.minv(v[1].p);
pmax = v[0].p.maxv(v[1].p);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin = v[0].p.minv(v[1].p).minv(v[2].p);
pmax = v[0].p.maxv(v[1].p).maxv(v[2].p);
break;
}
GSVector4 scissor = context->scissor.ex;
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
test |= pmin.ceil() == pmax.ceil();
break;
}
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
// are in line or just two of them are the same (cross product == 0)
GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz();
test |= tmp == tmp.yxwz();
break;
}
if(test.mask() & 3)
{
return;
}
}
switch(prim)
{
case GS_POINTLIST:
break;
case GS_LINELIST:
case GS_LINESTRIP:
if(PRIM->IIP == 0) {v[0].c = v[1].c;}
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;}
break;
case GS_SPRITE:
break;
}
if(m_count < 30 && m_count >= 3)
{
GSVertexSW* v = &m_vertices[m_count - 3];
int tl = 0;
int br = 0;
bool isquad = false;
switch(prim)
{
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_TRIANGLELIST:
isquad = GSVertexSW::IsQuad(v, tl, br);
break;
}
if(isquad)
{
m_count -= 3;
if(m_count > 0)
{
tl += m_count;
br += m_count;
Flush();
}
if(tl != 0) m_vertices[0] = m_vertices[tl];
if(br != 1) m_vertices[1] = m_vertices[br];
m_count = 2;
uint32 tmp = PRIM->PRIM;
PRIM->PRIM = GS_SPRITE;
Flush();
PRIM->PRIM = tmp;
m_perfmon.Put(GSPerfMon::Quad, 1);
return;
}
}
m_count += count;
}
}
}; };

View File

@ -44,8 +44,6 @@ GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, u
void GSSetupPrimCodeGenerator::Generate() void GSSetupPrimCodeGenerator::Generate()
{ {
const int params = 0;
if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip) if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip)
{ {
for(int i = 0; i < 5; i++) for(int i = 0; i < 5; i++)

View File

@ -1213,7 +1213,7 @@ template<int index> void GSState::Transfer(uint8* mem, uint32 size)
{ {
GIFReg r; GIFReg r;
r.u64 = path.tag.PRIM; r.u64 = path.tag.PRIM;
(this->*m_fpGIFRegHandlers[GIF_A_D_REG_PRIM])(&r); GIFRegHandlerPRIM(&r);
} }
} }
} }

View File

@ -25,9 +25,6 @@
GSTextureFX10::GSTextureFX10() GSTextureFX10::GSTextureFX10()
: m_dev(NULL) : m_dev(NULL)
, m_vb_max(0)
, m_vb_start(0)
, m_vb_count(0)
{ {
memset(&m_vs_cb_cache, 0, sizeof(m_vs_cb_cache)); memset(&m_vs_cb_cache, 0, sizeof(m_vs_cb_cache));
memset(&m_ps_cb_cache, 0, sizeof(m_ps_cb_cache)); memset(&m_ps_cb_cache, 0, sizeof(m_ps_cb_cache));
@ -94,63 +91,7 @@ bool GSTextureFX10::Create(GSDevice10* dev)
bool GSTextureFX10::SetupIA(const GSVertexHW10* vertices, int count, D3D10_PRIMITIVE_TOPOLOGY prim) bool GSTextureFX10::SetupIA(const GSVertexHW10* vertices, int count, D3D10_PRIMITIVE_TOPOLOGY prim)
{ {
HRESULT hr; m_dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), count);
if(max(count * 3 / 2, 10000) > m_vb_max)
{
m_vb_old = m_vb;
m_vb = NULL;
m_vb_max = max(count * 2, 10000);
m_vb_start = 0;
m_vb_count = 0;
}
if(!m_vb)
{
D3D10_BUFFER_DESC bd;
memset(&bd, 0, sizeof(bd));
bd.Usage = D3D10_USAGE_DYNAMIC;
bd.ByteWidth = m_vb_max * sizeof(vertices[0]);
bd.BindFlags = D3D10_BIND_VERTEX_BUFFER;
bd.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE;
hr = (*m_dev)->CreateBuffer(&bd, NULL, &m_vb);
if(FAILED(hr)) return false;
}
GSVertexHW10* v = NULL;
int next = m_vb_start + m_vb_count;
if(next + count <= m_vb_max)
{
if(SUCCEEDED(m_vb->Map(D3D10_MAP_WRITE_NO_OVERWRITE, 0, (void**)&v)))
{
memcpy(&v[next], vertices, count * sizeof(vertices[0]));
m_vb->Unmap();
}
m_vb_start = next;
m_vb_count = count;
}
else
{
if(SUCCEEDED(m_vb->Map(D3D10_MAP_WRITE_DISCARD, 0, (void**)&v)))
{
memcpy(v, vertices, count * sizeof(vertices[0]));
m_vb->Unmap();
}
m_vb_start = 0;
m_vb_count = count;
}
m_dev->IASetVertexBuffer(m_vb, sizeof(vertices[0]));
m_dev->IASetInputLayout(m_il); m_dev->IASetInputLayout(m_il);
m_dev->IASetPrimitiveTopology(prim); m_dev->IASetPrimitiveTopology(prim);
@ -589,5 +530,5 @@ void GSTextureFX10::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
void GSTextureFX10::Draw() void GSTextureFX10::Draw()
{ {
m_dev->DrawPrimitive(m_vb_count, m_vb_start); m_dev->DrawPrimitive();
} }

View File

@ -25,9 +25,6 @@
GSTextureFX9::GSTextureFX9() GSTextureFX9::GSTextureFX9()
: m_dev(NULL) : m_dev(NULL)
, m_vb_max(0)
, m_vb_start(0)
, m_vb_count(0)
{ {
} }
@ -89,57 +86,7 @@ GSTexture* GSTextureFX9::CreateMskFix(uint32 size, uint32 msk, uint32 fix)
bool GSTextureFX9::SetupIA(const GSVertexHW9* vertices, int count, D3DPRIMITIVETYPE prim) bool GSTextureFX9::SetupIA(const GSVertexHW9* vertices, int count, D3DPRIMITIVETYPE prim)
{ {
HRESULT hr; m_dev->IASetVertexBuffer(vertices, sizeof(vertices[0]), count);
if(max(count * 3 / 2, 10000) > m_vb_max)
{
m_vb_old = m_vb;
m_vb = NULL;
m_vb_max = max(count * 2, 10000);
m_vb_start = 0;
m_vb_count = 0;
}
if(!m_vb)
{
hr = (*m_dev)->CreateVertexBuffer(m_vb_max * sizeof(vertices[0]), D3DUSAGE_DYNAMIC, 0, D3DPOOL_DEFAULT, &m_vb, NULL);
if(FAILED(hr)) return false;
}
GSVertexHW9* v = NULL;
int next = m_vb_start + m_vb_count;
int size = count * sizeof(vertices[0]);
if(next + count <= m_vb_max)
{
int offset = next * sizeof(vertices[0]);
if(SUCCEEDED(m_vb->Lock(offset, size, (void**)&v, D3DLOCK_NOOVERWRITE)))
{
memcpy(v, vertices, size);
m_vb->Unlock();
}
m_vb_start = next;
m_vb_count = count;
}
else
{
if(SUCCEEDED(m_vb->Lock(0, size, (void**)&v, D3DLOCK_DISCARD)))
{
memcpy(v, vertices, size);
m_vb->Unlock();
}
m_vb_start = 0;
m_vb_count = count;
}
m_dev->IASetVertexBuffer(m_vb, sizeof(vertices[0]));
m_dev->IASetInputLayout(m_il); m_dev->IASetInputLayout(m_il);
m_dev->IASetPrimitiveTopology(prim); m_dev->IASetPrimitiveTopology(prim);
@ -526,5 +473,5 @@ void GSTextureFX9::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
void GSTextureFX9::Draw() void GSTextureFX9::Draw()
{ {
m_dev->DrawPrimitive(m_vb_count, m_vb_start); m_dev->DrawPrimitive();
} }

View File

@ -155,12 +155,6 @@ private:
hash_map<uint32, Direct3DSamplerState9* > m_ps_ss; hash_map<uint32, Direct3DSamplerState9* > m_ps_ss;
hash_map<uint32, Direct3DDepthStencilState9* > m_om_dss; hash_map<uint32, Direct3DDepthStencilState9* > m_om_dss;
hash_map<uint32, Direct3DBlendState9* > m_om_bs; hash_map<uint32, Direct3DBlendState9* > m_om_bs;
CComPtr<IDirect3DVertexBuffer9> m_vb, m_vb_old;
int m_vb_max;
int m_vb_start;
int m_vb_count;
hash_map<uint32, GSTexture*> m_mskfix; hash_map<uint32, GSTexture*> m_mskfix;
GSTexture* CreateMskFix(uint32 size, uint32 msk, uint32 fix); GSTexture* CreateMskFix(uint32 size, uint32 msk, uint32 fix);

View File

@ -1599,14 +1599,18 @@ public:
#endif #endif
#if _M_SSE >= 0x401
static GSVector4i loadnt(const void* p) static GSVector4i loadnt(const void* p)
{ {
return GSVector4i(_mm_stream_load_si128((__m128i*)p)); #if _M_SSE >= 0x401
}
#endif return GSVector4i(_mm_stream_load_si128((__m128i*)p));
#else
return GSVector4i(_mm_load_si128((__m128i*)p));
#endif
}
static GSVector4i loadl(const void* p) static GSVector4i loadl(const void* p)
{ {
@ -1696,6 +1700,31 @@ public:
#endif #endif
static void storent(void* RESTRICT dst, const void* RESTRICT src, size_t size)
{
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
if(size == 0) return;
size_t i = 0;
size_t j = size >> 6;
for(; i < j; i++, s += 4, d += 4)
{
storent(&d[0], s[0]);
storent(&d[1], s[1]);
storent(&d[2], s[2]);
storent(&d[3], s[3]);
}
size &= 63;
if(size == 0) return;
memcpy(d, s, size);
}
__forceinline static void transpose(GSVector4i& a, GSVector4i& b, GSVector4i& c, GSVector4i& d) __forceinline static void transpose(GSVector4i& a, GSVector4i& b, GSVector4i& c, GSVector4i& d)
{ {
_MM_TRANSPOSE4_SI128(a.m, b.m, c.m, d.m); _MM_TRANSPOSE4_SI128(a.m, b.m, c.m, d.m);