GSdx: Streamlined several uses of CComPtr in the shader caches, as its internal reference counting was causing general slowdowns. Gives fairly significant speedups (6-15%) in most games, on both DX9 and DX10.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1311 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-06-03 19:22:50 +00:00
parent ffda8ead30
commit 02167e90b3
2 changed files with 44 additions and 77 deletions

View File

@ -100,15 +100,9 @@ bool GSTextureFX10::SetupIA(const GSVertexHW10* vertices, int count, D3D10_PRIMI
bool GSTextureFX10::SetupVS(VSSelector sel, const VSConstantBuffer* cb) bool GSTextureFX10::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
{ {
CComPtr<ID3D10VertexShader> vs; hash_map<uint32, CComPtr<ID3D10VertexShader> >::const_iterator i = m_vs.find(sel);
hash_map<uint32, CComPtr<ID3D10VertexShader> >::iterator i = m_vs.find(sel); if(i == m_vs.end())
if(i != m_vs.end())
{
vs = (*i).second;
}
else
{ {
string str[5]; string str[5];
@ -139,6 +133,7 @@ bool GSTextureFX10::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
}; };
CComPtr<ID3D10InputLayout> il; CComPtr<ID3D10InputLayout> il;
CComPtr<ID3D10VertexShader> vs;
m_dev->CompileShader(IDR_TFX10_FX, "vs_main", macro, &vs, layout, countof(layout), &il); m_dev->CompileShader(IDR_TFX10_FX, "vs_main", macro, &vs, layout, countof(layout), &il);
@ -148,6 +143,7 @@ bool GSTextureFX10::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
} }
m_vs[sel] = vs; m_vs[sel] = vs;
i = m_vs.find( sel );
} }
if(m_vs_cb_cache.Update(cb)) if(m_vs_cb_cache.Update(cb))
@ -155,7 +151,7 @@ bool GSTextureFX10::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
(*m_dev)->UpdateSubresource(m_vs_cb, 0, NULL, cb, 0, 0); (*m_dev)->UpdateSubresource(m_vs_cb, 0, NULL, cb, 0, 0);
} }
m_dev->VSSetShader(vs, m_vs_cb); m_dev->VSSetShader((*i).second, m_vs_cb);
return true; return true;
} }
@ -164,11 +160,11 @@ bool GSTextureFX10::SetupGS(GSSelector sel)
{ {
HRESULT hr; HRESULT hr;
CComPtr<ID3D10GeometryShader> gs; ID3D10GeometryShader* gs = NULL;
if(sel.prim > 0 && (sel.iip == 0 || sel.prim == 3)) // geometry shader works in every case, but not needed if(sel.prim > 0 && (sel.iip == 0 || sel.prim == 3)) // geometry shader works in every case, but not needed
{ {
hash_map<uint32, CComPtr<ID3D10GeometryShader> >::iterator i = m_gs.find(sel); hash_map<uint32, CComPtr<ID3D10GeometryShader> >::const_iterator i = m_gs.find(sel);
if(i != m_gs.end()) if(i != m_gs.end())
{ {
@ -212,15 +208,9 @@ void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
{ {
HRESULT hr; HRESULT hr;
CComPtr<ID3D10PixelShader> ps; hash_map<uint32, CComPtr<ID3D10PixelShader> >::const_iterator i = m_ps.find(sel);
hash_map<uint32, CComPtr<ID3D10PixelShader> >::iterator i = m_ps.find(sel); if(i == m_ps.end())
if(i != m_ps.end())
{
ps = (*i).second;
}
else
{ {
string str[13]; string str[13];
@ -256,9 +246,11 @@ void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
{NULL, NULL}, {NULL, NULL},
}; };
CComPtr<ID3D10PixelShader> ps;
hr = m_dev->CompileShader(IDR_TFX10_FX, "ps_main", macro, &ps); hr = m_dev->CompileShader(IDR_TFX10_FX, "ps_main", macro, &ps);
m_ps[sel] = ps; m_ps[sel] = ps;
i = m_ps.find(sel);
} }
if(m_ps_cb_cache.Update(cb)) if(m_ps_cb_cache.Update(cb))
@ -266,9 +258,9 @@ void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
(*m_dev)->UpdateSubresource(m_ps_cb, 0, NULL, cb, 0, 0); (*m_dev)->UpdateSubresource(m_ps_cb, 0, NULL, cb, 0, 0);
} }
m_dev->PSSetShader(ps, m_ps_cb); m_dev->PSSetShader((*i).second, m_ps_cb);
CComPtr<ID3D10SamplerState> ss0, ss1; ID3D10SamplerState* ss0=NULL, *ss1=NULL;
if(sel.tfx != 4) if(sel.tfx != 4)
{ {
@ -277,7 +269,7 @@ void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
ssel.min = ssel.mag = 0; ssel.min = ssel.mag = 0;
} }
hash_map<uint32, CComPtr<ID3D10SamplerState> >::iterator i = m_ps_ss.find(ssel); hash_map<uint32, CComPtr<ID3D10SamplerState> >::const_iterator i = m_ps_ss.find(ssel);
if(i != m_ps_ss.end()) if(i != m_ps_ss.end())
{ {
@ -333,15 +325,9 @@ void GSTextureFX10::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
{ {
HRESULT hr; HRESULT hr;
CComPtr<ID3D10DepthStencilState> dss; hash_map<uint32, CComPtr<ID3D10DepthStencilState> >::const_iterator i = m_om_dss.find(dssel);
hash_map<uint32, CComPtr<ID3D10DepthStencilState> >::iterator i = m_om_dss.find(dssel); if(i == m_om_dss.end())
if(i != m_om_dss.end())
{
dss = (*i).second;
}
else
{ {
D3D10_DEPTH_STENCIL_DESC dsd; D3D10_DEPTH_STENCIL_DESC dsd;
@ -377,22 +363,18 @@ void GSTextureFX10::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
dsd.DepthFunc = ztst[dssel.ztst]; dsd.DepthFunc = ztst[dssel.ztst];
} }
CComPtr<ID3D10DepthStencilState> dss;
hr = (*m_dev)->CreateDepthStencilState(&dsd, &dss); hr = (*m_dev)->CreateDepthStencilState(&dsd, &dss);
m_om_dss[dssel] = dss; m_om_dss[dssel] = dss;
i = m_om_dss.find(dssel);
} }
m_dev->OMSetDepthStencilState(dss, 1); m_dev->OMSetDepthStencilState((*i).second, 1);
CComPtr<ID3D10BlendState> bs; hash_map<uint32, CComPtr<ID3D10BlendState> >::const_iterator j = m_om_bs.find(bsel);
hash_map<uint32, CComPtr<ID3D10BlendState> >::iterator j = m_om_bs.find(bsel); if(j == m_om_bs.end())
if(j != m_om_bs.end())
{
bs = (*j).second;
}
else
{ {
D3D10_BLEND_DESC bd; D3D10_BLEND_DESC bd;
@ -520,12 +502,14 @@ void GSTextureFX10::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
if(bsel.wb) bd.RenderTargetWriteMask[0] |= D3D10_COLOR_WRITE_ENABLE_BLUE; if(bsel.wb) bd.RenderTargetWriteMask[0] |= D3D10_COLOR_WRITE_ENABLE_BLUE;
if(bsel.wa) bd.RenderTargetWriteMask[0] |= D3D10_COLOR_WRITE_ENABLE_ALPHA; if(bsel.wa) bd.RenderTargetWriteMask[0] |= D3D10_COLOR_WRITE_ENABLE_ALPHA;
CComPtr<ID3D10BlendState> bs;
hr = (*m_dev)->CreateBlendState(&bd, &bs); hr = (*m_dev)->CreateBlendState(&bd, &bs);
m_om_bs[bsel] = bs; m_om_bs[bsel] = bs;
j = m_om_bs.find(bsel);
} }
m_dev->OMSetBlendState(bs, bf); m_dev->OMSetBlendState((*j).second, bf);
} }
void GSTextureFX10::Draw() void GSTextureFX10::Draw()

View File

@ -95,15 +95,9 @@ bool GSTextureFX9::SetupIA(const GSVertexHW9* vertices, int count, D3DPRIMITIVET
bool GSTextureFX9::SetupVS(VSSelector sel, const VSConstantBuffer* cb) bool GSTextureFX9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
{ {
CComPtr<IDirect3DVertexShader9> vs; hash_map<uint32, CComPtr<IDirect3DVertexShader9> >::const_iterator i = m_vs.find(sel);
hash_map<uint32, CComPtr<IDirect3DVertexShader9> >::iterator i = m_vs.find(sel); if(i == m_vs.end())
if(i != m_vs.end())
{
vs = (*i).second;
}
else
{ {
string str[4]; string str[4];
@ -131,6 +125,7 @@ bool GSTextureFX9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
}; };
CComPtr<IDirect3DVertexDeclaration9> il; CComPtr<IDirect3DVertexDeclaration9> il;
CComPtr<IDirect3DVertexShader9> vs;
m_dev->CompileShader(IDR_TFX9_FX, "vs_main", macro, &vs, layout, countof(layout), &il); m_dev->CompileShader(IDR_TFX9_FX, "vs_main", macro, &vs, layout, countof(layout), &il);
@ -140,9 +135,10 @@ bool GSTextureFX9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
} }
m_vs[sel] = vs; m_vs[sel] = vs;
i = m_vs.find( sel );
} }
m_dev->VSSetShader(vs, (const float*)cb, sizeof(*cb) / sizeof(GSVector4)); m_dev->VSSetShader( (*i).second, (const float*)cb, sizeof(*cb) / sizeof(GSVector4));
return true; return true;
} }
@ -185,15 +181,9 @@ void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSample
if(sel.wmt == 3) sel.wmt = 0; if(sel.wmt == 3) sel.wmt = 0;
} }
CComPtr<IDirect3DPixelShader9> ps; hash_map<uint32, CComPtr<IDirect3DPixelShader9> >::const_iterator i = m_ps.find(sel);
hash_map<uint32, CComPtr<IDirect3DPixelShader9> >::iterator i = m_ps.find(sel); if(i == m_ps.end())
if(i != m_ps.end())
{
ps = (*i).second;
}
else
{ {
string str[12]; string str[12];
@ -227,12 +217,14 @@ void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSample
{NULL, NULL}, {NULL, NULL},
}; };
CComPtr<IDirect3DPixelShader9> ps;
hr = m_dev->CompileShader(IDR_TFX9_FX, "ps_main", macro, &ps); hr = m_dev->CompileShader(IDR_TFX9_FX, "ps_main", macro, &ps);
m_ps[sel] = ps; m_ps[sel] = ps;
i = m_ps.find(sel);
} }
m_dev->PSSetShader(ps, (const float*)cb, sizeof(*cb) / sizeof(GSVector4)); m_dev->PSSetShader((*i).second, (const float*)cb, sizeof(*cb) / sizeof(GSVector4));
Direct3DSamplerState9* ss = NULL; Direct3DSamplerState9* ss = NULL;
@ -243,7 +235,7 @@ void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSample
ssel.min = ssel.mag = 0; ssel.min = ssel.mag = 0;
} }
hash_map<uint32, Direct3DSamplerState9* >::iterator i = m_ps_ss.find(ssel); hash_map<uint32, Direct3DSamplerState9* >::const_iterator i = m_ps_ss.find(ssel);
if(i != m_ps_ss.end()) if(i != m_ps_ss.end())
{ {
@ -286,13 +278,9 @@ void GSTextureFX9::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
{ {
Direct3DDepthStencilState9* dss = NULL; Direct3DDepthStencilState9* dss = NULL;
hash_map<uint32, Direct3DDepthStencilState9*>::iterator i = m_om_dss.find(dssel); hash_map<uint32, Direct3DDepthStencilState9*>::const_iterator i = m_om_dss.find(dssel);
if(i != m_om_dss.end()) if(i == m_om_dss.end())
{
dss = (*i).second;
}
else
{ {
dss = new Direct3DDepthStencilState9(); dss = new Direct3DDepthStencilState9();
@ -325,22 +313,16 @@ void GSTextureFX9::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
} }
m_om_dss[dssel] = dss; m_om_dss[dssel] = dss;
i = m_om_dss.find(dssel);
} }
m_dev->OMSetDepthStencilState(dss, 3); m_dev->OMSetDepthStencilState((*i).second, 3);
Direct3DBlendState9* bs = NULL; hash_map<uint32, Direct3DBlendState9*>::const_iterator j = m_om_bs.find(bsel);
hash_map<uint32, Direct3DBlendState9*>::iterator j = m_om_bs.find(bsel); if(j == m_om_bs.end())
if(j != m_om_bs.end())
{ {
bs = (*j).second; Direct3DBlendState9* bs = new Direct3DBlendState9();
}
else
{
bs = new Direct3DBlendState9();
memset(bs, 0, sizeof(*bs)); memset(bs, 0, sizeof(*bs));
bs->BlendEnable = bsel.abe; bs->BlendEnable = bsel.abe;
@ -466,9 +448,10 @@ void GSTextureFX9::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
if(bsel.wa) bs->RenderTargetWriteMask |= D3DCOLORWRITEENABLE_ALPHA; if(bsel.wa) bs->RenderTargetWriteMask |= D3DCOLORWRITEENABLE_ALPHA;
m_om_bs[bsel] = bs; m_om_bs[bsel] = bs;
j = m_om_bs.find(bsel);
} }
m_dev->OMSetBlendState(bs, 0x010101 * bf); m_dev->OMSetBlendState((*j).second, 0x010101 * bf);
} }
void GSTextureFX9::Draw() void GSTextureFX9::Draw()