- trying the dx10.1-only "gather" shader instruction for palettized lookups ("8-bit texture" mode); it saves 4 instructions, which isn't much, but still... (not tested, I don't have an ATI card)
- may fix the intel gma "no output" bug (don't have gma either :P)
- and the usual small code optimizations


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1549 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-07-22 03:55:28 +00:00
parent 6aeda9884a
commit 83b15dbac0
52 changed files with 904 additions and 1239 deletions

View File

@ -535,11 +535,13 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
fprintf(file, "%6d %6d | ", (int)((float)trlen * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000));
const GSOffset* o = mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
start = clock();
for(int j = 0; j < n; j++)
{
(mem.*rtx)(r, ptr, w * 4, TEX0, TEXA);
(mem.*rtx)(o, r, ptr, w * 4, TEXA);
}
end = clock();
@ -552,7 +554,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
for(int j = 0; j < n; j++)
{
(mem.*rtxP)(r, ptr, w, TEX0, TEXA);
(mem.*rtxP)(o, r, ptr, w, TEXA);
}
end = clock();

View File

@ -815,42 +815,11 @@ REG64_(GIFReg, TEX1)
uint32 _PAD2:9;
uint32 L:2;
uint32 _PAD3:11;
uint32 K:12; // TODO: 1:7:4 (signed? 16x scaled?)
int32 K:12; // 1:7:4
uint32 _PAD4:20;
REG_END2
// Reports the minification filter as linear when MMIN equals 1 or has bit 2 (value 4) set.
bool IsMinLinear() const {return MMIN == 1 || (MMIN & 4) != 0;}
// Reports the magnification filter as linear whenever MMAG is non-zero.
bool IsMagLinear() const {return MMAG != 0;}
// Filtering decision based on the register fields alone.
// With LCM clear, linear is reported if either the magnification or the minification
// filter is linear. With LCM set, K <= 0 selects the magnification filter and K > 0
// selects the minification filter.
bool IsLinear() const
{
	const bool linmin = IsMinLinear();
	const bool linmag = IsMagLinear();

	if(!LCM)
	{
		return linmag || linmin;
	}

	return K <= 0 ? linmag : linmin;
}
// Filtering decision for a range of q values [qmin, qmax].
// NOTE(review): qmin/qmax are presumably the perspective q bounds of the primitive - confirm with callers.
//
// Returns the shared answer when both filters agree. Otherwise it estimates the lowest and
// highest LOD and picks the magnification filter when the whole range is <= 0, the
// minification filter when the whole range is > 0, and "either" when the range straddles 0.
bool IsLinear(float qmin, float qmax) const
{
	const bool linmin = IsMinLinear();
	const bool linmag = IsMagLinear();

	// Both filters agree, so the LOD cannot change the outcome.
	if(linmag == linmin)
	{
		return linmag;
	}

	// K is taken as the base LOD as stored; no rescaling is applied here.
	float lod_lo = K;
	float lod_hi = K;

	if(!LCM)
	{
		// (1 << L) / log(2) turns the natural log below into log2 scaled by 2^L.
		float scale = (float)(1 << L) / log(2.0f);

		lod_lo += log(1.0f / abs(qmax)) * scale;
		lod_hi += log(1.0f / abs(qmin)) * scale;
	}

	if(lod_hi <= 0)
	{
		return linmag;
	}

	if(lod_lo > 0)
	{
		return linmin;
	}

	return linmag || linmin;
}
REG_END2
REG64_(GIFReg, TEX2)

View File

@ -168,16 +168,16 @@ void GSClut::WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& T
template<int n> void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
GSLocalMemory::PixelOffset* po = m_mem->GetPixelOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
GSOffset* o = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
uint32* RESTRICT s = &m_mem->m_vm32[po->row[TEXCLUT.COV]];
int* RESTRICT o = &po->col[0][TEXCLUT.COU << 4];
uint32* RESTRICT s = &m_mem->m_vm32[o->pixel.row[TEXCLUT.COV]];
int* RESTRICT col = &o->pixel.col[0][TEXCLUT.COU << 4];
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for(int i = 0; i < n; i++)
{
uint32 c = s[o[i]];
uint32 c = s[col[i]];
clut[i] = (uint16)(c & 0xffff);
clut[i + 256] = (uint16)(c >> 16);
@ -186,31 +186,31 @@ template<int n> void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFR
template<int n> void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
GSLocalMemory::PixelOffset* po = m_mem->GetPixelOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
GSOffset* o = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
uint16* RESTRICT s = &m_mem->m_vm16[po->row[TEXCLUT.COV]];
int* RESTRICT o = &po->col[0][TEXCLUT.COU << 4];
uint16* RESTRICT s = &m_mem->m_vm16[o->pixel.row[TEXCLUT.COV]];
int* RESTRICT col = &o->pixel.col[0][TEXCLUT.COU << 4];
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for(int i = 0; i < n; i++)
{
clut[i] = s[o[i]];
clut[i] = s[col[i]];
}
}
template<int n> void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
GSLocalMemory::PixelOffset* po = m_mem->GetPixelOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
GSOffset* o = m_mem->GetOffset(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
uint16* RESTRICT s = &m_mem->m_vm16[po->row[TEXCLUT.COV]];
int* RESTRICT o = &po->col[0][TEXCLUT.COU << 4];
uint16* RESTRICT s = &m_mem->m_vm16[o->pixel.row[TEXCLUT.COV]];
int* RESTRICT col = &o->pixel.col[0][TEXCLUT.COU << 4];
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
for(int i = 0; i < n; i++)
{
clut[i] = s[o[i]];
clut[i] = s[col[i]];
}
}

View File

@ -251,3 +251,46 @@ bool GSDevice::ResizeTexture(GSTexture** t, int w, int h)
return t2 != NULL;
}
// Records the feature level and fills m_shader with the matching SHADER_MODEL value
// and shader compile profiles.
//
// level       - feature level to configure for
// compat_mode - only consulted for the 9.x levels: true selects the "*_4_0_level_9_x"
//               profiles, false selects vs_2_0/ps_2_0 or vs_3_0/ps_3_0
//
// Returns true for a handled level. For any other value it asserts and returns false;
// m_shader.level has already been overwritten at that point. The 9.x branches do not
// touch m_shader.gs.
bool GSDevice::SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode)
{
	m_shader.level = level;

	if(level == D3D_FEATURE_LEVEL_9_1 || level == D3D_FEATURE_LEVEL_9_2)
	{
		m_shader.model = "0x200";
		m_shader.vs = compat_mode ? "vs_4_0_level_9_1" : "vs_2_0";
		m_shader.ps = compat_mode ? "ps_4_0_level_9_1" : "ps_2_0";

		return true;
	}

	if(level == D3D_FEATURE_LEVEL_9_3)
	{
		m_shader.model = "0x300";
		m_shader.vs = compat_mode ? "vs_4_0_level_9_3" : "vs_3_0";
		m_shader.ps = compat_mode ? "ps_4_0_level_9_3" : "ps_3_0";

		return true;
	}

	if(level == D3D_FEATURE_LEVEL_10_0)
	{
		m_shader.model = "0x400";
		m_shader.vs = "vs_4_0";
		m_shader.gs = "gs_4_0";
		m_shader.ps = "ps_4_0";

		return true;
	}

	if(level == D3D_FEATURE_LEVEL_10_1)
	{
		m_shader.model = "0x401";
		m_shader.vs = "vs_4_1";
		m_shader.gs = "gs_4_1";
		m_shader.ps = "ps_4_1";

		return true;
	}

	if(level == D3D_FEATURE_LEVEL_11_0)
	{
		m_shader.model = "0x500";
		m_shader.vs = "vs_5_0";
		m_shader.gs = "gs_5_0";
		m_shader.ps = "ps_5_0";

		return true;
	}

	// Unknown feature level.
	ASSERT(0);

	return false;
}

View File

@ -62,6 +62,7 @@ protected:
GSTexture* m_blend;
GSTexture* m_1x1;
GSTexture* m_current;
struct {D3D_FEATURE_LEVEL level; string model, vs, gs, ps;} m_shader;
virtual GSTexture* Create(int type, int w, int h, int format) = 0;
@ -103,6 +104,9 @@ public:
virtual void StretchRect(GSTexture* st, GSTexture* dt, const GSVector4& dr, int shader = 0, bool linear = true);
virtual void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, int shader = 0, bool linear = true) {}
virtual void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) {}
virtual void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL) {}
GSTexture* GetCurrent();
void Merge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, const GSVector2i& fs, bool slbg, bool mmod, const GSVector4& c);
@ -112,7 +116,7 @@ public:
bool IsRBSwapped() {return m_rbswapped;}
template<class T> void PrepareShaderMacro(vector<T>& dst, const T* src, const char* model)
template<class T> void PrepareShaderMacro(vector<T>& dst, const T* src)
{
dst.clear();
@ -124,7 +128,7 @@ public:
T m;
m.Name = "SHADER_MODEL";
m.Definition = model;
m.Definition = m_shader.model.c_str();
dst.push_back(m);
@ -133,4 +137,6 @@ public:
dst.push_back(m);
}
bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode); // TODO: GSDeviceDX
};

View File

@ -92,7 +92,28 @@ bool GSDevice10::Create(GSWnd* wnd, bool vsync)
flags |= D3D10_CREATE_DEVICE_DEBUG;
#endif
hr = D3D10CreateDeviceAndSwapChain(NULL, D3D10_DRIVER_TYPE_HARDWARE, NULL, flags, D3D10_SDK_VERSION, &scd, &m_swapchain, &m_dev);
D3D10_FEATURE_LEVEL1 levels[] =
{
D3D10_FEATURE_LEVEL_10_1,
D3D10_FEATURE_LEVEL_10_0
};
for(int i = 0; i < countof(levels); i++)
{
hr = D3D10CreateDeviceAndSwapChain1(NULL, D3D10_DRIVER_TYPE_HARDWARE, NULL, flags, levels[i], D3D10_1_SDK_VERSION, &scd, &m_swapchain, &m_dev);
if(SUCCEEDED(hr))
{
if(!SetFeatureLevel((D3D_FEATURE_LEVEL)levels[i], true))
{
return false;
}
break;
}
}
// hr = D3D10CreateDeviceAndSwapChain(NULL, D3D10_DRIVER_TYPE_HARDWARE, NULL, flags, D3D10_SDK_VERSION, &scd, &m_swapchain, &m_dev);
if(FAILED(hr)) return false;
@ -219,14 +240,17 @@ bool GSDevice10::Reset(int w, int h, int mode)
if(!__super::Reset(w, h, mode))
return false;
DXGI_SWAP_CHAIN_DESC scd;
memset(&scd, 0, sizeof(scd));
m_swapchain->GetDesc(&scd);
m_swapchain->ResizeBuffers(scd.BufferCount, w, h, scd.BufferDesc.Format, 0);
CComPtr<ID3D10Texture2D> backbuffer;
m_swapchain->GetBuffer(0, __uuidof(ID3D10Texture2D), (void**)&backbuffer);
m_backbuffer = new GSTexture10(backbuffer);
if(m_swapchain)
{
DXGI_SWAP_CHAIN_DESC scd;
memset(&scd, 0, sizeof(scd));
m_swapchain->GetDesc(&scd);
m_swapchain->ResizeBuffers(scd.BufferCount, w, h, scd.BufferDesc.Format, 0);
CComPtr<ID3D10Texture2D> backbuffer;
m_swapchain->GetBuffer(0, __uuidof(ID3D10Texture2D), (void**)&backbuffer);
m_backbuffer = new GSTexture10(backbuffer);
}
return true;
}
@ -454,10 +478,6 @@ void GSDevice10::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
PSSetSamplerState(linear ? m_convert.ln : m_convert.pt, NULL);
PSSetShaderResources(st, NULL);
// rs
RSSet(ds);
//
DrawPrimitive();
@ -560,12 +580,12 @@ void GSDevice10::IASetVertexBuffer(ID3D10Buffer* vb, size_t stride)
{
if(m_vb != vb || m_vb_stride != stride)
{
m_vb = vb;
m_vb_stride = stride;
uint32 offset = 0;
m_dev->IASetVertexBuffers(0, 1, &vb, &stride, &offset);
m_vb = vb;
m_vb_stride = stride;
}
}
@ -573,9 +593,9 @@ void GSDevice10::IASetInputLayout(ID3D10InputLayout* layout)
{
if(m_layout != layout)
{
m_dev->IASetInputLayout(layout);
m_layout = layout;
m_dev->IASetInputLayout(layout);
}
}
@ -583,9 +603,9 @@ void GSDevice10::IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY topology)
{
if(m_topology != topology)
{
m_dev->IASetPrimitiveTopology(topology);
m_topology = topology;
m_dev->IASetPrimitiveTopology(topology);
}
}
@ -593,16 +613,16 @@ void GSDevice10::VSSetShader(ID3D10VertexShader* vs, ID3D10Buffer* vs_cb)
{
if(m_vs != vs)
{
m_dev->VSSetShader(vs);
m_vs = vs;
m_dev->VSSetShader(vs);
}
if(m_vs_cb != vs_cb)
{
m_dev->VSSetConstantBuffers(0, 1, &vs_cb);
m_vs_cb = vs_cb;
m_dev->VSSetConstantBuffers(0, 1, &vs_cb);
}
}
@ -610,9 +630,9 @@ void GSDevice10::GSSetShader(ID3D10GeometryShader* gs)
{
if(m_gs != gs)
{
m_dev->GSSetShader(gs);
m_gs = gs;
m_dev->GSSetShader(gs);
}
}
@ -626,12 +646,12 @@ void GSDevice10::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
if(m_ps_srv[0] != srv0 || m_ps_srv[1] != srv1)
{
m_ps_srv[0] = srv0;
m_ps_srv[1] = srv1;
ID3D10ShaderResourceView* srvs[] = {srv0, srv1};
m_dev->PSSetShaderResources(0, 2, srvs);
m_ps_srv[0] = srv0;
m_ps_srv[1] = srv1;
}
}
@ -639,16 +659,16 @@ void GSDevice10::PSSetShader(ID3D10PixelShader* ps, ID3D10Buffer* ps_cb)
{
if(m_ps != ps)
{
m_dev->PSSetShader(ps);
m_ps = ps;
m_dev->PSSetShader(ps);
}
if(m_ps_cb != ps_cb)
{
m_dev->PSSetConstantBuffers(0, 1, &ps_cb);
m_ps_cb = ps_cb;
m_dev->PSSetConstantBuffers(0, 1, &ps_cb);
}
}
@ -656,42 +676,12 @@ void GSDevice10::PSSetSamplerState(ID3D10SamplerState* ss0, ID3D10SamplerState*
{
if(m_ps_ss[0] != ss0 || m_ps_ss[1] != ss1)
{
m_ps_ss[0] = ss0;
m_ps_ss[1] = ss1;
ID3D10SamplerState* sss[] = {ss0, ss1};
m_dev->PSSetSamplers(0, 2, sss);
m_ps_ss[0] = ss0;
m_ps_ss[1] = ss1;
}
}
void GSDevice10::RSSet(const GSVector2i& size, const GSVector4i* scissor)
{
if(m_viewport != size)
{
D3D10_VIEWPORT vp;
memset(&vp, 0, sizeof(vp));
vp.TopLeftX = 0;
vp.TopLeftY = 0;
vp.Width = size.x;
vp.Height = size.y;
vp.MinDepth = 0.0f;
vp.MaxDepth = 1.0f;
m_dev->RSSetViewports(1, &vp);
m_viewport = size;
}
GSVector4i r = scissor ? *scissor : GSVector4i(size).zwxy();
if(!m_scissor.eq(r))
{
m_dev->RSSetScissorRects(1, r);
m_scissor = r;
}
}
@ -699,10 +689,10 @@ void GSDevice10::OMSetDepthStencilState(ID3D10DepthStencilState* dss, uint8 sref
{
if(m_dss != dss || m_sref != sref)
{
m_dev->OMSetDepthStencilState(dss, sref);
m_dss = dss;
m_sref = sref;
m_dev->OMSetDepthStencilState(dss, sref);
}
}
@ -710,16 +700,16 @@ void GSDevice10::OMSetBlendState(ID3D10BlendState* bs, float bf)
{
if(m_bs != bs || m_bf != bf)
{
m_bs = bs;
m_bf = bf;
float BlendFactor[] = {bf, bf, bf, 0};
m_dev->OMSetBlendState(bs, BlendFactor, 0xffffffff);
m_bs = bs;
m_bf = bf;
}
}
void GSDevice10::OMSetRenderTargets(GSTexture* rt, GSTexture* ds)
void GSDevice10::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor)
{
ID3D10RenderTargetView* rtv = NULL;
ID3D10DepthStencilView* dsv = NULL;
@ -729,10 +719,37 @@ void GSDevice10::OMSetRenderTargets(GSTexture* rt, GSTexture* ds)
if(m_rtv != rtv || m_dsv != dsv)
{
m_dev->OMSetRenderTargets(1, &rtv, dsv);
m_rtv = rtv;
m_dsv = dsv;
m_dev->OMSetRenderTargets(1, &rtv, dsv);
}
if(m_viewport != rt->m_size)
{
m_viewport = rt->m_size;
D3D10_VIEWPORT vp;
memset(&vp, 0, sizeof(vp));
vp.TopLeftX = 0;
vp.TopLeftY = 0;
vp.Width = rt->m_size.x;
vp.Height = rt->m_size.y;
vp.MinDepth = 0.0f;
vp.MaxDepth = 1.0f;
m_dev->RSSetViewports(1, &vp);
}
GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy();
if(!m_scissor.eq(r))
{
m_scissor = r;
m_dev->RSSetScissorRects(1, r);
}
}
@ -742,11 +759,11 @@ HRESULT GSDevice10::CompileShader(uint32 id, const string& entry, D3D10_SHADER_M
vector<D3D10_SHADER_MACRO> m;
PrepareShaderMacro(m, macro, "0x400");
PrepareShaderMacro(m, macro);
CComPtr<ID3D10Blob> shader, error;
hr = D3DX10CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), "vs_4_0", 0, 0, NULL, &shader, &error, NULL);
hr = D3DX10CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), m_shader.vs.c_str(), 0, 0, NULL, &shader, &error, NULL);
if(error)
{
@ -781,11 +798,11 @@ HRESULT GSDevice10::CompileShader(uint32 id, const string& entry, D3D10_SHADER_M
vector<D3D10_SHADER_MACRO> m;
PrepareShaderMacro(m, macro, "0x400");
PrepareShaderMacro(m, macro);
CComPtr<ID3D10Blob> shader, error;
hr = D3DX10CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), "gs_4_0", 0, 0, NULL, &shader, &error, NULL);
hr = D3DX10CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), m_shader.gs.c_str(), 0, 0, NULL, &shader, &error, NULL);
if(error)
{
@ -813,11 +830,11 @@ HRESULT GSDevice10::CompileShader(uint32 id, const string& entry, D3D10_SHADER_M
vector<D3D10_SHADER_MACRO> m;
PrepareShaderMacro(m, macro, "0x400");
PrepareShaderMacro(m, macro);
CComPtr<ID3D10Blob> shader, error;
hr = D3DX10CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), "ps_4_0", 0, 0, NULL, &shader, &error, NULL);
hr = D3DX10CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), m_shader.ps.c_str(), 0, 0, NULL, &shader, &error, NULL);
if(error)
{

View File

@ -55,7 +55,7 @@ class GSDevice10 : public GSDevice
//
CComPtr<ID3D10Device> m_dev;
CComPtr<ID3D10Device1> m_dev;
CComPtr<IDXGISwapChain> m_swapchain;
struct
@ -130,10 +130,9 @@ public:
void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1);
void PSSetShader(ID3D10PixelShader* ps, ID3D10Buffer* ps_cb);
void PSSetSamplerState(ID3D10SamplerState* ss0, ID3D10SamplerState* ss1);
void RSSet(const GSVector2i& size, const GSVector4i* scissor = NULL);
void OMSetDepthStencilState(ID3D10DepthStencilState* dss, uint8 sref);
void OMSetBlendState(ID3D10BlendState* bs, float bf);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
ID3D10Device* operator->() {return m_dev;}
operator ID3D10Device*() {return m_dev;}

View File

@ -92,48 +92,29 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync)
flags |= D3D11_CREATE_DEVICE_DEBUG;
#endif
hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, flags, NULL, 0, D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &m_level, &m_ctx);
// hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_REFERENCE, NULL, flags, NULL, 0, D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &m_level, &m_ctx);
D3D_FEATURE_LEVEL levels[] =
{
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_10_1,
D3D_FEATURE_LEVEL_10_0,
};
D3D_FEATURE_LEVEL level;
hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, flags, levels, countof(levels), D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &level, &m_ctx);
// hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_REFERENCE, NULL, flags, NULL, 0, D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &level, &m_ctx);
if(FAILED(hr)) return false;
if(!SetFeatureLevel(level, true))
{
return false;
}
D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS options;
hr = m_dev->CheckFeatureSupport(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &options, sizeof(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS));
switch(m_level)
{
case D3D_FEATURE_LEVEL_9_1:
case D3D_FEATURE_LEVEL_9_2:
m_shader.model = "0x200";
m_shader.vs = "vs_4_0_level_9_1";
m_shader.ps = "ps_4_0_level_9_1";
break;
case D3D_FEATURE_LEVEL_9_3:
m_shader.model = "0x300";
m_shader.vs = "vs_4_0_level_9_3";
m_shader.ps = "ps_4_0_level_9_3";
break;
case D3D_FEATURE_LEVEL_10_0:
case D3D_FEATURE_LEVEL_10_1:
m_shader.model = "0x400";
m_shader.vs = "vs_4_0";
m_shader.gs = "gs_4_0";
m_shader.ps = "ps_4_0";
break;
case D3D_FEATURE_LEVEL_11_0:
m_shader.model = "0x500";
m_shader.vs = "vs_5_0";
m_shader.gs = "gs_5_0";
m_shader.ps = "ps_5_0";
break;
}
if(m_level < D3D_FEATURE_LEVEL_10_0)
{
return false;
}
// convert
D3D11_INPUT_ELEMENT_DESC il_convert[] =
@ -256,14 +237,17 @@ bool GSDevice11::Reset(int w, int h, int mode)
if(!__super::Reset(w, h, mode))
return false;
DXGI_SWAP_CHAIN_DESC scd;
memset(&scd, 0, sizeof(scd));
m_swapchain->GetDesc(&scd);
m_swapchain->ResizeBuffers(scd.BufferCount, w, h, scd.BufferDesc.Format, 0);
CComPtr<ID3D11Texture2D> backbuffer;
m_swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backbuffer);
m_backbuffer = new GSTexture11(backbuffer);
if(m_swapchain)
{
DXGI_SWAP_CHAIN_DESC scd;
memset(&scd, 0, sizeof(scd));
m_swapchain->GetDesc(&scd);
m_swapchain->ResizeBuffers(scd.BufferCount, w, h, scd.BufferDesc.Format, 0);
CComPtr<ID3D11Texture2D> backbuffer;
m_swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backbuffer);
m_backbuffer = new GSTexture11(backbuffer);
}
return true;
}
@ -491,10 +475,6 @@ void GSDevice11::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt,
PSSetSamplerState(linear ? m_convert.ln : m_convert.pt, NULL);
PSSetShaderResources(st, NULL);
// rs
RSSet(ds);
//
DrawPrimitive();
@ -597,12 +577,12 @@ void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride)
{
if(m_vb != vb || m_vb_stride != stride)
{
m_vb = vb;
m_vb_stride = stride;
uint32 offset = 0;
m_ctx->IASetVertexBuffers(0, 1, &vb, &stride, &offset);
m_vb = vb;
m_vb_stride = stride;
}
}
@ -610,9 +590,9 @@ void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout)
{
if(m_layout != layout)
{
m_ctx->IASetInputLayout(layout);
m_layout = layout;
m_ctx->IASetInputLayout(layout);
}
}
@ -620,9 +600,9 @@ void GSDevice11::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology)
{
if(m_topology != topology)
{
m_ctx->IASetPrimitiveTopology(topology);
m_topology = topology;
m_ctx->IASetPrimitiveTopology(topology);
}
}
@ -630,16 +610,16 @@ void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb)
{
if(m_vs != vs)
{
m_ctx->VSSetShader(vs, NULL, 0);
m_vs = vs;
m_ctx->VSSetShader(vs, NULL, 0);
}
if(m_vs_cb != vs_cb)
{
m_ctx->VSSetConstantBuffers(0, 1, &vs_cb);
m_vs_cb = vs_cb;
m_ctx->VSSetConstantBuffers(0, 1, &vs_cb);
}
}
@ -663,12 +643,12 @@ void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
if(m_ps_srv[0] != srv0 || m_ps_srv[1] != srv1)
{
m_ps_srv[0] = srv0;
m_ps_srv[1] = srv1;
ID3D11ShaderResourceView* srvs[] = {srv0, srv1};
m_ctx->PSSetShaderResources(0, 2, srvs);
m_ps_srv[0] = srv0;
m_ps_srv[1] = srv1;
}
}
@ -676,16 +656,16 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
{
if(m_ps != ps)
{
m_ctx->PSSetShader(ps, NULL, 0);
m_ps = ps;
m_ctx->PSSetShader(ps, NULL, 0);
}
if(m_ps_cb != ps_cb)
{
m_ctx->PSSetConstantBuffers(0, 1, &ps_cb);
m_ps_cb = ps_cb;
m_ctx->PSSetConstantBuffers(0, 1, &ps_cb);
}
}
@ -693,42 +673,12 @@ void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState*
{
if(m_ps_ss[0] != ss0 || m_ps_ss[1] != ss1)
{
m_ps_ss[0] = ss0;
m_ps_ss[1] = ss1;
ID3D11SamplerState* sss[] = {ss0, ss1};
m_ctx->PSSetSamplers(0, 2, sss);
m_ps_ss[0] = ss0;
m_ps_ss[1] = ss1;
}
}
void GSDevice11::RSSet(const GSVector2i& size, const GSVector4i* scissor)
{
if(m_viewport != size)
{
D3D11_VIEWPORT vp;
memset(&vp, 0, sizeof(vp));
vp.TopLeftX = 0;
vp.TopLeftY = 0;
vp.Width = size.x;
vp.Height = size.y;
vp.MinDepth = 0.0f;
vp.MaxDepth = 1.0f;
m_ctx->RSSetViewports(1, &vp);
m_viewport = size;
}
GSVector4i r = scissor ? *scissor : GSVector4i(size).zwxy();
if(!m_scissor.eq(r))
{
m_ctx->RSSetScissorRects(1, r);
m_scissor = r;
}
}
@ -756,7 +706,7 @@ void GSDevice11::OMSetBlendState(ID3D11BlendState* bs, float bf)
}
}
void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds)
void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor)
{
ID3D11RenderTargetView* rtv = NULL;
ID3D11DepthStencilView* dsv = NULL;
@ -766,10 +716,37 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds)
if(m_rtv != rtv || m_dsv != dsv)
{
m_ctx->OMSetRenderTargets(1, &rtv, dsv);
m_rtv = rtv;
m_dsv = dsv;
m_ctx->OMSetRenderTargets(1, &rtv, dsv);
}
if(m_viewport != rt->m_size)
{
m_viewport = rt->m_size;
D3D11_VIEWPORT vp;
memset(&vp, 0, sizeof(vp));
vp.TopLeftX = 0;
vp.TopLeftY = 0;
vp.Width = rt->m_size.x;
vp.Height = rt->m_size.y;
vp.MinDepth = 0.0f;
vp.MaxDepth = 1.0f;
m_ctx->RSSetViewports(1, &vp);
}
GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy();
if(!m_scissor.eq(r))
{
m_scissor = r;
m_ctx->RSSetScissorRects(1, r);
}
}
@ -779,7 +756,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
vector<D3D11_SHADER_MACRO> m;
PrepareShaderMacro(m, macro, m_shader.model.c_str());
PrepareShaderMacro(m, macro);
CComPtr<ID3D11Blob> shader, error;
@ -818,7 +795,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
vector<D3D11_SHADER_MACRO> m;
PrepareShaderMacro(m, macro, m_shader.model.c_str());
PrepareShaderMacro(m, macro);
CComPtr<ID3D11Blob> shader, error;
@ -850,7 +827,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
vector<D3D11_SHADER_MACRO> m;
PrepareShaderMacro(m, macro, m_shader.model.c_str());
PrepareShaderMacro(m, macro);
CComPtr<ID3D10Blob> shader, error;

View File

@ -55,11 +55,9 @@ class GSDevice11 : public GSDevice
//
D3D_FEATURE_LEVEL m_level;
CComPtr<ID3D11Device> m_dev;
CComPtr<ID3D11DeviceContext> m_ctx;
CComPtr<IDXGISwapChain> m_swapchain;
struct {string model, vs, gs, ps;} m_shader;
struct
{
@ -133,10 +131,9 @@ public:
void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1);
void PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb);
void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1);
void RSSet(const GSVector2i& size, const GSVector4i* scissor = NULL);
void OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref);
void OMSetBlendState(ID3D11BlendState* bs, float bf);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
ID3D11Device* operator->() {return m_dev;}
operator ID3D11Device*() {return m_dev;}

View File

@ -108,13 +108,26 @@ bool GSDevice9::Create(GSWnd* wnd, bool vsync)
m_d3d->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &m_d3dcaps);
if(!Reset(1, 1, theApp.GetConfig("windowed", 1) ? Windowed : Fullscreen)) return false;
//
m_dev->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
if(m_d3dcaps.VertexShaderVersion < (m_d3dcaps.PixelShaderVersion & ~0x10000))
{
ASSERT(0);
// shaders
return false;
}
if(m_d3dcaps.PixelShaderVersion < D3DPS_VERSION(2, 0))
m_d3dcaps.VertexShaderVersion = m_d3dcaps.PixelShaderVersion & ~0x10000;
if(m_d3dcaps.PixelShaderVersion >= D3DPS_VERSION(3, 0))
{
SetFeatureLevel(D3D_FEATURE_LEVEL_9_3, false);
}
else if(m_d3dcaps.PixelShaderVersion >= D3DPS_VERSION(2, 0))
{
SetFeatureLevel(D3D_FEATURE_LEVEL_9_2, false);
}
else
{
string s = format(
"Supported pixel shader version is too low!\n\nSupported: %d.%d\nNeeded: 2.0 or higher",
@ -125,6 +138,15 @@ bool GSDevice9::Create(GSWnd* wnd, bool vsync)
return false;
}
//
if(!Reset(1, 1, theApp.GetConfig("windowed", 1) ? Windowed : Fullscreen))
{
return false;
}
m_dev->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
// convert
static const D3DVERTEXELEMENT9 il_convert[] =
@ -635,10 +657,6 @@ void GSDevice9::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, c
PSSetSamplerState(linear ? &m_convert.ln : &m_convert.pt);
PSSetShaderResources(st, NULL);
// rs
RSSet(ds);
//
DrawPrimitive();
@ -732,10 +750,10 @@ void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride)
{
if(m_vb != vb || m_vb_stride != stride)
{
m_dev->SetStreamSource(0, vb, 0, stride);
m_vb = vb;
m_vb_stride = stride;
m_dev->SetStreamSource(0, vb, 0, stride);
}
}
@ -743,9 +761,9 @@ void GSDevice9::IASetInputLayout(IDirect3DVertexDeclaration9* layout)
{
if(m_layout != layout)
{
m_dev->SetVertexDeclaration(layout);
m_layout = layout;
m_dev->SetVertexDeclaration(layout);
}
}
@ -758,9 +776,9 @@ void GSDevice9::VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int
{
if(m_vs != vs)
{
m_dev->SetVertexShader(vs);
m_vs = vs;
m_dev->SetVertexShader(vs);
}
if(vs_cb && vs_cb_len > 0)
@ -776,11 +794,11 @@ void GSDevice9::VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int
m_vs_cb = (float*)_aligned_malloc(size, 16);
}
m_vs_cb_len = vs_cb_len;
memcpy(m_vs_cb, vs_cb, size);
m_dev->SetVertexShaderConstantF(0, vs_cb, vs_cb_len);
m_vs_cb_len = vs_cb_len;
}
}
}
@ -795,16 +813,16 @@ void GSDevice9::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
if(m_ps_srvs[0] != srv0)
{
m_dev->SetTexture(0, srv0);
m_ps_srvs[0] = srv0;
m_dev->SetTexture(0, srv0);
}
if(m_ps_srvs[1] != srv1)
{
m_dev->SetTexture(1, srv1);
m_ps_srvs[1] = srv1;
m_dev->SetTexture(1, srv1);
}
}
@ -812,9 +830,9 @@ void GSDevice9::PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int p
{
if(m_ps != ps)
{
m_dev->SetPixelShader(ps);
m_ps = ps;
m_dev->SetPixelShader(ps);
}
if(ps_cb && ps_cb_len > 0)
@ -830,11 +848,11 @@ void GSDevice9::PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int p
m_ps_cb = (float*)_aligned_malloc(size, 16);
}
m_ps_cb_len = ps_cb_len;
memcpy(m_ps_cb, ps_cb, size);
m_dev->SetPixelShaderConstantF(0, ps_cb, ps_cb_len);
m_ps_cb_len = ps_cb_len;
}
}
}
@ -843,6 +861,7 @@ void GSDevice9::PSSetSamplerState(Direct3DSamplerState9* ss)
{
if(ss && m_ps_ss != ss)
{
m_ps_ss = ss;
m_dev->SetSamplerState(0, D3DSAMP_ADDRESSU, ss->AddressU);
m_dev->SetSamplerState(0, D3DSAMP_ADDRESSV, ss->AddressV);
@ -860,20 +879,6 @@ void GSDevice9::PSSetSamplerState(Direct3DSamplerState9* ss)
m_dev->SetSamplerState(2, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
m_dev->SetSamplerState(3, D3DSAMP_MINFILTER, D3DTEXF_POINT);
m_dev->SetSamplerState(3, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
m_ps_ss = ss;
}
}
void GSDevice9::RSSet(const GSVector2i& size, const GSVector4i* scissor)
{
GSVector4i r = scissor ? *scissor : GSVector4i(size).zwxy();
if(!m_scissor.eq(r))
{
m_dev->SetScissorRect(r);
m_scissor = r;
}
}
@ -881,6 +886,8 @@ void GSDevice9::OMSetDepthStencilState(Direct3DDepthStencilState9* dss)
{
if(m_dss != dss)
{
m_dss = dss;
m_dev->SetRenderState(D3DRS_ZENABLE, dss->DepthEnable);
m_dev->SetRenderState(D3DRS_ZWRITEENABLE, dss->DepthWriteMask);
@ -901,8 +908,6 @@ void GSDevice9::OMSetDepthStencilState(Direct3DDepthStencilState9* dss)
m_dev->SetRenderState(D3DRS_STENCILZFAIL, dss->StencilDepthFailOp);
m_dev->SetRenderState(D3DRS_STENCILREF, dss->StencilRef);
}
m_dss = dss;
}
}
@ -910,6 +915,9 @@ void GSDevice9::OMSetBlendState(Direct3DBlendState9* bs, uint32 bf)
{
if(m_bs != bs || m_bf != bf)
{
m_bs = bs;
m_bf = bf;
m_dev->SetRenderState(D3DRS_ALPHABLENDENABLE, bs->BlendEnable);
if(bs->BlendEnable)
@ -925,13 +933,10 @@ void GSDevice9::OMSetBlendState(Direct3DBlendState9* bs, uint32 bf)
}
m_dev->SetRenderState(D3DRS_COLORWRITEENABLE, bs->RenderTargetWriteMask);
m_bs = bs;
m_bf = bf;
}
}
void GSDevice9::OMSetRenderTargets(GSTexture* rt, GSTexture* ds)
void GSDevice9::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor)
{
IDirect3DSurface9* rtv = NULL;
IDirect3DSurface9* dsv = NULL;
@ -941,16 +946,25 @@ void GSDevice9::OMSetRenderTargets(GSTexture* rt, GSTexture* ds)
if(m_rtv != rtv)
{
m_dev->SetRenderTarget(0, rtv);
m_rtv = rtv;
m_dev->SetRenderTarget(0, rtv);
}
if(m_dsv != dsv)
{
m_dev->SetDepthStencilSurface(dsv);
m_dsv = dsv;
m_dev->SetDepthStencilSurface(dsv);
}
GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy();
if(!m_scissor.eq(r))
{
m_scissor = r;
m_dev->SetScissorRect(r);
}
}
@ -979,27 +993,9 @@ static HRESULT LoadShader(uint32 id, LPCSTR& data, uint32& size)
HRESULT GSDevice9::CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il)
{
const char* target;
const char* model;
if(m_d3dcaps.VertexShaderVersion >= D3DVS_VERSION(3, 0))
{
target = "vs_3_0";
model = "0x300";
}
else if(m_d3dcaps.VertexShaderVersion >= D3DVS_VERSION(2, 0))
{
target = "vs_2_0";
model = "0x200";
}
else
{
return E_FAIL;
}
vector<D3DXMACRO> m;
PrepareShaderMacro(m, macro, model);
PrepareShaderMacro(m, macro);
HRESULT hr;
@ -1014,7 +1010,7 @@ HRESULT GSDevice9::CompileShader(uint32 id, const string& entry, const D3DXMACRO
if(FAILED(hr)) return E_FAIL;
hr = D3DXCompileShader(data, size, &m[0], NULL, entry.c_str(), target, 0, &shader, &error, NULL);
hr = D3DXCompileShader(data, size, &m[0], NULL, entry.c_str(), m_shader.vs.c_str(), 0, &shader, &error, NULL);
if(SUCCEEDED(hr))
{
@ -1044,29 +1040,16 @@ HRESULT GSDevice9::CompileShader(uint32 id, const string& entry, const D3DXMACRO
HRESULT GSDevice9::CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DPixelShader9** ps)
{
const char* target = NULL;
const char* model;
uint32 flags = 0;
if(m_d3dcaps.PixelShaderVersion >= D3DPS_VERSION(3, 0))
if(m_shader.level >= D3D_FEATURE_LEVEL_9_3)
{
target = "ps_3_0";
model = "0x300";
flags |= D3DXSHADER_AVOID_FLOW_CONTROL;
}
else if(m_d3dcaps.PixelShaderVersion >= D3DPS_VERSION(2, 0))
{
target = "ps_2_0";
model = "0x200";
}
else
{
return false;
}
vector<D3DXMACRO> m;
PrepareShaderMacro(m, macro, model);
PrepareShaderMacro(m, macro);
HRESULT hr;
@ -1081,7 +1064,7 @@ HRESULT GSDevice9::CompileShader(uint32 id, const string& entry, const D3DXMACRO
if(FAILED(hr)) return E_FAIL;
hr = D3DXCompileShader(data, size, &m[0], NULL, entry.c_str(), target, 0, &shader, &error, NULL);
hr = D3DXCompileShader(data, size, &m[0], NULL, entry.c_str(), m_shader.ps.c_str(), 0, &shader, &error, NULL);
if(SUCCEEDED(hr))
{

View File

@ -167,10 +167,9 @@ public:
void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1);
void PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len);
void PSSetSamplerState(Direct3DSamplerState9* ss);
void RSSet(const GSVector2i& size, const GSVector4i* scissor = NULL);
void OMSetDepthStencilState(Direct3DDepthStencilState9* dss);
void OMSetBlendState(Direct3DBlendState9* bs, uint32 bf);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
IDirect3DDevice9* operator->() {return m_dev;}
operator IDirect3DDevice9*() {return m_dev;}

View File

@ -406,10 +406,17 @@ void GSDeviceOGL::IASetPrimitiveTopology(int topology)
m_topology = topology;
}
// Not implemented for the OpenGL device yet: the body is empty, so sr0 and sr1 are
// currently ignored.
void GSDeviceOGL::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
{
// TODO: implement; sr0 and sr1 are unused until then
}
void GSDeviceOGL::PSSetSamplerState(SamplerStateOGL* ss)
{
if(ss && m_ps_ss != ss)
{
m_ps_ss = ss;
glActiveTexture(GL_TEXTURE0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, ss->wrap.s);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, ss->wrap.t);
@ -433,27 +440,6 @@ void GSDeviceOGL::PSSetSamplerState(SamplerStateOGL* ss)
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_POINT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_POINT);
m_ps_ss = ss;
}
}
void GSDeviceOGL::RSSet(const GSVector2i& size, const GSVector4i* scissor)
{
if(m_viewport != size)
{
glViewport(0, 0, size.x, size.y); CheckError();
m_viewport = size;
}
GSVector4i r = scissor ? *scissor : GSVector4i(size).zwxy();
if(!m_scissor.eq(r))
{
glScissor(r.left, r.top, r.width(), r.height()); CheckError();
m_scissor = r;
}
}
@ -461,6 +447,8 @@ void GSDeviceOGL::OMSetDepthStencilState(DepthStencilStateOGL* dss)
{
if(m_dss != dss)
{
m_dss = dss;
if(dss->depth.enable)
{
glEnable(GL_DEPTH_TEST); CheckError();
@ -483,8 +471,6 @@ void GSDeviceOGL::OMSetDepthStencilState(DepthStencilStateOGL* dss)
{
glDisable(GL_STENCIL_TEST); CheckError();
}
m_dss = dss;
}
}
@ -492,6 +478,9 @@ void GSDeviceOGL::OMSetBlendState(BlendStateOGL* bs, float bf)
{
if(m_bs != bs || m_bf != bf)
{
m_bs = bs;
m_bf = bf;
if(bs->enable)
{
glEnable(GL_BLEND); CheckError();
@ -505,13 +494,10 @@ void GSDeviceOGL::OMSetBlendState(BlendStateOGL* bs, float bf)
}
glColorMask(bs->mask.r, bs->mask.g, bs->mask.b, bs->mask.a); CheckError();
m_bs = bs;
m_bf = bf;
}
}
void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds)
void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor)
{
GLuint rti = 0;
GLuint dsi = 0;
@ -521,15 +507,31 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds)
if(m_rt != rti)
{
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, rti); CheckError();
m_rt = rti;
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, rti); CheckError();
}
if(m_ds != dsi)
{
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_DEPTH_COMPONENT, dsi); CheckError();
m_ds = dsi;
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_DEPTH_COMPONENT, dsi); CheckError();
}
if(m_viewport != rt->m_size)
{
m_viewport = rt->m_size;
glViewport(0, 0, rt->m_size.x, rt->m_size.y); CheckError();
}
GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy();
if(!m_scissor.eq(r))
{
m_scissor = r;
glScissor(r.left, r.top, r.width(), r.height()); CheckError();
}
}

View File

@ -138,11 +138,11 @@ public:
void IASetInputLayout(); // TODO
void IASetPrimitiveTopology(int topology);
void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1);
void PSSetSamplerState(SamplerStateOGL* ss);
void RSSet(const GSVector2i& size, const GSVector4i* scissor);
void OMSetDepthStencilState(DepthStencilStateOGL* dss);
void OMSetBlendState(BlendStateOGL* bs, float bf);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
static void CheckError()
{

View File

@ -46,10 +46,10 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
m_sel = p->sel;
m_env.vm = p->vm;
m_env.fbr = p->fbo->row;
m_env.zbr = p->zbo->row;
m_env.fbc = p->fbo->col[0];
m_env.zbc = p->zbo->col[0];
m_env.fbr = p->fbo->pixel.row;
m_env.zbr = p->zbo->pixel.row;
m_env.fbc = p->fbo->pixel.col[0];
m_env.zbc = p->zbo->pixel.col[0];
m_env.fzbr = p->fzbo->row;
m_env.fzbc = p->fzbo->col;
m_env.fm = GSVector4i(p->fm);

View File

@ -51,8 +51,18 @@ public:
GSVector4 ex;
} scissor;
// Pointers to offset lookup tables associated with this drawing context.
// The constructor zeroes the whole struct, so every pointer starts out NULL.
// NOTE(review): fb / zb / tex / fzb presumably refer to the frame buffer, Z buffer,
// texture and combined frame+Z tables - confirm where they are assigned.
struct
{
GSOffset* fb;
GSOffset* zb;
GSOffset* tex;
GSPixelOffset4* fzb;
} offset;
GSDrawingContext()
{
memset(&offset, 0, sizeof(offset));
Reset();
}

View File

@ -30,18 +30,17 @@
#define ASSERT_BLOCK(r, w, h) \
ASSERT((r).width() >= w && (r).height() >= h && !((r).left & (w - 1)) && !((r).top & (h - 1)) && !((r).right & (w - 1)) && !((r).bottom & (h - 1))); \
#define FOREACH_BLOCK_START(r, w, h, bpp, psm) \
#define FOREACH_BLOCK_START(r, w, h, bpp) \
ASSERT_BLOCK(r, w, h); \
const GSLocalMemory::BlockOffset* RESTRICT _bo = GetBlockOffset(TEX0.TBP0, TEX0.TBW, psm); \
GSVector4i _r = r >> 3; \
uint8* _dst = dst - _r.left * bpp; \
int _offset = dstpitch * h; \
for(int y = _r.top; y < _r.bottom; y += h >> 3, _dst += _offset) \
{ \
uint32 _base = _bo->row[y]; \
uint32 _base = o->block.row[y]; \
for(int x = _r.left; x < _r.right; x += w >> 3) \
{ \
const uint8* src = BlockPtr(_base + _bo->col[x]); \
const uint8* src = BlockPtr(_base + o->block.col[x]); \
uint8* dst = &_dst[x * bpp]; \
#define FOREACH_BLOCK_END }}
@ -206,13 +205,11 @@ GSLocalMemory::GSLocalMemory()
m_psm[i].wp = &GSLocalMemory::WritePixel32;
m_psm[i].wpa = &GSLocalMemory::WritePixel32;
m_psm[i].rt = &GSLocalMemory::ReadTexel32;
m_psm[i].rtNP = &GSLocalMemory::ReadTexel32;
m_psm[i].rta = &GSLocalMemory::ReadTexel32;
m_psm[i].wfa = &GSLocalMemory::WritePixel32;
m_psm[i].wi = &GSLocalMemory::WriteImage<PSM_PSMCT32, 8, 8, 32>;
m_psm[i].ri = &GSLocalMemory::ReadImageX; // TODO
m_psm[i].rtx = &GSLocalMemory::ReadTexture32;
m_psm[i].rtxNP = &GSLocalMemory::ReadTexture32;
m_psm[i].rtxP = &GSLocalMemory::ReadTexture32;
m_psm[i].rtxb = &GSLocalMemory::ReadTextureBlock32;
m_psm[i].rtxbP = &GSLocalMemory::ReadTextureBlock32;
@ -328,18 +325,6 @@ GSLocalMemory::GSLocalMemory()
m_psm[PSM_PSMZ16].wfa = &GSLocalMemory::WriteFrame16;
m_psm[PSM_PSMZ16S].wfa = &GSLocalMemory::WriteFrame16;
m_psm[PSM_PSMCT16].rtNP = &GSLocalMemory::ReadTexel16NP;
m_psm[PSM_PSMCT16S].rtNP = &GSLocalMemory::ReadTexel16SNP;
m_psm[PSM_PSMT8].rtNP = &GSLocalMemory::ReadTexel8;
m_psm[PSM_PSMT4].rtNP = &GSLocalMemory::ReadTexel4;
m_psm[PSM_PSMT8H].rtNP = &GSLocalMemory::ReadTexel8H;
m_psm[PSM_PSMT4HL].rtNP = &GSLocalMemory::ReadTexel4HL;
m_psm[PSM_PSMT4HH].rtNP = &GSLocalMemory::ReadTexel4HH;
m_psm[PSM_PSMZ32].rtNP = &GSLocalMemory::ReadTexel32Z;
m_psm[PSM_PSMZ24].rtNP = &GSLocalMemory::ReadTexel24Z;
m_psm[PSM_PSMZ16].rtNP = &GSLocalMemory::ReadTexel16ZNP;
m_psm[PSM_PSMZ16S].rtNP = &GSLocalMemory::ReadTexel16SZNP;
m_psm[PSM_PSMCT24].wi = &GSLocalMemory::WriteImage24; // TODO
m_psm[PSM_PSMCT16].wi = &GSLocalMemory::WriteImage<PSM_PSMCT16, 16, 8, 16>;
m_psm[PSM_PSMCT16S].wi = &GSLocalMemory::WriteImage<PSM_PSMCT16S, 16, 8, 16>;
@ -366,18 +351,6 @@ GSLocalMemory::GSLocalMemory()
m_psm[PSM_PSMZ16].rtx = &GSLocalMemory::ReadTexture16Z;
m_psm[PSM_PSMZ16S].rtx = &GSLocalMemory::ReadTexture16SZ;
m_psm[PSM_PSMCT16].rtxNP = &GSLocalMemory::ReadTexture16NP;
m_psm[PSM_PSMCT16S].rtxNP = &GSLocalMemory::ReadTexture16SNP;
m_psm[PSM_PSMT8].rtxNP = &GSLocalMemory::ReadTexture8NP;
m_psm[PSM_PSMT4].rtxNP = &GSLocalMemory::ReadTexture4NP;
m_psm[PSM_PSMT8H].rtxNP = &GSLocalMemory::ReadTexture8HNP;
m_psm[PSM_PSMT4HL].rtxNP = &GSLocalMemory::ReadTexture4HLNP;
m_psm[PSM_PSMT4HH].rtxNP = &GSLocalMemory::ReadTexture4HHNP;
m_psm[PSM_PSMZ32].rtxNP = &GSLocalMemory::ReadTexture32Z;
m_psm[PSM_PSMZ24].rtxNP = &GSLocalMemory::ReadTexture24Z;
m_psm[PSM_PSMZ16].rtxNP = &GSLocalMemory::ReadTexture16ZNP;
m_psm[PSM_PSMZ16S].rtxNP = &GSLocalMemory::ReadTexture16SZNP;
m_psm[PSM_PSMCT24].rtxP = &GSLocalMemory::ReadTexture24;
m_psm[PSM_PSMCT16].rtxP = &GSLocalMemory::ReadTexture16;
m_psm[PSM_PSMCT16S].rtxP = &GSLocalMemory::ReadTexture16S;
@ -471,23 +444,22 @@ GSLocalMemory::~GSLocalMemory()
{
VirtualFree(m_vm8, 0, MEM_RELEASE);
for_each(m_bomap.begin(), m_bomap.end(), aligned_free_second());
for_each(m_pomap.begin(), m_pomap.end(), aligned_free_second());
for_each(m_omap.begin(), m_omap.end(), aligned_free_second());
for_each(m_po4map.begin(), m_po4map.end(), aligned_free_second());
}
GSLocalMemory::BlockOffset* GSLocalMemory::GetBlockOffset(uint32 bp, uint32 bw, uint32 psm)
GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
{
uint32 hash = bp | (bw << 14) | (psm << 20);
hash_map<uint32, BlockOffset*>::iterator i = m_bomap.find(hash);
hash_map<uint32, GSOffset*>::iterator i = m_omap.find(hash);
if(i != m_bomap.end())
if(i != m_omap.end())
{
return i->second;
}
BlockOffset* o = (BlockOffset*)_aligned_malloc(sizeof(BlockOffset), 16);
GSOffset* o = (GSOffset*)_aligned_malloc(sizeof(GSOffset), 16);
o->hash = hash;
@ -495,49 +467,29 @@ GSLocalMemory::BlockOffset* GSLocalMemory::GetBlockOffset(uint32 bp, uint32 bw,
for(int i = 0; i < 256; i++)
{
o->row[i] = (short)bn(0, i << 3, bp, bw);
o->block.row[i] = (short)bn(0, i << 3, bp, bw);
}
o->col = m_psm[psm].blockOffset;
m_bomap[hash] = o;
return o;
}
GSLocalMemory::PixelOffset* GSLocalMemory::GetPixelOffset(uint32 bp, uint32 bw, uint32 psm)
{
uint32 hash = bp | (bw << 14) | (psm << 20);
hash_map<uint32, PixelOffset*>::iterator i = m_pomap.find(hash);
if(i != m_pomap.end())
{
return i->second;
}
PixelOffset* o = (PixelOffset*)_aligned_malloc(sizeof(PixelOffset), 16);
o->hash = hash;
o->block.col = m_psm[psm].blockOffset;
pixelAddress pa = m_psm[psm].pa;
for(int i = 0; i < 2048; i++)
{
o->row[i] = (int)pa(0, i, bp, bw);
o->pixel.row[i] = (int)pa(0, i, bp, bw);
}
for(int i = 0; i < 8; i++)
{
o->col[i] = m_psm[psm].rowOffset[i];
o->pixel.col[i] = m_psm[psm].rowOffset[i];
}
m_pomap[hash] = o;
m_omap[hash] = o;
return o;
}
GSLocalMemory::PixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
{
uint32 fbp = FRAME.Block();
uint32 zbp = ZBUF.Block();
@ -554,14 +506,14 @@ GSLocalMemory::PixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& F
uint32 hash = (FRAME.FBP << 0) | (ZBUF.ZBP << 9) | (bw << 18) | (fpsm_hash << 24) | (zpsm_hash << 28);
hash_map<uint32, PixelOffset4*>::iterator i = m_po4map.find(hash);
hash_map<uint32, GSPixelOffset4*>::iterator i = m_po4map.find(hash);
if(i != m_po4map.end())
{
return i->second;
}
PixelOffset4* o = (PixelOffset4*)_aligned_malloc(sizeof(PixelOffset4), 16);
GSPixelOffset4* o = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), 16);
o->hash = hash;
@ -1455,20 +1407,20 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
///////////////////
void GSLocalMemory::ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture32(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMCT32)
FOREACH_BLOCK_START(r, 8, 8, 32)
{
ReadBlock32<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture24(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
if(TEXA.AEM)
{
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMCT24)
FOREACH_BLOCK_START(r, 8, 8, 32)
{
ReadAndExpandBlock24<true>(src, dst, dstpitch, TEXA);
}
@ -1476,7 +1428,7 @@ void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch,
}
else
{
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMCT24)
FOREACH_BLOCK_START(r, 8, 8, 32)
{
ReadAndExpandBlock24<false>(src, dst, dstpitch, TEXA);
}
@ -1484,11 +1436,11 @@ void GSLocalMemory::ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch,
}
}
void GSLocalMemory::ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
__declspec(align(16)) uint16 block[16 * 8];
FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMCT16)
FOREACH_BLOCK_START(r, 16, 8, 32)
{
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1497,11 +1449,11 @@ void GSLocalMemory::ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch,
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture16S(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
__declspec(align(16)) uint16 block[16 * 8];
FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMCT16S)
FOREACH_BLOCK_START(r, 16, 8, 32)
{
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1510,75 +1462,75 @@ void GSLocalMemory::ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture8(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
FOREACH_BLOCK_START(r, 16, 16, 32, PSM_PSMT8)
FOREACH_BLOCK_START(r, 16, 16, 32)
{
ReadAndExpandBlock8_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture4(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
const uint64* pal = m_clut;
FOREACH_BLOCK_START(r, 32, 16, 32, PSM_PSMT4)
FOREACH_BLOCK_START(r, 32, 16, 32)
{
ReadAndExpandBlock4_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture8H(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT8H)
FOREACH_BLOCK_START(r, 8, 8, 32)
{
ReadAndExpandBlock8H_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture4HL(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HL)
FOREACH_BLOCK_START(r, 8, 8, 32)
{
ReadAndExpandBlock4HL_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture4HH(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HH)
FOREACH_BLOCK_START(r, 8, 8, 32)
{
ReadAndExpandBlock4HH_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture32Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMZ32)
FOREACH_BLOCK_START(r, 8, 8, 32)
{
ReadBlock32<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture24Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
if(TEXA.AEM)
{
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMZ24)
FOREACH_BLOCK_START(r, 8, 8, 32)
{
ReadAndExpandBlock24<true>(src, dst, dstpitch, TEXA);
}
@ -1586,7 +1538,7 @@ void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch
}
else
{
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMZ24)
FOREACH_BLOCK_START(r, 8, 8, 32)
{
ReadAndExpandBlock24<false>(src, dst, dstpitch, TEXA);
}
@ -1594,11 +1546,11 @@ void GSLocalMemory::ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch
}
}
void GSLocalMemory::ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture16Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
__declspec(align(16)) uint16 block[16 * 8];
FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMZ16)
FOREACH_BLOCK_START(r, 16, 8, 32)
{
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1607,11 +1559,11 @@ void GSLocalMemory::ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture16SZ(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
__declspec(align(16)) uint16 block[16 * 8];
FOREACH_BLOCK_START(r, 16, 8, 32, PSM_PSMZ16S)
FOREACH_BLOCK_START(r, 16, 8, 32)
{
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
@ -1737,286 +1689,125 @@ void GSLocalMemory::ReadTextureBlock16SZ(uint32 bp, uint8* dst, int dstpitch, co
///////////////////
void GSLocalMemory::ReadTexture(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
readTexture rtx = m_psm[TEX0.PSM].rtx;
readTexel rt = m_psm[TEX0.PSM].rt;
GSVector2i bs = m_psm[TEX0.PSM].bs;
const psm_t& psm = m_psm[o->psm];
if(r.width() < bs.x || r.height() < bs.y
|| (r.left & (bs.x - 1)) || (r.top & (bs.y - 1))
|| (r.right & (bs.x - 1)) || (r.bottom & (bs.y - 1)))
{
ReadTexture<uint32>(r, dst, dstpitch, TEX0, TEXA, rt, rtx);
}
else
{
(this->*rtx)(r, dst, dstpitch, TEX0, TEXA);
}
}
///////////////////
readTexel rt = psm.rt;
readTexture rtx = psm.rtx;
void GSLocalMemory::ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMCT16)
if(r.width() < psm.bs.x || r.height() < psm.bs.y
|| (r.left & (psm.bs.x - 1)) || (r.top & (psm.bs.y - 1))
|| (r.right & (psm.bs.x - 1)) || (r.bottom & (psm.bs.y - 1)))
{
ReadBlock16<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
GIFRegTEX0 TEX0;
void GSLocalMemory::ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMCT16S)
{
ReadBlock16<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
TEX0.TBP0 = o->bp;
TEX0.TBW = o->bw;
TEX0.PSM = o->psm;
void GSLocalMemory::ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
GSVector4i cr = r.ralign<GSVector4i::Inside>(psm.bs);
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(r, 16, 16, 32, PSM_PSMT8)
bool aligned = ((DWORD_PTR)(dst + (cr.left - r.left) * sizeof(uint32)) & 0xf) == 0;
if(cr.rempty() || !aligned)
{
ReadAndExpandBlock8_32(src, dst, dstpitch, pal);
// TODO: expand r to block size, read into temp buffer
if(!aligned) printf("unaligned memory pointer passed to ReadTexture\n");
for(int y = r.top; y < r.bottom; y++, dst += dstpitch)
{
for(int x = r.left, i = 0; x < r.right; x++, i++)
{
((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
}
}
}
FOREACH_BLOCK_END
}
else
{
ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S);
__declspec(align(16)) uint8 block[16 * 16];
FOREACH_BLOCK_START(r, 16, 16, 16, PSM_PSMT8)
else
{
ReadBlock8<true>(src, (uint8*)block, sizeof(block) / 16);
for(int y = r.top; y < cr.top; y++, dst += dstpitch)
{
for(int x = r.left, i = 0; x < r.right; x++, i++)
{
((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
}
}
ExpandBlock8_16(block, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
}
for(int y = cr.bottom; y < r.bottom; y++, dst += dstpitch)
{
for(int x = r.left, i = 0; x < r.right; x++, i++)
{
((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
}
}
void GSLocalMemory::ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint64* pal = m_clut;
for(int y = cr.top; y < cr.bottom; y++, dst += dstpitch)
{
for(int x = r.left, i = 0; x < cr.left; x++, i++)
{
((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
}
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(r, 32, 16, 32, PSM_PSMT4)
{
ReadAndExpandBlock4_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
else
{
ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S);
for(int x = cr.right, i = x - r.left; x < r.right; x++, i++)
{
((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
}
}
__declspec(align(16)) uint8 block[(32 / 2) * 16];
FOREACH_BLOCK_START(r, 32, 16, 16, PSM_PSMT4)
{
ReadBlock4<true>(src, (uint8*)block, sizeof(block) / 16);
ExpandBlock4_16(block, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
}
void GSLocalMemory::ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT8H)
{
ReadAndExpandBlock8H_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
else
{
ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S);
__declspec(align(16)) uint32 block[8 * 8];
FOREACH_BLOCK_START(r, 8, 8, 16, PSM_PSMT8H)
{
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
ExpandBlock8H_16(block, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
}
void GSLocalMemory::ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HL)
{
ReadAndExpandBlock4HL_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
else
{
ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S);
__declspec(align(16)) uint32 block[8 * 8];
FOREACH_BLOCK_START(r, 8, 8, 16, PSM_PSMT4HL)
{
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
ExpandBlock4HL_16(block, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
}
void GSLocalMemory::ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
const uint32* pal = m_clut;
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(r, 8, 8, 32, PSM_PSMT4HH)
{
ReadAndExpandBlock4HH_32(src, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
else
{
ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S);
__declspec(align(16)) uint32 block[8 * 8];
FOREACH_BLOCK_START(r, 8, 8, 16, PSM_PSMT4HH)
{
ReadBlock32<true>(src, (uint8*)block, sizeof(block) / 8);
ExpandBlock4HH_16(block, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
}
void GSLocalMemory::ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMZ16)
{
ReadBlock16<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 16, 8, 16, PSM_PSMZ16S)
{
ReadBlock16<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
///////////////////
void GSLocalMemory::ReadTextureNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
readTexture rtx = m_psm[TEX0.PSM].rtxNP;
readTexel rt = m_psm[TEX0.PSM].rtNP;
GSVector2i bs = m_psm[TEX0.PSM].bs;
if(r.width() < bs.x || r.height() < bs.y
|| (r.left & (bs.x - 1)) || (r.top & (bs.y - 1))
|| (r.right & (bs.x - 1)) || (r.bottom & (bs.y - 1)))
{
uint32 psm = TEX0.PSM;
switch(psm)
{
case PSM_PSMT8:
case PSM_PSMT8H:
case PSM_PSMT4:
case PSM_PSMT4HL:
case PSM_PSMT4HH:
psm = TEX0.CPSM;
break;
}
switch(psm)
{
default:
case PSM_PSMCT32:
case PSM_PSMCT24:
ReadTexture<uint32>(r, dst, dstpitch, TEX0, TEXA, rt, rtx);
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
ReadTexture<uint16>(r, dst, dstpitch, TEX0, TEXA, rt, rtx);
break;
if(!cr.rempty())
{
(this->*rtx)(o, cr, dst + (cr.left - r.left) * sizeof(32), dstpitch, TEXA);
}
}
}
else
{
(this->*rtx)(r, dst, dstpitch, TEX0, TEXA);
(this->*rtx)(o, r, dst, dstpitch, TEXA);
}
}
// 32/8
void GSLocalMemory::ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture8P(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 16, 16, 8, PSM_PSMT8)
FOREACH_BLOCK_START(r, 16, 16, 8)
{
ReadBlock8<true>(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture4P(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 32, 16, 8, PSM_PSMT4)
FOREACH_BLOCK_START(r, 32, 16, 8)
{
ReadBlock4P(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture8HP(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 8, 8, 8, PSM_PSMT8H)
FOREACH_BLOCK_START(r, 8, 8, 8)
{
ReadBlock8HP(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture4HLP(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 8, 8, 8, PSM_PSMT4HL)
FOREACH_BLOCK_START(r, 8, 8, 8)
{
ReadBlock4HLP(src, dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
void GSLocalMemory::ReadTexture4HHP(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 8, 8, 8, PSM_PSMT4HH)
FOREACH_BLOCK_START(r, 8, 8, 8)
{
ReadBlock4HHP(src, dst, dstpitch);
}
@ -2060,47 +1851,6 @@ void GSLocalMemory::ReadTextureBlock4HHP(uint32 bp, uint8* dst, int dstpitch, co
//
template<typename T>
void GSLocalMemory::ReadTexture(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, readTexel rt, readTexture rtx)
{
GSVector4i cr = r.ralign<GSVector4i::Inside>(m_psm[TEX0.PSM].bs);
bool aligned = ((DWORD_PTR)(dst + (cr.left - r.left) * sizeof(T)) & 0xf) == 0;
if(cr.rempty() || !aligned)
{
// TODO: expand r to block size, read into temp buffer, copy to r (like above)
if(!aligned)
printf("unaligned memory pointer passed to ReadTexture\n");
for(int y = r.top; y < r.bottom; y++, dst += dstpitch)
for(int x = r.left, i = 0; x < r.right; x++, i++)
((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA);
}
else
{
for(int y = r.top; y < cr.top; y++, dst += dstpitch)
for(int x = r.left, i = 0; x < r.right; x++, i++)
((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA);
if(!cr.rempty())
(this->*rtx)(cr, dst + (cr.left - r.left) * sizeof(T), dstpitch, TEX0, TEXA);
for(int y = cr.top; y < cr.bottom; y++, dst += dstpitch)
{
for(int x = r.left, i = 0; x < cr.left; x++, i++)
((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA);
for(int x = cr.right, i = x - r.left; x < r.right; x++, i++)
((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA);
}
for(int y = cr.bottom; y < r.bottom; y++, dst += dstpitch)
for(int x = r.left, i = 0; x < r.right; x++, i++)
((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA);
}
}
HRESULT GSLocalMemory::SaveBMP(const string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h)
{
int pitch = w * 4;

View File

@ -29,6 +29,32 @@
#include "GSBlock.h"
#include "GSClut.h"
// Precomputed address lookup tables for one (bp, bw, psm) buffer configuration.
// GSLocalMemory::GetOffset() allocates these with _aligned_malloc, fills them in
// and caches them in a map keyed by `hash`.
struct GSOffset
{
// Block-granular table: row[] is filled from the format's block-number function
// for y = 0, 8, 16, ...; col points at the format's shared blockOffset table.
struct
{
short row[256]; // yn (n = 0 8 16 ...)
short* col; // blockOffset*
} block;
// Pixel-granular table: row[] is filled from the format's pixel-address function
// for every y; col[] holds the format's eight rowOffset table pointers.
struct
{
int row[2048]; // yn (n = 0 1 2 ...)
int* col[8]; // rowOffset*
} pixel;
// Cache key, built by GetOffset() as bp | (bw << 14) | (psm << 20). The bit-fields
// give direct access to the three components; this assumes the compiler packs
// bit-fields starting from the least significant bit - TODO confirm for non-MSVC builds.
union {uint32 hash; struct {uint32 bp:14, bw:6, psm:6;};};
};
// Combined frame buffer / Z buffer offset table. Instances are created and cached by
// GSLocalMemory::GetPixelOffset4(), keyed by `hash`.
struct GSPixelOffset4
{
// 16 bit offsets (m_vm16[...])
GSVector2i row[2048]; // f yn | z yn (n = 0 1 2 ...)
GSVector2i col[512]; // f xn | z xn (n = 0 4 8 ...)
// Cache key; GetPixelOffset4() packs FBP, ZBP, the buffer width and the two pixel-format hashes into it.
uint32 hash;
};
class GSLocalMemory : public GSBlock
{
public:
@ -43,7 +69,7 @@ public:
typedef uint32 (GSLocalMemory::*readTexelAddr)(uint32 addr, const GIFRegTEXA& TEXA) const;
typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const;
typedef void (GSLocalMemory::*readTexture)(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
typedef void (GSLocalMemory::*readTexture)(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
typedef void (GSLocalMemory::*readTextureBlock)(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
typedef union
@ -55,12 +81,12 @@ public:
readPixelAddr rpa;
writePixel wp;
writePixelAddr wpa;
readTexel rt, rtNP;
readTexel rt;
readTexelAddr rta;
writeFrameAddr wfa;
writeImage wi;
readImage ri;
readTexture rtx, rtxNP, rtxP;
readTexture rtx, rtxP;
readTextureBlock rtxb, rtxbP;
uint16 bpp, trbpp, pal, fmt;
GSVector2i bs, pgs;
@ -80,29 +106,6 @@ public:
GSClut m_clut;
// Block-number lookup table for one (bp, bw, psm) combination,
// built and cached by GetBlockOffset().
struct BlockOffset
{
short row[256]; // yn (n = 0 8 16 ...)
short* col; // blockOffset*
// Cache key: bp | (bw << 14) | (psm << 20).
uint32 hash;
};
// Pixel-address lookup table for one (bp, bw, psm) combination,
// built and cached by GetPixelOffset().
struct PixelOffset
{
int row[2048]; // yn (n = 0 1 2 ...)
int* col[8]; // rowOffset*
// Cache key: bp | (bw << 14) | (psm << 20).
uint32 hash;
};
// Combined frame buffer / Z buffer offset table, built and cached by GetPixelOffset4().
struct PixelOffset4
{
// 16 bit offsets (m_vm16[...])
GSVector2i row[2048]; // f yn | z yn (n = 0 1 2 ...)
GSVector2i col[512]; // f xn | z xn (n = 0 4 8 ...)
// Cache key derived from the FRAME and ZBUF registers.
uint32 hash;
};
protected:
static uint32 pageOffset32[32][32][64];
static uint32 pageOffset32Z[32][32][64];
@ -147,17 +150,15 @@ protected:
//
hash_map<uint32, BlockOffset*> m_bomap;
hash_map<uint32, PixelOffset*> m_pomap;
hash_map<uint32, PixelOffset4*> m_po4map;
hash_map<uint32, GSOffset*> m_omap;
hash_map<uint32, GSPixelOffset4*> m_po4map;
public:
GSLocalMemory();
virtual ~GSLocalMemory();
BlockOffset* GetBlockOffset(uint32 bp, uint32 bw, uint32 psm);
PixelOffset* GetPixelOffset(uint32 bp, uint32 bw, uint32 psm);
PixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
GSOffset* GetOffset(uint32 bp, uint32 bw, uint32 psm);
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
// address
@ -634,73 +635,73 @@ public:
WriteFrame16(PixelAddress16SZ(x, y, bp, bw), c);
}
__forceinline void WritePixel32(uint8* RESTRICT src, uint32 pitch, PixelOffset* po, const GSVector4i& r)
__forceinline void WritePixel32(uint8* RESTRICT src, uint32 pitch, GSOffset* o, const GSVector4i& r)
{
src -= r.left * sizeof(uint32);
for(int y = r.top; y < r.bottom; y++, src += pitch)
{
uint32* RESTRICT s = (uint32*)src;
uint32* RESTRICT d = &m_vm32[po->row[y]];
int* RESTRICT o = po->col[0];
uint32* RESTRICT d = &m_vm32[o->pixel.row[y]];
int* RESTRICT col = o->pixel.col[0];
for(int x = r.left; x < r.right; x++)
{
d[o[x]] = s[x];
d[col[x]] = s[x];
}
}
}
__forceinline void WritePixel24(uint8* RESTRICT src, uint32 pitch, PixelOffset* po, const GSVector4i& r)
__forceinline void WritePixel24(uint8* RESTRICT src, uint32 pitch, GSOffset* o, const GSVector4i& r)
{
src -= r.left * sizeof(uint32);
for(int y = r.top; y < r.bottom; y++, src += pitch)
{
uint32* RESTRICT s = (uint32*)src;
uint32* RESTRICT d = &m_vm32[po->row[y]];
int* RESTRICT o = po->col[0];
uint32* RESTRICT d = &m_vm32[o->pixel.row[y]];
int* RESTRICT col = o->pixel.col[0];
for(int x = r.left; x < r.right; x++)
{
d[o[x]] = (d[o[x]] & 0xff000000) | (s[x] & 0x00ffffff);
d[col[x]] = (d[col[x]] & 0xff000000) | (s[x] & 0x00ffffff);
}
}
}
__forceinline void WritePixel16(uint8* RESTRICT src, uint32 pitch, PixelOffset* po, const GSVector4i& r)
__forceinline void WritePixel16(uint8* RESTRICT src, uint32 pitch, GSOffset* o, const GSVector4i& r)
{
src -= r.left * sizeof(uint16);
for(int y = r.top; y < r.bottom; y++, src += pitch)
{
uint16* RESTRICT s = (uint16*)src;
uint16* RESTRICT d = &m_vm16[po->row[y]];
int* RESTRICT o = po->col[0];
uint16* RESTRICT d = &m_vm16[o->pixel.row[y]];
int* RESTRICT col = o->pixel.col[0];
for(int x = r.left; x < r.right; x++)
{
d[o[x]] = s[x];
d[col[x]] = s[x];
}
}
}
__forceinline void WriteFrame16(uint8* RESTRICT src, uint32 pitch, PixelOffset* po, const GSVector4i& r)
__forceinline void WriteFrame16(uint8* RESTRICT src, uint32 pitch, GSOffset* o, const GSVector4i& r)
{
src -= r.left * sizeof(uint32);
for(int y = r.top; y < r.bottom; y++, src += pitch)
{
uint32* RESTRICT s = (uint32*)src;
uint16* RESTRICT d = &m_vm16[po->row[y]];
int* RESTRICT o = po->col[0];
uint16* RESTRICT d = &m_vm16[o->pixel.row[y]];
int* RESTRICT col = o->pixel.col[0];
for(int x = r.left; x < r.right; x++)
{
uint32 rb = s[x] & 0x00f800f8;
uint32 ga = s[x] & 0x8000f800;
d[o[x]] = (uint16)((ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3));
d[col[x]] = (uint16)((ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3));
}
}
}
@ -810,26 +811,6 @@ public:
return ReadTexel16(PixelAddress16SZ(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
__forceinline uint32 ReadTexel16NP(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadPixel16(x, y, TEX0.TBP0, TEX0.TBW);
}
__forceinline uint32 ReadTexel16SNP(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadPixel16S(x, y, TEX0.TBP0, TEX0.TBW);
}
__forceinline uint32 ReadTexel16ZNP(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadPixel16Z(x, y, TEX0.TBP0, TEX0.TBW);
}
__forceinline uint32 ReadTexel16SZNP(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadPixel16SZ(x, y, TEX0.TBP0, TEX0.TBW);
}
//
template<int psm, int bsx, int bsy, bool aligned>
@ -860,21 +841,21 @@ public:
// * => 32
void ReadTexture32(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture24(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16S(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8H(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HL(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HH(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture32Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture24Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16Z(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16SZ(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture32(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture24(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture16(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture16S(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8H(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HL(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HH(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture32Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture24Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture16Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture16SZ(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
@ -890,27 +871,13 @@ public:
void ReadTextureBlock16Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock16SZ(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
// * => 32/16
void ReadTexture16NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16SNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4NP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8HNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HLNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HHNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16ZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture16SZNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTextureNP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
// pal ? 8 : 32
void ReadTexture8P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4P(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8HP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HLP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture4HHP(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
void ReadTexture8P(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4P(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture8HP(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HLP(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTexture4HHP(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
void ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
void ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
@ -920,7 +887,7 @@ public:
//
template<typename T> void ReadTexture(const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, readTexel rt, readTexture rtx);
template<typename T> void ReadTexture(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
//

View File

@ -554,9 +554,7 @@ void GSRenderer::GetTextureMinMax(GSVector4i& r, bool linear)
}
}
GSVector2i bs = GSLocalMemory::m_psm[context->TEX0.PSM].bs;
r = vr.ralign<GSVector4i::Outside>(bs).rintersect(tr);
r = vr.rintersect(tr);
}
void GSRenderer::GetAlphaMinMax()
@ -712,18 +710,22 @@ bool GSRenderer::IsLinear()
if(mmag == mmin) return mmag;
float LODmin = (float)TEX1.K;
float LODmax = (float)TEX1.K;
if(!TEX1.LCM && !PRIM->FST) // if FST => assume Q = 1.0f (should not, but Q is very often bogus, 0 or DEN)
{
float K = (float)TEX1.K / 16;
float f = (float)(1 << TEX1.L) / log(2.0f);
LODmin += log(1.0f / abs(m_vt.m_min.t.z)) * f;
LODmax += log(1.0f / abs(m_vt.m_max.t.z)) * f;
}
// TODO: abs(Qmin) may not be <= abs(Qmax), check the sign
return LODmax <= 0 ? mmag : LODmin > 0 ? mmin : mmag || mmin;
float LODmin = K + log(1.0f / abs(m_vt.m_max.t.z)) * f;
float LODmax = K + log(1.0f / abs(m_vt.m_min.t.z)) * f;
return LODmax <= 0 ? mmag : LODmin > 0 ? mmin : mmag || mmin;
}
else
{
return TEX1.K <= 0 ? mmag : TEX1.K > 0 ? mmin : mmag || mmin;
}
}
bool GSRenderer::IsOpaque()

View File

@ -107,37 +107,23 @@ protected:
void FlushPrim()
{
if(m_count > 0)
if(m_count == 0) return;
if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3)
{
/*
TRACE(_T("[%d] Draw f %05x (%d) z %05x (%d %d %d %d) t %05x %05x (%d)\n"),
(int)m_perfmon.GetFrame(),
(int)m_context->FRAME.Block(),
(int)m_context->FRAME.PSM,
(int)m_context->ZBUF.Block(),
(int)m_context->ZBUF.PSM,
m_context->TEST.ZTE,
m_context->TEST.ZTST,
m_context->ZBUF.ZMSK,
PRIM->TME ? (int)m_context->TEX0.TBP0 : 0xfffff,
PRIM->TME && m_context->TEX0.PSM > PSM_PSMCT16S ? (int)m_context->TEX0.CBP : 0xfffff,
PRIM->TME ? (int)m_context->TEX0.PSM : 0xff);
*/
// FIXME: berserk fpsm = 27 (8H)
if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3)
if(!m_dev->IsLost())
{
// FIXME: berserk fpsm = 27 (8H)
m_vt.Update(m_vertices, m_count, PRIM, m_context);
if(!m_dev->IsLost())
{
Draw();
}
m_perfmon.Put(GSPerfMon::Draw, 1);
Draw();
}
m_count = 0;
m_perfmon.Put(GSPerfMon::Draw, 1);
}
m_count = 0;
}
void GrowVertexBuffer()

View File

@ -65,7 +65,7 @@ public:
return true;
}
void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
@ -178,7 +178,7 @@ public:
GSTextureFX::GSSelector gs_sel;
gs_sel.iip = PRIM->IIP;
gs_sel.prim = primclass;
gs_sel.prim = m_vt.m_primclass;
// ps
@ -265,16 +265,16 @@ public:
GSVector4i scissor = GSVector4i(GSVector4(rt->m_scale).xyxy() * context->scissor.in).rintersect(GSVector4i(rt->GetSize()).zwxy());
//
m_dev->OMSetRenderTargets(rt, ds, &scissor);
m_dev->PSSetShaderResources(tex ? tex->m_texture : NULL, tex ? tex->m_palette : NULL);
uint8 afix = context->ALPHA.FIX;
m_tfx->SetupOM(om_dssel, om_bsel, afix, rt, ds);
m_tfx->SetupOM(om_dssel, om_bsel, afix);
m_tfx->SetupIA(m_vertices, m_count, m_topology);
m_tfx->SetupVS(vs_sel, &vs_cb);
m_tfx->SetupGS(gs_sel);
m_tfx->SetupPS(ps_sel, &ps_cb, ps_ssel, tex ? tex->m_texture : NULL, tex ? tex->m_palette : NULL);
m_tfx->SetupRS(rt->m_size, scissor);
m_tfx->SetupPS(ps_sel, &ps_cb, ps_ssel);
// draw
@ -304,7 +304,7 @@ public:
break;
}
m_tfx->UpdatePS(ps_sel, &ps_cb, ps_ssel);
m_tfx->SetupPS(ps_sel, &ps_cb, ps_ssel);
bool z = om_dssel.zwe;
bool r = om_bsel.wr;
@ -329,7 +329,7 @@ public:
om_bsel.wb = b;
om_bsel.wa = a;
m_tfx->UpdateOM(om_dssel, om_bsel, afix);
m_tfx->SetupOM(om_dssel, om_bsel, afix);
m_dev->DrawPrimitive();
}

View File

@ -169,9 +169,9 @@ void GSRendererDX10::VertexKick(bool skip)
}
}
void GSRendererDX10::Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
void GSRendererDX10::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
switch(primclass)
switch(m_vt.m_primclass)
{
case GS_POINT_CLASS:
m_topology = D3D10_PRIMITIVE_TOPOLOGY_POINTLIST;
@ -190,7 +190,7 @@ void GSRendererDX10::Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds,
__assume(0);
}
__super::Draw(primclass, rt, ds, tex);
__super::Draw(rt, ds, tex);
}
void GSRendererDX10::SetupDATE(GSTexture* rt, GSTexture* ds)
@ -249,11 +249,7 @@ void GSRendererDX10::SetupDATE(GSTexture* rt, GSTexture* ds)
dev->PSSetShader(dev->m_convert.ps[m_context->TEST.DATM ? 2 : 3], NULL);
dev->PSSetSamplerState(dev->m_convert.pt, NULL);
// rs
dev->RSSet(size);
// set
//
dev->DrawPrimitive();

View File

@ -35,7 +35,7 @@ protected:
CComPtr<ID3D10BlendState> bs;
} m_date;
void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void SetupDATE(GSTexture* rt, GSTexture* ds);
public:

View File

@ -169,9 +169,9 @@ void GSRendererDX11::VertexKick(bool skip)
}
}
void GSRendererDX11::Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
void GSRendererDX11::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
switch(primclass)
switch(m_vt.m_primclass)
{
case GS_POINT_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
@ -190,7 +190,7 @@ void GSRendererDX11::Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds,
__assume(0);
}
__super::Draw(primclass, rt, ds, tex);
__super::Draw(rt, ds, tex);
}
void GSRendererDX11::SetupDATE(GSTexture* rt, GSTexture* ds)
@ -249,11 +249,7 @@ void GSRendererDX11::SetupDATE(GSTexture* rt, GSTexture* ds)
dev->PSSetShader(dev->m_convert.ps[m_context->TEST.DATM ? 2 : 3], NULL);
dev->PSSetSamplerState(dev->m_convert.pt, NULL);
// rs
dev->RSSet(size);
// set
//
dev->DrawPrimitive();

View File

@ -29,7 +29,7 @@
class GSRendererDX11 : public GSRendererDX<GSVertexHW11>
{
protected:
void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
struct
{

View File

@ -184,9 +184,9 @@ void GSRendererDX9::VertexKick(bool skip)
}
}
void GSRendererDX9::Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
void GSRendererDX9::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
switch(primclass)
switch(m_vt.m_primclass)
{
case GS_POINT_CLASS:
m_topology = D3DPT_POINTLIST;
@ -207,7 +207,7 @@ void GSRendererDX9::Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds,
(*(GSDevice9*)m_dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO
__super::Draw(primclass, rt, ds, tex);
__super::Draw(rt, ds, tex);
}
void GSRendererDX9::SetupDATE(GSTexture* rt, GSTexture* ds)
@ -262,10 +262,6 @@ void GSRendererDX9::SetupDATE(GSTexture* rt, GSTexture* ds)
dev->PSSetShader(dev->m_convert.ps[m_context->TEST.DATM ? 2 : 3], NULL, 0);
dev->PSSetSamplerState(&dev->m_convert.pt);
// rs
dev->RSSet(size);
//
dev->DrawPrimitive();
@ -317,10 +313,6 @@ void GSRendererDX9::UpdateFBA(GSTexture* rt)
dev->PSSetShader(dev->m_convert.ps[4], NULL, 0);
// rs
dev->RSSet(rt->m_size);
//
dev->DrawPrimitive();

View File

@ -41,7 +41,7 @@ protected:
Direct3DBlendState9 bs;
} m_fba;
void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void SetupDATE(GSTexture* rt, GSTexture* ds);
void UpdateFBA(GSTexture* rt);

View File

@ -497,22 +497,20 @@ protected:
{
// printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM);
m_tc->InvalidateVideoMem(BITBLTBUF, r);
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
}
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
// printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM);
m_tc->InvalidateLocalMem(BITBLTBUF, r);
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
}
void Draw()
{
if(IsBadFrame(m_skip)) return;
m_vt.Update(m_vertices, m_count, GSUtil::GetPrimClass(PRIM->PRIM), PRIM, m_context);
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
@ -617,7 +615,7 @@ protected:
//
Draw(GSUtil::GetPrimClass(prim), rt->m_texture, ds->m_texture, tex);
Draw(rt->m_texture, ds->m_texture, tex);
//
@ -629,28 +627,18 @@ protected:
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in));
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.DBW = context->FRAME.FBW;
if(fm != 0xffffffff)
{
rt->m_valid = rt->m_valid.runion(r);
BITBLTBUF.DBP = context->FRAME.Block();
BITBLTBUF.DPSM = context->FRAME.PSM;
m_tc->InvalidateVideoMem(BITBLTBUF, r, false);
m_tc->InvalidateVideoMem(m_context->offset.fb, r, false);
}
if(zm != 0xffffffff)
{
ds->m_valid = ds->m_valid.runion(r);
BITBLTBUF.DBP = context->ZBUF.Block();
BITBLTBUF.DPSM = context->ZBUF.PSM;
m_tc->InvalidateVideoMem(BITBLTBUF, r, false);
m_tc->InvalidateVideoMem(m_context->offset.zb, r, false);
}
//
@ -684,7 +672,7 @@ protected:
}
}
virtual void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
virtual void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
bool CanUpscale()
{

View File

@ -64,7 +64,7 @@ void GSRendererOGL::VertexKick(bool skip)
}
}
void GSRendererOGL::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
void GSRendererOGL::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;

View File

@ -29,7 +29,7 @@
class GSRendererOGL : public GSRendererHW<GSVertexOGL>
{
protected:
void Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
public:
GSRendererOGL(uint8* base, bool mt, void (*irq)());

View File

@ -86,13 +86,7 @@ GSTexture* GSRendererSW::GetOutput(int i)
{
const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;
GIFRegTEX0 TEX0;
TEX0.TBP0 = DISPFB.Block();
TEX0.TBW = DISPFB.FBW;
TEX0.PSM = DISPFB.PSM;
int w = TEX0.TBW * 64;
int w = DISPFB.FBW * 64;
int h = GetFrameRect(i).bottom;
// TODO: round up bottom
@ -105,7 +99,9 @@ GSTexture* GSRendererSW::GetOutput(int i)
GSVector4i r(0, 0, w, h);
m_mem.ReadTexture(r, buff, pitch, TEX0, m_env.TEXA);
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[DISPFB.PSM];
(m_mem.*psm.rtx)(m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM), r.ralign<GSVector4i::Outside>(psm.bs), buff, pitch, m_env.TEXA);
m_texture[i]->Update(r, buff, pitch);
@ -113,7 +109,7 @@ GSTexture* GSRendererSW::GetOutput(int i)
{
if(s_save && s_n >= s_saven)
{
m_texture[i]->Save(format("c:\\temp1\\_%05d_f%I64d_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM));
m_texture[i]->Save(format("c:\\temp1\\_%05d_f%I64d_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)DISPFB.Block(), (int)DISPFB.PSM));
}
s_n++;
@ -125,18 +121,14 @@ GSTexture* GSRendererSW::GetOutput(int i)
void GSRendererSW::Draw()
{
GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(PRIM->PRIM);
m_vt.Update(m_vertices, m_count, primclass, PRIM, m_context);
if(m_dump)
{
m_dump.Object(m_vertices, m_count, primclass);
m_dump.Object(m_vertices, m_count, m_vt.m_primclass);
}
GSScanlineParam p;
GetScanlineParam(p, primclass);
GetScanlineParam(p, m_vt.m_primclass);
if((p.fm & p.zm) == 0xffffffff)
{
@ -179,7 +171,7 @@ void GSRendererSW::Draw()
data.scissor = GSVector4i(m_context->scissor.in);
data.scissor.z = min(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
data.primclass = primclass;
data.primclass = m_vt.m_primclass;
data.vertices = m_vertices;
data.count = m_count;
data.param = &p;
@ -190,30 +182,19 @@ void GSRendererSW::Draw()
m_rl.GetStats(stats);
m_perfmon.Put(GSPerfMon::Draw, 1);
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor);
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.DBW = m_context->FRAME.FBW;
if(p.fm != 0xffffffff)
{
BITBLTBUF.DBP = m_context->FRAME.Block();
BITBLTBUF.DPSM = m_context->FRAME.PSM;
m_tc->InvalidateVideoMem(BITBLTBUF, r);
m_tc->InvalidateVideoMem(m_context->offset.fb, r);
}
if(p.zm != 0xffffffff)
{
BITBLTBUF.DBP = m_context->ZBUF.Block();
BITBLTBUF.DPSM = m_context->ZBUF.PSM;
m_tc->InvalidateVideoMem(BITBLTBUF, r);
m_tc->InvalidateVideoMem(m_context->offset.zb, r);
}
if(s_dump)
@ -251,7 +232,7 @@ void GSRendererSW::Draw()
void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
m_tc->InvalidateVideoMem(BITBLTBUF, r);
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
}
void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
@ -261,9 +242,9 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
p.vm = m_mem.m_vm8;
p.fbo = m_mem.GetPixelOffset(context->FRAME.Block(), context->FRAME.FBW, context->FRAME.PSM);
p.zbo = m_mem.GetPixelOffset(context->ZBUF.Block(), context->FRAME.FBW, context->ZBUF.PSM);
p.fzbo = m_mem.GetPixelOffset4(context->FRAME, context->ZBUF);
p.fbo = context->offset.fb;
p.zbo = context->offset.zb;
p.fzbo = context->offset.fzb;
p.sel.key = 0;

View File

@ -108,9 +108,9 @@ __declspec(align(16)) struct GSScanlineParam
const uint32* clut;
uint32 tw;
GSLocalMemory::PixelOffset* fbo;
GSLocalMemory::PixelOffset* zbo;
GSLocalMemory::PixelOffset4* fzbo;
GSOffset* fbo;
GSOffset* zbo;
GSPixelOffset4* fzbo;
uint32 fm, zm;
};

View File

@ -490,18 +490,23 @@ template<int i> void GSState::GIFRegHandlerTEX0(GIFReg* r)
Flush();
}
m_env.CTXT[i].TEX0 = (GSVector4i)r->TEX0;
if(r->TEX0.TW > 10) r->TEX0.TW = 10;
if(r->TEX0.TH > 10) r->TEX0.TH = 10;
if(m_env.CTXT[i].TEX0.TW > 10) m_env.CTXT[i].TEX0.TW = 10;
if(m_env.CTXT[i].TEX0.TH > 10) m_env.CTXT[i].TEX0.TH = 10;
r->TEX0.CPSM &= 0xa; // 1010b
m_env.CTXT[i].TEX0.CPSM &= 0xa; // 1010b
if((m_env.CTXT[i].TEX0.TBW & 1) && (m_env.CTXT[i].TEX0.PSM == PSM_PSMT8 || m_env.CTXT[i].TEX0.PSM == PSM_PSMT4))
if((r->TEX0.TBW & 1) && (r->TEX0.PSM == PSM_PSMT8 || r->TEX0.PSM == PSM_PSMT4))
{
m_env.CTXT[i].TEX0.TBW &= ~1; // GS User 2.6
r->TEX0.TBW &= ~1; // GS User 2.6
}
if((r->TEX0.u32[0] ^ m_env.CTXT[i].TEX0.u32[0]) & 0x3ffffff) // TBP0 TBW PSM
{
m_env.CTXT[i].offset.tex = m_mem.GetOffset(r->TEX0.TBP0, r->TEX0.TBW, r->TEX0.PSM);
}
m_env.CTXT[i].TEX0 = (GSVector4i)r->TEX0;
if(wt)
{
m_mem.m_clut.Write(m_env.CTXT[i].TEX0, m_env.TEXCLUT);
@ -792,6 +797,13 @@ template<int i> void GSState::GIFRegHandlerFRAME(GIFReg* r)
Flush();
}
if((m_env.CTXT[i].FRAME.u32[0] ^ r->FRAME.u32[0]) & 0x3f3f01ff) // FBP FBW PSM
{
m_env.CTXT[i].offset.fb = m_mem.GetOffset(r->FRAME.Block(), r->FRAME.FBW, r->FRAME.PSM);
m_env.CTXT[i].offset.zb = m_mem.GetOffset(m_env.CTXT[i].ZBUF.Block(), r->FRAME.FBW, m_env.CTXT[i].ZBUF.PSM);
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset4(r->FRAME, m_env.CTXT[i].ZBUF);
}
m_env.CTXT[i].FRAME = (GSVector4i)r->FRAME;
}
@ -806,20 +818,26 @@ template<int i> void GSState::GIFRegHandlerZBUF(GIFReg* r)
r->ZBUF.PSM |= 0x30;
if(r->ZBUF.PSM != PSM_PSMZ32
&& r->ZBUF.PSM != PSM_PSMZ24
&& r->ZBUF.PSM != PSM_PSMZ16
&& r->ZBUF.PSM != PSM_PSMZ16S)
{
r->ZBUF.PSM = PSM_PSMZ32;
}
if(PRIM->CTXT == i && r->ZBUF != m_env.CTXT[i].ZBUF)
{
Flush();
}
m_env.CTXT[i].ZBUF = (GSVector4i)r->ZBUF;
if(m_env.CTXT[i].ZBUF.PSM != PSM_PSMZ32
&& m_env.CTXT[i].ZBUF.PSM != PSM_PSMZ24
&& m_env.CTXT[i].ZBUF.PSM != PSM_PSMZ16
&& m_env.CTXT[i].ZBUF.PSM != PSM_PSMZ16S)
if((m_env.CTXT[i].ZBUF.u32[0] ^ r->ZBUF.u32[0]) & 0x3f0001ff) // ZBP PSM
{
m_env.CTXT[i].ZBUF.PSM = PSM_PSMZ32;
m_env.CTXT[i].offset.zb = m_mem.GetOffset(r->ZBUF.Block(), m_env.CTXT[i].FRAME.FBW, r->ZBUF.PSM);
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, r->ZBUF);
}
m_env.CTXT[i].ZBUF = (GSVector4i)r->ZBUF;
}
void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r)
@ -1084,13 +1102,13 @@ void GSState::Move()
// TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format)
GSLocalMemory::PixelOffset* RESTRICT spo = m_mem.GetPixelOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM);
GSLocalMemory::PixelOffset* RESTRICT dpo = m_mem.GetPixelOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM);
GSOffset* RESTRICT spo = m_mem.GetOffset(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM);
GSOffset* RESTRICT dpo = m_mem.GetOffset(m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM);
if(spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
{
int* RESTRICT scol = &spo->col[0][sx];
int* RESTRICT dcol = &dpo->col[0][dx];
int* RESTRICT scol = &spo->pixel.col[0][sx];
int* RESTRICT dcol = &dpo->pixel.col[0][dx];
if(spsm.trbpp == 32)
{
@ -1098,8 +1116,8 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->row[dy]];
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
for(int x = 0; x < w; x++) d[dcol[x]] = s[scol[x]];
}
@ -1108,8 +1126,8 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->row[dy]];
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
for(int x = 0; x > -w; x--) d[dcol[x]] = s[scol[x]];
}
@ -1121,8 +1139,8 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->row[dy]];
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
for(int x = 0; x < w; x++) d[dcol[x]] = (d[dcol[x]] & 0xff000000) | (s[scol[x]] & 0x00ffffff);
}
@ -1131,8 +1149,8 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32* RESTRICT s = &m_mem.m_vm32[spo->row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->row[dy]];
uint32* RESTRICT s = &m_mem.m_vm32[spo->pixel.row[sy]];
uint32* RESTRICT d = &m_mem.m_vm32[dpo->pixel.row[dy]];
for(int x = 0; x > -w; x--) d[dcol[x]] = (d[dcol[x]] & 0xff000000) | (s[scol[x]] & 0x00ffffff);
}
@ -1144,8 +1162,8 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint16* RESTRICT s = &m_mem.m_vm16[spo->row[sy]];
uint16* RESTRICT d = &m_mem.m_vm16[dpo->row[dy]];
uint16* RESTRICT s = &m_mem.m_vm16[spo->pixel.row[sy]];
uint16* RESTRICT d = &m_mem.m_vm16[dpo->pixel.row[dy]];
for(int x = 0; x < w; x++) d[dcol[x]] = s[scol[x]];
}
@ -1154,8 +1172,8 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint16* RESTRICT s = &m_mem.m_vm16[spo->row[sy]];
uint16* RESTRICT d = &m_mem.m_vm16[dpo->row[dy]];
uint16* RESTRICT s = &m_mem.m_vm16[spo->pixel.row[sy]];
uint16* RESTRICT d = &m_mem.m_vm16[dpo->pixel.row[dy]];
for(int x = 0; x > -w; x--) d[dcol[x]] = s[scol[x]];
}
@ -1168,11 +1186,11 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint8* RESTRICT s = &m_mem.m_vm8[spo->row[sy]];
uint8* RESTRICT d = &m_mem.m_vm8[dpo->row[dy]];
uint8* RESTRICT s = &m_mem.m_vm8[spo->pixel.row[sy]];
uint8* RESTRICT d = &m_mem.m_vm8[dpo->pixel.row[dy]];
int* RESTRICT scol = &spo->col[sy & 7][sx];
int* RESTRICT dcol = &dpo->col[dy & 7][dx];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for(int x = 0; x < w; x++) d[dcol[x]] = s[scol[x]];
}
@ -1181,11 +1199,11 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint8* RESTRICT s = &m_mem.m_vm8[spo->row[sy]];
uint8* RESTRICT d = &m_mem.m_vm8[dpo->row[dy]];
uint8* RESTRICT s = &m_mem.m_vm8[spo->pixel.row[sy]];
uint8* RESTRICT d = &m_mem.m_vm8[dpo->pixel.row[dy]];
int* RESTRICT scol = &spo->col[sy & 7][sx];
int* RESTRICT dcol = &dpo->col[dy & 7][dx];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for(int x = 0; x > -w; x--) d[dcol[x]] = s[scol[x]];
}
@ -1197,11 +1215,11 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
uint32 sbase = spo->pixel.row[sy];
uint32 dbase = dpo->pixel.row[dy];
int* RESTRICT scol = &spo->col[sy & 7][sx];
int* RESTRICT dcol = &dpo->col[dy & 7][dx];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for(int x = 0; x < w; x++) m_mem.WritePixel4(dbase + dcol[x], m_mem.ReadPixel4(sbase + scol[x]));
}
@ -1210,11 +1228,11 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
uint32 sbase = spo->pixel.row[sy];
uint32 dbase = dpo->pixel.row[dy];
int* RESTRICT scol = &spo->col[sy & 7][sx];
int* RESTRICT dcol = &dpo->col[dy & 7][dx];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for(int x = 0; x > -w; x--) m_mem.WritePixel4(dbase + dcol[x], m_mem.ReadPixel4(sbase + scol[x]));
}
@ -1226,11 +1244,11 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
uint32 sbase = spo->pixel.row[sy];
uint32 dbase = dpo->pixel.row[dy];
int* RESTRICT scol = &spo->col[sy & 7][sx];
int* RESTRICT dcol = &dpo->col[dy & 7][dx];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for(int x = 0; x < w; x++) (m_mem.*dpsm.wpa)(dbase + dcol[x], (m_mem.*spsm.rpa)(sbase + scol[x]));
}
@ -1239,11 +1257,11 @@ void GSState::Move()
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc)
{
uint32 sbase = spo->row[sy];
uint32 dbase = dpo->row[dy];
uint32 sbase = spo->pixel.row[sy];
uint32 dbase = dpo->pixel.row[dy];
int* RESTRICT scol = &spo->col[sy & 7][sx];
int* RESTRICT dcol = &dpo->col[dy & 7][dx];
int* RESTRICT scol = &spo->pixel.col[sy & 7][sx];
int* RESTRICT dcol = &dpo->pixel.col[dy & 7][dx];
for(int x = 0; x > -w; x--) (m_mem.*dpsm.wpa)(dbase + dcol[x], (m_mem.*spsm.rpa)(sbase + scol[x]));
}
@ -1649,8 +1667,15 @@ int GSState::Defrost(const GSFreezeData* fd)
m_env.UpdateDIMX();
m_env.CTXT[0].UpdateScissor();
m_env.CTXT[1].UpdateScissor();
for(int i = 0; i < 2; i++)
{
m_env.CTXT[i].UpdateScissor();
m_env.CTXT[i].offset.fb = m_mem.GetOffset(m_env.CTXT[i].FRAME.Block(), m_env.CTXT[i].FRAME.FBW, m_env.CTXT[i].FRAME.PSM);
m_env.CTXT[i].offset.zb = m_mem.GetOffset(m_env.CTXT[i].ZBUF.Block(), m_env.CTXT[i].FRAME.FBW, m_env.CTXT[i].ZBUF.PSM);
m_env.CTXT[i].offset.tex = m_mem.GetOffset(m_env.CTXT[i].TEX0.TBP0, m_env.CTXT[i].TEX0.TBW, m_env.CTXT[i].TEX0.PSM);
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, m_env.CTXT[i].ZBUF);
}
m_perfmon.SetFrame(5000);

View File

@ -69,7 +69,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
continue;
}
if(s->m_palette == NULL && psm.pal > 0 && !GSVector4i::compare(clut, s->m_clut, psm.pal * sizeof(clut[0])))
if(s->m_palette == NULL && psm.pal > 0 && !GSVector4i::compare64(clut, s->m_clut, psm.pal * sizeof(clut[0])))
{
continue;
}
@ -120,7 +120,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
memcpy(src->m_clut, clut, psm.pal * sizeof(clut[0]));
}
m_src.Add(src, TEX0, m_renderer->m_mem);
m_src.Add(src, TEX0, m_renderer->m_context->offset.tex);
}
if(psm.pal > 0)
@ -233,13 +233,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
return dst;
}
void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& rect, bool target)
void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rect, bool target)
{
uint32 bp = BITBLTBUF.DBP;
uint32 bw = BITBLTBUF.DBW;
uint32 psm = BITBLTBUF.DPSM;
const GSLocalMemory::BlockOffset* bo = m_renderer->m_mem.GetBlockOffset(bp, bw, psm);
uint32 bp = o->bp;
uint32 bw = o->bw;
uint32 psm = o->psm;
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
@ -266,11 +264,11 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = bo->row[y >> 3];
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 page = (base + bo->col[x >> 3]) >> 5;
uint32 page = (base + o->block.col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{
@ -355,10 +353,10 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
}
}
void GSTextureCache::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
void GSTextureCache::InvalidateLocalMem(const GSOffset* o, const GSVector4i& r)
{
uint32 bp = BITBLTBUF.SBP;
uint32 psm = BITBLTBUF.SPSM;
uint32 bp = o->bp;
uint32 psm = o->psm;
for(list<Target*>::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); )
{
@ -743,7 +741,7 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
m_complete = true; // lame, but better than nothing
}
const GSLocalMemory::BlockOffset* bo = m_renderer->m_mem.GetBlockOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
const GSOffset* o = m_renderer->m_context->offset.tex;
bool repeating = m_TEX0.IsRepeating();
@ -751,11 +749,11 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = bo->row[y >> 3];
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + bo->col[x >> 3];
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{
@ -783,11 +781,11 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
{
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = bo->row[y >> 3];
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + bo->col[x >> 3];
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{
@ -855,6 +853,8 @@ void GSTextureCache::Source::Flush(uint32 count)
GSLocalMemory& mem = m_renderer->m_mem;
const GSOffset* o = m_renderer->m_context->offset.tex;
GSLocalMemory::readTexture rtx = psm.rtx;
if(m_fmt == GSTextureFX::FMT_8)
@ -869,7 +869,7 @@ void GSTextureCache::Source::Flush(uint32 count)
if((r > tr).mask() & 0xff00)
{
(mem.*rtx)(r, buff, pitch, m_TEX0, m_TEXA);
(mem.*rtx)(o, r, buff, pitch, m_TEXA);
m_texture->Update(r.rintersect(tr), buff, pitch);
}
@ -879,13 +879,13 @@ void GSTextureCache::Source::Flush(uint32 count)
if(m_texture->Map(m, &r))
{
(mem.*rtx)(r, m.bits, m.pitch, m_TEX0, m_TEXA);
(mem.*rtx)(o, r, m.bits, m.pitch, m_TEXA);
m_texture->Unmap();
}
else
{
(mem.*rtx)(r, buff, pitch, m_TEX0, m_TEXA);
(mem.*rtx)(o, r, buff, pitch, m_TEXA);
m_texture->Update(r, buff, pitch);
}
@ -951,6 +951,8 @@ void GSTextureCache::Target::Update()
if(GSTexture* t = m_renderer->m_dev->CreateTexture(w, h))
{
const GSOffset* o = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); // TODO: m_renderer->m_context->bo.tex;
GIFRegTEXA TEXA;
TEXA.AEM = 1;
@ -961,7 +963,7 @@ void GSTextureCache::Target::Update()
if(t->Map(m))
{
m_renderer->m_mem.ReadTexture(r, m.bits, m.pitch, m_TEX0, TEXA);
m_renderer->m_mem.ReadTexture(o, r, m.bits, m.pitch, TEXA);
t->Unmap();
}
@ -971,7 +973,7 @@ void GSTextureCache::Target::Update()
int pitch = ((w + 3) & ~3) * 4;
m_renderer->m_mem.ReadTexture(r, buff, pitch, m_TEX0, TEXA);
m_renderer->m_mem.ReadTexture(o, r, buff, pitch, TEXA);
t->Update(r.rsize(), buff, pitch);
}
@ -996,7 +998,7 @@ void GSTextureCache::Target::Update()
// GSTextureCache::SourceMap
void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSLocalMemory& mem)
void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, const GSOffset* o)
{
m_surfaces.insert(s);
@ -1009,8 +1011,6 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSLocalMe
return;
}
const GSLocalMemory::BlockOffset* bo = mem.GetBlockOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
@ -1020,11 +1020,11 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSLocalMe
for(int y = 0; y < th; y += bs.y)
{
uint32 base = bo->row[y >> 3];
uint32 base = o->block.row[y >> 3];
for(int x = 0; x < tw; x += bs.x)
{
uint32 page = (base + bo->col[x >> 3]) >> 5;
uint32 page = (base + o->block.col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{

View File

@ -105,7 +105,7 @@ protected:
SourceMap() : m_used(false) {memset(m_pages, 0, sizeof(m_pages));}
void Add(Source* s, const GIFRegTEX0& TEX0, GSLocalMemory& mem);
void Add(Source* s, const GIFRegTEX0& TEX0, const GSOffset* o);
void RemoveAll();
void RemoveAt(Source* s);
@ -125,8 +125,8 @@ public:
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, bool fb = false);
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool target = true);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateVideoMem(const GSOffset* o, const GSVector4i& r, bool target = true);
void InvalidateLocalMem(const GSOffset* o, const GSVector4i& r);
void IncAge();
};

View File

@ -76,19 +76,19 @@ void GSTextureCache10::Target10::Read(const GSVector4i& r)
{
// TODO: block level write
GSLocalMemory::PixelOffset* po = m_renderer->m_mem.GetPixelOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
GSOffset* o = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
switch(m_TEX0.PSM)
{
case PSM_PSMCT32:
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, po, r);
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, o, r);
break;
case PSM_PSMCT24:
m_renderer->m_mem.WritePixel24(m.bits, m.pitch, po, r);
m_renderer->m_mem.WritePixel24(m.bits, m.pitch, o, r);
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_renderer->m_mem.WritePixel16(m.bits, m.pitch, po, r);
m_renderer->m_mem.WritePixel16(m.bits, m.pitch, o, r);
break;
default:
ASSERT(0);

View File

@ -76,19 +76,19 @@ void GSTextureCache11::Target11::Read(const GSVector4i& r)
{
// TODO: block level write
GSLocalMemory::PixelOffset* po = m_renderer->m_mem.GetPixelOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
GSOffset* o = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
switch(m_TEX0.PSM)
{
case PSM_PSMCT32:
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, po, r);
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, o, r);
break;
case PSM_PSMCT24:
m_renderer->m_mem.WritePixel24(m.bits, m.pitch, po, r);
m_renderer->m_mem.WritePixel24(m.bits, m.pitch, o, r);
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_renderer->m_mem.WritePixel16(m.bits, m.pitch, po, r);
m_renderer->m_mem.WritePixel16(m.bits, m.pitch, o, r);
break;
default:
ASSERT(0);

View File

@ -74,19 +74,19 @@ void GSTextureCache9::Target9::Read(const GSVector4i& r)
{
// TODO: block level write
GSLocalMemory::PixelOffset* po = m_renderer->m_mem.GetPixelOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
GSOffset* o = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
switch(m_TEX0.PSM)
{
case PSM_PSMCT32:
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, po, r);
m_renderer->m_mem.WritePixel32(m.bits, m.pitch, o, r);
break;
case PSM_PSMCT24:
m_renderer->m_mem.WritePixel24(m.bits, m.pitch, po, r);
m_renderer->m_mem.WritePixel24(m.bits, m.pitch, o, r);
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_renderer->m_mem.WriteFrame16(m.bits, m.pitch, po, r);
m_renderer->m_mem.WriteFrame16(m.bits, m.pitch, o, r);
break;
default:
ASSERT(0);

View File

@ -70,7 +70,7 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
m_textures.insert(t);
const GSLocalMemory::BlockOffset* bo = m_state->m_mem.GetBlockOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
const GSOffset* o = m_state->m_context->offset.tex;
GSVector2i bs = (TEX0.TBP0 & 31) == 0 ? psm.pgs : psm.bs;
@ -79,11 +79,11 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
for(int y = 0; y < th; y += bs.y)
{
uint32 base = bo->row[y >> 3];
uint32 base = o->block.row[y >> 3];
for(int x = 0; x < tw; x += bs.x)
{
uint32 page = (base + bo->col[x >> 3]) >> 5;
uint32 page = (base + o->block.col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{
@ -124,6 +124,43 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
return t;
}
void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rect)
{
uint32 bp = o->bp;
uint32 bw = o->bw;
uint32 psm = o->psm;
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 page = (base + o->block.col[x >> 3]) >> 5;
if(page < MAX_PAGES)
{
const list<GSTexture*>& map = m_map[page];
for(list<GSTexture*>::const_iterator i = map.begin(); i != map.end(); i++)
{
GSTexture* t = *i;
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{
t->m_valid[page] = 0;
t->m_complete = false;
}
}
}
}
}
}
void GSTextureCacheSW::RemoveAll()
{
for_each(m_textures.begin(), m_textures.end(), delete_object());
@ -170,45 +207,6 @@ void GSTextureCacheSW::IncAge()
}
}
// Invalidates cached texture pages overlapped by a write to local memory.
// BITBLTBUF - only the destination fields (DBP, DBW, DPSM) are read
// rect      - written rectangle; it is aligned outwards to the step size before being walked
// For every page touched, each texture listed in m_map[page] whose format shares bits with
// the destination format gets m_valid[page] cleared and m_complete reset.
void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& rect)
{
uint32 bp = BITBLTBUF.DBP;
uint32 bw = BITBLTBUF.DBW;
uint32 psm = BITBLTBUF.DPSM;
// row/col lookup tables for this (bp, bw, psm) combination
const GSLocalMemory::BlockOffset* bo = m_state->m_mem.GetBlockOffset(bp, bw, psm);
// step by pgs when bp is a multiple of 32, otherwise by bs
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
for(int y = r.top; y < r.bottom; y += bs.y)
{
// the tables are indexed in units of 8 pixels
uint32 base = bo->row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
// NOTE(review): the >> 5 presumably converts a block number to a page number (32 blocks per page) - confirm
uint32 page = (base + bo->col[x >> 3]) >> 5;
// addresses that fall outside the page table are ignored
if(page < MAX_PAGES)
{
const list<GSTexture*>& map = m_map[page];
for(list<GSTexture*>::const_iterator i = map.begin(); i != map.end(); i++)
{
GSTexture* t = *i;
// only textures whose format shares bits with the written format are affected
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{
t->m_valid[page] = 0;
t->m_complete = false;
}
}
}
}
}
}
//
GSTextureCacheSW::GSTexture::GSTexture(GSState* state)
@ -267,7 +265,7 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
GSLocalMemory& mem = m_state->m_mem;
const GSLocalMemory::BlockOffset* bo = mem.GetBlockOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
const GSOffset* o = m_state->m_context->offset.tex;
bool repeating = m_TEX0.IsRepeating();
@ -281,13 +279,13 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
uint8* dst = (uint8*)m_buff + pitch * r.top;
for(int y = r.top, o = pitch * bs.y; y < r.bottom; y += bs.y, dst += o)
for(int y = r.top, block_pitch = pitch * bs.y; y < r.bottom; y += bs.y, dst += block_pitch)
{
uint32 base = bo->row[y >> 3];
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + bo->col[x >> 3];
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{
@ -315,11 +313,11 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
{
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = bo->row[y >> 3];
uint32 base = o->block.row[y >> 3];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + bo->col[x >> 3];
uint32 block = base + o->block.col[x >> 3];
if(block < MAX_BLOCKS)
{

View File

@ -56,8 +56,9 @@ public:
const GSTexture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
void InvalidateVideoMem(const GSOffset* o, const GSVector4i& r);
void RemoveAll();
void RemoveAt(GSTexture* t);
void IncAge();
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
};

View File

@ -282,9 +282,6 @@ public:
virtual void SetupIA(const void* vertices, int count, int prim) = 0;
virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0;
virtual void SetupGS(GSSelector sel) = 0;
virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal) = 0;
virtual void UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0;
virtual void SetupRS(const GSVector2i& size, const GSVector4i& scissor) = 0;
virtual void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, GSTexture* rt, GSTexture* ds) = 0;
virtual void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix) = 0;
virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0;
virtual void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix) = 0;
};

View File

@ -189,14 +189,7 @@ void GSTextureFX10::SetupGS(GSSelector sel)
dev->GSSetShader(gs);
}
void GSTextureFX10::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal)
{
((GSDevice10*)m_dev)->PSSetShaderResources(tex, pal);
UpdatePS(sel, cb, ssel);
}
void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel)
void GSTextureFX10::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel)
{
GSDevice10* dev = (GSDevice10*)m_dev;
@ -302,19 +295,7 @@ void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
dev->PSSetSamplerState(ss0, ss1);
}
void GSTextureFX10::SetupRS(const GSVector2i& size, const GSVector4i& scissor)
{
((GSDevice10*)m_dev)->RSSet(size, &scissor);
}
void GSTextureFX10::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, GSTexture* rt, GSTexture* ds)
{
UpdateOM(dssel, bsel, afix);
((GSDevice10*)m_dev)->OMSetRenderTargets(rt, ds);
}
void GSTextureFX10::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix)
void GSTextureFX10::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix)
{
GSDevice10* dev = (GSDevice10*)m_dev;
/*

View File

@ -49,9 +49,6 @@ public:
void SetupIA(const void* vertices, int count, int prim);
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel);
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal);
void UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupRS(const GSVector2i& size, const GSVector4i& scissor);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, GSTexture* rt, GSTexture* ds);
void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix);
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix);
};

View File

@ -191,14 +191,7 @@ void GSTextureFX11::SetupGS(GSSelector sel)
dev->GSSetShader(gs);
}
void GSTextureFX11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal)
{
((GSDevice11*)m_dev)->PSSetShaderResources(tex, pal);
UpdatePS(sel, cb, ssel);
}
void GSTextureFX11::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel)
void GSTextureFX11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel)
{
GSDevice11* dev = (GSDevice11*)m_dev;
@ -306,19 +299,7 @@ void GSTextureFX11::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
dev->PSSetSamplerState(ss0, ss1);
}
void GSTextureFX11::SetupRS(const GSVector2i& size, const GSVector4i& scissor)
{
((GSDevice11*)m_dev)->RSSet(size, &scissor);
}
void GSTextureFX11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, GSTexture* rt, GSTexture* ds)
{
UpdateOM(dssel, bsel, afix);
((GSDevice11*)m_dev)->OMSetRenderTargets(rt, ds);
}
void GSTextureFX11::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix)
void GSTextureFX11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix)
{
GSDevice11* dev = (GSDevice11*)m_dev;

View File

@ -48,9 +48,6 @@ public:
void SetupIA(const void* vertices, int count, int prim);
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel);
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal);
void UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupRS(const GSVector2i& size, const GSVector4i& scissor);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, GSTexture* rt, GSTexture* ds);
void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix);
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix);
};

View File

@ -148,17 +148,17 @@ void GSTextureFX9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
dev->VSSetShader(i->second, (const float*)cb, sizeof(*cb) / sizeof(GSVector4));
}
void GSTextureFX9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal)
void GSTextureFX9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel)
{
GSDevice9* dev = (GSDevice9*)m_dev;
dev->PSSetShaderResources(tex, pal);
if(tex && (sel.wms == 3 || sel.wmt == 3))
if(cb->WH.z > 0 && cb->WH.w > 0 && (sel.wms == 3 || sel.wmt == 3))
{
GSVector4i size(cb->WH);
if(sel.wms == 3)
{
if(GSTexture* t = CreateMskFix(tex->m_size.x, cb->MskFix.x, cb->MskFix.z))
if(GSTexture* t = CreateMskFix(size.z, cb->MskFix.x, cb->MskFix.z))
{
(*dev)->SetTexture(2, *(GSTexture9*)t);
}
@ -166,20 +166,13 @@ void GSTextureFX9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSampler
if(sel.wmt == 3)
{
if(GSTexture* t = CreateMskFix(tex->m_size.y, cb->MskFix.y, cb->MskFix.w))
if(GSTexture* t = CreateMskFix(size.w, cb->MskFix.y, cb->MskFix.w))
{
(*dev)->SetTexture(3, *(GSTexture9*)t);
}
}
}
UpdatePS(sel, cb, ssel);
}
void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel)
{
GSDevice9* dev = (GSDevice9*)m_dev;
hash_map<uint32, CComPtr<IDirect3DPixelShader9> >::const_iterator i = m_ps.find(sel);
if(i == m_ps.end())
@ -265,19 +258,7 @@ void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSample
dev->PSSetSamplerState(ss);
}
void GSTextureFX9::SetupRS(const GSVector2i& size, const GSVector4i& scissor)
{
((GSDevice9*)m_dev)->RSSet(size, &scissor);
}
void GSTextureFX9::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, GSTexture* rt, GSTexture* ds)
{
UpdateOM(dssel, bsel, afix);
((GSDevice9*)m_dev)->OMSetRenderTargets(rt, ds);
}
void GSTextureFX9::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix)
void GSTextureFX9::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix)
{
GSDevice9* dev = (GSDevice9*)m_dev;

View File

@ -46,9 +46,6 @@ public:
void SetupIA(const void* vertices, int count, int prim);
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel) {}
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal);
void UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupRS(const GSVector2i& size, const GSVector4i& scissor);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, GSTexture* rt, GSTexture* ds);
void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix);
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix);
};

View File

@ -1805,7 +1805,7 @@ public:
d = f.uph64(d);
}
__forceinline static bool compare(const void* dst, const void* src, int size)
__forceinline static bool compare16(const void* dst, const void* src, int size)
{
ASSERT((size & 15) == 0);
@ -1814,19 +1814,43 @@ public:
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
if(!d[0].eq(s[0]))
{
return false;
}
GSVector4i v = GSVector4i::xffffffff();
for(int i = 0; i < size; i++)
{
v &= d[i] == s[i];
if(!d[i].eq(s[i]))
{
return false;
}
}
return v.alltrue();
return true;
}
// Compares two buffers for equality, 64 bytes (four GSVector4i) per iteration.
//
// dst, src - buffers to compare
// size     - length in bytes, must be a multiple of 64 (asserted)
//
// Returns false as soon as a 64-byte chunk's combined comparison result is not all-true,
// true once every chunk has been checked.
//
// NOTE(review): both pointers are cast to GSVector4i* and dereferenced directly, so they
// are presumably expected to be 16-byte aligned - confirm against the callers.
__forceinline static bool compare64(const void* dst, const void* src, int size)
{
	ASSERT((size & 63) == 0);

	size >>= 6; // from here on: number of 64-byte chunks

	GSVector4i* s = (GSVector4i*)src;
	GSVector4i* d = (GSVector4i*)dst;

	// i counts chunks and i * 4 is the index of the chunk's first vector, so i has to
	// advance by one; stepping by four would leave three out of every four chunks unchecked
	for(int i = 0; i < size; i++)
	{
		GSVector4i v0 = (d[i * 4 + 0] == s[i * 4 + 0]);
		GSVector4i v1 = (d[i * 4 + 1] == s[i * 4 + 1]);
		GSVector4i v2 = (d[i * 4 + 2] == s[i * 4 + 2]);
		GSVector4i v3 = (d[i * 4 + 3] == s[i * 4 + 3]);

		// fold the four results so a single test covers the whole chunk
		v0 = v0 & v1;
		v2 = v2 & v3;

		if(!(v0 & v2).alltrue())
		{
			return false;
		}
	}

	return true;
}
__forceinline static bool update(const void* dst, const void* src, int size)

View File

@ -23,33 +23,34 @@
#include "stdafx.h"
#include "GSVertexTrace.h"
#include "GSUtil.h"
void GSVertexTrace::Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context)
uint32 GSVertexTrace::Hash(const GIFRegPRIM* PRIM, const GSDrawingContext* context)
{
uint32 key = primclass | (PRIM->IIP << 2) | (PRIM->TME << 3) | (PRIM->FST << 4);
m_primclass = GSUtil::GetPrimClass(PRIM->PRIM);
uint32 hash = m_primclass | (PRIM->IIP << 2) | (PRIM->TME << 3) | (PRIM->FST << 4);
if(!(PRIM->TME && context->TEX0.TFX == TFX_DECAL && context->TEX0.TCC))
{
key |= 1 << 5;
hash |= 1 << 5;
}
m_map_sw[key](v, count, m_min, m_max);
return hash;
}
void GSVertexTrace::Update(const GSVertexSW* v, int count, const GIFRegPRIM* PRIM, const GSDrawingContext* context)
{
m_map_sw[Hash(PRIM, context)](v, count, m_min, m_max);
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
m_alpha.valid = false;
}
void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context)
void GSVertexTrace::Update(const GSVertexHW9* v, int count, const GIFRegPRIM* PRIM, const GSDrawingContext* context)
{
uint32 key = primclass | (PRIM->IIP << 2) | (PRIM->TME << 3) | (PRIM->FST << 4);
if(!(PRIM->TME && context->TEX0.TFX == TFX_DECAL && context->TEX0.TCC))
{
key |= 1 << 5;
}
m_map_hw9[key](v, count, m_min, m_max);
m_map_hw9[Hash(PRIM, context)](v, count, m_min, m_max);
GSVector4 o(context->XYOFFSET);
GSVector4 s(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
@ -77,16 +78,9 @@ void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primcl
m_alpha.valid = false;
}
void GSVertexTrace::Update(const GSVertexHW10* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context)
void GSVertexTrace::Update(const GSVertexHW10* v, int count, const GIFRegPRIM* PRIM, const GSDrawingContext* context)
{
uint32 key = primclass | (PRIM->IIP << 2) | (PRIM->TME << 3) | (PRIM->FST << 4);
if(!(PRIM->TME && context->TEX0.TFX == TFX_DECAL && context->TEX0.TCC))
{
key |= 1 << 5;
}
m_map_hw10[key](v, count, m_min, m_max);
m_map_hw10[Hash(PRIM, context)](v, count, m_min, m_max);
GSVector4 o(context->XYOFFSET);
GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f);

View File

@ -22,6 +22,7 @@
#pragma once
#include "GSDrawingContext.h"
#include "GSVertex.h"
#include "GSVertexSW.h"
#include "GSVertexHW.h"
#include "GSFunctionMap.h"
@ -82,7 +83,10 @@ __declspec(align(16)) class GSVertexTrace
GSVertexTraceMapHW9 m_map_hw9;
GSVertexTraceMapHW10 m_map_hw10;
uint32 Hash(const GIFRegPRIM* PRIM, const GSDrawingContext* context);
public:
GS_PRIM_CLASS m_primclass;
Vertex m_min, m_max; // t.xy * 0x10000
VertexAlpha m_alpha; // source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it
@ -93,7 +97,8 @@ public:
struct {uint32 rgba:16, xyzf:4, stq:4;};
} m_eq;
void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context);
void Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context);
void Update(const GSVertexHW10* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context);
void Update(const GSVertexSW* v, int count, const GIFRegPRIM* PRIM, const GSDrawingContext* context);
void Update(const GSVertexHW9* v, int count, const GIFRegPRIM* PRIM, const GSDrawingContext* context);
void Update(const GSVertexHW10* v, int count, const GIFRegPRIM* PRIM, const GSDrawingContext* context);
void Update(const GSVertexNull* v, int count, const GIFRegPRIM* PRIM, const GSDrawingContext* context) {}
};

View File

@ -82,7 +82,7 @@ IDB_LOGO10 BITMAP "res\\logo10.bmp"
// Dialog
//
IDD_CONFIG DIALOGEX 0, 0, 189, 253
IDD_CONFIG DIALOGEX 0, 0, 189, 247
STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU
CAPTION "Settings..."
FONT 8, "MS Shell Dlg", 400, 0, 0x1
@ -110,12 +110,12 @@ BEGIN
CONTROL "Logarithmic Z",IDC_LOGZ,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,89,153,58,10
CONTROL "Allow 8-bit textures",IDC_PALTEX,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,165,82,10
CONTROL "Alpha correction (FBA)",IDC_FBA,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,89,166,93,10
CONTROL "Wait VSync",IDC_VSYNC,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,180,51,10
CONTROL "Windowed",IDC_WINDOWED,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,89,180,93,10
CONTROL "Edge anti-aliasing (AA1, sw-mode only)",IDC_AA1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,194,141,10
CONTROL "Enable output merger blur effect",IDC_BLUR,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,208,121,10
DEFPUSHBUTTON "OK",IDOK,43,232,50,14
PUSHBUTTON "Cancel",IDCANCEL,96,232,50,14
CONTROL "Wait VSync",IDC_VSYNC,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,179,51,10
CONTROL "Windowed",IDC_WINDOWED,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,89,179,93,10
CONTROL "Edge anti-aliasing (AA1, sw-mode only)",IDC_AA1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,193,141,10
CONTROL "Enable output merger blur effect",IDC_BLUR,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,207,121,10
DEFPUSHBUTTON "OK",IDOK,43,226,50,14
PUSHBUTTON "Cancel",IDCANCEL,96,226,50,14
END
IDD_CAPTURE DIALOGEX 0, 0, 279, 71
@ -178,7 +178,7 @@ BEGIN
VERTGUIDE, 89
VERTGUIDE, 182
TOPMARGIN, 7
BOTTOMMARGIN, 246
BOTTOMMARGIN, 240
HORZGUIDE, 49
END

View File

@ -33,7 +33,7 @@
#define PS_CLR1 0
#define PS_FBA 0
#define PS_AOUT 0
#define PS_LTF 0
#define PS_LTF 1
#endif
struct VS_INPUT
@ -100,6 +100,15 @@ float4 sample_p(float u)
return Palette.Sample(PaletteSampler, u);
}
#if SHADER_MODEL >= 0x401
// Reads four neighbouring texels of Texture around uv with a single Gather call, offset (0, 0).
// Only compiled when SHADER_MODEL >= 0x401 (see the enclosing #if).
// NOTE(review): per the HLSL documentation Gather returns one channel (red) of each texel,
// ordered (-,+), (+,+), (+,-), (-,-). The manual path in sample_4a reads the .a channel and
// orders its taps xy, zy, xw, zw - confirm the texture format exposes the index in the
// gathered channel and whether the result needs a swizzle before it is used as a drop-in.
float4 gather_c(float2 uv)
{
return Texture.Gather(TextureSampler, uv, int2(0, 0));
}
#endif
#elif SHADER_MODEL <= 0x300
#ifndef VS_BPPZ
@ -183,6 +192,7 @@ float4 wrapuv(float4 uv)
{
if(PS_WMS == PS_WMT)
{
/*
if(PS_WMS == 0)
{
uv = frac(uv);
@ -191,7 +201,9 @@ float4 wrapuv(float4 uv)
{
uv = saturate(uv);
}
else if(PS_WMS == 2)
else
*/
if(PS_WMS == 2)
{
uv = clamp(uv, MinMax.xyxy, MinMax.zwzw);
}
@ -209,6 +221,7 @@ float4 wrapuv(float4 uv)
}
else
{
/*
if(PS_WMS == 0)
{
uv.xz = frac(uv.xz);
@ -217,7 +230,9 @@ float4 wrapuv(float4 uv)
{
uv.xz = saturate(uv.xz);
}
else if(PS_WMS == 2)
else
*/
if(PS_WMS == 2)
{
uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
}
@ -230,7 +245,7 @@ float4 wrapuv(float4 uv)
uv.z = tex1D(UMSKFIX, uv.z);
#endif
}
/*
if(PS_WMT == 0)
{
uv.yw = frac(uv.yw);
@ -239,7 +254,9 @@ float4 wrapuv(float4 uv)
{
uv.yw = saturate(uv.yw);
}
else if(PS_WMT == 2)
else
*/
if(PS_WMT == 2)
{
uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
}
@ -291,10 +308,18 @@ float4 sample_4a(float4 uv)
{
float4 c;
#if SHADER_MODEL >= 0x401 && PS_LTF && PS_WMS < 2 && PS_WMT < 2
c = gather_c(uv.xy);
#else
c.x = sample_c(uv.xy).a;
c.y = sample_c(uv.zy).a;
c.z = sample_c(uv.xw).a;
c.w = sample_c(uv.zw).a;
#endif
#if SHADER_MODEL <= 0x300
if(PS_RT) c *= 128.0f / 255;

View File

@ -18,9 +18,9 @@
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="JITProfiling.lib d3d11_beta.lib d3dx11.lib d3d10.lib d3dx10.lib d3d9.lib d3dx9.lib ddraw.lib dxguid.lib winmm.lib strmiids.lib xinput.lib cg.lib cgGL.lib glut32.lib glew32.lib"
AdditionalDependencies="JITProfiling.lib d3d11_beta.lib d3dx11.lib d3d10_1.lib d3dx10.lib d3d9.lib d3dx9.lib ddraw.lib dxguid.lib winmm.lib strmiids.lib xinput.lib cg.lib cgGL.lib glut32.lib glew32.lib"
AdditionalLibraryDirectories="./vtune"
DelayLoadDLLs="d3d9.dll;d3dx9_41.dll;d3d10.dll;d3dx10_41.dll;d3d11.dll;d3d11_beta.dll;d3dx11_41.dll;cg.dll;cgGL.dll;glut32.dll"
DelayLoadDLLs="d3d9.dll;d3dx9_41.dll;d3d10.dll;d3d10_1.dll;d3dx10_41.dll;d3d11.dll;d3d11_beta.dll;d3dx11_41.dll;cg.dll;cgGL.dll;glut32.dll"
GenerateDebugInformation="true"
SubSystem="2"
RandomizedBaseAddress="1"