diff --git a/plugins/GSdx/GPURendererSW.cpp b/plugins/GSdx/GPURendererSW.cpp index 3a40e3988f..7d885eb489 100644 --- a/plugins/GSdx/GPURendererSW.cpp +++ b/plugins/GSdx/GPURendererSW.cpp @@ -114,22 +114,26 @@ void GPURendererSW::Draw() gd.vm = m_mem.GetPixelAddress(0, 0); - data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16); - memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count); - data->count = m_count; - - data->frame = m_perfmon.GetFrame(); - data->scissor.left = (int)m_env.DRAREATL.X << m_scale.x; data->scissor.top = (int)m_env.DRAREATL.Y << m_scale.y; data->scissor.right = min((int)(m_env.DRAREABR.X + 1) << m_scale.x, m_mem.GetWidth()); data->scissor.bottom = min((int)(m_env.DRAREABR.Y + 1) << m_scale.y, m_mem.GetHeight()); + + data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16); + data->vertex = (GSVertexSW*)data->buff; + data->vertex_count = m_count; + + memcpy(data->vertex, m_vertices, sizeof(GSVertexSW) * m_count); + + data->frame = m_perfmon.GetFrame(); + + int prims = 0; switch(env.PRIM.TYPE) { - case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; break; - case GPU_LINE: data->primclass = GS_LINE_CLASS; break; - case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; break; + case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; prims = data->vertex_count / 3; break; + case GPU_LINE: data->primclass = GS_LINE_CLASS; prims = data->vertex_count / 2; break; + case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; prims = data->vertex_count / 2; break; default: __assume(0); } @@ -138,9 +142,9 @@ void GPURendererSW::Draw() GSVector4 tl(+1e10f); GSVector4 br(-1e10f); - GSVertexSW* v = data->vertices; + GSVertexSW* v = data->vertex; - for(int i = 0, j = m_count; i < j; i++) + for(int i = 0, j = data->vertex_count; i < j; i++) { GSVector4 p = v[i].p; @@ -163,9 +167,9 @@ void GPURendererSW::Draw() m_rl->Sync(); - // TODO: m_perfmon.Put(GSPerfMon::Draw, 1); - // TODO: m_perfmon.Put(GSPerfMon::Prim, stats.prims); - // TODO: m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels); + m_perfmon.Put(GSPerfMon::Draw, 1); + m_perfmon.Put(GSPerfMon::Prim, prims); + m_perfmon.Put(GSPerfMon::Fillrate, data->pixels); } void GPURendererSW::VertexKick() diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index a44d987b18..a71fd4e461 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -735,8 +735,6 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) ::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS); - vector buff; - if(FILE* fp = fopen(lpszCmdLine, "rb")) { Console console("GSdx", true); @@ -769,10 +767,127 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) GSvsync(1); + struct Packet {uint8 type, param; uint32 size, addr; vector buff;}; + + list packets; + vector buff; + int type; + + while((type = fgetc(fp)) != EOF) + { + Packet* p = new Packet(); + + p->type = (uint8)type; + + switch(type) + { + case 0: + + p->param = (uint8)fgetc(fp); + + fread(&p->size, 4, 1, fp); + + switch(p->param) + { + case 0: + p->buff.resize(0x4000); + p->addr = 0x4000 - p->size; + fread(&p->buff[p->addr], p->size, 1, fp); + break; + case 1: + case 2: + case 3: + p->buff.resize(p->size); + fread(&p->buff[0], p->size, 1, fp); + break; + } + + break; + + case 1: + + p->param = (uint8)fgetc(fp); + + break; + + case 2: + + fread(&p->size, 4, 1, fp); + + break; + + case 3: + + p->buff.resize(0x2000); + + fread(&p->buff[0], 0x2000, 1, fp); + + break; + } + + packets.push_back(p); + } + Sleep(100); + while(IsWindowVisible(hWnd)) + { + for(list::iterator i = packets.begin(); i != packets.end(); i++) + { + Packet* p = *i; + + switch(p->type) + { + case 0: + + switch(p->param) + { + case 0: GSgifTransfer1(&p->buff[0], p->addr); break; + case 1: GSgifTransfer2(&p->buff[0], p->size / 16); break; + case 2: GSgifTransfer3(&p->buff[0], p->size / 16); break; + case 3: GSgifTransfer(&p->buff[0], p->size / 16); break; + } + + break; + + case 1: + + GSvsync(p->param); + + break; + + case 2: + + if(buff.size() < p->size) buff.resize(p->size); + + GSreadFIFO2(&buff[0], p->size / 16); + + break; + + case 3: + + memcpy(regs, &p->buff[0], 0x2000); + + break; + } + } + } + + for(list::iterator i = packets.begin(); i != packets.end(); i++) + { + delete *i; + } + + packets.clear(); + + Sleep(100); + + + /* bool exit = false; + int round = 0; + while(!exit) { uint32 index; @@ -786,6 +901,7 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) case EOF: fseek(fp, start, 0); exit = !IsWindowVisible(hWnd); + //exit = ++round == 60; break; case 0: @@ -838,6 +954,7 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) break; } } + */ GSclose(); GSshutdown(); diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index ac32878bef..a5c86189b9 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -638,8 +638,8 @@ REG64_(GIFReg, FINISH) REG_END REG64_(GIFReg, FOG) - uint8 _PAD1[4+3]; - uint8 F:8; + uint8 _PAD1[7]; + uint8 F; REG_END REG64_(GIFReg, FOGCOL) @@ -1030,7 +1030,9 @@ REG128_(GIFPacked, XYZF2) uint32 _PAD6:3; uint32 ADC:1; uint32 _PAD7:16; -REG_END +REG_END2 + uint32 Skip() const {return u32[3] & 0x8000;} +REG_END2 REG128_(GIFPacked, XYZ2) uint16 X; @@ -1041,7 +1043,9 @@ REG128_(GIFPacked, XYZ2) uint32 _PAD3:15; uint32 ADC:1; uint32 _PAD4:16; -REG_END +REG_END2 + uint32 Skip() const {return u32[3] & 0x8000;} +REG_END2 REG128_(GIFPacked, FOG) uint32 _PAD1; diff --git a/plugins/GSdx/GSDevice.cpp b/plugins/GSdx/GSDevice.cpp index a8735861c6..8dbb1c881e 100644 --- a/plugins/GSdx/GSDevice.cpp +++ b/plugins/GSdx/GSDevice.cpp @@ -35,7 +35,8 @@ GSDevice::GSDevice() , m_1x1(NULL) , m_frame(0) { - memset(&m_vertices, 0, sizeof(m_vertices)); + memset(&m_vertex, 0, sizeof(m_vertex)); + memset(&m_index, 0, sizeof(m_index)); } GSDevice::~GSDevice() @@ -135,8 +136,10 @@ GSTexture* GSDevice::FetchSurface(int type, int w, int h, bool msaa, int format) void GSDevice::EndScene() { - m_vertices.start += m_vertices.count; - m_vertices.count = 0; + m_vertex.start += m_vertex.count; + m_vertex.count = 0; + m_index.start += m_index.count; + m_index.count = 0; } void GSDevice::Recycle(GSTexture* t) diff --git a/plugins/GSdx/GSDevice.h b/plugins/GSdx/GSDevice.h index e0fed6ef0b..8ed3d884f0 100644 --- a/plugins/GSdx/GSDevice.h +++ b/plugins/GSdx/GSDevice.h @@ -72,7 +72,8 @@ protected: GSTexture* m_fxaa; GSTexture* m_1x1; GSTexture* m_current; - struct {size_t stride, start, count, limit;} m_vertices; + struct {size_t stride, start, count, limit;} m_vertex; + struct {size_t start, count, limit;} m_index; unsigned int m_frame; // for ageing the pool virtual GSTexture* CreateSurface(int type, int w, int h, bool msaa, int format) = 0; @@ -101,6 +102,7 @@ public: virtual void BeginScene() {} virtual void DrawPrimitive() {}; + virtual void DrawIndexedPrimitive() {} virtual void EndScene(); virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {} diff --git a/plugins/GSdx/GSDevice11.cpp b/plugins/GSdx/GSDevice11.cpp index 58190b357a..ddca54752e 100644 --- a/plugins/GSdx/GSDevice11.cpp +++ b/plugins/GSdx/GSDevice11.cpp @@ -352,7 +352,12 @@ void GSDevice11::Flip() void GSDevice11::DrawPrimitive() { - m_ctx->Draw(m_vertices.count, m_vertices.start); + m_ctx->Draw(m_vertex.count, m_vertex.start); +} + +void GSDevice11::DrawIndexedPrimitive() +{ + m_ctx->DrawIndexed(m_index.count, m_index.start, m_vertex.start); } void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c) @@ -709,18 +714,18 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert } } -void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t count) +void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t count) { - ASSERT(m_vertices.count == 0); + ASSERT(m_vertex.count == 0); - if(count * stride > m_vertices.limit * m_vertices.stride) + if(count * stride > m_vertex.limit * m_vertex.stride) { m_vb_old = m_vb; m_vb = NULL; - m_vertices.start = 0; - m_vertices.count = 0; - m_vertices.limit = std::max(count * 3 / 2, 11000); + m_vertex.start = 0; + m_vertex.count = 0; + m_vertex.limit = std::max(count * 3 / 2, 11000); } if(m_vb == NULL) @@ -730,7 +735,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c memset(&bd, 0, sizeof(bd)); bd.Usage = D3D11_USAGE_DYNAMIC; - bd.ByteWidth = m_vertices.limit * stride; + bd.ByteWidth = m_vertex.limit * stride; bd.BindFlags = D3D11_BIND_VERTEX_BUFFER; bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; @@ -743,9 +748,9 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE; - if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride) + if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride) { - m_vertices.start = 0; + m_vertex.start = 0; type = D3D11_MAP_WRITE_DISCARD; } @@ -754,13 +759,13 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c if(SUCCEEDED(m_ctx->Map(m_vb, 0, type, 0, &m))) { - GSVector4i::storent((uint8*)m.pData + m_vertices.start * stride, vertices, count * stride); + GSVector4i::storent((uint8*)m.pData + m_vertex.start * stride, vertex, count * stride); m_ctx->Unmap(m_vb, 0); } - m_vertices.count = count; - m_vertices.stride = stride; + m_vertex.count = count; + m_vertex.stride = stride; IASetVertexBuffer(m_vb, stride); } @@ -779,6 +784,70 @@ void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride) } } +void GSDevice11::IASetIndexBuffer(const void* index, size_t count) +{ + ASSERT(m_index.count == 0); + + if(count > m_index.limit) + { + m_ib_old = m_ib; + m_ib = NULL; + + m_index.count = 0; + m_index.limit = std::max(count * 3 / 2, 11000); + } + + if(m_ib == NULL) + { + D3D11_BUFFER_DESC bd; + + memset(&bd, 0, sizeof(bd)); + + bd.Usage = D3D11_USAGE_DYNAMIC; + bd.ByteWidth = m_index.limit * sizeof(uint32); + bd.BindFlags = D3D11_BIND_INDEX_BUFFER; + bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + + HRESULT hr; + + hr = m_dev->CreateBuffer(&bd, NULL, &m_ib); + + if(FAILED(hr)) return; + } + + D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE; + + if(m_index.start + count > m_index.limit) + { + m_index.start = 0; + + type = D3D11_MAP_WRITE_DISCARD; + } + + D3D11_MAPPED_SUBRESOURCE m; + + if(SUCCEEDED(m_ctx->Map(m_ib, 0, type, 0, &m))) + { + memcpy((uint8*)m.pData + m_index.start * sizeof(uint32), index, count * sizeof(uint32)); + + m_ctx->Unmap(m_ib, 0); + } + + m_index.count = count; + + IASetIndexBuffer(m_ib); +} + +void GSDevice11::IASetIndexBuffer(ID3D11Buffer* ib) +{ + if(m_state.ib != ib) + { + m_state.ib = ib; + + m_ctx->IASetIndexBuffer(ib, DXGI_FORMAT_R32_UINT, 0); + } +} + void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout) { if(m_state.layout != layout) diff --git a/plugins/GSdx/GSDevice11.h b/plugins/GSdx/GSDevice11.h index b22e668aec..a370a24b0a 100644 --- a/plugins/GSdx/GSDevice11.h +++ b/plugins/GSdx/GSDevice11.h @@ -45,6 +45,8 @@ class GSDevice11 : public GSDeviceDX CComPtr m_swapchain; CComPtr m_vb; CComPtr m_vb_old; + CComPtr m_ib; + CComPtr m_ib_old; bool m_srv_changed, m_ss_changed; @@ -52,6 +54,7 @@ class GSDevice11 : public GSDeviceDX { ID3D11Buffer* vb; size_t vb_stride; + ID3D11Buffer* ib; ID3D11InputLayout* layout; D3D11_PRIMITIVE_TOPOLOGY topology; ID3D11VertexShader* vs; @@ -141,6 +144,7 @@ public: void SetExclusive(bool isExcl); void DrawPrimitive(); + void DrawIndexedPrimitive(); void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, uint32 c); @@ -162,8 +166,10 @@ public: void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear = true); void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true); - void IASetVertexBuffer(const void* vertices, size_t stride, size_t count); + void IASetVertexBuffer(const void* vertex, size_t stride, size_t count); void IASetVertexBuffer(ID3D11Buffer* vb, size_t stride); + void IASetIndexBuffer(const void* index, size_t count); + void IASetIndexBuffer(ID3D11Buffer* ib); void IASetInputLayout(ID3D11InputLayout* layout); void IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology); void VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb); @@ -176,7 +182,7 @@ public: void OMSetBlendState(ID3D11BlendState* bs, float bf); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL); - void SetupIA(const void* vertices, int count, int prim); + void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim); void SetupVS(VSSelector sel, const VSConstantBuffer* cb); void SetupGS(GSSelector sel); void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel); diff --git a/plugins/GSdx/GSDevice9.cpp b/plugins/GSdx/GSDevice9.cpp index 855df86537..3a8c875f50 100644 --- a/plugins/GSdx/GSDevice9.cpp +++ b/plugins/GSdx/GSDevice9.cpp @@ -352,8 +352,10 @@ bool GSDevice9::Reset(int w, int h) m_vb = NULL; m_vb_old = NULL; - m_vertices.start = 0; - m_vertices.count = 0; + m_vertex.start = 0; + m_vertex.count = 0; + m_index.start = 0; + m_index.count = 0; if(m_state.vs_cb) _aligned_free(m_state.vs_cb); if(m_state.ps_cb) _aligned_free(m_state.ps_cb); @@ -510,25 +512,52 @@ void GSDevice9::DrawPrimitive() switch(m_state.topology) { - case D3DPT_TRIANGLELIST: - prims = m_vertices.count / 3; + case D3DPT_POINTLIST: + prims = m_vertex.count; break; case D3DPT_LINELIST: - prims = m_vertices.count / 2; + prims = m_vertex.count / 2; break; - case D3DPT_POINTLIST: - prims = m_vertices.count; + case D3DPT_LINESTRIP: + prims = m_vertex.count - 1; + break; + case D3DPT_TRIANGLELIST: + prims = m_vertex.count / 3; break; case D3DPT_TRIANGLESTRIP: case D3DPT_TRIANGLEFAN: - prims = m_vertices.count - 2; - break; - case D3DPT_LINESTRIP: - prims = m_vertices.count - 1; + prims = m_vertex.count - 2; break; + default: + __assume(0); } - m_dev->DrawPrimitive(m_state.topology, m_vertices.start, prims); + m_dev->DrawPrimitive(m_state.topology, m_vertex.start, prims); +} + +void GSDevice9::DrawIndexedPrimitive() +{ + int prims = 0; + + switch(m_state.topology) + { + case D3DPT_POINTLIST: + prims = m_index.count; + break; + case D3DPT_LINELIST: + case D3DPT_LINESTRIP: + prims = m_index.count / 2; + break; + case D3DPT_TRIANGLELIST: + case D3DPT_TRIANGLESTRIP: + case D3DPT_TRIANGLEFAN: + prims = m_index.count / 3; + break; + default: + __assume(0); + } + + m_dev->DrawIndexedPrimitive(m_state.topology, m_vertex.start, 0, m_index.count, m_index.start, prims); } void GSDevice9::EndScene() @@ -881,49 +910,49 @@ void GSDevice9::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* verti } } -void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t count) +void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t count) { - ASSERT(m_vertices.count == 0); + ASSERT(m_vertex.count == 0); - if(count * stride > m_vertices.limit * m_vertices.stride) + if(count * stride > m_vertex.limit * m_vertex.stride) { m_vb_old = m_vb; m_vb = NULL; - m_vertices.start = 0; - m_vertices.count = 0; - m_vertices.limit = std::max(count * 3 / 2, 10000); + m_vertex.start = 0; + m_vertex.count = 0; + m_vertex.limit = std::max(count * 3 / 2, 10000); } if(m_vb == NULL) { HRESULT hr; - hr = m_dev->CreateVertexBuffer(m_vertices.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL); + hr = m_dev->CreateVertexBuffer(m_vertex.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL); if(FAILED(hr)) return; } uint32 flags = D3DLOCK_NOOVERWRITE; - if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride) + if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride) { - m_vertices.start = 0; + m_vertex.start = 0; flags = D3DLOCK_DISCARD; } - void* v = NULL; + void* ptr = NULL; - if(SUCCEEDED(m_vb->Lock(m_vertices.start * stride, count * stride, &v, flags))) + if(SUCCEEDED(m_vb->Lock(m_vertex.start * stride, count * stride, &ptr, flags))) { - GSVector4i::storent(v, vertices, count * stride); + GSVector4i::storent(ptr, vertex, count * stride); m_vb->Unlock(); } - m_vertices.count = count; - m_vertices.stride = stride; + m_vertex.count = count; + m_vertex.stride = stride; IASetVertexBuffer(m_vb, stride); } @@ -939,6 +968,61 @@ void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride) } } +void GSDevice9::IASetIndexBuffer(const void* index, size_t count) +{ + ASSERT(m_index.count == 0); + + if(count > m_index.limit) + { + m_ib_old = m_ib; + m_ib = NULL; + + m_index.count = 0; + m_index.limit = std::max(count * 3 / 2, 11000); + } + + if(m_ib == NULL) + { + HRESULT hr; + + hr = m_dev->CreateIndexBuffer(m_index.limit * sizeof(uint32), D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, D3DFMT_INDEX32, D3DPOOL_DEFAULT, &m_ib, NULL); + + if(FAILED(hr)) return; + } + + uint32 flags = D3DLOCK_NOOVERWRITE; + + if(m_index.start + count > m_index.limit) + { + m_index.start = 0; + + flags = D3DLOCK_DISCARD; + } + + void* ptr = NULL; + + if(SUCCEEDED(m_ib->Lock(m_index.start * sizeof(uint32), count * sizeof(uint32), &ptr, flags))) + { + memcpy(ptr, index, count * sizeof(uint32)); + + m_ib->Unlock(); + } + + m_index.count = count; + + IASetIndexBuffer(m_ib); +} + +void GSDevice9::IASetIndexBuffer(IDirect3DIndexBuffer9* ib) +{ + if(m_state.ib != ib) + { + m_state.ib = ib; + + m_dev->SetIndices(ib); + } +} + void GSDevice9::IASetInputLayout(IDirect3DVertexDeclaration9* layout) { if(m_state.layout != layout) diff --git a/plugins/GSdx/GSDevice9.h b/plugins/GSdx/GSDevice9.h index 41a28c9f6a..3e46c94214 100644 --- a/plugins/GSdx/GSDevice9.h +++ b/plugins/GSdx/GSDevice9.h @@ -82,6 +82,8 @@ class GSDevice9 : public GSDeviceDX CComPtr m_swapchain; CComPtr m_vb; CComPtr m_vb_old; + CComPtr m_ib; + CComPtr m_ib_old; bool m_lost; D3DFORMAT m_depth_format; @@ -89,6 +91,7 @@ class GSDevice9 : public GSDeviceDX { IDirect3DVertexBuffer9* vb; size_t vb_stride; + IDirect3DIndexBuffer9* ib; IDirect3DVertexDeclaration9* layout; D3DPRIMITIVETYPE topology; IDirect3DVertexShader9* vs; @@ -169,6 +172,7 @@ public: void BeginScene(); void DrawPrimitive(); + void DrawIndexedPrimitive(); void EndScene(); void ClearRenderTarget(GSTexture* t, const GSVector4& c); @@ -191,8 +195,10 @@ public: void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, bool linear = true); void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true); - void IASetVertexBuffer(const void* vertices, size_t stride, size_t count); + void IASetVertexBuffer(const void* vertex, size_t stride, size_t count); void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride); + void IASetIndexBuffer(const void* index, size_t count); + void IASetIndexBuffer(IDirect3DIndexBuffer9* ib); void IASetInputLayout(IDirect3DVertexDeclaration9* layout); void IASetPrimitiveTopology(D3DPRIMITIVETYPE topology); void VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int vs_cb_len); @@ -210,7 +216,7 @@ public: HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il); HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DPixelShader9** ps); - void SetupIA(const void* vertices, int count, int prim); + void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim); void SetupVS(VSSelector sel, const VSConstantBuffer* cb); void SetupGS(GSSelector sel) {} void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel); diff --git a/plugins/GSdx/GSDeviceDX.h b/plugins/GSdx/GSDeviceDX.h index 74d033f879..6de181fa01 100644 --- a/plugins/GSdx/GSDeviceDX.h +++ b/plugins/GSdx/GSDeviceDX.h @@ -278,7 +278,7 @@ public: bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode); - virtual void SetupIA(const void* vertices, int count, int prim) = 0; + virtual void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) = 0; virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0; virtual void SetupGS(GSSelector sel) = 0; virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0; diff --git a/plugins/GSdx/GSDrawScanline.cpp b/plugins/GSdx/GSDrawScanline.cpp index ce87ed8b81..7e8c26e2e4 100644 --- a/plugins/GSdx/GSDrawScanline.cpp +++ b/plugins/GSdx/GSDrawScanline.cpp @@ -87,7 +87,7 @@ void GSDrawScanline::BeginDraw(const void* param) sel.tcc = m_global.sel.tcc; sel.fst = m_global.sel.fst; sel.fge = m_global.sel.fge; - sel.sprite = m_global.sel.sprite; + sel.prim = m_global.sel.prim; sel.fb = m_global.sel.fb; sel.zb = m_global.sel.zb; sel.zoverflow = m_global.sel.zoverflow; @@ -102,7 +102,7 @@ void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels) #ifndef ENABLE_JIT_RASTERIZER -void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) +void GSDrawScanline::SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan) { GSScanlineSelector sel = m_global.sel; @@ -115,7 +115,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc if(has_z || has_f) { - if(!sel.sprite) + if(sel.prim != GS_SPRITE_CLASS) { if(has_f) { @@ -145,12 +145,12 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc { if(has_f) { - m_local.p.f = GSVector4i(vertices[0].p).zzzzh().zzzz(); + m_local.p.f = GSVector4i(vertex.p).zzzzh().zzzz(); } if(has_z) { - m_local.p.z = vertices[0].t.u32[3]; // uint32 z is bypassed in t.w + m_local.p.z = vertex.t.u32[3]; // uint32 z is bypassed in t.w } } } @@ -234,7 +234,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc } else { - GSVector4i c = GSVector4i(vertices[0].c); + GSVector4i c = GSVector4i(vertex.c); c = c.upl16(c.zwxy()); @@ -271,7 +271,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))]; - if(!sel.sprite) + if(sel.prim != GS_SPRITE_CLASS) { if(sel.fwrite && sel.fge) { @@ -300,7 +300,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS GSVector4i u = vt.xxxx() + GSVector4i::cast(m_local.d[skip].s); GSVector4i v = vt.yyyy(); - if(!sel.sprite || sel.mmin) + if(sel.prim != GS_SPRITE_CLASS || sel.mmin) { v += GSVector4i::cast(m_local.d[skip].t); } @@ -354,7 +354,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { za = fza_base->y + fza_offset->y; - if(!sel.sprite) + if(sel.prim != GS_SPRITE_CLASS) { GSVector4 z = scan.p.zzzz() + zo; @@ -754,7 +754,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { uf = u.xxzzlh().srl16(1); - if(!sel.sprite) + if(sel.prim != GS_SPRITE_CLASS) { vf = v.xxzzlh().srl16(1); } @@ -936,7 +936,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS if(sel.fwrite && sel.fge) { - GSVector4i fog = !sel.sprite ? f : m_local.p.f; + GSVector4i fog = sel.prim != GS_SPRITE_CLASS ? f : m_local.p.f; rb = m_global.frb.lerp16<0>(rb, fog); ga = m_global.fga.lerp16<0>(ga, fog).mix16(ga); @@ -1211,7 +1211,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS fza_offset++; - if(!sel.sprite) + if(sel.prim != GS_SPRITE_CLASS) { if(sel.zb) { @@ -1234,7 +1234,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS s = GSVector4::cast(GSVector4i::cast(s) + stq.xxxx()); - if(!sel.sprite || sel.mmin) + if(sel.prim != GS_SPRITE_CLASS || sel.mmin) { t = GSVector4::cast(GSVector4i::cast(t) + stq.yyyy()); } diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp index e083020cda..b67d9dfd30 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp @@ -287,7 +287,7 @@ void GSDrawScanlineCodeGenerator::Init() lea(edi, ptr[ebx * 2]); add(edi, ptr[&m_local.gd->fzbc]); - if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) + if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) { // edx = &m_local.d[skip] @@ -298,7 +298,7 @@ void GSDrawScanlineCodeGenerator::Init() mov(ebx, ptr[esp + _v]); } - if(!m_sel.sprite) + if(m_sel.prim != GS_SPRITE_CLASS) { if(m_sel.fwrite && m_sel.fge || m_sel.zb) { @@ -370,7 +370,7 @@ void GSDrawScanlineCodeGenerator::Init() vpaddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); - if(!m_sel.sprite || m_sel.mmin) + if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { vpaddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); } @@ -455,7 +455,7 @@ void GSDrawScanlineCodeGenerator::Step() add(edi, 8); - if(!m_sel.sprite) + if(m_sel.prim != GS_SPRITE_CLASS) { // z += m_local.d4.z; @@ -501,7 +501,7 @@ void GSDrawScanlineCodeGenerator::Step() vpaddd(xmm2, ptr[&m_local.temp.s]); vmovdqa(ptr[&m_local.temp.s], xmm2); - if(!m_sel.sprite || m_sel.mmin) + if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { vpshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); vpaddd(xmm3, ptr[&m_local.temp.t]); @@ -597,7 +597,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) // GSVector4i zs = zi; - if(!m_sel.sprite) + if(m_sel.prim != GS_SPRITE_CLASS) { if(m_sel.zoverflow) { @@ -733,7 +733,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() vpsrlw(xmm0, 1); vmovdqa(ptr[&m_local.temp.uf], xmm0); - if(!m_sel.sprite) + if(m_sel.prim != GS_SPRITE_CLASS) { // GSVector4i vf = v.xxzzlh().srl16(1); @@ -2227,7 +2227,7 @@ void GSDrawScanlineCodeGenerator::Fog() // rb = m_local.gd->frb.lerp16<0>(rb, f); // ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga); - vmovdqa(xmm0, ptr[!m_sel.sprite ? &m_local.temp.f : &m_local.p.f]); + vmovdqa(xmm0, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.f : &m_local.p.f]); vmovdqa(xmm1, xmm6); vmovdqa(xmm2, ptr[&m_local.gd->frb]); @@ -2350,7 +2350,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf() bool fast = m_sel.ztest && m_sel.zpsm < 2; - vmovdqa(xmm1, ptr[!m_sel.sprite ? &m_local.temp.zs : &m_local.p.z]); + vmovdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]); if(fast) { diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp index bc0c24ba2c..aad049e47b 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp @@ -287,7 +287,7 @@ void GSDrawScanlineCodeGenerator::Init() lea(edi, ptr[ebx * 2]); add(edi, ptr[&m_local.gd->fzbc]); - if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) + if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) { // edx = &m_local.d[skip] @@ -298,7 +298,7 @@ void GSDrawScanlineCodeGenerator::Init() mov(ebx, ptr[esp + _v]); } - if(!m_sel.sprite) + if(m_sel.prim != GS_SPRITE_CLASS) { if(m_sel.fwrite && m_sel.fge || m_sel.zb) { @@ -370,7 +370,7 @@ void GSDrawScanlineCodeGenerator::Init() paddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); - if(!m_sel.sprite || m_sel.mmin) + if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { paddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); } @@ -458,7 +458,7 @@ void GSDrawScanlineCodeGenerator::Step() add(edi, 8); - if(!m_sel.sprite) + if(m_sel.prim != GS_SPRITE_CLASS) { // z += m_local.d4.z; @@ -504,7 +504,7 @@ void GSDrawScanlineCodeGenerator::Step() paddd(xmm2, ptr[&m_local.temp.s]); movdqa(ptr[&m_local.temp.s], xmm2); - if(!m_sel.sprite || m_sel.mmin) + if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); paddd(xmm3, ptr[&m_local.temp.t]); @@ -602,7 +602,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2) // GSVector4i zs = zi; - if(!m_sel.sprite) + if(m_sel.prim != GS_SPRITE_CLASS) { if(m_sel.zoverflow) { @@ -738,7 +738,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() psrlw(xmm0, 1); movdqa(ptr[&m_local.temp.uf], xmm0); - if(!m_sel.sprite) + if(m_sel.prim != GS_SPRITE_CLASS) { // GSVector4i vf = v.xxzzlh().srl16(1); @@ -2341,7 +2341,7 @@ void GSDrawScanlineCodeGenerator::Fog() // rb = m_local.gd->frb.lerp16<0>(rb, f); // ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga); - movdqa(xmm0, ptr[!m_sel.sprite ? &m_local.temp.f : &m_local.p.f]); + movdqa(xmm0, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.f : &m_local.p.f]); movdqa(xmm1, xmm6); movdqa(xmm2, ptr[&m_local.gd->frb]); @@ -2464,7 +2464,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf() bool fast = m_sel.ztest && m_sel.zpsm < 2; - movdqa(xmm1, ptr[!m_sel.sprite ? &m_local.temp.zs : &m_local.p.z]); + movdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]); if(fast) { diff --git a/plugins/GSdx/GSDump.cpp b/plugins/GSdx/GSDump.cpp index 29d93009a5..f7c52cdb64 100644 --- a/plugins/GSdx/GSDump.cpp +++ b/plugins/GSdx/GSDump.cpp @@ -24,10 +24,7 @@ GSDump::GSDump() : m_gs(NULL) - , m_obj(NULL) , m_frames(0) - , m_objects(0) - , m_vertices(0) { } @@ -39,11 +36,8 @@ GSDump::~GSDump() void GSDump::Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GSPrivRegSet* regs) { m_gs = fopen((fn + ".gs").c_str(), "wb"); - m_obj = fopen((fn + ".obj").c_str(), "wt"); m_frames = 0; - m_objects = 0; - m_vertices = 0; if(m_gs) { @@ -57,7 +51,6 @@ void GSDump::Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GS void GSDump::Close() { if(m_gs) {fclose(m_gs); m_gs = NULL;} - if(m_obj) {fclose(m_obj); m_obj = NULL;} } void GSDump::Transfer(int index, const uint8* mem, size_t size) @@ -96,67 +89,3 @@ void GSDump::VSync(int field, bool last, const GSPrivRegSet* regs) } } } - -void GSDump::Object(GSVertexSW* vertices, int count, GS_PRIM_CLASS primclass) -{ - if(m_obj) - { - switch(primclass) - { - case GS_POINT_CLASS: - - // TODO - - break; - - case GS_LINE_CLASS: - - // TODO - - break; - - case GS_TRIANGLE_CLASS: - - for(int i = 0; i < count; i++) - { - float x = vertices[i].p.x; - float y = vertices[i].p.y; - float z = vertices[i].p.z; - - fprintf(m_obj, "v %f %f %f\n", x, y, z); - } - - for(int i = 0; i < count; i++) - { - fprintf(m_obj, "vt %f %f %f\n", vertices[i].t.x, vertices[i].t.y, vertices[i].t.z); - } - - for(int i = 0; i < count; i++) - { - fprintf(m_obj, "vn %f %f %f\n", 0.0f, 0.0f, 0.0f); - } - - fprintf(m_obj, "g f%d_o%d_p%d_v%d\n", m_frames, m_objects, primclass, count); - - for(int i = 0; i < count; i += 3) - { - int a = m_vertices + i + 1; - int b = m_vertices + i + 2; - int c = m_vertices + i + 3; - - fprintf(m_obj, "f %d/%d/%d %d/%d/%d %d/%d/%d\n", a, a, a, b, b, b, c, c, c); - } - - m_vertices += count; - m_objects++; - - break; - - case GS_SPRITE_CLASS: - - // TODO - - break; - } - } -} diff --git a/plugins/GSdx/GSDump.h b/plugins/GSdx/GSDump.h index 57059da247..3fea0c1050 100644 --- a/plugins/GSdx/GSDump.h +++ b/plugins/GSdx/GSDump.h @@ -46,10 +46,7 @@ Regs data (id == 3) class GSDump { FILE* m_gs; - FILE* m_obj; int m_frames; - int m_objects; - int m_vertices; public: GSDump(); @@ -60,6 +57,5 @@ public: void ReadFIFO(uint32 size); void Transfer(int index, const uint8* mem, size_t size); void VSync(int field, bool last, const GSPrivRegSet* regs); - void Object(GSVertexSW* vertices, int count, GS_PRIM_CLASS primclass); operator bool() {return m_gs != NULL;} }; diff --git a/plugins/GSdx/GSLocalMemory.cpp b/plugins/GSdx/GSLocalMemory.cpp index 9ae7090b74..99ad2207d0 100644 --- a/plugins/GSdx/GSLocalMemory.cpp +++ b/plugins/GSdx/GSLocalMemory.cpp @@ -1992,7 +1992,7 @@ GSOffset::~GSOffset() { } -vector* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox) +uint32* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox) { GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs; @@ -2000,23 +2000,37 @@ vector* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox) if(bbox != NULL) *bbox = r; - vector* pages = new vector(); + // worst case: + // bp page-aligned: (w * h) / (64 * 32) + // bp block-aligned: (w * h) / (8 * 8) - // 32-bpp worst case: (w * h) / (64 * 32), it can be a bit more if we are only block-aligned (bp & 31) != 0 + int size = r.width() * r.height(); - pages->reserve(((r.width() * r.height()) >> 11) + 2); + int limit = std::min((size >> ((bp & 31) != 0 ? 6 : 11)) + 2, MAX_PAGES) + 1; - uint32 tmp[16]; + uint32* pages = new uint32[limit]; - memset(tmp, 0, sizeof(tmp)); + __aligned(uint32, 16) tmp[16]; + ((GSVector4i*)tmp)[0] = GSVector4i::zero(); + ((GSVector4i*)tmp)[1] = GSVector4i::zero(); + ((GSVector4i*)tmp)[2] = GSVector4i::zero(); + ((GSVector4i*)tmp)[3] = GSVector4i::zero(); + + r = r.sra32(3); + + bs.x >>= 3; + bs.y >>= 3; + + uint32* RESTRICT p = pages; + for(int y = r.top; y < r.bottom; y += bs.y) { - uint32 base = block.row[y >> 3]; + uint32 base = block.row[y]; for(int x = r.left; x < r.right; x += bs.x) { - uint32 n = (base + block.col[x >> 3]) >> 5; + uint32 n = (base + block.col[x]) >> 5; if(n < MAX_PAGES) { @@ -2027,11 +2041,15 @@ vector* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox) { row |= col; - pages->push_back(n); + *p++ = n; } } } } + *p++ = EOP; + + ASSERT(p - pages <= limit); + return pages; } diff --git a/plugins/GSdx/GSLocalMemory.h b/plugins/GSdx/GSLocalMemory.h index d5b0478e7d..03e3e838dd 100644 --- a/plugins/GSdx/GSLocalMemory.h +++ b/plugins/GSdx/GSLocalMemory.h @@ -51,7 +51,9 @@ public: GSOffset(uint32 bp, uint32 bw, uint32 psm); virtual ~GSOffset(); - vector* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL); + enum {EOP = 0xffffffff}; + + uint32* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL); }; struct GSPixelOffset4 diff --git a/plugins/GSdx/GSPerfMon.h b/plugins/GSdx/GSPerfMon.h index 907af994bf..740c3baeef 100644 --- a/plugins/GSdx/GSPerfMon.h +++ b/plugins/GSdx/GSPerfMon.h @@ -35,7 +35,7 @@ public: enum counter_t { - Frame, Prim, Draw, Swizzle, Unswizzle, Fillrate, Quad, + Frame, Prim, PrimNotRendered, Draw, Swizzle, Unswizzle, Fillrate, Quad, CounterLast, }; diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index 4b71945906..a873f3658f 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -105,12 +105,17 @@ void GSRasterizer::Draw(shared_ptr data) { GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id); - if(data->count == 0) return; + if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return; m_ds->BeginDraw(data->param); - const GSVertexSW* vertices = data->vertices; - const GSVertexSW* vertices_end = data->vertices + data->count; + const GSVertexSW* vertex = data->vertex; + const GSVertexSW* vertex_end = data->vertex + data->vertex_count; + + const uint32* index = data->index; + const uint32* index_end = data->index + data->index_count; + + uint32 tmp_index[] = {0, 1, 2}; bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor)); @@ -128,33 +133,57 @@ void GSRasterizer::Draw(shared_ptr data) if(scissor_test) { - DrawPoint(vertices, data->count); + DrawPoint(vertex, data->vertex_count, index, data->index_count); } else { - DrawPoint(vertices, data->count); + DrawPoint(vertex, data->vertex_count, index, data->index_count); } break; case GS_LINE_CLASS: - do {DrawLine(vertices); vertices += 2;} - while(vertices < vertices_end); + if(index != NULL) + { + do {DrawLine(vertex, index); index += 2;} + while(index < index_end); + } + else + { + do {DrawLine(vertex, tmp_index); vertex += 2;} + while(vertex < vertex_end); + } break; case GS_TRIANGLE_CLASS: - do {DrawTriangle(vertices); vertices += 3;} - while(vertices < vertices_end); + if(index != NULL) + { + do {DrawTriangle(vertex, index); index += 3;} + while(index < index_end); + } + else + { + do {DrawTriangle(vertex, tmp_index); vertex += 3;} + while(vertex < vertex_end); + } break; case GS_SPRITE_CLASS: - do {DrawSprite(vertices, data->solidrect); vertices += 2;} - while(vertices < vertices_end); + if(index != NULL) + { + do {DrawSprite(vertex, index, data->solidrect); index += 2;} + while(index < index_end); + } + else + { + do {DrawSprite(vertex, tmp_index, data->solidrect); vertex += 2;} + while(vertex < vertex_end); + } break; @@ -171,11 +200,13 @@ void GSRasterizer::Draw(shared_ptr data) } template -void GSRasterizer::DrawPoint(const GSVertexSW* v, int count) +void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count) { - for(; count > 0; count--, v++) + for(int i = 0, count = index != NULL ? index_count : vertex_count; i < count; i++) { - GSVector4i p(v->p); + const GSVertexSW& v = vertex[index != NULL ? index[i] : i]; + + GSVector4i p(v.p); if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom) { @@ -183,17 +214,20 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, int count) { m_pixels++; - m_ds->SetupPrim(v, *v); + m_ds->SetupPrim(v, v); - m_ds->DrawScanline(1, p.x, p.y, *v); + m_ds->DrawScanline(1, p.x, p.y, v); } } } } -void GSRasterizer::DrawLine(const GSVertexSW* v) +void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index) { - GSVertexSW dv = v[1] - v[0]; + const GSVertexSW& v0 = vertex[index[0]]; + const GSVertexSW& v1 = vertex[index[1]]; + + GSVertexSW dv = v1 - v0; GSVector4 dp = dv.p.abs(); @@ -201,10 +235,10 @@ void GSRasterizer::DrawLine(const GSVertexSW* v) if(m_ds->HasEdge()) { - DrawEdge(v[0], v[1], dv, i, 0); - DrawEdge(v[0], v[1], dv, i, 1); + DrawEdge(v0, v1, dv, i, 0); + DrawEdge(v0, v1, dv, i, 1); - Flush(v, GSVertexSW::zero(), true); + Flush(v1, GSVertexSW::zero(), true); return; } @@ -217,19 +251,19 @@ void GSRasterizer::DrawLine(const GSVertexSW* v) { // shortcut for horizontal lines - GSVector4 mask = (v[0].p > v[1].p).xxxx(); + GSVector4 mask = (v0.p > v1.p).xxxx(); GSVertexSW scan; - scan.p = v[0].p.blend32(v[1].p, mask); - scan.t = v[0].t.blend32(v[1].t, mask); - scan.c = v[0].c.blend32(v[1].c, mask); + scan.p = v0.p.blend32(v1.p, mask); + scan.t = v0.t.blend32(v1.t, mask); + scan.c = v0.c.blend32(v1.c, mask); GSVector4i p(scan.p); if(m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y)) { - GSVector4 lrf = scan.p.upl(v[1].p.blend32(v[0].p, mask)).ceil(); + GSVector4 lrf = scan.p.upl(v1.p.blend32(v0.p, mask)).ceil(); GSVector4 l = lrf.max(m_fscissor_x); GSVector4 r = lrf.min(m_fscissor_x); GSVector4i lr = GSVector4i(l.xxyy(r)); @@ -247,7 +281,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v) scan += dscan * (l - scan.p).xxxx(); - m_ds->SetupPrim(v, dscan); + m_ds->SetupPrim(v1, dscan); m_ds->DrawScanline(pixels, left, p.y, scan); } @@ -261,7 +295,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v) if(steps > 0) { - GSVertexSW edge = v[0]; + GSVertexSW edge = v0; GSVertexSW dedge = dv / GSVector4(dp.v[i]); GSVertexSW* RESTRICT e = m_edge.buff; @@ -287,7 +321,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v) m_edge.count = e - m_edge.buff; - Flush(v, GSVertexSW::zero()); + Flush(v1, GSVertexSW::zero()); } } @@ -303,42 +337,47 @@ static const uint8 s_ysort[8][4] = {2, 1, 0, 0}, // y2 < y1 < y0 }; -void GSRasterizer::DrawTriangle(const GSVertexSW* vertices) +void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) { - GSVertexSW v[3]; GSVertexSW dv[3]; GSVertexSW edge; GSVertexSW dedge; GSVertexSW dscan; - GSVector4 y0011 = vertices[0].p.yyyy(vertices[1].p); - GSVector4 y1221 = vertices[1].p.yyyy(vertices[2].p).xzzx(); + GSVector4 y0011 = vertex[index[0]].p.yyyy(vertex[index[1]].p); + GSVector4 y1221 = vertex[index[1]].p.yyyy(vertex[index[2]].p).xzzx(); - int mask = (y0011 > y1221).mask() & 7; + int m1 = (y0011 > y1221).mask() & 7; - v[0] = vertices[s_ysort[mask][0]]; - v[1] = vertices[s_ysort[mask][1]]; - v[2] = vertices[s_ysort[mask][2]]; + int i[3]; - y0011 = v[0].p.yyyy(v[1].p); - y1221 = v[1].p.yyyy(v[2].p).xzzx(); + i[0] = index[s_ysort[m1][0]]; + i[1] = index[s_ysort[m1][1]]; + i[2] = index[s_ysort[m1][2]]; - int i = (y0011 == y1221).mask() & 7; + const GSVertexSW& v0 = vertex[i[0]]; + const GSVertexSW& v1 = vertex[i[1]]; + const GSVertexSW& v2 = vertex[i[2]]; + + y0011 = v0.p.yyyy(v1.p); + y1221 = v1.p.yyyy(v2.p).xzzx(); + + m1 = (y0011 == y1221).mask() & 7; // if(i == 0) => y0 < y1 < y2 // if(i == 1) => y0 == y1 < y2 // if(i == 4) => y0 < y1 == y2 - if(i == 7) return; // y0 == y1 == y2 + if(m1 == 7) return; // y0 == y1 == y2 GSVector4 tbf = y0011.xzxz(y1221).ceil(); GSVector4 tbmax = tbf.max(m_fscissor_y); GSVector4 tbmin = tbf.min(m_fscissor_y); GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); - dv[0] = v[1] - v[0]; - dv[1] = v[2] - v[0]; - dv[2] = v[2] - v[1]; + dv[0] = v1 - v0; + dv[1] = v2 - v0; + dv[2] = v2 - v1; GSVector4 cross = dv[0].p * dv[1].p.yxwz(); @@ -346,11 +385,11 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices) // the longest horizontal span would be cross.x / dv[1].p.y, but we don't need its actual value - int j = cross.upl(cross == GSVector4::zero()).mask(); + int m2 = cross.upl(cross == GSVector4::zero()).mask(); - if(j & 2) return; + if(m2 & 2) return; - j &= 1; + m2 &= 1; cross = cross.rcpnr(); @@ -390,42 +429,42 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices) dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0 dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1 - if(i & 1) + if(m1 & 1) { if(tb.y < tb.w) { - edge = v[1 - j]; + edge = vertex[i[1 - m2]]; - edge.p = edge.p.insert<0, 1>(v[j].p); - dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p); + edge.p = edge.p.insert<0, 1>(vertex[i[m2]].p); + dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p); - DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, v[1 - j].p); + DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p); } } else { if(tb.x < tb.z) { - edge = v[0]; + edge = v0; edge.p = edge.p.xxzw(); - dedge.p = ddx[j].xyzw(dedge.p); + dedge.p = ddx[m2].xyzw(dedge.p); - DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v[0].p); + DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p); } if(tb.y < tb.w) { - edge = v[1]; + edge = v1; - edge.p = (v[0].p.xxxx() + ddx[j] * dv[0].p.yyyy()).xyzw(edge.p); - dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p); + edge.p = (v0.p.xxxx() + ddx[m2] * dv[0].p.yyyy()).xyzw(edge.p); + dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p); - DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v[1].p); + DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p); } } - Flush(v, dscan); + Flush(vertex[index[2]], dscan); if(m_ds->HasEdge()) { @@ -433,14 +472,14 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices) GSVector4 b = dx < GSVector4::zero(); // dx < 0 GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0 - int i = a.mask(); - int j = ((a | b) ^ c).mask() ^ 2; // evil + int orientation = a.mask(); + int side = ((a | b) ^ c).mask() ^ 2; // evil - DrawEdge(v[0], v[1], dv[0], i & 1, j & 1); - DrawEdge(v[0], v[2], dv[1], i & 2, j & 2); - DrawEdge(v[1], v[2], dv[2], i & 4, j & 4); + DrawEdge(v0, v1, dv[0], orientation & 1, side & 1); + DrawEdge(v0, v2, dv[1], orientation & 2, side & 2); + DrawEdge(v1, v2, dv[2], orientation & 4, side & 4); - Flush(v, GSVertexSW::zero(), true); + Flush(vertex[index[2]], GSVertexSW::zero(), true); } } @@ -492,18 +531,21 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co m_edge.count += e - &m_edge.buff[m_edge.count]; } -void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect) +void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index, bool solidrect) { + const GSVertexSW& v0 = vertex[index[0]]; + const GSVertexSW& v1 = vertex[index[1]]; + + GSVector4 mask = (v0.p < v1.p).xyzw(GSVector4::zero()); + GSVertexSW v[2]; - GSVector4 mask = (vertices[0].p < vertices[1].p).xyzw(GSVector4::zero()); + v[0].p = v1.p.blend32(v0.p, mask); + v[0].t = v1.t.blend32(v0.t, mask); + v[0].c = v1.c; - v[0].p = vertices[1].p.blend32(vertices[0].p, mask); - v[0].t = vertices[1].t.blend32(vertices[0].t, mask); - v[0].c = vertices[1].c; - - v[1].p = vertices[0].p.blend32(vertices[1].p, mask); - v[1].t = vertices[0].t.blend32(vertices[1].t, mask); + v[1].p = v0.p.blend32(v1.p, mask); + v[1].t = v0.t.blend32(v1.t, mask); GSVector4i r(v[0].p.xyxy(v[1].p).ceil()); @@ -515,17 +557,6 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect) if(solidrect) { - /* - if(m_id == 0) - { - m_ds->DrawRect(r, scan); - - m_pixels += r.width() * r.height(); - } - - return; - */ - if(m_threads == 1) { m_ds->DrawRect(r, scan); @@ -570,7 +601,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect) if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy(); if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx(); - m_ds->SetupPrim(v, dscan); + m_ds->SetupPrim(v1, dscan); while(1) { @@ -787,7 +818,7 @@ void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, con e->p.i16[2] = (int16)top; } -void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge) +void GSRasterizer::Flush(const GSVertexSW& vertex, const GSVertexSW& dscan, bool edge) { // TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline) @@ -795,7 +826,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo if(count > 0) { - m_ds->SetupPrim(vertices, dscan); + m_ds->SetupPrim(vertex, dscan); const GSVertexSW* RESTRICT e = m_edge.buff; const GSVertexSW* RESTRICT ee = e + count; @@ -882,19 +913,7 @@ void GSRasterizerList::Process(shared_ptr& item) { m_solidrect_count++; } -/* - if(m_workers.size() > 1 && item->solidrect) // TODO: clip to thread area and dispatch? - { - for(size_t i = 0; i < m_workers.size(); i++) - { - m_workers[i]->Wait(); - } - m_workers.front()->Process(item); - - return; - } -*/ if(item->syncpoint) { for(size_t i = 0; i < m_workers.size(); i++) diff --git a/plugins/GSdx/GSRasterizer.h b/plugins/GSdx/GSRasterizer.h index 810007f24a..b4df63ff8e 100644 --- a/plugins/GSdx/GSRasterizer.h +++ b/plugins/GSdx/GSRasterizer.h @@ -34,8 +34,11 @@ public: GSVector4i scissor; GSVector4i bbox; GS_PRIM_CLASS primclass; - GSVertexSW* vertices; - int count; + uint8* buff; + GSVertexSW* vertex; + int vertex_count; + uint32* index; + int index_count; bool solidrect; bool syncpoint; uint64 frame; @@ -50,8 +53,11 @@ public: : scissor(GSVector4i::zero()) , bbox(GSVector4i::zero()) , primclass(GS_INVALID_CLASS) - , vertices(NULL) - , count(0) + , buff(NULL) + , vertex(NULL) + , vertex_count(0) + , index(NULL) + , index_count(0) , solidrect(false) , syncpoint(false) , frame(0) @@ -63,7 +69,7 @@ public: virtual ~GSRasterizerData() { - if(vertices != NULL) _aligned_free(vertices); + if(buff != NULL) _aligned_free(buff); // derived class should free param and its members } @@ -72,7 +78,7 @@ public: class IDrawScanline : public GSAlignedClass<32> { public: - typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan); + typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW& vertex, const GSVertexSW& dscan); typedef void (__fastcall *DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan); typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit @@ -91,14 +97,14 @@ public: #ifdef ENABLE_JIT_RASTERIZER - __forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);} + __forceinline void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan) {m_sp(vertex, dscan);} __forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);} __forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);} __forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);} #else - virtual void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) = 0; + virtual void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan) = 0; virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0; virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0; virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0; @@ -134,17 +140,17 @@ protected: typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count); template - void DrawPoint(const GSVertexSW* v, int count); - void DrawLine(const GSVertexSW* v); - void DrawTriangle(const GSVertexSW* v); - void DrawSprite(const GSVertexSW* v, bool solidrect); + void DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count); + void DrawLine(const GSVertexSW* vertex, const uint32* index); + void DrawTriangle(const GSVertexSW* vertex, const uint32* index); + void DrawSprite(const GSVertexSW* vertex, const uint32* index, bool solidrect); __forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0); void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side); __forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan); - __forceinline void Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge = false); + __forceinline void Flush(const GSVertexSW& vertex, const GSVertexSW& dscan, bool edge = false); public: GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon); diff --git a/plugins/GSdx/GSRenderer.cpp b/plugins/GSdx/GSRenderer.cpp index b5d434dcf8..f04ed77929 100644 --- a/plugins/GSdx/GSRenderer.cpp +++ b/plugins/GSdx/GSRenderer.cpp @@ -22,9 +22,8 @@ #include "stdafx.h" #include "GSRenderer.h" -GSRenderer::GSRenderer() - : GSState() - , m_vt(this) +GSRenderer::GSRenderer(GSVertexTrace* vt, size_t vertex_stride) + : GSState(vt, vertex_stride) , m_dev(NULL) , m_shader(0) { @@ -78,8 +77,6 @@ bool GSRenderer::CreateDevice(GSDevice* dev) void GSRenderer::ResetDevice() { - ResetPrim(); - if(m_dev) m_dev->Reset(1, 1); } @@ -336,7 +333,7 @@ void GSRenderer::VSync(int field) theApp.m_gs_interlace[m_interlace].name.c_str(), theApp.m_gs_aspectratio[m_aspectratio].name.c_str(), (int)m_perfmon.Get(GSPerfMon::Quad), - (int)m_perfmon.Get(GSPerfMon::Prim), + (int)(m_perfmon.Get(GSPerfMon::Prim) - m_perfmon.Get(GSPerfMon::PrimNotRendered)), (int)m_perfmon.Get(GSPerfMon::Draw), m_perfmon.CPU(), m_perfmon.Get(GSPerfMon::Swizzle) / 1024, @@ -541,308 +538,3 @@ void GSRenderer::KeyEvent(GSKeyEventData* e) #endif } } - -void GSRenderer::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear) -{ - int tw = TEX0.TW; - int th = TEX0.TH; - - int w = 1 << tw; - int h = 1 << th; - - GSVector4i tr(0, 0, w, h); - - int wms = CLAMP.WMS; - int wmt = CLAMP.WMT; - - int minu = (int)CLAMP.MINU; - int minv = (int)CLAMP.MINV; - int maxu = (int)CLAMP.MAXU; - int maxv = (int)CLAMP.MAXV; - - GSVector4i vr = tr; - - switch(wms) - { - case CLAMP_REPEAT: - break; - case CLAMP_CLAMP: - break; - case CLAMP_REGION_CLAMP: - if(vr.x < minu) vr.x = minu; - if(vr.z > maxu + 1) vr.z = maxu + 1; - break; - case CLAMP_REGION_REPEAT: - vr.x = maxu; - vr.z = vr.x + (minu + 1); - break; - default: - __assume(0); - } - - switch(wmt) - { - case CLAMP_REPEAT: - break; - case CLAMP_CLAMP: - break; - case CLAMP_REGION_CLAMP: - if(vr.y < minv) vr.y = minv; - if(vr.w > maxv + 1) vr.w = maxv + 1; - break; - case CLAMP_REGION_REPEAT: - vr.y = maxv; - vr.w = vr.y + (minv + 1); - break; - default: - __assume(0); - } - - if(wms + wmt < 6) - { - GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t); - - if(linear) - { - st += GSVector4(-0x8000, 0x8000).xxyy(); - } - - GSVector4i uv = GSVector4i(st).sra32(16); - - GSVector4i u, v; - - int mask = 0; - - if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT) - { - u = uv & GSVector4i::xffffffff().srl32(32 - tw); - v = uv & GSVector4i::xffffffff().srl32(32 - th); - - GSVector4i uu = uv.sra32(tw); - GSVector4i vv = uv.sra32(th); - - mask = (uu.upl32(vv) == uu.uph32(vv)).mask(); - } - - uv = uv.rintersect(tr); - - switch(wms) - { - case CLAMP_REPEAT: - if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;} - break; - case CLAMP_CLAMP: - case CLAMP_REGION_CLAMP: - if(vr.x < uv.x) vr.x = uv.x; - if(vr.z > uv.z + 1) vr.z = uv.z + 1; - break; - case CLAMP_REGION_REPEAT: - break; - default: - __assume(0); - } - - switch(wmt) - { - case CLAMP_REPEAT: - if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;} - break; - case CLAMP_CLAMP: - case CLAMP_REGION_CLAMP: - if(vr.y < uv.y) vr.y = uv.y; - if(vr.w > uv.w + 1) vr.w = uv.w + 1; - break; - case CLAMP_REGION_REPEAT: - break; - default: - __assume(0); - } - } - - r = vr.rintersect(tr); -} - -void GSRenderer::GetAlphaMinMax() -{ - if(m_vt.m_alpha.valid) - { - return; - } - - const GSDrawingEnvironment& env = m_env; - const GSDrawingContext* context = m_context; - - GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww(); - - if(PRIM->TME && context->TEX0.TCC) - { - switch(GSLocalMemory::m_psm[context->TEX0.PSM].fmt) - { - case 0: - a.y = 0; - a.w = 0xff; - break; - case 1: - a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0; - a.w = env.TEXA.TA0; - break; - case 2: - a.y = env.TEXA.AEM ? 0 : min(env.TEXA.TA0, env.TEXA.TA1); - a.w = max(env.TEXA.TA0, env.TEXA.TA1); - break; - case 3: - m_mem.m_clut.GetAlphaMinMax32(a.y, a.w); - break; - default: - __assume(0); - } - - switch(context->TEX0.TFX) - { - case TFX_MODULATE: - a.x = (a.x * a.y) >> 7; - a.z = (a.z * a.w) >> 7; - if(a.x > 0xff) a.x = 0xff; - if(a.z > 0xff) a.z = 0xff; - break; - case TFX_DECAL: - a.x = a.y; - a.z = a.w; - break; - case TFX_HIGHLIGHT: - a.x = a.x + a.y; - a.z = a.z + a.w; - if(a.x > 0xff) a.x = 0xff; - if(a.z > 0xff) a.z = 0xff; - break; - case TFX_HIGHLIGHT2: - a.x = a.y; - a.z = a.w; - break; - default: - __assume(0); - } - } - - m_vt.m_alpha.min = a.x; - m_vt.m_alpha.max = a.z; - m_vt.m_alpha.valid = true; -} - -bool GSRenderer::TryAlphaTest(uint32& fm, uint32& zm) -{ - const GSDrawingContext* context = m_context; - - bool pass = true; - - if(context->TEST.ATST == ATST_NEVER) - { - pass = false; - } - else if(context->TEST.ATST != ATST_ALWAYS) - { - GetAlphaMinMax(); - - int amin = m_vt.m_alpha.min; - int amax = m_vt.m_alpha.max; - - int aref = context->TEST.AREF; - - switch(context->TEST.ATST) - { - case ATST_NEVER: - pass = false; - break; - case ATST_ALWAYS: - pass = true; - break; - case ATST_LESS: - if(amax < aref) pass = true; - else if(amin >= aref) pass = false; - else return false; - break; - case ATST_LEQUAL: - if(amax <= aref) pass = true; - else if(amin > aref) pass = false; - else return false; - break; - case ATST_EQUAL: - if(amin == aref && amax == aref) pass = true; - else if(amin > aref || amax < aref) pass = false; - else return false; - break; - case ATST_GEQUAL: - if(amin >= aref) pass = true; - else if(amax < aref) pass = false; - else return false; - break; - case ATST_GREATER: - if(amin > aref) pass = true; - else if(amax <= aref) pass = false; - else return false; - break; - case ATST_NOTEQUAL: - if(amin == aref && amax == aref) pass = false; - else if(amin > aref || amax < aref) pass = true; - else return false; - break; - default: - __assume(0); - } - } - - if(!pass) - { - switch(context->TEST.AFAIL) - { - case AFAIL_KEEP: fm = zm = 0xffffffff; break; - case AFAIL_FB_ONLY: zm = 0xffffffff; break; - case AFAIL_ZB_ONLY: fm = 0xffffffff; break; - case AFAIL_RGB_ONLY: fm |= 0xff000000; zm = 0xffffffff; break; - default: __assume(0); - } - } - - return true; -} - -bool GSRenderer::IsOpaque() -{ - if(PRIM->AA1) - { - return false; - } - - if(!PRIM->ABE) - { - return true; - } - - const GSDrawingContext* context = m_context; - - int amin = 0, amax = 0xff; - - if(context->ALPHA.A != context->ALPHA.B) - { - if(context->ALPHA.C == 0) - { - GetAlphaMinMax(); - - amin = m_vt.m_alpha.min; - amax = m_vt.m_alpha.max; - } - else if(context->ALPHA.C == 1) - { - if(context->FRAME.PSM == PSM_PSMCT24 || context->FRAME.PSM == PSM_PSMZ24) - { - amin = amax = 0x80; - } - } - else if(context->ALPHA.C == 2) - { - amin = amax = context->ALPHA.FIX; - } - } - - return context->ALPHA.IsOpaque(amin, amax); -} diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index 80d088f79a..e78013bf4f 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -24,8 +24,6 @@ #include "GSdx.h" #include "GSWnd.h" #include "GSState.h" -#include "GSVertexTrace.h" -#include "GSVertexList.h" #include "GSCapture.h" class GSRenderer : public GSState @@ -49,15 +47,6 @@ protected: virtual GSTexture* GetOutput(int i) = 0; - GSVertexTrace m_vt; - - // following functions need m_vt to be initialized - - void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear); - void GetAlphaMinMax(); - bool TryAlphaTest(uint32& fm, uint32& zm); - bool IsOpaque(); - public: GSWnd m_wnd; GSDevice* m_dev; @@ -67,10 +56,9 @@ public: bool s_save; bool s_savez; int s_saven; - GSCritSec s_lock; public: - GSRenderer(); + GSRenderer(GSVertexTrace* vt, size_t vertex_stride); virtual ~GSRenderer(); virtual bool CreateWnd(const string& title, int w, int h); @@ -93,157 +81,4 @@ public: GSCritSec m_pGSsetTitle_Crit; char m_GStitleInfoBuffer[128]; -}; - -template class GSRendererT : public GSRenderer -{ -protected: - Vertex* m_vertices; - int m_count; - int m_maxcount; - GSVertexList m_vl; - - void Reset() - { - m_count = 0; - m_vl.RemoveAll(); - - GSRenderer::Reset(); - } - - void ResetPrim() - { - m_vl.RemoveAll(); - } - - void FlushPrim() - { - if(m_count == 0) return; - - if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3) - { - // FIXME: berserk fpsm = 27 (8H) - - if(!m_dev->IsLost()) - { - m_vt.Update(m_vertices, m_count, GSUtil::GetPrimClass(PRIM->PRIM)); - - Draw(); - } - - m_perfmon.Put(GSPerfMon::Draw, 1); - } - - m_count = 0; - } - - void GrowVertexBuffer() - { - int maxcount = std::max(m_maxcount * 3 / 2, 10000); - Vertex* vertices = (Vertex*)_aligned_malloc(sizeof(Vertex) * maxcount, 16); - - if(m_vertices != NULL) - { - memcpy(vertices, m_vertices, sizeof(Vertex) * m_maxcount); - _aligned_free(m_vertices); - } - - m_vertices = vertices; - m_maxcount = maxcount - 100; - } - - // Returns a pointer to the drawing vertex. Can return NULL! - - template __forceinline Vertex* DrawingKick(bool skip, int& count) - { - switch(prim) - { - case GS_POINTLIST: count = 1; break; - case GS_LINELIST: count = 2; break; - case GS_LINESTRIP: count = 2; break; - case GS_TRIANGLELIST: count = 3; break; - case GS_TRIANGLESTRIP: count = 3; break; - case GS_TRIANGLEFAN: count = 3; break; - case GS_SPRITE: count = 2; break; - case GS_INVALID: count = 1; break; - default: __assume(0); - } - - if(m_vl.GetCount() < count) - { - return NULL; - } - - if(m_count >= m_maxcount) - { - GrowVertexBuffer(); - } - - Vertex* v = &m_vertices[m_count]; - - switch(prim) - { - case GS_POINTLIST: - m_vl.GetAt(0, v[0]); - m_vl.RemoveAll(); - break; - case GS_LINELIST: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.RemoveAll(); - break; - case GS_LINESTRIP: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.RemoveAt(0, 1); - break; - case GS_TRIANGLELIST: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.GetAt(2, v[2]); - m_vl.RemoveAll(); - break; - case GS_TRIANGLESTRIP: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.GetAt(2, v[2]); - m_vl.RemoveAt(0, 2); - break; - case GS_TRIANGLEFAN: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.GetAt(2, v[2]); - m_vl.RemoveAt(1, 1); - break; - case GS_SPRITE: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.RemoveAll(); - break; - case GS_INVALID: - ASSERT(0); - m_vl.RemoveAll(); - return NULL; - default: - __assume(0); - } - - return !skip ? v : NULL; - } - - virtual void Draw() = 0; - -public: - GSRendererT() - : GSRenderer() - , m_vertices(NULL) - , m_count(0) - , m_maxcount(0) - { - } - - virtual ~GSRendererT() - { - if(m_vertices) _aligned_free(m_vertices); - } -}; +}; \ No newline at end of file diff --git a/plugins/GSdx/GSRendererDX.cpp b/plugins/GSdx/GSRendererDX.cpp index 35f7578f3f..48cfbf0ec1 100644 --- a/plugins/GSdx/GSRendererDX.cpp +++ b/plugins/GSdx/GSRendererDX.cpp @@ -21,3 +21,411 @@ #include "stdafx.h" #include "GSRendererDX.h" +#include "GSDeviceDX.h" + +GSRendererDX::GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter) + : GSRendererHW(vt, vertex_stride, tc) + , m_pixelcenter(pixelcenter) + , m_topology(-1) +{ + m_logz = !!theApp.GetConfig("logz", 0); + m_fba = !!theApp.GetConfig("fba", 1); + //UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0); + UserHacks_AlphaHack = !!theApp.GetConfig("UserHacks_AlphaHack", 0); +} + +GSRendererDX::~GSRendererDX() +{ +} + +void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) +{ + GSDrawingEnvironment& env = m_env; + GSDrawingContext* context = m_context; + + const GSVector2i& rtsize = rt->GetSize(); + const GSVector2& rtscale = rt->GetScale(); + + bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; + + GSTexture* rtcopy = NULL; + + ASSERT(m_dev != NULL); + + GSDeviceDX* dev = (GSDeviceDX*)m_dev; + + if(DATE) + { + if(dev->HasStencil()) + { + GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); + GSVector4 o = GSVector4(-1.0f, 1.0f); + + GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); + GSVector4 dst = src * 2.0f + o.xxxx(); + + GSVertexPT1 vertices[] = + { + {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, + {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, + {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, + {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, + }; + + dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); + } + else + { + rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat()); + + // I'll use VertexTrace when I consider it more trustworthy + + dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy()); + } + } + + // + + dev->BeginScene(); + + // om + + GSDeviceDX::OMDepthStencilSelector om_dssel; + + if(context->TEST.ZTE) + { + om_dssel.ztst = context->TEST.ZTST; + om_dssel.zwe = !context->ZBUF.ZMSK; + } + else + { + om_dssel.ztst = ZTST_ALWAYS; + } + + if(m_fba) + { + om_dssel.fba = context->FBA.FBA; + } + + GSDeviceDX::OMBlendSelector om_bsel; + + if(!IsOpaque()) + { + om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt->m_primclass == GS_LINE_CLASS; + + om_bsel.a = context->ALPHA.A; + om_bsel.b = context->ALPHA.B; + om_bsel.c = context->ALPHA.C; + om_bsel.d = context->ALPHA.D; + + if(env.PABE.PABE) + { + if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) + { + // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader + // cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result + + om_bsel.abe = 0; + } + else + { + //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. + //ASSERT(0); + } + } + } + + om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); + + // vs + + GSDeviceDX::VSSelector vs_sel; + + vs_sel.tme = PRIM->TME; + vs_sel.fst = PRIM->FST; + vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0; + vs_sel.rtcopy = !!rtcopy; + + // The real GS appears to do no masking based on the Z buffer format and writing larger Z values + // than the buffer supports seems to be an error condition on the real GS, causing it to crash. + // We are probably receiving bad coordinates from VU1 in these cases. + + if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) + { + if(context->ZBUF.PSM == PSM_PSMZ24) + { + if(m_vt->m_max.p.z > 0xffffff) + { + ASSERT(m_vt->m_min.p.z > 0xffffff); + // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. + if (m_vt->m_min.p.z > 0xffffff) + { + vs_sel.bppz = 1; + om_dssel.ztst = ZTST_ALWAYS; + } + } + } + else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) + { + if(m_vt->m_max.p.z > 0xffff) + { + ASSERT(m_vt->m_min.p.z > 0xffff); // sfex capcom logo + // Fixme : Same as above, I guess. + if (m_vt->m_min.p.z > 0xffff) + { + vs_sel.bppz = 2; + om_dssel.ztst = ZTST_ALWAYS; + } + } + } + } + + GSDeviceDX::VSConstantBuffer vs_cb; + + float sx = 2.0f * rtscale.x / (rtsize.x << 4); + float sy = 2.0f * rtscale.y / (rtsize.y << 4); + float ox = (float)(int)context->XYOFFSET.OFX; + float oy = (float)(int)context->XYOFFSET.OFY; + float ox2 = 2.0f * m_pixelcenter.x / rtsize.x; + float oy2 = 2.0f * m_pixelcenter.y / rtsize.y; + + //This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly, + //because DX10 and DX9 have a different pixel center.) + // + //The resulting shifted output aligns better with common blending / corona / blurring effects, + //but introduces a few bad pixels on the edges. + + if(rt->LikelyOffset) + { + // DX9 has pixelcenter set to 0.0, so give it some value here + + if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; } + + ox2 *= rt->OffsetHack_modx; + oy2 *= rt->OffsetHack_mody; + } + + vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f); + vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); + + // gs + + GSDeviceDX::GSSelector gs_sel; + + gs_sel.iip = PRIM->IIP; + gs_sel.prim = m_vt->m_primclass; + + // ps + + GSDeviceDX::PSSelector ps_sel; + GSDeviceDX::PSSamplerSelector ps_ssel; + GSDeviceDX::PSConstantBuffer ps_cb; + + if(DATE) + { + if(dev->HasStencil()) + { + om_dssel.date = 1; + } + else + { + ps_sel.date = 1 + context->TEST.DATM; + } + } + + if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) + { + ps_sel.colclip = 1; + } + + ps_sel.clr1 = om_bsel.IsCLR1(); + ps_sel.fba = context->FBA.FBA; + ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; + + if(UserHacks_AlphaHack) ps_sel.aout = 1; + + if(PRIM->FGE) + { + ps_sel.fog = 1; + + ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255; + } + + if(context->TEST.ATE) + { + ps_sel.atst = context->TEST.ATST; + + switch(ps_sel.atst) + { + case ATST_LESS: + ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); + break; + case ATST_GREATER: + ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); + break; + default: + ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; + break; + } + } + else + { + ps_sel.atst = ATST_ALWAYS; + } + + if(tex) + { + ps_sel.wms = context->CLAMP.WMS; + ps_sel.wmt = context->CLAMP.WMT; + ps_sel.fmt = tex->m_fmt; + ps_sel.aem = env.TEXA.AEM; + ps_sel.tfx = context->TEX0.TFX; + ps_sel.tcc = context->TEX0.TCC; + ps_sel.ltf = m_filter == 2 ? m_vt->IsLinear() : m_filter; + ps_sel.rt = tex->m_target; + + int w = tex->m_texture->GetWidth(); + int h = tex->m_texture->GetHeight(); + + int tw = (int)(1 << context->TEX0.TW); + int th = (int)(1 << context->TEX0.TH); + + GSVector4 WH(tw, th, w, h); + + if(PRIM->FST) + { + vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy(); + //Maybe better? + //vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw(); + ps_sel.fst = 1; + } + + ps_cb.WH = WH; + ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); + ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV); + + GSVector4 clamp(ps_cb.MskFix); + GSVector4 ta(env.TEXA & GSVector4i::x000000ff()); + + ps_cb.MinMax = clamp / WH.xyxy(); + ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255)); + + ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1; + ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1; + ps_ssel.ltf = ps_sel.ltf; + } + else + { + ps_sel.tfx = 4; + } + + // rs + + GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); + + dev->OMSetRenderTargets(rt, ds, &scissor); + dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL); + dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL); + dev->PSSetShaderResource(2, rtcopy); + + uint8 afix = context->ALPHA.FIX; + + dev->SetupOM(om_dssel, om_bsel, afix); + dev->SetupIA(m_vertex.buff, m_vertex.tail, m_index.buff, m_index.tail, m_topology); + dev->SetupVS(vs_sel, &vs_cb); + dev->SetupGS(gs_sel); + dev->SetupPS(ps_sel, &ps_cb, ps_ssel); + + // draw + + if(context->TEST.DoFirstPass()) + { + dev->DrawIndexedPrimitive(); + + if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) + { + GSDeviceDX::OMBlendSelector om_bselneg(om_bsel); + GSDeviceDX::PSSelector ps_selneg(ps_sel); + + om_bselneg.negative = 1; + ps_selneg.colclip = 2; + + dev->SetupOM(om_dssel, om_bselneg, afix); + dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); + + dev->DrawIndexedPrimitive(); + } + } + + if(context->TEST.DoSecondPass()) + { + ASSERT(!env.PABE.PABE); + + static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4}; + + ps_sel.atst = iatst[ps_sel.atst]; + + switch(ps_sel.atst) + { + case ATST_LESS: + ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); + break; + case ATST_GREATER: + ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); + break; + default: + ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; + break; + } + + dev->SetupPS(ps_sel, &ps_cb, ps_ssel); + + bool z = om_dssel.zwe; + bool r = om_bsel.wr; + bool g = om_bsel.wg; + bool b = om_bsel.wb; + bool a = om_bsel.wa; + + switch(context->TEST.AFAIL) + { + case 0: z = r = g = b = a = false; break; // none + case 1: z = false; break; // rgba + case 2: r = g = b = a = false; break; // z + case 3: z = a = false; break; // rgb + default: __assume(0); + } + + if(z || r || g || b || a) + { + om_dssel.zwe = z; + om_bsel.wr = r; + om_bsel.wg = g; + om_bsel.wb = b; + om_bsel.wa = a; + + dev->SetupOM(om_dssel, om_bsel, afix); + + dev->DrawIndexedPrimitive(); + + if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) + { + GSDeviceDX::OMBlendSelector om_bselneg(om_bsel); + GSDeviceDX::PSSelector ps_selneg(ps_sel); + + om_bselneg.negative = 1; + ps_selneg.colclip = 2; + + dev->SetupOM(om_dssel, om_bselneg, afix); + dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); + + dev->DrawIndexedPrimitive(); + } + } + } + + dev->EndScene(); + + dev->Recycle(rtcopy); + + if(om_dssel.fba) UpdateFBA(rt); +} diff --git a/plugins/GSdx/GSRendererDX.h b/plugins/GSdx/GSRendererDX.h index ac288a112d..b693d89315 100644 --- a/plugins/GSdx/GSRendererDX.h +++ b/plugins/GSdx/GSRendererDX.h @@ -23,8 +23,7 @@ #include "GSRendererHW.h" -template -class GSRendererDX : public GSRendererHW +class GSRendererDX : public GSRendererHW { GSVector2 m_pixelcenter; bool m_logz; @@ -35,413 +34,11 @@ class GSRendererDX : public GSRendererHW protected: int m_topology; + virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); virtual void UpdateFBA(GSTexture* rt) {} public: - GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0)) - : GSRendererHW(tc) - , m_pixelcenter(pixelcenter) - , m_topology(-1) - { - m_logz = !!theApp.GetConfig("logz", 0); - m_fba = !!theApp.GetConfig("fba", 1); - //UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0); - UserHacks_AlphaHack = !!theApp.GetConfig("UserHacks_AlphaHack", 0); - } + GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0)); + virtual ~GSRendererDX(); - virtual ~GSRendererDX() - { - } - - void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) - { - GSDrawingEnvironment& env = m_env; - GSDrawingContext* context = m_context; - - const GSVector2i& rtsize = rt->GetSize(); - const GSVector2& rtscale = rt->GetScale(); - - bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; - - GSTexture *rtcopy = NULL; - - ASSERT(m_dev != NULL); - - GSDeviceDX* dev = (GSDeviceDX*)m_dev; - - if(DATE) - { - if(dev->HasStencil()) - { - GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); - GSVector4 o = GSVector4(-1.0f, 1.0f); - - GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); - GSVector4 dst = src * 2.0f + o.xxxx(); - - GSVertexPT1 vertices[] = - { - {GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)}, - {GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)}, - {GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)}, - {GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)}, - }; - - dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM); - } - else - { - rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat()); - - // I'll use VertexTrace when I consider it more trustworthy - - dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy()); - } - } - - // - - dev->BeginScene(); - - // om - - GSDeviceDX::OMDepthStencilSelector om_dssel; - - if(context->TEST.ZTE) - { - om_dssel.ztst = context->TEST.ZTST; - om_dssel.zwe = !context->ZBUF.ZMSK; - } - else - { - om_dssel.ztst = ZTST_ALWAYS; - } - - if(m_fba) - { - om_dssel.fba = context->FBA.FBA; - } - - GSDeviceDX::OMBlendSelector om_bsel; - - if(!IsOpaque()) - { - om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; - - om_bsel.a = context->ALPHA.A; - om_bsel.b = context->ALPHA.B; - om_bsel.c = context->ALPHA.C; - om_bsel.d = context->ALPHA.D; - - if(env.PABE.PABE) - { - if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) - { - // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader - // cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result - - om_bsel.abe = 0; - } - else - { - //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. - //ASSERT(0); - } - } - } - - om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); - - // vs - - GSDeviceDX::VSSelector vs_sel; - - vs_sel.tme = PRIM->TME; - vs_sel.fst = PRIM->FST; - vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0; - vs_sel.rtcopy = !!rtcopy; - - // The real GS appears to do no masking based on the Z buffer format and writing larger Z values - // than the buffer supports seems to be an error condition on the real GS, causing it to crash. - // We are probably receiving bad coordinates from VU1 in these cases. - - if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) - { - if(context->ZBUF.PSM == PSM_PSMZ24) - { - if(m_vt.m_max.p.z > 0xffffff) - { - ASSERT(m_vt.m_min.p.z > 0xffffff); - // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. - if (m_vt.m_min.p.z > 0xffffff) - { - vs_sel.bppz = 1; - om_dssel.ztst = ZTST_ALWAYS; - } - } - } - else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) - { - if(m_vt.m_max.p.z > 0xffff) - { - ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo - // Fixme : Same as above, I guess. - if (m_vt.m_min.p.z > 0xffff) - { - vs_sel.bppz = 2; - om_dssel.ztst = ZTST_ALWAYS; - } - } - } - } - - GSDeviceDX::VSConstantBuffer vs_cb; - - float sx = 2.0f * rtscale.x / (rtsize.x << 4); - float sy = 2.0f * rtscale.y / (rtsize.y << 4); - float ox = (float)(int)context->XYOFFSET.OFX; - float oy = (float)(int)context->XYOFFSET.OFY; - float ox2 = 2.0f * m_pixelcenter.x / rtsize.x; - float oy2 = 2.0f * m_pixelcenter.y / rtsize.y; - - //This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly, - //because DX10 and DX9 have a different pixel center.) - // - //The resulting shifted output aligns better with common blending / corona / blurring effects, - //but introduces a few bad pixels on the edges. - - if(rt->LikelyOffset) - { - // DX9 has pixelcenter set to 0.0, so give it some value here - - if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; } - - ox2 *= rt->OffsetHack_modx; - oy2 *= rt->OffsetHack_mody; - } - - vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f); - vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); - - // gs - - GSDeviceDX::GSSelector gs_sel; - - gs_sel.iip = PRIM->IIP; - gs_sel.prim = m_vt.m_primclass; - - // ps - - GSDeviceDX::PSSelector ps_sel; - GSDeviceDX::PSSamplerSelector ps_ssel; - GSDeviceDX::PSConstantBuffer ps_cb; - - if(DATE) - { - if(dev->HasStencil()) - { - om_dssel.date = 1; - } - else - { - ps_sel.date = 1 + context->TEST.DATM; - } - } - - if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) - { - ps_sel.colclip = 1; - } - - ps_sel.clr1 = om_bsel.IsCLR1(); - ps_sel.fba = context->FBA.FBA; - ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; - - if(UserHacks_AlphaHack) ps_sel.aout = 1; - - if(PRIM->FGE) - { - ps_sel.fog = 1; - - ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255; - } - - if(context->TEST.ATE) - { - ps_sel.atst = context->TEST.ATST; - - switch(ps_sel.atst) - { - case ATST_LESS: - ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); - break; - case ATST_GREATER: - ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); - break; - default: - ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; - break; - } - } - else - { - ps_sel.atst = ATST_ALWAYS; - } - - if(tex) - { - ps_sel.wms = context->CLAMP.WMS; - ps_sel.wmt = context->CLAMP.WMT; - ps_sel.fmt = tex->m_fmt; - ps_sel.aem = env.TEXA.AEM; - ps_sel.tfx = context->TEX0.TFX; - ps_sel.tcc = context->TEX0.TCC; - ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter; - ps_sel.rt = tex->m_target; - - int w = tex->m_texture->GetWidth(); - int h = tex->m_texture->GetHeight(); - - int tw = (int)(1 << context->TEX0.TW); - int th = (int)(1 << context->TEX0.TH); - - GSVector4 WH(tw, th, w, h); - - if(PRIM->FST) - { - vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy(); - //Maybe better? - //vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw(); - ps_sel.fst = 1; - } - - ps_cb.WH = WH; - ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); - ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV); - - GSVector4 clamp(ps_cb.MskFix); - GSVector4 ta(env.TEXA & GSVector4i::x000000ff()); - - ps_cb.MinMax = clamp / WH.xyxy(); - ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255)); - - ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1; - ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1; - ps_ssel.ltf = ps_sel.ltf; - } - else - { - ps_sel.tfx = 4; - } - - // rs - - GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); - - dev->OMSetRenderTargets(rt, ds, &scissor); - dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL); - dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL); - dev->PSSetShaderResource(2, rtcopy); - - uint8 afix = context->ALPHA.FIX; - - dev->SetupOM(om_dssel, om_bsel, afix); - dev->SetupIA(m_vertices, m_count, m_topology); - dev->SetupVS(vs_sel, &vs_cb); - dev->SetupGS(gs_sel); - dev->SetupPS(ps_sel, &ps_cb, ps_ssel); - - // draw - - if(context->TEST.DoFirstPass()) - { - dev->DrawPrimitive(); - - if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) - { - GSDeviceDX::OMBlendSelector om_bselneg(om_bsel); - GSDeviceDX::PSSelector ps_selneg(ps_sel); - - om_bselneg.negative = 1; - ps_selneg.colclip = 2; - - dev->SetupOM(om_dssel, om_bselneg, afix); - dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); - - dev->DrawPrimitive(); - } - } - - if(context->TEST.DoSecondPass()) - { - ASSERT(!env.PABE.PABE); - - static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4}; - - ps_sel.atst = iatst[ps_sel.atst]; - - switch(ps_sel.atst) - { - case ATST_LESS: - ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1); - break; - case ATST_GREATER: - ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1); - break; - default: - ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF; - break; - } - - dev->SetupPS(ps_sel, &ps_cb, ps_ssel); - - bool z = om_dssel.zwe; - bool r = om_bsel.wr; - bool g = om_bsel.wg; - bool b = om_bsel.wb; - bool a = om_bsel.wa; - - switch(context->TEST.AFAIL) - { - case 0: z = r = g = b = a = false; break; // none - case 1: z = false; break; // rgba - case 2: r = g = b = a = false; break; // z - case 3: z = a = false; break; // rgb - default: __assume(0); - } - - if(z || r || g || b || a) - { - om_dssel.zwe = z; - om_bsel.wr = r; - om_bsel.wg = g; - om_bsel.wb = b; - om_bsel.wa = a; - - dev->SetupOM(om_dssel, om_bsel, afix); - - dev->DrawPrimitive(); - - if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) - { - GSDeviceDX::OMBlendSelector om_bselneg(om_bsel); - GSDeviceDX::PSSelector ps_selneg(ps_sel); - - om_bselneg.negative = 1; - ps_selneg.colclip = 2; - - dev->SetupOM(om_dssel, om_bselneg, afix); - dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); - - dev->DrawPrimitive(); - } - } - } - - dev->EndScene(); - - dev->Recycle(rtcopy); - - if(om_dssel.fba) UpdateFBA(rt); - } }; diff --git a/plugins/GSdx/GSRendererDX11.cpp b/plugins/GSdx/GSRendererDX11.cpp index adf754e5b8..1b30304976 100644 --- a/plugins/GSdx/GSRendererDX11.cpp +++ b/plugins/GSdx/GSRendererDX11.cpp @@ -25,9 +25,9 @@ #include "resource.h" GSRendererDX11::GSRendererDX11() - : GSRendererDX(new GSTextureCache11(this), GSVector2(-0.5f, -0.5f)) + : GSRendererDX(new GSVertexTraceDX11(this), sizeof(GSVertexHW11), new GSTextureCache11(this), GSVector2(-0.5f, -0.5f)) { - InitVertexKick(GSRendererDX11); + InitConvertVertex(GSRendererDX11); } bool GSRendererDX11::CreateDevice(GSDevice* dev) @@ -38,202 +38,49 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev) return true; } -template -void GSRendererDX11::VertexKick(bool skip) +template +void GSRendererDX11::ConvertVertex(GSVertexHW11* RESTRICT vertex, size_t index) { - GSVertexHW11& dst = m_vl.AddTail(); - - dst = *(GSVertexHW11*)&m_v; - -#ifdef ENABLE_UPSCALE_HACKS + GSVector4i v0(m_v.m[0]); + GSVector4i v1(m_v.m[1]); if(tme && fst) { - //GSVector4::storel(&dst.ST, m_v.GetUV()); + // TODO: modify VertexTrace and the shaders to read uv from v1.u16[0], v1.u16[1], then this step is not needed - int Udiff = 0; - int Vdiff = 0; - int Uadjust = 0; - int Vadjust = 0; - - int multiplier = GetUpscaleMultiplier(); - - if(multiplier > 1) - { - Udiff = m_v.UV.U & 4095; - Vdiff = m_v.UV.V & 4095; - - if(Udiff != 0) - { - if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; } - else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; } - } - - if(Vdiff != 0) - { - if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; } - else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; } - } - - Udiff = m_v.UV.U & 255; - Vdiff = m_v.UV.V & 255; - - if(Udiff != 0) - { - if (Udiff >= 248) { Uadjust = -1; } - else if (Udiff <= 8) { Uadjust = 1; } - } - - if(Vdiff != 0) - { - if (Vdiff >= 248) { Vadjust = -1; } - else if (Vdiff <= 8) { Vadjust = 1; } - } - - Udiff = m_v.UV.U & 15; - Vdiff = m_v.UV.V & 15; - - if(Udiff != 0) - { - if (Udiff >= 15) { Uadjust = -1; } - else if (Udiff <= 1) { Uadjust = 1; } - } - - if(Vdiff != 0) - { - if (Vdiff >= 15) { Vadjust = -1; } - else if (Vdiff <= 1) { Vadjust = 1; } - } - } - - dst.ST.S = (float)m_v.UV.U - Uadjust; - dst.ST.T = (float)m_v.UV.V - Vadjust; - } - else if(tme) - { - // Wip :p - //dst.XYZ.X += 5; - //dst.XYZ.Y += 5; + v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st } -#else + GSVector4i* RESTRICT dst = (GSVector4i*)&vertex[index]; - if(tme && fst) - { - GSVector4::storel(&dst.ST, m_v.GetUV()); - } - -#endif - - int count = 0; - - if(GSVertexHW11* v = DrawingKick(skip, count)) - { - GSVector4i scissor = m_context->scissor.dx10; - - GSVector4i pmin, pmax; - - #if _M_SSE >= 0x401 - - GSVector4i v0, v1, v2; - - switch(prim) - { - case GS_POINTLIST: - v0 = GSVector4i::load((int)v[0].p.xy).upl16(); - pmin = v0; - pmax = v0; - break; - case GS_LINELIST: - case GS_LINESTRIP: - case GS_SPRITE: - v0 = GSVector4i::load((int)v[0].p.xy); - v1 = GSVector4i::load((int)v[1].p.xy); - pmin = v0.min_u16(v1).upl16(); - pmax = v0.max_u16(v1).upl16(); - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - v0 = GSVector4i::load((int)v[0].p.xy); - v1 = GSVector4i::load((int)v[1].p.xy); - v2 = GSVector4i::load((int)v[2].p.xy); - pmin = v0.min_u16(v1).min_u16(v2).upl16(); - pmax = v0.max_u16(v1).max_u16(v2).upl16(); - break; - } - - #else - - switch(prim) - { - case GS_POINTLIST: - pmin.x = v[0].p.x; - pmin.y = v[0].p.y; - pmax.x = v[0].p.x; - pmax.y = v[0].p.y; - break; - case GS_LINELIST: - case GS_LINESTRIP: - case GS_SPRITE: - pmin.x = std::min(v[0].p.x, v[1].p.x); - pmin.y = std::min(v[0].p.y, v[1].p.y); - pmax.x = std::max(v[0].p.x, v[1].p.x); - pmax.y = std::max(v[0].p.y, v[1].p.y); - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - pmin.x = std::min(std::min(v[0].p.x, v[1].p.x), v[2].p.x); - pmin.y = std::min(std::min(v[0].p.y, v[1].p.y), v[2].p.y); - pmax.x = std::max(std::max(v[0].p.x, v[1].p.x), v[2].p.x); - pmax.y = std::max(std::max(v[0].p.y, v[1].p.y), v[2].p.y); - break; - } - - #endif - - GSVector4i test = (pmax < scissor) | (pmin > scissor.zwxy()); - - switch(prim) - { - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - case GS_SPRITE: - test |= pmin == pmax; - break; - } - - if(test.mask() & 0xff) - { - return; - } - - m_count += count; - } + dst[0] = v0; + dst[1] = v1; } -void GSRendererDX11::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) +void GSRendererDX11::Draw() { - switch(m_vt.m_primclass) + // TODO: remove invisible prims here + + __super::Draw(); +} + +void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) +{ + switch(m_vt->m_primclass) { case GS_POINT_CLASS: m_topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; - m_perfmon.Put(GSPerfMon::Prim, m_count); break; case GS_LINE_CLASS: case GS_SPRITE_CLASS: m_topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; - m_perfmon.Put(GSPerfMon::Prim, m_count / 2); break; case GS_TRIANGLE_CLASS: m_topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - m_perfmon.Put(GSPerfMon::Prim, m_count / 3); break; default: __assume(0); } - __super::Draw(rt, ds, tex); + __super::DrawPrims(rt, ds, tex); } diff --git a/plugins/GSdx/GSRendererDX11.h b/plugins/GSdx/GSRendererDX11.h index fc7f8a0337..a67b7c8ad2 100644 --- a/plugins/GSdx/GSRendererDX11.h +++ b/plugins/GSdx/GSRendererDX11.h @@ -25,16 +25,22 @@ #include "GSVertexHW.h" #include "GSTextureCache11.h" -class GSRendererDX11 : public GSRendererDX +class GSRendererDX11 : public GSRendererDX { protected: - void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); + template + void ConvertVertex(GSVertexHW11* RESTRICT vertex, size_t index); + void Draw(); + void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); + + int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;} + int GetPosY(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.y;} + uint32 GetColor(const void* vertex) const {return ((const GSVertexHW11*)vertex)->c0;} + void SetColor(void* vertex, uint32 c) const {((GSVertexHW11*)vertex)->c0 = c;} public: GSRendererDX11(); virtual ~GSRendererDX11() {} bool CreateDevice(GSDevice* dev); - - template void VertexKick(bool skip); }; diff --git a/plugins/GSdx/GSRendererDX9.cpp b/plugins/GSdx/GSRendererDX9.cpp index d2c167a2ee..c373e12219 100644 --- a/plugins/GSdx/GSRendererDX9.cpp +++ b/plugins/GSdx/GSRendererDX9.cpp @@ -25,9 +25,9 @@ #include "resource.h" GSRendererDX9::GSRendererDX9() - : GSRendererDX(new GSTextureCache9(this)) + : GSRendererDX(new GSVertexTraceDX9(this), sizeof(GSVertexHW9), new GSTextureCache9(this)) { - InitVertexKick(GSRendererDX9); + InitConvertVertex(GSRendererDX9); } bool GSRendererDX9::CreateDevice(GSDevice* dev) @@ -57,8 +57,8 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev) return true; } -template -void GSRendererDX9::VertexKick(bool skip) +template +void GSRendererDX9::ConvertVertex(GSVertexHW9* RESTRICT vertex, size_t index) { GSVector4 p = GSVector4(((GSVector4i)m_v.XYZ).upl16()); @@ -71,197 +71,143 @@ void GSRendererDX9::VertexKick(bool skip) p = p.xyxy(GSVector4::load((float)m_v.XYZ.Z)); } - GSVertexHW9& dst = m_vl.AddTail(); - - dst.p = p; - - int Uadjust = 0; - int Vadjust = 0; + GSVector4 t = GSVector4::zero(); if(tme) { if(fst) { - dst.t = m_v.GetUV(); - - #ifdef ENABLE_UPSCALE_HACKS - - int Udiff = 0; - int Vdiff = 0; - - int multiplier = GetUpscaleMultiplier(); - - if(multiplier > 1) - { - Udiff = m_v.UV.U & 4095; - Vdiff = m_v.UV.V & 4095; - - if(Udiff != 0) - { - if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; } - else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; } - } - - if(Vdiff != 0) - { - if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; } - else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; } - } - - Udiff = m_v.UV.U & 255; - Vdiff = m_v.UV.V & 255; - - if(Udiff != 0) - { - if (Udiff >= 248) { Uadjust = -1; } - else if (Udiff <= 8) { Uadjust = 1; } - } - - if(Vdiff != 0) - { - if (Vdiff >= 248) { Vadjust = -1; } - else if (Vdiff <= 8) { Vadjust = 1; } - } - - Udiff = m_v.UV.U & 15; - Vdiff = m_v.UV.V & 15; - - if(Udiff != 0) - { - if (Udiff >= 15) { Uadjust = -1; } - else if (Udiff <= 1) { Uadjust = 1; } - } - - if(Vdiff != 0) - { - if (Vdiff >= 15) { Vadjust = -1; } - else if (Vdiff <= 1) { Vadjust = 1; } - } - } - - dst.t.x -= (float) Uadjust; - dst.t.y -= (float) Vadjust; - - #endif + t = GSVector4(GSVector4i::load(m_v.UV.u32[0]).upl16()); } else { - dst.t = GSVector4::loadl(&m_v.ST); + t = GSVector4::loadl(&m_v.ST); } } - dst._c0() = m_v.RGBAQ.u32[0]; - dst._c1() = m_v.FOG.u32[1]; + t = t.xyxy(GSVector4::cast(GSVector4i(m_v.RGBAQ.u32[0], m_v.FOG.u32[1]))); - // + GSVertexHW9* RESTRICT dst = (GSVertexHW9*)&vertex[index]; - // BaseDrawingKick can never return NULL here because the DrawingKick function - // tables route to DrawingKickNull for GS_INVLALID prim types (and that's the only - // condition where this function would return NULL). - - int count = 0; - - if(GSVertexHW9* v = DrawingKick(skip, count)) - { - GSVector4 scissor = m_context->scissor.dx9; - - GSVector4 pmin, pmax; - - switch(prim) - { - case GS_POINTLIST: - pmin = v[0].p; - pmax = v[0].p; - break; - case GS_LINELIST: - case GS_LINESTRIP: - case GS_SPRITE: - pmin = v[0].p.min(v[1].p); - pmax = v[0].p.max(v[1].p); - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - pmin = v[0].p.min(v[1].p).min(v[2].p); - pmax = v[0].p.max(v[1].p).max(v[2].p); - break; - } - - GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); - - switch(prim) - { - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - case GS_SPRITE: - test |= pmin == pmax; - break; - } - - if(test.mask() & 3) - { - return; - } - - switch(prim) - { - case GS_POINTLIST: - break; - case GS_LINELIST: - case GS_LINESTRIP: - if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();} - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0() = v[2]._c0();} - break; - case GS_SPRITE: - if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();} - v[0].p.z = v[1].p.z; - v[0].p.w = v[1].p.w; - v[0]._c1() = v[1]._c1(); - v[2] = v[1]; - v[3] = v[1]; - v[1].p.y = v[0].p.y; - v[1].t.y = v[0].t.y; - v[2].p.x = v[0].p.x; - v[2].t.x = v[0].t.x; - v[4] = v[1]; - v[5] = v[2]; - count += 4; - break; - } - - m_count += count; - } + dst->p = p; + dst->t = t; } -void GSRendererDX9::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) +void GSRendererDX9::Draw() { - switch(m_vt.m_primclass) + // TODO: remove invisible prims here + + __super::Draw(); +} + +void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) +{ + switch(m_vt->m_primclass) { case GS_POINT_CLASS: + m_topology = D3DPT_POINTLIST; - m_perfmon.Put(GSPerfMon::Prim, m_count); + break; + case GS_LINE_CLASS: + m_topology = D3DPT_LINELIST; - m_perfmon.Put(GSPerfMon::Prim, m_count / 2); + + if(PRIM->IIP == 0) + { + for(size_t i = 0, j = m_index.tail; i < j; i += 2) + { + uint32 tmp = m_index.buff[i + 0]; + m_index.buff[i + 0] = m_index.buff[i + 1]; + m_index.buff[i + 1] = tmp; + } + } + break; + case GS_TRIANGLE_CLASS: - case GS_SPRITE_CLASS: + m_topology = D3DPT_TRIANGLELIST; - m_perfmon.Put(GSPerfMon::Prim, m_count / 3); + + if(PRIM->IIP == 0) + { + for(size_t i = 0, j = m_index.tail; i < j; i += 3) + { + uint32 tmp = m_index.buff[i + 0]; + m_index.buff[i + 0] = m_index.buff[i + 2]; + m_index.buff[i + 2] = tmp; + } + } + break; + + case GS_SPRITE_CLASS: + + m_topology = D3DPT_TRIANGLELIST; + + // each sprite converted to quad needs twice the space + + while(m_vertex.tail * 2 > m_vertex.maxcount) + { + GrowVertexBuffer(); + } + + // assume vertices are tightly packed and sequentially indexed (it should be the case) + + if(m_vertex.tail >= 2) + { + size_t count = m_vertex.tail; + + int i = (int)count * 2 - 4; + GSVertexHW9* s = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * count] - 2; + GSVertexHW9* q = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * (count * 2)] - 4; + uint32* RESTRICT index = &m_index.buff[count * 3] - 6; + + for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6) + { + GSVertexHW9 v0 = s[0]; + GSVertexHW9 v1 = s[1]; + + v0.p = v0.p.xyzw(v1.p); // z, q + v0.t = v0.t.xyzw(v1.t); // c, f + + q[0] = v0; + q[3] = v1; + + // swap x, s + + GSVector4 p = v0.p.insert<0, 0>(v1.p); + GSVector4 t = v0.t.insert<0, 0>(v1.t); + v1.p = v1.p.insert<0, 0>(v0.p); + v1.t = v1.t.insert<0, 0>(v0.t); + v0.p = p; + v0.t = t; + + q[1] = v0; + q[2] = v1; + + index[0] = i + 0; + index[1] = i + 1; + index[2] = i + 2; + index[3] = i + 1; + index[4] = i + 2; + index[5] = i + 3; + } + + m_vertex.head = m_vertex.tail = count * 2; + m_index.tail = count * 3; + } + + break; + default: __assume(0); } (*(GSDevice9*)m_dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO - __super::Draw(rt, ds, tex); + __super::DrawPrims(rt, ds, tex); } void GSRendererDX9::UpdateFBA(GSTexture* rt) @@ -280,7 +226,7 @@ void GSRendererDX9::UpdateFBA(GSTexture* rt) GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight()); GSVector4 o = GSVector4(-1.0f, 1.0f); - GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); + GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); GSVector4 dst = src * 2.0f + o.xxxx(); GSVertexPT1 vertices[] = diff --git a/plugins/GSdx/GSRendererDX9.h b/plugins/GSdx/GSRendererDX9.h index f4d8eb73a8..119736e782 100644 --- a/plugins/GSdx/GSRendererDX9.h +++ b/plugins/GSdx/GSRendererDX9.h @@ -25,7 +25,7 @@ #include "GSVertexHW.h" #include "GSTextureCache9.h" -class GSRendererDX9 : public GSRendererDX +class GSRendererDX9 : public GSRendererDX { protected: struct @@ -34,14 +34,20 @@ protected: Direct3DBlendState9 bs; } m_fba; - void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); + template + void ConvertVertex(GSVertexHW9* RESTRICT vertex, size_t index); + void Draw(); + void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); void UpdateFBA(GSTexture* rt); + int GetPosX(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.x;} + int GetPosY(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.y;} + uint32 GetColor(const void* vertex) const {return ((const GSVertexHW9*)vertex)->t.u32[2];} + void SetColor(void* vertex, uint32 c) const {((GSVertexHW9*)vertex)->t.u32[2] = c;} + public: GSRendererDX9(); virtual ~GSRendererDX9() {} bool CreateDevice(GSDevice* dev); - - template void VertexKick(bool skip); }; diff --git a/plugins/GSdx/GSRendererHW.cpp b/plugins/GSdx/GSRendererHW.cpp index 8db4e975d3..6781d08e39 100644 --- a/plugins/GSdx/GSRendererHW.cpp +++ b/plugins/GSdx/GSRendererHW.cpp @@ -21,3 +21,910 @@ #include "stdafx.h" #include "GSRendererHW.h" + +GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc) + : GSRenderer(vt, vertex_stride) + , m_tc(tc) + , m_width(1024) + , m_height(1024) + , m_skip(0) + , m_reset(false) + , m_upscale_multiplier(1) +{ + m_nativeres = !!theApp.GetConfig("nativeres", 0); + m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1); + m_userhacks_skipdraw = theApp.GetConfig("UserHacks_SkipDraw", 0); + + if(!m_nativeres) + { + m_width = theApp.GetConfig("resx", m_width); + m_height = theApp.GetConfig("resy", m_height); + + m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", m_upscale_multiplier); + + if(m_upscale_multiplier > 6) + { + m_upscale_multiplier = 1; // use the normal upscale math + } + else if(m_upscale_multiplier > 1) + { + m_width = 640 * m_upscale_multiplier; // 512 is also common, but this is not always detected right. + m_height = 512 * m_upscale_multiplier; // 448 is also common, but this is not always detected right. + } + } + else m_upscale_multiplier = 1; +} + +GSRendererHW::~GSRendererHW() +{ + delete m_tc; +} + +void GSRendererHW::SetGameCRC(uint32 crc, int options) +{ + GSRenderer::SetGameCRC(crc, options); + + m_hacks.SetGameCRC(m_game); + + if(m_game.title == CRC::JackieChanAdv) + { + m_width = 1280; // TODO: uses a 1280px wide 16 bit render target, but this only fixes half of the problem + } +} + +bool GSRendererHW::CanUpscale() +{ + if(m_hacks.m_cu && !(this->*m_hacks.m_cu)()) + { + return false; + } + + return !m_nativeres && m_regs->PMODE.EN != 0; // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition) +} + +int GSRendererHW::GetUpscaleMultiplier() +{ + return m_upscale_multiplier; +} + +void GSRendererHW::Reset() +{ + // TODO: GSreset can come from the main thread too => crash + // m_tc->RemoveAll(); + + m_reset = true; + + GSRenderer::Reset(); +} + +void GSRendererHW::VSync(int field) +{ + GSRenderer::VSync(field); + + m_tc->IncAge(); + m_dev->AgePool(); + + m_skip = 0; + + if(m_reset) + { + m_tc->RemoveAll(); + + m_reset = false; + } +} + +void GSRendererHW::ResetDevice() +{ + m_tc->RemoveAll(); + + GSRenderer::ResetDevice(); +} + +GSTexture* GSRendererHW::GetOutput(int i) +{ + const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB; + + GIFRegTEX0 TEX0; + + TEX0.TBP0 = DISPFB.Block(); + TEX0.TBW = DISPFB.FBW; + TEX0.PSM = DISPFB.PSM; + + // TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM); + + GSTexture* t = NULL; + + if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height)) + { + t = rt->m_texture; + + if(s_dump) + { + if(s_save && s_n >= s_saven) + { + t->Save(format("c:\\temp2\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM)); + } + + s_n++; + } + } + + return t; +} + +void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) +{ + // printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM); + + m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r); +} + +void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) +{ + // printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM); + + if(clut) return; // FIXME + + m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r); +} + +void GSRendererHW::Draw() +{ + if(m_dev->IsLost()) return; + + m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM)); + + #ifndef DISABLE_CRC_HACKS + + if(GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) return; + + #endif + + GSDrawingEnvironment& env = m_env; + GSDrawingContext* context = m_context; + + GIFRegTEX0 TEX0; + + TEX0.TBP0 = context->FRAME.Block(); + TEX0.TBW = context->FRAME.FBW; + TEX0.PSM = context->FRAME.PSM; + GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true); + + TEX0.TBP0 = context->ZBUF.Block(); + TEX0.TBW = context->FRAME.FBW; + TEX0.PSM = context->ZBUF.PSM; + + GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite()); + + if(!rt || !ds) + { + ASSERT(0); + + return; + } + + GSTextureCache::Source* tex = NULL; + + if(PRIM->TME) + { + m_mem.m_clut.Read32(context->TEX0, env.TEXA); + + GSVector4i r; + + GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt->IsLinear()); + + tex = m_tc->LookupSource(context->TEX0, env.TEXA, r); + + if(!tex) return; + } + + if(s_dump) + { + uint64 frame = m_perfmon.GetFrame(); + + string s; + + if(s_save && s_n >= s_saven && tex) + { + s = format("c:\\temp2\\_%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds", + s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM, + (int)context->CLAMP.WMS, (int)context->CLAMP.WMT, + (int)context->CLAMP.MINU, (int)context->CLAMP.MAXU, + (int)context->CLAMP.MINV, (int)context->CLAMP.MAXV); + + tex->m_texture->Save(s, true); + + if(tex->m_palette) + { + s = format("c:\\temp2\\_%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM); + + tex->m_palette->Save(s, true); + } + } + + s_n++; + + if(s_save && s_n >= s_saven) + { + s = format("c:\\temp2\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); + + rt->m_texture->Save(s); + } + + if(s_savez && s_n >= s_saven) + { + s = format("c:\\temp2\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM); + + ds->m_texture->Save(s); + } + + s_n++; + } + + if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex)) + { + return; + } + + // skip alpha test if possible + + GIFRegTEST TEST = context->TEST; + GIFRegFRAME FRAME = context->FRAME; + GIFRegZBUF ZBUF = context->ZBUF; + + uint32 fm = context->FRAME.FBMSK; + uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; + + if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS) + { + if(GSRenderer::TryAlphaTest(fm, zm)) + { + context->TEST.ATST = ATST_ALWAYS; + } + } + + context->FRAME.FBMSK = fm; + context->ZBUF.ZMSK = zm != 0; + + // + + DrawPrims(rt->m_texture, ds->m_texture, tex); + + // + + context->TEST = TEST; + context->FRAME = FRAME; + context->ZBUF = ZBUF; + + // + + GSVector4i r = GSVector4i(m_vt->m_min.p.xyxy(m_vt->m_max.p)).rintersect(GSVector4i(context->scissor.in)); + + if(fm != 0xffffffff) + { + rt->m_valid = rt->m_valid.runion(r); + + m_tc->InvalidateVideoMem(context->offset.fb, r, false); + } + + if(zm != 0xffffffff) + { + ds->m_valid = ds->m_valid.runion(r); + + m_tc->InvalidateVideoMem(context->offset.zb, r, false); + } + + // + + if(m_hacks.m_oo) + { + (this->*m_hacks.m_oo)(); + } + + if(s_dump) + { + uint64 frame = m_perfmon.GetFrame(); + + string s; + + if(s_save && s_n >= s_saven) + { + s = format("c:\\temp2\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); + + rt->m_texture->Save(s); + } + + if(s_savez && s_n >= s_saven) + { + s = format("c:\\temp2\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM); + + ds->m_texture->Save(s); + } + + s_n++; + } + + #ifdef DISABLE_HW_TEXTURE_CACHE + + m_tc->Read(rt, r); + + #endif +} + +// hacks + +GSRendererHW::Hacks::Hacks() + : m_oi_map(m_oi_list) + , m_oo_map(m_oo_list) + , m_cu_map(m_cu_list) + , m_oi(NULL) + , m_oo(NULL) + , m_cu(NULL) +{ + m_oi_list.push_back(HackEntry(CRC::FFXII, CRC::EU, &GSRendererHW::OI_FFXII)); + m_oi_list.push_back(HackEntry(CRC::FFX, CRC::RegionCount, &GSRendererHW::OI_FFX)); + m_oi_list.push_back(HackEntry(CRC::MetalSlug6, CRC::RegionCount, &GSRendererHW::OI_MetalSlug6)); + m_oi_list.push_back(HackEntry(CRC::GodOfWar2, CRC::RegionCount, &GSRendererHW::OI_GodOfWar2)); + m_oi_list.push_back(HackEntry(CRC::SimpsonsGame, CRC::RegionCount, &GSRendererHW::OI_SimpsonsGame)); + m_oi_list.push_back(HackEntry(CRC::RozenMaidenGebetGarden, CRC::RegionCount, &GSRendererHW::OI_RozenMaidenGebetGarden)); + m_oi_list.push_back(HackEntry(CRC::SpidermanWoS, CRC::RegionCount, &GSRendererHW::OI_SpidermanWoS)); + m_oi_list.push_back(HackEntry(CRC::TyTasmanianTiger, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger)); + m_oi_list.push_back(HackEntry(CRC::TyTasmanianTiger2, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger)); + m_oi_list.push_back(HackEntry(CRC::DigimonRumbleArena2, CRC::RegionCount, &GSRendererHW::OI_DigimonRumbleArena2)); + m_oi_list.push_back(HackEntry(CRC::StarWarsForceUnleashed, CRC::RegionCount, &GSRendererHW::OI_StarWarsForceUnleashed)); + m_oi_list.push_back(HackEntry(CRC::BlackHawkDown, CRC::RegionCount, &GSRendererHW::OI_BlackHawkDown)); + m_oi_list.push_back(HackEntry(CRC::XmenOriginsWolverine, CRC::RegionCount, &GSRendererHW::OI_XmenOriginsWolverine)); + m_oi_list.push_back(HackEntry(CRC::CallofDutyFinalFronts, CRC::RegionCount, &GSRendererHW::OI_CallofDutyFinalFronts)); + m_oi_list.push_back(HackEntry(CRC::SpyroNewBeginning, CRC::RegionCount, &GSRendererHW::OI_SpyroNewBeginning)); + m_oi_list.push_back(HackEntry(CRC::SpyroEternalNight, CRC::RegionCount, &GSRendererHW::OI_SpyroEternalNight)); + m_oi_list.push_back(HackEntry(CRC::TalesOfLegendia, CRC::RegionCount, &GSRendererHW::OI_TalesOfLegendia)); + + m_oo_list.push_back(HackEntry(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::OO_DBZBT2)); + m_oo_list.push_back(HackEntry(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::OO_MajokkoALaMode2)); + + m_cu_list.push_back(HackEntry(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::CU_DBZBT2)); + m_cu_list.push_back(HackEntry(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::CU_MajokkoALaMode2)); + m_cu_list.push_back(HackEntry(CRC::TalesOfAbyss, CRC::RegionCount, &GSRendererHW::CU_TalesOfAbyss)); +} + +void GSRendererHW::Hacks::SetGameCRC(const CRC::Game& game) +{ + uint32 hash = (uint32)((game.region << 24) | game.title); + + m_oi = m_oi_map[hash]; + m_oo = m_oo_map[hash]; + m_cu = m_cu_map[hash]; + + if(game.flags & CRC::PointListPalette) + { + ASSERT(m_oi == NULL); + + m_oi = &GSRendererHW::OI_PointListPalette; + } +} + +bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + static uint32* video = NULL; + static size_t lines = 0; + + if(lines == 0) + { + if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.tail == 448 * 2 || m_vertex.tail == 512 * 2)) + { + lines = m_vertex.tail / 2; + } + } + else + { + if(m_vt->m_primclass == GS_POINT_CLASS) + { + if(m_vertex.tail >= 16 * 512) + { + // incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454 + + if(!video) video = new uint32[512 * 512]; + + int ox = m_context->XYOFFSET.OFX; + int oy = m_context->XYOFFSET.OFY; + + const uint8* RESTRICT v = m_vertex.buff; + + for(int i = (int)m_vertex.tail; i >= 0; i--, v += m_vertex.stride) + { + int x = (GetPosX(v) - ox) >> 4; + int y = (GetPosY(v) - oy) >> 4; + + video[(y << 8) + (y << 7) + (y << 6) + x] = GetColor(v); + } + + return false; + } + else + { + lines = 0; + } + } + else if(m_vt->m_primclass == GS_LINE_CLASS) + { + if(m_vertex.tail == lines * 2) + { + // normally, this step would copy the video onto screen with 512 texture mapped horizontal lines, + // but we use the stored video data to create a new texture, and replace the lines with two triangles + + m_dev->Recycle(t->m_texture); + + t->m_texture = m_dev->CreateTexture(512, 512); + + t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4); + + memcpy(&m_vertex.buff[m_vertex.stride * 2], &m_vertex.buff[m_vertex.stride * (m_vertex.tail - 2)], m_vertex.stride); + memcpy(&m_vertex.buff[m_vertex.stride * 3], &m_vertex.buff[m_vertex.stride * (m_vertex.tail - 1)], m_vertex.stride); + + m_index.buff[0] = 0; + m_index.buff[1] = 1; + m_index.buff[2] = 2; + m_index.buff[3] = 1; + m_index.buff[4] = 2; + m_index.buff[5] = 3; + + m_vertex.head = m_vertex.tail = 4; + m_index.tail = 6; + + m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS); + } + else + { + lines = 0; + } + } + } + + return true; +} + +bool GSRendererHW::OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 ZBP = m_context->ZBUF.Block(); + uint32 TBP = m_context->TEX0.TBP0; + + if((FBP == 0x00d00 || FBP == 0x00000) && ZBP == 0x02100 && PRIM->TME && TBP == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S) + { + // random battle transition (z buffer written directly, clear it now) + + m_dev->ClearDepth(ds, 0); + } + + return true; +} + +bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + // missing red channel fix (looks alright in pcsx2 r5000+) + + uint8* RESTRICT v = m_vertex.buff; + + for(int i = (int)m_vertex.tail; i >= 0; i--, v += m_vertex.stride) + { + uint32 c = GetColor(v); + + uint32 r = (c >> 0) & 0xff; + uint32 g = (c >> 8) & 0xff; + uint32 b = (c >> 16) & 0xff; + + if(r == 0 && g != 0 && b != 0) + { + SetColor(v, (c & 0xffffff00) | ((g + b + 1) >> 1)); + } + } + + m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt->m_primclass); + + return true; +} + +bool GSRendererHW::OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 FBW = m_context->FRAME.FBW; + uint32 FPSM = m_context->FRAME.PSM; + + if((FBP == 0x00f00 || FBP == 0x00100 || FBP == 0x01280) && FPSM == PSM_PSMZ24) // ntsc 0xf00, pal 0x100, ntsc "HD" 0x1280 + { + // z buffer clear + + GIFRegTEX0 TEX0; + + TEX0.TBP0 = FBP; + TEX0.TBW = FBW; + TEX0.PSM = FPSM; + + if(GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true)) + { + m_dev->ClearDepth(ds->m_texture, 0); + } + + return false; + } + + return true; +} + +bool GSRendererHW::OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 FBW = m_context->FRAME.FBW; + uint32 FPSM = m_context->FRAME.PSM; + + if((FBP == 0x01500 || FBP == 0x01800) && FPSM == PSM_PSMZ24) //0x1800 pal, 0x1500 ntsc + { + // instead of just simply drawing a full height 512x512 sprite to clear the z buffer, + // it uses a 512x256 sprite only, yet it is still able to fill the whole surface with zeros, + // how? by using a render target that overlaps with the lower half of the z buffer... + + // TODO: tony hawk pro skater 4 same problem, the empty half is not visible though, painted over fully + + m_dev->ClearDepth(ds, 0); + + return false; + } + + return true; +} + +bool GSRendererHW::OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + if(!PRIM->TME) + { + uint32 FBP = m_context->FRAME.Block(); + uint32 ZBP = m_context->ZBUF.Block(); + + if(FBP == 0x008c0 && ZBP == 0x01a40) + { + // frame buffer clear, atst = fail, afail = write z only, z buffer points to frame buffer + + GIFRegTEX0 TEX0; + + TEX0.TBP0 = ZBP; + TEX0.TBW = m_context->FRAME.FBW; + TEX0.PSM = m_context->FRAME.PSM; + + if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true)) + { + m_dev->ClearRenderTarget(rt->m_texture, 0); + } + + return false; + } + else if(FBP == 0x00000 && m_context->ZBUF.Block() == 0x01180) + { + // z buffer clear, frame buffer now points to the z buffer (how can they be so clever?) + + GIFRegTEX0 TEX0; + + TEX0.TBP0 = FBP; + TEX0.TBW = m_context->FRAME.FBW; + TEX0.PSM = m_context->ZBUF.PSM; + + if(GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true)) + { + m_dev->ClearDepth(ds->m_texture, 0); + } + + return false; + } + } + + return true; +} + +bool GSRendererHW::OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 FPSM = m_context->FRAME.PSM; + + if((FBP == 0x025a0 || FBP == 0x02800) && FPSM == PSM_PSMCT32) //0x2800 pal, 0x25a0 ntsc + { + //only top half of the screen clears + m_dev->ClearDepth(ds, 0); + } + + return true; +} + +bool GSRendererHW::OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 FBW = m_context->FRAME.FBW; + uint32 FPSM = m_context->FRAME.PSM; + + if((FBP == 0x02800 || FBP == 0x02BC0) && FPSM == PSM_PSMCT24) //0x2800 pal, 0x2bc0 ntsc + { + //half height buffer clear + m_dev->ClearDepth(ds, 0); + + return false; + } + + return true; +} + +bool GSRendererHW::OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 FPSM = m_context->FRAME.PSM; + + if(!PRIM->TME) + { + if((FBP == 0x02300 || FBP == 0x03fc0) && FPSM == PSM_PSMCT32) + { + //half height buffer clear + m_dev->ClearDepth(ds, 0); + } + } + + return true; +} + +bool GSRendererHW::OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 FPSM = m_context->FRAME.PSM; + + if(FBP == 0x02000 && FPSM == PSM_PSMZ24) + { + //half height buffer clear + m_dev->ClearDepth(ds, 0); + + return false; + } + + return true; +} + +bool GSRendererHW::OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 FPSM = m_context->FRAME.PSM; + + if(!PRIM->TME) + { + if(FPSM == PSM_PSMCT24 && FBP == 0x2bc0) + { + m_dev->ClearDepth(ds, 0); + + return false; + } + } + else if(PRIM->TME) + { + if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_max.p.z == 0)) + { + m_dev->ClearDepth(ds, 0); + } + } + + return true; +} + +bool GSRendererHW::OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 FPSM = m_context->FRAME.PSM; + + if(FBP == 0x0 && FPSM == PSM_PSMCT16) + { + //half height buffer clear + m_dev->ClearDepth(ds, 0); + } + + return true; +} + +bool GSRendererHW::OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 FPSM = m_context->FRAME.PSM; + + if(FBP == 0x02300 && FPSM == PSM_PSMZ24) + { + //half height buffer clear + m_dev->ClearDepth(ds, 0); + + return false; + } + + return true; +} + +bool GSRendererHW::OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 FPSM = m_context->FRAME.PSM; + + if(!PRIM->TME) + { + if(FPSM == PSM_PSMCT24 && (FBP == 0x02800 || FBP == 0x02bc0)) //0x2800 pal, 0x2bc0 ntsc + { + //half height buffer clear + m_dev->ClearDepth(ds, 0); + + return false; + } + } + else if(PRIM->TME) + { + if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0)) + { + m_dev->ClearDepth(ds, 0); + } + } + + return true; +} + +bool GSRendererHW::OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 FPSM = m_context->FRAME.PSM; + + if(!PRIM->TME) + { + if(FPSM == PSM_PSMCT24 && FBP == 0x2bc0) + { + //half height buffer clear + m_dev->ClearDepth(ds, 0); + + return false; + } + } + else if(PRIM->TME) + { + if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0)) + { + m_dev->ClearDepth(ds, 0); + } + } + + return true; +} + +bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + uint32 FBP = m_context->FRAME.Block(); + uint32 FPSM = m_context->FRAME.PSM; + + if (FPSM == PSM_PSMCT32 && FBP == 0x01c00 && !m_context->TEST.ATE && m_vt->m_max.p.z == m_vt->m_min.p.z) + { + m_context->TEST.ZTST = ZTST_ALWAYS; + //m_dev->ClearDepth(ds, 0); + } + + return true; +} + + +bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + if(m_vt->m_primclass == GS_POINT_CLASS && !PRIM->TME) + { + uint32 FBP = m_context->FRAME.Block(); + uint32 FBW = m_context->FRAME.FBW; + + if(FBP >= 0x03f40 && (FBP & 0x1f) == 0) + { + if(m_vertex.tail == 16) + { + uint8* RESTRICT v = m_vertex.buff; + + for(int i = 0; i < 16; i++, v += m_vertex.stride) + { + uint32 c = GetColor(v); + uint32 a = c >> 24; + + c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff); + + SetColor(v, c); + + m_mem.WritePixel32(i & 7, i >> 3, c, FBP, FBW); + } + + m_mem.m_clut.Invalidate(); + + return false; + } + else if(m_vertex.tail == 256) + { + uint8* RESTRICT v = m_vertex.buff; + + for(int i = 0; i < 256; i++, v += m_vertex.stride) + { + uint32 c = GetColor(v); + uint32 a = c >> 24; + + c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff); + + SetColor(v, c); + + m_mem.WritePixel32(i & 15, i >> 4, c, FBP, FBW); + } + + m_mem.m_clut.Invalidate(); + + return false; + } + else + { + ASSERT(0); + } + } + } + + return true; +} + +void GSRendererHW::OO_DBZBT2() +{ + // palette readback (cannot detect yet, when fetching the texture later) + + uint32 FBP = m_context->FRAME.Block(); + uint32 TBP0 = m_context->TEX0.TBP0; + + if(PRIM->TME && (FBP == 0x03c00 && TBP0 == 0x03c80 || FBP == 0x03ac0 && TBP0 == 0x03b40)) + { + GIFRegBITBLTBUF BITBLTBUF; + + BITBLTBUF.SBP = FBP; + BITBLTBUF.SBW = 1; + BITBLTBUF.SPSM = PSM_PSMCT32; + + InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 64, 64)); + } +} + +void GSRendererHW::OO_MajokkoALaMode2() +{ + // palette readback + + uint32 FBP = m_context->FRAME.Block(); + + if(!PRIM->TME && FBP == 0x03f40) + { + GIFRegBITBLTBUF BITBLTBUF; + + BITBLTBUF.SBP = FBP; + BITBLTBUF.SBW = 1; + BITBLTBUF.SPSM = PSM_PSMCT32; + + InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 16, 16)); + } +} + +bool GSRendererHW::CU_DBZBT2() +{ + // palette should stay 64 x 64 + + uint32 FBP = m_context->FRAME.Block(); + + return FBP != 0x03c00 && FBP != 0x03ac0; +} + +bool GSRendererHW::CU_MajokkoALaMode2() +{ + // palette should stay 16 x 16 + + uint32 FBP = m_context->FRAME.Block(); + + return FBP != 0x03f40; +} + +bool GSRendererHW::CU_TalesOfAbyss() +{ + // full image blur and brightening + + uint32 FBP = m_context->FRAME.Block(); + + return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0; +} diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index 158943ee2c..2fe8a879f0 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -26,28 +26,8 @@ #include "GSCrc.h" #include "GSFunctionMap.h" - -template -class GSRendererHW : public GSRendererT +class GSRendererHW : public GSRenderer { -protected: - using GSRendererT::m_vt; - using GSRendererT::m_count; - using GSRendererT::m_env; - using GSRendererT::m_context; - using GSRendererT::m_vertices; - using GSRendererT::m_dev; - using GSRendererT::PRIM; - using GSRendererT::m_mem; - using GSRendererT::m_regs; - using GSRendererT::m_perfmon; - using GSRendererT::m_game; - using GSRendererT::s_dump; - using GSRendererT::s_save; - using GSRendererT::s_saven; - using GSRendererT::s_savez; - using GSRendererT::s_n; - private: int m_width; int m_height; @@ -56,512 +36,36 @@ private: bool m_nativeres; int m_upscale_multiplier; int m_userhacks_skipdraw; - + #pragma region hacks typedef bool (GSRendererHW::*OI_Ptr)(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); typedef void (GSRendererHW::*OO_Ptr)(); typedef bool (GSRendererHW::*CU_Ptr)(); - bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - static uint32* video = NULL; - static int lines = 0; - - if(lines == 0) - { - if(m_vt.m_primclass == GS_LINE_CLASS && (m_count == 448 * 2 || m_count == 512 * 2)) - { - lines = m_count / 2; - } - } - else - { - if(m_vt.m_primclass == GS_POINT_CLASS) - { - if(m_count >= 16 * 512) - { - // incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454 - - if(!video) video = new uint32[512 * 512]; - - int ox = m_context->XYOFFSET.OFX; - int oy = m_context->XYOFFSET.OFY; - - for(int i = 0; i < m_count; i++) - { - int x = ((int)m_vertices[i].p.x - ox) >> 4; - int y = ((int)m_vertices[i].p.y - oy) >> 4; - - // video[y * 448 + x] = m_vertices[i].c0; - video[(y << 8) + (y << 7) + (y << 6) + x] = m_vertices[i]._c0(); - } - - return false; - } - else - { - lines = 0; - } - } - else if(m_vt.m_primclass == GS_LINE_CLASS) - { - if(m_count == lines * 2) - { - // normally, this step would copy the video onto screen with 512 texture mapped horizontal lines, - // but we use the stored video data to create a new texture, and replace the lines with two triangles - - m_dev->Recycle(t->m_texture); - - t->m_texture = m_dev->CreateTexture(512, 512); - - t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4); - - m_vertices[0] = m_vertices[0]; - m_vertices[1] = m_vertices[1]; - m_vertices[2] = m_vertices[m_count - 2]; - m_vertices[3] = m_vertices[1]; - m_vertices[4] = m_vertices[2]; - m_vertices[5] = m_vertices[m_count - 1]; - - m_count = 6; - - m_vt.Update(m_vertices, m_count, GS_TRIANGLE_CLASS); - } - else - { - lines = 0; - } - } - } - - return true; - } - - bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 ZBP = m_context->ZBUF.Block(); - uint32 TBP = m_context->TEX0.TBP0; - - if((FBP == 0x00d00 || FBP == 0x00000) && ZBP == 0x02100 && PRIM->TME && TBP == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S) - { - // random battle transition (z buffer written directly, clear it now) - - m_dev->ClearDepth(ds, 0); - } - - return true; - } - - bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - // missing red channel fix - - for(int i = 0, j = m_count; i < j; i++) - { - if(m_vertices[i]._r() == 0 && m_vertices[i]._g() != 0 && m_vertices[i]._b() != 0) - { - m_vertices[i]._r() = (m_vertices[i]._g() + m_vertices[i]._b()) / 2; - } - } - - m_vt.Update(m_vertices, m_count, m_vt.m_primclass); - - return true; - } - - bool OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FBW = m_context->FRAME.FBW; - uint32 FPSM = m_context->FRAME.PSM; - - if((FBP == 0x00f00 || FBP == 0x00100 || FBP == 0x01280) && FPSM == PSM_PSMZ24) // ntsc 0xf00, pal 0x100, ntsc "HD" 0x1280 - { - // z buffer clear - - GIFRegTEX0 TEX0; - - TEX0.TBP0 = FBP; - TEX0.TBW = FBW; - TEX0.PSM = FPSM; - - if(GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true)) - { - m_dev->ClearDepth(ds->m_texture, 0); - } - - return false; - } - - return true; - } - - bool OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FBW = m_context->FRAME.FBW; - uint32 FPSM = m_context->FRAME.PSM; - - if((FBP == 0x01500 || FBP == 0x01800) && FPSM == PSM_PSMZ24) //0x1800 pal, 0x1500 ntsc - { - // instead of just simply drawing a full height 512x512 sprite to clear the z buffer, - // it uses a 512x256 sprite only, yet it is still able to fill the whole surface with zeros, - // how? by using a render target that overlaps with the lower half of the z buffer... - - m_dev->ClearDepth(ds, 0); - - return false; - } - - return true; - } - - bool OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - if(!PRIM->TME) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 ZBP = m_context->ZBUF.Block(); - - if(FBP == 0x008c0 && ZBP == 0x01a40) - { - // frame buffer clear, atst = fail, afail = write z only, z buffer points to frame buffer - - GIFRegTEX0 TEX0; - - TEX0.TBP0 = ZBP; - TEX0.TBW = m_context->FRAME.FBW; - TEX0.PSM = m_context->FRAME.PSM; - - if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true)) - { - m_dev->ClearRenderTarget(rt->m_texture, 0); - } - - return false; - } - else if(FBP == 0x00000 && m_context->ZBUF.Block() == 0x01180) - { - // z buffer clear, frame buffer now points to the z buffer (how can they be so clever?) - - GIFRegTEX0 TEX0; - - TEX0.TBP0 = FBP; - TEX0.TBW = m_context->FRAME.FBW; - TEX0.PSM = m_context->ZBUF.PSM; - - if(GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true)) - { - m_dev->ClearDepth(ds->m_texture, 0); - } - - return false; - } - } - - return true; - } - - bool OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if((FBP == 0x025a0 || FBP == 0x02800) && FPSM == PSM_PSMCT32) //0x2800 pal, 0x25a0 ntsc - { - //only top half of the screen clears - m_dev->ClearDepth(ds, 0); - } - - return true; - } - - bool OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FBW = m_context->FRAME.FBW; - uint32 FPSM = m_context->FRAME.PSM; - - if((FBP == 0x02800 || FBP == 0x02BC0) && FPSM == PSM_PSMCT24) //0x2800 pal, 0x2bc0 ntsc - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - - return false; - } - - return true; - } - - bool OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(!PRIM->TME) - { - if((FBP == 0x02300 || FBP == 0x03fc0) && FPSM == PSM_PSMCT32) - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - } - } - - return true; - } - - bool OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(FBP == 0x02000 && FPSM == PSM_PSMZ24) - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - - return false; - } - - return true; - } - - bool OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(!PRIM->TME) - { - if(FPSM == PSM_PSMCT24 && FBP == 0x2bc0) - { - m_dev->ClearDepth(ds, 0); - - return false; - } - } - else if(PRIM->TME) - { - if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_max.p.z == 0)) - { - m_dev->ClearDepth(ds, 0); - } - } - - return true; - } - - bool OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(FBP == 0x0 && FPSM == PSM_PSMCT16) - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - } - - return true; - } - - bool OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(FBP == 0x02300 && FPSM == PSM_PSMZ24) - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - - return false; - } - - return true; - } - - bool OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(!PRIM->TME) - { - if(FPSM == PSM_PSMCT24 && (FBP == 0x02800 || FBP == 0x02bc0)) //0x2800 pal, 0x2bc0 ntsc - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - - return false; - } - } - else if(PRIM->TME) - { - if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_min.p.z == 0x0)) - { - m_dev->ClearDepth(ds, 0); - } - } - - return true; - } - - bool OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if(!PRIM->TME) - { - if(FPSM == PSM_PSMCT24 && FBP == 0x2bc0) - { - //half height buffer clear - m_dev->ClearDepth(ds, 0); - - return false; - } - } - else if(PRIM->TME) - { - if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_min.p.z == 0x0)) - { - m_dev->ClearDepth(ds, 0); - } - } - - return true; - } - - bool OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FPSM = m_context->FRAME.PSM; - - if (FPSM == PSM_PSMCT32 && FBP == 0x01c00 && !m_context->TEST.ATE && m_vt.m_max.p.z == m_vt.m_min.p.z) - { - m_context->TEST.ZTST = ZTST_ALWAYS; - //m_dev->ClearDepth(ds, 0); - } - - return true; - } - - - bool OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) - { - if(m_vt.m_primclass == GS_POINT_CLASS && !PRIM->TME) - { - uint32 FBP = m_context->FRAME.Block(); - uint32 FBW = m_context->FRAME.FBW; - - if(FBP >= 0x03f40 && (FBP & 0x1f) == 0) - { - if(m_count == 16) - { - for(int i = 0; i < 16; i++) - { - uint8 a = m_vertices[i]._a(); - - m_vertices[i]._a() = a >= 0x80 ? 0xff : a * 2; - - m_mem.WritePixel32(i & 7, i >> 3, m_vertices[i]._c0(), FBP, FBW); - } - - m_mem.m_clut.Invalidate(); - - return false; - } - else if(m_count == 256) - { - for(int i = 0; i < 256; i++) - { - uint8 a = m_vertices[i]._a(); - - m_vertices[i]._a() = a >= 0x80 ? 0xff : a * 2; - - m_mem.WritePixel32(i & 15, i >> 4, m_vertices[i]._c0(), FBP, FBW); - } - - m_mem.m_clut.Invalidate(); - - return false; - } - else - { - ASSERT(0); - } - } - } - - return true; - } - - void OO_DBZBT2() - { - // palette readback (cannot detect yet, when fetching the texture later) - - uint32 FBP = m_context->FRAME.Block(); - uint32 TBP0 = m_context->TEX0.TBP0; - - if(PRIM->TME && (FBP == 0x03c00 && TBP0 == 0x03c80 || FBP == 0x03ac0 && TBP0 == 0x03b40)) - { - GIFRegBITBLTBUF BITBLTBUF; - - BITBLTBUF.SBP = FBP; - BITBLTBUF.SBW = 1; - BITBLTBUF.SPSM = PSM_PSMCT32; - - InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 64, 64)); - } - } - - void OO_MajokkoALaMode2() - { - // palette readback - - uint32 FBP = m_context->FRAME.Block(); - - if(!PRIM->TME && FBP == 0x03f40) - { - GIFRegBITBLTBUF BITBLTBUF; - - BITBLTBUF.SBP = FBP; - BITBLTBUF.SBW = 1; - BITBLTBUF.SPSM = PSM_PSMCT32; - - InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 16, 16)); - } - } - - bool CU_DBZBT2() - { - // palette should stay 64 x 64 - - uint32 FBP = m_context->FRAME.Block(); - - return FBP != 0x03c00 && FBP != 0x03ac0; - } - - bool CU_MajokkoALaMode2() - { - // palette should stay 16 x 16 - - uint32 FBP = m_context->FRAME.Block(); - - return FBP != 0x03f40; - } - - bool CU_TalesOfAbyss() - { - // full image blur and brightening - - uint32 FBP = m_context->FRAME.Block(); - - return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0; - } + bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + bool OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + void OO_DBZBT2(); + void OO_MajokkoALaMode2(); + + bool CU_DBZBT2(); + bool CU_MajokkoALaMode2(); + bool CU_TalesOfAbyss(); class Hacks { @@ -617,383 +121,37 @@ private: OO_Ptr m_oo; CU_Ptr m_cu; - Hacks() - : m_oi_map(m_oi_list) - , m_oo_map(m_oo_list) - , m_cu_map(m_cu_list) - , m_oi(NULL) - , m_oo(NULL) - , m_cu(NULL) - { - m_oi_list.push_back(HackEntry(CRC::FFXII, CRC::EU, &GSRendererHW::OI_FFXII)); - m_oi_list.push_back(HackEntry(CRC::FFX, CRC::RegionCount, &GSRendererHW::OI_FFX)); - m_oi_list.push_back(HackEntry(CRC::MetalSlug6, CRC::RegionCount, &GSRendererHW::OI_MetalSlug6)); - m_oi_list.push_back(HackEntry(CRC::GodOfWar2, CRC::RegionCount, &GSRendererHW::OI_GodOfWar2)); - m_oi_list.push_back(HackEntry(CRC::SimpsonsGame, CRC::RegionCount, &GSRendererHW::OI_SimpsonsGame)); - m_oi_list.push_back(HackEntry(CRC::RozenMaidenGebetGarden, CRC::RegionCount, &GSRendererHW::OI_RozenMaidenGebetGarden)); - m_oi_list.push_back(HackEntry(CRC::SpidermanWoS, CRC::RegionCount, &GSRendererHW::OI_SpidermanWoS)); - m_oi_list.push_back(HackEntry(CRC::TyTasmanianTiger, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger)); - m_oi_list.push_back(HackEntry(CRC::TyTasmanianTiger2, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger)); - m_oi_list.push_back(HackEntry(CRC::DigimonRumbleArena2, CRC::RegionCount, &GSRendererHW::OI_DigimonRumbleArena2)); - m_oi_list.push_back(HackEntry(CRC::StarWarsForceUnleashed, CRC::RegionCount, &GSRendererHW::OI_StarWarsForceUnleashed)); - m_oi_list.push_back(HackEntry(CRC::BlackHawkDown, CRC::RegionCount, &GSRendererHW::OI_BlackHawkDown)); - m_oi_list.push_back(HackEntry(CRC::XmenOriginsWolverine, CRC::RegionCount, &GSRendererHW::OI_XmenOriginsWolverine)); - m_oi_list.push_back(HackEntry(CRC::CallofDutyFinalFronts, CRC::RegionCount, &GSRendererHW::OI_CallofDutyFinalFronts)); - m_oi_list.push_back(HackEntry(CRC::SpyroNewBeginning, CRC::RegionCount, &GSRendererHW::OI_SpyroNewBeginning)); - m_oi_list.push_back(HackEntry(CRC::SpyroEternalNight, CRC::RegionCount, &GSRendererHW::OI_SpyroEternalNight)); - m_oi_list.push_back(HackEntry(CRC::TalesOfLegendia, CRC::RegionCount, &GSRendererHW::OI_TalesOfLegendia)); + Hacks(); - m_oo_list.push_back(HackEntry(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::OO_DBZBT2)); - m_oo_list.push_back(HackEntry(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::OO_MajokkoALaMode2)); - - m_cu_list.push_back(HackEntry(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::CU_DBZBT2)); - m_cu_list.push_back(HackEntry(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::CU_MajokkoALaMode2)); - m_cu_list.push_back(HackEntry(CRC::TalesOfAbyss, CRC::RegionCount, &GSRendererHW::CU_TalesOfAbyss)); - } - - void SetGame(const CRC::Game& game) - { - uint32 hash = (uint32)((game.region << 24) | game.title); - - m_oi = m_oi_map[hash]; - m_oo = m_oo_map[hash]; - m_cu = m_cu_map[hash]; - - if(game.flags & CRC::PointListPalette) - { - ASSERT(m_oi == NULL); - - m_oi = &GSRendererHW::OI_PointListPalette; - } - } + void SetGameCRC(const CRC::Game& game); } m_hacks; + virtual int GetPosX(const void* vertex) const = 0; + virtual int GetPosY(const void* vertex) const = 0; + virtual uint32 GetColor(const void* vertex) const = 0; + virtual void SetColor(void* vertex, uint32 c) const = 0; + #pragma endregion protected: GSTextureCache* m_tc; - void Reset() - { - // TODO: GSreset can come from the main thread too => crash - // m_tc->RemoveAll(); - - m_reset = true; - - GSRendererT::Reset(); - } - - void VSync(int field) - { - GSRendererT::VSync(field); - - m_tc->IncAge(); - m_dev->AgePool(); - - m_skip = 0; - - if(m_reset) - { - m_tc->RemoveAll(); - - m_reset = false; - } - } - - void ResetDevice() - { - m_tc->RemoveAll(); - - GSRendererT::ResetDevice(); - } - - GSTexture* GetOutput(int i) - { - const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB; - - GIFRegTEX0 TEX0; - - TEX0.TBP0 = DISPFB.Block(); - TEX0.TBW = DISPFB.FBW; - TEX0.PSM = DISPFB.PSM; - - // TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM); - - GSTexture* t = NULL; - - if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height)) - { - t = rt->m_texture; - - if(s_dump) - { - if(s_save && s_n >= s_saven) - { - t->Save(format("c:\\temp2\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM)); - } - - s_n++; - } - } - - return t; - } - - void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) - { - // printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM); - - m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r); - } - - void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) - { - // printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM); - - if(clut) return; // FIXME - - m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r); - } - - void Draw() - { -#ifndef DISABLE_CRC_HACKS - if(GSRendererT::IsBadFrame(m_skip, m_userhacks_skipdraw)) return; -#endif - - GSDrawingEnvironment& env = m_env; - GSDrawingContext* context = m_context; - - GIFRegTEX0 TEX0; - - TEX0.TBP0 = context->FRAME.Block(); - TEX0.TBW = context->FRAME.FBW; - TEX0.PSM = context->FRAME.PSM; - GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true); - - TEX0.TBP0 = context->ZBUF.Block(); - TEX0.TBW = context->FRAME.FBW; - TEX0.PSM = context->ZBUF.PSM; - - GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite()); - - if(!rt || !ds) - { - ASSERT(0); - return; - } - - GSTextureCache::Source* tex = NULL; - - if(PRIM->TME) - { - m_mem.m_clut.Read32(context->TEX0, env.TEXA); - - GSVector4i r; - - GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear()); - - tex = m_tc->LookupSource(context->TEX0, env.TEXA, r); - - if(!tex) return; - } - - if(s_dump) - { - uint64 frame = m_perfmon.GetFrame(); - - string s; - - if(s_save && s_n >= s_saven && tex) - { - s = format("c:\\temp2\\_%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds", - s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM, - (int)context->CLAMP.WMS, (int)context->CLAMP.WMT, - (int)context->CLAMP.MINU, (int)context->CLAMP.MAXU, - (int)context->CLAMP.MINV, (int)context->CLAMP.MAXV); - - tex->m_texture->Save(s, true); - - if(tex->m_palette) - { - s = format("c:\\temp2\\_%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM); - - tex->m_palette->Save(s, true); - } - } - - s_n++; - - if(s_save && s_n >= s_saven) - { - s = format("c:\\temp2\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); - - rt->m_texture->Save(s); - } - - if(s_savez && s_n >= s_saven) - { - s = format("c:\\temp2\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM); - - ds->m_texture->Save(s); - } - - s_n++; - } - - if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex)) - { - return; - } - - // skip alpha test if possible - - GIFRegTEST TEST = context->TEST; - GIFRegFRAME FRAME = context->FRAME; - GIFRegZBUF ZBUF = context->ZBUF; - - uint32 fm = context->FRAME.FBMSK; - uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; - - if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS) - { - if(GSRendererT::TryAlphaTest(fm, zm)) - { - context->TEST.ATST = ATST_ALWAYS; - } - } - - context->FRAME.FBMSK = fm; - context->ZBUF.ZMSK = zm != 0; - - // - - Draw(rt->m_texture, ds->m_texture, tex); - - // - - context->TEST = TEST; - context->FRAME = FRAME; - context->ZBUF = ZBUF; - - // - - GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in)); - - if(fm != 0xffffffff) - { - rt->m_valid = rt->m_valid.runion(r); - - m_tc->InvalidateVideoMem(context->offset.fb, r, false); - } - - if(zm != 0xffffffff) - { - ds->m_valid = ds->m_valid.runion(r); - - m_tc->InvalidateVideoMem(context->offset.zb, r, false); - } - - // - - if(m_hacks.m_oo) - { - (this->*m_hacks.m_oo)(); - } - - if(s_dump) - { - uint64 frame = m_perfmon.GetFrame(); - - string s; - - if(s_save && s_n >= s_saven) - { - s = format("c:\\temp2\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); - - rt->m_texture->Save(s); - } - - if(s_savez && s_n >= s_saven) - { - s = format("c:\\temp2\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM); - - ds->m_texture->Save(s); - } - - s_n++; - } -#ifdef DISABLE_HW_TEXTURE_CACHE - m_tc->Read(rt, r); -#endif - } - - virtual void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0; - - bool CanUpscale() - { - if(m_hacks.m_cu && !(this->*m_hacks.m_cu)()) - { - return false; - } - - return !m_nativeres && m_regs->PMODE.EN != 0; // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition) - } - - int GetUpscaleMultiplier() - { - return m_upscale_multiplier; - } + virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0; public: - GSRendererHW(GSTextureCache* tc) - : GSRendererT() - , m_tc(tc) - , m_width(1024) - , m_height(1024) - , m_skip(0) - , m_reset(false) - , m_upscale_multiplier(1) - { - m_nativeres = !!theApp.GetConfig("nativeres", 0); - m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1); - m_userhacks_skipdraw = theApp.GetConfig("UserHacks_SkipDraw", 0); + GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc); + virtual ~GSRendererHW(); - if(!m_nativeres) - { - m_width = theApp.GetConfig("resx", m_width); - m_height = theApp.GetConfig("resy", m_height); + void SetGameCRC(uint32 crc, int options); + bool CanUpscale(); + int GetUpscaleMultiplier(); - m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", m_upscale_multiplier); - - if(m_upscale_multiplier > 6) - { - m_upscale_multiplier = 1; // use the normal upscale math - } - else if(m_upscale_multiplier > 1) - { - m_width = 640 * m_upscale_multiplier; // 512 is also common, but this is not always detected right. - m_height = 512 * m_upscale_multiplier; // 448 is also common, but this is not always detected right. - } - } - else m_upscale_multiplier = 1; - } - - virtual ~GSRendererHW() - { - delete m_tc; - } - - void SetGameCRC(uint32 crc, int options) - { - GSRendererT::SetGameCRC(crc, options); - - m_hacks.SetGame(m_game); - - if(m_game.title == CRC::JackieChanAdv) - { - m_width = 1280; // TODO: uses a 1280px wide 16 bit render target, but this only fixes half of the problem - } - } + void Reset(); + void VSync(int field); + void ResetDevice(); + GSTexture* GetOutput(int i); + void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); + void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); + void Draw(); }; diff --git a/plugins/GSdx/GSRendererNull.h b/plugins/GSdx/GSRendererNull.h index 70c434fc82..864da08493 100644 --- a/plugins/GSdx/GSRendererNull.h +++ b/plugins/GSdx/GSRendererNull.h @@ -23,26 +23,33 @@ #include "GSRenderer.h" -class GSRendererNull : public GSRendererT +class GSRendererNull : public GSRenderer { + class GSVertexTraceNull : public GSVertexTrace + { + public: + GSVertexTraceNull(const GSState* state) : GSVertexTrace(state) {} + }; + protected: + template + void ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index) + { + } + void Draw() { } - GSTexture* GetOutput(int i) + GSTexture* GetOutput(int i) { return NULL; } public: - GSRendererNull() - : GSRendererT() - { - InitVertexKick(GSRendererNull); - } - - template void VertexKick(bool skip) + GSRendererNull() + : GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertexNull)) { + InitConvertVertex(GSRendererNull); } }; diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index ec33fbb493..a6424082a0 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -25,9 +25,11 @@ const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f); GSRendererSW::GSRendererSW(int threads) - : m_fzb(NULL) + : GSRenderer(new GSVertexTraceSW(this), sizeof(GSVertexSW)) + , m_fzb(NULL) { - InitVertexKick(GSRendererSW); + InitConvertVertex(GSRendererSW); + InitConvertIndex(); m_tc = new GSTextureCacheSW(this); @@ -62,7 +64,7 @@ void GSRendererSW::Reset() m_reset = true; - GSRendererT::Reset(); + GSRenderer::Reset(); } void GSRendererSW::VSync(int field) @@ -88,7 +90,7 @@ void GSRendererSW::VSync(int field) printf("m_syncpoint_count = %d\n", ((GSRasterizerList*)m_rl)->m_syncpoint_count); ((GSRasterizerList*)m_rl)->m_syncpoint_count = 0; printf("m_solidrect_count = %d\n", ((GSRasterizerList*)m_rl)->m_solidrect_count); ((GSRasterizerList*)m_rl)->m_solidrect_count = 0; */ - GSRendererT::VSync(field); + GSRenderer::VSync(field); m_tc->IncAge(); @@ -149,41 +151,187 @@ GSTexture* GSRendererSW::GetOutput(int i) return m_texture[i]; } +template +void GSRendererSW::ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index) +{ + GSVertexSW* RESTRICT v = &vertex[index]; + + GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - m_context->XYOFFSET; + GSVector4i zf = GSVector4i((int)std::min(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later + + v->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; + + if(tme) + { + GSVector4 t; + + if(fst) + { + t = GSVector4(GSVector4i::load(m_v.UV.u32[0]).upl16() << (16 - 4)); + } + else + { + t = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH); + t = t.xyxy(GSVector4::load(m_v.RGBAQ.Q)); + } + + v->t = t; + } + + v->c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7); + + if(prim == GS_SPRITE) + { + v->t.u32[3] = m_v.XYZ.Z; + } +} + +template +size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count) +{ + // memcpy(dst, src, sizeof(uint32) * count); return; + + // TODO: IsQuad + + const GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff; + + GSVector4 scissor = m_context->scissor.ex; + + const uint32* src_end = src + count; + uint32* dst_base = dst; + + while(src < src_end) + { + GSVector4 pmin, pmax; + + switch(prim) + { + case GS_POINTLIST: + pmin = v[src[0]].p; + pmax = v[src[0]].p; + break; + case GS_LINELIST: + case GS_LINESTRIP: + case GS_SPRITE: + pmin = v[src[0]].p.min(v[src[1]].p); + pmax = v[src[0]].p.max(v[src[1]].p); + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + pmin = v[src[0]].p.min(v[src[1]].p).min(v[src[2]].p); + pmax = v[src[0]].p.max(v[src[1]].p).max(v[src[2]].p); + break; + default: + __assume(0); + } + + GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); + GSVector4 tmp; + + switch(prim) + { + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + // are in line or just two of them are the same (cross product == 0) + tmp = (v[src[1]].p - v[src[0]].p) * (v[src[2]].p - v[src[0]].p).yxwz(); + test |= tmp == tmp.yxwz(); + break; + } + + switch(prim) + { + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + case GS_SPRITE: + test |= pmin.ceil() == pmax.ceil(); + break; + } + + bool pass = test.xyxy().allfalse(); + + switch(prim) + { + case GS_POINTLIST: + if(pass) {dst[0] = src[0]; dst++;} + src++; + break; + case GS_LINELIST: + case GS_LINESTRIP: + case GS_SPRITE: + if(pass) {dst[0] = src[0]; dst[1] = src[1]; dst += 2;} + src += 2; + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + if(pass) {dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst += 3;} + src += 3; + break; + default: + __assume(0); + } + } + + return dst - dst_base; +} + +void GSRendererSW::UpdateVertexKick() +{ + GSRenderer::UpdateVertexKick(); + + m_cif = m_ci[PRIM->PRIM]; +} + void GSRendererSW::Draw() { - if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass); - - GSVector4i scissor = GSVector4i(m_context->scissor.in); - GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil())); - - scissor.z = std::min(scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour - - GSVector4i r = bbox.rintersect(scissor); + const GSDrawingContext* context = m_context; shared_ptr data(new GSRasterizerData2(this)); + data->primclass = GSUtil::GetPrimClass(PRIM->PRIM); + data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.tail + sizeof(uint32) * m_index.tail, 32); + data->vertex = (GSVertexSW*)data->buff; + data->vertex_count = m_vertex.tail; + data->index = (uint32*)(data->buff + sizeof(GSVertexSW) * m_vertex.tail); + data->index_count = (this->*m_cif)(data->index, m_index.buff, m_index.tail); + + m_perfmon.Put(GSPerfMon::PrimNotRendered, (m_index.tail - data->index_count) / GSUtil::GetVertexCount(PRIM->PRIM)); + + if(data->index_count == 0) return; + + // TODO: merge these + + memcpy(data->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.tail); + + m_vt->Update(data->vertex, data->index, data->index_count, data->primclass); + + // + GSRasterizerData2* data2 = (GSRasterizerData2*)data.get(); - if(!GetScanlineGlobalData(data2)) - { - return; - } + if(!GetScanlineGlobalData(data2)) return; GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param; + GSVector4i scissor = GSVector4i(context->scissor.in); + GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil())); + + scissor.z = std::min(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour + data->scissor = scissor; data->bbox = bbox; - data->primclass = m_vt.m_primclass; - data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16); // TODO: detach m_vertices and reallocate later? - memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count); // TODO: m_vt.Update fetches all the vertices already, could also store them here - data->count = m_count; data->solidrect = gd->sel.IsSolidRect(); data->frame = m_perfmon.GetFrame(); // - vector* fb_pages = NULL; - vector* zb_pages = NULL; + uint32* fb_pages = NULL; + uint32* zb_pages = NULL; + + GSVector4i r = bbox.rintersect(scissor); if(gd->sel.fwrite) { @@ -215,9 +363,9 @@ void GSRendererSW::Draw() { if(gd->sel.fwrite) { - for(vector::iterator i = fb_pages->begin(); i != fb_pages->end(); i++) + for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++) { - if(m_fzb_pages[*i] & 0xffff0000) // already used as a z-buffer + if(m_fzb_pages[*p] & 0xffff0000) // already used as a z-buffer { data->syncpoint = true; @@ -231,9 +379,9 @@ void GSRendererSW::Draw() { if(gd->sel.zwrite) { - for(vector::iterator i = zb_pages->begin(); i != zb_pages->end(); i++) + for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++) { - if(m_fzb_pages[*i] & 0x0000ffff) // already used as a frame buffer + if(m_fzb_pages[*p] & 0x0000ffff) // already used as a frame buffer { data->syncpoint = true; @@ -307,18 +455,6 @@ void GSRendererSW::Draw() m_rl->Queue(data); } - int prims = 0; - - switch(data->primclass) - { - case GS_POINT_CLASS: prims = data->count; break; - case GS_LINE_CLASS: prims = data->count / 2; break; - case GS_TRIANGLE_CLASS: prims = data->count / 3; break; - case GS_SPRITE_CLASS: prims = data->count / 2; break; - } - - m_perfmon.Put(GSPerfMon::Prim, prims); - /* if(0)//stats.ticks > 5000000) { @@ -344,15 +480,15 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS { GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM); - vector* pages = o->GetPages(r); + uint32* pages = o->GetPages(r); m_tc->InvalidatePages(pages, o->psm); // check if the changing pages either used as a texture or a target - for(vector::const_iterator i = pages->begin(); i != pages->end(); i++) + for(const uint32* p = pages; *p != GSOffset::EOP; p++) { - uint32 page = *i; + uint32 page = *p; //while(m_fzb_pages[page] | m_tex_pages[page]) _mm_pause(); @@ -364,20 +500,20 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS } } - delete pages; + delete [] pages; } void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) { GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM); - vector* pages = o->GetPages(r); + uint32* pages = o->GetPages(r); - for(vector::const_iterator i = pages->begin(); i != pages->end(); i++) + for(const uint32* p = pages; *p != GSOffset::EOP; p++) { - //while(m_fzb_pages[*i]) _mm_pause(); + //while(m_fzb_pages[*p]) _mm_pause(); - if(m_fzb_pages[*i]) + if(m_fzb_pages[*p]) { Sync(6); @@ -385,27 +521,27 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS } } - delete pages; + delete [] pages; } -void GSRendererSW::UsePages(const vector* pages, int type) +void GSRendererSW::UsePages(const uint32* pages, int type) { if(type < 2) { - for(vector::const_iterator i = pages->begin(); i != pages->end(); i++) + for(const uint32* p = pages; *p != GSOffset::EOP; p++) { - ASSERT(((short*)&m_fzb_pages[*i])[type] < SHRT_MAX); + ASSERT(((short*)&m_fzb_pages[*p])[type] < SHRT_MAX); - _InterlockedIncrement16((short*)&m_fzb_pages[*i] + type); + _InterlockedIncrement16((short*)&m_fzb_pages[*p] + type); } } else { - for(vector::const_iterator i = pages->begin(); i != pages->end(); i++) + for(const uint32* p = pages; *p != GSOffset::EOP; p++) { - //while(m_fzb_pages[*i]) _mm_pause(); + //while(m_fzb_pages[*p]) _mm_pause(); - if(m_fzb_pages[*i]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D) + if(m_fzb_pages[*p]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D) { Sync(7); @@ -413,33 +549,33 @@ void GSRendererSW::UsePages(const vector* pages, int type) } } - for(vector::const_iterator i = pages->begin(); i != pages->end(); i++) + for(const uint32* p = pages; *p != GSOffset::EOP; p++) { - ASSERT(m_tex_pages[*i] < SHRT_MAX); + ASSERT(m_tex_pages[*p] < SHRT_MAX); - _InterlockedIncrement16((short*)&m_tex_pages[*i]); // remember which texture pages are used + _InterlockedIncrement16((short*)&m_tex_pages[*p]); // remember which texture pages are used } } } -void GSRendererSW::ReleasePages(const vector* pages, int type) +void GSRendererSW::ReleasePages(const uint32* pages, int type) { if(type < 2) { - for(vector::const_iterator i = pages->begin(); i != pages->end(); i++) + for(const uint32* p = pages; *p != GSOffset::EOP; p++) { - ASSERT(((short*)&m_fzb_pages[*i])[type] > 0); + ASSERT(((short*)&m_fzb_pages[*p])[type] > 0); - _InterlockedDecrement16((short*)&m_fzb_pages[*i] + type); + _InterlockedDecrement16((short*)&m_fzb_pages[*p] + type); } } else { - for(vector::const_iterator i = pages->begin(); i != pages->end(); i++) + for(const uint32* p = pages; *p != GSOffset::EOP; p++) { - ASSERT(m_tex_pages[*i] > 0); + ASSERT(m_tex_pages[*p] > 0); - _InterlockedDecrement16((short*)&m_tex_pages[*i]); + _InterlockedDecrement16((short*)&m_tex_pages[*p]); } } } @@ -452,7 +588,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) const GSDrawingEnvironment& env = m_env; const GSDrawingContext* context = m_context; - const GS_PRIM_CLASS primclass = m_vt.m_primclass; + const GS_PRIM_CLASS primclass = m_vt->m_primclass; gd.vm = m_mem.m_vm8; @@ -470,7 +606,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) gd.sel.atst = ATST_ALWAYS; gd.sel.tfx = TFX_NONE; gd.sel.ababcd = 255; - gd.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0; + gd.sel.prim = primclass; uint32 fm = context->FRAME.FBMSK; uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; @@ -529,7 +665,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) { gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt; - if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff) + if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt->m_eq.rgba != 0xffff) { gd.sel.iip = PRIM->IIP; } @@ -539,7 +675,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) gd.sel.tfx = context->TEX0.TFX; gd.sel.tcc = context->TEX0.TCC; gd.sel.fst = PRIM->FST; - gd.sel.ltf = m_vt.IsLinear(); + gd.sel.ltf = m_vt->IsLinear(); if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) { @@ -553,7 +689,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) gd.sel.wms = context->CLAMP.WMS; gd.sel.wmt = context->CLAMP.WMT; - if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128))) + if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt->m_eq.rgba == 0xffff && m_vt->m_min.c.eq(GSVector4i(128))) { // modulate does not do anything when vertex color is 0x80 @@ -572,7 +708,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) if(!t->Update(r)) {ASSERT(0); return false;} - if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0) + if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt->m_lod.x > 0) { uint64 frame = m_perfmon.GetFrame(); @@ -589,7 +725,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) gd.tex[0] = t->m_buff; gd.sel.tw = t->m_tw - 3; - if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0) + if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt->m_lod.y > 0) { // TEX1.MMIN // 000 p @@ -599,13 +735,13 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) // 100 l round // 101 l tri - if(m_vt.m_lod.x > 0) + if(m_vt->m_lod.x > 0) { gd.sel.ltf = context->TEX1.MMIN >> 2; } else { - // TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt.m_lod.x <= 0 && m_vt.m_lod.y > 0 + // TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt->m_lod.x <= 0 && m_vt->m_lod.y > 0 } gd.sel.mmin = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri @@ -614,9 +750,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) int mxl = (std::min((int)context->TEX1.MXL, 6) << 16); int k = context->TEX1.K << 12; - if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL) + if((int)m_vt->m_lod.x >= (int)context->TEX1.MXL) { - k = (int)m_vt.m_lod.x << 16; // set lod to max level + k = (int)m_vt->m_lod.x << 16; // set lod to max level gd.sel.lcm = 1; // lod is constant gd.sel.mmin = 1; // tri-linear is meaningless @@ -630,7 +766,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) if(gd.sel.fst) { ASSERT(gd.sel.lcm == 1); - ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu) + ASSERT(((m_vt->m_min.t.uph(m_vt->m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu) gd.sel.lcm = 1; } @@ -659,8 +795,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) GIFRegTEX0 MIP_TEX0 = context->TEX0; GIFRegCLAMP MIP_CLAMP = context->CLAMP; - GSVector4 tmin = m_vt.m_min.t; - GSVector4 tmax = m_vt.m_max.t; + GSVector4 tmin = m_vt->m_min.t; + GSVector4 tmax = m_vt->m_max.t; static int s_counter = 0; @@ -710,8 +846,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) MIP_CLAMP.MAXU >>= 1; MIP_CLAMP.MAXV >>= 1; - m_vt.m_min.t *= 0.5f; - m_vt.m_max.t *= 0.5f; + m_vt->m_min.t *= 0.5f; + m_vt->m_max.t *= 0.5f; GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3); @@ -753,8 +889,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) s_counter++; - m_vt.m_min.t = tmin; - m_vt.m_max.t = tmax; + m_vt->m_min.t = tmin; + m_vt->m_max.t = tmax; } else { @@ -762,17 +898,19 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) { // skip per pixel division if q is constant - GSVertexSW* v = m_vertices; + GSVertexSW* RESTRICT v = data2->vertex; - if(m_vt.m_eq.q) + if(m_vt->m_eq.q) { gd.sel.fst = 1; - if(v[0].t.z != 1.0f) - { - GSVector4 w = v[0].t.zzzz().rcpnr(); + const GSVector4& t = v[data2->index[0]].t; - for(int i = 0, j = m_count; i < j; i++) + if(t.z != 1.0f) + { + GSVector4 w = t.zzzz().rcpnr(); + + for(int i = 0, j = data2->vertex_count; i < j; i++) { GSVector4 t = v[i].t; @@ -784,7 +922,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) { gd.sel.fst = 1; - for(int i = 0, j = m_count; i < j; i += 2) + for(int i = 0, j = data2->vertex_count; i < j; i += 2) { GSVector4 t0 = v[i + 0].t; GSVector4 t1 = v[i + 1].t; @@ -805,9 +943,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) GSVector4 half(0x8000, 0x8000); - GSVertexSW* v = m_vertices; + GSVertexSW* RESTRICT v = data2->vertex; - for(int i = 0, j = m_count; i < j; i++) + for(int i = 0, j = data2->vertex_count; i < j; i++) { GSVector4 t = v[i].t; @@ -939,7 +1077,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) { gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt; gd.sel.ztst = ztest ? context->TEST.ZTST : ZTST_ALWAYS; - gd.sel.zoverflow = GSVector4i(m_vt.m_max.p).z == 0x80000000; + gd.sel.zoverflow = GSVector4i(m_vt->m_max.p).z == 0x80000000; } gd.fm = GSVector4i(fm); @@ -969,176 +1107,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2) return true; } -template -void GSRendererSW::VertexKick(bool skip) -{ - const GSDrawingContext* context = m_context; - - GSVertexSW& dst = m_vl.AddTail(); - - GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET; - GSVector4i zf = GSVector4i((int)std::min(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later - - dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; - - if(tme) - { - GSVector4 t; - - if(fst) - { - t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4)); - } - else - { - t = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH); - t = t.xyxy(GSVector4::load(m_v.RGBAQ.Q)); - } - - dst.t = t; - } - - dst.c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7); - - if(prim == GS_SPRITE) - { - dst.t.u32[3] = m_v.XYZ.Z; - } - - int count = 0; - - if(GSVertexSW* v = DrawingKick(skip, count)) - { -if(!m_dump) -{ - GSVector4 pmin, pmax; - - switch(prim) - { - case GS_POINTLIST: - pmin = v[0].p; - pmax = v[0].p; - break; - case GS_LINELIST: - case GS_LINESTRIP: - case GS_SPRITE: - pmin = v[0].p.min(v[1].p); - pmax = v[0].p.max(v[1].p); - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - pmin = v[0].p.min(v[1].p).min(v[2].p); - pmax = v[0].p.max(v[1].p).max(v[2].p); - break; - } - - GSVector4 scissor = context->scissor.ex; - - GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); - - switch(prim) - { - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - case GS_SPRITE: - test |= pmin.ceil() == pmax.ceil(); - break; - } - - switch(prim) - { - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - // are in line or just two of them are the same (cross product == 0) - GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz(); - test |= tmp == tmp.yxwz(); - break; - } - - if(test.mask() & 3) - { - return; - } -} - switch(prim) - { - case GS_POINTLIST: - break; - case GS_LINELIST: - case GS_LINESTRIP: - if(PRIM->IIP == 0) {v[0].c = v[1].c;} - break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;} - break; - case GS_SPRITE: - break; - } - - if(m_count < 30 && m_count >= 3) - { - GSVertexSW* v = &m_vertices[m_count - 3]; - - int tl = 0; - int br = 0; - - bool isquad = false; - - switch(prim) - { - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - case GS_TRIANGLELIST: - isquad = GSVertexSW::IsQuad(v, tl, br); - break; - } - - if(isquad) - { - m_count -= 3; - - if(m_count > 0) - { - tl += m_count; - br += m_count; - - Flush(); - } - - if(tl != 0) m_vertices[0] = m_vertices[tl]; - if(br != 1) m_vertices[1] = m_vertices[br]; - - m_vertices[0].t.u32[3] = m_v.XYZ.Z; - m_vertices[1].t.u32[3] = m_v.XYZ.Z; - - m_count = 2; - - uint32 tmp = PRIM->PRIM; - PRIM->PRIM = GS_SPRITE; - - Flush(); - - PRIM->PRIM = tmp; - - m_perfmon.Put(GSPerfMon::Quad, 1); - - return; - } - } - - m_count += count; - - // Flush(); - } -} - -// GSRendererSW::GSRasterizerData2 - GSRendererSW::GSRasterizerData2::GSRasterizerData2(GSRendererSW* parent) : m_parent(parent) , m_fb_pages(NULL) @@ -1192,7 +1160,7 @@ GSRendererSW::GSRasterizerData2::~GSRasterizerData2() m_parent->m_perfmon.Put(GSPerfMon::Fillrate, pixels); } -void GSRendererSW::GSRasterizerData2::UseTargetPages(const vector* fb_pages, const vector* zb_pages) +void GSRendererSW::GSRasterizerData2::UseTargetPages(const uint32* fb_pages, const uint32* zb_pages) { if(m_using_pages) return; @@ -1218,9 +1186,7 @@ void GSRendererSW::GSRasterizerData2::UseSourcePages(GSTextureCacheSW::Texture* { ASSERT(m_tex_pages[level] == NULL); - const vector* pages = t->m_pages.n; + m_tex_pages[level] = t->m_pages.n; - m_tex_pages[level] = pages; - - m_parent->UsePages(pages, 2); + m_parent->UsePages(t->m_pages.n, 2); } diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index 21e3274b8d..983cc2df4e 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -25,21 +25,21 @@ #include "GSTextureCacheSW.h" #include "GSDrawScanline.h" -class GSRendererSW : public GSRendererT +class GSRendererSW : public GSRenderer { class GSRasterizerData2 : public GSRasterizerData { GSRendererSW* m_parent; - const vector* m_fb_pages; - const vector* m_zb_pages; - const vector* m_tex_pages[7]; + const uint32* m_fb_pages; + const uint32* m_zb_pages; + const uint32* m_tex_pages[7]; bool m_using_pages; public: GSRasterizerData2(GSRendererSW* parent); virtual ~GSRasterizerData2(); - void UseTargetPages(const vector* fb_pages, const vector* zb_pages); + void UseTargetPages(const uint32* fb_pages, const uint32* zb_pages); void UseSourcePages(GSTextureCacheSW::Texture* t, int level); }; @@ -63,15 +63,37 @@ protected: void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false); - void UsePages(const vector* pages, int type); - void ReleasePages(const vector* pages, int type); + void UsePages(const uint32* pages, int type); + void ReleasePages(const uint32* pages, int type); bool GetScanlineGlobalData(GSRasterizerData2* data2); + typedef size_t (GSState::*ConvertIndexPtr)(uint32* RESTRICT dst, const uint32* RESTRICT src, int count); + + ConvertIndexPtr m_ci[8], m_cif; + + #define InitConvertIndex2(P) \ + m_ci[P] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex

; \ + + #define InitConvertIndex() \ + InitConvertIndex2(GS_POINTLIST) \ + InitConvertIndex2(GS_LINELIST) \ + InitConvertIndex2(GS_LINESTRIP) \ + InitConvertIndex2(GS_TRIANGLELIST) \ + InitConvertIndex2(GS_TRIANGLESTRIP) \ + InitConvertIndex2(GS_TRIANGLEFAN) \ + InitConvertIndex2(GS_SPRITE) \ + InitConvertIndex2(GS_INVALID) \ + + template + void ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index); + + template + size_t ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count); + + void UpdateVertexKick(); + public: GSRendererSW(int threads); virtual ~GSRendererSW(); - - template - void VertexKick(bool skip); }; diff --git a/plugins/GSdx/GSScanlineEnvironment.h b/plugins/GSdx/GSScanlineEnvironment.h index f37e2fd487..f6ecaced11 100644 --- a/plugins/GSdx/GSScanlineEnvironment.h +++ b/plugins/GSdx/GSScanlineEnvironment.h @@ -61,12 +61,12 @@ union GSScanlineSelector uint32 colclamp:1; // 43 uint32 fba:1; // 44 uint32 dthe:1; // 45 - uint32 sprite:1; // 46 - uint32 edge:1; // 47 + uint32 prim:2; // 46 - uint32 tw:3; // 48 (encodes values between 3 -> 10, texture cache makes sure it is at least 3) - uint32 lcm:1; // 49 - uint32 mmin:2; // 50 + uint32 edge:1; // 48 + uint32 tw:3; // 49 (encodes values between 3 -> 10, texture cache makes sure it is at least 3) + uint32 lcm:1; // 50 + uint32 mmin:2; // 51 }; struct @@ -92,7 +92,7 @@ union GSScanlineSelector bool IsSolidRect() const { - return sprite + return prim == GS_SPRITE_CLASS && iip == 0 && tfx == TFX_NONE && abe == 0 diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp index 5e2d664bcd..b644a6f79d 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp @@ -29,7 +29,7 @@ using namespace Xbyak; void GSSetupPrimCodeGenerator::Generate() { - if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip) + if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) { for(int i = 0; i < 5; i++) { @@ -53,7 +53,7 @@ void GSSetupPrimCodeGenerator::Depth() return; } - if(!m_sel.sprite) + if(m_sel.prim != GS_SPRITE_CLASS) { // GSVector4 p = dscan.p; @@ -107,7 +107,7 @@ void GSSetupPrimCodeGenerator::Depth() } else { - // GSVector4 p = vertices[0].p; + // GSVector4 p = vertex.p; vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); @@ -312,7 +312,7 @@ void GSSetupPrimCodeGenerator::Color() } else { - // GSVector4i c = GSVector4i(vertices[0].c); + // GSVector4i c = GSVector4i(vertex.c); vcvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]); diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp index 5066928b20..af03d3c5b0 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp @@ -29,7 +29,7 @@ using namespace Xbyak; void GSSetupPrimCodeGenerator::Generate() { - if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip) + if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) { for(int i = 0; i < 5; i++) { @@ -53,7 +53,7 @@ void GSSetupPrimCodeGenerator::Depth() return; } - if(!m_sel.sprite) + if(m_sel.prim != GS_SPRITE_CLASS) { // GSVector4 p = dscan.p; @@ -112,7 +112,7 @@ void GSSetupPrimCodeGenerator::Depth() } else { - // GSVector4 p = vertices[0].p; + // GSVector4 p = vertex.p; movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); @@ -327,7 +327,7 @@ void GSSetupPrimCodeGenerator::Color() } else { - // GSVector4i c = GSVector4i(vertices[0].c); + // GSVector4i c = GSVector4i(vertex.c); cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]); diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 3e96c58469..53dbae6e80 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -25,18 +25,37 @@ //#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering //#define Offset_UV // Fixes / breaks various titles -GSState::GSState() +GSState::GSState(GSVertexTrace* vt, size_t vertex_stride) : m_version(6) , m_mt(false) , m_irq(NULL) , m_path3hack(0) , m_regs(NULL) - , m_q(1.0f) - , m_vprim(1) , m_crc(0) , m_options(0) , m_frameskip(0) + , m_vt(vt) { + memset(&m_v, 0, sizeof(m_v)); + m_q = 1.0f; + memset(&m_vertex, 0, sizeof(m_vertex)); + memset(&m_index, 0, sizeof(m_index)); + + m_vertex.stride = vertex_stride; + + GrowVertexBuffer(); + + m_dk[GS_POINTLIST] = (DrawingKickPtr)&GSState::DrawingKick; + m_dk[GS_LINELIST] = (DrawingKickPtr)&GSState::DrawingKick; + m_dk[GS_LINESTRIP] = (DrawingKickPtr)&GSState::DrawingKick; + m_dk[GS_TRIANGLELIST] = (DrawingKickPtr)&GSState::DrawingKick; + m_dk[GS_TRIANGLESTRIP] = (DrawingKickPtr)&GSState::DrawingKick; + m_dk[GS_TRIANGLEFAN] = (DrawingKickPtr)&GSState::DrawingKick; + m_dk[GS_SPRITE] = (DrawingKickPtr)&GSState::DrawingKick; + m_dk[GS_INVALID] = (DrawingKickPtr)&GSState::DrawingKick; + + memset(m_cv, 0, sizeof(m_cv)); + m_sssize = 0; m_sssize += sizeof(m_version); @@ -78,7 +97,7 @@ GSState::GSState() m_sssize += sizeof(m_v.ST); m_sssize += sizeof(m_v.UV); m_sssize += sizeof(m_v.XYZ); - m_sssize += sizeof(m_v.FOG); + m_sssize += sizeof(m_v.FOG); // obsolete m_sssize += sizeof(m_tr.x); m_sssize += sizeof(m_tr.y); @@ -97,6 +116,8 @@ GSState::GSState() GSState::~GSState() { + if(m_vertex.buff) _aligned_free(m_vertex.buff); + if(m_index.buff) _aligned_free(m_index.buff); } void GSState::SetRegsMem(uint8* basemem) @@ -195,6 +216,10 @@ void GSState::Reset() m_env.Reset(); m_context = &m_env.CTXT[0]; + + m_vertex.head = 0; + m_vertex.tail = 0; + m_index.tail = 0; } void GSState::ResetHandlers() @@ -472,7 +497,7 @@ __forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRIC m_v.XYZ.Z = r->XYZF2.Z; m_v.FOG.F = r->XYZF2.F; - VertexKick(r->XYZF2.ADC); + VertexKick(r->XYZF2.Skip()); } __forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r) @@ -481,7 +506,7 @@ __forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT m_v.XYZ.Y = r->XYZ2.Y; m_v.XYZ.Z = r->XYZ2.Z; - VertexKick(r->XYZ2.ADC); + VertexKick(r->XYZ2.Skip()); } __forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r) @@ -509,7 +534,7 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim) { // ASSERT(r->PRIM.PRIM < 7); - if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim.PRIM)) + if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim.PRIM)) // NOTE: assume strips/fans are converted to lists { if((m_env.PRIM.u32[0] ^ prim.u32[0]) & 0x7f8) // all fields except PRIM { @@ -528,7 +553,7 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim) UpdateVertexKick(); - ResetPrim(); + m_vertex.head = m_vertex.tail = m_index.tail > 0 ? m_index.buff[m_index.tail - 1] + 1 : 0; // remove unused vertices from the end of the vertex buffer } void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r) @@ -559,8 +584,8 @@ __forceinline void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r) m_v.UV.u32[0] = r->UV.u32[0] & 0x3fff3fff; #ifdef Offset_UV - m_v.UV.U = min((uint16)m_v.UV.U, (uint16)(m_v.UV.U - 4U)); - m_v.UV.V = min((uint16)m_v.UV.V, (uint16)(m_v.UV.V - 4U)); + m_v.UV.U = min((uint16)m_v.UV.U, (uint16)(m_v._UV.U - 4U)); + m_v.UV.V = min((uint16)m_v.UV.V, (uint16)(m_v._UV.V - 4U)); #endif } @@ -576,14 +601,14 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r) m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff; m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000; - VertexKick(false); + VertexKick(0); } void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r) { m_v.XYZ = (GSVector4i)r->XYZ; - VertexKick(false); + VertexKick(0); } void GSState::ApplyTEX0(int i, GIFRegTEX0& TEX0) @@ -697,7 +722,7 @@ template void GSState::GIFRegHandlerCLAMP(const GIFReg* RESTRICT r) void GSState::GIFRegHandlerFOG(const GIFReg* RESTRICT r) { - m_v.FOG = (GSVector4i)r->FOG; + m_v.FOG.u32[1] = r->FOG.u32[1]; } void GSState::GIFRegHandlerXYZF3(const GIFReg* RESTRICT r) @@ -712,14 +737,14 @@ void GSState::GIFRegHandlerXYZF3(const GIFReg* RESTRICT r) m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff; m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000; - VertexKick(true); + VertexKick(1); } void GSState::GIFRegHandlerXYZ3(const GIFReg* RESTRICT r) { m_v.XYZ = (GSVector4i)r->XYZ; - VertexKick(true); + VertexKick(1); } void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r) @@ -1174,6 +1199,26 @@ void GSState::FlushWrite() */ } +void GSState::FlushPrim() +{ + if(m_index.tail > 0) + { + if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3) + { + // FIXME: berserk fpsm = 27 (8H) + + Draw(); + + m_perfmon.Put(GSPerfMon::Draw, 1); + m_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM)); + } + } + + m_vertex.head = 0; + m_vertex.tail = 0; + m_index.tail = 0; +} + // void GSState::Write(const uint8* mem, int len) @@ -1881,6 +1926,446 @@ void GSState::SetGameCRC(uint32 crc, int options) m_game = CRC::Lookup(crc); } +// + +void GSState::UpdateVertexKick() +{ + m_dkf = m_dk[PRIM->PRIM]; + m_cvf = m_cv[PRIM->PRIM][PRIM->TME][PRIM->FST]; + m_vertex.n = GSUtil::GetVertexCount(PRIM->PRIM); +} + +void GSState::GrowVertexBuffer() +{ + int maxcount = std::max(m_vertex.maxcount * 3 / 2, 10000); + + uint8* vertex = (uint8*)_aligned_malloc(m_vertex.stride * maxcount, 16); + uint32* index = (uint32*)_aligned_malloc(sizeof(uint32) * maxcount * 3, 16); // worst case is slightly less than vertex number * 3 + + if(m_vertex.buff != NULL) + { + memcpy(vertex, m_vertex.buff, m_vertex.stride * m_vertex.tail); + + _aligned_free(m_vertex.buff); + } + + if(m_index.buff != NULL) + { + memcpy(index, m_index.buff, sizeof(uint32) * m_index.tail); + + _aligned_free(m_index.buff); + } + + m_vertex.buff = vertex; + m_vertex.maxcount = maxcount - 100; // -100 because skipped vertices don't trigger growing the vertex buffer (VertexKick should be as fast as possible) + m_index.buff = index; +} + +void GSState::VertexKick(uint32 skip) +{ + (this->*m_cvf)(m_vertex.buff, m_vertex.tail); + + if(++m_vertex.tail - m_vertex.head >= m_vertex.n) + { + (this->*m_dkf)(skip); + } +} + +template +void GSState::DrawingKick(uint32 skip) +{ + size_t head = m_vertex.head; + size_t tail = m_vertex.tail; + + if(skip) + { + switch(prim) + { + case GS_POINTLIST: + case GS_LINELIST: + case GS_TRIANGLELIST: + case GS_SPRITE: + case GS_INVALID: + m_vertex.tail = head; + break; + case GS_LINESTRIP: + case GS_TRIANGLESTRIP: + m_vertex.head = head + 1; + break; + case GS_TRIANGLEFAN: + break; + default: + __assume(0); + } + + return; + } + + if(tail >= m_vertex.maxcount) + { + GrowVertexBuffer(); + } + + uint32* RESTRICT buff = &m_index.buff[m_index.tail]; + + switch(prim) + { + case GS_POINTLIST: + buff[0] = head + 0; + m_vertex.head = head + 1; + m_index.tail += 1; + break; + case GS_LINELIST: + buff[0] = head + 0; + buff[1] = head + 1; + m_vertex.head = head + 2; + m_index.tail += 2; + break; + case GS_LINESTRIP: + buff[0] = head + 0; + buff[1] = head + 1; + m_vertex.head = head + 1; + m_index.tail += 2; + break; + case GS_TRIANGLELIST: + buff[0] = head + 0; + buff[1] = head + 1; + buff[2] = head + 2; + m_vertex.head = head + 3; + m_index.tail += 3; + break; + case GS_TRIANGLESTRIP: + buff[0] = head + 0; + buff[1] = head + 1; + buff[2] = head + 2; + m_vertex.head = head + 1; + m_index.tail += 3; + break; + case GS_TRIANGLEFAN: + buff[0] = head + 0; + buff[1] = tail - 2; + buff[2] = tail - 1; + m_index.tail += 3; + break; + case GS_SPRITE: + buff[0] = head + 0; + buff[1] = head + 1; + m_vertex.head = head + 2; + m_index.tail += 2; + break; + case GS_INVALID: + m_vertex.tail = head; + break; + default: + __assume(0); + } +} + +void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear) +{ + int tw = TEX0.TW; + int th = TEX0.TH; + + int w = 1 << tw; + int h = 1 << th; + + GSVector4i tr(0, 0, w, h); + + int wms = CLAMP.WMS; + int wmt = CLAMP.WMT; + + int minu = (int)CLAMP.MINU; + int minv = (int)CLAMP.MINV; + int maxu = (int)CLAMP.MAXU; + int maxv = (int)CLAMP.MAXV; + + GSVector4i vr = tr; + + switch(wms) + { + case CLAMP_REPEAT: + break; + case CLAMP_CLAMP: + break; + case CLAMP_REGION_CLAMP: + if(vr.x < minu) vr.x = minu; + if(vr.z > maxu + 1) vr.z = maxu + 1; + break; + case CLAMP_REGION_REPEAT: + vr.x = maxu; + vr.z = vr.x + (minu + 1); + break; + default: + __assume(0); + } + + switch(wmt) + { + case CLAMP_REPEAT: + break; + case CLAMP_CLAMP: + break; + case CLAMP_REGION_CLAMP: + if(vr.y < minv) vr.y = minv; + if(vr.w > maxv + 1) vr.w = maxv + 1; + break; + case CLAMP_REGION_REPEAT: + vr.y = maxv; + vr.w = vr.y + (minv + 1); + break; + default: + __assume(0); + } + + if(wms + wmt < 6) + { + GSVector4 st = m_vt->m_min.t.xyxy(m_vt->m_max.t); + + if(linear) + { + st += GSVector4(-0x8000, 0x8000).xxyy(); + } + + GSVector4i uv = GSVector4i(st).sra32(16); + + GSVector4i u, v; + + int mask = 0; + + if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT) + { + u = uv & GSVector4i::xffffffff().srl32(32 - tw); + v = uv & GSVector4i::xffffffff().srl32(32 - th); + + GSVector4i uu = uv.sra32(tw); + GSVector4i vv = uv.sra32(th); + + mask = (uu.upl32(vv) == uu.uph32(vv)).mask(); + } + + uv = uv.rintersect(tr); + + switch(wms) + { + case CLAMP_REPEAT: + if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;} + break; + case CLAMP_CLAMP: + case CLAMP_REGION_CLAMP: + if(vr.x < uv.x) vr.x = uv.x; + if(vr.z > uv.z + 1) vr.z = uv.z + 1; + break; + case CLAMP_REGION_REPEAT: + break; + default: + __assume(0); + } + + switch(wmt) + { + case CLAMP_REPEAT: + if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;} + break; + case CLAMP_CLAMP: + case CLAMP_REGION_CLAMP: + if(vr.y < uv.y) vr.y = uv.y; + if(vr.w > uv.w + 1) vr.w = uv.w + 1; + break; + case CLAMP_REGION_REPEAT: + break; + default: + __assume(0); + } + } + + r = vr.rintersect(tr); +} + +void GSState::GetAlphaMinMax() +{ + if(m_vt->m_alpha.valid) + { + return; + } + + const GSDrawingEnvironment& env = m_env; + const GSDrawingContext* context = m_context; + + GSVector4i a = m_vt->m_min.c.uph32(m_vt->m_max.c).zzww(); + + if(PRIM->TME && context->TEX0.TCC) + { + switch(GSLocalMemory::m_psm[context->TEX0.PSM].fmt) + { + case 0: + a.y = 0; + a.w = 0xff; + break; + case 1: + a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0; + a.w = env.TEXA.TA0; + break; + case 2: + a.y = env.TEXA.AEM ? 0 : min(env.TEXA.TA0, env.TEXA.TA1); + a.w = max(env.TEXA.TA0, env.TEXA.TA1); + break; + case 3: + m_mem.m_clut.GetAlphaMinMax32(a.y, a.w); + break; + default: + __assume(0); + } + + switch(context->TEX0.TFX) + { + case TFX_MODULATE: + a.x = (a.x * a.y) >> 7; + a.z = (a.z * a.w) >> 7; + if(a.x > 0xff) a.x = 0xff; + if(a.z > 0xff) a.z = 0xff; + break; + case TFX_DECAL: + a.x = a.y; + a.z = a.w; + break; + case TFX_HIGHLIGHT: + a.x = a.x + a.y; + a.z = a.z + a.w; + if(a.x > 0xff) a.x = 0xff; + if(a.z > 0xff) a.z = 0xff; + break; + case TFX_HIGHLIGHT2: + a.x = a.y; + a.z = a.w; + break; + default: + __assume(0); + } + } + + m_vt->m_alpha.min = a.x; + m_vt->m_alpha.max = a.z; + m_vt->m_alpha.valid = true; +} + +bool GSState::TryAlphaTest(uint32& fm, uint32& zm) +{ + const GSDrawingContext* context = m_context; + + bool pass = true; + + if(context->TEST.ATST == ATST_NEVER) + { + pass = false; + } + else if(context->TEST.ATST != ATST_ALWAYS) + { + GetAlphaMinMax(); + + int amin = m_vt->m_alpha.min; + int amax = m_vt->m_alpha.max; + + int aref = context->TEST.AREF; + + switch(context->TEST.ATST) + { + case ATST_NEVER: + pass = false; + break; + case ATST_ALWAYS: + pass = true; + break; + case ATST_LESS: + if(amax < aref) pass = true; + else if(amin >= aref) pass = false; + else return false; + break; + case ATST_LEQUAL: + if(amax <= aref) pass = true; + else if(amin > aref) pass = false; + else return false; + break; + case ATST_EQUAL: + if(amin == aref && amax == aref) pass = true; + else if(amin > aref || amax < aref) pass = false; + else return false; + break; + case ATST_GEQUAL: + if(amin >= aref) pass = true; + else if(amax < aref) pass = false; + else return false; + break; + case ATST_GREATER: + if(amin > aref) pass = true; + else if(amax <= aref) pass = false; + else return false; + break; + case ATST_NOTEQUAL: + if(amin == aref && amax == aref) pass = false; + else if(amin > aref || amax < aref) pass = true; + else return false; + break; + default: + __assume(0); + } + } + + if(!pass) + { + switch(context->TEST.AFAIL) + { + case AFAIL_KEEP: fm = zm = 0xffffffff; break; + case AFAIL_FB_ONLY: zm = 0xffffffff; break; + case AFAIL_ZB_ONLY: fm = 0xffffffff; break; + case AFAIL_RGB_ONLY: fm |= 0xff000000; zm = 0xffffffff; break; + default: __assume(0); + } + } + + return true; +} + +bool GSState::IsOpaque() +{ + if(PRIM->AA1) + { + return false; + } + + if(!PRIM->ABE) + { + return true; + } + + const GSDrawingContext* context = m_context; + + int amin = 0, amax = 0xff; + + if(context->ALPHA.A != context->ALPHA.B) + { + if(context->ALPHA.C == 0) + { + GetAlphaMinMax(); + + amin = m_vt->m_alpha.min; + amax = m_vt->m_alpha.max; + } + else if(context->ALPHA.C == 1) + { + if(context->FRAME.PSM == PSM_PSMCT24 || context->FRAME.PSM == PSM_PSMZ24) + { + amin = amax = 0x80; + } + } + else if(context->ALPHA.C == 2) + { + amin = amax = context->ALPHA.FIX; + } + } + + return context->ALPHA.IsOpaque(amin, amax); +} + // GSTransferBuffer GSState::GSTransferBuffer::GSTransferBuffer() diff --git a/plugins/GSdx/GSState.h b/plugins/GSdx/GSState.h index 4de179a7bb..abed084fd0 100644 --- a/plugins/GSdx/GSState.h +++ b/plugins/GSdx/GSState.h @@ -26,7 +26,7 @@ #include "GSDrawingContext.h" #include "GSDrawingEnvironment.h" #include "GSVertex.h" -#include "GSVertexList.h" +#include "GSVertexTrace.h" #include "GSUtil.h" #include "GSPerfMon.h" #include "GSVector.h" @@ -126,49 +126,53 @@ class GSState : public GSAlignedClass<32> } m_tr; - void FlushWrite(); - protected: bool IsBadFrame(int& skip, int UserHacks_SkipDraw); - typedef void (GSState::*VertexKickPtr)(bool skip); + GSVertex m_v; + float m_q; + struct {uint8* buff; size_t head, tail, maxcount, stride, n;} m_vertex; + struct {uint32* buff; size_t tail;} m_index; - VertexKickPtr m_vk[8][2][2]; - VertexKickPtr m_vkf; + typedef void (GSState::*DrawingKickPtr)(uint32 skip); + typedef void (GSState::*ConvertVertexPtr)(void* RESTRICT vertex, size_t index); - #define InitVertexKick3(T, P, N, M) \ - m_vk[P][N][M] = (VertexKickPtr)(void (T::*)(bool))&T::VertexKick; + DrawingKickPtr m_dk[8], m_dkf; + ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST] - #define InitVertexKick2(T, P) \ - InitVertexKick3(T, P, 0, 0) \ - InitVertexKick3(T, P, 0, 1) \ - InitVertexKick3(T, P, 1, 0) \ - InitVertexKick3(T, P, 1, 1) \ + #define InitConvertVertex2(T, P) \ + m_cv[P][0][0] = (ConvertVertexPtr)&T::ConvertVertex; \ + m_cv[P][0][1] = (ConvertVertexPtr)&T::ConvertVertex; \ + m_cv[P][1][0] = (ConvertVertexPtr)&T::ConvertVertex; \ + m_cv[P][1][1] = (ConvertVertexPtr)&T::ConvertVertex; \ - #define InitVertexKick(T) \ - InitVertexKick2(T, GS_POINTLIST) \ - InitVertexKick2(T, GS_LINELIST) \ - InitVertexKick2(T, GS_LINESTRIP) \ - InitVertexKick2(T, GS_TRIANGLELIST) \ - InitVertexKick2(T, GS_TRIANGLESTRIP) \ - InitVertexKick2(T, GS_TRIANGLEFAN) \ - InitVertexKick2(T, GS_SPRITE) \ - InitVertexKick2(T, GS_INVALID) \ + #define InitConvertVertex(T) \ + InitConvertVertex2(T, GS_POINTLIST) \ + InitConvertVertex2(T, GS_LINELIST) \ + InitConvertVertex2(T, GS_LINESTRIP) \ + InitConvertVertex2(T, GS_TRIANGLELIST) \ + InitConvertVertex2(T, GS_TRIANGLESTRIP) \ + InitConvertVertex2(T, GS_TRIANGLEFAN) \ + InitConvertVertex2(T, GS_SPRITE) \ + InitConvertVertex2(T, GS_INVALID) \ - void UpdateVertexKick() - { - m_vkf = m_vk[PRIM->PRIM][PRIM->TME][PRIM->FST]; - } + virtual void UpdateVertexKick(); - void VertexKickNull(bool skip) - { - ASSERT(0); - } + void GrowVertexBuffer(); - void VertexKick(bool skip) - { - (this->*m_vkf)(skip); - } + void VertexKick(uint32 skip); + + template + void DrawingKick(uint32 skip); + + // following functions need m_vt to be initialized + + GSVertexTrace* m_vt; + + void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear); + void GetAlphaMinMax(); + bool TryAlphaTest(uint32& fm, uint32& zm); + bool IsOpaque(); public: GIFPath m_path[4]; @@ -177,10 +181,6 @@ public: GSLocalMemory m_mem; GSDrawingEnvironment m_env; GSDrawingContext* m_context; - GSVertex m_v; - float m_q; - uint32 m_vprim; - GSPerfMon m_perfmon; uint32 m_crc; int m_options; @@ -190,7 +190,7 @@ public: GSDump m_dump; public: - GSState(); + GSState(GSVertexTrace* vt, size_t vertex_stride); virtual ~GSState(); void ResetHandlers(); @@ -205,8 +205,9 @@ public: virtual void Reset(); virtual void Flush(); - virtual void FlushPrim() = 0; - virtual void ResetPrim() = 0; + virtual void FlushPrim(); + virtual void FlushWrite(); + virtual void Draw() = 0; virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {} virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) {} diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index e13d2b221a..d261d08d9b 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -319,11 +319,11 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo GSVector4i r; - vector* pages = o->GetPages(rect, &r); + const uint32* pages = o->GetPages(rect, &r); bool found = false; - for(vector::iterator p = pages->begin(); p != pages->end(); p++) + for(const uint32* p = pages; *p != GSOffset::EOP; p++) { uint32 page = *p; @@ -374,7 +374,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo } } - delete pages; + delete [] pages; if(!target) return; diff --git a/plugins/GSdx/GSTextureCacheSW.cpp b/plugins/GSdx/GSTextureCacheSW.cpp index 93183fe50f..595fea45f5 100644 --- a/plugins/GSdx/GSTextureCacheSW.cpp +++ b/plugins/GSdx/GSTextureCacheSW.cpp @@ -74,18 +74,18 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons m_textures.insert(t); - for(vector::const_iterator i = t->m_pages.n->begin(); i != t->m_pages.n->end(); i++) + for(const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++) { - m_map[*i].push_front(t); + m_map[*p].push_front(t); } } return t; } -void GSTextureCacheSW::InvalidatePages(const vector* pages, uint32 psm) +void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm) { - for(vector::const_iterator p = pages->begin(); p != pages->end(); p++) + for(const uint32* p = pages; *p != GSOffset::EOP; p++) { uint32 page = *p; @@ -185,9 +185,9 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH)); - for(vector::const_iterator i = m_pages.n->begin(); i != m_pages.n->end(); i++) + for(const uint32* p = m_pages.n; *p != GSOffset::EOP; p++) { - uint32 page = *i; + uint32 page = *p; m_pages.bm[page >> 5] |= 1 << (page & 31); } @@ -202,7 +202,7 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& GSTextureCacheSW::Texture::~Texture() { - delete m_pages.n; + delete [] m_pages.n; if(m_buff) { diff --git a/plugins/GSdx/GSTextureCacheSW.h b/plugins/GSdx/GSTextureCacheSW.h index 34a6bb85c0..fc642afc11 100644 --- a/plugins/GSdx/GSTextureCacheSW.h +++ b/plugins/GSdx/GSTextureCacheSW.h @@ -40,7 +40,7 @@ public: bool m_repeating; vector* m_p2t; uint32 m_valid[MAX_PAGES]; - struct {uint32 bm[16]; const vector* n;} m_pages; + struct {uint32 bm[16]; const uint32* n;} m_pages; // m_valid // fast mode: each uint32 bits map to the 32 blocks of that page @@ -64,7 +64,7 @@ public: Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0); - void InvalidatePages(const vector* pages, uint32 psm); + void InvalidatePages(const uint32* pages, uint32 psm); void RemoveAll(); void RemoveAt(Texture* t); diff --git a/plugins/GSdx/GSTextureFX11.cpp b/plugins/GSdx/GSTextureFX11.cpp index e21f6b200f..354efd530f 100644 --- a/plugins/GSdx/GSTextureFX11.cpp +++ b/plugins/GSdx/GSTextureFX11.cpp @@ -82,9 +82,10 @@ bool GSDevice11::CreateTextureFX() return true; } -void GSDevice11::SetupIA(const void* vertices, int count, int prim) +void GSDevice11::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) { - IASetVertexBuffer(vertices, sizeof(GSVertexHW11), count); + IASetVertexBuffer(vertex, sizeof(GSVertexHW11), vertex_count); + IASetIndexBuffer(index, index_count); IASetPrimitiveTopology((D3D11_PRIMITIVE_TOPOLOGY)prim); } diff --git a/plugins/GSdx/GSTextureFX9.cpp b/plugins/GSdx/GSTextureFX9.cpp index 7fdc443da7..aff4a33d90 100644 --- a/plugins/GSdx/GSTextureFX9.cpp +++ b/plugins/GSdx/GSTextureFX9.cpp @@ -61,9 +61,10 @@ GSTexture* GSDevice9::CreateMskFix(uint32 size, uint32 msk, uint32 fix) return t; } -void GSDevice9::SetupIA(const void* vertices, int count, int prim) +void GSDevice9::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) { - IASetVertexBuffer(vertices, sizeof(GSVertexHW9), count); + IASetVertexBuffer(vertex, sizeof(GSVertexHW9), vertex_count); + IASetIndexBuffer(index, index_count); IASetPrimitiveTopology((D3DPRIMITIVETYPE)prim); } diff --git a/plugins/GSdx/GSUtil.cpp b/plugins/GSdx/GSUtil.cpp index 2306106570..d9a2998d38 100644 --- a/plugins/GSdx/GSUtil.cpp +++ b/plugins/GSdx/GSUtil.cpp @@ -91,6 +91,7 @@ static class GSUtilMaps { public: uint8 PrimClassField[8]; + uint8 VertexCountField[8]; uint32 CompatibleBitsField[64][2]; uint32 SharedBitsField[64][2]; @@ -105,6 +106,15 @@ public: PrimClassField[GS_SPRITE] = GS_SPRITE_CLASS; PrimClassField[GS_INVALID] = GS_INVALID_CLASS; + VertexCountField[GS_POINTLIST] = 1; + VertexCountField[GS_LINELIST] = 2; + VertexCountField[GS_LINESTRIP] = 2; + VertexCountField[GS_TRIANGLELIST] = 3; + VertexCountField[GS_TRIANGLESTRIP] = 3; + VertexCountField[GS_TRIANGLEFAN] = 3; + VertexCountField[GS_SPRITE] = 2; + VertexCountField[GS_INVALID] = 1; + memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField)); for(int i = 0; i < 64; i++) @@ -146,6 +156,11 @@ GS_PRIM_CLASS GSUtil::GetPrimClass(uint32 prim) return (GS_PRIM_CLASS)s_maps.PrimClassField[prim]; } +int GSUtil::GetVertexCount(uint32 prim) +{ + return s_maps.VertexCountField[prim]; +} + bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm) { return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0; @@ -321,7 +336,7 @@ static bool DXUTDelayLoadDXGI() bool GSUtil::CheckDirect3D11Level(D3D_FEATURE_LEVEL& level) { - HRESULT hr; + HRESULT hr = S_OK; level = (D3D_FEATURE_LEVEL)0; diff --git a/plugins/GSdx/GSUtil.h b/plugins/GSdx/GSUtil.h index 153493c9be..f834255d2b 100644 --- a/plugins/GSdx/GSUtil.h +++ b/plugins/GSdx/GSUtil.h @@ -29,6 +29,7 @@ public: static const char* GetLibName(); static GS_PRIM_CLASS GetPrimClass(uint32 prim); + static int GetVertexCount(uint32 prim); static bool HasSharedBits(uint32 spsm, uint32 dpsm); static bool HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm); diff --git a/plugins/GSdx/GSVector.h b/plugins/GSdx/GSVector.h index e9f3166fb6..98afea5d9f 100644 --- a/plugins/GSdx/GSVector.h +++ b/plugins/GSdx/GSVector.h @@ -1900,7 +1900,7 @@ public: d = f.uph64(d); } - __forceinline static bool compare16(const void* dst, const void* src, int size) + __forceinline static bool compare16(const void* dst, const void* src, size_t size) { ASSERT((size & 15) == 0); @@ -1909,7 +1909,7 @@ public: GSVector4i* s = (GSVector4i*)src; GSVector4i* d = (GSVector4i*)dst; - for(int i = 0; i < size; i++) + for(size_t i = 0; i < size; i++) { if(!d[i].eq(s[i])) { @@ -1920,7 +1920,7 @@ public: return true; } - __forceinline static bool compare64(const void* dst, const void* src, int size) + __forceinline static bool compare64(const void* dst, const void* src, size_t size) { ASSERT((size & 63) == 0); @@ -1929,7 +1929,7 @@ public: GSVector4i* s = (GSVector4i*)src; GSVector4i* d = (GSVector4i*)dst; - for(int i = 0; i < size; i += 4) + for(size_t i = 0; i < size; i += 4) { GSVector4i v0 = (d[i * 4 + 0] == s[i * 4 + 0]); GSVector4i v1 = (d[i * 4 + 1] == s[i * 4 + 1]); @@ -1948,7 +1948,7 @@ public: return true; } - __forceinline static bool update(const void* dst, const void* src, int size) + __forceinline static bool update(const void* dst, const void* src, size_t size) { ASSERT((size & 15) == 0); @@ -1959,7 +1959,7 @@ public: GSVector4i v = GSVector4i::xffffffff(); - for(int i = 0; i < size; i++) + for(size_t i = 0; i < size; i++) { v &= d[i] == s[i]; diff --git a/plugins/GSdx/GSVertex.h b/plugins/GSdx/GSVertex.h index 6df8fcd3aa..93f8e15b6a 100644 --- a/plugins/GSdx/GSVertex.h +++ b/plugins/GSdx/GSVertex.h @@ -30,15 +30,20 @@ __aligned(struct, 32) GSVertex { - GIFRegST ST; - GIFRegRGBAQ RGBAQ; - GIFRegXYZ XYZ; - GIFRegFOG FOG; - GIFRegUV UV; + union + { + struct + { + GIFRegST ST; + GIFRegRGBAQ RGBAQ; + GIFRegXYZ XYZ; + union {GIFRegUV UV; GIFRegFOG FOG;}; // UV.u32[0] | FOG.u32[1] + }; - GSVertex() {memset(this, 0, sizeof(*this));} + __m128i m[2]; + }; - GSVector4 GetUV() const {return GSVector4(GSVector4i::load(UV.u32[0]).upl16());} + void operator = (const GSVertex& v) {m[0] = v.m[0]; m[1] = v.m[1];} }; struct GSVertexP diff --git a/plugins/GSdx/GSVertexHW.h b/plugins/GSdx/GSVertexHW.h index 77369fe050..6262e28804 100644 --- a/plugins/GSdx/GSVertexHW.h +++ b/plugins/GSdx/GSVertexHW.h @@ -35,16 +35,6 @@ __aligned(struct, 32) GSVertexHW9 // t.w = union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;} GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;} - - float& _q() {return p.w;} - - uint8& _r() {return t.u8[8];} - uint8& _g() {return t.u8[9];} - uint8& _b() {return t.u8[10];} - uint8& _a() {return t.u8[11];} - - uint32& _c0() {return t.u32[2];} - uint32& _c1() {return t.u32[3];} }; __aligned(union, 32) GSVertexHW11 @@ -86,16 +76,6 @@ __aligned(union, 32) GSVertexHW11 return *this; } - - float& _q() {return q;} - - uint8& _r() {return r;} - uint8& _g() {return g;} - uint8& _b() {return b;} - uint8& _a() {return a;} - - uint32& _c0() {return c0;} - uint32& _c1() {return c1;} }; #pragma pack(pop) diff --git a/plugins/GSdx/GSVertexSW.h b/plugins/GSdx/GSVertexSW.h index 953cd7a672..3fc45006ae 100644 --- a/plugins/GSdx/GSVertexSW.h +++ b/plugins/GSdx/GSVertexSW.h @@ -23,7 +23,7 @@ #include "GSVector.h" -__aligned(struct, 16) GSVertexSW +__aligned(struct, 32) GSVertexSW { GSVector4 p, t, c; diff --git a/plugins/GSdx/GSVertexTrace.cpp b/plugins/GSdx/GSVertexTrace.cpp index df4933687e..413d5799aa 100644 --- a/plugins/GSdx/GSVertexTrace.cpp +++ b/plugins/GSdx/GSVertexTrace.cpp @@ -28,12 +28,60 @@ const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX); GSVertexTrace::GSVertexTrace(const GSState* state) : m_state(state) - , m_map_sw("VertexTraceSW", NULL) - , m_map_hw9("VertexTraceHW9", NULL) - , m_map_hw11("VertexTraceHW11", NULL) { } +void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass) +{ + m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); + + m_alpha.valid = false; + + if(m_state->PRIM->TME) + { + const GIFRegTEX1& TEX1 = m_state->m_context->TEX1; + + m_filter.mmag = TEX1.IsMagLinear(); + m_filter.mmin = TEX1.IsMinLinear(); + + if(TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2 + { + m_filter.linear = m_filter.mmag; + + return; + } + + float K = (float)TEX1.K / 16; + + if(TEX1.LCM == 0 && m_state->PRIM->FST == 0) // FST == 1 => Q is not interpolated + { + // LOD = log2(1/|Q|) * (1 << L) + K + + GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(3).neg() * (float)(1 << TEX1.L) + K); + + if(m_lod.x > m_lod.y) {float tmp = m_lod.x; m_lod.x = m_lod.y; m_lod.y = tmp;} + } + else + { + m_lod.x = K; + m_lod.y = K; + } + + if(m_lod.y <= 0) + { + m_filter.linear = m_filter.mmag; + } + else if(m_lod.x > 0) + { + m_filter.linear = m_filter.mmin; + } + else + { + m_filter.linear = m_filter.mmag | m_filter.mmin; + } + } +} + uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass) { m_primclass = primclass; @@ -48,66 +96,28 @@ uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass) return hash; } -void GSVertexTrace::UpdateLOD() +GSVertexTraceSW::GSVertexTraceSW(const GSState* state) + : GSVertexTrace(state) + , m_map("VertexTraceSW", NULL) { - if(!m_state->PRIM->TME) return; - - const GIFRegTEX1& TEX1 = m_state->m_context->TEX1; - - m_filter.mmag = TEX1.IsMagLinear(); - m_filter.mmin = TEX1.IsMinLinear(); - - if(TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2 - { - m_filter.linear = m_filter.mmag; - - return; - } - - float K = (float)TEX1.K / 16; - - if(TEX1.LCM == 0 && m_state->PRIM->FST == 0) // FST == 1 => Q is not interpolated - { - // LOD = log2(1/|Q|) * (1 << L) + K - - GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(3).neg() * (float)(1 << TEX1.L) + K); - - if(m_lod.x > m_lod.y) {float tmp = m_lod.x; m_lod.x = m_lod.y; m_lod.y = tmp;} - } - else - { - m_lod.x = K; - m_lod.y = K; - } - - if(m_lod.y <= 0) - { - m_filter.linear = m_filter.mmag; - } - else if(m_lod.x > 0) - { - m_filter.linear = m_filter.mmin; - } - else - { - m_filter.linear = m_filter.mmag | m_filter.mmin; - } } -void GSVertexTrace::Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass) +void GSVertexTraceSW::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass) { - m_map_sw[Hash(primclass)](count, v, m_min, m_max); + m_map[Hash(primclass)](count, vertex, index, m_min, m_max); - m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); - - m_alpha.valid = false; - - UpdateLOD(); + GSVertexTrace::Update(vertex, index, count, primclass); } -void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass) +GSVertexTraceDX9::GSVertexTraceDX9(const GSState* state) + : GSVertexTrace(state) + , m_map("VertexTraceHW9", NULL) { - m_map_hw9[Hash(primclass)](count, v, m_min, m_max); +} + +void GSVertexTraceDX9::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass) +{ + m_map[Hash(primclass)](count, vertex, index, m_min, m_max); const GSDrawingContext* context = m_state->m_context; @@ -132,16 +142,18 @@ void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primcl m_max.t *= s; } - m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); - - m_alpha.valid = false; - - UpdateLOD(); + GSVertexTrace::Update(vertex, index, count, primclass); } -void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primclass) +GSVertexTraceDX11::GSVertexTraceDX11(const GSState* state) + : GSVertexTrace(state) + , m_map("VertexTraceHW11", NULL) { - m_map_hw11[Hash(primclass)](count, v, m_min, m_max); +} + +void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass) +{ + m_map[Hash(primclass)](count, vertex, index, m_min, m_max); const GSDrawingContext* context = m_state->m_context; @@ -166,10 +178,6 @@ void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primc m_max.t *= s; } - m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); - - m_alpha.valid = false; - - UpdateLOD(); + GSVertexTrace::Update(vertex, index, count, primclass); } diff --git a/plugins/GSdx/GSVertexTrace.h b/plugins/GSdx/GSVertexTrace.h index d18c73c1cf..4c0a5653b0 100644 --- a/plugins/GSdx/GSVertexTrace.h +++ b/plugins/GSdx/GSVertexTrace.h @@ -29,42 +29,18 @@ class GSState; -__aligned(class, 32) GSVertexTrace +__aligned(class, 32) GSVertexTrace : public GSAlignedClass<32> { public: struct Vertex {GSVector4i c; GSVector4 p, t;}; // t.xy * 0x10000 struct VertexAlpha {int min, max; bool valid;}; -private: - typedef void (*VertexTracePtr)(int count, const void* v, Vertex& min, Vertex& max); - - class CGSW : public GSCodeGenerator - { - public: - CGSW(const void* param, uint32 key, void* code, size_t maxsize); - }; - - class CGHW9 : public GSCodeGenerator - { - public: - CGHW9(const void* param, uint32 key, void* code, size_t maxsize); - }; - - class CGHW11 : public GSCodeGenerator - { - public: - CGHW11(const void* param, uint32 key, void* code, size_t maxsize); - }; - - GSCodeGeneratorFunctionMap m_map_sw; - GSCodeGeneratorFunctionMap m_map_hw9; - GSCodeGeneratorFunctionMap m_map_hw11; - +protected: const GSState* m_state; uint32 Hash(GS_PRIM_CLASS primclass); - void UpdateLOD(); + typedef void (*VertexTracePtr)(int count, const void* vertex, const uint32* index, Vertex& min, Vertex& max); static const GSVector4 s_minmax; @@ -73,10 +49,7 @@ public: Vertex m_min; Vertex m_max; - - // source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it - - VertexAlpha m_alpha; + VertexAlpha m_alpha; // source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it union { @@ -92,12 +65,59 @@ public: GSVector2 m_lod; // x = min, y = max +public: GSVertexTrace(const GSState* state); + virtual ~GSVertexTrace() {} - void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass); - void Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass); - void Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primclass); - void Update(const GSVertexNull* v, int count, GS_PRIM_CLASS primclass) {} + virtual void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass); bool IsLinear() const {return m_filter.linear;} }; + +__aligned(class, 32) GSVertexTraceSW : public GSVertexTrace +{ + class CG : public GSCodeGenerator + { + public: + CG(const void* param, uint32 key, void* code, size_t maxsize); + }; + + GSCodeGeneratorFunctionMap m_map; + +public: + GSVertexTraceSW(const GSState* state); + + void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass); +}; + +__aligned(class, 32) GSVertexTraceDX9 : public GSVertexTrace +{ + class CG : public GSCodeGenerator + { + public: + CG(const void* param, uint32 key, void* code, size_t maxsize); + }; + + GSCodeGeneratorFunctionMap m_map; + +public: + GSVertexTraceDX9(const GSState* state); + + void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass); +}; + +__aligned(class, 32) GSVertexTraceDX11 : public GSVertexTrace +{ + class CG : public GSCodeGenerator + { + public: + CG(const void* param, uint32 key, void* code, size_t maxsize); + }; + + GSCodeGeneratorFunctionMap m_map; + +public: + GSVertexTraceDX11(const GSState* state); + + void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass); +}; diff --git a/plugins/GSdx/GSVertexTrace.x86.avx.cpp b/plugins/GSdx/GSVertexTrace.x86.avx.cpp index 5fb291fb9a..35e5cc0d2a 100644 --- a/plugins/GSdx/GSVertexTrace.x86.avx.cpp +++ b/plugins/GSdx/GSVertexTrace.x86.avx.cpp @@ -27,12 +27,13 @@ using namespace Xbyak; static const int _args = 0; -static const int _count = _args + 4; // rcx -static const int _v = _args + 8; // rdx -static const int _min = _args + 12; // r8 -static const int _max = _args + 16; // r9 +static const int _count = _args + 8; // rcx +static const int _vertex = _args + 12; // rdx +static const int _index = _args + 16; // r8 +static const int _min = _args + 20; // r9 +static const int _max = _args + 24; // _args + 4 -GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize) +GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) : GSCodeGenerator(code, maxsize) { uint32 primclass = (key >> 0) & 3; @@ -57,6 +58,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs break; } + push(ebx); + // min.p = FLT_MAX; // max.p = -FLT_MAX; @@ -83,7 +86,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs // for(int i = 0; i < count; i += step) { - mov(edx, dword[esp + _v]); + mov(edx, dword[esp + _vertex]); + mov(ebx, dword[esp + _index]); mov(ecx, dword[esp + _count]); align(16); @@ -92,18 +96,24 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs if(tme && !fst && primclass == GS_SPRITE_CLASS) { - vmovaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]); + mov(eax, ptr[ebx + 1 * sizeof(uint32)]); + shl(eax, 6); // * sizeof(GSVertexSW) + + vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]); vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); } for(int j = 0; j < n; j++) { + mov(eax, ptr[ebx + j * sizeof(uint32)]); + shl(eax, 6); // * sizeof(GSVertexSW) + if(color && (iip || j == n - 1)) { // min.c = min.c.minv(v[i + j].c); // max.c = max.c.maxv(v[i + j].c); - vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]); + vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]); vminps(xmm2, xmm0); vmaxps(xmm3, xmm0); @@ -112,7 +122,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs // min.p = min.p.minv(v[i + j].p); // max.p = max.p.maxv(v[i + j].p); - vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]); + vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]); vminps(xmm4, xmm0); vmaxps(xmm5, xmm0); @@ -122,7 +132,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs // min.t = min.t.minv(v[i + j].t); // max.t = max.t.maxv(v[i + j].t); - vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]); + vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]); if(!fst) { @@ -140,7 +150,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs } } - add(edx, n * sizeof(GSVertexSW)); + add(ebx, n * sizeof(uint32)); sub(ecx, n); jg("loop"); @@ -170,10 +180,12 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); } + pop(ebx); + ret(); } -GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize) +GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) : GSCodeGenerator(code, maxsize) { uint32 primclass = (key >> 0) & 3; @@ -189,17 +201,17 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma case GS_POINT_CLASS: n = 1; break; + case GS_SPRITE_CLASS: case GS_LINE_CLASS: n = 2; break; case GS_TRIANGLE_CLASS: n = 3; break; - case GS_SPRITE_CLASS: - n = 6; - break; } + push(ebx); + // min.p = FLT_MAX; // max.p = -FLT_MAX; @@ -226,7 +238,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma // for(int i = 0; i < count; i += step) { - mov(edx, dword[esp + _v]); + mov(edx, dword[esp + _vertex]); + mov(ebx, dword[esp + _index]); mov(ecx, dword[esp + _count]); align(16); @@ -235,16 +248,22 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma if(tme && !fst && primclass == GS_SPRITE_CLASS) { - vmovaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]); + mov(eax, ptr[ebx + 1 * sizeof(uint32)]); + shl(eax, 5); // * sizeof(GSVertexHW9) + + vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]); vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); } for(int j = 0; j < n; j++) { + mov(eax, ptr[ebx + j * sizeof(uint32)]); + shl(eax, 5); // * sizeof(GSVertexHW9) + // min.p = min.p.minv(v[i + j].p); // max.p = max.p.maxv(v[i + j].p); - vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]); + vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]); vminps(xmm4, xmm0); vmaxps(xmm5, xmm0); @@ -256,7 +275,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma if(color && (iip || j == n - 1) || tme) { - vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]); + vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]); } if(color && (iip || j == n - 1)) @@ -287,7 +306,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma } } - add(edx, n * sizeof(GSVertexHW9)); + add(ebx, n * sizeof(uint32)); sub(ecx, n); jg("loop"); @@ -330,10 +349,12 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); } + pop(ebx); + ret(); } -GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize) +GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) : GSCodeGenerator(code, maxsize) { uint32 primclass = (key >> 0) & 3; @@ -358,6 +379,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t break; } + push(ebx); + // min.p = FLT_MAX; // max.p = -FLT_MAX; @@ -384,7 +407,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t // for(int i = 0; i < count; i += step) { - mov(edx, dword[esp + _v]); + mov(edx, dword[esp + _vertex]); + mov(ebx, dword[esp + _index]); mov(ecx, dword[esp + _count]); align(16); @@ -393,9 +417,12 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t for(int j = 0; j < n; j++) { + mov(eax, ptr[ebx + j * sizeof(uint32)]); + shl(eax, 5); // * sizeof(GSVertexHW11) + if(color && (iip || j == n - 1) || tme) { - vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]); + vmovaps(xmm0, ptr[edx + eax]); } if(color && (iip || j == n - 1)) @@ -424,7 +451,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t vmaxps(xmm7, xmm0); } - vmovdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]); + vmovdqa(xmm0, ptr[edx + eax + 16]); vpmovzxwd(xmm1, xmm0); vpsrld(xmm0, 1); @@ -435,7 +462,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t vmaxps(xmm5, xmm1); } - add(edx, n * sizeof(GSVertexHW11)); + add(ebx, n * sizeof(uint32)); sub(ecx, n); jg("loop"); @@ -478,6 +505,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); } + pop(ebx); + ret(); } diff --git a/plugins/GSdx/GSVertexTrace.x86.cpp b/plugins/GSdx/GSVertexTrace.x86.cpp index 6b792df4be..62b0e10aa0 100644 --- a/plugins/GSdx/GSVertexTrace.x86.cpp +++ b/plugins/GSdx/GSVertexTrace.x86.cpp @@ -27,12 +27,13 @@ using namespace Xbyak; static const int _args = 0; -static const int _count = _args + 4; // rcx -static const int _v = _args + 8; // rdx -static const int _min = _args + 12; // r8 -static const int _max = _args + 16; // r9 +static const int _count = _args + 8; // rcx +static const int _vertex = _args + 12; // rdx +static const int _index = _args + 16; // r8 +static const int _min = _args + 20; // r9 +static const int _max = _args + 24; // _args + 4 -GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize) +GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) : GSCodeGenerator(code, maxsize) { uint32 primclass = (key >> 0) & 3; @@ -57,6 +58,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs break; } + push(ebx); + // min.p = FLT_MAX; // max.p = -FLT_MAX; @@ -86,7 +89,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs // for(int i = 0; i < count; i += step) { - mov(edx, dword[esp + _v]); + mov(edx, dword[esp + _vertex]); + mov(ebx, dword[esp + _index]); mov(ecx, dword[esp + _count]); align(16); @@ -95,18 +99,24 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs if(tme && !fst && primclass == GS_SPRITE_CLASS) { - movaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]); + mov(eax, ptr[ebx + 1 * sizeof(uint32)]); + shl(eax, 6); // * sizeof(GSVertexSW) + + movaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]); shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); } for(int j = 0; j < n; j++) { + mov(eax, ptr[ebx + j * sizeof(uint32)]); + shl(eax, 6); // * sizeof(GSVertexSW) + if(color && (iip || j == n - 1)) { // min.c = min.c.minv(v[i + j].c); // max.c = max.c.maxv(v[i + j].c); - movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]); + movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]); minps(xmm2, xmm0); maxps(xmm3, xmm0); @@ -115,7 +125,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs // min.p = min.p.minv(v[i + j].p); // max.p = max.p.maxv(v[i + j].p); - movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]); + movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]); minps(xmm4, xmm0); maxps(xmm5, xmm0); @@ -125,7 +135,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs // min.t = min.t.minv(v[i + j].t); // max.t = max.t.maxv(v[i + j].t); - movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]); + movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]); if(!fst) { @@ -144,7 +154,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs } } - add(edx, n * sizeof(GSVertexSW)); + add(ebx, n * sizeof(uint32)); sub(ecx, n); jg("loop"); @@ -174,10 +184,12 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); } + pop(ebx); + ret(); } -GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize) +GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) : GSCodeGenerator(code, maxsize) { uint32 primclass = (key >> 0) & 3; @@ -204,6 +216,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma break; } + push(ebx); + // min.p = FLT_MAX; // max.p = -FLT_MAX; @@ -233,7 +247,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma // for(int i = 0; i < count; i += step) { - mov(edx, dword[esp + _v]); + mov(edx, dword[esp + _vertex]); + mov(ebx, dword[esp + _index]); mov(ecx, dword[esp + _count]); align(16); @@ -242,16 +257,22 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma if(tme && !fst && primclass == GS_SPRITE_CLASS) { - movaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]); + mov(eax, ptr[ebx + 1 * sizeof(uint32)]); + shl(eax, 5); // * sizeof(GSVertexHW9) + + movaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]); shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); } for(int j = 0; j < n; j++) { + mov(eax, ptr[ebx + j * sizeof(uint32)]); + shl(eax, 5); // * sizeof(GSVertexHW9) + // min.p = min.p.minv(v[i + j].p); // max.p = max.p.maxv(v[i + j].p); - movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]); + movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]); minps(xmm4, xmm0); maxps(xmm5, xmm0); @@ -264,7 +285,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma if(color && (iip || j == n - 1) || tme) { - movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]); + movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]); } if(color && (iip || j == n - 1)) @@ -295,7 +316,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma } } - add(edx, n * sizeof(GSVertexHW9)); + add(ebx, n * sizeof(uint32)); sub(ecx, n); jg("loop"); @@ -351,10 +372,12 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); } + pop(ebx); + ret(); } -GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize) +GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize) : GSCodeGenerator(code, maxsize) { uint32 primclass = (key >> 0) & 3; @@ -379,6 +402,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t break; } + push(ebx); + // min.p = FLT_MAX; // max.p = -FLT_MAX; @@ -408,7 +433,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t // for(int i = 0; i < count; i += step) { - mov(edx, dword[esp + _v]); + mov(edx, dword[esp + _vertex]); + mov(ebx, dword[esp + _index]); mov(ecx, dword[esp + _count]); align(16); @@ -417,9 +443,12 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t for(int j = 0; j < n; j++) { + mov(eax, ptr[ebx + j * sizeof(uint32)]); + shl(eax, 5); // * sizeof(GSVertexHW11) + if(color && (iip || j == n - 1) || tme) { - movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]); + movaps(xmm0, ptr[edx + eax]); } if(color && (iip || j == n - 1)) @@ -448,7 +477,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxps(xmm7, xmm0); } - movdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]); + movdqa(xmm0, ptr[edx + eax + 16]); if(m_cpu.has(util::Cpu::tSSE41)) { @@ -469,7 +498,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxps(xmm5, xmm1); } - add(edx, n * sizeof(GSVertexHW11)); + add(ebx, n * sizeof(uint32)); sub(ecx, n); jg("loop"); @@ -525,6 +554,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7); } + pop(ebx); + ret(); } diff --git a/plugins/GSdx/GSdx.vcxproj b/plugins/GSdx/GSdx.vcxproj index 82412e5e72..7745046397 100644 --- a/plugins/GSdx/GSdx.vcxproj +++ b/plugins/GSdx/GSdx.vcxproj @@ -529,7 +529,6 @@ AssemblyAndSourceCode - AssemblyAndSourceCode diff --git a/plugins/GSdx/stdafx.h b/plugins/GSdx/stdafx.h index cbca970307..2fe70e9053 100644 --- a/plugins/GSdx/stdafx.h +++ b/plugins/GSdx/stdafx.h @@ -363,34 +363,43 @@ struct aligned_free_second {template void operator()(T& p) {_aligned_fr __forceinline unsigned char _BitScanForward(unsigned long* const Index, const unsigned long Mask) { __asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask)); + return Mask ? 1 : 0; } __forceinline unsigned char _interlockedbittestandreset(volatile long* a, const long b) { unsigned char retval; + __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory"); + return retval; } __forceinline unsigned char _interlockedbittestandset(volatile long* a, const long b) { unsigned char retval; + __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory"); + return retval; } __forceinline long _InterlockedExchangeAdd(volatile long* const Addend, const long Value) { long retval = Value; + __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory"); + return retval; } __forceinline long _InterlockedExchangeAdd16(volatile short* const Addend, const short Value) { long retval = Value; + __asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory"); + return retval; }