GSdx: the promised index buffer update. It needed a lot of changes, so expect bugs in the next dozen revisions.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5045 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2012-01-05 02:40:24 +00:00
parent bffde4fd5e
commit f68f007f00
55 changed files with 3220 additions and 2973 deletions

View File

@ -114,22 +114,26 @@ void GPURendererSW::Draw()
gd.vm = m_mem.GetPixelAddress(0, 0);
data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16);
memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count);
data->count = m_count;
data->frame = m_perfmon.GetFrame();
data->scissor.left = (int)m_env.DRAREATL.X << m_scale.x;
data->scissor.top = (int)m_env.DRAREATL.Y << m_scale.y;
data->scissor.right = min((int)(m_env.DRAREABR.X + 1) << m_scale.x, m_mem.GetWidth());
data->scissor.bottom = min((int)(m_env.DRAREABR.Y + 1) << m_scale.y, m_mem.GetHeight());
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16);
data->vertex = (GSVertexSW*)data->buff;
data->vertex_count = m_count;
memcpy(data->vertex, m_vertices, sizeof(GSVertexSW) * m_count);
data->frame = m_perfmon.GetFrame();
int prims = 0;
switch(env.PRIM.TYPE)
{
case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; break;
case GPU_LINE: data->primclass = GS_LINE_CLASS; break;
case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; break;
case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; prims = data->vertex_count / 3; break;
case GPU_LINE: data->primclass = GS_LINE_CLASS; prims = data->vertex_count / 2; break;
case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; prims = data->vertex_count / 2; break;
default: __assume(0);
}
@ -138,9 +142,9 @@ void GPURendererSW::Draw()
GSVector4 tl(+1e10f);
GSVector4 br(-1e10f);
GSVertexSW* v = data->vertices;
GSVertexSW* v = data->vertex;
for(int i = 0, j = m_count; i < j; i++)
for(int i = 0, j = data->vertex_count; i < j; i++)
{
GSVector4 p = v[i].p;
@ -163,9 +167,9 @@ void GPURendererSW::Draw()
m_rl->Sync();
// TODO: m_perfmon.Put(GSPerfMon::Draw, 1);
// TODO: m_perfmon.Put(GSPerfMon::Prim, stats.prims);
// TODO: m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
m_perfmon.Put(GSPerfMon::Draw, 1);
m_perfmon.Put(GSPerfMon::Prim, prims);
m_perfmon.Put(GSPerfMon::Fillrate, data->pixels);
}
void GPURendererSW::VertexKick()

View File

@ -735,8 +735,6 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS);
vector<uint8> buff;
if(FILE* fp = fopen(lpszCmdLine, "rb"))
{
Console console("GSdx", true);
@ -769,10 +767,127 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
GSvsync(1);
struct Packet {uint8 type, param; uint32 size, addr; vector<uint8> buff;};
list<Packet*> packets;
vector<uint8> buff;
int type;
while((type = fgetc(fp)) != EOF)
{
Packet* p = new Packet();
p->type = (uint8)type;
switch(type)
{
case 0:
p->param = (uint8)fgetc(fp);
fread(&p->size, 4, 1, fp);
switch(p->param)
{
case 0:
p->buff.resize(0x4000);
p->addr = 0x4000 - p->size;
fread(&p->buff[p->addr], p->size, 1, fp);
break;
case 1:
case 2:
case 3:
p->buff.resize(p->size);
fread(&p->buff[0], p->size, 1, fp);
break;
}
break;
case 1:
p->param = (uint8)fgetc(fp);
break;
case 2:
fread(&p->size, 4, 1, fp);
break;
case 3:
p->buff.resize(0x2000);
fread(&p->buff[0], 0x2000, 1, fp);
break;
}
packets.push_back(p);
}
Sleep(100);
while(IsWindowVisible(hWnd))
{
for(list<Packet*>::iterator i = packets.begin(); i != packets.end(); i++)
{
Packet* p = *i;
switch(p->type)
{
case 0:
switch(p->param)
{
case 0: GSgifTransfer1(&p->buff[0], p->addr); break;
case 1: GSgifTransfer2(&p->buff[0], p->size / 16); break;
case 2: GSgifTransfer3(&p->buff[0], p->size / 16); break;
case 3: GSgifTransfer(&p->buff[0], p->size / 16); break;
}
break;
case 1:
GSvsync(p->param);
break;
case 2:
if(buff.size() < p->size) buff.resize(p->size);
GSreadFIFO2(&buff[0], p->size / 16);
break;
case 3:
memcpy(regs, &p->buff[0], 0x2000);
break;
}
}
}
for(list<Packet*>::iterator i = packets.begin(); i != packets.end(); i++)
{
delete *i;
}
packets.clear();
Sleep(100);
/*
bool exit = false;
int round = 0;
while(!exit)
{
uint32 index;
@ -786,6 +901,7 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
case EOF:
fseek(fp, start, 0);
exit = !IsWindowVisible(hWnd);
//exit = ++round == 60;
break;
case 0:
@ -838,6 +954,7 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
break;
}
}
*/
GSclose();
GSshutdown();

View File

@ -638,8 +638,8 @@ REG64_(GIFReg, FINISH)
REG_END
REG64_(GIFReg, FOG)
uint8 _PAD1[4+3];
uint8 F:8;
uint8 _PAD1[7];
uint8 F;
REG_END
REG64_(GIFReg, FOGCOL)
@ -1030,7 +1030,9 @@ REG128_(GIFPacked, XYZF2)
uint32 _PAD6:3;
uint32 ADC:1;
uint32 _PAD7:16;
REG_END
REG_END2
uint32 Skip() const {return u32[3] & 0x8000;}
REG_END2
REG128_(GIFPacked, XYZ2)
uint16 X;
@ -1041,7 +1043,9 @@ REG128_(GIFPacked, XYZ2)
uint32 _PAD3:15;
uint32 ADC:1;
uint32 _PAD4:16;
REG_END
REG_END2
uint32 Skip() const {return u32[3] & 0x8000;}
REG_END2
REG128_(GIFPacked, FOG)
uint32 _PAD1;

View File

@ -35,7 +35,8 @@ GSDevice::GSDevice()
, m_1x1(NULL)
, m_frame(0)
{
memset(&m_vertices, 0, sizeof(m_vertices));
memset(&m_vertex, 0, sizeof(m_vertex));
memset(&m_index, 0, sizeof(m_index));
}
GSDevice::~GSDevice()
@ -135,8 +136,10 @@ GSTexture* GSDevice::FetchSurface(int type, int w, int h, bool msaa, int format)
void GSDevice::EndScene()
{
m_vertices.start += m_vertices.count;
m_vertices.count = 0;
m_vertex.start += m_vertex.count;
m_vertex.count = 0;
m_index.start += m_index.count;
m_index.count = 0;
}
void GSDevice::Recycle(GSTexture* t)

View File

@ -72,7 +72,8 @@ protected:
GSTexture* m_fxaa;
GSTexture* m_1x1;
GSTexture* m_current;
struct {size_t stride, start, count, limit;} m_vertices;
struct {size_t stride, start, count, limit;} m_vertex;
struct {size_t start, count, limit;} m_index;
unsigned int m_frame; // for ageing the pool
virtual GSTexture* CreateSurface(int type, int w, int h, bool msaa, int format) = 0;
@ -101,6 +102,7 @@ public:
virtual void BeginScene() {}
virtual void DrawPrimitive() {};
virtual void DrawIndexedPrimitive() {}
virtual void EndScene();
virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {}

View File

@ -352,7 +352,12 @@ void GSDevice11::Flip()
void GSDevice11::DrawPrimitive()
{
m_ctx->Draw(m_vertices.count, m_vertices.start);
m_ctx->Draw(m_vertex.count, m_vertex.start);
}
void GSDevice11::DrawIndexedPrimitive()
{
m_ctx->DrawIndexed(m_index.count, m_index.start, m_vertex.start);
}
void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c)
@ -709,18 +714,18 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert
}
}
void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t count)
void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
{
ASSERT(m_vertices.count == 0);
ASSERT(m_vertex.count == 0);
if(count * stride > m_vertices.limit * m_vertices.stride)
if(count * stride > m_vertex.limit * m_vertex.stride)
{
m_vb_old = m_vb;
m_vb = NULL;
m_vertices.start = 0;
m_vertices.count = 0;
m_vertices.limit = std::max<int>(count * 3 / 2, 11000);
m_vertex.start = 0;
m_vertex.count = 0;
m_vertex.limit = std::max<int>(count * 3 / 2, 11000);
}
if(m_vb == NULL)
@ -730,7 +735,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
memset(&bd, 0, sizeof(bd));
bd.Usage = D3D11_USAGE_DYNAMIC;
bd.ByteWidth = m_vertices.limit * stride;
bd.ByteWidth = m_vertex.limit * stride;
bd.BindFlags = D3D11_BIND_VERTEX_BUFFER;
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
@ -743,9 +748,9 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride)
if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride)
{
m_vertices.start = 0;
m_vertex.start = 0;
type = D3D11_MAP_WRITE_DISCARD;
}
@ -754,13 +759,13 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
if(SUCCEEDED(m_ctx->Map(m_vb, 0, type, 0, &m)))
{
GSVector4i::storent((uint8*)m.pData + m_vertices.start * stride, vertices, count * stride);
GSVector4i::storent((uint8*)m.pData + m_vertex.start * stride, vertex, count * stride);
m_ctx->Unmap(m_vb, 0);
}
m_vertices.count = count;
m_vertices.stride = stride;
m_vertex.count = count;
m_vertex.stride = stride;
IASetVertexBuffer(m_vb, stride);
}
@ -779,6 +784,70 @@ void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride)
}
}
// Copies `count` 32-bit indices into the device's dynamic index buffer and binds it.
//
// index: source array; count * sizeof(uint32) bytes are copied from it.
// count: number of indices to upload; stored in m_index.count on exit.
//
// New data is written at element offset m_index.start using
// D3D11_MAP_WRITE_NO_OVERWRITE. If the data would not fit between
// m_index.start and m_index.limit, the write restarts at offset 0 with
// D3D11_MAP_WRITE_DISCARD. If `count` exceeds the current capacity, a larger
// buffer is created first.
//
// Returns early, leaving m_index.count untouched and nothing bound, when the
// buffer cannot be created.
void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
{
    ASSERT(m_index.count == 0);

    if(count > m_index.limit)
    {
        // The previous buffer is parked in m_ib_old rather than released right
        // away (presumably so it stays alive while earlier draws may still
        // reference it -- TODO confirm).
        m_ib_old = m_ib;
        m_ib = NULL;

        // A freshly created buffer must be filled from its beginning. This
        // mirrors IASetVertexBuffer, which resets its start offset on growth;
        // without it the stale offset of the old buffer would be reused.
        m_index.start = 0;
        m_index.count = 0;
        m_index.limit = std::max<int>(count * 3 / 2, 11000);
    }

    if(m_ib == NULL)
    {
        D3D11_BUFFER_DESC bd;

        memset(&bd, 0, sizeof(bd));

        bd.Usage = D3D11_USAGE_DYNAMIC;
        bd.ByteWidth = m_index.limit * sizeof(uint32);
        bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
        bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;

        HRESULT hr;

        hr = m_dev->CreateBuffer(&bd, NULL, &m_ib);

        if(FAILED(hr)) return;
    }

    D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;

    if(m_index.start + count > m_index.limit)
    {
        // Not enough room after the current offset: restart at the beginning
        // and discard the previous contents.
        m_index.start = 0;

        type = D3D11_MAP_WRITE_DISCARD;
    }

    D3D11_MAPPED_SUBRESOURCE m;

    if(SUCCEEDED(m_ctx->Map(m_ib, 0, type, 0, &m)))
    {
        memcpy((uint8*)m.pData + m_index.start * sizeof(uint32), index, count * sizeof(uint32));

        m_ctx->Unmap(m_ib, 0);
    }

    m_index.count = count;

    IASetIndexBuffer(m_ib);
}
// Binds `ib` as the index buffer (32-bit indices, offset 0). The last bound
// buffer is cached in m_state.ib so a repeated bind of the same buffer does
// not reach the device context.
void GSDevice11::IASetIndexBuffer(ID3D11Buffer* ib)
{
    if(m_state.ib == ib)
    {
        return;
    }

    m_state.ib = ib;

    m_ctx->IASetIndexBuffer(ib, DXGI_FORMAT_R32_UINT, 0);
}
void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout)
{
if(m_state.layout != layout)

View File

@ -45,6 +45,8 @@ class GSDevice11 : public GSDeviceDX
CComPtr<IDXGISwapChain> m_swapchain;
CComPtr<ID3D11Buffer> m_vb;
CComPtr<ID3D11Buffer> m_vb_old;
CComPtr<ID3D11Buffer> m_ib;
CComPtr<ID3D11Buffer> m_ib_old;
bool m_srv_changed, m_ss_changed;
@ -52,6 +54,7 @@ class GSDevice11 : public GSDeviceDX
{
ID3D11Buffer* vb;
size_t vb_stride;
ID3D11Buffer* ib;
ID3D11InputLayout* layout;
D3D11_PRIMITIVE_TOPOLOGY topology;
ID3D11VertexShader* vs;
@ -141,6 +144,7 @@ public:
void SetExclusive(bool isExcl);
void DrawPrimitive();
void DrawIndexedPrimitive();
void ClearRenderTarget(GSTexture* t, const GSVector4& c);
void ClearRenderTarget(GSTexture* t, uint32 c);
@ -162,8 +166,10 @@ public:
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);
void IASetVertexBuffer(const void* vertices, size_t stride, size_t count);
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
void IASetVertexBuffer(ID3D11Buffer* vb, size_t stride);
void IASetIndexBuffer(const void* index, size_t count);
void IASetIndexBuffer(ID3D11Buffer* ib);
void IASetInputLayout(ID3D11InputLayout* layout);
void IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology);
void VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb);
@ -176,7 +182,7 @@ public:
void OMSetBlendState(ID3D11BlendState* bs, float bf);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
void SetupIA(const void* vertices, int count, int prim);
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel);
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);

View File

@ -352,8 +352,10 @@ bool GSDevice9::Reset(int w, int h)
m_vb = NULL;
m_vb_old = NULL;
m_vertices.start = 0;
m_vertices.count = 0;
m_vertex.start = 0;
m_vertex.count = 0;
m_index.start = 0;
m_index.count = 0;
if(m_state.vs_cb) _aligned_free(m_state.vs_cb);
if(m_state.ps_cb) _aligned_free(m_state.ps_cb);
@ -510,25 +512,52 @@ void GSDevice9::DrawPrimitive()
switch(m_state.topology)
{
case D3DPT_TRIANGLELIST:
prims = m_vertices.count / 3;
case D3DPT_POINTLIST:
prims = m_vertex.count;
break;
case D3DPT_LINELIST:
prims = m_vertices.count / 2;
prims = m_vertex.count / 2;
break;
case D3DPT_POINTLIST:
prims = m_vertices.count;
case D3DPT_LINESTRIP:
prims = m_vertex.count - 1;
break;
case D3DPT_TRIANGLELIST:
prims = m_vertex.count / 3;
break;
case D3DPT_TRIANGLESTRIP:
case D3DPT_TRIANGLEFAN:
prims = m_vertices.count - 2;
break;
case D3DPT_LINESTRIP:
prims = m_vertices.count - 1;
prims = m_vertex.count - 2;
break;
default:
__assume(0);
}
m_dev->DrawPrimitive(m_state.topology, m_vertices.start, prims);
m_dev->DrawPrimitive(m_state.topology, m_vertex.start, prims);
}
// Issues an indexed draw on the D3D9 device for the indices most recently
// uploaded through IASetIndexBuffer.
//
// D3D9 wants a primitive count rather than an index count, so the count is
// derived from m_index.count and the current topology:
//   lists  -> count / indices-per-primitive
//   strips -> count - 1 (lines), count - 2 (triangles, fans)
// This matches the formulas used by DrawPrimitive for the non-indexed path.
//
// NumVertices is the number of vertices addressable by this call starting at
// BaseVertexIndex + MinVertexIndex, i.e. the uploaded vertex count, not the
// index count.
void GSDevice9::DrawIndexedPrimitive()
{
    const size_t n = m_index.count;

    int prims = 0;

    switch(m_state.topology)
    {
    case D3DPT_POINTLIST:
        // NOTE(review): the D3D9 documentation states that D3DPT_POINTLIST is
        // not supported by DrawIndexedPrimitive -- confirm whether this case
        // is ever reached.
        prims = (int)n;
        break;
    case D3DPT_LINELIST:
        prims = (int)(n / 2);
        break;
    case D3DPT_LINESTRIP:
        // guard against unsigned wrap-around when fewer than 2 indices are present
        prims = n > 1 ? (int)(n - 1) : 0;
        break;
    case D3DPT_TRIANGLELIST:
        prims = (int)(n / 3);
        break;
    case D3DPT_TRIANGLESTRIP:
    case D3DPT_TRIANGLEFAN:
        // guard against unsigned wrap-around when fewer than 3 indices are present
        prims = n > 2 ? (int)(n - 2) : 0;
        break;
    default:
        __assume(0);
    }

    m_dev->DrawIndexedPrimitive(m_state.topology, m_vertex.start, 0, m_vertex.count, m_index.start, prims);
}
void GSDevice9::EndScene()
@ -881,49 +910,49 @@ void GSDevice9::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* verti
}
}
void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t count)
void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
{
ASSERT(m_vertices.count == 0);
ASSERT(m_vertex.count == 0);
if(count * stride > m_vertices.limit * m_vertices.stride)
if(count * stride > m_vertex.limit * m_vertex.stride)
{
m_vb_old = m_vb;
m_vb = NULL;
m_vertices.start = 0;
m_vertices.count = 0;
m_vertices.limit = std::max<int>(count * 3 / 2, 10000);
m_vertex.start = 0;
m_vertex.count = 0;
m_vertex.limit = std::max<int>(count * 3 / 2, 10000);
}
if(m_vb == NULL)
{
HRESULT hr;
hr = m_dev->CreateVertexBuffer(m_vertices.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL);
hr = m_dev->CreateVertexBuffer(m_vertex.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL);
if(FAILED(hr)) return;
}
uint32 flags = D3DLOCK_NOOVERWRITE;
if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride)
if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride)
{
m_vertices.start = 0;
m_vertex.start = 0;
flags = D3DLOCK_DISCARD;
}
void* v = NULL;
void* ptr = NULL;
if(SUCCEEDED(m_vb->Lock(m_vertices.start * stride, count * stride, &v, flags)))
if(SUCCEEDED(m_vb->Lock(m_vertex.start * stride, count * stride, &ptr, flags)))
{
GSVector4i::storent(v, vertices, count * stride);
GSVector4i::storent(ptr, vertex, count * stride);
m_vb->Unlock();
}
m_vertices.count = count;
m_vertices.stride = stride;
m_vertex.count = count;
m_vertex.stride = stride;
IASetVertexBuffer(m_vb, stride);
}
@ -939,6 +968,61 @@ void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride)
}
}
// Copies `count` 32-bit indices into the device's dynamic index buffer and binds it.
//
// index: source array; count * sizeof(uint32) bytes are copied from it.
// count: number of indices to upload; stored in m_index.count on exit.
//
// New data is written at element offset m_index.start using
// D3DLOCK_NOOVERWRITE. If the data would not fit between m_index.start and
// m_index.limit, the write restarts at offset 0 with D3DLOCK_DISCARD. If
// `count` exceeds the current capacity, a larger buffer is created first.
//
// Returns early, leaving m_index.count untouched and nothing bound, when the
// buffer cannot be created.
void GSDevice9::IASetIndexBuffer(const void* index, size_t count)
{
    ASSERT(m_index.count == 0);

    if(count > m_index.limit)
    {
        // The previous buffer is parked in m_ib_old rather than released right
        // away (presumably so it stays alive while earlier draws may still
        // reference it -- TODO confirm).
        m_ib_old = m_ib;
        m_ib = NULL;

        // A freshly created buffer must be filled from its beginning. This
        // mirrors IASetVertexBuffer, which resets its start offset on growth;
        // without it the stale offset of the old buffer would be reused.
        m_index.start = 0;
        m_index.count = 0;
        m_index.limit = std::max<int>(count * 3 / 2, 11000);
    }

    if(m_ib == NULL)
    {
        HRESULT hr;

        hr = m_dev->CreateIndexBuffer(m_index.limit * sizeof(uint32), D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, D3DFMT_INDEX32, D3DPOOL_DEFAULT, &m_ib, NULL);

        if(FAILED(hr)) return;
    }

    uint32 flags = D3DLOCK_NOOVERWRITE;

    if(m_index.start + count > m_index.limit)
    {
        // Not enough room after the current offset: restart at the beginning
        // and discard the previous contents.
        m_index.start = 0;

        flags = D3DLOCK_DISCARD;
    }

    void* ptr = NULL;

    if(SUCCEEDED(m_ib->Lock(m_index.start * sizeof(uint32), count * sizeof(uint32), &ptr, flags)))
    {
        memcpy(ptr, index, count * sizeof(uint32));

        m_ib->Unlock();
    }

    m_index.count = count;

    IASetIndexBuffer(m_ib);
}
// Binds `ib` as the device's index buffer. The last bound buffer is cached in
// m_state.ib so a repeated bind of the same buffer does not reach the device.
void GSDevice9::IASetIndexBuffer(IDirect3DIndexBuffer9* ib)
{
    if(m_state.ib == ib)
    {
        return;
    }

    m_state.ib = ib;

    m_dev->SetIndices(ib);
}
void GSDevice9::IASetInputLayout(IDirect3DVertexDeclaration9* layout)
{
if(m_state.layout != layout)

View File

@ -82,6 +82,8 @@ class GSDevice9 : public GSDeviceDX
CComPtr<IDirect3DSwapChain9> m_swapchain;
CComPtr<IDirect3DVertexBuffer9> m_vb;
CComPtr<IDirect3DVertexBuffer9> m_vb_old;
CComPtr<IDirect3DIndexBuffer9> m_ib;
CComPtr<IDirect3DIndexBuffer9> m_ib_old;
bool m_lost;
D3DFORMAT m_depth_format;
@ -89,6 +91,7 @@ class GSDevice9 : public GSDeviceDX
{
IDirect3DVertexBuffer9* vb;
size_t vb_stride;
IDirect3DIndexBuffer9* ib;
IDirect3DVertexDeclaration9* layout;
D3DPRIMITIVETYPE topology;
IDirect3DVertexShader9* vs;
@ -169,6 +172,7 @@ public:
void BeginScene();
void DrawPrimitive();
void DrawIndexedPrimitive();
void EndScene();
void ClearRenderTarget(GSTexture* t, const GSVector4& c);
@ -191,8 +195,10 @@ public:
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, bool linear = true);
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true);
void IASetVertexBuffer(const void* vertices, size_t stride, size_t count);
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride);
void IASetIndexBuffer(const void* index, size_t count);
void IASetIndexBuffer(IDirect3DIndexBuffer9* ib);
void IASetInputLayout(IDirect3DVertexDeclaration9* layout);
void IASetPrimitiveTopology(D3DPRIMITIVETYPE topology);
void VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int vs_cb_len);
@ -210,7 +216,7 @@ public:
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il);
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DPixelShader9** ps);
void SetupIA(const void* vertices, int count, int prim);
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel) {}
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);

View File

@ -278,7 +278,7 @@ public:
bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode);
virtual void SetupIA(const void* vertices, int count, int prim) = 0;
virtual void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) = 0;
virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0;
virtual void SetupGS(GSSelector sel) = 0;
virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0;

View File

@ -87,7 +87,7 @@ void GSDrawScanline::BeginDraw(const void* param)
sel.tcc = m_global.sel.tcc;
sel.fst = m_global.sel.fst;
sel.fge = m_global.sel.fge;
sel.sprite = m_global.sel.sprite;
sel.prim = m_global.sel.prim;
sel.fb = m_global.sel.fb;
sel.zb = m_global.sel.zb;
sel.zoverflow = m_global.sel.zoverflow;
@ -102,7 +102,7 @@ void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
#ifndef ENABLE_JIT_RASTERIZER
void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
void GSDrawScanline::SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan)
{
GSScanlineSelector sel = m_global.sel;
@ -115,7 +115,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
if(has_z || has_f)
{
if(!sel.sprite)
if(sel.prim != GS_SPRITE_CLASS)
{
if(has_f)
{
@ -145,12 +145,12 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
{
if(has_f)
{
m_local.p.f = GSVector4i(vertices[0].p).zzzzh().zzzz();
m_local.p.f = GSVector4i(vertex.p).zzzzh().zzzz();
}
if(has_z)
{
m_local.p.z = vertices[0].t.u32[3]; // uint32 z is bypassed in t.w
m_local.p.z = vertex.t.u32[3]; // uint32 z is bypassed in t.w
}
}
}
@ -234,7 +234,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
}
else
{
GSVector4i c = GSVector4i(vertices[0].c);
GSVector4i c = GSVector4i(vertex.c);
c = c.upl16(c.zwxy());
@ -271,7 +271,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
if(!sel.sprite)
if(sel.prim != GS_SPRITE_CLASS)
{
if(sel.fwrite && sel.fge)
{
@ -300,7 +300,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
GSVector4i u = vt.xxxx() + GSVector4i::cast(m_local.d[skip].s);
GSVector4i v = vt.yyyy();
if(!sel.sprite || sel.mmin)
if(sel.prim != GS_SPRITE_CLASS || sel.mmin)
{
v += GSVector4i::cast(m_local.d[skip].t);
}
@ -354,7 +354,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
{
za = fza_base->y + fza_offset->y;
if(!sel.sprite)
if(sel.prim != GS_SPRITE_CLASS)
{
GSVector4 z = scan.p.zzzz() + zo;
@ -754,7 +754,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
{
uf = u.xxzzlh().srl16(1);
if(!sel.sprite)
if(sel.prim != GS_SPRITE_CLASS)
{
vf = v.xxzzlh().srl16(1);
}
@ -936,7 +936,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
if(sel.fwrite && sel.fge)
{
GSVector4i fog = !sel.sprite ? f : m_local.p.f;
GSVector4i fog = sel.prim != GS_SPRITE_CLASS ? f : m_local.p.f;
rb = m_global.frb.lerp16<0>(rb, fog);
ga = m_global.fga.lerp16<0>(ga, fog).mix16(ga);
@ -1211,7 +1211,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
fza_offset++;
if(!sel.sprite)
if(sel.prim != GS_SPRITE_CLASS)
{
if(sel.zb)
{
@ -1234,7 +1234,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
s = GSVector4::cast(GSVector4i::cast(s) + stq.xxxx());
if(!sel.sprite || sel.mmin)
if(sel.prim != GS_SPRITE_CLASS || sel.mmin)
{
t = GSVector4::cast(GSVector4i::cast(t) + stq.yyyy());
}

View File

@ -287,7 +287,7 @@ void GSDrawScanlineCodeGenerator::Init()
lea(edi, ptr[ebx * 2]);
add(edi, ptr[&m_local.gd->fzbc]);
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
{
// edx = &m_local.d[skip]
@ -298,7 +298,7 @@ void GSDrawScanlineCodeGenerator::Init()
mov(ebx, ptr[esp + _v]);
}
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
if(m_sel.fwrite && m_sel.fge || m_sel.zb)
{
@ -370,7 +370,7 @@ void GSDrawScanlineCodeGenerator::Init()
vpaddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]);
if(!m_sel.sprite || m_sel.mmin)
if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
{
vpaddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]);
}
@ -455,7 +455,7 @@ void GSDrawScanlineCodeGenerator::Step()
add(edi, 8);
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
// z += m_local.d4.z;
@ -501,7 +501,7 @@ void GSDrawScanlineCodeGenerator::Step()
vpaddd(xmm2, ptr[&m_local.temp.s]);
vmovdqa(ptr[&m_local.temp.s], xmm2);
if(!m_sel.sprite || m_sel.mmin)
if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
{
vpshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
vpaddd(xmm3, ptr[&m_local.temp.t]);
@ -597,7 +597,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
// GSVector4i zs = zi;
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
if(m_sel.zoverflow)
{
@ -733,7 +733,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
vpsrlw(xmm0, 1);
vmovdqa(ptr[&m_local.temp.uf], xmm0);
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
// GSVector4i vf = v.xxzzlh().srl16(1);
@ -2227,7 +2227,7 @@ void GSDrawScanlineCodeGenerator::Fog()
// rb = m_local.gd->frb.lerp16<0>(rb, f);
// ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga);
vmovdqa(xmm0, ptr[!m_sel.sprite ? &m_local.temp.f : &m_local.p.f]);
vmovdqa(xmm0, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.f : &m_local.p.f]);
vmovdqa(xmm1, xmm6);
vmovdqa(xmm2, ptr[&m_local.gd->frb]);
@ -2350,7 +2350,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
bool fast = m_sel.ztest && m_sel.zpsm < 2;
vmovdqa(xmm1, ptr[!m_sel.sprite ? &m_local.temp.zs : &m_local.p.z]);
vmovdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]);
if(fast)
{

View File

@ -287,7 +287,7 @@ void GSDrawScanlineCodeGenerator::Init()
lea(edi, ptr[ebx * 2]);
add(edi, ptr[&m_local.gd->fzbc]);
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
{
// edx = &m_local.d[skip]
@ -298,7 +298,7 @@ void GSDrawScanlineCodeGenerator::Init()
mov(ebx, ptr[esp + _v]);
}
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
if(m_sel.fwrite && m_sel.fge || m_sel.zb)
{
@ -370,7 +370,7 @@ void GSDrawScanlineCodeGenerator::Init()
paddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]);
if(!m_sel.sprite || m_sel.mmin)
if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
{
paddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]);
}
@ -458,7 +458,7 @@ void GSDrawScanlineCodeGenerator::Step()
add(edi, 8);
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
// z += m_local.d4.z;
@ -504,7 +504,7 @@ void GSDrawScanlineCodeGenerator::Step()
paddd(xmm2, ptr[&m_local.temp.s]);
movdqa(ptr[&m_local.temp.s], xmm2);
if(!m_sel.sprite || m_sel.mmin)
if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
{
pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
paddd(xmm3, ptr[&m_local.temp.t]);
@ -602,7 +602,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
// GSVector4i zs = zi;
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
if(m_sel.zoverflow)
{
@ -738,7 +738,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
psrlw(xmm0, 1);
movdqa(ptr[&m_local.temp.uf], xmm0);
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
// GSVector4i vf = v.xxzzlh().srl16(1);
@ -2341,7 +2341,7 @@ void GSDrawScanlineCodeGenerator::Fog()
// rb = m_local.gd->frb.lerp16<0>(rb, f);
// ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga);
movdqa(xmm0, ptr[!m_sel.sprite ? &m_local.temp.f : &m_local.p.f]);
movdqa(xmm0, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.f : &m_local.p.f]);
movdqa(xmm1, xmm6);
movdqa(xmm2, ptr[&m_local.gd->frb]);
@ -2464,7 +2464,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
bool fast = m_sel.ztest && m_sel.zpsm < 2;
movdqa(xmm1, ptr[!m_sel.sprite ? &m_local.temp.zs : &m_local.p.z]);
movdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]);
if(fast)
{

View File

@ -24,10 +24,7 @@
GSDump::GSDump()
: m_gs(NULL)
, m_obj(NULL)
, m_frames(0)
, m_objects(0)
, m_vertices(0)
{
}
@ -39,11 +36,8 @@ GSDump::~GSDump()
void GSDump::Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GSPrivRegSet* regs)
{
m_gs = fopen((fn + ".gs").c_str(), "wb");
m_obj = fopen((fn + ".obj").c_str(), "wt");
m_frames = 0;
m_objects = 0;
m_vertices = 0;
if(m_gs)
{
@ -57,7 +51,6 @@ void GSDump::Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GS
void GSDump::Close()
{
if(m_gs) {fclose(m_gs); m_gs = NULL;}
if(m_obj) {fclose(m_obj); m_obj = NULL;}
}
void GSDump::Transfer(int index, const uint8* mem, size_t size)
@ -96,67 +89,3 @@ void GSDump::VSync(int field, bool last, const GSPrivRegSet* regs)
}
}
}
// Appends one batch of software-renderer vertices to the .obj text dump.
//
// vertices:  array of `count` vertices; p supplies the position and t the
//            texture coordinate written for each vertex.
// count:     number of entries in `vertices`.
// primclass: primitive class of the batch. Only GS_TRIANGLE_CLASS produces
//            output; points, lines and sprites are still TODO.
//
// Does nothing when no .obj file is open (m_obj is NULL).
void GSDump::Object(GSVertexSW* vertices, int count, GS_PRIM_CLASS primclass)
{
    if(m_obj == NULL)
    {
        return;
    }

    if(primclass != GS_TRIANGLE_CLASS)
    {
        // TODO: GS_POINT_CLASS, GS_LINE_CLASS, GS_SPRITE_CLASS

        return;
    }

    // positions

    for(int n = 0; n < count; n++)
    {
        const GSVertexSW& vtx = vertices[n];

        float px = vtx.p.x;
        float py = vtx.p.y;
        float pz = vtx.p.z;

        fprintf(m_obj, "v %f %f %f\n", px, py, pz);
    }

    // texture coordinates

    for(int n = 0; n < count; n++)
    {
        const GSVertexSW& vtx = vertices[n];

        fprintf(m_obj, "vt %f %f %f\n", vtx.t.x, vtx.t.y, vtx.t.z);
    }

    // normals are not available, a zero vector is written for every vertex

    for(int n = 0; n < count; n++)
    {
        fprintf(m_obj, "vn %f %f %f\n", 0.0f, 0.0f, 0.0f);
    }

    // group name: frame, object number, primitive class, vertex count

    fprintf(m_obj, "g f%d_o%d_p%d_v%d\n", m_frames, m_objects, primclass, count);

    // faces: indices in the file are 1-based and continue across batches,
    // hence the running m_vertices offset

    for(int n = 0; n < count; n += 3)
    {
        int i0 = m_vertices + n + 1;
        int i1 = m_vertices + n + 2;
        int i2 = m_vertices + n + 3;

        fprintf(m_obj, "f %d/%d/%d %d/%d/%d %d/%d/%d\n", i0, i0, i0, i1, i1, i1, i2, i2, i2);
    }

    m_vertices += count;
    m_objects++;
}

View File

@ -46,10 +46,7 @@ Regs data (id == 3)
class GSDump
{
FILE* m_gs;
FILE* m_obj;
int m_frames;
int m_objects;
int m_vertices;
public:
GSDump();
@ -60,6 +57,5 @@ public:
void ReadFIFO(uint32 size);
void Transfer(int index, const uint8* mem, size_t size);
void VSync(int field, bool last, const GSPrivRegSet* regs);
void Object(GSVertexSW* vertices, int count, GS_PRIM_CLASS primclass);
operator bool() {return m_gs != NULL;}
};

View File

@ -1992,7 +1992,7 @@ GSOffset::~GSOffset()
{
}
vector<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
uint32* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
{
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
@ -2000,23 +2000,37 @@ vector<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
if(bbox != NULL) *bbox = r;
vector<uint32>* pages = new vector<uint32>();
// worst case:
// bp page-aligned: (w * h) / (64 * 32)
// bp block-aligned: (w * h) / (8 * 8)
// 32-bpp worst case: (w * h) / (64 * 32), it can be a bit more if we are only block-aligned (bp & 31) != 0
int size = r.width() * r.height();
pages->reserve(((r.width() * r.height()) >> 11) + 2);
int limit = std::min<int>((size >> ((bp & 31) != 0 ? 6 : 11)) + 2, MAX_PAGES) + 1;
uint32 tmp[16];
uint32* pages = new uint32[limit];
memset(tmp, 0, sizeof(tmp));
__aligned(uint32, 16) tmp[16];
((GSVector4i*)tmp)[0] = GSVector4i::zero();
((GSVector4i*)tmp)[1] = GSVector4i::zero();
((GSVector4i*)tmp)[2] = GSVector4i::zero();
((GSVector4i*)tmp)[3] = GSVector4i::zero();
r = r.sra32(3);
bs.x >>= 3;
bs.y >>= 3;
uint32* RESTRICT p = pages;
for(int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = block.row[y >> 3];
uint32 base = block.row[y];
for(int x = r.left; x < r.right; x += bs.x)
{
uint32 n = (base + block.col[x >> 3]) >> 5;
uint32 n = (base + block.col[x]) >> 5;
if(n < MAX_PAGES)
{
@ -2027,11 +2041,15 @@ vector<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
{
row |= col;
pages->push_back(n);
*p++ = n;
}
}
}
}
*p++ = EOP;
ASSERT(p - pages <= limit);
return pages;
}

View File

@ -51,7 +51,9 @@ public:
GSOffset(uint32 bp, uint32 bw, uint32 psm);
virtual ~GSOffset();
vector<uint32>* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL);
enum {EOP = 0xffffffff};
uint32* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL);
};
struct GSPixelOffset4

View File

@ -35,7 +35,7 @@ public:
enum counter_t
{
Frame, Prim, Draw, Swizzle, Unswizzle, Fillrate, Quad,
Frame, Prim, PrimNotRendered, Draw, Swizzle, Unswizzle, Fillrate, Quad,
CounterLast,
};

View File

@ -105,12 +105,17 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
{
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
if(data->count == 0) return;
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
m_ds->BeginDraw(data->param);
const GSVertexSW* vertices = data->vertices;
const GSVertexSW* vertices_end = data->vertices + data->count;
const GSVertexSW* vertex = data->vertex;
const GSVertexSW* vertex_end = data->vertex + data->vertex_count;
const uint32* index = data->index;
const uint32* index_end = data->index + data->index_count;
uint32 tmp_index[] = {0, 1, 2};
bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor));
@ -128,33 +133,57 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
if(scissor_test)
{
DrawPoint<true>(vertices, data->count);
DrawPoint<true>(vertex, data->vertex_count, index, data->index_count);
}
else
{
DrawPoint<false>(vertices, data->count);
DrawPoint<false>(vertex, data->vertex_count, index, data->index_count);
}
break;
case GS_LINE_CLASS:
do {DrawLine(vertices); vertices += 2;}
while(vertices < vertices_end);
if(index != NULL)
{
do {DrawLine(vertex, index); index += 2;}
while(index < index_end);
}
else
{
do {DrawLine(vertex, tmp_index); vertex += 2;}
while(vertex < vertex_end);
}
break;
case GS_TRIANGLE_CLASS:
do {DrawTriangle(vertices); vertices += 3;}
while(vertices < vertices_end);
if(index != NULL)
{
do {DrawTriangle(vertex, index); index += 3;}
while(index < index_end);
}
else
{
do {DrawTriangle(vertex, tmp_index); vertex += 3;}
while(vertex < vertex_end);
}
break;
case GS_SPRITE_CLASS:
do {DrawSprite(vertices, data->solidrect); vertices += 2;}
while(vertices < vertices_end);
if(index != NULL)
{
do {DrawSprite(vertex, index, data->solidrect); index += 2;}
while(index < index_end);
}
else
{
do {DrawSprite(vertex, tmp_index, data->solidrect); vertex += 2;}
while(vertex < vertex_end);
}
break;
@ -171,11 +200,13 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
}
template<bool scissor_test>
void GSRasterizer::DrawPoint(const GSVertexSW* v, int count)
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count)
{
for(; count > 0; count--, v++)
for(int i = 0, count = index != NULL ? index_count : vertex_count; i < count; i++)
{
GSVector4i p(v->p);
const GSVertexSW& v = vertex[index != NULL ? index[i] : i];
GSVector4i p(v.p);
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{
@ -183,17 +214,20 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, int count)
{
m_pixels++;
m_ds->SetupPrim(v, *v);
m_ds->SetupPrim(v, v);
m_ds->DrawScanline(1, p.x, p.y, *v);
m_ds->DrawScanline(1, p.x, p.y, v);
}
}
}
}
void GSRasterizer::DrawLine(const GSVertexSW* v)
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
{
GSVertexSW dv = v[1] - v[0];
const GSVertexSW& v0 = vertex[index[0]];
const GSVertexSW& v1 = vertex[index[1]];
GSVertexSW dv = v1 - v0;
GSVector4 dp = dv.p.abs();
@ -201,10 +235,10 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
if(m_ds->HasEdge())
{
DrawEdge(v[0], v[1], dv, i, 0);
DrawEdge(v[0], v[1], dv, i, 1);
DrawEdge(v0, v1, dv, i, 0);
DrawEdge(v0, v1, dv, i, 1);
Flush(v, GSVertexSW::zero(), true);
Flush(v1, GSVertexSW::zero(), true);
return;
}
@ -217,19 +251,19 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
{
// shortcut for horizontal lines
GSVector4 mask = (v[0].p > v[1].p).xxxx();
GSVector4 mask = (v0.p > v1.p).xxxx();
GSVertexSW scan;
scan.p = v[0].p.blend32(v[1].p, mask);
scan.t = v[0].t.blend32(v[1].t, mask);
scan.c = v[0].c.blend32(v[1].c, mask);
scan.p = v0.p.blend32(v1.p, mask);
scan.t = v0.t.blend32(v1.t, mask);
scan.c = v0.c.blend32(v1.c, mask);
GSVector4i p(scan.p);
if(m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y))
{
GSVector4 lrf = scan.p.upl(v[1].p.blend32(v[0].p, mask)).ceil();
GSVector4 lrf = scan.p.upl(v1.p.blend32(v0.p, mask)).ceil();
GSVector4 l = lrf.max(m_fscissor_x);
GSVector4 r = lrf.min(m_fscissor_x);
GSVector4i lr = GSVector4i(l.xxyy(r));
@ -247,7 +281,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
scan += dscan * (l - scan.p).xxxx();
m_ds->SetupPrim(v, dscan);
m_ds->SetupPrim(v1, dscan);
m_ds->DrawScanline(pixels, left, p.y, scan);
}
@ -261,7 +295,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
if(steps > 0)
{
GSVertexSW edge = v[0];
GSVertexSW edge = v0;
GSVertexSW dedge = dv / GSVector4(dp.v[i]);
GSVertexSW* RESTRICT e = m_edge.buff;
@ -287,7 +321,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
m_edge.count = e - m_edge.buff;
Flush(v, GSVertexSW::zero());
Flush(v1, GSVertexSW::zero());
}
}
@ -303,42 +337,47 @@ static const uint8 s_ysort[8][4] =
{2, 1, 0, 0}, // y2 < y1 < y0
};
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
{
GSVertexSW v[3];
GSVertexSW dv[3];
GSVertexSW edge;
GSVertexSW dedge;
GSVertexSW dscan;
GSVector4 y0011 = vertices[0].p.yyyy(vertices[1].p);
GSVector4 y1221 = vertices[1].p.yyyy(vertices[2].p).xzzx();
GSVector4 y0011 = vertex[index[0]].p.yyyy(vertex[index[1]].p);
GSVector4 y1221 = vertex[index[1]].p.yyyy(vertex[index[2]].p).xzzx();
int mask = (y0011 > y1221).mask() & 7;
int m1 = (y0011 > y1221).mask() & 7;
v[0] = vertices[s_ysort[mask][0]];
v[1] = vertices[s_ysort[mask][1]];
v[2] = vertices[s_ysort[mask][2]];
int i[3];
y0011 = v[0].p.yyyy(v[1].p);
y1221 = v[1].p.yyyy(v[2].p).xzzx();
i[0] = index[s_ysort[m1][0]];
i[1] = index[s_ysort[m1][1]];
i[2] = index[s_ysort[m1][2]];
int i = (y0011 == y1221).mask() & 7;
const GSVertexSW& v0 = vertex[i[0]];
const GSVertexSW& v1 = vertex[i[1]];
const GSVertexSW& v2 = vertex[i[2]];
y0011 = v0.p.yyyy(v1.p);
y1221 = v1.p.yyyy(v2.p).xzzx();
m1 = (y0011 == y1221).mask() & 7;
// if(i == 0) => y0 < y1 < y2
// if(i == 1) => y0 == y1 < y2
// if(i == 4) => y0 < y1 == y2
if(i == 7) return; // y0 == y1 == y2
if(m1 == 7) return; // y0 == y1 == y2
GSVector4 tbf = y0011.xzxz(y1221).ceil();
GSVector4 tbmax = tbf.max(m_fscissor_y);
GSVector4 tbmin = tbf.min(m_fscissor_y);
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin));
dv[0] = v[1] - v[0];
dv[1] = v[2] - v[0];
dv[2] = v[2] - v[1];
dv[0] = v1 - v0;
dv[1] = v2 - v0;
dv[2] = v2 - v1;
GSVector4 cross = dv[0].p * dv[1].p.yxwz();
@ -346,11 +385,11 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
// the longest horizontal span would be cross.x / dv[1].p.y, but we don't need its actual value
int j = cross.upl(cross == GSVector4::zero()).mask();
int m2 = cross.upl(cross == GSVector4::zero()).mask();
if(j & 2) return;
if(m2 & 2) return;
j &= 1;
m2 &= 1;
cross = cross.rcpnr();
@ -390,42 +429,42 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0
dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1
if(i & 1)
if(m1 & 1)
{
if(tb.y < tb.w)
{
edge = v[1 - j];
edge = vertex[i[1 - m2]];
edge.p = edge.p.insert<0, 1>(v[j].p);
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
edge.p = edge.p.insert<0, 1>(vertex[i[m2]].p);
dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, v[1 - j].p);
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
}
}
else
{
if(tb.x < tb.z)
{
edge = v[0];
edge = v0;
edge.p = edge.p.xxzw();
dedge.p = ddx[j].xyzw(dedge.p);
dedge.p = ddx[m2].xyzw(dedge.p);
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v[0].p);
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p);
}
if(tb.y < tb.w)
{
edge = v[1];
edge = v1;
edge.p = (v[0].p.xxxx() + ddx[j] * dv[0].p.yyyy()).xyzw(edge.p);
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
edge.p = (v0.p.xxxx() + ddx[m2] * dv[0].p.yyyy()).xyzw(edge.p);
dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v[1].p);
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p);
}
}
Flush(v, dscan);
Flush(vertex[index[2]], dscan);
if(m_ds->HasEdge())
{
@ -433,14 +472,14 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
GSVector4 b = dx < GSVector4::zero(); // dx < 0
GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0
int i = a.mask();
int j = ((a | b) ^ c).mask() ^ 2; // evil
int orientation = a.mask();
int side = ((a | b) ^ c).mask() ^ 2; // evil
DrawEdge(v[0], v[1], dv[0], i & 1, j & 1);
DrawEdge(v[0], v[2], dv[1], i & 2, j & 2);
DrawEdge(v[1], v[2], dv[2], i & 4, j & 4);
DrawEdge(v0, v1, dv[0], orientation & 1, side & 1);
DrawEdge(v0, v2, dv[1], orientation & 2, side & 2);
DrawEdge(v1, v2, dv[2], orientation & 4, side & 4);
Flush(v, GSVertexSW::zero(), true);
Flush(vertex[index[2]], GSVertexSW::zero(), true);
}
}
@ -492,18 +531,21 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
m_edge.count += e - &m_edge.buff[m_edge.count];
}
void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect)
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index, bool solidrect)
{
const GSVertexSW& v0 = vertex[index[0]];
const GSVertexSW& v1 = vertex[index[1]];
GSVector4 mask = (v0.p < v1.p).xyzw(GSVector4::zero());
GSVertexSW v[2];
GSVector4 mask = (vertices[0].p < vertices[1].p).xyzw(GSVector4::zero());
v[0].p = v1.p.blend32(v0.p, mask);
v[0].t = v1.t.blend32(v0.t, mask);
v[0].c = v1.c;
v[0].p = vertices[1].p.blend32(vertices[0].p, mask);
v[0].t = vertices[1].t.blend32(vertices[0].t, mask);
v[0].c = vertices[1].c;
v[1].p = vertices[0].p.blend32(vertices[1].p, mask);
v[1].t = vertices[0].t.blend32(vertices[1].t, mask);
v[1].p = v0.p.blend32(v1.p, mask);
v[1].t = v0.t.blend32(v1.t, mask);
GSVector4i r(v[0].p.xyxy(v[1].p).ceil());
@ -515,17 +557,6 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect)
if(solidrect)
{
/*
if(m_id == 0)
{
m_ds->DrawRect(r, scan);
m_pixels += r.width() * r.height();
}
return;
*/
if(m_threads == 1)
{
m_ds->DrawRect(r, scan);
@ -570,7 +601,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect)
if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy();
if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx();
m_ds->SetupPrim(v, dscan);
m_ds->SetupPrim(v1, dscan);
while(1)
{
@ -787,7 +818,7 @@ void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, con
e->p.i16[2] = (int16)top;
}
void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge)
void GSRasterizer::Flush(const GSVertexSW& vertex, const GSVertexSW& dscan, bool edge)
{
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
@ -795,7 +826,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
if(count > 0)
{
m_ds->SetupPrim(vertices, dscan);
m_ds->SetupPrim(vertex, dscan);
const GSVertexSW* RESTRICT e = m_edge.buff;
const GSVertexSW* RESTRICT ee = e + count;
@ -882,19 +913,7 @@ void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item)
{
m_solidrect_count++;
}
/*
if(m_workers.size() > 1 && item->solidrect) // TODO: clip to thread area and dispatch?
{
for(size_t i = 0; i < m_workers.size(); i++)
{
m_workers[i]->Wait();
}
m_workers.front()->Process(item);
return;
}
*/
if(item->syncpoint)
{
for(size_t i = 0; i < m_workers.size(); i++)

View File

@ -34,8 +34,11 @@ public:
GSVector4i scissor;
GSVector4i bbox;
GS_PRIM_CLASS primclass;
GSVertexSW* vertices;
int count;
uint8* buff;
GSVertexSW* vertex;
int vertex_count;
uint32* index;
int index_count;
bool solidrect;
bool syncpoint;
uint64 frame;
@ -50,8 +53,11 @@ public:
: scissor(GSVector4i::zero())
, bbox(GSVector4i::zero())
, primclass(GS_INVALID_CLASS)
, vertices(NULL)
, count(0)
, buff(NULL)
, vertex(NULL)
, vertex_count(0)
, index(NULL)
, index_count(0)
, solidrect(false)
, syncpoint(false)
, frame(0)
@ -63,7 +69,7 @@ public:
virtual ~GSRasterizerData()
{
if(vertices != NULL) _aligned_free(vertices);
if(buff != NULL) _aligned_free(buff);
// derived class should free param and its members
}
@ -72,7 +78,7 @@ public:
class IDrawScanline : public GSAlignedClass<32>
{
public:
typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW& vertex, const GSVertexSW& dscan);
typedef void (__fastcall *DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan);
typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit
@ -91,14 +97,14 @@ public:
#ifdef ENABLE_JIT_RASTERIZER
__forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);}
__forceinline void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan) {m_sp(vertex, dscan);}
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);}
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);}
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
#else
virtual void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) = 0;
virtual void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan) = 0;
virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0;
virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0;
virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0;
@ -134,17 +140,17 @@ protected:
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
template<bool scissor_test>
void DrawPoint(const GSVertexSW* v, int count);
void DrawLine(const GSVertexSW* v);
void DrawTriangle(const GSVertexSW* v);
void DrawSprite(const GSVertexSW* v, bool solidrect);
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count);
void DrawLine(const GSVertexSW* vertex, const uint32* index);
void DrawTriangle(const GSVertexSW* vertex, const uint32* index);
void DrawSprite(const GSVertexSW* vertex, const uint32* index, bool solidrect);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0);
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge = false);
__forceinline void Flush(const GSVertexSW& vertex, const GSVertexSW& dscan, bool edge = false);
public:
GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);

View File

@ -22,9 +22,8 @@
#include "stdafx.h"
#include "GSRenderer.h"
GSRenderer::GSRenderer()
: GSState()
, m_vt(this)
GSRenderer::GSRenderer(GSVertexTrace* vt, size_t vertex_stride)
: GSState(vt, vertex_stride)
, m_dev(NULL)
, m_shader(0)
{
@ -78,8 +77,6 @@ bool GSRenderer::CreateDevice(GSDevice* dev)
void GSRenderer::ResetDevice()
{
ResetPrim();
if(m_dev) m_dev->Reset(1, 1);
}
@ -336,7 +333,7 @@ void GSRenderer::VSync(int field)
theApp.m_gs_interlace[m_interlace].name.c_str(),
theApp.m_gs_aspectratio[m_aspectratio].name.c_str(),
(int)m_perfmon.Get(GSPerfMon::Quad),
(int)m_perfmon.Get(GSPerfMon::Prim),
(int)(m_perfmon.Get(GSPerfMon::Prim) - m_perfmon.Get(GSPerfMon::PrimNotRendered)),
(int)m_perfmon.Get(GSPerfMon::Draw),
m_perfmon.CPU(),
m_perfmon.Get(GSPerfMon::Swizzle) / 1024,
@ -541,308 +538,3 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
#endif
}
}
// Computes into r the rectangle of texels (in texel units) that the current draw may
// read from the texture described by TEX0, given the wrap/clamp setup in CLAMP and the
// texture-coordinate range recorded in m_vt. The result is always intersected with the
// full texture rectangle (0, 0, 1 << TW, 1 << TH).
// r      - receives the rectangle
// TEX0   - supplies TW/TH (log2 of the texture width/height)
// CLAMP  - supplies WMS/WMT wrap modes and the MINU/MAXU/MINV/MAXV region values
// linear - when true the coordinate range is widened before it is converted to texels
void GSRenderer::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear)
{
int tw = TEX0.TW;
int th = TEX0.TH;
int w = 1 << tw;
int h = 1 << th;
// tr: the whole texture; vr: the working rectangle, narrowed step by step below
GSVector4i tr(0, 0, w, h);
int wms = CLAMP.WMS;
int wmt = CLAMP.WMT;
int minu = (int)CLAMP.MINU;
int minv = (int)CLAMP.MINV;
int maxu = (int)CLAMP.MAXU;
int maxv = (int)CLAMP.MAXV;
GSVector4i vr = tr;
// First pass: limits that follow from the wrap mode alone (horizontal axis)
switch(wms)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
// region clamp: restrict to [minu, maxu + 1)
if(vr.x < minu) vr.x = minu;
if(vr.z > maxu + 1) vr.z = maxu + 1;
break;
case CLAMP_REGION_REPEAT:
// region repeat: the rectangle starts at maxu and is minu + 1 texels wide
vr.x = maxu;
vr.z = vr.x + (minu + 1);
break;
default:
__assume(0);
}
// Same as above for the vertical axis
switch(wmt)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
if(vr.y < minv) vr.y = minv;
if(vr.w > maxv + 1) vr.w = maxv + 1;
break;
case CLAMP_REGION_REPEAT:
vr.y = maxv;
vr.w = vr.y + (minv + 1);
break;
default:
__assume(0);
}
// Second pass: narrow further using the traced coordinate range. Skipped only when
// wms + wmt == 6, i.e. (with CLAMP_REGION_REPEAT presumably being 3) both axes region-repeat.
if(wms + wmt < 6)
{
// st = (min.x, min.y, max.x, max.y) of the traced texture coordinates
GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t);
if(linear)
{
// widen by 0x8000 on each side; with the >> 16 below this looks like half a texel
// in 16.16 fixed point (NOTE(review): confirm the coordinate format)
st += GSVector4(-0x8000, 0x8000).xxyy();
}
GSVector4i uv = GSVector4i(st).sra32(16);
GSVector4i u, v;
int mask = 0;
if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT)
{
// u/v: coordinates wrapped into the texture (low tw/th bits);
// uu/vv: the remaining high bits, i.e. which repetition of the texture each corner is in
u = uv & GSVector4i::xffffffff().srl32(32 - tw);
v = uv & GSVector4i::xffffffff().srl32(32 - th);
GSVector4i uu = uv.sra32(tw);
GSVector4i vv = uv.sra32(th);
// presumably a per-byte mask telling whether min and max fall into the same repetition
// (bits 0x000f for U, 0xf000 for V) — TODO confirm against GSVector4i::upl32/uph32/mask
mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
}
uv = uv.rintersect(tr);
switch(wms)
{
case CLAMP_REPEAT:
// only narrow when the mask bits for U are set; otherwise keep the full width
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.x < uv.x) vr.x = uv.x;
if(vr.z > uv.z + 1) vr.z = uv.z + 1;
break;
case CLAMP_REGION_REPEAT:
break;
default:
__assume(0);
}
switch(wmt)
{
case CLAMP_REPEAT:
// only narrow when the mask bits for V are set; otherwise keep the full height
if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.y < uv.y) vr.y = uv.y;
if(vr.w > uv.w + 1) vr.w = uv.w + 1;
break;
case CLAMP_REGION_REPEAT:
break;
default:
__assume(0);
}
}
// never report anything outside the texture itself
r = vr.rintersect(tr);
}
void GSRenderer::GetAlphaMinMax()
{
if(m_vt.m_alpha.valid)
{
return;
}
const GSDrawingEnvironment& env = m_env;
const GSDrawingContext* context = m_context;
GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww();
if(PRIM->TME && context->TEX0.TCC)
{
switch(GSLocalMemory::m_psm[context->TEX0.PSM].fmt)
{
case 0:
a.y = 0;
a.w = 0xff;
break;
case 1:
a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0;
a.w = env.TEXA.TA0;
break;
case 2:
a.y = env.TEXA.AEM ? 0 : min(env.TEXA.TA0, env.TEXA.TA1);
a.w = max(env.TEXA.TA0, env.TEXA.TA1);
break;
case 3:
m_mem.m_clut.GetAlphaMinMax32(a.y, a.w);
break;
default:
__assume(0);
}
switch(context->TEX0.TFX)
{
case TFX_MODULATE:
a.x = (a.x * a.y) >> 7;
a.z = (a.z * a.w) >> 7;
if(a.x > 0xff) a.x = 0xff;
if(a.z > 0xff) a.z = 0xff;
break;
case TFX_DECAL:
a.x = a.y;
a.z = a.w;
break;
case TFX_HIGHLIGHT:
a.x = a.x + a.y;
a.z = a.z + a.w;
if(a.x > 0xff) a.x = 0xff;
if(a.z > 0xff) a.z = 0xff;
break;
case TFX_HIGHLIGHT2:
a.x = a.y;
a.z = a.w;
break;
default:
__assume(0);
}
}
m_vt.m_alpha.min = a.x;
m_vt.m_alpha.max = a.z;
m_vt.m_alpha.valid = true;
}
// Tries to decide the alpha test for the whole draw from the known alpha range.
// Returns false when the outcome differs per pixel (the range straddles AREF); fm and zm
// are left untouched in that case. Returns true when every pixel passes or every pixel
// fails; on a uniform failure fm / zm are updated according to TEST.AFAIL so the writes
// the failing test would suppress are masked out.
bool GSRenderer::TryAlphaTest(uint32& fm, uint32& zm)
{
	const GSDrawingContext* ctx = m_context;

	bool pass = true;

	if(ctx->TEST.ATST == ATST_NEVER)
	{
		pass = false;
	}
	else if(ctx->TEST.ATST != ATST_ALWAYS)
	{
		// only the comparing modes need the alpha range (ATST_NEVER / ATST_ALWAYS never get here)
		GetAlphaMinMax();

		const int lo = m_vt.m_alpha.min;
		const int hi = m_vt.m_alpha.max;
		const int ref = ctx->TEST.AREF;

		// Each case: first condition => all pass, second => all fail, otherwise undecidable.
		switch(ctx->TEST.ATST)
		{
		case ATST_LESS:
			if(hi < ref) {pass = true; break;}
			if(lo >= ref) {pass = false; break;}
			return false;

		case ATST_LEQUAL:
			if(hi <= ref) {pass = true; break;}
			if(lo > ref) {pass = false; break;}
			return false;

		case ATST_EQUAL:
			if(lo == ref && hi == ref) {pass = true; break;}
			if(lo > ref || hi < ref) {pass = false; break;}
			return false;

		case ATST_GEQUAL:
			if(lo >= ref) {pass = true; break;}
			if(hi < ref) {pass = false; break;}
			return false;

		case ATST_GREATER:
			if(lo > ref) {pass = true; break;}
			if(hi <= ref) {pass = false; break;}
			return false;

		case ATST_NOTEQUAL:
			if(lo == ref && hi == ref) {pass = false; break;}
			if(lo > ref || hi < ref) {pass = true; break;}
			return false;

		default:
			__assume(0);
		}
	}

	if(pass)
	{
		return true;
	}

	// uniform failure: apply the AFAIL policy to the frame / depth write masks
	switch(ctx->TEST.AFAIL)
	{
	case AFAIL_KEEP:
		fm = zm = 0xffffffff;
		break;

	case AFAIL_FB_ONLY:
		zm = 0xffffffff;
		break;

	case AFAIL_ZB_ONLY:
		fm = 0xffffffff;
		break;

	case AFAIL_RGB_ONLY:
		fm |= 0xff000000;
		zm = 0xffffffff;
		break;

	default:
		__assume(0);
	}

	return true;
}
bool GSRenderer::IsOpaque()
{
if(PRIM->AA1)
{
return false;
}
if(!PRIM->ABE)
{
return true;
}
const GSDrawingContext* context = m_context;
int amin = 0, amax = 0xff;
if(context->ALPHA.A != context->ALPHA.B)
{
if(context->ALPHA.C == 0)
{
GetAlphaMinMax();
amin = m_vt.m_alpha.min;
amax = m_vt.m_alpha.max;
}
else if(context->ALPHA.C == 1)
{
if(context->FRAME.PSM == PSM_PSMCT24 || context->FRAME.PSM == PSM_PSMZ24)
{
amin = amax = 0x80;
}
}
else if(context->ALPHA.C == 2)
{
amin = amax = context->ALPHA.FIX;
}
}
return context->ALPHA.IsOpaque(amin, amax);
}

View File

@ -24,8 +24,6 @@
#include "GSdx.h"
#include "GSWnd.h"
#include "GSState.h"
#include "GSVertexTrace.h"
#include "GSVertexList.h"
#include "GSCapture.h"
class GSRenderer : public GSState
@ -49,15 +47,6 @@ protected:
virtual GSTexture* GetOutput(int i) = 0;
GSVertexTrace m_vt;
// following functions need m_vt to be initialized
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
void GetAlphaMinMax();
bool TryAlphaTest(uint32& fm, uint32& zm);
bool IsOpaque();
public:
GSWnd m_wnd;
GSDevice* m_dev;
@ -67,10 +56,9 @@ public:
bool s_save;
bool s_savez;
int s_saven;
GSCritSec s_lock;
public:
GSRenderer();
GSRenderer(GSVertexTrace* vt, size_t vertex_stride);
virtual ~GSRenderer();
virtual bool CreateWnd(const string& title, int w, int h);
@ -93,157 +81,4 @@ public:
GSCritSec m_pGSsetTitle_Crit;
char m_GStitleInfoBuffer[128];
};
// Renderer base that collects vertices of type Vertex.
// Incoming vertices are queued in m_vl; DrawingKick() assembles complete primitives from
// that queue into the growable array m_vertices, and FlushPrim() hands the accumulated
// m_count vertices to Draw(), which the derived class implements.
template<class Vertex> class GSRendererT : public GSRenderer
{
protected:
	Vertex* m_vertices;			// 16-byte aligned vertex array, owned by this object
	int m_count;				// vertices currently stored in m_vertices
	int m_maxcount;				// growth threshold; the allocation holds 100 more elements than this
	GSVertexList<Vertex> m_vl;	// vertices queued for the primitive being assembled

	// Drops all accumulated and queued vertices, then resets the base renderer.
	void Reset()
	{
		m_count = 0;
		m_vl.RemoveAll();

		GSRenderer::Reset();
	}

	// Discards the vertices queued for the primitive in progress.
	void ResetPrim()
	{
		m_vl.RemoveAll();
	}

	// Draws the accumulated vertices (if any) and empties the array.
	void FlushPrim()
	{
		if(m_count == 0) return;

		if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3)
		{
			// FIXME: berserk fpsm = 27 (8H)

			if(!m_dev->IsLost())
			{
				m_vt.Update(m_vertices, m_count, GSUtil::GetPrimClass(PRIM->PRIM));

				Draw();
			}

			// counted even when the device is lost and nothing was drawn
			m_perfmon.Put(GSPerfMon::Draw, 1);
		}

		m_count = 0;
	}

	// Enlarges m_vertices to 1.5x its threshold (at least 10000 elements) and keeps the
	// existing contents.
	void GrowVertexBuffer()
	{
		// Elements allocated beyond m_maxcount. DrawingKick() only grows the array when
		// m_count >= m_maxcount, so a primitive started just below the threshold is
		// written into this slack.
		const int slack = 100;

		int maxcount = std::max<int>(m_maxcount * 3 / 2, 10000);

		Vertex* vertices = (Vertex*)_aligned_malloc(sizeof(Vertex) * maxcount, 16);

		if(m_vertices != NULL)
		{
			// Copy the slack as well: vertices stored past m_maxcount would otherwise be
			// lost when the array grows (assumes the caller advances m_count by the
			// primitive's vertex count after DrawingKick — verify against callers).
			int copycount = std::min<int>(m_maxcount + slack, maxcount);

			memcpy(vertices, m_vertices, sizeof(Vertex) * copycount);

			_aligned_free(m_vertices);
		}

		m_vertices = vertices;
		m_maxcount = maxcount - slack;
	}

	// Assembles one primitive of type prim from the queued vertices.
	// count receives the number of vertices the primitive consists of. When enough
	// vertices are queued they are copied to m_vertices[m_count...] and the queue is
	// advanced as the primitive type requires (lists are emptied, strips and fans keep
	// the vertices they share with the next primitive). m_count itself is not changed here.
	// Returns a pointer to the drawing vertex. Can return NULL! (not enough vertices yet,
	// skip is set, or prim is GS_INVALID)
	template<uint32 prim> __forceinline Vertex* DrawingKick(bool skip, int& count)
	{
		switch(prim)
		{
		case GS_POINTLIST: count = 1; break;
		case GS_LINELIST: count = 2; break;
		case GS_LINESTRIP: count = 2; break;
		case GS_TRIANGLELIST: count = 3; break;
		case GS_TRIANGLESTRIP: count = 3; break;
		case GS_TRIANGLEFAN: count = 3; break;
		case GS_SPRITE: count = 2; break;
		case GS_INVALID: count = 1; break;
		default: __assume(0);
		}

		if(m_vl.GetCount() < count)
		{
			return NULL;
		}

		if(m_count >= m_maxcount)
		{
			GrowVertexBuffer();
		}

		Vertex* v = &m_vertices[m_count];

		switch(prim)
		{
		case GS_POINTLIST:
			m_vl.GetAt(0, v[0]);
			m_vl.RemoveAll();
			break;
		case GS_LINELIST:
			m_vl.GetAt(0, v[0]);
			m_vl.GetAt(1, v[1]);
			m_vl.RemoveAll();
			break;
		case GS_LINESTRIP:
			m_vl.GetAt(0, v[0]);
			m_vl.GetAt(1, v[1]);
			m_vl.RemoveAt(0, 1);
			break;
		case GS_TRIANGLELIST:
			m_vl.GetAt(0, v[0]);
			m_vl.GetAt(1, v[1]);
			m_vl.GetAt(2, v[2]);
			m_vl.RemoveAll();
			break;
		case GS_TRIANGLESTRIP:
			m_vl.GetAt(0, v[0]);
			m_vl.GetAt(1, v[1]);
			m_vl.GetAt(2, v[2]);
			m_vl.RemoveAt(0, 2);
			break;
		case GS_TRIANGLEFAN:
			m_vl.GetAt(0, v[0]);
			m_vl.GetAt(1, v[1]);
			m_vl.GetAt(2, v[2]);
			m_vl.RemoveAt(1, 1);
			break;
		case GS_SPRITE:
			m_vl.GetAt(0, v[0]);
			m_vl.GetAt(1, v[1]);
			m_vl.RemoveAll();
			break;
		case GS_INVALID:
			ASSERT(0);
			m_vl.RemoveAll();
			return NULL;
		default:
			__assume(0);
		}

		// the queue has been consumed either way; a skipped primitive is just not reported
		return !skip ? v : NULL;
	}

	// Renders m_vertices[0 .. m_count); called from FlushPrim().
	virtual void Draw() = 0;

public:
	GSRendererT()
		: GSRenderer()
		, m_vertices(NULL)
		, m_count(0)
		, m_maxcount(0)
	{
	}

	virtual ~GSRendererT()
	{
		if(m_vertices) _aligned_free(m_vertices);
	}
};
};

View File

@ -21,3 +21,411 @@
#include "stdafx.h"
#include "GSRendererDX.h"
#include "GSDeviceDX.h"
// Forwards the vertex trace, vertex stride and texture cache to GSRendererHW, stores the
// pixel center offset, starts with no topology selected (-1) and reads the "logz", "fba"
// and "UserHacks_AlphaHack" settings from the application configuration.
GSRendererDX::GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter)
	: GSRendererHW(vt, vertex_stride, tc)
	, m_pixelcenter(pixelcenter)
	, m_topology(-1)
{
	// any non-zero configuration value enables the option
	m_logz = theApp.GetConfig("logz", 0) != 0;
	m_fba = theApp.GetConfig("fba", 1) != 0;

	// the half pixel offset hack is currently not read from the configuration:
	//UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);

	UserHacks_AlphaHack = theApp.GetConfig("UserHacks_AlphaHack", 0) != 0;
}
// Empty destructor body: nothing is released here.
GSRendererDX::~GSRendererDX() {}
// Draws the batched vertices/indices (m_vertex / m_index) into rt and ds through the DX device.
// Steps: optional destination alpha test (DATE) preparation, building the output-merger,
// vertex-, geometry- and pixel-shader selectors and constant buffers from the current
// drawing context, binding all state, then up to two draw passes (TEST.DoFirstPass /
// TEST.DoSecondPass), each optionally followed by a "negative" colclip pass.
// rt  - render target (must be non-NULL, it is dereferenced immediately)
// ds  - depth/stencil target
// tex - source texture, or NULL for untextured drawing
void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
const GSVector2i& rtsize = rt->GetSize();
const GSVector2& rtscale = rt->GetScale();
// destination alpha test is not used when the frame buffer format is PSMCT24
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
GSTexture* rtcopy = NULL;
ASSERT(m_dev != NULL);
GSDeviceDX* dev = (GSDeviceDX*)m_dev;
if(DATE)
{
if(dev->HasStencil())
{
// Build a quad over the traced vertex bounding box (grown by one unit on each side),
// scaled by rtscale / rtsize; src is presumably clamped to [0, 1] by sat() and dst
// maps it to [-1, 1] — TODO confirm GSVector4::sat and the 2-argument constructor.
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
GSVector4 o = GSVector4(-1.0f, 1.0f);
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 dst = src * 2.0f + o.xxxx();
GSVertexPT1 vertices[] =
{
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
};
dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
}
else
{
// no stencil: copy the whole render target; the copy is bound as pixel shader
// resource 2 further down and recycled at the end
rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());
// I'll use VertexTrace when I consider it more trustworthy
dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
}
}
//
dev->BeginScene();
// om
GSDeviceDX::OMDepthStencilSelector om_dssel;
if(context->TEST.ZTE)
{
om_dssel.ztst = context->TEST.ZTST;
om_dssel.zwe = !context->ZBUF.ZMSK;
}
else
{
om_dssel.ztst = ZTST_ALWAYS;
}
if(m_fba)
{
om_dssel.fba = context->FBA.FBA;
}
GSDeviceDX::OMBlendSelector om_bsel;
// blend state is only filled in when the draw is not known to be opaque
if(!IsOpaque())
{
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt->m_primclass == GS_LINE_CLASS;
om_bsel.a = context->ALPHA.A;
om_bsel.b = context->ALPHA.B;
om_bsel.c = context->ALPHA.C;
om_bsel.d = context->ALPHA.D;
if(env.PABE.PABE)
{
if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
{
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
om_bsel.abe = 0;
}
else
{
//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
//ASSERT(0);
}
}
}
// colour write mask derived from FRAME.FBMSK; presumably a channel is write-disabled only
// when its mask byte is 0xff — TODO confirm eq8()/mask() semantics
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
// vs
GSDeviceDX::VSSelector vs_sel;
vs_sel.tme = PRIM->TME;
vs_sel.fst = PRIM->FST;
// logz is only used when the device has no 32-bit depth support and the option is enabled
vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0;
vs_sel.rtcopy = !!rtcopy;
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
// We are probably receiving bad coordinates from VU1 in these cases.
if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
{
if(context->ZBUF.PSM == PSM_PSMZ24)
{
if(m_vt->m_max.p.z > 0xffffff)
{
ASSERT(m_vt->m_min.p.z > 0xffffff);
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
if (m_vt->m_min.p.z > 0xffffff)
{
// every traced z is above the 24-bit range: flag it for the vertex shader (bppz = 1)
// and stop depth testing
vs_sel.bppz = 1;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
{
if(m_vt->m_max.p.z > 0xffff)
{
ASSERT(m_vt->m_min.p.z > 0xffff); // sfex capcom logo
// Fixme : Same as above, I guess.
if (m_vt->m_min.p.z > 0xffff)
{
// same as above for the 16-bit range (bppz = 2)
vs_sel.bppz = 2;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
}
GSDeviceDX::VSConstantBuffer vs_cb;
// sx/sy scale vertex coordinates using rtscale and the target size shifted left by 4
// (presumably 4 fractional bits in the coordinates); ox/oy are the XYOFFSET values
float sx = 2.0f * rtscale.x / (rtsize.x << 4);
float sy = 2.0f * rtscale.y / (rtsize.y << 4);
float ox = (float)(int)context->XYOFFSET.OFX;
float oy = (float)(int)context->XYOFFSET.OFY;
float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;
//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
//because DX10 and DX9 have a different pixel center.)
//
//The resulting shifted output aligns better with common blending / corona / blurring effects,
//but introduces a few bad pixels on the edges.
if(rt->LikelyOffset)
{
// DX9 has pixelcenter set to 0.0, so give it some value here
if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
ox2 *= rt->OffsetHack_modx;
oy2 *= rt->OffsetHack_mody;
}
vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);
// gs
GSDeviceDX::GSSelector gs_sel;
gs_sel.iip = PRIM->IIP;
gs_sel.prim = m_vt->m_primclass;
// ps
GSDeviceDX::PSSelector ps_sel;
GSDeviceDX::PSSamplerSelector ps_ssel;
GSDeviceDX::PSConstantBuffer ps_cb;
if(DATE)
{
// with a stencil buffer DATE goes through the depth-stencil state, otherwise through the
// pixel shader (which then reads the render target copy made above)
if(dev->HasStencil())
{
om_dssel.date = 1;
}
else
{
ps_sel.date = 1 + context->TEST.DATM;
}
}
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
ps_sel.colclip = 1;
}
ps_sel.clr1 = om_bsel.IsCLR1();
ps_sel.fba = context->FBA.FBA;
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
if(UserHacks_AlphaHack) ps_sel.aout = 1;
if(PRIM->FGE)
{
ps_sel.fog = 1;
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
}
if(context->TEST.ATE)
{
ps_sel.atst = context->TEST.ATST;
// the alpha reference shares a constant with the fog colour (its .a component);
// LESS and GREATER pass AREF - 1 / AREF + 1 instead of AREF itself
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
}
else
{
ps_sel.atst = ATST_ALWAYS;
}
if(tex)
{
ps_sel.wms = context->CLAMP.WMS;
ps_sel.wmt = context->CLAMP.WMT;
ps_sel.fmt = tex->m_fmt;
ps_sel.aem = env.TEXA.AEM;
ps_sel.tfx = context->TEX0.TFX;
ps_sel.tcc = context->TEX0.TCC;
// m_filter == 2 means the filtering mode is taken from the vertex trace per draw
ps_sel.ltf = m_filter == 2 ? m_vt->IsLinear() : m_filter;
ps_sel.rt = tex->m_target;
// WH = (GS texture width, GS texture height, actual texture width, actual texture height)
int w = tex->m_texture->GetWidth();
int h = tex->m_texture->GetHeight();
int tw = (int)(1 << context->TEX0.TW);
int th = (int)(1 << context->TEX0.TH);
GSVector4 WH(tw, th, w, h);
if(PRIM->FST)
{
vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
//Maybe better?
//vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
ps_sel.fst = 1;
}
ps_cb.WH = WH;
ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV);
GSVector4 clamp(ps_cb.MskFix);
GSVector4 ta(env.TEXA & GSVector4i::x000000ff());
ps_cb.MinMax = clamp / WH.xyxy();
ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));
ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
ps_ssel.ltf = ps_sel.ltf;
}
else
{
// untextured
ps_sel.tfx = 4;
}
// rs
// scissor rectangle scaled by rtscale and clipped to the render target size
GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
dev->OMSetRenderTargets(rt, ds, &scissor);
dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL);
dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL);
dev->PSSetShaderResource(2, rtcopy);
uint8 afix = context->ALPHA.FIX;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->SetupIA(m_vertex.buff, m_vertex.tail, m_index.buff, m_index.tail, m_topology);
dev->SetupVS(vs_sel, &vs_cb);
dev->SetupGS(gs_sel);
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
// draw
if(context->TEST.DoFirstPass())
{
dev->DrawIndexedPrimitive();
// same condition that set ps_sel.colclip = 1 above: draw once more with the "negative"
// blend selector and colclip = 2
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawIndexedPrimitive();
}
}
if(context->TEST.DoSecondPass())
{
ASSERT(!env.PABE.PABE);
// replace the alpha test by the entry looked up in iatst; it appears to be the logical
// inverse of each test (assuming the usual 0..7 ATST encoding — TODO confirm), so this
// pass covers the pixels that failed the first one
static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4};
ps_sel.atst = iatst[ps_sel.atst];
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
// start from the current write enables and switch off what AFAIL does not allow
bool z = om_dssel.zwe;
bool r = om_bsel.wr;
bool g = om_bsel.wg;
bool b = om_bsel.wb;
bool a = om_bsel.wa;
switch(context->TEST.AFAIL)
{
case 0: z = r = g = b = a = false; break; // none
case 1: z = false; break; // rgba
case 2: r = g = b = a = false; break; // z
case 3: z = a = false; break; // rgb
default: __assume(0);
}
// skip the pass entirely when nothing would be written
if(z || r || g || b || a)
{
om_dssel.zwe = z;
om_bsel.wr = r;
om_bsel.wg = g;
om_bsel.wb = b;
om_bsel.wa = a;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->DrawIndexedPrimitive();
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawIndexedPrimitive();
}
}
}
dev->EndScene();
// rtcopy is NULL unless the stencil-less DATE path created it above
dev->Recycle(rtcopy);
if(om_dssel.fba) UpdateFBA(rt);
}

View File

@ -23,8 +23,7 @@
#include "GSRendererHW.h"
template<class Vertex>
class GSRendererDX : public GSRendererHW<Vertex>
class GSRendererDX : public GSRendererHW
{
GSVector2 m_pixelcenter;
bool m_logz;
@ -35,413 +34,11 @@ class GSRendererDX : public GSRendererHW<Vertex>
protected:
int m_topology;
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
virtual void UpdateFBA(GSTexture* rt) {}
public:
GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0))
: GSRendererHW<Vertex>(tc)
, m_pixelcenter(pixelcenter)
, m_topology(-1)
{
m_logz = !!theApp.GetConfig("logz", 0);
m_fba = !!theApp.GetConfig("fba", 1);
//UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);
UserHacks_AlphaHack = !!theApp.GetConfig("UserHacks_AlphaHack", 0);
}
GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0));
virtual ~GSRendererDX();
virtual ~GSRendererDX()
{
}
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
const GSVector2i& rtsize = rt->GetSize();
const GSVector2& rtscale = rt->GetScale();
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
GSTexture *rtcopy = NULL;
ASSERT(m_dev != NULL);
GSDeviceDX* dev = (GSDeviceDX*)m_dev;
if(DATE)
{
if(dev->HasStencil())
{
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
GSVector4 o = GSVector4(-1.0f, 1.0f);
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 dst = src * 2.0f + o.xxxx();
GSVertexPT1 vertices[] =
{
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
};
dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
}
else
{
rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());
// I'll use VertexTrace when I consider it more trustworthy
dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
}
}
//
dev->BeginScene();
// om
GSDeviceDX::OMDepthStencilSelector om_dssel;
if(context->TEST.ZTE)
{
om_dssel.ztst = context->TEST.ZTST;
om_dssel.zwe = !context->ZBUF.ZMSK;
}
else
{
om_dssel.ztst = ZTST_ALWAYS;
}
if(m_fba)
{
om_dssel.fba = context->FBA.FBA;
}
GSDeviceDX::OMBlendSelector om_bsel;
if(!IsOpaque())
{
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;
om_bsel.a = context->ALPHA.A;
om_bsel.b = context->ALPHA.B;
om_bsel.c = context->ALPHA.C;
om_bsel.d = context->ALPHA.D;
if(env.PABE.PABE)
{
if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
{
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
om_bsel.abe = 0;
}
else
{
//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
//ASSERT(0);
}
}
}
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
// vs
GSDeviceDX::VSSelector vs_sel;
vs_sel.tme = PRIM->TME;
vs_sel.fst = PRIM->FST;
vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0;
vs_sel.rtcopy = !!rtcopy;
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
// We are probably receiving bad coordinates from VU1 in these cases.
if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
{
if(context->ZBUF.PSM == PSM_PSMZ24)
{
if(m_vt.m_max.p.z > 0xffffff)
{
ASSERT(m_vt.m_min.p.z > 0xffffff);
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
if (m_vt.m_min.p.z > 0xffffff)
{
vs_sel.bppz = 1;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
{
if(m_vt.m_max.p.z > 0xffff)
{
ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo
// Fixme : Same as above, I guess.
if (m_vt.m_min.p.z > 0xffff)
{
vs_sel.bppz = 2;
om_dssel.ztst = ZTST_ALWAYS;
}
}
}
}
GSDeviceDX::VSConstantBuffer vs_cb;
float sx = 2.0f * rtscale.x / (rtsize.x << 4);
float sy = 2.0f * rtscale.y / (rtsize.y << 4);
float ox = (float)(int)context->XYOFFSET.OFX;
float oy = (float)(int)context->XYOFFSET.OFY;
float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;
//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
//because DX10 and DX9 have a different pixel center.)
//
//The resulting shifted output aligns better with common blending / corona / blurring effects,
//but introduces a few bad pixels on the edges.
if(rt->LikelyOffset)
{
// DX9 has pixelcenter set to 0.0, so give it some value here
if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
ox2 *= rt->OffsetHack_modx;
oy2 *= rt->OffsetHack_mody;
}
vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);
// gs
GSDeviceDX::GSSelector gs_sel;
gs_sel.iip = PRIM->IIP;
gs_sel.prim = m_vt.m_primclass;
// ps
GSDeviceDX::PSSelector ps_sel;
GSDeviceDX::PSSamplerSelector ps_ssel;
GSDeviceDX::PSConstantBuffer ps_cb;
if(DATE)
{
if(dev->HasStencil())
{
om_dssel.date = 1;
}
else
{
ps_sel.date = 1 + context->TEST.DATM;
}
}
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
ps_sel.colclip = 1;
}
ps_sel.clr1 = om_bsel.IsCLR1();
ps_sel.fba = context->FBA.FBA;
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
if(UserHacks_AlphaHack) ps_sel.aout = 1;
if(PRIM->FGE)
{
ps_sel.fog = 1;
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
}
if(context->TEST.ATE)
{
ps_sel.atst = context->TEST.ATST;
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
}
else
{
ps_sel.atst = ATST_ALWAYS;
}
if(tex)
{
ps_sel.wms = context->CLAMP.WMS;
ps_sel.wmt = context->CLAMP.WMT;
ps_sel.fmt = tex->m_fmt;
ps_sel.aem = env.TEXA.AEM;
ps_sel.tfx = context->TEX0.TFX;
ps_sel.tcc = context->TEX0.TCC;
ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter;
ps_sel.rt = tex->m_target;
int w = tex->m_texture->GetWidth();
int h = tex->m_texture->GetHeight();
int tw = (int)(1 << context->TEX0.TW);
int th = (int)(1 << context->TEX0.TH);
GSVector4 WH(tw, th, w, h);
if(PRIM->FST)
{
vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
//Maybe better?
//vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
ps_sel.fst = 1;
}
ps_cb.WH = WH;
ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV);
GSVector4 clamp(ps_cb.MskFix);
GSVector4 ta(env.TEXA & GSVector4i::x000000ff());
ps_cb.MinMax = clamp / WH.xyxy();
ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));
ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
ps_ssel.ltf = ps_sel.ltf;
}
else
{
ps_sel.tfx = 4;
}
// rs
GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
dev->OMSetRenderTargets(rt, ds, &scissor);
dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL);
dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL);
dev->PSSetShaderResource(2, rtcopy);
uint8 afix = context->ALPHA.FIX;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->SetupIA(m_vertices, m_count, m_topology);
dev->SetupVS(vs_sel, &vs_cb);
dev->SetupGS(gs_sel);
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
// draw
if(context->TEST.DoFirstPass())
{
dev->DrawPrimitive();
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawPrimitive();
}
}
if(context->TEST.DoSecondPass())
{
ASSERT(!env.PABE.PABE);
static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4};
ps_sel.atst = iatst[ps_sel.atst];
switch(ps_sel.atst)
{
case ATST_LESS:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
break;
case ATST_GREATER:
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
break;
default:
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
break;
}
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
bool z = om_dssel.zwe;
bool r = om_bsel.wr;
bool g = om_bsel.wg;
bool b = om_bsel.wb;
bool a = om_bsel.wa;
switch(context->TEST.AFAIL)
{
case 0: z = r = g = b = a = false; break; // none
case 1: z = false; break; // rgba
case 2: r = g = b = a = false; break; // z
case 3: z = a = false; break; // rgb
default: __assume(0);
}
if(z || r || g || b || a)
{
om_dssel.zwe = z;
om_bsel.wr = r;
om_bsel.wg = g;
om_bsel.wb = b;
om_bsel.wa = a;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->DrawPrimitive();
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel);
om_bselneg.negative = 1;
ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
dev->DrawPrimitive();
}
}
}
dev->EndScene();
dev->Recycle(rtcopy);
if(om_dssel.fba) UpdateFBA(rt);
}
};

View File

@ -25,9 +25,9 @@
#include "resource.h"
GSRendererDX11::GSRendererDX11()
: GSRendererDX<GSVertexHW11>(new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
: GSRendererDX(new GSVertexTraceDX11(this), sizeof(GSVertexHW11), new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
{
InitVertexKick(GSRendererDX11);
InitConvertVertex(GSRendererDX11);
}
bool GSRendererDX11::CreateDevice(GSDevice* dev)
@ -38,202 +38,49 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev)
return true;
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX11::VertexKick(bool skip)
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX11::ConvertVertex(GSVertexHW11* RESTRICT vertex, size_t index)
{
GSVertexHW11& dst = m_vl.AddTail();
dst = *(GSVertexHW11*)&m_v;
#ifdef ENABLE_UPSCALE_HACKS
GSVector4i v0(m_v.m[0]);
GSVector4i v1(m_v.m[1]);
if(tme && fst)
{
//GSVector4::storel(&dst.ST, m_v.GetUV());
// TODO: modify VertexTrace and the shaders to read uv from v1.u16[0], v1.u16[1], then this step is not needed
int Udiff = 0;
int Vdiff = 0;
int Uadjust = 0;
int Vadjust = 0;
int multiplier = GetUpscaleMultiplier();
if(multiplier > 1)
{
Udiff = m_v.UV.U & 4095;
Vdiff = m_v.UV.V & 4095;
if(Udiff != 0)
{
if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; }
else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; }
else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; }
}
Udiff = m_v.UV.U & 255;
Vdiff = m_v.UV.V & 255;
if(Udiff != 0)
{
if (Udiff >= 248) { Uadjust = -1; }
else if (Udiff <= 8) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 248) { Vadjust = -1; }
else if (Vdiff <= 8) { Vadjust = 1; }
}
Udiff = m_v.UV.U & 15;
Vdiff = m_v.UV.V & 15;
if(Udiff != 0)
{
if (Udiff >= 15) { Uadjust = -1; }
else if (Udiff <= 1) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 15) { Vadjust = -1; }
else if (Vdiff <= 1) { Vadjust = 1; }
}
}
dst.ST.S = (float)m_v.UV.U - Uadjust;
dst.ST.T = (float)m_v.UV.V - Vadjust;
}
else if(tme)
{
// Wip :p
//dst.XYZ.X += 5;
//dst.XYZ.Y += 5;
v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st
}
#else
GSVector4i* RESTRICT dst = (GSVector4i*)&vertex[index];
if(tme && fst)
{
GSVector4::storel(&dst.ST, m_v.GetUV());
}
#endif
int count = 0;
if(GSVertexHW11* v = DrawingKick<prim>(skip, count))
{
GSVector4i scissor = m_context->scissor.dx10;
GSVector4i pmin, pmax;
#if _M_SSE >= 0x401
GSVector4i v0, v1, v2;
switch(prim)
{
case GS_POINTLIST:
v0 = GSVector4i::load((int)v[0].p.xy).upl16();
pmin = v0;
pmax = v0;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
v0 = GSVector4i::load((int)v[0].p.xy);
v1 = GSVector4i::load((int)v[1].p.xy);
pmin = v0.min_u16(v1).upl16();
pmax = v0.max_u16(v1).upl16();
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
v0 = GSVector4i::load((int)v[0].p.xy);
v1 = GSVector4i::load((int)v[1].p.xy);
v2 = GSVector4i::load((int)v[2].p.xy);
pmin = v0.min_u16(v1).min_u16(v2).upl16();
pmax = v0.max_u16(v1).max_u16(v2).upl16();
break;
}
#else
switch(prim)
{
case GS_POINTLIST:
pmin.x = v[0].p.x;
pmin.y = v[0].p.y;
pmax.x = v[0].p.x;
pmax.y = v[0].p.y;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin.x = std::min<uint16>(v[0].p.x, v[1].p.x);
pmin.y = std::min<uint16>(v[0].p.y, v[1].p.y);
pmax.x = std::max<uint16>(v[0].p.x, v[1].p.x);
pmax.y = std::max<uint16>(v[0].p.y, v[1].p.y);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin.x = std::min<uint16>(std::min<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
pmin.y = std::min<uint16>(std::min<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
pmax.x = std::max<uint16>(std::max<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
pmax.y = std::max<uint16>(std::max<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
break;
}
#endif
GSVector4i test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
test |= pmin == pmax;
break;
}
if(test.mask() & 0xff)
{
return;
}
m_count += count;
}
dst[0] = v0;
dst[1] = v1;
}
void GSRendererDX11::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
void GSRendererDX11::Draw()
{
switch(m_vt.m_primclass)
// TODO: remove invisible prims here
__super::Draw();
}
void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
switch(m_vt->m_primclass)
{
case GS_POINT_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
m_perfmon.Put(GSPerfMon::Prim, m_count);
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
m_perfmon.Put(GSPerfMon::Prim, m_count / 2);
break;
case GS_TRIANGLE_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
m_perfmon.Put(GSPerfMon::Prim, m_count / 3);
break;
default:
__assume(0);
}
__super::Draw(rt, ds, tex);
__super::DrawPrims(rt, ds, tex);
}

View File

@ -25,16 +25,22 @@
#include "GSVertexHW.h"
#include "GSTextureCache11.h"
class GSRendererDX11 : public GSRendererDX<GSVertexHW11>
class GSRendererDX11 : public GSRendererDX
{
protected:
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(GSVertexHW11* RESTRICT vertex, size_t index);
void Draw();
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;}
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.y;}
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW11*)vertex)->c0;}
void SetColor(void* vertex, uint32 c) const {((GSVertexHW11*)vertex)->c0 = c;}
public:
GSRendererDX11();
virtual ~GSRendererDX11() {}
bool CreateDevice(GSDevice* dev);
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
};

View File

@ -25,9 +25,9 @@
#include "resource.h"
GSRendererDX9::GSRendererDX9()
: GSRendererDX<GSVertexHW9>(new GSTextureCache9(this))
: GSRendererDX(new GSVertexTraceDX9(this), sizeof(GSVertexHW9), new GSTextureCache9(this))
{
InitVertexKick(GSRendererDX9);
InitConvertVertex(GSRendererDX9);
}
bool GSRendererDX9::CreateDevice(GSDevice* dev)
@ -57,8 +57,8 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev)
return true;
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX9::VertexKick(bool skip)
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX9::ConvertVertex(GSVertexHW9* RESTRICT vertex, size_t index)
{
GSVector4 p = GSVector4(((GSVector4i)m_v.XYZ).upl16());
@ -71,197 +71,143 @@ void GSRendererDX9::VertexKick(bool skip)
p = p.xyxy(GSVector4::load((float)m_v.XYZ.Z));
}
GSVertexHW9& dst = m_vl.AddTail();
dst.p = p;
int Uadjust = 0;
int Vadjust = 0;
GSVector4 t = GSVector4::zero();
if(tme)
{
if(fst)
{
dst.t = m_v.GetUV();
#ifdef ENABLE_UPSCALE_HACKS
int Udiff = 0;
int Vdiff = 0;
int multiplier = GetUpscaleMultiplier();
if(multiplier > 1)
{
Udiff = m_v.UV.U & 4095;
Vdiff = m_v.UV.V & 4095;
if(Udiff != 0)
{
if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; }
else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; }
else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; }
}
Udiff = m_v.UV.U & 255;
Vdiff = m_v.UV.V & 255;
if(Udiff != 0)
{
if (Udiff >= 248) { Uadjust = -1; }
else if (Udiff <= 8) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 248) { Vadjust = -1; }
else if (Vdiff <= 8) { Vadjust = 1; }
}
Udiff = m_v.UV.U & 15;
Vdiff = m_v.UV.V & 15;
if(Udiff != 0)
{
if (Udiff >= 15) { Uadjust = -1; }
else if (Udiff <= 1) { Uadjust = 1; }
}
if(Vdiff != 0)
{
if (Vdiff >= 15) { Vadjust = -1; }
else if (Vdiff <= 1) { Vadjust = 1; }
}
}
dst.t.x -= (float) Uadjust;
dst.t.y -= (float) Vadjust;
#endif
t = GSVector4(GSVector4i::load(m_v.UV.u32[0]).upl16());
}
else
{
dst.t = GSVector4::loadl(&m_v.ST);
t = GSVector4::loadl(&m_v.ST);
}
}
dst._c0() = m_v.RGBAQ.u32[0];
dst._c1() = m_v.FOG.u32[1];
t = t.xyxy(GSVector4::cast(GSVector4i(m_v.RGBAQ.u32[0], m_v.FOG.u32[1])));
//
GSVertexHW9* RESTRICT dst = (GSVertexHW9*)&vertex[index];
// BaseDrawingKick can never return NULL here because the DrawingKick function
// tables route to DrawingKickNull for GS_INVALID prim types (and that's the only
// condition where this function would return NULL).
int count = 0;
if(GSVertexHW9* v = DrawingKick<prim>(skip, count))
{
GSVector4 scissor = m_context->scissor.dx9;
GSVector4 pmin, pmax;
switch(prim)
{
case GS_POINTLIST:
pmin = v[0].p;
pmax = v[0].p;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin = v[0].p.min(v[1].p);
pmax = v[0].p.max(v[1].p);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin = v[0].p.min(v[1].p).min(v[2].p);
pmax = v[0].p.max(v[1].p).max(v[2].p);
break;
}
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
test |= pmin == pmax;
break;
}
if(test.mask() & 3)
{
return;
}
switch(prim)
{
case GS_POINTLIST:
break;
case GS_LINELIST:
case GS_LINESTRIP:
if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();}
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0() = v[2]._c0();}
break;
case GS_SPRITE:
if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();}
v[0].p.z = v[1].p.z;
v[0].p.w = v[1].p.w;
v[0]._c1() = v[1]._c1();
v[2] = v[1];
v[3] = v[1];
v[1].p.y = v[0].p.y;
v[1].t.y = v[0].t.y;
v[2].p.x = v[0].p.x;
v[2].t.x = v[0].t.x;
v[4] = v[1];
v[5] = v[2];
count += 4;
break;
}
m_count += count;
}
dst->p = p;
dst->t = t;
}
void GSRendererDX9::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
void GSRendererDX9::Draw()
{
switch(m_vt.m_primclass)
// TODO: remove invisible prims here
__super::Draw();
}
void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
switch(m_vt->m_primclass)
{
case GS_POINT_CLASS:
m_topology = D3DPT_POINTLIST;
m_perfmon.Put(GSPerfMon::Prim, m_count);
break;
case GS_LINE_CLASS:
m_topology = D3DPT_LINELIST;
m_perfmon.Put(GSPerfMon::Prim, m_count / 2);
if(PRIM->IIP == 0)
{
for(size_t i = 0, j = m_index.tail; i < j; i += 2)
{
uint32 tmp = m_index.buff[i + 0];
m_index.buff[i + 0] = m_index.buff[i + 1];
m_index.buff[i + 1] = tmp;
}
}
break;
case GS_TRIANGLE_CLASS:
case GS_SPRITE_CLASS:
m_topology = D3DPT_TRIANGLELIST;
m_perfmon.Put(GSPerfMon::Prim, m_count / 3);
if(PRIM->IIP == 0)
{
for(size_t i = 0, j = m_index.tail; i < j; i += 3)
{
uint32 tmp = m_index.buff[i + 0];
m_index.buff[i + 0] = m_index.buff[i + 2];
m_index.buff[i + 2] = tmp;
}
}
break;
case GS_SPRITE_CLASS:
m_topology = D3DPT_TRIANGLELIST;
// each sprite converted to quad needs twice the space
while(m_vertex.tail * 2 > m_vertex.maxcount)
{
GrowVertexBuffer();
}
// assume vertices are tightly packed and sequentially indexed (it should be the case)
if(m_vertex.tail >= 2)
{
size_t count = m_vertex.tail;
int i = (int)count * 2 - 4;
GSVertexHW9* s = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * count] - 2;
GSVertexHW9* q = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * (count * 2)] - 4;
uint32* RESTRICT index = &m_index.buff[count * 3] - 6;
for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
{
GSVertexHW9 v0 = s[0];
GSVertexHW9 v1 = s[1];
v0.p = v0.p.xyzw(v1.p); // z, q
v0.t = v0.t.xyzw(v1.t); // c, f
q[0] = v0;
q[3] = v1;
// swap x, s
GSVector4 p = v0.p.insert<0, 0>(v1.p);
GSVector4 t = v0.t.insert<0, 0>(v1.t);
v1.p = v1.p.insert<0, 0>(v0.p);
v1.t = v1.t.insert<0, 0>(v0.t);
v0.p = p;
v0.t = t;
q[1] = v0;
q[2] = v1;
index[0] = i + 0;
index[1] = i + 1;
index[2] = i + 2;
index[3] = i + 1;
index[4] = i + 2;
index[5] = i + 3;
}
m_vertex.head = m_vertex.tail = count * 2;
m_index.tail = count * 3;
}
break;
default:
__assume(0);
}
(*(GSDevice9*)m_dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO
__super::Draw(rt, ds, tex);
__super::DrawPrims(rt, ds, tex);
}
void GSRendererDX9::UpdateFBA(GSTexture* rt)
@ -280,7 +226,7 @@ void GSRendererDX9::UpdateFBA(GSTexture* rt)
GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight());
GSVector4 o = GSVector4(-1.0f, 1.0f);
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 dst = src * 2.0f + o.xxxx();
GSVertexPT1 vertices[] =

View File

@ -25,7 +25,7 @@
#include "GSVertexHW.h"
#include "GSTextureCache9.h"
class GSRendererDX9 : public GSRendererDX<GSVertexHW9>
class GSRendererDX9 : public GSRendererDX
{
protected:
struct
@ -34,14 +34,20 @@ protected:
Direct3DBlendState9 bs;
} m_fba;
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(GSVertexHW9* RESTRICT vertex, size_t index);
void Draw();
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void UpdateFBA(GSTexture* rt);
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.x;}
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.y;}
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW9*)vertex)->t.u32[2];}
void SetColor(void* vertex, uint32 c) const {((GSVertexHW9*)vertex)->t.u32[2] = c;}
public:
GSRendererDX9();
virtual ~GSRendererDX9() {}
bool CreateDevice(GSDevice* dev);
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
};

View File

@ -21,3 +21,910 @@
#include "stdafx.h"
#include "GSRendererHW.h"
// Builds the hardware renderer base: stores the texture cache pointer and reads
// the resolution / upscaling settings from the application configuration.
//
// vt, vertex_stride - forwarded unchanged to the GSRenderer base constructor.
// tc                - texture cache; kept in m_tc.
GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc)
	: GSRenderer(vt, vertex_stride)
	, m_tc(tc)
	, m_width(1024)
	, m_height(1024)
	, m_skip(0)
	, m_reset(false)
	, m_upscale_multiplier(1)
{
	m_nativeres = !!theApp.GetConfig("nativeres", 0);
	m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1);
	m_userhacks_skipdraw = theApp.GetConfig("UserHacks_SkipDraw", 0);

	// Native resolution: keep the 1024x1024 defaults and never scale.
	if(m_nativeres)
	{
		m_upscale_multiplier = 1;

		return;
	}

	// Custom resolution; the current values act as fallbacks for missing keys.
	m_width = theApp.GetConfig("resx", m_width);
	m_height = theApp.GetConfig("resy", m_height);
	m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", m_upscale_multiplier);

	if(m_upscale_multiplier > 6)
	{
		// out of range => use the normal upscale math (resx/resy as read above)
		m_upscale_multiplier = 1;

		return;
	}

	if(m_upscale_multiplier > 1)
	{
		// A multiplier in 2..6 overrides resx/resy with a multiple of 640x512.
		m_width = 640 * m_upscale_multiplier; // 512 is also common, but this is not always detected right.
		m_height = 512 * m_upscale_multiplier; // 448 is also common, but this is not always detected right.
	}
}
// Destroys the renderer and frees the texture cache.
GSRendererHW::~GSRendererHW()
{
// m_tc holds the GSTextureCache* that was passed to the constructor.
delete m_tc;
}
// Forwards the game CRC to the base renderer, lets the hack table pick its
// per-game hooks for m_game, then applies per-title resolution overrides.
void GSRendererHW::SetGameCRC(uint32 crc, int options)
{
	GSRenderer::SetGameCRC(crc, options);

	m_hacks.SetGameCRC(m_game);

	// Only one title needs an override at the moment.
	if(m_game.title != CRC::JackieChanAdv)
	{
		return;
	}

	// TODO: uses a 1280px wide 16 bit render target, but this only fixes half of the problem
	m_width = 1280;
}
// Returns true when render targets may be upscaled for the current state.
bool GSRendererHW::CanUpscale()
{
	// A per-game hook, when one is installed, can veto upscaling outright.
	if(m_hacks.m_cu)
	{
		if(!(this->*m_hacks.m_cu)())
		{
			return false;
		}
	}

	if(m_nativeres)
	{
		return false;
	}

	// upscale ratio depends on the display size, with no output it may not be
	// set correctly (ps2 logo to game transition)
	return m_regs->PMODE.EN != 0;
}
// Returns the upscale multiplier chosen in the constructor
// (forced to 1 there for native resolution or when the configured value is above 6).
int GSRendererHW::GetUpscaleMultiplier()
{
return m_upscale_multiplier;
}
// Handles a GS reset: flags the texture cache for a purge and resets the base renderer.
void GSRendererHW::Reset()
{
// TODO: GSreset can come from the main thread too => crash
// m_tc->RemoveAll();
// Instead of purging the cache here, only raise m_reset; VSync() performs
// the RemoveAll() when it sees the flag.
m_reset = true;
GSRenderer::Reset();
}
// Per-frame housekeeping: runs the base VSync, ages the texture cache and the
// device's pool, clears the skip counter, and performs the cache purge that
// Reset() requested through m_reset.
void GSRendererHW::VSync(int field)
{
	GSRenderer::VSync(field);

	m_tc->IncAge();

	m_dev->AgePool();

	m_skip = 0;

	// Nothing more to do unless a reset is pending.
	if(!m_reset)
	{
		return;
	}

	m_tc->RemoveAll();

	m_reset = false;
}
// Empties the texture cache, then lets the base renderer reset the device.
void GSRendererHW::ResetDevice()
{
// The cache is cleared before GSRenderer::ResetDevice() runs.
m_tc->RemoveAll();
GSRenderer::ResetDevice();
}
// Returns the texture of the cached target that backs display circuit i,
// or NULL when the texture cache has no matching target.
GSTexture* GSRendererHW::GetOutput(int i)
{
	const GSRegDISPFB& fb = m_regs->DISP[i].DISPFB;

	// Describe the display frame buffer as a TEX0 register for the cache lookup.
	GIFRegTEX0 tex0;

	tex0.TBP0 = fb.Block();
	tex0.TBW = fb.FBW;
	tex0.PSM = fb.PSM;

	// TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)tex0.TBP0, (int)tex0.PSM);

	GSTextureCache::Target* target = m_tc->LookupTarget(tex0, m_width, m_height);

	if(!target)
	{
		return NULL;
	}

	GSTexture* output = target->m_texture;

	if(s_dump)
	{
		// Debug dump: save the frame once the dump counter reaches s_saven.
		const bool save = s_save && s_n >= s_saven;

		if(save)
		{
			output->Save(format("c:\\temp2\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)tex0.TBP0, (int)tex0.PSM));
		}

		s_n++;
	}

	return output;
}
// Forwards an invalidation of rectangle r in the BITBLTBUF destination buffer
// (DBP / DBW / DPSM) to the texture cache.
void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
// printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DBW, (int)BITBLTBUF.DPSM);
// The destination fields are turned into an offset object by m_mem.GetOffset().
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
}
// Forwards an invalidation of rectangle r in the BITBLTBUF source buffer
// (SBP / SBW / SPSM) to the texture cache. Requests flagged as clut are ignored.
void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{
	// printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM);

	// FIXME: clut requests are not handled, they are silently dropped
	if(!clut)
	{
		m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
	}
}
// Draws the primitives currently queued in m_vertex / m_index.
//
// Steps, in order:
//  1. bail out if the device is lost; update the vertex trace; optionally skip bad frames
//  2. look up the render target, the depth target and (when texturing) the source texture
//  3. optionally dump textures / targets to disk for debugging
//  4. let the per-game "oi" hook veto the draw
//  5. temporarily patch TEST / FRAME / ZBUF, call DrawPrims(), restore the registers
//  6. mark the drawn rectangle valid on both targets and invalidate it in the cache
//  7. run the per-game "oo" hook and the optional post-draw dumps
void GSRendererHW::Draw()
{
if(m_dev->IsLost()) return;
// Refresh the vertex trace (m_vt) from the queued vertices and indices; its
// m_min / m_max and IsLinear() are used further down.
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
#ifndef DISABLE_CRC_HACKS
if(GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) return;
#endif
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
// Render target lookup: describe the frame buffer as a TEX0 register.
GIFRegTEX0 TEX0;
TEX0.TBP0 = context->FRAME.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->FRAME.PSM;
GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
// Depth target lookup: note that the buffer width is taken from FRAME.FBW,
// only the base pointer and the format come from ZBUF.
TEX0.TBP0 = context->ZBUF.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->ZBUF.PSM;
GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
if(!rt || !ds)
{
ASSERT(0);
return;
}
// Source texture, only needed when texture mapping is enabled (PRIM->TME).
GSTextureCache::Source* tex = NULL;
if(PRIM->TME)
{
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
GSVector4i r;
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt->IsLinear());
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
// No source texture => the draw is dropped.
if(!tex) return;
}
// Debug dump before drawing: source texture / palette, then rt and z.
// s_n is bumped twice in this block, whether or not anything was saved.
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven && tex)
{
s = format("c:\\temp2\\_%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds",
s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM,
(int)context->CLAMP.WMS, (int)context->CLAMP.WMT,
(int)context->CLAMP.MINU, (int)context->CLAMP.MAXU,
(int)context->CLAMP.MINV, (int)context->CLAMP.MAXV);
tex->m_texture->Save(s, true);
if(tex->m_palette)
{
s = format("c:\\temp2\\_%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM);
tex->m_palette->Save(s, true);
}
}
s_n++;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(s);
}
if(s_savez && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
ds->m_texture->Save(s);
}
s_n++;
}
// Per-game "oi" hook: a false return cancels the draw.
if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex))
{
return;
}
// skip alpha test if possible
// Keep copies of the registers that are patched below so they can be restored
// after DrawPrims().
GIFRegTEST TEST = context->TEST;
GIFRegFRAME FRAME = context->FRAME;
GIFRegZBUF ZBUF = context->ZBUF;
uint32 fm = context->FRAME.FBMSK;
// Parsed as (ZMSK || ZTE == 0) ? 0xffffffff : 0 - z is fully masked when z
// writes are masked or the depth test is disabled.
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
{
// NOTE(review): fm / zm are written back to the context right after this,
// so TryAlphaTest presumably updates them in place - confirm against its declaration.
if(GSRenderer::TryAlphaTest(fm, zm))
{
context->TEST.ATST = ATST_ALWAYS;
}
}
context->FRAME.FBMSK = fm;
context->ZBUF.ZMSK = zm != 0;
//
DrawPrims(rt->m_texture, ds->m_texture, tex);
//
// Undo the temporary register changes.
context->TEST = TEST;
context->FRAME = FRAME;
context->ZBUF = ZBUF;
//
// Affected rectangle: vertex trace bounding box clipped to the scissor.
GSVector4i r = GSVector4i(m_vt->m_min.p.xyxy(m_vt->m_max.p)).rintersect(GSVector4i(context->scissor.in));
// Colour was written unless every bit is masked.
if(fm != 0xffffffff)
{
rt->m_valid = rt->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.fb, r, false);
}
// Same for depth.
if(zm != 0xffffffff)
{
ds->m_valid = ds->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.zb, r, false);
}
//
// Per-game "oo" hook, called after the draw; its result is not used.
if(m_hacks.m_oo)
{
(this->*m_hacks.m_oo)();
}
// Debug dump after drawing: rt and z.
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(s);
}
if(s_savez && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
ds->m_texture->Save(s);
}
s_n++;
}
#ifdef DISABLE_HW_TEXTURE_CACHE
// Only compiled when DISABLE_HW_TEXTURE_CACHE is defined: pass the drawn
// rectangle of the render target to m_tc->Read().
m_tc->Read(rt, r);
#endif
}
// hacks
// Fills the three per-game hack tables. Every entry ties a CRC title and a region
// to the member function that implements the workaround for that game.
// (CRC::RegionCount is presumably the "any region" wildcard - confirm against the map lookup.)
GSRendererHW::Hacks::Hacks()
    : m_oi_map(m_oi_list)
    , m_oo_map(m_oo_list)
    , m_cu_map(m_cu_list)
    , m_oi(NULL)
    , m_oo(NULL)
    , m_cu(NULL)
{
    typedef HackEntry<OI_Ptr> OIEntry;
    typedef HackEntry<OO_Ptr> OOEntry;
    typedef HackEntry<CU_Ptr> CUEntry;

    // OI hacks: Draw() calls them before drawing, a false return cancels the draw

    m_oi_list.push_back(OIEntry(CRC::FFXII, CRC::EU, &GSRendererHW::OI_FFXII));
    m_oi_list.push_back(OIEntry(CRC::FFX, CRC::RegionCount, &GSRendererHW::OI_FFX));
    m_oi_list.push_back(OIEntry(CRC::MetalSlug6, CRC::RegionCount, &GSRendererHW::OI_MetalSlug6));
    m_oi_list.push_back(OIEntry(CRC::GodOfWar2, CRC::RegionCount, &GSRendererHW::OI_GodOfWar2));
    m_oi_list.push_back(OIEntry(CRC::SimpsonsGame, CRC::RegionCount, &GSRendererHW::OI_SimpsonsGame));
    m_oi_list.push_back(OIEntry(CRC::RozenMaidenGebetGarden, CRC::RegionCount, &GSRendererHW::OI_RozenMaidenGebetGarden));
    m_oi_list.push_back(OIEntry(CRC::SpidermanWoS, CRC::RegionCount, &GSRendererHW::OI_SpidermanWoS));
    m_oi_list.push_back(OIEntry(CRC::TyTasmanianTiger, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
    m_oi_list.push_back(OIEntry(CRC::TyTasmanianTiger2, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
    m_oi_list.push_back(OIEntry(CRC::DigimonRumbleArena2, CRC::RegionCount, &GSRendererHW::OI_DigimonRumbleArena2));
    m_oi_list.push_back(OIEntry(CRC::StarWarsForceUnleashed, CRC::RegionCount, &GSRendererHW::OI_StarWarsForceUnleashed));
    m_oi_list.push_back(OIEntry(CRC::BlackHawkDown, CRC::RegionCount, &GSRendererHW::OI_BlackHawkDown));
    m_oi_list.push_back(OIEntry(CRC::XmenOriginsWolverine, CRC::RegionCount, &GSRendererHW::OI_XmenOriginsWolverine));
    m_oi_list.push_back(OIEntry(CRC::CallofDutyFinalFronts, CRC::RegionCount, &GSRendererHW::OI_CallofDutyFinalFronts));
    m_oi_list.push_back(OIEntry(CRC::SpyroNewBeginning, CRC::RegionCount, &GSRendererHW::OI_SpyroNewBeginning));
    m_oi_list.push_back(OIEntry(CRC::SpyroEternalNight, CRC::RegionCount, &GSRendererHW::OI_SpyroEternalNight));
    m_oi_list.push_back(OIEntry(CRC::TalesOfLegendia, CRC::RegionCount, &GSRendererHW::OI_TalesOfLegendia));

    // OO hacks: Draw() calls them after the primitives were drawn

    m_oo_list.push_back(OOEntry(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::OO_DBZBT2));
    m_oo_list.push_back(OOEntry(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::OO_MajokkoALaMode2));

    // CU hacks

    m_cu_list.push_back(CUEntry(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::CU_DBZBT2));
    m_cu_list.push_back(CUEntry(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::CU_MajokkoALaMode2));
    m_cu_list.push_back(CUEntry(CRC::TalesOfAbyss, CRC::RegionCount, &GSRendererHW::CU_TalesOfAbyss));
}
// Picks the OI/OO/CU hack callbacks that belong to the given game.
// Games flagged with CRC::PointListPalette additionally get the generic
// point-list palette hack as their OI callback.
void GSRendererHW::Hacks::SetGameCRC(const CRC::Game& game)
{
    // lookup key: region in bits 24 and up, title below
    const uint32 key = (uint32)((game.region << 24) | game.title);

    m_oi = m_oi_map[key];
    m_oo = m_oo_map[key];
    m_cu = m_cu_map[key];

    if(game.flags & CRC::PointListPalette)
    {
        // a game is not expected to have both a dedicated OI hack and this flag
        ASSERT(m_oi == NULL);

        m_oi = &GSRendererHW::OI_PointListPalette;
    }
}
// FFXII video hack.
//
// A small state machine kept in function-local statics:
//  1. a line-class draw with 448*2 or 512*2 vertices arms the hack and records the line count,
//  2. while armed, big point-class draws (>= 16*512 vertices) are captured into a CPU side
//     pixel buffer instead of being drawn (false is returned, so Draw() skips the primitive),
//  3. the next line-class draw with the recorded vertex count is replaced by two triangles
//     (4 vertices, 6 indices) textured with the captured pixels.
// Any other point or line draw while armed disarms the hack again.
//
// rt, ds: not used by this hack.
// t: source texture, its m_texture is replaced in step 3.
// Returns false if the current draw has to be skipped, true otherwise.
bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    static uint32* video = NULL;
    static size_t lines = 0;

    if(lines == 0)
    {
        if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.tail == 448 * 2 || m_vertex.tail == 512 * 2))
        {
            lines = m_vertex.tail / 2;
        }
    }
    else
    {
        if(m_vt->m_primclass == GS_POINT_CLASS)
        {
            if(m_vertex.tail >= 16 * 512)
            {
                // incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454

                if(!video) video = new uint32[512 * 512];

                int ox = m_context->XYOFFSET.OFX;
                int oy = m_context->XYOFFSET.OFY;

                const uint8* RESTRICT v = m_vertex.buff;

                // m_vertex.tail is the number of queued vertices, so exactly that many are visited;
                // the former "i >= 0" bound walked one stride past the last vertex

                for(int i = (int)m_vertex.tail; i > 0; i--, v += m_vertex.stride)
                {
                    int x = (GetPosX(v) - ox) >> 4;
                    int y = (GetPosY(v) - oy) >> 4;

                    // (y << 8) + (y << 7) + (y << 6) == y * 448, the row pitch of the captured image
                    // NOTE(review): x and y are not range checked before indexing the buffer

                    video[(y << 8) + (y << 7) + (y << 6) + x] = GetColor(v);
                }

                return false;
            }
            else
            {
                lines = 0;
            }
        }
        else if(m_vt->m_primclass == GS_LINE_CLASS)
        {
            if(m_vertex.tail == lines * 2)
            {
                // normally, this step would copy the video onto screen with 512 texture mapped horizontal lines,
                // but we use the stored video data to create a new texture, and replace the lines with two triangles

                // NOTE(review): video is still NULL if no point draw was captured since arming,
                // and t looks like it can be NULL for untextured draws - confirm both cannot happen here

                m_dev->Recycle(t->m_texture);

                t->m_texture = m_dev->CreateTexture(512, 512);

                t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);

                // vertices 0 and 1 stay in place, the last two vertices become vertices 2 and 3

                memcpy(&m_vertex.buff[m_vertex.stride * 2], &m_vertex.buff[m_vertex.stride * (m_vertex.tail - 2)], m_vertex.stride);
                memcpy(&m_vertex.buff[m_vertex.stride * 3], &m_vertex.buff[m_vertex.stride * (m_vertex.tail - 1)], m_vertex.stride);

                m_index.buff[0] = 0;
                m_index.buff[1] = 1;
                m_index.buff[2] = 2;
                m_index.buff[3] = 1;
                m_index.buff[4] = 2;
                m_index.buff[5] = 3;

                m_vertex.head = m_vertex.tail = 4;
                m_index.tail = 6;

                m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
            }
            else
            {
                lines = 0;
            }
        }
    }

    return true;
}
// FFX: the random battle transition writes the z buffer directly, so the depth
// target is cleared before the matching draw. Never cancels the draw.
bool GSRendererHW::OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 zbp = m_context->ZBUF.Block();
    const uint32 tbp = m_context->TEX0.TBP0;

    const bool transition =
        (fbp == 0x00d00 || fbp == 0x00000)
        && zbp == 0x02100
        && PRIM->TME
        && tbp == 0x01a00
        && m_context->TEX0.PSM == PSM_PSMCT16S;

    if(transition)
    {
        // random battle transition (z buffer written directly, clear it now)

        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Metal Slug 6: missing red channel fix (looks alright in pcsx2 r5000+).
//
// Every queued vertex whose red component is 0 while green and blue are both non-zero
// gets its red replaced by the rounded average of green and blue. The vertex trace is
// refreshed afterwards because the colors changed.
//
// rt, ds, t: not used by this hack. Always returns true (the draw goes ahead).
bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    uint8* RESTRICT v = m_vertex.buff;

    // m_vertex.tail is the number of queued vertices; the former "i >= 0" bound
    // processed (and possibly rewrote) one extra vertex behind the last one

    for(int i = (int)m_vertex.tail; i > 0; i--, v += m_vertex.stride)
    {
        const uint32 c = GetColor(v);

        const uint32 r = (c >> 0) & 0xff;
        const uint32 g = (c >> 8) & 0xff;
        const uint32 b = (c >> 16) & 0xff;

        if(r == 0 && g != 0 && b != 0)
        {
            // keep the upper three bytes, put the rounded average into the low (red) byte

            SetColor(v, (c & 0xffffff00) | ((g + b + 1) >> 1));
        }
    }

    m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt->m_primclass);

    return true;
}
// God of War 2: a draw into a PSMZ24 frame buffer at one of the known addresses is a
// z buffer clear. The matching depth target is looked up and cleared, the draw is skipped.
bool GSRendererHW::OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fbw = m_context->FRAME.FBW;
    const uint32 fpsm = m_context->FRAME.PSM;

    // ntsc 0xf00, pal 0x100, ntsc "HD" 0x1280
    const bool known_address = fbp == 0x00f00 || fbp == 0x00100 || fbp == 0x01280;

    if(!known_address || fpsm != PSM_PSMZ24)
    {
        return true;
    }

    // z buffer clear

    GIFRegTEX0 TEX0;

    TEX0.TBP0 = fbp;
    TEX0.TBW = fbw;
    TEX0.PSM = fpsm;

    GSTextureCache::Target* target = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

    if(target)
    {
        m_dev->ClearDepth(target->m_texture, 0);
    }

    return false;
}
// The Simpsons Game: replaces the game's half height z clear with a full clear of the
// depth target and skips the draw.
bool GSRendererHW::OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    // 0x1800 pal, 0x1500 ntsc
    const bool known_address = fbp == 0x01500 || fbp == 0x01800;

    if(!known_address || fpsm != PSM_PSMZ24)
    {
        return true;
    }

    // instead of just simply drawing a full height 512x512 sprite to clear the z buffer,
    // it uses a 512x256 sprite only, yet it is still able to fill the whole surface with zeros,
    // how? by using a render target that overlaps with the lower half of the z buffer...

    // TODO: tony hawk pro skater 4 same problem, the empty half is not visible though, painted over fully

    m_dev->ClearDepth(ds, 0);

    return false;
}
// Rozen Maiden Gebet Garden: two untextured draws clear one buffer through the other
// one's register. Both are turned into explicit clears and the draw is skipped.
bool GSRendererHW::OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    if(PRIM->TME)
    {
        return true;
    }

    const uint32 fbp = m_context->FRAME.Block();
    const uint32 zbp = m_context->ZBUF.Block();

    if(fbp == 0x008c0 && zbp == 0x01a40)
    {
        // frame buffer clear, atst = fail, afail = write z only, z buffer points to frame buffer

        GIFRegTEX0 TEX0;

        TEX0.TBP0 = zbp;
        TEX0.TBW = m_context->FRAME.FBW;
        TEX0.PSM = m_context->FRAME.PSM;

        GSTextureCache::Target* color = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);

        if(color)
        {
            m_dev->ClearRenderTarget(color->m_texture, 0);
        }

        return false;
    }

    if(fbp == 0x00000 && zbp == 0x01180)
    {
        // z buffer clear, frame buffer now points to the z buffer (how can they be so clever?)

        GIFRegTEX0 TEX0;

        TEX0.TBP0 = fbp;
        TEX0.TBW = m_context->FRAME.FBW;
        TEX0.PSM = m_context->ZBUF.PSM;

        GSTextureCache::Target* depth = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

        if(depth)
        {
            m_dev->ClearDepth(depth->m_texture, 0);
        }

        return false;
    }

    return true;
}
// Spider-Man Web of Shadows: clears the whole depth target before the matching draw,
// the draw itself still goes ahead.
bool GSRendererHW::OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    // 0x2800 pal, 0x25a0 ntsc
    const bool known_address = fbp == 0x025a0 || fbp == 0x02800;

    if(known_address && fpsm == PSM_PSMCT32)
    {
        // only top half of the screen clears

        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Ty the Tasmanian Tiger (1 and 2): the half height buffer clear is replaced by a clear
// of the depth target and the draw is skipped.
bool GSRendererHW::OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    // 0x2800 pal, 0x2bc0 ntsc
    const bool known_address = fbp == 0x02800 || fbp == 0x02BC0;

    if(!known_address || fpsm != PSM_PSMCT24)
    {
        return true;
    }

    // half height buffer clear

    m_dev->ClearDepth(ds, 0);

    return false;
}
// Digimon Rumble Arena 2: clears the depth target before the matching untextured draw,
// the draw itself still goes ahead.
bool GSRendererHW::OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    const bool known_address = fbp == 0x02300 || fbp == 0x03fc0;

    if(!PRIM->TME && known_address && fpsm == PSM_PSMCT32)
    {
        // half height buffer clear

        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Black Hawk Down: the half height buffer clear is replaced by a clear of the depth
// target and the draw is skipped.
bool GSRendererHW::OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    if(fbp != 0x02000 || fpsm != PSM_PSMZ24)
    {
        return true;
    }

    // half height buffer clear

    m_dev->ClearDepth(ds, 0);

    return false;
}
// Star Wars The Force Unleashed: two depth clear cases.
// Untextured draw to PSMCT24 at 0x2bc0: clear depth and skip the draw.
// Textured draw to PSMCT32 at 0x0 / 0x1180 with every vertex at z == 0: clear depth, keep the draw.
bool GSRendererHW::OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    if(!PRIM->TME)
    {
        if(fbp == 0x2bc0 && fpsm == PSM_PSMCT24)
        {
            m_dev->ClearDepth(ds, 0);

            return false;
        }

        return true;
    }

    // min == max == 0 means the z of the whole batch is zero
    const bool zero_z = m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_max.p.z == 0;

    if((fbp == 0x0 || fbp == 0x01180) && fpsm == PSM_PSMCT32 && zero_z)
    {
        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// X-Men Origins Wolverine: clears the depth target before a draw into the PSMCT16
// frame buffer at address 0, the draw itself still goes ahead.
bool GSRendererHW::OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    const bool half_clear = fbp == 0x0 && fpsm == PSM_PSMCT16;

    if(half_clear)
    {
        // half height buffer clear

        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Call of Duty Final Fronts: the half height buffer clear is replaced by a clear of the
// depth target and the draw is skipped.
bool GSRendererHW::OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    if(fbp != 0x02300 || fpsm != PSM_PSMZ24)
    {
        return true;
    }

    // half height buffer clear

    m_dev->ClearDepth(ds, 0);

    return false;
}
// Spyro A New Beginning: two depth clear cases.
// Untextured draw to PSMCT24 at 0x2800 / 0x2bc0: clear depth and skip the draw.
// Textured draw to PSMCT32 at 0x0 / 0x1180 with every vertex at z == 0: clear depth, keep the draw.
bool GSRendererHW::OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    if(!PRIM->TME)
    {
        // 0x2800 pal, 0x2bc0 ntsc
        if(fpsm == PSM_PSMCT24 && (fbp == 0x02800 || fbp == 0x02bc0))
        {
            // half height buffer clear

            m_dev->ClearDepth(ds, 0);

            return false;
        }

        return true;
    }

    // min == max == 0 means the z of the whole batch is zero
    const bool zero_z = m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0;

    if((fbp == 0x0 || fbp == 0x01180) && fpsm == PSM_PSMCT32 && zero_z)
    {
        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Spyro The Eternal Night: two depth clear cases.
// Untextured draw to PSMCT24 at 0x2bc0: clear depth and skip the draw.
// Textured draw to PSMCT32 at 0x0 / 0x1180 with every vertex at z == 0: clear depth, keep the draw.
bool GSRendererHW::OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    if(!PRIM->TME)
    {
        if(fpsm == PSM_PSMCT24 && fbp == 0x2bc0)
        {
            // half height buffer clear

            m_dev->ClearDepth(ds, 0);

            return false;
        }

        return true;
    }

    // min == max == 0 means the z of the whole batch is zero
    const bool zero_z = m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0;

    if((fbp == 0x0 || fbp == 0x01180) && fpsm == PSM_PSMCT32 && zero_z)
    {
        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Tales of Legendia: forces the depth test to "always" for constant-z draws into the
// PSMCT32 frame buffer at 0x1c00 when the alpha test is off. The draw goes ahead.
bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    // the same z for every vertex of the batch
    const bool constant_z = m_vt->m_max.p.z == m_vt->m_min.p.z;

    if(fpsm == PSM_PSMCT32 && fbp == 0x01c00 && !m_context->TEST.ATE && constant_z)
    {
        m_context->TEST.ZTST = ZTST_ALWAYS;

        //m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Generic hack for games that upload a palette as an untextured point list.
//
// For frame buffers at or above 0x3f40 (32 block aligned) a batch of exactly 16 or 256
// points is written straight into local memory as an 8x2 or 16x16 block of 32 bit pixels.
// The alpha of every point is doubled on the way (0x80 and above saturate to 0xff), the
// clut is invalidated and the draw is skipped. Everything else is drawn normally.
bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    if(m_vt->m_primclass != GS_POINT_CLASS || PRIM->TME)
    {
        return true;
    }

    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fbw = m_context->FRAME.FBW;

    if(fbp < 0x03f40 || (fbp & 0x1f) != 0)
    {
        return true;
    }

    // row width of the written block as a power of two: 8 for 16 points, 16 for 256 points
    int shift;

    if(m_vertex.tail == 16)
    {
        shift = 3;
    }
    else if(m_vertex.tail == 256)
    {
        shift = 4;
    }
    else
    {
        ASSERT(0);

        return true;
    }

    const int count = (int)m_vertex.tail;
    const int mask = (1 << shift) - 1;

    uint8* RESTRICT v = m_vertex.buff;

    for(int i = 0; i < count; i++, v += m_vertex.stride)
    {
        uint32 c = GetColor(v);
        const uint32 a = c >> 24;

        // a << 25 is (a * 2) << 24, only used while a < 0x80 so it cannot overflow
        c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff);

        SetColor(v, c);

        m_mem.WritePixel32(i & mask, i >> shift, c, fbp, fbw);
    }

    m_mem.m_clut.Invalidate();

    return false;
}
void GSRendererHW::OO_DBZBT2()
{
// palette readback (cannot detect yet, when fetching the texture later)
uint32 FBP = m_context->FRAME.Block();
uint32 TBP0 = m_context->TEX0.TBP0;
if(PRIM->TME && (FBP == 0x03c00 && TBP0 == 0x03c80 || FBP == 0x03ac0 && TBP0 == 0x03b40))
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = FBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = PSM_PSMCT32;
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 64, 64));
}
}
void GSRendererHW::OO_MajokkoALaMode2()
{
// palette readback
uint32 FBP = m_context->FRAME.Block();
if(!PRIM->TME && FBP == 0x03f40)
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = FBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = PSM_PSMCT32;
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 16, 16));
}
}
bool GSRendererHW::CU_DBZBT2()
{
// palette should stay 64 x 64
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x03c00 && FBP != 0x03ac0;
}
bool GSRendererHW::CU_MajokkoALaMode2()
{
// palette should stay 16 x 16
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x03f40;
}
bool GSRendererHW::CU_TalesOfAbyss()
{
// full image blur and brightening
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0;
}

View File

@ -26,28 +26,8 @@
#include "GSCrc.h"
#include "GSFunctionMap.h"
template<class Vertex>
class GSRendererHW : public GSRendererT<Vertex>
class GSRendererHW : public GSRenderer
{
protected:
using GSRendererT<Vertex>::m_vt;
using GSRendererT<Vertex>::m_count;
using GSRendererT<Vertex>::m_env;
using GSRendererT<Vertex>::m_context;
using GSRendererT<Vertex>::m_vertices;
using GSRendererT<Vertex>::m_dev;
using GSRendererT<Vertex>::PRIM;
using GSRendererT<Vertex>::m_mem;
using GSRendererT<Vertex>::m_regs;
using GSRendererT<Vertex>::m_perfmon;
using GSRendererT<Vertex>::m_game;
using GSRendererT<Vertex>::s_dump;
using GSRendererT<Vertex>::s_save;
using GSRendererT<Vertex>::s_saven;
using GSRendererT<Vertex>::s_savez;
using GSRendererT<Vertex>::s_n;
private:
int m_width;
int m_height;
@ -56,512 +36,36 @@ private:
bool m_nativeres;
int m_upscale_multiplier;
int m_userhacks_skipdraw;
#pragma region hacks
// Member function pointer types of the three per-game hack kinds.
// OI: called by Draw() before the primitives are drawn, a false return cancels the draw.
typedef bool (GSRendererHW::*OI_Ptr)(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
// OO: called by Draw() after the primitives were drawn.
typedef void (GSRendererHW::*OO_Ptr)();
// CU: no-argument predicate, its call site is not part of this section.
typedef bool (GSRendererHW::*CU_Ptr)();
// FFXII video hack.
//
// A line-class draw of 448*2 or 512*2 vertices arms the hack and records the line count.
// While armed, big point-class draws (>= 16*512 vertices) are captured into a CPU side
// pixel buffer and skipped; the following line-class draw with the recorded vertex count
// is replaced by two triangles textured with the captured pixels. Any other point or
// line draw while armed disarms the hack. Returns false when the draw has to be skipped.
bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    static uint32* video = NULL;
    static int lines = 0;

    if(lines == 0)
    {
        const bool video_strip = m_count == 448 * 2 || m_count == 512 * 2;

        if(m_vt.m_primclass == GS_LINE_CLASS && video_strip)
        {
            lines = m_count / 2;
        }

        return true;
    }

    if(m_vt.m_primclass == GS_POINT_CLASS)
    {
        if(m_count < 16 * 512)
        {
            lines = 0;

            return true;
        }

        // incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454

        if(!video) video = new uint32[512 * 512];

        const int ox = m_context->XYOFFSET.OFX;
        const int oy = m_context->XYOFFSET.OFY;

        for(int i = 0; i < m_count; i++)
        {
            const int x = ((int)m_vertices[i].p.x - ox) >> 4;
            const int y = ((int)m_vertices[i].p.y - oy) >> 4;

            // (y << 8) + (y << 7) + (y << 6) is y * 448, the row pitch of the captured image

            video[(y << 8) + (y << 7) + (y << 6) + x] = m_vertices[i]._c0();
        }

        return false;
    }

    if(m_vt.m_primclass == GS_LINE_CLASS)
    {
        if(m_count != lines * 2)
        {
            lines = 0;

            return true;
        }

        // normally, this step would copy the video onto screen with 512 texture mapped horizontal lines,
        // but we use the stored video data to create a new texture, and replace the lines with two triangles

        m_dev->Recycle(t->m_texture);

        t->m_texture = m_dev->CreateTexture(512, 512);

        t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);

        // vertices 0 and 1 are already where the first triangle needs them

        m_vertices[2] = m_vertices[m_count - 2];
        m_vertices[3] = m_vertices[1];
        m_vertices[4] = m_vertices[2];
        m_vertices[5] = m_vertices[m_count - 1];

        m_count = 6;

        m_vt.Update(m_vertices, m_count, GS_TRIANGLE_CLASS);
    }

    return true;
}
// FFX: the random battle transition writes the z buffer directly, so the depth
// target is cleared before the matching draw. Never cancels the draw.
bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 zbp = m_context->ZBUF.Block();
    const uint32 tbp = m_context->TEX0.TBP0;

    const bool transition =
        (fbp == 0x00d00 || fbp == 0x00000)
        && zbp == 0x02100
        && PRIM->TME
        && tbp == 0x01a00
        && m_context->TEX0.PSM == PSM_PSMCT16S;

    if(transition)
    {
        // random battle transition (z buffer written directly, clear it now)

        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Metal Slug 6: vertices with red == 0 and non-zero green and blue get the average of
// green and blue as red; the vertex trace is refreshed afterwards. Never cancels the draw.
bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    // missing red channel fix

    for(int i = 0; i < m_count; i++)
    {
        const bool red_missing = m_vertices[i]._r() == 0 && m_vertices[i]._g() != 0 && m_vertices[i]._b() != 0;

        if(red_missing)
        {
            m_vertices[i]._r() = (m_vertices[i]._g() + m_vertices[i]._b()) / 2;
        }
    }

    m_vt.Update(m_vertices, m_count, m_vt.m_primclass);

    return true;
}
// God of War 2: a draw into a PSMZ24 frame buffer at one of the known addresses is a
// z buffer clear. The matching depth target is looked up and cleared, the draw is skipped.
bool OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fbw = m_context->FRAME.FBW;
    const uint32 fpsm = m_context->FRAME.PSM;

    // ntsc 0xf00, pal 0x100, ntsc "HD" 0x1280
    const bool known_address = fbp == 0x00f00 || fbp == 0x00100 || fbp == 0x01280;

    if(!known_address || fpsm != PSM_PSMZ24)
    {
        return true;
    }

    // z buffer clear

    GIFRegTEX0 TEX0;

    TEX0.TBP0 = fbp;
    TEX0.TBW = fbw;
    TEX0.PSM = fpsm;

    GSTextureCache::Target* target = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

    if(target)
    {
        m_dev->ClearDepth(target->m_texture, 0);
    }

    return false;
}
// The Simpsons Game: replaces the game's half height z clear with a full clear of the
// depth target and skips the draw.
bool OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    // 0x1800 pal, 0x1500 ntsc
    const bool known_address = fbp == 0x01500 || fbp == 0x01800;

    if(!known_address || fpsm != PSM_PSMZ24)
    {
        return true;
    }

    // instead of just simply drawing a full height 512x512 sprite to clear the z buffer,
    // it uses a 512x256 sprite only, yet it is still able to fill the whole surface with zeros,
    // how? by using a render target that overlaps with the lower half of the z buffer...

    m_dev->ClearDepth(ds, 0);

    return false;
}
// Rozen Maiden Gebet Garden: two untextured draws clear one buffer through the other
// one's register. Both are turned into explicit clears and the draw is skipped.
bool OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    if(PRIM->TME)
    {
        return true;
    }

    const uint32 fbp = m_context->FRAME.Block();
    const uint32 zbp = m_context->ZBUF.Block();

    if(fbp == 0x008c0 && zbp == 0x01a40)
    {
        // frame buffer clear, atst = fail, afail = write z only, z buffer points to frame buffer

        GIFRegTEX0 TEX0;

        TEX0.TBP0 = zbp;
        TEX0.TBW = m_context->FRAME.FBW;
        TEX0.PSM = m_context->FRAME.PSM;

        GSTextureCache::Target* color = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);

        if(color)
        {
            m_dev->ClearRenderTarget(color->m_texture, 0);
        }

        return false;
    }

    if(fbp == 0x00000 && zbp == 0x01180)
    {
        // z buffer clear, frame buffer now points to the z buffer (how can they be so clever?)

        GIFRegTEX0 TEX0;

        TEX0.TBP0 = fbp;
        TEX0.TBW = m_context->FRAME.FBW;
        TEX0.PSM = m_context->ZBUF.PSM;

        GSTextureCache::Target* depth = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

        if(depth)
        {
            m_dev->ClearDepth(depth->m_texture, 0);
        }

        return false;
    }

    return true;
}
// Spider-Man Web of Shadows: clears the whole depth target before the matching draw,
// the draw itself still goes ahead.
bool OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    // 0x2800 pal, 0x25a0 ntsc
    const bool known_address = fbp == 0x025a0 || fbp == 0x02800;

    if(known_address && fpsm == PSM_PSMCT32)
    {
        // only top half of the screen clears

        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Ty the Tasmanian Tiger (1 and 2): the half height buffer clear is replaced by a clear
// of the depth target and the draw is skipped.
bool OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    // 0x2800 pal, 0x2bc0 ntsc
    const bool known_address = fbp == 0x02800 || fbp == 0x02BC0;

    if(!known_address || fpsm != PSM_PSMCT24)
    {
        return true;
    }

    // half height buffer clear

    m_dev->ClearDepth(ds, 0);

    return false;
}
// Digimon Rumble Arena 2: clears the depth target before the matching untextured draw,
// the draw itself still goes ahead.
bool OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    const bool known_address = fbp == 0x02300 || fbp == 0x03fc0;

    if(!PRIM->TME && known_address && fpsm == PSM_PSMCT32)
    {
        // half height buffer clear

        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Black Hawk Down: the half height buffer clear is replaced by a clear of the depth
// target and the draw is skipped.
bool OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    if(fbp != 0x02000 || fpsm != PSM_PSMZ24)
    {
        return true;
    }

    // half height buffer clear

    m_dev->ClearDepth(ds, 0);

    return false;
}
// Star Wars The Force Unleashed: two depth clear cases.
// Untextured draw to PSMCT24 at 0x2bc0: clear depth and skip the draw.
// Textured draw to PSMCT32 at 0x0 / 0x1180 with every vertex at z == 0: clear depth, keep the draw.
bool OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    if(!PRIM->TME)
    {
        if(fbp == 0x2bc0 && fpsm == PSM_PSMCT24)
        {
            m_dev->ClearDepth(ds, 0);

            return false;
        }

        return true;
    }

    // min == max == 0 means the z of the whole batch is zero
    const bool zero_z = m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_max.p.z == 0;

    if((fbp == 0x0 || fbp == 0x01180) && fpsm == PSM_PSMCT32 && zero_z)
    {
        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// X-Men Origins Wolverine: clears the depth target before a draw into the PSMCT16
// frame buffer at address 0, the draw itself still goes ahead.
bool OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    const bool half_clear = fbp == 0x0 && fpsm == PSM_PSMCT16;

    if(half_clear)
    {
        // half height buffer clear

        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Call of Duty Final Fronts: the half height buffer clear is replaced by a clear of the
// depth target and the draw is skipped.
bool OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    if(fbp != 0x02300 || fpsm != PSM_PSMZ24)
    {
        return true;
    }

    // half height buffer clear

    m_dev->ClearDepth(ds, 0);

    return false;
}
// Spyro A New Beginning: two depth clear cases.
// Untextured draw to PSMCT24 at 0x2800 / 0x2bc0: clear depth and skip the draw.
// Textured draw to PSMCT32 at 0x0 / 0x1180 with every vertex at z == 0: clear depth, keep the draw.
bool OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    if(!PRIM->TME)
    {
        // 0x2800 pal, 0x2bc0 ntsc
        if(fpsm == PSM_PSMCT24 && (fbp == 0x02800 || fbp == 0x02bc0))
        {
            // half height buffer clear

            m_dev->ClearDepth(ds, 0);

            return false;
        }

        return true;
    }

    // min == max == 0 means the z of the whole batch is zero
    const bool zero_z = m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_min.p.z == 0x0;

    if((fbp == 0x0 || fbp == 0x01180) && fpsm == PSM_PSMCT32 && zero_z)
    {
        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Spyro The Eternal Night: two depth clear cases.
// Untextured draw to PSMCT24 at 0x2bc0: clear depth and skip the draw.
// Textured draw to PSMCT32 at 0x0 / 0x1180 with every vertex at z == 0: clear depth, keep the draw.
bool OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    if(!PRIM->TME)
    {
        if(fpsm == PSM_PSMCT24 && fbp == 0x2bc0)
        {
            // half height buffer clear

            m_dev->ClearDepth(ds, 0);

            return false;
        }

        return true;
    }

    // min == max == 0 means the z of the whole batch is zero
    const bool zero_z = m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_min.p.z == 0x0;

    if((fbp == 0x0 || fbp == 0x01180) && fpsm == PSM_PSMCT32 && zero_z)
    {
        m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Tales of Legendia: forces the depth test to "always" for constant-z draws into the
// PSMCT32 frame buffer at 0x1c00 when the alpha test is off. The draw goes ahead.
bool OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fpsm = m_context->FRAME.PSM;

    // the same z for every vertex of the batch
    const bool constant_z = m_vt.m_max.p.z == m_vt.m_min.p.z;

    if(fpsm == PSM_PSMCT32 && fbp == 0x01c00 && !m_context->TEST.ATE && constant_z)
    {
        m_context->TEST.ZTST = ZTST_ALWAYS;

        //m_dev->ClearDepth(ds, 0);
    }

    return true;
}
// Generic hack for games that upload a palette as an untextured point list.
//
// For frame buffers at or above 0x3f40 (32 block aligned) a batch of exactly 16 or 256
// points is written straight into local memory as an 8x2 or 16x16 block of 32 bit pixels.
// The alpha of every point is doubled on the way (0x80 and above saturate to 0xff), the
// clut is invalidated and the draw is skipped. Everything else is drawn normally.
bool OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
    if(m_vt.m_primclass != GS_POINT_CLASS || PRIM->TME)
    {
        return true;
    }

    const uint32 fbp = m_context->FRAME.Block();
    const uint32 fbw = m_context->FRAME.FBW;

    if(fbp < 0x03f40 || (fbp & 0x1f) != 0)
    {
        return true;
    }

    // row width of the written block as a power of two: 8 for 16 points, 16 for 256 points
    int shift;

    if(m_count == 16)
    {
        shift = 3;
    }
    else if(m_count == 256)
    {
        shift = 4;
    }
    else
    {
        ASSERT(0);

        return true;
    }

    const int count = m_count;
    const int mask = (1 << shift) - 1;

    for(int i = 0; i < count; i++)
    {
        uint8 a = m_vertices[i]._a();

        m_vertices[i]._a() = a >= 0x80 ? 0xff : a * 2;

        m_mem.WritePixel32(i & mask, i >> shift, m_vertices[i]._c0(), fbp, fbw);
    }

    m_mem.m_clut.Invalidate();

    return false;
}
void OO_DBZBT2()
{
// palette readback (cannot detect yet, when fetching the texture later)
uint32 FBP = m_context->FRAME.Block();
uint32 TBP0 = m_context->TEX0.TBP0;
if(PRIM->TME && (FBP == 0x03c00 && TBP0 == 0x03c80 || FBP == 0x03ac0 && TBP0 == 0x03b40))
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = FBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = PSM_PSMCT32;
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 64, 64));
}
}
void OO_MajokkoALaMode2()
{
// palette readback
uint32 FBP = m_context->FRAME.Block();
if(!PRIM->TME && FBP == 0x03f40)
{
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = FBP;
BITBLTBUF.SBW = 1;
BITBLTBUF.SPSM = PSM_PSMCT32;
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 16, 16));
}
}
bool CU_DBZBT2()
{
// palette should stay 64 x 64
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x03c00 && FBP != 0x03ac0;
}
bool CU_MajokkoALaMode2()
{
// palette should stay 16 x 16
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x03f40;
}
bool CU_TalesOfAbyss()
{
// full image blur and brightening
uint32 FBP = m_context->FRAME.Block();
return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0;
}
// OI hacks, one per game: called by Draw() before the primitives are drawn with the
// render target, the depth target and the source texture of the draw.
// Returning false cancels the draw.
bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
bool OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
// not tied to a single title, installed for games flagged with CRC::PointListPalette
bool OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
// OO hacks: called by Draw() after the primitives were drawn.
void OO_DBZBT2();
void OO_MajokkoALaMode2();
// CU hacks: predicates on the current frame buffer address.
bool CU_DBZBT2();
bool CU_MajokkoALaMode2();
bool CU_TalesOfAbyss();
class Hacks
{
@ -617,383 +121,37 @@ private:
OO_Ptr m_oo;
CU_Ptr m_cu;
// Fills the three per-game hack tables. Every entry ties a CRC title and a region
// to the member function that implements the workaround for that game.
// NOTE(review): the bare "Hacks();" line inside the body looks like the constructor
// declaration of the revised header interleaved here by the diff view - confirm
// against the real header before reading this block as compilable code.
Hacks()
: m_oi_map(m_oi_list)
, m_oo_map(m_oo_list)
, m_cu_map(m_cu_list)
, m_oi(NULL)
, m_oo(NULL)
, m_cu(NULL)
{
// OI hacks: called before the draw, a false return cancels it
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::FFXII, CRC::EU, &GSRendererHW::OI_FFXII));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::FFX, CRC::RegionCount, &GSRendererHW::OI_FFX));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::MetalSlug6, CRC::RegionCount, &GSRendererHW::OI_MetalSlug6));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::GodOfWar2, CRC::RegionCount, &GSRendererHW::OI_GodOfWar2));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SimpsonsGame, CRC::RegionCount, &GSRendererHW::OI_SimpsonsGame));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::RozenMaidenGebetGarden, CRC::RegionCount, &GSRendererHW::OI_RozenMaidenGebetGarden));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpidermanWoS, CRC::RegionCount, &GSRendererHW::OI_SpidermanWoS));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TyTasmanianTiger, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TyTasmanianTiger2, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::DigimonRumbleArena2, CRC::RegionCount, &GSRendererHW::OI_DigimonRumbleArena2));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::StarWarsForceUnleashed, CRC::RegionCount, &GSRendererHW::OI_StarWarsForceUnleashed));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::BlackHawkDown, CRC::RegionCount, &GSRendererHW::OI_BlackHawkDown));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::XmenOriginsWolverine, CRC::RegionCount, &GSRendererHW::OI_XmenOriginsWolverine));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::CallofDutyFinalFronts, CRC::RegionCount, &GSRendererHW::OI_CallofDutyFinalFronts));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpyroNewBeginning, CRC::RegionCount, &GSRendererHW::OI_SpyroNewBeginning));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpyroEternalNight, CRC::RegionCount, &GSRendererHW::OI_SpyroEternalNight));
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TalesOfLegendia, CRC::RegionCount, &GSRendererHW::OI_TalesOfLegendia));
Hacks();
// OO hacks: called after the draw
m_oo_list.push_back(HackEntry<OO_Ptr>(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::OO_DBZBT2));
m_oo_list.push_back(HackEntry<OO_Ptr>(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::OO_MajokkoALaMode2));
// CU hacks
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::CU_DBZBT2));
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::CU_MajokkoALaMode2));
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::TalesOfAbyss, CRC::RegionCount, &GSRendererHW::CU_TalesOfAbyss));
}
// Picks the OI/OO/CU hack callbacks that belong to the given game.
// Games flagged with CRC::PointListPalette additionally get the generic
// point-list palette hack as their OI callback.
void SetGame(const CRC::Game& game)
{
    // lookup key: region in bits 24 and up, title below
    const uint32 key = (uint32)((game.region << 24) | game.title);

    m_oi = m_oi_map[key];
    m_oo = m_oo_map[key];
    m_cu = m_cu_map[key];

    if(game.flags & CRC::PointListPalette)
    {
        // a game is not expected to have both a dedicated OI hack and this flag
        ASSERT(m_oi == NULL);

        m_oi = &GSRendererHW::OI_PointListPalette;
    }
}
void SetGameCRC(const CRC::Game& game);
} m_hacks;
// Vertex accessors for the hacks, which walk the raw vertex buffer in strides and
// pass untyped vertex pointers; this class does not implement them.
// GetPosX/GetPosY: vertex position; OI_FFXII subtracts XYOFFSET and shifts right by 4
// to get pixel coordinates.
virtual int GetPosX(const void* vertex) const = 0;
virtual int GetPosY(const void* vertex) const = 0;
// GetColor/SetColor: packed 32 bit vertex color; the hacks read red from bits 0-7,
// green from 8-15, blue from 16-23 and alpha from 24-31.
virtual uint32 GetColor(const void* vertex) const = 0;
virtual void SetColor(void* vertex, uint32 c) const = 0;
#pragma endregion
protected:
GSTextureCache* m_tc;
// Requests a flush of the texture cache and resets the base renderer.
// The cache is not emptied here (see the TODO below); only m_reset is set, and VSync()
// performs the actual RemoveAll() when it finds the flag.
void Reset()
{
// TODO: GSreset can come from the main thread too => crash
// m_tc->RemoveAll();
m_reset = true;
GSRendererT<Vertex>::Reset();
}
// Per-vsync housekeeping on top of the base class: ages the texture cache and the
// device pool, restarts the skip counter and carries out a flush of the texture cache
// if Reset() asked for one.
void VSync(int field)
{
    GSRendererT<Vertex>::VSync(field);

    m_tc->IncAge();

    m_dev->AgePool();

    m_skip = 0;

    if(!m_reset)
    {
        return;
    }

    // deferred from Reset()

    m_tc->RemoveAll();

    m_reset = false;
}
// Removes every entry from the texture cache, then lets the base class reset the device.
void ResetDevice()
{
m_tc->RemoveAll();
GSRendererT<Vertex>::ResetDevice();
}
// Returns the texture of the cached target that matches display circuit i
// (address, width and format are taken from its DISPFB register), or NULL if
// LookupTarget() returns no target. With dumping enabled the texture may also be
// saved to disk and the dump counter is advanced.
GSTexture* GetOutput(int i)
{
    const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;

    GIFRegTEX0 TEX0;

    TEX0.TBP0 = DISPFB.Block();
    TEX0.TBW = DISPFB.FBW;
    TEX0.PSM = DISPFB.PSM;

    // TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM);

    GSTextureCache::Target* target = m_tc->LookupTarget(TEX0, m_width, m_height);

    if(target == NULL)
    {
        return NULL;
    }

    GSTexture* output = target->m_texture;

    if(s_dump)
    {
        if(s_save && s_n >= s_saven)
        {
            output->Save(format("c:\\temp2\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM));
        }

        s_n++;
    }

    return output;
}
// Forwards an invalidation of rectangle r to the texture cache. The affected memory
// is described by the destination fields of BITBLTBUF (DBP, DBW, DPSM).
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
// printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM);
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
}
// Forwards an invalidation of rectangle r to the texture cache. The affected memory
// is described by the source fields of BITBLTBUF (SBP, SBW, SPSM).
// Requests with clut set are ignored for now (see the FIXME).
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false)
{
    // printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM);

    if(!clut) // FIXME
    {
        m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
    }
}
// Common draw path of the hardware renderers.
// Steps, in order:
//   1. optional frame skipping (CRC hacks / user skipdraw),
//   2. look up the colour and depth targets in the texture cache,
//   3. look up the source texture when texturing is enabled,
//   4. optional debug dumps of the inputs,
//   5. per-game "m_oi" callback, which may veto the draw by returning false,
//   6. try to drop the alpha test, temporarily patching TEST/FRAME/ZBUF,
//   7. call the backend-specific Draw(rt, ds, tex) overload,
//   8. restore the patched registers and mark the touched area of each
//      written target as valid and as invalidated video memory,
//   9. per-game "m_oo" callback and optional debug dumps of the outputs.
void Draw()
{
#ifndef DISABLE_CRC_HACKS
if(GSRendererT<Vertex>::IsBadFrame(m_skip, m_userhacks_skipdraw)) return;
#endif
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
// colour target: described by the FRAME register
GIFRegTEX0 TEX0;
TEX0.TBP0 = context->FRAME.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->FRAME.PSM;
GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
// depth target: address and format come from ZBUF, the buffer width is taken from FRAME.FBW
TEX0.TBP0 = context->ZBUF.Block();
TEX0.TBW = context->FRAME.FBW;
TEX0.PSM = context->ZBUF.PSM;
GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
// both targets are required; a failed lookup abandons the draw
if(!rt || !ds)
{
ASSERT(0);
return;
}
GSTextureCache::Source* tex = NULL;
if(PRIM->TME)
{
// texturing enabled: load the CLUT, compute the used texel rectangle and fetch the source
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
GSVector4i r;
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
if(!tex) return;
}
// debug: dump the source texture/palette and the targets before drawing
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven && tex)
{
s = format("c:\\temp2\\_%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds",
s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM,
(int)context->CLAMP.WMS, (int)context->CLAMP.WMT,
(int)context->CLAMP.MINU, (int)context->CLAMP.MAXU,
(int)context->CLAMP.MINV, (int)context->CLAMP.MAXV);
tex->m_texture->Save(s, true);
if(tex->m_palette)
{
s = format("c:\\temp2\\_%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM);
tex->m_palette->Save(s, true);
}
}
// the dump counter advances once for the texture slot and once more for the target slot below
s_n++;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(s);
}
if(s_savez && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
ds->m_texture->Save(s);
}
s_n++;
}
// per-game hook run before the draw; returning false cancels it
if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex))
{
return;
}
// skip alpha test if possible
// keep copies of the registers that are patched below so they can be restored after the draw
GIFRegTEST TEST = context->TEST;
GIFRegFRAME FRAME = context->FRAME;
GIFRegZBUF ZBUF = context->ZBUF;
// fm: frame write mask; zm: all ones when depth writes are masked or the depth test is off, otherwise 0
uint32 fm = context->FRAME.FBMSK;
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
{
// TryAlphaTest receives fm/zm and may change them; on success the alpha test is forced to "always"
if(GSRendererT<Vertex>::TryAlphaTest(fm, zm))
{
context->TEST.ATST = ATST_ALWAYS;
}
}
context->FRAME.FBMSK = fm;
context->ZBUF.ZMSK = zm != 0;
//
Draw(rt->m_texture, ds->m_texture, tex);
//
// undo the temporary register changes
context->TEST = TEST;
context->FRAME = FRAME;
context->ZBUF = ZBUF;
//
// area touched by this draw: vertex-trace bounding box clipped to the scissor rectangle
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));
// colour target written unless every bit is masked
if(fm != 0xffffffff)
{
rt->m_valid = rt->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.fb, r, false);
}
// depth target written unless fully masked
if(zm != 0xffffffff)
{
ds->m_valid = ds->m_valid.runion(r);
m_tc->InvalidateVideoMem(context->offset.zb, r, false);
}
//
// per-game hook run after the draw
if(m_hacks.m_oo)
{
(this->*m_hacks.m_oo)();
}
// debug: dump the targets after drawing
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
rt->m_texture->Save(s);
}
if(s_savez && s_n >= s_saven)
{
s = format("c:\\temp2\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
ds->m_texture->Save(s);
}
s_n++;
}
#ifdef DISABLE_HW_TEXTURE_CACHE
// with the hardware texture cache disabled the drawn area is read back from the render target
m_tc->Read(rt, r);
#endif
}
virtual void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
// Tells whether upscaling may be used right now: a per-game "m_cu" hook can
// veto it, otherwise it requires native-resolution mode to be off and the
// display output (PMODE.EN) to be enabled.
bool CanUpscale()
{
	if(m_hacks.m_cu)
	{
		if(!(this->*m_hacks.m_cu)())
		{
			return false;
		}
	}

	// upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition)
	return !m_nativeres && m_regs->PMODE.EN != 0;
}
// Returns the current value of m_upscale_multiplier.
int GetUpscaleMultiplier()
{
return m_upscale_multiplier;
}
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
public:
GSRendererHW(GSTextureCache* tc)
: GSRendererT<Vertex>()
, m_tc(tc)
, m_width(1024)
, m_height(1024)
, m_skip(0)
, m_reset(false)
, m_upscale_multiplier(1)
{
m_nativeres = !!theApp.GetConfig("nativeres", 0);
m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1);
m_userhacks_skipdraw = theApp.GetConfig("UserHacks_SkipDraw", 0);
GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc);
virtual ~GSRendererHW();
if(!m_nativeres)
{
m_width = theApp.GetConfig("resx", m_width);
m_height = theApp.GetConfig("resy", m_height);
void SetGameCRC(uint32 crc, int options);
bool CanUpscale();
int GetUpscaleMultiplier();
m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", m_upscale_multiplier);
if(m_upscale_multiplier > 6)
{
m_upscale_multiplier = 1; // use the normal upscale math
}
else if(m_upscale_multiplier > 1)
{
m_width = 640 * m_upscale_multiplier; // 512 is also common, but this is not always detected right.
m_height = 512 * m_upscale_multiplier; // 448 is also common, but this is not always detected right.
}
}
else m_upscale_multiplier = 1;
}
// Deletes the texture cache held in m_tc, so the renderer is responsible for its lifetime.
virtual ~GSRendererHW()
{
delete m_tc;
}
// Passes the game CRC and options to the base renderer, selects the matching
// per-game hack callbacks, and applies the one title-specific size override.
void SetGameCRC(uint32 crc, int options)
{
	GSRendererT<Vertex>::SetGameCRC(crc, options);

	m_hacks.SetGame(m_game);

	const bool needs_wide_target = (m_game.title == CRC::JackieChanAdv);

	if(needs_wide_target)
	{
		// TODO: uses a 1280px wide 16 bit render target, but this only fixes half of the problem
		m_width = 1280;
	}
}
void Reset();
void VSync(int field);
void ResetDevice();
GSTexture* GetOutput(int i);
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void Draw();
};

View File

@ -23,26 +23,33 @@
#include "GSRenderer.h"
class GSRendererNull : public GSRendererT<GSVertexNull>
class GSRendererNull : public GSRenderer
{
class GSVertexTraceNull : public GSVertexTrace
{
public:
GSVertexTraceNull(const GSState* state) : GSVertexTrace(state) {}
};
protected:
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index)
{
}
void Draw()
{
}
GSTexture* GetOutput(int i)
GSTexture* GetOutput(int i)
{
return NULL;
}
public:
GSRendererNull()
: GSRendererT<GSVertexNull>()
{
InitVertexKick(GSRendererNull);
}
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip)
GSRendererNull()
: GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertexNull))
{
InitConvertVertex(GSRendererNull);
}
};

View File

@ -25,9 +25,11 @@
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
GSRendererSW::GSRendererSW(int threads)
: m_fzb(NULL)
: GSRenderer(new GSVertexTraceSW(this), sizeof(GSVertexSW))
, m_fzb(NULL)
{
InitVertexKick(GSRendererSW);
InitConvertVertex(GSRendererSW);
InitConvertIndex();
m_tc = new GSTextureCacheSW(this);
@ -62,7 +64,7 @@ void GSRendererSW::Reset()
m_reset = true;
GSRendererT<GSVertexSW>::Reset();
GSRenderer::Reset();
}
void GSRendererSW::VSync(int field)
@ -88,7 +90,7 @@ void GSRendererSW::VSync(int field)
printf("m_syncpoint_count = %d\n", ((GSRasterizerList*)m_rl)->m_syncpoint_count); ((GSRasterizerList*)m_rl)->m_syncpoint_count = 0;
printf("m_solidrect_count = %d\n", ((GSRasterizerList*)m_rl)->m_solidrect_count); ((GSRasterizerList*)m_rl)->m_solidrect_count = 0;
*/
GSRendererT<GSVertexSW>::VSync(field);
GSRenderer::VSync(field);
m_tc->IncAge();
@ -149,41 +151,187 @@ GSTexture* GSRendererSW::GetOutput(int i)
return m_texture[i];
}
// Converts the vertex registers currently latched in m_v into the software
// rasterizer's GSVertexSW format and stores the result at vertex[index].
//   prim - primitive type; sprites additionally keep the raw Z in t.u32[3]
//   tme  - non-zero when texturing is on (texture coordinates are written)
//   fst  - non-zero when coordinates come from UV instead of ST/Q
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererSW::ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index)
{
	GSVertexSW* RESTRICT dst = vertex + index;

	// position: x/y relative to the context's XYOFFSET, z clamped, fog in w
	GSVector4i pos_xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - m_context->XYOFFSET;
	GSVector4i pos_zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later

	// m_x4f800000 is added to lanes whose integer value has the sign bit set
	GSVector4 sign_fix = GSVector4::m_x4f800000 & GSVector4::cast(pos_zf.sra32(31));

	dst->p = GSVector4(pos_xy).xyxy(GSVector4(pos_zf) + sign_fix) * g_pos_scale;

	if(tme)
	{
		if(fst)
		{
			dst->t = GSVector4(GSVector4i::load(m_v.UV.u32[0]).upl16() << (16 - 4));
		}
		else
		{
			GSVector4 st = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH);

			dst->t = st.xyxy(GSVector4::load(m_v.RGBAQ.Q));
		}
	}

	dst->c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7);

	if(prim == GS_SPRITE)
	{
		dst->t.u32[3] = m_v.XYZ.Z;
	}
}
// Copies the indices of every primitive that can produce pixels from src to
// dst and returns how many indices were written.
// A primitive is dropped when its bounding box lies outside the extended
// scissor rectangle, when (triangles, sprites) the box collapses after
// ceil(), or when (triangles) the three points are collinear or two coincide.
//   dst   - output index buffer, must have room for count entries
//   src   - input indices into m_vertex.buff, whole primitives only
//   count - number of indices in src
template<uint32 prim>
size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count)
{
	// memcpy(dst, src, sizeof(uint32) * count); return;

	// TODO: IsQuad

	// compile-time classification of the primitive type
	const bool is_point = prim == GS_POINTLIST;
	const bool is_pair = prim == GS_LINELIST || prim == GS_LINESTRIP || prim == GS_SPRITE;
	const bool is_tri = prim == GS_TRIANGLELIST || prim == GS_TRIANGLESTRIP || prim == GS_TRIANGLEFAN;

	const GSVertexSW* RESTRICT vb = (GSVertexSW*)m_vertex.buff;

	GSVector4 scissor = m_context->scissor.ex;

	uint32* const dst_begin = dst;

	for(const uint32* const src_end = src + count; src < src_end; )
	{
		// bounding box of the primitive
		GSVector4 pmin, pmax;

		if(is_point)
		{
			pmin = vb[src[0]].p;
			pmax = vb[src[0]].p;
		}
		else if(is_pair)
		{
			pmin = vb[src[0]].p.min(vb[src[1]].p);
			pmax = vb[src[0]].p.max(vb[src[1]].p);
		}
		else if(is_tri)
		{
			pmin = vb[src[0]].p.min(vb[src[1]].p).min(vb[src[2]].p);
			pmax = vb[src[0]].p.max(vb[src[1]].p).max(vb[src[2]].p);
		}
		else
		{
			__assume(0);
		}

		// any set x/y lane means "reject"
		GSVector4 reject = (pmax < scissor) | (pmin > scissor.zwxy());

		if(is_tri)
		{
			// are in line or just two of them are the same (cross product == 0)

			GSVector4 cross = (vb[src[1]].p - vb[src[0]].p) * (vb[src[2]].p - vb[src[0]].p).yxwz();

			reject |= cross == cross.yxwz();
		}

		if(is_tri || prim == GS_SPRITE)
		{
			reject |= pmin.ceil() == pmax.ceil();
		}

		const bool keep = reject.xyxy().allfalse();

		// emit the primitive (if kept) and step over it
		if(is_point)
		{
			if(keep)
			{
				*dst++ = src[0];
			}

			src += 1;
		}
		else if(is_pair)
		{
			if(keep)
			{
				dst[0] = src[0];
				dst[1] = src[1];
				dst += 2;
			}

			src += 2;
		}
		else if(is_tri)
		{
			if(keep)
			{
				dst[0] = src[0];
				dst[1] = src[1];
				dst[2] = src[2];
				dst += 3;
			}

			src += 3;
		}
		else
		{
			__assume(0);
		}
	}

	return dst - dst_begin;
}
// Runs the base-class update, then stores in m_cif the m_ci table entry
// selected by the current primitive type (PRIM->PRIM).
void GSRendererSW::UpdateVertexKick()
{
GSRenderer::UpdateVertexKick();
m_cif = m_ci[PRIM->PRIM];
}
void GSRendererSW::Draw()
{
if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass);
GSVector4i scissor = GSVector4i(m_context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
scissor.z = std::min<int>(scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
GSVector4i r = bbox.rintersect(scissor);
const GSDrawingContext* context = m_context;
shared_ptr<GSRasterizerData> data(new GSRasterizerData2(this));
data->primclass = GSUtil::GetPrimClass(PRIM->PRIM);
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.tail + sizeof(uint32) * m_index.tail, 32);
data->vertex = (GSVertexSW*)data->buff;
data->vertex_count = m_vertex.tail;
data->index = (uint32*)(data->buff + sizeof(GSVertexSW) * m_vertex.tail);
data->index_count = (this->*m_cif)(data->index, m_index.buff, m_index.tail);
m_perfmon.Put(GSPerfMon::PrimNotRendered, (m_index.tail - data->index_count) / GSUtil::GetVertexCount(PRIM->PRIM));
if(data->index_count == 0) return;
// TODO: merge these
memcpy(data->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.tail);
m_vt->Update(data->vertex, data->index, data->index_count, data->primclass);
//
GSRasterizerData2* data2 = (GSRasterizerData2*)data.get();
if(!GetScanlineGlobalData(data2))
{
return;
}
if(!GetScanlineGlobalData(data2)) return;
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
GSVector4i scissor = GSVector4i(context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil()));
scissor.z = std::min<int>(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
data->scissor = scissor;
data->bbox = bbox;
data->primclass = m_vt.m_primclass;
data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16); // TODO: detach m_vertices and reallocate later?
memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count); // TODO: m_vt.Update fetches all the vertices already, could also store them here
data->count = m_count;
data->solidrect = gd->sel.IsSolidRect();
data->frame = m_perfmon.GetFrame();
//
vector<uint32>* fb_pages = NULL;
vector<uint32>* zb_pages = NULL;
uint32* fb_pages = NULL;
uint32* zb_pages = NULL;
GSVector4i r = bbox.rintersect(scissor);
if(gd->sel.fwrite)
{
@ -215,9 +363,9 @@ void GSRendererSW::Draw()
{
if(gd->sel.fwrite)
{
for(vector<uint32>::iterator i = fb_pages->begin(); i != fb_pages->end(); i++)
for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
{
if(m_fzb_pages[*i] & 0xffff0000) // already used as a z-buffer
if(m_fzb_pages[*p] & 0xffff0000) // already used as a z-buffer
{
data->syncpoint = true;
@ -231,9 +379,9 @@ void GSRendererSW::Draw()
{
if(gd->sel.zwrite)
{
for(vector<uint32>::iterator i = zb_pages->begin(); i != zb_pages->end(); i++)
for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
{
if(m_fzb_pages[*i] & 0x0000ffff) // already used as a frame buffer
if(m_fzb_pages[*p] & 0x0000ffff) // already used as a frame buffer
{
data->syncpoint = true;
@ -307,18 +455,6 @@ void GSRendererSW::Draw()
m_rl->Queue(data);
}
int prims = 0;
switch(data->primclass)
{
case GS_POINT_CLASS: prims = data->count; break;
case GS_LINE_CLASS: prims = data->count / 2; break;
case GS_TRIANGLE_CLASS: prims = data->count / 3; break;
case GS_SPRITE_CLASS: prims = data->count / 2; break;
}
m_perfmon.Put(GSPerfMon::Prim, prims);
/*
if(0)//stats.ticks > 5000000)
{
@ -344,15 +480,15 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
{
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
vector<uint32>* pages = o->GetPages(r);
uint32* pages = o->GetPages(r);
m_tc->InvalidatePages(pages, o->psm);
// check if the changing pages either used as a texture or a target
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
uint32 page = *i;
uint32 page = *p;
//while(m_fzb_pages[page] | m_tex_pages[page]) _mm_pause();
@ -364,20 +500,20 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
}
}
delete pages;
delete [] pages;
}
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{
GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
vector<uint32>* pages = o->GetPages(r);
uint32* pages = o->GetPages(r);
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
//while(m_fzb_pages[*i]) _mm_pause();
//while(m_fzb_pages[*p]) _mm_pause();
if(m_fzb_pages[*i])
if(m_fzb_pages[*p])
{
Sync(6);
@ -385,27 +521,27 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
}
}
delete pages;
delete [] pages;
}
void GSRendererSW::UsePages(const vector<uint32>* pages, int type)
void GSRendererSW::UsePages(const uint32* pages, int type)
{
if(type < 2)
{
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(((short*)&m_fzb_pages[*i])[type] < SHRT_MAX);
ASSERT(((short*)&m_fzb_pages[*p])[type] < SHRT_MAX);
_InterlockedIncrement16((short*)&m_fzb_pages[*i] + type);
_InterlockedIncrement16((short*)&m_fzb_pages[*p] + type);
}
}
else
{
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
//while(m_fzb_pages[*i]) _mm_pause();
//while(m_fzb_pages[*p]) _mm_pause();
if(m_fzb_pages[*i]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D)
if(m_fzb_pages[*p]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D)
{
Sync(7);
@ -413,33 +549,33 @@ void GSRendererSW::UsePages(const vector<uint32>* pages, int type)
}
}
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(m_tex_pages[*i] < SHRT_MAX);
ASSERT(m_tex_pages[*p] < SHRT_MAX);
_InterlockedIncrement16((short*)&m_tex_pages[*i]); // remember which texture pages are used
_InterlockedIncrement16((short*)&m_tex_pages[*p]); // remember which texture pages are used
}
}
}
void GSRendererSW::ReleasePages(const vector<uint32>* pages, int type)
void GSRendererSW::ReleasePages(const uint32* pages, int type)
{
if(type < 2)
{
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(((short*)&m_fzb_pages[*i])[type] > 0);
ASSERT(((short*)&m_fzb_pages[*p])[type] > 0);
_InterlockedDecrement16((short*)&m_fzb_pages[*i] + type);
_InterlockedDecrement16((short*)&m_fzb_pages[*p] + type);
}
}
else
{
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
ASSERT(m_tex_pages[*i] > 0);
ASSERT(m_tex_pages[*p] > 0);
_InterlockedDecrement16((short*)&m_tex_pages[*i]);
_InterlockedDecrement16((short*)&m_tex_pages[*p]);
}
}
}
@ -452,7 +588,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
const GSDrawingEnvironment& env = m_env;
const GSDrawingContext* context = m_context;
const GS_PRIM_CLASS primclass = m_vt.m_primclass;
const GS_PRIM_CLASS primclass = m_vt->m_primclass;
gd.vm = m_mem.m_vm8;
@ -470,7 +606,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
gd.sel.atst = ATST_ALWAYS;
gd.sel.tfx = TFX_NONE;
gd.sel.ababcd = 255;
gd.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0;
gd.sel.prim = primclass;
uint32 fm = context->FRAME.FBMSK;
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
@ -529,7 +665,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
{
gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff)
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt->m_eq.rgba != 0xffff)
{
gd.sel.iip = PRIM->IIP;
}
@ -539,7 +675,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
gd.sel.tfx = context->TEX0.TFX;
gd.sel.tcc = context->TEX0.TCC;
gd.sel.fst = PRIM->FST;
gd.sel.ltf = m_vt.IsLinear();
gd.sel.ltf = m_vt->IsLinear();
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
{
@ -553,7 +689,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
gd.sel.wms = context->CLAMP.WMS;
gd.sel.wmt = context->CLAMP.WMT;
if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128)))
if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt->m_eq.rgba == 0xffff && m_vt->m_min.c.eq(GSVector4i(128)))
{
// modulate does not do anything when vertex color is 0x80
@ -572,7 +708,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
if(!t->Update(r)) {ASSERT(0); return false;}
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt->m_lod.x > 0)
{
uint64 frame = m_perfmon.GetFrame();
@ -589,7 +725,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
gd.tex[0] = t->m_buff;
gd.sel.tw = t->m_tw - 3;
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0)
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt->m_lod.y > 0)
{
// TEX1.MMIN
// 000 p
@ -599,13 +735,13 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
// 100 l round
// 101 l tri
if(m_vt.m_lod.x > 0)
if(m_vt->m_lod.x > 0)
{
gd.sel.ltf = context->TEX1.MMIN >> 2;
}
else
{
// TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt.m_lod.x <= 0 && m_vt.m_lod.y > 0
// TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt->m_lod.x <= 0 && m_vt->m_lod.y > 0
}
gd.sel.mmin = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri
@ -614,9 +750,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
int mxl = (std::min<int>((int)context->TEX1.MXL, 6) << 16);
int k = context->TEX1.K << 12;
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL)
if((int)m_vt->m_lod.x >= (int)context->TEX1.MXL)
{
k = (int)m_vt.m_lod.x << 16; // set lod to max level
k = (int)m_vt->m_lod.x << 16; // set lod to max level
gd.sel.lcm = 1; // lod is constant
gd.sel.mmin = 1; // tri-linear is meaningless
@ -630,7 +766,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
if(gd.sel.fst)
{
ASSERT(gd.sel.lcm == 1);
ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
ASSERT(((m_vt->m_min.t.uph(m_vt->m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
gd.sel.lcm = 1;
}
@ -659,8 +795,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
GIFRegTEX0 MIP_TEX0 = context->TEX0;
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
GSVector4 tmin = m_vt.m_min.t;
GSVector4 tmax = m_vt.m_max.t;
GSVector4 tmin = m_vt->m_min.t;
GSVector4 tmax = m_vt->m_max.t;
static int s_counter = 0;
@ -710,8 +846,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
MIP_CLAMP.MAXU >>= 1;
MIP_CLAMP.MAXV >>= 1;
m_vt.m_min.t *= 0.5f;
m_vt.m_max.t *= 0.5f;
m_vt->m_min.t *= 0.5f;
m_vt->m_max.t *= 0.5f;
GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3);
@ -753,8 +889,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
s_counter++;
m_vt.m_min.t = tmin;
m_vt.m_max.t = tmax;
m_vt->m_min.t = tmin;
m_vt->m_max.t = tmax;
}
else
{
@ -762,17 +898,19 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
{
// skip per pixel division if q is constant
GSVertexSW* v = m_vertices;
GSVertexSW* RESTRICT v = data2->vertex;
if(m_vt.m_eq.q)
if(m_vt->m_eq.q)
{
gd.sel.fst = 1;
if(v[0].t.z != 1.0f)
{
GSVector4 w = v[0].t.zzzz().rcpnr();
const GSVector4& t = v[data2->index[0]].t;
for(int i = 0, j = m_count; i < j; i++)
if(t.z != 1.0f)
{
GSVector4 w = t.zzzz().rcpnr();
for(int i = 0, j = data2->vertex_count; i < j; i++)
{
GSVector4 t = v[i].t;
@ -784,7 +922,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
{
gd.sel.fst = 1;
for(int i = 0, j = m_count; i < j; i += 2)
for(int i = 0, j = data2->vertex_count; i < j; i += 2)
{
GSVector4 t0 = v[i + 0].t;
GSVector4 t1 = v[i + 1].t;
@ -805,9 +943,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
GSVector4 half(0x8000, 0x8000);
GSVertexSW* v = m_vertices;
GSVertexSW* RESTRICT v = data2->vertex;
for(int i = 0, j = m_count; i < j; i++)
for(int i = 0, j = data2->vertex_count; i < j; i++)
{
GSVector4 t = v[i].t;
@ -939,7 +1077,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
{
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
gd.sel.ztst = ztest ? context->TEST.ZTST : ZTST_ALWAYS;
gd.sel.zoverflow = GSVector4i(m_vt.m_max.p).z == 0x80000000;
gd.sel.zoverflow = GSVector4i(m_vt->m_max.p).z == 0x80000000;
}
gd.fm = GSVector4i(fm);
@ -969,176 +1107,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
return true;
}
// Converts the vertex latched in m_v to GSVertexSW, appends it to the vertex
// list, and, when DrawingKick reports a completed primitive, culls it, applies
// flat shading, and tries to turn a pair of triangles forming a quad into a sprite.
//   prim - primitive type being assembled
//   tme  - non-zero when texture coordinates must be converted
//   fst  - non-zero when coordinates come from UV instead of ST/Q
//   skip - forwarded unchanged to DrawingKick
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererSW::VertexKick(bool skip)
{
const GSDrawingContext* context = m_context;
GSVertexSW& dst = m_vl.AddTail();
// position: x/y relative to XYOFFSET, z clamped, fog in the last lane
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET;
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
if(tme)
{
GSVector4 t;
if(fst)
{
t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
}
else
{
t = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
t = t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
}
dst.t = t;
}
dst.c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7);
if(prim == GS_SPRITE)
{
// sprites keep the raw Z value in the last texture-coordinate lane
dst.t.u32[3] = m_v.XYZ.Z;
}
int count = 0;
// DrawingKick returns the vertices of a finished primitive (and their count), or NULL
if(GSVertexSW* v = DrawingKick<prim>(skip, count))
{
// culling is bypassed while m_dump is active
if(!m_dump)
{
// bounding box of the primitive
GSVector4 pmin, pmax;
switch(prim)
{
case GS_POINTLIST:
pmin = v[0].p;
pmax = v[0].p;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin = v[0].p.min(v[1].p);
pmax = v[0].p.max(v[1].p);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
pmin = v[0].p.min(v[1].p).min(v[2].p);
pmax = v[0].p.max(v[1].p).max(v[2].p);
break;
}
GSVector4 scissor = context->scissor.ex;
// set lanes mark a primitive lying outside the extended scissor rectangle
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
// bounding box collapses after ceil()
test |= pmin.ceil() == pmax.ceil();
break;
}
switch(prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
// are in line or just two of them are the same (cross product == 0)
GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz();
test |= tmp == tmp.yxwz();
break;
}
// only the x and y lanes decide; a rejected primitive is not added to m_count
if(test.mask() & 3)
{
return;
}
}
// flat shading (IIP == 0): every vertex takes the colour of the last one
switch(prim)
{
case GS_POINTLIST:
break;
case GS_LINELIST:
case GS_LINESTRIP:
if(PRIM->IIP == 0) {v[0].c = v[1].c;}
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;}
break;
case GS_SPRITE:
break;
}
// quad detection: inspects the last three queued vertices (together with the new primitive at v)
if(m_count < 30 && m_count >= 3)
{
GSVertexSW* v = &m_vertices[m_count - 3];
int tl = 0;
int br = 0;
bool isquad = false;
switch(prim)
{
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_TRIANGLELIST:
isquad = GSVertexSW::IsQuad(v, tl, br);
break;
}
if(isquad)
{
// drop the previously queued triangle; it becomes part of the sprite
m_count -= 3;
if(m_count > 0)
{
// tl/br are rebased by the remaining count before the pending vertices are flushed
tl += m_count;
br += m_count;
Flush();
}
// move the two corner vertices to the front of the buffer
if(tl != 0) m_vertices[0] = m_vertices[tl];
if(br != 1) m_vertices[1] = m_vertices[br];
m_vertices[0].t.u32[3] = m_v.XYZ.Z;
m_vertices[1].t.u32[3] = m_v.XYZ.Z;
m_count = 2;
// draw the two corners as a sprite, then restore the original primitive type
uint32 tmp = PRIM->PRIM;
PRIM->PRIM = GS_SPRITE;
Flush();
PRIM->PRIM = tmp;
m_perfmon.Put(GSPerfMon::Quad, 1);
return;
}
}
m_count += count;
// Flush();
}
}
// GSRendererSW::GSRasterizerData2
GSRendererSW::GSRasterizerData2::GSRasterizerData2(GSRendererSW* parent)
: m_parent(parent)
, m_fb_pages(NULL)
@ -1192,7 +1160,7 @@ GSRendererSW::GSRasterizerData2::~GSRasterizerData2()
m_parent->m_perfmon.Put(GSPerfMon::Fillrate, pixels);
}
void GSRendererSW::GSRasterizerData2::UseTargetPages(const vector<uint32>* fb_pages, const vector<uint32>* zb_pages)
void GSRendererSW::GSRasterizerData2::UseTargetPages(const uint32* fb_pages, const uint32* zb_pages)
{
if(m_using_pages) return;
@ -1218,9 +1186,7 @@ void GSRendererSW::GSRasterizerData2::UseSourcePages(GSTextureCacheSW::Texture*
{
ASSERT(m_tex_pages[level] == NULL);
const vector<uint32>* pages = t->m_pages.n;
m_tex_pages[level] = t->m_pages.n;
m_tex_pages[level] = pages;
m_parent->UsePages(pages, 2);
m_parent->UsePages(t->m_pages.n, 2);
}

View File

@ -25,21 +25,21 @@
#include "GSTextureCacheSW.h"
#include "GSDrawScanline.h"
class GSRendererSW : public GSRendererT<GSVertexSW>
class GSRendererSW : public GSRenderer
{
class GSRasterizerData2 : public GSRasterizerData
{
GSRendererSW* m_parent;
const vector<uint32>* m_fb_pages;
const vector<uint32>* m_zb_pages;
const vector<uint32>* m_tex_pages[7];
const uint32* m_fb_pages;
const uint32* m_zb_pages;
const uint32* m_tex_pages[7];
bool m_using_pages;
public:
GSRasterizerData2(GSRendererSW* parent);
virtual ~GSRasterizerData2();
void UseTargetPages(const vector<uint32>* fb_pages, const vector<uint32>* zb_pages);
void UseTargetPages(const uint32* fb_pages, const uint32* zb_pages);
void UseSourcePages(GSTextureCacheSW::Texture* t, int level);
};
@ -63,15 +63,37 @@ protected:
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void UsePages(const vector<uint32>* pages, int type);
void ReleasePages(const vector<uint32>* pages, int type);
void UsePages(const uint32* pages, int type);
void ReleasePages(const uint32* pages, int type);
bool GetScanlineGlobalData(GSRasterizerData2* data2);
typedef size_t (GSState::*ConvertIndexPtr)(uint32* RESTRICT dst, const uint32* RESTRICT src, int count);
ConvertIndexPtr m_ci[8], m_cif;
#define InitConvertIndex2(P) \
m_ci[P] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<P>; \
#define InitConvertIndex() \
InitConvertIndex2(GS_POINTLIST) \
InitConvertIndex2(GS_LINELIST) \
InitConvertIndex2(GS_LINESTRIP) \
InitConvertIndex2(GS_TRIANGLELIST) \
InitConvertIndex2(GS_TRIANGLESTRIP) \
InitConvertIndex2(GS_TRIANGLEFAN) \
InitConvertIndex2(GS_SPRITE) \
InitConvertIndex2(GS_INVALID) \
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index);
template<uint32 prim>
size_t ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count);
void UpdateVertexKick();
public:
GSRendererSW(int threads);
virtual ~GSRendererSW();
template<uint32 prim, uint32 tme, uint32 fst>
void VertexKick(bool skip);
};

View File

@ -61,12 +61,12 @@ union GSScanlineSelector
uint32 colclamp:1; // 43
uint32 fba:1; // 44
uint32 dthe:1; // 45
uint32 sprite:1; // 46
uint32 edge:1; // 47
uint32 prim:2; // 46
uint32 tw:3; // 48 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
uint32 lcm:1; // 49
uint32 mmin:2; // 50
uint32 edge:1; // 48
uint32 tw:3; // 49 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
uint32 lcm:1; // 50
uint32 mmin:2; // 51
};
struct
@ -92,7 +92,7 @@ union GSScanlineSelector
bool IsSolidRect() const
{
return sprite
return prim == GS_SPRITE_CLASS
&& iip == 0
&& tfx == TFX_NONE
&& abe == 0

View File

@ -29,7 +29,7 @@ using namespace Xbyak;
void GSSetupPrimCodeGenerator::Generate()
{
if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip)
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
{
for(int i = 0; i < 5; i++)
{
@ -53,7 +53,7 @@ void GSSetupPrimCodeGenerator::Depth()
return;
}
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
// GSVector4 p = dscan.p;
@ -107,7 +107,7 @@ void GSSetupPrimCodeGenerator::Depth()
}
else
{
// GSVector4 p = vertices[0].p;
// GSVector4 p = vertex.p;
vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
@ -312,7 +312,7 @@ void GSSetupPrimCodeGenerator::Color()
}
else
{
// GSVector4i c = GSVector4i(vertices[0].c);
// GSVector4i c = GSVector4i(vertex.c);
vcvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);

View File

@ -29,7 +29,7 @@ using namespace Xbyak;
void GSSetupPrimCodeGenerator::Generate()
{
if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip)
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
{
for(int i = 0; i < 5; i++)
{
@ -53,7 +53,7 @@ void GSSetupPrimCodeGenerator::Depth()
return;
}
if(!m_sel.sprite)
if(m_sel.prim != GS_SPRITE_CLASS)
{
// GSVector4 p = dscan.p;
@ -112,7 +112,7 @@ void GSSetupPrimCodeGenerator::Depth()
}
else
{
// GSVector4 p = vertices[0].p;
// GSVector4 p = vertex.p;
movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
@ -327,7 +327,7 @@ void GSSetupPrimCodeGenerator::Color()
}
else
{
// GSVector4i c = GSVector4i(vertices[0].c);
// GSVector4i c = GSVector4i(vertex.c);
cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);

View File

@ -25,18 +25,37 @@
//#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering
//#define Offset_UV // Fixes / breaks various titles
GSState::GSState()
GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
: m_version(6)
, m_mt(false)
, m_irq(NULL)
, m_path3hack(0)
, m_regs(NULL)
, m_q(1.0f)
, m_vprim(1)
, m_crc(0)
, m_options(0)
, m_frameskip(0)
, m_vt(vt)
{
memset(&m_v, 0, sizeof(m_v));
m_q = 1.0f;
memset(&m_vertex, 0, sizeof(m_vertex));
memset(&m_index, 0, sizeof(m_index));
m_vertex.stride = vertex_stride;
GrowVertexBuffer();
m_dk[GS_POINTLIST] = (DrawingKickPtr)&GSState::DrawingKick<GS_POINTLIST>;
m_dk[GS_LINELIST] = (DrawingKickPtr)&GSState::DrawingKick<GS_LINELIST>;
m_dk[GS_LINESTRIP] = (DrawingKickPtr)&GSState::DrawingKick<GS_LINESTRIP>;
m_dk[GS_TRIANGLELIST] = (DrawingKickPtr)&GSState::DrawingKick<GS_TRIANGLELIST>;
m_dk[GS_TRIANGLESTRIP] = (DrawingKickPtr)&GSState::DrawingKick<GS_TRIANGLESTRIP>;
m_dk[GS_TRIANGLEFAN] = (DrawingKickPtr)&GSState::DrawingKick<GS_TRIANGLEFAN>;
m_dk[GS_SPRITE] = (DrawingKickPtr)&GSState::DrawingKick<GS_SPRITE>;
m_dk[GS_INVALID] = (DrawingKickPtr)&GSState::DrawingKick<GS_INVALID>;
memset(m_cv, 0, sizeof(m_cv));
m_sssize = 0;
m_sssize += sizeof(m_version);
@ -78,7 +97,7 @@ GSState::GSState()
m_sssize += sizeof(m_v.ST);
m_sssize += sizeof(m_v.UV);
m_sssize += sizeof(m_v.XYZ);
m_sssize += sizeof(m_v.FOG);
m_sssize += sizeof(m_v.FOG); // obsolete
m_sssize += sizeof(m_tr.x);
m_sssize += sizeof(m_tr.y);
@ -97,6 +116,8 @@ GSState::GSState()
GSState::~GSState()
{
	// Release the vertex and index storage obtained in GrowVertexBuffer().

	if(m_vertex.buff != NULL)
	{
		_aligned_free(m_vertex.buff);
	}

	if(m_index.buff != NULL)
	{
		_aligned_free(m_index.buff);
	}
}
void GSState::SetRegsMem(uint8* basemem)
@ -195,6 +216,10 @@ void GSState::Reset()
m_env.Reset();
m_context = &m_env.CTXT[0];
m_vertex.head = 0;
m_vertex.tail = 0;
m_index.tail = 0;
}
void GSState::ResetHandlers()
@ -472,7 +497,7 @@ __forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRIC
m_v.XYZ.Z = r->XYZF2.Z;
m_v.FOG.F = r->XYZF2.F;
VertexKick(r->XYZF2.ADC);
VertexKick(r->XYZF2.Skip());
}
__forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
@ -481,7 +506,7 @@ __forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT
m_v.XYZ.Y = r->XYZ2.Y;
m_v.XYZ.Z = r->XYZ2.Z;
VertexKick(r->XYZ2.ADC);
VertexKick(r->XYZ2.Skip());
}
__forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r)
@ -509,7 +534,7 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
{
// ASSERT(r->PRIM.PRIM < 7);
if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim.PRIM))
if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim.PRIM)) // NOTE: assume strips/fans are converted to lists
{
if((m_env.PRIM.u32[0] ^ prim.u32[0]) & 0x7f8) // all fields except PRIM
{
@ -528,7 +553,7 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
UpdateVertexKick();
ResetPrim();
m_vertex.head = m_vertex.tail = m_index.tail > 0 ? m_index.buff[m_index.tail - 1] + 1 : 0; // remove unused vertices from the end of the vertex buffer
}
void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r)
@ -559,8 +584,8 @@ __forceinline void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r)
m_v.UV.u32[0] = r->UV.u32[0] & 0x3fff3fff;
#ifdef Offset_UV
m_v.UV.U = min((uint16)m_v.UV.U, (uint16)(m_v.UV.U - 4U));
m_v.UV.V = min((uint16)m_v.UV.V, (uint16)(m_v.UV.V - 4U));
m_v.UV.U = min((uint16)m_v.UV.U, (uint16)(m_v._UV.U - 4U));
m_v.UV.V = min((uint16)m_v.UV.V, (uint16)(m_v._UV.V - 4U));
#endif
}
@ -576,14 +601,14 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff;
m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000;
VertexKick(false);
VertexKick(0);
}
void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r)
{
m_v.XYZ = (GSVector4i)r->XYZ;
VertexKick(false);
VertexKick(0);
}
void GSState::ApplyTEX0(int i, GIFRegTEX0& TEX0)
@ -697,7 +722,7 @@ template<int i> void GSState::GIFRegHandlerCLAMP(const GIFReg* RESTRICT r)
void GSState::GIFRegHandlerFOG(const GIFReg* RESTRICT r)
{
m_v.FOG = (GSVector4i)r->FOG;
m_v.FOG.u32[1] = r->FOG.u32[1];
}
void GSState::GIFRegHandlerXYZF3(const GIFReg* RESTRICT r)
@ -712,14 +737,14 @@ void GSState::GIFRegHandlerXYZF3(const GIFReg* RESTRICT r)
m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff;
m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000;
VertexKick(true);
VertexKick(1);
}
void GSState::GIFRegHandlerXYZ3(const GIFReg* RESTRICT r)
{
m_v.XYZ = (GSVector4i)r->XYZ;
VertexKick(true);
VertexKick(1);
}
void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r)
@ -1174,6 +1199,26 @@ void GSState::FlushWrite()
*/
}
// Submits the queued primitives (if any) through Draw() and then empties the
// vertex and index queues.
//
// Nothing is drawn when no index has been queued, or when either the frame
// buffer or the depth buffer pixel format has fmt >= 3.
//
// NOTE(review): the queues are reset unconditionally, so vertices of a
// primitive that is still being assembled (head < tail, e.g. in the middle of
// a strip or fan) appear to be dropped by a flush - confirm that callers
// never rely on them surviving.
void GSState::FlushPrim()
{
	if(m_index.tail > 0
	&& GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3
	&& GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3)
	{
		// FIXME: berserk fpsm = 27 (8H)

		Draw();

		// one draw call, index count / vertices-per-primitive primitives

		m_perfmon.Put(GSPerfMon::Draw, 1);
		m_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM));
	}

	m_index.tail = 0;
	m_vertex.tail = 0;
	m_vertex.head = 0;
}
//
void GSState::Write(const uint8* mem, int len)
@ -1881,6 +1926,446 @@ void GSState::SetGameCRC(uint32 crc, int options)
m_game = CRC::Lookup(crc);
}
//
void GSState::UpdateVertexKick()
{
m_dkf = m_dk[PRIM->PRIM];
m_cvf = m_cv[PRIM->PRIM][PRIM->TME][PRIM->FST];
m_vertex.n = GSUtil::GetVertexCount(PRIM->PRIM);
}
void GSState::GrowVertexBuffer()
{
int maxcount = std::max<int>(m_vertex.maxcount * 3 / 2, 10000);
uint8* vertex = (uint8*)_aligned_malloc(m_vertex.stride * maxcount, 16);
uint32* index = (uint32*)_aligned_malloc(sizeof(uint32) * maxcount * 3, 16); // worst case is slightly less than vertex number * 3
if(m_vertex.buff != NULL)
{
memcpy(vertex, m_vertex.buff, m_vertex.stride * m_vertex.tail);
_aligned_free(m_vertex.buff);
}
if(m_index.buff != NULL)
{
memcpy(index, m_index.buff, sizeof(uint32) * m_index.tail);
_aligned_free(m_index.buff);
}
m_vertex.buff = vertex;
m_vertex.maxcount = maxcount - 100; // -100 because skipped vertices don't trigger growing the vertex buffer (VertexKick should be as fast as possible)
m_index.buff = index;
}
// Queues one vertex: the currently selected converter writes it into slot
// m_vertex.tail of the vertex buffer, the tail advances, and once enough
// vertices for one primitive are pending (tail - head >= n) the drawing kick
// of the current primitive type is invoked with the skip flag.
void GSState::VertexKick(uint32 skip)
{
	(this->*m_cvf)(m_vertex.buff, m_vertex.tail);

	m_vertex.tail++;

	const size_t pending = m_vertex.tail - m_vertex.head;

	if(pending < m_vertex.n)
	{
		return; // primitive not complete yet
	}

	(this->*m_dkf)(skip);
}
// Called by VertexKick() once enough vertices for one primitive of type
// `prim` are pending in [m_vertex.head, m_vertex.tail).
//
// skip != 0: the primitive is not drawn. No indices are emitted; the vertex
//            window is only adjusted so that the next vertex continues the
//            list/strip/fan correctly.
// skip == 0: the indices of the primitive are appended to the index buffer
//            (strips and fans are expanded to lists) and the window advances.
template<uint32 prim>
void GSState::DrawingKick(uint32 skip)
{
	size_t head = m_vertex.head;
	size_t tail = m_vertex.tail;

	// Check the capacity before looking at the skip flag. Skipped strip and fan
	// vertices still advance m_vertex.tail, so doing this only on the drawing
	// path would let a long enough run of skipped vertices use up the slack
	// that GrowVertexBuffer() leaves at the end of the buffer and make
	// VertexKick() write past the allocation.

	if(tail >= m_vertex.maxcount)
	{
		GrowVertexBuffer();
	}

	if(skip)
	{
		switch(prim)
		{
		case GS_POINTLIST:
		case GS_LINELIST:
		case GS_TRIANGLELIST:
		case GS_SPRITE:
		case GS_INVALID:
			m_vertex.tail = head; // drop the whole primitive
			break;
		case GS_LINESTRIP:
		case GS_TRIANGLESTRIP:
			m_vertex.head = head + 1; // drop only the oldest vertex, the strip continues
			break;
		case GS_TRIANGLEFAN:
			break; // keep everything: head stays the shared vertex, tail - 1 is the previous one
		default:
			__assume(0);
		}

		return;
	}

	// taken after the capacity check, growing replaces m_index.buff

	uint32* RESTRICT buff = &m_index.buff[m_index.tail];

	switch(prim)
	{
	case GS_POINTLIST:
		buff[0] = head + 0;
		m_vertex.head = head + 1;
		m_index.tail += 1;
		break;
	case GS_LINELIST:
		buff[0] = head + 0;
		buff[1] = head + 1;
		m_vertex.head = head + 2;
		m_index.tail += 2;
		break;
	case GS_LINESTRIP:
		buff[0] = head + 0;
		buff[1] = head + 1;
		m_vertex.head = head + 1; // the last vertex starts the next segment
		m_index.tail += 2;
		break;
	case GS_TRIANGLELIST:
		buff[0] = head + 0;
		buff[1] = head + 1;
		buff[2] = head + 2;
		m_vertex.head = head + 3;
		m_index.tail += 3;
		break;
	case GS_TRIANGLESTRIP:
		buff[0] = head + 0;
		buff[1] = head + 1;
		buff[2] = head + 2;
		m_vertex.head = head + 1; // the last two vertices are shared with the next triangle
		m_index.tail += 3;
		break;
	case GS_TRIANGLEFAN:
		buff[0] = head + 0; // head is not advanced, it stays the shared vertex of the fan
		buff[1] = tail - 2;
		buff[2] = tail - 1;
		m_index.tail += 3;
		break;
	case GS_SPRITE:
		buff[0] = head + 0;
		buff[1] = head + 1;
		m_vertex.head = head + 2;
		m_index.tail += 2;
		break;
	case GS_INVALID:
		m_vertex.tail = head; // nothing to draw, discard the vertex
		break;
	default:
		__assume(0);
	}
}
// Computes into r the texel rectangle (left, top, right, bottom; right/bottom
// exclusive) of the texture described by TEX0 that the current draw can touch.
//
// TEX0   - supplies the texture size as log2 width/height (TW, TH)
// CLAMP  - wrap modes (WMS, WMT) and the MINU/MAXU/MINV/MAXV region values
// linear - when true, the coordinate range taken from the vertex trace is
//          widened by 0x8000 on each side before it is converted to texels
//
// The rectangle starts as the whole texture, is narrowed by the clamp region,
// then by the texture coordinate range recorded in m_vt (m_min.t / m_max.t),
// and is finally intersected with the whole texture again.
void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear)
{
int tw = TEX0.TW;
int th = TEX0.TH;
int w = 1 << tw;
int h = 1 << th;
// tr = the whole texture
GSVector4i tr(0, 0, w, h);
int wms = CLAMP.WMS;
int wmt = CLAMP.WMT;
int minu = (int)CLAMP.MINU;
int minv = (int)CLAMP.MINV;
int maxu = (int)CLAMP.MAXU;
int maxv = (int)CLAMP.MAXV;
// vr = the rectangle being narrowed down
GSVector4i vr = tr;
// horizontal limits implied by the wrap mode alone
switch(wms)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
if(vr.x < minu) vr.x = minu;
if(vr.z > maxu + 1) vr.z = maxu + 1;
break;
case CLAMP_REGION_REPEAT:
// region repeat: the range starts at maxu and is minu + 1 texels wide
vr.x = maxu;
vr.z = vr.x + (minu + 1);
break;
default:
__assume(0);
}
// vertical limits implied by the wrap mode alone
switch(wmt)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
if(vr.y < minv) vr.y = minv;
if(vr.w > maxv + 1) vr.w = maxv + 1;
break;
case CLAMP_REGION_REPEAT:
vr.y = maxv;
vr.w = vr.y + (minv + 1);
break;
default:
__assume(0);
}
// the vertex coordinates are only consulted while wms + wmt < 6
// (presumably: unless both axes are CLAMP_REGION_REPEAT - TODO confirm the enum values)
if(wms + wmt < 6)
{
// st = (min.s, min.t, max.s, max.t) of the traced texture coordinates
GSVector4 st = m_vt->m_min.t.xyxy(m_vt->m_max.t);
if(linear)
{
// subtract 0x8000 from the minimum and add 0x8000 to the maximum
st += GSVector4(-0x8000, 0x8000).xxyy();
}
// drop the low 16 bits to get integer texel coordinates
GSVector4i uv = GSVector4i(st).sra32(16);
GSVector4i u, v;
int mask = 0;
if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT)
{
// u / v = coordinates wrapped into the texture (low tw / th bits)
u = uv & GSVector4i::xffffffff().srl32(32 - tw);
v = uv & GSVector4i::xffffffff().srl32(32 - th);
// uu / vv = index of the texture repetition each coordinate falls into
GSVector4i uu = uv.sra32(tw);
GSVector4i vv = uv.sra32(th);
// NOTE(review): mask looks like it flags, per axis, that min and max lie in the
// same repetition (0x000f tested for u, 0xf000 tested for v) - confirm lane order
mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
}
uv = uv.rintersect(tr);
switch(wms)
{
case CLAMP_REPEAT:
// the wrapped range is only used when the u mask bits are set
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.x < uv.x) vr.x = uv.x;
if(vr.z > uv.z + 1) vr.z = uv.z + 1;
break;
case CLAMP_REGION_REPEAT:
break;
default:
__assume(0);
}
switch(wmt)
{
case CLAMP_REPEAT:
// the wrapped range is only used when the v mask bits are set
if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.y < uv.y) vr.y = uv.y;
if(vr.w > uv.w + 1) vr.w = uv.w + 1;
break;
case CLAMP_REGION_REPEAT:
break;
default:
__assume(0);
}
}
// never report anything outside the texture
r = vr.rintersect(tr);
}
// Computes the minimum and maximum source alpha of the current draw and
// caches them in m_vt->m_alpha (min, max, valid). Returns immediately when
// the cached range is already valid.
//
// Starts from the traced vertex colors and, when texturing with TCC is
// enabled, combines them with the alpha range the texture can produce
// according to the texture function (TFX).
void GSState::GetAlphaMinMax()
{
if(m_vt->m_alpha.valid)
{
return;
}
const GSDrawingEnvironment& env = m_env;
const GSDrawingContext* context = m_context;
// a.x / a.z end up as the resulting min / max (see the stores at the end),
// a.y / a.w are overwritten with the texture alpha min / max below
// NOTE(review): presumably the shuffle yields (min.a, min.a, max.a, max.a) of the vertex colors - confirm
GSVector4i a = m_vt->m_min.c.uph32(m_vt->m_max.c).zzww();
if(PRIM->TME && context->TEX0.TCC)
{
// alpha range the texture can deliver, by pixel format class
switch(GSLocalMemory::m_psm[context->TEX0.PSM].fmt)
{
case 0:
// anything between 0 and 0xff
a.y = 0;
a.w = 0xff;
break;
case 1:
// alpha comes from TEXA.TA0, or may be 0 when AEM is set
a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0;
a.w = env.TEXA.TA0;
break;
case 2:
// alpha is one of TEXA.TA0 / TEXA.TA1, or may be 0 when AEM is set
a.y = env.TEXA.AEM ? 0 : min(env.TEXA.TA0, env.TEXA.TA1);
a.w = max(env.TEXA.TA0, env.TEXA.TA1);
break;
case 3:
// the range is queried from the CLUT
m_mem.m_clut.GetAlphaMinMax32(a.y, a.w);
break;
default:
__assume(0);
}
// combine vertex alpha (x, z) with texture alpha (y, w)
switch(context->TEX0.TFX)
{
case TFX_MODULATE:
// product shifted right by 7, clamped to 0xff
a.x = (a.x * a.y) >> 7;
a.z = (a.z * a.w) >> 7;
if(a.x > 0xff) a.x = 0xff;
if(a.z > 0xff) a.z = 0xff;
break;
case TFX_DECAL:
// texture alpha only
a.x = a.y;
a.z = a.w;
break;
case TFX_HIGHLIGHT:
// sum, clamped to 0xff
a.x = a.x + a.y;
a.z = a.z + a.w;
if(a.x > 0xff) a.x = 0xff;
if(a.z > 0xff) a.z = 0xff;
break;
case TFX_HIGHLIGHT2:
// texture alpha only
a.x = a.y;
a.z = a.w;
break;
default:
__assume(0);
}
}
m_vt->m_alpha.min = a.x;
m_vt->m_alpha.max = a.z;
m_vt->m_alpha.valid = true;
}
// Tries to decide the alpha test for the whole draw from the alpha range of
// its source pixels.
//
// Returns false when the outcome differs from pixel to pixel (the range
// straddles AREF); fm and zm are left untouched in that case.
//
// Returns true when every pixel passes or every pixel fails. If every pixel
// fails, the frame buffer mask fm and the depth mask zm are updated according
// to TEST.AFAIL so that the draw can be issued without an alpha test.
bool GSState::TryAlphaTest(uint32& fm, uint32& zm)
{
	const GSDrawingContext* ctx = m_context;

	bool all_fail = false;

	if(ctx->TEST.ATST == ATST_NEVER)
	{
		all_fail = true;
	}
	else if(ctx->TEST.ATST != ATST_ALWAYS)
	{
		GetAlphaMinMax();

		const int lo = m_vt->m_alpha.min;
		const int hi = m_vt->m_alpha.max;
		const int ref = ctx->TEST.AREF;

		bool passes = false; // the whole [lo, hi] range passes
		bool fails = false; // the whole [lo, hi] range fails

		switch(ctx->TEST.ATST)
		{
		case ATST_LESS:
			passes = hi < ref;
			fails = lo >= ref;
			break;
		case ATST_LEQUAL:
			passes = hi <= ref;
			fails = lo > ref;
			break;
		case ATST_EQUAL:
			passes = lo == ref && hi == ref;
			fails = lo > ref || hi < ref;
			break;
		case ATST_GEQUAL:
			passes = lo >= ref;
			fails = hi < ref;
			break;
		case ATST_GREATER:
			passes = lo > ref;
			fails = hi <= ref;
			break;
		case ATST_NOTEQUAL:
			passes = lo > ref || hi < ref;
			fails = lo == ref && hi == ref;
			break;
		default:
			__assume(0); // ATST_NEVER and ATST_ALWAYS never get here
		}

		if(!passes && !fails)
		{
			return false; // mixed result, the test has to run per pixel
		}

		all_fail = !passes;
	}

	if(all_fail)
	{
		switch(ctx->TEST.AFAIL)
		{
		case AFAIL_KEEP:
			fm = zm = 0xffffffff;
			break;
		case AFAIL_FB_ONLY:
			zm = 0xffffffff;
			break;
		case AFAIL_ZB_ONLY:
			fm = 0xffffffff;
			break;
		case AFAIL_RGB_ONLY:
			fm |= 0xff000000;
			zm = 0xffffffff;
			break;
		default:
			__assume(0);
		}
	}

	return true;
}
bool GSState::IsOpaque()
{
if(PRIM->AA1)
{
return false;
}
if(!PRIM->ABE)
{
return true;
}
const GSDrawingContext* context = m_context;
int amin = 0, amax = 0xff;
if(context->ALPHA.A != context->ALPHA.B)
{
if(context->ALPHA.C == 0)
{
GetAlphaMinMax();
amin = m_vt->m_alpha.min;
amax = m_vt->m_alpha.max;
}
else if(context->ALPHA.C == 1)
{
if(context->FRAME.PSM == PSM_PSMCT24 || context->FRAME.PSM == PSM_PSMZ24)
{
amin = amax = 0x80;
}
}
else if(context->ALPHA.C == 2)
{
amin = amax = context->ALPHA.FIX;
}
}
return context->ALPHA.IsOpaque(amin, amax);
}
// GSTransferBuffer
GSState::GSTransferBuffer::GSTransferBuffer()

View File

@ -26,7 +26,7 @@
#include "GSDrawingContext.h"
#include "GSDrawingEnvironment.h"
#include "GSVertex.h"
#include "GSVertexList.h"
#include "GSVertexTrace.h"
#include "GSUtil.h"
#include "GSPerfMon.h"
#include "GSVector.h"
@ -126,49 +126,53 @@ class GSState : public GSAlignedClass<32>
} m_tr;
void FlushWrite();
protected:
bool IsBadFrame(int& skip, int UserHacks_SkipDraw);
typedef void (GSState::*VertexKickPtr)(bool skip);
GSVertex m_v;
float m_q;
struct {uint8* buff; size_t head, tail, maxcount, stride, n;} m_vertex;
struct {uint32* buff; size_t tail;} m_index;
VertexKickPtr m_vk[8][2][2];
VertexKickPtr m_vkf;
typedef void (GSState::*DrawingKickPtr)(uint32 skip);
typedef void (GSState::*ConvertVertexPtr)(void* RESTRICT vertex, size_t index);
#define InitVertexKick3(T, P, N, M) \
m_vk[P][N][M] = (VertexKickPtr)(void (T::*)(bool))&T::VertexKick<P, N, M>;
DrawingKickPtr m_dk[8], m_dkf;
ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST]
#define InitVertexKick2(T, P) \
InitVertexKick3(T, P, 0, 0) \
InitVertexKick3(T, P, 0, 1) \
InitVertexKick3(T, P, 1, 0) \
InitVertexKick3(T, P, 1, 1) \
#define InitConvertVertex2(T, P) \
m_cv[P][0][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 0>; \
m_cv[P][0][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 1>; \
m_cv[P][1][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 0>; \
m_cv[P][1][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 1>; \
#define InitVertexKick(T) \
InitVertexKick2(T, GS_POINTLIST) \
InitVertexKick2(T, GS_LINELIST) \
InitVertexKick2(T, GS_LINESTRIP) \
InitVertexKick2(T, GS_TRIANGLELIST) \
InitVertexKick2(T, GS_TRIANGLESTRIP) \
InitVertexKick2(T, GS_TRIANGLEFAN) \
InitVertexKick2(T, GS_SPRITE) \
InitVertexKick2(T, GS_INVALID) \
#define InitConvertVertex(T) \
InitConvertVertex2(T, GS_POINTLIST) \
InitConvertVertex2(T, GS_LINELIST) \
InitConvertVertex2(T, GS_LINESTRIP) \
InitConvertVertex2(T, GS_TRIANGLELIST) \
InitConvertVertex2(T, GS_TRIANGLESTRIP) \
InitConvertVertex2(T, GS_TRIANGLEFAN) \
InitConvertVertex2(T, GS_SPRITE) \
InitConvertVertex2(T, GS_INVALID) \
void UpdateVertexKick()
{
m_vkf = m_vk[PRIM->PRIM][PRIM->TME][PRIM->FST];
}
virtual void UpdateVertexKick();
void VertexKickNull(bool skip)
{
ASSERT(0);
}
void GrowVertexBuffer();
void VertexKick(bool skip)
{
(this->*m_vkf)(skip);
}
void VertexKick(uint32 skip);
template<uint32 prim>
void DrawingKick(uint32 skip);
// following functions need m_vt to be initialized
GSVertexTrace* m_vt;
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
void GetAlphaMinMax();
bool TryAlphaTest(uint32& fm, uint32& zm);
bool IsOpaque();
public:
GIFPath m_path[4];
@ -177,10 +181,6 @@ public:
GSLocalMemory m_mem;
GSDrawingEnvironment m_env;
GSDrawingContext* m_context;
GSVertex m_v;
float m_q;
uint32 m_vprim;
GSPerfMon m_perfmon;
uint32 m_crc;
int m_options;
@ -190,7 +190,7 @@ public:
GSDump m_dump;
public:
GSState();
GSState(GSVertexTrace* vt, size_t vertex_stride);
virtual ~GSState();
void ResetHandlers();
@ -205,8 +205,9 @@ public:
virtual void Reset();
virtual void Flush();
virtual void FlushPrim() = 0;
virtual void ResetPrim() = 0;
virtual void FlushPrim();
virtual void FlushWrite();
virtual void Draw() = 0;
virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) {}

View File

@ -319,11 +319,11 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
GSVector4i r;
vector<uint32>* pages = o->GetPages(rect, &r);
const uint32* pages = o->GetPages(rect, &r);
bool found = false;
for(vector<uint32>::iterator p = pages->begin(); p != pages->end(); p++)
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
uint32 page = *p;
@ -374,7 +374,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
}
}
delete pages;
delete [] pages;
if(!target) return;

View File

@ -74,18 +74,18 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
m_textures.insert(t);
for(vector<uint32>::const_iterator i = t->m_pages.n->begin(); i != t->m_pages.n->end(); i++)
for(const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++)
{
m_map[*i].push_front(t);
m_map[*p].push_front(t);
}
}
return t;
}
void GSTextureCacheSW::InvalidatePages(const vector<uint32>* pages, uint32 psm)
void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm)
{
for(vector<uint32>::const_iterator p = pages->begin(); p != pages->end(); p++)
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
{
uint32 page = *p;
@ -185,9 +185,9 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
for(vector<uint32>::const_iterator i = m_pages.n->begin(); i != m_pages.n->end(); i++)
for(const uint32* p = m_pages.n; *p != GSOffset::EOP; p++)
{
uint32 page = *i;
uint32 page = *p;
m_pages.bm[page >> 5] |= 1 << (page & 31);
}
@ -202,7 +202,7 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
GSTextureCacheSW::Texture::~Texture()
{
delete m_pages.n;
delete [] m_pages.n;
if(m_buff)
{

View File

@ -40,7 +40,7 @@ public:
bool m_repeating;
vector<GSVector2i>* m_p2t;
uint32 m_valid[MAX_PAGES];
struct {uint32 bm[16]; const vector<uint32>* n;} m_pages;
struct {uint32 bm[16]; const uint32* n;} m_pages;
// m_valid
// fast mode: each uint32 bits map to the 32 blocks of that page
@ -64,7 +64,7 @@ public:
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
void InvalidatePages(const vector<uint32>* pages, uint32 psm);
void InvalidatePages(const uint32* pages, uint32 psm);
void RemoveAll();
void RemoveAt(Texture* t);

View File

@ -82,9 +82,10 @@ bool GSDevice11::CreateTextureFX()
return true;
}
void GSDevice11::SetupIA(const void* vertices, int count, int prim)
void GSDevice11::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
{
IASetVertexBuffer(vertices, sizeof(GSVertexHW11), count);
IASetVertexBuffer(vertex, sizeof(GSVertexHW11), vertex_count);
IASetIndexBuffer(index, index_count);
IASetPrimitiveTopology((D3D11_PRIMITIVE_TOPOLOGY)prim);
}

View File

@ -61,9 +61,10 @@ GSTexture* GSDevice9::CreateMskFix(uint32 size, uint32 msk, uint32 fix)
return t;
}
void GSDevice9::SetupIA(const void* vertices, int count, int prim)
void GSDevice9::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
{
IASetVertexBuffer(vertices, sizeof(GSVertexHW9), count);
IASetVertexBuffer(vertex, sizeof(GSVertexHW9), vertex_count);
IASetIndexBuffer(index, index_count);
IASetPrimitiveTopology((D3DPRIMITIVETYPE)prim);
}

View File

@ -91,6 +91,7 @@ static class GSUtilMaps
{
public:
uint8 PrimClassField[8];
uint8 VertexCountField[8];
uint32 CompatibleBitsField[64][2];
uint32 SharedBitsField[64][2];
@ -105,6 +106,15 @@ public:
PrimClassField[GS_SPRITE] = GS_SPRITE_CLASS;
PrimClassField[GS_INVALID] = GS_INVALID_CLASS;
VertexCountField[GS_POINTLIST] = 1;
VertexCountField[GS_LINELIST] = 2;
VertexCountField[GS_LINESTRIP] = 2;
VertexCountField[GS_TRIANGLELIST] = 3;
VertexCountField[GS_TRIANGLESTRIP] = 3;
VertexCountField[GS_TRIANGLEFAN] = 3;
VertexCountField[GS_SPRITE] = 2;
VertexCountField[GS_INVALID] = 1;
memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField));
for(int i = 0; i < 64; i++)
@ -146,6 +156,11 @@ GS_PRIM_CLASS GSUtil::GetPrimClass(uint32 prim)
return (GS_PRIM_CLASS)s_maps.PrimClassField[prim];
}
// Returns the number of vertices that make up one primitive of the given
// type, looked up in the static table (1 for points, 2 for lines and sprites,
// 3 for triangles; lists, strips and fans alike). prim is used as an index
// into an 8-entry table.
int GSUtil::GetVertexCount(uint32 prim)
{
	const int vertices = s_maps.VertexCountField[prim];

	return vertices;
}
bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm)
{
return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
@ -321,7 +336,7 @@ static bool DXUTDelayLoadDXGI()
bool GSUtil::CheckDirect3D11Level(D3D_FEATURE_LEVEL& level)
{
HRESULT hr;
HRESULT hr = S_OK;
level = (D3D_FEATURE_LEVEL)0;

View File

@ -29,6 +29,7 @@ public:
static const char* GetLibName();
static GS_PRIM_CLASS GetPrimClass(uint32 prim);
static int GetVertexCount(uint32 prim);
static bool HasSharedBits(uint32 spsm, uint32 dpsm);
static bool HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm);

View File

@ -1900,7 +1900,7 @@ public:
d = f.uph64(d);
}
__forceinline static bool compare16(const void* dst, const void* src, int size)
__forceinline static bool compare16(const void* dst, const void* src, size_t size)
{
ASSERT((size & 15) == 0);
@ -1909,7 +1909,7 @@ public:
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
for(int i = 0; i < size; i++)
for(size_t i = 0; i < size; i++)
{
if(!d[i].eq(s[i]))
{
@ -1920,7 +1920,7 @@ public:
return true;
}
__forceinline static bool compare64(const void* dst, const void* src, int size)
__forceinline static bool compare64(const void* dst, const void* src, size_t size)
{
ASSERT((size & 63) == 0);
@ -1929,7 +1929,7 @@ public:
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
for(int i = 0; i < size; i += 4)
for(size_t i = 0; i < size; i += 4)
{
GSVector4i v0 = (d[i * 4 + 0] == s[i * 4 + 0]);
GSVector4i v1 = (d[i * 4 + 1] == s[i * 4 + 1]);
@ -1948,7 +1948,7 @@ public:
return true;
}
__forceinline static bool update(const void* dst, const void* src, int size)
__forceinline static bool update(const void* dst, const void* src, size_t size)
{
ASSERT((size & 15) == 0);
@ -1959,7 +1959,7 @@ public:
GSVector4i v = GSVector4i::xffffffff();
for(int i = 0; i < size; i++)
for(size_t i = 0; i < size; i++)
{
v &= d[i] == s[i];

View File

@ -30,15 +30,20 @@
__aligned(struct, 32) GSVertex
{
GIFRegST ST;
GIFRegRGBAQ RGBAQ;
GIFRegXYZ XYZ;
GIFRegFOG FOG;
GIFRegUV UV;
union
{
struct
{
GIFRegST ST;
GIFRegRGBAQ RGBAQ;
GIFRegXYZ XYZ;
union {GIFRegUV UV; GIFRegFOG FOG;}; // UV.u32[0] | FOG.u32[1]
};
GSVertex() {memset(this, 0, sizeof(*this));}
__m128i m[2];
};
GSVector4 GetUV() const {return GSVector4(GSVector4i::load(UV.u32[0]).upl16());}
void operator = (const GSVertex& v) {m[0] = v.m[0]; m[1] = v.m[1];}
};
struct GSVertexP

View File

@ -35,16 +35,6 @@ __aligned(struct, 32) GSVertexHW9
// t.w = union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;}
GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;}
float& _q() {return p.w;}
uint8& _r() {return t.u8[8];}
uint8& _g() {return t.u8[9];}
uint8& _b() {return t.u8[10];}
uint8& _a() {return t.u8[11];}
uint32& _c0() {return t.u32[2];}
uint32& _c1() {return t.u32[3];}
};
__aligned(union, 32) GSVertexHW11
@ -86,16 +76,6 @@ __aligned(union, 32) GSVertexHW11
return *this;
}
float& _q() {return q;}
uint8& _r() {return r;}
uint8& _g() {return g;}
uint8& _b() {return b;}
uint8& _a() {return a;}
uint32& _c0() {return c0;}
uint32& _c1() {return c1;}
};
#pragma pack(pop)

View File

@ -23,7 +23,7 @@
#include "GSVector.h"
__aligned(struct, 16) GSVertexSW
__aligned(struct, 32) GSVertexSW
{
GSVector4 p, t, c;

View File

@ -28,12 +28,60 @@ const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX);
GSVertexTrace::GSVertexTrace(const GSState* state)
: m_state(state)
, m_map_sw("VertexTraceSW", NULL)
, m_map_hw9("VertexTraceHW9", NULL)
, m_map_hw11("VertexTraceHW11", NULL)
{
}
// Common tail of the vertex trace update, run once m_min / m_max hold the
// range of the draw: records which components are equal in m_min and m_max,
// invalidates the cached alpha range, and - when texturing is enabled -
// determines the mag/min filter flags, the LOD range and whether the draw
// samples with linear filtering. The parameters are not used by this base
// implementation.
void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
	m_alpha.valid = false;

	m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);

	if(!m_state->PRIM->TME)
	{
		return; // no texture, the filter state is left as it is
	}

	const GIFRegTEX1& TEX1 = m_state->m_context->TEX1;

	m_filter.mmag = TEX1.IsMagLinear();
	m_filter.mmin = TEX1.IsMinLinear();

	if(TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2
	{
		m_filter.linear = m_filter.mmag;

		return;
	}

	float K = (float)TEX1.K / 16;

	if(TEX1.LCM == 0 && m_state->PRIM->FST == 0) // FST == 1 => Q is not interpolated
	{
		// LOD = log2(1/|Q|) * (1 << L) + K

		GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(3).neg() * (float)(1 << TEX1.L) + K);

		// keep x <= y

		if(m_lod.x > m_lod.y)
		{
			float smaller = m_lod.y;

			m_lod.y = m_lod.x;
			m_lod.x = smaller;
		}
	}
	else
	{
		// fixed LOD

		m_lod.x = K;
		m_lod.y = K;
	}

	if(m_lod.y <= 0)
	{
		// always magnifying

		m_filter.linear = m_filter.mmag;
	}
	else if(m_lod.x > 0)
	{
		// always minifying

		m_filter.linear = m_filter.mmin;
	}
	else
	{
		// the range crosses 0, either filter may apply

		m_filter.linear = m_filter.mmag | m_filter.mmin;
	}
}
uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass)
{
m_primclass = primclass;
@ -48,66 +96,28 @@ uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass)
return hash;
}
void GSVertexTrace::UpdateLOD()
GSVertexTraceSW::GSVertexTraceSW(const GSState* state)
: GSVertexTrace(state)
, m_map("VertexTraceSW", NULL)
{
if(!m_state->PRIM->TME) return;
const GIFRegTEX1& TEX1 = m_state->m_context->TEX1;
m_filter.mmag = TEX1.IsMagLinear();
m_filter.mmin = TEX1.IsMinLinear();
if(TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2
{
m_filter.linear = m_filter.mmag;
return;
}
float K = (float)TEX1.K / 16;
if(TEX1.LCM == 0 && m_state->PRIM->FST == 0) // FST == 1 => Q is not interpolated
{
// LOD = log2(1/|Q|) * (1 << L) + K
GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(3).neg() * (float)(1 << TEX1.L) + K);
if(m_lod.x > m_lod.y) {float tmp = m_lod.x; m_lod.x = m_lod.y; m_lod.y = tmp;}
}
else
{
m_lod.x = K;
m_lod.y = K;
}
if(m_lod.y <= 0)
{
m_filter.linear = m_filter.mmag;
}
else if(m_lod.x > 0)
{
m_filter.linear = m_filter.mmin;
}
else
{
m_filter.linear = m_filter.mmag | m_filter.mmin;
}
}
void GSVertexTrace::Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass)
void GSVertexTraceSW::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
m_map_sw[Hash(primclass)](count, v, m_min, m_max);
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
m_alpha.valid = false;
UpdateLOD();
GSVertexTrace::Update(vertex, index, count, primclass);
}
void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass)
GSVertexTraceDX9::GSVertexTraceDX9(const GSState* state)
: GSVertexTrace(state)
, m_map("VertexTraceHW9", NULL)
{
m_map_hw9[Hash(primclass)](count, v, m_min, m_max);
}
void GSVertexTraceDX9::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
const GSDrawingContext* context = m_state->m_context;
@ -132,16 +142,18 @@ void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primcl
m_max.t *= s;
}
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
m_alpha.valid = false;
UpdateLOD();
GSVertexTrace::Update(vertex, index, count, primclass);
}
void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primclass)
GSVertexTraceDX11::GSVertexTraceDX11(const GSState* state)
: GSVertexTrace(state)
, m_map("VertexTraceHW11", NULL)
{
m_map_hw11[Hash(primclass)](count, v, m_min, m_max);
}
void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
const GSDrawingContext* context = m_state->m_context;
@ -166,10 +178,6 @@ void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primc
m_max.t *= s;
}
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
m_alpha.valid = false;
UpdateLOD();
GSVertexTrace::Update(vertex, index, count, primclass);
}

View File

@ -29,42 +29,18 @@
class GSState;
__aligned(class, 32) GSVertexTrace
__aligned(class, 32) GSVertexTrace : public GSAlignedClass<32>
{
public:
struct Vertex {GSVector4i c; GSVector4 p, t;}; // t.xy * 0x10000
struct VertexAlpha {int min, max; bool valid;};
private:
typedef void (*VertexTracePtr)(int count, const void* v, Vertex& min, Vertex& max);
class CGSW : public GSCodeGenerator
{
public:
CGSW(const void* param, uint32 key, void* code, size_t maxsize);
};
class CGHW9 : public GSCodeGenerator
{
public:
CGHW9(const void* param, uint32 key, void* code, size_t maxsize);
};
class CGHW11 : public GSCodeGenerator
{
public:
CGHW11(const void* param, uint32 key, void* code, size_t maxsize);
};
GSCodeGeneratorFunctionMap<CGSW, uint32, VertexTracePtr> m_map_sw;
GSCodeGeneratorFunctionMap<CGHW9, uint32, VertexTracePtr> m_map_hw9;
GSCodeGeneratorFunctionMap<CGHW11, uint32, VertexTracePtr> m_map_hw11;
protected:
const GSState* m_state;
uint32 Hash(GS_PRIM_CLASS primclass);
void UpdateLOD();
typedef void (*VertexTracePtr)(int count, const void* vertex, const uint32* index, Vertex& min, Vertex& max);
static const GSVector4 s_minmax;
@ -73,10 +49,7 @@ public:
Vertex m_min;
Vertex m_max;
// source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it
VertexAlpha m_alpha;
VertexAlpha m_alpha; // source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it
union
{
@ -92,12 +65,59 @@ public:
GSVector2 m_lod; // x = min, y = max
public:
GSVertexTrace(const GSState* state);
virtual ~GSVertexTrace() {}
void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass);
void Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass);
void Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primclass);
void Update(const GSVertexNull* v, int count, GS_PRIM_CLASS primclass) {}
virtual void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
bool IsLinear() const {return m_filter.linear;}
};
// Vertex trace for GSVertexSW vertices: Update() runs a generated min/max
// function selected by Hash(primclass) over the indexed vertices and then the
// common GSVertexTrace::Update().
__aligned(class, 32) GSVertexTraceSW : public GSVertexTrace
{
// code generator that emits the min/max function for one key
class CG : public GSCodeGenerator
{
public:
CG(const void* param, uint32 key, void* code, size_t maxsize);
};
// generated functions, looked up by key
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
public:
GSVertexTraceSW(const GSState* state);
// overrides GSVertexTrace::Update
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
};
// Vertex trace for the Direct3D 9 renderer (presumably GSVertexHW9 vertices -
// the function map is named "VertexTraceHW9"). Update() runs a generated
// min/max function selected by Hash(primclass), post-processes the result
// and finishes with the common GSVertexTrace::Update().
__aligned(class, 32) GSVertexTraceDX9 : public GSVertexTrace
{
// code generator that emits the min/max function for one key
class CG : public GSCodeGenerator
{
public:
CG(const void* param, uint32 key, void* code, size_t maxsize);
};
// generated functions, looked up by key
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
public:
GSVertexTraceDX9(const GSState* state);
// overrides GSVertexTrace::Update
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
};
// Vertex trace for the Direct3D 11 renderer (presumably GSVertexHW11 vertices -
// the function map is named "VertexTraceHW11"). Update() runs a generated
// min/max function selected by Hash(primclass), post-processes the result
// and finishes with the common GSVertexTrace::Update().
__aligned(class, 32) GSVertexTraceDX11 : public GSVertexTrace
{
// code generator that emits the min/max function for one key
class CG : public GSCodeGenerator
{
public:
CG(const void* param, uint32 key, void* code, size_t maxsize);
};
// generated functions, looked up by key
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
public:
GSVertexTraceDX11(const GSState* state);
// overrides GSVertexTrace::Update
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
};

View File

@ -27,12 +27,13 @@
using namespace Xbyak;
static const int _args = 0;
static const int _count = _args + 4; // rcx
static const int _v = _args + 8; // rdx
static const int _min = _args + 12; // r8
static const int _max = _args + 16; // r9
static const int _count = _args + 8; // rcx
static const int _vertex = _args + 12; // rdx
static const int _index = _args + 16; // r8
static const int _min = _args + 20; // r9
static const int _max = _args + 24; // _args + 4
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
@ -57,6 +58,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
break;
}
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
@ -83,7 +86,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
@ -92,18 +96,24 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
vmovaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
}
for(int j = 0; j < n; j++)
{
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
if(color && (iip || j == n - 1))
{
// min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
vminps(xmm2, xmm0);
vmaxps(xmm3, xmm0);
@ -112,7 +122,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
vminps(xmm4, xmm0);
vmaxps(xmm5, xmm0);
@ -122,7 +132,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
if(!fst)
{
@ -140,7 +150,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
}
}
add(edx, n * sizeof(GSVertexSW));
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
@ -170,10 +180,12 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
@ -189,17 +201,17 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
case GS_POINT_CLASS:
n = 1;
break;
case GS_SPRITE_CLASS:
case GS_LINE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
case GS_SPRITE_CLASS:
n = 6;
break;
}
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
@ -226,7 +238,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
@ -235,16 +248,22 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
vmovaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
}
for(int j = 0; j < n; j++)
{
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
vminps(xmm4, xmm0);
vmaxps(xmm5, xmm0);
@ -256,7 +275,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
if(color && (iip || j == n - 1) || tme)
{
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
}
if(color && (iip || j == n - 1))
@ -287,7 +306,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
}
}
add(edx, n * sizeof(GSVertexHW9));
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
@ -330,10 +349,12 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
@ -358,6 +379,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
break;
}
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
@ -384,7 +407,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
@ -393,9 +417,12 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
for(int j = 0; j < n; j++)
{
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW11)
if(color && (iip || j == n - 1) || tme)
{
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]);
vmovaps(xmm0, ptr[edx + eax]);
}
if(color && (iip || j == n - 1))
@ -424,7 +451,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
vmaxps(xmm7, xmm0);
}
vmovdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]);
vmovdqa(xmm0, ptr[edx + eax + 16]);
vpmovzxwd(xmm1, xmm0);
vpsrld(xmm0, 1);
@ -435,7 +462,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
vmaxps(xmm5, xmm1);
}
add(edx, n * sizeof(GSVertexHW11));
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
@ -478,6 +505,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}

View File

@ -27,12 +27,13 @@
using namespace Xbyak;
static const int _args = 0;
static const int _count = _args + 4; // rcx
static const int _v = _args + 8; // rdx
static const int _min = _args + 12; // r8
static const int _max = _args + 16; // r9
static const int _count = _args + 8; // rcx
static const int _vertex = _args + 12; // rdx
static const int _index = _args + 16; // r8
static const int _min = _args + 20; // r9
static const int _max = _args + 24; // _args + 4
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
@ -57,6 +58,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
break;
}
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
@ -86,7 +89,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
@ -95,18 +99,24 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
movaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
}
for(int j = 0; j < n; j++)
{
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
if(color && (iip || j == n - 1))
{
// min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c);
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
minps(xmm2, xmm0);
maxps(xmm3, xmm0);
@ -115,7 +125,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
minps(xmm4, xmm0);
maxps(xmm5, xmm0);
@ -125,7 +135,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
if(!fst)
{
@ -144,7 +154,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
}
}
add(edx, n * sizeof(GSVertexSW));
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
@ -174,10 +184,12 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
@ -204,6 +216,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
break;
}
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
@ -233,7 +247,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
@ -242,16 +257,22 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
movaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
}
for(int j = 0; j < n; j++)
{
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
minps(xmm4, xmm0);
maxps(xmm5, xmm0);
@ -264,7 +285,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
if(color && (iip || j == n - 1) || tme)
{
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
}
if(color && (iip || j == n - 1))
@ -295,7 +316,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
}
}
add(edx, n * sizeof(GSVertexHW9));
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
@ -351,10 +372,12 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
@ -379,6 +402,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
break;
}
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
@ -408,7 +433,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
@ -417,9 +443,12 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
for(int j = 0; j < n; j++)
{
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW11)
if(color && (iip || j == n - 1) || tme)
{
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]);
movaps(xmm0, ptr[edx + eax]);
}
if(color && (iip || j == n - 1))
@ -448,7 +477,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
maxps(xmm7, xmm0);
}
movdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]);
movdqa(xmm0, ptr[edx + eax + 16]);
if(m_cpu.has(util::Cpu::tSSE41))
{
@ -469,7 +498,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
maxps(xmm5, xmm1);
}
add(edx, n * sizeof(GSVertexHW11));
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
@ -525,6 +554,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}

View File

@ -529,7 +529,6 @@
<ClCompile Include="GSPerfMon.cpp" />
<ClCompile Include="GSRasterizer.cpp">
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">AssemblyAndSourceCode</AssemblerOutput>
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
</ClCompile>
<ClCompile Include="GSRenderer.cpp" />
<ClCompile Include="GSRendererDX.cpp" />

View File

@ -363,34 +363,43 @@ struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_fr
// GCC/Clang stand-in for the MSVC _BitScanForward intrinsic.
//
// Scans Mask from the least significant bit upwards and stores the position
// of the first set bit in *Index.
//
// Returns 1 if a set bit was found, 0 if Mask is zero. *Index is only written
// when Mask is nonzero.
//
// Implemented with __builtin_ctzl instead of a hand-written "bsfl": the
// builtin matches the width of unsigned long on every target, whereas the
// "l" suffix hard-codes a 32-bit operand size. The builtin is undefined for
// a zero argument, hence the explicit guard.
__forceinline unsigned char _BitScanForward(unsigned long* const Index, const unsigned long Mask)
{
	if(Mask == 0)
	{
		return 0;
	}

	*Index = (unsigned long)__builtin_ctzl(Mask);

	return 1;
}
// GCC/Clang stand-in for the MSVC _interlockedbittestandreset intrinsic.
//
// Atomically clears bit number b of *a.
//
// Returns 1 if the bit was set before the call, 0 otherwise.
//
// Uses the __sync builtin (full barrier) rather than a hand-written
// "lock; btrl", so the operand size always matches the width of long and the
// code is not tied to x86. The bit index is reduced modulo the bit width of
// long so an out-of-range b can never touch memory outside *a.
__forceinline unsigned char _interlockedbittestandreset(volatile long* a, const long b)
{
	const unsigned long bit = (unsigned long)b & (sizeof(long) * 8 - 1);
	const long mask = (long)(1UL << bit);

	// fetch-and-and returns the value held before the bit was cleared
	const long before = __sync_fetch_and_and(a, ~mask);

	return (before & mask) != 0 ? 1 : 0;
}
// GCC/Clang stand-in for the MSVC _interlockedbittestandset intrinsic.
//
// Atomically sets bit number b of *a.
//
// Returns 1 if the bit was already set before the call, 0 otherwise.
//
// Uses the __sync builtin (full barrier) rather than a hand-written
// "lock; btsl", so the operand size always matches the width of long and the
// code is not tied to x86. The bit index is reduced modulo the bit width of
// long so an out-of-range b can never touch memory outside *a.
__forceinline unsigned char _interlockedbittestandset(volatile long* a, const long b)
{
	const unsigned long bit = (unsigned long)b & (sizeof(long) * 8 - 1);
	const long mask = (long)(1UL << bit);

	// fetch-and-or returns the value held before the bit was set
	const long before = __sync_fetch_and_or(a, mask);

	return (before & mask) != 0 ? 1 : 0;
}
// GCC/Clang stand-in for the MSVC _InterlockedExchangeAdd intrinsic.
//
// Atomically adds Value to *Addend.
//
// Returns the value *Addend held before the addition.
//
// Uses the __sync builtin (full barrier) rather than a hand-written
// "lock; xaddl": the builtin picks the instruction that matches the width of
// long and tells the compiler that *Addend is both read and written.
__forceinline long _InterlockedExchangeAdd(volatile long* const Addend, const long Value)
{
	return __sync_fetch_and_add(Addend, Value);
}
// GCC/Clang stand-in for a 16-bit interlocked exchange-add.
//
// Atomically adds Value to the 16-bit integer at *Addend (wrapping on
// overflow).
//
// Returns the 16-bit value *Addend held before the addition, sign-extended
// to long.
//
// The builtin operates on the short directly, so the whole return value comes
// from the old contents of *Addend. The previous inline asm exchanged only the
// low 16 bits of a long temporary, leaving the upper bits of the result filled
// with the sign extension of Value instead of the old value.
__forceinline long _InterlockedExchangeAdd16(volatile short* const Addend, const short Value)
{
	const short before = __sync_fetch_and_add(Addend, Value);

	return before;
}