mirror of https://github.com/PCSX2/pcsx2.git
GSdx: the promised index buffer update, needed a lot of changes, expect bugs in the next dozen revisions.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5045 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
bffde4fd5e
commit
f68f007f00
|
@ -114,22 +114,26 @@ void GPURendererSW::Draw()
|
|||
|
||||
gd.vm = m_mem.GetPixelAddress(0, 0);
|
||||
|
||||
data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16);
|
||||
memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count);
|
||||
data->count = m_count;
|
||||
|
||||
data->frame = m_perfmon.GetFrame();
|
||||
|
||||
data->scissor.left = (int)m_env.DRAREATL.X << m_scale.x;
|
||||
data->scissor.top = (int)m_env.DRAREATL.Y << m_scale.y;
|
||||
data->scissor.right = min((int)(m_env.DRAREABR.X + 1) << m_scale.x, m_mem.GetWidth());
|
||||
data->scissor.bottom = min((int)(m_env.DRAREABR.Y + 1) << m_scale.y, m_mem.GetHeight());
|
||||
|
||||
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16);
|
||||
data->vertex = (GSVertexSW*)data->buff;
|
||||
data->vertex_count = m_count;
|
||||
|
||||
memcpy(data->vertex, m_vertices, sizeof(GSVertexSW) * m_count);
|
||||
|
||||
data->frame = m_perfmon.GetFrame();
|
||||
|
||||
int prims = 0;
|
||||
|
||||
switch(env.PRIM.TYPE)
|
||||
{
|
||||
case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; break;
|
||||
case GPU_LINE: data->primclass = GS_LINE_CLASS; break;
|
||||
case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; break;
|
||||
case GPU_POLYGON: data->primclass = GS_TRIANGLE_CLASS; prims = data->vertex_count / 3; break;
|
||||
case GPU_LINE: data->primclass = GS_LINE_CLASS; prims = data->vertex_count / 2; break;
|
||||
case GPU_SPRITE: data->primclass = GS_SPRITE_CLASS; prims = data->vertex_count / 2; break;
|
||||
default: __assume(0);
|
||||
}
|
||||
|
||||
|
@ -138,9 +142,9 @@ void GPURendererSW::Draw()
|
|||
GSVector4 tl(+1e10f);
|
||||
GSVector4 br(-1e10f);
|
||||
|
||||
GSVertexSW* v = data->vertices;
|
||||
GSVertexSW* v = data->vertex;
|
||||
|
||||
for(int i = 0, j = m_count; i < j; i++)
|
||||
for(int i = 0, j = data->vertex_count; i < j; i++)
|
||||
{
|
||||
GSVector4 p = v[i].p;
|
||||
|
||||
|
@ -163,9 +167,9 @@ void GPURendererSW::Draw()
|
|||
|
||||
m_rl->Sync();
|
||||
|
||||
// TODO: m_perfmon.Put(GSPerfMon::Draw, 1);
|
||||
// TODO: m_perfmon.Put(GSPerfMon::Prim, stats.prims);
|
||||
// TODO: m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
|
||||
m_perfmon.Put(GSPerfMon::Draw, 1);
|
||||
m_perfmon.Put(GSPerfMon::Prim, prims);
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, data->pixels);
|
||||
}
|
||||
|
||||
void GPURendererSW::VertexKick()
|
||||
|
|
|
@ -735,8 +735,6 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
|
|||
|
||||
::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS);
|
||||
|
||||
vector<uint8> buff;
|
||||
|
||||
if(FILE* fp = fopen(lpszCmdLine, "rb"))
|
||||
{
|
||||
Console console("GSdx", true);
|
||||
|
@ -769,10 +767,127 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
|
|||
|
||||
GSvsync(1);
|
||||
|
||||
struct Packet {uint8 type, param; uint32 size, addr; vector<uint8> buff;};
|
||||
|
||||
list<Packet*> packets;
|
||||
vector<uint8> buff;
|
||||
int type;
|
||||
|
||||
while((type = fgetc(fp)) != EOF)
|
||||
{
|
||||
Packet* p = new Packet();
|
||||
|
||||
p->type = (uint8)type;
|
||||
|
||||
switch(type)
|
||||
{
|
||||
case 0:
|
||||
|
||||
p->param = (uint8)fgetc(fp);
|
||||
|
||||
fread(&p->size, 4, 1, fp);
|
||||
|
||||
switch(p->param)
|
||||
{
|
||||
case 0:
|
||||
p->buff.resize(0x4000);
|
||||
p->addr = 0x4000 - p->size;
|
||||
fread(&p->buff[p->addr], p->size, 1, fp);
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
p->buff.resize(p->size);
|
||||
fread(&p->buff[0], p->size, 1, fp);
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case 1:
|
||||
|
||||
p->param = (uint8)fgetc(fp);
|
||||
|
||||
break;
|
||||
|
||||
case 2:
|
||||
|
||||
fread(&p->size, 4, 1, fp);
|
||||
|
||||
break;
|
||||
|
||||
case 3:
|
||||
|
||||
p->buff.resize(0x2000);
|
||||
|
||||
fread(&p->buff[0], 0x2000, 1, fp);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
packets.push_back(p);
|
||||
}
|
||||
|
||||
Sleep(100);
|
||||
|
||||
while(IsWindowVisible(hWnd))
|
||||
{
|
||||
for(list<Packet*>::iterator i = packets.begin(); i != packets.end(); i++)
|
||||
{
|
||||
Packet* p = *i;
|
||||
|
||||
switch(p->type)
|
||||
{
|
||||
case 0:
|
||||
|
||||
switch(p->param)
|
||||
{
|
||||
case 0: GSgifTransfer1(&p->buff[0], p->addr); break;
|
||||
case 1: GSgifTransfer2(&p->buff[0], p->size / 16); break;
|
||||
case 2: GSgifTransfer3(&p->buff[0], p->size / 16); break;
|
||||
case 3: GSgifTransfer(&p->buff[0], p->size / 16); break;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case 1:
|
||||
|
||||
GSvsync(p->param);
|
||||
|
||||
break;
|
||||
|
||||
case 2:
|
||||
|
||||
if(buff.size() < p->size) buff.resize(p->size);
|
||||
|
||||
GSreadFIFO2(&buff[0], p->size / 16);
|
||||
|
||||
break;
|
||||
|
||||
case 3:
|
||||
|
||||
memcpy(regs, &p->buff[0], 0x2000);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(list<Packet*>::iterator i = packets.begin(); i != packets.end(); i++)
|
||||
{
|
||||
delete *i;
|
||||
}
|
||||
|
||||
packets.clear();
|
||||
|
||||
Sleep(100);
|
||||
|
||||
|
||||
/*
|
||||
bool exit = false;
|
||||
|
||||
int round = 0;
|
||||
|
||||
while(!exit)
|
||||
{
|
||||
uint32 index;
|
||||
|
@ -786,6 +901,7 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
|
|||
case EOF:
|
||||
fseek(fp, start, 0);
|
||||
exit = !IsWindowVisible(hWnd);
|
||||
//exit = ++round == 60;
|
||||
break;
|
||||
|
||||
case 0:
|
||||
|
@ -838,6 +954,7 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
|
|||
break;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
GSclose();
|
||||
GSshutdown();
|
||||
|
|
|
@ -638,8 +638,8 @@ REG64_(GIFReg, FINISH)
|
|||
REG_END
|
||||
|
||||
REG64_(GIFReg, FOG)
|
||||
uint8 _PAD1[4+3];
|
||||
uint8 F:8;
|
||||
uint8 _PAD1[7];
|
||||
uint8 F;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, FOGCOL)
|
||||
|
@ -1030,7 +1030,9 @@ REG128_(GIFPacked, XYZF2)
|
|||
uint32 _PAD6:3;
|
||||
uint32 ADC:1;
|
||||
uint32 _PAD7:16;
|
||||
REG_END
|
||||
REG_END2
|
||||
uint32 Skip() const {return u32[3] & 0x8000;}
|
||||
REG_END2
|
||||
|
||||
REG128_(GIFPacked, XYZ2)
|
||||
uint16 X;
|
||||
|
@ -1041,7 +1043,9 @@ REG128_(GIFPacked, XYZ2)
|
|||
uint32 _PAD3:15;
|
||||
uint32 ADC:1;
|
||||
uint32 _PAD4:16;
|
||||
REG_END
|
||||
REG_END2
|
||||
uint32 Skip() const {return u32[3] & 0x8000;}
|
||||
REG_END2
|
||||
|
||||
REG128_(GIFPacked, FOG)
|
||||
uint32 _PAD1;
|
||||
|
|
|
@ -35,7 +35,8 @@ GSDevice::GSDevice()
|
|||
, m_1x1(NULL)
|
||||
, m_frame(0)
|
||||
{
|
||||
memset(&m_vertices, 0, sizeof(m_vertices));
|
||||
memset(&m_vertex, 0, sizeof(m_vertex));
|
||||
memset(&m_index, 0, sizeof(m_index));
|
||||
}
|
||||
|
||||
GSDevice::~GSDevice()
|
||||
|
@ -135,8 +136,10 @@ GSTexture* GSDevice::FetchSurface(int type, int w, int h, bool msaa, int format)
|
|||
|
||||
void GSDevice::EndScene()
|
||||
{
|
||||
m_vertices.start += m_vertices.count;
|
||||
m_vertices.count = 0;
|
||||
m_vertex.start += m_vertex.count;
|
||||
m_vertex.count = 0;
|
||||
m_index.start += m_index.count;
|
||||
m_index.count = 0;
|
||||
}
|
||||
|
||||
void GSDevice::Recycle(GSTexture* t)
|
||||
|
|
|
@ -72,7 +72,8 @@ protected:
|
|||
GSTexture* m_fxaa;
|
||||
GSTexture* m_1x1;
|
||||
GSTexture* m_current;
|
||||
struct {size_t stride, start, count, limit;} m_vertices;
|
||||
struct {size_t stride, start, count, limit;} m_vertex;
|
||||
struct {size_t start, count, limit;} m_index;
|
||||
unsigned int m_frame; // for ageing the pool
|
||||
|
||||
virtual GSTexture* CreateSurface(int type, int w, int h, bool msaa, int format) = 0;
|
||||
|
@ -101,6 +102,7 @@ public:
|
|||
|
||||
virtual void BeginScene() {}
|
||||
virtual void DrawPrimitive() {};
|
||||
virtual void DrawIndexedPrimitive() {}
|
||||
virtual void EndScene();
|
||||
|
||||
virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {}
|
||||
|
|
|
@ -352,7 +352,12 @@ void GSDevice11::Flip()
|
|||
|
||||
void GSDevice11::DrawPrimitive()
|
||||
{
|
||||
m_ctx->Draw(m_vertices.count, m_vertices.start);
|
||||
m_ctx->Draw(m_vertex.count, m_vertex.start);
|
||||
}
|
||||
|
||||
void GSDevice11::DrawIndexedPrimitive()
|
||||
{
|
||||
m_ctx->DrawIndexed(m_index.count, m_index.start, m_vertex.start);
|
||||
}
|
||||
|
||||
void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c)
|
||||
|
@ -709,18 +714,18 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert
|
|||
}
|
||||
}
|
||||
|
||||
void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t count)
|
||||
void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
|
||||
{
|
||||
ASSERT(m_vertices.count == 0);
|
||||
ASSERT(m_vertex.count == 0);
|
||||
|
||||
if(count * stride > m_vertices.limit * m_vertices.stride)
|
||||
if(count * stride > m_vertex.limit * m_vertex.stride)
|
||||
{
|
||||
m_vb_old = m_vb;
|
||||
m_vb = NULL;
|
||||
|
||||
m_vertices.start = 0;
|
||||
m_vertices.count = 0;
|
||||
m_vertices.limit = std::max<int>(count * 3 / 2, 11000);
|
||||
m_vertex.start = 0;
|
||||
m_vertex.count = 0;
|
||||
m_vertex.limit = std::max<int>(count * 3 / 2, 11000);
|
||||
}
|
||||
|
||||
if(m_vb == NULL)
|
||||
|
@ -730,7 +735,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
|
|||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.Usage = D3D11_USAGE_DYNAMIC;
|
||||
bd.ByteWidth = m_vertices.limit * stride;
|
||||
bd.ByteWidth = m_vertex.limit * stride;
|
||||
bd.BindFlags = D3D11_BIND_VERTEX_BUFFER;
|
||||
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
|
||||
|
@ -743,9 +748,9 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
|
|||
|
||||
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
|
||||
|
||||
if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride)
|
||||
if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride)
|
||||
{
|
||||
m_vertices.start = 0;
|
||||
m_vertex.start = 0;
|
||||
|
||||
type = D3D11_MAP_WRITE_DISCARD;
|
||||
}
|
||||
|
@ -754,13 +759,13 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c
|
|||
|
||||
if(SUCCEEDED(m_ctx->Map(m_vb, 0, type, 0, &m)))
|
||||
{
|
||||
GSVector4i::storent((uint8*)m.pData + m_vertices.start * stride, vertices, count * stride);
|
||||
GSVector4i::storent((uint8*)m.pData + m_vertex.start * stride, vertex, count * stride);
|
||||
|
||||
m_ctx->Unmap(m_vb, 0);
|
||||
}
|
||||
|
||||
m_vertices.count = count;
|
||||
m_vertices.stride = stride;
|
||||
m_vertex.count = count;
|
||||
m_vertex.stride = stride;
|
||||
|
||||
IASetVertexBuffer(m_vb, stride);
|
||||
}
|
||||
|
@ -779,6 +784,70 @@ void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
|
||||
{
|
||||
ASSERT(m_index.count == 0);
|
||||
|
||||
if(count > m_index.limit)
|
||||
{
|
||||
m_ib_old = m_ib;
|
||||
m_ib = NULL;
|
||||
|
||||
m_index.count = 0;
|
||||
m_index.limit = std::max<int>(count * 3 / 2, 11000);
|
||||
}
|
||||
|
||||
if(m_ib == NULL)
|
||||
{
|
||||
D3D11_BUFFER_DESC bd;
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.Usage = D3D11_USAGE_DYNAMIC;
|
||||
bd.ByteWidth = m_index.limit * sizeof(uint32);
|
||||
bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
|
||||
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
|
||||
HRESULT hr;
|
||||
|
||||
hr = m_dev->CreateBuffer(&bd, NULL, &m_ib);
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
}
|
||||
|
||||
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
|
||||
|
||||
if(m_index.start + count > m_index.limit)
|
||||
{
|
||||
m_index.start = 0;
|
||||
|
||||
type = D3D11_MAP_WRITE_DISCARD;
|
||||
}
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE m;
|
||||
|
||||
if(SUCCEEDED(m_ctx->Map(m_ib, 0, type, 0, &m)))
|
||||
{
|
||||
memcpy((uint8*)m.pData + m_index.start * sizeof(uint32), index, count * sizeof(uint32));
|
||||
|
||||
m_ctx->Unmap(m_ib, 0);
|
||||
}
|
||||
|
||||
m_index.count = count;
|
||||
|
||||
IASetIndexBuffer(m_ib);
|
||||
}
|
||||
|
||||
// Binds `ib` as the input assembler's index buffer (32-bit indices, offset 0).
// The call is skipped when `ib` is already the buffer cached in m_state.ib.
void GSDevice11::IASetIndexBuffer(ID3D11Buffer* ib)
{
	if(m_state.ib == ib)
	{
		return;
	}

	m_state.ib = ib;

	m_ctx->IASetIndexBuffer(ib, DXGI_FORMAT_R32_UINT, 0);
}
|
||||
|
||||
void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout)
|
||||
{
|
||||
if(m_state.layout != layout)
|
||||
|
|
|
@ -45,6 +45,8 @@ class GSDevice11 : public GSDeviceDX
|
|||
CComPtr<IDXGISwapChain> m_swapchain;
|
||||
CComPtr<ID3D11Buffer> m_vb;
|
||||
CComPtr<ID3D11Buffer> m_vb_old;
|
||||
CComPtr<ID3D11Buffer> m_ib;
|
||||
CComPtr<ID3D11Buffer> m_ib_old;
|
||||
|
||||
bool m_srv_changed, m_ss_changed;
|
||||
|
||||
|
@ -52,6 +54,7 @@ class GSDevice11 : public GSDeviceDX
|
|||
{
|
||||
ID3D11Buffer* vb;
|
||||
size_t vb_stride;
|
||||
ID3D11Buffer* ib;
|
||||
ID3D11InputLayout* layout;
|
||||
D3D11_PRIMITIVE_TOPOLOGY topology;
|
||||
ID3D11VertexShader* vs;
|
||||
|
@ -141,6 +144,7 @@ public:
|
|||
void SetExclusive(bool isExcl);
|
||||
|
||||
void DrawPrimitive();
|
||||
void DrawIndexedPrimitive();
|
||||
|
||||
void ClearRenderTarget(GSTexture* t, const GSVector4& c);
|
||||
void ClearRenderTarget(GSTexture* t, uint32 c);
|
||||
|
@ -162,8 +166,10 @@ public:
|
|||
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear = true);
|
||||
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);
|
||||
|
||||
void IASetVertexBuffer(const void* vertices, size_t stride, size_t count);
|
||||
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
|
||||
void IASetVertexBuffer(ID3D11Buffer* vb, size_t stride);
|
||||
void IASetIndexBuffer(const void* index, size_t count);
|
||||
void IASetIndexBuffer(ID3D11Buffer* ib);
|
||||
void IASetInputLayout(ID3D11InputLayout* layout);
|
||||
void IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology);
|
||||
void VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb);
|
||||
|
@ -176,7 +182,7 @@ public:
|
|||
void OMSetBlendState(ID3D11BlendState* bs, float bf);
|
||||
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
|
||||
|
||||
void SetupIA(const void* vertices, int count, int prim);
|
||||
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
|
||||
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
|
||||
void SetupGS(GSSelector sel);
|
||||
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
|
||||
|
|
|
@ -352,8 +352,10 @@ bool GSDevice9::Reset(int w, int h)
|
|||
m_vb = NULL;
|
||||
m_vb_old = NULL;
|
||||
|
||||
m_vertices.start = 0;
|
||||
m_vertices.count = 0;
|
||||
m_vertex.start = 0;
|
||||
m_vertex.count = 0;
|
||||
m_index.start = 0;
|
||||
m_index.count = 0;
|
||||
|
||||
if(m_state.vs_cb) _aligned_free(m_state.vs_cb);
|
||||
if(m_state.ps_cb) _aligned_free(m_state.ps_cb);
|
||||
|
@ -510,25 +512,52 @@ void GSDevice9::DrawPrimitive()
|
|||
|
||||
switch(m_state.topology)
|
||||
{
|
||||
case D3DPT_TRIANGLELIST:
|
||||
prims = m_vertices.count / 3;
|
||||
case D3DPT_POINTLIST:
|
||||
prims = m_vertex.count;
|
||||
break;
|
||||
case D3DPT_LINELIST:
|
||||
prims = m_vertices.count / 2;
|
||||
prims = m_vertex.count / 2;
|
||||
break;
|
||||
case D3DPT_POINTLIST:
|
||||
prims = m_vertices.count;
|
||||
case D3DPT_LINESTRIP:
|
||||
prims = m_vertex.count - 1;
|
||||
break;
|
||||
case D3DPT_TRIANGLELIST:
|
||||
prims = m_vertex.count / 3;
|
||||
break;
|
||||
case D3DPT_TRIANGLESTRIP:
|
||||
case D3DPT_TRIANGLEFAN:
|
||||
prims = m_vertices.count - 2;
|
||||
break;
|
||||
case D3DPT_LINESTRIP:
|
||||
prims = m_vertices.count - 1;
|
||||
prims = m_vertex.count - 2;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
m_dev->DrawPrimitive(m_state.topology, m_vertices.start, prims);
|
||||
m_dev->DrawPrimitive(m_state.topology, m_vertex.start, prims);
|
||||
}
|
||||
|
||||
void GSDevice9::DrawIndexedPrimitive()
|
||||
{
|
||||
int prims = 0;
|
||||
|
||||
switch(m_state.topology)
|
||||
{
|
||||
case D3DPT_POINTLIST:
|
||||
prims = m_index.count;
|
||||
break;
|
||||
case D3DPT_LINELIST:
|
||||
case D3DPT_LINESTRIP:
|
||||
prims = m_index.count / 2;
|
||||
break;
|
||||
case D3DPT_TRIANGLELIST:
|
||||
case D3DPT_TRIANGLESTRIP:
|
||||
case D3DPT_TRIANGLEFAN:
|
||||
prims = m_index.count / 3;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
m_dev->DrawIndexedPrimitive(m_state.topology, m_vertex.start, 0, m_index.count, m_index.start, prims);
|
||||
}
|
||||
|
||||
void GSDevice9::EndScene()
|
||||
|
@ -881,49 +910,49 @@ void GSDevice9::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* verti
|
|||
}
|
||||
}
|
||||
|
||||
void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t count)
|
||||
void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
|
||||
{
|
||||
ASSERT(m_vertices.count == 0);
|
||||
ASSERT(m_vertex.count == 0);
|
||||
|
||||
if(count * stride > m_vertices.limit * m_vertices.stride)
|
||||
if(count * stride > m_vertex.limit * m_vertex.stride)
|
||||
{
|
||||
m_vb_old = m_vb;
|
||||
m_vb = NULL;
|
||||
|
||||
m_vertices.start = 0;
|
||||
m_vertices.count = 0;
|
||||
m_vertices.limit = std::max<int>(count * 3 / 2, 10000);
|
||||
m_vertex.start = 0;
|
||||
m_vertex.count = 0;
|
||||
m_vertex.limit = std::max<int>(count * 3 / 2, 10000);
|
||||
}
|
||||
|
||||
if(m_vb == NULL)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
hr = m_dev->CreateVertexBuffer(m_vertices.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL);
|
||||
hr = m_dev->CreateVertexBuffer(m_vertex.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL);
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
}
|
||||
|
||||
uint32 flags = D3DLOCK_NOOVERWRITE;
|
||||
|
||||
if(m_vertices.start + count > m_vertices.limit || stride != m_vertices.stride)
|
||||
if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride)
|
||||
{
|
||||
m_vertices.start = 0;
|
||||
m_vertex.start = 0;
|
||||
|
||||
flags = D3DLOCK_DISCARD;
|
||||
}
|
||||
|
||||
void* v = NULL;
|
||||
void* ptr = NULL;
|
||||
|
||||
if(SUCCEEDED(m_vb->Lock(m_vertices.start * stride, count * stride, &v, flags)))
|
||||
if(SUCCEEDED(m_vb->Lock(m_vertex.start * stride, count * stride, &ptr, flags)))
|
||||
{
|
||||
GSVector4i::storent(v, vertices, count * stride);
|
||||
GSVector4i::storent(ptr, vertex, count * stride);
|
||||
|
||||
m_vb->Unlock();
|
||||
}
|
||||
|
||||
m_vertices.count = count;
|
||||
m_vertices.stride = stride;
|
||||
m_vertex.count = count;
|
||||
m_vertex.stride = stride;
|
||||
|
||||
IASetVertexBuffer(m_vb, stride);
|
||||
}
|
||||
|
@ -939,6 +968,61 @@ void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDevice9::IASetIndexBuffer(const void* index, size_t count)
|
||||
{
|
||||
ASSERT(m_index.count == 0);
|
||||
|
||||
if(count > m_index.limit)
|
||||
{
|
||||
m_ib_old = m_ib;
|
||||
m_ib = NULL;
|
||||
|
||||
m_index.count = 0;
|
||||
m_index.limit = std::max<int>(count * 3 / 2, 11000);
|
||||
}
|
||||
|
||||
if(m_ib == NULL)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
hr = m_dev->CreateIndexBuffer(m_index.limit * sizeof(uint32), D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, D3DFMT_INDEX32, D3DPOOL_DEFAULT, &m_ib, NULL);
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
}
|
||||
|
||||
uint32 flags = D3DLOCK_NOOVERWRITE;
|
||||
|
||||
if(m_index.start + count > m_index.limit)
|
||||
{
|
||||
m_index.start = 0;
|
||||
|
||||
flags = D3DLOCK_DISCARD;
|
||||
}
|
||||
|
||||
void* ptr = NULL;
|
||||
|
||||
if(SUCCEEDED(m_ib->Lock(m_index.start * sizeof(uint32), count * sizeof(uint32), &ptr, flags)))
|
||||
{
|
||||
memcpy(ptr, index, count * sizeof(uint32));
|
||||
|
||||
m_ib->Unlock();
|
||||
}
|
||||
|
||||
m_index.count = count;
|
||||
|
||||
IASetIndexBuffer(m_ib);
|
||||
}
|
||||
|
||||
// Selects `ib` as the device's current index buffer.
// The call is skipped when `ib` is already the buffer cached in m_state.ib.
void GSDevice9::IASetIndexBuffer(IDirect3DIndexBuffer9* ib)
{
	if(m_state.ib == ib)
	{
		return;
	}

	m_state.ib = ib;

	m_dev->SetIndices(ib);
}
|
||||
|
||||
void GSDevice9::IASetInputLayout(IDirect3DVertexDeclaration9* layout)
|
||||
{
|
||||
if(m_state.layout != layout)
|
||||
|
|
|
@ -82,6 +82,8 @@ class GSDevice9 : public GSDeviceDX
|
|||
CComPtr<IDirect3DSwapChain9> m_swapchain;
|
||||
CComPtr<IDirect3DVertexBuffer9> m_vb;
|
||||
CComPtr<IDirect3DVertexBuffer9> m_vb_old;
|
||||
CComPtr<IDirect3DIndexBuffer9> m_ib;
|
||||
CComPtr<IDirect3DIndexBuffer9> m_ib_old;
|
||||
bool m_lost;
|
||||
D3DFORMAT m_depth_format;
|
||||
|
||||
|
@ -89,6 +91,7 @@ class GSDevice9 : public GSDeviceDX
|
|||
{
|
||||
IDirect3DVertexBuffer9* vb;
|
||||
size_t vb_stride;
|
||||
IDirect3DIndexBuffer9* ib;
|
||||
IDirect3DVertexDeclaration9* layout;
|
||||
D3DPRIMITIVETYPE topology;
|
||||
IDirect3DVertexShader9* vs;
|
||||
|
@ -169,6 +172,7 @@ public:
|
|||
|
||||
void BeginScene();
|
||||
void DrawPrimitive();
|
||||
void DrawIndexedPrimitive();
|
||||
void EndScene();
|
||||
|
||||
void ClearRenderTarget(GSTexture* t, const GSVector4& c);
|
||||
|
@ -191,8 +195,10 @@ public:
|
|||
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, bool linear = true);
|
||||
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true);
|
||||
|
||||
void IASetVertexBuffer(const void* vertices, size_t stride, size_t count);
|
||||
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
|
||||
void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride);
|
||||
void IASetIndexBuffer(const void* index, size_t count);
|
||||
void IASetIndexBuffer(IDirect3DIndexBuffer9* ib);
|
||||
void IASetInputLayout(IDirect3DVertexDeclaration9* layout);
|
||||
void IASetPrimitiveTopology(D3DPRIMITIVETYPE topology);
|
||||
void VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int vs_cb_len);
|
||||
|
@ -210,7 +216,7 @@ public:
|
|||
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il);
|
||||
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DPixelShader9** ps);
|
||||
|
||||
void SetupIA(const void* vertices, int count, int prim);
|
||||
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
|
||||
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
|
||||
void SetupGS(GSSelector sel) {}
|
||||
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
|
||||
|
|
|
@ -278,7 +278,7 @@ public:
|
|||
|
||||
bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode);
|
||||
|
||||
virtual void SetupIA(const void* vertices, int count, int prim) = 0;
|
||||
virtual void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) = 0;
|
||||
virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0;
|
||||
virtual void SetupGS(GSSelector sel) = 0;
|
||||
virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0;
|
||||
|
|
|
@ -87,7 +87,7 @@ void GSDrawScanline::BeginDraw(const void* param)
|
|||
sel.tcc = m_global.sel.tcc;
|
||||
sel.fst = m_global.sel.fst;
|
||||
sel.fge = m_global.sel.fge;
|
||||
sel.sprite = m_global.sel.sprite;
|
||||
sel.prim = m_global.sel.prim;
|
||||
sel.fb = m_global.sel.fb;
|
||||
sel.zb = m_global.sel.zb;
|
||||
sel.zoverflow = m_global.sel.zoverflow;
|
||||
|
@ -102,7 +102,7 @@ void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int pixels)
|
|||
|
||||
#ifndef ENABLE_JIT_RASTERIZER
|
||||
|
||||
void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
|
||||
void GSDrawScanline::SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan)
|
||||
{
|
||||
GSScanlineSelector sel = m_global.sel;
|
||||
|
||||
|
@ -115,7 +115,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
|
|||
|
||||
if(has_z || has_f)
|
||||
{
|
||||
if(!sel.sprite)
|
||||
if(sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
if(has_f)
|
||||
{
|
||||
|
@ -145,12 +145,12 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
|
|||
{
|
||||
if(has_f)
|
||||
{
|
||||
m_local.p.f = GSVector4i(vertices[0].p).zzzzh().zzzz();
|
||||
m_local.p.f = GSVector4i(vertex.p).zzzzh().zzzz();
|
||||
}
|
||||
|
||||
if(has_z)
|
||||
{
|
||||
m_local.p.z = vertices[0].t.u32[3]; // uint32 z is bypassed in t.w
|
||||
m_local.p.z = vertex.t.u32[3]; // uint32 z is bypassed in t.w
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -234,7 +234,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
|
|||
}
|
||||
else
|
||||
{
|
||||
GSVector4i c = GSVector4i(vertices[0].c);
|
||||
GSVector4i c = GSVector4i(vertex.c);
|
||||
|
||||
c = c.upl16(c.zwxy());
|
||||
|
||||
|
@ -271,7 +271,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
if(!sel.sprite)
|
||||
if(sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
if(sel.fwrite && sel.fge)
|
||||
{
|
||||
|
@ -300,7 +300,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
GSVector4i u = vt.xxxx() + GSVector4i::cast(m_local.d[skip].s);
|
||||
GSVector4i v = vt.yyyy();
|
||||
|
||||
if(!sel.sprite || sel.mmin)
|
||||
if(sel.prim != GS_SPRITE_CLASS || sel.mmin)
|
||||
{
|
||||
v += GSVector4i::cast(m_local.d[skip].t);
|
||||
}
|
||||
|
@ -354,7 +354,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
{
|
||||
za = fza_base->y + fza_offset->y;
|
||||
|
||||
if(!sel.sprite)
|
||||
if(sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
GSVector4 z = scan.p.zzzz() + zo;
|
||||
|
||||
|
@ -754,7 +754,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
{
|
||||
uf = u.xxzzlh().srl16(1);
|
||||
|
||||
if(!sel.sprite)
|
||||
if(sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
vf = v.xxzzlh().srl16(1);
|
||||
}
|
||||
|
@ -936,7 +936,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
if(sel.fwrite && sel.fge)
|
||||
{
|
||||
GSVector4i fog = !sel.sprite ? f : m_local.p.f;
|
||||
GSVector4i fog = sel.prim != GS_SPRITE_CLASS ? f : m_local.p.f;
|
||||
|
||||
rb = m_global.frb.lerp16<0>(rb, fog);
|
||||
ga = m_global.fga.lerp16<0>(ga, fog).mix16(ga);
|
||||
|
@ -1211,7 +1211,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
fza_offset++;
|
||||
|
||||
if(!sel.sprite)
|
||||
if(sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
if(sel.zb)
|
||||
{
|
||||
|
@ -1234,7 +1234,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
s = GSVector4::cast(GSVector4i::cast(s) + stq.xxxx());
|
||||
|
||||
if(!sel.sprite || sel.mmin)
|
||||
if(sel.prim != GS_SPRITE_CLASS || sel.mmin)
|
||||
{
|
||||
t = GSVector4::cast(GSVector4i::cast(t) + stq.yyyy());
|
||||
}
|
||||
|
|
|
@ -287,7 +287,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
lea(edi, ptr[ebx * 2]);
|
||||
add(edi, ptr[&m_local.gd->fzbc]);
|
||||
|
||||
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
|
||||
if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
|
||||
{
|
||||
// edx = &m_local.d[skip]
|
||||
|
||||
|
@ -298,7 +298,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
mov(ebx, ptr[esp + _v]);
|
||||
}
|
||||
|
||||
if(!m_sel.sprite)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
if(m_sel.fwrite && m_sel.fge || m_sel.zb)
|
||||
{
|
||||
|
@ -370,7 +370,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
|
||||
vpaddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]);
|
||||
|
||||
if(!m_sel.sprite || m_sel.mmin)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
|
||||
{
|
||||
vpaddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]);
|
||||
}
|
||||
|
@ -455,7 +455,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
|
||||
add(edi, 8);
|
||||
|
||||
if(!m_sel.sprite)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
// z += m_local.d4.z;
|
||||
|
||||
|
@ -501,7 +501,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
vpaddd(xmm2, ptr[&m_local.temp.s]);
|
||||
vmovdqa(ptr[&m_local.temp.s], xmm2);
|
||||
|
||||
if(!m_sel.sprite || m_sel.mmin)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
|
||||
{
|
||||
vpshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
vpaddd(xmm3, ptr[&m_local.temp.t]);
|
||||
|
@ -597,7 +597,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
|
||||
// GSVector4i zs = zi;
|
||||
|
||||
if(!m_sel.sprite)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
if(m_sel.zoverflow)
|
||||
{
|
||||
|
@ -733,7 +733,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
vpsrlw(xmm0, 1);
|
||||
vmovdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
if(!m_sel.sprite)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
// GSVector4i vf = v.xxzzlh().srl16(1);
|
||||
|
||||
|
@ -2227,7 +2227,7 @@ void GSDrawScanlineCodeGenerator::Fog()
|
|||
// rb = m_local.gd->frb.lerp16<0>(rb, f);
|
||||
// ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga);
|
||||
|
||||
vmovdqa(xmm0, ptr[!m_sel.sprite ? &m_local.temp.f : &m_local.p.f]);
|
||||
vmovdqa(xmm0, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.f : &m_local.p.f]);
|
||||
vmovdqa(xmm1, xmm6);
|
||||
|
||||
vmovdqa(xmm2, ptr[&m_local.gd->frb]);
|
||||
|
@ -2350,7 +2350,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
|
||||
bool fast = m_sel.ztest && m_sel.zpsm < 2;
|
||||
|
||||
vmovdqa(xmm1, ptr[!m_sel.sprite ? &m_local.temp.zs : &m_local.p.z]);
|
||||
vmovdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]);
|
||||
|
||||
if(fast)
|
||||
{
|
||||
|
|
|
@ -287,7 +287,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
lea(edi, ptr[ebx * 2]);
|
||||
add(edi, ptr[&m_local.gd->fzbc]);
|
||||
|
||||
if(!m_sel.sprite && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
|
||||
if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
|
||||
{
|
||||
// edx = &m_local.d[skip]
|
||||
|
||||
|
@ -298,7 +298,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
mov(ebx, ptr[esp + _v]);
|
||||
}
|
||||
|
||||
if(!m_sel.sprite)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
if(m_sel.fwrite && m_sel.fge || m_sel.zb)
|
||||
{
|
||||
|
@ -370,7 +370,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
|
||||
paddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]);
|
||||
|
||||
if(!m_sel.sprite || m_sel.mmin)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
|
||||
{
|
||||
paddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]);
|
||||
}
|
||||
|
@ -458,7 +458,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
|
||||
add(edi, 8);
|
||||
|
||||
if(!m_sel.sprite)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
// z += m_local.d4.z;
|
||||
|
||||
|
@ -504,7 +504,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
paddd(xmm2, ptr[&m_local.temp.s]);
|
||||
movdqa(ptr[&m_local.temp.s], xmm2);
|
||||
|
||||
if(!m_sel.sprite || m_sel.mmin)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
|
||||
{
|
||||
pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
paddd(xmm3, ptr[&m_local.temp.t]);
|
||||
|
@ -602,7 +602,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
|
||||
// GSVector4i zs = zi;
|
||||
|
||||
if(!m_sel.sprite)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
if(m_sel.zoverflow)
|
||||
{
|
||||
|
@ -738,7 +738,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
psrlw(xmm0, 1);
|
||||
movdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
if(!m_sel.sprite)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
// GSVector4i vf = v.xxzzlh().srl16(1);
|
||||
|
||||
|
@ -2341,7 +2341,7 @@ void GSDrawScanlineCodeGenerator::Fog()
|
|||
// rb = m_local.gd->frb.lerp16<0>(rb, f);
|
||||
// ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga);
|
||||
|
||||
movdqa(xmm0, ptr[!m_sel.sprite ? &m_local.temp.f : &m_local.p.f]);
|
||||
movdqa(xmm0, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.f : &m_local.p.f]);
|
||||
movdqa(xmm1, xmm6);
|
||||
|
||||
movdqa(xmm2, ptr[&m_local.gd->frb]);
|
||||
|
@ -2464,7 +2464,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
|
||||
bool fast = m_sel.ztest && m_sel.zpsm < 2;
|
||||
|
||||
movdqa(xmm1, ptr[!m_sel.sprite ? &m_local.temp.zs : &m_local.p.z]);
|
||||
movdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]);
|
||||
|
||||
if(fast)
|
||||
{
|
||||
|
|
|
@ -24,10 +24,7 @@
|
|||
|
||||
GSDump::GSDump()
|
||||
: m_gs(NULL)
|
||||
, m_obj(NULL)
|
||||
, m_frames(0)
|
||||
, m_objects(0)
|
||||
, m_vertices(0)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -39,11 +36,8 @@ GSDump::~GSDump()
|
|||
void GSDump::Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GSPrivRegSet* regs)
|
||||
{
|
||||
m_gs = fopen((fn + ".gs").c_str(), "wb");
|
||||
m_obj = fopen((fn + ".obj").c_str(), "wt");
|
||||
|
||||
m_frames = 0;
|
||||
m_objects = 0;
|
||||
m_vertices = 0;
|
||||
|
||||
if(m_gs)
|
||||
{
|
||||
|
@ -57,7 +51,6 @@ void GSDump::Open(const string& fn, uint32 crc, const GSFreezeData& fd, const GS
|
|||
void GSDump::Close()
|
||||
{
|
||||
if(m_gs) {fclose(m_gs); m_gs = NULL;}
|
||||
if(m_obj) {fclose(m_obj); m_obj = NULL;}
|
||||
}
|
||||
|
||||
void GSDump::Transfer(int index, const uint8* mem, size_t size)
|
||||
|
@ -96,67 +89,3 @@ void GSDump::VSync(int field, bool last, const GSPrivRegSet* regs)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSDump::Object(GSVertexSW* vertices, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
if(m_obj)
|
||||
{
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
|
||||
// TODO
|
||||
|
||||
break;
|
||||
|
||||
case GS_LINE_CLASS:
|
||||
|
||||
// TODO
|
||||
|
||||
break;
|
||||
|
||||
case GS_TRIANGLE_CLASS:
|
||||
|
||||
for(int i = 0; i < count; i++)
|
||||
{
|
||||
float x = vertices[i].p.x;
|
||||
float y = vertices[i].p.y;
|
||||
float z = vertices[i].p.z;
|
||||
|
||||
fprintf(m_obj, "v %f %f %f\n", x, y, z);
|
||||
}
|
||||
|
||||
for(int i = 0; i < count; i++)
|
||||
{
|
||||
fprintf(m_obj, "vt %f %f %f\n", vertices[i].t.x, vertices[i].t.y, vertices[i].t.z);
|
||||
}
|
||||
|
||||
for(int i = 0; i < count; i++)
|
||||
{
|
||||
fprintf(m_obj, "vn %f %f %f\n", 0.0f, 0.0f, 0.0f);
|
||||
}
|
||||
|
||||
fprintf(m_obj, "g f%d_o%d_p%d_v%d\n", m_frames, m_objects, primclass, count);
|
||||
|
||||
for(int i = 0; i < count; i += 3)
|
||||
{
|
||||
int a = m_vertices + i + 1;
|
||||
int b = m_vertices + i + 2;
|
||||
int c = m_vertices + i + 3;
|
||||
|
||||
fprintf(m_obj, "f %d/%d/%d %d/%d/%d %d/%d/%d\n", a, a, a, b, b, b, c, c, c);
|
||||
}
|
||||
|
||||
m_vertices += count;
|
||||
m_objects++;
|
||||
|
||||
break;
|
||||
|
||||
case GS_SPRITE_CLASS:
|
||||
|
||||
// TODO
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,10 +46,7 @@ Regs data (id == 3)
|
|||
class GSDump
|
||||
{
|
||||
FILE* m_gs;
|
||||
FILE* m_obj;
|
||||
int m_frames;
|
||||
int m_objects;
|
||||
int m_vertices;
|
||||
|
||||
public:
|
||||
GSDump();
|
||||
|
@ -60,6 +57,5 @@ public:
|
|||
void ReadFIFO(uint32 size);
|
||||
void Transfer(int index, const uint8* mem, size_t size);
|
||||
void VSync(int field, bool last, const GSPrivRegSet* regs);
|
||||
void Object(GSVertexSW* vertices, int count, GS_PRIM_CLASS primclass);
|
||||
operator bool() {return m_gs != NULL;}
|
||||
};
|
||||
|
|
|
@ -1992,7 +1992,7 @@ GSOffset::~GSOffset()
|
|||
{
|
||||
}
|
||||
|
||||
vector<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
|
||||
uint32* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
|
||||
{
|
||||
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
|
||||
|
||||
|
@ -2000,23 +2000,37 @@ vector<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
|
|||
|
||||
if(bbox != NULL) *bbox = r;
|
||||
|
||||
vector<uint32>* pages = new vector<uint32>();
|
||||
// worst case:
|
||||
// bp page-aligned: (w * h) / (64 * 32)
|
||||
// bp block-aligned: (w * h) / (8 * 8)
|
||||
|
||||
// 32-bpp worst case: (w * h) / (64 * 32), it can be a bit more if we are only block-aligned (bp & 31) != 0
|
||||
int size = r.width() * r.height();
|
||||
|
||||
pages->reserve(((r.width() * r.height()) >> 11) + 2);
|
||||
int limit = std::min<int>((size >> ((bp & 31) != 0 ? 6 : 11)) + 2, MAX_PAGES) + 1;
|
||||
|
||||
uint32 tmp[16];
|
||||
uint32* pages = new uint32[limit];
|
||||
|
||||
memset(tmp, 0, sizeof(tmp));
|
||||
__aligned(uint32, 16) tmp[16];
|
||||
|
||||
((GSVector4i*)tmp)[0] = GSVector4i::zero();
|
||||
((GSVector4i*)tmp)[1] = GSVector4i::zero();
|
||||
((GSVector4i*)tmp)[2] = GSVector4i::zero();
|
||||
((GSVector4i*)tmp)[3] = GSVector4i::zero();
|
||||
|
||||
r = r.sra32(3);
|
||||
|
||||
bs.x >>= 3;
|
||||
bs.y >>= 3;
|
||||
|
||||
uint32* RESTRICT p = pages;
|
||||
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
{
|
||||
uint32 base = block.row[y >> 3];
|
||||
uint32 base = block.row[y];
|
||||
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
{
|
||||
uint32 n = (base + block.col[x >> 3]) >> 5;
|
||||
uint32 n = (base + block.col[x]) >> 5;
|
||||
|
||||
if(n < MAX_PAGES)
|
||||
{
|
||||
|
@ -2027,11 +2041,15 @@ vector<uint32>* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
|
|||
{
|
||||
row |= col;
|
||||
|
||||
pages->push_back(n);
|
||||
*p++ = n;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*p++ = EOP;
|
||||
|
||||
ASSERT(p - pages <= limit);
|
||||
|
||||
return pages;
|
||||
}
|
||||
|
|
|
@ -51,7 +51,9 @@ public:
|
|||
GSOffset(uint32 bp, uint32 bw, uint32 psm);
|
||||
virtual ~GSOffset();
|
||||
|
||||
vector<uint32>* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL);
|
||||
enum {EOP = 0xffffffff};
|
||||
|
||||
uint32* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL);
|
||||
};
|
||||
|
||||
struct GSPixelOffset4
|
||||
|
|
|
@ -35,7 +35,7 @@ public:
|
|||
|
||||
enum counter_t
|
||||
{
|
||||
Frame, Prim, Draw, Swizzle, Unswizzle, Fillrate, Quad,
|
||||
Frame, Prim, PrimNotRendered, Draw, Swizzle, Unswizzle, Fillrate, Quad,
|
||||
CounterLast,
|
||||
};
|
||||
|
||||
|
|
|
@ -105,12 +105,17 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
|
|||
{
|
||||
GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id);
|
||||
|
||||
if(data->count == 0) return;
|
||||
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
|
||||
|
||||
m_ds->BeginDraw(data->param);
|
||||
|
||||
const GSVertexSW* vertices = data->vertices;
|
||||
const GSVertexSW* vertices_end = data->vertices + data->count;
|
||||
const GSVertexSW* vertex = data->vertex;
|
||||
const GSVertexSW* vertex_end = data->vertex + data->vertex_count;
|
||||
|
||||
const uint32* index = data->index;
|
||||
const uint32* index_end = data->index + data->index_count;
|
||||
|
||||
uint32 tmp_index[] = {0, 1, 2};
|
||||
|
||||
bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor));
|
||||
|
||||
|
@ -128,33 +133,57 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
|
|||
|
||||
if(scissor_test)
|
||||
{
|
||||
DrawPoint<true>(vertices, data->count);
|
||||
DrawPoint<true>(vertex, data->vertex_count, index, data->index_count);
|
||||
}
|
||||
else
|
||||
{
|
||||
DrawPoint<false>(vertices, data->count);
|
||||
DrawPoint<false>(vertex, data->vertex_count, index, data->index_count);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case GS_LINE_CLASS:
|
||||
|
||||
do {DrawLine(vertices); vertices += 2;}
|
||||
while(vertices < vertices_end);
|
||||
if(index != NULL)
|
||||
{
|
||||
do {DrawLine(vertex, index); index += 2;}
|
||||
while(index < index_end);
|
||||
}
|
||||
else
|
||||
{
|
||||
do {DrawLine(vertex, tmp_index); vertex += 2;}
|
||||
while(vertex < vertex_end);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case GS_TRIANGLE_CLASS:
|
||||
|
||||
do {DrawTriangle(vertices); vertices += 3;}
|
||||
while(vertices < vertices_end);
|
||||
if(index != NULL)
|
||||
{
|
||||
do {DrawTriangle(vertex, index); index += 3;}
|
||||
while(index < index_end);
|
||||
}
|
||||
else
|
||||
{
|
||||
do {DrawTriangle(vertex, tmp_index); vertex += 3;}
|
||||
while(vertex < vertex_end);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case GS_SPRITE_CLASS:
|
||||
|
||||
do {DrawSprite(vertices, data->solidrect); vertices += 2;}
|
||||
while(vertices < vertices_end);
|
||||
if(index != NULL)
|
||||
{
|
||||
do {DrawSprite(vertex, index, data->solidrect); index += 2;}
|
||||
while(index < index_end);
|
||||
}
|
||||
else
|
||||
{
|
||||
do {DrawSprite(vertex, tmp_index, data->solidrect); vertex += 2;}
|
||||
while(vertex < vertex_end);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
|
@ -171,11 +200,13 @@ void GSRasterizer::Draw(shared_ptr<GSRasterizerData> data)
|
|||
}
|
||||
|
||||
template<bool scissor_test>
|
||||
void GSRasterizer::DrawPoint(const GSVertexSW* v, int count)
|
||||
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count)
|
||||
{
|
||||
for(; count > 0; count--, v++)
|
||||
for(int i = 0, count = index != NULL ? index_count : vertex_count; i < count; i++)
|
||||
{
|
||||
GSVector4i p(v->p);
|
||||
const GSVertexSW& v = vertex[index != NULL ? index[i] : i];
|
||||
|
||||
GSVector4i p(v.p);
|
||||
|
||||
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
|
||||
{
|
||||
|
@ -183,17 +214,20 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, int count)
|
|||
{
|
||||
m_pixels++;
|
||||
|
||||
m_ds->SetupPrim(v, *v);
|
||||
m_ds->SetupPrim(v, v);
|
||||
|
||||
m_ds->DrawScanline(1, p.x, p.y, *v);
|
||||
m_ds->DrawScanline(1, p.x, p.y, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSRasterizer::DrawLine(const GSVertexSW* v)
|
||||
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
|
||||
{
|
||||
GSVertexSW dv = v[1] - v[0];
|
||||
const GSVertexSW& v0 = vertex[index[0]];
|
||||
const GSVertexSW& v1 = vertex[index[1]];
|
||||
|
||||
GSVertexSW dv = v1 - v0;
|
||||
|
||||
GSVector4 dp = dv.p.abs();
|
||||
|
||||
|
@ -201,10 +235,10 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
|
|||
|
||||
if(m_ds->HasEdge())
|
||||
{
|
||||
DrawEdge(v[0], v[1], dv, i, 0);
|
||||
DrawEdge(v[0], v[1], dv, i, 1);
|
||||
DrawEdge(v0, v1, dv, i, 0);
|
||||
DrawEdge(v0, v1, dv, i, 1);
|
||||
|
||||
Flush(v, GSVertexSW::zero(), true);
|
||||
Flush(v1, GSVertexSW::zero(), true);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -217,19 +251,19 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
|
|||
{
|
||||
// shortcut for horizontal lines
|
||||
|
||||
GSVector4 mask = (v[0].p > v[1].p).xxxx();
|
||||
GSVector4 mask = (v0.p > v1.p).xxxx();
|
||||
|
||||
GSVertexSW scan;
|
||||
|
||||
scan.p = v[0].p.blend32(v[1].p, mask);
|
||||
scan.t = v[0].t.blend32(v[1].t, mask);
|
||||
scan.c = v[0].c.blend32(v[1].c, mask);
|
||||
scan.p = v0.p.blend32(v1.p, mask);
|
||||
scan.t = v0.t.blend32(v1.t, mask);
|
||||
scan.c = v0.c.blend32(v1.c, mask);
|
||||
|
||||
GSVector4i p(scan.p);
|
||||
|
||||
if(m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y))
|
||||
{
|
||||
GSVector4 lrf = scan.p.upl(v[1].p.blend32(v[0].p, mask)).ceil();
|
||||
GSVector4 lrf = scan.p.upl(v1.p.blend32(v0.p, mask)).ceil();
|
||||
GSVector4 l = lrf.max(m_fscissor_x);
|
||||
GSVector4 r = lrf.min(m_fscissor_x);
|
||||
GSVector4i lr = GSVector4i(l.xxyy(r));
|
||||
|
@ -247,7 +281,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
|
|||
|
||||
scan += dscan * (l - scan.p).xxxx();
|
||||
|
||||
m_ds->SetupPrim(v, dscan);
|
||||
m_ds->SetupPrim(v1, dscan);
|
||||
|
||||
m_ds->DrawScanline(pixels, left, p.y, scan);
|
||||
}
|
||||
|
@ -261,7 +295,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
|
|||
|
||||
if(steps > 0)
|
||||
{
|
||||
GSVertexSW edge = v[0];
|
||||
GSVertexSW edge = v0;
|
||||
GSVertexSW dedge = dv / GSVector4(dp.v[i]);
|
||||
|
||||
GSVertexSW* RESTRICT e = m_edge.buff;
|
||||
|
@ -287,7 +321,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
|
|||
|
||||
m_edge.count = e - m_edge.buff;
|
||||
|
||||
Flush(v, GSVertexSW::zero());
|
||||
Flush(v1, GSVertexSW::zero());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -303,42 +337,47 @@ static const uint8 s_ysort[8][4] =
|
|||
{2, 1, 0, 0}, // y2 < y1 < y0
|
||||
};
|
||||
|
||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
|
||||
{
|
||||
GSVertexSW v[3];
|
||||
GSVertexSW dv[3];
|
||||
GSVertexSW edge;
|
||||
GSVertexSW dedge;
|
||||
GSVertexSW dscan;
|
||||
|
||||
GSVector4 y0011 = vertices[0].p.yyyy(vertices[1].p);
|
||||
GSVector4 y1221 = vertices[1].p.yyyy(vertices[2].p).xzzx();
|
||||
GSVector4 y0011 = vertex[index[0]].p.yyyy(vertex[index[1]].p);
|
||||
GSVector4 y1221 = vertex[index[1]].p.yyyy(vertex[index[2]].p).xzzx();
|
||||
|
||||
int mask = (y0011 > y1221).mask() & 7;
|
||||
int m1 = (y0011 > y1221).mask() & 7;
|
||||
|
||||
v[0] = vertices[s_ysort[mask][0]];
|
||||
v[1] = vertices[s_ysort[mask][1]];
|
||||
v[2] = vertices[s_ysort[mask][2]];
|
||||
int i[3];
|
||||
|
||||
y0011 = v[0].p.yyyy(v[1].p);
|
||||
y1221 = v[1].p.yyyy(v[2].p).xzzx();
|
||||
i[0] = index[s_ysort[m1][0]];
|
||||
i[1] = index[s_ysort[m1][1]];
|
||||
i[2] = index[s_ysort[m1][2]];
|
||||
|
||||
int i = (y0011 == y1221).mask() & 7;
|
||||
const GSVertexSW& v0 = vertex[i[0]];
|
||||
const GSVertexSW& v1 = vertex[i[1]];
|
||||
const GSVertexSW& v2 = vertex[i[2]];
|
||||
|
||||
y0011 = v0.p.yyyy(v1.p);
|
||||
y1221 = v1.p.yyyy(v2.p).xzzx();
|
||||
|
||||
m1 = (y0011 == y1221).mask() & 7;
|
||||
|
||||
// if(i == 0) => y0 < y1 < y2
|
||||
// if(i == 1) => y0 == y1 < y2
|
||||
// if(i == 4) => y0 < y1 == y2
|
||||
|
||||
if(i == 7) return; // y0 == y1 == y2
|
||||
if(m1 == 7) return; // y0 == y1 == y2
|
||||
|
||||
GSVector4 tbf = y0011.xzxz(y1221).ceil();
|
||||
GSVector4 tbmax = tbf.max(m_fscissor_y);
|
||||
GSVector4 tbmin = tbf.min(m_fscissor_y);
|
||||
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin));
|
||||
|
||||
dv[0] = v[1] - v[0];
|
||||
dv[1] = v[2] - v[0];
|
||||
dv[2] = v[2] - v[1];
|
||||
dv[0] = v1 - v0;
|
||||
dv[1] = v2 - v0;
|
||||
dv[2] = v2 - v1;
|
||||
|
||||
GSVector4 cross = dv[0].p * dv[1].p.yxwz();
|
||||
|
||||
|
@ -346,11 +385,11 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
|
||||
// the longest horizontal span would be cross.x / dv[1].p.y, but we don't need its actual value
|
||||
|
||||
int j = cross.upl(cross == GSVector4::zero()).mask();
|
||||
int m2 = cross.upl(cross == GSVector4::zero()).mask();
|
||||
|
||||
if(j & 2) return;
|
||||
if(m2 & 2) return;
|
||||
|
||||
j &= 1;
|
||||
m2 &= 1;
|
||||
|
||||
cross = cross.rcpnr();
|
||||
|
||||
|
@ -390,42 +429,42 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0
|
||||
dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1
|
||||
|
||||
if(i & 1)
|
||||
if(m1 & 1)
|
||||
{
|
||||
if(tb.y < tb.w)
|
||||
{
|
||||
edge = v[1 - j];
|
||||
edge = vertex[i[1 - m2]];
|
||||
|
||||
edge.p = edge.p.insert<0, 1>(v[j].p);
|
||||
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
|
||||
edge.p = edge.p.insert<0, 1>(vertex[i[m2]].p);
|
||||
dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
|
||||
|
||||
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, v[1 - j].p);
|
||||
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(tb.x < tb.z)
|
||||
{
|
||||
edge = v[0];
|
||||
edge = v0;
|
||||
|
||||
edge.p = edge.p.xxzw();
|
||||
dedge.p = ddx[j].xyzw(dedge.p);
|
||||
dedge.p = ddx[m2].xyzw(dedge.p);
|
||||
|
||||
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v[0].p);
|
||||
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p);
|
||||
}
|
||||
|
||||
if(tb.y < tb.w)
|
||||
{
|
||||
edge = v[1];
|
||||
edge = v1;
|
||||
|
||||
edge.p = (v[0].p.xxxx() + ddx[j] * dv[0].p.yyyy()).xyzw(edge.p);
|
||||
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
|
||||
edge.p = (v0.p.xxxx() + ddx[m2] * dv[0].p.yyyy()).xyzw(edge.p);
|
||||
dedge.p = ddx[2 - (m2 << 1)].yzzw(dedge.p);
|
||||
|
||||
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v[1].p);
|
||||
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p);
|
||||
}
|
||||
}
|
||||
|
||||
Flush(v, dscan);
|
||||
Flush(vertex[index[2]], dscan);
|
||||
|
||||
if(m_ds->HasEdge())
|
||||
{
|
||||
|
@ -433,14 +472,14 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
GSVector4 b = dx < GSVector4::zero(); // dx < 0
|
||||
GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0
|
||||
|
||||
int i = a.mask();
|
||||
int j = ((a | b) ^ c).mask() ^ 2; // evil
|
||||
int orientation = a.mask();
|
||||
int side = ((a | b) ^ c).mask() ^ 2; // evil
|
||||
|
||||
DrawEdge(v[0], v[1], dv[0], i & 1, j & 1);
|
||||
DrawEdge(v[0], v[2], dv[1], i & 2, j & 2);
|
||||
DrawEdge(v[1], v[2], dv[2], i & 4, j & 4);
|
||||
DrawEdge(v0, v1, dv[0], orientation & 1, side & 1);
|
||||
DrawEdge(v0, v2, dv[1], orientation & 2, side & 2);
|
||||
DrawEdge(v1, v2, dv[2], orientation & 4, side & 4);
|
||||
|
||||
Flush(v, GSVertexSW::zero(), true);
|
||||
Flush(vertex[index[2]], GSVertexSW::zero(), true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -492,18 +531,21 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
|
|||
m_edge.count += e - &m_edge.buff[m_edge.count];
|
||||
}
|
||||
|
||||
void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect)
|
||||
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index, bool solidrect)
|
||||
{
|
||||
const GSVertexSW& v0 = vertex[index[0]];
|
||||
const GSVertexSW& v1 = vertex[index[1]];
|
||||
|
||||
GSVector4 mask = (v0.p < v1.p).xyzw(GSVector4::zero());
|
||||
|
||||
GSVertexSW v[2];
|
||||
|
||||
GSVector4 mask = (vertices[0].p < vertices[1].p).xyzw(GSVector4::zero());
|
||||
v[0].p = v1.p.blend32(v0.p, mask);
|
||||
v[0].t = v1.t.blend32(v0.t, mask);
|
||||
v[0].c = v1.c;
|
||||
|
||||
v[0].p = vertices[1].p.blend32(vertices[0].p, mask);
|
||||
v[0].t = vertices[1].t.blend32(vertices[0].t, mask);
|
||||
v[0].c = vertices[1].c;
|
||||
|
||||
v[1].p = vertices[0].p.blend32(vertices[1].p, mask);
|
||||
v[1].t = vertices[0].t.blend32(vertices[1].t, mask);
|
||||
v[1].p = v0.p.blend32(v1.p, mask);
|
||||
v[1].t = v0.t.blend32(v1.t, mask);
|
||||
|
||||
GSVector4i r(v[0].p.xyxy(v[1].p).ceil());
|
||||
|
||||
|
@ -515,17 +557,6 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect)
|
|||
|
||||
if(solidrect)
|
||||
{
|
||||
/*
|
||||
if(m_id == 0)
|
||||
{
|
||||
m_ds->DrawRect(r, scan);
|
||||
|
||||
m_pixels += r.width() * r.height();
|
||||
}
|
||||
|
||||
return;
|
||||
*/
|
||||
|
||||
if(m_threads == 1)
|
||||
{
|
||||
m_ds->DrawRect(r, scan);
|
||||
|
@ -570,7 +601,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, bool solidrect)
|
|||
if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy();
|
||||
if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx();
|
||||
|
||||
m_ds->SetupPrim(v, dscan);
|
||||
m_ds->SetupPrim(v1, dscan);
|
||||
|
||||
while(1)
|
||||
{
|
||||
|
@ -787,7 +818,7 @@ void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, con
|
|||
e->p.i16[2] = (int16)top;
|
||||
}
|
||||
|
||||
void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge)
|
||||
void GSRasterizer::Flush(const GSVertexSW& vertex, const GSVertexSW& dscan, bool edge)
|
||||
{
|
||||
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
|
||||
|
||||
|
@ -795,7 +826,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
|
|||
|
||||
if(count > 0)
|
||||
{
|
||||
m_ds->SetupPrim(vertices, dscan);
|
||||
m_ds->SetupPrim(vertex, dscan);
|
||||
|
||||
const GSVertexSW* RESTRICT e = m_edge.buff;
|
||||
const GSVertexSW* RESTRICT ee = e + count;
|
||||
|
@ -882,19 +913,7 @@ void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item)
|
|||
{
|
||||
m_solidrect_count++;
|
||||
}
|
||||
/*
|
||||
if(m_workers.size() > 1 && item->solidrect) // TODO: clip to thread area and dispatch?
|
||||
{
|
||||
for(size_t i = 0; i < m_workers.size(); i++)
|
||||
{
|
||||
m_workers[i]->Wait();
|
||||
}
|
||||
|
||||
m_workers.front()->Process(item);
|
||||
|
||||
return;
|
||||
}
|
||||
*/
|
||||
if(item->syncpoint)
|
||||
{
|
||||
for(size_t i = 0; i < m_workers.size(); i++)
|
||||
|
|
|
@ -34,8 +34,11 @@ public:
|
|||
GSVector4i scissor;
|
||||
GSVector4i bbox;
|
||||
GS_PRIM_CLASS primclass;
|
||||
GSVertexSW* vertices;
|
||||
int count;
|
||||
uint8* buff;
|
||||
GSVertexSW* vertex;
|
||||
int vertex_count;
|
||||
uint32* index;
|
||||
int index_count;
|
||||
bool solidrect;
|
||||
bool syncpoint;
|
||||
uint64 frame;
|
||||
|
@ -50,8 +53,11 @@ public:
|
|||
: scissor(GSVector4i::zero())
|
||||
, bbox(GSVector4i::zero())
|
||||
, primclass(GS_INVALID_CLASS)
|
||||
, vertices(NULL)
|
||||
, count(0)
|
||||
, buff(NULL)
|
||||
, vertex(NULL)
|
||||
, vertex_count(0)
|
||||
, index(NULL)
|
||||
, index_count(0)
|
||||
, solidrect(false)
|
||||
, syncpoint(false)
|
||||
, frame(0)
|
||||
|
@ -63,7 +69,7 @@ public:
|
|||
|
||||
virtual ~GSRasterizerData()
|
||||
{
|
||||
if(vertices != NULL) _aligned_free(vertices);
|
||||
if(buff != NULL) _aligned_free(buff);
|
||||
|
||||
// derived class should free param and its members
|
||||
}
|
||||
|
@ -72,7 +78,7 @@ public:
|
|||
class IDrawScanline : public GSAlignedClass<32>
|
||||
{
|
||||
public:
|
||||
typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW& vertex, const GSVertexSW& dscan);
|
||||
typedef void (__fastcall *DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit
|
||||
|
||||
|
@ -91,14 +97,14 @@ public:
|
|||
|
||||
#ifdef ENABLE_JIT_RASTERIZER
|
||||
|
||||
__forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);}
|
||||
__forceinline void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan) {m_sp(vertex, dscan);}
|
||||
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);}
|
||||
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);}
|
||||
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
|
||||
|
||||
#else
|
||||
|
||||
virtual void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) = 0;
|
||||
virtual void SetupPrim(const GSVertexSW& vertex, const GSVertexSW& dscan) = 0;
|
||||
virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0;
|
||||
virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0;
|
||||
virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0;
|
||||
|
@ -134,17 +140,17 @@ protected:
|
|||
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
|
||||
|
||||
template<bool scissor_test>
|
||||
void DrawPoint(const GSVertexSW* v, int count);
|
||||
void DrawLine(const GSVertexSW* v);
|
||||
void DrawTriangle(const GSVertexSW* v);
|
||||
void DrawSprite(const GSVertexSW* v, bool solidrect);
|
||||
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count);
|
||||
void DrawLine(const GSVertexSW* vertex, const uint32* index);
|
||||
void DrawTriangle(const GSVertexSW* vertex, const uint32* index);
|
||||
void DrawSprite(const GSVertexSW* vertex, const uint32* index, bool solidrect);
|
||||
|
||||
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0);
|
||||
|
||||
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
|
||||
|
||||
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
|
||||
__forceinline void Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge = false);
|
||||
__forceinline void Flush(const GSVertexSW& vertex, const GSVertexSW& dscan, bool edge = false);
|
||||
|
||||
public:
|
||||
GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon);
|
||||
|
|
|
@ -22,9 +22,8 @@
|
|||
#include "stdafx.h"
|
||||
#include "GSRenderer.h"
|
||||
|
||||
GSRenderer::GSRenderer()
|
||||
: GSState()
|
||||
, m_vt(this)
|
||||
GSRenderer::GSRenderer(GSVertexTrace* vt, size_t vertex_stride)
|
||||
: GSState(vt, vertex_stride)
|
||||
, m_dev(NULL)
|
||||
, m_shader(0)
|
||||
{
|
||||
|
@ -78,8 +77,6 @@ bool GSRenderer::CreateDevice(GSDevice* dev)
|
|||
|
||||
void GSRenderer::ResetDevice()
|
||||
{
|
||||
ResetPrim();
|
||||
|
||||
if(m_dev) m_dev->Reset(1, 1);
|
||||
}
|
||||
|
||||
|
@ -336,7 +333,7 @@ void GSRenderer::VSync(int field)
|
|||
theApp.m_gs_interlace[m_interlace].name.c_str(),
|
||||
theApp.m_gs_aspectratio[m_aspectratio].name.c_str(),
|
||||
(int)m_perfmon.Get(GSPerfMon::Quad),
|
||||
(int)m_perfmon.Get(GSPerfMon::Prim),
|
||||
(int)(m_perfmon.Get(GSPerfMon::Prim) - m_perfmon.Get(GSPerfMon::PrimNotRendered)),
|
||||
(int)m_perfmon.Get(GSPerfMon::Draw),
|
||||
m_perfmon.CPU(),
|
||||
m_perfmon.Get(GSPerfMon::Swizzle) / 1024,
|
||||
|
@ -541,308 +538,3 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
|
|||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void GSRenderer::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear)
|
||||
{
|
||||
int tw = TEX0.TW;
|
||||
int th = TEX0.TH;
|
||||
|
||||
int w = 1 << tw;
|
||||
int h = 1 << th;
|
||||
|
||||
GSVector4i tr(0, 0, w, h);
|
||||
|
||||
int wms = CLAMP.WMS;
|
||||
int wmt = CLAMP.WMT;
|
||||
|
||||
int minu = (int)CLAMP.MINU;
|
||||
int minv = (int)CLAMP.MINV;
|
||||
int maxu = (int)CLAMP.MAXU;
|
||||
int maxv = (int)CLAMP.MAXV;
|
||||
|
||||
GSVector4i vr = tr;
|
||||
|
||||
switch(wms)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
if(vr.x < minu) vr.x = minu;
|
||||
if(vr.z > maxu + 1) vr.z = maxu + 1;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
vr.x = maxu;
|
||||
vr.z = vr.x + (minu + 1);
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
switch(wmt)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
if(vr.y < minv) vr.y = minv;
|
||||
if(vr.w > maxv + 1) vr.w = maxv + 1;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
vr.y = maxv;
|
||||
vr.w = vr.y + (minv + 1);
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
if(wms + wmt < 6)
|
||||
{
|
||||
GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t);
|
||||
|
||||
if(linear)
|
||||
{
|
||||
st += GSVector4(-0x8000, 0x8000).xxyy();
|
||||
}
|
||||
|
||||
GSVector4i uv = GSVector4i(st).sra32(16);
|
||||
|
||||
GSVector4i u, v;
|
||||
|
||||
int mask = 0;
|
||||
|
||||
if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT)
|
||||
{
|
||||
u = uv & GSVector4i::xffffffff().srl32(32 - tw);
|
||||
v = uv & GSVector4i::xffffffff().srl32(32 - th);
|
||||
|
||||
GSVector4i uu = uv.sra32(tw);
|
||||
GSVector4i vv = uv.sra32(th);
|
||||
|
||||
mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
|
||||
}
|
||||
|
||||
uv = uv.rintersect(tr);
|
||||
|
||||
switch(wms)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
case CLAMP_REGION_CLAMP:
|
||||
if(vr.x < uv.x) vr.x = uv.x;
|
||||
if(vr.z > uv.z + 1) vr.z = uv.z + 1;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
switch(wmt)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;}
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
case CLAMP_REGION_CLAMP:
|
||||
if(vr.y < uv.y) vr.y = uv.y;
|
||||
if(vr.w > uv.w + 1) vr.w = uv.w + 1;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
}
|
||||
|
||||
r = vr.rintersect(tr);
|
||||
}
|
||||
|
||||
void GSRenderer::GetAlphaMinMax()
|
||||
{
|
||||
if(m_vt.m_alpha.valid)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const GSDrawingEnvironment& env = m_env;
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww();
|
||||
|
||||
if(PRIM->TME && context->TEX0.TCC)
|
||||
{
|
||||
switch(GSLocalMemory::m_psm[context->TEX0.PSM].fmt)
|
||||
{
|
||||
case 0:
|
||||
a.y = 0;
|
||||
a.w = 0xff;
|
||||
break;
|
||||
case 1:
|
||||
a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0;
|
||||
a.w = env.TEXA.TA0;
|
||||
break;
|
||||
case 2:
|
||||
a.y = env.TEXA.AEM ? 0 : min(env.TEXA.TA0, env.TEXA.TA1);
|
||||
a.w = max(env.TEXA.TA0, env.TEXA.TA1);
|
||||
break;
|
||||
case 3:
|
||||
m_mem.m_clut.GetAlphaMinMax32(a.y, a.w);
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
switch(context->TEX0.TFX)
|
||||
{
|
||||
case TFX_MODULATE:
|
||||
a.x = (a.x * a.y) >> 7;
|
||||
a.z = (a.z * a.w) >> 7;
|
||||
if(a.x > 0xff) a.x = 0xff;
|
||||
if(a.z > 0xff) a.z = 0xff;
|
||||
break;
|
||||
case TFX_DECAL:
|
||||
a.x = a.y;
|
||||
a.z = a.w;
|
||||
break;
|
||||
case TFX_HIGHLIGHT:
|
||||
a.x = a.x + a.y;
|
||||
a.z = a.z + a.w;
|
||||
if(a.x > 0xff) a.x = 0xff;
|
||||
if(a.z > 0xff) a.z = 0xff;
|
||||
break;
|
||||
case TFX_HIGHLIGHT2:
|
||||
a.x = a.y;
|
||||
a.z = a.w;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
}
|
||||
|
||||
m_vt.m_alpha.min = a.x;
|
||||
m_vt.m_alpha.max = a.z;
|
||||
m_vt.m_alpha.valid = true;
|
||||
}
|
||||
|
||||
// Tries to resolve the alpha test for the whole batch up front, using the
// alpha range computed by GetAlphaMinMax().
//
// Returns false when the outcome cannot be decided for the batch as a whole
// (some pixels may pass and some may fail); fm and zm are left untouched then.
// Returns true when every pixel is known to pass or known to fail. If they all
// fail, the frame mask (fm) and z mask (zm) are updated according to
// TEST.AFAIL so that the failing writes are masked out.
bool GSRenderer::TryAlphaTest(uint32& fm, uint32& zm)
{
	const GSDrawingContext* ctx = m_context;

	bool pass = true;

	if(ctx->TEST.ATST == ATST_NEVER)
	{
		pass = false;
	}
	else if(ctx->TEST.ATST != ATST_ALWAYS)
	{
		GetAlphaMinMax();

		const int lo = m_vt.m_alpha.min;
		const int hi = m_vt.m_alpha.max;
		const int ref = ctx->TEST.AREF;

		// all_pass: the whole [lo, hi] range satisfies the test.
		// all_fail: no value in [lo, hi] satisfies the test.
		bool all_pass = false;
		bool all_fail = false;

		switch(ctx->TEST.ATST)
		{
		case ATST_NEVER:
			all_fail = true;
			break;
		case ATST_ALWAYS:
			all_pass = true;
			break;
		case ATST_LESS:
			all_pass = hi < ref;
			all_fail = lo >= ref;
			break;
		case ATST_LEQUAL:
			all_pass = hi <= ref;
			all_fail = lo > ref;
			break;
		case ATST_EQUAL:
			all_pass = lo == ref && hi == ref;
			all_fail = lo > ref || hi < ref;
			break;
		case ATST_GEQUAL:
			all_pass = lo >= ref;
			all_fail = hi < ref;
			break;
		case ATST_GREATER:
			all_pass = lo > ref;
			all_fail = hi <= ref;
			break;
		case ATST_NOTEQUAL:
			all_pass = lo > ref || hi < ref;
			all_fail = lo == ref && hi == ref;
			break;
		default:
			__assume(0);
		}

		if(all_pass)
		{
			pass = true;
		}
		else if(all_fail)
		{
			pass = false;
		}
		else
		{
			// Mixed result, the test has to be done per pixel.
			return false;
		}
	}

	if(!pass)
	{
		// Everything fails: translate the fail action into write masks.
		switch(ctx->TEST.AFAIL)
		{
		case AFAIL_KEEP:
			zm = 0xffffffff;
			fm = zm;
			break;
		case AFAIL_FB_ONLY:
			zm = 0xffffffff;
			break;
		case AFAIL_ZB_ONLY:
			fm = 0xffffffff;
			break;
		case AFAIL_RGB_ONLY:
			fm |= 0xff000000;
			zm = 0xffffffff;
			break;
		default:
			__assume(0);
		}
	}

	return true;
}
|
||||
|
||||
bool GSRenderer::IsOpaque()
|
||||
{
|
||||
if(PRIM->AA1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!PRIM->ABE)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
int amin = 0, amax = 0xff;
|
||||
|
||||
if(context->ALPHA.A != context->ALPHA.B)
|
||||
{
|
||||
if(context->ALPHA.C == 0)
|
||||
{
|
||||
GetAlphaMinMax();
|
||||
|
||||
amin = m_vt.m_alpha.min;
|
||||
amax = m_vt.m_alpha.max;
|
||||
}
|
||||
else if(context->ALPHA.C == 1)
|
||||
{
|
||||
if(context->FRAME.PSM == PSM_PSMCT24 || context->FRAME.PSM == PSM_PSMZ24)
|
||||
{
|
||||
amin = amax = 0x80;
|
||||
}
|
||||
}
|
||||
else if(context->ALPHA.C == 2)
|
||||
{
|
||||
amin = amax = context->ALPHA.FIX;
|
||||
}
|
||||
}
|
||||
|
||||
return context->ALPHA.IsOpaque(amin, amax);
|
||||
}
|
||||
|
|
|
@ -24,8 +24,6 @@
|
|||
#include "GSdx.h"
|
||||
#include "GSWnd.h"
|
||||
#include "GSState.h"
|
||||
#include "GSVertexTrace.h"
|
||||
#include "GSVertexList.h"
|
||||
#include "GSCapture.h"
|
||||
|
||||
class GSRenderer : public GSState
|
||||
|
@ -49,15 +47,6 @@ protected:
|
|||
|
||||
virtual GSTexture* GetOutput(int i) = 0;
|
||||
|
||||
GSVertexTrace m_vt;
|
||||
|
||||
// following functions need m_vt to be initialized
|
||||
|
||||
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
|
||||
void GetAlphaMinMax();
|
||||
bool TryAlphaTest(uint32& fm, uint32& zm);
|
||||
bool IsOpaque();
|
||||
|
||||
public:
|
||||
GSWnd m_wnd;
|
||||
GSDevice* m_dev;
|
||||
|
@ -67,10 +56,9 @@ public:
|
|||
bool s_save;
|
||||
bool s_savez;
|
||||
int s_saven;
|
||||
GSCritSec s_lock;
|
||||
|
||||
public:
|
||||
GSRenderer();
|
||||
GSRenderer(GSVertexTrace* vt, size_t vertex_stride);
|
||||
virtual ~GSRenderer();
|
||||
|
||||
virtual bool CreateWnd(const string& title, int w, int h);
|
||||
|
@ -93,157 +81,4 @@ public:
|
|||
GSCritSec m_pGSsetTitle_Crit;
|
||||
|
||||
char m_GStitleInfoBuffer[128];
|
||||
};
|
||||
|
||||
template<class Vertex> class GSRendererT : public GSRenderer
|
||||
{
|
||||
protected:
|
||||
Vertex* m_vertices;
|
||||
int m_count;
|
||||
int m_maxcount;
|
||||
GSVertexList<Vertex> m_vl;
|
||||
|
||||
void Reset()
|
||||
{
|
||||
m_count = 0;
|
||||
m_vl.RemoveAll();
|
||||
|
||||
GSRenderer::Reset();
|
||||
}
|
||||
|
||||
void ResetPrim()
|
||||
{
|
||||
m_vl.RemoveAll();
|
||||
}
|
||||
|
||||
void FlushPrim()
|
||||
{
|
||||
if(m_count == 0) return;
|
||||
|
||||
if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3)
|
||||
{
|
||||
// FIXME: berserk fpsm = 27 (8H)
|
||||
|
||||
if(!m_dev->IsLost())
|
||||
{
|
||||
m_vt.Update(m_vertices, m_count, GSUtil::GetPrimClass(PRIM->PRIM));
|
||||
|
||||
Draw();
|
||||
}
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Draw, 1);
|
||||
}
|
||||
|
||||
m_count = 0;
|
||||
}
|
||||
|
||||
void GrowVertexBuffer()
|
||||
{
|
||||
int maxcount = std::max<int>(m_maxcount * 3 / 2, 10000);
|
||||
Vertex* vertices = (Vertex*)_aligned_malloc(sizeof(Vertex) * maxcount, 16);
|
||||
|
||||
if(m_vertices != NULL)
|
||||
{
|
||||
memcpy(vertices, m_vertices, sizeof(Vertex) * m_maxcount);
|
||||
_aligned_free(m_vertices);
|
||||
}
|
||||
|
||||
m_vertices = vertices;
|
||||
m_maxcount = maxcount - 100;
|
||||
}
|
||||
|
||||
// Returns a pointer to the drawing vertex. Can return NULL!
|
||||
|
||||
template<uint32 prim> __forceinline Vertex* DrawingKick(bool skip, int& count)
|
||||
{
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST: count = 1; break;
|
||||
case GS_LINELIST: count = 2; break;
|
||||
case GS_LINESTRIP: count = 2; break;
|
||||
case GS_TRIANGLELIST: count = 3; break;
|
||||
case GS_TRIANGLESTRIP: count = 3; break;
|
||||
case GS_TRIANGLEFAN: count = 3; break;
|
||||
case GS_SPRITE: count = 2; break;
|
||||
case GS_INVALID: count = 1; break;
|
||||
default: __assume(0);
|
||||
}
|
||||
|
||||
if(m_vl.GetCount() < count)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(m_count >= m_maxcount)
|
||||
{
|
||||
GrowVertexBuffer();
|
||||
}
|
||||
|
||||
Vertex* v = &m_vertices[m_count];
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
m_vl.GetAt(0, v[0]);
|
||||
m_vl.RemoveAll();
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
m_vl.GetAt(0, v[0]);
|
||||
m_vl.GetAt(1, v[1]);
|
||||
m_vl.RemoveAll();
|
||||
break;
|
||||
case GS_LINESTRIP:
|
||||
m_vl.GetAt(0, v[0]);
|
||||
m_vl.GetAt(1, v[1]);
|
||||
m_vl.RemoveAt(0, 1);
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
m_vl.GetAt(0, v[0]);
|
||||
m_vl.GetAt(1, v[1]);
|
||||
m_vl.GetAt(2, v[2]);
|
||||
m_vl.RemoveAll();
|
||||
break;
|
||||
case GS_TRIANGLESTRIP:
|
||||
m_vl.GetAt(0, v[0]);
|
||||
m_vl.GetAt(1, v[1]);
|
||||
m_vl.GetAt(2, v[2]);
|
||||
m_vl.RemoveAt(0, 2);
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
m_vl.GetAt(0, v[0]);
|
||||
m_vl.GetAt(1, v[1]);
|
||||
m_vl.GetAt(2, v[2]);
|
||||
m_vl.RemoveAt(1, 1);
|
||||
break;
|
||||
case GS_SPRITE:
|
||||
m_vl.GetAt(0, v[0]);
|
||||
m_vl.GetAt(1, v[1]);
|
||||
m_vl.RemoveAll();
|
||||
break;
|
||||
case GS_INVALID:
|
||||
ASSERT(0);
|
||||
m_vl.RemoveAll();
|
||||
return NULL;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
return !skip ? v : NULL;
|
||||
}
|
||||
|
||||
virtual void Draw() = 0;
|
||||
|
||||
public:
|
||||
GSRendererT()
|
||||
: GSRenderer()
|
||||
, m_vertices(NULL)
|
||||
, m_count(0)
|
||||
, m_maxcount(0)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~GSRendererT()
|
||||
{
|
||||
if(m_vertices) _aligned_free(m_vertices);
|
||||
}
|
||||
};
|
||||
};
|
|
@ -21,3 +21,411 @@
|
|||
|
||||
#include "stdafx.h"
|
||||
#include "GSRendererDX.h"
|
||||
#include "GSDeviceDX.h"
|
||||
|
||||
// Forwards the vertex trace, vertex stride and texture cache to GSRendererHW,
// stores the pixel center offset, starts with no topology selected (-1) and
// reads the "logz", "fba" and "UserHacks_AlphaHack" settings from the
// application configuration.
GSRendererDX::GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter)
	: GSRendererHW(vt, vertex_stride, tc)
	, m_pixelcenter(pixelcenter)
	, m_topology(-1)
{
	// Each setting is treated as a flag: any non-zero value enables it.
	m_logz = theApp.GetConfig("logz", 0) != 0;
	m_fba = theApp.GetConfig("fba", 1) != 0;
	//UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);
	UserHacks_AlphaHack = theApp.GetConfig("UserHacks_AlphaHack", 0) != 0;
}
|
||||
|
||||
// Nothing to release here: the body is intentionally empty.
GSRendererDX::~GSRendererDX()
|
||||
{
|
||||
}
|
||||
|
||||
// Draws the currently batched vertices/indices (m_vertex / m_index) on the DX device.
//
// Translates the GS drawing state (m_env, m_context, PRIM) into the device's
// output-merger, vertex, geometry and pixel shader selectors plus their constant
// buffers, binds everything, and issues the indexed draw in up to two alpha-test
// passes (TEST.DoFirstPass / TEST.DoSecondPass).
//
// rt  - render target; its size and scale drive the vertex transform and scissor
// ds  - depth/stencil target bound together with rt
// tex - source texture, or NULL when the batch is untextured
void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
{
|
||||
GSDrawingEnvironment& env = m_env;
|
||||
GSDrawingContext* context = m_context;
|
||||
|
||||
const GSVector2i& rtsize = rt->GetSize();
|
||||
const GSVector2& rtscale = rt->GetScale();
|
||||
|
||||
// Destination alpha test is not applied when the frame buffer is PSMCT24.
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
|
||||
|
||||
GSTexture* rtcopy = NULL;
|
||||
|
||||
ASSERT(m_dev != NULL);
|
||||
|
||||
GSDeviceDX* dev = (GSDeviceDX*)m_dev;
|
||||
|
||||
// DATE setup: with stencil support the device prepares it over the area covered by
// the batch (SetupDATE); otherwise a copy of the render target is made so the pixel
// shader can read it (bound to resource slot 2 further down).
if(DATE)
|
||||
{
|
||||
if(dev->HasStencil())
|
||||
{
|
||||
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
|
||||
GSVector4 o = GSVector4(-1.0f, 1.0f);
|
||||
|
||||
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
|
||||
GSVector4 dst = src * 2.0f + o.xxxx();
|
||||
|
||||
GSVertexPT1 vertices[] =
|
||||
{
|
||||
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
|
||||
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
|
||||
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
|
||||
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
|
||||
};
|
||||
|
||||
dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
|
||||
}
|
||||
else
|
||||
{
|
||||
rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());
|
||||
|
||||
// I'll use VertexTrace when I consider it more trustworthy
|
||||
|
||||
dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
dev->BeginScene();
|
||||
|
||||
// om
|
||||
|
||||
GSDeviceDX::OMDepthStencilSelector om_dssel;
|
||||
|
||||
if(context->TEST.ZTE)
|
||||
{
|
||||
om_dssel.ztst = context->TEST.ZTST;
|
||||
om_dssel.zwe = !context->ZBUF.ZMSK;
|
||||
}
|
||||
else
|
||||
{
|
||||
om_dssel.ztst = ZTST_ALWAYS;
|
||||
}
|
||||
|
||||
if(m_fba)
|
||||
{
|
||||
om_dssel.fba = context->FBA.FBA;
|
||||
}
|
||||
|
||||
GSDeviceDX::OMBlendSelector om_bsel;
|
||||
|
||||
// Blend state is only filled in when the batch is not known to be opaque.
if(!IsOpaque())
|
||||
{
|
||||
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt->m_primclass == GS_LINE_CLASS;
|
||||
|
||||
om_bsel.a = context->ALPHA.A;
|
||||
om_bsel.b = context->ALPHA.B;
|
||||
om_bsel.c = context->ALPHA.C;
|
||||
om_bsel.d = context->ALPHA.D;
|
||||
|
||||
if(env.PABE.PABE)
|
||||
{
|
||||
if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
|
||||
{
|
||||
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
|
||||
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
|
||||
|
||||
om_bsel.abe = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
|
||||
//ASSERT(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Per-channel write mask derived from FBMSK: a channel is written unless its mask byte is 0xff.
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
|
||||
|
||||
// vs
|
||||
|
||||
GSDeviceDX::VSSelector vs_sel;
|
||||
|
||||
vs_sel.tme = PRIM->TME;
|
||||
vs_sel.fst = PRIM->FST;
|
||||
vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0;
|
||||
vs_sel.rtcopy = !!rtcopy;
|
||||
|
||||
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
|
||||
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
|
||||
// We are probably receiving bad coordinates from VU1 in these cases.
|
||||
|
||||
if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
|
||||
{
|
||||
if(context->ZBUF.PSM == PSM_PSMZ24)
|
||||
{
|
||||
if(m_vt->m_max.p.z > 0xffffff)
|
||||
{
|
||||
ASSERT(m_vt->m_min.p.z > 0xffffff);
|
||||
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
|
||||
if (m_vt->m_min.p.z > 0xffffff)
|
||||
{
|
||||
vs_sel.bppz = 1;
|
||||
om_dssel.ztst = ZTST_ALWAYS;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
|
||||
{
|
||||
if(m_vt->m_max.p.z > 0xffff)
|
||||
{
|
||||
ASSERT(m_vt->m_min.p.z > 0xffff); // sfex capcom logo
|
||||
// Fixme : Same as above, I guess.
|
||||
if (m_vt->m_min.p.z > 0xffff)
|
||||
{
|
||||
vs_sel.bppz = 2;
|
||||
om_dssel.ztst = ZTST_ALWAYS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GSDeviceDX::VSConstantBuffer vs_cb;
|
||||
|
||||
// Vertex transform terms: sx/sy scale the vertex coordinates (render target size << 4,
// times the target's scale), ox/oy are the XYOFFSET values, ox2/oy2 add the pixel center offset.
float sx = 2.0f * rtscale.x / (rtsize.x << 4);
|
||||
float sy = 2.0f * rtscale.y / (rtsize.y << 4);
|
||||
float ox = (float)(int)context->XYOFFSET.OFX;
|
||||
float oy = (float)(int)context->XYOFFSET.OFY;
|
||||
float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
|
||||
float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;
|
||||
|
||||
//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
|
||||
//because DX10 and DX9 have a different pixel center.)
|
||||
//
|
||||
//The resulting shifted output aligns better with common blending / corona / blurring effects,
|
||||
//but introduces a few bad pixels on the edges.
|
||||
|
||||
if(rt->LikelyOffset)
|
||||
{
|
||||
// DX9 has pixelcenter set to 0.0, so give it some value here
|
||||
|
||||
if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
|
||||
|
||||
ox2 *= rt->OffsetHack_modx;
|
||||
oy2 *= rt->OffsetHack_mody;
|
||||
}
|
||||
|
||||
vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
|
||||
vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);
|
||||
|
||||
// gs
|
||||
|
||||
GSDeviceDX::GSSelector gs_sel;
|
||||
|
||||
gs_sel.iip = PRIM->IIP;
|
||||
gs_sel.prim = m_vt->m_primclass;
|
||||
|
||||
// ps
|
||||
|
||||
GSDeviceDX::PSSelector ps_sel;
|
||||
GSDeviceDX::PSSamplerSelector ps_ssel;
|
||||
GSDeviceDX::PSConstantBuffer ps_cb;
|
||||
|
||||
// Select the DATE path that matches the setup done at the top of the function.
if(DATE)
|
||||
{
|
||||
if(dev->HasStencil())
|
||||
{
|
||||
om_dssel.date = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_sel.date = 1 + context->TEST.DATM;
|
||||
}
|
||||
}
|
||||
|
||||
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
|
||||
{
|
||||
ps_sel.colclip = 1;
|
||||
}
|
||||
|
||||
ps_sel.clr1 = om_bsel.IsCLR1();
|
||||
ps_sel.fba = context->FBA.FBA;
|
||||
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
|
||||
|
||||
if(UserHacks_AlphaHack) ps_sel.aout = 1;
|
||||
|
||||
if(PRIM->FGE)
|
||||
{
|
||||
ps_sel.fog = 1;
|
||||
|
||||
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
|
||||
}
|
||||
|
||||
// Alpha test: the reference value shares a constant with the fog color (its .a lane)
// and is shifted by one for the strict LESS / GREATER comparisons.
if(context->TEST.ATE)
|
||||
{
|
||||
ps_sel.atst = context->TEST.ATST;
|
||||
|
||||
switch(ps_sel.atst)
|
||||
{
|
||||
case ATST_LESS:
|
||||
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
|
||||
break;
|
||||
case ATST_GREATER:
|
||||
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
|
||||
break;
|
||||
default:
|
||||
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_sel.atst = ATST_ALWAYS;
|
||||
}
|
||||
|
||||
if(tex)
|
||||
{
|
||||
ps_sel.wms = context->CLAMP.WMS;
|
||||
ps_sel.wmt = context->CLAMP.WMT;
|
||||
ps_sel.fmt = tex->m_fmt;
|
||||
ps_sel.aem = env.TEXA.AEM;
|
||||
ps_sel.tfx = context->TEX0.TFX;
|
||||
ps_sel.tcc = context->TEX0.TCC;
|
||||
ps_sel.ltf = m_filter == 2 ? m_vt->IsLinear() : m_filter;
|
||||
ps_sel.rt = tex->m_target;
|
||||
|
||||
int w = tex->m_texture->GetWidth();
|
||||
int h = tex->m_texture->GetHeight();
|
||||
|
||||
int tw = (int)(1 << context->TEX0.TW);
|
||||
int th = (int)(1 << context->TEX0.TH);
|
||||
|
||||
// xy = texture size from TEX0 (1 << TW/TH), zw = size of the actual texture object
GSVector4 WH(tw, th, w, h);
|
||||
|
||||
if(PRIM->FST)
|
||||
{
|
||||
vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
|
||||
//Maybe better?
|
||||
//vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
|
||||
ps_sel.fst = 1;
|
||||
}
|
||||
|
||||
ps_cb.WH = WH;
|
||||
ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
|
||||
ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV);
|
||||
|
||||
GSVector4 clamp(ps_cb.MskFix);
|
||||
GSVector4 ta(env.TEXA & GSVector4i::x000000ff());
|
||||
|
||||
ps_cb.MinMax = clamp / WH.xyxy();
|
||||
ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));
|
||||
|
||||
ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
|
||||
ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
|
||||
ps_ssel.ltf = ps_sel.ltf;
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_sel.tfx = 4;
|
||||
}
|
||||
|
||||
// rs
|
||||
|
||||
GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
|
||||
|
||||
dev->OMSetRenderTargets(rt, ds, &scissor);
|
||||
dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL);
|
||||
dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL);
|
||||
dev->PSSetShaderResource(2, rtcopy);
|
||||
|
||||
uint8 afix = context->ALPHA.FIX;
|
||||
|
||||
dev->SetupOM(om_dssel, om_bsel, afix);
|
||||
dev->SetupIA(m_vertex.buff, m_vertex.tail, m_index.buff, m_index.tail, m_topology);
|
||||
dev->SetupVS(vs_sel, &vs_cb);
|
||||
dev->SetupGS(gs_sel);
|
||||
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
|
||||
|
||||
// draw
|
||||
|
||||
if(context->TEST.DoFirstPass())
|
||||
{
|
||||
dev->DrawIndexedPrimitive();
|
||||
|
||||
// Same condition that set ps_sel.colclip = 1 above: draw once more with the
// "negative" blend selector and colclip = 2.
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
|
||||
{
|
||||
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
|
||||
GSDeviceDX::PSSelector ps_selneg(ps_sel);
|
||||
|
||||
om_bselneg.negative = 1;
|
||||
ps_selneg.colclip = 2;
|
||||
|
||||
dev->SetupOM(om_dssel, om_bselneg, afix);
|
||||
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
|
||||
|
||||
dev->DrawIndexedPrimitive();
|
||||
}
|
||||
}
|
||||
|
||||
if(context->TEST.DoSecondPass())
|
||||
{
|
||||
ASSERT(!env.PABE.PABE);
|
||||
|
||||
// Lookup table indexed by the current alpha test; NOTE(review): appears to map each
// ATST value to its logical complement so this pass covers the pixels that failed - confirm against the ATST enum.
static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4};
|
||||
|
||||
ps_sel.atst = iatst[ps_sel.atst];
|
||||
|
||||
switch(ps_sel.atst)
|
||||
{
|
||||
case ATST_LESS:
|
||||
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
|
||||
break;
|
||||
case ATST_GREATER:
|
||||
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
|
||||
break;
|
||||
default:
|
||||
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
|
||||
break;
|
||||
}
|
||||
|
||||
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
|
||||
|
||||
// Start from the current write enables and clear the ones AFAIL forbids for this pass.
bool z = om_dssel.zwe;
|
||||
bool r = om_bsel.wr;
|
||||
bool g = om_bsel.wg;
|
||||
bool b = om_bsel.wb;
|
||||
bool a = om_bsel.wa;
|
||||
|
||||
switch(context->TEST.AFAIL)
|
||||
{
|
||||
case 0: z = r = g = b = a = false; break; // none
|
||||
case 1: z = false; break; // rgba
|
||||
case 2: r = g = b = a = false; break; // z
|
||||
case 3: z = a = false; break; // rgb
|
||||
default: __assume(0);
|
||||
}
|
||||
|
||||
// Skip the pass entirely when nothing would be written.
if(z || r || g || b || a)
|
||||
{
|
||||
om_dssel.zwe = z;
|
||||
om_bsel.wr = r;
|
||||
om_bsel.wg = g;
|
||||
om_bsel.wb = b;
|
||||
om_bsel.wa = a;
|
||||
|
||||
dev->SetupOM(om_dssel, om_bsel, afix);
|
||||
|
||||
dev->DrawIndexedPrimitive();
|
||||
|
||||
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
|
||||
{
|
||||
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
|
||||
GSDeviceDX::PSSelector ps_selneg(ps_sel);
|
||||
|
||||
om_bselneg.negative = 1;
|
||||
ps_selneg.colclip = 2;
|
||||
|
||||
dev->SetupOM(om_dssel, om_bselneg, afix);
|
||||
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
|
||||
|
||||
dev->DrawIndexedPrimitive();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dev->EndScene();
|
||||
|
||||
// rtcopy is still NULL unless the non-stencil DATE path created it above.
dev->Recycle(rtcopy);
|
||||
|
||||
if(om_dssel.fba) UpdateFBA(rt);
|
||||
}
|
||||
|
|
|
@ -23,8 +23,7 @@
|
|||
|
||||
#include "GSRendererHW.h"
|
||||
|
||||
template<class Vertex>
|
||||
class GSRendererDX : public GSRendererHW<Vertex>
|
||||
class GSRendererDX : public GSRendererHW
|
||||
{
|
||||
GSVector2 m_pixelcenter;
|
||||
bool m_logz;
|
||||
|
@ -35,413 +34,11 @@ class GSRendererDX : public GSRendererHW<Vertex>
|
|||
protected:
|
||||
int m_topology;
|
||||
|
||||
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||
virtual void UpdateFBA(GSTexture* rt) {}
|
||||
|
||||
public:
|
||||
GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0))
|
||||
: GSRendererHW<Vertex>(tc)
|
||||
, m_pixelcenter(pixelcenter)
|
||||
, m_topology(-1)
|
||||
{
|
||||
m_logz = !!theApp.GetConfig("logz", 0);
|
||||
m_fba = !!theApp.GetConfig("fba", 1);
|
||||
//UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);
|
||||
UserHacks_AlphaHack = !!theApp.GetConfig("UserHacks_AlphaHack", 0);
|
||||
}
|
||||
GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0));
|
||||
virtual ~GSRendererDX();
|
||||
|
||||
virtual ~GSRendererDX()
|
||||
{
|
||||
}
|
||||
|
||||
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
{
|
||||
GSDrawingEnvironment& env = m_env;
|
||||
GSDrawingContext* context = m_context;
|
||||
|
||||
const GSVector2i& rtsize = rt->GetSize();
|
||||
const GSVector2& rtscale = rt->GetScale();
|
||||
|
||||
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
|
||||
|
||||
GSTexture *rtcopy = NULL;
|
||||
|
||||
ASSERT(m_dev != NULL);
|
||||
|
||||
GSDeviceDX* dev = (GSDeviceDX*)m_dev;
|
||||
|
||||
if(DATE)
|
||||
{
|
||||
if(dev->HasStencil())
|
||||
{
|
||||
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
|
||||
GSVector4 o = GSVector4(-1.0f, 1.0f);
|
||||
|
||||
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
|
||||
GSVector4 dst = src * 2.0f + o.xxxx();
|
||||
|
||||
GSVertexPT1 vertices[] =
|
||||
{
|
||||
{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
|
||||
{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
|
||||
{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
|
||||
{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
|
||||
};
|
||||
|
||||
dev->SetupDATE(rt, ds, vertices, m_context->TEST.DATM);
|
||||
}
|
||||
else
|
||||
{
|
||||
rtcopy = dev->CreateRenderTarget(rtsize.x, rtsize.y, false, rt->GetFormat());
|
||||
|
||||
// I'll use VertexTrace when I consider it more trustworthy
|
||||
|
||||
dev->CopyRect(rt, rtcopy, GSVector4i(rtsize).zwxy());
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
dev->BeginScene();
|
||||
|
||||
// om
|
||||
|
||||
GSDeviceDX::OMDepthStencilSelector om_dssel;
|
||||
|
||||
if(context->TEST.ZTE)
|
||||
{
|
||||
om_dssel.ztst = context->TEST.ZTST;
|
||||
om_dssel.zwe = !context->ZBUF.ZMSK;
|
||||
}
|
||||
else
|
||||
{
|
||||
om_dssel.ztst = ZTST_ALWAYS;
|
||||
}
|
||||
|
||||
if(m_fba)
|
||||
{
|
||||
om_dssel.fba = context->FBA.FBA;
|
||||
}
|
||||
|
||||
GSDeviceDX::OMBlendSelector om_bsel;
|
||||
|
||||
if(!IsOpaque())
|
||||
{
|
||||
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;
|
||||
|
||||
om_bsel.a = context->ALPHA.A;
|
||||
om_bsel.b = context->ALPHA.B;
|
||||
om_bsel.c = context->ALPHA.C;
|
||||
om_bsel.d = context->ALPHA.D;
|
||||
|
||||
if(env.PABE.PABE)
|
||||
{
|
||||
if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
|
||||
{
|
||||
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
|
||||
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
|
||||
|
||||
om_bsel.abe = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
|
||||
//ASSERT(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
|
||||
|
||||
// vs
|
||||
|
||||
GSDeviceDX::VSSelector vs_sel;
|
||||
|
||||
vs_sel.tme = PRIM->TME;
|
||||
vs_sel.fst = PRIM->FST;
|
||||
vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0;
|
||||
vs_sel.rtcopy = !!rtcopy;
|
||||
|
||||
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
|
||||
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
|
||||
// We are probably receiving bad coordinates from VU1 in these cases.
|
||||
|
||||
if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
|
||||
{
|
||||
if(context->ZBUF.PSM == PSM_PSMZ24)
|
||||
{
|
||||
if(m_vt.m_max.p.z > 0xffffff)
|
||||
{
|
||||
ASSERT(m_vt.m_min.p.z > 0xffffff);
|
||||
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
|
||||
if (m_vt.m_min.p.z > 0xffffff)
|
||||
{
|
||||
vs_sel.bppz = 1;
|
||||
om_dssel.ztst = ZTST_ALWAYS;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
|
||||
{
|
||||
if(m_vt.m_max.p.z > 0xffff)
|
||||
{
|
||||
ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo
|
||||
// Fixme : Same as above, I guess.
|
||||
if (m_vt.m_min.p.z > 0xffff)
|
||||
{
|
||||
vs_sel.bppz = 2;
|
||||
om_dssel.ztst = ZTST_ALWAYS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GSDeviceDX::VSConstantBuffer vs_cb;
|
||||
|
||||
float sx = 2.0f * rtscale.x / (rtsize.x << 4);
|
||||
float sy = 2.0f * rtscale.y / (rtsize.y << 4);
|
||||
float ox = (float)(int)context->XYOFFSET.OFX;
|
||||
float oy = (float)(int)context->XYOFFSET.OFY;
|
||||
float ox2 = 2.0f * m_pixelcenter.x / rtsize.x;
|
||||
float oy2 = 2.0f * m_pixelcenter.y / rtsize.y;
|
||||
|
||||
//This hack subtracts around half a pixel from OFX and OFY. (Cannot do this directly,
|
||||
//because DX10 and DX9 have a different pixel center.)
|
||||
//
|
||||
//The resulting shifted output aligns better with common blending / corona / blurring effects,
|
||||
//but introduces a few bad pixels on the edges.
|
||||
|
||||
if(rt->LikelyOffset)
|
||||
{
|
||||
// DX9 has pixelcenter set to 0.0, so give it some value here
|
||||
|
||||
if(m_pixelcenter.x == 0 && m_pixelcenter.y == 0) { ox2 = -0.0003f; oy2 = -0.0003f; }
|
||||
|
||||
ox2 *= rt->OffsetHack_modx;
|
||||
oy2 *= rt->OffsetHack_mody;
|
||||
}
|
||||
|
||||
vs_cb.VertexScale = GSVector4(sx, -sy, ldexpf(1, -32), 0.0f);
|
||||
vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f);
|
||||
|
||||
// gs
|
||||
|
||||
GSDeviceDX::GSSelector gs_sel;
|
||||
|
||||
gs_sel.iip = PRIM->IIP;
|
||||
gs_sel.prim = m_vt.m_primclass;
|
||||
|
||||
// ps
|
||||
|
||||
GSDeviceDX::PSSelector ps_sel;
|
||||
GSDeviceDX::PSSamplerSelector ps_ssel;
|
||||
GSDeviceDX::PSConstantBuffer ps_cb;
|
||||
|
||||
if(DATE)
|
||||
{
|
||||
if(dev->HasStencil())
|
||||
{
|
||||
om_dssel.date = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_sel.date = 1 + context->TEST.DATM;
|
||||
}
|
||||
}
|
||||
|
||||
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
|
||||
{
|
||||
ps_sel.colclip = 1;
|
||||
}
|
||||
|
||||
ps_sel.clr1 = om_bsel.IsCLR1();
|
||||
ps_sel.fba = context->FBA.FBA;
|
||||
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
|
||||
|
||||
if(UserHacks_AlphaHack) ps_sel.aout = 1;
|
||||
|
||||
if(PRIM->FGE)
|
||||
{
|
||||
ps_sel.fog = 1;
|
||||
|
||||
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
|
||||
}
|
||||
|
||||
if(context->TEST.ATE)
|
||||
{
|
||||
ps_sel.atst = context->TEST.ATST;
|
||||
|
||||
switch(ps_sel.atst)
|
||||
{
|
||||
case ATST_LESS:
|
||||
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
|
||||
break;
|
||||
case ATST_GREATER:
|
||||
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
|
||||
break;
|
||||
default:
|
||||
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_sel.atst = ATST_ALWAYS;
|
||||
}
|
||||
|
||||
if(tex)
|
||||
{
|
||||
ps_sel.wms = context->CLAMP.WMS;
|
||||
ps_sel.wmt = context->CLAMP.WMT;
|
||||
ps_sel.fmt = tex->m_fmt;
|
||||
ps_sel.aem = env.TEXA.AEM;
|
||||
ps_sel.tfx = context->TEX0.TFX;
|
||||
ps_sel.tcc = context->TEX0.TCC;
|
||||
ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter;
|
||||
ps_sel.rt = tex->m_target;
|
||||
|
||||
int w = tex->m_texture->GetWidth();
|
||||
int h = tex->m_texture->GetHeight();
|
||||
|
||||
int tw = (int)(1 << context->TEX0.TW);
|
||||
int th = (int)(1 << context->TEX0.TH);
|
||||
|
||||
GSVector4 WH(tw, th, w, h);
|
||||
|
||||
if(PRIM->FST)
|
||||
{
|
||||
vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
|
||||
//Maybe better?
|
||||
//vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
|
||||
ps_sel.fst = 1;
|
||||
}
|
||||
|
||||
ps_cb.WH = WH;
|
||||
ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
|
||||
ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV);
|
||||
|
||||
GSVector4 clamp(ps_cb.MskFix);
|
||||
GSVector4 ta(env.TEXA & GSVector4i::x000000ff());
|
||||
|
||||
ps_cb.MinMax = clamp / WH.xyxy();
|
||||
ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));
|
||||
|
||||
ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
|
||||
ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
|
||||
ps_ssel.ltf = ps_sel.ltf;
|
||||
}
|
||||
else
|
||||
{
|
||||
ps_sel.tfx = 4;
|
||||
}
|
||||
|
||||
// rs
|
||||
|
||||
GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
|
||||
|
||||
dev->OMSetRenderTargets(rt, ds, &scissor);
|
||||
dev->PSSetShaderResource(0, tex ? tex->m_texture : NULL);
|
||||
dev->PSSetShaderResource(1, tex ? tex->m_palette : NULL);
|
||||
dev->PSSetShaderResource(2, rtcopy);
|
||||
|
||||
uint8 afix = context->ALPHA.FIX;
|
||||
|
||||
dev->SetupOM(om_dssel, om_bsel, afix);
|
||||
dev->SetupIA(m_vertices, m_count, m_topology);
|
||||
dev->SetupVS(vs_sel, &vs_cb);
|
||||
dev->SetupGS(gs_sel);
|
||||
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
|
||||
|
||||
// draw
|
||||
|
||||
if(context->TEST.DoFirstPass())
|
||||
{
|
||||
dev->DrawPrimitive();
|
||||
|
||||
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
|
||||
{
|
||||
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
|
||||
GSDeviceDX::PSSelector ps_selneg(ps_sel);
|
||||
|
||||
om_bselneg.negative = 1;
|
||||
ps_selneg.colclip = 2;
|
||||
|
||||
dev->SetupOM(om_dssel, om_bselneg, afix);
|
||||
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
|
||||
|
||||
dev->DrawPrimitive();
|
||||
}
|
||||
}
|
||||
|
||||
if(context->TEST.DoSecondPass())
|
||||
{
|
||||
ASSERT(!env.PABE.PABE);
|
||||
|
||||
static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4};
|
||||
|
||||
ps_sel.atst = iatst[ps_sel.atst];
|
||||
|
||||
switch(ps_sel.atst)
|
||||
{
|
||||
case ATST_LESS:
|
||||
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF - 1);
|
||||
break;
|
||||
case ATST_GREATER:
|
||||
ps_cb.FogColor_AREF.a = (float)((int)context->TEST.AREF + 1);
|
||||
break;
|
||||
default:
|
||||
ps_cb.FogColor_AREF.a = (float)(int)context->TEST.AREF;
|
||||
break;
|
||||
}
|
||||
|
||||
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
|
||||
|
||||
bool z = om_dssel.zwe;
|
||||
bool r = om_bsel.wr;
|
||||
bool g = om_bsel.wg;
|
||||
bool b = om_bsel.wb;
|
||||
bool a = om_bsel.wa;
|
||||
|
||||
switch(context->TEST.AFAIL)
|
||||
{
|
||||
case 0: z = r = g = b = a = false; break; // none
|
||||
case 1: z = false; break; // rgba
|
||||
case 2: r = g = b = a = false; break; // z
|
||||
case 3: z = a = false; break; // rgb
|
||||
default: __assume(0);
|
||||
}
|
||||
|
||||
if(z || r || g || b || a)
|
||||
{
|
||||
om_dssel.zwe = z;
|
||||
om_bsel.wr = r;
|
||||
om_bsel.wg = g;
|
||||
om_bsel.wb = b;
|
||||
om_bsel.wa = a;
|
||||
|
||||
dev->SetupOM(om_dssel, om_bsel, afix);
|
||||
|
||||
dev->DrawPrimitive();
|
||||
|
||||
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
|
||||
{
|
||||
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
|
||||
GSDeviceDX::PSSelector ps_selneg(ps_sel);
|
||||
|
||||
om_bselneg.negative = 1;
|
||||
ps_selneg.colclip = 2;
|
||||
|
||||
dev->SetupOM(om_dssel, om_bselneg, afix);
|
||||
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel);
|
||||
|
||||
dev->DrawPrimitive();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dev->EndScene();
|
||||
|
||||
dev->Recycle(rtcopy);
|
||||
|
||||
if(om_dssel.fba) UpdateFBA(rt);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -25,9 +25,9 @@
|
|||
#include "resource.h"
|
||||
|
||||
GSRendererDX11::GSRendererDX11()
|
||||
: GSRendererDX<GSVertexHW11>(new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
|
||||
: GSRendererDX(new GSVertexTraceDX11(this), sizeof(GSVertexHW11), new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
|
||||
{
|
||||
InitVertexKick(GSRendererDX11);
|
||||
InitConvertVertex(GSRendererDX11);
|
||||
}
|
||||
|
||||
bool GSRendererDX11::CreateDevice(GSDevice* dev)
|
||||
|
@ -38,202 +38,49 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev)
|
|||
return true;
|
||||
}
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererDX11::VertexKick(bool skip)
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererDX11::ConvertVertex(GSVertexHW11* RESTRICT vertex, size_t index)
|
||||
{
|
||||
GSVertexHW11& dst = m_vl.AddTail();
|
||||
|
||||
dst = *(GSVertexHW11*)&m_v;
|
||||
|
||||
#ifdef ENABLE_UPSCALE_HACKS
|
||||
GSVector4i v0(m_v.m[0]);
|
||||
GSVector4i v1(m_v.m[1]);
|
||||
|
||||
if(tme && fst)
|
||||
{
|
||||
//GSVector4::storel(&dst.ST, m_v.GetUV());
|
||||
// TODO: modify VertexTrace and the shaders to read uv from v1.u16[0], v1.u16[1], then this step is not needed
|
||||
|
||||
int Udiff = 0;
|
||||
int Vdiff = 0;
|
||||
int Uadjust = 0;
|
||||
int Vadjust = 0;
|
||||
|
||||
int multiplier = GetUpscaleMultiplier();
|
||||
|
||||
if(multiplier > 1)
|
||||
{
|
||||
Udiff = m_v.UV.U & 4095;
|
||||
Vdiff = m_v.UV.V & 4095;
|
||||
|
||||
if(Udiff != 0)
|
||||
{
|
||||
if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; }
|
||||
else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; }
|
||||
}
|
||||
|
||||
if(Vdiff != 0)
|
||||
{
|
||||
if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; }
|
||||
else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; }
|
||||
}
|
||||
|
||||
Udiff = m_v.UV.U & 255;
|
||||
Vdiff = m_v.UV.V & 255;
|
||||
|
||||
if(Udiff != 0)
|
||||
{
|
||||
if (Udiff >= 248) { Uadjust = -1; }
|
||||
else if (Udiff <= 8) { Uadjust = 1; }
|
||||
}
|
||||
|
||||
if(Vdiff != 0)
|
||||
{
|
||||
if (Vdiff >= 248) { Vadjust = -1; }
|
||||
else if (Vdiff <= 8) { Vadjust = 1; }
|
||||
}
|
||||
|
||||
Udiff = m_v.UV.U & 15;
|
||||
Vdiff = m_v.UV.V & 15;
|
||||
|
||||
if(Udiff != 0)
|
||||
{
|
||||
if (Udiff >= 15) { Uadjust = -1; }
|
||||
else if (Udiff <= 1) { Uadjust = 1; }
|
||||
}
|
||||
|
||||
if(Vdiff != 0)
|
||||
{
|
||||
if (Vdiff >= 15) { Vadjust = -1; }
|
||||
else if (Vdiff <= 1) { Vadjust = 1; }
|
||||
}
|
||||
}
|
||||
|
||||
dst.ST.S = (float)m_v.UV.U - Uadjust;
|
||||
dst.ST.T = (float)m_v.UV.V - Vadjust;
|
||||
}
|
||||
else if(tme)
|
||||
{
|
||||
// Wip :p
|
||||
//dst.XYZ.X += 5;
|
||||
//dst.XYZ.Y += 5;
|
||||
v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st
|
||||
}
|
||||
|
||||
#else
|
||||
GSVector4i* RESTRICT dst = (GSVector4i*)&vertex[index];
|
||||
|
||||
if(tme && fst)
|
||||
{
|
||||
GSVector4::storel(&dst.ST, m_v.GetUV());
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int count = 0;
|
||||
|
||||
if(GSVertexHW11* v = DrawingKick<prim>(skip, count))
|
||||
{
|
||||
GSVector4i scissor = m_context->scissor.dx10;
|
||||
|
||||
GSVector4i pmin, pmax;
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i v0, v1, v2;
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
v0 = GSVector4i::load((int)v[0].p.xy).upl16();
|
||||
pmin = v0;
|
||||
pmax = v0;
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
case GS_SPRITE:
|
||||
v0 = GSVector4i::load((int)v[0].p.xy);
|
||||
v1 = GSVector4i::load((int)v[1].p.xy);
|
||||
pmin = v0.min_u16(v1).upl16();
|
||||
pmax = v0.max_u16(v1).upl16();
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
v0 = GSVector4i::load((int)v[0].p.xy);
|
||||
v1 = GSVector4i::load((int)v[1].p.xy);
|
||||
v2 = GSVector4i::load((int)v[2].p.xy);
|
||||
pmin = v0.min_u16(v1).min_u16(v2).upl16();
|
||||
pmax = v0.max_u16(v1).max_u16(v2).upl16();
|
||||
break;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
pmin.x = v[0].p.x;
|
||||
pmin.y = v[0].p.y;
|
||||
pmax.x = v[0].p.x;
|
||||
pmax.y = v[0].p.y;
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
case GS_SPRITE:
|
||||
pmin.x = std::min<uint16>(v[0].p.x, v[1].p.x);
|
||||
pmin.y = std::min<uint16>(v[0].p.y, v[1].p.y);
|
||||
pmax.x = std::max<uint16>(v[0].p.x, v[1].p.x);
|
||||
pmax.y = std::max<uint16>(v[0].p.y, v[1].p.y);
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
pmin.x = std::min<uint16>(std::min<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
|
||||
pmin.y = std::min<uint16>(std::min<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
|
||||
pmax.x = std::max<uint16>(std::max<uint16>(v[0].p.x, v[1].p.x), v[2].p.x);
|
||||
pmax.y = std::max<uint16>(std::max<uint16>(v[0].p.y, v[1].p.y), v[2].p.y);
|
||||
break;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
GSVector4i test = (pmax < scissor) | (pmin > scissor.zwxy());
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
case GS_SPRITE:
|
||||
test |= pmin == pmax;
|
||||
break;
|
||||
}
|
||||
|
||||
if(test.mask() & 0xff)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
m_count += count;
|
||||
}
|
||||
dst[0] = v0;
|
||||
dst[1] = v1;
|
||||
}
|
||||
|
||||
void GSRendererDX11::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
void GSRendererDX11::Draw()
|
||||
{
|
||||
switch(m_vt.m_primclass)
|
||||
// TODO: remove invisible prims here
|
||||
|
||||
__super::Draw();
|
||||
}
|
||||
|
||||
void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
{
|
||||
switch(m_vt->m_primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
m_topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
|
||||
m_perfmon.Put(GSPerfMon::Prim, m_count);
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
m_topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
|
||||
m_perfmon.Put(GSPerfMon::Prim, m_count / 2);
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
m_topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
|
||||
m_perfmon.Put(GSPerfMon::Prim, m_count / 3);
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
__super::Draw(rt, ds, tex);
|
||||
__super::DrawPrims(rt, ds, tex);
|
||||
}
|
||||
|
|
|
@ -25,16 +25,22 @@
|
|||
#include "GSVertexHW.h"
|
||||
#include "GSTextureCache11.h"
|
||||
|
||||
class GSRendererDX11 : public GSRendererDX<GSVertexHW11>
|
||||
class GSRendererDX11 : public GSRendererDX
|
||||
{
|
||||
protected:
|
||||
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(GSVertexHW11* RESTRICT vertex, size_t index);
|
||||
void Draw();
|
||||
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||
|
||||
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;}
|
||||
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.y;}
|
||||
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW11*)vertex)->c0;}
|
||||
void SetColor(void* vertex, uint32 c) const {((GSVertexHW11*)vertex)->c0 = c;}
|
||||
|
||||
public:
|
||||
GSRendererDX11();
|
||||
virtual ~GSRendererDX11() {}
|
||||
|
||||
bool CreateDevice(GSDevice* dev);
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
|
||||
};
|
||||
|
|
|
@ -25,9 +25,9 @@
|
|||
#include "resource.h"
|
||||
|
||||
GSRendererDX9::GSRendererDX9()
|
||||
: GSRendererDX<GSVertexHW9>(new GSTextureCache9(this))
|
||||
: GSRendererDX(new GSVertexTraceDX9(this), sizeof(GSVertexHW9), new GSTextureCache9(this))
|
||||
{
|
||||
InitVertexKick(GSRendererDX9);
|
||||
InitConvertVertex(GSRendererDX9);
|
||||
}
|
||||
|
||||
bool GSRendererDX9::CreateDevice(GSDevice* dev)
|
||||
|
@ -57,8 +57,8 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev)
|
|||
return true;
|
||||
}
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererDX9::VertexKick(bool skip)
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererDX9::ConvertVertex(GSVertexHW9* RESTRICT vertex, size_t index)
|
||||
{
|
||||
GSVector4 p = GSVector4(((GSVector4i)m_v.XYZ).upl16());
|
||||
|
||||
|
@ -71,197 +71,143 @@ void GSRendererDX9::VertexKick(bool skip)
|
|||
p = p.xyxy(GSVector4::load((float)m_v.XYZ.Z));
|
||||
}
|
||||
|
||||
GSVertexHW9& dst = m_vl.AddTail();
|
||||
|
||||
dst.p = p;
|
||||
|
||||
int Uadjust = 0;
|
||||
int Vadjust = 0;
|
||||
GSVector4 t = GSVector4::zero();
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(fst)
|
||||
{
|
||||
dst.t = m_v.GetUV();
|
||||
|
||||
#ifdef ENABLE_UPSCALE_HACKS
|
||||
|
||||
int Udiff = 0;
|
||||
int Vdiff = 0;
|
||||
|
||||
int multiplier = GetUpscaleMultiplier();
|
||||
|
||||
if(multiplier > 1)
|
||||
{
|
||||
Udiff = m_v.UV.U & 4095;
|
||||
Vdiff = m_v.UV.V & 4095;
|
||||
|
||||
if(Udiff != 0)
|
||||
{
|
||||
if (Udiff >= 4080) {/*printf("U+ %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = -1; }
|
||||
else if (Udiff <= 16) {/*printf("U- %d %d\n", Udiff, m_v.UV.U);*/ Uadjust = 1; }
|
||||
}
|
||||
|
||||
if(Vdiff != 0)
|
||||
{
|
||||
if (Vdiff >= 4080) {/*printf("V+ %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = -1; }
|
||||
else if (Vdiff <= 16) {/*printf("V- %d %d\n", Vdiff, m_v.UV.V);*/ Vadjust = 1; }
|
||||
}
|
||||
|
||||
Udiff = m_v.UV.U & 255;
|
||||
Vdiff = m_v.UV.V & 255;
|
||||
|
||||
if(Udiff != 0)
|
||||
{
|
||||
if (Udiff >= 248) { Uadjust = -1; }
|
||||
else if (Udiff <= 8) { Uadjust = 1; }
|
||||
}
|
||||
|
||||
if(Vdiff != 0)
|
||||
{
|
||||
if (Vdiff >= 248) { Vadjust = -1; }
|
||||
else if (Vdiff <= 8) { Vadjust = 1; }
|
||||
}
|
||||
|
||||
Udiff = m_v.UV.U & 15;
|
||||
Vdiff = m_v.UV.V & 15;
|
||||
|
||||
if(Udiff != 0)
|
||||
{
|
||||
if (Udiff >= 15) { Uadjust = -1; }
|
||||
else if (Udiff <= 1) { Uadjust = 1; }
|
||||
}
|
||||
|
||||
if(Vdiff != 0)
|
||||
{
|
||||
if (Vdiff >= 15) { Vadjust = -1; }
|
||||
else if (Vdiff <= 1) { Vadjust = 1; }
|
||||
}
|
||||
}
|
||||
|
||||
dst.t.x -= (float) Uadjust;
|
||||
dst.t.y -= (float) Vadjust;
|
||||
|
||||
#endif
|
||||
t = GSVector4(GSVector4i::load(m_v.UV.u32[0]).upl16());
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.t = GSVector4::loadl(&m_v.ST);
|
||||
t = GSVector4::loadl(&m_v.ST);
|
||||
}
|
||||
}
|
||||
|
||||
dst._c0() = m_v.RGBAQ.u32[0];
|
||||
dst._c1() = m_v.FOG.u32[1];
|
||||
t = t.xyxy(GSVector4::cast(GSVector4i(m_v.RGBAQ.u32[0], m_v.FOG.u32[1])));
|
||||
|
||||
//
|
||||
GSVertexHW9* RESTRICT dst = (GSVertexHW9*)&vertex[index];
|
||||
|
||||
// BaseDrawingKick can never return NULL here because the DrawingKick function
|
||||
// tables route to DrawingKickNull for GS_INVALID prim types (and that's the only
|
||||
// condition where this function would return NULL).
|
||||
|
||||
int count = 0;
|
||||
|
||||
if(GSVertexHW9* v = DrawingKick<prim>(skip, count))
|
||||
{
|
||||
GSVector4 scissor = m_context->scissor.dx9;
|
||||
|
||||
GSVector4 pmin, pmax;
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
pmin = v[0].p;
|
||||
pmax = v[0].p;
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
case GS_SPRITE:
|
||||
pmin = v[0].p.min(v[1].p);
|
||||
pmax = v[0].p.max(v[1].p);
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
pmin = v[0].p.min(v[1].p).min(v[2].p);
|
||||
pmax = v[0].p.max(v[1].p).max(v[2].p);
|
||||
break;
|
||||
}
|
||||
|
||||
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
case GS_SPRITE:
|
||||
test |= pmin == pmax;
|
||||
break;
|
||||
}
|
||||
|
||||
if(test.mask() & 3)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();}
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0() = v[2]._c0();}
|
||||
break;
|
||||
case GS_SPRITE:
|
||||
if(PRIM->IIP == 0) {v[0]._c0() = v[1]._c0();}
|
||||
v[0].p.z = v[1].p.z;
|
||||
v[0].p.w = v[1].p.w;
|
||||
v[0]._c1() = v[1]._c1();
|
||||
v[2] = v[1];
|
||||
v[3] = v[1];
|
||||
v[1].p.y = v[0].p.y;
|
||||
v[1].t.y = v[0].t.y;
|
||||
v[2].p.x = v[0].p.x;
|
||||
v[2].t.x = v[0].t.x;
|
||||
v[4] = v[1];
|
||||
v[5] = v[2];
|
||||
count += 4;
|
||||
break;
|
||||
}
|
||||
|
||||
m_count += count;
|
||||
}
|
||||
dst->p = p;
|
||||
dst->t = t;
|
||||
}
|
||||
|
||||
void GSRendererDX9::Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
void GSRendererDX9::Draw()
|
||||
{
|
||||
switch(m_vt.m_primclass)
|
||||
// TODO: remove invisible prims here
|
||||
|
||||
__super::Draw();
|
||||
}
|
||||
|
||||
void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
{
|
||||
switch(m_vt->m_primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
|
||||
m_topology = D3DPT_POINTLIST;
|
||||
m_perfmon.Put(GSPerfMon::Prim, m_count);
|
||||
|
||||
break;
|
||||
|
||||
case GS_LINE_CLASS:
|
||||
|
||||
m_topology = D3DPT_LINELIST;
|
||||
m_perfmon.Put(GSPerfMon::Prim, m_count / 2);
|
||||
|
||||
if(PRIM->IIP == 0)
|
||||
{
|
||||
for(size_t i = 0, j = m_index.tail; i < j; i += 2)
|
||||
{
|
||||
uint32 tmp = m_index.buff[i + 0];
|
||||
m_index.buff[i + 0] = m_index.buff[i + 1];
|
||||
m_index.buff[i + 1] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case GS_TRIANGLE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
|
||||
m_topology = D3DPT_TRIANGLELIST;
|
||||
m_perfmon.Put(GSPerfMon::Prim, m_count / 3);
|
||||
|
||||
if(PRIM->IIP == 0)
|
||||
{
|
||||
for(size_t i = 0, j = m_index.tail; i < j; i += 3)
|
||||
{
|
||||
uint32 tmp = m_index.buff[i + 0];
|
||||
m_index.buff[i + 0] = m_index.buff[i + 2];
|
||||
m_index.buff[i + 2] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case GS_SPRITE_CLASS:
|
||||
|
||||
m_topology = D3DPT_TRIANGLELIST;
|
||||
|
||||
// each sprite converted to quad needs twice the space
|
||||
|
||||
while(m_vertex.tail * 2 > m_vertex.maxcount)
|
||||
{
|
||||
GrowVertexBuffer();
|
||||
}
|
||||
|
||||
// assume vertices are tightly packed and sequentially indexed (it should be the case)
|
||||
|
||||
if(m_vertex.tail >= 2)
|
||||
{
|
||||
size_t count = m_vertex.tail;
|
||||
|
||||
int i = (int)count * 2 - 4;
|
||||
GSVertexHW9* s = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * count] - 2;
|
||||
GSVertexHW9* q = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * (count * 2)] - 4;
|
||||
uint32* RESTRICT index = &m_index.buff[count * 3] - 6;
|
||||
|
||||
for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
|
||||
{
|
||||
GSVertexHW9 v0 = s[0];
|
||||
GSVertexHW9 v1 = s[1];
|
||||
|
||||
v0.p = v0.p.xyzw(v1.p); // z, q
|
||||
v0.t = v0.t.xyzw(v1.t); // c, f
|
||||
|
||||
q[0] = v0;
|
||||
q[3] = v1;
|
||||
|
||||
// swap x, s
|
||||
|
||||
GSVector4 p = v0.p.insert<0, 0>(v1.p);
|
||||
GSVector4 t = v0.t.insert<0, 0>(v1.t);
|
||||
v1.p = v1.p.insert<0, 0>(v0.p);
|
||||
v1.t = v1.t.insert<0, 0>(v0.t);
|
||||
v0.p = p;
|
||||
v0.t = t;
|
||||
|
||||
q[1] = v0;
|
||||
q[2] = v1;
|
||||
|
||||
index[0] = i + 0;
|
||||
index[1] = i + 1;
|
||||
index[2] = i + 2;
|
||||
index[3] = i + 1;
|
||||
index[4] = i + 2;
|
||||
index[5] = i + 3;
|
||||
}
|
||||
|
||||
m_vertex.head = m_vertex.tail = count * 2;
|
||||
m_index.tail = count * 3;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
(*(GSDevice9*)m_dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO
|
||||
|
||||
__super::Draw(rt, ds, tex);
|
||||
__super::DrawPrims(rt, ds, tex);
|
||||
}
|
||||
|
||||
void GSRendererDX9::UpdateFBA(GSTexture* rt)
|
||||
|
@ -280,7 +226,7 @@ void GSRendererDX9::UpdateFBA(GSTexture* rt)
|
|||
GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight());
|
||||
GSVector4 o = GSVector4(-1.0f, 1.0f);
|
||||
|
||||
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
|
||||
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
|
||||
GSVector4 dst = src * 2.0f + o.xxxx();
|
||||
|
||||
GSVertexPT1 vertices[] =
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include "GSVertexHW.h"
|
||||
#include "GSTextureCache9.h"
|
||||
|
||||
class GSRendererDX9 : public GSRendererDX<GSVertexHW9>
|
||||
class GSRendererDX9 : public GSRendererDX
|
||||
{
|
||||
protected:
|
||||
struct
|
||||
|
@ -34,14 +34,20 @@ protected:
|
|||
Direct3DBlendState9 bs;
|
||||
} m_fba;
|
||||
|
||||
void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(GSVertexHW9* RESTRICT vertex, size_t index);
|
||||
void Draw();
|
||||
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||
void UpdateFBA(GSTexture* rt);
|
||||
|
||||
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.x;}
|
||||
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.y;}
|
||||
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW9*)vertex)->t.u32[2];}
|
||||
void SetColor(void* vertex, uint32 c) const {((GSVertexHW9*)vertex)->t.u32[2] = c;}
|
||||
|
||||
public:
|
||||
GSRendererDX9();
|
||||
virtual ~GSRendererDX9() {}
|
||||
|
||||
bool CreateDevice(GSDevice* dev);
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip);
|
||||
};
|
||||
|
|
|
@ -21,3 +21,910 @@
|
|||
|
||||
#include "stdafx.h"
|
||||
#include "GSRendererHW.h"
|
||||
|
||||
// Constructs the hardware renderer base. The vertex trace and vertex stride
// are forwarded to GSRenderer, the texture cache pointer is stored in m_tc,
// and the resolution / upscaling settings are read from the application config.
GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc)
	: GSRenderer(vt, vertex_stride)
	, m_tc(tc)
	, m_width(1024)
	, m_height(1024)
	, m_skip(0)
	, m_reset(false)
	, m_upscale_multiplier(1)
{
	m_nativeres = !!theApp.GetConfig("nativeres", 0);
	m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1);
	m_userhacks_skipdraw = theApp.GetConfig("UserHacks_SkipDraw", 0);

	if(m_nativeres)
	{
		// native resolution: the default target size stays, scaling is forced off

		m_upscale_multiplier = 1;

		return;
	}

	// custom resolution: explicit size first, the multiplier may override it below

	m_width = theApp.GetConfig("resx", m_width);
	m_height = theApp.GetConfig("resy", m_height);

	m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", m_upscale_multiplier);

	if(m_upscale_multiplier > 6)
	{
		m_upscale_multiplier = 1; // use the normal upscale math

		return;
	}

	if(m_upscale_multiplier > 1)
	{
		m_width = 640 * m_upscale_multiplier; // 512 is also common, but this is not always detected right.
		m_height = 512 * m_upscale_multiplier; // 448 is also common, but this is not always detected right.
	}
}
|
||||
|
||||
// Releases the texture cache that was handed to the constructor.
GSRendererHW::~GSRendererHW()
{
	delete m_tc;
}
|
||||
|
||||
// Forwards the CRC to the base renderer, then selects the per-game hack
// callbacks and applies title specific render target overrides.
void GSRendererHW::SetGameCRC(uint32 crc, int options)
{
	GSRenderer::SetGameCRC(crc, options);

	m_hacks.SetGameCRC(m_game);

	const bool jackie_chan = (m_game.title == CRC::JackieChanAdv);

	if(jackie_chan)
	{
		// TODO: uses a 1280px wide 16 bit render target, but this only fixes half of the problem

		m_width = 1280;
	}
}
|
||||
|
||||
bool GSRendererHW::CanUpscale()
|
||||
{
|
||||
if(m_hacks.m_cu && !(this->*m_hacks.m_cu)())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return !m_nativeres && m_regs->PMODE.EN != 0; // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition)
|
||||
}
|
||||
|
||||
int GSRendererHW::GetUpscaleMultiplier()
|
||||
{
|
||||
return m_upscale_multiplier;
|
||||
}
|
||||
|
||||
void GSRendererHW::Reset()
|
||||
{
|
||||
// TODO: GSreset can come from the main thread too => crash
|
||||
// m_tc->RemoveAll();
|
||||
|
||||
m_reset = true;
|
||||
|
||||
GSRenderer::Reset();
|
||||
}
|
||||
|
||||
void GSRendererHW::VSync(int field)
|
||||
{
|
||||
GSRenderer::VSync(field);
|
||||
|
||||
m_tc->IncAge();
|
||||
m_dev->AgePool();
|
||||
|
||||
m_skip = 0;
|
||||
|
||||
if(m_reset)
|
||||
{
|
||||
m_tc->RemoveAll();
|
||||
|
||||
m_reset = false;
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererHW::ResetDevice()
|
||||
{
|
||||
m_tc->RemoveAll();
|
||||
|
||||
GSRenderer::ResetDevice();
|
||||
}
|
||||
|
||||
// Looks up the render target that backs display circuit i and returns its
// texture, or NULL when the texture cache has no matching target.
// When dumping is enabled the texture may also be saved to disk.
GSTexture* GSRendererHW::GetOutput(int i)
{
	const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;

	// describe the display frame buffer as a texture for the cache lookup

	GIFRegTEX0 TEX0;

	TEX0.TBP0 = DISPFB.Block();
	TEX0.TBW = DISPFB.FBW;
	TEX0.PSM = DISPFB.PSM;

	// TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM);

	GSTextureCache::Target* target = m_tc->LookupTarget(TEX0, m_width, m_height);

	if(!target)
	{
		return NULL;
	}

	GSTexture* output = target->m_texture;

	if(s_dump)
	{
		if(s_save && s_n >= s_saven)
		{
			output->Save(format("c:\\temp2\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM));
		}

		s_n++;
	}

	return output;
}
|
||||
|
||||
void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
|
||||
{
|
||||
// printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM);
|
||||
|
||||
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
|
||||
}
|
||||
|
||||
void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
|
||||
{
|
||||
// printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM);
|
||||
|
||||
if(clut) return; // FIXME
|
||||
|
||||
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
|
||||
}
|
||||
|
||||
void GSRendererHW::Draw()
|
||||
{
|
||||
if(m_dev->IsLost()) return;
|
||||
|
||||
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
|
||||
|
||||
#ifndef DISABLE_CRC_HACKS
|
||||
|
||||
if(GSRenderer::IsBadFrame(m_skip, m_userhacks_skipdraw)) return;
|
||||
|
||||
#endif
|
||||
|
||||
GSDrawingEnvironment& env = m_env;
|
||||
GSDrawingContext* context = m_context;
|
||||
|
||||
GIFRegTEX0 TEX0;
|
||||
|
||||
TEX0.TBP0 = context->FRAME.Block();
|
||||
TEX0.TBW = context->FRAME.FBW;
|
||||
TEX0.PSM = context->FRAME.PSM;
|
||||
GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
|
||||
|
||||
TEX0.TBP0 = context->ZBUF.Block();
|
||||
TEX0.TBW = context->FRAME.FBW;
|
||||
TEX0.PSM = context->ZBUF.PSM;
|
||||
|
||||
GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
|
||||
|
||||
if(!rt || !ds)
|
||||
{
|
||||
ASSERT(0);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
GSTextureCache::Source* tex = NULL;
|
||||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt->IsLinear());
|
||||
|
||||
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
|
||||
|
||||
if(!tex) return;
|
||||
}
|
||||
|
||||
if(s_dump)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
||||
string s;
|
||||
|
||||
if(s_save && s_n >= s_saven && tex)
|
||||
{
|
||||
s = format("c:\\temp2\\_%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds",
|
||||
s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM,
|
||||
(int)context->CLAMP.WMS, (int)context->CLAMP.WMT,
|
||||
(int)context->CLAMP.MINU, (int)context->CLAMP.MAXU,
|
||||
(int)context->CLAMP.MINV, (int)context->CLAMP.MAXV);
|
||||
|
||||
tex->m_texture->Save(s, true);
|
||||
|
||||
if(tex->m_palette)
|
||||
{
|
||||
s = format("c:\\temp2\\_%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM);
|
||||
|
||||
tex->m_palette->Save(s, true);
|
||||
}
|
||||
}
|
||||
|
||||
s_n++;
|
||||
|
||||
if(s_save && s_n >= s_saven)
|
||||
{
|
||||
s = format("c:\\temp2\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
|
||||
|
||||
rt->m_texture->Save(s);
|
||||
}
|
||||
|
||||
if(s_savez && s_n >= s_saven)
|
||||
{
|
||||
s = format("c:\\temp2\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
|
||||
|
||||
ds->m_texture->Save(s);
|
||||
}
|
||||
|
||||
s_n++;
|
||||
}
|
||||
|
||||
if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// skip alpha test if possible
|
||||
|
||||
GIFRegTEST TEST = context->TEST;
|
||||
GIFRegFRAME FRAME = context->FRAME;
|
||||
GIFRegZBUF ZBUF = context->ZBUF;
|
||||
|
||||
uint32 fm = context->FRAME.FBMSK;
|
||||
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||
|
||||
if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
|
||||
{
|
||||
if(GSRenderer::TryAlphaTest(fm, zm))
|
||||
{
|
||||
context->TEST.ATST = ATST_ALWAYS;
|
||||
}
|
||||
}
|
||||
|
||||
context->FRAME.FBMSK = fm;
|
||||
context->ZBUF.ZMSK = zm != 0;
|
||||
|
||||
//
|
||||
|
||||
DrawPrims(rt->m_texture, ds->m_texture, tex);
|
||||
|
||||
//
|
||||
|
||||
context->TEST = TEST;
|
||||
context->FRAME = FRAME;
|
||||
context->ZBUF = ZBUF;
|
||||
|
||||
//
|
||||
|
||||
GSVector4i r = GSVector4i(m_vt->m_min.p.xyxy(m_vt->m_max.p)).rintersect(GSVector4i(context->scissor.in));
|
||||
|
||||
if(fm != 0xffffffff)
|
||||
{
|
||||
rt->m_valid = rt->m_valid.runion(r);
|
||||
|
||||
m_tc->InvalidateVideoMem(context->offset.fb, r, false);
|
||||
}
|
||||
|
||||
if(zm != 0xffffffff)
|
||||
{
|
||||
ds->m_valid = ds->m_valid.runion(r);
|
||||
|
||||
m_tc->InvalidateVideoMem(context->offset.zb, r, false);
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
if(m_hacks.m_oo)
|
||||
{
|
||||
(this->*m_hacks.m_oo)();
|
||||
}
|
||||
|
||||
if(s_dump)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
||||
string s;
|
||||
|
||||
if(s_save && s_n >= s_saven)
|
||||
{
|
||||
s = format("c:\\temp2\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
|
||||
|
||||
rt->m_texture->Save(s);
|
||||
}
|
||||
|
||||
if(s_savez && s_n >= s_saven)
|
||||
{
|
||||
s = format("c:\\temp2\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
|
||||
|
||||
ds->m_texture->Save(s);
|
||||
}
|
||||
|
||||
s_n++;
|
||||
}
|
||||
|
||||
#ifdef DISABLE_HW_TEXTURE_CACHE
|
||||
|
||||
m_tc->Read(rt, r);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
// hacks
|
||||
|
||||
// Registers the per-game hack callbacks. Three lists are filled:
// OI hooks (called before a draw, may cancel it), OO hooks (called after a
// draw) and CU hooks (consulted by CanUpscale). The maps are bound to these
// lists in the initializer list; the active hooks start out as NULL.
GSRendererHW::Hacks::Hacks()
	: m_oi_map(m_oi_list)
	, m_oo_map(m_oo_list)
	, m_cu_map(m_cu_list)
	, m_oi(NULL)
	, m_oo(NULL)
	, m_cu(NULL)
{
	typedef HackEntry<OI_Ptr> OIEntry;
	typedef HackEntry<OO_Ptr> OOEntry;
	typedef HackEntry<CU_Ptr> CUEntry;

	// OI

	m_oi_list.push_back(OIEntry(CRC::FFXII, CRC::EU, &GSRendererHW::OI_FFXII));
	m_oi_list.push_back(OIEntry(CRC::FFX, CRC::RegionCount, &GSRendererHW::OI_FFX));
	m_oi_list.push_back(OIEntry(CRC::MetalSlug6, CRC::RegionCount, &GSRendererHW::OI_MetalSlug6));
	m_oi_list.push_back(OIEntry(CRC::GodOfWar2, CRC::RegionCount, &GSRendererHW::OI_GodOfWar2));
	m_oi_list.push_back(OIEntry(CRC::SimpsonsGame, CRC::RegionCount, &GSRendererHW::OI_SimpsonsGame));
	m_oi_list.push_back(OIEntry(CRC::RozenMaidenGebetGarden, CRC::RegionCount, &GSRendererHW::OI_RozenMaidenGebetGarden));
	m_oi_list.push_back(OIEntry(CRC::SpidermanWoS, CRC::RegionCount, &GSRendererHW::OI_SpidermanWoS));
	m_oi_list.push_back(OIEntry(CRC::TyTasmanianTiger, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
	m_oi_list.push_back(OIEntry(CRC::TyTasmanianTiger2, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
	m_oi_list.push_back(OIEntry(CRC::DigimonRumbleArena2, CRC::RegionCount, &GSRendererHW::OI_DigimonRumbleArena2));
	m_oi_list.push_back(OIEntry(CRC::StarWarsForceUnleashed, CRC::RegionCount, &GSRendererHW::OI_StarWarsForceUnleashed));
	m_oi_list.push_back(OIEntry(CRC::BlackHawkDown, CRC::RegionCount, &GSRendererHW::OI_BlackHawkDown));
	m_oi_list.push_back(OIEntry(CRC::XmenOriginsWolverine, CRC::RegionCount, &GSRendererHW::OI_XmenOriginsWolverine));
	m_oi_list.push_back(OIEntry(CRC::CallofDutyFinalFronts, CRC::RegionCount, &GSRendererHW::OI_CallofDutyFinalFronts));
	m_oi_list.push_back(OIEntry(CRC::SpyroNewBeginning, CRC::RegionCount, &GSRendererHW::OI_SpyroNewBeginning));
	m_oi_list.push_back(OIEntry(CRC::SpyroEternalNight, CRC::RegionCount, &GSRendererHW::OI_SpyroEternalNight));
	m_oi_list.push_back(OIEntry(CRC::TalesOfLegendia, CRC::RegionCount, &GSRendererHW::OI_TalesOfLegendia));

	// OO

	m_oo_list.push_back(OOEntry(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::OO_DBZBT2));
	m_oo_list.push_back(OOEntry(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::OO_MajokkoALaMode2));

	// CU

	m_cu_list.push_back(CUEntry(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::CU_DBZBT2));
	m_cu_list.push_back(CUEntry(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::CU_MajokkoALaMode2));
	m_cu_list.push_back(CUEntry(CRC::TalesOfAbyss, CRC::RegionCount, &GSRendererHW::CU_TalesOfAbyss));
}
|
||||
|
||||
// Selects the active OI/OO/CU hooks for the given game. The lookup key packs
// the region into the top byte and the title into the lower bits. Games
// flagged with PointListPalette get the generic point list palette OI hook.
void GSRendererHW::Hacks::SetGameCRC(const CRC::Game& game)
{
	uint32 key = (uint32)((game.region << 24) | game.title);

	m_oi = m_oi_map[key];
	m_oo = m_oo_map[key];
	m_cu = m_cu_map[key];

	if(!(game.flags & CRC::PointListPalette))
	{
		return;
	}

	// the flag based hook must not replace a title specific one

	ASSERT(m_oi == NULL);

	m_oi = &GSRendererHW::OI_PointListPalette;
}
|
||||
|
||||
// FFXII video hack.
//
// State lives in two function-local statics that persist across calls:
//   lines - 0 until a line batch of 448*2 or 512*2 vertices has been seen, then tail / 2
//   video - lazily allocated 512x512 buffer collecting the colors of incoming points
//
// While armed (lines != 0):
//   - a point batch of at least 16*512 vertices is copied into 'video' and false is returned
//   - a line batch of exactly lines*2 vertices is replaced by two indexed triangles that
//     sample a new texture built from 'video'
//   - a smaller point batch or a line batch of another size disarms the hack (lines = 0)
//
// Returns true in every case except the point capture.
// NOTE(review): how the caller interprets the return value is not visible here.
bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	static uint32* video = NULL;
	static size_t lines = 0;

	if(lines == 0)
	{
		if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.tail == 448 * 2 || m_vertex.tail == 512 * 2))
		{
			lines = m_vertex.tail / 2;
		}
	}
	else
	{
		if(m_vt->m_primclass == GS_POINT_CLASS)
		{
			if(m_vertex.tail >= 16 * 512)
			{
				// incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454

				if(!video) video = new uint32[512 * 512];

				int ox = m_context->XYOFFSET.OFX;
				int oy = m_context->XYOFFSET.OFY;

				const uint8* RESTRICT v = m_vertex.buff;

				// Visit exactly m_vertex.tail vertices. The former "i >= 0" bound ran one
				// iteration too many and read the slot after the last queued vertex,
				// storing a pixel derived from stale data.
				for(int i = (int)m_vertex.tail; i > 0; i--, v += m_vertex.stride)
				{
					int x = (GetPosX(v) - ox) >> 4;
					int y = (GetPosY(v) - oy) >> 4;

					// y * 448 + x (448 = 256 + 128 + 64)
					video[(y << 8) + (y << 7) + (y << 6) + x] = GetColor(v);
				}

				return false;
			}
			else
			{
				lines = 0;
			}
		}
		else if(m_vt->m_primclass == GS_LINE_CLASS)
		{
			if(m_vertex.tail == lines * 2)
			{
				// normally, this step would copy the video onto screen with 512 texture mapped horizontal lines,
				// but we use the stored video data to create a new texture, and replace the lines with two triangles

				m_dev->Recycle(t->m_texture);

				t->m_texture = m_dev->CreateTexture(512, 512);

				t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);

				// keep vertices 0 and 1, move the last two line vertices into slots 2 and 3

				memcpy(&m_vertex.buff[m_vertex.stride * 2], &m_vertex.buff[m_vertex.stride * (m_vertex.tail - 2)], m_vertex.stride);
				memcpy(&m_vertex.buff[m_vertex.stride * 3], &m_vertex.buff[m_vertex.stride * (m_vertex.tail - 1)], m_vertex.stride);

				// two triangles sharing the edge 1-2

				m_index.buff[0] = 0;
				m_index.buff[1] = 1;
				m_index.buff[2] = 2;
				m_index.buff[3] = 1;
				m_index.buff[4] = 2;
				m_index.buff[5] = 3;

				m_vertex.head = m_vertex.tail = 4;
				m_index.tail = 6;

				m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
			}
			else
			{
				lines = 0;
			}
		}
	}

	return true;
}
|
||||
|
||||
// FFX: clears the depth target 'ds' when the frame/z/texture registers match the
// random battle transition, where the z buffer was written directly. Always returns true.
bool GSRendererHW::OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 zBlock = m_context->ZBUF.Block();
	const uint32 texBase = m_context->TEX0.TBP0;

	const bool frameMatches = frameBlock == 0x00d00 || frameBlock == 0x00000;
	const bool textureMatches = PRIM->TME && texBase == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S;

	if(frameMatches && zBlock == 0x02100 && textureMatches)
	{
		// random battle transition (z buffer written directly, clear it now)

		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
|
||||
|
||||
// Metal Slug 6: missing red channel fix (looks alright in pcsx2 r5000+).
// For every queued vertex whose red is 0 while green and blue are both non-zero,
// red is replaced by the rounded average of green and blue. The vertex trace is
// refreshed afterwards because the colors changed. Always returns true.
bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	uint8* RESTRICT v = m_vertex.buff;

	// Visit exactly m_vertex.tail vertices. The former "i >= 0" bound ran one iteration
	// too many and could read and rewrite the slot after the last queued vertex.
	for(int i = (int)m_vertex.tail; i > 0; i--, v += m_vertex.stride)
	{
		uint32 c = GetColor(v);

		uint32 r = (c >> 0) & 0xff;
		uint32 g = (c >> 8) & 0xff;
		uint32 b = (c >> 16) & 0xff;

		if(r == 0 && g != 0 && b != 0)
		{
			// keep the upper three bytes, put (g + b) / 2 rounded up into the low byte
			SetColor(v, (c & 0xffffff00) | ((g + b + 1) >> 1));
		}
	}

	m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt->m_primclass);

	return true;
}
|
||||
|
||||
// God of War 2: when the frame buffer is a PSMZ24 surface at one of the known base
// addresses, the draw is a z buffer clear. The matching depth target is looked up in
// the texture cache and cleared, and false is returned. Otherwise returns true.
bool GSRendererHW::OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameWidth = m_context->FRAME.FBW;
	const uint32 frameFormat = m_context->FRAME.PSM;

	// ntsc 0xf00, pal 0x100, ntsc "HD" 0x1280
	const bool knownBase = frameBlock == 0x00f00 || frameBlock == 0x00100 || frameBlock == 0x01280;

	if(!knownBase || frameFormat != PSM_PSMZ24)
	{
		return true;
	}

	// z buffer clear

	GIFRegTEX0 TEX0;

	TEX0.TBP0 = frameBlock;
	TEX0.TBW = frameWidth;
	TEX0.PSM = frameFormat;

	GSTextureCache::Target* depth = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

	if(depth != NULL)
	{
		m_dev->ClearDepth(depth->m_texture, 0);
	}

	return false;
}
|
||||
|
||||
// The Simpsons Game: clears the whole depth target and returns false when the frame
// buffer is PSMZ24 at 0x1500 (ntsc) or 0x1800 (pal). Otherwise returns true.
bool GSRendererHW::OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	const bool knownBase = frameBlock == 0x01500 || frameBlock == 0x01800; //0x1800 pal, 0x1500 ntsc

	if(!knownBase || frameFormat != PSM_PSMZ24)
	{
		return true;
	}

	// instead of just simply drawing a full height 512x512 sprite to clear the z buffer,
	// it uses a 512x256 sprite only, yet it is still able to fill the whole surface with zeros,
	// how? by using a render target that overlaps with the lower half of the z buffer...

	// TODO: tony hawk pro skater 4 same problem, the empty half is not visible though, painted over fully

	m_dev->ClearDepth(ds, 0);

	return false;
}
|
||||
|
||||
// Rozen Maiden Gebet Garden: handles two untextured clears where the frame and z
// buffers are swapped. In both cases the affected target is looked up in the texture
// cache, cleared, and false is returned. Every other draw returns true.
bool GSRendererHW::OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	if(PRIM->TME)
	{
		return true;
	}

	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 zBlock = m_context->ZBUF.Block();

	if(frameBlock == 0x008c0 && zBlock == 0x01a40)
	{
		// frame buffer clear, atst = fail, afail = write z only, z buffer points to frame buffer

		GIFRegTEX0 TEX0;

		TEX0.TBP0 = zBlock;
		TEX0.TBW = m_context->FRAME.FBW;
		TEX0.PSM = m_context->FRAME.PSM;

		GSTextureCache::Target* color = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);

		if(color != NULL)
		{
			m_dev->ClearRenderTarget(color->m_texture, 0);
		}

		return false;
	}

	if(frameBlock == 0x00000 && zBlock == 0x01180)
	{
		// z buffer clear, frame buffer now points to the z buffer (how can they be so clever?)

		GIFRegTEX0 TEX0;

		TEX0.TBP0 = frameBlock;
		TEX0.TBW = m_context->FRAME.FBW;
		TEX0.PSM = m_context->ZBUF.PSM;

		GSTextureCache::Target* depth = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

		if(depth != NULL)
		{
			m_dev->ClearDepth(depth->m_texture, 0);
		}

		return false;
	}

	return true;
}
|
||||
|
||||
// Spider-Man: Web of Shadows: clears the depth target when drawing to a PSMCT32
// frame buffer at 0x25a0 (ntsc) or 0x2800 (pal). Always returns true.
bool GSRendererHW::OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	const bool knownBase = frameBlock == 0x025a0 || frameBlock == 0x02800; //0x2800 pal, 0x25a0 ntsc

	if(knownBase && frameFormat == PSM_PSMCT32)
	{
		//only top half of the screen clears
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
|
||||
|
||||
// Ty the Tasmanian Tiger: clears the depth target and returns false when the frame
// buffer is PSMCT24 at 0x2800 (pal) or 0x2bc0 (ntsc). Otherwise returns true.
bool GSRendererHW::OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	const bool knownBase = frameBlock == 0x02800 || frameBlock == 0x02BC0; //0x2800 pal, 0x2bc0 ntsc

	if(!knownBase || frameFormat != PSM_PSMCT24)
	{
		return true;
	}

	//half height buffer clear
	m_dev->ClearDepth(ds, 0);

	return false;
}
|
||||
|
||||
// Digimon Rumble Arena 2: clears the depth target for untextured draws to a PSMCT32
// frame buffer at 0x2300 or 0x3fc0. Always returns true.
bool GSRendererHW::OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	const bool knownBase = frameBlock == 0x02300 || frameBlock == 0x03fc0;

	if(!PRIM->TME && knownBase && frameFormat == PSM_PSMCT32)
	{
		//half height buffer clear
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
|
||||
|
||||
// Black Hawk Down: clears the depth target and returns false when the frame buffer
// is PSMZ24 at 0x2000. Otherwise returns true.
bool GSRendererHW::OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	if(frameBlock != 0x02000 || frameFormat != PSM_PSMZ24)
	{
		return true;
	}

	//half height buffer clear
	m_dev->ClearDepth(ds, 0);

	return false;
}
|
||||
|
||||
// Star Wars: The Force Unleashed.
// Textured draws: clears depth when the frame buffer is PSMCT32 at 0x0 or 0x1180 and
// every vertex z is 0; returns true.
// Untextured draws: clears depth and returns false when the frame buffer is PSMCT24
// at 0x2bc0; otherwise returns true.
bool GSRendererHW::OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	if(PRIM->TME)
	{
		if((frameBlock == 0x0 || frameBlock == 0x01180) && frameFormat == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_max.p.z == 0))
		{
			m_dev->ClearDepth(ds, 0);
		}

		return true;
	}

	if(frameFormat == PSM_PSMCT24 && frameBlock == 0x2bc0)
	{
		m_dev->ClearDepth(ds, 0);

		return false;
	}

	return true;
}
|
||||
|
||||
// X-Men Origins: Wolverine: clears the depth target when drawing to a PSMCT16 frame
// buffer at block 0. Always returns true.
bool GSRendererHW::OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const bool atBlockZero = m_context->FRAME.Block() == 0x0;
	const bool is16Bit = m_context->FRAME.PSM == PSM_PSMCT16;

	if(atBlockZero && is16Bit)
	{
		//half height buffer clear
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
|
||||
|
||||
// Call of Duty: Final Fronts: clears the depth target and returns false when the
// frame buffer is PSMZ24 at 0x2300. Otherwise returns true.
bool GSRendererHW::OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	if(frameBlock != 0x02300 || frameFormat != PSM_PSMZ24)
	{
		return true;
	}

	//half height buffer clear
	m_dev->ClearDepth(ds, 0);

	return false;
}
|
||||
|
||||
// Spyro: A New Beginning.
// Textured draws: clears depth when the frame buffer is PSMCT32 at 0x0 or 0x1180 and
// every vertex z is 0; returns true.
// Untextured draws: clears depth and returns false when the frame buffer is PSMCT24
// at 0x2800 (pal) or 0x2bc0 (ntsc); otherwise returns true.
bool GSRendererHW::OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	if(PRIM->TME)
	{
		if((frameBlock == 0x0 || frameBlock == 0x01180) && frameFormat == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0))
		{
			m_dev->ClearDepth(ds, 0);
		}

		return true;
	}

	if(frameFormat == PSM_PSMCT24 && (frameBlock == 0x02800 || frameBlock == 0x02bc0)) //0x2800 pal, 0x2bc0 ntsc
	{
		//half height buffer clear
		m_dev->ClearDepth(ds, 0);

		return false;
	}

	return true;
}
|
||||
|
||||
// Spyro: The Eternal Night.
// Textured draws: clears depth when the frame buffer is PSMCT32 at 0x0 or 0x1180 and
// every vertex z is 0; returns true.
// Untextured draws: clears depth and returns false when the frame buffer is PSMCT24
// at 0x2bc0; otherwise returns true.
bool GSRendererHW::OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	if(PRIM->TME)
	{
		if((frameBlock == 0x0 || frameBlock == 0x01180) && frameFormat == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0))
		{
			m_dev->ClearDepth(ds, 0);
		}

		return true;
	}

	if(frameFormat == PSM_PSMCT24 && frameBlock == 0x2bc0)
	{
		//half height buffer clear
		m_dev->ClearDepth(ds, 0);

		return false;
	}

	return true;
}
|
||||
|
||||
// Tales of Legendia: forces the depth test to ZTST_ALWAYS for draws to a PSMCT32
// frame buffer at 0x1c00 with alpha test disabled and a constant vertex z.
// Always returns true.
bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	const bool targetMatches = frameFormat == PSM_PSMCT32 && frameBlock == 0x01c00;

	if(targetMatches && !m_context->TEST.ATE && m_vt->m_max.p.z == m_vt->m_min.p.z)
	{
		m_context->TEST.ZTST = ZTST_ALWAYS;
		//m_dev->ClearDepth(ds, 0);
	}

	return true;
}
|
||||
|
||||
|
||||
// Palette upload drawn as an untextured point list.
// Applies when the frame buffer block is >= 0x3f40 and a multiple of 0x20, and exactly
// 16 or 256 points are queued. Each point's alpha is doubled with saturation, the
// vertex color is updated, and the color is written straight into local memory on an
// 8-wide (16 points) or 16-wide (256 points) grid. The CLUT is then invalidated and
// false is returned. Any other point count asserts; all non-matching draws return true.
bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	if(m_vt->m_primclass != GS_POINT_CLASS || PRIM->TME)
	{
		return true;
	}

	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameWidth = m_context->FRAME.FBW;

	if(frameBlock < 0x03f40 || (frameBlock & 0x1f) != 0)
	{
		return true;
	}

	int columnBits; // log2 of the grid width
	int total; // number of points to process

	if(m_vertex.tail == 16)
	{
		columnBits = 3;
		total = 16;
	}
	else if(m_vertex.tail == 256)
	{
		columnBits = 4;
		total = 256;
	}
	else
	{
		ASSERT(0);

		return true;
	}

	const int columnMask = (1 << columnBits) - 1;

	uint8* RESTRICT cursor = m_vertex.buff;

	for(int i = 0; i < total; i++, cursor += m_vertex.stride)
	{
		const uint32 color = GetColor(cursor);
		const uint32 alpha = color >> 24;

		// alpha >= 0x80 saturates to 0xff, otherwise alpha * 2 lands in the top byte
		const uint32 scaledAlpha = alpha >= 0x80 ? 0xff000000 : (alpha << 25);
		const uint32 fixed = scaledAlpha | (color & 0x00ffffff);

		SetColor(cursor, fixed);

		m_mem.WritePixel32(i & columnMask, i >> columnBits, fixed, frameBlock, frameWidth);
	}

	m_mem.m_clut.Invalidate();

	return false;
}
|
||||
|
||||
void GSRendererHW::OO_DBZBT2()
|
||||
{
|
||||
// palette readback (cannot detect yet, when fetching the texture later)
|
||||
|
||||
uint32 FBP = m_context->FRAME.Block();
|
||||
uint32 TBP0 = m_context->TEX0.TBP0;
|
||||
|
||||
if(PRIM->TME && (FBP == 0x03c00 && TBP0 == 0x03c80 || FBP == 0x03ac0 && TBP0 == 0x03b40))
|
||||
{
|
||||
GIFRegBITBLTBUF BITBLTBUF;
|
||||
|
||||
BITBLTBUF.SBP = FBP;
|
||||
BITBLTBUF.SBW = 1;
|
||||
BITBLTBUF.SPSM = PSM_PSMCT32;
|
||||
|
||||
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 64, 64));
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererHW::OO_MajokkoALaMode2()
|
||||
{
|
||||
// palette readback
|
||||
|
||||
uint32 FBP = m_context->FRAME.Block();
|
||||
|
||||
if(!PRIM->TME && FBP == 0x03f40)
|
||||
{
|
||||
GIFRegBITBLTBUF BITBLTBUF;
|
||||
|
||||
BITBLTBUF.SBP = FBP;
|
||||
BITBLTBUF.SBW = 1;
|
||||
BITBLTBUF.SPSM = PSM_PSMCT32;
|
||||
|
||||
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 16, 16));
|
||||
}
|
||||
}
|
||||
|
||||
bool GSRendererHW::CU_DBZBT2()
|
||||
{
|
||||
// palette should stay 64 x 64
|
||||
|
||||
uint32 FBP = m_context->FRAME.Block();
|
||||
|
||||
return FBP != 0x03c00 && FBP != 0x03ac0;
|
||||
}
|
||||
|
||||
bool GSRendererHW::CU_MajokkoALaMode2()
|
||||
{
|
||||
// palette should stay 16 x 16
|
||||
|
||||
uint32 FBP = m_context->FRAME.Block();
|
||||
|
||||
return FBP != 0x03f40;
|
||||
}
|
||||
|
||||
bool GSRendererHW::CU_TalesOfAbyss()
|
||||
{
|
||||
// full image blur and brightening
|
||||
|
||||
uint32 FBP = m_context->FRAME.Block();
|
||||
|
||||
return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0;
|
||||
}
|
||||
|
|
|
@ -26,28 +26,8 @@
|
|||
#include "GSCrc.h"
|
||||
#include "GSFunctionMap.h"
|
||||
|
||||
|
||||
template<class Vertex>
|
||||
class GSRendererHW : public GSRendererT<Vertex>
|
||||
class GSRendererHW : public GSRenderer
|
||||
{
|
||||
protected:
|
||||
using GSRendererT<Vertex>::m_vt;
|
||||
using GSRendererT<Vertex>::m_count;
|
||||
using GSRendererT<Vertex>::m_env;
|
||||
using GSRendererT<Vertex>::m_context;
|
||||
using GSRendererT<Vertex>::m_vertices;
|
||||
using GSRendererT<Vertex>::m_dev;
|
||||
using GSRendererT<Vertex>::PRIM;
|
||||
using GSRendererT<Vertex>::m_mem;
|
||||
using GSRendererT<Vertex>::m_regs;
|
||||
using GSRendererT<Vertex>::m_perfmon;
|
||||
using GSRendererT<Vertex>::m_game;
|
||||
using GSRendererT<Vertex>::s_dump;
|
||||
using GSRendererT<Vertex>::s_save;
|
||||
using GSRendererT<Vertex>::s_saven;
|
||||
using GSRendererT<Vertex>::s_savez;
|
||||
using GSRendererT<Vertex>::s_n;
|
||||
|
||||
private:
|
||||
int m_width;
|
||||
int m_height;
|
||||
|
@ -56,512 +36,36 @@ private:
|
|||
bool m_nativeres;
|
||||
int m_upscale_multiplier;
|
||||
int m_userhacks_skipdraw;
|
||||
|
||||
|
||||
#pragma region hacks
|
||||
|
||||
typedef bool (GSRendererHW::*OI_Ptr)(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
typedef void (GSRendererHW::*OO_Ptr)();
|
||||
typedef bool (GSRendererHW::*CU_Ptr)();
|
||||
|
||||
// FFXII video hack (in-class version working on m_vertices / m_count).
// 'lines' and 'video' are function-local statics that persist across calls:
// 'lines' is armed by a line batch of 448*2 or 512*2 vertices, 'video' collects
// the colors of large point batches. A later line batch of lines*2 vertices is
// replaced by six triangle vertices that sample a texture built from 'video'.
// Returns false only after capturing a point batch.
bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	static uint32* video = NULL;
	static int lines = 0;

	const bool isLineBatch = m_vt.m_primclass == GS_LINE_CLASS;
	const bool isPointBatch = m_vt.m_primclass == GS_POINT_CLASS;

	if(lines == 0)
	{
		if(isLineBatch && (m_count == 448 * 2 || m_count == 512 * 2))
		{
			lines = m_count / 2;
		}

		return true;
	}

	if(isPointBatch)
	{
		if(m_count < 16 * 512)
		{
			lines = 0;

			return true;
		}

		// incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454

		if(video == NULL)
		{
			video = new uint32[512 * 512];
		}

		const int ox = m_context->XYOFFSET.OFX;
		const int oy = m_context->XYOFFSET.OFY;

		for(int n = 0; n < m_count; n++)
		{
			const int px = ((int)m_vertices[n].p.x - ox) >> 4;
			const int py = ((int)m_vertices[n].p.y - oy) >> 4;

			// py * 448 + px (448 = 256 + 128 + 64)
			video[(py << 8) + (py << 7) + (py << 6) + px] = m_vertices[n]._c0();
		}

		return false;
	}

	if(isLineBatch)
	{
		if(m_count != lines * 2)
		{
			lines = 0;

			return true;
		}

		// normally, this step would copy the video onto screen with 512 texture mapped horizontal lines,
		// but we use the stored video data to create a new texture, and replace the lines with two triangles

		m_dev->Recycle(t->m_texture);

		t->m_texture = m_dev->CreateTexture(512, 512);

		t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);

		// vertices 0 and 1 stay in place; slot 2 must be filled before it is copied to slot 4
		m_vertices[2] = m_vertices[m_count - 2];
		m_vertices[3] = m_vertices[1];
		m_vertices[4] = m_vertices[2];
		m_vertices[5] = m_vertices[m_count - 1];

		m_count = 6;

		m_vt.Update(m_vertices, m_count, GS_TRIANGLE_CLASS);
	}

	return true;
}
|
||||
|
||||
// FFX: clears the depth target 'ds' when the frame/z/texture registers match the
// random battle transition, where the z buffer was written directly. Always returns true.
bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 zBlock = m_context->ZBUF.Block();
	const uint32 texBase = m_context->TEX0.TBP0;

	const bool frameMatches = frameBlock == 0x00d00 || frameBlock == 0x00000;
	const bool textureMatches = PRIM->TME && texBase == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S;

	if(frameMatches && zBlock == 0x02100 && textureMatches)
	{
		// random battle transition (z buffer written directly, clear it now)

		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
|
||||
|
||||
// Metal Slug 6: missing red channel fix.
// Every vertex with red == 0 and non-zero green and blue gets red = (green + blue) / 2.
// The vertex trace is refreshed afterwards. Always returns true.
bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	for(int n = 0; n < m_count; n++)
	{
		const bool redMissing = m_vertices[n]._r() == 0;
		const bool hasGreenAndBlue = m_vertices[n]._g() != 0 && m_vertices[n]._b() != 0;

		if(!redMissing || !hasGreenAndBlue)
		{
			continue;
		}

		m_vertices[n]._r() = (m_vertices[n]._g() + m_vertices[n]._b()) / 2;
	}

	m_vt.Update(m_vertices, m_count, m_vt.m_primclass);

	return true;
}
|
||||
|
||||
// God of War 2: when the frame buffer is a PSMZ24 surface at one of the known base
// addresses, the draw is a z buffer clear. The matching depth target is looked up in
// the texture cache and cleared, and false is returned. Otherwise returns true.
bool OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameWidth = m_context->FRAME.FBW;
	const uint32 frameFormat = m_context->FRAME.PSM;

	// ntsc 0xf00, pal 0x100, ntsc "HD" 0x1280
	const bool knownBase = frameBlock == 0x00f00 || frameBlock == 0x00100 || frameBlock == 0x01280;

	if(!knownBase || frameFormat != PSM_PSMZ24)
	{
		return true;
	}

	// z buffer clear

	GIFRegTEX0 TEX0;

	TEX0.TBP0 = frameBlock;
	TEX0.TBW = frameWidth;
	TEX0.PSM = frameFormat;

	GSTextureCache::Target* depth = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

	if(depth != NULL)
	{
		m_dev->ClearDepth(depth->m_texture, 0);
	}

	return false;
}
|
||||
|
||||
// The Simpsons Game: clears the whole depth target and returns false when the frame
// buffer is PSMZ24 at 0x1500 (ntsc) or 0x1800 (pal). Otherwise returns true.
bool OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	const bool knownBase = frameBlock == 0x01500 || frameBlock == 0x01800; //0x1800 pal, 0x1500 ntsc

	if(!knownBase || frameFormat != PSM_PSMZ24)
	{
		return true;
	}

	// instead of just simply drawing a full height 512x512 sprite to clear the z buffer,
	// it uses a 512x256 sprite only, yet it is still able to fill the whole surface with zeros,
	// how? by using a render target that overlaps with the lower half of the z buffer...

	m_dev->ClearDepth(ds, 0);

	return false;
}
|
||||
|
||||
// Rozen Maiden Gebet Garden: handles two untextured clears where the frame and z
// buffers are swapped. In both cases the affected target is looked up in the texture
// cache, cleared, and false is returned. Every other draw returns true.
bool OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	if(PRIM->TME)
	{
		return true;
	}

	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 zBlock = m_context->ZBUF.Block();

	if(frameBlock == 0x008c0 && zBlock == 0x01a40)
	{
		// frame buffer clear, atst = fail, afail = write z only, z buffer points to frame buffer

		GIFRegTEX0 TEX0;

		TEX0.TBP0 = zBlock;
		TEX0.TBW = m_context->FRAME.FBW;
		TEX0.PSM = m_context->FRAME.PSM;

		GSTextureCache::Target* color = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);

		if(color != NULL)
		{
			m_dev->ClearRenderTarget(color->m_texture, 0);
		}

		return false;
	}

	if(frameBlock == 0x00000 && zBlock == 0x01180)
	{
		// z buffer clear, frame buffer now points to the z buffer (how can they be so clever?)

		GIFRegTEX0 TEX0;

		TEX0.TBP0 = frameBlock;
		TEX0.TBW = m_context->FRAME.FBW;
		TEX0.PSM = m_context->ZBUF.PSM;

		GSTextureCache::Target* depth = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true);

		if(depth != NULL)
		{
			m_dev->ClearDepth(depth->m_texture, 0);
		}

		return false;
	}

	return true;
}
|
||||
|
||||
// Spider-Man: Web of Shadows: clears the depth target when drawing to a PSMCT32
// frame buffer at 0x25a0 (ntsc) or 0x2800 (pal). Always returns true.
bool OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	const bool knownBase = frameBlock == 0x025a0 || frameBlock == 0x02800; //0x2800 pal, 0x25a0 ntsc

	if(knownBase && frameFormat == PSM_PSMCT32)
	{
		//only top half of the screen clears
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
|
||||
|
||||
// Ty the Tasmanian Tiger: clears the depth target and returns false when the frame
// buffer is PSMCT24 at 0x2800 (pal) or 0x2bc0 (ntsc). Otherwise returns true.
bool OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	const bool knownBase = frameBlock == 0x02800 || frameBlock == 0x02BC0; //0x2800 pal, 0x2bc0 ntsc

	if(!knownBase || frameFormat != PSM_PSMCT24)
	{
		return true;
	}

	//half height buffer clear
	m_dev->ClearDepth(ds, 0);

	return false;
}
|
||||
|
||||
// Digimon Rumble Arena 2: clears the depth target for untextured draws to a PSMCT32
// frame buffer at 0x2300 or 0x3fc0. Always returns true.
bool OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	const bool knownBase = frameBlock == 0x02300 || frameBlock == 0x03fc0;

	if(!PRIM->TME && knownBase && frameFormat == PSM_PSMCT32)
	{
		//half height buffer clear
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
|
||||
|
||||
// Black Hawk Down: clears the depth target and returns false when the frame buffer
// is PSMZ24 at 0x2000. Otherwise returns true.
bool OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	if(frameBlock != 0x02000 || frameFormat != PSM_PSMZ24)
	{
		return true;
	}

	//half height buffer clear
	m_dev->ClearDepth(ds, 0);

	return false;
}
|
||||
|
||||
// Star Wars: The Force Unleashed.
// Textured draws: clears depth when the frame buffer is PSMCT32 at 0x0 or 0x1180 and
// every vertex z is 0; returns true.
// Untextured draws: clears depth and returns false when the frame buffer is PSMCT24
// at 0x2bc0; otherwise returns true.
bool OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	if(PRIM->TME)
	{
		if((frameBlock == 0x0 || frameBlock == 0x01180) && frameFormat == PSM_PSMCT32 && (m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_max.p.z == 0))
		{
			m_dev->ClearDepth(ds, 0);
		}

		return true;
	}

	if(frameFormat == PSM_PSMCT24 && frameBlock == 0x2bc0)
	{
		m_dev->ClearDepth(ds, 0);

		return false;
	}

	return true;
}
|
||||
|
||||
// X-Men Origins: Wolverine: clears the depth target when drawing to a PSMCT16 frame
// buffer at block 0. Always returns true.
bool OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const bool atBlockZero = m_context->FRAME.Block() == 0x0;
	const bool is16Bit = m_context->FRAME.PSM == PSM_PSMCT16;

	if(atBlockZero && is16Bit)
	{
		//half height buffer clear
		m_dev->ClearDepth(ds, 0);
	}

	return true;
}
|
||||
|
||||
// Call of Duty: Final Fronts: clears the depth target and returns false when the
// frame buffer is PSMZ24 at 0x2300. Otherwise returns true.
bool OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	if(frameBlock != 0x02300 || frameFormat != PSM_PSMZ24)
	{
		return true;
	}

	//half height buffer clear
	m_dev->ClearDepth(ds, 0);

	return false;
}
|
||||
|
||||
// Spyro: A New Beginning.
// Textured draws: clears depth when the frame buffer is PSMCT32 at 0x0 or 0x1180 and
// every vertex z is 0; returns true.
// Untextured draws: clears depth and returns false when the frame buffer is PSMCT24
// at 0x2800 (pal) or 0x2bc0 (ntsc); otherwise returns true.
bool OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	if(PRIM->TME)
	{
		if((frameBlock == 0x0 || frameBlock == 0x01180) && frameFormat == PSM_PSMCT32 && (m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_min.p.z == 0x0))
		{
			m_dev->ClearDepth(ds, 0);
		}

		return true;
	}

	if(frameFormat == PSM_PSMCT24 && (frameBlock == 0x02800 || frameBlock == 0x02bc0)) //0x2800 pal, 0x2bc0 ntsc
	{
		//half height buffer clear
		m_dev->ClearDepth(ds, 0);

		return false;
	}

	return true;
}
|
||||
|
||||
// Spyro: The Eternal Night.
// Textured draws: clears depth when the frame buffer is PSMCT32 at 0x0 or 0x1180 and
// every vertex z is 0; returns true.
// Untextured draws: clears depth and returns false when the frame buffer is PSMCT24
// at 0x2bc0; otherwise returns true.
bool OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	if(PRIM->TME)
	{
		if((frameBlock == 0x0 || frameBlock == 0x01180) && frameFormat == PSM_PSMCT32 && (m_vt.m_max.p.z == m_vt.m_min.p.z && m_vt.m_min.p.z == 0x0))
		{
			m_dev->ClearDepth(ds, 0);
		}

		return true;
	}

	if(frameFormat == PSM_PSMCT24 && frameBlock == 0x2bc0)
	{
		//half height buffer clear
		m_dev->ClearDepth(ds, 0);

		return false;
	}

	return true;
}
|
||||
|
||||
// Tales of Legendia: forces the depth test to ZTST_ALWAYS for draws to a PSMCT32
// frame buffer at 0x1c00 with alpha test disabled and a constant vertex z.
// Always returns true.
bool OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameFormat = m_context->FRAME.PSM;

	const bool targetMatches = frameFormat == PSM_PSMCT32 && frameBlock == 0x01c00;

	if(targetMatches && !m_context->TEST.ATE && m_vt.m_max.p.z == m_vt.m_min.p.z)
	{
		m_context->TEST.ZTST = ZTST_ALWAYS;
		//m_dev->ClearDepth(ds, 0);
	}

	return true;
}
|
||||
|
||||
|
||||
// Palette upload drawn as an untextured point list (in-class version on m_vertices).
// Applies when the frame buffer block is >= 0x3f40 and a multiple of 0x20, and exactly
// 16 or 256 points are queued. Each point's alpha is doubled with saturation and its
// color is written straight into local memory on an 8-wide (16 points) or 16-wide
// (256 points) grid. The CLUT is then invalidated and false is returned. Any other
// point count asserts; all non-matching draws return true.
bool OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
	if(m_vt.m_primclass != GS_POINT_CLASS || PRIM->TME)
	{
		return true;
	}

	const uint32 frameBlock = m_context->FRAME.Block();
	const uint32 frameWidth = m_context->FRAME.FBW;

	if(frameBlock < 0x03f40 || (frameBlock & 0x1f) != 0)
	{
		return true;
	}

	int columnBits; // log2 of the grid width
	int total; // number of points to process

	if(m_count == 16)
	{
		columnBits = 3;
		total = 16;
	}
	else if(m_count == 256)
	{
		columnBits = 4;
		total = 256;
	}
	else
	{
		ASSERT(0);

		return true;
	}

	const int columnMask = (1 << columnBits) - 1;

	for(int n = 0; n < total; n++)
	{
		uint8 alpha = m_vertices[n]._a();

		// alpha >= 0x80 saturates to 0xff, otherwise it is doubled
		m_vertices[n]._a() = alpha >= 0x80 ? 0xff : alpha * 2;

		m_mem.WritePixel32(n & columnMask, n >> columnBits, m_vertices[n]._c0(), frameBlock, frameWidth);
	}

	m_mem.m_clut.Invalidate();

	return false;
}
|
||||
|
||||
void OO_DBZBT2()
|
||||
{
|
||||
// palette readback (cannot detect yet, when fetching the texture later)
|
||||
|
||||
uint32 FBP = m_context->FRAME.Block();
|
||||
uint32 TBP0 = m_context->TEX0.TBP0;
|
||||
|
||||
if(PRIM->TME && (FBP == 0x03c00 && TBP0 == 0x03c80 || FBP == 0x03ac0 && TBP0 == 0x03b40))
|
||||
{
|
||||
GIFRegBITBLTBUF BITBLTBUF;
|
||||
|
||||
BITBLTBUF.SBP = FBP;
|
||||
BITBLTBUF.SBW = 1;
|
||||
BITBLTBUF.SPSM = PSM_PSMCT32;
|
||||
|
||||
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 64, 64));
|
||||
}
|
||||
}
|
||||
|
||||
void OO_MajokkoALaMode2()
|
||||
{
|
||||
// palette readback
|
||||
|
||||
uint32 FBP = m_context->FRAME.Block();
|
||||
|
||||
if(!PRIM->TME && FBP == 0x03f40)
|
||||
{
|
||||
GIFRegBITBLTBUF BITBLTBUF;
|
||||
|
||||
BITBLTBUF.SBP = FBP;
|
||||
BITBLTBUF.SBW = 1;
|
||||
BITBLTBUF.SPSM = PSM_PSMCT32;
|
||||
|
||||
InvalidateLocalMem(BITBLTBUF, GSVector4i(0, 0, 16, 16));
|
||||
}
|
||||
}
|
||||
|
||||
bool CU_DBZBT2()
|
||||
{
|
||||
// palette should stay 64 x 64
|
||||
|
||||
uint32 FBP = m_context->FRAME.Block();
|
||||
|
||||
return FBP != 0x03c00 && FBP != 0x03ac0;
|
||||
}
|
||||
|
||||
bool CU_MajokkoALaMode2()
|
||||
{
|
||||
// palette should stay 16 x 16
|
||||
|
||||
uint32 FBP = m_context->FRAME.Block();
|
||||
|
||||
return FBP != 0x03f40;
|
||||
}
|
||||
|
||||
bool CU_TalesOfAbyss()
|
||||
{
|
||||
// full image blur and brightening
|
||||
|
||||
uint32 FBP = m_context->FRAME.Block();
|
||||
|
||||
return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0;
|
||||
}
|
||||
bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_GodOfWar2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_SimpsonsGame(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_SpidermanWoS(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_TyTasmanianTiger(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_DigimonRumbleArena2(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_BlackHawkDown(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_XmenOriginsWolverine(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_CallofDutyFinalFronts(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
bool OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t);
|
||||
void OO_DBZBT2();
|
||||
void OO_MajokkoALaMode2();
|
||||
|
||||
bool CU_DBZBT2();
|
||||
bool CU_MajokkoALaMode2();
|
||||
bool CU_TalesOfAbyss();
|
||||
|
||||
class Hacks
|
||||
{
|
||||
|
@ -617,383 +121,37 @@ private:
|
|||
OO_Ptr m_oo;
|
||||
CU_Ptr m_cu;
|
||||
|
||||
Hacks()
|
||||
: m_oi_map(m_oi_list)
|
||||
, m_oo_map(m_oo_list)
|
||||
, m_cu_map(m_cu_list)
|
||||
, m_oi(NULL)
|
||||
, m_oo(NULL)
|
||||
, m_cu(NULL)
|
||||
{
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::FFXII, CRC::EU, &GSRendererHW::OI_FFXII));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::FFX, CRC::RegionCount, &GSRendererHW::OI_FFX));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::MetalSlug6, CRC::RegionCount, &GSRendererHW::OI_MetalSlug6));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::GodOfWar2, CRC::RegionCount, &GSRendererHW::OI_GodOfWar2));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SimpsonsGame, CRC::RegionCount, &GSRendererHW::OI_SimpsonsGame));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::RozenMaidenGebetGarden, CRC::RegionCount, &GSRendererHW::OI_RozenMaidenGebetGarden));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpidermanWoS, CRC::RegionCount, &GSRendererHW::OI_SpidermanWoS));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TyTasmanianTiger, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TyTasmanianTiger2, CRC::RegionCount, &GSRendererHW::OI_TyTasmanianTiger));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::DigimonRumbleArena2, CRC::RegionCount, &GSRendererHW::OI_DigimonRumbleArena2));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::StarWarsForceUnleashed, CRC::RegionCount, &GSRendererHW::OI_StarWarsForceUnleashed));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::BlackHawkDown, CRC::RegionCount, &GSRendererHW::OI_BlackHawkDown));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::XmenOriginsWolverine, CRC::RegionCount, &GSRendererHW::OI_XmenOriginsWolverine));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::CallofDutyFinalFronts, CRC::RegionCount, &GSRendererHW::OI_CallofDutyFinalFronts));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpyroNewBeginning, CRC::RegionCount, &GSRendererHW::OI_SpyroNewBeginning));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::SpyroEternalNight, CRC::RegionCount, &GSRendererHW::OI_SpyroEternalNight));
|
||||
m_oi_list.push_back(HackEntry<OI_Ptr>(CRC::TalesOfLegendia, CRC::RegionCount, &GSRendererHW::OI_TalesOfLegendia));
|
||||
Hacks();
|
||||
|
||||
m_oo_list.push_back(HackEntry<OO_Ptr>(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::OO_DBZBT2));
|
||||
m_oo_list.push_back(HackEntry<OO_Ptr>(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::OO_MajokkoALaMode2));
|
||||
|
||||
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::DBZBT2, CRC::RegionCount, &GSRendererHW::CU_DBZBT2));
|
||||
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::MajokkoALaMode2, CRC::RegionCount, &GSRendererHW::CU_MajokkoALaMode2));
|
||||
m_cu_list.push_back(HackEntry<CU_Ptr>(CRC::TalesOfAbyss, CRC::RegionCount, &GSRendererHW::CU_TalesOfAbyss));
|
||||
}
|
||||
|
||||
// Selects the per-game hack callbacks (m_oi, m_oo, m_cu) for the given game.
// The lookup key packs the region into the top byte and the title below it.
void SetGame(const CRC::Game& game)
{
	const uint32 key = (uint32)((game.region << 24) | game.title);

	m_oi = m_oi_map[key];
	m_oo = m_oo_map[key];
	m_cu = m_cu_map[key];

	if(!(game.flags & CRC::PointListPalette))
	{
		return;
	}

	// the generic point-list-palette handler must not displace a title-specific one

	ASSERT(m_oi == NULL);

	m_oi = &GSRendererHW::OI_PointListPalette;
}
|
||||
void SetGameCRC(const CRC::Game& game);
|
||||
|
||||
} m_hacks;
|
||||
|
||||
virtual int GetPosX(const void* vertex) const = 0;
|
||||
virtual int GetPosY(const void* vertex) const = 0;
|
||||
virtual uint32 GetColor(const void* vertex) const = 0;
|
||||
virtual void SetColor(void* vertex, uint32 c) const = 0;
|
||||
|
||||
#pragma endregion
|
||||
|
||||
protected:
|
||||
GSTextureCache* m_tc;
|
||||
|
||||
void Reset()
|
||||
{
|
||||
// TODO: GSreset can come from the main thread too => crash
|
||||
// m_tc->RemoveAll();
|
||||
|
||||
m_reset = true;
|
||||
|
||||
GSRendererT<Vertex>::Reset();
|
||||
}
|
||||
|
||||
void VSync(int field)
|
||||
{
|
||||
GSRendererT<Vertex>::VSync(field);
|
||||
|
||||
m_tc->IncAge();
|
||||
m_dev->AgePool();
|
||||
|
||||
m_skip = 0;
|
||||
|
||||
if(m_reset)
|
||||
{
|
||||
m_tc->RemoveAll();
|
||||
|
||||
m_reset = false;
|
||||
}
|
||||
}
|
||||
|
||||
void ResetDevice()
|
||||
{
|
||||
m_tc->RemoveAll();
|
||||
|
||||
GSRendererT<Vertex>::ResetDevice();
|
||||
}
|
||||
|
||||
// Returns the texture backing display circuit i, or NULL when the texture
// cache has no target for the frame buffer described by DISP[i].DISPFB.
// When s_dump is set, every successful lookup bumps s_n and, if saving is
// enabled and s_n has reached s_saven, the texture is written to disk.
GSTexture* GetOutput(int i)
{
	const GSRegDISPFB& dispfb = m_regs->DISP[i].DISPFB;

	GIFRegTEX0 TEX0;

	TEX0.TBP0 = dispfb.Block();
	TEX0.TBW = dispfb.FBW;
	TEX0.PSM = dispfb.PSM;

	// TRACE(_T("[%d] GetOutput %d %05x (%d)\n"), (int)m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM);

	GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height);

	if(rt == NULL)
	{
		return NULL;
	}

	GSTexture* output = rt->m_texture;

	if(s_dump)
	{
		const bool save_now = s_save && s_n >= s_saven;

		if(save_now)
		{
			output->Save(format("c:\\temp2\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM));
		}

		s_n++;
	}

	return output;
}
|
||||
|
||||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
|
||||
{
|
||||
// printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM);
|
||||
|
||||
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
|
||||
}
|
||||
|
||||
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false)
|
||||
{
|
||||
// printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM);
|
||||
|
||||
if(clut) return; // FIXME
|
||||
|
||||
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
|
||||
}
|
||||
|
||||
void Draw()
|
||||
{
|
||||
#ifndef DISABLE_CRC_HACKS
|
||||
if(GSRendererT<Vertex>::IsBadFrame(m_skip, m_userhacks_skipdraw)) return;
|
||||
#endif
|
||||
|
||||
GSDrawingEnvironment& env = m_env;
|
||||
GSDrawingContext* context = m_context;
|
||||
|
||||
GIFRegTEX0 TEX0;
|
||||
|
||||
TEX0.TBP0 = context->FRAME.Block();
|
||||
TEX0.TBW = context->FRAME.FBW;
|
||||
TEX0.PSM = context->FRAME.PSM;
|
||||
GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true);
|
||||
|
||||
TEX0.TBP0 = context->ZBUF.Block();
|
||||
TEX0.TBW = context->FRAME.FBW;
|
||||
TEX0.PSM = context->ZBUF.PSM;
|
||||
|
||||
GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite());
|
||||
|
||||
if(!rt || !ds)
|
||||
{
|
||||
ASSERT(0);
|
||||
return;
|
||||
}
|
||||
|
||||
GSTextureCache::Source* tex = NULL;
|
||||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
|
||||
|
||||
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
|
||||
|
||||
if(!tex) return;
|
||||
}
|
||||
|
||||
if(s_dump)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
||||
string s;
|
||||
|
||||
if(s_save && s_n >= s_saven && tex)
|
||||
{
|
||||
s = format("c:\\temp2\\_%05d_f%lld_tex_%05x_%d_%d%d_%02x_%02x_%02x_%02x.dds",
|
||||
s_n, frame, (int)context->TEX0.TBP0, (int)context->TEX0.PSM,
|
||||
(int)context->CLAMP.WMS, (int)context->CLAMP.WMT,
|
||||
(int)context->CLAMP.MINU, (int)context->CLAMP.MAXU,
|
||||
(int)context->CLAMP.MINV, (int)context->CLAMP.MAXV);
|
||||
|
||||
tex->m_texture->Save(s, true);
|
||||
|
||||
if(tex->m_palette)
|
||||
{
|
||||
s = format("c:\\temp2\\_%05d_f%lld_tpx_%05x_%d.dds", s_n, frame, context->TEX0.CBP, context->TEX0.CPSM);
|
||||
|
||||
tex->m_palette->Save(s, true);
|
||||
}
|
||||
}
|
||||
|
||||
s_n++;
|
||||
|
||||
if(s_save && s_n >= s_saven)
|
||||
{
|
||||
s = format("c:\\temp2\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
|
||||
|
||||
rt->m_texture->Save(s);
|
||||
}
|
||||
|
||||
if(s_savez && s_n >= s_saven)
|
||||
{
|
||||
s = format("c:\\temp2\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
|
||||
|
||||
ds->m_texture->Save(s);
|
||||
}
|
||||
|
||||
s_n++;
|
||||
}
|
||||
|
||||
if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// skip alpha test if possible
|
||||
|
||||
GIFRegTEST TEST = context->TEST;
|
||||
GIFRegFRAME FRAME = context->FRAME;
|
||||
GIFRegZBUF ZBUF = context->ZBUF;
|
||||
|
||||
uint32 fm = context->FRAME.FBMSK;
|
||||
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||
|
||||
if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
|
||||
{
|
||||
if(GSRendererT<Vertex>::TryAlphaTest(fm, zm))
|
||||
{
|
||||
context->TEST.ATST = ATST_ALWAYS;
|
||||
}
|
||||
}
|
||||
|
||||
context->FRAME.FBMSK = fm;
|
||||
context->ZBUF.ZMSK = zm != 0;
|
||||
|
||||
//
|
||||
|
||||
Draw(rt->m_texture, ds->m_texture, tex);
|
||||
|
||||
//
|
||||
|
||||
context->TEST = TEST;
|
||||
context->FRAME = FRAME;
|
||||
context->ZBUF = ZBUF;
|
||||
|
||||
//
|
||||
|
||||
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));
|
||||
|
||||
if(fm != 0xffffffff)
|
||||
{
|
||||
rt->m_valid = rt->m_valid.runion(r);
|
||||
|
||||
m_tc->InvalidateVideoMem(context->offset.fb, r, false);
|
||||
}
|
||||
|
||||
if(zm != 0xffffffff)
|
||||
{
|
||||
ds->m_valid = ds->m_valid.runion(r);
|
||||
|
||||
m_tc->InvalidateVideoMem(context->offset.zb, r, false);
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
if(m_hacks.m_oo)
|
||||
{
|
||||
(this->*m_hacks.m_oo)();
|
||||
}
|
||||
|
||||
if(s_dump)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
||||
string s;
|
||||
|
||||
if(s_save && s_n >= s_saven)
|
||||
{
|
||||
s = format("c:\\temp2\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM);
|
||||
|
||||
rt->m_texture->Save(s);
|
||||
}
|
||||
|
||||
if(s_savez && s_n >= s_saven)
|
||||
{
|
||||
s = format("c:\\temp2\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, context->ZBUF.Block(), context->ZBUF.PSM);
|
||||
|
||||
ds->m_texture->Save(s);
|
||||
}
|
||||
|
||||
s_n++;
|
||||
}
|
||||
#ifdef DISABLE_HW_TEXTURE_CACHE
|
||||
m_tc->Read(rt, r);
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual void Draw(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
|
||||
|
||||
bool CanUpscale()
|
||||
{
|
||||
if(m_hacks.m_cu && !(this->*m_hacks.m_cu)())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return !m_nativeres && m_regs->PMODE.EN != 0; // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition)
|
||||
}
|
||||
|
||||
int GetUpscaleMultiplier()
|
||||
{
|
||||
return m_upscale_multiplier;
|
||||
}
|
||||
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
|
||||
|
||||
public:
|
||||
GSRendererHW(GSTextureCache* tc)
|
||||
: GSRendererT<Vertex>()
|
||||
, m_tc(tc)
|
||||
, m_width(1024)
|
||||
, m_height(1024)
|
||||
, m_skip(0)
|
||||
, m_reset(false)
|
||||
, m_upscale_multiplier(1)
|
||||
{
|
||||
m_nativeres = !!theApp.GetConfig("nativeres", 0);
|
||||
m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", 1);
|
||||
m_userhacks_skipdraw = theApp.GetConfig("UserHacks_SkipDraw", 0);
|
||||
GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc);
|
||||
virtual ~GSRendererHW();
|
||||
|
||||
if(!m_nativeres)
|
||||
{
|
||||
m_width = theApp.GetConfig("resx", m_width);
|
||||
m_height = theApp.GetConfig("resy", m_height);
|
||||
void SetGameCRC(uint32 crc, int options);
|
||||
bool CanUpscale();
|
||||
int GetUpscaleMultiplier();
|
||||
|
||||
m_upscale_multiplier = theApp.GetConfig("upscale_multiplier", m_upscale_multiplier);
|
||||
|
||||
if(m_upscale_multiplier > 6)
|
||||
{
|
||||
m_upscale_multiplier = 1; // use the normal upscale math
|
||||
}
|
||||
else if(m_upscale_multiplier > 1)
|
||||
{
|
||||
m_width = 640 * m_upscale_multiplier; // 512 is also common, but this is not always detected right.
|
||||
m_height = 512 * m_upscale_multiplier; // 448 is also common, but this is not always detected right.
|
||||
}
|
||||
}
|
||||
else m_upscale_multiplier = 1;
|
||||
}
|
||||
|
||||
// The renderer owns the texture cache handed to its constructor and frees it here.
virtual ~GSRendererHW()
{
	delete m_tc;
}
|
||||
|
||||
// Lets the base class process the CRC, then picks the matching per-game
// hacks and applies title-specific render target overrides.
void SetGameCRC(uint32 crc, int options)
{
	GSRendererT<Vertex>::SetGameCRC(crc, options);

	m_hacks.SetGame(m_game);

	const bool is_jackie_chan = m_game.title == CRC::JackieChanAdv;

	if(is_jackie_chan)
	{
		// TODO: uses a 1280px wide 16 bit render target, but this only fixes half of the problem

		m_width = 1280;
	}
}
|
||||
void Reset();
|
||||
void VSync(int field);
|
||||
void ResetDevice();
|
||||
GSTexture* GetOutput(int i);
|
||||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
||||
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
|
||||
void Draw();
|
||||
};
|
||||
|
|
|
@ -23,26 +23,33 @@
|
|||
|
||||
#include "GSRenderer.h"
|
||||
|
||||
class GSRendererNull : public GSRendererT<GSVertexNull>
|
||||
class GSRendererNull : public GSRenderer
|
||||
{
|
||||
class GSVertexTraceNull : public GSVertexTrace
|
||||
{
|
||||
public:
|
||||
GSVertexTraceNull(const GSState* state) : GSVertexTrace(state) {}
|
||||
};
|
||||
|
||||
protected:
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index)
|
||||
{
|
||||
}
|
||||
|
||||
void Draw()
|
||||
{
|
||||
}
|
||||
|
||||
GSTexture* GetOutput(int i)
|
||||
GSTexture* GetOutput(int i)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
public:
|
||||
GSRendererNull()
|
||||
: GSRendererT<GSVertexNull>()
|
||||
{
|
||||
InitVertexKick(GSRendererNull);
|
||||
}
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst> void VertexKick(bool skip)
|
||||
GSRendererNull()
|
||||
: GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertexNull))
|
||||
{
|
||||
InitConvertVertex(GSRendererNull);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -25,9 +25,11 @@
|
|||
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||
|
||||
GSRendererSW::GSRendererSW(int threads)
|
||||
: m_fzb(NULL)
|
||||
: GSRenderer(new GSVertexTraceSW(this), sizeof(GSVertexSW))
|
||||
, m_fzb(NULL)
|
||||
{
|
||||
InitVertexKick(GSRendererSW);
|
||||
InitConvertVertex(GSRendererSW);
|
||||
InitConvertIndex();
|
||||
|
||||
m_tc = new GSTextureCacheSW(this);
|
||||
|
||||
|
@ -62,7 +64,7 @@ void GSRendererSW::Reset()
|
|||
|
||||
m_reset = true;
|
||||
|
||||
GSRendererT<GSVertexSW>::Reset();
|
||||
GSRenderer::Reset();
|
||||
}
|
||||
|
||||
void GSRendererSW::VSync(int field)
|
||||
|
@ -88,7 +90,7 @@ void GSRendererSW::VSync(int field)
|
|||
printf("m_syncpoint_count = %d\n", ((GSRasterizerList*)m_rl)->m_syncpoint_count); ((GSRasterizerList*)m_rl)->m_syncpoint_count = 0;
|
||||
printf("m_solidrect_count = %d\n", ((GSRasterizerList*)m_rl)->m_solidrect_count); ((GSRasterizerList*)m_rl)->m_solidrect_count = 0;
|
||||
*/
|
||||
GSRendererT<GSVertexSW>::VSync(field);
|
||||
GSRenderer::VSync(field);
|
||||
|
||||
m_tc->IncAge();
|
||||
|
||||
|
@ -149,41 +151,187 @@ GSTexture* GSRendererSW::GetOutput(int i)
|
|||
return m_texture[i];
|
||||
}
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererSW::ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index)
|
||||
{
|
||||
GSVertexSW* RESTRICT v = &vertex[index];
|
||||
|
||||
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - m_context->XYOFFSET;
|
||||
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
|
||||
|
||||
v->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
||||
|
||||
if(tme)
|
||||
{
|
||||
GSVector4 t;
|
||||
|
||||
if(fst)
|
||||
{
|
||||
t = GSVector4(GSVector4i::load(m_v.UV.u32[0]).upl16() << (16 - 4));
|
||||
}
|
||||
else
|
||||
{
|
||||
t = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH);
|
||||
t = t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
|
||||
}
|
||||
|
||||
v->t = t;
|
||||
}
|
||||
|
||||
v->c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7);
|
||||
|
||||
if(prim == GS_SPRITE)
|
||||
{
|
||||
v->t.u32[3] = m_v.XYZ.Z;
|
||||
}
|
||||
}
|
||||
|
||||
template<uint32 prim>
|
||||
size_t GSRendererSW::ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count)
|
||||
{
|
||||
// memcpy(dst, src, sizeof(uint32) * count); return;
|
||||
|
||||
// TODO: IsQuad
|
||||
|
||||
const GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;
|
||||
|
||||
GSVector4 scissor = m_context->scissor.ex;
|
||||
|
||||
const uint32* src_end = src + count;
|
||||
uint32* dst_base = dst;
|
||||
|
||||
while(src < src_end)
|
||||
{
|
||||
GSVector4 pmin, pmax;
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
pmin = v[src[0]].p;
|
||||
pmax = v[src[0]].p;
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
case GS_SPRITE:
|
||||
pmin = v[src[0]].p.min(v[src[1]].p);
|
||||
pmax = v[src[0]].p.max(v[src[1]].p);
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
pmin = v[src[0]].p.min(v[src[1]].p).min(v[src[2]].p);
|
||||
pmax = v[src[0]].p.max(v[src[1]].p).max(v[src[2]].p);
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
|
||||
GSVector4 tmp;
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
// are in line or just two of them are the same (cross product == 0)
|
||||
tmp = (v[src[1]].p - v[src[0]].p) * (v[src[2]].p - v[src[0]].p).yxwz();
|
||||
test |= tmp == tmp.yxwz();
|
||||
break;
|
||||
}
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
case GS_SPRITE:
|
||||
test |= pmin.ceil() == pmax.ceil();
|
||||
break;
|
||||
}
|
||||
|
||||
bool pass = test.xyxy().allfalse();
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
if(pass) {dst[0] = src[0]; dst++;}
|
||||
src++;
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
case GS_SPRITE:
|
||||
if(pass) {dst[0] = src[0]; dst[1] = src[1]; dst += 2;}
|
||||
src += 2;
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
if(pass) {dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst += 3;}
|
||||
src += 3;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
}
|
||||
|
||||
return dst - dst_base;
|
||||
}
|
||||
|
||||
void GSRendererSW::UpdateVertexKick()
|
||||
{
|
||||
GSRenderer::UpdateVertexKick();
|
||||
|
||||
m_cif = m_ci[PRIM->PRIM];
|
||||
}
|
||||
|
||||
void GSRendererSW::Draw()
|
||||
{
|
||||
if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass);
|
||||
|
||||
GSVector4i scissor = GSVector4i(m_context->scissor.in);
|
||||
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
|
||||
|
||||
scissor.z = std::min<int>(scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
|
||||
|
||||
GSVector4i r = bbox.rintersect(scissor);
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
shared_ptr<GSRasterizerData> data(new GSRasterizerData2(this));
|
||||
|
||||
data->primclass = GSUtil::GetPrimClass(PRIM->PRIM);
|
||||
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.tail + sizeof(uint32) * m_index.tail, 32);
|
||||
data->vertex = (GSVertexSW*)data->buff;
|
||||
data->vertex_count = m_vertex.tail;
|
||||
data->index = (uint32*)(data->buff + sizeof(GSVertexSW) * m_vertex.tail);
|
||||
data->index_count = (this->*m_cif)(data->index, m_index.buff, m_index.tail);
|
||||
|
||||
m_perfmon.Put(GSPerfMon::PrimNotRendered, (m_index.tail - data->index_count) / GSUtil::GetVertexCount(PRIM->PRIM));
|
||||
|
||||
if(data->index_count == 0) return;
|
||||
|
||||
// TODO: merge these
|
||||
|
||||
memcpy(data->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.tail);
|
||||
|
||||
m_vt->Update(data->vertex, data->index, data->index_count, data->primclass);
|
||||
|
||||
//
|
||||
|
||||
GSRasterizerData2* data2 = (GSRasterizerData2*)data.get();
|
||||
|
||||
if(!GetScanlineGlobalData(data2))
|
||||
{
|
||||
return;
|
||||
}
|
||||
if(!GetScanlineGlobalData(data2)) return;
|
||||
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)data->param;
|
||||
|
||||
GSVector4i scissor = GSVector4i(context->scissor.in);
|
||||
GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil()));
|
||||
|
||||
scissor.z = std::min<int>(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
|
||||
|
||||
data->scissor = scissor;
|
||||
data->bbox = bbox;
|
||||
data->primclass = m_vt.m_primclass;
|
||||
data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16); // TODO: detach m_vertices and reallocate later?
|
||||
memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count); // TODO: m_vt.Update fetches all the vertices already, could also store them here
|
||||
data->count = m_count;
|
||||
data->solidrect = gd->sel.IsSolidRect();
|
||||
data->frame = m_perfmon.GetFrame();
|
||||
|
||||
//
|
||||
|
||||
vector<uint32>* fb_pages = NULL;
|
||||
vector<uint32>* zb_pages = NULL;
|
||||
uint32* fb_pages = NULL;
|
||||
uint32* zb_pages = NULL;
|
||||
|
||||
GSVector4i r = bbox.rintersect(scissor);
|
||||
|
||||
if(gd->sel.fwrite)
|
||||
{
|
||||
|
@ -215,9 +363,9 @@ void GSRendererSW::Draw()
|
|||
{
|
||||
if(gd->sel.fwrite)
|
||||
{
|
||||
for(vector<uint32>::iterator i = fb_pages->begin(); i != fb_pages->end(); i++)
|
||||
for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
if(m_fzb_pages[*i] & 0xffff0000) // already used as a z-buffer
|
||||
if(m_fzb_pages[*p] & 0xffff0000) // already used as a z-buffer
|
||||
{
|
||||
data->syncpoint = true;
|
||||
|
||||
|
@ -231,9 +379,9 @@ void GSRendererSW::Draw()
|
|||
{
|
||||
if(gd->sel.zwrite)
|
||||
{
|
||||
for(vector<uint32>::iterator i = zb_pages->begin(); i != zb_pages->end(); i++)
|
||||
for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
if(m_fzb_pages[*i] & 0x0000ffff) // already used as a frame buffer
|
||||
if(m_fzb_pages[*p] & 0x0000ffff) // already used as a frame buffer
|
||||
{
|
||||
data->syncpoint = true;
|
||||
|
||||
|
@ -307,18 +455,6 @@ void GSRendererSW::Draw()
|
|||
m_rl->Queue(data);
|
||||
}
|
||||
|
||||
int prims = 0;
|
||||
|
||||
switch(data->primclass)
|
||||
{
|
||||
case GS_POINT_CLASS: prims = data->count; break;
|
||||
case GS_LINE_CLASS: prims = data->count / 2; break;
|
||||
case GS_TRIANGLE_CLASS: prims = data->count / 3; break;
|
||||
case GS_SPRITE_CLASS: prims = data->count / 2; break;
|
||||
}
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Prim, prims);
|
||||
|
||||
/*
|
||||
if(0)//stats.ticks > 5000000)
|
||||
{
|
||||
|
@ -344,15 +480,15 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
{
|
||||
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
|
||||
|
||||
vector<uint32>* pages = o->GetPages(r);
|
||||
uint32* pages = o->GetPages(r);
|
||||
|
||||
m_tc->InvalidatePages(pages, o->psm);
|
||||
|
||||
// check if the changing pages either used as a texture or a target
|
||||
|
||||
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
uint32 page = *i;
|
||||
uint32 page = *p;
|
||||
|
||||
//while(m_fzb_pages[page] | m_tex_pages[page]) _mm_pause();
|
||||
|
||||
|
@ -364,20 +500,20 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
}
|
||||
}
|
||||
|
||||
delete pages;
|
||||
delete [] pages;
|
||||
}
|
||||
|
||||
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
|
||||
{
|
||||
GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
|
||||
|
||||
vector<uint32>* pages = o->GetPages(r);
|
||||
uint32* pages = o->GetPages(r);
|
||||
|
||||
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
//while(m_fzb_pages[*i]) _mm_pause();
|
||||
//while(m_fzb_pages[*p]) _mm_pause();
|
||||
|
||||
if(m_fzb_pages[*i])
|
||||
if(m_fzb_pages[*p])
|
||||
{
|
||||
Sync(6);
|
||||
|
||||
|
@ -385,27 +521,27 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
}
|
||||
}
|
||||
|
||||
delete pages;
|
||||
delete [] pages;
|
||||
}
|
||||
|
||||
void GSRendererSW::UsePages(const vector<uint32>* pages, int type)
|
||||
void GSRendererSW::UsePages(const uint32* pages, int type)
|
||||
{
|
||||
if(type < 2)
|
||||
{
|
||||
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
ASSERT(((short*)&m_fzb_pages[*i])[type] < SHRT_MAX);
|
||||
ASSERT(((short*)&m_fzb_pages[*p])[type] < SHRT_MAX);
|
||||
|
||||
_InterlockedIncrement16((short*)&m_fzb_pages[*i] + type);
|
||||
_InterlockedIncrement16((short*)&m_fzb_pages[*p] + type);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
//while(m_fzb_pages[*i]) _mm_pause();
|
||||
//while(m_fzb_pages[*p]) _mm_pause();
|
||||
|
||||
if(m_fzb_pages[*i]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D)
|
||||
if(m_fzb_pages[*p]) // currently being drawn to? => sync (could even spin and wait until it hits 0, not sure if it's worth though, or just create 512 condvars? :D)
|
||||
{
|
||||
Sync(7);
|
||||
|
||||
|
@ -413,33 +549,33 @@ void GSRendererSW::UsePages(const vector<uint32>* pages, int type)
|
|||
}
|
||||
}
|
||||
|
||||
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
ASSERT(m_tex_pages[*i] < SHRT_MAX);
|
||||
ASSERT(m_tex_pages[*p] < SHRT_MAX);
|
||||
|
||||
_InterlockedIncrement16((short*)&m_tex_pages[*i]); // remember which texture pages are used
|
||||
_InterlockedIncrement16((short*)&m_tex_pages[*p]); // remember which texture pages are used
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererSW::ReleasePages(const vector<uint32>* pages, int type)
|
||||
void GSRendererSW::ReleasePages(const uint32* pages, int type)
|
||||
{
|
||||
if(type < 2)
|
||||
{
|
||||
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
ASSERT(((short*)&m_fzb_pages[*i])[type] > 0);
|
||||
ASSERT(((short*)&m_fzb_pages[*p])[type] > 0);
|
||||
|
||||
_InterlockedDecrement16((short*)&m_fzb_pages[*i] + type);
|
||||
_InterlockedDecrement16((short*)&m_fzb_pages[*p] + type);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(vector<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
ASSERT(m_tex_pages[*i] > 0);
|
||||
ASSERT(m_tex_pages[*p] > 0);
|
||||
|
||||
_InterlockedDecrement16((short*)&m_tex_pages[*i]);
|
||||
_InterlockedDecrement16((short*)&m_tex_pages[*p]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -452,7 +588,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
|
||||
const GSDrawingEnvironment& env = m_env;
|
||||
const GSDrawingContext* context = m_context;
|
||||
const GS_PRIM_CLASS primclass = m_vt.m_primclass;
|
||||
const GS_PRIM_CLASS primclass = m_vt->m_primclass;
|
||||
|
||||
gd.vm = m_mem.m_vm8;
|
||||
|
||||
|
@ -470,7 +606,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
gd.sel.atst = ATST_ALWAYS;
|
||||
gd.sel.tfx = TFX_NONE;
|
||||
gd.sel.ababcd = 255;
|
||||
gd.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0;
|
||||
gd.sel.prim = primclass;
|
||||
|
||||
uint32 fm = context->FRAME.FBMSK;
|
||||
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||
|
@ -529,7 +665,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
{
|
||||
gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
|
||||
|
||||
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff)
|
||||
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt->m_eq.rgba != 0xffff)
|
||||
{
|
||||
gd.sel.iip = PRIM->IIP;
|
||||
}
|
||||
|
@ -539,7 +675,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
gd.sel.tfx = context->TEX0.TFX;
|
||||
gd.sel.tcc = context->TEX0.TCC;
|
||||
gd.sel.fst = PRIM->FST;
|
||||
gd.sel.ltf = m_vt.IsLinear();
|
||||
gd.sel.ltf = m_vt->IsLinear();
|
||||
|
||||
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
|
||||
{
|
||||
|
@ -553,7 +689,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
gd.sel.wms = context->CLAMP.WMS;
|
||||
gd.sel.wmt = context->CLAMP.WMT;
|
||||
|
||||
if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128)))
|
||||
if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt->m_eq.rgba == 0xffff && m_vt->m_min.c.eq(GSVector4i(128)))
|
||||
{
|
||||
// modulate does not do anything when vertex color is 0x80
|
||||
|
||||
|
@ -572,7 +708,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
|
||||
if(!t->Update(r)) {ASSERT(0); return false;}
|
||||
|
||||
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
|
||||
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt->m_lod.x > 0)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
||||
|
@ -589,7 +725,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
gd.tex[0] = t->m_buff;
|
||||
gd.sel.tw = t->m_tw - 3;
|
||||
|
||||
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0)
|
||||
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt->m_lod.y > 0)
|
||||
{
|
||||
// TEX1.MMIN
|
||||
// 000 p
|
||||
|
@ -599,13 +735,13 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
// 100 l round
|
||||
// 101 l tri
|
||||
|
||||
if(m_vt.m_lod.x > 0)
|
||||
if(m_vt->m_lod.x > 0)
|
||||
{
|
||||
gd.sel.ltf = context->TEX1.MMIN >> 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt.m_lod.x <= 0 && m_vt.m_lod.y > 0
|
||||
// TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt->m_lod.x <= 0 && m_vt->m_lod.y > 0
|
||||
}
|
||||
|
||||
gd.sel.mmin = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri
|
||||
|
@ -614,9 +750,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
int mxl = (std::min<int>((int)context->TEX1.MXL, 6) << 16);
|
||||
int k = context->TEX1.K << 12;
|
||||
|
||||
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL)
|
||||
if((int)m_vt->m_lod.x >= (int)context->TEX1.MXL)
|
||||
{
|
||||
k = (int)m_vt.m_lod.x << 16; // set lod to max level
|
||||
k = (int)m_vt->m_lod.x << 16; // set lod to max level
|
||||
|
||||
gd.sel.lcm = 1; // lod is constant
|
||||
gd.sel.mmin = 1; // tri-linear is meaningless
|
||||
|
@ -630,7 +766,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
if(gd.sel.fst)
|
||||
{
|
||||
ASSERT(gd.sel.lcm == 1);
|
||||
ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
|
||||
ASSERT(((m_vt->m_min.t.uph(m_vt->m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
|
||||
|
||||
gd.sel.lcm = 1;
|
||||
}
|
||||
|
@ -659,8 +795,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
GIFRegTEX0 MIP_TEX0 = context->TEX0;
|
||||
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
|
||||
|
||||
GSVector4 tmin = m_vt.m_min.t;
|
||||
GSVector4 tmax = m_vt.m_max.t;
|
||||
GSVector4 tmin = m_vt->m_min.t;
|
||||
GSVector4 tmax = m_vt->m_max.t;
|
||||
|
||||
static int s_counter = 0;
|
||||
|
||||
|
@ -710,8 +846,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
MIP_CLAMP.MAXU >>= 1;
|
||||
MIP_CLAMP.MAXV >>= 1;
|
||||
|
||||
m_vt.m_min.t *= 0.5f;
|
||||
m_vt.m_max.t *= 0.5f;
|
||||
m_vt->m_min.t *= 0.5f;
|
||||
m_vt->m_max.t *= 0.5f;
|
||||
|
||||
GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3);
|
||||
|
||||
|
@ -753,8 +889,8 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
|
||||
s_counter++;
|
||||
|
||||
m_vt.m_min.t = tmin;
|
||||
m_vt.m_max.t = tmax;
|
||||
m_vt->m_min.t = tmin;
|
||||
m_vt->m_max.t = tmax;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -762,17 +898,19 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
{
|
||||
// skip per pixel division if q is constant
|
||||
|
||||
GSVertexSW* v = m_vertices;
|
||||
GSVertexSW* RESTRICT v = data2->vertex;
|
||||
|
||||
if(m_vt.m_eq.q)
|
||||
if(m_vt->m_eq.q)
|
||||
{
|
||||
gd.sel.fst = 1;
|
||||
|
||||
if(v[0].t.z != 1.0f)
|
||||
{
|
||||
GSVector4 w = v[0].t.zzzz().rcpnr();
|
||||
const GSVector4& t = v[data2->index[0]].t;
|
||||
|
||||
for(int i = 0, j = m_count; i < j; i++)
|
||||
if(t.z != 1.0f)
|
||||
{
|
||||
GSVector4 w = t.zzzz().rcpnr();
|
||||
|
||||
for(int i = 0, j = data2->vertex_count; i < j; i++)
|
||||
{
|
||||
GSVector4 t = v[i].t;
|
||||
|
||||
|
@ -784,7 +922,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
{
|
||||
gd.sel.fst = 1;
|
||||
|
||||
for(int i = 0, j = m_count; i < j; i += 2)
|
||||
for(int i = 0, j = data2->vertex_count; i < j; i += 2)
|
||||
{
|
||||
GSVector4 t0 = v[i + 0].t;
|
||||
GSVector4 t1 = v[i + 1].t;
|
||||
|
@ -805,9 +943,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
|
||||
GSVector4 half(0x8000, 0x8000);
|
||||
|
||||
GSVertexSW* v = m_vertices;
|
||||
GSVertexSW* RESTRICT v = data2->vertex;
|
||||
|
||||
for(int i = 0, j = m_count; i < j; i++)
|
||||
for(int i = 0, j = data2->vertex_count; i < j; i++)
|
||||
{
|
||||
GSVector4 t = v[i].t;
|
||||
|
||||
|
@ -939,7 +1077,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
{
|
||||
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
|
||||
gd.sel.ztst = ztest ? context->TEST.ZTST : ZTST_ALWAYS;
|
||||
gd.sel.zoverflow = GSVector4i(m_vt.m_max.p).z == 0x80000000;
|
||||
gd.sel.zoverflow = GSVector4i(m_vt->m_max.p).z == 0x80000000;
|
||||
}
|
||||
|
||||
gd.fm = GSVector4i(fm);
|
||||
|
@ -969,176 +1107,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSRasterizerData2* data2)
|
|||
return true;
|
||||
}
|
||||
|
||||
// Software-renderer vertex kick. Converts the current vertex registers (m_v)
// into a GSVertexSW appended to m_vl, asks DrawingKick<prim> whether a
// primitive is complete, drops primitives that fail the scissor/degenerate
// tests below, and turns triangle pairs recognised by GSVertexSW::IsQuad into
// a single sprite draw.
//
// prim/tme/fst are compile-time selectors: tme enables texture coordinate
// conversion, fst selects UV (fixed point) over ST/Q input.
// skip is forwarded unchanged to DrawingKick<prim>.
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererSW::VertexKick(bool skip)
|
||||
{
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
GSVertexSW& dst = m_vl.AddTail();
|
||||
|
||||
// position: 16-bit X/Y unpacked to 32 bits and offset by XYOFFSET; Z is capped at 0xffffff00 and paired with the fog value
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET;
|
||||
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
|
||||
|
||||
// lanes whose sign bit is set after the int conversion get m_x4f800000 added (presumably 2^32 as a float, to undo the signed conversion - confirm)
dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
||||
|
||||
if(tme)
|
||||
{
|
||||
GSVector4 t;
|
||||
|
||||
if(fst)
|
||||
{
|
||||
// UV path: 16-bit U/V unpacked and shifted left by 12
t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
|
||||
}
|
||||
else
|
||||
{
|
||||
// ST path: S/T scaled by 0x10000 << TW/TH, with Q placed in the upper half of the vector
t = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
|
||||
t = t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
|
||||
}
|
||||
|
||||
dst.t = t;
|
||||
}
|
||||
|
||||
dst.c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7);
|
||||
|
||||
if(prim == GS_SPRITE)
|
||||
{
|
||||
// sprites keep the raw (unconverted) Z in the last lane of t
dst.t.u32[3] = m_v.XYZ.Z;
|
||||
}
|
||||
|
||||
int count = 0;
|
||||
|
||||
// a non-NULL result means a primitive is ready; count receives how many vertices it added
if(GSVertexSW* v = DrawingKick<prim>(skip, count))
|
||||
{
|
||||
// culling is bypassed entirely while m_dump is set
if(!m_dump)
|
||||
{
|
||||
GSVector4 pmin, pmax;
|
||||
|
||||
// bounding box of the primitive's vertex positions
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
pmin = v[0].p;
|
||||
pmax = v[0].p;
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
case GS_SPRITE:
|
||||
pmin = v[0].p.min(v[1].p);
|
||||
pmax = v[0].p.max(v[1].p);
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
pmin = v[0].p.min(v[1].p).min(v[2].p);
|
||||
pmax = v[0].p.max(v[1].p).max(v[2].p);
|
||||
break;
|
||||
}
|
||||
|
||||
GSVector4 scissor = context->scissor.ex;
|
||||
|
||||
// reject when the box lies completely outside the scissor rectangle
GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy());
|
||||
|
||||
// triangles and sprites are also rejected when min and max round up to the same value on an axis
switch(prim)
|
||||
{
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
case GS_SPRITE:
|
||||
test |= pmin.ceil() == pmax.ceil();
|
||||
break;
|
||||
}
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
// are in line or just two of them are the same (cross product == 0)
|
||||
GSVector4 tmp = (v[1].p - v[0].p) * (v[2].p - v[0].p).yxwz();
|
||||
test |= tmp == tmp.yxwz();
|
||||
break;
|
||||
}
|
||||
|
||||
// only the two lowest mask bits (x and y lanes) decide the rejection
if(test.mask() & 3)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
// when IIP == 0 the last vertex's color is copied onto the other vertices of the primitive
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
if(PRIM->IIP == 0) {v[0].c = v[1].c;}
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;}
|
||||
break;
|
||||
case GS_SPRITE:
|
||||
break;
|
||||
}
|
||||
|
||||
// quad detection runs only while the pending vertex count is in [3, 30)
if(m_count < 30 && m_count >= 3)
|
||||
{
|
||||
GSVertexSW* v = &m_vertices[m_count - 3];
|
||||
|
||||
int tl = 0;
|
||||
int br = 0;
|
||||
|
||||
bool isquad = false;
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
case GS_TRIANGLELIST:
|
||||
isquad = GSVertexSW::IsQuad(v, tl, br);
|
||||
break;
|
||||
}
|
||||
|
||||
if(isquad)
|
||||
{
|
||||
// take back the previous triangle's three vertices; anything queued before it is flushed first
m_count -= 3;
|
||||
|
||||
if(m_count > 0)
|
||||
{
|
||||
// tl/br were relative to v, make them absolute indices into m_vertices
tl += m_count;
|
||||
br += m_count;
|
||||
|
||||
Flush();
|
||||
}
|
||||
|
||||
// move the two corner vertices to slots 0 and 1 (presumably top-left / bottom-right, going by the names)
if(tl != 0) m_vertices[0] = m_vertices[tl];
|
||||
if(br != 1) m_vertices[1] = m_vertices[br];
|
||||
|
||||
m_vertices[0].t.u32[3] = m_v.XYZ.Z;
|
||||
m_vertices[1].t.u32[3] = m_v.XYZ.Z;
|
||||
|
||||
m_count = 2;
|
||||
|
||||
// temporarily switch PRIM to GS_SPRITE so the Flush() below draws the pair as a sprite
uint32 tmp = PRIM->PRIM;
|
||||
PRIM->PRIM = GS_SPRITE;
|
||||
|
||||
Flush();
|
||||
|
||||
PRIM->PRIM = tmp;
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Quad, 1);
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
m_count += count;
|
||||
|
||||
// Flush();
|
||||
}
|
||||
}
|
||||
|
||||
// GSRendererSW::GSRasterizerData2
|
||||
|
||||
GSRendererSW::GSRasterizerData2::GSRasterizerData2(GSRendererSW* parent)
|
||||
: m_parent(parent)
|
||||
, m_fb_pages(NULL)
|
||||
|
@ -1192,7 +1160,7 @@ GSRendererSW::GSRasterizerData2::~GSRasterizerData2()
|
|||
m_parent->m_perfmon.Put(GSPerfMon::Fillrate, pixels);
|
||||
}
|
||||
|
||||
void GSRendererSW::GSRasterizerData2::UseTargetPages(const vector<uint32>* fb_pages, const vector<uint32>* zb_pages)
|
||||
void GSRendererSW::GSRasterizerData2::UseTargetPages(const uint32* fb_pages, const uint32* zb_pages)
|
||||
{
|
||||
if(m_using_pages) return;
|
||||
|
||||
|
@ -1218,9 +1186,7 @@ void GSRendererSW::GSRasterizerData2::UseSourcePages(GSTextureCacheSW::Texture*
|
|||
{
|
||||
ASSERT(m_tex_pages[level] == NULL);
|
||||
|
||||
const vector<uint32>* pages = t->m_pages.n;
|
||||
m_tex_pages[level] = t->m_pages.n;
|
||||
|
||||
m_tex_pages[level] = pages;
|
||||
|
||||
m_parent->UsePages(pages, 2);
|
||||
m_parent->UsePages(t->m_pages.n, 2);
|
||||
}
|
||||
|
|
|
@ -25,21 +25,21 @@
|
|||
#include "GSTextureCacheSW.h"
|
||||
#include "GSDrawScanline.h"
|
||||
|
||||
class GSRendererSW : public GSRendererT<GSVertexSW>
|
||||
class GSRendererSW : public GSRenderer
|
||||
{
|
||||
class GSRasterizerData2 : public GSRasterizerData
|
||||
{
|
||||
GSRendererSW* m_parent;
|
||||
const vector<uint32>* m_fb_pages;
|
||||
const vector<uint32>* m_zb_pages;
|
||||
const vector<uint32>* m_tex_pages[7];
|
||||
const uint32* m_fb_pages;
|
||||
const uint32* m_zb_pages;
|
||||
const uint32* m_tex_pages[7];
|
||||
bool m_using_pages;
|
||||
|
||||
public:
|
||||
GSRasterizerData2(GSRendererSW* parent);
|
||||
virtual ~GSRasterizerData2();
|
||||
|
||||
void UseTargetPages(const vector<uint32>* fb_pages, const vector<uint32>* zb_pages);
|
||||
void UseTargetPages(const uint32* fb_pages, const uint32* zb_pages);
|
||||
void UseSourcePages(GSTextureCacheSW::Texture* t, int level);
|
||||
};
|
||||
|
||||
|
@ -63,15 +63,37 @@ protected:
|
|||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
||||
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
|
||||
|
||||
void UsePages(const vector<uint32>* pages, int type);
|
||||
void ReleasePages(const vector<uint32>* pages, int type);
|
||||
void UsePages(const uint32* pages, int type);
|
||||
void ReleasePages(const uint32* pages, int type);
|
||||
|
||||
bool GetScanlineGlobalData(GSRasterizerData2* data2);
|
||||
|
||||
typedef size_t (GSState::*ConvertIndexPtr)(uint32* RESTRICT dst, const uint32* RESTRICT src, int count);
|
||||
|
||||
ConvertIndexPtr m_ci[8], m_cif;
|
||||
|
||||
#define InitConvertIndex2(P) \
|
||||
m_ci[P] = (ConvertIndexPtr)&GSRendererSW::ConvertIndex<P>; \
|
||||
|
||||
#define InitConvertIndex() \
|
||||
InitConvertIndex2(GS_POINTLIST) \
|
||||
InitConvertIndex2(GS_LINELIST) \
|
||||
InitConvertIndex2(GS_LINESTRIP) \
|
||||
InitConvertIndex2(GS_TRIANGLELIST) \
|
||||
InitConvertIndex2(GS_TRIANGLESTRIP) \
|
||||
InitConvertIndex2(GS_TRIANGLEFAN) \
|
||||
InitConvertIndex2(GS_SPRITE) \
|
||||
InitConvertIndex2(GS_INVALID) \
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(GSVertexSW* RESTRICT vertex, size_t index);
|
||||
|
||||
template<uint32 prim>
|
||||
size_t ConvertIndex(uint32* RESTRICT dst, const uint32* RESTRICT src, int count);
|
||||
|
||||
void UpdateVertexKick();
|
||||
|
||||
public:
|
||||
GSRendererSW(int threads);
|
||||
virtual ~GSRendererSW();
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void VertexKick(bool skip);
|
||||
};
|
||||
|
|
|
@ -61,12 +61,12 @@ union GSScanlineSelector
|
|||
uint32 colclamp:1; // 43
|
||||
uint32 fba:1; // 44
|
||||
uint32 dthe:1; // 45
|
||||
uint32 sprite:1; // 46
|
||||
uint32 edge:1; // 47
|
||||
uint32 prim:2; // 46
|
||||
|
||||
uint32 tw:3; // 48 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
|
||||
uint32 lcm:1; // 49
|
||||
uint32 mmin:2; // 50
|
||||
uint32 edge:1; // 48
|
||||
uint32 tw:3; // 49 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
|
||||
uint32 lcm:1; // 50
|
||||
uint32 mmin:2; // 51
|
||||
};
|
||||
|
||||
struct
|
||||
|
@ -92,7 +92,7 @@ union GSScanlineSelector
|
|||
|
||||
bool IsSolidRect() const
|
||||
{
|
||||
return sprite
|
||||
return prim == GS_SPRITE_CLASS
|
||||
&& iip == 0
|
||||
&& tfx == TFX_NONE
|
||||
&& abe == 0
|
||||
|
|
|
@ -29,7 +29,7 @@ using namespace Xbyak;
|
|||
|
||||
void GSSetupPrimCodeGenerator::Generate()
|
||||
{
|
||||
if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip)
|
||||
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
|
||||
{
|
||||
for(int i = 0; i < 5; i++)
|
||||
{
|
||||
|
@ -53,7 +53,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
return;
|
||||
}
|
||||
|
||||
if(!m_sel.sprite)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
// GSVector4 p = dscan.p;
|
||||
|
||||
|
@ -107,7 +107,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
}
|
||||
else
|
||||
{
|
||||
// GSVector4 p = vertices[0].p;
|
||||
// GSVector4 p = vertex.p;
|
||||
|
||||
vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
|
||||
|
||||
|
@ -312,7 +312,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
}
|
||||
else
|
||||
{
|
||||
// GSVector4i c = GSVector4i(vertices[0].c);
|
||||
// GSVector4i c = GSVector4i(vertex.c);
|
||||
|
||||
vcvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ using namespace Xbyak;
|
|||
|
||||
void GSSetupPrimCodeGenerator::Generate()
|
||||
{
|
||||
if((m_en.z || m_en.f) && !m_sel.sprite || m_en.t || m_en.c && m_sel.iip)
|
||||
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
|
||||
{
|
||||
for(int i = 0; i < 5; i++)
|
||||
{
|
||||
|
@ -53,7 +53,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
return;
|
||||
}
|
||||
|
||||
if(!m_sel.sprite)
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
// GSVector4 p = dscan.p;
|
||||
|
||||
|
@ -112,7 +112,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
}
|
||||
else
|
||||
{
|
||||
// GSVector4 p = vertices[0].p;
|
||||
// GSVector4 p = vertex.p;
|
||||
|
||||
movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]);
|
||||
|
||||
|
@ -327,7 +327,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
}
|
||||
else
|
||||
{
|
||||
// GSVector4i c = GSVector4i(vertices[0].c);
|
||||
// GSVector4i c = GSVector4i(vertex.c);
|
||||
|
||||
cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);
|
||||
|
||||
|
|
|
@ -25,18 +25,37 @@
|
|||
//#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering
|
||||
//#define Offset_UV // Fixes / breaks various titles
|
||||
|
||||
GSState::GSState()
|
||||
GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
|
||||
: m_version(6)
|
||||
, m_mt(false)
|
||||
, m_irq(NULL)
|
||||
, m_path3hack(0)
|
||||
, m_regs(NULL)
|
||||
, m_q(1.0f)
|
||||
, m_vprim(1)
|
||||
, m_crc(0)
|
||||
, m_options(0)
|
||||
, m_frameskip(0)
|
||||
, m_vt(vt)
|
||||
{
|
||||
memset(&m_v, 0, sizeof(m_v));
|
||||
m_q = 1.0f;
|
||||
memset(&m_vertex, 0, sizeof(m_vertex));
|
||||
memset(&m_index, 0, sizeof(m_index));
|
||||
|
||||
m_vertex.stride = vertex_stride;
|
||||
|
||||
GrowVertexBuffer();
|
||||
|
||||
m_dk[GS_POINTLIST] = (DrawingKickPtr)&GSState::DrawingKick<GS_POINTLIST>;
|
||||
m_dk[GS_LINELIST] = (DrawingKickPtr)&GSState::DrawingKick<GS_LINELIST>;
|
||||
m_dk[GS_LINESTRIP] = (DrawingKickPtr)&GSState::DrawingKick<GS_LINESTRIP>;
|
||||
m_dk[GS_TRIANGLELIST] = (DrawingKickPtr)&GSState::DrawingKick<GS_TRIANGLELIST>;
|
||||
m_dk[GS_TRIANGLESTRIP] = (DrawingKickPtr)&GSState::DrawingKick<GS_TRIANGLESTRIP>;
|
||||
m_dk[GS_TRIANGLEFAN] = (DrawingKickPtr)&GSState::DrawingKick<GS_TRIANGLEFAN>;
|
||||
m_dk[GS_SPRITE] = (DrawingKickPtr)&GSState::DrawingKick<GS_SPRITE>;
|
||||
m_dk[GS_INVALID] = (DrawingKickPtr)&GSState::DrawingKick<GS_INVALID>;
|
||||
|
||||
memset(m_cv, 0, sizeof(m_cv));
|
||||
|
||||
m_sssize = 0;
|
||||
|
||||
m_sssize += sizeof(m_version);
|
||||
|
@ -78,7 +97,7 @@ GSState::GSState()
|
|||
m_sssize += sizeof(m_v.ST);
|
||||
m_sssize += sizeof(m_v.UV);
|
||||
m_sssize += sizeof(m_v.XYZ);
|
||||
m_sssize += sizeof(m_v.FOG);
|
||||
m_sssize += sizeof(m_v.FOG); // obsolete
|
||||
|
||||
m_sssize += sizeof(m_tr.x);
|
||||
m_sssize += sizeof(m_tr.y);
|
||||
|
@ -97,6 +116,8 @@ GSState::GSState()
|
|||
|
||||
// Frees the vertex and index buffers if they were allocated
// (GrowVertexBuffer obtains them with _aligned_malloc).
GSState::~GSState()
|
||||
{
|
||||
if(m_vertex.buff) _aligned_free(m_vertex.buff);
|
||||
if(m_index.buff) _aligned_free(m_index.buff);
|
||||
}
|
||||
|
||||
void GSState::SetRegsMem(uint8* basemem)
|
||||
|
@ -195,6 +216,10 @@ void GSState::Reset()
|
|||
m_env.Reset();
|
||||
|
||||
m_context = &m_env.CTXT[0];
|
||||
|
||||
m_vertex.head = 0;
|
||||
m_vertex.tail = 0;
|
||||
m_index.tail = 0;
|
||||
}
|
||||
|
||||
void GSState::ResetHandlers()
|
||||
|
@ -472,7 +497,7 @@ __forceinline void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRIC
|
|||
m_v.XYZ.Z = r->XYZF2.Z;
|
||||
m_v.FOG.F = r->XYZF2.F;
|
||||
|
||||
VertexKick(r->XYZF2.ADC);
|
||||
VertexKick(r->XYZF2.Skip());
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
|
||||
|
@ -481,7 +506,7 @@ __forceinline void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT
|
|||
m_v.XYZ.Y = r->XYZ2.Y;
|
||||
m_v.XYZ.Z = r->XYZ2.Z;
|
||||
|
||||
VertexKick(r->XYZ2.ADC);
|
||||
VertexKick(r->XYZ2.Skip());
|
||||
}
|
||||
|
||||
__forceinline void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r)
|
||||
|
@ -509,7 +534,7 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
|
|||
{
|
||||
// ASSERT(r->PRIM.PRIM < 7);
|
||||
|
||||
if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim.PRIM))
|
||||
if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim.PRIM)) // NOTE: assume strips/fans are converted to lists
|
||||
{
|
||||
if((m_env.PRIM.u32[0] ^ prim.u32[0]) & 0x7f8) // all fields except PRIM
|
||||
{
|
||||
|
@ -528,7 +553,7 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
|
|||
|
||||
UpdateVertexKick();
|
||||
|
||||
ResetPrim();
|
||||
m_vertex.head = m_vertex.tail = m_index.tail > 0 ? m_index.buff[m_index.tail - 1] + 1 : 0; // remove unused vertices from the end of the vertex buffer
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r)
|
||||
|
@ -559,8 +584,8 @@ __forceinline void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r)
|
|||
m_v.UV.u32[0] = r->UV.u32[0] & 0x3fff3fff;
|
||||
|
||||
#ifdef Offset_UV
|
||||
m_v.UV.U = min((uint16)m_v.UV.U, (uint16)(m_v.UV.U - 4U));
|
||||
m_v.UV.V = min((uint16)m_v.UV.V, (uint16)(m_v.UV.V - 4U));
|
||||
m_v.UV.U = min((uint16)m_v.UV.U, (uint16)(m_v._UV.U - 4U));
|
||||
m_v.UV.V = min((uint16)m_v.UV.V, (uint16)(m_v._UV.V - 4U));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -576,14 +601,14 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
|
|||
m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff;
|
||||
m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000;
|
||||
|
||||
VertexKick(false);
|
||||
VertexKick(0);
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r)
|
||||
{
|
||||
m_v.XYZ = (GSVector4i)r->XYZ;
|
||||
|
||||
VertexKick(false);
|
||||
VertexKick(0);
|
||||
}
|
||||
|
||||
void GSState::ApplyTEX0(int i, GIFRegTEX0& TEX0)
|
||||
|
@ -697,7 +722,7 @@ template<int i> void GSState::GIFRegHandlerCLAMP(const GIFReg* RESTRICT r)
|
|||
|
||||
void GSState::GIFRegHandlerFOG(const GIFReg* RESTRICT r)
|
||||
{
|
||||
m_v.FOG = (GSVector4i)r->FOG;
|
||||
m_v.FOG.u32[1] = r->FOG.u32[1];
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerXYZF3(const GIFReg* RESTRICT r)
|
||||
|
@ -712,14 +737,14 @@ void GSState::GIFRegHandlerXYZF3(const GIFReg* RESTRICT r)
|
|||
m_v.XYZ.u32[1] = r->XYZF.u32[1] & 0x00ffffff;
|
||||
m_v.FOG.u32[1] = r->XYZF.u32[1] & 0xff000000;
|
||||
|
||||
VertexKick(true);
|
||||
VertexKick(1);
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerXYZ3(const GIFReg* RESTRICT r)
|
||||
{
|
||||
m_v.XYZ = (GSVector4i)r->XYZ;
|
||||
|
||||
VertexKick(true);
|
||||
VertexKick(1);
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r)
|
||||
|
@ -1174,6 +1199,26 @@ void GSState::FlushWrite()
|
|||
*/
|
||||
}
|
||||
|
||||
// Draws whatever is queued in the index buffer and then empties the vertex
// and index buffers. Draw() is only called when at least one index is queued
// and both the frame and Z buffer formats have fmt < 3; the buffers are reset
// in every case, so queued primitives are discarded when the format check fails.
void GSState::FlushPrim()
|
||||
{
|
||||
if(m_index.tail > 0)
|
||||
{
|
||||
if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3)
|
||||
{
|
||||
// FIXME: berserk fpsm = 27 (8H)
|
||||
|
||||
Draw();
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Draw, 1);
|
||||
// primitive count = queued indices / vertices per primitive
// NOTE(review): assumes GetVertexCount() is non-zero whenever indices are queued - confirm for GS_INVALID
m_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM));
|
||||
}
|
||||
}
|
||||
|
||||
m_vertex.head = 0;
|
||||
m_vertex.tail = 0;
|
||||
m_index.tail = 0;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
void GSState::Write(const uint8* mem, int len)
|
||||
|
@ -1881,6 +1926,446 @@ void GSState::SetGameCRC(uint32 crc, int options)
|
|||
m_game = CRC::Lookup(crc);
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
// Selects the handlers used by VertexKick for the current PRIM register:
// the drawing kick for the primitive type, the vertex converter for the
// [PRIM][TME][FST] combination, and the number of vertices per primitive.
void GSState::UpdateVertexKick()
|
||||
{
|
||||
m_dkf = m_dk[PRIM->PRIM];
|
||||
m_cvf = m_cv[PRIM->PRIM][PRIM->TME][PRIM->FST];
|
||||
m_vertex.n = GSUtil::GetVertexCount(PRIM->PRIM);
|
||||
}
|
||||
|
||||
// Allocates larger vertex and index buffers, copies the used part of the old
// ones (up to m_vertex.tail / m_index.tail) and frees them. Also used for the
// initial allocation, in which case nothing is copied.
//
// The new capacity is 1.5x the stored m_vertex.maxcount, but at least 10000
// vertices; the index buffer holds three indices per vertex.
//
// NOTE(review): neither _aligned_malloc result is checked for NULL before use.
void GSState::GrowVertexBuffer()
|
||||
{
|
||||
// m_vertex.maxcount holds the previous capacity minus 100 (see below), so growth is computed from that reduced value
int maxcount = std::max<int>(m_vertex.maxcount * 3 / 2, 10000);
|
||||
|
||||
uint8* vertex = (uint8*)_aligned_malloc(m_vertex.stride * maxcount, 16);
|
||||
uint32* index = (uint32*)_aligned_malloc(sizeof(uint32) * maxcount * 3, 16); // worst case is slightly less than vertex number * 3
|
||||
|
||||
if(m_vertex.buff != NULL)
|
||||
{
|
||||
memcpy(vertex, m_vertex.buff, m_vertex.stride * m_vertex.tail);
|
||||
|
||||
_aligned_free(m_vertex.buff);
|
||||
}
|
||||
|
||||
if(m_index.buff != NULL)
|
||||
{
|
||||
memcpy(index, m_index.buff, sizeof(uint32) * m_index.tail);
|
||||
|
||||
_aligned_free(m_index.buff);
|
||||
}
|
||||
|
||||
m_vertex.buff = vertex;
|
||||
m_vertex.maxcount = maxcount - 100; // -100 because skipped vertices don't trigger growing the vertex buffer (VertexKick should be as fast as possible)
|
||||
m_index.buff = index;
|
||||
}
|
||||
|
||||
// Converts the current vertex into slot m_vertex.tail of the vertex buffer
// through the selected converter (m_cvf), advances the tail, and calls the
// selected drawing kick (m_dkf) once at least m_vertex.n vertices are pending
// between head and tail. skip is passed through to the drawing kick.
void GSState::VertexKick(uint32 skip)
|
||||
{
|
||||
(this->*m_cvf)(m_vertex.buff, m_vertex.tail);
|
||||
|
||||
if(++m_vertex.tail - m_vertex.head >= m_vertex.n)
|
||||
{
|
||||
(this->*m_dkf)(skip);
|
||||
}
|
||||
}
|
||||
|
||||
// Called by VertexKick when enough vertices are pending for one primitive of
// type prim. Either drops the primitive (skip != 0) or appends its vertex
// indices to the index buffer, and moves m_vertex.head / m_vertex.tail
// according to the primitive type.
template<uint32 prim>
|
||||
void GSState::DrawingKick(uint32 skip)
|
||||
{
|
||||
size_t head = m_vertex.head;
|
||||
size_t tail = m_vertex.tail;
|
||||
|
||||
if(skip)
|
||||
{
|
||||
// no indices are written for a skipped primitive
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
case GS_LINELIST:
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_SPRITE:
|
||||
case GS_INVALID:
|
||||
// rewind the tail so the pending vertices are overwritten by the next ones
m_vertex.tail = head;
|
||||
break;
|
||||
case GS_LINESTRIP:
|
||||
case GS_TRIANGLESTRIP:
|
||||
// keep the vertices but slide the window forward by one
m_vertex.head = head + 1;
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
// head and tail are left as they are
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// growth is only checked on this non-skipped path; GrowVertexBuffer keeps maxcount 100 below the real capacity for that reason
if(tail >= m_vertex.maxcount)
|
||||
{
|
||||
GrowVertexBuffer();
|
||||
}
|
||||
|
||||
// taken after the possible reallocation above
uint32* RESTRICT buff = &m_index.buff[m_index.tail];
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
buff[0] = head + 0;
|
||||
m_vertex.head = head + 1;
|
||||
m_index.tail += 1;
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
buff[0] = head + 0;
|
||||
buff[1] = head + 1;
|
||||
m_vertex.head = head + 2;
|
||||
m_index.tail += 2;
|
||||
break;
|
||||
case GS_LINESTRIP:
|
||||
// head advances by one only, so the second vertex becomes the first vertex of the next line
buff[0] = head + 0;
|
||||
buff[1] = head + 1;
|
||||
m_vertex.head = head + 1;
|
||||
m_index.tail += 2;
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
buff[0] = head + 0;
|
||||
buff[1] = head + 1;
|
||||
buff[2] = head + 2;
|
||||
m_vertex.head = head + 3;
|
||||
m_index.tail += 3;
|
||||
break;
|
||||
case GS_TRIANGLESTRIP:
|
||||
// head advances by one only, so the last two vertices are shared with the next triangle
buff[0] = head + 0;
|
||||
buff[1] = head + 1;
|
||||
buff[2] = head + 2;
|
||||
m_vertex.head = head + 1;
|
||||
m_index.tail += 3;
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
// head is not moved: every triangle uses the vertex at head plus the two most recent vertices
buff[0] = head + 0;
|
||||
buff[1] = tail - 2;
|
||||
buff[2] = tail - 1;
|
||||
m_index.tail += 3;
|
||||
break;
|
||||
case GS_SPRITE:
|
||||
buff[0] = head + 0;
|
||||
buff[1] = head + 1;
|
||||
m_vertex.head = head + 2;
|
||||
m_index.tail += 2;
|
||||
break;
|
||||
case GS_INVALID:
|
||||
// nothing is queued; the pending vertices are discarded
m_vertex.tail = head;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Computes into r the rectangle of the texture that the current vertices can
// reference. The result is always clipped to (0, 0, 1 << TW, 1 << TH).
//
// r      - receives the rectangle (x, y, z, w are used as left, top, right, bottom)
// TEX0   - supplies the texture size exponents TW / TH
// CLAMP  - supplies the wrap modes (WMS / WMT) and the MINU/MAXU/MINV/MAXV values
// linear - when true the vertex coordinate range is widened by 0x8000 on each side
//          before conversion (presumably half a texel in 16.16 fixed point - confirm)
void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear)
|
||||
{
|
||||
int tw = TEX0.TW;
|
||||
int th = TEX0.TH;
|
||||
|
||||
int w = 1 << tw;
|
||||
int h = 1 << th;
|
||||
|
||||
// full texture rectangle
GSVector4i tr(0, 0, w, h);
|
||||
|
||||
int wms = CLAMP.WMS;
|
||||
int wmt = CLAMP.WMT;
|
||||
|
||||
int minu = (int)CLAMP.MINU;
|
||||
int minv = (int)CLAMP.MINV;
|
||||
int maxu = (int)CLAMP.MAXU;
|
||||
int maxv = (int)CLAMP.MAXV;
|
||||
|
||||
// vr starts as the full texture and is narrowed below
GSVector4i vr = tr;
|
||||
|
||||
// step 1: limits that follow from the horizontal wrap mode alone
switch(wms)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
if(vr.x < minu) vr.x = minu;
|
||||
if(vr.z > maxu + 1) vr.z = maxu + 1;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
// in this mode the range starts at MAXU and spans MINU + 1 texels
vr.x = maxu;
|
||||
vr.z = vr.x + (minu + 1);
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
// same for the vertical wrap mode
switch(wmt)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
if(vr.y < minv) vr.y = minv;
|
||||
if(vr.w > maxv + 1) vr.w = maxv + 1;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
vr.y = maxv;
|
||||
vr.w = vr.y + (minv + 1);
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
// step 2: narrow further using the traced vertex texture coordinate range
// (presumably skipped only when both modes are CLAMP_REGION_REPEAT, assuming that enum value is 3 - confirm)
if(wms + wmt < 6)
|
||||
{
|
||||
// (min s, min t, max s, max t)
GSVector4 st = m_vt->m_min.t.xyxy(m_vt->m_max.t);
|
||||
|
||||
if(linear)
|
||||
{
|
||||
st += GSVector4(-0x8000, 0x8000).xxyy();
|
||||
}
|
||||
|
||||
// drop the 16 fractional bits
GSVector4i uv = GSVector4i(st).sra32(16);
|
||||
|
||||
GSVector4i u, v;
|
||||
|
||||
int mask = 0;
|
||||
|
||||
if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT)
|
||||
{
|
||||
// coordinates reduced to the low tw / th bits
u = uv & GSVector4i::xffffffff().srl32(32 - tw);
|
||||
v = uv & GSVector4i::xffffffff().srl32(32 - th);
|
||||
|
||||
GSVector4i uu = uv.sra32(tw);
|
||||
GSVector4i vv = uv.sra32(th);
|
||||
|
||||
// compares the shifted min against the shifted max; the 0x000f / 0xf000 tests below read the U and V results
// NOTE(review): relies on upl32/uph32 lane order and on mask() yielding one bit per byte - confirm against GSVector4i
mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
|
||||
}
|
||||
|
||||
uv = uv.rintersect(tr);
|
||||
|
||||
switch(wms)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
// the wrapped range is only used when the mask test for U passed
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
case CLAMP_REGION_CLAMP:
|
||||
if(vr.x < uv.x) vr.x = uv.x;
|
||||
if(vr.z > uv.z + 1) vr.z = uv.z + 1;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
switch(wmt)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
// the wrapped range is only used when the mask test for V passed
if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;}
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
case CLAMP_REGION_CLAMP:
|
||||
if(vr.y < uv.y) vr.y = uv.y;
|
||||
if(vr.w > uv.w + 1) vr.w = uv.w + 1;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
}
|
||||
|
||||
r = vr.rintersect(tr);
|
||||
}
|
||||
|
||||
// Computes the minimum and maximum alpha for the current draw and stores them
// in m_vt->m_alpha (min, max, valid). Returns immediately when m_alpha is
// already marked valid.
//
// Inside the function a.x / a.z carry the running min / max (they are what is
// stored at the end), and a.y / a.w carry the texture alpha min / max.
void GSState::GetAlphaMinMax()
|
||||
{
|
||||
if(m_vt->m_alpha.valid)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const GSDrawingEnvironment& env = m_env;
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
// seeded from the traced vertex color min / max
GSVector4i a = m_vt->m_min.c.uph32(m_vt->m_max.c).zzww();
|
||||
|
||||
// texture alpha only participates when texturing is on and TEX0.TCC is set
if(PRIM->TME && context->TEX0.TCC)
|
||||
{
|
||||
// texture alpha range, by texture format class
switch(GSLocalMemory::m_psm[context->TEX0.PSM].fmt)
|
||||
{
|
||||
case 0:
|
||||
// full range
a.y = 0;
|
||||
a.w = 0xff;
|
||||
break;
|
||||
case 1:
|
||||
// max is TA0; min is TA0 too unless TEXA.AEM is set, in which case it is 0
a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0;
|
||||
a.w = env.TEXA.TA0;
|
||||
break;
|
||||
case 2:
|
||||
// range spanned by TA0 and TA1; min is 0 when TEXA.AEM is set
a.y = env.TEXA.AEM ? 0 : min(env.TEXA.TA0, env.TEXA.TA1);
|
||||
a.w = max(env.TEXA.TA0, env.TEXA.TA1);
|
||||
break;
|
||||
case 3:
|
||||
// range is taken from the CLUT
m_mem.m_clut.GetAlphaMinMax32(a.y, a.w);
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
// combine the running range with the texture range according to TEX0.TFX
switch(context->TEX0.TFX)
|
||||
{
|
||||
case TFX_MODULATE:
|
||||
// product shifted right by 7, capped at 0xff
a.x = (a.x * a.y) >> 7;
|
||||
a.z = (a.z * a.w) >> 7;
|
||||
if(a.x > 0xff) a.x = 0xff;
|
||||
if(a.z > 0xff) a.z = 0xff;
|
||||
break;
|
||||
case TFX_DECAL:
|
||||
// texture range replaces the running range
a.x = a.y;
|
||||
a.z = a.w;
|
||||
break;
|
||||
case TFX_HIGHLIGHT:
|
||||
// sum, capped at 0xff
a.x = a.x + a.y;
|
||||
a.z = a.z + a.w;
|
||||
if(a.x > 0xff) a.x = 0xff;
|
||||
if(a.z > 0xff) a.z = 0xff;
|
||||
break;
|
||||
case TFX_HIGHLIGHT2:
|
||||
// texture range replaces the running range
a.x = a.y;
|
||||
a.z = a.w;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
}
|
||||
|
||||
m_vt->m_alpha.min = a.x;
|
||||
m_vt->m_alpha.max = a.z;
|
||||
m_vt->m_alpha.valid = true;
|
||||
}
|
||||
|
||||
// Tries to decide the alpha test for the whole draw from the alpha range
// computed by GetAlphaMinMax().
//
// Returns false when the range straddles TEST.AREF in a way that leaves the
// result undecided; fm and zm are left untouched in that case.
// Returns true when the test passes or fails for every alpha in the range.
// On a uniform fail, fm / zm are modified according to TEST.AFAIL (see the
// switch at the end); on a uniform pass they are not modified.
bool GSState::TryAlphaTest(uint32& fm, uint32& zm)
|
||||
{
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
bool pass = true;
|
||||
|
||||
if(context->TEST.ATST == ATST_NEVER)
|
||||
{
|
||||
pass = false;
|
||||
}
|
||||
// ATST_ALWAYS keeps pass == true without computing the alpha range
else if(context->TEST.ATST != ATST_ALWAYS)
|
||||
{
|
||||
GetAlphaMinMax();
|
||||
|
||||
int amin = m_vt->m_alpha.min;
|
||||
int amax = m_vt->m_alpha.max;
|
||||
|
||||
int aref = context->TEST.AREF;
|
||||
|
||||
// each case: whole range passes -> true, whole range fails -> false, otherwise give up
switch(context->TEST.ATST)
|
||||
{
|
||||
case ATST_NEVER:
|
||||
pass = false;
|
||||
break;
|
||||
case ATST_ALWAYS:
|
||||
pass = true;
|
||||
break;
|
||||
case ATST_LESS:
|
||||
if(amax < aref) pass = true;
|
||||
else if(amin >= aref) pass = false;
|
||||
else return false;
|
||||
break;
|
||||
case ATST_LEQUAL:
|
||||
if(amax <= aref) pass = true;
|
||||
else if(amin > aref) pass = false;
|
||||
else return false;
|
||||
break;
|
||||
case ATST_EQUAL:
|
||||
if(amin == aref && amax == aref) pass = true;
|
||||
else if(amin > aref || amax < aref) pass = false;
|
||||
else return false;
|
||||
break;
|
||||
case ATST_GEQUAL:
|
||||
if(amin >= aref) pass = true;
|
||||
else if(amax < aref) pass = false;
|
||||
else return false;
|
||||
break;
|
||||
case ATST_GREATER:
|
||||
if(amin > aref) pass = true;
|
||||
else if(amax <= aref) pass = false;
|
||||
else return false;
|
||||
break;
|
||||
case ATST_NOTEQUAL:
|
||||
if(amin == aref && amax == aref) pass = false;
|
||||
else if(amin > aref || amax < aref) pass = true;
|
||||
else return false;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
}
|
||||
|
||||
if(!pass)
|
||||
{
|
||||
// NOTE(review): fm / zm look like write-protect masks (set bits = not written) - confirm with the callers
switch(context->TEST.AFAIL)
|
||||
{
|
||||
case AFAIL_KEEP: fm = zm = 0xffffffff; break;
|
||||
case AFAIL_FB_ONLY: zm = 0xffffffff; break;
|
||||
case AFAIL_ZB_ONLY: fm = 0xffffffff; break;
|
||||
case AFAIL_RGB_ONLY: fm |= 0xff000000; zm = 0xffffffff; break;
|
||||
default: __assume(0);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reports whether the current draw can be treated as opaque.
// Returns false whenever PRIM->AA1 is set and true whenever PRIM->ABE is
// clear. Otherwise an alpha range [amin, amax] is determined from the ALPHA
// register and the decision is delegated to ALPHA.IsOpaque(amin, amax).
bool GSState::IsOpaque()
|
||||
{
|
||||
if(PRIM->AA1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!PRIM->ABE)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
// full range unless one of the cases below narrows it
int amin = 0, amax = 0xff;
|
||||
|
||||
// the range is only refined when ALPHA.A and ALPHA.B differ
if(context->ALPHA.A != context->ALPHA.B)
|
||||
{
|
||||
if(context->ALPHA.C == 0)
|
||||
{
|
||||
// use the alpha range computed for this draw
GetAlphaMinMax();
|
||||
|
||||
amin = m_vt->m_alpha.min;
|
||||
amax = m_vt->m_alpha.max;
|
||||
}
|
||||
else if(context->ALPHA.C == 1)
|
||||
{
|
||||
// only the two 24-bit formats narrow the range, to the constant 0x80; other formats keep the full range
if(context->FRAME.PSM == PSM_PSMCT24 || context->FRAME.PSM == PSM_PSMZ24)
|
||||
{
|
||||
amin = amax = 0x80;
|
||||
}
|
||||
}
|
||||
else if(context->ALPHA.C == 2)
|
||||
{
|
||||
// constant taken from ALPHA.FIX
amin = amax = context->ALPHA.FIX;
|
||||
}
|
||||
}
|
||||
|
||||
return context->ALPHA.IsOpaque(amin, amax);
|
||||
}
|
||||
|
||||
// GSTransferBuffer
|
||||
|
||||
GSState::GSTransferBuffer::GSTransferBuffer()
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
#include "GSDrawingContext.h"
|
||||
#include "GSDrawingEnvironment.h"
|
||||
#include "GSVertex.h"
|
||||
#include "GSVertexList.h"
|
||||
#include "GSVertexTrace.h"
|
||||
#include "GSUtil.h"
|
||||
#include "GSPerfMon.h"
|
||||
#include "GSVector.h"
|
||||
|
@ -126,49 +126,53 @@ class GSState : public GSAlignedClass<32>
|
|||
|
||||
} m_tr;
|
||||
|
||||
void FlushWrite();
|
||||
|
||||
protected:
|
||||
bool IsBadFrame(int& skip, int UserHacks_SkipDraw);
|
||||
|
||||
typedef void (GSState::*VertexKickPtr)(bool skip);
|
||||
GSVertex m_v;
|
||||
float m_q;
|
||||
struct {uint8* buff; size_t head, tail, maxcount, stride, n;} m_vertex;
|
||||
struct {uint32* buff; size_t tail;} m_index;
|
||||
|
||||
VertexKickPtr m_vk[8][2][2];
|
||||
VertexKickPtr m_vkf;
|
||||
typedef void (GSState::*DrawingKickPtr)(uint32 skip);
|
||||
typedef void (GSState::*ConvertVertexPtr)(void* RESTRICT vertex, size_t index);
|
||||
|
||||
#define InitVertexKick3(T, P, N, M) \
|
||||
m_vk[P][N][M] = (VertexKickPtr)(void (T::*)(bool))&T::VertexKick<P, N, M>;
|
||||
DrawingKickPtr m_dk[8], m_dkf;
|
||||
ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST]
|
||||
|
||||
#define InitVertexKick2(T, P) \
|
||||
InitVertexKick3(T, P, 0, 0) \
|
||||
InitVertexKick3(T, P, 0, 1) \
|
||||
InitVertexKick3(T, P, 1, 0) \
|
||||
InitVertexKick3(T, P, 1, 1) \
|
||||
#define InitConvertVertex2(T, P) \
|
||||
m_cv[P][0][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 0>; \
|
||||
m_cv[P][0][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 1>; \
|
||||
m_cv[P][1][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 0>; \
|
||||
m_cv[P][1][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 1>; \
|
||||
|
||||
#define InitVertexKick(T) \
|
||||
InitVertexKick2(T, GS_POINTLIST) \
|
||||
InitVertexKick2(T, GS_LINELIST) \
|
||||
InitVertexKick2(T, GS_LINESTRIP) \
|
||||
InitVertexKick2(T, GS_TRIANGLELIST) \
|
||||
InitVertexKick2(T, GS_TRIANGLESTRIP) \
|
||||
InitVertexKick2(T, GS_TRIANGLEFAN) \
|
||||
InitVertexKick2(T, GS_SPRITE) \
|
||||
InitVertexKick2(T, GS_INVALID) \
|
||||
#define InitConvertVertex(T) \
|
||||
InitConvertVertex2(T, GS_POINTLIST) \
|
||||
InitConvertVertex2(T, GS_LINELIST) \
|
||||
InitConvertVertex2(T, GS_LINESTRIP) \
|
||||
InitConvertVertex2(T, GS_TRIANGLELIST) \
|
||||
InitConvertVertex2(T, GS_TRIANGLESTRIP) \
|
||||
InitConvertVertex2(T, GS_TRIANGLEFAN) \
|
||||
InitConvertVertex2(T, GS_SPRITE) \
|
||||
InitConvertVertex2(T, GS_INVALID) \
|
||||
|
||||
void UpdateVertexKick()
|
||||
{
|
||||
m_vkf = m_vk[PRIM->PRIM][PRIM->TME][PRIM->FST];
|
||||
}
|
||||
virtual void UpdateVertexKick();
|
||||
|
||||
void VertexKickNull(bool skip)
|
||||
{
|
||||
ASSERT(0);
|
||||
}
|
||||
void GrowVertexBuffer();
|
||||
|
||||
void VertexKick(bool skip)
|
||||
{
|
||||
(this->*m_vkf)(skip);
|
||||
}
|
||||
void VertexKick(uint32 skip);
|
||||
|
||||
template<uint32 prim>
|
||||
void DrawingKick(uint32 skip);
|
||||
|
||||
// following functions need m_vt to be initialized
|
||||
|
||||
GSVertexTrace* m_vt;
|
||||
|
||||
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
|
||||
void GetAlphaMinMax();
|
||||
bool TryAlphaTest(uint32& fm, uint32& zm);
|
||||
bool IsOpaque();
|
||||
|
||||
public:
|
||||
GIFPath m_path[4];
|
||||
|
@ -177,10 +181,6 @@ public:
|
|||
GSLocalMemory m_mem;
|
||||
GSDrawingEnvironment m_env;
|
||||
GSDrawingContext* m_context;
|
||||
GSVertex m_v;
|
||||
float m_q;
|
||||
uint32 m_vprim;
|
||||
|
||||
GSPerfMon m_perfmon;
|
||||
uint32 m_crc;
|
||||
int m_options;
|
||||
|
@ -190,7 +190,7 @@ public:
|
|||
GSDump m_dump;
|
||||
|
||||
public:
|
||||
GSState();
|
||||
GSState(GSVertexTrace* vt, size_t vertex_stride);
|
||||
virtual ~GSState();
|
||||
|
||||
void ResetHandlers();
|
||||
|
@ -205,8 +205,9 @@ public:
|
|||
|
||||
virtual void Reset();
|
||||
virtual void Flush();
|
||||
virtual void FlushPrim() = 0;
|
||||
virtual void ResetPrim() = 0;
|
||||
virtual void FlushPrim();
|
||||
virtual void FlushWrite();
|
||||
virtual void Draw() = 0;
|
||||
virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) {}
|
||||
virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) {}
|
||||
|
||||
|
|
|
@ -319,11 +319,11 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
|
|||
|
||||
GSVector4i r;
|
||||
|
||||
vector<uint32>* pages = o->GetPages(rect, &r);
|
||||
const uint32* pages = o->GetPages(rect, &r);
|
||||
|
||||
bool found = false;
|
||||
|
||||
for(vector<uint32>::iterator p = pages->begin(); p != pages->end(); p++)
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
uint32 page = *p;
|
||||
|
||||
|
@ -374,7 +374,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
|
|||
}
|
||||
}
|
||||
|
||||
delete pages;
|
||||
delete [] pages;
|
||||
|
||||
if(!target) return;
|
||||
|
||||
|
|
|
@ -74,18 +74,18 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
|
|||
|
||||
m_textures.insert(t);
|
||||
|
||||
for(vector<uint32>::const_iterator i = t->m_pages.n->begin(); i != t->m_pages.n->end(); i++)
|
||||
for(const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
m_map[*i].push_front(t);
|
||||
m_map[*p].push_front(t);
|
||||
}
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
void GSTextureCacheSW::InvalidatePages(const vector<uint32>* pages, uint32 psm)
|
||||
void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm)
|
||||
{
|
||||
for(vector<uint32>::const_iterator p = pages->begin(); p != pages->end(); p++)
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
uint32 page = *p;
|
||||
|
||||
|
@ -185,9 +185,9 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
|
|||
|
||||
m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
|
||||
|
||||
for(vector<uint32>::const_iterator i = m_pages.n->begin(); i != m_pages.n->end(); i++)
|
||||
for(const uint32* p = m_pages.n; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
uint32 page = *i;
|
||||
uint32 page = *p;
|
||||
|
||||
m_pages.bm[page >> 5] |= 1 << (page & 31);
|
||||
}
|
||||
|
@ -202,7 +202,7 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
|
|||
|
||||
GSTextureCacheSW::Texture::~Texture()
|
||||
{
|
||||
delete m_pages.n;
|
||||
delete [] m_pages.n;
|
||||
|
||||
if(m_buff)
|
||||
{
|
||||
|
|
|
@ -40,7 +40,7 @@ public:
|
|||
bool m_repeating;
|
||||
vector<GSVector2i>* m_p2t;
|
||||
uint32 m_valid[MAX_PAGES];
|
||||
struct {uint32 bm[16]; const vector<uint32>* n;} m_pages;
|
||||
struct {uint32 bm[16]; const uint32* n;} m_pages;
|
||||
|
||||
// m_valid
|
||||
// fast mode: each uint32 bits map to the 32 blocks of that page
|
||||
|
@ -64,7 +64,7 @@ public:
|
|||
|
||||
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
|
||||
|
||||
void InvalidatePages(const vector<uint32>* pages, uint32 psm);
|
||||
void InvalidatePages(const uint32* pages, uint32 psm);
|
||||
|
||||
void RemoveAll();
|
||||
void RemoveAt(Texture* t);
|
||||
|
|
|
@ -82,9 +82,10 @@ bool GSDevice11::CreateTextureFX()
|
|||
return true;
|
||||
}
|
||||
|
||||
void GSDevice11::SetupIA(const void* vertices, int count, int prim)
|
||||
void GSDevice11::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
|
||||
{
|
||||
IASetVertexBuffer(vertices, sizeof(GSVertexHW11), count);
|
||||
IASetVertexBuffer(vertex, sizeof(GSVertexHW11), vertex_count);
|
||||
IASetIndexBuffer(index, index_count);
|
||||
IASetPrimitiveTopology((D3D11_PRIMITIVE_TOPOLOGY)prim);
|
||||
}
|
||||
|
||||
|
|
|
@ -61,9 +61,10 @@ GSTexture* GSDevice9::CreateMskFix(uint32 size, uint32 msk, uint32 fix)
|
|||
return t;
|
||||
}
|
||||
|
||||
void GSDevice9::SetupIA(const void* vertices, int count, int prim)
|
||||
void GSDevice9::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
|
||||
{
|
||||
IASetVertexBuffer(vertices, sizeof(GSVertexHW9), count);
|
||||
IASetVertexBuffer(vertex, sizeof(GSVertexHW9), vertex_count);
|
||||
IASetIndexBuffer(index, index_count);
|
||||
IASetPrimitiveTopology((D3DPRIMITIVETYPE)prim);
|
||||
}
|
||||
|
||||
|
|
|
@ -91,6 +91,7 @@ static class GSUtilMaps
|
|||
{
|
||||
public:
|
||||
uint8 PrimClassField[8];
|
||||
uint8 VertexCountField[8];
|
||||
uint32 CompatibleBitsField[64][2];
|
||||
uint32 SharedBitsField[64][2];
|
||||
|
||||
|
@ -105,6 +106,15 @@ public:
|
|||
PrimClassField[GS_SPRITE] = GS_SPRITE_CLASS;
|
||||
PrimClassField[GS_INVALID] = GS_INVALID_CLASS;
|
||||
|
||||
VertexCountField[GS_POINTLIST] = 1;
|
||||
VertexCountField[GS_LINELIST] = 2;
|
||||
VertexCountField[GS_LINESTRIP] = 2;
|
||||
VertexCountField[GS_TRIANGLELIST] = 3;
|
||||
VertexCountField[GS_TRIANGLESTRIP] = 3;
|
||||
VertexCountField[GS_TRIANGLEFAN] = 3;
|
||||
VertexCountField[GS_SPRITE] = 2;
|
||||
VertexCountField[GS_INVALID] = 1;
|
||||
|
||||
memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField));
|
||||
|
||||
for(int i = 0; i < 64; i++)
|
||||
|
@ -146,6 +156,11 @@ GS_PRIM_CLASS GSUtil::GetPrimClass(uint32 prim)
|
|||
return (GS_PRIM_CLASS)s_maps.PrimClassField[prim];
|
||||
}
|
||||
|
||||
// Returns the entry of s_maps.VertexCountField for the given GS primitive type.
// The table is filled in the GSUtilMaps constructor: 1 for GS_POINTLIST and
// GS_INVALID, 2 for GS_LINELIST / GS_LINESTRIP / GS_SPRITE, 3 for the triangle
// list / strip / fan types.
//
// NOTE(review): prim is used as a direct index into an 8-entry uint8 table with
// no range check; callers are assumed to pass a value in 0..7 — confirm.
int GSUtil::GetVertexCount(uint32 prim)
|
||||
{
|
||||
return s_maps.VertexCountField[prim];
|
||||
}
|
||||
|
||||
bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm)
|
||||
{
|
||||
return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
|
||||
|
@ -321,7 +336,7 @@ static bool DXUTDelayLoadDXGI()
|
|||
|
||||
bool GSUtil::CheckDirect3D11Level(D3D_FEATURE_LEVEL& level)
|
||||
{
|
||||
HRESULT hr;
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
level = (D3D_FEATURE_LEVEL)0;
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@ public:
|
|||
static const char* GetLibName();
|
||||
|
||||
static GS_PRIM_CLASS GetPrimClass(uint32 prim);
|
||||
static int GetVertexCount(uint32 prim);
|
||||
|
||||
static bool HasSharedBits(uint32 spsm, uint32 dpsm);
|
||||
static bool HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm);
|
||||
|
|
|
@ -1900,7 +1900,7 @@ public:
|
|||
d = f.uph64(d);
|
||||
}
|
||||
|
||||
__forceinline static bool compare16(const void* dst, const void* src, int size)
|
||||
__forceinline static bool compare16(const void* dst, const void* src, size_t size)
|
||||
{
|
||||
ASSERT((size & 15) == 0);
|
||||
|
||||
|
@ -1909,7 +1909,7 @@ public:
|
|||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)dst;
|
||||
|
||||
for(int i = 0; i < size; i++)
|
||||
for(size_t i = 0; i < size; i++)
|
||||
{
|
||||
if(!d[i].eq(s[i]))
|
||||
{
|
||||
|
@ -1920,7 +1920,7 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
__forceinline static bool compare64(const void* dst, const void* src, int size)
|
||||
__forceinline static bool compare64(const void* dst, const void* src, size_t size)
|
||||
{
|
||||
ASSERT((size & 63) == 0);
|
||||
|
||||
|
@ -1929,7 +1929,7 @@ public:
|
|||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)dst;
|
||||
|
||||
for(int i = 0; i < size; i += 4)
|
||||
for(size_t i = 0; i < size; i += 4)
|
||||
{
|
||||
GSVector4i v0 = (d[i * 4 + 0] == s[i * 4 + 0]);
|
||||
GSVector4i v1 = (d[i * 4 + 1] == s[i * 4 + 1]);
|
||||
|
@ -1948,7 +1948,7 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
__forceinline static bool update(const void* dst, const void* src, int size)
|
||||
__forceinline static bool update(const void* dst, const void* src, size_t size)
|
||||
{
|
||||
ASSERT((size & 15) == 0);
|
||||
|
||||
|
@ -1959,7 +1959,7 @@ public:
|
|||
|
||||
GSVector4i v = GSVector4i::xffffffff();
|
||||
|
||||
for(int i = 0; i < size; i++)
|
||||
for(size_t i = 0; i < size; i++)
|
||||
{
|
||||
v &= d[i] == s[i];
|
||||
|
||||
|
|
|
@ -30,15 +30,20 @@
|
|||
|
||||
__aligned(struct, 32) GSVertex
|
||||
{
|
||||
GIFRegST ST;
|
||||
GIFRegRGBAQ RGBAQ;
|
||||
GIFRegXYZ XYZ;
|
||||
GIFRegFOG FOG;
|
||||
GIFRegUV UV;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
GIFRegST ST;
|
||||
GIFRegRGBAQ RGBAQ;
|
||||
GIFRegXYZ XYZ;
|
||||
union {GIFRegUV UV; GIFRegFOG FOG;}; // UV.u32[0] | FOG.u32[1]
|
||||
};
|
||||
|
||||
GSVertex() {memset(this, 0, sizeof(*this));}
|
||||
__m128i m[2];
|
||||
};
|
||||
|
||||
GSVector4 GetUV() const {return GSVector4(GSVector4i::load(UV.u32[0]).upl16());}
|
||||
void operator = (const GSVertex& v) {m[0] = v.m[0]; m[1] = v.m[1];}
|
||||
};
|
||||
|
||||
struct GSVertexP
|
||||
|
|
|
@ -35,16 +35,6 @@ __aligned(struct, 32) GSVertexHW9
|
|||
// t.w = union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;}
|
||||
|
||||
GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;}
|
||||
|
||||
float& _q() {return p.w;}
|
||||
|
||||
uint8& _r() {return t.u8[8];}
|
||||
uint8& _g() {return t.u8[9];}
|
||||
uint8& _b() {return t.u8[10];}
|
||||
uint8& _a() {return t.u8[11];}
|
||||
|
||||
uint32& _c0() {return t.u32[2];}
|
||||
uint32& _c1() {return t.u32[3];}
|
||||
};
|
||||
|
||||
__aligned(union, 32) GSVertexHW11
|
||||
|
@ -86,16 +76,6 @@ __aligned(union, 32) GSVertexHW11
|
|||
|
||||
return *this;
|
||||
}
|
||||
|
||||
float& _q() {return q;}
|
||||
|
||||
uint8& _r() {return r;}
|
||||
uint8& _g() {return g;}
|
||||
uint8& _b() {return b;}
|
||||
uint8& _a() {return a;}
|
||||
|
||||
uint32& _c0() {return c0;}
|
||||
uint32& _c1() {return c1;}
|
||||
};
|
||||
|
||||
#pragma pack(pop)
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
|
||||
#include "GSVector.h"
|
||||
|
||||
__aligned(struct, 16) GSVertexSW
|
||||
__aligned(struct, 32) GSVertexSW
|
||||
{
|
||||
GSVector4 p, t, c;
|
||||
|
||||
|
|
|
@ -28,12 +28,60 @@ const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX);
|
|||
|
||||
GSVertexTrace::GSVertexTrace(const GSState* state)
|
||||
: m_state(state)
|
||||
, m_map_sw("VertexTraceSW", NULL)
|
||||
, m_map_hw9("VertexTraceHW9", NULL)
|
||||
, m_map_hw11("VertexTraceHW11", NULL)
|
||||
{
|
||||
}
|
||||
|
||||
// Base-class part of the vertex trace update. It post-processes m_min / m_max,
// which this function only reads; it does not compute them itself.
//
// The vertex / index / count / primclass parameters are not used in this body.
//
// Effects:
//  - m_eq.value is rebuilt from min == max comparisons of colour, position and
//    texture coordinates.
//  - m_alpha.valid is cleared so the cached alpha range is recomputed on demand.
//  - When texturing is enabled (PRIM->TME), m_filter.mmag / mmin / linear and,
//    unless MXL == 0, m_lod are refreshed from TEX1.
void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
// Pack the three equality masks into one word: colour in the low bits,
// position shifted to bit 16, texture coordinates shifted to bit 20.
// NOTE(review): mask() presumably yields per-lane comparison bits — confirm in GSVector.h.
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
|
||||
|
||||
m_alpha.valid = false;
|
||||
|
||||
if(m_state->PRIM->TME)
|
||||
{
|
||||
const GIFRegTEX1& TEX1 = m_state->m_context->TEX1;
|
||||
|
||||
m_filter.mmag = TEX1.IsMagLinear();
|
||||
m_filter.mmin = TEX1.IsMinLinear();
|
||||
|
||||
if(TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2
|
||||
{
|
||||
m_filter.linear = m_filter.mmag;
|
||||
|
||||
// Early out: m_lod is left untouched on this path.
return;
|
||||
}
|
||||
|
||||
// TEX1.K is divided by 16 to obtain the LOD offset as a float.
float K = (float)TEX1.K / 16;
|
||||
|
||||
if(TEX1.LCM == 0 && m_state->PRIM->FST == 0) // FST == 1 => Q is not interpolated
|
||||
{
|
||||
// LOD = log2(1/|Q|) * (1 << L) + K
|
||||
|
||||
// Evaluates the formula for both ends of the traced range and stores the
// two results into m_lod.
// NOTE(review): uph(...) presumably pairs the Q lanes of m_max.t and m_min.t — confirm.
GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(3).neg() * (float)(1 << TEX1.L) + K);
|
||||
|
||||
// Keep m_lod ordered as x = min, y = max.
if(m_lod.x > m_lod.y) {float tmp = m_lod.x; m_lod.x = m_lod.y; m_lod.y = tmp;}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Otherwise the LOD is the constant K for the whole draw.
m_lod.x = K;
|
||||
m_lod.y = K;
|
||||
}
|
||||
|
||||
// Pick the filter from the LOD range: whole range <= 0 uses the mag filter,
// whole range > 0 uses the min filter, and a range straddling 0 is linear
// if either filter is linear.
if(m_lod.y <= 0)
|
||||
{
|
||||
m_filter.linear = m_filter.mmag;
|
||||
}
|
||||
else if(m_lod.x > 0)
|
||||
{
|
||||
m_filter.linear = m_filter.mmin;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_filter.linear = m_filter.mmag | m_filter.mmin;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_primclass = primclass;
|
||||
|
@ -48,66 +96,28 @@ uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass)
|
|||
return hash;
|
||||
}
|
||||
|
||||
void GSVertexTrace::UpdateLOD()
|
||||
GSVertexTraceSW::GSVertexTraceSW(const GSState* state)
|
||||
: GSVertexTrace(state)
|
||||
, m_map("VertexTraceSW", NULL)
|
||||
{
|
||||
if(!m_state->PRIM->TME) return;
|
||||
|
||||
const GIFRegTEX1& TEX1 = m_state->m_context->TEX1;
|
||||
|
||||
m_filter.mmag = TEX1.IsMagLinear();
|
||||
m_filter.mmin = TEX1.IsMinLinear();
|
||||
|
||||
if(TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2
|
||||
{
|
||||
m_filter.linear = m_filter.mmag;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
float K = (float)TEX1.K / 16;
|
||||
|
||||
if(TEX1.LCM == 0 && m_state->PRIM->FST == 0) // FST == 1 => Q is not interpolated
|
||||
{
|
||||
// LOD = log2(1/|Q|) * (1 << L) + K
|
||||
|
||||
GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(3).neg() * (float)(1 << TEX1.L) + K);
|
||||
|
||||
if(m_lod.x > m_lod.y) {float tmp = m_lod.x; m_lod.x = m_lod.y; m_lod.y = tmp;}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_lod.x = K;
|
||||
m_lod.y = K;
|
||||
}
|
||||
|
||||
if(m_lod.y <= 0)
|
||||
{
|
||||
m_filter.linear = m_filter.mmag;
|
||||
}
|
||||
else if(m_lod.x > 0)
|
||||
{
|
||||
m_filter.linear = m_filter.mmin;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_filter.linear = m_filter.mmag | m_filter.mmin;
|
||||
}
|
||||
}
|
||||
|
||||
void GSVertexTrace::Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass)
|
||||
void GSVertexTraceSW::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_map_sw[Hash(primclass)](count, v, m_min, m_max);
|
||||
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
|
||||
|
||||
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
|
||||
|
||||
m_alpha.valid = false;
|
||||
|
||||
UpdateLOD();
|
||||
GSVertexTrace::Update(vertex, index, count, primclass);
|
||||
}
|
||||
|
||||
void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass)
|
||||
GSVertexTraceDX9::GSVertexTraceDX9(const GSState* state)
|
||||
: GSVertexTrace(state)
|
||||
, m_map("VertexTraceHW9", NULL)
|
||||
{
|
||||
m_map_hw9[Hash(primclass)](count, v, m_min, m_max);
|
||||
}
|
||||
|
||||
void GSVertexTraceDX9::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
|
||||
|
||||
const GSDrawingContext* context = m_state->m_context;
|
||||
|
||||
|
@ -132,16 +142,18 @@ void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primcl
|
|||
m_max.t *= s;
|
||||
}
|
||||
|
||||
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
|
||||
|
||||
m_alpha.valid = false;
|
||||
|
||||
UpdateLOD();
|
||||
GSVertexTrace::Update(vertex, index, count, primclass);
|
||||
}
|
||||
|
||||
void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primclass)
|
||||
GSVertexTraceDX11::GSVertexTraceDX11(const GSState* state)
|
||||
: GSVertexTrace(state)
|
||||
, m_map("VertexTraceHW11", NULL)
|
||||
{
|
||||
m_map_hw11[Hash(primclass)](count, v, m_min, m_max);
|
||||
}
|
||||
|
||||
void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
|
||||
|
||||
const GSDrawingContext* context = m_state->m_context;
|
||||
|
||||
|
@ -166,10 +178,6 @@ void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primc
|
|||
m_max.t *= s;
|
||||
}
|
||||
|
||||
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
|
||||
|
||||
m_alpha.valid = false;
|
||||
|
||||
UpdateLOD();
|
||||
GSVertexTrace::Update(vertex, index, count, primclass);
|
||||
}
|
||||
|
||||
|
|
|
@ -29,42 +29,18 @@
|
|||
|
||||
class GSState;
|
||||
|
||||
__aligned(class, 32) GSVertexTrace
|
||||
__aligned(class, 32) GSVertexTrace : public GSAlignedClass<32>
|
||||
{
|
||||
public:
|
||||
struct Vertex {GSVector4i c; GSVector4 p, t;}; // t.xy * 0x10000
|
||||
struct VertexAlpha {int min, max; bool valid;};
|
||||
|
||||
private:
|
||||
typedef void (*VertexTracePtr)(int count, const void* v, Vertex& min, Vertex& max);
|
||||
|
||||
class CGSW : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
CGSW(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
class CGHW9 : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
CGHW9(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
class CGHW11 : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
CGHW11(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
GSCodeGeneratorFunctionMap<CGSW, uint32, VertexTracePtr> m_map_sw;
|
||||
GSCodeGeneratorFunctionMap<CGHW9, uint32, VertexTracePtr> m_map_hw9;
|
||||
GSCodeGeneratorFunctionMap<CGHW11, uint32, VertexTracePtr> m_map_hw11;
|
||||
|
||||
protected:
|
||||
const GSState* m_state;
|
||||
|
||||
uint32 Hash(GS_PRIM_CLASS primclass);
|
||||
|
||||
void UpdateLOD();
|
||||
typedef void (*VertexTracePtr)(int count, const void* vertex, const uint32* index, Vertex& min, Vertex& max);
|
||||
|
||||
static const GSVector4 s_minmax;
|
||||
|
||||
|
@ -73,10 +49,7 @@ public:
|
|||
|
||||
Vertex m_min;
|
||||
Vertex m_max;
|
||||
|
||||
// source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it
|
||||
|
||||
VertexAlpha m_alpha;
|
||||
VertexAlpha m_alpha; // source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it
|
||||
|
||||
union
|
||||
{
|
||||
|
@ -92,12 +65,59 @@ public:
|
|||
|
||||
GSVector2 m_lod; // x = min, y = max
|
||||
|
||||
public:
|
||||
GSVertexTrace(const GSState* state);
|
||||
virtual ~GSVertexTrace() {}
|
||||
|
||||
void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass);
|
||||
void Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass);
|
||||
void Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primclass);
|
||||
void Update(const GSVertexNull* v, int count, GS_PRIM_CLASS primclass) {}
|
||||
virtual void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
|
||||
bool IsLinear() const {return m_filter.linear;}
|
||||
};
|
||||
|
||||
// Vertex trace specialization whose generated min/max routine reads GSVertexSW
// vertices (the CG implementation addresses fields via offsetof(GSVertexSW, ...)).
__aligned(class, 32) GSVertexTraceSW : public GSVertexTrace
|
||||
{
|
||||
// Run-time code generator that emits the min/max scanning routine for one
// hash key (the key's low two bits carry the primitive class).
class CG : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
CG(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
// Map of generated VertexTracePtr routines, indexed by the uint32 key from Hash().
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
|
||||
|
||||
public:
|
||||
GSVertexTraceSW(const GSState* state);
|
||||
|
||||
// Runs the generated routine for Hash(primclass) over the indexed vertices to
// fill m_min / m_max, then calls GSVertexTrace::Update() for the shared part.
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
};
|
||||
|
||||
// Vertex trace specialization whose generated min/max routine reads GSVertexHW9
// vertices (the CG implementation addresses fields via offsetof(GSVertexHW9, ...)).
__aligned(class, 32) GSVertexTraceDX9 : public GSVertexTrace
|
||||
{
|
||||
// Run-time code generator that emits the min/max scanning routine for one
// hash key (the key's low two bits carry the primitive class).
class CG : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
CG(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
// Map of generated VertexTracePtr routines, indexed by the uint32 key from Hash().
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
|
||||
|
||||
public:
|
||||
GSVertexTraceDX9(const GSState* state);
|
||||
|
||||
// Runs the generated routine for Hash(primclass) over the indexed vertices to
// fill m_min / m_max, adjusts them using the current drawing context, then
// calls GSVertexTrace::Update() for the shared part.
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
};
|
||||
|
||||
// Vertex trace specialization whose generated min/max routine reads GSVertexHW11
// vertices (the CG implementation scales indices by sizeof(GSVertexHW11)).
__aligned(class, 32) GSVertexTraceDX11 : public GSVertexTrace
|
||||
{
|
||||
// Run-time code generator that emits the min/max scanning routine for one
// hash key (the key's low two bits carry the primitive class).
class CG : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
CG(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
// Map of generated VertexTracePtr routines, indexed by the uint32 key from Hash().
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
|
||||
|
||||
public:
|
||||
GSVertexTraceDX11(const GSState* state);
|
||||
|
||||
// Runs the generated routine for Hash(primclass) over the indexed vertices to
// fill m_min / m_max, adjusts them using the current drawing context, then
// calls GSVertexTrace::Update() for the shared part.
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
};
|
||||
|
|
|
@ -27,12 +27,13 @@
|
|||
using namespace Xbyak;
|
||||
|
||||
static const int _args = 0;
|
||||
static const int _count = _args + 4; // rcx
|
||||
static const int _v = _args + 8; // rdx
|
||||
static const int _min = _args + 12; // r8
|
||||
static const int _max = _args + 16; // r9
|
||||
static const int _count = _args + 8; // rcx
|
||||
static const int _vertex = _args + 12; // rdx
|
||||
static const int _index = _args + 16; // r8
|
||||
static const int _min = _args + 20; // r9
|
||||
static const int _max = _args + 24; // _args + 4
|
||||
|
||||
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
|
@ -57,6 +58,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
break;
|
||||
}
|
||||
|
||||
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
|
@ -83,7 +86,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
mov(edx, dword[esp + _v]);
|
||||
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
@ -92,18 +96,24 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
|
||||
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
vmovaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
|
||||
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
|
||||
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
|
||||
vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
|
||||
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.minv(v[i + j].c);
|
||||
// max.c = max.c.maxv(v[i + j].c);
|
||||
|
||||
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]);
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
|
||||
|
||||
vminps(xmm2, xmm0);
|
||||
vmaxps(xmm3, xmm0);
|
||||
|
@ -112,7 +122,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]);
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
|
||||
|
||||
vminps(xmm4, xmm0);
|
||||
vmaxps(xmm5, xmm0);
|
||||
|
@ -122,7 +132,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
|
@ -140,7 +150,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
}
|
||||
}
|
||||
|
||||
add(edx, n * sizeof(GSVertexSW));
|
||||
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
@ -170,10 +180,12 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
|
@ -189,17 +201,17 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_SPRITE_CLASS:
|
||||
case GS_LINE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 6;
|
||||
break;
|
||||
}
|
||||
|
||||
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
|
@ -226,7 +238,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
mov(edx, dword[esp + _v]);
|
||||
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
@ -235,16 +248,22 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
|
||||
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
vmovaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
|
||||
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW9)
|
||||
|
||||
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
|
||||
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW9)
|
||||
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
|
||||
|
||||
vminps(xmm4, xmm0);
|
||||
vmaxps(xmm5, xmm0);
|
||||
|
@ -256,7 +275,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
|
||||
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]);
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
|
||||
}
|
||||
|
||||
if(color && (iip || j == n - 1))
|
||||
|
@ -287,7 +306,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
}
|
||||
}
|
||||
|
||||
add(edx, n * sizeof(GSVertexHW9));
|
||||
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
@ -330,10 +349,12 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
|
@ -358,6 +379,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
break;
|
||||
}
|
||||
|
||||
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
|
@ -384,7 +407,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
mov(edx, dword[esp + _v]);
|
||||
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
@ -393,9 +417,12 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW11)
|
||||
|
||||
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
vmovaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]);
|
||||
vmovaps(xmm0, ptr[edx + eax]);
|
||||
}
|
||||
|
||||
if(color && (iip || j == n - 1))
|
||||
|
@ -424,7 +451,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
vmaxps(xmm7, xmm0);
|
||||
}
|
||||
|
||||
vmovdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]);
|
||||
vmovdqa(xmm0, ptr[edx + eax + 16]);
|
||||
vpmovzxwd(xmm1, xmm0);
|
||||
|
||||
vpsrld(xmm0, 1);
|
||||
|
@ -435,7 +462,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
vmaxps(xmm5, xmm1);
|
||||
}
|
||||
|
||||
add(edx, n * sizeof(GSVertexHW11));
|
||||
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
@ -478,6 +505,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
|
|
|
@ -27,12 +27,13 @@
|
|||
using namespace Xbyak;
|
||||
|
||||
static const int _args = 0;
|
||||
static const int _count = _args + 4; // rcx
|
||||
static const int _v = _args + 8; // rdx
|
||||
static const int _min = _args + 12; // r8
|
||||
static const int _max = _args + 16; // r9
|
||||
static const int _count = _args + 8; // rcx
|
||||
static const int _vertex = _args + 12; // rdx
|
||||
static const int _index = _args + 16; // r8
|
||||
static const int _min = _args + 20; // r9
|
||||
static const int _max = _args + 24; // _args + 4
|
||||
|
||||
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
|
@ -57,6 +58,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
break;
|
||||
}
|
||||
|
||||
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
|
@ -86,7 +89,8 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
mov(edx, dword[esp + _v]);
|
||||
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
@ -95,18 +99,24 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
|
||||
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
movaps(xmm1, ptr[edx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
|
||||
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
|
||||
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
|
||||
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.minv(v[i + j].c);
|
||||
// max.c = max.c.maxv(v[i + j].c);
|
||||
|
||||
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]);
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
|
||||
|
||||
minps(xmm2, xmm0);
|
||||
maxps(xmm3, xmm0);
|
||||
|
@ -115,7 +125,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]);
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
|
||||
|
||||
minps(xmm4, xmm0);
|
||||
maxps(xmm5, xmm0);
|
||||
|
@ -125,7 +135,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
movaps(xmm0, ptr[edx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
|
@ -144,7 +154,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
}
|
||||
}
|
||||
|
||||
add(edx, n * sizeof(GSVertexSW));
|
||||
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
@ -174,10 +184,12 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
|
@ -204,6 +216,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
break;
|
||||
}
|
||||
|
||||
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
|
@ -233,7 +247,8 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
mov(edx, dword[esp + _v]);
|
||||
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
@ -242,16 +257,22 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
|
||||
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
movaps(xmm1, ptr[edx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
|
||||
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW9)
|
||||
|
||||
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW9)
|
||||
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
|
||||
|
||||
minps(xmm4, xmm0);
|
||||
maxps(xmm5, xmm0);
|
||||
|
@ -264,7 +285,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
|
||||
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]);
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
|
||||
}
|
||||
|
||||
if(color && (iip || j == n - 1))
|
||||
|
@ -295,7 +316,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
}
|
||||
}
|
||||
|
||||
add(edx, n * sizeof(GSVertexHW9));
|
||||
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
@ -351,10 +372,12 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
|
@ -379,6 +402,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
break;
|
||||
}
|
||||
|
||||
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
|
@ -408,7 +433,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
mov(edx, dword[esp + _v]);
|
||||
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
@ -417,9 +443,12 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW11)
|
||||
|
||||
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
movaps(xmm0, ptr[edx + j * sizeof(GSVertexHW11)]);
|
||||
movaps(xmm0, ptr[edx + eax]);
|
||||
}
|
||||
|
||||
if(color && (iip || j == n - 1))
|
||||
|
@ -448,7 +477,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
maxps(xmm7, xmm0);
|
||||
}
|
||||
|
||||
movdqa(xmm0, ptr[edx + j * sizeof(GSVertexHW11) + 16]);
|
||||
movdqa(xmm0, ptr[edx + eax + 16]);
|
||||
|
||||
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
|
@ -469,7 +498,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
maxps(xmm5, xmm1);
|
||||
}
|
||||
|
||||
add(edx, n * sizeof(GSVertexHW11));
|
||||
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
@ -525,6 +554,8 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
|
|
|
@ -529,7 +529,6 @@
|
|||
<ClCompile Include="GSPerfMon.cpp" />
|
||||
<ClCompile Include="GSRasterizer.cpp">
|
||||
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">AssemblyAndSourceCode</AssemblerOutput>
|
||||
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSRenderer.cpp" />
|
||||
<ClCompile Include="GSRendererDX.cpp" />
|
||||
|
|
|
@ -363,34 +363,43 @@ struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_fr
|
|||
// GCC stand-in for the MSVC _BitScanForward intrinsic.
//
// Searches Mask from the least significant bit upwards for the first set bit.
//
// Index: receives the zero-based position of the lowest set bit; only written when Mask is non-zero.
// Mask:  value to scan.
//
// Returns 1 if a set bit was found, 0 if Mask is zero.
__forceinline unsigned char _BitScanForward(unsigned long* const Index, const unsigned long Mask)
{
	// A zero mask has no set bit: report failure without touching *Index
	// (the previous "bsfl" form left an undefined value there in this case).
	if(Mask == 0)
	{
		return 0;
	}

	// __builtin_ctzl matches the width of unsigned long, so this builds for both 32-bit and 64-bit long,
	// unlike "bsfl", which requires 32-bit operands.
	*Index = static_cast<unsigned long>(__builtin_ctzl(Mask));

	return 1;
}
|
||||
|
||||
// GCC stand-in for the MSVC _interlockedbittestandreset intrinsic.
//
// Atomically clears bit b of *a and returns the value that bit had before it was cleared (0 or 1).
__forceinline unsigned char _interlockedbittestandreset(volatile long* a, const long b)
|
||||
{
|
||||
unsigned char retval;
|
||||
|
||||
// "lock; btrl" copies the selected bit into the carry flag and resets it in memory as one locked operation;
// "setb" then stores the carry flag into retval. "=q" keeps retval in a register that has a byte form (%b),
// "Ir" allows b to be an immediate in 0..31 or a register, and the "memory" clobber stops the compiler from
// keeping memory values cached in registers across the statement.
__asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
// GCC stand-in for the MSVC _interlockedbittestandset intrinsic.
//
// Atomically sets bit b of *a and returns the value that bit had before it was set (0 or 1).
__forceinline unsigned char _interlockedbittestandset(volatile long* a, const long b)
|
||||
{
|
||||
unsigned char retval;
|
||||
|
||||
// "lock; btsl" copies the selected bit into the carry flag and sets it in memory as one locked operation;
// "setc" then stores the carry flag into retval. "=q" keeps retval in a register that has a byte form (%b),
// "Ir" allows b to be an immediate in 0..31 or a register, and the "memory" clobber stops the compiler from
// keeping memory values cached in registers across the statement.
__asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
// GCC stand-in for the MSVC _InterlockedExchangeAdd intrinsic.
//
// Atomically performs *Addend += Value.
//
// Addend: location that is updated in place.
// Value:  amount to add.
//
// Returns the value *Addend held before the addition.
//
// NOTE(review): "xaddl" is a 32-bit instruction, so this assumes long is 32 bits wide - confirm for 64-bit builds.
__forceinline long _InterlockedExchangeAdd(volatile long* const Addend, const long Value)
{
	long retval = Value;

	// xadd swaps the register with the memory operand and stores their sum in memory, so *Addend is written
	// as well as read: it has to be declared as a "+m" output, not as an input-only "m" operand.
	// __volatile__ keeps the optimizer from dropping or merging the statement when the caller ignores the result.
	__asm__ __volatile__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval), [Addend] "+m" (*Addend) : : "memory");

	return retval;
}
|
||||
|
||||
// 16-bit counterpart of _InterlockedExchangeAdd.
//
// Atomically performs *Addend += Value on a 16-bit location.
//
// Addend: location that is updated in place.
// Value:  amount to add.
//
// Returns the value *Addend held before the addition, sign-extended to long.
__forceinline long _InterlockedExchangeAdd16(volatile short* const Addend, const short Value)
{
	// The exchanged register must be 16 bits wide to match "xaddw"; with a long variable the operand is
	// printed as a 32-bit (or wider) register, which does not agree with the 'w' suffix.
	short retval = Value;

	// xadd writes *Addend as well as reading it, so it is a "+m" output rather than an input-only "m" operand.
	// __volatile__ keeps the optimizer from dropping or merging the statement when the caller ignores the result.
	__asm__ __volatile__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval), [Addend] "+m" (*Addend) : : "memory");

	return retval;
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue