mirror of https://github.com/PCSX2/pcsx2.git
Trying to isolate the rasterizer step-by-step, for better multi-threading in the future.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4305 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
65fc196688
commit
6f18c0dabe
|
@ -93,12 +93,13 @@ EXPORT_C_(int32) GPUopen(HWND hWnd)
|
|||
}
|
||||
|
||||
int renderer = theApp.GetConfig("Renderer", 1);
|
||||
int threads = theApp.GetConfig("swthreads", 1);
|
||||
|
||||
switch(renderer)
|
||||
{
|
||||
default:
|
||||
case 0: s_gpu = new GPURendererSW(new GSDevice9()); break;
|
||||
case 1: s_gpu = new GPURendererSW(new GSDevice11()); break;
|
||||
case 0: s_gpu = new GPURendererSW(new GSDevice9(), threads); break;
|
||||
case 1: s_gpu = new GPURendererSW(new GSDevice11(), threads); break;
|
||||
// TODO: case 3: s_gpu = new GPURendererNull(new GSDeviceNull()); break;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,12 +22,13 @@
|
|||
#include "StdAfx.h"
|
||||
#include "GPUDrawScanline.h"
|
||||
|
||||
GPUDrawScanline::GPUDrawScanline(GPUState* state, int id)
|
||||
: m_state(state)
|
||||
, m_id(id)
|
||||
, m_sp_map("GPUSetupPrim", &m_env)
|
||||
, m_ds_map("GPUDrawScanline", &m_env)
|
||||
GPUDrawScanline::GPUDrawScanline(const GPUScanlineGlobalData* gd)
|
||||
: m_sp_map("GPUSetupPrim", &m_local)
|
||||
, m_ds_map("GPUDrawScanline", &m_local)
|
||||
{
|
||||
memset(&m_local, 0, sizeof(m_local));
|
||||
|
||||
m_local.gd = gd;
|
||||
}
|
||||
|
||||
GPUDrawScanline::~GPUDrawScanline()
|
||||
|
@ -36,40 +37,24 @@ GPUDrawScanline::~GPUDrawScanline()
|
|||
|
||||
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||
{
|
||||
GPUDrawingEnvironment& env = m_state->m_env;
|
||||
|
||||
const GPUScanlineParam* p = (const GPUScanlineParam*)data->param;
|
||||
|
||||
m_env.sel = p->sel;
|
||||
|
||||
m_env.vm = m_state->m_mem.GetPixelAddress(0, 0);
|
||||
|
||||
if(m_env.sel.tme)
|
||||
if(m_local.gd->sel.twin)
|
||||
{
|
||||
m_env.tex = p->tex;
|
||||
m_env.clut = p->clut;
|
||||
uint32 u, v;
|
||||
|
||||
if(m_env.sel.twin)
|
||||
{
|
||||
uint32 u, v;
|
||||
u = ~(m_local.gd->twin.x << 3) & 0xff; // TWW
|
||||
v = ~(m_local.gd->twin.y << 3) & 0xff; // TWH
|
||||
|
||||
u = ~(env.TWIN.TWW << 3) & 0xff;
|
||||
v = ~(env.TWIN.TWH << 3) & 0xff;
|
||||
m_local.twin[0].u = GSVector4i((u << 16) | u);
|
||||
m_local.twin[0].v = GSVector4i((v << 16) | v);
|
||||
|
||||
m_env.twin[0].u = GSVector4i((u << 16) | u);
|
||||
m_env.twin[0].v = GSVector4i((v << 16) | v);
|
||||
|
||||
u = env.TWIN.TWX << 3;
|
||||
v = env.TWIN.TWY << 3;
|
||||
u = m_local.gd->twin.z << 3; // TWX
|
||||
// gd->twin is packed as (x = TWW, y = TWH, z = TWX, w = TWY), so the Y offset
// lives in lane w. Lane y is TWH (the height mask read above), not TWY.
v = m_local.gd->twin.w << 3; // TWY
|
||||
|
||||
m_env.twin[1].u = GSVector4i((u << 16) | u) & ~m_env.twin[0].u;
|
||||
m_env.twin[1].v = GSVector4i((v << 16) | v) & ~m_env.twin[0].v;
|
||||
}
|
||||
m_local.twin[1].u = GSVector4i((u << 16) | u) & ~m_local.twin[0].u;
|
||||
m_local.twin[1].v = GSVector4i((v << 16) | v) & ~m_local.twin[0].v;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
m_ds = m_ds_map[m_env.sel];
|
||||
m_ds = m_ds_map[m_local.gd->sel];
|
||||
|
||||
m_de = NULL;
|
||||
|
||||
|
@ -81,15 +66,15 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
|
|||
|
||||
sel.key = 0;
|
||||
|
||||
sel.iip = m_env.sel.iip;
|
||||
sel.tfx = m_env.sel.tfx;
|
||||
sel.twin = m_env.sel.twin;
|
||||
sel.sprite = m_env.sel.sprite;
|
||||
sel.iip = m_local.gd->sel.iip;
|
||||
sel.tfx = m_local.gd->sel.tfx;
|
||||
sel.twin = m_local.gd->sel.twin;
|
||||
sel.sprite = m_local.gd->sel.sprite;
|
||||
|
||||
m_sp = m_sp_map[sel];
|
||||
}
|
||||
|
||||
void GPUDrawScanline::EndDraw(const GSRasterizerStats& stats)
|
||||
void GPUDrawScanline::EndDraw(const GSRasterizerStats& stats, uint64 frame)
|
||||
{
|
||||
m_ds_map.UpdateStats(stats, m_state->m_perfmon.GetFrame());
|
||||
m_ds_map.UpdateStats(stats, frame);
|
||||
}
|
||||
|
|
|
@ -29,24 +29,18 @@
|
|||
|
||||
class GPUDrawScanline : public IDrawScanline
|
||||
{
|
||||
GPUScanlineEnvironment m_env;
|
||||
|
||||
//
|
||||
GPUScanlineLocalData m_local;
|
||||
|
||||
GSCodeGeneratorFunctionMap<GPUSetupPrimCodeGenerator, uint32, SetupPrimPtr> m_sp_map;
|
||||
GSCodeGeneratorFunctionMap<GPUDrawScanlineCodeGenerator, uint32, DrawScanlinePtr> m_ds_map;
|
||||
|
||||
protected:
|
||||
GPUState* m_state;
|
||||
int m_id;
|
||||
|
||||
public:
|
||||
GPUDrawScanline(GPUState* state, int id);
|
||||
GPUDrawScanline(const GPUScanlineGlobalData* gd);
|
||||
virtual ~GPUDrawScanline();
|
||||
|
||||
// IDrawScanline
|
||||
|
||||
void BeginDraw(const GSRasterizerData* data);
|
||||
void EndDraw(const GSRasterizerStats& stats);
|
||||
void EndDraw(const GSRasterizerStats& stats, uint64 frame);
|
||||
void PrintStats() {m_ds_map.PrintStats();}
|
||||
};
|
||||
|
|
|
@ -24,14 +24,20 @@
|
|||
#include "StdAfx.h"
|
||||
#include "GPUDrawScanlineCodeGenerator.h"
|
||||
|
||||
static const int _args = 8;
|
||||
static const int _top = _args + 4;
|
||||
static const int _v = _args + 8;
|
||||
|
||||
GPUDrawScanlineCodeGenerator::GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
, m_env(*(GPUScanlineEnvironment*)param)
|
||||
, m_local(*(GPUScanlineLocalData*)param)
|
||||
{
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
#endif
|
||||
|
||||
m_sel.key = key;
|
||||
|
||||
Generate();
|
||||
}
|
||||
|
||||
|
@ -40,9 +46,7 @@ void GPUDrawScanlineCodeGenerator::Generate()
|
|||
push(esi);
|
||||
push(edi);
|
||||
|
||||
const int params = 8;
|
||||
|
||||
Init(params);
|
||||
Init();
|
||||
|
||||
align(16);
|
||||
|
||||
|
@ -112,26 +116,23 @@ L("exit");
|
|||
ret(8);
|
||||
}
|
||||
|
||||
void GPUDrawScanlineCodeGenerator::Init(int params)
|
||||
void GPUDrawScanlineCodeGenerator::Init()
|
||||
{
|
||||
const int _top = params + 4;
|
||||
const int _v = params + 8;
|
||||
|
||||
mov(eax, dword[esp + _top]);
|
||||
|
||||
// uint16* fb = &m_env.vm[(top << (10 + m_env.sel.scalex)) + left];
|
||||
// uint16* fb = &m_local.vm[(top << (10 + m_sel.scalex)) + left];
|
||||
|
||||
mov(edi, eax);
|
||||
shl(edi, 10 + m_env.sel.scalex);
|
||||
shl(edi, 10 + m_sel.scalex);
|
||||
add(edi, edx);
|
||||
lea(edi, ptr[edi * 2 + (size_t)m_env.vm]);
|
||||
lea(edi, ptr[edi * 2 + (size_t)m_local.gd->vm]);
|
||||
|
||||
// int steps = right - left - 8;
|
||||
|
||||
sub(ecx, edx);
|
||||
sub(ecx, 8);
|
||||
|
||||
if(m_env.sel.dtd)
|
||||
if(m_sel.dtd)
|
||||
{
|
||||
// dither = GSVector4i::load<false>(&s_dither[top & 3][left & 3]);
|
||||
|
||||
|
@ -140,48 +141,48 @@ void GPUDrawScanlineCodeGenerator::Init(int params)
|
|||
and(edx, 3);
|
||||
shl(edx, 1);
|
||||
movdqu(xmm0, ptr[eax + edx + (size_t)m_dither]);
|
||||
movdqa(ptr[&m_env.temp.dither], xmm0);
|
||||
movdqa(ptr[&m_local.temp.dither], xmm0);
|
||||
}
|
||||
|
||||
mov(edx, dword[esp + _v]);
|
||||
|
||||
if(m_env.sel.tme)
|
||||
if(m_sel.tme)
|
||||
{
|
||||
mov(esi, dword[&m_env.tex]);
|
||||
mov(esi, dword[&m_local.gd->tex]);
|
||||
|
||||
// GSVector4i vt = GSVector4i(v.t).xxzzl();
|
||||
|
||||
cvttps2dq(xmm4, ptr[edx + 32]);
|
||||
pshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
|
||||
// s = vt.xxxx().add16(m_env.d.s);
|
||||
// t = vt.yyyy().add16(m_env.d.t);
|
||||
// s = vt.xxxx().add16(m_local.d.s);
|
||||
// t = vt.yyyy().add16(m_local.d.t);
|
||||
|
||||
pshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
paddw(xmm2, ptr[&m_env.d.s]);
|
||||
paddw(xmm2, ptr[&m_local.d.s]);
|
||||
|
||||
if(!m_env.sel.sprite)
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
paddw(xmm3, ptr[&m_env.d.t]);
|
||||
paddw(xmm3, ptr[&m_local.d.t]);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.ltf)
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
movdqa(xmm0, xmm3);
|
||||
psllw(xmm0, 8);
|
||||
psrlw(xmm0, 1);
|
||||
movdqa(ptr[&m_env.temp.vf], xmm0);
|
||||
movdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
movdqa(ptr[&m_env.temp.s], xmm2);
|
||||
movdqa(ptr[&m_env.temp.t], xmm3);
|
||||
movdqa(ptr[&m_local.temp.s], xmm2);
|
||||
movdqa(ptr[&m_local.temp.t], xmm3);
|
||||
}
|
||||
|
||||
if(m_env.sel.tfx != 3) // != decal
|
||||
if(m_sel.tfx != 3) // != decal
|
||||
{
|
||||
// GSVector4i vc = GSVector4i(v.c).xxzzlh();
|
||||
|
||||
|
@ -197,20 +198,20 @@ void GPUDrawScanlineCodeGenerator::Init(int params)
|
|||
pshufd(xmm5, xmm6, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
pshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
if(m_env.sel.iip)
|
||||
if(m_sel.iip)
|
||||
{
|
||||
// r = r.add16(m_env.d.r);
|
||||
// g = g.add16(m_env.d.g);
|
||||
// b = b.add16(m_env.d.b);
|
||||
// r = r.add16(m_local.d.r);
|
||||
// g = g.add16(m_local.d.g);
|
||||
// b = b.add16(m_local.d.b);
|
||||
|
||||
paddw(xmm4, ptr[&m_env.d.r]);
|
||||
paddw(xmm5, ptr[&m_env.d.g]);
|
||||
paddw(xmm6, ptr[&m_env.d.b]);
|
||||
paddw(xmm4, ptr[&m_local.d.r]);
|
||||
paddw(xmm5, ptr[&m_local.d.g]);
|
||||
paddw(xmm6, ptr[&m_local.d.b]);
|
||||
}
|
||||
|
||||
movdqa(ptr[&m_env.temp.r], xmm4);
|
||||
movdqa(ptr[&m_env.temp.g], xmm5);
|
||||
movdqa(ptr[&m_env.temp.b], xmm6);
|
||||
movdqa(ptr[&m_local.temp.r], xmm4);
|
||||
movdqa(ptr[&m_local.temp.g], xmm5);
|
||||
movdqa(ptr[&m_local.temp.b], xmm6);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -224,62 +225,62 @@ void GPUDrawScanlineCodeGenerator::Step()
|
|||
|
||||
add(edi, 8 * sizeof(uint16));
|
||||
|
||||
if(m_env.sel.tme)
|
||||
if(m_sel.tme)
|
||||
{
|
||||
// GSVector4i st = m_env.d8.st;
|
||||
// GSVector4i st = m_local.d8.st;
|
||||
|
||||
movdqa(xmm4, ptr[&m_env.d8.st]);
|
||||
movdqa(xmm4, ptr[&m_local.d8.st]);
|
||||
|
||||
// s = s.add16(st.xxxx());
|
||||
// t = t.add16(st.yyyy());
|
||||
|
||||
pshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
paddw(xmm2, ptr[&m_env.temp.s]);
|
||||
movdqa(ptr[&m_env.temp.s], xmm2);
|
||||
paddw(xmm2, ptr[&m_local.temp.s]);
|
||||
movdqa(ptr[&m_local.temp.s], xmm2);
|
||||
|
||||
// TODO: if(!sprite) ... else reload t
|
||||
|
||||
pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
paddw(xmm3, ptr[&m_env.temp.t]);
|
||||
movdqa(ptr[&m_env.temp.t], xmm3);
|
||||
paddw(xmm3, ptr[&m_local.temp.t]);
|
||||
movdqa(ptr[&m_local.temp.t], xmm3);
|
||||
}
|
||||
|
||||
if(m_env.sel.tfx != 3) // != decal
|
||||
if(m_sel.tfx != 3) // != decal
|
||||
{
|
||||
if(m_env.sel.iip)
|
||||
if(m_sel.iip)
|
||||
{
|
||||
// GSVector4i c = m_env.d8.c;
|
||||
// GSVector4i c = m_local.d8.c;
|
||||
|
||||
// r = r.add16(c.xxxx());
|
||||
// g = g.add16(c.yyyy());
|
||||
// b = b.add16(c.zzzz());
|
||||
|
||||
movdqa(xmm6, ptr[&m_env.d8.c]);
|
||||
movdqa(xmm6, ptr[&m_local.d8.c]);
|
||||
|
||||
pshufd(xmm4, xmm6, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm5, xmm6, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
pshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
paddw(xmm4, ptr[&m_env.temp.r]);
|
||||
paddw(xmm5, ptr[&m_env.temp.g]);
|
||||
paddw(xmm6, ptr[&m_env.temp.b]);
|
||||
paddw(xmm4, ptr[&m_local.temp.r]);
|
||||
paddw(xmm5, ptr[&m_local.temp.g]);
|
||||
paddw(xmm6, ptr[&m_local.temp.b]);
|
||||
|
||||
movdqa(ptr[&m_env.temp.r], xmm4);
|
||||
movdqa(ptr[&m_env.temp.g], xmm5);
|
||||
movdqa(ptr[&m_env.temp.b], xmm6);
|
||||
movdqa(ptr[&m_local.temp.r], xmm4);
|
||||
movdqa(ptr[&m_local.temp.g], xmm5);
|
||||
movdqa(ptr[&m_local.temp.b], xmm6);
|
||||
}
|
||||
else
|
||||
{
|
||||
movdqa(xmm4, ptr[&m_env.temp.r]);
|
||||
movdqa(xmm5, ptr[&m_env.temp.g]);
|
||||
movdqa(xmm6, ptr[&m_env.temp.b]);
|
||||
movdqa(xmm4, ptr[&m_local.temp.r]);
|
||||
movdqa(xmm5, ptr[&m_local.temp.g]);
|
||||
movdqa(xmm6, ptr[&m_local.temp.b]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GPUDrawScanlineCodeGenerator::TestMask()
|
||||
{
|
||||
if(!m_env.sel.me)
|
||||
if(!m_sel.me)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -295,7 +296,7 @@ void GPUDrawScanlineCodeGenerator::TestMask()
|
|||
|
||||
void GPUDrawScanlineCodeGenerator::SampleTexture()
|
||||
{
|
||||
if(!m_env.sel.tme)
|
||||
if(!m_sel.tme)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -306,7 +307,7 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
// xmm0, xmm4, xmm5, xmm6 = free
|
||||
// xmm1 = used
|
||||
|
||||
if(m_env.sel.ltf)
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
// GSVector4i u = s.sub16(GSVector4i(0x00200020)); // - 0.125f
|
||||
// GSVector4i v = t.sub16(GSVector4i(0x00200020)); // - 0.125f
|
||||
|
@ -324,14 +325,14 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
movdqa(xmm0, xmm2);
|
||||
psllw(xmm0, 8);
|
||||
psrlw(xmm0, 1);
|
||||
movdqa(ptr[&m_env.temp.uf], xmm0);
|
||||
movdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
if(!m_env.sel.sprite)
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
movdqa(xmm0, xmm3);
|
||||
psllw(xmm0, 8);
|
||||
psrlw(xmm0, 1);
|
||||
movdqa(ptr[&m_env.temp.vf], xmm0);
|
||||
movdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -347,7 +348,7 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
// xmm0, xmm4, xmm5, xmm6 = free
|
||||
// xmm1 = used
|
||||
|
||||
if(m_env.sel.ltf)
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
// GSVector4i u1 = u0.add16(GSVector4i::x0001());
|
||||
// GSVector4i v1 = v0.add16(GSVector4i::x0001());
|
||||
|
@ -360,23 +361,23 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
paddw(xmm4, xmm0);
|
||||
paddw(xmm5, xmm0);
|
||||
|
||||
if(m_env.sel.twin)
|
||||
if(m_sel.twin)
|
||||
{
|
||||
// u0 = (u0 & m_env.twin[0].u).add16(m_env.twin[1].u);
|
||||
// v0 = (v0 & m_env.twin[0].v).add16(m_env.twin[1].v);
|
||||
// u1 = (u1 & m_env.twin[0].u).add16(m_env.twin[1].u);
|
||||
// v1 = (v1 & m_env.twin[0].v).add16(m_env.twin[1].v);
|
||||
// u0 = (u0 & m_local.twin[0].u).add16(m_local.twin[1].u);
|
||||
// v0 = (v0 & m_local.twin[0].v).add16(m_local.twin[1].v);
|
||||
// u1 = (u1 & m_local.twin[0].u).add16(m_local.twin[1].u);
|
||||
// v1 = (v1 & m_local.twin[0].v).add16(m_local.twin[1].v);
|
||||
|
||||
movdqa(xmm0, ptr[&m_env.twin[0].u]);
|
||||
movdqa(xmm6, ptr[&m_env.twin[1].u]);
|
||||
movdqa(xmm0, ptr[&m_local.twin[0].u]);
|
||||
movdqa(xmm6, ptr[&m_local.twin[1].u]);
|
||||
|
||||
pand(xmm2, xmm0);
|
||||
paddw(xmm2, xmm6);
|
||||
pand(xmm4, xmm0);
|
||||
paddw(xmm4, xmm6);
|
||||
|
||||
movdqa(xmm0, ptr[&m_env.twin[0].v]);
|
||||
movdqa(xmm6, ptr[&m_env.twin[1].v]);
|
||||
movdqa(xmm0, ptr[&m_local.twin[0].v]);
|
||||
movdqa(xmm6, ptr[&m_local.twin[1].v]);
|
||||
|
||||
pand(xmm3, xmm0);
|
||||
paddw(xmm3, xmm6);
|
||||
|
@ -385,15 +386,15 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
}
|
||||
else
|
||||
{
|
||||
// u0 = u0.min_i16(m_env.twin[2].u);
|
||||
// v0 = v0.min_i16(m_env.twin[2].v);
|
||||
// u1 = u1.min_i16(m_env.twin[2].u);
|
||||
// v1 = v1.min_i16(m_env.twin[2].v);
|
||||
// u0 = u0.min_i16(m_local.twin[2].u);
|
||||
// v0 = v0.min_i16(m_local.twin[2].v);
|
||||
// u1 = u1.min_i16(m_local.twin[2].u);
|
||||
// v1 = v1.min_i16(m_local.twin[2].v);
|
||||
|
||||
// TODO: if(!sprite) clamp16 else:
|
||||
|
||||
movdqa(xmm0, ptr[&m_env.twin[2].u]);
|
||||
movdqa(xmm6, ptr[&m_env.twin[2].v]);
|
||||
movdqa(xmm0, ptr[&m_local.twin[2].u]);
|
||||
movdqa(xmm6, ptr[&m_local.twin[2].v]);
|
||||
|
||||
pminsw(xmm2, xmm0);
|
||||
pminsw(xmm3, xmm6);
|
||||
|
@ -447,8 +448,8 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
// spill (TODO)
|
||||
|
||||
movdqa(ptr[&m_env.temp.fd], xmm1);
|
||||
movdqa(ptr[&m_env.temp.test], xmm7);
|
||||
movdqa(ptr[&m_local.temp.fd], xmm1);
|
||||
movdqa(ptr[&m_local.temp.test], xmm7);
|
||||
|
||||
// xmm2 = c00
|
||||
// xmm4 = c01
|
||||
|
@ -464,7 +465,7 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
psllw(xmm0, 11);
|
||||
psrlw(xmm0, 8);
|
||||
|
||||
lerp16<0>(xmm0, xmm1, ptr[&m_env.temp.uf]);
|
||||
lerp16<0>(xmm0, xmm1, ptr[&m_local.temp.uf]);
|
||||
|
||||
movdqa(xmm6, xmm2);
|
||||
psllw(xmm6, 6);
|
||||
|
@ -476,7 +477,7 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
psrlw(xmm1, 11);
|
||||
psllw(xmm1, 3);
|
||||
|
||||
lerp16<0>(xmm1, xmm6, ptr[&m_env.temp.uf]);
|
||||
lerp16<0>(xmm1, xmm6, ptr[&m_local.temp.uf]);
|
||||
|
||||
movdqa(xmm7, xmm2);
|
||||
psllw(xmm7, 1);
|
||||
|
@ -488,14 +489,14 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
psrlw(xmm6, 11);
|
||||
psllw(xmm6, 3);
|
||||
|
||||
lerp16<0>(xmm6, xmm7, ptr[&m_env.temp.uf]);
|
||||
lerp16<0>(xmm6, xmm7, ptr[&m_local.temp.uf]);
|
||||
|
||||
psraw(xmm2, 15);
|
||||
psrlw(xmm2, 8);
|
||||
psraw(xmm4, 15);
|
||||
psrlw(xmm4, 8);
|
||||
|
||||
lerp16<0>(xmm4, xmm2, ptr[&m_env.temp.uf]);
|
||||
lerp16<0>(xmm4, xmm2, ptr[&m_local.temp.uf]);
|
||||
|
||||
// xmm0 = r00
|
||||
// xmm1 = g00
|
||||
|
@ -513,8 +514,8 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
psllw(xmm2, 11);
|
||||
psrlw(xmm2, 8);
|
||||
|
||||
lerp16<0>(xmm2, xmm7, ptr[&m_env.temp.uf]);
|
||||
lerp16<0>(xmm2, xmm0, ptr[&m_env.temp.vf]);
|
||||
lerp16<0>(xmm2, xmm7, ptr[&m_local.temp.uf]);
|
||||
lerp16<0>(xmm2, xmm0, ptr[&m_local.temp.vf]);
|
||||
|
||||
// xmm2 = r
|
||||
// xmm1 = g00
|
||||
|
@ -534,8 +535,8 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
psrlw(xmm0, 11);
|
||||
psllw(xmm0, 3);
|
||||
|
||||
lerp16<0>(xmm0, xmm7, ptr[&m_env.temp.uf]);
|
||||
lerp16<0>(xmm0, xmm1, ptr[&m_env.temp.vf]);
|
||||
lerp16<0>(xmm0, xmm7, ptr[&m_local.temp.uf]);
|
||||
lerp16<0>(xmm0, xmm1, ptr[&m_local.temp.vf]);
|
||||
|
||||
// xmm2 = r
|
||||
// xmm0 = g
|
||||
|
@ -555,8 +556,8 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
psrlw(xmm1, 11);
|
||||
psllw(xmm1, 3);
|
||||
|
||||
lerp16<0>(xmm1, xmm7, ptr[&m_env.temp.uf]);
|
||||
lerp16<0>(xmm1, xmm6, ptr[&m_env.temp.vf]);
|
||||
lerp16<0>(xmm1, xmm7, ptr[&m_local.temp.uf]);
|
||||
lerp16<0>(xmm1, xmm6, ptr[&m_local.temp.vf]);
|
||||
|
||||
// xmm2 = r
|
||||
// xmm0 = g
|
||||
|
@ -571,8 +572,8 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
psraw(xmm5, 15);
|
||||
psrlw(xmm5, 8);
|
||||
|
||||
lerp16<0>(xmm5, xmm3, ptr[&m_env.temp.uf]);
|
||||
lerp16<0>(xmm5, xmm4, ptr[&m_env.temp.vf]);
|
||||
lerp16<0>(xmm5, xmm3, ptr[&m_local.temp.uf]);
|
||||
lerp16<0>(xmm5, xmm4, ptr[&m_local.temp.vf]);
|
||||
|
||||
// xmm2 = r
|
||||
// xmm0 = g
|
||||
|
@ -588,7 +589,7 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
// reload test
|
||||
|
||||
movdqa(xmm7, ptr[&m_env.temp.test]);
|
||||
movdqa(xmm7, ptr[&m_local.temp.test]);
|
||||
|
||||
// xmm4 = r
|
||||
// xmm5 = g
|
||||
|
@ -615,29 +616,29 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
// reload fd
|
||||
|
||||
movdqa(xmm1, ptr[&m_env.temp.fd]);
|
||||
movdqa(xmm1, ptr[&m_local.temp.fd]);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_env.sel.twin)
|
||||
if(m_sel.twin)
|
||||
{
|
||||
// u = (u & m_env.twin[0].u).add16(m_env.twin[1].u);
|
||||
// v = (v & m_env.twin[0].v).add16(m_env.twin[1].v);
|
||||
// u = (u & m_local.twin[0].u).add16(m_local.twin[1].u);
|
||||
// v = (v & m_local.twin[0].v).add16(m_local.twin[1].v);
|
||||
|
||||
pand(xmm2, ptr[&m_env.twin[0].u]);
|
||||
paddw(xmm2, ptr[&m_env.twin[1].u]);
|
||||
pand(xmm3, ptr[&m_env.twin[0].v]);
|
||||
paddw(xmm3, ptr[&m_env.twin[1].v]);
|
||||
pand(xmm2, ptr[&m_local.twin[0].u]);
|
||||
paddw(xmm2, ptr[&m_local.twin[1].u]);
|
||||
pand(xmm3, ptr[&m_local.twin[0].v]);
|
||||
paddw(xmm3, ptr[&m_local.twin[1].v]);
|
||||
}
|
||||
else
|
||||
{
|
||||
// u = u.min_i16(m_env.twin[2].u);
|
||||
// v = v.min_i16(m_env.twin[2].v);
|
||||
// u = u.min_i16(m_local.twin[2].u);
|
||||
// v = v.min_i16(m_local.twin[2].v);
|
||||
|
||||
// TODO: if(!sprite) clamp16 else:
|
||||
|
||||
pminsw(xmm2, ptr[&m_env.twin[2].u]);
|
||||
pminsw(xmm3, ptr[&m_env.twin[2].v]);
|
||||
pminsw(xmm2, ptr[&m_local.twin[2].u]);
|
||||
pminsw(xmm3, ptr[&m_local.twin[2].v]);
|
||||
}
|
||||
|
||||
// xmm2 = u
|
||||
|
@ -696,7 +697,7 @@ void GPUDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
void GPUDrawScanlineCodeGenerator::ColorTFX()
|
||||
{
|
||||
switch(m_env.sel.tfx)
|
||||
switch(m_sel.tfx)
|
||||
{
|
||||
case 0: // none (tfx = 0)
|
||||
case 1: // none (tfx = tge)
|
||||
|
@ -713,11 +714,11 @@ void GPUDrawScanlineCodeGenerator::ColorTFX()
|
|||
// c[2] = c[2].modulate16<1>(b).clamp8();
|
||||
pcmpeqd(xmm0, xmm0);
|
||||
psrlw(xmm0, 8);
|
||||
modulate16<1>(xmm4, ptr[&m_env.temp.r]);
|
||||
modulate16<1>(xmm4, ptr[&m_local.temp.r]);
|
||||
pminsw(xmm4, xmm0);
|
||||
modulate16<1>(xmm5, ptr[&m_env.temp.g]);
|
||||
modulate16<1>(xmm5, ptr[&m_local.temp.g]);
|
||||
pminsw(xmm5, xmm0);
|
||||
modulate16<1>(xmm6, ptr[&m_env.temp.b]);
|
||||
modulate16<1>(xmm6, ptr[&m_local.temp.b]);
|
||||
pminsw(xmm6, xmm0);
|
||||
break;
|
||||
case 3: // decal (tfx = tme)
|
||||
|
@ -727,7 +728,7 @@ void GPUDrawScanlineCodeGenerator::ColorTFX()
|
|||
|
||||
void GPUDrawScanlineCodeGenerator::AlphaBlend()
|
||||
{
|
||||
if(!m_env.sel.abe)
|
||||
if(!m_sel.abe)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -748,7 +749,7 @@ void GPUDrawScanlineCodeGenerator::AlphaBlend()
|
|||
pand(xmm2, xmm0);
|
||||
psllw(xmm2, 3);
|
||||
|
||||
switch(m_env.sel.abr)
|
||||
switch(m_sel.abr)
|
||||
{
|
||||
case 0:
|
||||
// r = r.avg8(c[0]);
|
||||
|
@ -770,7 +771,7 @@ void GPUDrawScanlineCodeGenerator::AlphaBlend()
|
|||
break;
|
||||
}
|
||||
|
||||
if(m_env.sel.tme)
|
||||
if(m_sel.tme)
|
||||
{
|
||||
movdqa(xmm0, xmm3);
|
||||
blend8(xmm4, xmm2);
|
||||
|
@ -789,7 +790,7 @@ void GPUDrawScanlineCodeGenerator::AlphaBlend()
|
|||
pand(xmm2, xmm0);
|
||||
psrlw(xmm2, 2);
|
||||
|
||||
switch(m_env.sel.abr)
|
||||
switch(m_sel.abr)
|
||||
{
|
||||
case 0:
|
||||
// g = g.avg8(c[2]);
|
||||
|
@ -811,7 +812,7 @@ void GPUDrawScanlineCodeGenerator::AlphaBlend()
|
|||
break;
|
||||
}
|
||||
|
||||
if(m_env.sel.tme)
|
||||
if(m_sel.tme)
|
||||
{
|
||||
movdqa(xmm0, xmm3);
|
||||
blend8(xmm5, xmm2);
|
||||
|
@ -830,7 +831,7 @@ void GPUDrawScanlineCodeGenerator::AlphaBlend()
|
|||
pand(xmm2, xmm0);
|
||||
psrlw(xmm2, 7);
|
||||
|
||||
switch(m_env.sel.abr)
|
||||
switch(m_sel.abr)
|
||||
{
|
||||
case 0:
|
||||
// b = b.avg8(c[2]);
|
||||
|
@ -852,7 +853,7 @@ void GPUDrawScanlineCodeGenerator::AlphaBlend()
|
|||
break;
|
||||
}
|
||||
|
||||
if(m_env.sel.tme)
|
||||
if(m_sel.tme)
|
||||
{
|
||||
movdqa(xmm0, xmm3);
|
||||
blend8(xmm6, xmm2);
|
||||
|
@ -865,7 +866,7 @@ void GPUDrawScanlineCodeGenerator::AlphaBlend()
|
|||
|
||||
void GPUDrawScanlineCodeGenerator::Dither()
|
||||
{
|
||||
if(!m_env.sel.dtd)
|
||||
if(!m_sel.dtd)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -874,7 +875,7 @@ void GPUDrawScanlineCodeGenerator::Dither()
|
|||
// c[1] = c[1].addus8(dither);
|
||||
// c[2] = c[2].addus8(dither);
|
||||
|
||||
movdqa(xmm0, ptr[&m_env.temp.dither]);
|
||||
movdqa(xmm0, ptr[&m_local.temp.dither]);
|
||||
|
||||
paddusb(xmm4, xmm0);
|
||||
paddusb(xmm5, xmm0);
|
||||
|
@ -883,11 +884,11 @@ void GPUDrawScanlineCodeGenerator::Dither()
|
|||
|
||||
void GPUDrawScanlineCodeGenerator::WriteFrame()
|
||||
{
|
||||
// GSVector4i fs = r | g | b | (m_env.sel.md ? GSVector4i(0x80008000) : m_env.sel.tme ? a : 0);
|
||||
// GSVector4i fs = r | g | b | (m_sel.md ? GSVector4i(0x80008000) : m_sel.tme ? a : 0);
|
||||
|
||||
pcmpeqd(xmm0, xmm0);
|
||||
|
||||
if(m_env.sel.md || m_env.sel.tme)
|
||||
if(m_sel.md || m_sel.tme)
|
||||
{
|
||||
movdqa(xmm2, xmm0);
|
||||
psllw(xmm2, 15);
|
||||
|
@ -916,13 +917,13 @@ void GPUDrawScanlineCodeGenerator::WriteFrame()
|
|||
psllw(xmm6, 7);
|
||||
por(xmm4, xmm6);
|
||||
|
||||
if(m_env.sel.md)
|
||||
if(m_sel.md)
|
||||
{
|
||||
// GSVector4i a = GSVector4i(0x80008000);
|
||||
|
||||
por(xmm4, xmm2);
|
||||
}
|
||||
else if(m_env.sel.tme)
|
||||
else if(m_sel.tme)
|
||||
{
|
||||
// GSVector4i a = (c[3] << 8) & 0x80008000;
|
||||
|
||||
|
@ -950,9 +951,9 @@ void GPUDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr)
|
|||
{
|
||||
pextrw(eax, addr, (uint8)i);
|
||||
|
||||
if(m_env.sel.tlu) movzx(eax, byte[esi + eax]);
|
||||
if(m_sel.tlu) movzx(eax, byte[esi + eax]);
|
||||
|
||||
const Address& src = m_env.sel.tlu ? ptr[eax * 2 + (size_t)m_env.clut] : ptr[esi + eax * 2];
|
||||
const Address& src = m_sel.tlu ? ptr[eax * 2 + (size_t)m_local.gd->clut] : ptr[esi + eax * 2];
|
||||
|
||||
if(i == 0) movd(dst, src);
|
||||
else pinsrw(dst, src, (uint8)i);
|
||||
|
|
|
@ -33,11 +33,12 @@ class GPUDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
static const GSVector4i m_test[8];
|
||||
static const uint16 m_dither[4][16];
|
||||
|
||||
GPUScanlineEnvironment& m_env;
|
||||
GPUScanlineSelector m_sel;
|
||||
GPUScanlineLocalData& m_local;
|
||||
|
||||
void Generate();
|
||||
|
||||
void Init(int params);
|
||||
void Init();
|
||||
void Step();
|
||||
void TestMask();
|
||||
void SampleTexture();
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
__declspec(align(16)) class GPUDrawingEnvironment
|
||||
__aligned32 class GPUDrawingEnvironment
|
||||
{
|
||||
public:
|
||||
GPURegSTATUS STATUS;
|
||||
|
|
|
@ -23,11 +23,11 @@
|
|||
#include "GPURendererSW.h"
|
||||
#include "GSdx.h"
|
||||
|
||||
GPURendererSW::GPURendererSW(GSDevice* dev)
|
||||
GPURendererSW::GPURendererSW(GSDevice* dev, int threads)
|
||||
: GPURendererT(dev)
|
||||
, m_texture(NULL)
|
||||
{
|
||||
m_rl.Create<GPUDrawScanline>(this, theApp.GetConfig("swthreads", 1));
|
||||
m_rl.Create<GPUDrawScanline, GPUScanlineGlobalData>(threads);
|
||||
}
|
||||
|
||||
GPURendererSW::~GPURendererSW()
|
||||
|
@ -70,39 +70,42 @@ void GPURendererSW::Draw()
|
|||
|
||||
//
|
||||
|
||||
GPUScanlineParam p;
|
||||
GPUScanlineGlobalData gd;
|
||||
|
||||
p.sel.key = 0;
|
||||
p.sel.iip = env.PRIM.IIP;
|
||||
p.sel.me = env.STATUS.ME;
|
||||
gd.sel.key = 0;
|
||||
gd.sel.iip = env.PRIM.IIP;
|
||||
gd.sel.me = env.STATUS.ME;
|
||||
|
||||
if(env.PRIM.ABE)
|
||||
{
|
||||
p.sel.abe = env.PRIM.ABE;
|
||||
p.sel.abr = env.STATUS.ABR;
|
||||
gd.sel.abe = env.PRIM.ABE;
|
||||
gd.sel.abr = env.STATUS.ABR;
|
||||
}
|
||||
|
||||
p.sel.tge = env.PRIM.TGE;
|
||||
gd.sel.tge = env.PRIM.TGE;
|
||||
|
||||
if(env.PRIM.TME)
|
||||
{
|
||||
p.sel.tme = env.PRIM.TME;
|
||||
p.sel.tlu = env.STATUS.TP < 2;
|
||||
p.sel.twin = (env.TWIN.u32 & 0xfffff) != 0;
|
||||
p.sel.ltf = m_filter == 1 && env.PRIM.TYPE == GPU_POLYGON || m_filter == 2 ? 1 : 0;
|
||||
gd.sel.tme = env.PRIM.TME;
|
||||
gd.sel.tlu = env.STATUS.TP < 2;
|
||||
gd.sel.twin = (env.TWIN.u32 & 0xfffff) != 0;
|
||||
gd.sel.ltf = m_filter == 1 && env.PRIM.TYPE == GPU_POLYGON || m_filter == 2 ? 1 : 0;
|
||||
|
||||
const void* t = m_mem.GetTexture(env.STATUS.TP, env.STATUS.TX, env.STATUS.TY);
|
||||
|
||||
if(!t) {ASSERT(0); return;}
|
||||
|
||||
p.tex = t;
|
||||
p.clut = m_mem.GetCLUT(env.STATUS.TP, env.CLUT.X, env.CLUT.Y);
|
||||
gd.tex = t;
|
||||
gd.clut = m_mem.GetCLUT(env.STATUS.TP, env.CLUT.X, env.CLUT.Y);
|
||||
gd.twin = GSVector4i(env.TWIN.TWW, env.TWIN.TWH, env.TWIN.TWX, env.TWIN.TWY);
|
||||
}
|
||||
|
||||
p.sel.dtd = m_dither ? env.STATUS.DTD : 0;
|
||||
p.sel.md = env.STATUS.MD;
|
||||
p.sel.sprite = env.PRIM.TYPE == GPU_SPRITE;
|
||||
p.sel.scalex = m_mem.GetScale().x;
|
||||
gd.sel.dtd = m_dither ? env.STATUS.DTD : 0;
|
||||
gd.sel.md = env.STATUS.MD;
|
||||
gd.sel.sprite = env.PRIM.TYPE == GPU_SPRITE;
|
||||
gd.sel.scalex = m_mem.GetScale().x;
|
||||
|
||||
gd.vm = m_mem.GetPixelAddress(0, 0);
|
||||
|
||||
//
|
||||
|
||||
|
@ -110,7 +113,8 @@ void GPURendererSW::Draw()
|
|||
|
||||
data.vertices = m_vertices;
|
||||
data.count = m_count;
|
||||
data.param = &p;
|
||||
data.frame = m_perfmon.GetFrame();
|
||||
data.param = &gd;
|
||||
|
||||
data.scissor.left = (int)m_env.DRAREATL.X << m_scale.x;
|
||||
data.scissor.top = (int)m_env.DRAREATL.Y << m_scale.y;
|
||||
|
@ -127,14 +131,6 @@ void GPURendererSW::Draw()
|
|||
|
||||
m_rl.Draw(&data);
|
||||
|
||||
GSRasterizerStats stats;
|
||||
|
||||
m_rl.GetStats(stats);
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Draw, 1);
|
||||
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
|
||||
|
||||
// TODO
|
||||
|
||||
{
|
||||
|
@ -158,6 +154,16 @@ void GPURendererSW::Draw()
|
|||
|
||||
Invalidate(r);
|
||||
}
|
||||
|
||||
m_rl.Sync();
|
||||
|
||||
GSRasterizerStats stats;
|
||||
|
||||
m_rl.GetStats(stats);
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Draw, 1);
|
||||
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
|
||||
}
|
||||
|
||||
void GPURendererSW::VertexKick()
|
||||
|
|
|
@ -36,6 +36,6 @@ protected:
|
|||
void Draw();
|
||||
|
||||
public:
|
||||
GPURendererSW(GSDevice* dev);
|
||||
GPURendererSW(GSDevice* dev, int threads);
|
||||
virtual ~GPURendererSW();
|
||||
};
|
||||
|
|
|
@ -53,29 +53,26 @@ union GPUScanlineSelector
|
|||
|
||||
uint32 key;
|
||||
|
||||
operator uint32() {return key;}
|
||||
operator uint32() const {return key;}
|
||||
};
|
||||
|
||||
__declspec(align(16)) struct GPUScanlineParam
|
||||
{
|
||||
GPUScanlineSelector sel;
|
||||
|
||||
const void* tex;
|
||||
const uint16* clut;
|
||||
};
|
||||
|
||||
__declspec(align(16)) struct GPUScanlineEnvironment
|
||||
// Per-draw state shared by the scanline drawers. GPURendererSW::Draw fills one
// of these and hands it to the rasterizer through GSRasterizerData::param;
// GPUDrawScanline and the code generators read it through GPUScanlineLocalData::gd.
__aligned32 struct GPUScanlineGlobalData
|
||||
{
|
||||
// Feature bits for the draw; sel.key is used to look up the generated scanline function.
GPUScanlineSelector sel;
|
||||
|
||||
// Base of video memory (m_mem.GetPixelAddress(0, 0)); frame buffer addresses are computed relative to it.
void* vm;
|
||||
// Texture returned by m_mem.GetTexture(); only assigned by Draw when PRIM.TME is set.
const void* tex;
|
||||
// Palette returned by m_mem.GetCLUT(); only assigned by Draw when PRIM.TME is set.
const uint16* clut;
|
||||
// Texture window registers, one per lane: x = TWW, y = TWH, z = TWX, w = TWY. Only assigned when PRIM.TME is set.
GSVector4i twin; // TWW, TWH, TWX, TWY
|
||||
};
|
||||
|
||||
// GSVector4i md; // similar to gs fba
|
||||
// Working state owned by one GPUDrawScanline instance. The generated setup-prim
// and draw-scanline code is emitted with the addresses of these members baked in.
__aligned32 struct GPUScanlineLocalData
|
||||
{
|
||||
// Shared per-draw data; stored by the GPUDrawScanline constructor.
const GPUScanlineGlobalData* gd;
|
||||
|
||||
// twin[0]: AND masks derived from TWW/TWH in BeginDraw.
// twin[1]: offsets derived from TWX/TWY, masked with ~twin[0], added after masking.
// twin[2]: upper clamp for u/v (applied with pminsw), written by the generated setup-prim code when tme && !twin.
struct {GSVector4i u, v;} twin[3];
|
||||
// Increments added to the starting s/t (and r/g/b when iip is set) in Init; s and t are written by the setup-prim code.
struct {GSVector4i s, t, r, g, b, _pad[3];} d;
|
||||
// Deltas applied in Step after each group of 8 pixels: texture coordinates (st) and colour (c).
struct {GSVector4i st, c;} d8;
|
||||
|
||||
// Scratch used by the generated scanline code: running s/t/r/g/b values, bilinear
// fractions (uf, vf), the dither row, and spill slots for the frame data (fd) and mask test.
struct {GSVector4i s, t, r, b, g, uf, vf, dither, fd, test;} temp;
|
||||
};
|
||||
|
|
|
@ -29,22 +29,24 @@ using namespace Xbyak;
|
|||
|
||||
GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
, m_env(*(GPUScanlineEnvironment*)param)
|
||||
, m_local(*(GPUScanlineLocalData*)param)
|
||||
{
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
#endif
|
||||
|
||||
m_sel.key = key;
|
||||
|
||||
Generate();
|
||||
}
|
||||
|
||||
void GPUSetupPrimCodeGenerator::Generate()
|
||||
{
|
||||
if(m_env.sel.tme && !m_env.sel.twin)
|
||||
if(m_sel.tme && !m_sel.twin)
|
||||
{
|
||||
pcmpeqd(xmm0, xmm0);
|
||||
|
||||
if(m_env.sel.sprite)
|
||||
if(m_sel.sprite)
|
||||
{
|
||||
// t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001();
|
||||
|
||||
|
@ -59,30 +61,30 @@ void GPUSetupPrimCodeGenerator::Generate()
|
|||
packssdw(xmm1, xmm1);
|
||||
punpcklwd(xmm1, xmm1);
|
||||
|
||||
// m_env.twin[2].u = t.xxxx();
|
||||
// m_env.twin[2].v = t.yyyy();
|
||||
// m_local.twin[2].u = t.xxxx();
|
||||
// m_local.twin[2].v = t.yyyy();
|
||||
|
||||
pshufd(xmm2, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm3, xmm1, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
movdqa(ptr[&m_env.twin[2].u], xmm2);
|
||||
movdqa(ptr[&m_env.twin[2].v], xmm3);
|
||||
movdqa(ptr[&m_local.twin[2].u], xmm2);
|
||||
movdqa(ptr[&m_local.twin[2].v], xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: not really needed
|
||||
|
||||
// m_env.twin[2].u = GSVector4i::x00ff();
|
||||
// m_env.twin[2].v = GSVector4i::x00ff();
|
||||
// m_local.twin[2].u = GSVector4i::x00ff();
|
||||
// m_local.twin[2].v = GSVector4i::x00ff();
|
||||
|
||||
psrlw(xmm0, 8);
|
||||
|
||||
movdqa(ptr[&m_env.twin[2].u], xmm0);
|
||||
movdqa(ptr[&m_env.twin[2].v], xmm0);
|
||||
movdqa(ptr[&m_local.twin[2].u], xmm0);
|
||||
movdqa(ptr[&m_local.twin[2].v], xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
if(m_env.sel.tme || m_env.sel.iip && m_env.sel.tfx != 3)
|
||||
if(m_sel.tme || m_sel.iip && m_sel.tfx != 3)
|
||||
{
|
||||
for(int i = 0; i < 3; i++)
|
||||
{
|
||||
|
@ -105,21 +107,21 @@ void GPUSetupPrimCodeGenerator::Generate()
|
|||
cvttps2dq(xmm2, xmm2);
|
||||
packssdw(xmm1, xmm2);
|
||||
|
||||
if(m_env.sel.tme)
|
||||
if(m_sel.tme)
|
||||
{
|
||||
// m_env.d8.st = dtc8.upl16(dtc8);
|
||||
// m_local.d8.st = dtc8.upl16(dtc8);
|
||||
|
||||
movdqa(xmm0, xmm1);
|
||||
punpcklwd(xmm0, xmm0);
|
||||
movdqa(ptr[&m_env.d8.st], xmm0);
|
||||
movdqa(ptr[&m_local.d8.st], xmm0);
|
||||
}
|
||||
|
||||
if(m_env.sel.iip && m_env.sel.tfx != 3)
|
||||
if(m_sel.iip && m_sel.tfx != 3)
|
||||
{
|
||||
// m_env.d8.c = dtc8.uph16(dtc8);
|
||||
// m_local.d8.c = dtc8.uph16(dtc8);
|
||||
|
||||
punpckhwd(xmm1, xmm1);
|
||||
movdqa(ptr[&m_env.d8.c], xmm1);
|
||||
movdqa(ptr[&m_local.d8.c], xmm1);
|
||||
}
|
||||
|
||||
// xmm3 = dt
|
||||
|
@ -128,7 +130,7 @@ void GPUSetupPrimCodeGenerator::Generate()
|
|||
// xmm7 = ps4567
|
||||
// xmm0, xmm1, xmm2, xmm5 = free
|
||||
|
||||
if(m_env.sel.tme)
|
||||
if(m_sel.tme)
|
||||
{
|
||||
// GSVector4 dtx = dt.xxxx();
|
||||
// GSVector4 dty = dt.yyyy();
|
||||
|
@ -137,7 +139,7 @@ void GPUSetupPrimCodeGenerator::Generate()
|
|||
shufps(xmm3, xmm3, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
// m_env.d.s = GSVector4i(dtx * ps0123).ps32(GSVector4i(dtx * ps4567));
|
||||
// m_local.d.s = GSVector4i(dtx * ps0123).ps32(GSVector4i(dtx * ps4567));
|
||||
|
||||
movaps(xmm1, xmm3);
|
||||
mulps(xmm3, xmm6);
|
||||
|
@ -145,9 +147,9 @@ void GPUSetupPrimCodeGenerator::Generate()
|
|||
cvttps2dq(xmm3, xmm3);
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
packssdw(xmm3, xmm1);
|
||||
movdqa(ptr[&m_env.d.s], xmm3);
|
||||
movdqa(ptr[&m_local.d.s], xmm3);
|
||||
|
||||
// m_env.d.t = GSVector4i(dty * ps0123).ps32(GSVector4i(dty * ps4567));
|
||||
// m_local.d.t = GSVector4i(dty * ps0123).ps32(GSVector4i(dty * ps4567));
|
||||
|
||||
movaps(xmm1, xmm0);
|
||||
mulps(xmm0, xmm6);
|
||||
|
@ -155,7 +157,7 @@ void GPUSetupPrimCodeGenerator::Generate()
|
|||
cvttps2dq(xmm0, xmm0);
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
packssdw(xmm0, xmm1);
|
||||
movdqa(ptr[&m_env.d.t], xmm0);
|
||||
movdqa(ptr[&m_local.d.t], xmm0);
|
||||
}
|
||||
|
||||
// xmm4 = dc
|
||||
|
@ -163,7 +165,7 @@ void GPUSetupPrimCodeGenerator::Generate()
|
|||
// xmm7 = ps4567
|
||||
// xmm0, xmm1, xmm2, xmm3, xmm5 = free
|
||||
|
||||
if(m_env.sel.iip && m_env.sel.tfx != 3)
|
||||
if(m_sel.iip && m_sel.tfx != 3)
|
||||
{
|
||||
// GSVector4 dcx = dc.xxxx();
|
||||
// GSVector4 dcy = dc.yyyy();
|
||||
|
@ -175,7 +177,7 @@ void GPUSetupPrimCodeGenerator::Generate()
|
|||
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
// m_env.d.r = GSVector4i(dcx * ps0123).ps32(GSVector4i(dcx * ps4567));
|
||||
// m_local.d.r = GSVector4i(dcx * ps0123).ps32(GSVector4i(dcx * ps4567));
|
||||
|
||||
movaps(xmm2, xmm4);
|
||||
mulps(xmm4, xmm6);
|
||||
|
@ -183,9 +185,9 @@ void GPUSetupPrimCodeGenerator::Generate()
|
|||
cvttps2dq(xmm4, xmm4);
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
packssdw(xmm4, xmm2);
|
||||
movdqa(ptr[&m_env.d.r], xmm4);
|
||||
movdqa(ptr[&m_local.d.r], xmm4);
|
||||
|
||||
// m_env.d.g = GSVector4i(dcy * ps0123).ps32(GSVector4i(dcy * ps4567));
|
||||
// m_local.d.g = GSVector4i(dcy * ps0123).ps32(GSVector4i(dcy * ps4567));
|
||||
|
||||
movaps(xmm2, xmm0);
|
||||
mulps(xmm0, xmm6);
|
||||
|
@ -193,9 +195,9 @@ void GPUSetupPrimCodeGenerator::Generate()
|
|||
cvttps2dq(xmm0, xmm0);
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
packssdw(xmm0, xmm2);
|
||||
movdqa(ptr[&m_env.d.g], xmm0);
|
||||
movdqa(ptr[&m_local.d.g], xmm0);
|
||||
|
||||
// m_env.d.b = GSVector4i(dcz * ps0123).ps32(GSVector4i(dcz * ps4567));
|
||||
// m_local.d.b = GSVector4i(dcz * ps0123).ps32(GSVector4i(dcz * ps4567));
|
||||
|
||||
movaps(xmm2, xmm1);
|
||||
mulps(xmm1, xmm6);
|
||||
|
@ -203,7 +205,7 @@ void GPUSetupPrimCodeGenerator::Generate()
|
|||
cvttps2dq(xmm1, xmm1);
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
packssdw(xmm1, xmm2);
|
||||
movdqa(ptr[&m_env.d.b], xmm1);
|
||||
movdqa(ptr[&m_local.d.b], xmm1);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,8 @@ class GPUSetupPrimCodeGenerator : public GSCodeGenerator
|
|||
|
||||
static const GSVector4 m_shift[3];
|
||||
|
||||
GPUScanlineEnvironment& m_env;
|
||||
GPUScanlineSelector m_sel;
|
||||
GPUScanlineLocalData& m_local;
|
||||
|
||||
void Generate();
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
#include "GSUtil.h"
|
||||
#include "GSPerfMon.h"
|
||||
|
||||
class GPUState : public GSAlignedClass<16>
|
||||
class GPUState : public GSAlignedClass<32>
|
||||
{
|
||||
typedef void (GPUState::*GPUStatusCommandHandler)(GPUReg* r);
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
__declspec(align(16)) struct GPUVertex
|
||||
__aligned32 struct GPUVertex
|
||||
{
|
||||
union
|
||||
{
|
||||
|
|
|
@ -149,7 +149,7 @@ EXPORT_C GSclose()
|
|||
s_gs->m_wnd.Detach();
|
||||
}
|
||||
|
||||
static INT32 _GSopen(void* dsp, char* title, int renderer)
|
||||
static INT32 _GSopen(void* dsp, char* title, int renderer, int threads = -1)
|
||||
{
|
||||
GSDevice* dev = NULL;
|
||||
|
||||
|
@ -158,6 +158,11 @@ static INT32 _GSopen(void* dsp, char* title, int renderer)
|
|||
renderer = theApp.GetConfig("renderer", 0);
|
||||
}
|
||||
|
||||
if(threads == -1)
|
||||
{
|
||||
threads = theApp.GetConfig("swthreads", 1);
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if (s_renderer != renderer)
|
||||
|
@ -196,20 +201,21 @@ static INT32 _GSopen(void* dsp, char* title, int renderer)
|
|||
s_gs = new GSRendererNull();
|
||||
break;
|
||||
case 1: case 4: case 7: case 10: case 12:
|
||||
s_gs = new GSRendererSW();
|
||||
s_gs = new GSRendererSW(threads);
|
||||
break;
|
||||
}
|
||||
|
||||
s_renderer = renderer;
|
||||
}
|
||||
}
|
||||
catch( std::exception& ex )
|
||||
catch(std::exception& ex)
|
||||
{
|
||||
// Allowing std exceptions to escape the scope of the plugin callstack could
|
||||
// be problematic, because of differing typeids between DLL and EXE compilations.
|
||||
// ('new' could throw std::bad_alloc)
|
||||
|
||||
printf( "GSdx error: Exception caught in GSopen: %s", ex.what() );
|
||||
printf("GSdx error: Exception caught in GSopen: %s", ex.what());
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -218,7 +224,7 @@ static INT32 _GSopen(void* dsp, char* title, int renderer)
|
|||
s_gs->SetVsync(s_vsync);
|
||||
s_gs->SetFrameLimit(s_framelimit);
|
||||
|
||||
if( *(HWND*)dsp == NULL )
|
||||
if(*(HWND*)dsp == NULL)
|
||||
{
|
||||
// old-style API expects us to create and manage our own window:
|
||||
|
||||
|
@ -232,21 +238,23 @@ static INT32 _GSopen(void* dsp, char* title, int renderer)
|
|||
}
|
||||
|
||||
s_gs->m_wnd.Show();
|
||||
|
||||
*(HWND*)dsp = (HWND)s_gs->m_wnd.GetHandle();
|
||||
}
|
||||
else
|
||||
{
|
||||
s_gs->SetMultithreaded( true );
|
||||
s_gs->m_wnd.Attach( *(HWND*)dsp, false );
|
||||
s_gs->SetMultithreaded(true);
|
||||
s_gs->m_wnd.Attach(*(HWND*)dsp, false);
|
||||
}
|
||||
|
||||
if( !s_gs->CreateDevice(dev) )
|
||||
if(!s_gs->CreateDevice(dev))
|
||||
{
|
||||
// This probably means the user has DX11 configured with a video card that is only DX9
|
||||
// compliant. Could mean driver issues of some sort also, but to be sure, that's the most
|
||||
// common cause of device creation errors. :) --air
|
||||
|
||||
GSclose();
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -255,17 +263,18 @@ static INT32 _GSopen(void* dsp, char* title, int renderer)
|
|||
return 0;
|
||||
}
|
||||
|
||||
EXPORT_C_(INT32) GSopen2( void* dsp, INT32 flags )
|
||||
EXPORT_C_(INT32) GSopen2(void* dsp, INT32 flags)
|
||||
{
|
||||
int renderer = theApp.GetConfig("renderer", 0);
|
||||
if( flags & 4 )
|
||||
|
||||
if(flags & 4)
|
||||
{
|
||||
if (isdx11avail) renderer = 4; //dx11 sw
|
||||
else renderer = 1; //dx9 sw
|
||||
renderer = isdx11avail ? 4 : 1; // dx11 / dx9 sw
|
||||
}
|
||||
|
||||
INT32 retval = _GSopen( dsp, NULL, renderer );
|
||||
s_gs->SetAspectRatio(0); // PCSX2 manages the aspect ratios
|
||||
INT32 retval = _GSopen(dsp, NULL, renderer);
|
||||
|
||||
s_gs->SetAspectRatio(0); // PCSX2 manages the aspect ratios
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
@ -275,18 +284,21 @@ EXPORT_C_(INT32) GSopen(void* dsp, char* title, int mt)
|
|||
int renderer;
|
||||
|
||||
// Legacy GUI expects to acquire vsync from the configuration files.
|
||||
|
||||
s_vsync = !!theApp.GetConfig("vsync", 0);
|
||||
|
||||
if(mt == 2)
|
||||
{
|
||||
// pcsx2 sent a switch renderer request
|
||||
if (isdx11avail) renderer = 4; //dx11 sw
|
||||
else renderer = 1; //dx9 sw
|
||||
|
||||
renderer = isdx11avail ? 4 : 1; // dx11 / dx9 sw
|
||||
|
||||
mt = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// normal init
|
||||
|
||||
renderer = theApp.GetConfig("renderer", 0);
|
||||
}
|
||||
|
||||
|
@ -294,9 +306,9 @@ EXPORT_C_(INT32) GSopen(void* dsp, char* title, int mt)
|
|||
|
||||
int retval = _GSopen(dsp, title, renderer);
|
||||
|
||||
if( retval == 0 && s_gs )
|
||||
if(retval == 0 && s_gs)
|
||||
{
|
||||
s_gs->SetMultithreaded( !!mt );
|
||||
s_gs->SetMultithreaded(!!mt);
|
||||
}
|
||||
|
||||
return retval;
|
||||
|
@ -370,10 +382,14 @@ EXPORT_C GSvsync(int field)
|
|||
|
||||
EXPORT_C_(uint32) GSmakeSnapshot(char* path)
|
||||
{
|
||||
string str = string(path);
|
||||
if (str[str.length() - 1] != '\\')
|
||||
str = str + "\\";
|
||||
return s_gs->MakeSnapshot(str + "gsdx");
|
||||
string s(path);
|
||||
|
||||
if(s.back() != '\\')
|
||||
{
|
||||
s = s + "\\";
|
||||
}
|
||||
|
||||
return s_gs->MakeSnapshot(s + "gsdx");
|
||||
}
|
||||
|
||||
EXPORT_C GSkeyEvent(GSKeyEventData* e)
|
||||
|
@ -401,13 +417,14 @@ EXPORT_C_(int) GSfreeze(int mode, GSFreezeData* data)
|
|||
|
||||
EXPORT_C GSconfigure()
|
||||
{
|
||||
if( !GSUtil::CheckSSE() ) return;
|
||||
if(!GSUtil::CheckSSE()) return;
|
||||
|
||||
if( GSSettingsDlg( s_IsGsOpen2 ).DoModal() == IDOK )
|
||||
if(GSSettingsDlg(s_IsGsOpen2).DoModal() == IDOK)
|
||||
{
|
||||
if( s_gs != NULL && s_gs->m_wnd.IsManaged() )
|
||||
if(s_gs != NULL && s_gs->m_wnd.IsManaged())
|
||||
{
|
||||
// Legacy apps like gsdxgui expect this...
|
||||
|
||||
GSshutdown();
|
||||
}
|
||||
}
|
||||
|
@ -427,7 +444,9 @@ EXPORT_C_(INT32) GStest()
|
|||
if(!GSUtil::CheckDirectX())
|
||||
{
|
||||
if(SUCCEEDED(s_hr))
|
||||
{
|
||||
::CoUninitialize();
|
||||
}
|
||||
|
||||
s_hr = E_FAIL;
|
||||
|
||||
|
@ -435,7 +454,9 @@ EXPORT_C_(INT32) GStest()
|
|||
}
|
||||
|
||||
if(SUCCEEDED(s_hr))
|
||||
{
|
||||
::CoUninitialize();
|
||||
}
|
||||
|
||||
s_hr = E_FAIL;
|
||||
|
||||
|
@ -451,7 +472,8 @@ EXPORT_C GSabout()
|
|||
EXPORT_C GSirqCallback(void (*irq)())
|
||||
{
|
||||
s_irq = irq;
|
||||
if( s_gs )
|
||||
|
||||
if(s_gs)
|
||||
{
|
||||
s_gs->SetIrqCallback(s_irq);
|
||||
}
|
||||
|
@ -462,9 +484,13 @@ EXPORT_C_(int) GSsetupRecording(int start, void* data)
|
|||
if(!s_gs) return 0;
|
||||
|
||||
if(start & 1)
|
||||
{
|
||||
s_gs->BeginCapture();
|
||||
}
|
||||
else
|
||||
{
|
||||
s_gs->EndCapture();
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -486,13 +512,15 @@ EXPORT_C GSgetLastTag(uint32* tag)
|
|||
|
||||
EXPORT_C GSgetTitleInfo2(char* dest, size_t length)
|
||||
{
|
||||
if (!s_gs->m_GStitleInfoBuffer[0])
|
||||
if(!s_gs->m_GStitleInfoBuffer[0])
|
||||
{
|
||||
strcpy(dest, "GSdx");
|
||||
}
|
||||
else
|
||||
{
|
||||
EnterCriticalSection(&s_gs->m_pGSsetTitle_Crit);
|
||||
snprintf(dest, length-1, "GSdx | %s", s_gs->m_GStitleInfoBuffer);
|
||||
dest[length-1] = 0; // just in case!
|
||||
snprintf(dest, length - 1, "GSdx | %s", s_gs->m_GStitleInfoBuffer);
|
||||
dest[length - 1] = 0; // just in case!
|
||||
LeaveCriticalSection(&s_gs->m_pGSsetTitle_Crit);
|
||||
}
|
||||
}
|
||||
|
@ -505,22 +533,31 @@ EXPORT_C GSsetFrameSkip(int frameskip)
|
|||
EXPORT_C GSsetVsync(int enabled)
|
||||
{
|
||||
s_vsync = !!enabled;
|
||||
if( s_gs )
|
||||
|
||||
if(s_gs)
|
||||
{
|
||||
s_gs->SetVsync(s_vsync);
|
||||
}
|
||||
}
|
||||
|
||||
EXPORT_C GSsetExclusive(int enabled)
|
||||
{
|
||||
s_exclusive = !!enabled;
|
||||
if( s_gs )
|
||||
|
||||
if(s_gs)
|
||||
{
|
||||
s_gs->SetVsync(s_vsync);
|
||||
}
|
||||
}
|
||||
|
||||
EXPORT_C GSsetFrameLimit(int limit)
|
||||
{
|
||||
s_framelimit = !!limit;
|
||||
if( s_gs )
|
||||
|
||||
if(s_gs)
|
||||
{
|
||||
s_gs->SetFrameLimit(s_framelimit);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _WINDOWS
|
||||
|
@ -595,6 +632,7 @@ public:
|
|||
// lpszCmdLine:
|
||||
// First parameter is the renderer.
|
||||
// Second parameter is the gs file to load and run.
|
||||
|
||||
EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
|
||||
{
|
||||
int renderer = -1;
|
||||
|
|
|
@ -23,13 +23,13 @@
|
|||
#include "GSDrawScanline.h"
|
||||
#include "GSTextureCacheSW.h"
|
||||
|
||||
GSDrawScanline::GSDrawScanline(GSState* state, int id)
|
||||
: m_state(state)
|
||||
, m_id(id)
|
||||
, m_sp_map("GSSetupPrim", &m_env)
|
||||
, m_ds_map("GSDrawScanline", &m_env)
|
||||
GSDrawScanline::GSDrawScanline(GSScanlineGlobalData* gd)
|
||||
: m_sp_map("GSSetupPrim", &m_local)
|
||||
, m_ds_map("GSDrawScanline", &m_local)
|
||||
{
|
||||
memset(&m_env, 0, sizeof(m_env));
|
||||
memset(&m_local, 0, sizeof(m_local));
|
||||
|
||||
m_local.gd = gd;
|
||||
}
|
||||
|
||||
GSDrawScanline::~GSDrawScanline()
|
||||
|
@ -38,137 +38,13 @@ GSDrawScanline::~GSDrawScanline()
|
|||
|
||||
void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||
{
|
||||
GSDrawingEnvironment& env = m_state->m_env;
|
||||
GSDrawingContext* context = m_state->m_context;
|
||||
m_ds = m_ds_map[m_local.gd->sel];
|
||||
|
||||
const GSScanlineParam* p = (const GSScanlineParam*)data->param;
|
||||
|
||||
m_sel = p->sel;
|
||||
|
||||
m_env.vm = p->vm;
|
||||
m_env.fbr = p->fbo->pixel.row;
|
||||
m_env.zbr = p->zbo->pixel.row;
|
||||
m_env.fbc = p->fbo->pixel.col[0];
|
||||
m_env.zbc = p->zbo->pixel.col[0];
|
||||
m_env.fzbr = p->fzbo->row;
|
||||
m_env.fzbc = p->fzbo->col;
|
||||
m_env.fm = GSVector4i(p->fm);
|
||||
m_env.zm = GSVector4i(p->zm);
|
||||
m_env.aref = GSVector4i((int)context->TEST.AREF);
|
||||
m_env.afix = GSVector4i((int)context->ALPHA.FIX << 7).xxzzlh();
|
||||
m_env.frb = GSVector4i((int)env.FOGCOL.u32[0] & 0x00ff00ff);
|
||||
m_env.fga = GSVector4i((int)(env.FOGCOL.u32[0] >> 8) & 0x00ff00ff);
|
||||
m_env.dimx = env.dimx;
|
||||
|
||||
if(m_sel.fpsm == 1)
|
||||
{
|
||||
m_env.fm |= GSVector4i::xff000000();
|
||||
}
|
||||
else if(m_sel.fpsm == 2)
|
||||
{
|
||||
GSVector4i rb = m_env.fm & 0x00f800f8;
|
||||
GSVector4i ga = m_env.fm & 0x8000f800;
|
||||
|
||||
m_env.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000();
|
||||
}
|
||||
|
||||
if(m_sel.zpsm == 1)
|
||||
{
|
||||
m_env.zm |= GSVector4i::xff000000();
|
||||
}
|
||||
else if(m_sel.zpsm == 2)
|
||||
{
|
||||
m_env.zm |= GSVector4i::xffff0000();
|
||||
}
|
||||
|
||||
if(m_sel.atst == ATST_LESS)
|
||||
{
|
||||
m_sel.atst = ATST_LEQUAL;
|
||||
|
||||
m_env.aref -= GSVector4i::x00000001();
|
||||
}
|
||||
else if(m_sel.atst == ATST_GREATER)
|
||||
{
|
||||
m_sel.atst = ATST_GEQUAL;
|
||||
|
||||
m_env.aref += GSVector4i::x00000001();
|
||||
}
|
||||
|
||||
if(m_sel.tfx != TFX_NONE)
|
||||
{
|
||||
m_env.tex = p->tex;
|
||||
m_env.clut = p->clut;
|
||||
|
||||
unsigned short tw = (unsigned short)(1 << context->TEX0.TW);
|
||||
unsigned short th = (unsigned short)(1 << context->TEX0.TH);
|
||||
|
||||
switch(context->CLAMP.WMS)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
m_env.t.min.u16[0] = tw - 1;
|
||||
m_env.t.max.u16[0] = 0;
|
||||
m_env.t.mask.u32[0] = 0xffffffff;
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
m_env.t.min.u16[0] = 0;
|
||||
m_env.t.max.u16[0] = tw - 1;
|
||||
m_env.t.mask.u32[0] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
m_env.t.min.u16[0] = std::min<int>(context->CLAMP.MINU, tw - 1);
|
||||
m_env.t.max.u16[0] = std::min<int>(context->CLAMP.MAXU, tw - 1);
|
||||
m_env.t.mask.u32[0] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
m_env.t.min.u16[0] = context->CLAMP.MINU;
|
||||
m_env.t.max.u16[0] = context->CLAMP.MAXU;
|
||||
m_env.t.mask.u32[0] = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
switch(context->CLAMP.WMT)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
m_env.t.min.u16[4] = th - 1;
|
||||
m_env.t.max.u16[4] = 0;
|
||||
m_env.t.mask.u32[2] = 0xffffffff;
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
m_env.t.min.u16[4] = 0;
|
||||
m_env.t.max.u16[4] = th - 1;
|
||||
m_env.t.mask.u32[2] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
m_env.t.min.u16[4] = std::min<int>(context->CLAMP.MINV, th - 1);
|
||||
m_env.t.max.u16[4] = std::min<int>(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
|
||||
m_env.t.mask.u32[2] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
m_env.t.min.u16[4] = context->CLAMP.MINV;
|
||||
m_env.t.max.u16[4] = context->CLAMP.MAXV;
|
||||
m_env.t.mask.u32[2] = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
m_env.t.min = m_env.t.min.xxxxlh();
|
||||
m_env.t.max = m_env.t.max.xxxxlh();
|
||||
m_env.t.mask = m_env.t.mask.xxzz();
|
||||
m_env.t.invmask = ~m_env.t.mask;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
m_ds = m_ds_map[m_sel];
|
||||
|
||||
if(m_sel.aa1)// && (m_state->m_perfmon.GetFrame() & 0x40))
|
||||
if(m_local.gd->sel.aa1)// && (m_state->m_perfmon.GetFrame() & 0x40))
|
||||
{
|
||||
GSScanlineSelector sel;
|
||||
|
||||
sel.key = m_sel.key;
|
||||
sel.key = m_local.gd->sel.key;
|
||||
sel.zwrite = 0;
|
||||
sel.edge = 1;
|
||||
|
||||
|
@ -179,7 +55,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
|||
m_de = NULL;
|
||||
}
|
||||
|
||||
if(m_sel.IsSolidRect())
|
||||
if(m_local.gd->sel.IsSolidRect())
|
||||
{
|
||||
m_dr = (DrawRectPtr)&GSDrawScanline::DrawRect;
|
||||
}
|
||||
|
@ -194,22 +70,22 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
|||
|
||||
sel.key = 0;
|
||||
|
||||
sel.iip = m_sel.iip;
|
||||
sel.tfx = m_sel.tfx;
|
||||
sel.tcc = m_sel.tcc;
|
||||
sel.fst = m_sel.fst;
|
||||
sel.fge = m_sel.fge;
|
||||
sel.sprite = m_sel.sprite;
|
||||
sel.fb = m_sel.fb;
|
||||
sel.zb = m_sel.zb;
|
||||
sel.zoverflow = m_sel.zoverflow;
|
||||
sel.iip = m_local.gd->sel.iip;
|
||||
sel.tfx = m_local.gd->sel.tfx;
|
||||
sel.tcc = m_local.gd->sel.tcc;
|
||||
sel.fst = m_local.gd->sel.fst;
|
||||
sel.fge = m_local.gd->sel.fge;
|
||||
sel.sprite = m_local.gd->sel.sprite;
|
||||
sel.fb = m_local.gd->sel.fb;
|
||||
sel.zb = m_local.gd->sel.zb;
|
||||
sel.zoverflow = m_local.gd->sel.zoverflow;
|
||||
|
||||
m_sp = m_sp_map[sel];
|
||||
}
|
||||
|
||||
void GSDrawScanline::EndDraw(const GSRasterizerStats& stats)
|
||||
void GSDrawScanline::EndDraw(const GSRasterizerStats& stats, uint64 frame)
|
||||
{
|
||||
m_ds_map.UpdateStats(stats, m_state->m_perfmon.GetFrame());
|
||||
m_ds_map.UpdateStats(stats, frame);
|
||||
}
|
||||
|
||||
void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||
|
@ -221,56 +97,62 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
|||
|
||||
uint32 m;
|
||||
|
||||
m = m_env.zm.u32[0];
|
||||
m = m_local.gd->zm.u32[0];
|
||||
|
||||
if(m != 0xffffffff)
|
||||
{
|
||||
const int* zbr = m_local.gd->zbr;
|
||||
const int* zbc = m_local.gd->zbc;
|
||||
|
||||
uint32 z = (uint32)v.p.z;
|
||||
|
||||
if(m_sel.zpsm != 2)
|
||||
if(m_local.gd->sel.zpsm != 2)
|
||||
{
|
||||
if(m == 0)
|
||||
{
|
||||
DrawRectT<uint32, false>(m_env.zbr, m_env.zbc, r, z, m);
|
||||
DrawRectT<uint32, false>(zbr, zbc, r, z, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
DrawRectT<uint32, true>(m_env.zbr, m_env.zbc, r, z, m);
|
||||
DrawRectT<uint32, true>(zbr, zbc, r, z, m);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m == 0)
|
||||
{
|
||||
DrawRectT<uint16, false>(m_env.zbr, m_env.zbc, r, z, m);
|
||||
DrawRectT<uint16, false>(zbr, zbc, r, z, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
DrawRectT<uint16, true>(m_env.zbr, m_env.zbc, r, z, m);
|
||||
DrawRectT<uint16, true>(zbr, zbc, r, z, m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m = m_env.fm.u32[0];
|
||||
m = m_local.gd->fm.u32[0];
|
||||
|
||||
if(m != 0xffffffff)
|
||||
{
|
||||
const int* fbr = m_local.gd->fbr;
|
||||
const int* fbc = m_local.gd->fbc;
|
||||
|
||||
uint32 c = (GSVector4i(v.c) >> 7).rgba32();
|
||||
|
||||
if(m_state->m_context->FBA.FBA)
|
||||
if(m_local.gd->sel.fba)
|
||||
{
|
||||
c |= 0x80000000;
|
||||
}
|
||||
|
||||
if(m_sel.fpsm != 2)
|
||||
if(m_local.gd->sel.fpsm != 2)
|
||||
{
|
||||
if(m == 0)
|
||||
{
|
||||
DrawRectT<uint32, false>(m_env.fbr, m_env.fbc, r, c, m);
|
||||
DrawRectT<uint32, false>(fbr, fbc, r, c, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
DrawRectT<uint32, true>(m_env.fbr, m_env.fbc, r, c, m);
|
||||
DrawRectT<uint32, true>(fbr, fbc, r, c, m);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -279,11 +161,11 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
|||
|
||||
if(m == 0)
|
||||
{
|
||||
DrawRectT<uint16, false>(m_env.fbr, m_env.fbc, r, c, m);
|
||||
DrawRectT<uint16, false>(fbr, fbc, r, c, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
DrawRectT<uint16, true>(m_env.fbr, m_env.fbc, r, c, m);
|
||||
DrawRectT<uint16, true>(fbr, fbc, r, c, m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -331,9 +213,11 @@ void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col,
|
|||
{
|
||||
if(r.x >= r.z) return;
|
||||
|
||||
T* vm = (T*)m_local.gd->vm;
|
||||
|
||||
for(int y = r.y; y < r.w; y++)
|
||||
{
|
||||
T* RESTRICT d = &((T*)m_env.vm)[row[y]];
|
||||
T* RESTRICT d = &vm[row[y]];
|
||||
|
||||
for(int x = r.x; x < r.z; x++)
|
||||
{
|
||||
|
@ -347,9 +231,11 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
|
|||
{
|
||||
if(r.x >= r.z) return;
|
||||
|
||||
T* vm = (T*)m_local.gd->vm;
|
||||
|
||||
for(int y = r.y; y < r.w; y += 8)
|
||||
{
|
||||
T* RESTRICT d = &((T*)m_env.vm)[row[y]];
|
||||
T* RESTRICT d = &vm[row[y]];
|
||||
|
||||
for(int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
|
||||
{
|
||||
|
|
|
@ -29,8 +29,7 @@
|
|||
|
||||
class GSDrawScanline : public IDrawScanline
|
||||
{
|
||||
GSScanlineEnvironment m_env;
|
||||
GSScanlineSelector m_sel;
|
||||
GSScanlineLocalData m_local;
|
||||
|
||||
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimPtr> m_sp_map;
|
||||
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map;
|
||||
|
@ -46,17 +45,13 @@ class GSDrawScanline : public IDrawScanline
|
|||
template<class T, bool masked>
|
||||
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
|
||||
|
||||
protected:
|
||||
GSState* m_state;
|
||||
int m_id;
|
||||
|
||||
public:
|
||||
GSDrawScanline(GSState* state, int id);
|
||||
GSDrawScanline(GSScanlineGlobalData* gd);
|
||||
virtual ~GSDrawScanline();
|
||||
|
||||
// IDrawScanline
|
||||
|
||||
void BeginDraw(const GSRasterizerData* data);
|
||||
void EndDraw(const GSRasterizerStats& stats);
|
||||
void EndDraw(const GSRasterizerStats& stats, uint64 frame);
|
||||
void PrintStats() {m_ds_map.PrintStats();}
|
||||
};
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -32,12 +32,12 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
|
||||
static const GSVector4i m_test[8];
|
||||
|
||||
GSScanlineEnvironment& m_env;
|
||||
GSScanlineSelector m_sel;
|
||||
GSScanlineLocalData& m_local;
|
||||
|
||||
void Generate();
|
||||
|
||||
void Init(int params);
|
||||
void Init();
|
||||
void Step();
|
||||
void TestZ(const Xmm& temp1, const Xmm& temp2);
|
||||
void SampleTexture();
|
||||
|
@ -53,7 +53,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
void WriteMask();
|
||||
void WriteZBuf();
|
||||
void AlphaBlend();
|
||||
void WriteFrame(int params);
|
||||
void WriteFrame();
|
||||
|
||||
void ReadPixel(const Xmm& dst, const Reg32& addr);
|
||||
void WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz);
|
||||
|
|
|
@ -110,7 +110,7 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
|
|||
|
||||
m_stats.ticks = __rdtsc() - start;
|
||||
|
||||
m_ds->EndDraw(m_stats);
|
||||
m_ds->EndDraw(m_stats, data->frame);
|
||||
}
|
||||
|
||||
void GSRasterizer::GetStats(GSRasterizerStats& stats)
|
||||
|
@ -908,23 +908,19 @@ void GSRasterizerMT::ThreadProc()
|
|||
//
|
||||
|
||||
GSRasterizerList::GSRasterizerList()
|
||||
: m_sync(0)
|
||||
, m_syncstart(0)
|
||||
, m_param(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
GSRasterizerList::~GSRasterizerList()
|
||||
{
|
||||
FreeRasterizers();
|
||||
}
|
||||
|
||||
void GSRasterizerList::FreeRasterizers()
|
||||
{
|
||||
for(size_t i = 0; i < size(); i++) delete (*this)[i];
|
||||
|
||||
clear();
|
||||
|
||||
for(size_t i = 0; i < m_ready.size(); i++) CloseHandle(m_ready[i]);
|
||||
|
||||
m_ready.clear();
|
||||
if(m_param) _aligned_free(m_param);
|
||||
}
|
||||
|
||||
void GSRasterizerList::Sync()
|
||||
|
@ -956,6 +952,8 @@ void GSRasterizerList::Draw(const GSRasterizerData* data)
|
|||
{
|
||||
m_stats.Reset();
|
||||
|
||||
memcpy(m_param, data->param, m_param_size);
|
||||
|
||||
m_start = __rdtsc();
|
||||
|
||||
m_sync = m_syncstart;
|
||||
|
|
|
@ -34,6 +34,7 @@ public:
|
|||
GS_PRIM_CLASS primclass;
|
||||
const GSVertexSW* vertices;
|
||||
int count;
|
||||
uint64 frame;
|
||||
const void* param;
|
||||
};
|
||||
|
||||
|
@ -55,7 +56,7 @@ public:
|
|||
virtual ~IDrawScanline() {}
|
||||
|
||||
virtual void BeginDraw(const GSRasterizerData* data) = 0;
|
||||
virtual void EndDraw(const GSRasterizerStats& stats) = 0;
|
||||
virtual void EndDraw(const GSRasterizerStats& stats, uint64 frame) = 0;
|
||||
virtual void PrintStats() = 0;
|
||||
|
||||
__forceinline void SetupPrim(const GSVertexSW* v, const GSVertexSW& dscan) {m_sp(v, dscan);}
|
||||
|
@ -141,28 +142,29 @@ protected:
|
|||
long m_syncstart;
|
||||
GSRasterizerStats m_stats;
|
||||
int64 m_start;
|
||||
|
||||
void FreeRasterizers();
|
||||
void* m_param;
|
||||
size_t m_param_size;
|
||||
|
||||
public:
|
||||
GSRasterizerList();
|
||||
virtual ~GSRasterizerList();
|
||||
|
||||
template<class DS, class T> void Create(T* parent, int threads)
|
||||
template<class DS, class PARAM> void Create(int threads)
|
||||
{
|
||||
FreeRasterizers();
|
||||
|
||||
threads = std::max<int>(threads, 1); // TODO: min(threads, number of cpu cores)
|
||||
|
||||
m_param = _aligned_malloc(sizeof(PARAM), 32);
|
||||
m_param_size = sizeof(PARAM);
|
||||
|
||||
m_syncstart = 0;
|
||||
|
||||
push_back(new GSRasterizer(new DS(parent, 0), 0, threads));
|
||||
push_back(new GSRasterizer(new DS((PARAM*)m_param), 0, threads));
|
||||
|
||||
for(int i = 1; i < threads; i++)
|
||||
{
|
||||
HANDLE ready = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
HANDLE ready = CreateEvent(NULL, FALSE, TRUE, NULL);
|
||||
|
||||
push_back(new GSRasterizerMT(new DS(parent, i), i, threads, ready, m_sync));
|
||||
push_back(new GSRasterizerMT(new DS((PARAM*)m_param), i, threads, ready, m_sync));
|
||||
|
||||
m_ready.push_back(ready);
|
||||
|
||||
|
|
|
@ -24,14 +24,14 @@
|
|||
|
||||
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||
|
||||
GSRendererSW::GSRendererSW()
|
||||
GSRendererSW::GSRendererSW(int threads)
|
||||
: GSRendererT()
|
||||
{
|
||||
m_tc = new GSTextureCacheSW(this);
|
||||
|
||||
memset(m_texture, 0, sizeof(m_texture));
|
||||
|
||||
m_rl.Create<GSDrawScanline>(this, theApp.GetConfig("swthreads", 1));
|
||||
m_rl.Create<GSDrawScanline, GSScanlineGlobalData>(threads);
|
||||
|
||||
InitVertexKick<GSRendererSW>();
|
||||
}
|
||||
|
@ -128,11 +128,11 @@ void GSRendererSW::Draw()
|
|||
m_dump.Object(m_vertices, m_count, m_vt.m_primclass);
|
||||
}
|
||||
|
||||
GSScanlineParam p;
|
||||
GSScanlineGlobalData gd;
|
||||
|
||||
GetScanlineParam(p, m_vt.m_primclass);
|
||||
GetScanlineGlobalData(gd);
|
||||
|
||||
if((p.fm & p.zm) == 0xffffffff)
|
||||
if(!gd.sel.fwrite && !gd.sel.zwrite)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -176,18 +176,19 @@ void GSRendererSW::Draw()
|
|||
data.primclass = m_vt.m_primclass;
|
||||
data.vertices = m_vertices;
|
||||
data.count = m_count;
|
||||
data.param = &p;
|
||||
data.frame = m_perfmon.GetFrame();
|
||||
data.param = &gd;
|
||||
|
||||
m_rl.Draw(&data);
|
||||
|
||||
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor);
|
||||
|
||||
if(p.fm != 0xffffffff)
|
||||
if(gd.sel.fwrite)
|
||||
{
|
||||
m_tc->InvalidateVideoMem(m_context->offset.fb, r);
|
||||
}
|
||||
|
||||
if(p.zm != 0xffffffff)
|
||||
if(gd.sel.zwrite)
|
||||
{
|
||||
m_tc->InvalidateVideoMem(m_context->offset.zb, r);
|
||||
}
|
||||
|
@ -230,7 +231,7 @@ void GSRendererSW::Draw()
|
|||
if(0)//stats.ticks > 5000000)
|
||||
{
|
||||
printf("* [%I64d | %012I64x] ticks %I64d prims %d (%d) pixels %d (%d)\n",
|
||||
m_perfmon.GetFrame(), p.sel.key,
|
||||
m_perfmon.GetFrame(), gd.sel.key,
|
||||
stats.ticks,
|
||||
stats.prims, stats.prims > 0 ? (int)(stats.ticks / stats.prims) : -1,
|
||||
stats.pixels, stats.pixels > 0 ? (int)(stats.ticks / stats.pixels) : -1);
|
||||
|
@ -242,33 +243,38 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
|
||||
}
|
||||
|
||||
void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
|
||||
void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
||||
{
|
||||
const GSDrawingEnvironment& env = m_env;
|
||||
const GSDrawingContext* context = m_context;
|
||||
const GS_PRIM_CLASS primclass = m_vt.m_primclass;
|
||||
|
||||
p.vm = m_mem.m_vm8;
|
||||
gd.vm = m_mem.m_vm8;
|
||||
gd.dimx = env.dimx;
|
||||
|
||||
p.fbo = context->offset.fb;
|
||||
p.zbo = context->offset.zb;
|
||||
p.fzbo = context->offset.fzb;
|
||||
gd.fbr = context->offset.fb->pixel.row;
|
||||
gd.zbr = context->offset.zb->pixel.row;
|
||||
gd.fbc = context->offset.fb->pixel.col[0];
|
||||
gd.zbc = context->offset.zb->pixel.col[0];
|
||||
gd.fzbr = context->offset.fzb->row;
|
||||
gd.fzbc = context->offset.fzb->col;
|
||||
|
||||
p.sel.key = 0;
|
||||
gd.sel.key = 0;
|
||||
|
||||
p.sel.fpsm = 3;
|
||||
p.sel.zpsm = 3;
|
||||
p.sel.atst = ATST_ALWAYS;
|
||||
p.sel.tfx = TFX_NONE;
|
||||
p.sel.ababcd = 255;
|
||||
p.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0;
|
||||
gd.sel.fpsm = 3;
|
||||
gd.sel.zpsm = 3;
|
||||
gd.sel.atst = ATST_ALWAYS;
|
||||
gd.sel.tfx = TFX_NONE;
|
||||
gd.sel.ababcd = 255;
|
||||
gd.sel.sprite = primclass == GS_SPRITE_CLASS ? 1 : 0;
|
||||
|
||||
p.fm = context->FRAME.FBMSK;
|
||||
p.zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||
uint32 fm = context->FRAME.FBMSK;
|
||||
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||
|
||||
if(context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER)
|
||||
{
|
||||
p.fm = 0xffffffff;
|
||||
p.zm = 0xffffffff;
|
||||
fm = 0xffffffff;
|
||||
zm = 0xffffffff;
|
||||
}
|
||||
|
||||
if(PRIM->TME)
|
||||
|
@ -278,46 +284,60 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
|
|||
|
||||
if(context->TEST.ATE)
|
||||
{
|
||||
if(!TryAlphaTest(p.fm, p.zm))
|
||||
if(!TryAlphaTest(fm, zm))
|
||||
{
|
||||
p.sel.atst = context->TEST.ATST;
|
||||
p.sel.afail = context->TEST.AFAIL;
|
||||
gd.sel.atst = context->TEST.ATST;
|
||||
gd.sel.afail = context->TEST.AFAIL;
|
||||
|
||||
gd.aref = GSVector4i((int)context->TEST.AREF);
|
||||
|
||||
switch(gd.sel.atst)
|
||||
{
|
||||
case ATST_LESS:
|
||||
gd.sel.atst = ATST_LEQUAL;
|
||||
gd.aref -= GSVector4i::x00000001();
|
||||
break;
|
||||
case ATST_GREATER:
|
||||
gd.sel.atst = ATST_GEQUAL;
|
||||
gd.aref += GSVector4i::x00000001();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool fwrite = p.fm != 0xffffffff;
|
||||
bool ftest = p.sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
|
||||
bool fwrite = fm != 0xffffffff;
|
||||
bool ftest = gd.sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
|
||||
|
||||
p.sel.fwrite = fwrite;
|
||||
p.sel.ftest = ftest;
|
||||
gd.sel.fwrite = fwrite;
|
||||
gd.sel.ftest = ftest;
|
||||
|
||||
if(fwrite || ftest)
|
||||
{
|
||||
p.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
|
||||
gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
|
||||
|
||||
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff)
|
||||
{
|
||||
p.sel.iip = PRIM->IIP;
|
||||
gd.sel.iip = PRIM->IIP;
|
||||
}
|
||||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
p.sel.tfx = context->TEX0.TFX;
|
||||
p.sel.tcc = context->TEX0.TCC;
|
||||
p.sel.fst = PRIM->FST;
|
||||
p.sel.ltf = IsLinear();
|
||||
p.sel.tlu = GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0;
|
||||
p.sel.wms = context->CLAMP.WMS;
|
||||
p.sel.wmt = context->CLAMP.WMT;
|
||||
gd.sel.tfx = context->TEX0.TFX;
|
||||
gd.sel.tcc = context->TEX0.TCC;
|
||||
gd.sel.fst = PRIM->FST;
|
||||
gd.sel.ltf = IsLinear();
|
||||
gd.sel.tlu = GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0;
|
||||
gd.sel.wms = context->CLAMP.WMS;
|
||||
gd.sel.wmt = context->CLAMP.WMT;
|
||||
|
||||
if(p.sel.tfx == TFX_MODULATE && p.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128)))
|
||||
if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128)))
|
||||
{
|
||||
// modulate does not do anything when vertex color is 0x80
|
||||
|
||||
p.sel.tfx = TFX_DECAL;
|
||||
gd.sel.tfx = TFX_DECAL;
|
||||
}
|
||||
|
||||
if(p.sel.fst == 0)
|
||||
if(gd.sel.fst == 0)
|
||||
{
|
||||
// skip per pixel division if q is constant
|
||||
|
||||
|
@ -325,7 +345,7 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
|
|||
|
||||
if(m_vt.m_eq.q)
|
||||
{
|
||||
p.sel.fst = 1;
|
||||
gd.sel.fst = 1;
|
||||
|
||||
if(v[0].t.z != 1.0f)
|
||||
{
|
||||
|
@ -339,7 +359,7 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
|
|||
}
|
||||
else if(primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
p.sel.fst = 1;
|
||||
gd.sel.fst = 1;
|
||||
|
||||
for(int i = 0, j = m_count; i < j; i += 2)
|
||||
{
|
||||
|
@ -351,11 +371,11 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
|
|||
}
|
||||
}
|
||||
|
||||
if(p.sel.ltf)
|
||||
if(gd.sel.ltf)
|
||||
{
|
||||
GSVector4 half(0x8000, 0x8000);
|
||||
|
||||
if(p.sel.fst)
|
||||
if(gd.sel.fst)
|
||||
{
|
||||
// if q is constant we can do the half pel shift for bilinear sampling on the vertices
|
||||
|
||||
|
@ -370,68 +390,160 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
|
|||
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, p.sel.ltf);
|
||||
GetTextureMinMax(r, gd.sel.ltf);
|
||||
|
||||
const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(context->TEX0, env.TEXA, r);
|
||||
|
||||
if(!t) {ASSERT(0); return;}
|
||||
|
||||
p.tex = t->m_buff;
|
||||
p.clut = m_mem.m_clut;
|
||||
gd.tex = t->m_buff;
|
||||
gd.clut = m_mem.m_clut;
|
||||
|
||||
p.sel.tw = t->m_tw - 3;
|
||||
gd.sel.tw = t->m_tw - 3;
|
||||
|
||||
uint16 tw = (uint16)(1 << context->TEX0.TW);
|
||||
uint16 th = (uint16)(1 << context->TEX0.TH);
|
||||
|
||||
switch(context->CLAMP.WMS)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
gd.t.min.u16[0] = tw - 1;
|
||||
gd.t.max.u16[0] = 0;
|
||||
gd.t.mask.u32[0] = 0xffffffff;
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
gd.t.min.u16[0] = 0;
|
||||
gd.t.max.u16[0] = tw - 1;
|
||||
gd.t.mask.u32[0] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
gd.t.min.u16[0] = std::min<int>(context->CLAMP.MINU, tw - 1);
|
||||
gd.t.max.u16[0] = std::min<int>(context->CLAMP.MAXU, tw - 1);
|
||||
gd.t.mask.u32[0] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
gd.t.min.u16[0] = context->CLAMP.MINU;
|
||||
gd.t.max.u16[0] = context->CLAMP.MAXU;
|
||||
gd.t.mask.u32[0] = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
switch(context->CLAMP.WMT)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
gd.t.min.u16[4] = th - 1;
|
||||
gd.t.max.u16[4] = 0;
|
||||
gd.t.mask.u32[2] = 0xffffffff;
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
gd.t.min.u16[4] = 0;
|
||||
gd.t.max.u16[4] = th - 1;
|
||||
gd.t.mask.u32[2] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
gd.t.min.u16[4] = std::min<int>(context->CLAMP.MINV, th - 1);
|
||||
gd.t.max.u16[4] = std::min<int>(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
|
||||
gd.t.mask.u32[2] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
gd.t.min.u16[4] = context->CLAMP.MINV;
|
||||
gd.t.max.u16[4] = context->CLAMP.MAXV;
|
||||
gd.t.mask.u32[2] = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
gd.t.min = gd.t.min.xxxxlh();
|
||||
gd.t.max = gd.t.max.xxxxlh();
|
||||
gd.t.mask = gd.t.mask.xxzz();
|
||||
gd.t.invmask = ~gd.t.mask;
|
||||
}
|
||||
|
||||
p.sel.fge = PRIM->FGE;
|
||||
if(PRIM->FGE)
|
||||
{
|
||||
gd.sel.fge = 1;
|
||||
|
||||
gd.frb = GSVector4i((int)env.FOGCOL.u32[0] & 0x00ff00ff);
|
||||
gd.fga = GSVector4i((int)(env.FOGCOL.u32[0] >> 8) & 0x00ff00ff);
|
||||
}
|
||||
|
||||
if(context->FRAME.PSM != PSM_PSMCT24)
|
||||
{
|
||||
p.sel.date = context->TEST.DATE;
|
||||
p.sel.datm = context->TEST.DATM;
|
||||
gd.sel.date = context->TEST.DATE;
|
||||
gd.sel.datm = context->TEST.DATM;
|
||||
}
|
||||
|
||||
if(!IsOpaque())
|
||||
{
|
||||
p.sel.abe = PRIM->ABE;
|
||||
p.sel.ababcd = context->ALPHA.u32[0];
|
||||
gd.sel.abe = PRIM->ABE;
|
||||
gd.sel.ababcd = context->ALPHA.u32[0];
|
||||
|
||||
if(env.PABE.PABE)
|
||||
{
|
||||
p.sel.pabe = 1;
|
||||
gd.sel.pabe = 1;
|
||||
}
|
||||
|
||||
if(m_aa1 && PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS))
|
||||
{
|
||||
p.sel.aa1 = 1;
|
||||
gd.sel.aa1 = 1;
|
||||
}
|
||||
|
||||
gd.afix = GSVector4i((int)context->ALPHA.FIX << 7).xxzzlh();
|
||||
}
|
||||
|
||||
if(p.sel.date
|
||||
|| p.sel.aba == 1 || p.sel.abb == 1 || p.sel.abc == 1 || p.sel.abd == 1
|
||||
|| p.sel.atst != ATST_ALWAYS && p.sel.afail == AFAIL_RGB_ONLY
|
||||
|| p.sel.fpsm == 0 && p.fm != 0 && p.fm != 0xffffffff
|
||||
|| p.sel.fpsm == 1 && (p.fm & 0x00ffffff) != 0 && (p.fm & 0x00ffffff) != 0x00ffffff
|
||||
|| p.sel.fpsm == 2 && (p.fm & 0x80f8f8f8) != 0 && (p.fm & 0x80f8f8f8) != 0x80f8f8f8)
|
||||
if(gd.sel.date
|
||||
|| gd.sel.aba == 1 || gd.sel.abb == 1 || gd.sel.abc == 1 || gd.sel.abd == 1
|
||||
|| gd.sel.atst != ATST_ALWAYS && gd.sel.afail == AFAIL_RGB_ONLY
|
||||
|| gd.sel.fpsm == 0 && fm != 0 && fm != 0xffffffff
|
||||
|| gd.sel.fpsm == 1 && (fm & 0x00ffffff) != 0 && (fm & 0x00ffffff) != 0x00ffffff
|
||||
|| gd.sel.fpsm == 2 && (fm & 0x80f8f8f8) != 0 && (fm & 0x80f8f8f8) != 0x80f8f8f8)
|
||||
{
|
||||
p.sel.rfb = 1;
|
||||
gd.sel.rfb = 1;
|
||||
}
|
||||
|
||||
p.sel.colclamp = env.COLCLAMP.CLAMP;
|
||||
p.sel.fba = context->FBA.FBA;
|
||||
p.sel.dthe = env.DTHE.DTHE;
|
||||
gd.sel.colclamp = env.COLCLAMP.CLAMP;
|
||||
gd.sel.fba = context->FBA.FBA;
|
||||
gd.sel.dthe = env.DTHE.DTHE;
|
||||
}
|
||||
|
||||
bool zwrite = p.zm != 0xffffffff;
|
||||
bool zwrite = zm != 0xffffffff;
|
||||
bool ztest = context->TEST.ZTE && context->TEST.ZTST > ZTST_ALWAYS;
|
||||
|
||||
p.sel.zwrite = zwrite;
|
||||
p.sel.ztest = ztest;
|
||||
gd.sel.zwrite = zwrite;
|
||||
gd.sel.ztest = ztest;
|
||||
|
||||
if(zwrite || ztest)
|
||||
{
|
||||
p.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
|
||||
p.sel.ztst = ztest ? context->TEST.ZTST : ZTST_ALWAYS;
|
||||
p.sel.zoverflow = GSVector4i(m_vt.m_max.p).z == 0x80000000;
|
||||
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
|
||||
gd.sel.ztst = ztest ? context->TEST.ZTST : ZTST_ALWAYS;
|
||||
gd.sel.zoverflow = GSVector4i(m_vt.m_max.p).z == 0x80000000;
|
||||
}
|
||||
|
||||
gd.fm = GSVector4i(fm);
|
||||
gd.zm = GSVector4i(zm);
|
||||
|
||||
if(gd.sel.fpsm == 1)
|
||||
{
|
||||
gd.fm |= GSVector4i::xff000000();
|
||||
}
|
||||
else if(gd.sel.fpsm == 2)
|
||||
{
|
||||
GSVector4i rb = gd.fm & 0x00f800f8;
|
||||
GSVector4i ga = gd.fm & 0x8000f800;
|
||||
|
||||
gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000();
|
||||
}
|
||||
|
||||
if(gd.sel.zpsm == 1)
|
||||
{
|
||||
gd.zm |= GSVector4i::xff000000();
|
||||
}
|
||||
else if(gd.sel.zpsm == 2)
|
||||
{
|
||||
gd.zm |= GSVector4i::xffff0000();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,10 +41,10 @@ protected:
|
|||
void Draw();
|
||||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
||||
|
||||
void GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass);
|
||||
void GetScanlineGlobalData(GSScanlineGlobalData& gd);
|
||||
|
||||
public:
|
||||
GSRendererSW();
|
||||
GSRendererSW(int threads);
|
||||
virtual ~GSRendererSW();
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
|
|
|
@ -85,10 +85,10 @@ union GSScanlineSelector
|
|||
|
||||
uint64 key;
|
||||
|
||||
operator uint32() {return lo;}
|
||||
operator uint64() {return key;}
|
||||
operator uint32() const {return lo;}
|
||||
operator uint64() const {return key;}
|
||||
|
||||
bool IsSolidRect()
|
||||
bool IsSolidRect() const
|
||||
{
|
||||
return sprite
|
||||
&& iip == 0
|
||||
|
@ -101,45 +101,44 @@ union GSScanlineSelector
|
|||
}
|
||||
};
|
||||
|
||||
__aligned32 struct GSScanlineParam
|
||||
__aligned32 struct GSScanlineGlobalData // per batch variables, this is like a pixel shader constant buffer
|
||||
{
|
||||
GSScanlineSelector sel;
|
||||
|
||||
// - the data of vm, tex, clut, dimx may change, multi-threaded drawing must be finished before that happens (an idea: remember which pages are used, sync when something needs to read or write them)
|
||||
// - tex is a cached texture, it may be recycled to free up memory, its absolute address cannot be compiled into code
|
||||
// - row and column pointers are allocated once and never changed or freed, their addresses can be used directly
|
||||
// - if in the future drawing does not have to be synchronized per batch, the rest of GSRasterizerData should be copied here, too (scissor, prim type, vertices)
|
||||
|
||||
void* vm;
|
||||
const void* tex;
|
||||
const uint32* clut;
|
||||
const GSVector4i* dimx;
|
||||
|
||||
GSOffset* fbo;
|
||||
GSOffset* zbo;
|
||||
GSPixelOffset4* fzbo;
|
||||
|
||||
uint32 fm, zm;
|
||||
};
|
||||
|
||||
__aligned32 struct GSScanlineEnvironment
|
||||
{
|
||||
void* vm;
|
||||
const void* tex;
|
||||
const uint32* clut;
|
||||
|
||||
int* fbr;
|
||||
int* zbr;
|
||||
int* fbc;
|
||||
int* zbc;
|
||||
GSVector2i* fzbr;
|
||||
GSVector2i* fzbc;
|
||||
|
||||
GSVector4i* dimx;
|
||||
const int* fbr;
|
||||
const int* zbr;
|
||||
const int* fbc;
|
||||
const int* zbc;
|
||||
const GSVector2i* fzbr;
|
||||
const GSVector2i* fzbc;
|
||||
|
||||
GSVector4i fm, zm;
|
||||
struct {GSVector4i min, max, mask, invmask;} t; // [u] x 4 [v] x 4
|
||||
GSVector4i aref;
|
||||
GSVector4i afix;
|
||||
GSVector4i frb, fga;
|
||||
};
|
||||
|
||||
__aligned32 struct GSScanlineLocalData // per prim variables, each thread has its own
|
||||
{
|
||||
const GSScanlineGlobalData* gd;
|
||||
|
||||
struct {GSVector4 z, s, t, q; GSVector4i rb, ga, f, si, ti, _pad[7];} d[4];
|
||||
struct {GSVector4 z, stq; GSVector4i c, f, st;} d4;
|
||||
struct {GSVector4i rb, ga;} c;
|
||||
struct {GSVector4i z, f;} p;
|
||||
struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf, cov;} temp;
|
||||
|
||||
// these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack)
|
||||
|
||||
struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf, cov;} temp;
|
||||
};
|
||||
|
|
|
@ -28,8 +28,12 @@ using namespace Xbyak;
|
|||
|
||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
, m_env(*(GSScanlineEnvironment*)param)
|
||||
, m_local(*(GSScanlineLocalData*)param)
|
||||
{
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
#endif
|
||||
|
||||
m_sel.key = key;
|
||||
|
||||
m_en.z = m_sel.zb ? 1 : 0;
|
||||
|
@ -37,10 +41,6 @@ GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void
|
|||
m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0;
|
||||
m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 1 : 0;
|
||||
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
#endif
|
||||
|
||||
Generate();
|
||||
}
|
||||
|
||||
|
@ -91,23 +91,23 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
// m_env.d4.f = GSVector4i(df * 4.0f).xxzzlh();
|
||||
// m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh();
|
||||
|
||||
vmulps(xmm2, xmm1, xmm3);
|
||||
vcvttps2dq(xmm2, xmm2);
|
||||
vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vmovdqa(ptr[&m_env.d4.f], xmm2);
|
||||
vmovdqa(ptr[&m_local.d4.f], xmm2);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// m_env.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||
// m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||
|
||||
vmulps(xmm2, xmm1, Xmm(4 + i));
|
||||
vcvttps2dq(xmm2, xmm2);
|
||||
vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vmovdqa(ptr[&m_env.d[i].f], xmm2);
|
||||
vmovdqa(ptr[&m_local.d[i].f], xmm2);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,17 +117,17 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
// m_env.d4.z = dz * 4.0f;
|
||||
// m_local.d4.z = dz * 4.0f;
|
||||
|
||||
vmulps(xmm1, xmm0, xmm3);
|
||||
vmovdqa(ptr[&m_env.d4.z], xmm1);
|
||||
vmovdqa(ptr[&m_local.d4.z], xmm1);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// m_env.d[i].z = dz * m_shift[i];
|
||||
// m_local.d[i].z = dz * m_shift[i];
|
||||
|
||||
vmulps(xmm1, xmm0, Xmm(4 + i));
|
||||
vmovdqa(ptr[&m_env.d[i].z], xmm1);
|
||||
vmovdqa(ptr[&m_local.d[i].z], xmm1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -139,12 +139,12 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
if(m_en.f)
|
||||
{
|
||||
// m_env.p.f = GSVector4i(p).zzzzh().zzzz();
|
||||
// m_local.p.f = GSVector4i(p).zzzzh().zzzz();
|
||||
|
||||
vcvttps2dq(xmm1, xmm0);
|
||||
vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vmovdqa(ptr[&m_env.p.f], xmm1);
|
||||
vmovdqa(ptr[&m_local.p.f], xmm1);
|
||||
}
|
||||
|
||||
if(m_en.z)
|
||||
|
@ -155,7 +155,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
if(m_sel.zoverflow)
|
||||
{
|
||||
// m_env.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
// m_local.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
|
||||
static const float half = 0.5f;
|
||||
|
||||
|
@ -173,12 +173,12 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
}
|
||||
else
|
||||
{
|
||||
// m_env.p.z = GSVector4i(z);
|
||||
// m_local.p.z = GSVector4i(z);
|
||||
|
||||
vcvttps2dq(xmm0, xmm0);
|
||||
}
|
||||
|
||||
vmovdqa(ptr[&m_env.p.z], xmm0);
|
||||
vmovdqa(ptr[&m_local.p.z], xmm0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -197,25 +197,25 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
movaps(xmm1, xmm0);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
// m_env.d4.f = GSVector4i(df * 4.0f).xxzzlh();
|
||||
// m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh();
|
||||
|
||||
movaps(xmm2, xmm1);
|
||||
mulps(xmm2, xmm3);
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
movdqa(ptr[&m_env.d4.f], xmm2);
|
||||
movdqa(ptr[&m_local.d4.f], xmm2);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// m_env.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||
// m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||
|
||||
movaps(xmm2, xmm1);
|
||||
mulps(xmm2, Xmm(4 + i));
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
movdqa(ptr[&m_env.d[i].f], xmm2);
|
||||
movdqa(ptr[&m_local.d[i].f], xmm2);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -225,19 +225,19 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
// m_env.d4.z = dz * 4.0f;
|
||||
// m_local.d4.z = dz * 4.0f;
|
||||
|
||||
movaps(xmm1, xmm0);
|
||||
mulps(xmm1, xmm3);
|
||||
movdqa(ptr[&m_env.d4.z], xmm1);
|
||||
movdqa(ptr[&m_local.d4.z], xmm1);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
// m_env.d[i].z = dz * m_shift[i];
|
||||
// m_local.d[i].z = dz * m_shift[i];
|
||||
|
||||
movaps(xmm1, xmm0);
|
||||
mulps(xmm1, Xmm(4 + i));
|
||||
movdqa(ptr[&m_env.d[i].z], xmm1);
|
||||
movdqa(ptr[&m_local.d[i].z], xmm1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -249,12 +249,12 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
if(m_en.f)
|
||||
{
|
||||
// m_env.p.f = GSVector4i(p).zzzzh().zzzz();
|
||||
// m_local.p.f = GSVector4i(p).zzzzh().zzzz();
|
||||
|
||||
cvttps2dq(xmm1, xmm0);
|
||||
pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
movdqa(ptr[&m_env.p.f], xmm1);
|
||||
movdqa(ptr[&m_local.p.f], xmm1);
|
||||
}
|
||||
|
||||
if(m_en.z)
|
||||
|
@ -265,7 +265,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
if(m_sel.zoverflow)
|
||||
{
|
||||
// m_env.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
// m_local.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
|
||||
static const float half = 0.5f;
|
||||
|
||||
|
@ -284,12 +284,12 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
}
|
||||
else
|
||||
{
|
||||
// m_env.p.z = GSVector4i(z);
|
||||
// m_local.p.z = GSVector4i(z);
|
||||
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
}
|
||||
|
||||
movdqa(ptr[&m_env.p.z], xmm0);
|
||||
movdqa(ptr[&m_local.p.z], xmm0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -312,16 +312,16 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// m_env.d4.st = GSVector4i(t * 4.0f);
|
||||
// m_local.d4.st = GSVector4i(t * 4.0f);
|
||||
|
||||
vcvttps2dq(xmm1, xmm1);
|
||||
vmovdqa(ptr[&m_env.d4.st], xmm1);
|
||||
vmovdqa(ptr[&m_local.d4.st], xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_env.d4.stq = t * 4.0f;
|
||||
// m_local.d4.stq = t * 4.0f;
|
||||
|
||||
vmovaps(ptr[&m_env.d4.stq], xmm1);
|
||||
vmovaps(ptr[&m_local.d4.stq], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||
|
@ -340,25 +340,25 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// m_env.d[i].si/ti = GSVector4i(v);
|
||||
// m_local.d[i].si/ti = GSVector4i(v);
|
||||
|
||||
vcvttps2dq(xmm2, xmm2);
|
||||
|
||||
switch(j)
|
||||
{
|
||||
case 0: vmovdqa(ptr[&m_env.d[i].si], xmm2); break;
|
||||
case 1: vmovdqa(ptr[&m_env.d[i].ti], xmm2); break;
|
||||
case 0: vmovdqa(ptr[&m_local.d[i].si], xmm2); break;
|
||||
case 1: vmovdqa(ptr[&m_local.d[i].ti], xmm2); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_env.d[i].s/t/q = v;
|
||||
// m_local.d[i].s/t/q = v;
|
||||
|
||||
switch(j)
|
||||
{
|
||||
case 0: vmovaps(ptr[&m_env.d[i].s], xmm2); break;
|
||||
case 1: vmovaps(ptr[&m_env.d[i].t], xmm2); break;
|
||||
case 2: vmovaps(ptr[&m_env.d[i].q], xmm2); break;
|
||||
case 0: vmovaps(ptr[&m_local.d[i].s], xmm2); break;
|
||||
case 1: vmovaps(ptr[&m_local.d[i].t], xmm2); break;
|
||||
case 2: vmovaps(ptr[&m_local.d[i].q], xmm2); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -375,16 +375,16 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// m_env.d4.st = GSVector4i(t * 4.0f);
|
||||
// m_local.d4.st = GSVector4i(t * 4.0f);
|
||||
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
movdqa(ptr[&m_env.d4.st], xmm1);
|
||||
movdqa(ptr[&m_local.d4.st], xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_env.d4.stq = t * 4.0f;
|
||||
// m_local.d4.stq = t * 4.0f;
|
||||
|
||||
movaps(ptr[&m_env.d4.stq], xmm1);
|
||||
movaps(ptr[&m_local.d4.stq], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||
|
@ -405,25 +405,25 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// m_env.d[i].si/ti = GSVector4i(v);
|
||||
// m_local.d[i].si/ti = GSVector4i(v);
|
||||
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
|
||||
switch(j)
|
||||
{
|
||||
case 0: movdqa(ptr[&m_env.d[i].si], xmm2); break;
|
||||
case 1: movdqa(ptr[&m_env.d[i].ti], xmm2); break;
|
||||
case 0: movdqa(ptr[&m_local.d[i].si], xmm2); break;
|
||||
case 1: movdqa(ptr[&m_local.d[i].ti], xmm2); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_env.d[i].s/t/q = v;
|
||||
// m_local.d[i].s/t/q = v;
|
||||
|
||||
switch(j)
|
||||
{
|
||||
case 0: movaps(ptr[&m_env.d[i].s], xmm2); break;
|
||||
case 1: movaps(ptr[&m_env.d[i].t], xmm2); break;
|
||||
case 2: movaps(ptr[&m_env.d[i].q], xmm2); break;
|
||||
case 0: movaps(ptr[&m_local.d[i].s], xmm2); break;
|
||||
case 1: movaps(ptr[&m_local.d[i].t], xmm2); break;
|
||||
case 2: movaps(ptr[&m_local.d[i].q], xmm2); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -446,13 +446,13 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
|
||||
vmovaps(xmm0, ptr[edx]);
|
||||
|
||||
// m_env.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
|
||||
// m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
|
||||
|
||||
vmulps(xmm1, xmm0, xmm3);
|
||||
vcvttps2dq(xmm1, xmm1);
|
||||
vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0));
|
||||
vpackssdw(xmm1, xmm1);
|
||||
vmovdqa(ptr[&m_env.d4.c], xmm1);
|
||||
vmovdqa(ptr[&m_local.d4.c], xmm1);
|
||||
|
||||
// xmm3 is not needed anymore
|
||||
|
||||
|
@ -476,10 +476,10 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
vcvttps2dq(xmm1, xmm1);
|
||||
vpackssdw(xmm1, xmm1);
|
||||
|
||||
// m_env.d[i].rb = r.upl16(b);
|
||||
// m_local.d[i].rb = r.upl16(b);
|
||||
|
||||
vpunpcklwd(xmm0, xmm1);
|
||||
vmovdqa(ptr[&m_env.d[i].rb], xmm0);
|
||||
vmovdqa(ptr[&m_local.d[i].rb], xmm0);
|
||||
}
|
||||
|
||||
// GSVector4 c = dscan.c;
|
||||
|
@ -506,10 +506,10 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
vcvttps2dq(xmm1, xmm1);
|
||||
vpackssdw(xmm1, xmm1);
|
||||
|
||||
// m_env.d[i].ga = g.upl16(a);
|
||||
// m_local.d[i].ga = g.upl16(a);
|
||||
|
||||
vpunpcklwd(xmm0, xmm1);
|
||||
vmovdqa(ptr[&m_env.d[i].ga], xmm0);
|
||||
vmovdqa(ptr[&m_local.d[i].ga], xmm0);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -530,14 +530,14 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
vpsrlw(xmm0, 7);
|
||||
}
|
||||
|
||||
// m_env.c.rb = c.xxxx();
|
||||
// m_env.c.ga = c.zzzz();
|
||||
// m_local.c.rb = c.xxxx();
|
||||
// m_local.c.ga = c.zzzz();
|
||||
|
||||
vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vpshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
vmovdqa(ptr[&m_env.c.rb], xmm1);
|
||||
vmovdqa(ptr[&m_env.c.ga], xmm2);
|
||||
vmovdqa(ptr[&m_local.c.rb], xmm1);
|
||||
vmovdqa(ptr[&m_local.c.ga], xmm2);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -549,14 +549,14 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
movaps(xmm0, ptr[edx]);
|
||||
movaps(xmm1, xmm0);
|
||||
|
||||
// m_env.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
|
||||
// m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
|
||||
|
||||
movaps(xmm2, xmm0);
|
||||
mulps(xmm2, xmm3);
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
pshufd(xmm2, xmm2, _MM_SHUFFLE(3, 1, 2, 0));
|
||||
packssdw(xmm2, xmm2);
|
||||
movdqa(ptr[&m_env.d4.c], xmm2);
|
||||
movdqa(ptr[&m_local.d4.c], xmm2);
|
||||
|
||||
// xmm3 is not needed anymore
|
||||
|
||||
|
@ -582,10 +582,10 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
cvttps2dq(xmm3, xmm3);
|
||||
packssdw(xmm3, xmm3);
|
||||
|
||||
// m_env.d[i].rb = r.upl16(b);
|
||||
// m_local.d[i].rb = r.upl16(b);
|
||||
|
||||
punpcklwd(xmm2, xmm3);
|
||||
movdqa(ptr[&m_env.d[i].rb], xmm2);
|
||||
movdqa(ptr[&m_local.d[i].rb], xmm2);
|
||||
}
|
||||
|
||||
// GSVector4 c = dscan.c;
|
||||
|
@ -615,10 +615,10 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
cvttps2dq(xmm3, xmm3);
|
||||
packssdw(xmm3, xmm3);
|
||||
|
||||
// m_env.d[i].ga = g.upl16(a);
|
||||
// m_local.d[i].ga = g.upl16(a);
|
||||
|
||||
punpcklwd(xmm2, xmm3);
|
||||
movdqa(ptr[&m_env.d[i].ga], xmm2);
|
||||
movdqa(ptr[&m_local.d[i].ga], xmm2);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -640,14 +640,14 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
psrlw(xmm0, 7);
|
||||
}
|
||||
|
||||
// m_env.c.rb = c.xxxx();
|
||||
// m_env.c.ga = c.zzzz();
|
||||
// m_local.c.rb = c.xxxx();
|
||||
// m_local.c.ga = c.zzzz();
|
||||
|
||||
pshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
movdqa(ptr[&m_env.c.rb], xmm1);
|
||||
movdqa(ptr[&m_env.c.ga], xmm2);
|
||||
movdqa(ptr[&m_local.c.rb], xmm1);
|
||||
movdqa(ptr[&m_local.c.ga], xmm2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,8 +30,8 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator
|
|||
|
||||
static const GSVector4 m_shift[5];
|
||||
|
||||
GSScanlineEnvironment& m_env;
|
||||
GSScanlineSelector m_sel;
|
||||
GSScanlineLocalData& m_local;
|
||||
|
||||
struct {uint32 z:1, f:1, t:1, c:1;} m_en;
|
||||
|
||||
|
|
|
@ -126,6 +126,12 @@ void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primc
|
|||
|
||||
using namespace Xbyak;
|
||||
|
||||
static const int _args = 0;
|
||||
static const int _v = _args + 4;
|
||||
static const int _count = _args + 8;
|
||||
static const int _min = _args + 12;
|
||||
static const int _max = _args + 16;
|
||||
|
||||
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
|
@ -133,8 +139,6 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
#error TODO
|
||||
#endif
|
||||
|
||||
const int params = 0;
|
||||
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
|
@ -157,11 +161,6 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
break;
|
||||
}
|
||||
|
||||
const int _v = params + 4;
|
||||
const int _count = params + 8;
|
||||
const int _min = params + 12;
|
||||
const int _max = params + 16;
|
||||
|
||||
//
|
||||
|
||||
if(m_cpu.has(util::Cpu::tAVX))
|
||||
|
@ -410,8 +409,6 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
#error TODO
|
||||
#endif
|
||||
|
||||
const int params = 0;
|
||||
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
|
@ -436,11 +433,6 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
break;
|
||||
}
|
||||
|
||||
const int _v = params + 4;
|
||||
const int _count = params + 8;
|
||||
const int _min = params + 12;
|
||||
const int _max = params + 16;
|
||||
|
||||
//
|
||||
|
||||
if(m_cpu.has(util::Cpu::tAVX))
|
||||
|
@ -748,8 +740,6 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
#error TODO
|
||||
#endif
|
||||
|
||||
const int params = 0;
|
||||
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
|
@ -772,11 +762,6 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
break;
|
||||
}
|
||||
|
||||
const int _v = params + 4;
|
||||
const int _count = params + 8;
|
||||
const int _min = params + 12;
|
||||
const int _max = params + 16;
|
||||
|
||||
//
|
||||
|
||||
if(m_cpu.has(util::Cpu::tAVX))
|
||||
|
|
Loading…
Reference in New Issue