Local static initializers are evil; avoid them like the plague.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4304 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-02-16 03:19:36 +00:00
parent 4dac657c9b
commit 65fc196688
25 changed files with 337 additions and 383 deletions

View File

@ -25,8 +25,8 @@
GPUDrawScanline::GPUDrawScanline(GPUState* state, int id)
: m_state(state)
, m_id(id)
, m_sp(m_env)
, m_ds(m_env)
, m_sp_map("GPUSetupPrim", &m_env)
, m_ds_map("GPUDrawScanline", &m_env)
{
}
@ -34,7 +34,7 @@ GPUDrawScanline::~GPUDrawScanline()
{
}
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
{
GPUDrawingEnvironment& env = m_state->m_env;
@ -69,9 +69,11 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
//
f->ssl = m_ds[m_env.sel];
m_ds = m_ds_map[m_env.sel];
f->sr = NULL; // TODO
m_de = NULL;
m_dr = NULL; // TODO
// doesn't need all bits => less functions generated
@ -84,36 +86,10 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
sel.twin = m_env.sel.twin;
sel.sprite = m_env.sel.sprite;
f->ssp = m_sp[sel];
m_sp = m_sp_map[sel];
}
void GPUDrawScanline::EndDraw(const GSRasterizerStats& stats)
{
m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame());
}
//
GPUDrawScanline::GPUSetupPrimMap::GPUSetupPrimMap(GPUScanlineEnvironment& env)
: GSCodeGeneratorFunctionMap("GPUSetupPrim")
, m_env(env)
{
}
GPUSetupPrimCodeGenerator* GPUDrawScanline::GPUSetupPrimMap::Create(uint32 key, void* ptr, size_t maxsize)
{
return new GPUSetupPrimCodeGenerator(m_env, ptr, maxsize);
}
//
GPUDrawScanline::GPUDrawScanlineMap::GPUDrawScanlineMap(GPUScanlineEnvironment& env)
: GSCodeGeneratorFunctionMap("GPUDrawScanline")
, m_env(env)
{
}
GPUDrawScanlineCodeGenerator* GPUDrawScanline::GPUDrawScanlineMap::Create(uint32 key, void* ptr, size_t maxsize)
{
return new GPUDrawScanlineCodeGenerator(m_env, ptr, maxsize);
m_ds_map.UpdateStats(stats, m_state->m_perfmon.GetFrame());
}

View File

@ -33,25 +33,8 @@ class GPUDrawScanline : public IDrawScanline
//
class GPUSetupPrimMap : public GSCodeGeneratorFunctionMap<GPUSetupPrimCodeGenerator, uint32, SetupPrimStaticPtr>
{
GPUScanlineEnvironment& m_env;
public:
GPUSetupPrimMap(GPUScanlineEnvironment& env);
GPUSetupPrimCodeGenerator* Create(uint32 key, void* ptr, size_t maxsize);
} m_sp;
//
class GPUDrawScanlineMap : public GSCodeGeneratorFunctionMap<GPUDrawScanlineCodeGenerator, uint32, DrawScanlineStaticPtr>
{
GPUScanlineEnvironment& m_env;
public:
GPUDrawScanlineMap(GPUScanlineEnvironment& env);
GPUDrawScanlineCodeGenerator* Create(uint32 key, void* ptr, size_t maxsize);
} m_ds;
GSCodeGeneratorFunctionMap<GPUSetupPrimCodeGenerator, uint32, SetupPrimPtr> m_sp_map;
GSCodeGeneratorFunctionMap<GPUDrawScanlineCodeGenerator, uint32, DrawScanlinePtr> m_ds_map;
protected:
GPUState* m_state;
@ -63,7 +46,7 @@ public:
// IDrawScanline
void BeginDraw(const GSRasterizerData* data, Functions* f);
void BeginDraw(const GSRasterizerData* data);
void EndDraw(const GSRasterizerStats& stats);
void PrintStats() {m_ds.PrintStats();}
void PrintStats() {m_ds_map.PrintStats();}
};

View File

@ -24,9 +24,9 @@
#include "StdAfx.h"
#include "GPUDrawScanlineCodeGenerator.h"
GPUDrawScanlineCodeGenerator::GPUDrawScanlineCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize)
: CodeGenerator(maxsize, ptr)
, m_env(env)
GPUDrawScanlineCodeGenerator::GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
, m_env(*(GPUScanlineEnvironment*)param)
{
#if _M_AMD64
#error TODO

View File

@ -22,20 +22,17 @@
#pragma once
#include "GPUScanlineEnvironment.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
#include "GSFunctionMap.h"
using namespace Xbyak;
class GPUDrawScanlineCodeGenerator : public CodeGenerator
class GPUDrawScanlineCodeGenerator : public GSCodeGenerator
{
void operator = (const GPUDrawScanlineCodeGenerator&);
static const GSVector4i m_test[8];
static const uint16 m_dither[4][16];
util::Cpu m_cpu;
GPUScanlineEnvironment& m_env;
void Generate();
@ -58,5 +55,5 @@ class GPUDrawScanlineCodeGenerator : public CodeGenerator
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
public:
GPUDrawScanlineCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize);
GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
};

View File

@ -25,9 +25,11 @@
#include "GSVertexSW.h"
#include "GPUSetupPrimCodeGenerator.h"
GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize)
: CodeGenerator(maxsize, ptr)
, m_env(env)
using namespace Xbyak;
GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
, m_env(*(GPUScanlineEnvironment*)param)
{
#if _M_AMD64
#error TODO

View File

@ -22,23 +22,18 @@
#pragma once
#include "GPUScanlineEnvironment.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
#include "GSFunctionMap.h"
using namespace Xbyak;
class GPUSetupPrimCodeGenerator : public CodeGenerator
class GPUSetupPrimCodeGenerator : public GSCodeGenerator
{
void operator = (const GPUSetupPrimCodeGenerator&);
static const GSVector4 m_shift[3];
util::Cpu m_cpu;
GPUScanlineEnvironment& m_env;
void Generate();
public:
GPUSetupPrimCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize);
GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
};

View File

@ -525,6 +525,73 @@ EXPORT_C GSsetFrameLimit(int limit)
#ifdef _WINDOWS
#include <io.h>
#include <fcntl.h>
class Console
{
HANDLE m_console;
string m_title;
public:
Console::Console(LPCSTR title, bool open)
: m_console(NULL)
, m_title(title)
{
if(open) Open();
}
Console::~Console()
{
Close();
}
void Console::Open()
{
if(m_console == NULL)
{
CONSOLE_SCREEN_BUFFER_INFO csbiInfo;
AllocConsole();
SetConsoleTitle(m_title.c_str());
m_console = GetStdHandle(STD_OUTPUT_HANDLE);
COORD size;
size.X = 100;
size.Y = 300;
SetConsoleScreenBufferSize(m_console, size);
GetConsoleScreenBufferInfo(m_console, &csbiInfo);
SMALL_RECT rect;
rect = csbiInfo.srWindow;
rect.Right = rect.Left + 99;
rect.Bottom = rect.Top + 64;
SetConsoleWindowInfo(m_console, TRUE, &rect);
*stdout = *_fdopen(_open_osfhandle((long)m_console, _O_TEXT), "w");
setvbuf(stdout, NULL, _IONBF, 0);
}
}
void Console::Close()
{
if(m_console != NULL)
{
FreeConsole();
m_console = NULL;
}
}
};
// lpszCmdLine:
// First parameter is the renderer.
// Second parameter is the gs file to load and run.
@ -547,6 +614,8 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
if(FILE* fp = fopen(lpszCmdLine, "rb"))
{
Console console("GSdx", true);
GSinit();
uint8 regs[0x2000];

View File

@ -1075,23 +1075,10 @@ public:
GSVector4i::sw64(v0, v2, v1, v3);
#ifdef _M_AMD64
((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend8(v0, mask);
((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend8(v1, mask);
((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend8(v2, mask);
((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend8(v3, mask);
#else
// here blend is faster than blend8 because vc8 has a little problem optimizing register usage for pblendvb (3rd op must be xmm0)
((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend(v0, mask);
((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend(v1, mask);
((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend(v2, mask);
((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend(v3, mask);
#endif
}
}

View File

@ -26,8 +26,8 @@
GSDrawScanline::GSDrawScanline(GSState* state, int id)
: m_state(state)
, m_id(id)
, m_sp(m_env)
, m_ds(m_env)
, m_sp_map("GSSetupPrim", &m_env)
, m_ds_map("GSDrawScanline", &m_env)
{
memset(&m_env, 0, sizeof(m_env));
}
@ -36,7 +36,7 @@ GSDrawScanline::~GSDrawScanline()
{
}
void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
{
GSDrawingEnvironment& env = m_state->m_env;
GSDrawingContext* context = m_state->m_context;
@ -98,7 +98,6 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
{
m_env.tex = p->tex;
m_env.clut = p->clut;
// m_env.tw = p->tw;
unsigned short tw = (unsigned short)(1 << context->TEX0.TW);
unsigned short th = (unsigned short)(1 << context->TEX0.TH);
@ -163,7 +162,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
//
f->ssl = m_ds[m_sel];
m_ds = m_ds_map[m_sel];
if(m_sel.aa1)// && (m_state->m_perfmon.GetFrame() & 0x40))
{
@ -173,12 +172,20 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
sel.zwrite = 0;
sel.edge = 1;
f->ssle = m_ds[sel];
m_de = m_ds_map[sel];
}
else
{
m_de = NULL;
}
if(m_sel.IsSolidRect())
{
f->sr = (DrawSolidRectPtr)&GSDrawScanline::DrawSolidRect;
m_dr = (DrawRectPtr)&GSDrawScanline::DrawRect;
}
else
{
m_dr = NULL;
}
// doesn't need all bits => less functions generated
@ -197,15 +204,15 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
sel.zb = m_sel.zb;
sel.zoverflow = m_sel.zoverflow;
f->ssp = m_sp[sel];
m_sp = m_sp_map[sel];
}
void GSDrawScanline::EndDraw(const GSRasterizerStats& stats)
{
m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame());
m_ds_map.UpdateStats(stats, m_state->m_perfmon.GetFrame());
}
void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
{
ASSERT(r.y >= 0);
ASSERT(r.w >= 0);
@ -224,22 +231,22 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
{
if(m == 0)
{
DrawSolidRectT<uint32, false>(m_env.zbr, m_env.zbc, r, z, m);
DrawRectT<uint32, false>(m_env.zbr, m_env.zbc, r, z, m);
}
else
{
DrawSolidRectT<uint32, true>(m_env.zbr, m_env.zbc, r, z, m);
DrawRectT<uint32, true>(m_env.zbr, m_env.zbc, r, z, m);
}
}
else
{
if(m == 0)
{
DrawSolidRectT<uint16, false>(m_env.zbr, m_env.zbc, r, z, m);
DrawRectT<uint16, false>(m_env.zbr, m_env.zbc, r, z, m);
}
else
{
DrawSolidRectT<uint16, true>(m_env.zbr, m_env.zbc, r, z, m);
DrawRectT<uint16, true>(m_env.zbr, m_env.zbc, r, z, m);
}
}
}
@ -259,11 +266,11 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
{
if(m == 0)
{
DrawSolidRectT<uint32, false>(m_env.fbr, m_env.fbc, r, c, m);
DrawRectT<uint32, false>(m_env.fbr, m_env.fbc, r, c, m);
}
else
{
DrawSolidRectT<uint32, true>(m_env.fbr, m_env.fbc, r, c, m);
DrawRectT<uint32, true>(m_env.fbr, m_env.fbc, r, c, m);
}
}
else
@ -272,18 +279,18 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
if(m == 0)
{
DrawSolidRectT<uint16, false>(m_env.fbr, m_env.fbc, r, c, m);
DrawRectT<uint16, false>(m_env.fbr, m_env.fbc, r, c, m);
}
else
{
DrawSolidRectT<uint16, true>(m_env.fbr, m_env.fbc, r, c, m);
DrawRectT<uint16, true>(m_env.fbr, m_env.fbc, r, c, m);
}
}
}
}
template<class T, bool masked>
void GSDrawScanline::DrawSolidRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m)
void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m)
{
if(m == 0xffffffff) return;
@ -358,29 +365,3 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
}
}
}
//
GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap(GSScanlineEnvironment& env)
: GSCodeGeneratorFunctionMap("GSSetupPrim")
, m_env(env)
{
}
GSSetupPrimCodeGenerator* GSDrawScanline::GSSetupPrimMap::Create(uint64 key, void* ptr, size_t maxsize)
{
return new GSSetupPrimCodeGenerator(m_env, key, ptr, maxsize);
}
//
GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap(GSScanlineEnvironment& env)
: GSCodeGeneratorFunctionMap("GSDrawScanline")
, m_env(env)
{
}
GSDrawScanlineCodeGenerator* GSDrawScanline::GSDrawScanlineMap::Create(uint64 key, void* ptr, size_t maxsize)
{
return new GSDrawScanlineCodeGenerator(m_env, key, ptr, maxsize);
}

View File

@ -32,34 +32,13 @@ class GSDrawScanline : public IDrawScanline
GSScanlineEnvironment m_env;
GSScanlineSelector m_sel;
//
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimPtr> m_sp_map;
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map;
class GSSetupPrimMap : public GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimStaticPtr>
{
GSScanlineEnvironment& m_env;
public:
GSSetupPrimMap(GSScanlineEnvironment& env);
GSSetupPrimCodeGenerator* Create(uint64 key, void* ptr, size_t maxsize);
} m_sp;
//
class GSDrawScanlineMap : public GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlineStaticPtr>
{
GSScanlineEnvironment& m_env;
public:
GSDrawScanlineMap(GSScanlineEnvironment& env);
GSDrawScanlineCodeGenerator* Create(uint64 key, void* ptr, size_t maxsize);
} m_ds;
//
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
template<class T, bool masked>
void DrawSolidRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
void DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
template<class T, bool masked>
__forceinline void FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
@ -77,7 +56,7 @@ public:
// IDrawScanline
void BeginDraw(const GSRasterizerData* data, Functions* f);
void BeginDraw(const GSRasterizerData* data);
void EndDraw(const GSRasterizerStats& stats);
void PrintStats() {m_ds.PrintStats();}
void PrintStats() {m_ds_map.PrintStats();}
};

View File

@ -25,9 +25,9 @@
#include "StdAfx.h"
#include "GSDrawScanlineCodeGenerator.h"
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize)
: CodeGenerator(maxsize, ptr)
, m_env(env)
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
, m_env(*(GSScanlineEnvironment*)param)
{
#if _M_AMD64
#error TODO
@ -385,8 +385,7 @@ void GSDrawScanlineCodeGenerator::Init(int params)
{
if(m_sel.ltf)
{
vmovdqa(xmm4, xmm3);
vpshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
vpshuflw(xmm4, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
vpshufhw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
vpsrlw(xmm4, 1);
vmovdqa(ptr[&m_env.temp.vf], xmm4);
@ -1136,6 +1135,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
mov(eax, 0x8000);
vmovd(xmm4, eax);
vpshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
vpsubd(xmm2, xmm4);
vpsubd(xmm3, xmm4);
}
@ -1199,18 +1199,16 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i y0 = uv0.uph16() << tw;
vpxor(xmm0, xmm0);
//vmovd(xmm1, ptr[&m_env.tw]);
vpunpcklwd(xmm4, xmm2, xmm0);
vpunpckhwd(xmm2, xmm2, xmm0);
vpslld(xmm2, m_sel.tw + 3); // xmm1);
vpslld(xmm2, m_sel.tw + 3);
// xmm0 = 0
// xmm1 = free // tw
// xmm2 = y0
// xmm3 = uv1 (ltf)
// xmm4 = x0
// xmm5, xmm6 = free
// xmm1, xmm5, xmm6 = free
// xmm7 = used
if(m_sel.ltf)
@ -1220,7 +1218,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
vpunpcklwd(xmm6, xmm3, xmm0);
vpunpckhwd(xmm3, xmm3, xmm0);
vpslld(xmm3, m_sel.tw + 3); // xmm1);
vpslld(xmm3, m_sel.tw + 3);
// xmm2 = y0
// xmm3 = y1
@ -1392,6 +1390,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
mov(eax, 0x8000);
movd(xmm4, eax);
pshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
psubd(xmm2, xmm4);
psubd(xmm3, xmm4);
}
@ -1458,19 +1457,17 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i x0 = uv0.upl16();
pxor(xmm0, xmm0);
// movd(xmm1, ptr[&m_env.tw]);
movdqa(xmm4, xmm2);
punpckhwd(xmm2, xmm0);
punpcklwd(xmm4, xmm0);
pslld(xmm2, m_sel.tw + 3); // xmm1);
pslld(xmm2, m_sel.tw + 3);
// xmm0 = 0
// xmm1 = free // tw
// xmm2 = y0
// xmm3 = uv1 (ltf)
// xmm4 = x0
// xmm5, xmm6 = free
// xmm1, xmm5, xmm6 = free
// xmm7 = used
if(m_sel.ltf)
@ -1481,7 +1478,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
movdqa(xmm6, xmm3);
punpckhwd(xmm3, xmm0);
punpcklwd(xmm6, xmm0);
pslld(xmm3, m_sel.tw + 3); // xmm1);
pslld(xmm3, m_sel.tw + 3);
// xmm2 = y0
// xmm3 = y1
@ -3475,7 +3472,7 @@ void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr)
{
if(m_cpu.has(util::Cpu::tAVX))
{
movq(dst, qword[addr * 2 + (size_t)m_env.vm]);
vmovq(dst, qword[addr * 2 + (size_t)m_env.vm]);
vmovhps(dst, qword[addr * 2 + (size_t)m_env.vm + 8 * 2]);
}
else
@ -3496,7 +3493,7 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
{
test(mask, 0x0f);
je("@f");
movq(qword[addr * 2 + (size_t)m_env.vm], src);
vmovq(qword[addr * 2 + (size_t)m_env.vm], src);
L("@@");
test(mask, 0xf0);
@ -3548,11 +3545,11 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
}
}
static const int s_offsets[4] = {0, 2, 8, 10};
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm)
{
static const int offsets[4] = {0, 2, 8, 10};
Address dst = ptr[addr * 2 + (size_t)m_env.vm + offsets[i] * 2];
Address dst = ptr[addr * 2 + (size_t)m_env.vm + s_offsets[i] * 2];
if(m_cpu.has(util::Cpu::tAVX))
{

View File

@ -22,19 +22,16 @@
#pragma once
#include "GSScanlineEnvironment.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
#include "GSFunctionMap.h"
using namespace Xbyak;
class GSDrawScanlineCodeGenerator : public CodeGenerator
class GSDrawScanlineCodeGenerator : public GSCodeGenerator
{
void operator = (const GSDrawScanlineCodeGenerator&);
static const GSVector4i m_test[8];
util::Cpu m_cpu;
GSScanlineEnvironment& m_env;
GSScanlineSelector m_sel;
@ -75,5 +72,5 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
void blend8r(const Xmm& b, const Xmm& a);
public:
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize);
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
};

View File

@ -23,6 +23,8 @@
#include "GS.h"
#include "GSCodeBuffer.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
struct GSRasterizerStats
{
@ -128,6 +130,8 @@ public:
}
}
printf("GS stats\n");
for(hash_map<KEY, ActivePtr*>::iterator i = m_map_active.begin(); i != m_map_active.end(); i++)
{
KEY key = i->first;
@ -139,7 +143,7 @@ public:
int64 tpf = p->frames > 0 ? p->ticks / p->frames : 0;
int64 ppf = p->frames > 0 ? p->pixels / p->frames : 0;
printf("[%012I64x]%c %6.2f%% | %5.2f%% | f %4I64d | p %10I64d | tpp %4I64d | tpf %9I64d | ppf %7I64d\n",
printf("[%016I64x]%c %6.2f%% | %5.2f%% | f %4I64d | p %10I64d | tpp %4I64d | tpf %9I64d | ppf %7I64d\n",
(uint64)key, m_map.find(key) == m_map.end() ? '*' : ' ',
(float)(tpf * 10000 / 50000000) / 100,
(float)(tpf * 10000 / ttpf) / 100,
@ -150,54 +154,58 @@ public:
}
};
class GSCodeGenerator : public Xbyak::CodeGenerator
{
protected:
Xbyak::util::Cpu m_cpu;
public:
GSCodeGenerator(void* code, size_t maxsize)
: Xbyak::CodeGenerator(maxsize, code)
{
}
};
#include "vtune/JITProfiling.h"
template<class CG, class KEY, class VALUE>
class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
{
string m_name;
hash_map<uint64, CG*> m_cgmap;
void* m_param;
hash_map<uint64, VALUE> m_cgmap;
GSCodeBuffer m_cb;
enum {MAX_SIZE = 4096};
protected:
virtual CG* Create(KEY key, void* ptr, size_t maxsize = MAX_SIZE) = 0;
public:
GSCodeGeneratorFunctionMap(const char* name)
GSCodeGeneratorFunctionMap(const char* name, void* param)
: m_name(name)
, m_param(param)
{
}
virtual ~GSCodeGeneratorFunctionMap()
{
for_each(m_cgmap.begin(), m_cgmap.end(), delete_second());
}
VALUE GetDefaultFunction(KEY key)
{
CG* cg = NULL;
VALUE ret = NULL;
hash_map<uint64, CG*>::iterator i = m_cgmap.find(key);
hash_map<uint64, VALUE>::iterator i = m_cgmap.find(key);
if(i != m_cgmap.end())
{
cg = i->second;
ret = i->second;
}
else
{
void* ptr = m_cb.GetBuffer(MAX_SIZE);
cg = Create(key, ptr, MAX_SIZE);
ASSERT(cg);
CG* cg = new CG(m_param, key, m_cb.GetBuffer(MAX_SIZE), MAX_SIZE);
ASSERT(cg->getSize() < MAX_SIZE);
m_cb.ReleaseBuffer(cg->getSize());
m_cgmap[key] = cg;
ret = (VALUE)cg->getCode();
m_cgmap[key] = ret;
// vtune method registration
@ -216,8 +224,10 @@ public:
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
}
delete cg;
}
return (VALUE)cg->getCode();
return ret;
}
};

View File

@ -28,6 +28,8 @@
// to wait out all the pending m_finished semaphores. It leaves one spinwait in the rasterizer,
// but that's still worlds better than 2-6 spinning threads like before.
// NOTE: spinning: 100-500 ticks, waiting: 1000-5000 ticks
//
#define UseSpinningFinish
@ -71,12 +73,7 @@ __forceinline bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
void GSRasterizer::Draw(const GSRasterizerData* data)
{
m_dsf.ssl = NULL;
m_dsf.ssle = NULL;
m_dsf.ssp = NULL;
m_dsf.sr = NULL;
m_ds->BeginDraw(data, &m_dsf);
m_ds->BeginDraw(data);
const GSVector4i scissor = data->scissor;
const GSVertexSW* vertices = data->vertices;
@ -131,9 +128,9 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
{
if(IsOneOfMyScanlines(p.y))
{
m_dsf.ssp(v, *v);
m_ds->SetupPrim(v, *v);
m_dsf.ssl(p.x + 1, p.x, p.y, *v);
m_ds->DrawScanline(p.x + 1, p.x, p.y, *v);
m_stats.pixels++;
}
@ -146,7 +143,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
GSVector4 dp = dv.p.abs();
if(m_dsf.ssle)
if(m_ds->IsEdge())
{
int i = (dp < dp.yxwz()).mask() & 1; // |x| <= |y|
@ -156,7 +153,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
dscan.t = GSVector4::zero();
dscan.c = GSVector4::zero();
m_dsf.ssp(v, dscan);
m_ds->SetupPrim(v, dscan);
DrawEdge(v[0], v[1], dv, scissor, i, 0);
DrawEdge(v[0], v[1], dv, scissor, i, 1);
@ -190,7 +187,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
{
GSVertexSW dscan = dv / dv.p.xxxx();
m_dsf.ssp(v, dscan);
m_ds->SetupPrim(v, dscan);
l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
@ -250,9 +247,9 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& sc
i = (aabb == bccb).mask() & 7;
if(m_dsf.ssle)
if(m_ds->IsEdge())
{
DrawTriangleEdge(v, scissor);
DrawEdge(v, scissor);
}
switch(i)
@ -273,7 +270,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& sc
}
}
void GSRasterizer::DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor)
void GSRasterizer::DrawEdge(const GSVertexSW* v, const GSVector4i& scissor)
{
GSVertexSW dv[3];
@ -297,7 +294,7 @@ void GSRasterizer::DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& sciss
dscan.t = GSVector4::zero();
dscan.c = GSVector4::zero();
m_dsf.ssp(v, dscan); // TODO: don't call it twice (can't be sure about the second call if the triangle is too small)
m_ds->SetupPrim(v, dscan); // TODO: don't call it twice (can't be sure about the second call if the triangle is too small)
DrawEdge(v[0], v[1], dv[0], scissor, i & 1, j & 1);
DrawEdge(v[0], v[2], dv[1], scissor, i & 2, j & 2);
@ -351,7 +348,7 @@ void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor)
l += dl * dy;
m_dsf.ssp(v, dscan);
m_ds->SetupPrim(v, dscan);
DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
}
@ -403,7 +400,7 @@ void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor)
l += dl * dy;
m_dsf.ssp(v, dscan);
m_ds->SetupPrim(v, dscan);
DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
}
@ -425,7 +422,7 @@ void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scisso
GSVertexSW dscan = longest * longest.p.xxxx().rcp();
m_dsf.ssp(v, dscan);
m_ds->SetupPrim(v, dscan);
GSVector4 fscissor(scissor);
@ -511,7 +508,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx();
m_dsf.ssl(right, left, top, scan);
m_ds->DrawScanline(right, left, top, scan);
}
}
}
@ -552,7 +549,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx();
m_dsf.ssl(right, left, top, scan);
m_ds->DrawScanline(right, left, top, scan);
}
}
}
@ -585,11 +582,11 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
GSVertexSW scan = v[0];
if(m_dsf.sr)
if(m_ds->IsRect())
{
if(m_id == 0)
{
(m_ds->*m_dsf.sr)(r, scan);
m_ds->DrawRect(r, scan);
m_stats.pixels += r.width() * r.height();
}
@ -615,13 +612,13 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
if(scan.p.y < (float)r.top) scan.t += dedge.t * ((float)r.top - scan.p.y);
if(scan.p.x < (float)r.left) scan.t += dscan.t * ((float)r.left - scan.p.x);
m_dsf.ssp(v, dscan);
m_ds->SetupPrim(v, dscan);
for(; r.top < r.bottom; r.top++, scan.t += dedge.t)
{
if(IsOneOfMyScanlines(r.top))
{
m_dsf.ssl(r.right, r.left, r.top, scan);
m_ds->DrawScanline(r.right, r.left, r.top, scan);
m_stats.pixels += r.width();
}
@ -700,7 +697,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
edge.t.u32[3] = (0x10000 - xf) & 0xffff;
m_dsf.ssle(xi + 1, xi, top, edge);
m_ds->DrawEdge(xi + 1, xi, top, edge);
edge.t.u32[3] = 0;
}
@ -728,7 +725,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
edge.t.u32[3] = xf;
m_dsf.ssle(xi + 1, xi, top, edge);
m_ds->DrawEdge(xi + 1, xi, top, edge);
edge.t.u32[3] = 0;
}
@ -798,7 +795,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
edge.t.u32[3] = (0x10000 - yf) & 0xffff;
m_dsf.ssle(left + 1, left, yi, edge);
m_ds->DrawEdge(left + 1, left, yi, edge);
edge.t.u32[3] = 0;
}
@ -826,7 +823,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
edge.t.u32[3] = yf;
m_dsf.ssle(left + 1, left, yi, edge);
m_ds->DrawEdge(left + 1, left, yi, edge);
edge.t.u32[3] = 0;
}
@ -888,7 +885,7 @@ void GSRasterizerMT::ThreadProc()
return;
case WAIT_OBJECT_0 + 1: // draw
__super::Draw(m_data);
#ifdef UseSpinningFinish
@ -930,21 +927,8 @@ void GSRasterizerList::FreeRasterizers()
m_ready.clear();
}
void GSRasterizerList::Draw(const GSRasterizerData* data)
void GSRasterizerList::Sync()
{
m_stats.Reset();
int64 start = __rdtsc();
m_sync = m_syncstart;
for(size_t i = 1; i < size(); i++)
{
(*this)[i]->Draw(data);
}
(*this)[0]->Draw(data);
#ifdef UseSpinningFinish
while(m_sync) _mm_pause();
@ -955,7 +939,7 @@ void GSRasterizerList::Draw(const GSRasterizerData* data)
#endif
m_stats.ticks = __rdtsc() - start;
m_stats.ticks = __rdtsc() - m_start;
for(size_t i = 0; i < size(); i++)
{
@ -968,6 +952,22 @@ void GSRasterizerList::Draw(const GSRasterizerData* data)
}
}
// Kicks off one draw on every rasterizer in the list. Resets the accumulated stats, stores the
// start timestamp in m_start (Sync() subtracts it to compute m_stats.ticks) and reloads m_sync
// from m_syncstart (Sync() spins while m_sync is non-zero). This function does not wait for the
// rasterizers to finish; call Sync() for that.
void GSRasterizerList::Draw(const GSRasterizerData* data)
{
m_stats.Reset();
m_start = __rdtsc();
m_sync = m_syncstart;
// rasterizers 1..size()-1 are given the job first (presumably the threaded ones - TODO confirm)
for(size_t i = 1; i < size(); i++)
{
(*this)[i]->Draw(data);
}
// rasterizer 0 is invoked last, from the calling thread
(*this)[0]->Draw(data);
}
void GSRasterizerList::GetStats(GSRasterizerStats& stats)
{
stats = m_stats;

View File

@ -36,6 +36,36 @@ public:
int count;
const void* param;
};
// Interface between the rasterizer and a scanline drawing backend. Implementations select the
// functions for the current draw in BeginDraw() and store them in the pointers below; the
// rasterizer then calls them through the inline wrappers instead of receiving a function table.
class IDrawScanline : public GSAlignedClass<32>
{
public:
// signatures of the selected drawing functions
typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
typedef void (__fastcall *DrawScanlinePtr)(int right, int left, int top, const GSVertexSW& scan);
typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit
protected:
// All four start out NULL and are assigned by the implementation's BeginDraw().
SetupPrimPtr m_sp; // per-primitive setup
DrawScanlinePtr m_ds; // regular scanline
DrawScanlinePtr m_de; // edge scanline, NULL when edges are not drawn (see IsEdge)
DrawRectPtr m_dr; // rectangle fill member function, NULL when not applicable (see IsRect)
public:
IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {}
virtual ~IDrawScanline() {}
virtual void BeginDraw(const GSRasterizerData* data) = 0;
virtual void EndDraw(const GSRasterizerStats& stats) = 0;
virtual void PrintStats() = 0;
// The wrappers below call straight through the pointers without a NULL check:
// SetupPrim/DrawScanline require a preceding BeginDraw(), DrawEdge requires IsEdge(),
// DrawRect requires IsRect().
__forceinline void SetupPrim(const GSVertexSW* v, const GSVertexSW& dscan) {m_sp(v, dscan);}
__forceinline void DrawScanline(int right, int left, int top, const GSVertexSW& scan) {m_ds(right, left, top, scan);}
__forceinline void DrawEdge(int right, int left, int top, const GSVertexSW& scan) {m_de(right, left, top, scan);}
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
// true when the corresponding optional function has been set for the current draw
__forceinline bool IsEdge() const {return m_de != NULL;}
__forceinline bool IsRect() const {return m_dr != NULL;}
};
class IRasterizer
{
@ -47,33 +77,10 @@ public:
virtual void PrintStats() = 0;
};
class IDrawScanline : public GSAlignedClass<32>
{
public:
typedef void (__fastcall *DrawScanlineStaticPtr)(int right, int left, int top, const GSVertexSW& v);
typedef void (__fastcall *SetupPrimStaticPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v);
struct Functions
{
DrawScanlineStaticPtr ssl;
DrawScanlineStaticPtr ssle;
SetupPrimStaticPtr ssp;
DrawSolidRectPtr sr; // TODO
};
virtual ~IDrawScanline() {}
virtual void BeginDraw(const GSRasterizerData* data, Functions* dsf) = 0;
virtual void EndDraw(const GSRasterizerStats& stats) = 0;
virtual void PrintStats() = 0;
};
class GSRasterizer : public IRasterizer
{
protected:
IDrawScanline* m_ds;
IDrawScanline::Functions m_dsf;
int m_id;
int m_threads;
GSRasterizerStats m_stats;
@ -81,7 +88,7 @@ protected:
void DrawPoint(const GSVertexSW* v, const GSVector4i& scissor);
void DrawLine(const GSVertexSW* v, const GSVector4i& scissor);
void DrawTriangle(const GSVertexSW* v, const GSVector4i& scissor);
void DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor);
void DrawEdge(const GSVertexSW* v, const GSVector4i& scissor);
void DrawSprite(const GSVertexSW* v, const GSVector4i& scissor);
void DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor);
@ -133,6 +140,8 @@ protected:
volatile long m_sync;
long m_syncstart;
GSRasterizerStats m_stats;
int64 m_start;
void FreeRasterizers();
public:
@ -145,10 +154,10 @@ public:
threads = std::max<int>(threads, 1); // TODO: min(threads, number of cpu cores)
push_back(new GSRasterizer(new DS(parent, 0), 0, threads));
m_syncstart = 0;
push_back(new GSRasterizer(new DS(parent, 0), 0, threads));
for(int i = 1; i < threads; i++)
{
HANDLE ready = CreateEvent(NULL, FALSE, FALSE, NULL);
@ -161,6 +170,8 @@ public:
}
}
void Sync();
// IRasterizer
void Draw(const GSRasterizerData* data);

View File

@ -180,13 +180,6 @@ void GSRendererSW::Draw()
m_rl.Draw(&data);
GSRasterizerStats stats;
m_rl.GetStats(stats);
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor);
if(p.fm != 0xffffffff)
@ -199,6 +192,18 @@ void GSRendererSW::Draw()
m_tc->InvalidateVideoMem(m_context->offset.zb, r);
}
// By syncing only here, the two InvalidateVideoMem calls come for free if the other threads finish
// their drawing later than this one (they usually do, because they start on an event).
m_rl.Sync();
GSRasterizerStats stats;
m_rl.GetStats(stats);
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
@ -373,7 +378,6 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
p.tex = t->m_buff;
p.clut = m_mem.m_clut;
// p.tw = t->m_tw;
p.sel.tw = t->m_tw - 3;
}

View File

@ -108,7 +108,6 @@ __aligned32 struct GSScanlineParam
void* vm;
const void* tex;
const uint32* clut;
//uint32 tw;
GSOffset* fbo;
GSOffset* zbo;
@ -122,7 +121,6 @@ __aligned32 struct GSScanlineEnvironment
void* vm;
const void* tex;
const uint32* clut;
//uint32 tw;
int* fbr;
int* zbr;

View File

@ -24,9 +24,11 @@
#include "StdAfx.h"
#include "GSSetupPrimCodeGenerator.h"
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize)
: CodeGenerator(maxsize, ptr)
, m_env(env)
using namespace Xbyak;
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
, m_env(*(GSScanlineEnvironment*)param)
{
m_sel.key = key;
@ -79,7 +81,7 @@ void GSSetupPrimCodeGenerator::Depth()
{
if(!m_sel.sprite)
{
// GSVector4 t = dscan.p;
// GSVector4 p = dscan.p;
vmovaps(xmm0, ptr[edx + 16]);
@ -184,7 +186,7 @@ void GSSetupPrimCodeGenerator::Depth()
{
if(!m_sel.sprite)
{
// GSVector4 t = dscan.p;
// GSVector4 p = dscan.p;
movaps(xmm0, ptr[edx + 16]);

View File

@ -22,19 +22,14 @@
#pragma once
#include "GSScanlineEnvironment.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
#include "GSFunctionMap.h"
using namespace Xbyak;
class GSSetupPrimCodeGenerator : public CodeGenerator
class GSSetupPrimCodeGenerator : public GSCodeGenerator
{
void operator = (const GSSetupPrimCodeGenerator&);
static const GSVector4 m_shift[5];
util::Cpu m_cpu;
GSScanlineEnvironment& m_env;
GSScanlineSelector m_sel;
@ -47,5 +42,5 @@ class GSSetupPrimCodeGenerator : public CodeGenerator
void Color();
public:
GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize);
GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
};

View File

@ -61,15 +61,15 @@ GSVector4i GSVector4i::fit(int arx, int ary) const
return r;
}
static const int s_ar[][2] = {{0, 0}, {4, 3}, {16, 9}};
GSVector4i GSVector4i::fit(int preset) const
{
GSVector4i r;
static const int ar[][2] = {{0, 0}, {4, 3}, {16, 9}};
if(preset > 0 && preset < countof(ar))
if(preset > 0 && preset < countof(s_ar))
{
r = fit(ar[preset][0], ar[preset][1]);
r = fit(s_ar[preset][0], s_ar[preset][1]);
}
else
{

View File

@ -26,8 +26,14 @@
#include "GSUtil.h"
#include "GSState.h"
static const float s_fmin = -FLT_MAX;
static const float s_fmax = FLT_MAX;
// Constructs the vertex tracer for the given GS state.
//
// Stores the state pointer and constructs the three code-generator function maps with the
// names "VertexTraceSW", "VertexTraceHW9" and "VertexTraceHW11". NULL is passed as the second
// constructor argument of every map; presumably this is the value the CGSW/CGHW9/CGHW11
// constructors later receive as `param` (TODO confirm against GSFunctionMap.h).
GSVertexTrace::GSVertexTrace(const GSState* state)
: m_state(state)
, m_map_sw("VertexTraceSW", NULL)
, m_map_hw9("VertexTraceHW9", NULL)
, m_map_hw11("VertexTraceHW11", NULL)
{
}
@ -120,8 +126,8 @@ void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primc
using namespace Xbyak;
GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize)
: CodeGenerator(maxsize, code)
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
#if _M_AMD64
#error TODO
@ -158,16 +164,13 @@ GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize)
//
static const float fmin = -FLT_MAX;
static const float fmax = FLT_MAX;
if(m_cpu.has(util::Cpu::tAVX))
{
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
vbroadcastss(xmm4, ptr[&fmax]);
vbroadcastss(xmm5, ptr[&fmin]);
vbroadcastss(xmm4, ptr[&s_fmax]);
vbroadcastss(xmm5, ptr[&s_fmin]);
if(color)
{
@ -282,8 +285,8 @@ GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize)
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movss(xmm4, ptr[&fmax]);
movss(xmm5, ptr[&fmin]);
movss(xmm4, ptr[&s_fmax]);
movss(xmm5, ptr[&s_fmin]);
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
@ -400,8 +403,8 @@ GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize)
ret();
}
GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize)
: CodeGenerator(maxsize, code)
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
#if _M_AMD64
#error TODO
@ -440,16 +443,13 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize)
//
static const float fmin = -FLT_MAX;
static const float fmax = FLT_MAX;
if(m_cpu.has(util::Cpu::tAVX))
{
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
vbroadcastss(xmm4, ptr[&fmax]);
vbroadcastss(xmm5, ptr[&fmin]);
vbroadcastss(xmm4, ptr[&s_fmax]);
vbroadcastss(xmm5, ptr[&s_fmin]);
if(color)
{
@ -593,8 +593,8 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize)
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movss(xmm4, ptr[&fmax]);
movss(xmm5, ptr[&fmin]);
movss(xmm4, ptr[&s_fmax]);
movss(xmm5, ptr[&s_fmin]);
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
@ -741,8 +741,8 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize)
ret();
}
GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize)
: CodeGenerator(maxsize, code)
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
#if _M_AMD64
#error TODO
@ -779,16 +779,13 @@ GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize)
//
static const float fmin = -FLT_MAX;
static const float fmax = FLT_MAX;
if(m_cpu.has(util::Cpu::tAVX))
{
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
vbroadcastss(xmm4, ptr[&fmax]);
vbroadcastss(xmm5, ptr[&fmin]);
vbroadcastss(xmm4, ptr[&s_fmax]);
vbroadcastss(xmm5, ptr[&s_fmin]);
if(color)
{
@ -931,8 +928,8 @@ GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize)
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movss(xmm4, ptr[&fmax]);
movss(xmm5, ptr[&fmin]);
movss(xmm4, ptr[&s_fmax]);
movss(xmm5, ptr[&s_fmin]);
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));

View File

@ -26,8 +26,6 @@
#include "GSVertexSW.h"
#include "GSVertexHW.h"
#include "GSFunctionMap.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
class GSState;
@ -38,54 +36,27 @@ __aligned32 class GSVertexTrace
typedef void (*VertexTracePtr)(const void* v, int count, Vertex& min, Vertex& max);
// Xbyak-based generator for the "VertexTraceSW" routine.
// The constructor (defined in the .cpp) forwards code/maxsize to Xbyak::CodeGenerator and
// emits the routine; m_cpu is queried there for AVX support to choose between the AVX and
// the SSE instruction sequence.
class CGSW : public Xbyak::CodeGenerator
{
Xbyak::util::Cpu m_cpu;
public:
// key: selector the routine is generated for; code/maxsize: destination buffer and its size.
CGSW(uint32 key, void* code, size_t maxsize);
};
class GSVertexTraceMapSW : public GSCodeGeneratorFunctionMap<CGSW, uint32, VertexTracePtr>
class CGSW : public GSCodeGenerator
{
public:
GSVertexTraceMapSW() : GSCodeGeneratorFunctionMap("VertexTraceSW") {}
CGSW* Create(uint32 key, void* code, size_t maxsize) {return new CGSW(key, code, maxsize);}
CGSW(const void* param, uint32 key, void* code, size_t maxsize);
};
// Xbyak-based generator for the "VertexTraceHW9" routine.
// The constructor (defined in the .cpp) forwards the buffer and its size to
// Xbyak::CodeGenerator and emits the routine; m_cpu is queried there for AVX support to
// choose between the AVX and the SSE instruction sequence.
class CGHW9 : public Xbyak::CodeGenerator
{
Xbyak::util::Cpu m_cpu;
public:
// key: selector the routine is generated for; ptr/maxsize: destination buffer and its size
// (the definition in the .cpp names the buffer parameter `code`).
CGHW9(uint32 key, void* ptr, size_t maxsize);
};
class GSVertexTraceMapHW9 : public GSCodeGeneratorFunctionMap<CGHW9, uint32, VertexTracePtr>
class CGHW9 : public GSCodeGenerator
{
public:
GSVertexTraceMapHW9() : GSCodeGeneratorFunctionMap("VertexTraceHW9") {}
CGHW9* Create(uint32 key, void* code, size_t maxsize) {return new CGHW9(key, code, maxsize);}
CGHW9(const void* param, uint32 key, void* code, size_t maxsize);
};
// Xbyak-based generator for the "VertexTraceHW11" routine.
// The constructor (defined in the .cpp) forwards the buffer and its size to
// Xbyak::CodeGenerator and emits the routine; m_cpu is queried there for AVX support to
// choose between the AVX and the SSE instruction sequence.
class CGHW11 : public Xbyak::CodeGenerator
{
Xbyak::util::Cpu m_cpu;
public:
// key: selector the routine is generated for; ptr/maxsize: destination buffer and its size
// (the definition in the .cpp names the buffer parameter `code`).
CGHW11(uint32 key, void* ptr, size_t maxsize);
};
class GSVertexTraceMapHW11 : public GSCodeGeneratorFunctionMap<CGHW11, uint32, VertexTracePtr>
class CGHW11 : public GSCodeGenerator
{
public:
GSVertexTraceMapHW11() : GSCodeGeneratorFunctionMap("VertexTraceHW11") {}
CGHW11* Create(uint32 key, void* code, size_t maxsize) {return new CGHW11(key, code, maxsize);}
CGHW11(const void* param, uint32 key, void* code, size_t maxsize);
};
GSVertexTraceMapSW m_map_sw;
GSVertexTraceMapHW9 m_map_hw9;
GSVertexTraceMapHW11 m_map_hw11;
GSCodeGeneratorFunctionMap<CGSW, uint32, VertexTracePtr> m_map_sw;
GSCodeGeneratorFunctionMap<CGHW9, uint32, VertexTracePtr> m_map_hw9;
GSCodeGeneratorFunctionMap<CGHW11, uint32, VertexTracePtr> m_map_hw11;
uint32 Hash(GS_PRIM_CLASS primclass);

View File

@ -720,7 +720,9 @@
<ClCompile Include="GSDeviceNull.cpp" />
<ClCompile Include="GSDialog.cpp" />
<ClCompile Include="GSDirtyRect.cpp" />
<ClCompile Include="GSDrawScanline.cpp" />
<ClCompile Include="GSDrawScanline.cpp">
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
</ClCompile>
<ClCompile Include="GSDrawScanlineCodeGenerator.cpp" />
<ClCompile Include="GSDump.cpp" />
<ClCompile Include="GSdx.cpp">

View File

@ -1079,6 +1079,8 @@ public:
const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
const AddressFrame ptr, byte, word, dword, qword;
const Fpu st0, st1, st2, st3, st4, st5, st6, st7;
const Xmm* xmTbl[16];
const Ymm* ymTbl[16];
#ifdef XBYAK64
const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d;
@ -1444,18 +1446,6 @@ public:
// Converts a register operand to the XMM or YMM register that has the same index.
//
// op   - operand to convert
// cvt  - when false, op is returned untouched
// kind - Operand::XMM selects the xmN register; any other value selects ymN
//
// Returns op itself, or a reference to one of this generator's own xmN/ymN members.
//
// The lookup tables are deliberately plain locals and not function-local statics: a static
// table is filled exactly once, with the addresses of the register members of whichever
// instance happened to call first. Every other instance would then be handed references
// into that object, which dangle as soon as it is destroyed (and the lazy initialisation
// itself is not synchronised on pre-C++11 compilers).
const Operand& cvtReg(const Operand& op, bool cvt, Operand::Kind kind) const
{
	if (!cvt) return op;

	if (kind == Operand::XMM) {
		const Xmm* const xmRegs[] = {
			&xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7,
#ifdef XBYAK64
			&xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15
#endif
		};
		return *xmRegs[op.getIdx()];
	}

	const Ymm* const ymRegs[] = {
		&ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7,
#ifdef XBYAK64
		&ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15
#endif
	};
	return *ymRegs[op.getIdx()];
}
// support (x, x/m, imm), (y, y/m, imm)
@ -1490,6 +1480,17 @@ public:
, rip()
#endif
{
xmTbl[0] = &xm0; xmTbl[1] = &xm1; xmTbl[2] = &xm2; xmTbl[3] = &xm3;
xmTbl[4] = &xm4; xmTbl[5] = &xm5; xmTbl[6] = &xm6; xmTbl[7] = &xm7;
ymTbl[0] = &ym0; ymTbl[1] = &ym1; ymTbl[2] = &ym2; ymTbl[3] = &ym3;
ymTbl[4] = &ym4; ymTbl[5] = &ym5; ymTbl[6] = &ym6; ymTbl[7] = &ym7;
#ifdef XBYAK64
xmTbl[8] = &xm8; xmTbl[9] = &xm9; xmTbl[10] = &xm10; xmTbl[11] = &xm11;
xmTbl[12] = &xm12; xmTbl[13] = &xm13; xmTbl[14] = &xm14; xmTbl[15] = &xm15;
ymTbl[8] = &ym8; ymTbl[9] = &ym9; ymTbl[10] = &ym10; ymTbl[11] = &ym11;
ymTbl[12] = &ym12; ymTbl[13] = &ym13; ymTbl[14] = &ym14; ymTbl[15] = &ym15;
#endif
label_.set(this);
}
bool hasUndefinedLabel() const { return label_.hasUndefinedLabel(); }

View File

@ -1008,11 +1008,11 @@ void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()
// AVX packed-double conversion emitters (vcvtpd2ps / vcvtpd2dq / vcvttpd2dq).
// All three reject a YMM destination by throwing ERR_BAD_COMBINATION. When the source operand
// is a YMM register, the destination is handed to the encoder as a Ymm with x's index and ym0
// is used as the second argument; otherwise x and xm0 are used. Only the prefix flag and the
// opcode byte differ between the three.
void vcvtpd2ps(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0x5A, true); }
void vcvtpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_F2, 0xE6, true); }
void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0xE6, true); }
void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); }
void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); }
#ifdef XBYAK64
void vmovq(const Xmm& x, const Reg64& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x6E, false, 1); }
void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); }
void vmovq(const Reg64& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 1); }
void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); }
void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_F3, 0x7E, false, -1); }
// vpextrq / vpinsrq emitters. op must be a 64-bit general-purpose register or a memory
// operand, otherwise ERR_BAD_COMBINATION is thrown. A register op is first mapped through
// cvtReg() to the XMM register with the same index so it can be passed to opAVX_X_X_XM;
// a memory op is passed through unchanged. The immediate byte is emitted last via db().
void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 1); db(imm); }
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 1); db(imm); }