mirror of https://github.com/PCSX2/pcsx2.git
Local static initializers are evil; avoid them like the plague.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4304 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
4dac657c9b
commit
65fc196688
|
@ -25,8 +25,8 @@
|
|||
GPUDrawScanline::GPUDrawScanline(GPUState* state, int id)
|
||||
: m_state(state)
|
||||
, m_id(id)
|
||||
, m_sp(m_env)
|
||||
, m_ds(m_env)
|
||||
, m_sp_map("GPUSetupPrim", &m_env)
|
||||
, m_ds_map("GPUDrawScanline", &m_env)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,7 @@ GPUDrawScanline::~GPUDrawScanline()
|
|||
{
|
||||
}
|
||||
|
||||
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
||||
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||
{
|
||||
GPUDrawingEnvironment& env = m_state->m_env;
|
||||
|
||||
|
@ -69,9 +69,11 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
|
||||
//
|
||||
|
||||
f->ssl = m_ds[m_env.sel];
|
||||
m_ds = m_ds_map[m_env.sel];
|
||||
|
||||
f->sr = NULL; // TODO
|
||||
m_de = NULL;
|
||||
|
||||
m_dr = NULL; // TODO
|
||||
|
||||
// doesn't need all bits => less functions generated
|
||||
|
||||
|
@ -84,36 +86,10 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
sel.twin = m_env.sel.twin;
|
||||
sel.sprite = m_env.sel.sprite;
|
||||
|
||||
f->ssp = m_sp[sel];
|
||||
m_sp = m_sp_map[sel];
|
||||
}
|
||||
|
||||
void GPUDrawScanline::EndDraw(const GSRasterizerStats& stats)
|
||||
{
|
||||
m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame());
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
GPUDrawScanline::GPUSetupPrimMap::GPUSetupPrimMap(GPUScanlineEnvironment& env)
|
||||
: GSCodeGeneratorFunctionMap("GPUSetupPrim")
|
||||
, m_env(env)
|
||||
{
|
||||
}
|
||||
|
||||
GPUSetupPrimCodeGenerator* GPUDrawScanline::GPUSetupPrimMap::Create(uint32 key, void* ptr, size_t maxsize)
|
||||
{
|
||||
return new GPUSetupPrimCodeGenerator(m_env, ptr, maxsize);
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
GPUDrawScanline::GPUDrawScanlineMap::GPUDrawScanlineMap(GPUScanlineEnvironment& env)
|
||||
: GSCodeGeneratorFunctionMap("GPUDrawScanline")
|
||||
, m_env(env)
|
||||
{
|
||||
}
|
||||
|
||||
GPUDrawScanlineCodeGenerator* GPUDrawScanline::GPUDrawScanlineMap::Create(uint32 key, void* ptr, size_t maxsize)
|
||||
{
|
||||
return new GPUDrawScanlineCodeGenerator(m_env, ptr, maxsize);
|
||||
m_ds_map.UpdateStats(stats, m_state->m_perfmon.GetFrame());
|
||||
}
|
||||
|
|
|
@ -33,25 +33,8 @@ class GPUDrawScanline : public IDrawScanline
|
|||
|
||||
//
|
||||
|
||||
class GPUSetupPrimMap : public GSCodeGeneratorFunctionMap<GPUSetupPrimCodeGenerator, uint32, SetupPrimStaticPtr>
|
||||
{
|
||||
GPUScanlineEnvironment& m_env;
|
||||
|
||||
public:
|
||||
GPUSetupPrimMap(GPUScanlineEnvironment& env);
|
||||
GPUSetupPrimCodeGenerator* Create(uint32 key, void* ptr, size_t maxsize);
|
||||
} m_sp;
|
||||
|
||||
//
|
||||
|
||||
class GPUDrawScanlineMap : public GSCodeGeneratorFunctionMap<GPUDrawScanlineCodeGenerator, uint32, DrawScanlineStaticPtr>
|
||||
{
|
||||
GPUScanlineEnvironment& m_env;
|
||||
|
||||
public:
|
||||
GPUDrawScanlineMap(GPUScanlineEnvironment& env);
|
||||
GPUDrawScanlineCodeGenerator* Create(uint32 key, void* ptr, size_t maxsize);
|
||||
} m_ds;
|
||||
GSCodeGeneratorFunctionMap<GPUSetupPrimCodeGenerator, uint32, SetupPrimPtr> m_sp_map;
|
||||
GSCodeGeneratorFunctionMap<GPUDrawScanlineCodeGenerator, uint32, DrawScanlinePtr> m_ds_map;
|
||||
|
||||
protected:
|
||||
GPUState* m_state;
|
||||
|
@ -63,7 +46,7 @@ public:
|
|||
|
||||
// IDrawScanline
|
||||
|
||||
void BeginDraw(const GSRasterizerData* data, Functions* f);
|
||||
void BeginDraw(const GSRasterizerData* data);
|
||||
void EndDraw(const GSRasterizerStats& stats);
|
||||
void PrintStats() {m_ds.PrintStats();}
|
||||
void PrintStats() {m_ds_map.PrintStats();}
|
||||
};
|
||||
|
|
|
@ -24,9 +24,9 @@
|
|||
#include "StdAfx.h"
|
||||
#include "GPUDrawScanlineCodeGenerator.h"
|
||||
|
||||
GPUDrawScanlineCodeGenerator::GPUDrawScanlineCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize)
|
||||
: CodeGenerator(maxsize, ptr)
|
||||
, m_env(env)
|
||||
GPUDrawScanlineCodeGenerator::GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
, m_env(*(GPUScanlineEnvironment*)param)
|
||||
{
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
|
|
|
@ -22,20 +22,17 @@
|
|||
#pragma once
|
||||
|
||||
#include "GPUScanlineEnvironment.h"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
#include "GSFunctionMap.h"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
class GPUDrawScanlineCodeGenerator : public CodeGenerator
|
||||
class GPUDrawScanlineCodeGenerator : public GSCodeGenerator
|
||||
{
|
||||
void operator = (const GPUDrawScanlineCodeGenerator&);
|
||||
|
||||
static const GSVector4i m_test[8];
|
||||
static const uint16 m_dither[4][16];
|
||||
|
||||
util::Cpu m_cpu;
|
||||
|
||||
GPUScanlineEnvironment& m_env;
|
||||
|
||||
void Generate();
|
||||
|
@ -58,5 +55,5 @@ class GPUDrawScanlineCodeGenerator : public CodeGenerator
|
|||
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
|
||||
|
||||
public:
|
||||
GPUDrawScanlineCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize);
|
||||
GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
|
@ -25,9 +25,11 @@
|
|||
#include "GSVertexSW.h"
|
||||
#include "GPUSetupPrimCodeGenerator.h"
|
||||
|
||||
GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize)
|
||||
: CodeGenerator(maxsize, ptr)
|
||||
, m_env(env)
|
||||
using namespace Xbyak;
|
||||
|
||||
GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
, m_env(*(GPUScanlineEnvironment*)param)
|
||||
{
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
|
|
|
@ -22,23 +22,18 @@
|
|||
#pragma once
|
||||
|
||||
#include "GPUScanlineEnvironment.h"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
#include "GSFunctionMap.h"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
class GPUSetupPrimCodeGenerator : public CodeGenerator
|
||||
class GPUSetupPrimCodeGenerator : public GSCodeGenerator
|
||||
{
|
||||
void operator = (const GPUSetupPrimCodeGenerator&);
|
||||
|
||||
static const GSVector4 m_shift[3];
|
||||
|
||||
util::Cpu m_cpu;
|
||||
|
||||
GPUScanlineEnvironment& m_env;
|
||||
|
||||
void Generate();
|
||||
|
||||
public:
|
||||
GPUSetupPrimCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize);
|
||||
GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
|
@ -525,6 +525,73 @@ EXPORT_C GSsetFrameLimit(int limit)
|
|||
|
||||
#ifdef _WINDOWS
|
||||
|
||||
#include <io.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
class Console
|
||||
{
|
||||
HANDLE m_console;
|
||||
string m_title;
|
||||
|
||||
public:
|
||||
Console::Console(LPCSTR title, bool open)
|
||||
: m_console(NULL)
|
||||
, m_title(title)
|
||||
{
|
||||
if(open) Open();
|
||||
}
|
||||
|
||||
Console::~Console()
|
||||
{
|
||||
Close();
|
||||
}
|
||||
|
||||
void Console::Open()
|
||||
{
|
||||
if(m_console == NULL)
|
||||
{
|
||||
CONSOLE_SCREEN_BUFFER_INFO csbiInfo;
|
||||
|
||||
AllocConsole();
|
||||
|
||||
SetConsoleTitle(m_title.c_str());
|
||||
|
||||
m_console = GetStdHandle(STD_OUTPUT_HANDLE);
|
||||
|
||||
COORD size;
|
||||
|
||||
size.X = 100;
|
||||
size.Y = 300;
|
||||
|
||||
SetConsoleScreenBufferSize(m_console, size);
|
||||
|
||||
GetConsoleScreenBufferInfo(m_console, &csbiInfo);
|
||||
|
||||
SMALL_RECT rect;
|
||||
|
||||
rect = csbiInfo.srWindow;
|
||||
rect.Right = rect.Left + 99;
|
||||
rect.Bottom = rect.Top + 64;
|
||||
|
||||
SetConsoleWindowInfo(m_console, TRUE, &rect);
|
||||
|
||||
*stdout = *_fdopen(_open_osfhandle((long)m_console, _O_TEXT), "w");
|
||||
|
||||
setvbuf(stdout, NULL, _IONBF, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void Console::Close()
|
||||
{
|
||||
if(m_console != NULL)
|
||||
{
|
||||
FreeConsole();
|
||||
|
||||
m_console = NULL;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// lpszCmdLine:
|
||||
// First parameter is the renderer.
|
||||
// Second parameter is the gs file to load and run.
|
||||
|
@ -547,6 +614,8 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
|
|||
|
||||
if(FILE* fp = fopen(lpszCmdLine, "rb"))
|
||||
{
|
||||
Console console("GSdx", true);
|
||||
|
||||
GSinit();
|
||||
|
||||
uint8 regs[0x2000];
|
||||
|
|
|
@ -1075,23 +1075,10 @@ public:
|
|||
|
||||
GSVector4i::sw64(v0, v2, v1, v3);
|
||||
|
||||
#ifdef _M_AMD64
|
||||
|
||||
((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend8(v0, mask);
|
||||
((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend8(v1, mask);
|
||||
((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend8(v2, mask);
|
||||
((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend8(v3, mask);
|
||||
|
||||
#else
|
||||
|
||||
// here blend is faster than blend8 because vc8 has a little problem optimizing register usage for pblendvb (3rd op must be xmm0)
|
||||
|
||||
((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend(v0, mask);
|
||||
((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend(v1, mask);
|
||||
((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend(v2, mask);
|
||||
((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend(v3, mask);
|
||||
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -26,8 +26,8 @@
|
|||
GSDrawScanline::GSDrawScanline(GSState* state, int id)
|
||||
: m_state(state)
|
||||
, m_id(id)
|
||||
, m_sp(m_env)
|
||||
, m_ds(m_env)
|
||||
, m_sp_map("GSSetupPrim", &m_env)
|
||||
, m_ds_map("GSDrawScanline", &m_env)
|
||||
{
|
||||
memset(&m_env, 0, sizeof(m_env));
|
||||
}
|
||||
|
@ -36,7 +36,7 @@ GSDrawScanline::~GSDrawScanline()
|
|||
{
|
||||
}
|
||||
|
||||
void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
||||
void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||
{
|
||||
GSDrawingEnvironment& env = m_state->m_env;
|
||||
GSDrawingContext* context = m_state->m_context;
|
||||
|
@ -98,7 +98,6 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
{
|
||||
m_env.tex = p->tex;
|
||||
m_env.clut = p->clut;
|
||||
// m_env.tw = p->tw;
|
||||
|
||||
unsigned short tw = (unsigned short)(1 << context->TEX0.TW);
|
||||
unsigned short th = (unsigned short)(1 << context->TEX0.TH);
|
||||
|
@ -163,7 +162,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
|
||||
//
|
||||
|
||||
f->ssl = m_ds[m_sel];
|
||||
m_ds = m_ds_map[m_sel];
|
||||
|
||||
if(m_sel.aa1)// && (m_state->m_perfmon.GetFrame() & 0x40))
|
||||
{
|
||||
|
@ -173,12 +172,20 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
sel.zwrite = 0;
|
||||
sel.edge = 1;
|
||||
|
||||
f->ssle = m_ds[sel];
|
||||
m_de = m_ds_map[sel];
|
||||
}
|
||||
else
|
||||
{
|
||||
m_de = NULL;
|
||||
}
|
||||
|
||||
if(m_sel.IsSolidRect())
|
||||
{
|
||||
f->sr = (DrawSolidRectPtr)&GSDrawScanline::DrawSolidRect;
|
||||
m_dr = (DrawRectPtr)&GSDrawScanline::DrawRect;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_dr = NULL;
|
||||
}
|
||||
|
||||
// doesn't need all bits => less functions generated
|
||||
|
@ -197,15 +204,15 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
sel.zb = m_sel.zb;
|
||||
sel.zoverflow = m_sel.zoverflow;
|
||||
|
||||
f->ssp = m_sp[sel];
|
||||
m_sp = m_sp_map[sel];
|
||||
}
|
||||
|
||||
void GSDrawScanline::EndDraw(const GSRasterizerStats& stats)
|
||||
{
|
||||
m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame());
|
||||
m_ds_map.UpdateStats(stats, m_state->m_perfmon.GetFrame());
|
||||
}
|
||||
|
||||
void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
|
||||
void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||
{
|
||||
ASSERT(r.y >= 0);
|
||||
ASSERT(r.w >= 0);
|
||||
|
@ -224,22 +231,22 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
|
|||
{
|
||||
if(m == 0)
|
||||
{
|
||||
DrawSolidRectT<uint32, false>(m_env.zbr, m_env.zbc, r, z, m);
|
||||
DrawRectT<uint32, false>(m_env.zbr, m_env.zbc, r, z, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
DrawSolidRectT<uint32, true>(m_env.zbr, m_env.zbc, r, z, m);
|
||||
DrawRectT<uint32, true>(m_env.zbr, m_env.zbc, r, z, m);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m == 0)
|
||||
{
|
||||
DrawSolidRectT<uint16, false>(m_env.zbr, m_env.zbc, r, z, m);
|
||||
DrawRectT<uint16, false>(m_env.zbr, m_env.zbc, r, z, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
DrawSolidRectT<uint16, true>(m_env.zbr, m_env.zbc, r, z, m);
|
||||
DrawRectT<uint16, true>(m_env.zbr, m_env.zbc, r, z, m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -259,11 +266,11 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
|
|||
{
|
||||
if(m == 0)
|
||||
{
|
||||
DrawSolidRectT<uint32, false>(m_env.fbr, m_env.fbc, r, c, m);
|
||||
DrawRectT<uint32, false>(m_env.fbr, m_env.fbc, r, c, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
DrawSolidRectT<uint32, true>(m_env.fbr, m_env.fbc, r, c, m);
|
||||
DrawRectT<uint32, true>(m_env.fbr, m_env.fbc, r, c, m);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -272,18 +279,18 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
|
|||
|
||||
if(m == 0)
|
||||
{
|
||||
DrawSolidRectT<uint16, false>(m_env.fbr, m_env.fbc, r, c, m);
|
||||
DrawRectT<uint16, false>(m_env.fbr, m_env.fbc, r, c, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
DrawSolidRectT<uint16, true>(m_env.fbr, m_env.fbc, r, c, m);
|
||||
DrawRectT<uint16, true>(m_env.fbr, m_env.fbc, r, c, m);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, bool masked>
|
||||
void GSDrawScanline::DrawSolidRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m)
|
||||
void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m)
|
||||
{
|
||||
if(m == 0xffffffff) return;
|
||||
|
||||
|
@ -358,29 +365,3 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap(GSScanlineEnvironment& env)
|
||||
: GSCodeGeneratorFunctionMap("GSSetupPrim")
|
||||
, m_env(env)
|
||||
{
|
||||
}
|
||||
|
||||
GSSetupPrimCodeGenerator* GSDrawScanline::GSSetupPrimMap::Create(uint64 key, void* ptr, size_t maxsize)
|
||||
{
|
||||
return new GSSetupPrimCodeGenerator(m_env, key, ptr, maxsize);
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap(GSScanlineEnvironment& env)
|
||||
: GSCodeGeneratorFunctionMap("GSDrawScanline")
|
||||
, m_env(env)
|
||||
{
|
||||
}
|
||||
|
||||
GSDrawScanlineCodeGenerator* GSDrawScanline::GSDrawScanlineMap::Create(uint64 key, void* ptr, size_t maxsize)
|
||||
{
|
||||
return new GSDrawScanlineCodeGenerator(m_env, key, ptr, maxsize);
|
||||
}
|
||||
|
|
|
@ -32,34 +32,13 @@ class GSDrawScanline : public IDrawScanline
|
|||
GSScanlineEnvironment m_env;
|
||||
GSScanlineSelector m_sel;
|
||||
|
||||
//
|
||||
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimPtr> m_sp_map;
|
||||
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map;
|
||||
|
||||
class GSSetupPrimMap : public GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimStaticPtr>
|
||||
{
|
||||
GSScanlineEnvironment& m_env;
|
||||
|
||||
public:
|
||||
GSSetupPrimMap(GSScanlineEnvironment& env);
|
||||
GSSetupPrimCodeGenerator* Create(uint64 key, void* ptr, size_t maxsize);
|
||||
} m_sp;
|
||||
|
||||
//
|
||||
|
||||
class GSDrawScanlineMap : public GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlineStaticPtr>
|
||||
{
|
||||
GSScanlineEnvironment& m_env;
|
||||
|
||||
public:
|
||||
GSDrawScanlineMap(GSScanlineEnvironment& env);
|
||||
GSDrawScanlineCodeGenerator* Create(uint64 key, void* ptr, size_t maxsize);
|
||||
} m_ds;
|
||||
|
||||
//
|
||||
|
||||
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
|
||||
template<class T, bool masked>
|
||||
void DrawSolidRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
|
||||
void DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
|
||||
|
||||
template<class T, bool masked>
|
||||
__forceinline void FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
|
||||
|
@ -77,7 +56,7 @@ public:
|
|||
|
||||
// IDrawScanline
|
||||
|
||||
void BeginDraw(const GSRasterizerData* data, Functions* f);
|
||||
void BeginDraw(const GSRasterizerData* data);
|
||||
void EndDraw(const GSRasterizerStats& stats);
|
||||
void PrintStats() {m_ds.PrintStats();}
|
||||
void PrintStats() {m_ds_map.PrintStats();}
|
||||
};
|
||||
|
|
|
@ -25,9 +25,9 @@
|
|||
#include "StdAfx.h"
|
||||
#include "GSDrawScanlineCodeGenerator.h"
|
||||
|
||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize)
|
||||
: CodeGenerator(maxsize, ptr)
|
||||
, m_env(env)
|
||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
, m_env(*(GSScanlineEnvironment*)param)
|
||||
{
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
|
@ -385,8 +385,7 @@ void GSDrawScanlineCodeGenerator::Init(int params)
|
|||
{
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
vmovdqa(xmm4, xmm3);
|
||||
vpshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshuflw(xmm4, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm4, 1);
|
||||
vmovdqa(ptr[&m_env.temp.vf], xmm4);
|
||||
|
@ -1136,6 +1135,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
mov(eax, 0x8000);
|
||||
vmovd(xmm4, eax);
|
||||
vpshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
vpsubd(xmm2, xmm4);
|
||||
vpsubd(xmm3, xmm4);
|
||||
}
|
||||
|
@ -1199,18 +1199,16 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// GSVector4i y0 = uv0.uph16() << tw;
|
||||
|
||||
vpxor(xmm0, xmm0);
|
||||
//vmovd(xmm1, ptr[&m_env.tw]);
|
||||
|
||||
vpunpcklwd(xmm4, xmm2, xmm0);
|
||||
vpunpckhwd(xmm2, xmm2, xmm0);
|
||||
vpslld(xmm2, m_sel.tw + 3); // xmm1);
|
||||
vpslld(xmm2, m_sel.tw + 3);
|
||||
|
||||
// xmm0 = 0
|
||||
// xmm1 = free // tw
|
||||
// xmm2 = y0
|
||||
// xmm3 = uv1 (ltf)
|
||||
// xmm4 = x0
|
||||
// xmm5, xmm6 = free
|
||||
// xmm1, xmm5, xmm6 = free
|
||||
// xmm7 = used
|
||||
|
||||
if(m_sel.ltf)
|
||||
|
@ -1220,7 +1218,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
vpunpcklwd(xmm6, xmm3, xmm0);
|
||||
vpunpckhwd(xmm3, xmm3, xmm0);
|
||||
vpslld(xmm3, m_sel.tw + 3); // xmm1);
|
||||
vpslld(xmm3, m_sel.tw + 3);
|
||||
|
||||
// xmm2 = y0
|
||||
// xmm3 = y1
|
||||
|
@ -1392,6 +1390,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
mov(eax, 0x8000);
|
||||
movd(xmm4, eax);
|
||||
pshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
psubd(xmm2, xmm4);
|
||||
psubd(xmm3, xmm4);
|
||||
}
|
||||
|
@ -1458,19 +1457,17 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// GSVector4i x0 = uv0.upl16();
|
||||
|
||||
pxor(xmm0, xmm0);
|
||||
// movd(xmm1, ptr[&m_env.tw]);
|
||||
|
||||
movdqa(xmm4, xmm2);
|
||||
punpckhwd(xmm2, xmm0);
|
||||
punpcklwd(xmm4, xmm0);
|
||||
pslld(xmm2, m_sel.tw + 3); // xmm1);
|
||||
pslld(xmm2, m_sel.tw + 3);
|
||||
|
||||
// xmm0 = 0
|
||||
// xmm1 = free // tw
|
||||
// xmm2 = y0
|
||||
// xmm3 = uv1 (ltf)
|
||||
// xmm4 = x0
|
||||
// xmm5, xmm6 = free
|
||||
// xmm1, xmm5, xmm6 = free
|
||||
// xmm7 = used
|
||||
|
||||
if(m_sel.ltf)
|
||||
|
@ -1481,7 +1478,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
movdqa(xmm6, xmm3);
|
||||
punpckhwd(xmm3, xmm0);
|
||||
punpcklwd(xmm6, xmm0);
|
||||
pslld(xmm3, m_sel.tw + 3); // xmm1);
|
||||
pslld(xmm3, m_sel.tw + 3);
|
||||
|
||||
// xmm2 = y0
|
||||
// xmm3 = y1
|
||||
|
@ -3475,7 +3472,7 @@ void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr)
|
|||
{
|
||||
if(m_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
movq(dst, qword[addr * 2 + (size_t)m_env.vm]);
|
||||
vmovq(dst, qword[addr * 2 + (size_t)m_env.vm]);
|
||||
vmovhps(dst, qword[addr * 2 + (size_t)m_env.vm + 8 * 2]);
|
||||
}
|
||||
else
|
||||
|
@ -3496,7 +3493,7 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
{
|
||||
test(mask, 0x0f);
|
||||
je("@f");
|
||||
movq(qword[addr * 2 + (size_t)m_env.vm], src);
|
||||
vmovq(qword[addr * 2 + (size_t)m_env.vm], src);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xf0);
|
||||
|
@ -3548,11 +3545,11 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
}
|
||||
}
|
||||
|
||||
static const int s_offsets[4] = {0, 2, 8, 10};
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm)
|
||||
{
|
||||
static const int offsets[4] = {0, 2, 8, 10};
|
||||
|
||||
Address dst = ptr[addr * 2 + (size_t)m_env.vm + offsets[i] * 2];
|
||||
Address dst = ptr[addr * 2 + (size_t)m_env.vm + s_offsets[i] * 2];
|
||||
|
||||
if(m_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
|
|
|
@ -22,19 +22,16 @@
|
|||
#pragma once
|
||||
|
||||
#include "GSScanlineEnvironment.h"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
#include "GSFunctionMap.h"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
class GSDrawScanlineCodeGenerator : public CodeGenerator
|
||||
class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
||||
{
|
||||
void operator = (const GSDrawScanlineCodeGenerator&);
|
||||
|
||||
static const GSVector4i m_test[8];
|
||||
|
||||
util::Cpu m_cpu;
|
||||
|
||||
GSScanlineEnvironment& m_env;
|
||||
GSScanlineSelector m_sel;
|
||||
|
||||
|
@ -75,5 +72,5 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
|
|||
void blend8r(const Xmm& b, const Xmm& a);
|
||||
|
||||
public:
|
||||
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize);
|
||||
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
|
||||
#include "GS.h"
|
||||
#include "GSCodeBuffer.h"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
|
||||
struct GSRasterizerStats
|
||||
{
|
||||
|
@ -128,6 +130,8 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
printf("GS stats\n");
|
||||
|
||||
for(hash_map<KEY, ActivePtr*>::iterator i = m_map_active.begin(); i != m_map_active.end(); i++)
|
||||
{
|
||||
KEY key = i->first;
|
||||
|
@ -139,7 +143,7 @@ public:
|
|||
int64 tpf = p->frames > 0 ? p->ticks / p->frames : 0;
|
||||
int64 ppf = p->frames > 0 ? p->pixels / p->frames : 0;
|
||||
|
||||
printf("[%012I64x]%c %6.2f%% | %5.2f%% | f %4I64d | p %10I64d | tpp %4I64d | tpf %9I64d | ppf %7I64d\n",
|
||||
printf("[%016I64x]%c %6.2f%% | %5.2f%% | f %4I64d | p %10I64d | tpp %4I64d | tpf %9I64d | ppf %7I64d\n",
|
||||
(uint64)key, m_map.find(key) == m_map.end() ? '*' : ' ',
|
||||
(float)(tpf * 10000 / 50000000) / 100,
|
||||
(float)(tpf * 10000 / ttpf) / 100,
|
||||
|
@ -150,54 +154,58 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class GSCodeGenerator : public Xbyak::CodeGenerator
|
||||
{
|
||||
protected:
|
||||
Xbyak::util::Cpu m_cpu;
|
||||
|
||||
public:
|
||||
GSCodeGenerator(void* code, size_t maxsize)
|
||||
: Xbyak::CodeGenerator(maxsize, code)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
#include "vtune/JITProfiling.h"
|
||||
|
||||
template<class CG, class KEY, class VALUE>
|
||||
class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
|
||||
{
|
||||
string m_name;
|
||||
hash_map<uint64, CG*> m_cgmap;
|
||||
void* m_param;
|
||||
hash_map<uint64, VALUE> m_cgmap;
|
||||
GSCodeBuffer m_cb;
|
||||
|
||||
enum {MAX_SIZE = 4096};
|
||||
|
||||
protected:
|
||||
virtual CG* Create(KEY key, void* ptr, size_t maxsize = MAX_SIZE) = 0;
|
||||
|
||||
public:
|
||||
GSCodeGeneratorFunctionMap(const char* name)
|
||||
GSCodeGeneratorFunctionMap(const char* name, void* param)
|
||||
: m_name(name)
|
||||
, m_param(param)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~GSCodeGeneratorFunctionMap()
|
||||
{
|
||||
for_each(m_cgmap.begin(), m_cgmap.end(), delete_second());
|
||||
}
|
||||
|
||||
VALUE GetDefaultFunction(KEY key)
|
||||
{
|
||||
CG* cg = NULL;
|
||||
VALUE ret = NULL;
|
||||
|
||||
hash_map<uint64, CG*>::iterator i = m_cgmap.find(key);
|
||||
hash_map<uint64, VALUE>::iterator i = m_cgmap.find(key);
|
||||
|
||||
if(i != m_cgmap.end())
|
||||
{
|
||||
cg = i->second;
|
||||
ret = i->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
void* ptr = m_cb.GetBuffer(MAX_SIZE);
|
||||
|
||||
cg = Create(key, ptr, MAX_SIZE);
|
||||
|
||||
ASSERT(cg);
|
||||
CG* cg = new CG(m_param, key, m_cb.GetBuffer(MAX_SIZE), MAX_SIZE);
|
||||
|
||||
ASSERT(cg->getSize() < MAX_SIZE);
|
||||
|
||||
m_cb.ReleaseBuffer(cg->getSize());
|
||||
|
||||
m_cgmap[key] = cg;
|
||||
ret = (VALUE)cg->getCode();
|
||||
|
||||
m_cgmap[key] = ret;
|
||||
|
||||
// vtune method registration
|
||||
|
||||
|
@ -216,8 +224,10 @@ public:
|
|||
|
||||
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
|
||||
}
|
||||
|
||||
delete cg;
|
||||
}
|
||||
|
||||
return (VALUE)cg->getCode();
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
// to wait out all the pending m_finished semaphores. It leaves one spinwait in the rasterizer,
|
||||
// but that's still worlds better than 2-6 spinning threads like before.
|
||||
|
||||
// NOTE: spinning: 100-500 ticks, waiting: 1000-5000 ticks
|
||||
|
||||
//
|
||||
#define UseSpinningFinish
|
||||
|
||||
|
@ -71,12 +73,7 @@ __forceinline bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
|
|||
|
||||
void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||
{
|
||||
m_dsf.ssl = NULL;
|
||||
m_dsf.ssle = NULL;
|
||||
m_dsf.ssp = NULL;
|
||||
m_dsf.sr = NULL;
|
||||
|
||||
m_ds->BeginDraw(data, &m_dsf);
|
||||
m_ds->BeginDraw(data);
|
||||
|
||||
const GSVector4i scissor = data->scissor;
|
||||
const GSVertexSW* vertices = data->vertices;
|
||||
|
@ -131,9 +128,9 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
|
|||
{
|
||||
if(IsOneOfMyScanlines(p.y))
|
||||
{
|
||||
m_dsf.ssp(v, *v);
|
||||
m_ds->SetupPrim(v, *v);
|
||||
|
||||
m_dsf.ssl(p.x + 1, p.x, p.y, *v);
|
||||
m_ds->DrawScanline(p.x + 1, p.x, p.y, *v);
|
||||
|
||||
m_stats.pixels++;
|
||||
}
|
||||
|
@ -146,7 +143,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
|
|||
|
||||
GSVector4 dp = dv.p.abs();
|
||||
|
||||
if(m_dsf.ssle)
|
||||
if(m_ds->IsEdge())
|
||||
{
|
||||
int i = (dp < dp.yxwz()).mask() & 1; // |x| <= |y|
|
||||
|
||||
|
@ -156,7 +153,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
|
|||
dscan.t = GSVector4::zero();
|
||||
dscan.c = GSVector4::zero();
|
||||
|
||||
m_dsf.ssp(v, dscan);
|
||||
m_ds->SetupPrim(v, dscan);
|
||||
|
||||
DrawEdge(v[0], v[1], dv, scissor, i, 0);
|
||||
DrawEdge(v[0], v[1], dv, scissor, i, 1);
|
||||
|
@ -190,7 +187,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor)
|
|||
{
|
||||
GSVertexSW dscan = dv / dv.p.xxxx();
|
||||
|
||||
m_dsf.ssp(v, dscan);
|
||||
m_ds->SetupPrim(v, dscan);
|
||||
|
||||
l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
|
||||
|
||||
|
@ -250,9 +247,9 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& sc
|
|||
|
||||
i = (aabb == bccb).mask() & 7;
|
||||
|
||||
if(m_dsf.ssle)
|
||||
if(m_ds->IsEdge())
|
||||
{
|
||||
DrawTriangleEdge(v, scissor);
|
||||
DrawEdge(v, scissor);
|
||||
}
|
||||
|
||||
switch(i)
|
||||
|
@ -273,7 +270,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& sc
|
|||
}
|
||||
}
|
||||
|
||||
void GSRasterizer::DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor)
|
||||
void GSRasterizer::DrawEdge(const GSVertexSW* v, const GSVector4i& scissor)
|
||||
{
|
||||
GSVertexSW dv[3];
|
||||
|
||||
|
@ -297,7 +294,7 @@ void GSRasterizer::DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& sciss
|
|||
dscan.t = GSVector4::zero();
|
||||
dscan.c = GSVector4::zero();
|
||||
|
||||
m_dsf.ssp(v, dscan); // TODO: don't call it twice (can't be sure about the second call if the triangle is too small)
|
||||
m_ds->SetupPrim(v, dscan); // TODO: don't call it twice (can't be sure about the second call if the triangle is too small)
|
||||
|
||||
DrawEdge(v[0], v[1], dv[0], scissor, i & 1, j & 1);
|
||||
DrawEdge(v[0], v[2], dv[1], scissor, i & 2, j & 2);
|
||||
|
@ -351,7 +348,7 @@ void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor)
|
|||
|
||||
l += dl * dy;
|
||||
|
||||
m_dsf.ssp(v, dscan);
|
||||
m_ds->SetupPrim(v, dscan);
|
||||
|
||||
DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
|
||||
}
|
||||
|
@ -403,7 +400,7 @@ void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor)
|
|||
|
||||
l += dl * dy;
|
||||
|
||||
m_dsf.ssp(v, dscan);
|
||||
m_ds->SetupPrim(v, dscan);
|
||||
|
||||
DrawTriangleSection(top, bottom, l, dl, dscan, fscissor);
|
||||
}
|
||||
|
@ -425,7 +422,7 @@ void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scisso
|
|||
|
||||
GSVertexSW dscan = longest * longest.p.xxxx().rcp();
|
||||
|
||||
m_dsf.ssp(v, dscan);
|
||||
m_ds->SetupPrim(v, dscan);
|
||||
|
||||
GSVector4 fscissor(scissor);
|
||||
|
||||
|
@ -511,7 +508,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
|||
|
||||
GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx();
|
||||
|
||||
m_dsf.ssl(right, left, top, scan);
|
||||
m_ds->DrawScanline(right, left, top, scan);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -552,7 +549,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
|||
|
||||
GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx();
|
||||
|
||||
m_dsf.ssl(right, left, top, scan);
|
||||
m_ds->DrawScanline(right, left, top, scan);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -585,11 +582,11 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
|
|||
|
||||
GSVertexSW scan = v[0];
|
||||
|
||||
if(m_dsf.sr)
|
||||
if(m_ds->IsRect())
|
||||
{
|
||||
if(m_id == 0)
|
||||
{
|
||||
(m_ds->*m_dsf.sr)(r, scan);
|
||||
m_ds->DrawRect(r, scan);
|
||||
|
||||
m_stats.pixels += r.width() * r.height();
|
||||
}
|
||||
|
@ -615,13 +612,13 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
|
|||
if(scan.p.y < (float)r.top) scan.t += dedge.t * ((float)r.top - scan.p.y);
|
||||
if(scan.p.x < (float)r.left) scan.t += dscan.t * ((float)r.left - scan.p.x);
|
||||
|
||||
m_dsf.ssp(v, dscan);
|
||||
m_ds->SetupPrim(v, dscan);
|
||||
|
||||
for(; r.top < r.bottom; r.top++, scan.t += dedge.t)
|
||||
{
|
||||
if(IsOneOfMyScanlines(r.top))
|
||||
{
|
||||
m_dsf.ssl(r.right, r.left, r.top, scan);
|
||||
m_ds->DrawScanline(r.right, r.left, r.top, scan);
|
||||
|
||||
m_stats.pixels += r.width();
|
||||
}
|
||||
|
@ -700,7 +697,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
|
||||
edge.t.u32[3] = (0x10000 - xf) & 0xffff;
|
||||
|
||||
m_dsf.ssle(xi + 1, xi, top, edge);
|
||||
m_ds->DrawEdge(xi + 1, xi, top, edge);
|
||||
|
||||
edge.t.u32[3] = 0;
|
||||
}
|
||||
|
@ -728,7 +725,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
|
||||
edge.t.u32[3] = xf;
|
||||
|
||||
m_dsf.ssle(xi + 1, xi, top, edge);
|
||||
m_ds->DrawEdge(xi + 1, xi, top, edge);
|
||||
|
||||
edge.t.u32[3] = 0;
|
||||
}
|
||||
|
@ -798,7 +795,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
|
||||
edge.t.u32[3] = (0x10000 - yf) & 0xffff;
|
||||
|
||||
m_dsf.ssle(left + 1, left, yi, edge);
|
||||
m_ds->DrawEdge(left + 1, left, yi, edge);
|
||||
|
||||
edge.t.u32[3] = 0;
|
||||
}
|
||||
|
@ -826,7 +823,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
|||
|
||||
edge.t.u32[3] = yf;
|
||||
|
||||
m_dsf.ssle(left + 1, left, yi, edge);
|
||||
m_ds->DrawEdge(left + 1, left, yi, edge);
|
||||
|
||||
edge.t.u32[3] = 0;
|
||||
}
|
||||
|
@ -930,21 +927,8 @@ void GSRasterizerList::FreeRasterizers()
|
|||
m_ready.clear();
|
||||
}
|
||||
|
||||
void GSRasterizerList::Draw(const GSRasterizerData* data)
|
||||
void GSRasterizerList::Sync()
|
||||
{
|
||||
m_stats.Reset();
|
||||
|
||||
int64 start = __rdtsc();
|
||||
|
||||
m_sync = m_syncstart;
|
||||
|
||||
for(size_t i = 1; i < size(); i++)
|
||||
{
|
||||
(*this)[i]->Draw(data);
|
||||
}
|
||||
|
||||
(*this)[0]->Draw(data);
|
||||
|
||||
#ifdef UseSpinningFinish
|
||||
|
||||
while(m_sync) _mm_pause();
|
||||
|
@ -955,7 +939,7 @@ void GSRasterizerList::Draw(const GSRasterizerData* data)
|
|||
|
||||
#endif
|
||||
|
||||
m_stats.ticks = __rdtsc() - start;
|
||||
m_stats.ticks = __rdtsc() - m_start;
|
||||
|
||||
for(size_t i = 0; i < size(); i++)
|
||||
{
|
||||
|
@ -968,6 +952,22 @@ void GSRasterizerList::Draw(const GSRasterizerData* data)
|
|||
}
|
||||
}
|
||||
|
||||
void GSRasterizerList::Draw(const GSRasterizerData* data)
|
||||
{
|
||||
m_stats.Reset();
|
||||
|
||||
m_start = __rdtsc();
|
||||
|
||||
m_sync = m_syncstart;
|
||||
|
||||
for(size_t i = 1; i < size(); i++)
|
||||
{
|
||||
(*this)[i]->Draw(data);
|
||||
}
|
||||
|
||||
(*this)[0]->Draw(data);
|
||||
}
|
||||
|
||||
void GSRasterizerList::GetStats(GSRasterizerStats& stats)
|
||||
{
|
||||
stats = m_stats;
|
||||
|
|
|
@ -37,6 +37,36 @@ public:
|
|||
const void* param;
|
||||
};
|
||||
|
||||
class IDrawScanline : public GSAlignedClass<32>
|
||||
{
|
||||
public:
|
||||
typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
typedef void (__fastcall *DrawScanlinePtr)(int right, int left, int top, const GSVertexSW& scan);
|
||||
typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit
|
||||
|
||||
protected:
|
||||
SetupPrimPtr m_sp;
|
||||
DrawScanlinePtr m_ds;
|
||||
DrawScanlinePtr m_de;
|
||||
DrawRectPtr m_dr;
|
||||
|
||||
public:
|
||||
IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {}
|
||||
virtual ~IDrawScanline() {}
|
||||
|
||||
virtual void BeginDraw(const GSRasterizerData* data) = 0;
|
||||
virtual void EndDraw(const GSRasterizerStats& stats) = 0;
|
||||
virtual void PrintStats() = 0;
|
||||
|
||||
__forceinline void SetupPrim(const GSVertexSW* v, const GSVertexSW& dscan) {m_sp(v, dscan);}
|
||||
__forceinline void DrawScanline(int right, int left, int top, const GSVertexSW& scan) {m_ds(right, left, top, scan);}
|
||||
__forceinline void DrawEdge(int right, int left, int top, const GSVertexSW& scan) {m_de(right, left, top, scan);}
|
||||
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
|
||||
|
||||
__forceinline bool IsEdge() const {return m_de != NULL;}
|
||||
__forceinline bool IsRect() const {return m_dr != NULL;}
|
||||
};
|
||||
|
||||
class IRasterizer
|
||||
{
|
||||
public:
|
||||
|
@ -47,33 +77,10 @@ public:
|
|||
virtual void PrintStats() = 0;
|
||||
};
|
||||
|
||||
class IDrawScanline : public GSAlignedClass<32>
|
||||
{
|
||||
public:
|
||||
typedef void (__fastcall *DrawScanlineStaticPtr)(int right, int left, int top, const GSVertexSW& v);
|
||||
typedef void (__fastcall *SetupPrimStaticPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v);
|
||||
|
||||
struct Functions
|
||||
{
|
||||
DrawScanlineStaticPtr ssl;
|
||||
DrawScanlineStaticPtr ssle;
|
||||
SetupPrimStaticPtr ssp;
|
||||
DrawSolidRectPtr sr; // TODO
|
||||
};
|
||||
|
||||
virtual ~IDrawScanline() {}
|
||||
|
||||
virtual void BeginDraw(const GSRasterizerData* data, Functions* dsf) = 0;
|
||||
virtual void EndDraw(const GSRasterizerStats& stats) = 0;
|
||||
virtual void PrintStats() = 0;
|
||||
};
|
||||
|
||||
class GSRasterizer : public IRasterizer
|
||||
{
|
||||
protected:
|
||||
IDrawScanline* m_ds;
|
||||
IDrawScanline::Functions m_dsf;
|
||||
int m_id;
|
||||
int m_threads;
|
||||
GSRasterizerStats m_stats;
|
||||
|
@ -81,7 +88,7 @@ protected:
|
|||
void DrawPoint(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
void DrawLine(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
void DrawTriangle(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
void DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
void DrawEdge(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
void DrawSprite(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
|
||||
void DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor);
|
||||
|
@ -133,6 +140,8 @@ protected:
|
|||
volatile long m_sync;
|
||||
long m_syncstart;
|
||||
GSRasterizerStats m_stats;
|
||||
int64 m_start;
|
||||
|
||||
void FreeRasterizers();
|
||||
|
||||
public:
|
||||
|
@ -145,10 +154,10 @@ public:
|
|||
|
||||
threads = std::max<int>(threads, 1); // TODO: min(threads, number of cpu cores)
|
||||
|
||||
push_back(new GSRasterizer(new DS(parent, 0), 0, threads));
|
||||
|
||||
m_syncstart = 0;
|
||||
|
||||
push_back(new GSRasterizer(new DS(parent, 0), 0, threads));
|
||||
|
||||
for(int i = 1; i < threads; i++)
|
||||
{
|
||||
HANDLE ready = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
|
@ -161,6 +170,8 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void Sync();
|
||||
|
||||
// IRasterizer
|
||||
|
||||
void Draw(const GSRasterizerData* data);
|
||||
|
|
|
@ -180,13 +180,6 @@ void GSRendererSW::Draw()
|
|||
|
||||
m_rl.Draw(&data);
|
||||
|
||||
GSRasterizerStats stats;
|
||||
|
||||
m_rl.GetStats(stats);
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
|
||||
|
||||
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor);
|
||||
|
||||
if(p.fm != 0xffffffff)
|
||||
|
@ -199,6 +192,18 @@ void GSRendererSW::Draw()
|
|||
m_tc->InvalidateVideoMem(m_context->offset.zb, r);
|
||||
}
|
||||
|
||||
// By only syncing here we can do the two InvalidateVideoMem calls free if the other threads finish
|
||||
// their drawings later than this one (they usually do because they start on an event).
|
||||
|
||||
m_rl.Sync();
|
||||
|
||||
GSRasterizerStats stats;
|
||||
|
||||
m_rl.GetStats(stats);
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
|
||||
|
||||
if(s_dump)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
@ -373,7 +378,6 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
|
|||
|
||||
p.tex = t->m_buff;
|
||||
p.clut = m_mem.m_clut;
|
||||
// p.tw = t->m_tw;
|
||||
|
||||
p.sel.tw = t->m_tw - 3;
|
||||
}
|
||||
|
|
|
@ -108,7 +108,6 @@ __aligned32 struct GSScanlineParam
|
|||
void* vm;
|
||||
const void* tex;
|
||||
const uint32* clut;
|
||||
//uint32 tw;
|
||||
|
||||
GSOffset* fbo;
|
||||
GSOffset* zbo;
|
||||
|
@ -122,7 +121,6 @@ __aligned32 struct GSScanlineEnvironment
|
|||
void* vm;
|
||||
const void* tex;
|
||||
const uint32* clut;
|
||||
//uint32 tw;
|
||||
|
||||
int* fbr;
|
||||
int* zbr;
|
||||
|
|
|
@ -24,9 +24,11 @@
|
|||
#include "StdAfx.h"
|
||||
#include "GSSetupPrimCodeGenerator.h"
|
||||
|
||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize)
|
||||
: CodeGenerator(maxsize, ptr)
|
||||
, m_env(env)
|
||||
using namespace Xbyak;
|
||||
|
||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
, m_env(*(GSScanlineEnvironment*)param)
|
||||
{
|
||||
m_sel.key = key;
|
||||
|
||||
|
@ -79,7 +81,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
{
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
// GSVector4 t = dscan.p;
|
||||
// GSVector4 p = dscan.p;
|
||||
|
||||
vmovaps(xmm0, ptr[edx + 16]);
|
||||
|
||||
|
@ -184,7 +186,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
{
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
// GSVector4 t = dscan.p;
|
||||
// GSVector4 p = dscan.p;
|
||||
|
||||
movaps(xmm0, ptr[edx + 16]);
|
||||
|
||||
|
|
|
@ -22,19 +22,14 @@
|
|||
#pragma once
|
||||
|
||||
#include "GSScanlineEnvironment.h"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
#include "GSFunctionMap.h"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
class GSSetupPrimCodeGenerator : public CodeGenerator
|
||||
class GSSetupPrimCodeGenerator : public GSCodeGenerator
|
||||
{
|
||||
void operator = (const GSSetupPrimCodeGenerator&);
|
||||
|
||||
static const GSVector4 m_shift[5];
|
||||
|
||||
util::Cpu m_cpu;
|
||||
|
||||
GSScanlineEnvironment& m_env;
|
||||
GSScanlineSelector m_sel;
|
||||
|
||||
|
@ -47,5 +42,5 @@ class GSSetupPrimCodeGenerator : public CodeGenerator
|
|||
void Color();
|
||||
|
||||
public:
|
||||
GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize);
|
||||
GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
|
|
@ -61,15 +61,15 @@ GSVector4i GSVector4i::fit(int arx, int ary) const
|
|||
return r;
|
||||
}
|
||||
|
||||
static const int s_ar[][2] = {{0, 0}, {4, 3}, {16, 9}};
|
||||
|
||||
GSVector4i GSVector4i::fit(int preset) const
|
||||
{
|
||||
GSVector4i r;
|
||||
|
||||
static const int ar[][2] = {{0, 0}, {4, 3}, {16, 9}};
|
||||
|
||||
if(preset > 0 && preset < countof(ar))
|
||||
if(preset > 0 && preset < countof(s_ar))
|
||||
{
|
||||
r = fit(ar[preset][0], ar[preset][1]);
|
||||
r = fit(s_ar[preset][0], s_ar[preset][1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -26,8 +26,14 @@
|
|||
#include "GSUtil.h"
|
||||
#include "GSState.h"
|
||||
|
||||
static const float s_fmin = -FLT_MAX;
|
||||
static const float s_fmax = FLT_MAX;
|
||||
|
||||
GSVertexTrace::GSVertexTrace(const GSState* state)
|
||||
: m_state(state)
|
||||
, m_map_sw("VertexTraceSW", NULL)
|
||||
, m_map_hw9("VertexTraceHW9", NULL)
|
||||
, m_map_hw11("VertexTraceHW11", NULL)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -120,8 +126,8 @@ void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primc
|
|||
|
||||
using namespace Xbyak;
|
||||
|
||||
GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize)
|
||||
: CodeGenerator(maxsize, code)
|
||||
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
|
@ -158,16 +164,13 @@ GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize)
|
|||
|
||||
//
|
||||
|
||||
static const float fmin = -FLT_MAX;
|
||||
static const float fmax = FLT_MAX;
|
||||
|
||||
if(m_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
vbroadcastss(xmm4, ptr[&fmax]);
|
||||
vbroadcastss(xmm5, ptr[&fmin]);
|
||||
vbroadcastss(xmm4, ptr[&s_fmax]);
|
||||
vbroadcastss(xmm5, ptr[&s_fmin]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
|
@ -282,8 +285,8 @@ GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize)
|
|||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
movss(xmm4, ptr[&fmax]);
|
||||
movss(xmm5, ptr[&fmin]);
|
||||
movss(xmm4, ptr[&s_fmax]);
|
||||
movss(xmm5, ptr[&s_fmin]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
@ -400,8 +403,8 @@ GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize)
|
|||
ret();
|
||||
}
|
||||
|
||||
GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize)
|
||||
: CodeGenerator(maxsize, code)
|
||||
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
|
@ -440,16 +443,13 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize)
|
|||
|
||||
//
|
||||
|
||||
static const float fmin = -FLT_MAX;
|
||||
static const float fmax = FLT_MAX;
|
||||
|
||||
if(m_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
vbroadcastss(xmm4, ptr[&fmax]);
|
||||
vbroadcastss(xmm5, ptr[&fmin]);
|
||||
vbroadcastss(xmm4, ptr[&s_fmax]);
|
||||
vbroadcastss(xmm5, ptr[&s_fmin]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
|
@ -593,8 +593,8 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize)
|
|||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
movss(xmm4, ptr[&fmax]);
|
||||
movss(xmm5, ptr[&fmin]);
|
||||
movss(xmm4, ptr[&s_fmax]);
|
||||
movss(xmm5, ptr[&s_fmin]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
@ -741,8 +741,8 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize)
|
|||
ret();
|
||||
}
|
||||
|
||||
GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize)
|
||||
: CodeGenerator(maxsize, code)
|
||||
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
#if _M_AMD64
|
||||
#error TODO
|
||||
|
@ -779,16 +779,13 @@ GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize)
|
|||
|
||||
//
|
||||
|
||||
static const float fmin = -FLT_MAX;
|
||||
static const float fmax = FLT_MAX;
|
||||
|
||||
if(m_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
vbroadcastss(xmm4, ptr[&fmax]);
|
||||
vbroadcastss(xmm5, ptr[&fmin]);
|
||||
vbroadcastss(xmm4, ptr[&s_fmax]);
|
||||
vbroadcastss(xmm5, ptr[&s_fmin]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
|
@ -931,8 +928,8 @@ GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize)
|
|||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
movss(xmm4, ptr[&fmax]);
|
||||
movss(xmm5, ptr[&fmin]);
|
||||
movss(xmm4, ptr[&s_fmax]);
|
||||
movss(xmm5, ptr[&s_fmin]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
|
|
@ -26,8 +26,6 @@
|
|||
#include "GSVertexSW.h"
|
||||
#include "GSVertexHW.h"
|
||||
#include "GSFunctionMap.h"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
|
||||
class GSState;
|
||||
|
||||
|
@ -38,54 +36,27 @@ __aligned32 class GSVertexTrace
|
|||
|
||||
typedef void (*VertexTracePtr)(const void* v, int count, Vertex& min, Vertex& max);
|
||||
|
||||
class CGSW : public Xbyak::CodeGenerator
|
||||
{
|
||||
Xbyak::util::Cpu m_cpu;
|
||||
|
||||
public:
|
||||
CGSW(uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
class GSVertexTraceMapSW : public GSCodeGeneratorFunctionMap<CGSW, uint32, VertexTracePtr>
|
||||
class CGSW : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
GSVertexTraceMapSW() : GSCodeGeneratorFunctionMap("VertexTraceSW") {}
|
||||
CGSW* Create(uint32 key, void* code, size_t maxsize) {return new CGSW(key, code, maxsize);}
|
||||
CGSW(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
class CGHW9 : public Xbyak::CodeGenerator
|
||||
{
|
||||
Xbyak::util::Cpu m_cpu;
|
||||
|
||||
public:
|
||||
CGHW9(uint32 key, void* ptr, size_t maxsize);
|
||||
};
|
||||
|
||||
class GSVertexTraceMapHW9 : public GSCodeGeneratorFunctionMap<CGHW9, uint32, VertexTracePtr>
|
||||
class CGHW9 : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
GSVertexTraceMapHW9() : GSCodeGeneratorFunctionMap("VertexTraceHW9") {}
|
||||
CGHW9* Create(uint32 key, void* code, size_t maxsize) {return new CGHW9(key, code, maxsize);}
|
||||
CGHW9(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
class CGHW11 : public Xbyak::CodeGenerator
|
||||
{
|
||||
Xbyak::util::Cpu m_cpu;
|
||||
|
||||
public:
|
||||
CGHW11(uint32 key, void* ptr, size_t maxsize);
|
||||
};
|
||||
|
||||
class GSVertexTraceMapHW11 : public GSCodeGeneratorFunctionMap<CGHW11, uint32, VertexTracePtr>
|
||||
class CGHW11 : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
GSVertexTraceMapHW11() : GSCodeGeneratorFunctionMap("VertexTraceHW11") {}
|
||||
CGHW11* Create(uint32 key, void* code, size_t maxsize) {return new CGHW11(key, code, maxsize);}
|
||||
CGHW11(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
GSVertexTraceMapSW m_map_sw;
|
||||
GSVertexTraceMapHW9 m_map_hw9;
|
||||
GSVertexTraceMapHW11 m_map_hw11;
|
||||
GSCodeGeneratorFunctionMap<CGSW, uint32, VertexTracePtr> m_map_sw;
|
||||
GSCodeGeneratorFunctionMap<CGHW9, uint32, VertexTracePtr> m_map_hw9;
|
||||
GSCodeGeneratorFunctionMap<CGHW11, uint32, VertexTracePtr> m_map_hw11;
|
||||
|
||||
uint32 Hash(GS_PRIM_CLASS primclass);
|
||||
|
||||
|
|
|
@ -720,7 +720,9 @@
|
|||
<ClCompile Include="GSDeviceNull.cpp" />
|
||||
<ClCompile Include="GSDialog.cpp" />
|
||||
<ClCompile Include="GSDirtyRect.cpp" />
|
||||
<ClCompile Include="GSDrawScanline.cpp" />
|
||||
<ClCompile Include="GSDrawScanline.cpp">
|
||||
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSDrawScanlineCodeGenerator.cpp" />
|
||||
<ClCompile Include="GSDump.cpp" />
|
||||
<ClCompile Include="GSdx.cpp">
|
||||
|
|
|
@ -1079,6 +1079,8 @@ public:
|
|||
const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
|
||||
const AddressFrame ptr, byte, word, dword, qword;
|
||||
const Fpu st0, st1, st2, st3, st4, st5, st6, st7;
|
||||
const Xmm* xmTbl[16];
|
||||
const Ymm* ymTbl[16];
|
||||
#ifdef XBYAK64
|
||||
const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
|
||||
const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d;
|
||||
|
@ -1444,18 +1446,6 @@ public:
|
|||
const Operand& cvtReg(const Operand& op, bool cvt, Operand::Kind kind) const
|
||||
{
|
||||
if (!cvt) return op;
|
||||
static const Xmm* xmTbl[] = {
|
||||
&xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7,
|
||||
#ifdef XBYAK64
|
||||
&xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15
|
||||
#endif
|
||||
};
|
||||
static const Ymm* ymTbl[] = {
|
||||
&ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7,
|
||||
#ifdef XBYAK64
|
||||
&ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15
|
||||
#endif
|
||||
};
|
||||
return (kind == Operand::XMM) ? *xmTbl[op.getIdx()] : *ymTbl[op.getIdx()];
|
||||
}
|
||||
// support (x, x/m, imm), (y, y/m, imm)
|
||||
|
@ -1490,6 +1480,17 @@ public:
|
|||
, rip()
|
||||
#endif
|
||||
{
|
||||
xmTbl[0] = &xm0; xmTbl[1] = &xm1; xmTbl[2] = &xm2; xmTbl[3] = &xm3;
|
||||
xmTbl[4] = &xm4; xmTbl[5] = &xm5; xmTbl[6] = &xm6; xmTbl[7] = &xm7;
|
||||
ymTbl[0] = &ym0; ymTbl[1] = &ym1; ymTbl[2] = &ym2; ymTbl[3] = &ym3;
|
||||
ymTbl[4] = &ym4; ymTbl[5] = &ym5; ymTbl[6] = &ym6; ymTbl[7] = &ym7;
|
||||
#ifdef XBYAK64
|
||||
xmTbl[8] = &xm8; xmTbl[9] = &xm9; xmTbl[10] = &xm10; xmTbl[11] = &xm11;
|
||||
xmTbl[12] = &xm12; xmTbl[13] = &xm13; xmTbl[14] = &xm14; xmTbl[15] = &xm15;
|
||||
ymTbl[8] = &ym8; ymTbl[9] = &ym9; ymTbl[10] = &ym10; ymTbl[11] = &ym11;
|
||||
ymTbl[12] = &ym12; ymTbl[13] = &ym13; ymTbl[14] = &ym14; ymTbl[15] = &ym15;
|
||||
#endif
|
||||
|
||||
label_.set(this);
|
||||
}
|
||||
bool hasUndefinedLabel() const { return label_.hasUndefinedLabel(); }
|
||||
|
|
|
@ -1008,11 +1008,11 @@ void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()
|
|||
void vcvtpd2ps(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0x5A, true); }
|
||||
void vcvtpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_F2, 0xE6, true); }
|
||||
void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0xE6, true); }
|
||||
void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); }
|
||||
void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); }
|
||||
#ifdef XBYAK64
|
||||
void vmovq(const Xmm& x, const Reg64& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x6E, false, 1); }
|
||||
void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); }
|
||||
void vmovq(const Reg64& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 1); }
|
||||
void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); }
|
||||
void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_F3, 0x7E, false, -1); }
|
||||
void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 1); db(imm); }
|
||||
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 1); db(imm); }
|
||||
|
|
Loading…
Reference in New Issue