diff --git a/plugins/GSdx/GPUDrawScanline.cpp b/plugins/GSdx/GPUDrawScanline.cpp index 8411768e3d..0c8b09944d 100644 --- a/plugins/GSdx/GPUDrawScanline.cpp +++ b/plugins/GSdx/GPUDrawScanline.cpp @@ -25,8 +25,8 @@ GPUDrawScanline::GPUDrawScanline(GPUState* state, int id) : m_state(state) , m_id(id) - , m_sp(m_env) - , m_ds(m_env) + , m_sp_map("GPUSetupPrim", &m_env) + , m_ds_map("GPUDrawScanline", &m_env) { } @@ -34,7 +34,7 @@ GPUDrawScanline::~GPUDrawScanline() { } -void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) +void GPUDrawScanline::BeginDraw(const GSRasterizerData* data) { GPUDrawingEnvironment& env = m_state->m_env; @@ -69,9 +69,11 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) // - f->ssl = m_ds[m_env.sel]; + m_ds = m_ds_map[m_env.sel]; - f->sr = NULL; // TODO + m_de = NULL; + + m_dr = NULL; // TODO // doesn't need all bits => less functions generated @@ -84,36 +86,10 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) sel.twin = m_env.sel.twin; sel.sprite = m_env.sel.sprite; - f->ssp = m_sp[sel]; + m_sp = m_sp_map[sel]; } void GPUDrawScanline::EndDraw(const GSRasterizerStats& stats) { - m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame()); -} - -// - -GPUDrawScanline::GPUSetupPrimMap::GPUSetupPrimMap(GPUScanlineEnvironment& env) - : GSCodeGeneratorFunctionMap("GPUSetupPrim") - , m_env(env) -{ -} - -GPUSetupPrimCodeGenerator* GPUDrawScanline::GPUSetupPrimMap::Create(uint32 key, void* ptr, size_t maxsize) -{ - return new GPUSetupPrimCodeGenerator(m_env, ptr, maxsize); -} - -// - -GPUDrawScanline::GPUDrawScanlineMap::GPUDrawScanlineMap(GPUScanlineEnvironment& env) - : GSCodeGeneratorFunctionMap("GPUDrawScanline") - , m_env(env) -{ -} - -GPUDrawScanlineCodeGenerator* GPUDrawScanline::GPUDrawScanlineMap::Create(uint32 key, void* ptr, size_t maxsize) -{ - return new GPUDrawScanlineCodeGenerator(m_env, ptr, maxsize); + m_ds_map.UpdateStats(stats, m_state->m_perfmon.GetFrame()); } diff --git a/plugins/GSdx/GPUDrawScanline.h b/plugins/GSdx/GPUDrawScanline.h index 50bb80b438..1fb0292514 100644 --- a/plugins/GSdx/GPUDrawScanline.h +++ b/plugins/GSdx/GPUDrawScanline.h @@ -33,25 +33,8 @@ class GPUDrawScanline : public IDrawScanline // - class GPUSetupPrimMap : public GSCodeGeneratorFunctionMap - { - GPUScanlineEnvironment& m_env; - - public: - GPUSetupPrimMap(GPUScanlineEnvironment& env); - GPUSetupPrimCodeGenerator* Create(uint32 key, void* ptr, size_t maxsize); - } m_sp; - - // - - class GPUDrawScanlineMap : public GSCodeGeneratorFunctionMap - { - GPUScanlineEnvironment& m_env; - - public: - GPUDrawScanlineMap(GPUScanlineEnvironment& env); - GPUDrawScanlineCodeGenerator* Create(uint32 key, void* ptr, size_t maxsize); - } m_ds; + GSCodeGeneratorFunctionMap m_sp_map; + GSCodeGeneratorFunctionMap m_ds_map; protected: GPUState* m_state; @@ -63,7 +46,7 @@ public: // IDrawScanline - void BeginDraw(const GSRasterizerData* data, Functions* f); + void BeginDraw(const GSRasterizerData* data); void EndDraw(const GSRasterizerStats& stats); - void PrintStats() {m_ds.PrintStats();} + void PrintStats() {m_ds_map.PrintStats();} }; diff --git a/plugins/GSdx/GPUDrawScanlineCodeGenerator.cpp b/plugins/GSdx/GPUDrawScanlineCodeGenerator.cpp index 56164b42f9..f6cb728931 100644 --- a/plugins/GSdx/GPUDrawScanlineCodeGenerator.cpp +++ b/plugins/GSdx/GPUDrawScanlineCodeGenerator.cpp @@ -24,9 +24,9 @@ #include "StdAfx.h" #include "GPUDrawScanlineCodeGenerator.h" -GPUDrawScanlineCodeGenerator::GPUDrawScanlineCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize) - : CodeGenerator(maxsize, ptr) - , m_env(env) +GPUDrawScanlineCodeGenerator::GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize) + : GSCodeGenerator(code, maxsize) + , m_env(*(GPUScanlineEnvironment*)param) { #if _M_AMD64 #error TODO diff --git a/plugins/GSdx/GPUDrawScanlineCodeGenerator.h b/plugins/GSdx/GPUDrawScanlineCodeGenerator.h index 423074b8bf..81761d5506 100644 --- a/plugins/GSdx/GPUDrawScanlineCodeGenerator.h +++ b/plugins/GSdx/GPUDrawScanlineCodeGenerator.h @@ -22,20 +22,17 @@ #pragma once #include "GPUScanlineEnvironment.h" -#include "xbyak/xbyak.h" -#include "xbyak/xbyak_util.h" +#include "GSFunctionMap.h" using namespace Xbyak; -class GPUDrawScanlineCodeGenerator : public CodeGenerator +class GPUDrawScanlineCodeGenerator : public GSCodeGenerator { void operator = (const GPUDrawScanlineCodeGenerator&); static const GSVector4i m_test[8]; static const uint16 m_dither[4][16]; - util::Cpu m_cpu; - GPUScanlineEnvironment& m_env; void Generate(); @@ -58,5 +55,5 @@ class GPUDrawScanlineCodeGenerator : public CodeGenerator void blend(const Xmm& a, const Xmm& b, const Xmm& mask); public: - GPUDrawScanlineCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize); + GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize); }; \ No newline at end of file diff --git a/plugins/GSdx/GPUSetupPrimCodeGenerator.cpp b/plugins/GSdx/GPUSetupPrimCodeGenerator.cpp index 6a90589302..b28bffe9d1 100644 --- a/plugins/GSdx/GPUSetupPrimCodeGenerator.cpp +++ b/plugins/GSdx/GPUSetupPrimCodeGenerator.cpp @@ -25,9 +25,11 @@ #include "GSVertexSW.h" #include "GPUSetupPrimCodeGenerator.h" -GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize) - : CodeGenerator(maxsize, ptr) - , m_env(env) +using namespace Xbyak; + +GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize) + : GSCodeGenerator(code, maxsize) + , m_env(*(GPUScanlineEnvironment*)param) { #if _M_AMD64 #error TODO diff --git a/plugins/GSdx/GPUSetupPrimCodeGenerator.h b/plugins/GSdx/GPUSetupPrimCodeGenerator.h index a45c82c388..e6e06452a6 100644 --- a/plugins/GSdx/GPUSetupPrimCodeGenerator.h +++ b/plugins/GSdx/GPUSetupPrimCodeGenerator.h @@ -22,23 +22,18 @@ #pragma once #include "GPUScanlineEnvironment.h" -#include "xbyak/xbyak.h" -#include "xbyak/xbyak_util.h" +#include "GSFunctionMap.h" -using namespace Xbyak; - -class GPUSetupPrimCodeGenerator : public CodeGenerator +class GPUSetupPrimCodeGenerator : public GSCodeGenerator { void operator = (const GPUSetupPrimCodeGenerator&); static const GSVector4 m_shift[3]; - util::Cpu m_cpu; - GPUScanlineEnvironment& m_env; void Generate(); public: - GPUSetupPrimCodeGenerator(GPUScanlineEnvironment& env, void* ptr, size_t maxsize); + GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize); }; \ No newline at end of file diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index e44d0f0289..08926ed164 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -525,6 +525,73 @@ EXPORT_C GSsetFrameLimit(int limit) #ifdef _WINDOWS +#include +#include + +class Console +{ + HANDLE m_console; + string m_title; + +public: + Console::Console(LPCSTR title, bool open) + : m_console(NULL) + , m_title(title) + { + if(open) Open(); + } + + Console::~Console() + { + Close(); + } + + void Console::Open() + { + if(m_console == NULL) + { + CONSOLE_SCREEN_BUFFER_INFO csbiInfo; + + AllocConsole(); + + SetConsoleTitle(m_title.c_str()); + + m_console = GetStdHandle(STD_OUTPUT_HANDLE); + + COORD size; + + size.X = 100; + size.Y = 300; + + SetConsoleScreenBufferSize(m_console, size); + + GetConsoleScreenBufferInfo(m_console, &csbiInfo); + + SMALL_RECT rect; + + rect = csbiInfo.srWindow; + rect.Right = rect.Left + 99; + rect.Bottom = rect.Top + 64; + + SetConsoleWindowInfo(m_console, TRUE, &rect); + + *stdout = *_fdopen(_open_osfhandle((long)m_console, _O_TEXT), "w"); + + setvbuf(stdout, NULL, _IONBF, 0); + } + } + + void Console::Close() + { + if(m_console != NULL) + { + FreeConsole(); + + m_console = NULL; + } + } +}; + // lpszCmdLine: // First parameter is the renderer. // Second parameter is the gs file to load and run. @@ -547,6 +614,8 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) if(FILE* fp = fopen(lpszCmdLine, "rb")) { + Console console("GSdx", true); + GSinit(); uint8 regs[0x2000]; diff --git a/plugins/GSdx/GSBlock.h b/plugins/GSdx/GSBlock.h index b9f3bfa5dd..622fdd1894 100644 --- a/plugins/GSdx/GSBlock.h +++ b/plugins/GSdx/GSBlock.h @@ -1075,23 +1075,10 @@ public: GSVector4i::sw64(v0, v2, v1, v3); - #ifdef _M_AMD64 - ((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend8(v0, mask); ((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend8(v1, mask); ((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend8(v2, mask); ((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend8(v3, mask); - - #else - - // here blend is faster than blend8 because vc8 has a little problem optimizing register usage for pblendvb (3rd op must be xmm0) - - ((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend(v0, mask); - ((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend(v1, mask); - ((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend(v2, mask); - ((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend(v3, mask); - - #endif } } diff --git a/plugins/GSdx/GSDrawScanline.cpp b/plugins/GSdx/GSDrawScanline.cpp index acebf7fbd3..ffd1a2b344 100644 --- a/plugins/GSdx/GSDrawScanline.cpp +++ b/plugins/GSdx/GSDrawScanline.cpp @@ -26,8 +26,8 @@ GSDrawScanline::GSDrawScanline(GSState* state, int id) : m_state(state) , m_id(id) - , m_sp(m_env) - , m_ds(m_env) + , m_sp_map("GSSetupPrim", &m_env) + , m_ds_map("GSDrawScanline", &m_env) { memset(&m_env, 0, sizeof(m_env)); } @@ -36,7 +36,7 @@ GSDrawScanline::~GSDrawScanline() { } -void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) +void GSDrawScanline::BeginDraw(const GSRasterizerData* data) { GSDrawingEnvironment& env = m_state->m_env; GSDrawingContext* context = m_state->m_context; @@ -98,7 +98,6 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) { m_env.tex = p->tex; m_env.clut = p->clut; - // m_env.tw = p->tw; unsigned short tw = (unsigned short)(1 << context->TEX0.TW); unsigned short th = (unsigned short)(1 << context->TEX0.TH); @@ -163,7 +162,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) // - f->ssl = m_ds[m_sel]; + m_ds = m_ds_map[m_sel]; if(m_sel.aa1)// && (m_state->m_perfmon.GetFrame() & 0x40)) { @@ -173,12 +172,20 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) sel.zwrite = 0; sel.edge = 1; - f->ssle = m_ds[sel]; + m_de = m_ds_map[sel]; + } + else + { + m_de = NULL; } if(m_sel.IsSolidRect()) { - f->sr = (DrawSolidRectPtr)&GSDrawScanline::DrawSolidRect; + m_dr = (DrawRectPtr)&GSDrawScanline::DrawRect; + } + else + { + m_dr = NULL; } // doesn't need all bits => less functions generated @@ -197,15 +204,15 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) sel.zb = m_sel.zb; sel.zoverflow = m_sel.zoverflow; - f->ssp = m_sp[sel]; + m_sp = m_sp_map[sel]; } void GSDrawScanline::EndDraw(const GSRasterizerStats& stats) { - m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame()); + m_ds_map.UpdateStats(stats, m_state->m_perfmon.GetFrame()); } -void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) +void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v) { ASSERT(r.y >= 0); ASSERT(r.w >= 0); @@ -224,22 +231,22 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) { if(m == 0) { - DrawSolidRectT(m_env.zbr, m_env.zbc, r, z, m); + DrawRectT(m_env.zbr, m_env.zbc, r, z, m); } else { - DrawSolidRectT(m_env.zbr, m_env.zbc, r, z, m); + DrawRectT(m_env.zbr, m_env.zbc, r, z, m); } } else { if(m == 0) { - DrawSolidRectT(m_env.zbr, m_env.zbc, r, z, m); + DrawRectT(m_env.zbr, m_env.zbc, r, z, m); } else { - DrawSolidRectT(m_env.zbr, m_env.zbc, r, z, m); + DrawRectT(m_env.zbr, m_env.zbc, r, z, m); } } } @@ -259,11 +266,11 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) { if(m == 0) { - DrawSolidRectT(m_env.fbr, m_env.fbc, r, c, m); + DrawRectT(m_env.fbr, m_env.fbc, r, c, m); } else { - DrawSolidRectT(m_env.fbr, m_env.fbc, r, c, m); + DrawRectT(m_env.fbr, m_env.fbc, r, c, m); } } else @@ -272,18 +279,18 @@ void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) if(m == 0) { - DrawSolidRectT(m_env.fbr, m_env.fbc, r, c, m); + DrawRectT(m_env.fbr, m_env.fbc, r, c, m); } else { - DrawSolidRectT(m_env.fbr, m_env.fbc, r, c, m); + DrawRectT(m_env.fbr, m_env.fbc, r, c, m); } } } } template -void GSDrawScanline::DrawSolidRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m) +void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m) { if(m == 0xffffffff) return; @@ -358,29 +365,3 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, } } } - -// - -GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap(GSScanlineEnvironment& env) - : GSCodeGeneratorFunctionMap("GSSetupPrim") - , m_env(env) -{ -} - -GSSetupPrimCodeGenerator* GSDrawScanline::GSSetupPrimMap::Create(uint64 key, void* ptr, size_t maxsize) -{ - return new GSSetupPrimCodeGenerator(m_env, key, ptr, maxsize); -} - -// - -GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap(GSScanlineEnvironment& env) - : GSCodeGeneratorFunctionMap("GSDrawScanline") - , m_env(env) -{ -} - -GSDrawScanlineCodeGenerator* GSDrawScanline::GSDrawScanlineMap::Create(uint64 key, void* ptr, size_t maxsize) -{ - return new GSDrawScanlineCodeGenerator(m_env, key, ptr, maxsize); -} diff --git a/plugins/GSdx/GSDrawScanline.h b/plugins/GSdx/GSDrawScanline.h index 27e6f4819c..accacb1451 100644 --- a/plugins/GSdx/GSDrawScanline.h +++ b/plugins/GSdx/GSDrawScanline.h @@ -32,34 +32,13 @@ class GSDrawScanline : public IDrawScanline GSScanlineEnvironment m_env; GSScanlineSelector m_sel; - // + GSCodeGeneratorFunctionMap m_sp_map; + GSCodeGeneratorFunctionMap m_ds_map; - class GSSetupPrimMap : public GSCodeGeneratorFunctionMap - { - GSScanlineEnvironment& m_env; - - public: - GSSetupPrimMap(GSScanlineEnvironment& env); - GSSetupPrimCodeGenerator* Create(uint64 key, void* ptr, size_t maxsize); - } m_sp; - - // - - class GSDrawScanlineMap : public GSCodeGeneratorFunctionMap - { - GSScanlineEnvironment& m_env; - - public: - GSDrawScanlineMap(GSScanlineEnvironment& env); - GSDrawScanlineCodeGenerator* Create(uint64 key, void* ptr, size_t maxsize); - } m_ds; - - // - - void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v); + void DrawRect(const GSVector4i& r, const GSVertexSW& v); template - void DrawSolidRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m); + void DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m); template __forceinline void FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m); @@ -77,7 +56,7 @@ public: // IDrawScanline - void BeginDraw(const GSRasterizerData* data, Functions* f); + void BeginDraw(const GSRasterizerData* data); void EndDraw(const GSRasterizerStats& stats); - void PrintStats() {m_ds.PrintStats();} + void PrintStats() {m_ds_map.PrintStats();} }; diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp index a05ebc1f07..6711b78f55 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp @@ -25,9 +25,9 @@ #include "StdAfx.h" #include "GSDrawScanlineCodeGenerator.h" -GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize) - : CodeGenerator(maxsize, ptr) - , m_env(env) +GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize) + : GSCodeGenerator(code, maxsize) + , m_env(*(GSScanlineEnvironment*)param) { #if _M_AMD64 #error TODO @@ -385,8 +385,7 @@ void GSDrawScanlineCodeGenerator::Init(int params) { if(m_sel.ltf) { - vmovdqa(xmm4, xmm3); - vpshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); + vpshuflw(xmm4, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); vpsrlw(xmm4, 1); vmovdqa(ptr[&m_env.temp.vf], xmm4); @@ -1136,6 +1135,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() mov(eax, 0x8000); vmovd(xmm4, eax); vpshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); + vpsubd(xmm2, xmm4); vpsubd(xmm3, xmm4); } @@ -1199,18 +1199,16 @@ void GSDrawScanlineCodeGenerator::SampleTexture() // GSVector4i y0 = uv0.uph16() << tw; vpxor(xmm0, xmm0); - //vmovd(xmm1, ptr[&m_env.tw]); vpunpcklwd(xmm4, xmm2, xmm0); vpunpckhwd(xmm2, xmm2, xmm0); - vpslld(xmm2, m_sel.tw + 3); // xmm1); + vpslld(xmm2, m_sel.tw + 3); // xmm0 = 0 - // xmm1 = free // tw // xmm2 = y0 // xmm3 = uv1 (ltf) // xmm4 = x0 - // xmm5, xmm6 = free + // xmm1, xmm5, xmm6 = free // xmm7 = used if(m_sel.ltf) @@ -1220,7 +1218,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() vpunpcklwd(xmm6, xmm3, xmm0); vpunpckhwd(xmm3, xmm3, xmm0); - vpslld(xmm3, m_sel.tw + 3); // xmm1); + vpslld(xmm3, m_sel.tw + 3); // xmm2 = y0 // xmm3 = y1 @@ -1392,6 +1390,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() mov(eax, 0x8000); movd(xmm4, eax); pshufd(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); + psubd(xmm2, xmm4); psubd(xmm3, xmm4); } @@ -1458,19 +1457,17 @@ void GSDrawScanlineCodeGenerator::SampleTexture() // GSVector4i x0 = uv0.upl16(); pxor(xmm0, xmm0); - // movd(xmm1, ptr[&m_env.tw]); movdqa(xmm4, xmm2); punpckhwd(xmm2, xmm0); punpcklwd(xmm4, xmm0); - pslld(xmm2, m_sel.tw + 3); // xmm1); + pslld(xmm2, m_sel.tw + 3); // xmm0 = 0 - // xmm1 = free // tw // xmm2 = y0 // xmm3 = uv1 (ltf) // xmm4 = x0 - // xmm5, xmm6 = free + // xmm1, xmm5, xmm6 = free // xmm7 = used if(m_sel.ltf) @@ -1481,7 +1478,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() movdqa(xmm6, xmm3); punpckhwd(xmm3, xmm0); punpcklwd(xmm6, xmm0); - pslld(xmm3, m_sel.tw + 3); // xmm1); + pslld(xmm3, m_sel.tw + 3); // xmm2 = y0 // xmm3 = y1 @@ -3475,7 +3472,7 @@ void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr) { if(m_cpu.has(util::Cpu::tAVX)) { - movq(dst, qword[addr * 2 + (size_t)m_env.vm]); + vmovq(dst, qword[addr * 2 + (size_t)m_env.vm]); vmovhps(dst, qword[addr * 2 + (size_t)m_env.vm + 8 * 2]); } else @@ -3496,7 +3493,7 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, { test(mask, 0x0f); je("@f"); - movq(qword[addr * 2 + (size_t)m_env.vm], src); + vmovq(qword[addr * 2 + (size_t)m_env.vm], src); L("@@"); test(mask, 0xf0); @@ -3548,11 +3545,11 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, } } +static const int s_offsets[4] = {0, 2, 8, 10}; + void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm) { - static const int offsets[4] = {0, 2, 8, 10}; - - Address dst = ptr[addr * 2 + (size_t)m_env.vm + offsets[i] * 2]; + Address dst = ptr[addr * 2 + (size_t)m_env.vm + s_offsets[i] * 2]; if(m_cpu.has(util::Cpu::tAVX)) { diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.h b/plugins/GSdx/GSDrawScanlineCodeGenerator.h index 650171ef3f..e2d1389ecb 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.h +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.h @@ -22,19 +22,16 @@ #pragma once #include "GSScanlineEnvironment.h" -#include "xbyak/xbyak.h" -#include "xbyak/xbyak_util.h" +#include "GSFunctionMap.h" using namespace Xbyak; -class GSDrawScanlineCodeGenerator : public CodeGenerator +class GSDrawScanlineCodeGenerator : public GSCodeGenerator { void operator = (const GSDrawScanlineCodeGenerator&); static const GSVector4i m_test[8]; - util::Cpu m_cpu; - GSScanlineEnvironment& m_env; GSScanlineSelector m_sel; @@ -75,5 +72,5 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator void blend8r(const Xmm& b, const Xmm& a); public: - GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize); + GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize); }; diff --git a/plugins/GSdx/GSFunctionMap.h b/plugins/GSdx/GSFunctionMap.h index 86f952f1ae..b12ea1cccd 100644 --- a/plugins/GSdx/GSFunctionMap.h +++ b/plugins/GSdx/GSFunctionMap.h @@ -23,6 +23,8 @@ #include "GS.h" #include "GSCodeBuffer.h" +#include "xbyak/xbyak.h" +#include "xbyak/xbyak_util.h" struct GSRasterizerStats { @@ -128,6 +130,8 @@ public: } } + printf("GS stats\n"); + for(hash_map::iterator i = m_map_active.begin(); i != m_map_active.end(); i++) { KEY key = i->first; @@ -139,7 +143,7 @@ public: int64 tpf = p->frames > 0 ? p->ticks / p->frames : 0; int64 ppf = p->frames > 0 ? p->pixels / p->frames : 0; - printf("[%012I64x]%c %6.2f%% | %5.2f%% | f %4I64d | p %10I64d | tpp %4I64d | tpf %9I64d | ppf %7I64d\n", + printf("[%016I64x]%c %6.2f%% | %5.2f%% | f %4I64d | p %10I64d | tpp %4I64d | tpf %9I64d | ppf %7I64d\n", (uint64)key, m_map.find(key) == m_map.end() ? '*' : ' ', (float)(tpf * 10000 / 50000000) / 100, (float)(tpf * 10000 / ttpf) / 100, @@ -150,54 +154,58 @@ public: } }; +class GSCodeGenerator : public Xbyak::CodeGenerator +{ +protected: + Xbyak::util::Cpu m_cpu; + +public: + GSCodeGenerator(void* code, size_t maxsize) + : Xbyak::CodeGenerator(maxsize, code) + { + } +}; + #include "vtune/JITProfiling.h" template class GSCodeGeneratorFunctionMap : public GSFunctionMap { string m_name; - hash_map m_cgmap; + void* m_param; + hash_map m_cgmap; GSCodeBuffer m_cb; enum {MAX_SIZE = 4096}; -protected: - virtual CG* Create(KEY key, void* ptr, size_t maxsize = MAX_SIZE) = 0; - public: - GSCodeGeneratorFunctionMap(const char* name) + GSCodeGeneratorFunctionMap(const char* name, void* param) : m_name(name) + , m_param(param) { } - virtual ~GSCodeGeneratorFunctionMap() - { - for_each(m_cgmap.begin(), m_cgmap.end(), delete_second()); - } - VALUE GetDefaultFunction(KEY key) { - CG* cg = NULL; + VALUE ret = NULL; - hash_map::iterator i = m_cgmap.find(key); + hash_map::iterator i = m_cgmap.find(key); if(i != m_cgmap.end()) { - cg = i->second; + ret = i->second; } else { - void* ptr = m_cb.GetBuffer(MAX_SIZE); - - cg = Create(key, ptr, MAX_SIZE); - - ASSERT(cg); + CG* cg = new CG(m_param, key, m_cb.GetBuffer(MAX_SIZE), MAX_SIZE); ASSERT(cg->getSize() < MAX_SIZE); m_cb.ReleaseBuffer(cg->getSize()); - m_cgmap[key] = cg; + ret = (VALUE)cg->getCode(); + + m_cgmap[key] = ret; // vtune method registration @@ -216,8 +224,10 @@ public: iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml); } + + delete cg; } - return (VALUE)cg->getCode(); + return ret; } }; diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index 611d7e565f..074637c360 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -28,6 +28,8 @@ // to wait out all the pending m_finished semaphores. It leaves one spinwait in the rasterizer, // but that's still worlds better than 2-6 spinning threads like before. +// NOTE: spinning: 100-500 ticks, waiting: 1000-5000 ticks + // #define UseSpinningFinish @@ -71,12 +73,7 @@ __forceinline bool GSRasterizer::IsOneOfMyScanlines(int scanline) const void GSRasterizer::Draw(const GSRasterizerData* data) { - m_dsf.ssl = NULL; - m_dsf.ssle = NULL; - m_dsf.ssp = NULL; - m_dsf.sr = NULL; - - m_ds->BeginDraw(data, &m_dsf); + m_ds->BeginDraw(data); const GSVector4i scissor = data->scissor; const GSVertexSW* vertices = data->vertices; @@ -131,9 +128,9 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor) { if(IsOneOfMyScanlines(p.y)) { - m_dsf.ssp(v, *v); + m_ds->SetupPrim(v, *v); - m_dsf.ssl(p.x + 1, p.x, p.y, *v); + m_ds->DrawScanline(p.x + 1, p.x, p.y, *v); m_stats.pixels++; } @@ -146,7 +143,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor) GSVector4 dp = dv.p.abs(); - if(m_dsf.ssle) + if(m_ds->IsEdge()) { int i = (dp < dp.yxwz()).mask() & 1; // |x| <= |y| @@ -156,7 +153,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor) dscan.t = GSVector4::zero(); dscan.c = GSVector4::zero(); - m_dsf.ssp(v, dscan); + m_ds->SetupPrim(v, dscan); DrawEdge(v[0], v[1], dv, scissor, i, 0); DrawEdge(v[0], v[1], dv, scissor, i, 1); @@ -190,7 +187,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v, const GSVector4i& scissor) { GSVertexSW dscan = dv / dv.p.xxxx(); - m_dsf.ssp(v, dscan); + m_ds->SetupPrim(v, dscan); l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y @@ -250,9 +247,9 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& sc i = (aabb == bccb).mask() & 7; - if(m_dsf.ssle) + if(m_ds->IsEdge()) { - DrawTriangleEdge(v, scissor); + DrawEdge(v, scissor); } switch(i) @@ -273,7 +270,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices, const GSVector4i& sc } } -void GSRasterizer::DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor) +void GSRasterizer::DrawEdge(const GSVertexSW* v, const GSVector4i& scissor) { GSVertexSW dv[3]; @@ -297,7 +294,7 @@ void GSRasterizer::DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& sciss dscan.t = GSVector4::zero(); dscan.c = GSVector4::zero(); - m_dsf.ssp(v, dscan); // TODO: don't call it twice (can't be sure about the second call if the triangle is too small) + m_ds->SetupPrim(v, dscan); // TODO: don't call it twice (can't be sure about the second call if the triangle is too small) DrawEdge(v[0], v[1], dv[0], scissor, i & 1, j & 1); DrawEdge(v[0], v[2], dv[1], scissor, i & 2, j & 2); @@ -351,7 +348,7 @@ void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor) l += dl * dy; - m_dsf.ssp(v, dscan); + m_ds->SetupPrim(v, dscan); DrawTriangleSection(top, bottom, l, dl, dscan, fscissor); } @@ -403,7 +400,7 @@ void GSRasterizer::DrawTriangleBottom(GSVertexSW* v, const GSVector4i& scissor) l += dl * dy; - m_dsf.ssp(v, dscan); + m_ds->SetupPrim(v, dscan); DrawTriangleSection(top, bottom, l, dl, dscan, fscissor); } @@ -425,7 +422,7 @@ void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scisso GSVertexSW dscan = longest * longest.p.xxxx().rcp(); - m_dsf.ssp(v, dscan); + m_ds->SetupPrim(v, dscan); GSVector4 fscissor(scissor); @@ -511,7 +508,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx(); - m_dsf.ssl(right, left, top, scan); + m_ds->DrawScanline(right, left, top, scan); } } } @@ -552,7 +549,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW scan = l + dscan * (lrmax - l.p).xxxx(); - m_dsf.ssl(right, left, top, scan); + m_ds->DrawScanline(right, left, top, scan); } } } @@ -585,11 +582,11 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis GSVertexSW scan = v[0]; - if(m_dsf.sr) + if(m_ds->IsRect()) { if(m_id == 0) { - (m_ds->*m_dsf.sr)(r, scan); + m_ds->DrawRect(r, scan); m_stats.pixels += r.width() * r.height(); } @@ -615,13 +612,13 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis if(scan.p.y < (float)r.top) scan.t += dedge.t * ((float)r.top - scan.p.y); if(scan.p.x < (float)r.left) scan.t += dscan.t * ((float)r.left - scan.p.x); - m_dsf.ssp(v, dscan); + m_ds->SetupPrim(v, dscan); for(; r.top < r.bottom; r.top++, scan.t += dedge.t) { if(IsOneOfMyScanlines(r.top)) { - m_dsf.ssl(r.right, r.left, r.top, scan); + m_ds->DrawScanline(r.right, r.left, r.top, scan); m_stats.pixels += r.width(); } @@ -700,7 +697,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS edge.t.u32[3] = (0x10000 - xf) & 0xffff; - m_dsf.ssle(xi + 1, xi, top, edge); + m_ds->DrawEdge(xi + 1, xi, top, edge); edge.t.u32[3] = 0; } @@ -728,7 +725,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS edge.t.u32[3] = xf; - m_dsf.ssle(xi + 1, xi, top, edge); + m_ds->DrawEdge(xi + 1, xi, top, edge); edge.t.u32[3] = 0; } @@ -798,7 +795,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS edge.t.u32[3] = (0x10000 - yf) & 0xffff; - m_dsf.ssle(left + 1, left, yi, edge); + m_ds->DrawEdge(left + 1, left, yi, edge); edge.t.u32[3] = 0; } @@ -826,7 +823,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS edge.t.u32[3] = yf; - m_dsf.ssle(left + 1, left, yi, edge); + m_ds->DrawEdge(left + 1, left, yi, edge); edge.t.u32[3] = 0; } @@ -888,7 +885,7 @@ void GSRasterizerMT::ThreadProc() return; case WAIT_OBJECT_0 + 1: // draw - + __super::Draw(m_data); #ifdef UseSpinningFinish @@ -930,21 +927,8 @@ void GSRasterizerList::FreeRasterizers() m_ready.clear(); } -void GSRasterizerList::Draw(const GSRasterizerData* data) +void GSRasterizerList::Sync() { - m_stats.Reset(); - - int64 start = __rdtsc(); - - m_sync = m_syncstart; - - for(size_t i = 1; i < size(); i++) - { - (*this)[i]->Draw(data); - } - - (*this)[0]->Draw(data); - #ifdef UseSpinningFinish while(m_sync) _mm_pause(); @@ -955,7 +939,7 @@ void GSRasterizerList::Draw(const GSRasterizerData* data) #endif - m_stats.ticks = __rdtsc() - start; + m_stats.ticks = __rdtsc() - m_start; for(size_t i = 0; i < size(); i++) { @@ -968,6 +952,22 @@ void GSRasterizerList::Draw(const GSRasterizerData* data) } } +void GSRasterizerList::Draw(const GSRasterizerData* data) +{ + m_stats.Reset(); + + m_start = __rdtsc(); + + m_sync = m_syncstart; + + for(size_t i = 1; i < size(); i++) + { + (*this)[i]->Draw(data); + } + + (*this)[0]->Draw(data); +} + void GSRasterizerList::GetStats(GSRasterizerStats& stats) { stats = m_stats; diff --git a/plugins/GSdx/GSRasterizer.h b/plugins/GSdx/GSRasterizer.h index 2720722e32..c45b7892bf 100644 --- a/plugins/GSdx/GSRasterizer.h +++ b/plugins/GSdx/GSRasterizer.h @@ -36,6 +36,36 @@ public: int count; const void* param; }; + +class IDrawScanline : public GSAlignedClass<32> +{ +public: + typedef void (__fastcall *SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan); + typedef void (__fastcall *DrawScanlinePtr)(int right, int left, int top, const GSVertexSW& scan); + typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit + +protected: + SetupPrimPtr m_sp; + DrawScanlinePtr m_ds; + DrawScanlinePtr m_de; + DrawRectPtr m_dr; + +public: + IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {} + virtual ~IDrawScanline() {} + + virtual void BeginDraw(const GSRasterizerData* data) = 0; + virtual void EndDraw(const GSRasterizerStats& stats) = 0; + virtual void PrintStats() = 0; + + __forceinline void SetupPrim(const GSVertexSW* v, const GSVertexSW& dscan) {m_sp(v, dscan);} + __forceinline void DrawScanline(int right, int left, int top, const GSVertexSW& scan) {m_ds(right, left, top, scan);} + __forceinline void DrawEdge(int right, int left, int top, const GSVertexSW& scan) {m_de(right, left, top, scan);} + __forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);} + + __forceinline bool IsEdge() const {return m_de != NULL;} + __forceinline bool IsRect() const {return m_dr != NULL;} +}; class IRasterizer { @@ -47,33 +77,10 @@ public: virtual void PrintStats() = 0; }; -class IDrawScanline : public GSAlignedClass<32> -{ -public: - typedef void (__fastcall *DrawScanlineStaticPtr)(int right, int left, int top, const GSVertexSW& v); - typedef void (__fastcall *SetupPrimStaticPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan); - typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v); - - struct Functions - { - DrawScanlineStaticPtr ssl; - DrawScanlineStaticPtr ssle; - SetupPrimStaticPtr ssp; - DrawSolidRectPtr sr; // TODO - }; - - virtual ~IDrawScanline() {} - - virtual void BeginDraw(const GSRasterizerData* data, Functions* dsf) = 0; - virtual void EndDraw(const GSRasterizerStats& stats) = 0; - virtual void PrintStats() = 0; -}; - class GSRasterizer : public IRasterizer { protected: IDrawScanline* m_ds; - IDrawScanline::Functions m_dsf; int m_id; int m_threads; GSRasterizerStats m_stats; @@ -81,7 +88,7 @@ protected: void DrawPoint(const GSVertexSW* v, const GSVector4i& scissor); void DrawLine(const GSVertexSW* v, const GSVector4i& scissor); void DrawTriangle(const GSVertexSW* v, const GSVector4i& scissor); - void DrawTriangleEdge(const GSVertexSW* v, const GSVector4i& scissor); + void DrawEdge(const GSVertexSW* v, const GSVector4i& scissor); void DrawSprite(const GSVertexSW* v, const GSVector4i& scissor); void DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor); @@ -133,6 +140,8 @@ protected: volatile long m_sync; long m_syncstart; GSRasterizerStats m_stats; + int64 m_start; + void FreeRasterizers(); public: @@ -145,10 +154,10 @@ public: threads = std::max(threads, 1); // TODO: min(threads, number of cpu cores) - push_back(new GSRasterizer(new DS(parent, 0), 0, threads)); - m_syncstart = 0; + push_back(new GSRasterizer(new DS(parent, 0), 0, threads)); + for(int i = 1; i < threads; i++) { HANDLE ready = CreateEvent(NULL, FALSE, FALSE, NULL); @@ -161,6 +170,8 @@ public: } } + void Sync(); + // IRasterizer void Draw(const GSRasterizerData* data); diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index ba76f26ea0..510c50e8a0 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -180,13 +180,6 @@ void GSRendererSW::Draw() m_rl.Draw(&data); - GSRasterizerStats stats; - - m_rl.GetStats(stats); - - m_perfmon.Put(GSPerfMon::Prim, stats.prims); - m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels); - GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor); if(p.fm != 0xffffffff) @@ -199,6 +192,18 @@ void GSRendererSW::Draw() m_tc->InvalidateVideoMem(m_context->offset.zb, r); } + // By only syncing here we can do the two InvalidateVideoMem calls free if the other threads finish + // their drawings later than this one (they usually do because they start on an event). + + m_rl.Sync(); + + GSRasterizerStats stats; + + m_rl.GetStats(stats); + + m_perfmon.Put(GSPerfMon::Prim, stats.prims); + m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels); + if(s_dump) { uint64 frame = m_perfmon.GetFrame(); @@ -373,7 +378,6 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) p.tex = t->m_buff; p.clut = m_mem.m_clut; - // p.tw = t->m_tw; p.sel.tw = t->m_tw - 3; } diff --git a/plugins/GSdx/GSScanlineEnvironment.h b/plugins/GSdx/GSScanlineEnvironment.h index 8fcbe664da..afeed0beec 100644 --- a/plugins/GSdx/GSScanlineEnvironment.h +++ b/plugins/GSdx/GSScanlineEnvironment.h @@ -108,7 +108,6 @@ __aligned32 struct GSScanlineParam void* vm; const void* tex; const uint32* clut; - //uint32 tw; GSOffset* fbo; GSOffset* zbo; @@ -122,7 +121,6 @@ __aligned32 struct GSScanlineEnvironment void* vm; const void* tex; const uint32* clut; - //uint32 tw; int* fbr; int* zbr; diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.cpp index a89e6f8058..fc2aae2e92 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.cpp @@ -24,9 +24,11 @@ #include "StdAfx.h" #include "GSSetupPrimCodeGenerator.h" -GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize) - : CodeGenerator(maxsize, ptr) - , m_env(env) +using namespace Xbyak; + +GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize) + : GSCodeGenerator(code, maxsize) + , m_env(*(GSScanlineEnvironment*)param) { m_sel.key = key; @@ -79,7 +81,7 @@ void GSSetupPrimCodeGenerator::Depth() { if(!m_sel.sprite) { - // GSVector4 t = dscan.p; + // GSVector4 p = dscan.p; vmovaps(xmm0, ptr[edx + 16]); @@ -184,7 +186,7 @@ void GSSetupPrimCodeGenerator::Depth() { if(!m_sel.sprite) { - // GSVector4 t = dscan.p; + // GSVector4 p = dscan.p; movaps(xmm0, ptr[edx + 16]); diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.h b/plugins/GSdx/GSSetupPrimCodeGenerator.h index 0639dfd797..a53b7a71f8 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.h +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.h @@ -22,19 +22,14 @@ #pragma once #include "GSScanlineEnvironment.h" -#include "xbyak/xbyak.h" -#include "xbyak/xbyak_util.h" +#include "GSFunctionMap.h" -using namespace Xbyak; - -class GSSetupPrimCodeGenerator : public CodeGenerator +class GSSetupPrimCodeGenerator : public GSCodeGenerator { void operator = (const GSSetupPrimCodeGenerator&); static const GSVector4 m_shift[5]; - util::Cpu m_cpu; - GSScanlineEnvironment& m_env; GSScanlineSelector m_sel; @@ -47,5 +42,5 @@ class GSSetupPrimCodeGenerator : public CodeGenerator void Color(); public: - GSSetupPrimCodeGenerator(GSScanlineEnvironment& env, uint64 key, void* ptr, size_t maxsize); + GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize); }; diff --git a/plugins/GSdx/GSVector.cpp b/plugins/GSdx/GSVector.cpp index 8b76e96688..2db5add0ec 100644 --- a/plugins/GSdx/GSVector.cpp +++ b/plugins/GSdx/GSVector.cpp @@ -61,15 +61,15 @@ GSVector4i GSVector4i::fit(int arx, int ary) const return r; } +static const int s_ar[][2] = {{0, 0}, {4, 3}, {16, 9}}; + GSVector4i GSVector4i::fit(int preset) const { GSVector4i r; - static const int ar[][2] = {{0, 0}, {4, 3}, {16, 9}}; - - if(preset > 0 && preset < countof(ar)) + if(preset > 0 && preset < countof(s_ar)) { - r = fit(ar[preset][0], ar[preset][1]); + r = fit(s_ar[preset][0], s_ar[preset][1]); } else { diff --git a/plugins/GSdx/GSVertexTrace.cpp b/plugins/GSdx/GSVertexTrace.cpp index 2eef982949..7629ec5d75 100644 --- a/plugins/GSdx/GSVertexTrace.cpp +++ b/plugins/GSdx/GSVertexTrace.cpp @@ -26,8 +26,14 @@ #include "GSUtil.h" #include "GSState.h" +static const float s_fmin = -FLT_MAX; +static const float s_fmax = FLT_MAX; + GSVertexTrace::GSVertexTrace(const GSState* state) : m_state(state) + , m_map_sw("VertexTraceSW", NULL) + , m_map_hw9("VertexTraceHW9", NULL) + , m_map_hw11("VertexTraceHW11", NULL) { } @@ -120,8 +126,8 @@ void GSVertexTrace::Update(const GSVertexHW11* v, int count, GS_PRIM_CLASS primc using namespace Xbyak; -GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize) - : CodeGenerator(maxsize, code) +GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize) + : GSCodeGenerator(code, maxsize) { #if _M_AMD64 #error TODO @@ -158,16 +164,13 @@ GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize) // - static const float fmin = -FLT_MAX; - static const float fmax = FLT_MAX; - if(m_cpu.has(util::Cpu::tAVX)) { // min.p = FLT_MAX; // max.p = -FLT_MAX; - vbroadcastss(xmm4, ptr[&fmax]); - vbroadcastss(xmm5, ptr[&fmin]); + vbroadcastss(xmm4, ptr[&s_fmax]); + vbroadcastss(xmm5, ptr[&s_fmin]); if(color) { @@ -282,8 +285,8 @@ GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize) // min.p = FLT_MAX; // max.p = -FLT_MAX; - movss(xmm4, ptr[&fmax]); - movss(xmm5, ptr[&fmin]); + movss(xmm4, ptr[&s_fmax]); + movss(xmm5, ptr[&s_fmin]); shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0)); @@ -400,8 +403,8 @@ GSVertexTrace::CGSW::CGSW(uint32 key, void* code, size_t maxsize) ret(); } -GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize) - : CodeGenerator(maxsize, code) +GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize) + : GSCodeGenerator(code, maxsize) { #if _M_AMD64 #error TODO @@ -440,16 +443,13 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize) // - static const float fmin = -FLT_MAX; - static const float fmax = FLT_MAX; - if(m_cpu.has(util::Cpu::tAVX)) { // min.p = FLT_MAX; // max.p = -FLT_MAX; - vbroadcastss(xmm4, ptr[&fmax]); - vbroadcastss(xmm5, ptr[&fmin]); + vbroadcastss(xmm4, ptr[&s_fmax]); + vbroadcastss(xmm5, ptr[&s_fmin]); if(color) { @@ -593,8 +593,8 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize) // min.p = FLT_MAX; // max.p = -FLT_MAX; - movss(xmm4, ptr[&fmax]); - movss(xmm5, ptr[&fmin]); + movss(xmm4, ptr[&s_fmax]); + movss(xmm5, ptr[&s_fmin]); shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0)); @@ -741,8 +741,8 @@ GSVertexTrace::CGHW9::CGHW9(uint32 key, void* code, size_t maxsize) ret(); } -GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize) - : CodeGenerator(maxsize, code) +GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize) + : GSCodeGenerator(code, maxsize) { #if _M_AMD64 #error TODO @@ -779,16 +779,13 @@ GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize) // - static const float fmin = -FLT_MAX; - static const float fmax = FLT_MAX; - if(m_cpu.has(util::Cpu::tAVX)) { // min.p = FLT_MAX; // max.p = -FLT_MAX; - vbroadcastss(xmm4, ptr[&fmax]); - vbroadcastss(xmm5, ptr[&fmin]); + vbroadcastss(xmm4, ptr[&s_fmax]); + vbroadcastss(xmm5, ptr[&s_fmin]); if(color) { @@ -931,8 +928,8 @@ GSVertexTrace::CGHW11::CGHW11(uint32 key, void* code, size_t maxsize) // min.p = FLT_MAX; // max.p = -FLT_MAX; - movss(xmm4, ptr[&fmax]); - movss(xmm5, ptr[&fmin]); + movss(xmm4, ptr[&s_fmax]); + movss(xmm5, ptr[&s_fmin]); shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0)); shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0)); diff --git a/plugins/GSdx/GSVertexTrace.h b/plugins/GSdx/GSVertexTrace.h index d6a5860359..01c1def638 100644 --- a/plugins/GSdx/GSVertexTrace.h +++ b/plugins/GSdx/GSVertexTrace.h @@ -26,8 +26,6 @@ #include "GSVertexSW.h" #include "GSVertexHW.h" #include "GSFunctionMap.h" -#include "xbyak/xbyak.h" -#include "xbyak/xbyak_util.h" class GSState; @@ -38,54 +36,27 @@ __aligned32 class GSVertexTrace typedef void (*VertexTracePtr)(const void* v, int count, Vertex& min, Vertex& max); - class CGSW : public Xbyak::CodeGenerator - { - Xbyak::util::Cpu m_cpu; - - public: - CGSW(uint32 key, void* code, size_t maxsize); - }; - - class GSVertexTraceMapSW : public GSCodeGeneratorFunctionMap + class CGSW : public GSCodeGenerator { public: - GSVertexTraceMapSW() : GSCodeGeneratorFunctionMap("VertexTraceSW") {} - CGSW* Create(uint32 key, void* code, size_t maxsize) {return new CGSW(key, code, maxsize);} + CGSW(const void* param, uint32 key, void* code, size_t maxsize); }; - class CGHW9 : public Xbyak::CodeGenerator - { - Xbyak::util::Cpu m_cpu; - - public: - CGHW9(uint32 key, void* ptr, size_t maxsize); - }; - - class GSVertexTraceMapHW9 : public GSCodeGeneratorFunctionMap + class CGHW9 : public GSCodeGenerator { public: - GSVertexTraceMapHW9() : GSCodeGeneratorFunctionMap("VertexTraceHW9") {} - CGHW9* Create(uint32 key, void* code, size_t maxsize) {return new CGHW9(key, code, maxsize);} + CGHW9(const void* param, uint32 key, void* code, size_t maxsize); }; - class CGHW11 : public Xbyak::CodeGenerator - { - Xbyak::util::Cpu m_cpu; - - public: - CGHW11(uint32 key, void* ptr, size_t maxsize); - }; - - class GSVertexTraceMapHW11 : public GSCodeGeneratorFunctionMap + class CGHW11 : public GSCodeGenerator { public: - GSVertexTraceMapHW11() : GSCodeGeneratorFunctionMap("VertexTraceHW11") {} - CGHW11* Create(uint32 key, void* code, size_t maxsize) {return new CGHW11(key, code, maxsize);} + CGHW11(const void* param, uint32 key, void* code, size_t maxsize); }; - GSVertexTraceMapSW m_map_sw; - GSVertexTraceMapHW9 m_map_hw9; - GSVertexTraceMapHW11 m_map_hw11; + GSCodeGeneratorFunctionMap m_map_sw; + GSCodeGeneratorFunctionMap m_map_hw9; + GSCodeGeneratorFunctionMap m_map_hw11; uint32 Hash(GS_PRIM_CLASS primclass); diff --git a/plugins/GSdx/GSdx.vcxproj b/plugins/GSdx/GSdx.vcxproj index 2d0405258f..ab276ff176 100644 --- a/plugins/GSdx/GSdx.vcxproj +++ b/plugins/GSdx/GSdx.vcxproj @@ -720,7 +720,9 @@ - + + AssemblyAndSourceCode + diff --git a/plugins/GSdx/xbyak/xbyak.h b/plugins/GSdx/xbyak/xbyak.h index 8c8e35d760..dada1a88ed 100644 --- a/plugins/GSdx/xbyak/xbyak.h +++ b/plugins/GSdx/xbyak/xbyak.h @@ -1079,6 +1079,8 @@ public: const Reg8 al, cl, dl, bl, ah, ch, dh, bh; const AddressFrame ptr, byte, word, dword, qword; const Fpu st0, st1, st2, st3, st4, st5, st6, st7; + const Xmm* xmTbl[16]; + const Ymm* ymTbl[16]; #ifdef XBYAK64 const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15; const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d; @@ -1444,18 +1446,6 @@ public: const Operand& cvtReg(const Operand& op, bool cvt, Operand::Kind kind) const { if (!cvt) return op; - static const Xmm* xmTbl[] = { - &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7, -#ifdef XBYAK64 - &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15 -#endif - }; - static const Ymm* ymTbl[] = { - &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7, -#ifdef XBYAK64 - &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15 -#endif - }; return (kind == Operand::XMM) ? *xmTbl[op.getIdx()] : *ymTbl[op.getIdx()]; } // support (x, x/m, imm), (y, y/m, imm) @@ -1490,6 +1480,17 @@ public: , rip() #endif { + xmTbl[0] = &xm0; xmTbl[1] = &xm1; xmTbl[2] = &xm2; xmTbl[3] = &xm3; + xmTbl[4] = &xm4; xmTbl[5] = &xm5; xmTbl[6] = &xm6; xmTbl[7] = &xm7; + ymTbl[0] = &ym0; ymTbl[1] = &ym1; ymTbl[2] = &ym2; ymTbl[3] = &ym3; + ymTbl[4] = &ym4; ymTbl[5] = &ym5; ymTbl[6] = &ym6; ymTbl[7] = &ym7; +#ifdef XBYAK64 + xmTbl[8] = &xm8; xmTbl[9] = &xm9; xmTbl[10] = &xm10; xmTbl[11] = &xm11; + xmTbl[12] = &xm12; xmTbl[13] = &xm13; xmTbl[14] = &xm14; xmTbl[15] = &xm15; + ymTbl[8] = &ym8; ymTbl[9] = &ym9; ymTbl[10] = &ym10; ymTbl[11] = &ym11; + ymTbl[12] = &ym12; ymTbl[13] = &ym13; ymTbl[14] = &ym14; ymTbl[15] = &ym15; +#endif + label_.set(this); } bool hasUndefinedLabel() const { return label_.hasUndefinedLabel(); } diff --git a/plugins/GSdx/xbyak/xbyak_mnemonic.h b/plugins/GSdx/xbyak/xbyak_mnemonic.h index 88aadd0592..a5b99ac303 100644 --- a/plugins/GSdx/xbyak/xbyak_mnemonic.h +++ b/plugins/GSdx/xbyak/xbyak_mnemonic.h @@ -1008,11 +1008,11 @@ void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM() void vcvtpd2ps(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0x5A, true); } void vcvtpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_F2, 0xE6, true); } void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0xE6, true); } +void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); } +void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); } #ifdef XBYAK64 void vmovq(const Xmm& x, const Reg64& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x6E, false, 1); } -void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); } void vmovq(const Reg64& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 1); } -void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); } void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_F3, 0x7E, false, -1); } void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 1); db(imm); } void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 1); db(imm); }