From ae1bc651d64ab3f871e02942dc100e75d13a32cf Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Mon, 12 Apr 2021 04:31:30 -0500 Subject: [PATCH] GSdx: Format --- plugins/GSdx/GS.cpp | 451 ++--- plugins/GSdx/GS.h | 1591 +++++++++-------- plugins/GSdx/GSAlignedClass.h | 11 +- plugins/GSdx/GSBlock.h | 419 +++-- plugins/GSdx/GSCapture.cpp | 134 +- plugins/GSdx/GSCapture.h | 10 +- plugins/GSdx/GSClut.h | 15 +- plugins/GSdx/GSCodeBuffer.cpp | 4 +- plugins/GSdx/GSCrc.cpp | 53 +- plugins/GSdx/GSDrawingContext.cpp | 46 +- plugins/GSdx/GSDrawingContext.h | 235 +-- plugins/GSdx/GSDrawingEnvironment.h | 194 +- plugins/GSdx/GSDump.cpp | 23 +- plugins/GSdx/GSDump.h | 8 +- plugins/GSdx/GSLocalMemory.cpp | 1097 ++++++------ plugins/GSdx/GSLocalMemory.h | 52 +- plugins/GSdx/GSLzma.cpp | 78 +- plugins/GSdx/GSLzma.h | 45 +- plugins/GSdx/GSPerfMon.cpp | 16 +- plugins/GSdx/GSPerfMon.h | 37 +- plugins/GSdx/GSPng.cpp | 200 ++- plugins/GSdx/GSPng.h | 54 +- plugins/GSdx/GSState.cpp | 18 +- plugins/GSdx/GSState.h | 17 +- plugins/GSdx/GSThread.h | 8 +- plugins/GSdx/GSThread_CXX11.h | 26 +- plugins/GSdx/GSUtil.cpp | 26 +- plugins/GSdx/GSUtil.h | 2 +- plugins/GSdx/GSVector.cpp | 12 +- plugins/GSdx/GSVector.h | 13 +- plugins/GSdx/GSVector4.h | 319 ++-- plugins/GSdx/GSVector4i.h | 742 ++++---- plugins/GSdx/GSVector8.h | 340 ++-- plugins/GSdx/GSVector8i.h | 616 ++++--- plugins/GSdx/GSdx.cpp | 86 +- plugins/GSdx/GSdx.h | 32 +- plugins/GSdx/Renderers/Common/GSDevice.cpp | 58 +- plugins/GSdx/Renderers/Common/GSDevice.h | 50 +- plugins/GSdx/Renderers/Common/GSDirtyRect.cpp | 6 +- plugins/GSdx/Renderers/Common/GSFastList.h | 130 +- plugins/GSdx/Renderers/Common/GSFunctionMap.h | 32 +- .../GSdx/Renderers/Common/GSOsdManager.cpp | 275 +-- plugins/GSdx/Renderers/Common/GSOsdManager.h | 25 +- plugins/GSdx/Renderers/Common/GSRenderer.cpp | 192 +- plugins/GSdx/Renderers/Common/GSRenderer.h | 8 +- plugins/GSdx/Renderers/Common/GSTexture.h | 39 +- plugins/GSdx/Renderers/Common/GSVertex.h | 29 +- plugins/GSdx/Renderers/Common/GSVertexList.h | 9 +- .../GSdx/Renderers/Common/GSVertexTrace.cpp | 119 +- plugins/GSdx/Renderers/Common/GSVertexTrace.h | 24 +- plugins/GSdx/Renderers/DX11/GSDevice11.cpp | 293 +-- plugins/GSdx/Renderers/DX11/GSDevice11.h | 212 ++- .../GSdx/Renderers/DX11/GSRendererDX11.cpp | 96 +- plugins/GSdx/Renderers/DX11/GSRendererDX11.h | 3 +- plugins/GSdx/Renderers/DX11/GSTexture11.cpp | 56 +- .../GSdx/Renderers/DX11/GSTextureCache11.cpp | 6 +- .../GSdx/Renderers/DX11/GSTextureCache11.h | 2 +- plugins/GSdx/Renderers/DX11/GSTextureFX11.cpp | 46 +- plugins/GSdx/Renderers/HW/GSRendererHW.cpp | 653 ++++--- plugins/GSdx/Renderers/HW/GSRendererHW.h | 32 +- plugins/GSdx/Renderers/HW/GSTextureCache.cpp | 704 +++++--- plugins/GSdx/Renderers/HW/GSTextureCache.h | 40 +- plugins/GSdx/Renderers/HW/GSVertexHW.h | 9 +- plugins/GSdx/Renderers/Null/GSDeviceNull.cpp | 5 +- plugins/GSdx/Renderers/Null/GSDeviceNull.h | 3 +- plugins/GSdx/Renderers/Null/GSRendererNull.h | 9 +- plugins/GSdx/Renderers/Null/GSTextureNull.h | 15 +- plugins/GSdx/Renderers/OpenGL/GLLoader.h | 20 +- plugins/GSdx/Renderers/OpenGL/GLState.cpp | 8 +- plugins/GSdx/Renderers/OpenGL/GLState.h | 5 +- plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.cpp | 521 +++--- plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.h | 295 +-- .../GSdx/Renderers/OpenGL/GSRendererOGL.cpp | 508 ++++-- plugins/GSdx/Renderers/OpenGL/GSRendererOGL.h | 66 +- plugins/GSdx/Renderers/OpenGL/GSShaderOGL.cpp | 106 +- plugins/GSdx/Renderers/OpenGL/GSShaderOGL.h | 5 +- .../Renderers/OpenGL/GSTextureCacheOGL.cpp | 12 +- .../GSdx/Renderers/OpenGL/GSTextureCacheOGL.h | 2 +- .../GSdx/Renderers/OpenGL/GSTextureOGL.cpp | 161 +- plugins/GSdx/Renderers/OpenGL/GSTextureOGL.h | 87 +- .../Renderers/OpenGL/GSUniformBufferOGL.h | 40 +- .../GSdx/Renderers/OpenGL/GSVertexArrayOGL.h | 129 +- plugins/GSdx/Renderers/OpenGL/glext_extra.h | 336 ++-- plugins/GSdx/Renderers/SW/GSDrawScanline.cpp | 1294 +++++++------- plugins/GSdx/Renderers/SW/GSDrawScanline.h | 25 +- .../SW/GSDrawScanlineCodeGenerator.cpp | 65 +- .../SW/GSDrawScanlineCodeGenerator.h | 12 +- .../GSDrawScanlineCodeGenerator.x64.avx.cpp | 774 ++++---- .../GSDrawScanlineCodeGenerator.x64.avx2.cpp | 889 ++++----- .../GSDrawScanlineCodeGenerator.x86.avx.cpp | 841 +++++---- .../GSDrawScanlineCodeGenerator.x86.avx2.cpp | 859 ++++----- .../SW/GSDrawScanlineCodeGenerator.x86.cpp | 861 ++++----- plugins/GSdx/Renderers/SW/GSRasterizer.cpp | 337 ++-- plugins/GSdx/Renderers/SW/GSRasterizer.h | 58 +- plugins/GSdx/Renderers/SW/GSRendererSW.cpp | 583 +++--- plugins/GSdx/Renderers/SW/GSRendererSW.h | 11 +- .../GSdx/Renderers/SW/GSScanlineEnvironment.h | 187 +- .../Renderers/SW/GSSetupPrimCodeGenerator.cpp | 9 +- .../Renderers/SW/GSSetupPrimCodeGenerator.h | 7 +- .../SW/GSSetupPrimCodeGenerator.x64.avx.cpp | 64 +- .../SW/GSSetupPrimCodeGenerator.x64.avx2.cpp | 70 +- .../SW/GSSetupPrimCodeGenerator.x64.cpp | 66 +- .../SW/GSSetupPrimCodeGenerator.x86.avx.cpp | 66 +- .../SW/GSSetupPrimCodeGenerator.x86.avx2.cpp | 114 +- .../SW/GSSetupPrimCodeGenerator.x86.cpp | 66 +- .../GSdx/Renderers/SW/GSTextureCacheSW.cpp | 75 +- plugins/GSdx/Renderers/SW/GSTextureCacheSW.h | 2 +- plugins/GSdx/Renderers/SW/GSTextureSW.cpp | 6 +- plugins/GSdx/Renderers/SW/GSVertexSW.h | 67 +- plugins/GSdx/Window/GSCaptureDlg.cpp | 194 +- plugins/GSdx/Window/GSDialog.cpp | 50 +- plugins/GSdx/Window/GSDialog.h | 4 +- plugins/GSdx/Window/GSSetting.cpp | 2 +- plugins/GSdx/Window/GSSetting.h | 13 +- plugins/GSdx/Window/GSSettingsDlg.cpp | 545 +++--- plugins/GSdx/Window/GSSettingsDlg.h | 11 +- plugins/GSdx/Window/GSWnd.cpp | 2 +- plugins/GSdx/Window/GSWnd.h | 27 +- plugins/GSdx/Window/GSWndDX.cpp | 56 +- plugins/GSdx/Window/GSWndDX.h | 4 +- plugins/GSdx/Window/GSWndEGL.cpp | 109 +- plugins/GSdx/Window/GSWndEGL.h | 52 +- plugins/GSdx/Window/GSWndWGL.cpp | 104 +- plugins/GSdx/Window/GSWndWGL.h | 14 +- plugins/GSdx/config.h | 2 +- plugins/GSdx/linux_replay.cpp | 46 +- plugins/GSdx/stdafx.cpp | 42 +- plugins/GSdx/stdafx.h | 113 +- 128 files changed, 11631 insertions(+), 9678 deletions(-) diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index 17bca81d1d..49b656bc77 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -54,8 +54,8 @@ extern bool RunLinuxDialog(); #define PS2E_LT_GS 0x01 #define PS2E_GS_VERSION 0x0006 -#define PS2E_X86 0x01 // 32 bit -#define PS2E_X86_64 0x02 // 64 bit +#define PS2E_X86 0x01 // 32 bit +#define PS2E_X86_64 0x02 // 64 bit static GSRenderer* s_gs = NULL; static void (*s_irq)() = NULL; @@ -100,7 +100,7 @@ EXPORT_C GSsetBaseMem(uint8* mem) { s_basemem = mem; - if(s_gs) + if (s_gs) { s_gs->SetRegsMem(s_basemem); } @@ -113,7 +113,7 @@ EXPORT_C GSsetSettingsDir(const char* dir) EXPORT_C_(int) GSinit() { - if(!GSUtil::CheckSSE()) + if (!GSUtil::CheckSSE()) { return -1; } @@ -148,7 +148,7 @@ EXPORT_C GSshutdown() theApp.SetCurrentRendererType(GSRendererType::Undefined); #ifdef _WIN32 - if(SUCCEEDED(s_hr)) + if (SUCCEEDED(s_hr)) { ::CoUninitialize(); @@ -161,7 +161,8 @@ EXPORT_C GSclose() { gsopen_done = false; - if(s_gs == NULL) return; + if (s_gs == NULL) + return; s_gs->ResetDevice(); @@ -183,7 +184,7 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t bool old_api = *dsp == NULL; // Fresh start up or config file changed - if(renderer == GSRendererType::Undefined) + if (renderer == GSRendererType::Undefined) { renderer = static_cast(theApp.GetConfigI("Renderer")); #ifdef _WIN32 @@ -192,7 +193,7 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t #endif } - if(threads == -1) + if (threads == -1) { threads = theApp.GetConfigI("extrathreads"); } @@ -223,7 +224,8 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t #if defined(__unix__) // Note: EGL code use GLX otherwise maybe it could be also compatible with Windows // Yes OpenGL code isn't complicated enough ! - switch (GSWndEGL::SelectPlatform()) { + switch (GSWndEGL::SelectPlatform()) + { #if GS_EGL_X11 case EGL_PLATFORM_X11_KHR: wnds.push_back(std::make_shared()); @@ -257,12 +259,12 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t int w = theApp.GetConfigI("ModeWidth"); int h = theApp.GetConfigI("ModeHeight"); #if defined(__unix__) - void *win_handle = (void*)((uptr*)(dsp)+1); + void* win_handle = (void*)((uptr*)(dsp) + 1); #else - void *win_handle = *dsp; + void* win_handle = *dsp; #endif - for(auto& wnd : wnds) + for (auto& wnd : wnds) { try { @@ -290,7 +292,7 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t } } - if(!window) + if (!window) { GSclose(); @@ -302,29 +304,29 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t switch (renderer) { - default: + default: #ifdef _WIN32 - case GSRendererType::DX1011_HW: - dev = new GSDevice11(); - s_renderer_name = "D3D11"; - renderer_name = "Direct3D 11"; - break; + case GSRendererType::DX1011_HW: + dev = new GSDevice11(); + s_renderer_name = "D3D11"; + renderer_name = "Direct3D 11"; + break; #endif - case GSRendererType::OGL_HW: - dev = new GSDeviceOGL(); - s_renderer_name = "OGL"; - renderer_name = "OpenGL"; - break; - case GSRendererType::OGL_SW: - dev = new GSDeviceOGL(); - s_renderer_name = "SW"; - renderer_name = "Software"; - break; - case GSRendererType::Null: - dev = new GSDeviceNull(); - s_renderer_name = "NULL"; - renderer_name = "Null"; - break; + case GSRendererType::OGL_HW: + dev = new GSDeviceOGL(); + s_renderer_name = "OGL"; + renderer_name = "OpenGL"; + break; + case GSRendererType::OGL_SW: + dev = new GSDeviceOGL(); + s_renderer_name = "SW"; + renderer_name = "Software"; + break; + case GSRendererType::Null: + dev = new GSDeviceNull(); + s_renderer_name = "NULL"; + renderer_name = "Null"; + break; } printf("Current Renderer: %s\n", renderer_name.c_str()); @@ -338,21 +340,21 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t { switch (renderer) { - default: + default: #ifdef _WIN32 - case GSRendererType::DX1011_HW: - s_gs = (GSRenderer*)new GSRendererDX11(); - break; + case GSRendererType::DX1011_HW: + s_gs = (GSRenderer*)new GSRendererDX11(); + break; #endif - case GSRendererType::OGL_HW: - s_gs = (GSRenderer*)new GSRendererOGL(); - break; - case GSRendererType::OGL_SW: - s_gs = new GSRendererSW(threads); - break; - case GSRendererType::Null: - s_gs = new GSRendererNull(); - break; + case GSRendererType::OGL_HW: + s_gs = (GSRenderer*)new GSRendererOGL(); + break; + case GSRendererType::OGL_SW: + s_gs = new GSRendererSW(threads); + break; + case GSRendererType::Null: + s_gs = new GSRendererNull(); + break; } if (s_gs == NULL) return -1; @@ -375,10 +377,10 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t s_gs->SetIrqCallback(s_irq); s_gs->SetVSync(s_vsync); - if(!old_api) + if (!old_api) s_gs->SetMultithreaded(true); - if(!s_gs->CreateDevice(dev)) + if (!s_gs->CreateDevice(dev)) { // This probably means the user has DX11 configured with a video card that is only DX9 // compliant. Cound mean drivr issues of some sort also, but to be sure, that's the most @@ -389,24 +391,27 @@ static int _GSopen(void** dsp, const char* title, GSRendererType renderer, int t return -1; } - if (renderer == GSRendererType::OGL_HW && theApp.GetConfigI("debug_glsl_shader") == 2) { + if (renderer == GSRendererType::OGL_HW && theApp.GetConfigI("debug_glsl_shader") == 2) + { printf("GSdx: test OpenGL shader. Please wait...\n\n"); static_cast(s_gs->m_dev)->SelfShaderTest(); printf("\nGSdx: test OpenGL shader done. It will now exit\n"); return -1; } - + return 0; } -EXPORT_C_(void) GSosdLog(const char *utf8, uint32 color) +EXPORT_C_(void) GSosdLog(const char* utf8, uint32 color) { - if(s_gs && s_gs->m_dev) s_gs->m_dev->m_osd.Log(utf8); + if (s_gs && s_gs->m_dev) + s_gs->m_dev->m_osd.Log(utf8); } -EXPORT_C_(void) GSosdMonitor(const char *key, const char *value, uint32 color) +EXPORT_C_(void) GSosdMonitor(const char* key, const char* value, uint32 color) { - if(s_gs && s_gs->m_dev) s_gs->m_dev->m_osd.Monitor(key, value); + if (s_gs && s_gs->m_dev) + s_gs->m_dev->m_osd.Monitor(key, value); } EXPORT_C_(int) GSopen2(void** dsp, uint32 flags) @@ -422,27 +427,25 @@ EXPORT_C_(int) GSopen2(void** dsp, uint32 flags) // SW -> HW and HW -> SW (F9 Switch) switch (current_renderer) { - #ifdef _WIN32 +#ifdef _WIN32 case GSRendererType::DX1011_HW: current_renderer = GSRendererType::OGL_SW; break; - #endif +#endif case GSRendererType::OGL_SW: - #ifdef _WIN32 +#ifdef _WIN32 { - const auto config_renderer = static_cast( - theApp.GetConfigI("Renderer") - ); + const auto config_renderer = static_cast(theApp.GetConfigI("Renderer")); if (current_renderer == config_renderer) current_renderer = GSUtil::GetBestRenderer(); else current_renderer = config_renderer; } - #else +#else current_renderer = GSRendererType::OGL_HW; - #endif - break; +#endif + break; case GSRendererType::OGL_HW: current_renderer = GSRendererType::OGL_SW; break; @@ -456,7 +459,7 @@ EXPORT_C_(int) GSopen2(void** dsp, uint32 flags) int retval = _GSopen(dsp, "", current_renderer); if (s_gs != NULL) - s_gs->SetAspectRatio(0); // PCSX2 manages the aspect ratios + s_gs->SetAspectRatio(0); // PCSX2 manages the aspect ratios gsopen_done = true; @@ -471,7 +474,7 @@ EXPORT_C_(int) GSopen(void** dsp, const char* title, int mt) s_vsync = theApp.GetConfigI("vsync"); - if(mt == 2) + if (mt == 2) { // pcsx2 sent a switch renderer request mt = 1; @@ -487,7 +490,7 @@ EXPORT_C_(int) GSopen(void** dsp, const char* title, int mt) int retval = _GSopen(dsp, title, renderer); - if(retval == 0 && s_gs) + if (retval == 0 && s_gs) { s_gs->SetMultithreaded(!!mt); } @@ -642,13 +645,13 @@ EXPORT_C GSvsync(int field) { #ifdef _WIN32 - if(s_gs->m_wnd->IsManaged()) + if (s_gs->m_wnd->IsManaged()) { MSG msg; memset(&msg, 0, sizeof(msg)); - while(msg.message != WM_QUIT && PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) + while (msg.message != WM_QUIT && PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) { TranslateMessage(&msg); DispatchMessage(&msg); @@ -701,7 +704,7 @@ EXPORT_C GSkeyEvent(GSKeyEventData* e) { try { - if(gsopen_done) + if (gsopen_done) { s_gs->KeyEvent(e); } @@ -715,15 +718,15 @@ EXPORT_C_(int) GSfreeze(int mode, GSFreezeData* data) { try { - if(mode == FREEZE_SAVE) + if (mode == FREEZE_SAVE) { return s_gs->Freeze(data, false); } - else if(mode == FREEZE_SIZE) + else if (mode == FREEZE_SIZE) { return s_gs->Freeze(data, true); } - else if(mode == FREEZE_LOAD) + else if (mode == FREEZE_LOAD) { return s_gs->Defrost(data); } @@ -739,13 +742,14 @@ EXPORT_C GSconfigure() { try { - if(!GSUtil::CheckSSE()) return; + if (!GSUtil::CheckSSE()) + return; theApp.Init(); #ifdef _WIN32 GSDialog::InitCommonControls(); - if(GSSettingsDlg().DoModal() == IDOK) + if (GSSettingsDlg().DoModal() == IDOK) { // Force a reload of the gs state theApp.SetCurrentRendererType(GSRendererType::Undefined); @@ -759,7 +763,8 @@ EXPORT_C GSconfigure() // We can convince it that touching that pool would be unsafe by running all GTK calls within a CFRunLoop // (Blocks submitted to the main queue by dispatch_async are run by its CFRunLoop) dispatch_async(dispatch_get_main_queue(), ^{ - if (RunLinuxDialog()) { + if (RunLinuxDialog()) + { theApp.ReloadConfig(); // Force a reload of the gs state theApp.SetCurrentRendererType(GSRendererType::Undefined); @@ -767,22 +772,23 @@ EXPORT_C GSconfigure() }); #else - if (RunLinuxDialog()) { + if (RunLinuxDialog()) + { theApp.ReloadConfig(); // Force a reload of the gs state theApp.SetCurrentRendererType(GSRendererType::Undefined); } #endif - - } catch (GSDXRecoverableError) + } + catch (GSDXRecoverableError) { } } EXPORT_C_(int) GStest() { - if(!GSUtil::CheckSSE()) + if (!GSUtil::CheckSSE()) return -1; return 0; @@ -796,16 +802,17 @@ EXPORT_C GSirqCallback(void (*irq)()) { s_irq = irq; - if(s_gs) + if (s_gs) { s_gs->SetIrqCallback(s_irq); } } -void pt(const char* str){ - struct tm *current; +void pt(const char* str) +{ + struct tm* current; time_t now; - + time(&now); current = localtime(&now); @@ -814,12 +821,14 @@ void pt(const char* str){ EXPORT_C_(bool) GSsetupRecording(std::string& filename) { - if (s_gs == NULL) { + if (s_gs == NULL) + { printf("GSdx: no s_gs for recording\n"); return false; } #if defined(__unix__) || defined(__APPLE__) - if (!theApp.GetConfigB("capture_enabled")) { + if (!theApp.GetConfigB("capture_enabled")) + { printf("GSdx: Recording is disabled\n"); return false; } @@ -865,7 +874,7 @@ EXPORT_C GSgetTitleInfo2(char* dest, size_t length) s.append(" | ").append(s_gs->m_GStitleInfoBuffer); - if(s.size() > length - 1) + if (s.size() > length - 1) { s = s.substr(0, length - 1); } @@ -883,7 +892,7 @@ EXPORT_C GSsetVsync(int vsync) { s_vsync = vsync; - if(s_gs) + if (s_gs) { s_gs->SetVSync(s_vsync); } @@ -893,7 +902,7 @@ EXPORT_C GSsetExclusive(int enabled) { s_exclusive = !!enabled; - if(s_gs) + if (s_gs) { s_gs->SetVSync(s_vsync); } @@ -914,7 +923,8 @@ public: : m_console(NULL) , m_title(title) { - if(open) Open(); + if (open) + Open(); } Console::~Console() @@ -924,7 +934,7 @@ public: void Console::Open() { - if(m_console == NULL) + if (m_console == NULL) { CONSOLE_SCREEN_BUFFER_INFO csbiInfo; @@ -962,7 +972,7 @@ public: void Console::Close() { - if(m_console != NULL) + if (m_console != NULL) { FreeConsole(); @@ -983,10 +993,15 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) char* start = lpszCmdLine; char* end = NULL; long n = strtol(lpszCmdLine, &end, 10); - if(end > start) {renderer = static_cast(n); lpszCmdLine = end;} + if (end > start) + { + renderer = static_cast(n); + lpszCmdLine = end; + } } - while(*lpszCmdLine == ' ') lpszCmdLine++; + while (*lpszCmdLine == ' ') + lpszCmdLine++; ::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS); @@ -1027,40 +1042,47 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) GSvsync(1); - struct Packet {uint8 type, param; uint32 size, addr; std::vector buff;}; + struct Packet + { + uint8 type, param; + uint32 size, addr; + std::vector buff; + }; auto read_packet = [&file](uint8 type) { Packet p; p.type = type; - switch(p.type) { - case 0: - file->Read(&p.param, 1); - file->Read(&p.size, 4); - switch(p.param) { + switch (p.type) + { case 0: - p.buff.resize(0x4000); - p.addr = 0x4000 - p.size; - file->Read(&p.buff[p.addr], p.size); + file->Read(&p.param, 1); + file->Read(&p.size, 4); + switch (p.param) + { + case 0: + p.buff.resize(0x4000); + p.addr = 0x4000 - p.size; + file->Read(&p.buff[p.addr], p.size); + break; + case 1: + case 2: + case 3: + p.buff.resize(p.size); + file->Read(p.buff.data(), p.size); + break; + } break; case 1: - case 2: - case 3: - p.buff.resize(p.size); - file->Read(p.buff.data(), p.size); + file->Read(&p.param, 1); + break; + case 2: + file->Read(&p.size, 4); + break; + case 3: + p.buff.resize(0x2000); + file->Read(p.buff.data(), 0x2000); break; - } - break; - case 1: - file->Read(&p.param, 1); - break; - case 2: - file->Read(&p.size, 4); - break; - case 3: - p.buff.resize(0x2000); - file->Read(p.buff.data(), 0x2000); - break; } return p; @@ -1068,37 +1090,37 @@ EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) std::list packets; uint8 type; - while(file->Read(&type, 1)) + while (file->Read(&type, 1)) packets.push_back(read_packet(type)); Sleep(100); std::vector buff; - while(IsWindowVisible(hWnd)) + while (IsWindowVisible(hWnd)) { - for(auto &p : packets) + for (auto& p : packets) { - switch(p.type) + switch (p.type) { - case 0: - switch(p.param) - { - case 0: GSgifTransfer1(p.buff.data(), p.addr); break; - case 1: GSgifTransfer2(p.buff.data(), p.size / 16); break; - case 2: GSgifTransfer3(p.buff.data(), p.size / 16); break; - case 3: GSgifTransfer(p.buff.data(), p.size / 16); break; - } - break; - case 1: - GSvsync(p.param); - break; - case 2: - if(buff.size() < p.size) buff.resize(p.size); - GSreadFIFO2(p.buff.data(), p.size / 16); - break; - case 3: - memcpy(regs.data(), p.buff.data(), 0x2000); - break; + case 0: + switch(p.param) + { + case 0: GSgifTransfer1(p.buff.data(), p.addr); break; + case 1: GSgifTransfer2(p.buff.data(), p.size / 16); break; + case 2: GSgifTransfer3(p.buff.data(), p.size / 16); break; + case 3: GSgifTransfer(p.buff.data(), p.size / 16); break; + } + break; + case 1: + GSvsync(p.param); + break; + case 2: + if(buff.size() < p.size) buff.resize(p.size); + GSreadFIFO2(p.buff.data(), p.size / 16); + break; + case 3: + memcpy(regs.data(), p.buff.data(), 0x2000); + break; } } } @@ -1115,9 +1137,9 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow Console console("GSdx", true); - if(1) + if (1) { - GSLocalMemory* mem = new GSLocalMemory(); + GSLocalMemory* mem = new GSLocalMemory(); static struct {int psm; const char* name;} s_format[] = { @@ -1138,11 +1160,12 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32); - for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i; + for (int i = 0; i < 1024 * 1024 * 4; i++) + ptr[i] = (uint8)i; // - for(int tbw = 5; tbw <= 10; tbw++) + for (int tbw = 5; tbw <= 10; tbw++) { int n = 256 << ((10 - tbw) * 2); @@ -1151,7 +1174,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow printf("%d x %d\n\n", w, h); - for(size_t i = 0; i < countof(s_format); i++) + for (size_t i = 0; i < countof(s_format); i++) { const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[s_format[i].psm]; @@ -1203,7 +1226,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow start = clock(); - for(int j = 0; j < n; j++) + for (int j = 0; j < n; j++) { int x = 0; int y = 0; @@ -1217,7 +1240,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow start = clock(); - for(int j = 0; j < n; j++) + for (int j = 0; j < n; j++) { int x = 0; int y = 0; @@ -1233,7 +1256,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow start = clock(); - for(int j = 0; j < n; j++) + for (int j = 0; j < n; j++) { (mem->*rtx)(off, r, ptr, w * 4, TEXA); } @@ -1242,11 +1265,11 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow printf("%6d %6d ", (int)((float)len * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000)); - if(psm.pal > 0) + if (psm.pal > 0) { start = clock(); - for(int j = 0; j < n; j++) + for (int j = 0; j < n; j++) { (mem->*rtxP)(off, r, ptr, w, TEXA); } @@ -1269,13 +1292,14 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow // - if(0) + if (0) { GSLocalMemory* mem = new GSLocalMemory(); uint8* ptr = (uint8*)_aligned_malloc(1024 * 1024 * 4, 32); - for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (uint8)i; + for (int i = 0; i < 1024 * 1024 * 4; i++) + ptr[i] = (uint8)i; const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[PSM_PSMCT32]; @@ -1320,7 +1344,7 @@ inline unsigned long timeGetTime() { struct timespec t; clock_gettime(CLOCK_REALTIME, &t); - return (unsigned long)(t.tv_sec*1000 + t.tv_nsec/1000000); + return (unsigned long)(t.tv_sec * 1000 + t.tv_nsec / 1000000); } // Note @@ -1344,7 +1368,12 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) return; } - struct Packet {uint8 type, param; uint32 size, addr; std::vector buff;}; + struct Packet + { + uint8 type, param; + uint32 size, addr; + std::vector buff; + }; std::list packets; std::vector buff; @@ -1356,7 +1385,8 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) int finished = theApp.GetConfigI("linux_replay"); bool repack_dump = (finished < 0); - if (theApp.GetConfigI("dump")) { + if (theApp.GetConfigI("dump")) + { fprintf(stderr, "Dump is enabled. Replay will be disabled\n"); finished = 1; } @@ -1365,19 +1395,21 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) void* hWnd = NULL; int err = _GSopen((void**)&hWnd, "", m_renderer); - if (err != 0) { + if (err != 0) + { fprintf(stderr, "Error failed to GSopen\n"); return; } - if (s_gs->m_wnd == NULL) return; + if (s_gs->m_wnd == NULL) + return; { // Read .gs content std::string f(lpszCmdLine); - bool is_xz = (f.size() >= 4) && (f.compare(f.size()-3, 3, ".xz") == 0); + bool is_xz = (f.size() >= 4) && (f.compare(f.size() - 3, 3, ".xz") == 0); if (is_xz) - f.replace(f.end()-6, f.end(), "_repack.gs"); + f.replace(f.end() - 6, f.end(), "_repack.gs"); else - f.replace(f.end()-3, f.end(), "_repack.gs"); + f.replace(f.end() - 3, f.end(), "_repack.gs"); GSDumpFile* file = is_xz ? (GSDumpFile*) new GSDumpLzma(lpszCmdLine, repack_dump ? f.c_str() : nullptr) @@ -1393,57 +1425,57 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) file->Read(fd.data, fd.size); GSfreeze(FREEZE_LOAD, &fd); - delete [] fd.data; + delete[] fd.data; file->Read(regs, 0x2000); uint8 type; - while(file->Read(&type, 1)) + while (file->Read(&type, 1)) { Packet* p = new Packet(); p->type = type; - switch(type) + switch (type) { - case 0: - file->Read(&p->param, 1); - file->Read(&p->size, 4); - - switch(p->param) - { case 0: - p->buff.resize(0x4000); - p->addr = 0x4000 - p->size; - file->Read(&p->buff[p->addr], p->size); + file->Read(&p->param, 1); + file->Read(&p->size, 4); + + switch (p->param) + { + case 0: + p->buff.resize(0x4000); + p->addr = 0x4000 - p->size; + file->Read(&p->buff[p->addr], p->size); + break; + case 1: + case 2: + case 3: + p->buff.resize(p->size); + file->Read(&p->buff[0], p->size); + break; + } + break; + case 1: - case 2: - case 3: - p->buff.resize(p->size); - file->Read(&p->buff[0], p->size); + file->Read(&p->param, 1); + frame_number++; + break; - } - break; + case 2: + file->Read(&p->size, 4); - case 1: - file->Read(&p->param, 1); - frame_number++; + break; - break; + case 3: + p->buff.resize(0x2000); - case 2: - file->Read(&p->size, 4); + file->Read(&p->buff[0], 0x2000); - break; - - case 3: - p->buff.resize(0x2000); - - file->Read(&p->buff[0], 0x2000); - - break; + break; } packets.push_back(p); @@ -1463,17 +1495,17 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) // Init vsync stuff GSvsync(1); - while(finished > 0) + while (finished > 0) { - for(auto i = packets.begin(); i != packets.end(); i++) + for (auto i = packets.begin(); i != packets.end(); i++) { Packet* p = *i; - switch(p->type) + switch (p->type) { case 0: - switch(p->param) + switch (p->param) { case 0: GSgifTransfer1(&p->buff[0], p->addr); break; case 1: GSgifTransfer2(&p->buff[0], p->size / 16); break; @@ -1492,7 +1524,8 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) case 2: - if(buff.size() < p->size) buff.resize(p->size); + if (buff.size() < p->size) + buff.resize(p->size); GSreadFIFO2(&buff[0], p->size / 16); @@ -1506,11 +1539,16 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) } } - if (finished >= 200) { + if (finished >= 200) + { ; // Nop for Nvidia Profiler - } else if (finished > 90) { + } + else if (finished > 90) + { sleep(1); - } else { + } + else + { finished--; } } @@ -1520,13 +1558,12 @@ EXPORT_C GSReplay(char* lpszCmdLine, int renderer) #ifdef ENABLE_OGL_DEBUG_MEM_BW unsigned long total_frame_nb = std::max(1l, frame_number) << 10; fprintf(stderr, "memory bandwith. T: %f KB/f. V: %f KB/f. U: %f KB/f\n", - (float)g_real_texture_upload_byte/(float)total_frame_nb, - (float)g_vertex_upload_byte/(float)total_frame_nb, - (float)g_uniform_upload_byte/(float)total_frame_nb - ); + (float)g_real_texture_upload_byte / (float)total_frame_nb, + (float)g_vertex_upload_byte / (float)total_frame_nb, + (float)g_uniform_upload_byte / (float)total_frame_nb); #endif - for(auto i = packets.begin(); i != packets.end(); i++) + for (auto i = packets.begin(); i != packets.end(); i++) { delete *i; } diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index cd56bb044e..a080a3a72e 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -43,179 +43,179 @@ enum GS_PRIM { - GS_POINTLIST = 0, - GS_LINELIST = 1, - GS_LINESTRIP = 2, - GS_TRIANGLELIST = 3, - GS_TRIANGLESTRIP = 4, - GS_TRIANGLEFAN = 5, - GS_SPRITE = 6, - GS_INVALID = 7, + GS_POINTLIST = 0, + GS_LINELIST = 1, + GS_LINESTRIP = 2, + GS_TRIANGLELIST = 3, + GS_TRIANGLESTRIP = 4, + GS_TRIANGLEFAN = 5, + GS_SPRITE = 6, + GS_INVALID = 7, }; enum GS_PRIM_CLASS { - GS_POINT_CLASS = 0, - GS_LINE_CLASS = 1, - GS_TRIANGLE_CLASS = 2, - GS_SPRITE_CLASS = 3, - GS_INVALID_CLASS = 7, + GS_POINT_CLASS = 0, + GS_LINE_CLASS = 1, + GS_TRIANGLE_CLASS = 2, + GS_SPRITE_CLASS = 3, + GS_INVALID_CLASS = 7, }; enum GIF_REG { - GIF_REG_PRIM = 0x00, - GIF_REG_RGBA = 0x01, - GIF_REG_STQ = 0x02, - GIF_REG_UV = 0x03, - GIF_REG_XYZF2 = 0x04, - GIF_REG_XYZ2 = 0x05, - GIF_REG_TEX0_1 = 0x06, - GIF_REG_TEX0_2 = 0x07, - GIF_REG_CLAMP_1 = 0x08, - GIF_REG_CLAMP_2 = 0x09, - GIF_REG_FOG = 0x0a, - GIF_REG_INVALID = 0x0b, - GIF_REG_XYZF3 = 0x0c, - GIF_REG_XYZ3 = 0x0d, - GIF_REG_A_D = 0x0e, - GIF_REG_NOP = 0x0f, + GIF_REG_PRIM = 0x00, + GIF_REG_RGBA = 0x01, + GIF_REG_STQ = 0x02, + GIF_REG_UV = 0x03, + GIF_REG_XYZF2 = 0x04, + GIF_REG_XYZ2 = 0x05, + GIF_REG_TEX0_1 = 0x06, + GIF_REG_TEX0_2 = 0x07, + GIF_REG_CLAMP_1 = 0x08, + GIF_REG_CLAMP_2 = 0x09, + GIF_REG_FOG = 0x0a, + GIF_REG_INVALID = 0x0b, + GIF_REG_XYZF3 = 0x0c, + GIF_REG_XYZ3 = 0x0d, + GIF_REG_A_D = 0x0e, + GIF_REG_NOP = 0x0f, }; enum GIF_REG_COMPLEX { - GIF_REG_STQRGBAXYZF2 = 0x00, - GIF_REG_STQRGBAXYZ2 = 0x01, + GIF_REG_STQRGBAXYZF2 = 0x00, + GIF_REG_STQRGBAXYZ2 = 0x01, }; enum GIF_A_D_REG { - GIF_A_D_REG_PRIM = 0x00, - GIF_A_D_REG_RGBAQ = 0x01, - GIF_A_D_REG_ST = 0x02, - GIF_A_D_REG_UV = 0x03, - GIF_A_D_REG_XYZF2 = 0x04, - GIF_A_D_REG_XYZ2 = 0x05, - GIF_A_D_REG_TEX0_1 = 0x06, - GIF_A_D_REG_TEX0_2 = 0x07, - GIF_A_D_REG_CLAMP_1 = 0x08, - GIF_A_D_REG_CLAMP_2 = 0x09, - GIF_A_D_REG_FOG = 0x0a, - GIF_A_D_REG_XYZF3 = 0x0c, - GIF_A_D_REG_XYZ3 = 0x0d, - GIF_A_D_REG_NOP = 0x0f, - GIF_A_D_REG_TEX1_1 = 0x14, - GIF_A_D_REG_TEX1_2 = 0x15, - GIF_A_D_REG_TEX2_1 = 0x16, - GIF_A_D_REG_TEX2_2 = 0x17, - GIF_A_D_REG_XYOFFSET_1 = 0x18, - GIF_A_D_REG_XYOFFSET_2 = 0x19, - GIF_A_D_REG_PRMODECONT = 0x1a, - GIF_A_D_REG_PRMODE = 0x1b, - GIF_A_D_REG_TEXCLUT = 0x1c, - GIF_A_D_REG_SCANMSK = 0x22, - GIF_A_D_REG_MIPTBP1_1 = 0x34, - GIF_A_D_REG_MIPTBP1_2 = 0x35, - GIF_A_D_REG_MIPTBP2_1 = 0x36, - GIF_A_D_REG_MIPTBP2_2 = 0x37, - GIF_A_D_REG_TEXA = 0x3b, - GIF_A_D_REG_FOGCOL = 0x3d, - GIF_A_D_REG_TEXFLUSH = 0x3f, - GIF_A_D_REG_SCISSOR_1 = 0x40, - GIF_A_D_REG_SCISSOR_2 = 0x41, - GIF_A_D_REG_ALPHA_1 = 0x42, - GIF_A_D_REG_ALPHA_2 = 0x43, - GIF_A_D_REG_DIMX = 0x44, - GIF_A_D_REG_DTHE = 0x45, - GIF_A_D_REG_COLCLAMP = 0x46, - GIF_A_D_REG_TEST_1 = 0x47, - GIF_A_D_REG_TEST_2 = 0x48, - GIF_A_D_REG_PABE = 0x49, - GIF_A_D_REG_FBA_1 = 0x4a, - GIF_A_D_REG_FBA_2 = 0x4b, - GIF_A_D_REG_FRAME_1 = 0x4c, - GIF_A_D_REG_FRAME_2 = 0x4d, - GIF_A_D_REG_ZBUF_1 = 0x4e, - GIF_A_D_REG_ZBUF_2 = 0x4f, - GIF_A_D_REG_BITBLTBUF = 0x50, - GIF_A_D_REG_TRXPOS = 0x51, - GIF_A_D_REG_TRXREG = 0x52, - GIF_A_D_REG_TRXDIR = 0x53, - GIF_A_D_REG_HWREG = 0x54, - GIF_A_D_REG_SIGNAL = 0x60, - GIF_A_D_REG_FINISH = 0x61, - GIF_A_D_REG_LABEL = 0x62, + GIF_A_D_REG_PRIM = 0x00, + GIF_A_D_REG_RGBAQ = 0x01, + GIF_A_D_REG_ST = 0x02, + GIF_A_D_REG_UV = 0x03, + GIF_A_D_REG_XYZF2 = 0x04, + GIF_A_D_REG_XYZ2 = 0x05, + GIF_A_D_REG_TEX0_1 = 0x06, + GIF_A_D_REG_TEX0_2 = 0x07, + GIF_A_D_REG_CLAMP_1 = 0x08, + GIF_A_D_REG_CLAMP_2 = 0x09, + GIF_A_D_REG_FOG = 0x0a, + GIF_A_D_REG_XYZF3 = 0x0c, + GIF_A_D_REG_XYZ3 = 0x0d, + GIF_A_D_REG_NOP = 0x0f, + GIF_A_D_REG_TEX1_1 = 0x14, + GIF_A_D_REG_TEX1_2 = 0x15, + GIF_A_D_REG_TEX2_1 = 0x16, + GIF_A_D_REG_TEX2_2 = 0x17, + GIF_A_D_REG_XYOFFSET_1 = 0x18, + GIF_A_D_REG_XYOFFSET_2 = 0x19, + GIF_A_D_REG_PRMODECONT = 0x1a, + GIF_A_D_REG_PRMODE = 0x1b, + GIF_A_D_REG_TEXCLUT = 0x1c, + GIF_A_D_REG_SCANMSK = 0x22, + GIF_A_D_REG_MIPTBP1_1 = 0x34, + GIF_A_D_REG_MIPTBP1_2 = 0x35, + GIF_A_D_REG_MIPTBP2_1 = 0x36, + GIF_A_D_REG_MIPTBP2_2 = 0x37, + GIF_A_D_REG_TEXA = 0x3b, + GIF_A_D_REG_FOGCOL = 0x3d, + GIF_A_D_REG_TEXFLUSH = 0x3f, + GIF_A_D_REG_SCISSOR_1 = 0x40, + GIF_A_D_REG_SCISSOR_2 = 0x41, + GIF_A_D_REG_ALPHA_1 = 0x42, + GIF_A_D_REG_ALPHA_2 = 0x43, + GIF_A_D_REG_DIMX = 0x44, + GIF_A_D_REG_DTHE = 0x45, + GIF_A_D_REG_COLCLAMP = 0x46, + GIF_A_D_REG_TEST_1 = 0x47, + GIF_A_D_REG_TEST_2 = 0x48, + GIF_A_D_REG_PABE = 0x49, + GIF_A_D_REG_FBA_1 = 0x4a, + GIF_A_D_REG_FBA_2 = 0x4b, + GIF_A_D_REG_FRAME_1 = 0x4c, + GIF_A_D_REG_FRAME_2 = 0x4d, + GIF_A_D_REG_ZBUF_1 = 0x4e, + GIF_A_D_REG_ZBUF_2 = 0x4f, + GIF_A_D_REG_BITBLTBUF = 0x50, + GIF_A_D_REG_TRXPOS = 0x51, + GIF_A_D_REG_TRXREG = 0x52, + GIF_A_D_REG_TRXDIR = 0x53, + GIF_A_D_REG_HWREG = 0x54, + GIF_A_D_REG_SIGNAL = 0x60, + GIF_A_D_REG_FINISH = 0x61, + GIF_A_D_REG_LABEL = 0x62, }; enum GIF_FLG { - GIF_FLG_PACKED = 0, - GIF_FLG_REGLIST = 1, - GIF_FLG_IMAGE = 2, - GIF_FLG_IMAGE2 = 3 + GIF_FLG_PACKED = 0, + GIF_FLG_REGLIST = 1, + GIF_FLG_IMAGE = 2, + GIF_FLG_IMAGE2 = 3, }; enum GS_PSM { - PSM_PSMCT32 = 0, // 0000-0000 - PSM_PSMCT24 = 1, // 0000-0001 - PSM_PSMCT16 = 2, // 0000-0010 - PSM_PSMCT16S = 10, // 0000-1010 - PSM_PSGPU24 = 18, // 0001-0010 - PSM_PSMT8 = 19, // 0001-0011 - PSM_PSMT4 = 20, // 0001-0100 - PSM_PSMT8H = 27, // 0001-1011 - PSM_PSMT4HL = 36, // 0010-0100 - PSM_PSMT4HH = 44, // 0010-1100 - PSM_PSMZ32 = 48, // 0011-0000 - PSM_PSMZ24 = 49, // 0011-0001 - PSM_PSMZ16 = 50, // 0011-0010 - PSM_PSMZ16S = 58, // 0011-1010 + PSM_PSMCT32 = 0, // 0000-0000 + PSM_PSMCT24 = 1, // 0000-0001 + PSM_PSMCT16 = 2, // 0000-0010 + PSM_PSMCT16S = 10, // 0000-1010 + PSM_PSGPU24 = 18, // 0001-0010 + PSM_PSMT8 = 19, // 0001-0011 + PSM_PSMT4 = 20, // 0001-0100 + PSM_PSMT8H = 27, // 0001-1011 + PSM_PSMT4HL = 36, // 0010-0100 + PSM_PSMT4HH = 44, // 0010-1100 + PSM_PSMZ32 = 48, // 0011-0000 + PSM_PSMZ24 = 49, // 0011-0001 + PSM_PSMZ16 = 50, // 0011-0010 + PSM_PSMZ16S = 58, // 0011-1010 }; enum GS_TFX { - TFX_MODULATE = 0, - TFX_DECAL = 1, - TFX_HIGHLIGHT = 2, - TFX_HIGHLIGHT2 = 3, - TFX_NONE = 4, + TFX_MODULATE = 0, + TFX_DECAL = 1, + TFX_HIGHLIGHT = 2, + TFX_HIGHLIGHT2 = 3, + TFX_NONE = 4, }; enum GS_CLAMP { - CLAMP_REPEAT = 0, - CLAMP_CLAMP = 1, - CLAMP_REGION_CLAMP = 2, - CLAMP_REGION_REPEAT = 3, + CLAMP_REPEAT = 0, + CLAMP_CLAMP = 1, + CLAMP_REGION_CLAMP = 2, + CLAMP_REGION_REPEAT = 3, }; enum GS_ZTST { - ZTST_NEVER = 0, - ZTST_ALWAYS = 1, - ZTST_GEQUAL = 2, - ZTST_GREATER = 3, + ZTST_NEVER = 0, + ZTST_ALWAYS = 1, + ZTST_GEQUAL = 2, + ZTST_GREATER = 3, }; enum GS_ATST { - ATST_NEVER = 0, - ATST_ALWAYS = 1, - ATST_LESS = 2, - ATST_LEQUAL = 3, - ATST_EQUAL = 4, - ATST_GEQUAL = 5, - ATST_GREATER = 6, - ATST_NOTEQUAL = 7, + ATST_NEVER = 0, + ATST_ALWAYS = 1, + ATST_LESS = 2, + ATST_LEQUAL = 3, + ATST_EQUAL = 4, + ATST_GEQUAL = 5, + ATST_GREATER = 6, + ATST_NOTEQUAL = 7, }; enum GS_AFAIL { - AFAIL_KEEP = 0, - AFAIL_FB_ONLY = 1, - AFAIL_ZB_ONLY = 2, - AFAIL_RGB_ONLY = 3, + AFAIL_KEEP = 0, + AFAIL_FB_ONLY = 1, + AFAIL_ZB_ONLY = 2, + AFAIL_RGB_ONLY = 3, }; enum class GS_MIN_FILTER : uint8_t @@ -249,28 +249,28 @@ enum class GSRendererType : int8_t #define REG32(name) \ -union name \ -{ \ - uint32 u32; \ - struct { \ + union name \ + { \ + uint32 u32; \ + struct { \ -#define REG64(name) \ -union name \ -{ \ - uint64 u64; \ - uint32 u32[2]; \ - void operator = (const GSVector4i& v) {GSVector4i::storel(this, v);} \ - bool operator == (const union name& r) const {return ((GSVector4i)r).eq(*this);} \ - bool operator != (const union name& r) const {return !((GSVector4i)r).eq(*this);} \ - operator GSVector4i() const {return GSVector4i::loadl(this);} \ - struct { \ +#define REG64(name) \ + union name \ + { \ + uint64 u64; \ + uint32 u32[2]; \ + void operator = (const GSVector4i& v) {GSVector4i::storel(this, v);} \ + bool operator == (const union name& r) const {return ((GSVector4i)r).eq(*this);} \ + bool operator != (const union name& r) const {return !((GSVector4i)r).eq(*this);} \ + operator GSVector4i() const {return GSVector4i::loadl(this);} \ + struct { -#define REG128(name)\ -union name \ -{ \ - uint64 u64[2]; \ - uint32 u32[4]; \ - struct { \ +#define REG128(name) \ + union name \ + { \ + uint64 u64[2]; \ + uint32 u32[4]; \ + struct { #define REG32_(prefix, name) REG32(prefix##name) #define REG64_(prefix, name) REG64(prefix##name) @@ -280,22 +280,22 @@ union name \ #define REG_END2 }; #define REG32_SET(name) \ -union name \ -{ \ - uint32 u32; \ +union name \ +{ \ + uint32 u32; #define REG64_SET(name) \ -union name \ -{ \ - uint64 u64; \ - uint32 u32[2]; \ + union name \ + { \ + uint64 u64; \ + uint32 u32[2]; -#define REG128_SET(name)\ -union name \ -{ \ - __m128i m128; \ - uint64 u64[2]; \ - uint32 u32[4]; \ +#define REG128_SET(name) \ + union name \ + { \ + __m128i m128; \ + uint64 u64[2]; \ + uint32 u32[4]; #define REG_SET_END }; @@ -307,108 +307,108 @@ REG64_(GSReg, BGCOLOR) REG_END REG64_(GSReg, BUSDIR) - uint32 DIR:1; - uint32 _PAD1:31; - uint32 _PAD2:32; + uint32 DIR : 1; + uint32 _PAD1 : 31; + uint32 _PAD2 : 32; REG_END REG64_(GSReg, CSR) - uint32 rSIGNAL:1; - uint32 rFINISH:1; - uint32 rHSINT:1; - uint32 rVSINT:1; - uint32 rEDWINT:1; - uint32 rZERO1:1; - uint32 rZERO2:1; - uint32 r_PAD1:1; - uint32 rFLUSH:1; - uint32 rRESET:1; - uint32 r_PAD2:2; - uint32 rNFIELD:1; - uint32 rFIELD:1; - uint32 rFIFO:2; - uint32 rREV:8; - uint32 rID:8; - uint32 wSIGNAL:1; - uint32 wFINISH:1; - uint32 wHSINT:1; - uint32 wVSINT:1; - uint32 wEDWINT:1; - uint32 wZERO1:1; - uint32 wZERO2:1; - uint32 w_PAD1:1; - uint32 wFLUSH:1; - uint32 wRESET:1; - uint32 w_PAD2:2; - uint32 wNFIELD:1; - uint32 wFIELD:1; - uint32 wFIFO:2; - uint32 wREV:8; - uint32 wID:8; + uint32 rSIGNAL : 1; + uint32 rFINISH : 1; + uint32 rHSINT : 1; + uint32 rVSINT : 1; + uint32 rEDWINT : 1; + uint32 rZERO1 : 1; + uint32 rZERO2 : 1; + uint32 r_PAD1 : 1; + uint32 rFLUSH : 1; + uint32 rRESET : 1; + uint32 r_PAD2 : 2; + uint32 rNFIELD : 1; + uint32 rFIELD : 1; + uint32 rFIFO : 2; + uint32 rREV : 8; + uint32 rID : 8; + uint32 wSIGNAL : 1; + uint32 wFINISH : 1; + uint32 wHSINT : 1; + uint32 wVSINT : 1; + uint32 wEDWINT : 1; + uint32 wZERO1 : 1; + uint32 wZERO2 : 1; + uint32 w_PAD1 : 1; + uint32 wFLUSH : 1; + uint32 wRESET : 1; + uint32 w_PAD2 : 2; + uint32 wNFIELD : 1; + uint32 wFIELD : 1; + uint32 wFIFO : 2; + uint32 wREV : 8; + uint32 wID : 8; REG_END REG64_(GSReg, DISPFB) // (-1/2) - uint32 FBP:9; - uint32 FBW:6; - uint32 PSM:5; - uint32 _PAD:12; - uint32 DBX:11; - uint32 DBY:11; - uint32 _PAD2:10; + uint32 FBP : 9; + uint32 FBW : 6; + uint32 PSM : 5; + uint32 _PAD : 12; + uint32 DBX : 11; + uint32 DBY : 11; + uint32 _PAD2 : 10; REG_END2 - uint32 Block() const {return FBP << 5;} + uint32 Block() const { return FBP << 5; } REG_END2 REG64_(GSReg, DISPLAY) // (-1/2) - uint32 DX:12; - uint32 DY:11; - uint32 MAGH:4; - uint32 MAGV:2; - uint32 _PAD:3; - uint32 DW:12; - uint32 DH:11; - uint32 _PAD2:9; + uint32 DX : 12; + uint32 DY : 11; + uint32 MAGH : 4; + uint32 MAGV : 2; + uint32 _PAD : 3; + uint32 DW : 12; + uint32 DH : 11; + uint32 _PAD2 : 9; REG_END REG64_(GSReg, EXTBUF) - uint32 EXBP:14; - uint32 EXBW:6; - uint32 FBIN:2; - uint32 WFFMD:1; - uint32 EMODA:2; - uint32 EMODC:2; - uint32 _PAD1:5; - uint32 WDX:11; - uint32 WDY:11; - uint32 _PAD2:10; + uint32 EXBP : 14; + uint32 EXBW : 6; + uint32 FBIN : 2; + uint32 WFFMD : 1; + uint32 EMODA : 2; + uint32 EMODC : 2; + uint32 _PAD1 : 5; + uint32 WDX : 11; + uint32 WDY : 11; + uint32 _PAD2 : 10; REG_END REG64_(GSReg, EXTDATA) - uint32 SX:12; - uint32 SY:11; - uint32 SMPH:4; - uint32 SMPV:2; - uint32 _PAD1:3; - uint32 WW:12; - uint32 WH:11; - uint32 _PAD2:9; + uint32 SX : 12; + uint32 SY : 11; + uint32 SMPH : 4; + uint32 SMPV : 2; + uint32 _PAD1 : 3; + uint32 WW : 12; + uint32 WH : 11; + uint32 _PAD2 : 9; REG_END REG64_(GSReg, EXTWRITE) - uint32 WRITE:1; - uint32 _PAD1:31; - uint32 _PAD2:32; + uint32 WRITE : 1; + uint32 _PAD1 : 31; + uint32 _PAD2 : 32; REG_END REG64_(GSReg, IMR) - uint32 _PAD1:8; - uint32 SIGMSK:1; - uint32 FINISHMSK:1; - uint32 HSMSK:1; - uint32 VSMSK:1; - uint32 EDWMSK:1; - uint32 _PAD2:19; - uint32 _PAD3:32; + uint32 _PAD1 : 8; + uint32 SIGMSK : 1; + uint32 FINISHMSK : 1; + uint32 HSMSK : 1; + uint32 VSMSK : 1; + uint32 EDWMSK : 1; + uint32 _PAD2 : 19; + uint32 _PAD3 : 32; REG_END REG64_(GSReg, PMODE) @@ -416,22 +416,22 @@ union { struct { - uint32 EN1:1; - uint32 EN2:1; - uint32 CRTMD:3; - uint32 MMOD:1; - uint32 AMOD:1; - uint32 SLBG:1; - uint32 ALP:8; - uint32 _PAD:16; - uint32 _PAD1:32; + uint32 EN1 : 1; + uint32 EN2 : 1; + uint32 CRTMD : 3; + uint32 MMOD : 1; + uint32 AMOD : 1; + uint32 SLBG : 1; + uint32 ALP : 8; + uint32 _PAD : 16; + uint32 _PAD1 : 32; }; struct { - uint32 EN:2; - uint32 _PAD2:30; - uint32 _PAD3:32; + uint32 EN : 2; + uint32 _PAD2 : 30; + uint32 _PAD3 : 32; }; }; REG_END @@ -442,28 +442,28 @@ REG64_(GSReg, SIGLBLID) REG_END REG64_(GSReg, SMODE1) - uint32 RC:3; - uint32 LC:7; - uint32 T1248:2; - uint32 SLCK:1; - uint32 CMOD:2; - uint32 EX:1; - uint32 PRST:1; - uint32 SINT:1; - uint32 XPCK:1; - uint32 PCK2:2; - uint32 SPML:4; - uint32 GCONT:1; // YCrCb - uint32 PHS:1; - uint32 PVS:1; - uint32 PEHS:1; - uint32 PEVS:1; - uint32 CLKSEL:2; - uint32 NVCK:1; - uint32 SLCK2:1; - uint32 VCKSEL:2; - uint32 VHP:1; - uint32 _PAD1:27; + uint32 RC : 3; + uint32 LC : 7; + uint32 T1248 : 2; + uint32 SLCK : 1; + uint32 CMOD : 2; + uint32 EX : 1; + uint32 PRST : 1; + uint32 SINT : 1; + uint32 XPCK : 1; + uint32 PCK2 : 2; + uint32 SPML : 4; + uint32 GCONT : 1; // YCrCb + uint32 PHS : 1; + uint32 PVS : 1; + uint32 PEHS : 1; + uint32 PEVS : 1; + uint32 CLKSEL : 2; + uint32 NVCK : 1; + uint32 SLCK2 : 1; + uint32 VCKSEL : 2; + uint32 VHP : 1; + uint32 _PAD1 : 27; REG_END /* @@ -483,11 +483,11 @@ CLKSEL=1 CMOD=0 EX=0 GCONT=0 LC=32 NVCK=1 PCK2=0 PEHS=0 PEVS=0 PHS=0 PRST=1 PVS= */ REG64_(GSReg, SMODE2) - uint32 INT:1; - uint32 FFMD:1; - uint32 DPMS:2; - uint32 _PAD2:28; - uint32 _PAD3:32; + uint32 INT : 1; + uint32 FFMD : 1; + uint32 DPMS : 2; + uint32 _PAD2 : 28; + uint32 _PAD3 : 32; REG_END REG64_(GSReg, SRFSH) @@ -506,75 +506,75 @@ REG64_(GSReg, SYNCH2) REG_END REG64_(GSReg, SYNCV) - uint32 VFP:10; // Vertical Front Porchinterval (?s) - uint32 VFPE:10; // Vertical Front Porchinterval End (?s) - uint32 VBP:12; // Vertical Back Porchinterval (?s) - uint32 VBPE:10; // Vertical Back Porchinterval End (?s) - uint32 VDP:11; // Vertical Differential Phase - uint32 VS:11; // Vertical Synchronization Timing + uint32 VFP : 10; // Vertical Front Porchinterval (?s) + uint32 VFPE : 10; // Vertical Front Porchinterval End (?s) + uint32 VBP : 12; // Vertical Back Porchinterval (?s) + uint32 VBPE : 10; // Vertical Back Porchinterval End (?s) + uint32 VDP : 11; // Vertical Differential Phase + uint32 VS : 11; // Vertical Synchronization Timing REG_END REG64_SET(GSReg) - GSRegBGCOLOR BGCOLOR; - GSRegBUSDIR BUSDIR; - GSRegCSR CSR; - GSRegDISPFB DISPFB; - GSRegDISPLAY DISPLAY; - GSRegEXTBUF EXTBUF; - GSRegEXTDATA EXTDATA; - GSRegEXTWRITE EXTWRITE; - GSRegIMR IMR; - GSRegPMODE PMODE; - GSRegSIGLBLID SIGLBLID; - GSRegSMODE1 SMODE1; - GSRegSMODE2 SMODE2; + GSRegBGCOLOR BGCOLOR; + GSRegBUSDIR BUSDIR; + GSRegCSR CSR; + GSRegDISPFB DISPFB; + GSRegDISPLAY DISPLAY; + GSRegEXTBUF EXTBUF; + GSRegEXTDATA EXTDATA; + GSRegEXTWRITE EXTWRITE; + GSRegIMR IMR; + GSRegPMODE PMODE; + GSRegSIGLBLID SIGLBLID; + GSRegSMODE1 SMODE1; + GSRegSMODE2 SMODE2; REG_SET_END // // GIFTag REG128(GIFTag) - uint32 NLOOP:15; - uint32 EOP:1; - uint32 _PAD1:16; - uint32 _PAD2:14; - uint32 PRE:1; - uint32 PRIM:11; - uint32 FLG:2; // enum GIF_FLG - uint32 NREG:4; + uint32 NLOOP : 15; + uint32 EOP : 1; + uint32 _PAD1 : 16; + uint32 _PAD2 : 14; + uint32 PRE : 1; + uint32 PRIM : 11; + uint32 FLG : 2; // enum GIF_FLG + uint32 NREG : 4; uint64 REGS; REG_END // GIFReg REG64_(GIFReg, ALPHA) - uint32 A:2; - uint32 B:2; - uint32 C:2; - uint32 D:2; - uint32 _PAD1:24; + uint32 A : 2; + uint32 B : 2; + uint32 C : 2; + uint32 D : 2; + uint32 _PAD1 : 24; uint8 FIX; uint8 _PAD2[3]; REG_END2 // opaque => output will be Cs/As - __forceinline bool IsOpaque() const {return ((A == B || (C == 2 && FIX == 0)) && D == 0) || (A == 0 && B == D && C == 2 && FIX == 0x80);} - __forceinline bool IsOpaque(int amin, int amax) const {return ((A == B || amax == 0) && D == 0) || (A == 0 && B == D && amin == 0x80 && amax == 0x80);} - __forceinline bool IsCd() { return (A == B) && (D == 1);} + __forceinline bool IsOpaque() const { return ((A == B || (C == 2 && FIX == 0)) && D == 0) || (A == 0 && B == D && C == 2 && FIX == 0x80); } + __forceinline bool IsOpaque(int amin, int amax) const { return ((A == B || amax == 0) && D == 0) || (A == 0 && B == D && amin == 0x80 && amax == 0x80); } + __forceinline bool IsCd() { return (A == B) && (D == 1); } REG_END2 REG64_(GIFReg, BITBLTBUF) - uint32 SBP:14; - uint32 _PAD1:2; - uint32 SBW:6; - uint32 _PAD2:2; - uint32 SPSM:6; - uint32 _PAD3:2; - uint32 DBP:14; - uint32 _PAD4:2; - uint32 DBW:6; - uint32 _PAD5:2; - uint32 DPSM:6; - uint32 _PAD6:2; + uint32 SBP : 14; + uint32 _PAD1 : 2; + uint32 SBW : 6; + uint32 _PAD2 : 2; + uint32 SPSM : 6; + uint32 _PAD3 : 2; + uint32 DBP : 14; + uint32 _PAD4 : 2; + uint32 DBW : 6; + uint32 _PAD5 : 2; + uint32 DPSM : 6; + uint32 _PAD6 : 2; REG_END REG64_(GIFReg, CLAMP) @@ -582,76 +582,76 @@ union { struct { - uint32 WMS:2; - uint32 WMT:2; - uint32 MINU:10; - uint32 MAXU:10; - uint32 _PAD1:8; - uint32 _PAD2:2; - uint32 MAXV:10; - uint32 _PAD3:20; + uint32 WMS : 2; + uint32 WMT : 2; + uint32 MINU : 10; + uint32 MAXU : 10; + uint32 _PAD1 : 8; + uint32 _PAD2 : 2; + uint32 MAXV : 10; + uint32 _PAD3 : 20; }; struct { - uint64 _PAD4:24; - uint64 MINV:10; - uint64 _PAD5:30; + uint64 _PAD4 : 24; + uint64 MINV : 10; + uint64 _PAD5 : 30; }; }; REG_END REG64_(GIFReg, COLCLAMP) - uint32 CLAMP:1; - uint32 _PAD1:31; - uint32 _PAD2:32; + uint32 CLAMP : 1; + uint32 _PAD1 : 31; + uint32 _PAD2 : 32; REG_END REG64_(GIFReg, DIMX) - int32 DM00:3; - int32 _PAD00:1; - int32 DM01:3; - int32 _PAD01:1; - int32 DM02:3; - int32 _PAD02:1; - int32 DM03:3; - int32 _PAD03:1; - int32 DM10:3; - int32 _PAD10:1; - int32 DM11:3; - int32 _PAD11:1; - int32 DM12:3; - int32 _PAD12:1; - int32 DM13:3; - int32 _PAD13:1; - int32 DM20:3; - int32 _PAD20:1; - int32 DM21:3; - int32 _PAD21:1; - int32 DM22:3; - int32 _PAD22:1; - int32 DM23:3; - int32 _PAD23:1; - int32 DM30:3; - int32 _PAD30:1; - int32 DM31:3; - int32 _PAD31:1; - int32 DM32:3; - int32 _PAD32:1; - int32 DM33:3; - int32 _PAD33:1; + int32 DM00 : 3; + int32 _PAD00 : 1; + int32 DM01 : 3; + int32 _PAD01 : 1; + int32 DM02 : 3; + int32 _PAD02 : 1; + int32 DM03 : 3; + int32 _PAD03 : 1; + int32 DM10 : 3; + int32 _PAD10 : 1; + int32 DM11 : 3; + int32 _PAD11 : 1; + int32 DM12 : 3; + int32 _PAD12 : 1; + int32 DM13 : 3; + int32 _PAD13 : 1; + int32 DM20 : 3; + int32 _PAD20 : 1; + int32 DM21 : 3; + int32 _PAD21 : 1; + int32 DM22 : 3; + int32 _PAD22 : 1; + int32 DM23 : 3; + int32 _PAD23 : 1; + int32 DM30 : 3; + int32 _PAD30 : 1; + int32 DM31 : 3; + int32 _PAD31 : 1; + int32 DM32 : 3; + int32 _PAD32 : 1; + int32 DM33 : 3; + int32 _PAD33 : 1; REG_END REG64_(GIFReg, DTHE) - uint32 DTHE:1; - uint32 _PAD1:31; - uint32 _PAD2:32; + uint32 DTHE : 1; + uint32 _PAD1 : 31; + uint32 _PAD2 : 32; REG_END REG64_(GIFReg, FBA) - uint32 FBA:1; - uint32 _PAD1:31; - uint32 _PAD2:32; + uint32 FBA : 1; + uint32 _PAD1 : 31; + uint32 _PAD2 : 32; REG_END REG64_(GIFReg, FINISH) @@ -671,15 +671,15 @@ REG64_(GIFReg, FOGCOL) REG_END REG64_(GIFReg, FRAME) - uint32 FBP:9; - uint32 _PAD1:7; - uint32 FBW:6; - uint32 _PAD2:2; - uint32 PSM:6; - uint32 _PAD3:2; + uint32 FBP : 9; + uint32 _PAD1 : 7; + uint32 FBW : 6; + uint32 _PAD2 : 2; + uint32 PSM : 6; + uint32 _PAD3 : 2; uint32 FBMSK; REG_END2 - uint32 Block() const {return FBP << 5;} + uint32 Block() const { return FBP << 5; } REG_END2 REG64_(GIFReg, HWREG) @@ -693,23 +693,23 @@ REG64_(GIFReg, LABEL) REG_END REG64_(GIFReg, MIPTBP1) - uint64 TBP1:14; - uint64 TBW1:6; - uint64 TBP2:14; - uint64 TBW2:6; - uint64 TBP3:14; - uint64 TBW3:6; - uint64 _PAD:4; + uint64 TBP1 : 14; + uint64 TBW1 : 6; + uint64 TBP2 : 14; + uint64 TBW2 : 6; + uint64 TBP3 : 14; + uint64 TBW3 : 6; + uint64 _PAD : 4; REG_END REG64_(GIFReg, MIPTBP2) - uint64 TBP4:14; - uint64 TBW4:6; - uint64 TBP5:14; - uint64 TBW5:6; - uint64 TBP6:14; - uint64 TBW6:6; - uint64 _PAD:4; + uint64 TBP4 : 14; + uint64 TBW4 : 6; + uint64 TBP5 : 14; + uint64 TBW5 : 6; + uint64 TBP6 : 14; + uint64 TBW6 : 6; + uint64 _PAD : 4; REG_END REG64_(GIFReg, NOP) @@ -717,43 +717,43 @@ REG64_(GIFReg, NOP) REG_END REG64_(GIFReg, PABE) - uint32 PABE:1; - uint32 _PAD1:31; - uint32 _PAD2:32; + uint32 PABE : 1; + uint32 _PAD1 : 31; + uint32 _PAD2 : 32; REG_END REG64_(GIFReg, PRIM) - uint32 PRIM:3; - uint32 IIP:1; - uint32 TME:1; - uint32 FGE:1; - uint32 ABE:1; - uint32 AA1:1; - uint32 FST:1; - uint32 CTXT:1; - uint32 FIX:1; - uint32 _PAD1:21; - uint32 _PAD2:32; + uint32 PRIM : 3; + uint32 IIP : 1; + uint32 TME : 1; + uint32 FGE : 1; + uint32 ABE : 1; + uint32 AA1 : 1; + uint32 FST : 1; + uint32 CTXT : 1; + uint32 FIX : 1; + uint32 _PAD1 : 21; + uint32 _PAD2 : 32; REG_END REG64_(GIFReg, PRMODE) - uint32 _PRIM:3; - uint32 IIP:1; - uint32 TME:1; - uint32 FGE:1; - uint32 ABE:1; - uint32 AA1:1; - uint32 FST:1; - uint32 CTXT:1; - uint32 FIX:1; - uint32 _PAD2:21; - uint32 _PAD3:32; + uint32 _PRIM : 3; + uint32 IIP : 1; + uint32 TME : 1; + uint32 FGE : 1; + uint32 ABE : 1; + uint32 AA1 : 1; + uint32 FST : 1; + uint32 CTXT : 1; + uint32 FIX : 1; + uint32 _PAD2 : 21; + uint32 _PAD3 : 32; REG_END REG64_(GIFReg, PRMODECONT) - uint32 AC:1; - uint32 _PAD1:31; - uint32 _PAD2:32; + uint32 AC : 1; + uint32 _PAD1 : 31; + uint32 _PAD2 : 32; REG_END REG64_(GIFReg, RGBAQ) @@ -765,20 +765,20 @@ REG64_(GIFReg, RGBAQ) REG_END REG64_(GIFReg, SCANMSK) - uint32 MSK:2; - uint32 _PAD1:30; - uint32 _PAD2:32; + uint32 MSK : 2; + uint32 _PAD1 : 30; + uint32 _PAD2 : 32; REG_END REG64_(GIFReg, SCISSOR) - uint32 SCAX0:11; - uint32 _PAD1:5; - uint32 SCAX1:11; - uint32 _PAD2:5; - uint32 SCAY0:11; - uint32 _PAD3:5; - uint32 SCAY1:11; - uint32 _PAD4:5; + uint32 SCAX0 : 11; + uint32 _PAD1 : 5; + uint32 SCAX1 : 11; + uint32 _PAD2 : 5; + uint32 SCAY0 : 11; + uint32 _PAD3 : 5; + uint32 SCAY1 : 11; + uint32 _PAD4 : 5; REG_END REG64_(GIFReg, SIGNAL) @@ -792,20 +792,20 @@ REG64_(GIFReg, ST) REG_END REG64_(GIFReg, TEST) - uint32 ATE:1; - uint32 ATST:3; - uint32 AREF:8; - uint32 AFAIL:2; - uint32 DATE:1; - uint32 DATM:1; - uint32 ZTE:1; - uint32 ZTST:2; - uint32 _PAD1:13; - uint32 _PAD2:32; + uint32 ATE : 1; + uint32 ATST : 3; + uint32 AREF : 8; + uint32 AFAIL : 2; + uint32 DATE : 1; + uint32 DATM : 1; + uint32 ZTE : 1; + uint32 ZTST : 2; + uint32 _PAD1 : 13; + uint32 _PAD2 : 32; REG_END2 - __forceinline bool DoFirstPass() const {return !ATE || ATST != ATST_NEVER;} // not all pixels fail automatically - __forceinline bool DoSecondPass() const {return ATE && ATST != ATST_ALWAYS && AFAIL != AFAIL_KEEP;} // pixels may fail, write fb/z - __forceinline bool NoSecondPass() const {return ATE && ATST != ATST_ALWAYS && AFAIL == AFAIL_KEEP;} // pixels may fail, no output + __forceinline bool DoFirstPass() const { return !ATE || ATST != ATST_NEVER; } // not all pixels fail automatically + __forceinline bool DoSecondPass() const { return ATE && ATST != ATST_ALWAYS && AFAIL != AFAIL_KEEP; } // pixels may fail, write fb/z + __forceinline bool NoSecondPass() const { return ATE && ATST != ATST_ALWAYS && AFAIL == AFAIL_KEEP; } // pixels may fail, no output REG_END2 REG64_(GIFReg, TEX0) @@ -813,35 +813,37 @@ union { struct { - uint32 TBP0:14; - uint32 TBW:6; - uint32 PSM:6; - uint32 TW:4; - uint32 _PAD1:2; - uint32 _PAD2:2; - uint32 TCC:1; - uint32 TFX:2; - uint32 CBP:14; - uint32 CPSM:4; - uint32 CSM:1; - uint32 CSA:5; - uint32 CLD:3; + uint32 TBP0 : 14; + uint32 TBW : 6; + uint32 PSM : 6; + uint32 TW : 4; + uint32 _PAD1 : 2; + uint32 _PAD2 : 2; + uint32 TCC : 1; + uint32 TFX : 2; + uint32 CBP : 14; + uint32 CPSM : 4; + uint32 CSM : 1; + uint32 CSA : 5; + uint32 CLD : 3; }; struct { - uint64 _PAD3:30; - uint64 TH:4; - uint64 _PAD4:30; + uint64 _PAD3 : 30; + uint64 TH : 4; + uint64 _PAD4 : 30; }; }; REG_END2 __forceinline bool IsRepeating() const { - if(TBW < 2) + if (TBW < 2) { - if(PSM == PSM_PSMT8) return TW > 7 || TH > 6; - if(PSM == PSM_PSMT4) return TW > 7 || TH > 7; + if (PSM == PSM_PSMT8) + return TW > 7 || TH > 6; + if (PSM == PSM_PSMT4) + return TW > 7 || TH > 7; } // The recast of TBW seems useless but it avoid tons of warning from GCC... @@ -850,97 +852,97 @@ REG_END2 REG_END2 REG64_(GIFReg, TEX1) - uint32 LCM:1; - uint32 _PAD1:1; - uint32 MXL:3; - uint32 MMAG:1; - uint32 MMIN:3; - uint32 MTBA:1; - uint32 _PAD2:9; - uint32 L:2; - uint32 _PAD3:11; - int32 K:12; // 1:7:4 - uint32 _PAD4:20; + uint32 LCM : 1; + uint32 _PAD1 : 1; + uint32 MXL : 3; + uint32 MMAG : 1; + uint32 MMIN : 3; + uint32 MTBA : 1; + uint32 _PAD2 : 9; + uint32 L : 2; + uint32 _PAD3 : 11; + int32 K : 12; // 1:7:4 + uint32 _PAD4 : 20; REG_END2 - bool IsMinLinear() const {return (MMIN == 1) || (MMIN & 4);} - bool IsMagLinear() const {return MMAG;} + bool IsMinLinear() const { return (MMIN == 1) || (MMIN & 4); } + bool IsMagLinear() const { return MMAG; } REG_END2 REG64_(GIFReg, TEX2) - uint32 _PAD1:20; - uint32 PSM:6; - uint32 _PAD2:6; - uint32 _PAD3:5; - uint32 CBP:14; - uint32 CPSM:4; - uint32 CSM:1; - uint32 CSA:5; - uint32 CLD:3; + uint32 _PAD1 : 20; + uint32 PSM : 6; + uint32 _PAD2 : 6; + uint32 _PAD3 : 5; + uint32 CBP : 14; + uint32 CPSM : 4; + uint32 CSM : 1; + uint32 CSA : 5; + uint32 CLD : 3; REG_END REG64_(GIFReg, TEXA) uint8 TA0; - uint8 _PAD1:7; - uint8 AEM:1; + uint8 _PAD1 : 7; + uint8 AEM : 1; uint16 _PAD2; - uint8 TA1:8; + uint8 TA1 : 8; uint8 _PAD3[3]; REG_END REG64_(GIFReg, TEXCLUT) - uint32 CBW:6; - uint32 COU:6; - uint32 COV:10; - uint32 _PAD1:10; - uint32 _PAD2:32; + uint32 CBW : 6; + uint32 COU : 6; + uint32 COV : 10; + uint32 _PAD1 : 10; + uint32 _PAD2 : 32; REG_END REG64_(GIFReg, TEXFLUSH) - uint32 _PAD1:32; - uint32 _PAD2:32; + uint32 _PAD1 : 32; + uint32 _PAD2 : 32; REG_END REG64_(GIFReg, TRXDIR) - uint32 XDIR:2; - uint32 _PAD1:30; - uint32 _PAD2:32; + uint32 XDIR : 2; + uint32 _PAD1 : 30; + uint32 _PAD2 : 32; REG_END REG64_(GIFReg, TRXPOS) - uint32 SSAX:11; - uint32 _PAD1:5; - uint32 SSAY:11; - uint32 _PAD2:5; - uint32 DSAX:11; - uint32 _PAD3:5; - uint32 DSAY:11; - uint32 DIRY:1; - uint32 DIRX:1; - uint32 _PAD4:3; + uint32 SSAX : 11; + uint32 _PAD1 : 5; + uint32 SSAY : 11; + uint32 _PAD2 : 5; + uint32 DSAX : 11; + uint32 _PAD3 : 5; + uint32 DSAY : 11; + uint32 DIRY : 1; + uint32 DIRX : 1; + uint32 _PAD4 : 3; REG_END REG64_(GIFReg, TRXREG) - uint32 RRW:12; - uint32 _PAD1:20; - uint32 RRH:12; - uint32 _PAD2:20; + uint32 RRW : 12; + uint32 _PAD1 : 20; + uint32 RRH : 12; + uint32 _PAD2 : 20; REG_END // GSState::GIFPackedRegHandlerUV and GSState::GIFRegHandlerUV will make sure that the _PAD1/2 bits are set to zero REG64_(GIFReg, UV) uint16 U; -// uint32 _PAD1:2; + // uint32 _PAD1 : 2; uint16 V; -// uint32 _PAD2:2; + // uint32 _PAD2 : 2; uint32 _PAD3; REG_END // GSState::GIFRegHandlerXYOFFSET will make sure that the _PAD1/2 bits are set to zero REG64_(GIFReg, XYOFFSET) - uint32 OFX; // :16; uint32 _PAD1:16; - uint32 OFY; // :16; uint32 _PAD2:16; + uint32 OFX; // : 16; uint32 _PAD1 : 16; + uint32 OFY; // : 16; uint32 _PAD2 : 16; REG_END REG64_(GIFReg, XYZ) @@ -952,71 +954,71 @@ REG_END REG64_(GIFReg, XYZF) uint16 X; uint16 Y; - uint32 Z:24; - uint32 F:8; + uint32 Z : 24; + uint32 F : 8; REG_END REG64_(GIFReg, ZBUF) - uint32 ZBP:9; - uint32 _PAD1:15; - // uint32 PSM:4; - // uint32 _PAD2:4; - uint32 PSM:6; - uint32 _PAD2:2; - uint32 ZMSK:1; - uint32 _PAD3:31; + uint32 ZBP : 9; + uint32 _PAD1 : 15; + // uint32 PSM : 4; + // uint32 _PAD2 : 4; + uint32 PSM : 6; + uint32 _PAD2 : 2; + uint32 ZMSK : 1; + uint32 _PAD3 : 31; REG_END2 - uint32 Block() const {return ZBP << 5;} + uint32 Block() const { return ZBP << 5; } REG_END2 REG64_SET(GIFReg) - GIFRegALPHA ALPHA; - GIFRegBITBLTBUF BITBLTBUF; - GIFRegCLAMP CLAMP; - GIFRegCOLCLAMP COLCLAMP; - GIFRegDIMX DIMX; - GIFRegDTHE DTHE; - GIFRegFBA FBA; - GIFRegFINISH FINISH; - GIFRegFOG FOG; - GIFRegFOGCOL FOGCOL; - GIFRegFRAME FRAME; - GIFRegHWREG HWREG; - GIFRegLABEL LABEL; - GIFRegMIPTBP1 MIPTBP1; - GIFRegMIPTBP2 MIPTBP2; - GIFRegNOP NOP; - GIFRegPABE PABE; - GIFRegPRIM PRIM; - GIFRegPRMODE PRMODE; - GIFRegPRMODECONT PRMODECONT; - GIFRegRGBAQ RGBAQ; - GIFRegSCANMSK SCANMSK; - GIFRegSCISSOR SCISSOR; - GIFRegSIGNAL SIGNAL; - GIFRegST ST; - GIFRegTEST TEST; - GIFRegTEX0 TEX0; - GIFRegTEX1 TEX1; - GIFRegTEX2 TEX2; - GIFRegTEXA TEXA; - GIFRegTEXCLUT TEXCLUT; - GIFRegTEXFLUSH TEXFLUSH; - GIFRegTRXDIR TRXDIR; - GIFRegTRXPOS TRXPOS; - GIFRegTRXREG TRXREG; - GIFRegUV UV; - GIFRegXYOFFSET XYOFFSET; - GIFRegXYZ XYZ; - GIFRegXYZF XYZF; - GIFRegZBUF ZBUF; + GIFRegALPHA ALPHA; + GIFRegBITBLTBUF BITBLTBUF; + GIFRegCLAMP CLAMP; + GIFRegCOLCLAMP COLCLAMP; + GIFRegDIMX DIMX; + GIFRegDTHE DTHE; + GIFRegFBA FBA; + GIFRegFINISH FINISH; + GIFRegFOG FOG; + GIFRegFOGCOL FOGCOL; + GIFRegFRAME FRAME; + GIFRegHWREG HWREG; + GIFRegLABEL LABEL; + GIFRegMIPTBP1 MIPTBP1; + GIFRegMIPTBP2 MIPTBP2; + GIFRegNOP NOP; + GIFRegPABE PABE; + GIFRegPRIM PRIM; + GIFRegPRMODE PRMODE; + GIFRegPRMODECONT PRMODECONT; + GIFRegRGBAQ RGBAQ; + GIFRegSCANMSK SCANMSK; + GIFRegSCISSOR SCISSOR; + GIFRegSIGNAL SIGNAL; + GIFRegST ST; + GIFRegTEST TEST; + GIFRegTEX0 TEX0; + GIFRegTEX1 TEX1; + GIFRegTEX2 TEX2; + GIFRegTEXA TEXA; + GIFRegTEXCLUT TEXCLUT; + GIFRegTEXFLUSH TEXFLUSH; + GIFRegTRXDIR TRXDIR; + GIFRegTRXPOS TRXPOS; + GIFRegTRXREG TRXREG; + GIFRegUV UV; + GIFRegXYOFFSET XYOFFSET; + GIFRegXYZ XYZ; + GIFRegXYZF XYZF; + GIFRegZBUF ZBUF; REG_SET_END // GIFPacked REG128_(GIFPacked, PRIM) - uint32 PRIM:11; - uint32 _PAD1:21; + uint32 PRIM : 11; + uint32 _PAD1 : 21; uint32 _PAD2[3]; REG_END @@ -1035,16 +1037,16 @@ REG128_(GIFPacked, STQ) float S; float T; float Q; - uint32 _PAD1:32; + uint32 _PAD1 : 32; REG_END REG128_(GIFPacked, UV) - uint32 U:14; - uint32 _PAD1:18; - uint32 V:14; - uint32 _PAD2:18; - uint32 _PAD3:32; - uint32 _PAD4:32; + uint32 U : 14; + uint32 _PAD1 : 18; + uint32 V : 14; + uint32 _PAD2 : 18; + uint32 _PAD3 : 32; + uint32 _PAD4 : 32; REG_END REG128_(GIFPacked, XYZF2) @@ -1052,16 +1054,16 @@ REG128_(GIFPacked, XYZF2) uint16 _PAD1; uint16 Y; uint16 _PAD2; - uint32 _PAD3:4; - uint32 Z:24; - uint32 _PAD4:4; - uint32 _PAD5:4; - uint32 F:8; - uint32 _PAD6:3; - uint32 ADC:1; - uint32 _PAD7:16; + uint32 _PAD3 : 4; + uint32 Z : 24; + uint32 _PAD4 : 4; + uint32 _PAD5 : 4; + uint32 F : 8; + uint32 _PAD6 : 3; + uint32 ADC : 1; + uint32 _PAD7 : 16; REG_END2 - uint32 Skip() const {return u32[3] & 0x8000;} +uint32 Skip() const { return u32[3] & 0x8000; } REG_END2 REG128_(GIFPacked, XYZ2) @@ -1070,26 +1072,26 @@ REG128_(GIFPacked, XYZ2) uint16 Y; uint16 _PAD2; uint32 Z; - uint32 _PAD3:15; - uint32 ADC:1; - uint32 _PAD4:16; + uint32 _PAD3 : 15; + uint32 ADC : 1; + uint32 _PAD4 : 16; REG_END2 - uint32 Skip() const {return u32[3] & 0x8000;} + uint32 Skip() const { return u32[3] & 0x8000; } REG_END2 REG128_(GIFPacked, FOG) uint32 _PAD1; uint32 _PAD2; uint32 _PAD3; - uint32 _PAD4:4; - uint32 F:8; - uint32 _PAD5:20; + uint32 _PAD4 : 4; + uint32 F : 8; + uint32 _PAD5 : 20; REG_END REG128_(GIFPacked, A_D) uint64 DATA; - uint8 ADDR:8; // enum GIF_A_D_REG - uint8 _PAD1[3+4]; + uint8 ADDR : 8; // enum GIF_A_D_REG + uint8 _PAD1[3 + 4]; REG_END REG128_(GIFPacked, NOP) @@ -1100,16 +1102,16 @@ REG128_(GIFPacked, NOP) REG_END REG128_SET(GIFPackedReg) - GIFReg r; - GIFPackedPRIM PRIM; - GIFPackedRGBA RGBA; - GIFPackedSTQ STQ; - GIFPackedUV UV; - GIFPackedXYZF2 XYZF2; - GIFPackedXYZ2 XYZ2; - GIFPackedFOG FOG; - GIFPackedA_D A_D; - GIFPackedNOP NOP; + GIFReg r; + GIFPackedPRIM PRIM; + GIFPackedRGBA RGBA; + GIFPackedSTQ STQ; + GIFPackedUV UV; + GIFPackedXYZF2 XYZF2; + GIFPackedXYZ2 XYZ2; + GIFPackedFOG FOG; + GIFPackedA_D A_D; + GIFPackedNOP NOP; REG_SET_END struct alignas(32) GIFPath @@ -1121,7 +1123,13 @@ struct alignas(32) GIFPath uint32 type; GSVector4i regs; - enum {TYPE_UNKNOWN, TYPE_ADONLY, TYPE_STQRGBAXYZF2, TYPE_STQRGBAXYZ2}; + enum + { + TYPE_UNKNOWN, + TYPE_ADONLY, + TYPE_STQRGBAXYZF2, + TYPE_STQRGBAXYZ2 + }; __forceinline void SetTag(const void* mem) { @@ -1137,7 +1145,8 @@ struct alignas(32) GIFPath nloop = a & 0x7fff; - if(nloop == 0) return; + if (nloop == 0) + return; GSVector4i v = GSVector4i::loadl(&src->REGS); // REGS not stored to tag.REGS, only into this->regs, restored before saving the state though @@ -1147,42 +1156,71 @@ struct alignas(32) GIFPath type = TYPE_UNKNOWN; - if(tag.FLG == GIF_FLG_PACKED) + if (tag.FLG == GIF_FLG_PACKED) { - if(regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1) + if (regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1) { type = TYPE_ADONLY; } else { - switch(nreg) + switch (nreg) { - case 1: break; - case 2: break; - case 3: - if(regs.u32[0] == 0x00040102) type = TYPE_STQRGBAXYZF2; // many games, TODO: formats mixed with NOPs (xeno2: 040f010f02, 04010f020f, mgs3: 04010f0f02, 0401020f0f, 04010f020f) - if(regs.u32[0] == 0x00050102) type = TYPE_STQRGBAXYZ2; // GoW (has other crazy formats, like ...030503050103) - // TODO: common types with UV instead - break; - case 4: break; - case 5: break; - case 6: break; - case 7: break; - case 8: break; - case 9: - if(regs.u32[0] == 0x02040102 && regs.u32[1] == 0x01020401 && regs.u32[2] == 0x00000004) {type = TYPE_STQRGBAXYZF2; nreg = 3; nloop *= 3;} // ffx - break; - case 10: break; - case 11: break; - case 12: - if(regs.u32[0] == 0x02040102 && regs.u32[1] == 0x01020401 && regs.u32[2] == 0x04010204) {type = TYPE_STQRGBAXYZF2; nreg = 3; nloop *= 4;} // dq8 (not many, mostly 040102) - break; - case 13: break; - case 14: break; - case 15: break; - case 16: break; - default: - __assume(0); + case 1: + break; + case 2: + break; + case 3: + // many games, TODO: formats mixed with NOPs (xeno2: 040f010f02, 04010f020f, mgs3: 04010f0f02, 0401020f0f, 04010f020f) + if (regs.u32[0] == 0x00040102) + type = TYPE_STQRGBAXYZF2; + // GoW (has other crazy formats, like ...030503050103) + if (regs.u32[0] == 0x00050102) + type = TYPE_STQRGBAXYZ2; + // TODO: common types with UV instead + break; + case 4: + break; + case 5: + break; + case 6: + break; + case 7: + break; + case 8: + break; + case 9: + // ffx + if (regs.u32[0] == 0x02040102 && regs.u32[1] == 0x01020401 && regs.u32[2] == 0x00000004) + { + type = TYPE_STQRGBAXYZF2; + nreg = 3; + nloop *= 3; + } + break; + case 10: + break; + case 11: + break; + case 12: + // dq8 (not many, mostly 040102) + if (regs.u32[0] == 0x02040102 && regs.u32[1] == 0x01020401 && regs.u32[2] == 0x04010204) + { + type = TYPE_STQRGBAXYZF2; + nreg = 3; + nloop *= 4; + } + break; + case 13: + break; + case 14: + break; + case 15: + break; + case 16: + break; + default: + __assume(0); } } } @@ -1200,11 +1238,11 @@ struct alignas(32) GIFPath __forceinline bool StepReg() { - if(++reg == nreg) + if (++reg == nreg) { reg = 0; - if(--nloop == 0) + if (--nloop == 0) { return false; } @@ -1220,34 +1258,35 @@ struct GSPrivRegSet { struct { - GSRegPMODE PMODE; - uint64 _pad1; - GSRegSMODE1 SMODE1; - uint64 _pad2; - GSRegSMODE2 SMODE2; - uint64 _pad3; - GSRegSRFSH SRFSH; - uint64 _pad4; - GSRegSYNCH1 SYNCH1; - uint64 _pad5; - GSRegSYNCH2 SYNCH2; - uint64 _pad6; - GSRegSYNCV SYNCV; - uint64 _pad7; - struct { - GSRegDISPFB DISPFB; - uint64 _pad1; - GSRegDISPLAY DISPLAY; - uint64 _pad2; + GSRegPMODE PMODE; + uint64 _pad1; + GSRegSMODE1 SMODE1; + uint64 _pad2; + GSRegSMODE2 SMODE2; + uint64 _pad3; + GSRegSRFSH SRFSH; + uint64 _pad4; + GSRegSYNCH1 SYNCH1; + uint64 _pad5; + GSRegSYNCH2 SYNCH2; + uint64 _pad6; + GSRegSYNCV SYNCV; + uint64 _pad7; + struct + { + GSRegDISPFB DISPFB; + uint64 _pad1; + GSRegDISPLAY DISPLAY; + uint64 _pad2; } DISP[2]; - GSRegEXTBUF EXTBUF; - uint64 _pad8; - GSRegEXTDATA EXTDATA; - uint64 _pad9; - GSRegEXTWRITE EXTWRITE; - uint64 _pad10; - GSRegBGCOLOR BGCOLOR; - uint64 _pad11; + GSRegEXTBUF EXTBUF; + uint64 _pad8; + GSRegEXTDATA EXTDATA; + uint64 _pad9; + GSRegEXTWRITE EXTWRITE; + uint64 _pad10; + GSRegBGCOLOR BGCOLOR; + uint64 _pad11; }; uint8 _pad12[0x1000]; @@ -1257,16 +1296,16 @@ struct GSPrivRegSet { struct { - GSRegCSR CSR; - uint64 _pad13; - GSRegIMR IMR; - uint64 _pad14; - uint64 _unk1[4]; - GSRegBUSDIR BUSDIR; - uint64 _pad15; - uint64 _unk2[6]; - GSRegSIGLBLID SIGLBLID; - uint64 _pad16; + GSRegCSR CSR; + uint64 _pad13; + GSRegIMR IMR; + uint64 _pad14; + uint64 _unk1[4]; + GSRegBUSDIR BUSDIR; + uint64 _pad15; + uint64 _unk2[6]; + GSRegSIGLBLID SIGLBLID; + uint64 _pad16; }; uint8 _pad17[0x1000]; @@ -1274,116 +1313,106 @@ struct GSPrivRegSet void Dump(FILE* fp) { - for(int i = 0; i < 2; i++) + for (int i = 0; i < 2; i++) { - if (!fp) return; + if (!fp) + return; - if(i == 0 && !PMODE.EN1) continue; - if(i == 1 && !PMODE.EN2) continue; + if (i == 0 && !PMODE.EN1) + continue; + if (i == 1 && !PMODE.EN2) + continue; fprintf(fp, "DISPFB[%d] BP=%05x BW=%u PSM=%u DBX=%u DBY=%u\n", - i, - DISP[i].DISPFB.Block(), - DISP[i].DISPFB.FBW, - DISP[i].DISPFB.PSM, - DISP[i].DISPFB.DBX, - DISP[i].DISPFB.DBY - ); + i, + DISP[i].DISPFB.Block(), + DISP[i].DISPFB.FBW, + DISP[i].DISPFB.PSM, + DISP[i].DISPFB.DBX, + DISP[i].DISPFB.DBY); fprintf(fp, "DISPLAY[%d] DX=%u DY=%u DW=%u DH=%u MAGH=%u MAGV=%u\n", - i, - DISP[i].DISPLAY.DX, - DISP[i].DISPLAY.DY, - DISP[i].DISPLAY.DW, - DISP[i].DISPLAY.DH, - DISP[i].DISPLAY.MAGH, - DISP[i].DISPLAY.MAGV - ); + i, + DISP[i].DISPLAY.DX, + DISP[i].DISPLAY.DY, + DISP[i].DISPLAY.DW, + DISP[i].DISPLAY.DH, + DISP[i].DISPLAY.MAGH, + DISP[i].DISPLAY.MAGV); } fprintf(fp, "PMODE EN1=%u EN2=%u CRTMD=%u MMOD=%u AMOD=%u SLBG=%u ALP=%u\n", - PMODE.EN1, - PMODE.EN2, - PMODE.CRTMD, - PMODE.MMOD, - PMODE.AMOD, - PMODE.SLBG, - PMODE.ALP - ); + PMODE.EN1, + PMODE.EN2, + PMODE.CRTMD, + PMODE.MMOD, + PMODE.AMOD, + PMODE.SLBG, + PMODE.ALP); fprintf(fp, "SMODE1 CLKSEL=%u CMOD=%u EX=%u GCONT=%u LC=%u NVCK=%u PCK2=%u PEHS=%u PEVS=%u PHS=%u PRST=%u PVS=%u RC=%u SINT=%u SLCK=%u SLCK2=%u SPML=%u T1248=%u VCKSEL=%u VHP=%u XPCK=%u\n", - SMODE1.CLKSEL, - SMODE1.CMOD, - SMODE1.EX, - SMODE1.GCONT, - SMODE1.LC, - SMODE1.NVCK, - SMODE1.PCK2, - SMODE1.PEHS, - SMODE1.PEVS, - SMODE1.PHS, - SMODE1.PRST, - SMODE1.PVS, - SMODE1.RC, - SMODE1.SINT, - SMODE1.SLCK, - SMODE1.SLCK2, - SMODE1.SPML, - SMODE1.T1248, - SMODE1.VCKSEL, - SMODE1.VHP, - SMODE1.XPCK - ); + SMODE1.CLKSEL, + SMODE1.CMOD, + SMODE1.EX, + SMODE1.GCONT, + SMODE1.LC, + SMODE1.NVCK, + SMODE1.PCK2, + SMODE1.PEHS, + SMODE1.PEVS, + SMODE1.PHS, + SMODE1.PRST, + SMODE1.PVS, + SMODE1.RC, + SMODE1.SINT, + SMODE1.SLCK, + SMODE1.SLCK2, + SMODE1.SPML, + SMODE1.T1248, + SMODE1.VCKSEL, + SMODE1.VHP, + SMODE1.XPCK); fprintf(fp, "SMODE2 INT=%u FFMD=%u DPMS=%u\n", - SMODE2.INT, - SMODE2.FFMD, - SMODE2.DPMS - ); + SMODE2.INT, + SMODE2.FFMD, + SMODE2.DPMS); fprintf(fp, "SRFSH %08x_%08x\n", - SRFSH.u32[0], - SRFSH.u32[1] - ); + SRFSH.u32[0], + SRFSH.u32[1]); fprintf(fp, "SYNCH1 %08x_%08x\n", - SYNCH1.u32[0], - SYNCH1.u32[1] - ); + SYNCH1.u32[0], + SYNCH1.u32[1]); fprintf(fp, "SYNCH2 %08x_%08x\n", - SYNCH2.u32[0], - SYNCH2.u32[1] - ); + SYNCH2.u32[0], + SYNCH2.u32[1]); fprintf(fp, "SYNCV VBP=%u VBPE=%u VDP=%u VFP=%u VFPE=%u VS=%u\n", - SYNCV.VBP, - SYNCV.VBPE, - SYNCV.VDP, - SYNCV.VFP, - SYNCV.VFPE, - SYNCV.VS - ); + SYNCV.VBP, + SYNCV.VBPE, + SYNCV.VDP, + SYNCV.VFP, + SYNCV.VFPE, + SYNCV.VS); fprintf(fp, "CSR %08x_%08x\n", - CSR.u32[0], - CSR.u32[1] - ); + CSR.u32[0], + CSR.u32[1]); fprintf(fp, "BGCOLOR B=%u G=%u R=%u\n", - BGCOLOR.B, - BGCOLOR.G, - BGCOLOR.R - ); + BGCOLOR.B, + BGCOLOR.G, + BGCOLOR.R); fprintf(fp, "EXTBUF BP=0x%x BW=%u FBIN=%u WFFMD=%u EMODA=%u EMODC=%u WDX=%u WDY=%u\n", - EXTBUF.EXBP, EXTBUF.EXBW, EXTBUF.FBIN, EXTBUF.WFFMD, - EXTBUF.EMODA, EXTBUF.EMODC, EXTBUF.WDX, EXTBUF.WDY - ); + EXTBUF.EXBP, EXTBUF.EXBW, EXTBUF.FBIN, EXTBUF.WFFMD, + EXTBUF.EMODA, EXTBUF.EMODC, EXTBUF.WDX, EXTBUF.WDY); fprintf(fp, "EXTDATA SX=%u SY=%u SMPH=%u SMPV=%u WW=%u WH=%u\n", - EXTDATA.SX, EXTDATA.SY, EXTDATA.SMPH, EXTDATA.SMPV, EXTDATA.WW, EXTDATA.WH - ); + EXTDATA.SX, EXTDATA.SY, EXTDATA.SMPH, EXTDATA.SMPV, EXTDATA.WW, EXTDATA.WH); fprintf(fp, "EXTWRITE EN=%u\n", EXTWRITE.WRITE); } @@ -1391,7 +1420,8 @@ struct GSPrivRegSet void Dump(const std::string& filename) { FILE* fp = fopen(filename.c_str(), "wt"); - if (fp) { + if (fp) + { Dump(fp); fclose(fp); } @@ -1400,13 +1430,34 @@ struct GSPrivRegSet #pragma pack(pop) -enum {KEYPRESS=1, KEYRELEASE=2}; -struct GSKeyEventData {uint32 key, type;}; +enum +{ + KEYPRESS = 1, + KEYRELEASE = 2 +}; +struct GSKeyEventData +{ + uint32 key, type; +}; -enum {FREEZE_LOAD=0, FREEZE_SAVE=1, FREEZE_SIZE=2}; -struct GSFreezeData {int size; uint8* data;}; +enum +{ + FREEZE_LOAD = 0, + FREEZE_SAVE = 1, + FREEZE_SIZE = 2 +}; +struct GSFreezeData +{ + int size; + uint8* data; +}; -enum stateType {ST_WRITE, ST_TRANSFER, ST_VSYNC}; +enum stateType +{ + ST_WRITE, + ST_TRANSFER, + ST_VSYNC +}; enum class GSVideoMode : uint8 { diff --git a/plugins/GSdx/GSAlignedClass.h b/plugins/GSdx/GSAlignedClass.h index 81e83180b0..fc3c140916 100644 --- a/plugins/GSdx/GSAlignedClass.h +++ b/plugins/GSdx/GSAlignedClass.h @@ -21,28 +21,29 @@ #pragma once -template class GSAlignedClass +template +class GSAlignedClass { public: GSAlignedClass() {} virtual ~GSAlignedClass() {} - void* operator new (size_t size) + void* operator new(size_t size) { return _aligned_malloc(size, i); } - void operator delete (void* p) + void operator delete(void* p) { _aligned_free(p); } - void* operator new [] (size_t size) + void* operator new[](size_t size) { return _aligned_malloc(size, i); } - void operator delete [] (void* p) + void operator delete[](void* p) { _aligned_free(p); } diff --git a/plugins/GSdx/GSBlock.h b/plugins/GSdx/GSBlock.h index eef08abc43..38bf87dbcc 100644 --- a/plugins/GSdx/GSBlock.h +++ b/plugins/GSdx/GSBlock.h @@ -27,25 +27,25 @@ class GSBlock { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 static const GSVector8i m_r16mask; - #else +#else static const GSVector4i m_r16mask; - #endif +#endif static const GSVector4i m_r8mask; static const GSVector4i m_r4mask; - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 static const GSVector8i m_xxxa; static const GSVector8i m_xxbx; static const GSVector8i m_xgxx; static const GSVector8i m_rxxx; - #else +#else static const GSVector4i m_xxxa; static const GSVector4i m_xxbx; static const GSVector4i m_xgxx; static const GSVector4i m_rxxx; - #endif +#endif static const GSVector4i m_uw8hmask0; static const GSVector4i m_uw8hmask1; @@ -53,16 +53,17 @@ class GSBlock static const GSVector4i m_uw8hmask3; public: - template __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) + template + __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { const uint8* RESTRICT s0 = &src[srcpitch * 0]; const uint8* RESTRICT s1 = &src[srcpitch * 1]; - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 GSVector8i v0, v1; - if(alignment == 32) + if (alignment == 32) { v0 = GSVector8i::load(s0).acbd(); v1 = GSVector8i::load(s1).acbd(); @@ -71,7 +72,7 @@ public: } else { - if(alignment == 16) + if (alignment == 16) { v0 = GSVector8i::load(&s0[0], &s0[16]).acbd(); v1 = GSVector8i::load(&s1[0], &s1[16]).acbd(); @@ -88,7 +89,7 @@ public: GSVector4i v6 = GSVector4i::load(&s0[16], &s1[16]); GSVector4i v7 = GSVector4i::load(&s0[24], &s1[24]); - if(mask == 0xffffffff) + if (mask == 0xffffffff) { // just write them out directly @@ -105,16 +106,16 @@ public: } } - if(mask == 0xffffffff) + if (mask == 0xffffffff) { ((GSVector8i*)dst)[i * 2 + 0] = v0; ((GSVector8i*)dst)[i * 2 + 1] = v1; } - else + else { GSVector8i v2((int)mask); - if(mask == 0xff000000 || mask == 0x00ffffff) + if (mask == 0xff000000 || mask == 0x00ffffff) { ((GSVector8i*)dst)[i * 2 + 0] = ((GSVector8i*)dst)[i * 2 + 0].blend8(v0, v2); ((GSVector8i*)dst)[i * 2 + 1] = ((GSVector8i*)dst)[i * 2 + 1].blend8(v1, v2); @@ -126,11 +127,11 @@ public: } } - #else +#else GSVector4i v0, v1, v2, v3; - if(alignment != 0) + if (alignment != 0) { v0 = GSVector4i::load(&s0[0]); v1 = GSVector4i::load(&s0[16]); @@ -147,7 +148,7 @@ public: v3 = GSVector4i::load(&s0[24], &s1[24]); } - if(mask == 0xffffffff) + if (mask == 0xffffffff) { ((GSVector4i*)dst)[i * 4 + 0] = v0; ((GSVector4i*)dst)[i * 4 + 1] = v1; @@ -158,7 +159,7 @@ public: { GSVector4i v4((int)mask); - if(mask == 0xff000000 || mask == 0x00ffffff) + if (mask == 0xff000000 || mask == 0x00ffffff) { ((GSVector4i*)dst)[i * 4 + 0] = ((GSVector4i*)dst)[i * 4 + 0].blend8(v0, v4); ((GSVector4i*)dst)[i * 4 + 1] = ((GSVector4i*)dst)[i * 4 + 1].blend8(v1, v4); @@ -174,21 +175,22 @@ public: } } - #endif +#endif } - template __forceinline static void WriteColumn16(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) + template + __forceinline static void WriteColumn16(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { const uint8* RESTRICT s0 = &src[srcpitch * 0]; const uint8* RESTRICT s1 = &src[srcpitch * 1]; // for(int j = 0; j < 16; j++) {((uint16*)s0)[j] = columnTable16[0][j]; ((uint16*)s1)[j] = columnTable16[1][j];} - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 GSVector8i v0, v1; - if(alignment == 32) + if (alignment == 32) { v0 = GSVector8i::load(s0); v1 = GSVector8i::load(s1); @@ -198,7 +200,7 @@ public: } else { - if(alignment == 16) + if (alignment == 16) { v0 = GSVector8i::load(&s0[0], &s1[0]); v1 = GSVector8i::load(&s0[16], &s1[16]); @@ -214,15 +216,15 @@ public: v0 = v0.acbd(); v1 = v1.acbd(); - + ((GSVector8i*)dst)[i * 2 + 0] = v0; ((GSVector8i*)dst)[i * 2 + 1] = v1; - #else +#else GSVector4i v0, v1, v2, v3; - if(alignment != 0) + if (alignment != 0) { v0 = GSVector4i::load(&s0[0]); v1 = GSVector4i::load(&s0[16]); @@ -247,14 +249,15 @@ public: ((GSVector4i*)dst)[i * 4 + 2] = v1; ((GSVector4i*)dst)[i * 4 + 3] = v3; - #endif +#endif } - template __forceinline static void WriteColumn8(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) + template + __forceinline static void WriteColumn8(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { // TODO: read unaligned as WriteColumn32 does and try saving a few shuffles - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 GSVector4i v4 = GSVector4i::load(&src[srcpitch * 0]); GSVector4i v5 = GSVector4i::load(&src[srcpitch * 1]); @@ -264,32 +267,32 @@ public: GSVector8i v0(v4, v5); GSVector8i v1(v6, v7); - if((i & 1) == 0) + if ((i & 1) == 0) { v1 = v1.yxwz(); } - else + else { v0 = v0.yxwz(); } GSVector8i::sw8(v0, v1); GSVector8i::sw16(v0, v1); - + v0 = v0.acbd(); v1 = v1.acbd(); ((GSVector8i*)dst)[i * 2 + 0] = v0; ((GSVector8i*)dst)[i * 2 + 1] = v1; - #else +#else GSVector4i v0 = GSVector4i::load(&src[srcpitch * 0]); GSVector4i v1 = GSVector4i::load(&src[srcpitch * 1]); GSVector4i v2 = GSVector4i::load(&src[srcpitch * 2]); GSVector4i v3 = GSVector4i::load(&src[srcpitch * 3]); - if((i & 1) == 0) + if ((i & 1) == 0) { v2 = v2.yxwz(); v3 = v3.yxwz(); @@ -309,10 +312,11 @@ public: ((GSVector4i*)dst)[i * 4 + 2] = v1; ((GSVector4i*)dst)[i * 4 + 3] = v3; - #endif +#endif } - template __forceinline static void WriteColumn4(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) + template + __forceinline static void WriteColumn4(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { //printf("WriteColumn4\n"); @@ -325,7 +329,7 @@ public: GSVector4i v2 = GSVector4i::load(&src[srcpitch * 2]); GSVector4i v3 = GSVector4i::load(&src[srcpitch * 3]); - if((i & 1) == 0) + if ((i & 1) == 0) { v2 = v2.yxwzlh(); v3 = v3.yxwzlh(); @@ -347,55 +351,60 @@ public: ((GSVector4i*)dst)[i * 4 + 3] = v3; } - template static void WriteColumn32(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) + template + static void WriteColumn32(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { - switch((y >> 1) & 3) + switch ((y >> 1) & 3) { - case 0: WriteColumn32<0, alignment, mask>(dst, src, srcpitch); break; - case 1: WriteColumn32<1, alignment, mask>(dst, src, srcpitch); break; - case 2: WriteColumn32<2, alignment, mask>(dst, src, srcpitch); break; - case 3: WriteColumn32<3, alignment, mask>(dst, src, srcpitch); break; - default: __assume(0); + case 0: WriteColumn32<0, alignment, mask>(dst, src, srcpitch); break; + case 1: WriteColumn32<1, alignment, mask>(dst, src, srcpitch); break; + case 2: WriteColumn32<2, alignment, mask>(dst, src, srcpitch); break; + case 3: WriteColumn32<3, alignment, mask>(dst, src, srcpitch); break; + default: __assume(0); } } - template static void WriteColumn16(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) + template + static void WriteColumn16(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { - switch((y >> 1) & 3) + switch ((y >> 1) & 3) { - case 0: WriteColumn16<0, alignment>(dst, src, srcpitch); break; - case 1: WriteColumn16<1, alignment>(dst, src, srcpitch); break; - case 2: WriteColumn16<2, alignment>(dst, src, srcpitch); break; - case 3: WriteColumn16<3, alignment>(dst, src, srcpitch); break; - default: __assume(0); + case 0: WriteColumn16<0, alignment>(dst, src, srcpitch); break; + case 1: WriteColumn16<1, alignment>(dst, src, srcpitch); break; + case 2: WriteColumn16<2, alignment>(dst, src, srcpitch); break; + case 3: WriteColumn16<3, alignment>(dst, src, srcpitch); break; + default: __assume(0); } } - template static void WriteColumn8(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) + template + static void WriteColumn8(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { - switch((y >> 2) & 3) + switch ((y >> 2) & 3) { - case 0: WriteColumn8<0, alignment>(dst, src, srcpitch); break; - case 1: WriteColumn8<1, alignment>(dst, src, srcpitch); break; - case 2: WriteColumn8<2, alignment>(dst, src, srcpitch); break; - case 3: WriteColumn8<3, alignment>(dst, src, srcpitch); break; - default: __assume(0); + case 0: WriteColumn8<0, alignment>(dst, src, srcpitch); break; + case 1: WriteColumn8<1, alignment>(dst, src, srcpitch); break; + case 2: WriteColumn8<2, alignment>(dst, src, srcpitch); break; + case 3: WriteColumn8<3, alignment>(dst, src, srcpitch); break; + default: __assume(0); } } - template static void WriteColumn4(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) + template + static void WriteColumn4(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { - switch((y >> 2) & 3) + switch ((y >> 2) & 3) { - case 0: WriteColumn4<0, alignment>(dst, src, srcpitch); break; - case 1: WriteColumn4<1, alignment>(dst, src, srcpitch); break; - case 2: WriteColumn4<2, alignment>(dst, src, srcpitch); break; - case 3: WriteColumn4<3, alignment>(dst, src, srcpitch); break; - default: __assume(0); + case 0: WriteColumn4<0, alignment>(dst, src, srcpitch); break; + case 1: WriteColumn4<1, alignment>(dst, src, srcpitch); break; + case 2: WriteColumn4<2, alignment>(dst, src, srcpitch); break; + case 3: WriteColumn4<3, alignment>(dst, src, srcpitch); break; + default: __assume(0); } } - template static void WriteBlock32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) + template + static void WriteBlock32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { WriteColumn32<0, alignment, mask>(dst, src, srcpitch); src += srcpitch * 2; @@ -406,7 +415,8 @@ public: WriteColumn32<3, alignment, mask>(dst, src, srcpitch); } - template static void WriteBlock16(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) + template + static void WriteBlock16(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { WriteColumn16<0, alignment>(dst, src, srcpitch); src += srcpitch * 2; @@ -417,7 +427,8 @@ public: WriteColumn16<3, alignment>(dst, src, srcpitch); } - template static void WriteBlock8(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) + template + static void WriteBlock8(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { WriteColumn8<0, alignment>(dst, src, srcpitch); src += srcpitch * 4; @@ -428,7 +439,8 @@ public: WriteColumn8<3, alignment>(dst, src, srcpitch); } - template static void WriteBlock4(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) + template + static void WriteBlock4(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { WriteColumn4<0, alignment>(dst, src, srcpitch); src += srcpitch * 4; @@ -439,12 +451,13 @@ public: WriteColumn4<3, alignment>(dst, src, srcpitch); } - template __forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) + template + __forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 const GSVector8i* s = (const GSVector8i*)src; - + GSVector8i v0 = s[i * 2 + 0]; GSVector8i v1 = s[i * 2 + 1]; @@ -454,10 +467,10 @@ public: GSVector8i::store(&dst[dstpitch * 0], v0); GSVector8i::store(&dst[dstpitch * 1], v1); - #else +#else const GSVector4i* s = (const GSVector4i*)src; - + GSVector4i v0 = s[i * 4 + 0]; GSVector4i v1 = s[i * 4 + 1]; GSVector4i v2 = s[i * 4 + 2]; @@ -473,15 +486,16 @@ public: GSVector4i::store(&d1[0], v2); GSVector4i::store(&d1[1], v3); - #endif +#endif } - template __forceinline static void ReadColumn16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) + template + __forceinline static void ReadColumn16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 const GSVector8i* s = (const GSVector8i*)src; - + GSVector8i v0 = s[i * 2 + 0].shuffle8(m_r16mask); GSVector8i v1 = s[i * 2 + 1].shuffle8(m_r16mask); @@ -494,7 +508,7 @@ public: GSVector8i::store(&dst[dstpitch * 0], v0); GSVector8i::store(&dst[dstpitch * 1], v1); - #elif _M_SSE >= 0x301 +#elif _M_SSE >= 0x301 const GSVector4i* s = (const GSVector4i*)src; @@ -514,15 +528,16 @@ public: GSVector4i::store(&d1[0], v1); GSVector4i::store(&d1[1], v3); - #endif +#endif } - template __forceinline static void ReadColumn8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) + template + __forceinline static void ReadColumn8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) { //for(int j = 0; j < 64; j++) ((uint8*)src)[j] = (uint8)j; - #if 0 //_M_SSE >= 0x501 +#if 0 //_M_SSE >= 0x501 const GSVector8i* s = (const GSVector8i*)src; @@ -546,13 +561,13 @@ public: // TODO: not sure if this is worth it, not in this form, there should be a shorter path - #else +#else const GSVector4i* s = (const GSVector4i*)src; GSVector4i v0, v1, v2, v3; - if((i & 1) == 0) + if ((i & 1) == 0) { v0 = s[i * 4 + 0]; v1 = s[i * 4 + 1]; @@ -580,10 +595,11 @@ public: GSVector4i::store(&dst[dstpitch * 2], v1); GSVector4i::store(&dst[dstpitch * 3], v2); - #endif +#endif } - template __forceinline static void ReadColumn4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) + template + __forceinline static void ReadColumn4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) { //printf("ReadColumn4\n"); @@ -603,7 +619,7 @@ public: v2 = v2.shuffle8(m_r4mask); v3 = v3.shuffle8(m_r4mask); - if((i & 1) == 0) + if ((i & 1) == 0) { GSVector4i::sw16rh(v0, v1, v2, v3); } @@ -620,49 +636,49 @@ public: static void ReadColumn32(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) { - switch((y >> 1) & 3) + switch ((y >> 1) & 3) { - case 0: ReadColumn32<0>(src, dst, dstpitch); break; - case 1: ReadColumn32<1>(src, dst, dstpitch); break; - case 2: ReadColumn32<2>(src, dst, dstpitch); break; - case 3: ReadColumn32<3>(src, dst, dstpitch); break; - default: __assume(0); + case 0: ReadColumn32<0>(src, dst, dstpitch); break; + case 1: ReadColumn32<1>(src, dst, dstpitch); break; + case 2: ReadColumn32<2>(src, dst, dstpitch); break; + case 3: ReadColumn32<3>(src, dst, dstpitch); break; + default: __assume(0); } } static void ReadColumn16(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) { - switch((y >> 1) & 3) + switch ((y >> 1) & 3) { - case 0: ReadColumn16<0>(src, dst, dstpitch); break; - case 1: ReadColumn16<1>(src, dst, dstpitch); break; - case 2: ReadColumn16<2>(src, dst, dstpitch); break; - case 3: ReadColumn16<3>(src, dst, dstpitch); break; - default: __assume(0); + case 0: ReadColumn16<0>(src, dst, dstpitch); break; + case 1: ReadColumn16<1>(src, dst, dstpitch); break; + case 2: ReadColumn16<2>(src, dst, dstpitch); break; + case 3: ReadColumn16<3>(src, dst, dstpitch); break; + default: __assume(0); } } static void ReadColumn8(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) { - switch((y >> 2) & 3) + switch ((y >> 2) & 3) { - case 0: ReadColumn8<0>(src, dst, dstpitch); break; - case 1: ReadColumn8<1>(src, dst, dstpitch); break; - case 2: ReadColumn8<2>(src, dst, dstpitch); break; - case 3: ReadColumn8<3>(src, dst, dstpitch); break; - default: __assume(0); + case 0: ReadColumn8<0>(src, dst, dstpitch); break; + case 1: ReadColumn8<1>(src, dst, dstpitch); break; + case 2: ReadColumn8<2>(src, dst, dstpitch); break; + case 3: ReadColumn8<3>(src, dst, dstpitch); break; + default: __assume(0); } } static void ReadColumn4(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) { - switch((y >> 2) & 3) + switch ((y >> 2) & 3) { - case 0: ReadColumn4<0>(src, dst, dstpitch); break; - case 1: ReadColumn4<1>(src, dst, dstpitch); break; - case 2: ReadColumn4<2>(src, dst, dstpitch); break; - case 3: ReadColumn4<3>(src, dst, dstpitch); break; - default: __assume(0); + case 0: ReadColumn4<0>(src, dst, dstpitch); break; + case 1: ReadColumn4<1>(src, dst, dstpitch); break; + case 2: ReadColumn4<2>(src, dst, dstpitch); break; + case 3: ReadColumn4<3>(src, dst, dstpitch); break; + default: __assume(0); } } @@ -720,7 +736,7 @@ public: GSVector4i mask(0x0f0f0f0f); - for(int i = 0; i < 2; i++) + for (int i = 0; i < 2; i++) { // col 0, 2 @@ -776,13 +792,13 @@ public: __forceinline static void ReadBlock8HP(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 uint8* RESTRICT d0 = &dst[dstpitch * 0]; uint8* RESTRICT d1 = &dst[dstpitch * 4]; const GSVector8i* s = (const GSVector8i*)src; - + GSVector8i v0, v1, v2, v3; GSVector4i v4, v5; @@ -816,13 +832,13 @@ public: GSVector4i::storeh(&d1[dstpitch * 2], v4); GSVector4i::storeh(&d1[dstpitch * 3], v5); - #else +#else const GSVector4i* s = (const GSVector4i*)src; GSVector4i v0, v1, v2, v3; - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { v0 = s[i * 4 + 0]; v1 = s[i * 4 + 1]; @@ -842,18 +858,18 @@ public: dst += dstpitch; } - #endif +#endif } __forceinline static void ReadBlock4HLP(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 uint8* RESTRICT d0 = &dst[dstpitch * 0]; uint8* RESTRICT d1 = &dst[dstpitch * 4]; const GSVector8i* s = (const GSVector8i*)src; - + GSVector8i v0, v1, v2, v3; GSVector4i v4, v5; GSVector8i mask(0x0f0f0f0f); @@ -888,7 +904,7 @@ public: GSVector4i::storeh(&d1[dstpitch * 2], v4); GSVector4i::storeh(&d1[dstpitch * 3], v5); - #else +#else const GSVector4i* s = (const GSVector4i*)src; @@ -896,7 +912,7 @@ public: GSVector4i mask(0x0f0f0f0f); - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { v0 = s[i * 4 + 0]; v1 = s[i * 4 + 1]; @@ -916,18 +932,18 @@ public: dst += dstpitch; } - #endif +#endif } __forceinline static void ReadBlock4HHP(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 uint8* RESTRICT d0 = &dst[dstpitch * 0]; uint8* RESTRICT d1 = &dst[dstpitch * 4]; const GSVector8i* s = (const GSVector8i*)src; - + GSVector8i v0, v1, v2, v3; GSVector4i v4, v5; @@ -961,13 +977,13 @@ public: GSVector4i::storeh(&d1[dstpitch * 2], v4); GSVector4i::storeh(&d1[dstpitch * 3], v5); - #else +#else const GSVector4i* s = (const GSVector4i*)src; GSVector4i v0, v1, v2, v3; - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { v0 = s[i * 4 + 0]; v1 = s[i * 4 + 1]; @@ -987,29 +1003,32 @@ public: dst += dstpitch; } - #endif +#endif } - template __forceinline static V Expand24to32(const V& c, const V& TA0) + template + __forceinline static V Expand24to32(const V& c, const V& TA0) { return c | (AEM ? TA0.andnot(c == V::zero()) : TA0); // TA0 & (c != GSVector4i::zero()) } - template __forceinline static V Expand16to32(const V& c, const V& TA0, const V& TA1) + template + __forceinline static V Expand16to32(const V& c, const V& TA0, const V& TA1) { return ((c & m_rxxx) << 3) | ((c & m_xgxx) << 6) | ((c & m_xxbx) << 9) | (AEM ? TA0.blend8(TA1, c.sra16(15)).andnot(c == V::zero()) : TA0.blend(TA1, c.sra16(15))); } - template static void ExpandBlock24(const uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) + template + static void ExpandBlock24(const uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 const GSVector8i* s = (const GSVector8i*)src; GSVector8i TA0(TEXA.TA0 << 24); GSVector8i mask = GSVector8i::x00ffffff(); - for(int i = 0; i < 4; i++, dst += dstpitch * 2) + for (int i = 0; i < 4; i++, dst += dstpitch * 2) { GSVector8i v0 = s[i * 2 + 0] & mask; GSVector8i v1 = s[i * 2 + 1] & mask; @@ -1021,14 +1040,14 @@ public: d1[0] = Expand24to32(v1, TA0); } - #else +#else const GSVector4i* s = (const GSVector4i*)src; GSVector4i TA0(TEXA.TA0 << 24); GSVector4i mask = GSVector4i::x00ffffff(); - for(int i = 0; i < 4; i++, dst += dstpitch * 2) + for (int i = 0; i < 4; i++, dst += dstpitch * 2) { GSVector4i v0 = s[i * 4 + 0] & mask; GSVector4i v1 = s[i * 4 + 1] & mask; @@ -1044,19 +1063,20 @@ public: d1[1] = Expand24to32(v3, TA0); } - #endif +#endif } - template static void ExpandBlock16(const uint16* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) // do not inline, uses too many xmm regs + template + static void ExpandBlock16(const uint16* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) // do not inline, uses too many xmm regs { - #if _M_SSE >= 0x501 - +#if _M_SSE >= 0x501 + const GSVector8i* s = (const GSVector8i*)src; GSVector8i TA0(TEXA.TA0 << 24); GSVector8i TA1(TEXA.TA1 << 24); - for(int i = 0; i < 8; i++, dst += dstpitch) + for (int i = 0; i < 8; i++, dst += dstpitch) { GSVector8i v = s[i].acbd(); @@ -1064,14 +1084,14 @@ public: ((GSVector8i*)dst)[1] = Expand16to32(v.uph16(v), TA0, TA1); } - #else - +#else + const GSVector4i* s = (const GSVector4i*)src; GSVector4i TA0(TEXA.TA0 << 24); GSVector4i TA1(TEXA.TA1 << 24); - for(int i = 0; i < 8; i++, dst += dstpitch) + for (int i = 0; i < 8; i++, dst += dstpitch) { GSVector4i v0 = s[i * 2 + 0]; @@ -1084,12 +1104,12 @@ public: ((GSVector4i*)dst)[3] = Expand16to32(v1.uph16(v1), TA0, TA1); } - #endif +#endif } __forceinline static void ExpandBlock8_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) { - for(int j = 0; j < 16; j++, dst += dstpitch) + for (int j = 0; j < 16; j++, dst += dstpitch) { ((const GSVector4i*)src)[j].gather32_8(pal, (GSVector4i*)dst); } @@ -1097,7 +1117,7 @@ public: __forceinline static void ExpandBlock8_16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) { - for(int j = 0; j < 16; j++, dst += dstpitch) + for (int j = 0; j < 16; j++, dst += dstpitch) { ((const GSVector4i*)src)[j].gather16_8(pal, (GSVector4i*)dst); } @@ -1105,7 +1125,7 @@ public: __forceinline static void ExpandBlock4_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint64* RESTRICT pal) { - for(int j = 0; j < 16; j++, dst += dstpitch) + for (int j = 0; j < 16; j++, dst += dstpitch) { ((const GSVector4i*)src)[j].gather64_8(pal, (GSVector4i*)dst); } @@ -1113,7 +1133,7 @@ public: __forceinline static void ExpandBlock4_16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint64* RESTRICT pal) { - for(int j = 0; j < 16; j++, dst += dstpitch) + for (int j = 0; j < 16; j++, dst += dstpitch) { ((const GSVector4i*)src)[j].gather32_8(pal, (GSVector4i*)dst); } @@ -1121,7 +1141,7 @@ public: __forceinline static void ExpandBlock8H_32(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) { - for(int j = 0; j < 8; j++, dst += dstpitch) + for (int j = 0; j < 8; j++, dst += dstpitch) { const GSVector4i* s = (const GSVector4i*)src; @@ -1132,7 +1152,7 @@ public: __forceinline static void ExpandBlock8H_16(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) { - for(int j = 0; j < 8; j++, dst += dstpitch) + for (int j = 0; j < 8; j++, dst += dstpitch) { const GSVector4i* s = (const GSVector4i*)src; @@ -1146,7 +1166,7 @@ public: __forceinline static void ExpandBlock4HL_32(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) { - for(int j = 0; j < 8; j++, dst += dstpitch) + for (int j = 0; j < 8; j++, dst += dstpitch) { const GSVector4i* s = (const GSVector4i*)src; @@ -1157,7 +1177,7 @@ public: __forceinline static void ExpandBlock4HL_16(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) { - for(int j = 0; j < 8; j++, dst += dstpitch) + for (int j = 0; j < 8; j++, dst += dstpitch) { const GSVector4i* s = (const GSVector4i*)src; @@ -1170,7 +1190,7 @@ public: __forceinline static void ExpandBlock4HH_32(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) { - for(int j = 0; j < 8; j++, dst += dstpitch) + for (int j = 0; j < 8; j++, dst += dstpitch) { const GSVector4i* s = (const GSVector4i*)src; @@ -1181,7 +1201,7 @@ public: __forceinline static void ExpandBlock4HH_16(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) { - for(int j = 0; j < 8; j++, dst += dstpitch) + for (int j = 0; j < 8; j++, dst += dstpitch) { const GSVector4i* s = (const GSVector4i*)src; @@ -1194,7 +1214,7 @@ public: __forceinline static void UnpackAndWriteBlock24(const uint8* RESTRICT src, int srcpitch, uint8* RESTRICT dst) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 const uint8* RESTRICT s0 = &src[srcpitch * 0]; const uint8* RESTRICT s1 = &src[srcpitch * 1]; @@ -1207,7 +1227,7 @@ public: v4 = GSVector8i::load(s0, s0 + 8, s2, s2 + 8); v5 = GSVector8i::load(s0 + 16, s1, s2 + 16, s3); v6 = GSVector8i::load(s1 + 8, s1 + 16, s3 + 8, s3 + 16); - + v0 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())).acbd(); v4 = v4.srl<12>(v5); v1 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())).acbd(); @@ -1233,7 +1253,7 @@ public: v4 = GSVector8i::load(s0, s0 + 8, s2, s2 + 8); v5 = GSVector8i::load(s0 + 16, s1, s2 + 16, s3); v6 = GSVector8i::load(s1 + 8, s1 + 16, s3 + 8, s3 + 16); - + v0 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())).acbd(); v4 = v4.srl<12>(v5); v1 = v4.upl32(v4.srl<3>()).upl64(v4.srl<6>().upl32(v4.srl<9>())).acbd(); @@ -1249,12 +1269,12 @@ public: ((GSVector8i*)dst)[6] = ((GSVector8i*)dst)[6].blend8(v1, mask); ((GSVector8i*)dst)[7] = ((GSVector8i*)dst)[7].blend8(v3, mask); - #else +#else GSVector4i v0, v1, v2, v3, v4, v5, v6; GSVector4i mask = GSVector4i::x00ffffff(); - for(int i = 0; i < 4; i++, src += srcpitch * 2) + for (int i = 0; i < 4; i++, src += srcpitch * 2) { v4 = GSVector4i::load(src); v5 = GSVector4i::load(src + 16, src + srcpitch); @@ -1276,14 +1296,14 @@ public: ((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend8(v3, mask); } - #endif +#endif } __forceinline static void UnpackAndWriteBlock8H(const uint8* RESTRICT src, int srcpitch, uint8* RESTRICT dst) { GSVector4i v4, v5, v6, v7; - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 GSVector8i v0, v1, v2, v3; GSVector8i mask = GSVector8i::xff000000(); @@ -1295,7 +1315,7 @@ public: v2 = GSVector8i::cast(v4.upl16(v5)); v3 = GSVector8i::cast(v6.upl16(v7)); - + v0 = v2.u8to32c() << 24; v1 = v2.bbbb().u8to32c() << 24; v2 = v3.u8to32c() << 24; @@ -1315,7 +1335,7 @@ public: v2 = GSVector8i::cast(v4.upl16(v5)); v3 = GSVector8i::cast(v6.upl16(v7)); - + v0 = v2.u8to32c() << 24; v1 = v2.bbbb().u8to32c() << 24; v2 = v3.u8to32c() << 24; @@ -1326,7 +1346,7 @@ public: ((GSVector8i*)dst)[6] = ((GSVector8i*)dst)[6].blend8(v2, mask); ((GSVector8i*)dst)[7] = ((GSVector8i*)dst)[7].blend8(v3, mask); - #elif _M_SSE >= 0x301 +#elif _M_SSE >= 0x301 GSVector4i v0, v1, v2, v3; GSVector4i mask = GSVector4i::xff000000(); @@ -1335,7 +1355,7 @@ public: GSVector4i mask2 = m_uw8hmask2; GSVector4i mask3 = m_uw8hmask3; - for(int i = 0; i < 4; i++, src += srcpitch * 2) + for (int i = 0; i < 4; i++, src += srcpitch * 2) { v4 = GSVector4i::load(src, src + srcpitch); @@ -1350,23 +1370,24 @@ public: ((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend8(v3, mask); } - #endif +#endif } __forceinline static void UnpackAndWriteBlock4HL(const uint8* RESTRICT src, int srcpitch, uint8* RESTRICT dst) { //printf("4HL\n"); - if(0) + if (0) { uint8* s = (uint8*)src; - for(int j = 0; j < 8; j++, s += srcpitch) - for(int i = 0; i < 4; i++) s[i] = (columnTable32[j][i*2] & 0x0f) | (columnTable32[j][i*2+1] << 4); + for (int j = 0; j < 8; j++, s += srcpitch) + for (int i = 0; i < 4; i++) + s[i] = (columnTable32[j][i * 2] & 0x0f) | (columnTable32[j][i * 2 + 1] << 4); } GSVector4i v4, v5, v6; - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 GSVector8i v0, v1, v2, v3; GSVector8i mask(0x0f000000); @@ -1378,7 +1399,7 @@ public: v2 = GSVector8i::cast(v4.upl16(v5)); v3 = GSVector8i::cast(v4.uph16(v5)); - + v0 = v2.u8to32c() << 24; v1 = v2.bbbb().u8to32c() << 24; v2 = v3.u8to32c() << 24; @@ -1398,7 +1419,7 @@ public: v2 = GSVector8i::cast(v4.upl16(v5)); v3 = GSVector8i::cast(v4.uph16(v5)); - + v0 = v2.u8to32c() << 24; v1 = v2.bbbb().u8to32c() << 24; v2 = v3.u8to32c() << 24; @@ -1409,7 +1430,7 @@ public: ((GSVector8i*)dst)[6] = ((GSVector8i*)dst)[6].blend(v2, mask); ((GSVector8i*)dst)[7] = ((GSVector8i*)dst)[7].blend(v3, mask); - #elif _M_SSE >= 0x301 +#elif _M_SSE >= 0x301 GSVector4i v0, v1, v2, v3; GSVector4i mask = GSVector4i(0x0f000000); @@ -1418,7 +1439,7 @@ public: GSVector4i mask2 = m_uw8hmask2; GSVector4i mask3 = m_uw8hmask3; - for(int i = 0; i < 2; i++, src += srcpitch * 4) + for (int i = 0; i < 2; i++, src += srcpitch * 4) { GSVector4i v(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 3]); @@ -1446,14 +1467,14 @@ public: ((GSVector4i*)dst)[i * 8 + 7] = ((GSVector4i*)dst)[i * 8 + 7].blend(v3, mask); } - #endif +#endif } __forceinline static void UnpackAndWriteBlock4HH(const uint8* RESTRICT src, int srcpitch, uint8* RESTRICT dst) { GSVector4i v4, v5, v6; - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 GSVector8i v0, v1, v2, v3; GSVector8i mask = GSVector8i::xf0000000(); @@ -1465,7 +1486,7 @@ public: v2 = GSVector8i::cast(v4.upl16(v5)); v3 = GSVector8i::cast(v4.uph16(v5)); - + v0 = v2.u8to32c() << 24; v1 = v2.bbbb().u8to32c() << 24; v2 = v3.u8to32c() << 24; @@ -1485,7 +1506,7 @@ public: v2 = GSVector8i::cast(v4.upl16(v5)); v3 = GSVector8i::cast(v4.uph16(v5)); - + v0 = v2.u8to32c() << 24; v1 = v2.bbbb().u8to32c() << 24; v2 = v3.u8to32c() << 24; @@ -1496,7 +1517,7 @@ public: ((GSVector8i*)dst)[6] = ((GSVector8i*)dst)[6].blend(v2, mask); ((GSVector8i*)dst)[7] = ((GSVector8i*)dst)[7].blend(v3, mask); - #elif _M_SSE >= 0x301 +#elif _M_SSE >= 0x301 GSVector4i v0, v1, v2, v3; GSVector4i mask = GSVector4i::xf0000000(); @@ -1505,7 +1526,7 @@ public: GSVector4i mask2 = m_uw8hmask2; GSVector4i mask3 = m_uw8hmask3; - for(int i = 0; i < 2; i++, src += srcpitch * 4) + for (int i = 0; i < 2; i++, src += srcpitch * 4) { GSVector4i v(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 3]); @@ -1533,15 +1554,16 @@ public: ((GSVector4i*)dst)[i * 8 + 7] = ((GSVector4i*)dst)[i * 8 + 7].blend(v3, mask); } - #endif +#endif } - template __forceinline static void ReadAndExpandBlock24(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) + template + __forceinline static void ReadAndExpandBlock24(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 const GSVector8i* s = (const GSVector8i*)src; - + GSVector8i TA0(TEXA.TA0 << 24); GSVector8i mask = GSVector8i::x00ffffff(); @@ -1579,14 +1601,14 @@ public: *(GSVector8i*)&dst[dstpitch * 2] = Expand24to32(v2, TA0); *(GSVector8i*)&dst[dstpitch * 3] = Expand24to32(v3, TA0); - #else +#else const GSVector4i* s = (const GSVector4i*)src; GSVector4i TA0(TEXA.TA0 << 24); GSVector4i mask = GSVector4i::x00ffffff(); - for(int i = 0; i < 4; i++, dst += dstpitch * 2) + for (int i = 0; i < 4; i++, dst += dstpitch * 2) { GSVector4i v0 = s[i * 4 + 0]; GSVector4i v1 = s[i * 4 + 1]; @@ -1609,19 +1631,20 @@ public: d1[1] = Expand24to32(v3, TA0); } - #endif +#endif } - template __forceinline static void ReadAndExpandBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) + template + __forceinline static void ReadAndExpandBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 const GSVector8i* s = (const GSVector8i*)src; GSVector8i TA0(TEXA.TA0 << 24); GSVector8i TA1(TEXA.TA1 << 24); - for(int i = 0; i < 4; i++, dst += dstpitch * 2) + for (int i = 0; i < 4; i++, dst += dstpitch * 2) { GSVector8i v0 = s[i * 2 + 0].shuffle8(m_r16mask); GSVector8i v1 = s[i * 2 + 1].shuffle8(m_r16mask); @@ -1638,15 +1661,15 @@ public: d1[1] = Expand16to32(v1.uph16(v1), TA0, TA1); } - #else - +#else + alignas(32) uint16 block[16 * 8]; - + ReadBlock16(src, (uint8*)block, sizeof(block) / 8); ExpandBlock16(block, dst, dstpitch, TEXA); - #endif +#endif } __forceinline static void ReadAndExpandBlock8_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal) @@ -1658,7 +1681,7 @@ public: GSVector4i v0, v1, v2, v3; GSVector4i mask = m_r8mask; - for(int i = 0; i < 2; i++) + for (int i = 0; i < 2; i++) { v0 = s[i * 8 + 0].shuffle8(mask); v1 = s[i * 8 + 1].shuffle8(mask); @@ -1707,7 +1730,7 @@ public: GSVector4i v0, v1, v2, v3; GSVector4i mask = m_r4mask; - for(int i = 0; i < 2; i++) + for (int i = 0; i < 2; i++) { v0 = s[i * 8 + 0].xzyw(); v1 = s[i * 8 + 1].xzyw(); @@ -1771,7 +1794,7 @@ public: GSVector4i v0, v1, v2, v3; - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { v0 = s[i * 4 + 0]; v1 = s[i * 4 + 1]; @@ -1801,7 +1824,7 @@ public: GSVector4i v0, v1, v2, v3; - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { v0 = s[i * 4 + 0]; v1 = s[i * 4 + 1]; @@ -1832,7 +1855,7 @@ public: GSVector4i v0, v1, v2, v3; - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { v0 = s[i * 4 + 0]; v1 = s[i * 4 + 1]; diff --git a/plugins/GSdx/GSCapture.cpp b/plugins/GSdx/GSCapture.cpp index 85351c7fa8..951523965a 100644 --- a/plugins/GSdx/GSCapture.cpp +++ b/plugins/GSdx/GSCapture.cpp @@ -26,33 +26,45 @@ #ifdef _WIN32 -class CPinInfo : public PIN_INFO { +class CPinInfo : public PIN_INFO +{ public: CPinInfo() { pFilter = NULL; } - ~CPinInfo() { if (pFilter) pFilter->Release(); } + ~CPinInfo() + { + if (pFilter) + pFilter->Release(); + } }; -class CFilterInfo : public FILTER_INFO { +class CFilterInfo : public FILTER_INFO +{ public: CFilterInfo() { pGraph = NULL; } - ~CFilterInfo() { if (pGraph) pGraph->Release(); } + ~CFilterInfo() + { + if (pGraph) + pGraph->Release(); + } }; #define BeginEnumFilters(pFilterGraph, pEnumFilters, pBaseFilter) \ - {CComPtr pEnumFilters; \ - if(pFilterGraph && SUCCEEDED(pFilterGraph->EnumFilters(&pEnumFilters))) \ { \ - for(CComPtr pBaseFilter; S_OK == pEnumFilters->Next(1, &pBaseFilter, 0); pBaseFilter = NULL) \ + CComPtr pEnumFilters; \ + if(pFilterGraph && SUCCEEDED(pFilterGraph->EnumFilters(&pEnumFilters))) \ { \ + for(CComPtr pBaseFilter; S_OK == pEnumFilters->Next(1, &pBaseFilter, 0); pBaseFilter = NULL) \ + { #define EndEnumFilters }}} #define BeginEnumPins(pBaseFilter, pEnumPins, pPin) \ - {CComPtr pEnumPins; \ - if(pBaseFilter && SUCCEEDED(pBaseFilter->EnumPins(&pEnumPins))) \ { \ - for(CComPtr pPin; S_OK == pEnumPins->Next(1, &pPin, 0); pPin = NULL) \ + CComPtr pEnumPins; \ + if(pBaseFilter && SUCCEEDED(pBaseFilter->EnumPins(&pEnumPins))) \ { \ + for(CComPtr pPin; S_OK == pEnumPins->Next(1, &pPin, 0); pPin = NULL) \ + { #define EndEnumPins }}} @@ -76,9 +88,9 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource STDMETHODIMP NonDelegatingQueryInterface(REFIID riid, void** ppv) { - return - riid == __uuidof(IGSSource) ? GetInterface((IGSSource*)this, ppv) : - __super::NonDelegatingQueryInterface(riid, ppv); + return riid == __uuidof(IGSSource) + ? GetInterface((IGSSource*)this, ppv) + : __super::NonDelegatingQueryInterface(riid, ppv); } class GSSourceOutputPin : public CBaseOutputPin @@ -126,8 +138,10 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource vih.bmiHeader.biSizeImage = m_size.x * m_size.y * 4; mt.SetFormat((uint8*)&vih, sizeof(vih)); - if(colorspace == 1) m_mts.insert(m_mts.begin(), mt); - else m_mts.push_back(mt); + if (colorspace == 1) + m_mts.insert(m_mts.begin(), mt); + else + m_mts.push_back(mt); } HRESULT GSSourceOutputPin::DecideBufferSize(IMemAllocator* pAlloc, ALLOCATOR_PROPERTIES* pProperties) @@ -141,12 +155,12 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource ALLOCATOR_PROPERTIES Actual; - if(FAILED(hr = pAlloc->SetProperties(pProperties, &Actual))) + if (FAILED(hr = pAlloc->SetProperties(pProperties, &Actual))) { return hr; } - if(Actual.cbBuffer < pProperties->cbBuffer) + if (Actual.cbBuffer < pProperties->cbBuffer) { return E_FAIL; } @@ -156,11 +170,11 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource return S_OK; } - HRESULT CheckMediaType(const CMediaType* pmt) + HRESULT CheckMediaType(const CMediaType* pmt) { - for(const auto &mt : m_mts) + for (const auto& mt : m_mts) { - if(mt.majortype == pmt->majortype && mt.subtype == pmt->subtype) + if (mt.majortype == pmt->majortype && mt.subtype == pmt->subtype) { return S_OK; } @@ -169,12 +183,14 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource return E_FAIL; } - HRESULT GetMediaType(int i, CMediaType* pmt) + HRESULT GetMediaType(int i, CMediaType* pmt) { CheckPointer(pmt, E_POINTER); - if(i < 0) return E_INVALIDARG; - if(i > 1) return VFW_S_NO_MORE_ITEMS; + if (i < 0) + return E_INVALIDARG; + if (i > 1) + return VFW_S_NO_MORE_ITEMS; *pmt = m_mts[i]; @@ -195,7 +211,6 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource GSSourceOutputPin* m_output; public: - GSSource(int w, int h, float fps, IUnknown* pUnk, HRESULT& hr, int colorspace) : CBaseFilter("GSSource", pUnk, this, __uuidof(this), &hr) , m_output(NULL) @@ -234,14 +249,14 @@ public: STDMETHODIMP DeliverFrame(const void* bits, int pitch, bool rgba) { - if(!m_output || !m_output->IsConnected()) + if (!m_output || !m_output->IsConnected()) { return E_UNEXPECTED; } CComPtr sample; - if(FAILED(m_output->GetDeliveryBuffer(&sample, NULL, NULL, 0))) + if (FAILED(m_output->GetDeliveryBuffer(&sample, NULL, NULL, 0))) { return E_FAIL; } @@ -263,7 +278,7 @@ public: int h = m_size.y; int srcpitch = pitch; - if(mt.subtype == MEDIASUBTYPE_YUY2) + if (mt.subtype == MEDIASUBTYPE_YUY2) { int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 2; @@ -271,7 +286,7 @@ public: GSVector4 us(-0.148f / 2, -0.291f / 2, 0.439f / 2, 0.0f); GSVector4 vs(0.439f / 2, -0.368f / 2, -0.071f / 2, 0.0f); - if(!rgba) + if (!rgba) { ys = ys.zyxw(); us = us.zyxw(); @@ -280,12 +295,12 @@ public: const GSVector4 offset(16, 128, 16, 128); - for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch) + for (int j = 0; j < h; j++, dst += dstpitch, src += srcpitch) { uint32* s = (uint32*)src; uint16* d = (uint16*)dst; - for(int i = 0; i < w; i += 2) + for (int i = 0; i < w; i += 2) { GSVector4 c0 = GSVector4::rgba32(s[i + 0]); GSVector4 c1 = GSVector4::rgba32(s[i + 1]); @@ -300,40 +315,40 @@ public: } } } - else if(mt.subtype == MEDIASUBTYPE_RGB32) + else if (mt.subtype == MEDIASUBTYPE_RGB32) { int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 4; dst += dstpitch * (h - 1); dstpitch = -dstpitch; - for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch) + for (int j = 0; j < h; j++, dst += dstpitch, src += srcpitch) { - if(rgba) + if (rgba) { - #if _M_SSE >= 0x301 +#if _M_SSE >= 0x301 GSVector4i* s = (GSVector4i*)src; GSVector4i* d = (GSVector4i*)dst; GSVector4i mask(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15); - for(int i = 0, w4 = w >> 2; i < w4; i++) + for (int i = 0, w4 = w >> 2; i < w4; i++) { d[i] = s[i].shuffle8(mask); } - #else +#else GSVector4i* s = (GSVector4i*)src; GSVector4i* d = (GSVector4i*)dst; - for(int i = 0, w4 = w >> 2; i < w4; i++) + for (int i = 0, w4 = w >> 2; i < w4; i++) { d[i] = ((s[i] & 0x00ff0000) >> 16) | ((s[i] & 0x000000ff) << 16) | (s[i] & 0x0000ff00); } - #endif +#endif } else { @@ -346,7 +361,7 @@ public: return E_FAIL; } - if(FAILED(m_output->Deliver(sample))) + if (FAILED(m_output->Deliver(sample))) { return E_FAIL; } @@ -364,22 +379,23 @@ public: static IPin* GetFirstPin(IBaseFilter* pBF, PIN_DIRECTION dir) { - if(!pBF) return(NULL); + if (!pBF) + return nullptr; BeginEnumPins(pBF, pEP, pPin) { PIN_DIRECTION dir2; pPin->QueryDirection(&dir2); - if(dir == dir2) + if (dir == dir2) { IPin* pRet = pPin.Detach(); pRet->Release(); - return(pRet); + return pRet; } } EndEnumPins - return(NULL); + return nullptr; } #endif @@ -390,7 +406,7 @@ static IPin* GetFirstPin(IBaseFilter* pBF, PIN_DIRECTION dir) GSCapture::GSCapture() : m_capturing(false), m_frame(0) - , m_out_dir("/tmp/GSdx_Capture") // FIXME Later add an option + , m_out_dir("/tmp/GSdx_Capture") // FIXME Later add an option { m_out_dir = theApp.GetConfigS("capture_out_dir"); m_threads = theApp.GetConfigI("capture_threads"); @@ -451,17 +467,17 @@ bool GSCapture::BeginCapture(float fps, GSVector2i recommendedResolution, float CComPtr cgb; CComPtr mux; - if(FAILED(hr = m_graph.CoCreateInstance(CLSID_FilterGraph)) - || FAILED(hr = cgb.CoCreateInstance(CLSID_CaptureGraphBuilder2)) - || FAILED(hr = cgb->SetFiltergraph(m_graph)) - || FAILED(hr = cgb->SetOutputFileName(&MEDIASUBTYPE_Avi, std::wstring(dlg.m_filename.begin(), dlg.m_filename.end()).c_str(), &mux, NULL))) + if (FAILED(hr = m_graph.CoCreateInstance(CLSID_FilterGraph)) + || FAILED(hr = cgb.CoCreateInstance(CLSID_CaptureGraphBuilder2)) + || FAILED(hr = cgb->SetFiltergraph(m_graph)) + || FAILED(hr = cgb->SetOutputFileName(&MEDIASUBTYPE_Avi, std::wstring(dlg.m_filename.begin(), dlg.m_filename.end()).c_str(), &mux, NULL))) { return false; } m_src = new GSSource(m_size.x, m_size.y, fps, NULL, hr, dlg.m_colorspace); - if (dlg.m_enc==0) + if (dlg.m_enc == 0) { if (FAILED(hr = m_graph->AddFilter(m_src, L"Source"))) return false; @@ -470,14 +486,13 @@ bool GSCapture::BeginCapture(float fps, GSVector2i recommendedResolution, float } else { - if(FAILED(hr = m_graph->AddFilter(m_src, L"Source")) - || FAILED(hr = m_graph->AddFilter(dlg.m_enc, L"Encoder"))) + if (FAILED(hr = m_graph->AddFilter(m_src, L"Source")) || FAILED(hr = m_graph->AddFilter(dlg.m_enc, L"Encoder"))) { return false; } - if(FAILED(hr = m_graph->ConnectDirect(GetFirstPin(m_src, PINDIR_OUTPUT), GetFirstPin(dlg.m_enc, PINDIR_INPUT), NULL)) - || FAILED(hr = m_graph->ConnectDirect(GetFirstPin(dlg.m_enc, PINDIR_OUTPUT), GetFirstPin(mux, PINDIR_INPUT), NULL))) + if (FAILED(hr = m_graph->ConnectDirect(GetFirstPin(m_src, PINDIR_OUTPUT), GetFirstPin(dlg.m_enc, PINDIR_INPUT), NULL)) + || FAILED(hr = m_graph->ConnectDirect(GetFirstPin(dlg.m_enc, PINDIR_OUTPUT), GetFirstPin(mux, PINDIR_INPUT), NULL))) { return false; } @@ -519,7 +534,8 @@ bool GSCapture::BeginCapture(float fps, GSVector2i recommendedResolution, float m_size.x = theApp.GetConfigI("CaptureWidth"); m_size.y = theApp.GetConfigI("CaptureHeight"); - for(int i = 0; i < m_threads; i++) { + for (int i = 0; i < m_threads; i++) + { m_workers.push_back(std::unique_ptr(new GSPng::Worker(&GSPng::Process))); } @@ -533,7 +549,7 @@ bool GSCapture::DeliverFrame(const void* bits, int pitch, bool rgba) { std::lock_guard lock(m_lock); - if(bits == NULL || pitch == 0) + if (bits == NULL || pitch == 0) { ASSERT(0); @@ -542,7 +558,7 @@ bool GSCapture::DeliverFrame(const void* bits, int pitch, bool rgba) #ifdef _WIN32 - if(m_src) + if (m_src) { CComQIPtr(m_src)->DeliverFrame(bits, pitch, rgba); @@ -553,7 +569,7 @@ bool GSCapture::DeliverFrame(const void* bits, int pitch, bool rgba) std::string out_file = m_out_dir + format("/frame.%010d.png", m_frame); //GSPng::Save(GSPng::RGB_PNG, out_file, (uint8*)bits, m_size.x, m_size.y, pitch, m_compression_level); - m_workers[m_frame%m_threads]->Push(std::make_shared(GSPng::RGB_PNG, out_file, static_cast(bits), m_size.x, m_size.y, pitch, m_compression_level)); + m_workers[m_frame % m_threads]->Push(std::make_shared(GSPng::RGB_PNG, out_file, static_cast(bits), m_size.x, m_size.y, pitch, m_compression_level)); m_frame++; @@ -571,14 +587,14 @@ bool GSCapture::EndCapture() #ifdef _WIN32 - if(m_src) + if (m_src) { CComQIPtr(m_src)->DeliverEOS(); m_src = NULL; } - if(m_graph) + if (m_graph) { CComQIPtr(m_graph)->Stop(); diff --git a/plugins/GSdx/GSCapture.h b/plugins/GSdx/GSCapture.h index 480f840acf..663853bb25 100644 --- a/plugins/GSdx/GSCapture.h +++ b/plugins/GSdx/GSCapture.h @@ -37,17 +37,17 @@ class GSCapture std::string m_out_dir; int m_threads; - #ifdef _WIN32 +#ifdef _WIN32 CComPtr m_graph; CComPtr m_src; - #elif defined(__unix__) +#elif defined(__unix__) std::vector> m_workers; int m_compression_level; - #endif +#endif public: GSCapture(); @@ -57,6 +57,6 @@ public: bool DeliverFrame(const void* bits, int pitch, bool rgba); bool EndCapture(); - bool IsCapturing() {return m_capturing;} - GSVector2i GetSize() {return m_size;} + bool IsCapturing() { return m_capturing; } + GSVector2i GetSize() { return m_size; } }; diff --git a/plugins/GSdx/GSClut.h b/plugins/GSdx/GSClut.h index 7d15c99708..dee8768a3b 100644 --- a/plugins/GSdx/GSClut.h +++ b/plugins/GSdx/GSClut.h @@ -71,9 +71,12 @@ class alignas(32) GSClut : public GSAlignedClass<32> void WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); void WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - template void WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - template void WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - template void WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); + template + void WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); + template + void WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); + template + void WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); @@ -108,8 +111,8 @@ public: void Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); void GetAlphaMinMax32(int& amin, int& amax); - uint32 operator [] (size_t i) const {return m_buff32[i];} + uint32 operator[](size_t i) const { return m_buff32[i]; } - operator const uint32*() const {return m_buff32;} - operator const uint64*() const {return m_buff64;} + operator const uint32*() const { return m_buff32; } + operator const uint64*() const { return m_buff64; } }; diff --git a/plugins/GSdx/GSCodeBuffer.cpp b/plugins/GSdx/GSCodeBuffer.cpp index 5fef7f0f5e..60a830e3d8 100644 --- a/plugins/GSdx/GSCodeBuffer.cpp +++ b/plugins/GSdx/GSCodeBuffer.cpp @@ -32,7 +32,7 @@ GSCodeBuffer::GSCodeBuffer(size_t blocksize) GSCodeBuffer::~GSCodeBuffer() { - for(auto buffer : m_buffers) + for (auto buffer : m_buffers) { vmfree(buffer, m_blocksize); } @@ -45,7 +45,7 @@ void* GSCodeBuffer::GetBuffer(size_t size) size = (size + 15) & ~15; - if(m_ptr == NULL || m_pos + size > m_blocksize) + if (m_ptr == NULL || m_pos + size > m_blocksize) { m_ptr = (uint8*)vmalloc(m_blocksize, true); diff --git a/plugins/GSdx/GSCrc.cpp b/plugins/GSdx/GSCrc.cpp index e47f270303..2c7f0dc667 100644 --- a/plugins/GSdx/GSCrc.cpp +++ b/plugins/GSdx/GSCrc.cpp @@ -129,7 +129,7 @@ CRC::Game CRC::m_games[] = {0xC02C653E, GT4, CH, 0}, {0x7ABDBB5E, GT4, CH, 0}, // cutie comment {0xAEAD1CA3, GT4, JP, 0}, - {0xE906EA37, GT4, JP, 0}, // GT4 First Preview + {0xE906EA37, GT4, JP, 0}, // GT4 First Preview {0xCA6243B9, GT4, JP, 0}, // GT4 Prologue {0xDD764BBE, GT4, JP, 0}, // GT4 Prologue {0xE1258846, GT4, JP, 0}, // GT4 Prologue @@ -357,7 +357,7 @@ CRC::Game CRC::m_games[] = {0xEF06DBD6, SakuraWarsSoLongMyLove, JP, 0}, // cutie comment {0xDD41054D, SakuraWarsSoLongMyLove, US, 0}, // cutie comment {0xC2E3A7A4, SakuraWarsSoLongMyLove, KO, 0}, - {0x4A4B623A, FightingBeautyWulong, JP,0}, // cutie comment + {0x4A4B623A, FightingBeautyWulong, JP, 0}, // cutie comment {0x5AC7E79C, TouristTrophy, CH, 0}, // cutie comment {0xFF9C0E93, TouristTrophy, US, 0}, {0xCA9AA903, TouristTrophy, EU, 0}, @@ -365,7 +365,7 @@ CRC::Game CRC::m_games[] = {0x6FB69282, GodHand, US, 0}, {0x924C4AA6, GodHand, KO, 0}, {0xDE9722A5, GodHand, EU, 0}, - {0x9637D496, KnightsOfTheTemple2, NoRegion, 0}, // // EU and JP versions have the same CRC + {0x9637D496, KnightsOfTheTemple2, NoRegion, 0}, // // EU and JP versions have the same CRC {0x4E811100, UltramanFightingEvolution, JP, 0}, // cutie comment {0xF7F181C3, DeathByDegreesTekkenNinaWilliams, CH, 0}, // cutie comment {0xF088FA5B, DeathByDegreesTekkenNinaWilliams, KO, 0}, @@ -378,7 +378,7 @@ CRC::Game CRC::m_games[] = {0xAE4BEBD3, UrbanReign, EU, 0}, {0x48AC09BC, SteambotChronicles, EU, 0}, {0x9F391882, SteambotChronicles, US, 0}, - {0xFEFCF9DE, SteambotChronicles, JP, 0}, // Ponkotsu Roman Daikatsugeki: Bumpy Trot + {0xFEFCF9DE, SteambotChronicles, JP, 0}, // Ponkotsu Roman Daikatsugeki: Bumpy Trot {0XE1BF5DCA, SuperManReturns, US, 0}, {0XE8F7BAB6, SuperManReturns, EU, 0}, {0x06A7506A, SacredBlaze, JP, 0}, @@ -441,17 +441,17 @@ CRC::Game CRC::m_games[] = {0x972611BB, FIFA05, US, 0}, {0x972719A3, FIFA05, EU, 0}, {0xC5473413, HarryPotterATCOS, NoRegion, 0}, // EU and US versions have the same CRC - Chamber Of Secrets - {0xE1963055, HarryPotterATCOS, JP, 0 }, // Harry Potter to Himitsu no Heya - {0xE90BE9F8, HarryPotterATCOS, JP, 0 }, // Coca Cola original Version - {0xB38CC628, HarryPotterATGOF, US, 0 }, - {0xCDE017A7, HarryPotterATGOF, KO, 0 }, - {0xB18DC525, HarryPotterATGOF, EU, 0 }, - {0x9C3A84F4, HarryPotterATHBP, US, 0 }, // Half-Blood Prince - {0xCB598BC2, HarryPotterATHBP, EU, 0 }, - {0x51E019BC, HarryPotterATPOA, NoRegion, 0 }, // EU and US versions have the same CRC - Prisoner of Azkaban - {0x99A8B4FF, HarryPotterATPOA, KO, 0 }, - {0xA8901AD6, HarryPotterATPOA, JP, 0 }, // Harry Potter to Azkaban no Shuujin - {0x51E417AA, HarryPotterATPOA, EU, 0 }, + {0xE1963055, HarryPotterATCOS, JP, 0}, // Harry Potter to Himitsu no Heya + {0xE90BE9F8, HarryPotterATCOS, JP, 0}, // Coca Cola original Version + {0xB38CC628, HarryPotterATGOF, US, 0}, + {0xCDE017A7, HarryPotterATGOF, KO, 0}, + {0xB18DC525, HarryPotterATGOF, EU, 0}, + {0x9C3A84F4, HarryPotterATHBP, US, 0}, // Half-Blood Prince + {0xCB598BC2, HarryPotterATHBP, EU, 0}, + {0x51E019BC, HarryPotterATPOA, NoRegion, 0}, // EU and US versions have the same CRC - Prisoner of Azkaban + {0x99A8B4FF, HarryPotterATPOA, KO, 0}, + {0xA8901AD6, HarryPotterATPOA, JP, 0}, // Harry Potter to Azkaban no Shuujin + {0x51E417AA, HarryPotterATPOA, EU, 0}, {0x4C01B1B0, HarryPotterOOTP, US, 0}, // Order Of The Phoenix {0x01A9BF0E, HarryPotterOOTP, EU, 0}, {0x230CB71D, SoulReaver2, US, 0}, @@ -516,9 +516,9 @@ CRC::Game CRC::m_games[] = std::map CRC::m_map; -std::string ToLower( std::string str ) +std::string ToLower(std::string str) { - transform( str.begin(), str.end(), str.begin(), ::tolower); + transform(str.begin(), str.end(), str.begin(), ::tolower); return str; } @@ -536,18 +536,19 @@ bool IsCrcExcluded(std::string exclusionList, uint32 crc) CRC::Game CRC::Lookup(uint32 crc) { printf("GSdx Lookup CRC:%08X\n", crc); - if(m_map.empty()) + if (m_map.empty()) { std::string exclusions = theApp.GetConfigS("CrcHacksExclusions"); if (exclusions.length() != 0) - printf( "GSdx: CrcHacksExclusions: %s\n", exclusions.c_str() ); + printf("GSdx: CrcHacksExclusions: %s\n", exclusions.c_str()); int crcDups = 0; - for(size_t i = 0; i < countof(m_games); i++) + for (size_t i = 0; i < countof(m_games); i++) { - if( !IsCrcExcluded( exclusions, m_games[i].crc ) ){ - if(m_map[m_games[i].crc]){ - printf("[FIXME] GSdx: Duplicate CRC: 0x%08X: (game-id/region-id) %d/%d overrides %d/%d\n" - , m_games[i].crc, m_games[i].title, m_games[i].region, m_map[m_games[i].crc]->title, m_map[m_games[i].crc]->region); + if (!IsCrcExcluded(exclusions, m_games[i].crc)) + { + if (m_map[m_games[i].crc]) + { + printf("[FIXME] GSdx: Duplicate CRC: 0x%08X: (game-id/region-id) %d/%d overrides %d/%d\n", m_games[i].crc, m_games[i].title, m_games[i].region, m_map[m_games[i].crc]->title, m_map[m_games[i].crc]->region); crcDups++; } @@ -556,13 +557,13 @@ CRC::Game CRC::Lookup(uint32 crc) //else // printf( "GSdx: excluding CRC hack for 0x%08x\n", m_games[i].crc ); } - if(crcDups) + if (crcDups) printf("[FIXME] GSdx: Duplicate CRC: Overall: %d\n", crcDups); } auto i = m_map.find(crc); - if(i != m_map.end()) + if (i != m_map.end()) { return *i->second; } diff --git a/plugins/GSdx/GSDrawingContext.cpp b/plugins/GSdx/GSDrawingContext.cpp index 3cc2d62315..52ecda9de9 100644 --- a/plugins/GSdx/GSDrawingContext.cpp +++ b/plugins/GSdx/GSDrawingContext.cpp @@ -29,24 +29,31 @@ static int findmax(int tl, int br, int limit, int wm, int minuv, int maxuv) int uv = br; - if(wm == CLAMP_CLAMP) + if (wm == CLAMP_CLAMP) { - if(uv > limit) uv = limit; + if (uv > limit) + uv = limit; } - else if(wm == CLAMP_REPEAT) + else if (wm == CLAMP_REPEAT) { - if(tl < 0) uv = limit; // wrap around - else if(uv > limit) uv = limit; + if (tl < 0) + uv = limit; // wrap around + else if (uv > limit) + uv = limit; } - else if(wm == CLAMP_REGION_CLAMP) + else if (wm == CLAMP_REGION_CLAMP) { - if(uv < minuv) uv = minuv; - if(uv > maxuv) uv = maxuv; + if (uv < minuv) + uv = minuv; + if (uv > maxuv) + uv = maxuv; } - else if(wm == CLAMP_REGION_REPEAT) + else if (wm == CLAMP_REGION_REPEAT) { - if(tl < 0) uv = minuv | maxuv; // wrap around, just use (any & mask) | fix - else uv = std::min(uv, minuv) | maxuv; // (any & mask) cannot be larger than mask, select br if that is smaller (not br & mask because there might be a larger value between tl and br when &'ed with the mask) + if (tl < 0) + uv = minuv | maxuv; // wrap around, just use (any & mask) | fix + else + uv = std::min(uv, minuv) | maxuv; // (any & mask) cannot be larger than mask, select br if that is smaller (not br & mask because there might be a larger value between tl and br when &'ed with the mask) } return uv; @@ -54,7 +61,7 @@ static int findmax(int tl, int br, int limit, int wm, int minuv, int maxuv) static int reduce(int uv, int size) { - while(size > 3 && (1 << (size - 1)) >= uv + 1) + while (size > 3 && (1 << (size - 1)) >= uv + 1) { size--; } @@ -64,7 +71,7 @@ static int reduce(int uv, int size) static int extend(int uv, int size) { - while(size < 10 && (1 << size) < uv + 1) + while (size < 10 && (1 << size) < uv + 1) { size++; } @@ -74,7 +81,8 @@ static int extend(int uv, int size) GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap) { - if(mipmap) return TEX0; // no mipmaping allowed + if (mipmap) + return TEX0; // no mipmaping allowed // find the optimal value for TW/TH by analyzing vertex trace and clamping values, extending only for region modes where uv may be outside @@ -91,7 +99,7 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, GSVector4 uvf = st; - if(linear) + if (linear) { uvf += GSVector4(-0.5f, 0.5f).xxyy(); } @@ -101,23 +109,23 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, uv.x = findmax(uv.x, uv.z, (1 << tw) - 1, wms, minu, maxu); uv.y = findmax(uv.y, uv.w, (1 << th) - 1, wmt, minv, maxv); - if(tw + th >= 19) // smaller sizes aren't worth, they just create multiple entries in the textue cache and the saved memory is less + if (tw + th >= 19) // smaller sizes aren't worth, they just create multiple entries in the textue cache and the saved memory is less { tw = reduce(uv.x, tw); th = reduce(uv.y, th); } - if(wms == CLAMP_REGION_CLAMP || wms == CLAMP_REGION_REPEAT) + if (wms == CLAMP_REGION_CLAMP || wms == CLAMP_REGION_REPEAT) { tw = extend(uv.x, tw); } - if(wmt == CLAMP_REGION_CLAMP || wmt == CLAMP_REGION_REPEAT) + if (wmt == CLAMP_REGION_CLAMP || wmt == CLAMP_REGION_REPEAT) { th = extend(uv.y, th); } - if((theApp.GetCurrentRendererType() == GSRendererType::OGL_SW) && ((int)TEX0.TW != tw || (int)TEX0.TH != th)) + if ((theApp.GetCurrentRendererType() == GSRendererType::OGL_SW) && ((int)TEX0.TW != tw || (int)TEX0.TH != th)) { GL_DBG("FixedTEX0 %05x %d %d tw %d=>%d th %d=>%d st (%.0f,%.0f,%.0f,%.0f) uvmax %d,%d wm %d,%d (%d,%d,%d,%d)", (int)TEX0.TBP0, (int)TEX0.TBW, (int)TEX0.PSM, diff --git a/plugins/GSdx/GSDrawingContext.h b/plugins/GSdx/GSDrawingContext.h index f309d8435b..20f2f845a7 100644 --- a/plugins/GSdx/GSDrawingContext.h +++ b/plugins/GSdx/GSDrawingContext.h @@ -31,19 +31,19 @@ class alignas(32) GSDrawingContext { public: - GIFRegXYOFFSET XYOFFSET; - GIFRegTEX0 TEX0; - GIFRegTEX1 TEX1; - GIFRegTEX2 TEX2; - GIFRegCLAMP CLAMP; - GIFRegMIPTBP1 MIPTBP1; - GIFRegMIPTBP2 MIPTBP2; - GIFRegSCISSOR SCISSOR; - GIFRegALPHA ALPHA; - GIFRegTEST TEST; - GIFRegFBA FBA; - GIFRegFRAME FRAME; - GIFRegZBUF ZBUF; + GIFRegXYOFFSET XYOFFSET; + GIFRegTEX0 TEX0; + GIFRegTEX1 TEX1; + GIFRegTEX2 TEX2; + GIFRegCLAMP CLAMP; + GIFRegMIPTBP1 MIPTBP1; + GIFRegMIPTBP2 MIPTBP2; + GIFRegSCISSOR SCISSOR; + GIFRegALPHA ALPHA; + GIFRegTEST TEST; + GIFRegFBA FBA; + GIFRegFRAME FRAME; + GIFRegZBUF ZBUF; struct { @@ -64,19 +64,19 @@ public: struct { - GIFRegXYOFFSET XYOFFSET; - GIFRegTEX0 TEX0; - GIFRegTEX1 TEX1; - GIFRegTEX2 TEX2; - GIFRegCLAMP CLAMP; - GIFRegMIPTBP1 MIPTBP1; - GIFRegMIPTBP2 MIPTBP2; - GIFRegSCISSOR SCISSOR; - GIFRegALPHA ALPHA; - GIFRegTEST TEST; - GIFRegFBA FBA; - GIFRegFRAME FRAME; - GIFRegZBUF ZBUF; + GIFRegXYOFFSET XYOFFSET; + GIFRegTEX0 TEX0; + GIFRegTEX1 TEX1; + GIFRegTEX2 TEX2; + GIFRegCLAMP CLAMP; + GIFRegMIPTBP1 MIPTBP1; + GIFRegMIPTBP2 MIPTBP2; + GIFRegSCISSOR SCISSOR; + GIFRegALPHA ALPHA; + GIFRegTEST TEST; + GIFRegFBA FBA; + GIFRegFRAME FRAME; + GIFRegZBUF ZBUF; } stack; bool m_fixed_tex0; @@ -129,9 +129,9 @@ public: (int)SCISSOR.SCAY1 + 1); scissor.ofxy = GSVector4i( - 0x8000, - 0x8000, - (int)XYOFFSET.OFX - 15, + 0x8000, + 0x8000, + (int)XYOFFSET.OFX - 15, (int)XYOFFSET.OFY - 15); } @@ -142,7 +142,7 @@ public: bool DepthWrite() const { - if(TEST.ATE && TEST.ATST == ATST_NEVER && TEST.AFAIL != AFAIL_ZB_ONLY) // alpha test, all pixels fail, z buffer is not updated + if (TEST.ATE && TEST.ATST == ATST_NEVER && TEST.AFAIL != AFAIL_ZB_ONLY) // alpha test, all pixels fail, z buffer is not updated { return false; } @@ -152,7 +152,7 @@ public: GIFRegTEX0 GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap = false); void ComputeFixedTEX0(const GSVector4& st); - bool HasFixedTEX0() const { return m_fixed_tex0;} + bool HasFixedTEX0() const { return m_fixed_tex0; } // Save & Restore before/after draw allow to correct/optimize current register for current draw // Note: we could avoid the restore part if all renderer code is updated to use a local copy instead @@ -197,120 +197,121 @@ public: { // Append on purpose so env + context are merged into a single file FILE* fp = fopen(filename.c_str(), "at"); - if (!fp) return; + if (!fp) + return; fprintf(fp, "XYOFFSET\n" - "\tX:%u\n" - "\tY:%u\n\n" - , XYOFFSET.OFX, XYOFFSET.OFY); + "\tX:%u\n" + "\tY:%u\n\n" + , XYOFFSET.OFX, XYOFFSET.OFY); fprintf(fp, "MIPTBP1\n" - "\tBP1:0x%llx\n" - "\tBW1:%llu\n" - "\tBP2:0x%llx\n" - "\tBW2:%llu\n" - "\tBP3:0x%llx\n" - "\tBW3:%llu\n\n" - , MIPTBP1.TBP1, MIPTBP1.TBW1, MIPTBP1.TBP2, MIPTBP1.TBW2, MIPTBP1.TBP3, MIPTBP1.TBW3); + "\tBP1:0x%llx\n" + "\tBW1:%llu\n" + "\tBP2:0x%llx\n" + "\tBW2:%llu\n" + "\tBP3:0x%llx\n" + "\tBW3:%llu\n\n" + , MIPTBP1.TBP1, MIPTBP1.TBW1, MIPTBP1.TBP2, MIPTBP1.TBW2, MIPTBP1.TBP3, MIPTBP1.TBW3); fprintf(fp, "MIPTBP2\n" - "\tBP4:0x%llx\n" - "\tBW4:%llu\n" - "\tBP5:0x%llx\n" - "\tBW5:%llu\n" - "\tBP6:0x%llx\n" - "\tBW6:%llu\n\n" - , MIPTBP2.TBP4, MIPTBP2.TBW4, MIPTBP2.TBP5, MIPTBP2.TBW5, MIPTBP2.TBP6, MIPTBP2.TBW6); + "\tBP4:0x%llx\n" + "\tBW4:%llu\n" + "\tBP5:0x%llx\n" + "\tBW5:%llu\n" + "\tBP6:0x%llx\n" + "\tBW6:%llu\n\n" + , MIPTBP2.TBP4, MIPTBP2.TBW4, MIPTBP2.TBP5, MIPTBP2.TBW5, MIPTBP2.TBP6, MIPTBP2.TBW6); fprintf(fp, "TEX0\n" - "\tTBP0:0x%x\n" - "\tTBW:%u\n" - "\tPSM:0x%x\n" - "\tTW:%u\n" - "\tTCC:%u\n" - "\tTFX:%u\n" - "\tCBP:0x%x\n" - "\tCPSM:0x%x\n" - "\tCSM:%u\n" - "\tCSA:%u\n" - "\tCLD:%u\n" - "\tTH:%llu\n\n" - , TEX0.TBP0, TEX0.TBW, TEX0.PSM, TEX0.TW, TEX0.TCC, TEX0.TFX, TEX0.CBP, TEX0.CPSM, TEX0.CSM, TEX0.CSA, TEX0.CLD, TEX0.TH); + "\tTBP0:0x%x\n" + "\tTBW:%u\n" + "\tPSM:0x%x\n" + "\tTW:%u\n" + "\tTCC:%u\n" + "\tTFX:%u\n" + "\tCBP:0x%x\n" + "\tCPSM:0x%x\n" + "\tCSM:%u\n" + "\tCSA:%u\n" + "\tCLD:%u\n" + "\tTH:%llu\n\n" + , TEX0.TBP0, TEX0.TBW, TEX0.PSM, TEX0.TW, TEX0.TCC, TEX0.TFX, TEX0.CBP, TEX0.CPSM, TEX0.CSM, TEX0.CSA, TEX0.CLD, TEX0.TH); fprintf(fp, "TEX1\n" - "\tLCM:%u\n" - "\tMXL:%u\n" - "\tMMAG:%u\n" - "\tMMIN:%u\n" - "\tMTBA:%u\n" - "\tL:%u\n" - "\tK:%d\n\n" - , TEX1.LCM, TEX1.MXL, TEX1.MMAG, TEX1.MMIN, TEX1.MTBA, TEX1.L, TEX1.K); + "\tLCM:%u\n" + "\tMXL:%u\n" + "\tMMAG:%u\n" + "\tMMIN:%u\n" + "\tMTBA:%u\n" + "\tL:%u\n" + "\tK:%d\n\n" + , TEX1.LCM, TEX1.MXL, TEX1.MMAG, TEX1.MMIN, TEX1.MTBA, TEX1.L, TEX1.K); fprintf(fp, "TEX2\n" - "\tPSM:0x%x\n" - "\tCBP:0x%x\n" - "\tCPSM:0x%x\n" - "\tCSM:%u\n" - "\tCSA:%u\n" - "\tCLD:%u\n\n" - , TEX2.PSM, TEX2.CBP, TEX2.CPSM, TEX2.CSM, TEX2.CSA, TEX2.CLD); + "\tPSM:0x%x\n" + "\tCBP:0x%x\n" + "\tCPSM:0x%x\n" + "\tCSM:%u\n" + "\tCSA:%u\n" + "\tCLD:%u\n\n" + , TEX2.PSM, TEX2.CBP, TEX2.CPSM, TEX2.CSM, TEX2.CSA, TEX2.CLD); fprintf(fp, "CLAMP\n" - "\tWMS:%u\n" - "\tWMT:%u\n" - "\tMINU:%u\n" - "\tMAXU:%u\n" - "\tMAXV:%u\n" - "\tMINV:%llu\n\n" - , CLAMP.WMS, CLAMP.WMT, CLAMP.MINU, CLAMP.MAXU, CLAMP.MAXV, CLAMP.MINV); + "\tWMS:%u\n" + "\tWMT:%u\n" + "\tMINU:%u\n" + "\tMAXU:%u\n" + "\tMAXV:%u\n" + "\tMINV:%llu\n\n" + , CLAMP.WMS, CLAMP.WMT, CLAMP.MINU, CLAMP.MAXU, CLAMP.MAXV, CLAMP.MINV); // TODO mimmap? (yes I'm lazy) fprintf(fp, "SCISSOR\n" - "\tX0:%u\n" - "\tX1:%u\n" - "\tY0:%u\n" - "\tY1:%u\n\n" - , SCISSOR.SCAX0, SCISSOR.SCAX1, SCISSOR.SCAY0, SCISSOR.SCAY1); + "\tX0:%u\n" + "\tX1:%u\n" + "\tY0:%u\n" + "\tY1:%u\n\n" + , SCISSOR.SCAX0, SCISSOR.SCAX1, SCISSOR.SCAY0, SCISSOR.SCAY1); fprintf(fp, "ALPHA\n" - "\tA:%u\n" - "\tB:%u\n" - "\tC:%u\n" - "\tD:%u\n" - "\tFIX:%u\n" - , ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, ALPHA.FIX); - const char *col[3] = {"Cs", "Cd", "0"}; - const char *alpha[3] = {"As", "Ad", "Af"}; + "\tA:%u\n" + "\tB:%u\n" + "\tC:%u\n" + "\tD:%u\n" + "\tFIX:%u\n" + , ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, ALPHA.FIX); + const char* col[3] = {"Cs", "Cd", "0"}; + const char* alpha[3] = {"As", "Ad", "Af"}; fprintf(fp, "\t=> (%s - %s) * %s + %s\n\n", col[ALPHA.A], col[ALPHA.B], alpha[ALPHA.C], col[ALPHA.D]); fprintf(fp, "TEST\n" - "\tATE:%u\n" - "\tATST:%u\n" - "\tAREF:%u\n" - "\tAFAIL:%u\n" - "\tDATE:%u\n" - "\tDATM:%u\n" - "\tZTE:%u\n" - "\tZTST:%u\n\n" - , TEST.ATE, TEST.ATST, TEST.AREF, TEST.AFAIL, TEST.DATE, TEST.DATM, TEST.ZTE, TEST.ZTST); + "\tATE:%u\n" + "\tATST:%u\n" + "\tAREF:%u\n" + "\tAFAIL:%u\n" + "\tDATE:%u\n" + "\tDATM:%u\n" + "\tZTE:%u\n" + "\tZTST:%u\n\n" + , TEST.ATE, TEST.ATST, TEST.AREF, TEST.AFAIL, TEST.DATE, TEST.DATM, TEST.ZTE, TEST.ZTST); fprintf(fp, "FBA\n" - "\tFBA:%u\n\n" - , FBA.FBA); + "\tFBA:%u\n\n" + , FBA.FBA); fprintf(fp, "FRAME\n" - "\tFBP (*32):0x%x\n" - "\tFBW:%u\n" - "\tPSM:0x%x\n" - "\tFBMSK:0x%x\n\n" - , FRAME.FBP*32, FRAME.FBW, FRAME.PSM, FRAME.FBMSK); + "\tFBP (*32):0x%x\n" + "\tFBW:%u\n" + "\tPSM:0x%x\n" + "\tFBMSK:0x%x\n\n" + , FRAME.FBP * 32, FRAME.FBW, FRAME.PSM, FRAME.FBMSK); fprintf(fp, "ZBUF\n" - "\tZBP (*32):0x%x\n" - "\tPSM:0x%x\n" - "\tZMSK:%u\n\n" - , ZBUF.ZBP*32, ZBUF.PSM, ZBUF.ZMSK); + "\tZBP (*32):0x%x\n" + "\tPSM:0x%x\n" + "\tZMSK:%u\n\n" + , ZBUF.ZBP * 32, ZBUF.PSM, ZBUF.ZMSK); fclose(fp); } diff --git a/plugins/GSdx/GSDrawingEnvironment.h b/plugins/GSdx/GSDrawingEnvironment.h index 7b936ad64e..714fc75240 100644 --- a/plugins/GSdx/GSDrawingEnvironment.h +++ b/plugins/GSdx/GSDrawingEnvironment.h @@ -26,22 +26,22 @@ class alignas(32) GSDrawingEnvironment { public: - GIFRegPRIM PRIM; - GIFRegPRMODE PRMODE; - GIFRegPRMODECONT PRMODECONT; - GIFRegTEXCLUT TEXCLUT; - GIFRegSCANMSK SCANMSK; - GIFRegTEXA TEXA; - GIFRegFOGCOL FOGCOL; - GIFRegDIMX DIMX; - GIFRegDTHE DTHE; - GIFRegCOLCLAMP COLCLAMP; - GIFRegPABE PABE; - GIFRegBITBLTBUF BITBLTBUF; - GIFRegTRXDIR TRXDIR; - GIFRegTRXPOS TRXPOS; - GIFRegTRXREG TRXREG; - GSDrawingContext CTXT[2]; + GIFRegPRIM PRIM; + GIFRegPRMODE PRMODE; + GIFRegPRMODECONT PRMODECONT; + GIFRegTEXCLUT TEXCLUT; + GIFRegSCANMSK SCANMSK; + GIFRegTEXA TEXA; + GIFRegFOGCOL FOGCOL; + GIFRegDIMX DIMX; + GIFRegDTHE DTHE; + GIFRegCOLCLAMP COLCLAMP; + GIFRegPABE PABE; + GIFRegBITBLTBUF BITBLTBUF; + GIFRegTRXDIR TRXDIR; + GIFRegTRXPOS TRXPOS; + GIFRegTRXREG TRXREG; + GSDrawingContext CTXT[2]; GSDrawingEnvironment() { @@ -88,118 +88,118 @@ public: void Dump(const std::string& filename) { FILE* fp = fopen(filename.c_str(), "wt"); - if (!fp) return; + if (!fp) + return; fprintf(fp, "PRIM\n" - "\tPRIM:%u\n" - "\tIIP:%u\n" - "\tTME:%u\n" - "\tFGE:%u\n" - "\tABE:%u\n" - "\tAA1:%u\n" - "\tFST:%u\n" - "\tCTXT:%u\n" - "\tFIX:%u\n\n" - , PRIM.PRIM, PRIM.IIP, PRIM.TME, PRIM.FGE, PRIM.ABE, PRIM.AA1, PRIM.FST, PRIM.CTXT, PRIM.FIX); + "\tPRIM:%u\n" + "\tIIP:%u\n" + "\tTME:%u\n" + "\tFGE:%u\n" + "\tABE:%u\n" + "\tAA1:%u\n" + "\tFST:%u\n" + "\tCTXT:%u\n" + "\tFIX:%u\n\n" + , PRIM.PRIM, PRIM.IIP, PRIM.TME, PRIM.FGE, PRIM.ABE, PRIM.AA1, PRIM.FST, PRIM.CTXT, PRIM.FIX); fprintf(fp, "PRMODE (when AC=0)\n" - "\t_PRIM:%u\n" - "\tIIP:%u\n" - "\tTME:%u\n" - "\tFGE:%u\n" - "\tABE:%u\n" - "\tAA1:%u\n" - "\tFST:%u\n" - "\tCTXT:%u\n" - "\tFIX:%u\n\n" - , PRMODE._PRIM, PRMODE.IIP, PRMODE.TME, PRMODE.FGE, PRMODE.ABE, PRMODE.AA1, PRMODE.FST, PRMODE.CTXT, PRMODE.FIX); + "\t_PRIM:%u\n" + "\tIIP:%u\n" + "\tTME:%u\n" + "\tFGE:%u\n" + "\tABE:%u\n" + "\tAA1:%u\n" + "\tFST:%u\n" + "\tCTXT:%u\n" + "\tFIX:%u\n\n" + , PRMODE._PRIM, PRMODE.IIP, PRMODE.TME, PRMODE.FGE, PRMODE.ABE, PRMODE.AA1, PRMODE.FST, PRMODE.CTXT, PRMODE.FIX); fprintf(fp, "PRMODECONT\n" - "\tAC:%u\n\n" - , PRMODECONT.AC); + "\tAC:%u\n\n" + , PRMODECONT.AC); fprintf(fp, "TEXCLUT\n" - "\tCOU:%u\n" - "\tCBW:%u\n" - "\tCOV:%u\n\n" - , TEXCLUT.COU, TEXCLUT.CBW, TEXCLUT.COV); + "\tCOU:%u\n" + "\tCBW:%u\n" + "\tCOV:%u\n\n" + , TEXCLUT.COU, TEXCLUT.CBW, TEXCLUT.COV); fprintf(fp, "SCANMSK\n" - "\tMSK:%u\n\n" - "\n" - , SCANMSK.MSK); + "\tMSK:%u\n\n" + "\n" + , SCANMSK.MSK); fprintf(fp, "TEXA\n" - "\tAEM:%u\n" - "\tTA0:%u\n" - "\tTA1:%u\n\n" - , TEXA.AEM, TEXA.TA0, TEXA.TA1); + "\tAEM:%u\n" + "\tTA0:%u\n" + "\tTA1:%u\n\n" + , TEXA.AEM, TEXA.TA0, TEXA.TA1); fprintf(fp, "FOGCOL\n" - "\tFCG:%u\n" - "\tFCB:%u\n" - "\tFCR:%u\n\n" - , FOGCOL.FCG, FOGCOL.FCB, FOGCOL.FCR); + "\tFCG:%u\n" + "\tFCB:%u\n" + "\tFCR:%u\n\n" + , FOGCOL.FCG, FOGCOL.FCB, FOGCOL.FCR); fprintf(fp, "DIMX\n" - "\tDM22:%d\n" - "\tDM23:%d\n" - "\tDM31:%d\n" - "\tDM02:%d\n" - "\tDM21:%d\n" - "\tDM12:%d\n" - "\tDM03:%d\n" - "\tDM01:%d\n" - "\tDM33:%d\n" - "\tDM30:%d\n" - "\tDM11:%d\n" - "\tDM10:%d\n" - "\tDM20:%d\n" - "\tDM32:%d\n" - "\tDM00:%d\n" - "\tDM13:%d\n\n" - , DIMX.DM22, DIMX.DM23, DIMX.DM31, DIMX.DM02, DIMX.DM21, DIMX.DM12, DIMX.DM03, DIMX.DM01, DIMX.DM33, DIMX.DM30, DIMX.DM11, DIMX.DM10, DIMX.DM20, DIMX.DM32, DIMX.DM00, DIMX.DM13); + "\tDM22:%d\n" + "\tDM23:%d\n" + "\tDM31:%d\n" + "\tDM02:%d\n" + "\tDM21:%d\n" + "\tDM12:%d\n" + "\tDM03:%d\n" + "\tDM01:%d\n" + "\tDM33:%d\n" + "\tDM30:%d\n" + "\tDM11:%d\n" + "\tDM10:%d\n" + "\tDM20:%d\n" + "\tDM32:%d\n" + "\tDM00:%d\n" + "\tDM13:%d\n\n" + , DIMX.DM22, DIMX.DM23, DIMX.DM31, DIMX.DM02, DIMX.DM21, DIMX.DM12, DIMX.DM03, DIMX.DM01, DIMX.DM33, DIMX.DM30, DIMX.DM11, DIMX.DM10, DIMX.DM20, DIMX.DM32, DIMX.DM00, DIMX.DM13); fprintf(fp, "DTHE\n" - "\tDTHE:%u\n\n" - , DTHE.DTHE); + "\tDTHE:%u\n\n" + , DTHE.DTHE); fprintf(fp, "COLCLAMP\n" - "\tCLAMP:%u\n\n" - , COLCLAMP.CLAMP); + "\tCLAMP:%u\n\n" + , COLCLAMP.CLAMP); fprintf(fp, "PABE\n" - "\tPABE:%u\n\n" - , PABE.PABE); + "\tPABE:%u\n\n" + , PABE.PABE); fprintf(fp, "BITBLTBUF\n" - "\tSBW:%u\n" - "\tSBP:0x%x\n" - "\tSPSM:%u\n" - "\tDBW:%u\n" - "\tDPSM:%u\n" - "\tDBP:0x%x\n\n" - , BITBLTBUF.SBW, BITBLTBUF.SBP, BITBLTBUF.SPSM, BITBLTBUF.DBW, BITBLTBUF.DPSM, BITBLTBUF.DBP); + "\tSBW:%u\n" + "\tSBP:0x%x\n" + "\tSPSM:%u\n" + "\tDBW:%u\n" + "\tDPSM:%u\n" + "\tDBP:0x%x\n\n" + , BITBLTBUF.SBW, BITBLTBUF.SBP, BITBLTBUF.SPSM, BITBLTBUF.DBW, BITBLTBUF.DPSM, BITBLTBUF.DBP); fprintf(fp, "TRXDIR\n" - "\tXDIR:%u\n\n" - , TRXDIR.XDIR); + "\tXDIR:%u\n\n", + TRXDIR.XDIR); fprintf(fp, "TRXPOS\n" - "\tDIRY:%u\n" - "\tSSAY:%u\n" - "\tSSAX:%u\n" - "\tDIRX:%u\n" - "\tDSAX:%u\n" - "\tDSAY:%u\n\n" - , TRXPOS.DIRY, TRXPOS.SSAY, TRXPOS.SSAX, TRXPOS.DIRX, TRXPOS.DSAX, TRXPOS.DSAY); + "\tDIRY:%u\n" + "\tSSAY:%u\n" + "\tSSAX:%u\n" + "\tDIRX:%u\n" + "\tDSAX:%u\n" + "\tDSAY:%u\n\n" + , TRXPOS.DIRY, TRXPOS.SSAY, TRXPOS.SSAX, TRXPOS.DIRX, TRXPOS.DSAX, TRXPOS.DSAY); fprintf(fp, "TRXREG\n" - "\tRRH:%u\n" - "\tRRW:%u\n\n" - , TRXREG.RRH, TRXREG.RRW); + "\tRRH:%u\n" + "\tRRW:%u\n\n" + , TRXREG.RRH, TRXREG.RRW); fclose(fp); } - }; diff --git a/plugins/GSdx/GSDump.cpp b/plugins/GSdx/GSDump.cpp index 4f2a522be0..32d2bc9b40 100644 --- a/plugins/GSdx/GSDump.cpp +++ b/plugins/GSdx/GSDump.cpp @@ -33,7 +33,7 @@ GSDumpBase::GSDumpBase(const std::string& fn) GSDumpBase::~GSDumpBase() { - if(m_gs) + if (m_gs) fclose(m_gs); } @@ -83,7 +83,7 @@ bool GSDumpBase::VSync(int field, bool last, const GSPrivRegSet* regs) return (++m_frames & 1) == 0 && last && (m_extra_frames < 0); } -void GSDumpBase::Write(const void *data, size_t size) +void GSDumpBase::Write(const void* data, size_t size) { if (!m_gs || size == 0) return; @@ -103,7 +103,7 @@ GSDump::GSDump(const std::string& fn, uint32 crc, const GSFreezeData& fd, const AddHeader(crc, fd, regs); } -void GSDump::AppendRawData(const void *data, size_t size) +void GSDump::AppendRawData(const void* data, size_t size) { Write(data, size); } @@ -122,7 +122,8 @@ GSDumpXz::GSDumpXz(const std::string& fn, uint32 crc, const GSFreezeData& fd, co { m_strm = LZMA_STREAM_INIT; lzma_ret ret = lzma_easy_encoder(&m_strm, 6 /*level*/, LZMA_CHECK_CRC64); - if (ret != LZMA_OK) { + if (ret != LZMA_OK) + { fprintf(stderr, "GSDumpXz: Error initializing LZMA encoder ! (error code %u)\n", ret); return; } @@ -141,7 +142,7 @@ GSDumpXz::~GSDumpXz() lzma_end(&m_strm); } -void GSDumpXz::AppendRawData(const void *data, size_t size) +void GSDumpXz::AppendRawData(const void* data, size_t size) { size_t old_size = m_in_buff.size(); m_in_buff.resize(old_size + size); @@ -151,7 +152,7 @@ void GSDumpXz::AppendRawData(const void *data, size_t size) // is enabled, it will freeze PCSX2. 1GB should be enough for long dump. // // Note: long dumps are currently not supported so this path won't be executed - if (m_in_buff.size() > 1024*1024*1024) + if (m_in_buff.size() > 1024 * 1024 * 1024) Flush(); } @@ -175,15 +176,17 @@ void GSDumpXz::Flush() void GSDumpXz::Compress(lzma_action action, lzma_ret expected_status) { - std::vector out_buff(1024*1024); - do { + std::vector out_buff(1024 * 1024); + do + { m_strm.next_out = out_buff.data(); m_strm.avail_out = out_buff.size(); lzma_ret ret = lzma_code(&m_strm, action); - if (ret != expected_status) { - fprintf (stderr, "GSDumpXz: Error %d\n", (int) ret); + if (ret != expected_status) + { + fprintf(stderr, "GSDumpXz: Error %d\n", (int)ret); return; } diff --git a/plugins/GSdx/GSDump.h b/plugins/GSdx/GSDump.h index 6db9163862..489e2daba6 100644 --- a/plugins/GSdx/GSDump.h +++ b/plugins/GSdx/GSDump.h @@ -52,9 +52,9 @@ class GSDumpBase protected: void AddHeader(uint32 crc, const GSFreezeData& fd, const GSPrivRegSet* regs); - void Write(const void *data, size_t size); + void Write(const void* data, size_t size); - virtual void AppendRawData(const void *data, size_t size) = 0; + virtual void AppendRawData(const void* data, size_t size) = 0; virtual void AppendRawData(uint8 c) = 0; public: @@ -68,7 +68,7 @@ public: class GSDump final : public GSDumpBase { - void AppendRawData(const void *data, size_t size) final; + void AppendRawData(const void* data, size_t size) final; void AppendRawData(uint8 c) final; public: @@ -84,7 +84,7 @@ class GSDumpXz final : public GSDumpBase void Flush(); void Compress(lzma_action action, lzma_ret expected_status); - void AppendRawData(const void *data, size_t size); + void AppendRawData(const void* data, size_t size); void AppendRawData(uint8 c); public: diff --git a/plugins/GSdx/GSLocalMemory.cpp b/plugins/GSdx/GSLocalMemory.cpp index 3cf050093a..567b394010 100644 --- a/plugins/GSdx/GSLocalMemory.cpp +++ b/plugins/GSdx/GSLocalMemory.cpp @@ -85,7 +85,8 @@ GSLocalMemory::GSLocalMemory() : m_clut(this) { m_use_fifo_alloc = theApp.GetConfigB("UserHacks") && theApp.GetConfigB("wrap_gs_mem"); - switch (theApp.GetCurrentRendererType()) { + switch (theApp.GetCurrentRendererType()) + { case GSRendererType::OGL_SW: m_use_fifo_alloc = true; break; @@ -110,15 +111,15 @@ GSLocalMemory::GSLocalMemory() memset(m_vm8, 0, m_vmsize); - for(int bp = 0; bp < 32; bp++) + for (int bp = 0; bp < 32; bp++) { - for(int y = 0; y < 32; y++) for(int x = 0; x < 64; x++) + for (int y = 0; y < 32; y++) for (int x = 0; x < 64; x++) { pageOffset32[bp][y][x] = PixelAddressOrg32(x, y, bp, 0); pageOffset32Z[bp][y][x] = PixelAddressOrg32Z(x, y, bp, 0); } - for(int y = 0; y < 64; y++) for(int x = 0; x < 64; x++) + for (int y = 0; y < 64; y++) for (int x = 0; x < 64; x++) { pageOffset16[bp][y][x] = PixelAddressOrg16(x, y, bp, 0); pageOffset16S[bp][y][x] = PixelAddressOrg16S(x, y, bp, 0); @@ -126,100 +127,100 @@ GSLocalMemory::GSLocalMemory() pageOffset16SZ[bp][y][x] = PixelAddressOrg16SZ(x, y, bp, 0); } - for(int y = 0; y < 64; y++) for(int x = 0; x < 128; x++) + for (int y = 0; y < 64; y++) for (int x = 0; x < 128; x++) { pageOffset8[bp][y][x] = PixelAddressOrg8(x, y, bp, 0); } - for(int y = 0; y < 128; y++) for(int x = 0; x < 128; x++) + for (int y = 0; y < 128; y++) for (int x = 0; x < 128; x++) { pageOffset4[bp][y][x] = PixelAddressOrg4(x, y, bp, 0); } } - for(size_t x = 0; x < countof(rowOffset32); x++) + for (size_t x = 0; x < countof(rowOffset32); x++) { rowOffset32[x] = (int)PixelAddress32(x & 0x7ff, 0, 0, 32) - (int)PixelAddress32(0, 0, 0, 32); } - for(size_t x = 0; x < countof(rowOffset32Z); x++) + for (size_t x = 0; x < countof(rowOffset32Z); x++) { rowOffset32Z[x] = (int)PixelAddress32Z(x & 0x7ff, 0, 0, 32) - (int)PixelAddress32Z(0, 0, 0, 32); } - for(size_t x = 0; x < countof(rowOffset16); x++) + for (size_t x = 0; x < countof(rowOffset16); x++) { rowOffset16[x] = (int)PixelAddress16(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16(0, 0, 0, 32); } - for(size_t x = 0; x < countof(rowOffset16S); x++) + for (size_t x = 0; x < countof(rowOffset16S); x++) { rowOffset16S[x] = (int)PixelAddress16S(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16S(0, 0, 0, 32); } - for(size_t x = 0; x < countof(rowOffset16Z); x++) + for (size_t x = 0; x < countof(rowOffset16Z); x++) { rowOffset16Z[x] = (int)PixelAddress16Z(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16Z(0, 0, 0, 32); } - for(size_t x = 0; x < countof(rowOffset16SZ); x++) + for (size_t x = 0; x < countof(rowOffset16SZ); x++) { rowOffset16SZ[x] = (int)PixelAddress16SZ(x & 0x7ff, 0, 0, 32) - (int)PixelAddress16SZ(0, 0, 0, 32); } - for(size_t x = 0; x < countof(rowOffset8[0]); x++) + for (size_t x = 0; x < countof(rowOffset8[0]); x++) { rowOffset8[0][x] = (int)PixelAddress8(x & 0x7ff, 0, 0, 32) - (int)PixelAddress8(0, 0, 0, 32); rowOffset8[1][x] = (int)PixelAddress8(x & 0x7ff, 2, 0, 32) - (int)PixelAddress8(0, 2, 0, 32); } - for(size_t x = 0; x < countof(rowOffset4[0]); x++) + for (size_t x = 0; x < countof(rowOffset4[0]); x++) { rowOffset4[0][x] = (int)PixelAddress4(x & 0x7ff, 0, 0, 32) - (int)PixelAddress4(0, 0, 0, 32); rowOffset4[1][x] = (int)PixelAddress4(x & 0x7ff, 2, 0, 32) - (int)PixelAddress4(0, 2, 0, 32); } - for(size_t x = 0; x < countof(blockOffset32); x++) + for (size_t x = 0; x < countof(blockOffset32); x++) { blockOffset32[x] = (short)((int)BlockNumber32(x << 3, 0, 0, 32) - (int)BlockNumber32(0, 0, 0, 32)); } - for(size_t x = 0; x < countof(blockOffset32Z); x++) + for (size_t x = 0; x < countof(blockOffset32Z); x++) { blockOffset32Z[x] = (short)((int)BlockNumber32Z(x << 3, 0, 0, 32) - (int)BlockNumber32Z(0, 0, 0, 32)); } - for(size_t x = 0; x < countof(blockOffset16); x++) + for (size_t x = 0; x < countof(blockOffset16); x++) { blockOffset16[x] = (short)((int)BlockNumber16(x << 3, 0, 0, 32) - (int)BlockNumber16(0, 0, 0, 32)); } - for(size_t x = 0; x < countof(blockOffset16S); x++) + for (size_t x = 0; x < countof(blockOffset16S); x++) { blockOffset16S[x] = (short)((int)BlockNumber16S(x << 3, 0, 0, 32) - (int)BlockNumber16S(0, 0, 0, 32)); } - for(size_t x = 0; x < countof(blockOffset16Z); x++) + for (size_t x = 0; x < countof(blockOffset16Z); x++) { blockOffset16Z[x] = (short)((int)BlockNumber16Z(x << 3, 0, 0, 32) - (int)BlockNumber16Z(0, 0, 0, 32)); } - for(size_t x = 0; x < countof(blockOffset16SZ); x++) + for (size_t x = 0; x < countof(blockOffset16SZ); x++) { blockOffset16SZ[x] = (short)((int)BlockNumber16SZ(x << 3, 0, 0, 32) - (int)BlockNumber16SZ(0, 0, 0, 32)); } - for(size_t x = 0; x < countof(blockOffset8); x++) + for (size_t x = 0; x < countof(blockOffset8); x++) { blockOffset8[x] = (short)((int)BlockNumber8(x << 3, 0, 0, 32) - (int)BlockNumber8(0, 0, 0, 32)); } - for(size_t x = 0; x < countof(blockOffset4); x++) + for (size_t x = 0; x < countof(blockOffset4); x++) { blockOffset4[x] = (short)((int)BlockNumber4(x << 3, 0, 0, 32) - (int)BlockNumber4(0, 0, 0, 32)); } - for(size_t i = 0; i < countof(m_psm); i++) + for (size_t i = 0; i < countof(m_psm); i++) { m_psm[i].pa = &GSLocalMemory::PixelAddress32; m_psm[i].bn = &GSLocalMemory::BlockNumber32; @@ -240,7 +241,8 @@ GSLocalMemory::GSLocalMemory() m_psm[i].pal = 0; m_psm[i].bs = GSVector2i(8, 8); m_psm[i].pgs = GSVector2i(64, 32); - for(int j = 0; j < 8; j++) m_psm[i].rowOffset[j] = rowOffset32; + for (int j = 0; j < 8; j++) + m_psm[i].rowOffset[j] = rowOffset32; m_psm[i].blockOffset = blockOffset32; m_psm[i].msk = 0xff; m_psm[i].depth = 0; @@ -435,7 +437,8 @@ GSLocalMemory::GSLocalMemory() m_psm[PSM_PSMT8].pal = m_psm[PSM_PSMT8H].pal = 256; m_psm[PSM_PSMT4].pal = m_psm[PSM_PSMT4HL].pal = m_psm[PSM_PSMT4HH].pal = 16; - for(size_t i = 0; i < countof(m_psm); i++) m_psm[i].fmt = 3; + for (size_t i = 0; i < countof(m_psm); i++) + m_psm[i].fmt = 3; m_psm[PSM_PSMCT32].fmt = m_psm[PSM_PSMZ32].fmt = 0; m_psm[PSM_PSMCT24].fmt = m_psm[PSM_PSMZ24].fmt = 1; m_psm[PSM_PSMCT16].fmt = m_psm[PSM_PSMZ16].fmt = 2; @@ -493,13 +496,16 @@ GSLocalMemory::~GSLocalMemory() else vmfree(m_vm8, m_vmsize * 4); - for(auto &i : m_omap) delete i.second; - for(auto &i : m_pomap) _aligned_free(i.second); - for(auto &i : m_po4map) _aligned_free(i.second); + for (auto& i : m_omap) + delete i.second; + for (auto& i : m_pomap) + _aligned_free(i.second); + for (auto& i : m_po4map) + _aligned_free(i.second); - for(auto &i : m_p2tmap) + for (auto& i : m_p2tmap) { - delete [] i.second; + delete[] i.second; } } @@ -509,7 +515,7 @@ GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm) auto i = m_omap.find(hash); - if(i != m_omap.end()) + if (i != m_omap.end()) { return i->second; } @@ -540,7 +546,7 @@ GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIF auto it = m_pomap.find(hash); - if(it != m_pomap.end()) + if (it != m_pomap.end()) { return it->second; } @@ -560,13 +566,13 @@ GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIF int fs = m_psm[fpsm].bpp >> 5; int zs = m_psm[zpsm].bpp >> 5; - for(int i = 0; i < 2048; i++) + for (int i = 0; i < 2048; i++) { off->row[i].x = (int)fpa(0, i, fbp, bw) << fs; off->row[i].y = (int)zpa(0, i, zbp, bw) << zs; } - for(int i = 0; i < 2048; i++) + for (int i = 0; i < 2048; i++) { off->col[i].x = m_psm[fpsm].rowOffset[0][i] << fs; off->col[i].y = m_psm[zpsm].rowOffset[0][i] << zs; @@ -596,7 +602,7 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G auto it = m_po4map.find(hash); - if(it != m_po4map.end()) + if (it != m_po4map.end()) { return it->second; } @@ -616,13 +622,13 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G int fs = m_psm[fpsm].bpp >> 5; int zs = m_psm[zpsm].bpp >> 5; - for(int i = 0; i < 2048; i++) + for (int i = 0; i < 2048; i++) { off->row[i].x = (int)fpa(0, i, fbp, bw) << fs; off->row[i].y = (int)zpa(0, i, zbp, bw) << zs; } - for(int i = 0; i < 512; i++) + for (int i = 0; i < 512; i++) { off->col[i].x = m_psm[fpsm].rowOffset[0][i * 4] << fs; off->col[i].y = m_psm[zpsm].rowOffset[0][i * 4] << zs; @@ -633,7 +639,7 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G return off; } -static bool cmp_vec2x(const GSVector2i& a, const GSVector2i& b) {return a.x < b.x;} +static bool cmp_vec2x(const GSVector2i& a, const GSVector2i& b) { return a.x < b.x; } std::vector* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0) { @@ -641,7 +647,7 @@ std::vector* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0) auto it = m_p2tmap.find(hash); - if(it != m_p2tmap.end()) + if (it != m_p2tmap.end()) { return it->second; } @@ -655,11 +661,11 @@ std::vector* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0) std::unordered_map> tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks) - for(int y = 0; y < th; y += bs.y) + for (int y = 0; y < th; y += bs.y) { uint32 base = off->block.row[y >> 3]; - for(int x = 0, i = y << 7; x < tw; x += bs.x, i += bs.x) + for (int x = 0, i = y << 7; x < tw; x += bs.x, i += bs.x) { uint32 page = ((base + off->block.col[x >> 3]) >> 5) % MAX_PAGES; @@ -671,7 +677,7 @@ std::vector* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0) auto p2t = new std::vector[MAX_PAGES]; - for(const auto &i : tmp) + for (const auto& i : tmp) { uint32 page = i.first; @@ -679,14 +685,14 @@ std::vector* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0) std::unordered_map m; - for(const auto addr : tiles) + for (const auto addr : tiles) { uint32 row = addr >> 5; uint32 col = 1 << (addr & 31); auto k = m.find(row); - if(k != m.end()) + if (k != m.end()) { k->second |= col; } @@ -701,7 +707,7 @@ std::vector* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0) // sort by x and flip the mask (it will be used to erase a lot of bits in a loop, [x] &= ~y) - for(const auto &j : m) + for (const auto& j : m) { p2t[page].push_back(GSVector2i(j.first, ~j.second)); } @@ -716,7 +722,7 @@ std::vector* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0) //////////////////// -template +template void GSLocalMemory::WriteImageColumn(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF) { uint32 bp = BITBLTBUF.DBP; @@ -724,82 +730,82 @@ void GSLocalMemory::WriteImageColumn(int l, int r, int y, int h, const uint8* sr const int csy = bsy / 4; - for(int offset = srcpitch * csy; h >= csy; h -= csy, y += csy, src += offset) + for (int offset = srcpitch * csy; h >= csy; h -= csy, y += csy, src += offset) { - for(int x = l; x < r; x += bsx) + for (int x = l; x < r; x += bsx) { - switch(psm) + switch (psm) { - case PSM_PSMCT32: GSBlock::WriteColumn32(y, BlockPtr32(x, y, bp, bw), &src[x * 4], srcpitch); break; - case PSM_PSMCT16: GSBlock::WriteColumn16(y, BlockPtr16(x, y, bp, bw), &src[x * 2], srcpitch); break; - case PSM_PSMCT16S: GSBlock::WriteColumn16(y, BlockPtr16S(x, y, bp, bw), &src[x * 2], srcpitch); break; - case PSM_PSMT8: GSBlock::WriteColumn8(y, BlockPtr8(x, y, bp, bw), &src[x], srcpitch); break; - case PSM_PSMT4: GSBlock::WriteColumn4(y, BlockPtr4(x, y, bp, bw), &src[x >> 1], srcpitch); break; - case PSM_PSMZ32: GSBlock::WriteColumn32(y, BlockPtr32Z(x, y, bp, bw), &src[x * 4], srcpitch); break; - case PSM_PSMZ16: GSBlock::WriteColumn16(y, BlockPtr16Z(x, y, bp, bw), &src[x * 2], srcpitch); break; - case PSM_PSMZ16S: GSBlock::WriteColumn16(y, BlockPtr16SZ(x, y, bp, bw), &src[x * 2], srcpitch); break; - // TODO - default: __assume(0); + case PSM_PSMCT32: GSBlock::WriteColumn32(y, BlockPtr32(x, y, bp, bw), &src[x * 4], srcpitch); break; + case PSM_PSMCT16: GSBlock::WriteColumn16(y, BlockPtr16(x, y, bp, bw), &src[x * 2], srcpitch); break; + case PSM_PSMCT16S: GSBlock::WriteColumn16(y, BlockPtr16S(x, y, bp, bw), &src[x * 2], srcpitch); break; + case PSM_PSMT8: GSBlock::WriteColumn8(y, BlockPtr8(x, y, bp, bw), &src[x], srcpitch); break; + case PSM_PSMT4: GSBlock::WriteColumn4(y, BlockPtr4(x, y, bp, bw), &src[x >> 1], srcpitch); break; + case PSM_PSMZ32: GSBlock::WriteColumn32(y, BlockPtr32Z(x, y, bp, bw), &src[x * 4], srcpitch); break; + case PSM_PSMZ16: GSBlock::WriteColumn16(y, BlockPtr16Z(x, y, bp, bw), &src[x * 2], srcpitch); break; + case PSM_PSMZ16S: GSBlock::WriteColumn16(y, BlockPtr16SZ(x, y, bp, bw), &src[x * 2], srcpitch); break; + // TODO + default: __assume(0); } } } } -template +template void GSLocalMemory::WriteImageBlock(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF) { uint32 bp = BITBLTBUF.DBP; uint32 bw = BITBLTBUF.DBW; - for(int offset = srcpitch * bsy; h >= bsy; h -= bsy, y += bsy, src += offset) + for (int offset = srcpitch * bsy; h >= bsy; h -= bsy, y += bsy, src += offset) { - for(int x = l; x < r; x += bsx) + for (int x = l; x < r; x += bsx) { - switch(psm) + switch (psm) { - case PSM_PSMCT32: GSBlock::WriteBlock32(BlockPtr32(x, y, bp, bw), &src[x * 4], srcpitch); break; - case PSM_PSMCT16: GSBlock::WriteBlock16(BlockPtr16(x, y, bp, bw), &src[x * 2], srcpitch); break; - case PSM_PSMCT16S: GSBlock::WriteBlock16(BlockPtr16S(x, y, bp, bw), &src[x * 2], srcpitch); break; - case PSM_PSMT8: GSBlock::WriteBlock8(BlockPtr8(x, y, bp, bw), &src[x], srcpitch); break; - case PSM_PSMT4: GSBlock::WriteBlock4(BlockPtr4(x, y, bp, bw), &src[x >> 1], srcpitch); break; - case PSM_PSMZ32: GSBlock::WriteBlock32(BlockPtr32Z(x, y, bp, bw), &src[x * 4], srcpitch); break; - case PSM_PSMZ16: GSBlock::WriteBlock16(BlockPtr16Z(x, y, bp, bw), &src[x * 2], srcpitch); break; - case PSM_PSMZ16S: GSBlock::WriteBlock16(BlockPtr16SZ(x, y, bp, bw), &src[x * 2], srcpitch); break; - // TODO - default: __assume(0); + case PSM_PSMCT32: GSBlock::WriteBlock32(BlockPtr32(x, y, bp, bw), &src[x * 4], srcpitch); break; + case PSM_PSMCT16: GSBlock::WriteBlock16(BlockPtr16(x, y, bp, bw), &src[x * 2], srcpitch); break; + case PSM_PSMCT16S: GSBlock::WriteBlock16(BlockPtr16S(x, y, bp, bw), &src[x * 2], srcpitch); break; + case PSM_PSMT8: GSBlock::WriteBlock8(BlockPtr8(x, y, bp, bw), &src[x], srcpitch); break; + case PSM_PSMT4: GSBlock::WriteBlock4(BlockPtr4(x, y, bp, bw), &src[x >> 1], srcpitch); break; + case PSM_PSMZ32: GSBlock::WriteBlock32(BlockPtr32Z(x, y, bp, bw), &src[x * 4], srcpitch); break; + case PSM_PSMZ16: GSBlock::WriteBlock16(BlockPtr16Z(x, y, bp, bw), &src[x * 2], srcpitch); break; + case PSM_PSMZ16S: GSBlock::WriteBlock16(BlockPtr16SZ(x, y, bp, bw), &src[x * 2], srcpitch); break; + // TODO + default: __assume(0); } } } } -template +template void GSLocalMemory::WriteImageLeftRight(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF) { uint32 bp = BITBLTBUF.DBP; uint32 bw = BITBLTBUF.DBW; - for(; h > 0; y++, h--, src += srcpitch) + for (; h > 0; y++, h--, src += srcpitch) { - for(int x = l; x < r; x++) + for (int x = l; x < r; x++) { - switch(psm) + switch (psm) { - case PSM_PSMCT32: WritePixel32(x, y, *(uint32*)&src[x * 4], bp, bw); break; - case PSM_PSMCT16: WritePixel16(x, y, *(uint16*)&src[x * 2], bp, bw); break; - case PSM_PSMCT16S: WritePixel16S(x, y, *(uint16*)&src[x * 2], bp, bw); break; - case PSM_PSMT8: WritePixel8(x, y, src[x], bp, bw); break; - case PSM_PSMT4: WritePixel4(x, y, src[x >> 1] >> ((x & 1) << 2), bp, bw); break; - case PSM_PSMZ32: WritePixel32Z(x, y, *(uint32*)&src[x * 4], bp, bw); break; - case PSM_PSMZ16: WritePixel16Z(x, y, *(uint16*)&src[x * 2], bp, bw); break; - case PSM_PSMZ16S: WritePixel16SZ(x, y, *(uint16*)&src[x * 2], bp, bw); break; - // TODO - default: __assume(0); + case PSM_PSMCT32: WritePixel32(x, y, *(uint32*)&src[x * 4], bp, bw); break; + case PSM_PSMCT16: WritePixel16(x, y, *(uint16*)&src[x * 2], bp, bw); break; + case PSM_PSMCT16S: WritePixel16S(x, y, *(uint16*)&src[x * 2], bp, bw); break; + case PSM_PSMT8: WritePixel8(x, y, src[x], bp, bw); break; + case PSM_PSMT4: WritePixel4(x, y, src[x >> 1] >> ((x & 1) << 2), bp, bw); break; + case PSM_PSMZ32: WritePixel32Z(x, y, *(uint32*)&src[x * 4], bp, bw); break; + case PSM_PSMZ16: WritePixel16Z(x, y, *(uint16*)&src[x * 2], bp, bw); break; + case PSM_PSMZ16S: WritePixel16SZ(x, y, *(uint16*)&src[x * 2], bp, bw); break; + // TODO + default: __assume(0); } } } } -template +template void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF) { alignas(32) uint8 buff[64]; // merge buffer for one column @@ -813,57 +819,59 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8* int y2 = y & (csy - 1); - if(y2 > 0) + if (y2 > 0) { int h2 = std::min(h, csy - y2); - for(int x = l; x < r; x += bsx) + for (int x = l; x < r; x += bsx) { uint8* dst = NULL; - switch(psm) + switch (psm) { - case PSM_PSMCT32: dst = BlockPtr32(x, y, bp, bw); break; - case PSM_PSMCT16: dst = BlockPtr16(x, y, bp, bw); break; - case PSM_PSMCT16S: dst = BlockPtr16S(x, y, bp, bw); break; - case PSM_PSMT8: dst = BlockPtr8(x, y, bp, bw); break; - case PSM_PSMT4: dst = BlockPtr4(x, y, bp, bw); break; - case PSM_PSMZ32: dst = BlockPtr32Z(x, y, bp, bw); break; - case PSM_PSMZ16: dst = BlockPtr16Z(x, y, bp, bw); break; - case PSM_PSMZ16S: dst = BlockPtr16SZ(x, y, bp, bw); break; - // TODO - default: __assume(0); + case PSM_PSMCT32: dst = BlockPtr32(x, y, bp, bw); break; + case PSM_PSMCT16: dst = BlockPtr16(x, y, bp, bw); break; + case PSM_PSMCT16S: dst = BlockPtr16S(x, y, bp, bw); break; + case PSM_PSMT8: dst = BlockPtr8(x, y, bp, bw); break; + case PSM_PSMT4: dst = BlockPtr4(x, y, bp, bw); break; + case PSM_PSMZ32: dst = BlockPtr32Z(x, y, bp, bw); break; + case PSM_PSMZ16: dst = BlockPtr16Z(x, y, bp, bw); break; + case PSM_PSMZ16S: dst = BlockPtr16SZ(x, y, bp, bw); break; + // TODO + default: __assume(0); } - switch(psm) + switch (psm) { - case PSM_PSMCT32: - case PSM_PSMZ32: - GSBlock::ReadColumn32(y, dst, buff, 32); - memcpy(&buff[32], &src[x * 4], 32); - GSBlock::WriteColumn32<32, 0xffffffff>(y, dst, buff, 32); - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: - GSBlock::ReadColumn16(y, dst, buff, 32); - memcpy(&buff[32], &src[x * 2], 32); - GSBlock::WriteColumn16<32>(y, dst, buff, 32); - break; - case PSM_PSMT8: - GSBlock::ReadColumn8(y, dst, buff, 16); - for(int i = 0, j = y2; i < h2; i++, j++) memcpy(&buff[j * 16], &src[i * srcpitch + x], 16); - GSBlock::WriteColumn8<32>(y, dst, buff, 16); - break; - case PSM_PSMT4: - GSBlock::ReadColumn4(y, dst, buff, 16); - for(int i = 0, j = y2; i < h2; i++, j++) memcpy(&buff[j * 16], &src[i * srcpitch + (x >> 1)], 16); - GSBlock::WriteColumn4<32>(y, dst, buff, 16); - break; - // TODO - default: - __assume(0); + case PSM_PSMCT32: + case PSM_PSMZ32: + GSBlock::ReadColumn32(y, dst, buff, 32); + memcpy(&buff[32], &src[x * 4], 32); + GSBlock::WriteColumn32<32, 0xffffffff>(y, dst, buff, 32); + break; + case PSM_PSMCT16: + case PSM_PSMCT16S: + case PSM_PSMZ16: + case PSM_PSMZ16S: + GSBlock::ReadColumn16(y, dst, buff, 32); + memcpy(&buff[32], &src[x * 2], 32); + GSBlock::WriteColumn16<32>(y, dst, buff, 32); + break; + case PSM_PSMT8: + GSBlock::ReadColumn8(y, dst, buff, 16); + for (int i = 0, j = y2; i < h2; i++, j++) + memcpy(&buff[j * 16], &src[i * srcpitch + x], 16); + GSBlock::WriteColumn8<32>(y, dst, buff, 16); + break; + case PSM_PSMT4: + GSBlock::ReadColumn4(y, dst, buff, 16); + for (int i = 0, j = y2; i < h2; i++, j++) + memcpy(&buff[j * 16], &src[i * srcpitch + (x >> 1)], 16); + GSBlock::WriteColumn4<32>(y, dst, buff, 16); + break; + // TODO + default: + __assume(0); } } @@ -877,15 +885,15 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8* { int h2 = h & ~(csy - 1); - if(h2 > 0) + if (h2 > 0) { size_t addr = (size_t)&src[l * trbpp >> 3]; - if((addr & 31) == 0 && (srcpitch & 31) == 0) + if ((addr & 31) == 0 && (srcpitch & 31) == 0) { WriteImageColumn(l, r, y, h2, src, srcpitch, BITBLTBUF); } - else if((addr & 15) == 0 && (srcpitch & 15) == 0) + else if ((addr & 15) == 0 && (srcpitch & 15) == 0) { WriteImageColumn(l, r, y, h2, src, srcpitch, BITBLTBUF); } @@ -902,13 +910,13 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8* // merge incomplete column - if(h >= 1) + if (h >= 1) { - for(int x = l; x < r; x += bsx) + for (int x = l; x < r; x += bsx) { uint8* dst = NULL; - switch(psm) + switch (psm) { case PSM_PSMCT32: dst = BlockPtr32(x, y, bp, bw); break; case PSM_PSMCT16: dst = BlockPtr16(x, y, bp, bw); break; @@ -922,51 +930,54 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8* default: __assume(0); } - switch(psm) + switch (psm) { - case PSM_PSMCT32: - case PSM_PSMZ32: - GSBlock::ReadColumn32(y, dst, buff, 32); - memcpy(&buff[0], &src[x * 4], 32); - GSBlock::WriteColumn32<32, 0xffffffff>(y, dst, buff, 32); - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: - GSBlock::ReadColumn16(y, dst, buff, 32); - memcpy(&buff[0], &src[x * 2], 32); - GSBlock::WriteColumn16<32>(y, dst, buff, 32); - break; - case PSM_PSMT8: - GSBlock::ReadColumn8(y, dst, buff, 16); - for(int i = 0; i < h; i++) memcpy(&buff[i * 16], &src[i * srcpitch + x], 16); - GSBlock::WriteColumn8<32>(y, dst, buff, 16); - break; - case PSM_PSMT4: - GSBlock::ReadColumn4(y, dst, buff, 16); - for(int i = 0; i < h; i++) memcpy(&buff[i * 16], &src[i * srcpitch + (x >> 1)], 16); - GSBlock::WriteColumn4<32>(y, dst, buff, 16); - break; - // TODO - default: - __assume(0); + case PSM_PSMCT32: + case PSM_PSMZ32: + GSBlock::ReadColumn32(y, dst, buff, 32); + memcpy(&buff[0], &src[x * 4], 32); + GSBlock::WriteColumn32<32, 0xffffffff>(y, dst, buff, 32); + break; + case PSM_PSMCT16: + case PSM_PSMCT16S: + case PSM_PSMZ16: + case PSM_PSMZ16S: + GSBlock::ReadColumn16(y, dst, buff, 32); + memcpy(&buff[0], &src[x * 2], 32); + GSBlock::WriteColumn16<32>(y, dst, buff, 32); + break; + case PSM_PSMT8: + GSBlock::ReadColumn8(y, dst, buff, 16); + for (int i = 0; i < h; i++) + memcpy(&buff[i * 16], &src[i * srcpitch + x], 16); + GSBlock::WriteColumn8<32>(y, dst, buff, 16); + break; + case PSM_PSMT4: + GSBlock::ReadColumn4(y, dst, buff, 16); + for (int i = 0; i < h; i++) + memcpy(&buff[i * 16], &src[i * srcpitch + (x >> 1)], 16); + GSBlock::WriteColumn4<32>(y, dst, buff, 16); + break; + // TODO + default: + __assume(0); } } } } -template +template void GSLocalMemory::WriteImage(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) { - if(TRXREG.RRW == 0) return; + if (TRXREG.RRW == 0) + return; int l = (int)TRXPOS.DSAX; int r = l + (int)TRXREG.RRW; // finish the incomplete row first - if(tx != l) + if (tx != l) { int n = std::min(len, (r - tx) * trbpp >> 3); WriteImageX(tx, ty, src, n, BITBLTBUF, TRXPOS, TRXREG); @@ -979,7 +990,7 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const uint8* src, int len, GIFR int srcpitch = (r - l) * trbpp >> 3; int h = len / srcpitch; - if(ra - la >= bsx && h > 0) // "transfer width" >= "block width" && there is at least one full row + if (ra - la >= bsx && h > 0) // "transfer width" >= "block width" && there is at least one full row { const uint8* s = &src[-l * trbpp >> 3]; @@ -988,28 +999,28 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const uint8* src, int len, GIFR // left part - if(l < la) + if (l < la) { WriteImageLeftRight(l, la, ty, h, s, srcpitch, BITBLTBUF); } // right part - if(ra < r) + if (ra < r) { WriteImageLeftRight(ra, r, ty, h, s, srcpitch, BITBLTBUF); } // horizontally aligned part - if(la < ra) + if (la < ra) { // top part { int h2 = std::min(h, bsy - (ty & (bsy - 1))); - if(h2 < bsy) + if (h2 < bsy) { WriteImageTopBottom(la, ra, ty, h2, s, srcpitch, BITBLTBUF); @@ -1024,15 +1035,15 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const uint8* src, int len, GIFR { int h2 = h & ~(bsy - 1); - if(h2 > 0) + if (h2 > 0) { size_t addr = (size_t)&s[la * trbpp >> 3]; - if((addr & 31) == 0 && (srcpitch & 31) == 0) + if ((addr & 31) == 0 && (srcpitch & 31) == 0) { WriteImageBlock(la, ra, ty, h2, s, srcpitch, BITBLTBUF); } - else if((addr & 15) == 0 && (srcpitch & 15) == 0) + else if ((addr & 15) == 0 && (srcpitch & 15) == 0) { WriteImageBlock(la, ra, ty, h2, s, srcpitch, BITBLTBUF); } @@ -1049,7 +1060,7 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const uint8* src, int len, GIFR // bottom part - if(h > 0) + if (h > 0) { WriteImageTopBottom(la, ra, ty, h, s, srcpitch, BITBLTBUF); @@ -1062,7 +1073,7 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const uint8* src, int len, GIFR // the rest - if(len > 0) + if (len > 0) { WriteImageX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); } @@ -1071,12 +1082,13 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const uint8* src, int len, GIFR static bool IsTopLeftAligned(int dsax, int tx, int ty, int bw, int bh) { - return ((dsax & (bw-1)) == 0 && (tx & (bw-1)) == 0 && dsax == tx && (ty & (bh-1)) == 0); + return ((dsax & (bw - 1)) == 0 && (tx & (bw - 1)) == 0 && dsax == tx && (ty & (bh - 1)) == 0); } void GSLocalMemory::WriteImage24(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) { - if(TRXREG.RRW == 0) return; + if (TRXREG.RRW == 0) + return; uint32 bp = BITBLTBUF.DBP; uint32 bw = BITBLTBUF.DBW; @@ -1086,7 +1098,7 @@ void GSLocalMemory::WriteImage24(int& tx, int& ty, const uint8* src, int len, GI bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); - if(!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) + if (!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) { // TODO @@ -1096,9 +1108,9 @@ void GSLocalMemory::WriteImage24(int& tx, int& ty, const uint8* src, int len, GI { th += ty; - for(int y = ty; y < th; y += 8, src += srcpitch * 8) + for (int y = ty; y < th; y += 8, src += srcpitch * 8) { - for(int x = tx; x < tw; x += 8) + for (int x = tx; x < tw; x += 8) { GSBlock::UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, BlockPtr32(x, y, bp, bw)); } @@ -1110,7 +1122,8 @@ void GSLocalMemory::WriteImage24(int& tx, int& ty, const uint8* src, int len, GI void GSLocalMemory::WriteImage8H(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) { - if(TRXREG.RRW == 0) return; + if (TRXREG.RRW == 0) + return; uint32 bp = BITBLTBUF.DBP; uint32 bw = BITBLTBUF.DBW; @@ -1120,7 +1133,7 @@ void GSLocalMemory::WriteImage8H(int& tx, int& ty, const uint8* src, int len, GI bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); - if(!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) + if (!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) { // TODO @@ -1130,9 +1143,9 @@ void GSLocalMemory::WriteImage8H(int& tx, int& ty, const uint8* src, int len, GI { th += ty; - for(int y = ty; y < th; y += 8, src += srcpitch * 8) + for (int y = ty; y < th; y += 8, src += srcpitch * 8) { - for(int x = tx; x < tw; x += 8) + for (int x = tx; x < tw; x += 8) { GSBlock::UnpackAndWriteBlock8H(src + (x - tx), srcpitch, BlockPtr32(x, y, bp, bw)); } @@ -1144,7 +1157,8 @@ void GSLocalMemory::WriteImage8H(int& tx, int& ty, const uint8* src, int len, GI void GSLocalMemory::WriteImage4HL(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) { - if(TRXREG.RRW == 0) return; + if (TRXREG.RRW == 0) + return; uint32 bp = BITBLTBUF.DBP; uint32 bw = BITBLTBUF.DBW; @@ -1154,7 +1168,7 @@ void GSLocalMemory::WriteImage4HL(int& tx, int& ty, const uint8* src, int len, G bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); - if(!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) + if (!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) { // TODO @@ -1164,9 +1178,9 @@ void GSLocalMemory::WriteImage4HL(int& tx, int& ty, const uint8* src, int len, G { th += ty; - for(int y = ty; y < th; y += 8, src += srcpitch * 8) + for (int y = ty; y < th; y += 8, src += srcpitch * 8) { - for(int x = tx; x < tw; x += 8) + for (int x = tx; x < tw; x += 8) { GSBlock::UnpackAndWriteBlock4HL(src + (x - tx) / 2, srcpitch, BlockPtr32(x, y, bp, bw)); } @@ -1178,7 +1192,8 @@ void GSLocalMemory::WriteImage4HL(int& tx, int& ty, const uint8* src, int len, G void GSLocalMemory::WriteImage4HH(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) { - if(TRXREG.RRW == 0) return; + if (TRXREG.RRW == 0) + return; uint32 bp = BITBLTBUF.DBP; uint32 bw = BITBLTBUF.DBW; @@ -1188,7 +1203,7 @@ void GSLocalMemory::WriteImage4HH(int& tx, int& ty, const uint8* src, int len, G bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); - if(!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) + if (!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) { // TODO @@ -1198,9 +1213,9 @@ void GSLocalMemory::WriteImage4HH(int& tx, int& ty, const uint8* src, int len, G { th += ty; - for(int y = ty; y < th; y += 8, src += srcpitch * 8) + for (int y = ty; y < th; y += 8, src += srcpitch * 8) { - for(int x = tx; x < tw; x += 8) + for (int x = tx; x < tw; x += 8) { GSBlock::UnpackAndWriteBlock4HH(src + (x - tx) / 2, srcpitch, BlockPtr32(x, y, bp, bw)); } @@ -1212,7 +1227,8 @@ void GSLocalMemory::WriteImage4HH(int& tx, int& ty, const uint8* src, int len, G void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) { - if(TRXREG.RRW == 0) return; + if (TRXREG.RRW == 0) + return; uint32 bp = BITBLTBUF.DBP; uint32 bw = BITBLTBUF.DBW; @@ -1222,7 +1238,7 @@ void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const uint8* src, int len, G bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); - if(!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) + if (!aligned || (tw & 7) || (th & 7) || (len % srcpitch)) { // TODO @@ -1232,9 +1248,9 @@ void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const uint8* src, int len, G { th += ty; - for(int y = ty; y < th; y += 8, src += srcpitch * 8) + for (int y = ty; y < th; y += 8, src += srcpitch * 8) { - for(int x = tx; x < tw; x += 8) + for (int x = tx; x < tw; x += 8) { GSBlock::UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, BlockPtr32Z(x, y, bp, bw)); } @@ -1246,7 +1262,8 @@ void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const uint8* src, int len, G void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) { - if(len <= 0) return; + if (len <= 0) + return; const uint8* pb = (uint8*)src; const uint16* pw = (uint16*)src; @@ -1261,157 +1278,189 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF int sx = (int)TRXPOS.DSAX; int ex = sx + (int)TRXREG.RRW; - switch(BITBLTBUF.DPSM) + switch (BITBLTBUF.DPSM) { - case PSM_PSMCT32: - case PSM_PSMZ32: + case PSM_PSMCT32: + case PSM_PSMZ32: - len /= 4; + len /= 4; - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x++, pd++) + while (len > 0) { - WritePixel32(addr + offset[x], *pd); + uint32 addr = psm->pa(0, y, bp, bw); + int* offset = psm->rowOffset[y & 7]; + + for (; len > 0 && x < ex; len--, x++, pd++) + { + WritePixel32(addr + offset[x], *pd); + } + + if (x >= ex) + { + x = sx; + y++; + } } - if(x >= ex) {x = sx; y++;} - } + break; - break; + case PSM_PSMCT24: + case PSM_PSMZ24: - case PSM_PSMCT24: - case PSM_PSMZ24: + len /= 3; - len /= 3; - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x++, pb += 3) + while (len > 0) { - WritePixel24(addr + offset[x], *(uint32*)pb); + uint32 addr = psm->pa(0, y, bp, bw); + int* offset = psm->rowOffset[y & 7]; + + for (; len > 0 && x < ex; len--, x++, pb += 3) + { + WritePixel24(addr + offset[x], *(uint32*)pb); + } + + if (x >= ex) + { + x = sx; + y++; + } } - if(x >= ex) {x = sx; y++;} - } + break; - break; + case PSM_PSMCT16: + case PSM_PSMCT16S: + case PSM_PSMZ16: + case PSM_PSMZ16S: - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: + len /= 2; - len /= 2; - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x++, pw++) + while (len > 0) { - WritePixel16(addr + offset[x], *pw); + uint32 addr = psm->pa(0, y, bp, bw); + int* offset = psm->rowOffset[y & 7]; + + for (; len > 0 && x < ex; len--, x++, pw++) + { + WritePixel16(addr + offset[x], *pw); + } + + if (x >= ex) + { + x = sx; + y++; + } } - if(x >= ex) {x = sx; y++;} - } + break; - break; + case PSM_PSMT8: - case PSM_PSMT8: - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x++, pb++) + while (len > 0) { - WritePixel8(addr + offset[x], *pb); + uint32 addr = psm->pa(0, y, bp, bw); + int* offset = psm->rowOffset[y & 7]; + + for (; len > 0 && x < ex; len--, x++, pb++) + { + WritePixel8(addr + offset[x], *pb); + } + + if (x >= ex) + { + x = sx; + y++; + } } - if(x >= ex) {x = sx; y++;} - } + break; - break; + case PSM_PSMT4: - case PSM_PSMT4: - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x += 2, pb++) + while (len > 0) { - WritePixel4(addr + offset[x + 0], *pb & 0xf); - WritePixel4(addr + offset[x + 1], *pb >> 4); + uint32 addr = psm->pa(0, y, bp, bw); + int* offset = psm->rowOffset[y & 7]; + + for (; len > 0 && x < ex; len--, x += 2, pb++) + { + WritePixel4(addr + offset[x + 0], *pb & 0xf); + WritePixel4(addr + offset[x + 1], *pb >> 4); + } + + if (x >= ex) + { + x = sx; + y++; + } } - if(x >= ex) {x = sx; y++;} - } + break; - break; + case PSM_PSMT8H: - case PSM_PSMT8H: - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x++, pb++) + while (len > 0) { - WritePixel8H(addr + offset[x], *pb); + uint32 addr = psm->pa(0, y, bp, bw); + int* offset = psm->rowOffset[y & 7]; + + for (; len > 0 && x < ex; len--, x++, pb++) + { + WritePixel8H(addr + offset[x], *pb); + } + + if (x >= ex) + { + x = sx; + y++; + } } - if(x >= ex) {x = sx; y++;} - } + break; - break; + case PSM_PSMT4HL: - case PSM_PSMT4HL: - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x += 2, pb++) + while (len > 0) { - WritePixel4HL(addr + offset[x + 0], *pb & 0xf); - WritePixel4HL(addr + offset[x + 1], *pb >> 4); + uint32 addr = psm->pa(0, y, bp, bw); + int* offset = psm->rowOffset[y & 7]; + + for (; len > 0 && x < ex; len--, x += 2, pb++) + { + WritePixel4HL(addr + offset[x + 0], *pb & 0xf); + WritePixel4HL(addr + offset[x + 1], *pb >> 4); + } + + if (x >= ex) + { + x = sx; + y++; + } } - if(x >= ex) {x = sx; y++;} - } + break; - break; + case PSM_PSMT4HH: - case PSM_PSMT4HH: - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x += 2, pb++) + while (len > 0) { - WritePixel4HH(addr + offset[x + 0], *pb & 0xf); - WritePixel4HH(addr + offset[x + 1], *pb >> 4); + uint32 addr = psm->pa(0, y, bp, bw); + int* offset = psm->rowOffset[y & 7]; + + for (; len > 0 && x < ex; len--, x += 2, pb++) + { + WritePixel4HH(addr + offset[x + 0], *pb & 0xf); + WritePixel4HH(addr + offset[x + 1], *pb >> 4); + } + + if (x >= ex) + { + x = sx; + y++; + } } - if(x >= ex) {x = sx; y++;} - } - - break; + break; } tx = x; @@ -1422,7 +1471,8 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const { - if(len <= 0) return; + if (len <= 0) + return; uint8* RESTRICT pb = (uint8*)dst; uint16* RESTRICT pw = (uint16*)dst; @@ -1439,207 +1489,240 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB // printf("spsm=%d x=%d ex=%d y=%d len=%d\n", BITBLTBUF.SPSM, x, ex, y, len); - switch(BITBLTBUF.SPSM) + switch (BITBLTBUF.SPSM) { - case PSM_PSMCT32: - case PSM_PSMZ32: + case PSM_PSMCT32: + case PSM_PSMZ32: - // MGS1 intro, fade effect between two scenes (airplane outside-inside transition) + // MGS1 intro, fade effect between two scenes (airplane outside-inside transition) - len /= 4; + len /= 4; - while(len > 0) - { - int* RESTRICT offset = psm->rowOffset[y & 7]; - uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; - - for(; len > 0 && x < ex && (x & 7); len--, x++, pd++) + while (len > 0) { - *pd = ps[offset[x]]; + int* RESTRICT offset = psm->rowOffset[y & 7]; + uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; + + for (; len > 0 && x < ex && (x & 7); len--, x++, pd++) + { + *pd = ps[offset[x]]; + } + + // aligned to a column + + for (int ex8 = ex - 8; len >= 8 && x <= ex8; len -= 8, x += 8, pd += 8) + { + int off = offset[x]; + + GSVector4i::store(&pd[0], GSVector4i::load(&ps[off + 0], &ps[off + 4])); + GSVector4i::store(&pd[4], GSVector4i::load(&ps[off + 8], &ps[off + 12])); + + for (int i = 0; i < 8; i++) + ASSERT(pd[i] == ps[offset[x + i]]); + } + + for (; len > 0 && x < ex; len--, x++, pd++) + { + *pd = ps[offset[x]]; + } + + if (x == ex) + { + x = sx; + y++; + } } - // aligned to a column + break; - for(int ex8 = ex - 8; len >= 8 && x <= ex8; len -= 8, x += 8, pd += 8) + case PSM_PSMCT24: + case PSM_PSMZ24: + + len /= 3; + + while (len > 0) { - int off = offset[x]; + int* RESTRICT offset = psm->rowOffset[y & 7]; + uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; - GSVector4i::store(&pd[0], GSVector4i::load(&ps[off + 0], &ps[off + 4])); - GSVector4i::store(&pd[4], GSVector4i::load(&ps[off + 8], &ps[off + 12])); + for (; len > 0 && x < ex; len--, x++, pb += 3) + { + uint32 c = ps[offset[x]]; - for(int i = 0; i < 8; i++) ASSERT(pd[i] == ps[offset[x + i]]); + pb[0] = (uint8)(c); + pb[1] = (uint8)(c >> 8); + pb[2] = (uint8)(c >> 16); + } + + if (x == ex) + { + x = sx; + y++; + } } - for(; len > 0 && x < ex; len--, x++, pd++) + break; + + case PSM_PSMCT16: + case PSM_PSMCT16S: + case PSM_PSMZ16: + case PSM_PSMZ16S: + + len /= 2; + + while (len > 0) { - *pd = ps[offset[x]]; + int* RESTRICT offset = psm->rowOffset[y & 7]; + uint16* RESTRICT ps = &m_vm16[psm->pa(0, y, bp, bw)]; + + for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pw += 4) + { + pw[0] = ps[offset[x + 0]]; + pw[1] = ps[offset[x + 1]]; + pw[2] = ps[offset[x + 2]]; + pw[3] = ps[offset[x + 3]]; + } + + for (; len > 0 && x < ex; len--, x++, pw++) + { + *pw = ps[offset[x]]; + } + + if (x == ex) + { + x = sx; + y++; + } } - if(x == ex) {x = sx; y++;} - } + break; - break; + case PSM_PSMT8: - case PSM_PSMCT24: - case PSM_PSMZ24: - - len /= 3; - - while(len > 0) - { - int* RESTRICT offset = psm->rowOffset[y & 7]; - uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; - - for(; len > 0 && x < ex; len--, x++, pb += 3) + while (len > 0) { - uint32 c = ps[offset[x]]; + int* RESTRICT offset = psm->rowOffset[y & 7]; + uint8* RESTRICT ps = &m_vm8[psm->pa(0, y, bp, bw)]; - pb[0] = (uint8)(c); - pb[1] = (uint8)(c >> 8); - pb[2] = (uint8)(c >> 16); + for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4) + { + pb[0] = ps[offset[x + 0]]; + pb[1] = ps[offset[x + 1]]; + pb[2] = ps[offset[x + 2]]; + pb[3] = ps[offset[x + 3]]; + } + + for (; len > 0 && x < ex; len--, x++, pb++) + { + *pb = ps[offset[x]]; + } + + if (x == ex) + { + x = sx; + y++; + } } - if(x == ex) {x = sx; y++;} - } + break; - break; + case PSM_PSMT4: - case PSM_PSMCT16: - case PSM_PSMCT16S: - case PSM_PSMZ16: - case PSM_PSMZ16S: - - len /= 2; - - while(len > 0) - { - int* RESTRICT offset = psm->rowOffset[y & 7]; - uint16* RESTRICT ps = &m_vm16[psm->pa(0, y, bp, bw)]; - - for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pw += 4) + while (len > 0) { - pw[0] = ps[offset[x + 0]]; - pw[1] = ps[offset[x + 1]]; - pw[2] = ps[offset[x + 2]]; - pw[3] = ps[offset[x + 3]]; + uint32 addr = psm->pa(0, y, bp, bw); + int* RESTRICT offset = psm->rowOffset[y & 7]; + + for (; len > 0 && x < ex; len--, x += 2, pb++) + { + *pb = (uint8)(ReadPixel4(addr + offset[x + 0]) | (ReadPixel4(addr + offset[x + 1]) << 4)); + } + + if (x == ex) + { + x = sx; + y++; + } } - for(; len > 0 && x < ex; len--, x++, pw++) + break; + + case PSM_PSMT8H: + + while (len > 0) { - *pw = ps[offset[x]]; + int* RESTRICT offset = psm->rowOffset[y & 7]; + uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; + + for (int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4) + { + pb[0] = (uint8)(ps[offset[x + 0]] >> 24); + pb[1] = (uint8)(ps[offset[x + 1]] >> 24); + pb[2] = (uint8)(ps[offset[x + 2]] >> 24); + pb[3] = (uint8)(ps[offset[x + 3]] >> 24); + } + + for (; len > 0 && x < ex; len--, x++, pb++) + { + *pb = (uint8)(ps[offset[x]] >> 24); + } + + if (x == ex) + { + x = sx; + y++; + } } - if(x == ex) {x = sx; y++;} - } + break; - break; + case PSM_PSMT4HL: - case PSM_PSMT8: - - while(len > 0) - { - int* RESTRICT offset = psm->rowOffset[y & 7]; - uint8* RESTRICT ps = &m_vm8[psm->pa(0, y, bp, bw)]; - - for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4) + while (len > 0) { - pb[0] = ps[offset[x + 0]]; - pb[1] = ps[offset[x + 1]]; - pb[2] = ps[offset[x + 2]]; - pb[3] = ps[offset[x + 3]]; + int* offset = psm->rowOffset[y & 7]; + uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; + + for (; len > 0 && x < ex; len--, x += 2, pb++) + { + uint32 c0 = (ps[offset[x + 0]] >> 24) & 0x0f; + uint32 c1 = (ps[offset[x + 1]] >> 20) & 0xf0; + + *pb = (uint8)(c0 | c1); + } + + if (x == ex) + { + x = sx; + y++; + } } - for(; len > 0 && x < ex; len--, x++, pb++) + break; + + case PSM_PSMT4HH: + + while (len > 0) { - *pb = ps[offset[x]]; + int* RESTRICT offset = psm->rowOffset[y & 7]; + uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; + + for (; len > 0 && x < ex; len--, x += 2, pb++) + { + uint32 c0 = (ps[offset[x + 0]] >> 28) & 0x0f; + uint32 c1 = (ps[offset[x + 1]] >> 24) & 0xf0; + + *pb = (uint8)(c0 | c1); + } + + if (x == ex) + { + x = sx; + y++; + } } - if(x == ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT4: - - while(len > 0) - { - uint32 addr = psm->pa(0, y, bp, bw); - int* RESTRICT offset = psm->rowOffset[y & 7]; - - for(; len > 0 && x < ex; len--, x += 2, pb++) - { - *pb = (uint8)(ReadPixel4(addr + offset[x + 0]) | (ReadPixel4(addr + offset[x + 1]) << 4)); - } - - if(x == ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT8H: - - while(len > 0) - { - int* RESTRICT offset = psm->rowOffset[y & 7]; - uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; - - for(int ex4 = ex - 4; len >= 4 && x <= ex4; len -= 4, x += 4, pb += 4) - { - pb[0] = (uint8)(ps[offset[x + 0]] >> 24); - pb[1] = (uint8)(ps[offset[x + 1]] >> 24); - pb[2] = (uint8)(ps[offset[x + 2]] >> 24); - pb[3] = (uint8)(ps[offset[x + 3]] >> 24); - } - - for(; len > 0 && x < ex; len--, x++, pb++) - { - *pb = (uint8)(ps[offset[x]] >> 24); - } - - if(x == ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT4HL: - - while(len > 0) - { - int* offset = psm->rowOffset[y & 7]; - uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; - - for(; len > 0 && x < ex; len--, x += 2, pb++) - { - uint32 c0 = (ps[offset[x + 0]] >> 24) & 0x0f; - uint32 c1 = (ps[offset[x + 1]] >> 20) & 0xf0; - - *pb = (uint8)(c0 | c1); - } - - if(x == ex) {x = sx; y++;} - } - - break; - - case PSM_PSMT4HH: - - while(len > 0) - { - int* RESTRICT offset = psm->rowOffset[y & 7]; - uint32* RESTRICT ps = &m_vm32[psm->pa(0, y, bp, bw)]; - - for(; len > 0 && x < ex; len--, x += 2, pb++) - { - uint32 c0 = (ps[offset[x + 0]] >> 28) & 0x0f; - uint32 c1 = (ps[offset[x + 1]] >> 24) & 0xf0; - - *pb = (uint8)(c0 | c1); - } - - if(x == ex) {x = sx; y++;} - } - - break; + break; } tx = x; @@ -1659,7 +1742,7 @@ void GSLocalMemory::ReadTexture32(const GSOffset* RESTRICT off, const GSVector4i void GSLocalMemory::ReadTexture24(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) { - if(TEXA.AEM) + if (TEXA.AEM) { FOREACH_BLOCK_START(r, 8, 8, 32) { @@ -1687,10 +1770,12 @@ void GSLocalMemory::ReadTextureGPU24(const GSOffset* RESTRICT off, const GSVecto // Convert packed RGB scanline to 32 bits RGBA ASSERT(dstpitch >= r.width() * 4); - for(int y = r.top; y < r.bottom; y ++) { + for (int y = r.top; y < r.bottom; y++) + { uint8* line = dst + y * dstpitch; - for(int x = r.right; x >= r.left; x--) { + for (int x = r.right; x >= r.left; x--) + { *(uint32*)&line[x * 4] = *(uint32*)&line[x * 3] & 0xFFFFFF; } } @@ -1698,7 +1783,7 @@ void GSLocalMemory::ReadTextureGPU24(const GSOffset* RESTRICT off, const GSVecto void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) { - if(TEXA.AEM) + if (TEXA.AEM) { FOREACH_BLOCK_START(r, 16, 8, 32) { @@ -1784,7 +1869,7 @@ void GSLocalMemory::ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, cons { ALIGN_STACK(32); - if(TEXA.AEM) + if (TEXA.AEM) { GSBlock::ReadAndExpandBlock24(BlockPtr(bp), dst, dstpitch, TEXA); } @@ -1798,7 +1883,7 @@ void GSLocalMemory::ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, cons { ALIGN_STACK(32); - if(TEXA.AEM) + if (TEXA.AEM) { GSBlock::ReadAndExpandBlock16(BlockPtr(bp), dst, dstpitch, TEXA); } @@ -1852,9 +1937,7 @@ void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& readTexel rt = psm.rt; readTexture rtx = psm.rtx; - if(r.width() < psm.bs.x || r.height() < psm.bs.y - || (r.left & (psm.bs.x - 1)) || (r.top & (psm.bs.y - 1)) - || (r.right & (psm.bs.x - 1)) || (r.bottom & (psm.bs.y - 1))) + if (r.width() < psm.bs.x || r.height() < psm.bs.y || (r.left & (psm.bs.x - 1)) || (r.top & (psm.bs.y - 1)) || (r.right & (psm.bs.x - 1)) || (r.bottom & (psm.bs.y - 1))) { GIFRegTEX0 TEX0; @@ -1866,15 +1949,16 @@ void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& bool aligned = ((size_t)(dst + (cr.left - r.left) * sizeof(uint32)) & 0xf) == 0; - if(cr.rempty() || !aligned) + if (cr.rempty() || !aligned) { // TODO: expand r to block size, read into temp buffer - if(!aligned) printf("unaligned memory pointer passed to ReadTexture\n"); + if (!aligned) + printf("unaligned memory pointer passed to ReadTexture\n"); - for(int y = r.top; y < r.bottom; y++, dst += dstpitch) + for (int y = r.top; y < r.bottom; y++, dst += dstpitch) { - for(int x = r.left, i = 0; x < r.right; x++, i++) + for (int x = r.left, i = 0; x < r.right; x++, i++) { ((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA); } @@ -1882,36 +1966,36 @@ void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& } else { - for(int y = r.top; y < cr.top; y++, dst += dstpitch) + for (int y = r.top; y < cr.top; y++, dst += dstpitch) { - for(int x = r.left, i = 0; x < r.right; x++, i++) + for (int x = r.left, i = 0; x < r.right; x++, i++) { ((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA); } } - for(int y = cr.bottom; y < r.bottom; y++, dst += dstpitch) + for (int y = cr.bottom; y < r.bottom; y++, dst += dstpitch) { - for(int x = r.left, i = 0; x < r.right; x++, i++) + for (int x = r.left, i = 0; x < r.right; x++, i++) { ((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA); } } - for(int y = cr.top; y < cr.bottom; y++, dst += dstpitch) + for (int y = cr.top; y < cr.bottom; y++, dst += dstpitch) { - for(int x = r.left, i = 0; x < cr.left; x++, i++) + for (int x = r.left, i = 0; x < cr.left; x++, i++) { ((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA); } - for(int x = cr.right, i = x - r.left; x < r.right; x++, i++) + for (int x = cr.right, i = x - r.left; x < r.right; x++, i++) { ((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA); } } - if(!cr.rempty()) + if (!cr.rempty()) { (this->*rtx)(off, cr, dst + (cr.left - r.left) * sizeof(uint32), dstpitch, TEXA); } @@ -2025,9 +2109,9 @@ void GSLocalMemory::SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32 uint8* p = (uint8*)bits; - for(int j = 0; j < h; j++, p += pitch) + for (int j = 0; j < h; j++, p += pitch) { - for(int i = 0; i < w; i++) + for (int i = 0; i < w; i++) { ((uint32*)p)[i] = (this->*rp)(i, j, TEX0.TBP0, TEX0.TBW); } @@ -2035,7 +2119,7 @@ void GSLocalMemory::SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32 GSTextureSW t(GSTexture::Offscreen, w, h); - if(t.Update(GSVector4i(0, 0, w, h), bits, pitch)) + if (t.Update(GSVector4i(0, 0, w, h), bits, pitch)) { t.Save(fn); } @@ -2051,7 +2135,7 @@ GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm) GSLocalMemory::pixelAddress bn = GSLocalMemory::m_psm[_psm].bn; - for(int i = 0; i < 256; i++) + for (int i = 0; i < 256; i++) { block.row[i] = (short)bn(0, i << 3, _bp, _bw); } @@ -2060,12 +2144,12 @@ GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm) GSLocalMemory::pixelAddress pa = GSLocalMemory::m_psm[_psm].pa; - for(int i = 0; i < 4096; i++) + for (int i = 0; i < 4096; i++) { pixel.row[i] = (int)pa(0, i & 0x7ff, _bp, _bw); } - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { pixel.col[i] = GSLocalMemory::m_psm[_psm].rowOffset[i]; } @@ -2075,7 +2159,7 @@ GSOffset::GSOffset(uint32 _bp, uint32 _bw, uint32 _psm) GSOffset::~GSOffset() { - for(auto buffer: pages_as_bit) + for (auto buffer : pages_as_bit) _aligned_free(buffer); } @@ -2085,7 +2169,8 @@ uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bb GSVector4i r = rect.ralign(bs); - if(bbox != NULL) *bbox = r; + if (bbox != NULL) + *bbox = r; // worst case: // bp page-aligned: (w * h) / (64 * 32) @@ -2095,7 +2180,7 @@ uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bb int limit = MAX_PAGES + 1; - if(pages == NULL) + if (pages == NULL) { limit = std::min((size >> ((bp & 31) != 0 ? 6 : 11)) + 2, MAX_PAGES) + 1; @@ -2116,18 +2201,18 @@ uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bb uint32* RESTRICT p = pages; - for(int y = r.top; y < r.bottom; y += bs.y) + for (int y = r.top; y < r.bottom; y += bs.y) { uint32 base = block.row[y]; - for(int x = r.left; x < r.right; x += bs.x) + for (int x = r.left; x < r.right; x += bs.x) { uint32 n = ((base + block.col[x]) >> 5) % MAX_PAGES; uint32& row = tmp[n >> 5]; uint32 col = 1 << (n & 31); - if((row & col) == 0) + if ((row & col) == 0) { row |= col; @@ -2157,7 +2242,7 @@ uint32* GSOffset::GetPagesAsBits(const GIFRegTEX0& TEX0) return pages; // Aligned on 64 bytes to store the full bitmap in a single cache line - pages = (uint32*)_aligned_malloc(MAX_PAGES/8, 64); + pages = (uint32*)_aligned_malloc(MAX_PAGES / 8, 64); pages_as_bit[hash_key] = pages; GetPagesAsBits(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH), pages); @@ -2184,11 +2269,11 @@ void* GSOffset::GetPagesAsBits(const GSVector4i& rect, void* pages) bs.x >>= 3; bs.y >>= 3; - for(int y = r.top; y < r.bottom; y += bs.y) + for (int y = r.top; y < r.bottom; y += bs.y) { uint32 base = block.row[y]; - for(int x = r.left; x < r.right; x += bs.x) + for (int x = r.left; x < r.right; x += bs.x) { uint32 n = ((base + block.col[x]) >> 5) % MAX_PAGES; diff --git a/plugins/GSdx/GSLocalMemory.h b/plugins/GSdx/GSLocalMemory.h index 47483aaac1..fd25c546d2 100644 --- a/plugins/GSdx/GSLocalMemory.h +++ b/plugins/GSdx/GSLocalMemory.h @@ -35,24 +35,24 @@ public: short row[256]; // yn (n = 0 8 16 ...) short* col; // blockOffset* }; - + struct alignas(32) Pixel { int row[4096]; // yn (n = 0 1 2 ...) NOTE: this wraps around above 2048, only transfers should address the upper half (dark cloud 2 inventing) int* col[8]; // rowOffset* }; - union {uint32 hash; struct {uint32 bp:14, bw:6, psm:6;};}; + union { uint32 hash; struct { uint32 bp:14, bw:6, psm:6; }; }; Block block; Pixel pixel; - std::array pages_as_bit; // texture page coverage based on the texture size. Lazy allocated + std::array pages_as_bit; // texture page coverage based on the texture size. Lazy allocated GSOffset(uint32 bp, uint32 bw, uint32 psm); virtual ~GSOffset(); - enum {EOP = 0xffffffff}; + enum { EOP = 0xffffffff }; uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL); void* GetPagesAsBits(const GSVector4i& rect, void* pages); @@ -121,8 +121,8 @@ public: static const int m_vmsize = 1024 * 1024 * 4; - uint8* m_vm8; - uint16* m_vm16; + uint8* m_vm8; + uint16* m_vm16; uint32* m_vm32; GSClut m_clut; @@ -164,7 +164,10 @@ protected: __forceinline static uint32 Expand16To32(uint16 c, const GIFRegTEXA& TEXA) { - return (((c & 0x8000) ? TEXA.TA1 : (!TEXA.AEM | c) ? TEXA.TA0 : 0) << 24) | ((c & 0x7c00) << 9) | ((c & 0x03e0) << 6) | ((c & 0x001f) << 3); + return (((c & 0x8000) ? TEXA.TA1 : (!TEXA.AEM | c) ? TEXA.TA0 : 0) << 24) + | ((c & 0x7c00) << 9) + | ((c & 0x03e0) << 6) + | ((c & 0x001f) << 3); } // TODO @@ -557,7 +560,8 @@ public: __forceinline void WritePixel4(uint32 addr, uint32 c) { - int shift = (addr & 1) << 2; addr >>= 1; + int shift = (addr & 1) << 2; + addr >>= 1; m_vm8[addr] = (uint8)((m_vm8[addr] & (0xf0 >> shift)) | ((c & 0x0f) << shift)); } @@ -620,7 +624,7 @@ public: WritePixel8H(PixelAddress32(x, y, bp, bw), c); } - __forceinline void WritePixel4HL(int x, int y, uint32 c, uint32 bp, uint32 bw) + __forceinline void WritePixel4HL(int x, int y, uint32 c, uint32 bp, uint32 bw) { WritePixel4HL(PixelAddress32(x, y, bp, bw), c); } @@ -674,13 +678,13 @@ public: { src -= r.left * sizeof(uint32); - for(int y = r.top; y < r.bottom; y++, src += pitch) + for (int y = r.top; y < r.bottom; y++, src += pitch) { uint32* RESTRICT s = (uint32*)src; uint32* RESTRICT d = &m_vm32[off->pixel.row[y]]; int* RESTRICT col = off->pixel.col[0]; - for(int x = r.left; x < r.right; x++) + for (int x = r.left; x < r.right; x++) { d[col[x]] = s[x]; } @@ -691,13 +695,13 @@ public: { src -= r.left * sizeof(uint32); - for(int y = r.top; y < r.bottom; y++, src += pitch) + for (int y = r.top; y < r.bottom; y++, src += pitch) { uint32* RESTRICT s = (uint32*)src; uint32* RESTRICT d = &m_vm32[off->pixel.row[y]]; int* RESTRICT col = off->pixel.col[0]; - for(int x = r.left; x < r.right; x++) + for (int x = r.left; x < r.right; x++) { d[col[x]] = (d[col[x]] & 0xff000000) | (s[x] & 0x00ffffff); } @@ -708,13 +712,13 @@ public: { src -= r.left * sizeof(uint16); - for(int y = r.top; y < r.bottom; y++, src += pitch) + for (int y = r.top; y < r.bottom; y++, src += pitch) { uint16* RESTRICT s = (uint16*)src; uint16* RESTRICT d = &m_vm16[off->pixel.row[y]]; int* RESTRICT col = off->pixel.col[0]; - for(int x = r.left; x < r.right; x++) + for (int x = r.left; x < r.right; x++) { d[col[x]] = s[x]; } @@ -725,13 +729,13 @@ public: { src -= r.left * sizeof(uint32); - for(int y = r.top; y < r.bottom; y++, src += pitch) + for (int y = r.top; y < r.bottom; y++, src += pitch) { uint32* RESTRICT s = (uint32*)src; uint16* RESTRICT d = &m_vm16[off->pixel.row[y]]; int* RESTRICT col = off->pixel.col[0]; - for(int x = r.left; x < r.right; x++) + for (int x = r.left; x < r.right; x++) { uint32 rb = s[x] & 0x00f800f8; uint32 ga = s[x] & 0x8000f800; @@ -848,19 +852,19 @@ public: // - template + template void WriteImageColumn(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF); - template + template void WriteImageBlock(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF); - template + template void WriteImageLeftRight(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF); - template + template void WriteImageTopBottom(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF); - template + template void WriteImage(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); void WriteImage24(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); @@ -913,10 +917,10 @@ public: // - template void ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); + template + void ReadTexture(const GSOffset* RESTRICT off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA); // void SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h); }; - diff --git a/plugins/GSdx/GSLzma.cpp b/plugins/GSdx/GSLzma.cpp index 549ed15836..7cef54b9fd 100644 --- a/plugins/GSdx/GSLzma.cpp +++ b/plugins/GSdx/GSLzma.cpp @@ -21,32 +21,36 @@ #include "stdafx.h" #include "GSLzma.h" -GSDumpFile::GSDumpFile(char* filename, const char* repack_filename) { +GSDumpFile::GSDumpFile(char* filename, const char* repack_filename) +{ m_fp = fopen(filename, "rb"); - if (m_fp == nullptr) { + if (m_fp == nullptr) + { fprintf(stderr, "failed to open %s\n", filename); throw "BAD"; // Just exit the program } m_repack_fp = nullptr; - if (repack_filename) { + if (repack_filename) + { m_repack_fp = fopen(repack_filename, "wb"); if (m_repack_fp == nullptr) fprintf(stderr, "failed to open %s for repack\n", repack_filename); } } -void GSDumpFile::Repack(void* ptr, size_t size) { +void GSDumpFile::Repack(void* ptr, size_t size) +{ if (m_repack_fp == nullptr) return; size_t ret = fwrite(ptr, 1, size, m_repack_fp); if (ret != size) fprintf(stderr, "Failed to repack\n"); - } -GSDumpFile::~GSDumpFile() { +GSDumpFile::~GSDumpFile() +{ if (m_fp) fclose(m_fp); if (m_repack_fp) @@ -54,13 +58,16 @@ GSDumpFile::~GSDumpFile() { } /******************************************************************/ -GSDumpLzma::GSDumpLzma(char* filename, const char* repack_filename) : GSDumpFile(filename, repack_filename) { +GSDumpLzma::GSDumpLzma(char* filename, const char* repack_filename) + : GSDumpFile(filename, repack_filename) +{ memset(&m_strm, 0, sizeof(lzma_stream)); lzma_ret ret = lzma_stream_decoder(&m_strm, UINT32_MAX, 0); - if (ret != LZMA_OK) { + if (ret != LZMA_OK) + { fprintf(stderr, "Error initializing the decoder! (error code %u)\n", ret); throw "BAD"; // Just exit the program } @@ -78,18 +85,21 @@ GSDumpLzma::GSDumpLzma(char* filename, const char* repack_filename) : GSDumpFile m_strm.next_out = m_area; } -void GSDumpLzma::Decompress() { +void GSDumpLzma::Decompress() +{ lzma_action action = LZMA_RUN; m_strm.next_out = m_area; m_strm.avail_out = m_buff_size; // Nothing left in the input buffer. Read data from the file - if (m_strm.avail_in == 0 && !feof(m_fp)) { + if (m_strm.avail_in == 0 && !feof(m_fp)) + { m_strm.next_in = m_inbuf; m_strm.avail_in = fread(m_inbuf, 1, BUFSIZ, m_fp); - if (ferror(m_fp)) { + if (ferror(m_fp)) + { fprintf(stderr, "Read error: %s\n", strerror(errno)); throw "BAD"; // Just exit the program } @@ -97,10 +107,12 @@ void GSDumpLzma::Decompress() { lzma_ret ret = lzma_code(&m_strm, action); - if (ret != LZMA_OK) { + if (ret != LZMA_OK) + { if (ret == LZMA_STREAM_END) fprintf(stderr, "LZMA decoder finished without error\n\n"); - else { + else + { fprintf(stderr, "Decoder error: (error code %u)\n", ret); throw "BAD"; // Just exit the program } @@ -110,28 +122,33 @@ void GSDumpLzma::Decompress() { m_avail = m_buff_size - m_strm.avail_out; } -bool GSDumpLzma::IsEof() { +bool GSDumpLzma::IsEof() +{ return feof(m_fp) && m_avail == 0 && m_strm.avail_in == 0; } -bool GSDumpLzma::Read(void* ptr, size_t size) { +bool GSDumpLzma::Read(void* ptr, size_t size) +{ size_t off = 0; uint8_t* dst = (uint8_t*)ptr; size_t full_size = size; - while (size && !IsEof()) { - if (m_avail == 0) { + while (size && !IsEof()) + { + if (m_avail == 0) + { Decompress(); } size_t l = std::min(size, m_avail); - memcpy(dst + off, m_area+m_start, l); + memcpy(dst + off, m_area + m_start, l); m_avail -= l; size -= l; m_start += l; off += l; } - if (size == 0) { + if (size == 0) + { Repack(ptr, full_size); return true; } @@ -139,7 +156,8 @@ bool GSDumpLzma::Read(void* ptr, size_t size) { return false; } -GSDumpLzma::~GSDumpLzma() { +GSDumpLzma::~GSDumpLzma() +{ lzma_end(&m_strm); if (m_inbuf) @@ -150,26 +168,32 @@ GSDumpLzma::~GSDumpLzma() { /******************************************************************/ -GSDumpRaw::GSDumpRaw(char* filename, const char* repack_filename) : GSDumpFile(filename, repack_filename) { +GSDumpRaw::GSDumpRaw(char* filename, const char* repack_filename) + : GSDumpFile(filename, repack_filename) +{ m_buff_size = 0; - m_area = NULL; - m_inbuf = NULL; + m_area = nullptr; + m_inbuf = nullptr; m_avail = 0; m_start = 0; } -bool GSDumpRaw::IsEof() { +bool GSDumpRaw::IsEof() +{ return !!feof(m_fp); } -bool GSDumpRaw::Read(void* ptr, size_t size) { +bool GSDumpRaw::Read(void* ptr, size_t size) +{ size_t ret = fread(ptr, 1, size, m_fp); - if (ret != size && ferror(m_fp)) { + if (ret != size && ferror(m_fp)) + { fprintf(stderr, "GSDumpRaw:: Read error (%zu/%zu)\n", ret, size); throw "BAD"; // Just exit the program } - if (ret == size) { + if (ret == size) + { Repack(ptr, size); return true; } diff --git a/plugins/GSdx/GSLzma.h b/plugins/GSdx/GSLzma.h index 7b48575322..9ad286d5fd 100644 --- a/plugins/GSdx/GSLzma.h +++ b/plugins/GSdx/GSLzma.h @@ -20,15 +20,16 @@ #include -class GSDumpFile { - FILE* m_repack_fp; +class GSDumpFile +{ + FILE* m_repack_fp; - protected: - FILE* m_fp; +protected: + FILE* m_fp; void Repack(void* ptr, size_t size); - public: +public: virtual bool IsEof() = 0; virtual bool Read(void* ptr, size_t size) = 0; @@ -36,21 +37,20 @@ class GSDumpFile { virtual ~GSDumpFile(); }; -class GSDumpLzma : public GSDumpFile { - +class GSDumpLzma : public GSDumpFile +{ lzma_stream m_strm; - size_t m_buff_size; - uint8_t* m_area; - uint8_t* m_inbuf; + size_t m_buff_size; + uint8_t* m_area; + uint8_t* m_inbuf; - size_t m_avail; - size_t m_start; + size_t m_avail; + size_t m_start; void Decompress(); - public: - +public: GSDumpLzma(char* filename, const char* repack_filename); virtual ~GSDumpLzma(); @@ -58,17 +58,16 @@ class GSDumpLzma : public GSDumpFile { bool Read(void* ptr, size_t size) final; }; -class GSDumpRaw : public GSDumpFile { +class GSDumpRaw : public GSDumpFile +{ + size_t m_buff_size; + uint8_t* m_area; + uint8_t* m_inbuf; - size_t m_buff_size; - uint8_t* m_area; - uint8_t* m_inbuf; - - size_t m_avail; - size_t m_start; - - public: + size_t m_avail; + size_t m_start; +public: GSDumpRaw(char* filename, const char* repack_filename); virtual ~GSDumpRaw() = default; diff --git a/plugins/GSdx/GSPerfMon.cpp b/plugins/GSdx/GSPerfMon.cpp index 7b5438d941..a6e3b64030 100644 --- a/plugins/GSdx/GSPerfMon.cpp +++ b/plugins/GSdx/GSPerfMon.cpp @@ -36,19 +36,19 @@ GSPerfMon::GSPerfMon() void GSPerfMon::Put(counter_t c, double val) { #ifndef DISABLE_PERF_MON - if(c == Frame) + if (c == Frame) { #if defined(__unix__) || defined(__APPLE__) // clock on linux will return CLOCK_PROCESS_CPUTIME_ID. // CLOCK_THREAD_CPUTIME_ID is much more useful to measure the fps struct timespec ts; clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); - uint64 now = (uint64) ts.tv_sec * (uint64) 1e6 + (uint64) ts.tv_nsec / (uint64) 1e3; + uint64 now = (uint64)ts.tv_sec * (uint64)1e6 + (uint64)ts.tv_nsec / (uint64)1e3; #else clock_t now = clock(); #endif - if(m_lastframe != 0) + if (m_lastframe != 0) { m_counters[c] += (now - m_lastframe) * 1000 / CLOCKS_PER_SEC; } @@ -67,9 +67,9 @@ void GSPerfMon::Put(counter_t c, double val) void GSPerfMon::Update() { #ifndef DISABLE_PERF_MON - if(m_count > 0) + if (m_count > 0) { - for(size_t i = 0; i < countof(m_counters); i++) + for (size_t i = 0; i < countof(m_counters); i++) { m_stats[i] = m_counters[i] / m_count; } @@ -86,7 +86,7 @@ void GSPerfMon::Start(int timer) #ifndef DISABLE_PERF_MON m_start[timer] = __rdtsc(); - if(m_begin[timer] == 0) + if (m_begin[timer] == 0) { m_begin[timer] = m_start[timer]; } @@ -96,7 +96,7 @@ void GSPerfMon::Start(int timer) void GSPerfMon::Stop(int timer) { #ifndef DISABLE_PERF_MON - if(m_start[timer] > 0) + if (m_start[timer] > 0) { m_total[timer] += __rdtsc() - m_start[timer]; m_start[timer] = 0; @@ -108,7 +108,7 @@ int GSPerfMon::CPU(int timer, bool reset) { int percent = (int)(100 * m_total[timer] / (__rdtsc() - m_begin[timer])); - if(reset) + if (reset) { m_begin[timer] = 0; m_start[timer] = 0; diff --git a/plugins/GSdx/GSPerfMon.h b/plugins/GSdx/GSPerfMon.h index 45bedf9b88..c5e059f759 100644 --- a/plugins/GSdx/GSPerfMon.h +++ b/plugins/GSdx/GSPerfMon.h @@ -24,18 +24,25 @@ class GSPerfMon { public: - enum timer_t + enum timer_t { - Main, - Sync, - WorkerDraw0, WorkerDraw1, WorkerDraw2, WorkerDraw3, WorkerDraw4, WorkerDraw5, WorkerDraw6, WorkerDraw7, - WorkerDraw8, WorkerDraw9, WorkerDraw10, WorkerDraw11, WorkerDraw12, WorkerDraw13, WorkerDraw14, WorkerDraw15, + Main, + Sync, + WorkerDraw0, WorkerDraw1, WorkerDraw2, WorkerDraw3, WorkerDraw4, WorkerDraw5, WorkerDraw6, WorkerDraw7, + WorkerDraw8, WorkerDraw9, WorkerDraw10, WorkerDraw11, WorkerDraw12, WorkerDraw13, WorkerDraw14, WorkerDraw15, TimerLast, }; - - enum counter_t + + enum counter_t { - Frame, Prim, Draw, Swizzle, Unswizzle, Fillrate, Quad, SyncPoint, + Frame, + Prim, + Draw, + Swizzle, + Unswizzle, + Fillrate, + Quad, + SyncPoint, CounterLast, }; @@ -52,11 +59,11 @@ protected: public: GSPerfMon(); - void SetFrame(uint64 frame) {m_frame = frame;} - uint64 GetFrame() {return m_frame;} + void SetFrame(uint64 frame) { m_frame = frame; } + uint64 GetFrame() { return m_frame; } void Put(counter_t c, double val = 0); - double Get(counter_t c) {return m_stats[c];} + double Get(counter_t c) { return m_stats[c]; } void Update(); void Start(int timer = Main); @@ -70,6 +77,10 @@ class GSPerfMonAutoTimer int m_timer; public: - GSPerfMonAutoTimer(GSPerfMon* pm, int timer = GSPerfMon::Main) {m_timer = timer; (m_pm = pm)->Start(m_timer);} - ~GSPerfMonAutoTimer() {m_pm->Stop(m_timer);} + GSPerfMonAutoTimer(GSPerfMon* pm, int timer = GSPerfMon::Main) + { + m_timer = timer; + (m_pm = pm)->Start(m_timer); + } + ~GSPerfMonAutoTimer() { m_pm->Stop(m_timer); } }; diff --git a/plugins/GSdx/GSPng.cpp b/plugins/GSdx/GSPng.cpp index 47f4019740..283afeae86 100644 --- a/plugins/GSdx/GSPng.cpp +++ b/plugins/GSdx/GSPng.cpp @@ -23,129 +23,135 @@ #include #include -struct { - int type; - int bytes_per_pixel_in; - int bytes_per_pixel_out; - int channel_bit_depth; - const char *extension[2]; +struct +{ + int type; + int bytes_per_pixel_in; + int bytes_per_pixel_out; + int channel_bit_depth; + const char *extension[2]; } static const pixel[GSPng::Format::COUNT] = { - {PNG_COLOR_TYPE_RGBA, 4, 4, 8 , {"_full.png", nullptr}}, // RGBA_PNG - {PNG_COLOR_TYPE_RGB , 4, 3, 8 , {".png", nullptr}}, // RGB_PNG - {PNG_COLOR_TYPE_RGB , 4, 3, 8 , {".png", "_alpha.png"}}, // RGB_A_PNG - {PNG_COLOR_TYPE_GRAY, 4, 1, 8 , {"_alpha.png", nullptr}}, // ALPHA_PNG - {PNG_COLOR_TYPE_GRAY, 1, 1, 8 , {"_R8I.png", nullptr}}, // R8I_PNG - {PNG_COLOR_TYPE_GRAY, 2, 2, 16, {"_R16I.png", nullptr}}, // R16I_PNG - {PNG_COLOR_TYPE_GRAY, 4, 2, 16, {"_R32I_lsb.png", "_R32I_msb.png"}}, // R32I_PNG + {PNG_COLOR_TYPE_RGBA, 4, 4, 8 , {"_full.png", nullptr}}, // RGBA_PNG + {PNG_COLOR_TYPE_RGB , 4, 3, 8 , {".png", nullptr}}, // RGB_PNG + {PNG_COLOR_TYPE_RGB , 4, 3, 8 , {".png", "_alpha.png"}}, // RGB_A_PNG + {PNG_COLOR_TYPE_GRAY, 4, 1, 8 , {"_alpha.png", nullptr}}, // ALPHA_PNG + {PNG_COLOR_TYPE_GRAY, 1, 1, 8 , {"_R8I.png", nullptr}}, // R8I_PNG + {PNG_COLOR_TYPE_GRAY, 2, 2, 16, {"_R16I.png", nullptr}}, // R16I_PNG + {PNG_COLOR_TYPE_GRAY, 4, 2, 16, {"_R32I_lsb.png", "_R32I_msb.png"}}, // R32I_PNG }; -namespace GSPng { +namespace GSPng +{ - bool SaveFile(const std::string& file, const Format fmt, const uint8* const image, - uint8* const row, const int width, const int height, const int pitch, - const int compression, const bool rb_swapped = false, const bool first_image = false) - { - const int channel_bit_depth = pixel[fmt].channel_bit_depth; - const int bytes_per_pixel_in = pixel[fmt].bytes_per_pixel_in; + bool SaveFile(const std::string& file, const Format fmt, const uint8* const image, + uint8* const row, const int width, const int height, const int pitch, + const int compression, const bool rb_swapped = false, const bool first_image = false) + { + const int channel_bit_depth = pixel[fmt].channel_bit_depth; + const int bytes_per_pixel_in = pixel[fmt].bytes_per_pixel_in; - const int type = first_image ? pixel[fmt].type : PNG_COLOR_TYPE_GRAY; - const int offset = first_image ? 0 : pixel[fmt].bytes_per_pixel_out; - const int bytes_per_pixel_out = first_image ? pixel[fmt].bytes_per_pixel_out : bytes_per_pixel_in - offset; + const int type = first_image ? pixel[fmt].type : PNG_COLOR_TYPE_GRAY; + const int offset = first_image ? 0 : pixel[fmt].bytes_per_pixel_out; + const int bytes_per_pixel_out = first_image ? pixel[fmt].bytes_per_pixel_out : bytes_per_pixel_in - offset; - FILE *fp = px_fopen(file, "wb"); - if (fp == nullptr) - return false; + FILE* fp = px_fopen(file, "wb"); + if (fp == nullptr) + return false; - png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); - png_infop info_ptr = nullptr; + png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); + png_infop info_ptr = nullptr; - bool success; - try { - if (png_ptr == nullptr) - throw GSDXRecoverableError(); + bool success; + try + { + if (png_ptr == nullptr) + throw GSDXRecoverableError(); - info_ptr = png_create_info_struct(png_ptr); - if (info_ptr == nullptr) - throw GSDXRecoverableError(); + info_ptr = png_create_info_struct(png_ptr); + if (info_ptr == nullptr) + throw GSDXRecoverableError(); - if (setjmp(png_jmpbuf(png_ptr))) - throw GSDXRecoverableError(); + if (setjmp(png_jmpbuf(png_ptr))) + throw GSDXRecoverableError(); - png_init_io(png_ptr, fp); - png_set_compression_level(png_ptr, compression); - png_set_IHDR(png_ptr, info_ptr, width, height, channel_bit_depth, type, - PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); - png_write_info(png_ptr, info_ptr); + png_init_io(png_ptr, fp); + png_set_compression_level(png_ptr, compression); + png_set_IHDR(png_ptr, info_ptr, width, height, channel_bit_depth, type, + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); + png_write_info(png_ptr, info_ptr); - if (channel_bit_depth > 8) - png_set_swap(png_ptr); - if (rb_swapped && type != PNG_COLOR_TYPE_GRAY) - png_set_bgr(png_ptr); + if (channel_bit_depth > 8) + png_set_swap(png_ptr); + if (rb_swapped && type != PNG_COLOR_TYPE_GRAY) + png_set_bgr(png_ptr); - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) - for (int i = 0; i < bytes_per_pixel_out; ++i) - row[bytes_per_pixel_out * x + i] = image[y * pitch + bytes_per_pixel_in * x + i + offset]; - png_write_row(png_ptr, row); - } - png_write_end(png_ptr, nullptr); + for (int y = 0; y < height; ++y) + { + for (int x = 0; x < width; ++x) + for (int i = 0; i < bytes_per_pixel_out; ++i) + row[bytes_per_pixel_out * x + i] = image[y * pitch + bytes_per_pixel_in * x + i + offset]; + png_write_row(png_ptr, row); + } + png_write_end(png_ptr, nullptr); - success = true; - } catch (GSDXRecoverableError&) { - fprintf(stderr, "Failed to write image %s\n", file.c_str()); + success = true; + } + catch (GSDXRecoverableError&) + { + fprintf(stderr, "Failed to write image %s\n", file.c_str()); success = false; - } + } - if (png_ptr) - png_destroy_write_struct(&png_ptr, info_ptr ? &info_ptr : nullptr); - fclose(fp); + if (png_ptr) + png_destroy_write_struct(&png_ptr, info_ptr ? &info_ptr : nullptr); + fclose(fp); - return success; - } + return success; + } - bool Save(GSPng::Format fmt, const std::string& file, uint8* image, int w, int h, int pitch, int compression, bool rb_swapped) - { - std::string root = file; - root.replace(file.length() - 4, 4, ""); + bool Save(GSPng::Format fmt, const std::string& file, uint8* image, int w, int h, int pitch, int compression, bool rb_swapped) + { + std::string root = file; + root.replace(file.length() - 4, 4, ""); - ASSERT(fmt >= Format::START && fmt < Format::COUNT); + ASSERT(fmt >= Format::START && fmt < Format::COUNT); - if (compression < 0 || compression > Z_BEST_COMPRESSION) - compression = Z_BEST_SPEED; + if (compression < 0 || compression > Z_BEST_COMPRESSION) + compression = Z_BEST_SPEED; - std::unique_ptr row(new uint8[pixel[fmt].bytes_per_pixel_out * w]); + std::unique_ptr row(new uint8[pixel[fmt].bytes_per_pixel_out * w]); - std::string filename = root + pixel[fmt].extension[0]; - if (!SaveFile(filename, fmt, image, row.get(), w, h, pitch, compression, rb_swapped, true)) - return false; + std::string filename = root + pixel[fmt].extension[0]; + if (!SaveFile(filename, fmt, image, row.get(), w, h, pitch, compression, rb_swapped, true)) + return false; - // Second image - if (pixel[fmt].extension[1] == nullptr) - return true; + // Second image + if (pixel[fmt].extension[1] == nullptr) + return true; - filename = root + pixel[fmt].extension[1]; - return SaveFile(filename, fmt, image, row.get(), w, h, pitch, compression); - } + filename = root + pixel[fmt].extension[1]; + return SaveFile(filename, fmt, image, row.get(), w, h, pitch, compression); + } - Transaction::Transaction(GSPng::Format fmt, const std::string& file, const uint8* image, int w, int h, int pitch, int compression) - : m_fmt(fmt), m_file(file), m_w(w), m_h(h), m_pitch(pitch), m_compression(compression) - { - // Note: yes it would be better to use shared pointer - m_image = (uint8*)_aligned_malloc(pitch*h, 32); - if (m_image) - memcpy(m_image, image, pitch*h); - } + Transaction::Transaction(GSPng::Format fmt, const std::string& file, const uint8* image, int w, int h, int pitch, int compression) + : m_fmt(fmt), m_file(file), m_w(w), m_h(h), m_pitch(pitch), m_compression(compression) + { + // Note: yes it would be better to use shared pointer + m_image = (uint8*)_aligned_malloc(pitch * h, 32); + if (m_image) + memcpy(m_image, image, pitch * h); + } - Transaction::~Transaction() - { - if (m_image) - _aligned_free(m_image); - } + Transaction::~Transaction() + { + if (m_image) + _aligned_free(m_image); + } - void Process(std::shared_ptr& item) - { - Save(item->m_fmt, item->m_file, item->m_image, item->m_w, item->m_h, item->m_pitch, item->m_compression); - } + void Process(std::shared_ptr& item) + { + Save(item->m_fmt, item->m_file, item->m_image, item->m_w, item->m_h, item->m_pitch, item->m_compression); + } -} +} // namespace GSPng diff --git a/plugins/GSdx/GSPng.h b/plugins/GSdx/GSPng.h index 6046317ae8..b04ecbf7d3 100644 --- a/plugins/GSdx/GSPng.h +++ b/plugins/GSdx/GSPng.h @@ -22,37 +22,39 @@ #include "GSThread_CXX11.h" -namespace GSPng { - enum Format { - START = 0, - RGBA_PNG = 0, - RGB_PNG, - RGB_A_PNG, - ALPHA_PNG, - R8I_PNG, - R16I_PNG, - R32I_PNG, - COUNT - }; +namespace GSPng +{ + enum Format + { + START = 0, + RGBA_PNG = 0, + RGB_PNG, + RGB_A_PNG, + ALPHA_PNG, + R8I_PNG, + R16I_PNG, + R32I_PNG, + COUNT + }; class Transaction { - public: - Format m_fmt; - const std::string m_file; - uint8* m_image; - int m_w; - int m_h; - int m_pitch; - int m_compression; + public: + Format m_fmt; + const std::string m_file; + uint8* m_image; + int m_w; + int m_h; + int m_pitch; + int m_compression; - Transaction(GSPng::Format fmt, const std::string& file, const uint8* image, int w, int h, int pitch, int compression); - ~Transaction(); + Transaction(GSPng::Format fmt, const std::string& file, const uint8* image, int w, int h, int pitch, int compression); + ~Transaction(); }; - bool Save(GSPng::Format fmt, const std::string& file, uint8* image, int w, int h, int pitch, int compression, bool rb_swapped = false); + bool Save(GSPng::Format fmt, const std::string& file, uint8* image, int w, int h, int pitch, int compression, bool rb_swapped = false); - void Process(std::shared_ptr &item); + void Process(std::shared_ptr& item); - using Worker = GSJobQueue, 16>; -} + using Worker = GSJobQueue, 16>; +} // namespace GSPng diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 8bccb4d762..35762fbb5e 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -294,15 +294,15 @@ void GSState::ResetHandlers() m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D; m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP; -#define SetHandlerXYZ(P, auto_flush) \ - m_fpGIFPackedRegHandlerXYZ[P][0] = &GSState::GIFPackedRegHandlerXYZF2; \ - m_fpGIFPackedRegHandlerXYZ[P][1] = &GSState::GIFPackedRegHandlerXYZF2; \ - m_fpGIFPackedRegHandlerXYZ[P][2] = &GSState::GIFPackedRegHandlerXYZ2; \ - m_fpGIFPackedRegHandlerXYZ[P][3] = &GSState::GIFPackedRegHandlerXYZ2; \ - m_fpGIFRegHandlerXYZ[P][0] = &GSState::GIFRegHandlerXYZF2; \ - m_fpGIFRegHandlerXYZ[P][1] = &GSState::GIFRegHandlerXYZF2; \ - m_fpGIFRegHandlerXYZ[P][2] = &GSState::GIFRegHandlerXYZ2; \ - m_fpGIFRegHandlerXYZ[P][3] = &GSState::GIFRegHandlerXYZ2; \ +#define SetHandlerXYZ(P, auto_flush) \ + m_fpGIFPackedRegHandlerXYZ[P][0] = &GSState::GIFPackedRegHandlerXYZF2; \ + m_fpGIFPackedRegHandlerXYZ[P][1] = &GSState::GIFPackedRegHandlerXYZF2; \ + m_fpGIFPackedRegHandlerXYZ[P][2] = &GSState::GIFPackedRegHandlerXYZ2; \ + m_fpGIFPackedRegHandlerXYZ[P][3] = &GSState::GIFPackedRegHandlerXYZ2; \ + m_fpGIFRegHandlerXYZ[P][0] = &GSState::GIFRegHandlerXYZF2; \ + m_fpGIFRegHandlerXYZ[P][1] = &GSState::GIFRegHandlerXYZF2; \ + m_fpGIFRegHandlerXYZ[P][2] = &GSState::GIFRegHandlerXYZ2; \ + m_fpGIFRegHandlerXYZ[P][3] = &GSState::GIFRegHandlerXYZ2; \ m_fpGIFPackedRegHandlerSTQRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZF2; \ m_fpGIFPackedRegHandlerSTQRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZ2; diff --git a/plugins/GSdx/GSState.h b/plugins/GSdx/GSState.h index 993c85a5fc..6d9c6b7e11 100644 --- a/plugins/GSdx/GSState.h +++ b/plugins/GSdx/GSState.h @@ -176,15 +176,15 @@ protected: struct { - GSVertex* buff; + GSVertex* buff; size_t head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1 size_t xy_tail; uint64 xy[4]; - } m_vertex; + } m_vertex; struct { - uint32* buff; + uint32* buff; size_t tail; } m_index; @@ -195,7 +195,7 @@ protected: void GrowVertexBuffer(); - template + template void VertexKick(uint32 skip); // following functions need m_vt to be initialized @@ -269,16 +269,19 @@ public: void InitReadFIFO(uint8* mem, int len); void SoftReset(uint32 mask); - void WriteCSR(uint32 csr) {m_regs->CSR.u32[1] = csr;} + void WriteCSR(uint32 csr) { m_regs->CSR.u32[1] = csr; } void ReadFIFO(uint8* mem, int size); template void Transfer(const uint8* mem, uint32 size); int Freeze(GSFreezeData* fd, bool sizeonly); int Defrost(const GSFreezeData* fd); - void GetLastTag(uint32* tag) {*tag = m_path3hack; m_path3hack = 0;} + void GetLastTag(uint32* tag) + { + *tag = m_path3hack; + m_path3hack = 0; + } virtual void SetGameCRC(uint32 crc, int options); void SetFrameSkip(int skip); void SetRegsMem(uint8* basemem); void SetIrqCallback(void (*irq)()); void SetMultithreaded(bool mt = true); }; - diff --git a/plugins/GSdx/GSThread.h b/plugins/GSdx/GSThread.h index 9594a793f3..cfe390dcb3 100644 --- a/plugins/GSdx/GSThread.h +++ b/plugins/GSdx/GSThread.h @@ -72,7 +72,7 @@ public: // http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/4th-gen-core-family-desktop-specification-update.pdf) // This can cause builds for AVX2 CPUs to fail with GCC/Clang on Linux, // so check that the RTM instructions are actually available. - #if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__) +#if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__) int nretries = 0; @@ -104,7 +104,7 @@ public: } } - #endif +#endif fallBackLock.lock(); } @@ -115,12 +115,12 @@ public: { fallBackLock.unlock(); } - #if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__) +#if (_M_SSE >= 0x501 && !defined(__GNUC__)) || defined(__RTM__) else { _xend(); } - #endif +#endif } }; #endif diff --git a/plugins/GSdx/GSThread_CXX11.h b/plugins/GSdx/GSThread_CXX11.h index 5fa266fb04..00537ee4b4 100644 --- a/plugins/GSdx/GSThread_CXX11.h +++ b/plugins/GSdx/GSThread_CXX11.h @@ -24,7 +24,8 @@ #include "GSdx.h" #include "Utilities/boost_spsc_queue.hpp" -template class GSJobQueue final +template +class GSJobQueue final { private: std::thread m_thread; @@ -37,12 +38,15 @@ private: std::condition_variable m_empty; std::condition_variable m_notempty; - void ThreadProc() { + void ThreadProc() + { std::unique_lock l(m_lock); - while (true) { + while (true) + { - while (m_queue.empty()) { + while (m_queue.empty()) + { if (m_exit) return; @@ -64,9 +68,9 @@ private: } public: - GSJobQueue(std::function func) : - m_func(func), - m_exit(false) + GSJobQueue(std::function func) + : m_func(func) + , m_exit(false) { m_thread = std::thread(&GSJobQueue::ThreadProc, this); } @@ -87,8 +91,9 @@ public: return m_queue.empty(); } - void Push(const T& item) { - while(!m_queue.push(item)) + void Push(const T& item) + { + while (!m_queue.push(item)) std::this_thread::yield(); { @@ -109,7 +114,8 @@ public: assert(IsEmpty()); } - void operator() (T& item) { + void operator()(T& item) + { m_func(item); } }; diff --git a/plugins/GSdx/GSUtil.cpp b/plugins/GSdx/GSUtil.cpp index 08f8a352cd..7e138abb75 100644 --- a/plugins/GSdx/GSUtil.cpp +++ b/plugins/GSdx/GSUtil.cpp @@ -128,7 +128,7 @@ public: memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField)); - for(int i = 0; i < 64; i++) + for (int i = 0; i < 64; i++) { CompatibleBitsField[i][i >> 5] |= 1 << (i & 0x1f); } @@ -211,7 +211,8 @@ bool GSUtil::CheckSSE() { bool status = true; - struct ISA { + struct ISA + { Xbyak::util::Cpu::Type type; const char* name; }; @@ -228,8 +229,10 @@ bool GSUtil::CheckSSE() #endif }; - for (size_t i = 0; i < countof(checks); i++) { - if(!g_cpu.has(checks[i].type)) { + for (size_t i = 0; i < countof(checks); i++) + { + if (!g_cpu.has(checks[i].type)) + { fprintf(stderr, "This CPU does not support %s\n", checks[i].name); status = false; @@ -284,12 +287,12 @@ bool GSUtil::CheckD3D11() return s_D3D11 > 0; } -D3D_FEATURE_LEVEL GSUtil::CheckDirect3D11Level(IDXGIAdapter *adapter, D3D_DRIVER_TYPE type) +D3D_FEATURE_LEVEL GSUtil::CheckDirect3D11Level(IDXGIAdapter* adapter, D3D_DRIVER_TYPE type) { HRESULT hr; D3D_FEATURE_LEVEL level; - if(!CheckD3D11()) + if (!CheckD3D11()) return (D3D_FEATURE_LEVEL)0; hr = D3D11CreateDevice(adapter, type, NULL, 0, NULL, 0, D3D11_SDK_VERSION, NULL, &level, NULL); @@ -323,9 +326,11 @@ GSRendererType GSUtil::GetBestRenderer() #ifdef _WIN32 void GSmkdir(const wchar_t* dir) { - if (!CreateDirectory(dir, nullptr)) { + if (!CreateDirectory(dir, nullptr)) + { DWORD errorID = ::GetLastError(); - if (errorID != ERROR_ALREADY_EXISTS) { + if (errorID != ERROR_ALREADY_EXISTS) + { fprintf(stderr, "Failed to create directory: %ls error %u\n", dir, errorID); } } @@ -355,7 +360,8 @@ std::string GStempdir() const char* psm_str(int psm) { - switch(psm) { + switch (psm) + { // Normal color case PSM_PSMCT32: return "C_32"; case PSM_PSMCT24: return "C_24"; @@ -375,7 +381,7 @@ const char* psm_str(int psm) case PSM_PSMZ16: return "Z_16"; case PSM_PSMZ16S: return "Z_16S"; - case PSM_PSGPU24: return "PS24"; + case PSM_PSGPU24: return "PS24"; default:break; } diff --git a/plugins/GSdx/GSUtil.h b/plugins/GSdx/GSUtil.h index 0963600c30..ccbb699a8e 100644 --- a/plugins/GSdx/GSUtil.h +++ b/plugins/GSdx/GSUtil.h @@ -48,7 +48,7 @@ public: static bool CheckDXGI(); static bool CheckD3D11(); static GSRendererType GetBestRenderer(); - static D3D_FEATURE_LEVEL CheckDirect3D11Level(IDXGIAdapter *adapter = NULL, D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_HARDWARE); + static D3D_FEATURE_LEVEL CheckDirect3D11Level(IDXGIAdapter* adapter = NULL, D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_HARDWARE); #endif }; diff --git a/plugins/GSdx/GSVector.cpp b/plugins/GSdx/GSVector.cpp index c238af0ab6..0fb2c6292a 100644 --- a/plugins/GSdx/GSVector.cpp +++ b/plugins/GSdx/GSVector.cpp @@ -168,23 +168,25 @@ GSVector4i GSVector4i::fit(int arx, int ary) const { GSVector4i r = *this; - if(arx > 0 && ary > 0) + if (arx > 0 && ary > 0) { int w = width(); int h = height(); - if(w * ary > h * arx) + if (w * ary > h * arx) { w = h * arx / ary; r.left = (r.left + r.right - w) >> 1; - if(r.left & 1) r.left++; + if (r.left & 1) + r.left++; r.right = r.left + w; } else { h = w * ary / arx; r.top = (r.top + r.bottom - h) >> 1; - if(r.top & 1) r.top++; + if (r.top & 1) + r.top++; r.bottom = r.top + h; } @@ -204,7 +206,7 @@ GSVector4i GSVector4i::fit(int preset) const { GSVector4i r; - if(preset > 0 && preset < (int)countof(s_ar)) + if (preset > 0 && preset < (int)countof(s_ar)) { r = fit(s_ar[preset][0], s_ar[preset][1]); } diff --git a/plugins/GSdx/GSVector.h b/plugins/GSdx/GSVector.h index d5e2dfd350..a6989af29e 100644 --- a/plugins/GSdx/GSVector.h +++ b/plugins/GSdx/GSVector.h @@ -41,14 +41,15 @@ enum Round_Mode #pragma pack(push, 1) -template class GSVector2T +template +class GSVector2T { public: union { - struct {T x, y;}; - struct {T r, g;}; - struct {T v[2];}; + struct { T x, y; }; + struct { T r, g; }; + struct { T v[2]; }; }; GSVector2T() @@ -67,12 +68,12 @@ public: this->y = y; } - bool operator == (const GSVector2T& v) const + bool operator==(const GSVector2T& v) const { return x == v.x && y == v.y; } - bool operator != (const GSVector2T& v) const + bool operator!=(const GSVector2T& v) const { return x != v.x || y != v.y; } diff --git a/plugins/GSdx/GSVector4.h b/plugins/GSdx/GSVector4.h index fb0bea36f6..e4c0e14ffb 100644 --- a/plugins/GSdx/GSVector4.h +++ b/plugins/GSdx/GSVector4.h @@ -52,9 +52,9 @@ class alignas(16) GSVector4 public: union { - struct {float x, y, z, w;}; - struct {float r, g, b, a;}; - struct {float left, top, right, bottom;}; + struct { float x, y, z, w; }; + struct { float r, g, b, a; }; + struct { float left, top, right, bottom; }; float v[4]; float f32[4]; int8 i8[16]; @@ -147,19 +147,19 @@ public: __forceinline explicit GSVector4(int i) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 m = _mm_cvtepi32_ps(_mm_broadcastd_epi32(_mm_cvtsi32_si128(i))); - #else +#else GSVector4i v((int)i); *this = GSVector4(v); - #endif +#endif } - + __forceinline explicit GSVector4(uint32 u) { GSVector4i v((int)u); @@ -171,37 +171,37 @@ public: __forceinline static GSVector4 cast(const GSVector4i& v); - #if _M_SSE >= 0x500 +#if _M_SSE >= 0x500 __forceinline static GSVector4 cast(const GSVector8& v); - #endif +#endif - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 __forceinline static GSVector4 cast(const GSVector8i& v); - #endif +#endif - __forceinline void operator = (const GSVector4& v) + __forceinline void operator=(const GSVector4& v) { m = v.m; } - __forceinline void operator = (float f) + __forceinline void operator=(float f) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 - m = _mm_broadcastss_ps(_mm_load_ss(&f)); + m = _mm_broadcastss_ps(_mm_load_ss(&f)); - #else +#else m = _mm_set1_ps(f); - #endif +#endif } - __forceinline void operator = (__m128 m) + __forceinline void operator=(__m128 m) { this->m = m; } @@ -248,7 +248,8 @@ public: return (v + v) - (v * v) * *this; } - template __forceinline GSVector4 round() const + template + __forceinline GSVector4 round() const { return GSVector4(_mm_round_ps(m, mode)); } @@ -265,18 +266,18 @@ public: // http://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html - #define LOG_POLY0(x, c0) GSVector4(c0) - #define LOG_POLY1(x, c0, c1) (LOG_POLY0(x, c1).madd(x, GSVector4(c0))) - #define LOG_POLY2(x, c0, c1, c2) (LOG_POLY1(x, c1, c2).madd(x, GSVector4(c0))) - #define LOG_POLY3(x, c0, c1, c2, c3) (LOG_POLY2(x, c1, c2, c3).madd(x, GSVector4(c0))) - #define LOG_POLY4(x, c0, c1, c2, c3, c4) (LOG_POLY3(x, c1, c2, c3, c4).madd(x, GSVector4(c0))) - #define LOG_POLY5(x, c0, c1, c2, c3, c4, c5) (LOG_POLY4(x, c1, c2, c3, c4, c5).madd(x, GSVector4(c0))) +#define LOG_POLY0(x, c0) GSVector4(c0) +#define LOG_POLY1(x, c0, c1) (LOG_POLY0(x, c1).madd(x, GSVector4(c0))) +#define LOG_POLY2(x, c0, c1, c2) (LOG_POLY1(x, c1, c2).madd(x, GSVector4(c0))) +#define LOG_POLY3(x, c0, c1, c2, c3) (LOG_POLY2(x, c1, c2, c3).madd(x, GSVector4(c0))) +#define LOG_POLY4(x, c0, c1, c2, c3, c4) (LOG_POLY3(x, c1, c2, c3, c4).madd(x, GSVector4(c0))) +#define LOG_POLY5(x, c0, c1, c2, c3, c4, c5) (LOG_POLY4(x, c1, c2, c3, c4, c5).madd(x, GSVector4(c0))) __forceinline GSVector4 log2(int precision = 5) const { // NOTE: sign bit ignored, safe to pass negative numbers - // The idea behind this algorithm is to split the float into two parts, log2(m * 2^e) => log2(m) + log2(2^e) => log2(m) + e, + // The idea behind this algorithm is to split the float into two parts, log2(m * 2^e) => log2(m) + log2(2^e) => log2(m) + e, // and then approximate the logarithm of the mantissa (it's 1.x when normalized, a nice short range). GSVector4 one = m_one; @@ -290,21 +291,21 @@ public: // Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ - switch(precision) + switch (precision) { - case 3: - p = LOG_POLY2(m, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f); - break; - case 4: - p = LOG_POLY3(m, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f); - break; - default: - case 5: - p = LOG_POLY4(m, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f); - break; - case 6: - p = LOG_POLY5(m, 3.1157899f, -3.3241990f, 2.5988452f, -1.2315303f, 3.1821337e-1f, -3.4436006e-2f); - break; + case 3: + p = LOG_POLY2(m, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f); + break; + case 4: + p = LOG_POLY3(m, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f); + break; + default: + case 5: + p = LOG_POLY4(m, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f); + break; + case 6: + p = LOG_POLY5(m, 3.1157899f, -3.3241990f, 2.5988452f, -1.2315303f, 3.1821337e-1f, -3.4436006e-2f); + break; } // This effectively increases the polynomial degree by one, but ensures that log2(1) == 0 @@ -316,54 +317,54 @@ public: __forceinline GSVector4 madd(const GSVector4& a, const GSVector4& b) const { - #if 0//_M_SSE >= 0x501 +#if 0 //_M_SSE >= 0x501 return GSVector4(_mm_fmadd_ps(m, a, b)); - - #else - + +#else + return *this * a + b; - - #endif + +#endif } __forceinline GSVector4 msub(const GSVector4& a, const GSVector4& b) const { - #if 0//_M_SSE >= 0x501 +#if 0 //_M_SSE >= 0x501 return GSVector4(_mm_fmsub_ps(m, a, b)); - - #else - + +#else + return *this * a - b; - - #endif + +#endif } __forceinline GSVector4 nmadd(const GSVector4& a, const GSVector4& b) const { - #if 0//_M_SSE >= 0x501 +#if 0 //_M_SSE >= 0x501 return GSVector4(_mm_fnmadd_ps(m, a, b)); - - #else - + +#else + return b - *this * a; - - #endif + +#endif } __forceinline GSVector4 nmsub(const GSVector4& a, const GSVector4& b) const { - #if 0//_M_SSE >= 0x501 +#if 0 //_M_SSE >= 0x501 return GSVector4(_mm_fnmsub_ps(m, a, b)); - - #else + +#else return -b - *this * a; - #endif +#endif } __forceinline GSVector4 addm(const GSVector4& a, const GSVector4& b) const @@ -396,7 +397,8 @@ public: return GSVector4(_mm_hsub_ps(m, v.m)); } - template __forceinline GSVector4 dp(const GSVector4& v) const + template + __forceinline GSVector4 dp(const GSVector4& v) const { return GSVector4(_mm_dp_ps(m, v.m, i)); } @@ -431,12 +433,13 @@ public: return GSVector4(_mm_max_ps(m, a)); } - template __forceinline GSVector4 blend32(const GSVector4& a) const + template + __forceinline GSVector4 blend32(const GSVector4& a) const { return GSVector4(_mm_blend_ps(m, a, mask)); } - __forceinline GSVector4 blend32(const GSVector4& a, const GSVector4& mask) const + __forceinline GSVector4 blend32(const GSVector4& a, const GSVector4& mask) const { return GSVector4(_mm_blendv_ps(m, a, mask)); } @@ -488,7 +491,7 @@ public: __forceinline bool allfalse() const { - #if _M_SSE >= 0x500 +#if _M_SSE >= 0x500 return _mm_testz_ps(m, m) != 0; @@ -498,7 +501,7 @@ public: return _mm_testz_si128(a, a) != 0; - #endif +#endif } __forceinline GSVector4 replace_nan(const GSVector4& v) const @@ -506,65 +509,66 @@ public: return v.blend32(*this, *this == *this); } - template __forceinline GSVector4 insert32(const GSVector4& v) const + template + __forceinline GSVector4 insert32(const GSVector4& v) const { // TODO: use blendps when src == dst - #if 0 // _M_SSE >= 0x401 +#if 0 // _M_SSE >= 0x401 // NOTE: it's faster with shuffles... return GSVector4(_mm_insert_ps(m, v.m, _MM_MK_INSERTPS_NDX(src, dst, 0))); - #else +#else - switch(dst) + switch (dst) { - case 0: - switch(src) - { - case 0: return yyxx(v).zxzw(*this); - case 1: return yyyy(v).zxzw(*this); - case 2: return yyzz(v).zxzw(*this); - case 3: return yyww(v).zxzw(*this); - default: __assume(0); - } - break; - case 1: - switch(src) - { - case 0: return xxxx(v).xzzw(*this); - case 1: return xxyy(v).xzzw(*this); - case 2: return xxzz(v).xzzw(*this); - case 3: return xxww(v).xzzw(*this); - default: __assume(0); - } - break; - case 2: - switch(src) - { - case 0: return xyzx(wwxx(v)); - case 1: return xyzx(wwyy(v)); - case 2: return xyzx(wwzz(v)); - case 3: return xyzx(wwww(v)); - default: __assume(0); - } - break; - case 3: - switch(src) - { - case 0: return xyxz(zzxx(v)); - case 1: return xyxz(zzyy(v)); - case 2: return xyxz(zzzz(v)); - case 3: return xyxz(zzww(v)); - default: __assume(0); - } - break; - default: - __assume(0); + case 0: + switch (src) + { + case 0: return yyxx(v).zxzw(*this); + case 1: return yyyy(v).zxzw(*this); + case 2: return yyzz(v).zxzw(*this); + case 3: return yyww(v).zxzw(*this); + default: __assume(0); + } + break; + case 1: + switch (src) + { + case 0: return xxxx(v).xzzw(*this); + case 1: return xxyy(v).xzzw(*this); + case 2: return xxzz(v).xzzw(*this); + case 3: return xxww(v).xzzw(*this); + default: __assume(0); + } + break; + case 2: + switch (src) + { + case 0: return xyzx(wwxx(v)); + case 1: return xyzx(wwyy(v)); + case 2: return xyzx(wwzz(v)); + case 3: return xyzx(wwww(v)); + default: __assume(0); + } + break; + case 3: + switch (src) + { + case 0: return xyxz(zzxx(v)); + case 1: return xyxz(zzyy(v)); + case 2: return xyxz(zzzz(v)); + case 3: return xyxz(zzww(v)); + default: __assume(0); + } + break; + default: + __assume(0); } - #endif +#endif } #ifdef __linux__ @@ -582,12 +586,14 @@ GSVector.h:2973:15: error: shadows template parm 'int i' template __forceinline int extract32() const #endif - template __forceinline int extract32() const + template + __forceinline int extract32() const { return _mm_extract_ps(m, index); } #else - template __forceinline int extract32() const + template + __forceinline int extract32() const { return _mm_extract_ps(m, i); } @@ -630,7 +636,8 @@ GSVector.h:2973:15: error: shadows template parm 'int i' return GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31))); } - template __forceinline static GSVector4 load(const void* p) + template + __forceinline static GSVector4 load(const void* p) { return GSVector4(aligned ? _mm_load_ps((const float*)p) : _mm_loadu_ps((const float*)p)); } @@ -650,10 +657,13 @@ GSVector.h:2973:15: error: shadows template parm 'int i' _mm_storeh_pd((double*)p, _mm_castps_pd(v.m)); } - template __forceinline static void store(void* p, const GSVector4& v) + template + __forceinline static void store(void* p, const GSVector4& v) { - if(aligned) _mm_store_ps((float*)p, v.m); - else _mm_storeu_ps((float*)p, v.m); + if (aligned) + _mm_store_ps((float*)p, v.m); + else + _mm_storeu_ps((float*)p, v.m); } __forceinline static void store(float* p, const GSVector4& v) @@ -710,156 +720,157 @@ GSVector.h:2973:15: error: shadows template parm 'int i' b = v2.h2l(v0); c = v1.l2h(v3); d = v3.h2l(v1); -*/ } +*/ + } - __forceinline GSVector4 operator - () const + __forceinline GSVector4 operator-() const { return neg(); } - __forceinline void operator += (const GSVector4& v) + __forceinline void operator+=(const GSVector4& v) { m = _mm_add_ps(m, v); } - __forceinline void operator -= (const GSVector4& v) + __forceinline void operator-=(const GSVector4& v) { m = _mm_sub_ps(m, v); } - __forceinline void operator *= (const GSVector4& v) + __forceinline void operator*=(const GSVector4& v) { m = _mm_mul_ps(m, v); } - __forceinline void operator /= (const GSVector4& v) + __forceinline void operator/=(const GSVector4& v) { m = _mm_div_ps(m, v); } - __forceinline void operator += (float f) + __forceinline void operator+=(float f) { *this += GSVector4(f); } - __forceinline void operator -= (float f) + __forceinline void operator-=(float f) { *this -= GSVector4(f); } - __forceinline void operator *= (float f) + __forceinline void operator*=(float f) { *this *= GSVector4(f); } - __forceinline void operator /= (float f) + __forceinline void operator/=(float f) { *this /= GSVector4(f); } - __forceinline void operator &= (const GSVector4& v) + __forceinline void operator&=(const GSVector4& v) { m = _mm_and_ps(m, v); } - __forceinline void operator |= (const GSVector4& v) + __forceinline void operator|=(const GSVector4& v) { m = _mm_or_ps(m, v); } - __forceinline void operator ^= (const GSVector4& v) + __forceinline void operator^=(const GSVector4& v) { m = _mm_xor_ps(m, v); } - __forceinline friend GSVector4 operator + (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator+(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_add_ps(v1, v2)); } - __forceinline friend GSVector4 operator - (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator-(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_sub_ps(v1, v2)); } - __forceinline friend GSVector4 operator * (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator*(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_mul_ps(v1, v2)); } - __forceinline friend GSVector4 operator / (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator/(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_div_ps(v1, v2)); } - __forceinline friend GSVector4 operator + (const GSVector4& v, float f) + __forceinline friend GSVector4 operator+(const GSVector4& v, float f) { return v + GSVector4(f); } - __forceinline friend GSVector4 operator - (const GSVector4& v, float f) + __forceinline friend GSVector4 operator-(const GSVector4& v, float f) { return v - GSVector4(f); } - __forceinline friend GSVector4 operator * (const GSVector4& v, float f) + __forceinline friend GSVector4 operator*(const GSVector4& v, float f) { return v * GSVector4(f); } - __forceinline friend GSVector4 operator / (const GSVector4& v, float f) + __forceinline friend GSVector4 operator/(const GSVector4& v, float f) { return v / GSVector4(f); } - __forceinline friend GSVector4 operator & (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator&(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_and_ps(v1, v2)); } - __forceinline friend GSVector4 operator | (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator|(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_or_ps(v1, v2)); } - __forceinline friend GSVector4 operator ^ (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator^(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_xor_ps(v1, v2)); } - __forceinline friend GSVector4 operator == (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator==(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_cmpeq_ps(v1, v2)); } - __forceinline friend GSVector4 operator != (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator!=(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_cmpneq_ps(v1, v2)); } - __forceinline friend GSVector4 operator > (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator>(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_cmpgt_ps(v1, v2)); } - __forceinline friend GSVector4 operator < (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator<(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_cmplt_ps(v1, v2)); } - __forceinline friend GSVector4 operator >= (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator>=(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_cmpge_ps(v1, v2)); } - __forceinline friend GSVector4 operator <= (const GSVector4& v1, const GSVector4& v2) + __forceinline friend GSVector4 operator<=(const GSVector4& v1, const GSVector4& v2) { return GSVector4(_mm_cmple_ps(v1, v2)); } #define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ - __forceinline GSVector4 xs##ys##zs##ws() const {return GSVector4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector4 xs##ys##zs##ws(const GSVector4& v) const {return GSVector4(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ + __forceinline GSVector4 xs##ys##zs##ws() const { return GSVector4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn))); } \ + __forceinline GSVector4 xs##ys##zs##ws(const GSVector4& v) const { return GSVector4(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn))); } \ #define VECTOR4_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \ VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \ @@ -884,7 +895,7 @@ GSVector.h:2973:15: error: shadows template parm 'int i' VECTOR4_SHUFFLE_1(z, 2) VECTOR4_SHUFFLE_1(w, 3) - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 __forceinline GSVector4 broadcast32() const { @@ -901,5 +912,5 @@ GSVector.h:2973:15: error: shadows template parm 'int i' return GSVector4(_mm_broadcastss_ps(_mm_load_ss((const float*)f))); } - #endif +#endif }; diff --git a/plugins/GSdx/GSVector4i.h b/plugins/GSdx/GSVector4i.h index e3c6dc5808..03f731aae1 100644 --- a/plugins/GSdx/GSVector4i.h +++ b/plugins/GSdx/GSVector4i.h @@ -54,15 +54,15 @@ class alignas(16) GSVector4i public: union { - struct {int x, y, z, w;}; - struct {int r, g, b, a;}; - struct {int left, top, right, bottom;}; + struct { int x, y, z, w; }; + struct { int r, g, b, a; }; + struct { int left, top, right, bottom; }; int v[4]; float f32[4]; int8 i8[16]; int16 i16[8]; int32 i32[4]; - int64 i64[2]; + int64 i64[2]; uint8 u8[16]; uint16 u16[8]; uint32 u32[4]; @@ -70,7 +70,8 @@ public: __m128i m; }; - __forceinline constexpr GSVector4i(): x(0), y(0), z(0), w(0) + __forceinline constexpr GSVector4i() + : x(0), y(0), z(0), w(0) { } @@ -138,37 +139,37 @@ public: __forceinline static GSVector4i cast(const GSVector4& v); - #if _M_SSE >= 0x500 +#if _M_SSE >= 0x500 __forceinline static GSVector4i cast(const GSVector8& v); - #endif +#endif - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 __forceinline static GSVector4i cast(const GSVector8i& v); - #endif +#endif - __forceinline void operator = (const GSVector4i& v) + __forceinline void operator=(const GSVector4i& v) { m = v.m; } - __forceinline void operator = (int i) + __forceinline void operator=(int i) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 m = _mm_broadcastd_epi32(_mm_cvtsi32_si128(i)); - #else +#else m = _mm_set1_epi32(i); - #endif +#endif } - __forceinline void operator = (__m128i m) + __forceinline void operator=(__m128i m) { this->m = m; } @@ -209,17 +210,17 @@ public: { int i = (upl64(a) < uph64(a)).mask(); - if(i == 0xffff) + if (i == 0xffff) { return runion_ordered(a); } - if((i & 0x00ff) == 0x00ff) + if ((i & 0x00ff) == 0x00ff) { return *this; } - if((i & 0xff00) == 0xff00) + if ((i & 0xff00) == 0xff00) { return a; } @@ -237,7 +238,8 @@ public: return sat_i32(a); } - template __forceinline GSVector4i ralign(const GSVector2i& a) const + template + __forceinline GSVector4i ralign(const GSVector2i& a) const { // a must be 1 << n @@ -245,13 +247,13 @@ public: GSVector4i v; - switch(mode) + switch (mode) { - case Align_Inside: v = *this + mask; break; - case Align_Outside: v = *this + mask.zwxy(); break; - case Align_NegInf: v = *this; break; - case Align_PosInf: v = *this + mask.zwzw(); break; - default: ASSERT(0); break; + case Align_Inside: v = *this + mask; break; + case Align_Outside: v = *this + mask.zwxy(); break; + case Align_NegInf: v = *this; break; + case Align_PosInf: v = *this + mask.zwzw(); break; + default: ASSERT(0); break; } return v.andnot(mask.xyxy()); @@ -261,7 +263,7 @@ public: GSVector4i fit(int preset) const; - #ifdef _WIN32 +#ifdef _WIN32 __forceinline operator LPCRECT() const { @@ -273,7 +275,7 @@ public: return (LPRECT)this; } - #endif +#endif // @@ -409,7 +411,7 @@ public: __forceinline static int min_i16(int a, int b) { - return store(load(a).min_i16(load(b))); + return store(load(a).min_i16(load(b))); } __forceinline GSVector4i clamp8() const @@ -422,19 +424,21 @@ public: return GSVector4i(_mm_blendv_epi8(m, a, mask)); } - template __forceinline GSVector4i blend16(const GSVector4i& a) const + template + __forceinline GSVector4i blend16(const GSVector4i& a) const { return GSVector4i(_mm_blend_epi16(m, a, mask)); } - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 - template __forceinline GSVector4i blend32(const GSVector4i& v) const + template + __forceinline GSVector4i blend32(const GSVector4i& v) const { return GSVector4i(_mm_blend_epi32(m, v.m, mask)); } - #endif +#endif __forceinline GSVector4i blend(const GSVector4i& a, const GSVector4i& mask) const { @@ -533,15 +537,15 @@ public: __forceinline GSVector4i upl8() const { - #if 0 // _M_SSE >= 0x401 // TODO: compiler bug +#if 0 // _M_SSE >= 0x401 // TODO: compiler bug return GSVector4i(_mm_cvtepu8_epi16(m)); - #else +#else return GSVector4i(_mm_unpacklo_epi8(m, _mm_setzero_si128())); - #endif +#endif } __forceinline GSVector4i uph8() const @@ -551,15 +555,15 @@ public: __forceinline GSVector4i upl16() const { - #if 0 //_M_SSE >= 0x401 // TODO: compiler bug +#if 0 //_M_SSE >= 0x401 // TODO: compiler bug return GSVector4i(_mm_cvtepu16_epi32(m)); - #else +#else return GSVector4i(_mm_unpacklo_epi16(m, _mm_setzero_si128())); - #endif +#endif } __forceinline GSVector4i uph16() const @@ -569,15 +573,15 @@ public: __forceinline GSVector4i upl32() const { - #if 0 //_M_SSE >= 0x401 // TODO: compiler bug +#if 0 //_M_SSE >= 0x401 // TODO: compiler bug return GSVector4i(_mm_cvtepu32_epi64(m)); - #else +#else return GSVector4i(_mm_unpacklo_epi32(m, _mm_setzero_si128())); - #endif +#endif } __forceinline GSVector4i uph32() const @@ -660,17 +664,20 @@ public: return GSVector4i(_mm_cvtepu32_epi64(m)); } - template __forceinline GSVector4i srl() const + template + __forceinline GSVector4i srl() const { return GSVector4i(_mm_srli_si128(m, i)); } - template __forceinline GSVector4i srl(const GSVector4i& v) + template + __forceinline GSVector4i srl(const GSVector4i& v) { return GSVector4i(_mm_alignr_epi8(v.m, m, i)); } - template __forceinline GSVector4i sll() const + template + __forceinline GSVector4i sll() const { return GSVector4i(_mm_slli_si128(m, i)); } @@ -874,21 +881,24 @@ public: return GSVector4i(_mm_madd_epi16(m, v.m)); } - template __forceinline GSVector4i lerp16(const GSVector4i& a, const GSVector4i& f) const + template + __forceinline GSVector4i lerp16(const GSVector4i& a, const GSVector4i& f) const { // (a - this) * f << shift + this return add16(a.sub16(*this).modulate16(f)); } - template __forceinline static GSVector4i lerp16(const GSVector4i& a, const GSVector4i& b, const GSVector4i& c) + template + __forceinline static GSVector4i lerp16(const GSVector4i& a, const GSVector4i& b, const GSVector4i& c) { // (a - b) * c << shift return a.sub16(b).modulate16(c); } - template __forceinline static GSVector4i lerp16(const GSVector4i& a, const GSVector4i& b, const GSVector4i& c, const GSVector4i& d) + template + __forceinline static GSVector4i lerp16(const GSVector4i& a, const GSVector4i& b, const GSVector4i& c, const GSVector4i& d) { // (a - b) * c << shift + d @@ -902,18 +912,19 @@ public: return add16(a.sub16(*this).mul16l(f).sra16(4)); } - template __forceinline GSVector4i modulate16(const GSVector4i& f) const + template + __forceinline GSVector4i modulate16(const GSVector4i& f) const { // a * f << shift - #if _M_SSE >= 0x301 +#if _M_SSE >= 0x301 - if(shift == 0) + if (shift == 0) { return mul16hrs(f); } - #endif +#endif return sll16(shift + 1).mul16hs(f); } @@ -921,9 +932,9 @@ public: __forceinline bool eq(const GSVector4i& v) const { // pxor, ptest, je - + GSVector4i t = *this ^ v; - + return _mm_testz_si128(t, t) != 0; } @@ -1007,56 +1018,66 @@ public: return _mm_testz_si128(m, m) != 0; } - template __forceinline GSVector4i insert8(int a) const + template + __forceinline GSVector4i insert8(int a) const { return GSVector4i(_mm_insert_epi8(m, a, i)); } - template __forceinline int extract8() const + template + __forceinline int extract8() const { return _mm_extract_epi8(m, i); } - template __forceinline GSVector4i insert16(int a) const + template + __forceinline GSVector4i insert16(int a) const { return GSVector4i(_mm_insert_epi16(m, a, i)); } - template __forceinline int extract16() const + template + __forceinline int extract16() const { return _mm_extract_epi16(m, i); } - template __forceinline GSVector4i insert32(int a) const + template + __forceinline GSVector4i insert32(int a) const { return GSVector4i(_mm_insert_epi32(m, a, i)); } - template __forceinline int extract32() const + template + __forceinline int extract32() const { - if(i == 0) return GSVector4i::store(*this); + if (i == 0) + return GSVector4i::store(*this); return _mm_extract_epi32(m, i); } - #ifdef _M_AMD64 +#ifdef _M_AMD64 - template __forceinline GSVector4i insert64(int64 a) const + template + __forceinline GSVector4i insert64(int64 a) const { return GSVector4i(_mm_insert_epi64(m, a, i)); } - template __forceinline int64 extract64() const + template + __forceinline int64 extract64() const { - if(i == 0) return GSVector4i::storeq(*this); + if (i == 0) + return GSVector4i::storeq(*this); return _mm_extract_epi64(m, i); } - #endif +#endif - - template __forceinline GSVector4i gather8_4(const T* ptr) const + template + __forceinline GSVector4i gather8_4(const T* ptr) const { GSVector4i v; @@ -1080,7 +1101,8 @@ public: return v; } - template __forceinline GSVector4i gather8_8(const T* ptr) const + template + __forceinline GSVector4i gather8_8(const T* ptr) const { GSVector4i v; @@ -1104,7 +1126,8 @@ public: return v; } - template __forceinline GSVector4i gather8_16(const T* ptr, const GSVector4i& a) const + template + __forceinline GSVector4i gather8_16(const T* ptr, const GSVector4i& a) const { GSVector4i v = a; @@ -1120,7 +1143,8 @@ public: return v; } - template __forceinline GSVector4i gather8_32(const T* ptr, const GSVector4i& a) const + template + __forceinline GSVector4i gather8_32(const T* ptr, const GSVector4i& a) const { GSVector4i v = a; @@ -1132,7 +1156,8 @@ public: return v; } - template __forceinline GSVector4i gather16_4(const T* ptr) const + template + __forceinline GSVector4i gather16_4(const T* ptr) const { GSVector4i v; @@ -1148,7 +1173,8 @@ public: return v; } - template __forceinline GSVector4i gather16_8(const T* ptr) const + template + __forceinline GSVector4i gather16_8(const T* ptr) const { GSVector4i v; @@ -1164,7 +1190,8 @@ public: return v; } - template__forceinline GSVector4i gather16_16(const T* ptr) const + template + __forceinline GSVector4i gather16_16(const T* ptr) const { GSVector4i v; @@ -1180,7 +1207,8 @@ public: return v; } - template__forceinline GSVector4i gather16_16(const T1* ptr1, const T2* ptr2) const + template + __forceinline GSVector4i gather16_16(const T1* ptr1, const T2* ptr2) const { GSVector4i v; @@ -1196,7 +1224,8 @@ public: return v; } - template __forceinline GSVector4i gather16_32(const T* ptr, const GSVector4i& a) const + template + __forceinline GSVector4i gather16_32(const T* ptr, const GSVector4i& a) const { GSVector4i v = a; @@ -1208,7 +1237,8 @@ public: return v; } - template __forceinline GSVector4i gather32_4(const T* ptr) const + template + __forceinline GSVector4i gather32_4(const T* ptr) const { GSVector4i v; @@ -1219,7 +1249,8 @@ public: return v; } - template __forceinline GSVector4i gather32_8(const T* ptr) const + template + __forceinline GSVector4i gather32_8(const T* ptr) const { GSVector4i v; @@ -1231,7 +1262,8 @@ public: return v; } - template __forceinline GSVector4i gather32_16(const T* ptr) const + template + __forceinline GSVector4i gather32_16(const T* ptr) const { GSVector4i v; @@ -1243,7 +1275,8 @@ public: return v; } - template __forceinline GSVector4i gather32_32(const T* ptr) const + template + __forceinline GSVector4i gather32_32(const T* ptr) const { GSVector4i v; @@ -1255,7 +1288,8 @@ public: return v; } - template __forceinline GSVector4i gather32_32(const T1* ptr1, const T2* ptr2) const + template + __forceinline GSVector4i gather32_32(const T1* ptr1, const T2* ptr2) const { GSVector4i v; @@ -1267,9 +1301,10 @@ public: return v; } - #if defined(_M_AMD64) +#if defined(_M_AMD64) - template __forceinline GSVector4i gather64_4(const T* ptr) const + template + __forceinline GSVector4i gather64_4(const T* ptr) const { GSVector4i v; @@ -1279,7 +1314,8 @@ public: return v; } - template __forceinline GSVector4i gather64_8(const T* ptr) const + template + __forceinline GSVector4i gather64_8(const T* ptr) const { GSVector4i v; @@ -1289,7 +1325,8 @@ public: return v; } - template __forceinline GSVector4i gather64_16(const T* ptr) const + template + __forceinline GSVector4i gather64_16(const T* ptr) const { GSVector4i v; @@ -1299,7 +1336,8 @@ public: return v; } - template __forceinline GSVector4i gather64_32(const T* ptr) const + template + __forceinline GSVector4i gather64_32(const T* ptr) const { GSVector4i v; @@ -1309,7 +1347,8 @@ public: return v; } - template __forceinline GSVector4i gather64_64(const T* ptr) const + template + __forceinline GSVector4i gather64_64(const T* ptr) const { GSVector4i v; @@ -1319,9 +1358,10 @@ public: return v; } - #else +#else - template __forceinline GSVector4i gather64_4(const T* ptr) const + template + __forceinline GSVector4i gather64_4(const T* ptr) const { GSVector4i v; @@ -1330,7 +1370,8 @@ public: return v; } - template __forceinline GSVector4i gather64_8(const T* ptr) const + template + __forceinline GSVector4i gather64_8(const T* ptr) const { GSVector4i v; @@ -1339,7 +1380,8 @@ public: return v; } - template __forceinline GSVector4i gather64_16(const T* ptr) const + template + __forceinline GSVector4i gather64_16(const T* ptr) const { GSVector4i v; @@ -1348,7 +1390,8 @@ public: return v; } - template __forceinline GSVector4i gather64_32(const T* ptr) const + template + __forceinline GSVector4i gather64_32(const T* ptr) const { GSVector4i v; @@ -1357,9 +1400,10 @@ public: return v; } - #endif +#endif - template __forceinline void gather8_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather8_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather8_4<0>(ptr); dst[1] = gather8_4<8>(ptr); @@ -1370,7 +1414,8 @@ public: dst[0] = gather8_8<>(ptr); } - template __forceinline void gather16_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather16_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather16_4<0>(ptr); dst[1] = gather16_4<4>(ptr); @@ -1378,18 +1423,21 @@ public: dst[3] = gather16_4<12>(ptr); } - template __forceinline void gather16_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather16_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather16_8<0>(ptr); dst[1] = gather16_8<8>(ptr); } - template __forceinline void gather16_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather16_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather16_16<>(ptr); } - template __forceinline void gather32_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather32_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather32_4<0>(ptr); dst[1] = gather32_4<2>(ptr); @@ -1401,7 +1449,8 @@ public: dst[7] = gather32_4<14>(ptr); } - template __forceinline void gather32_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather32_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather32_8<0>(ptr); dst[1] = gather32_8<4>(ptr); @@ -1409,18 +1458,21 @@ public: dst[3] = gather32_8<12>(ptr); } - template __forceinline void gather32_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather32_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather32_16<0>(ptr); dst[1] = gather32_16<4>(ptr); } - template __forceinline void gather32_32(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather32_32(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather32_32<>(ptr); } - template __forceinline void gather64_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather64_4(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather64_4<0>(ptr); dst[1] = gather64_4<1>(ptr); @@ -1440,7 +1492,8 @@ public: dst[15] = gather64_4<15>(ptr); } - template __forceinline void gather64_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather64_8(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather64_8<0>(ptr); dst[1] = gather64_8<2>(ptr); @@ -1452,7 +1505,8 @@ public: dst[7] = gather64_8<14>(ptr); } - template __forceinline void gather64_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather64_16(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather64_16<0>(ptr); dst[1] = gather64_16<2>(ptr); @@ -1460,20 +1514,22 @@ public: dst[3] = gather64_16<8>(ptr); } - template __forceinline void gather64_32(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather64_32(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather64_32<0>(ptr); dst[1] = gather64_32<2>(ptr); } - #ifdef _M_AMD64 +#ifdef _M_AMD64 - template __forceinline void gather64_64(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const + template + __forceinline void gather64_64(const T* RESTRICT ptr, GSVector4i* RESTRICT dst) const { dst[0] = gather64_64<>(ptr); } - #endif +#endif __forceinline static GSVector4i loadnt(const void* p) { @@ -1508,7 +1564,8 @@ public: return GSVector4i(_mm_unpacklo_epi64(lo, hi)); } */ - template __forceinline static GSVector4i load(const void* p) + template + __forceinline static GSVector4i load(const void* p) { return GSVector4i(aligned ? _mm_load_si128((__m128i*)p) : _mm_loadu_si128((__m128i*)p)); } @@ -1518,14 +1575,14 @@ public: return GSVector4i(_mm_cvtsi32_si128(i)); } - #ifdef _M_AMD64 +#ifdef _M_AMD64 __forceinline static GSVector4i loadq(int64 i) { return GSVector4i(_mm_cvtsi64_si128(i)); } - #endif +#endif __forceinline static void storent(void* p, const GSVector4i& v) { @@ -1548,10 +1605,13 @@ public: GSVector4i::storeh(ph, v); } - template __forceinline static void store(void* p, const GSVector4i& v) + template + __forceinline static void store(void* p, const GSVector4i& v) { - if(aligned) _mm_store_si128((__m128i*)p, v.m); - else _mm_storeu_si128((__m128i*)p, v.m); + if (aligned) + _mm_store_si128((__m128i*)p, v.m); + else + _mm_storeu_si128((__m128i*)p, v.m); } __forceinline static int store(const GSVector4i& v) @@ -1559,26 +1619,27 @@ public: return _mm_cvtsi128_si32(v.m); } - #ifdef _M_AMD64 +#ifdef _M_AMD64 __forceinline static int64 storeq(const GSVector4i& v) { return _mm_cvtsi128_si64(v.m); } - #endif +#endif __forceinline static void storent(void* RESTRICT dst, const void* RESTRICT src, size_t size) { const GSVector4i* s = (const GSVector4i*)src; GSVector4i* d = (GSVector4i*)dst; - if(size == 0) return; + if (size == 0) + return; size_t i = 0; size_t j = size >> 6; - for(; i < j; i++, s += 4, d += 4) + for (; i < j; i++, s += 4, d += 4) { storent(&d[0], s[0]); storent(&d[1], s[1]); @@ -1588,7 +1649,8 @@ public: size &= 63; - if(size == 0) return; + if (size == 0) + return; memcpy(d, s, size); } @@ -1690,9 +1752,9 @@ public: GSVector4i* s = (GSVector4i*)src; GSVector4i* d = (GSVector4i*)dst; - for(size_t i = 0; i < size; i++) + for (size_t i = 0; i < size; i++) { - if(!d[i].eq(s[i])) + if (!d[i].eq(s[i])) { return false; } @@ -1710,7 +1772,7 @@ public: GSVector4i* s = (GSVector4i*)src; GSVector4i* d = (GSVector4i*)dst; - for(size_t i = 0; i < size; ++i) + for (size_t i = 0; i < size; ++i) { GSVector4i v0 = (d[i * 4 + 0] == s[i * 4 + 0]); GSVector4i v1 = (d[i * 4 + 1] == s[i * 4 + 1]); @@ -1720,7 +1782,7 @@ public: v0 = v0 & v1; v2 = v2 & v3; - if(!(v0 & v2).alltrue()) + if (!(v0 & v2).alltrue()) { return false; } @@ -1740,7 +1802,7 @@ public: GSVector4i v = GSVector4i::xffffffff(); - for(size_t i = 0; i < size; i++) + for (size_t i = 0; i < size; i++) { v &= d[i] == s[i]; @@ -1750,142 +1812,142 @@ public: return v.alltrue(); } - __forceinline void operator += (const GSVector4i& v) + __forceinline void operator+=(const GSVector4i& v) { m = _mm_add_epi32(m, v); } - __forceinline void operator -= (const GSVector4i& v) + __forceinline void operator-=(const GSVector4i& v) { m = _mm_sub_epi32(m, v); } - __forceinline void operator += (int i) + __forceinline void operator+=(int i) { *this += GSVector4i(i); } - __forceinline void operator -= (int i) + __forceinline void operator-=(int i) { *this -= GSVector4i(i); } - __forceinline void operator <<= (const int i) + __forceinline void operator<<=(const int i) { m = _mm_slli_epi32(m, i); } - __forceinline void operator >>= (const int i) + __forceinline void operator>>=(const int i) { m = _mm_srli_epi32(m, i); } - __forceinline void operator &= (const GSVector4i& v) + __forceinline void operator&=(const GSVector4i& v) { m = _mm_and_si128(m, v); } - __forceinline void operator |= (const GSVector4i& v) + __forceinline void operator|=(const GSVector4i& v) { m = _mm_or_si128(m, v); } - __forceinline void operator ^= (const GSVector4i& v) + __forceinline void operator^=(const GSVector4i& v) { m = _mm_xor_si128(m, v); } - __forceinline friend GSVector4i operator + (const GSVector4i& v1, const GSVector4i& v2) + __forceinline friend GSVector4i operator+(const GSVector4i& v1, const GSVector4i& v2) { return GSVector4i(_mm_add_epi32(v1, v2)); } - __forceinline friend GSVector4i operator - (const GSVector4i& v1, const GSVector4i& v2) + __forceinline friend GSVector4i operator-(const GSVector4i& v1, const GSVector4i& v2) { return GSVector4i(_mm_sub_epi32(v1, v2)); } - __forceinline friend GSVector4i operator + (const GSVector4i& v, int i) + __forceinline friend GSVector4i operator+(const GSVector4i& v, int i) { return v + GSVector4i(i); } - __forceinline friend GSVector4i operator - (const GSVector4i& v, int i) + __forceinline friend GSVector4i operator-(const GSVector4i& v, int i) { return v - GSVector4i(i); } - __forceinline friend GSVector4i operator << (const GSVector4i& v, const int i) + __forceinline friend GSVector4i operator<<(const GSVector4i& v, const int i) { return GSVector4i(_mm_slli_epi32(v, i)); } - __forceinline friend GSVector4i operator >> (const GSVector4i& v, const int i) + __forceinline friend GSVector4i operator>>(const GSVector4i& v, const int i) { return GSVector4i(_mm_srli_epi32(v, i)); } - __forceinline friend GSVector4i operator & (const GSVector4i& v1, const GSVector4i& v2) + __forceinline friend GSVector4i operator&(const GSVector4i& v1, const GSVector4i& v2) { return GSVector4i(_mm_and_si128(v1, v2)); } - __forceinline friend GSVector4i operator | (const GSVector4i& v1, const GSVector4i& v2) + __forceinline friend GSVector4i operator|(const GSVector4i& v1, const GSVector4i& v2) { return GSVector4i(_mm_or_si128(v1, v2)); } - __forceinline friend GSVector4i operator ^ (const GSVector4i& v1, const GSVector4i& v2) + __forceinline friend GSVector4i operator^(const GSVector4i& v1, const GSVector4i& v2) { return GSVector4i(_mm_xor_si128(v1, v2)); } - __forceinline friend GSVector4i operator & (const GSVector4i& v, int i) + __forceinline friend GSVector4i operator&(const GSVector4i& v, int i) { return v & GSVector4i(i); } - __forceinline friend GSVector4i operator | (const GSVector4i& v, int i) + __forceinline friend GSVector4i operator|(const GSVector4i& v, int i) { return v | GSVector4i(i); } - __forceinline friend GSVector4i operator ^ (const GSVector4i& v, int i) + __forceinline friend GSVector4i operator^(const GSVector4i& v, int i) { return v ^ GSVector4i(i); } - __forceinline friend GSVector4i operator ~ (const GSVector4i& v) + __forceinline friend GSVector4i operator~(const GSVector4i& v) { return v ^ (v == v); } - __forceinline friend GSVector4i operator == (const GSVector4i& v1, const GSVector4i& v2) + __forceinline friend GSVector4i operator==(const GSVector4i& v1, const GSVector4i& v2) { return GSVector4i(_mm_cmpeq_epi32(v1, v2)); } - __forceinline friend GSVector4i operator != (const GSVector4i& v1, const GSVector4i& v2) + __forceinline friend GSVector4i operator!=(const GSVector4i& v1, const GSVector4i& v2) { return ~(v1 == v2); } - __forceinline friend GSVector4i operator > (const GSVector4i& v1, const GSVector4i& v2) + __forceinline friend GSVector4i operator>(const GSVector4i& v1, const GSVector4i& v2) { return GSVector4i(_mm_cmpgt_epi32(v1, v2)); } - __forceinline friend GSVector4i operator < (const GSVector4i& v1, const GSVector4i& v2) + __forceinline friend GSVector4i operator<(const GSVector4i& v1, const GSVector4i& v2) { return GSVector4i(_mm_cmplt_epi32(v1, v2)); } - __forceinline friend GSVector4i operator >= (const GSVector4i& v1, const GSVector4i& v2) + __forceinline friend GSVector4i operator>=(const GSVector4i& v1, const GSVector4i& v2) { return (v1 > v2) | (v1 == v2); } - __forceinline friend GSVector4i operator <= (const GSVector4i& v1, const GSVector4i& v2) + __forceinline friend GSVector4i operator<=(const GSVector4i& v1, const GSVector4i& v2) { return (v1 < v2) | (v1 == v2); } @@ -1919,204 +1981,204 @@ public: VECTOR4i_SHUFFLE_1(z, 2) VECTOR4i_SHUFFLE_1(w, 3) - __forceinline static GSVector4i zero() {return GSVector4i(_mm_setzero_si128());} + __forceinline static GSVector4i zero() { return GSVector4i(_mm_setzero_si128()); } - __forceinline static GSVector4i xffffffff() {return zero() == zero();} + __forceinline static GSVector4i xffffffff() { return zero() == zero(); } - __forceinline static GSVector4i x00000001() {return xffffffff().srl32(31);} - __forceinline static GSVector4i x00000003() {return xffffffff().srl32(30);} - __forceinline static GSVector4i x00000007() {return xffffffff().srl32(29);} - __forceinline static GSVector4i x0000000f() {return xffffffff().srl32(28);} - __forceinline static GSVector4i x0000001f() {return xffffffff().srl32(27);} - __forceinline static GSVector4i x0000003f() {return xffffffff().srl32(26);} - __forceinline static GSVector4i x0000007f() {return xffffffff().srl32(25);} - __forceinline static GSVector4i x000000ff() {return xffffffff().srl32(24);} - __forceinline static GSVector4i x000001ff() {return xffffffff().srl32(23);} - __forceinline static GSVector4i x000003ff() {return xffffffff().srl32(22);} - __forceinline static GSVector4i x000007ff() {return xffffffff().srl32(21);} - __forceinline static GSVector4i x00000fff() {return xffffffff().srl32(20);} - __forceinline static GSVector4i x00001fff() {return xffffffff().srl32(19);} - __forceinline static GSVector4i x00003fff() {return xffffffff().srl32(18);} - __forceinline static GSVector4i x00007fff() {return xffffffff().srl32(17);} - __forceinline static GSVector4i x0000ffff() {return xffffffff().srl32(16);} - __forceinline static GSVector4i x0001ffff() {return xffffffff().srl32(15);} - __forceinline static GSVector4i x0003ffff() {return xffffffff().srl32(14);} - __forceinline static GSVector4i x0007ffff() {return xffffffff().srl32(13);} - __forceinline static GSVector4i x000fffff() {return xffffffff().srl32(12);} - __forceinline static GSVector4i x001fffff() {return xffffffff().srl32(11);} - __forceinline static GSVector4i x003fffff() {return xffffffff().srl32(10);} - __forceinline static GSVector4i x007fffff() {return xffffffff().srl32( 9);} - __forceinline static GSVector4i x00ffffff() {return xffffffff().srl32( 8);} - __forceinline static GSVector4i x01ffffff() {return xffffffff().srl32( 7);} - __forceinline static GSVector4i x03ffffff() {return xffffffff().srl32( 6);} - __forceinline static GSVector4i x07ffffff() {return xffffffff().srl32( 5);} - __forceinline static GSVector4i x0fffffff() {return xffffffff().srl32( 4);} - __forceinline static GSVector4i x1fffffff() {return xffffffff().srl32( 3);} - __forceinline static GSVector4i x3fffffff() {return xffffffff().srl32( 2);} - __forceinline static GSVector4i x7fffffff() {return xffffffff().srl32( 1);} + __forceinline static GSVector4i x00000001() { return xffffffff().srl32(31); } + __forceinline static GSVector4i x00000003() { return xffffffff().srl32(30); } + __forceinline static GSVector4i x00000007() { return xffffffff().srl32(29); } + __forceinline static GSVector4i x0000000f() { return xffffffff().srl32(28); } + __forceinline static GSVector4i x0000001f() { return xffffffff().srl32(27); } + __forceinline static GSVector4i x0000003f() { return xffffffff().srl32(26); } + __forceinline static GSVector4i x0000007f() { return xffffffff().srl32(25); } + __forceinline static GSVector4i x000000ff() { return xffffffff().srl32(24); } + __forceinline static GSVector4i x000001ff() { return xffffffff().srl32(23); } + __forceinline static GSVector4i x000003ff() { return xffffffff().srl32(22); } + __forceinline static GSVector4i x000007ff() { return xffffffff().srl32(21); } + __forceinline static GSVector4i x00000fff() { return xffffffff().srl32(20); } + __forceinline static GSVector4i x00001fff() { return xffffffff().srl32(19); } + __forceinline static GSVector4i x00003fff() { return xffffffff().srl32(18); } + __forceinline static GSVector4i x00007fff() { return xffffffff().srl32(17); } + __forceinline static GSVector4i x0000ffff() { return xffffffff().srl32(16); } + __forceinline static GSVector4i x0001ffff() { return xffffffff().srl32(15); } + __forceinline static GSVector4i x0003ffff() { return xffffffff().srl32(14); } + __forceinline static GSVector4i x0007ffff() { return xffffffff().srl32(13); } + __forceinline static GSVector4i x000fffff() { return xffffffff().srl32(12); } + __forceinline static GSVector4i x001fffff() { return xffffffff().srl32(11); } + __forceinline static GSVector4i x003fffff() { return xffffffff().srl32(10); } + __forceinline static GSVector4i x007fffff() { return xffffffff().srl32( 9); } + __forceinline static GSVector4i x00ffffff() { return xffffffff().srl32( 8); } + __forceinline static GSVector4i x01ffffff() { return xffffffff().srl32( 7); } + __forceinline static GSVector4i x03ffffff() { return xffffffff().srl32( 6); } + __forceinline static GSVector4i x07ffffff() { return xffffffff().srl32( 5); } + __forceinline static GSVector4i x0fffffff() { return xffffffff().srl32( 4); } + __forceinline static GSVector4i x1fffffff() { return xffffffff().srl32( 3); } + __forceinline static GSVector4i x3fffffff() { return xffffffff().srl32( 2); } + __forceinline static GSVector4i x7fffffff() { return xffffffff().srl32( 1); } - __forceinline static GSVector4i x80000000() {return xffffffff().sll32(31);} - __forceinline static GSVector4i xc0000000() {return xffffffff().sll32(30);} - __forceinline static GSVector4i xe0000000() {return xffffffff().sll32(29);} - __forceinline static GSVector4i xf0000000() {return xffffffff().sll32(28);} - __forceinline static GSVector4i xf8000000() {return xffffffff().sll32(27);} - __forceinline static GSVector4i xfc000000() {return xffffffff().sll32(26);} - __forceinline static GSVector4i xfe000000() {return xffffffff().sll32(25);} - __forceinline static GSVector4i xff000000() {return xffffffff().sll32(24);} - __forceinline static GSVector4i xff800000() {return xffffffff().sll32(23);} - __forceinline static GSVector4i xffc00000() {return xffffffff().sll32(22);} - __forceinline static GSVector4i xffe00000() {return xffffffff().sll32(21);} - __forceinline static GSVector4i xfff00000() {return xffffffff().sll32(20);} - __forceinline static GSVector4i xfff80000() {return xffffffff().sll32(19);} - __forceinline static GSVector4i xfffc0000() {return xffffffff().sll32(18);} - __forceinline static GSVector4i xfffe0000() {return xffffffff().sll32(17);} - __forceinline static GSVector4i xffff0000() {return xffffffff().sll32(16);} - __forceinline static GSVector4i xffff8000() {return xffffffff().sll32(15);} - __forceinline static GSVector4i xffffc000() {return xffffffff().sll32(14);} - __forceinline static GSVector4i xffffe000() {return xffffffff().sll32(13);} - __forceinline static GSVector4i xfffff000() {return xffffffff().sll32(12);} - __forceinline static GSVector4i xfffff800() {return xffffffff().sll32(11);} - __forceinline static GSVector4i xfffffc00() {return xffffffff().sll32(10);} - __forceinline static GSVector4i xfffffe00() {return xffffffff().sll32( 9);} - __forceinline static GSVector4i xffffff00() {return xffffffff().sll32( 8);} - __forceinline static GSVector4i xffffff80() {return xffffffff().sll32( 7);} - __forceinline static GSVector4i xffffffc0() {return xffffffff().sll32( 6);} - __forceinline static GSVector4i xffffffe0() {return xffffffff().sll32( 5);} - __forceinline static GSVector4i xfffffff0() {return xffffffff().sll32( 4);} - __forceinline static GSVector4i xfffffff8() {return xffffffff().sll32( 3);} - __forceinline static GSVector4i xfffffffc() {return xffffffff().sll32( 2);} - __forceinline static GSVector4i xfffffffe() {return xffffffff().sll32( 1);} + __forceinline static GSVector4i x80000000() { return xffffffff().sll32(31); } + __forceinline static GSVector4i xc0000000() { return xffffffff().sll32(30); } + __forceinline static GSVector4i xe0000000() { return xffffffff().sll32(29); } + __forceinline static GSVector4i xf0000000() { return xffffffff().sll32(28); } + __forceinline static GSVector4i xf8000000() { return xffffffff().sll32(27); } + __forceinline static GSVector4i xfc000000() { return xffffffff().sll32(26); } + __forceinline static GSVector4i xfe000000() { return xffffffff().sll32(25); } + __forceinline static GSVector4i xff000000() { return xffffffff().sll32(24); } + __forceinline static GSVector4i xff800000() { return xffffffff().sll32(23); } + __forceinline static GSVector4i xffc00000() { return xffffffff().sll32(22); } + __forceinline static GSVector4i xffe00000() { return xffffffff().sll32(21); } + __forceinline static GSVector4i xfff00000() { return xffffffff().sll32(20); } + __forceinline static GSVector4i xfff80000() { return xffffffff().sll32(19); } + __forceinline static GSVector4i xfffc0000() { return xffffffff().sll32(18); } + __forceinline static GSVector4i xfffe0000() { return xffffffff().sll32(17); } + __forceinline static GSVector4i xffff0000() { return xffffffff().sll32(16); } + __forceinline static GSVector4i xffff8000() { return xffffffff().sll32(15); } + __forceinline static GSVector4i xffffc000() { return xffffffff().sll32(14); } + __forceinline static GSVector4i xffffe000() { return xffffffff().sll32(13); } + __forceinline static GSVector4i xfffff000() { return xffffffff().sll32(12); } + __forceinline static GSVector4i xfffff800() { return xffffffff().sll32(11); } + __forceinline static GSVector4i xfffffc00() { return xffffffff().sll32(10); } + __forceinline static GSVector4i xfffffe00() { return xffffffff().sll32( 9); } + __forceinline static GSVector4i xffffff00() { return xffffffff().sll32( 8); } + __forceinline static GSVector4i xffffff80() { return xffffffff().sll32( 7); } + __forceinline static GSVector4i xffffffc0() { return xffffffff().sll32( 6); } + __forceinline static GSVector4i xffffffe0() { return xffffffff().sll32( 5); } + __forceinline static GSVector4i xfffffff0() { return xffffffff().sll32( 4); } + __forceinline static GSVector4i xfffffff8() { return xffffffff().sll32( 3); } + __forceinline static GSVector4i xfffffffc() { return xffffffff().sll32( 2); } + __forceinline static GSVector4i xfffffffe() { return xffffffff().sll32( 1); } - __forceinline static GSVector4i x0001() {return xffffffff().srl16(15);} - __forceinline static GSVector4i x0003() {return xffffffff().srl16(14);} - __forceinline static GSVector4i x0007() {return xffffffff().srl16(13);} - __forceinline static GSVector4i x000f() {return xffffffff().srl16(12);} - __forceinline static GSVector4i x001f() {return xffffffff().srl16(11);} - __forceinline static GSVector4i x003f() {return xffffffff().srl16(10);} - __forceinline static GSVector4i x007f() {return xffffffff().srl16( 9);} - __forceinline static GSVector4i x00ff() {return xffffffff().srl16( 8);} - __forceinline static GSVector4i x01ff() {return xffffffff().srl16( 7);} - __forceinline static GSVector4i x03ff() {return xffffffff().srl16( 6);} - __forceinline static GSVector4i x07ff() {return xffffffff().srl16( 5);} - __forceinline static GSVector4i x0fff() {return xffffffff().srl16( 4);} - __forceinline static GSVector4i x1fff() {return xffffffff().srl16( 3);} - __forceinline static GSVector4i x3fff() {return xffffffff().srl16( 2);} - __forceinline static GSVector4i x7fff() {return xffffffff().srl16( 1);} + __forceinline static GSVector4i x0001() { return xffffffff().srl16(15); } + __forceinline static GSVector4i x0003() { return xffffffff().srl16(14); } + __forceinline static GSVector4i x0007() { return xffffffff().srl16(13); } + __forceinline static GSVector4i x000f() { return xffffffff().srl16(12); } + __forceinline static GSVector4i x001f() { return xffffffff().srl16(11); } + __forceinline static GSVector4i x003f() { return xffffffff().srl16(10); } + __forceinline static GSVector4i x007f() { return xffffffff().srl16( 9); } + __forceinline static GSVector4i x00ff() { return xffffffff().srl16( 8); } + __forceinline static GSVector4i x01ff() { return xffffffff().srl16( 7); } + __forceinline static GSVector4i x03ff() { return xffffffff().srl16( 6); } + __forceinline static GSVector4i x07ff() { return xffffffff().srl16( 5); } + __forceinline static GSVector4i x0fff() { return xffffffff().srl16( 4); } + __forceinline static GSVector4i x1fff() { return xffffffff().srl16( 3); } + __forceinline static GSVector4i x3fff() { return xffffffff().srl16( 2); } + __forceinline static GSVector4i x7fff() { return xffffffff().srl16( 1); } - __forceinline static GSVector4i x8000() {return xffffffff().sll16(15);} - __forceinline static GSVector4i xc000() {return xffffffff().sll16(14);} - __forceinline static GSVector4i xe000() {return xffffffff().sll16(13);} - __forceinline static GSVector4i xf000() {return xffffffff().sll16(12);} - __forceinline static GSVector4i xf800() {return xffffffff().sll16(11);} - __forceinline static GSVector4i xfc00() {return xffffffff().sll16(10);} - __forceinline static GSVector4i xfe00() {return xffffffff().sll16( 9);} - __forceinline static GSVector4i xff00() {return xffffffff().sll16( 8);} - __forceinline static GSVector4i xff80() {return xffffffff().sll16( 7);} - __forceinline static GSVector4i xffc0() {return xffffffff().sll16( 6);} - __forceinline static GSVector4i xffe0() {return xffffffff().sll16( 5);} - __forceinline static GSVector4i xfff0() {return xffffffff().sll16( 4);} - __forceinline static GSVector4i xfff8() {return xffffffff().sll16( 3);} - __forceinline static GSVector4i xfffc() {return xffffffff().sll16( 2);} - __forceinline static GSVector4i xfffe() {return xffffffff().sll16( 1);} + __forceinline static GSVector4i x8000() { return xffffffff().sll16(15); } + __forceinline static GSVector4i xc000() { return xffffffff().sll16(14); } + __forceinline static GSVector4i xe000() { return xffffffff().sll16(13); } + __forceinline static GSVector4i xf000() { return xffffffff().sll16(12); } + __forceinline static GSVector4i xf800() { return xffffffff().sll16(11); } + __forceinline static GSVector4i xfc00() { return xffffffff().sll16(10); } + __forceinline static GSVector4i xfe00() { return xffffffff().sll16( 9); } + __forceinline static GSVector4i xff00() { return xffffffff().sll16( 8); } + __forceinline static GSVector4i xff80() { return xffffffff().sll16( 7); } + __forceinline static GSVector4i xffc0() { return xffffffff().sll16( 6); } + __forceinline static GSVector4i xffe0() { return xffffffff().sll16( 5); } + __forceinline static GSVector4i xfff0() { return xffffffff().sll16( 4); } + __forceinline static GSVector4i xfff8() { return xffffffff().sll16( 3); } + __forceinline static GSVector4i xfffc() { return xffffffff().sll16( 2); } + __forceinline static GSVector4i xfffe() { return xffffffff().sll16( 1); } - __forceinline static GSVector4i xffffffff(const GSVector4i& v) {return v == v;} + __forceinline static GSVector4i xffffffff(const GSVector4i& v) { return v == v; } - __forceinline static GSVector4i x00000001(const GSVector4i& v) {return xffffffff(v).srl32(31);} - __forceinline static GSVector4i x00000003(const GSVector4i& v) {return xffffffff(v).srl32(30);} - __forceinline static GSVector4i x00000007(const GSVector4i& v) {return xffffffff(v).srl32(29);} - __forceinline static GSVector4i x0000000f(const GSVector4i& v) {return xffffffff(v).srl32(28);} - __forceinline static GSVector4i x0000001f(const GSVector4i& v) {return xffffffff(v).srl32(27);} - __forceinline static GSVector4i x0000003f(const GSVector4i& v) {return xffffffff(v).srl32(26);} - __forceinline static GSVector4i x0000007f(const GSVector4i& v) {return xffffffff(v).srl32(25);} - __forceinline static GSVector4i x000000ff(const GSVector4i& v) {return xffffffff(v).srl32(24);} - __forceinline static GSVector4i x000001ff(const GSVector4i& v) {return xffffffff(v).srl32(23);} - __forceinline static GSVector4i x000003ff(const GSVector4i& v) {return xffffffff(v).srl32(22);} - __forceinline static GSVector4i x000007ff(const GSVector4i& v) {return xffffffff(v).srl32(21);} - __forceinline static GSVector4i x00000fff(const GSVector4i& v) {return xffffffff(v).srl32(20);} - __forceinline static GSVector4i x00001fff(const GSVector4i& v) {return xffffffff(v).srl32(19);} - __forceinline static GSVector4i x00003fff(const GSVector4i& v) {return xffffffff(v).srl32(18);} - __forceinline static GSVector4i x00007fff(const GSVector4i& v) {return xffffffff(v).srl32(17);} - __forceinline static GSVector4i x0000ffff(const GSVector4i& v) {return xffffffff(v).srl32(16);} - __forceinline static GSVector4i x0001ffff(const GSVector4i& v) {return xffffffff(v).srl32(15);} - __forceinline static GSVector4i x0003ffff(const GSVector4i& v) {return xffffffff(v).srl32(14);} - __forceinline static GSVector4i x0007ffff(const GSVector4i& v) {return xffffffff(v).srl32(13);} - __forceinline static GSVector4i x000fffff(const GSVector4i& v) {return xffffffff(v).srl32(12);} - __forceinline static GSVector4i x001fffff(const GSVector4i& v) {return xffffffff(v).srl32(11);} - __forceinline static GSVector4i x003fffff(const GSVector4i& v) {return xffffffff(v).srl32(10);} - __forceinline static GSVector4i x007fffff(const GSVector4i& v) {return xffffffff(v).srl32( 9);} - __forceinline static GSVector4i x00ffffff(const GSVector4i& v) {return xffffffff(v).srl32( 8);} - __forceinline static GSVector4i x01ffffff(const GSVector4i& v) {return xffffffff(v).srl32( 7);} - __forceinline static GSVector4i x03ffffff(const GSVector4i& v) {return xffffffff(v).srl32( 6);} - __forceinline static GSVector4i x07ffffff(const GSVector4i& v) {return xffffffff(v).srl32( 5);} - __forceinline static GSVector4i x0fffffff(const GSVector4i& v) {return xffffffff(v).srl32( 4);} - __forceinline static GSVector4i x1fffffff(const GSVector4i& v) {return xffffffff(v).srl32( 3);} - __forceinline static GSVector4i x3fffffff(const GSVector4i& v) {return xffffffff(v).srl32( 2);} - __forceinline static GSVector4i x7fffffff(const GSVector4i& v) {return xffffffff(v).srl32( 1);} + __forceinline static GSVector4i x00000001(const GSVector4i& v) { return xffffffff(v).srl32(31); } + __forceinline static GSVector4i x00000003(const GSVector4i& v) { return xffffffff(v).srl32(30); } + __forceinline static GSVector4i x00000007(const GSVector4i& v) { return xffffffff(v).srl32(29); } + __forceinline static GSVector4i x0000000f(const GSVector4i& v) { return xffffffff(v).srl32(28); } + __forceinline static GSVector4i x0000001f(const GSVector4i& v) { return xffffffff(v).srl32(27); } + __forceinline static GSVector4i x0000003f(const GSVector4i& v) { return xffffffff(v).srl32(26); } + __forceinline static GSVector4i x0000007f(const GSVector4i& v) { return xffffffff(v).srl32(25); } + __forceinline static GSVector4i x000000ff(const GSVector4i& v) { return xffffffff(v).srl32(24); } + __forceinline static GSVector4i x000001ff(const GSVector4i& v) { return xffffffff(v).srl32(23); } + __forceinline static GSVector4i x000003ff(const GSVector4i& v) { return xffffffff(v).srl32(22); } + __forceinline static GSVector4i x000007ff(const GSVector4i& v) { return xffffffff(v).srl32(21); } + __forceinline static GSVector4i x00000fff(const GSVector4i& v) { return xffffffff(v).srl32(20); } + __forceinline static GSVector4i x00001fff(const GSVector4i& v) { return xffffffff(v).srl32(19); } + __forceinline static GSVector4i x00003fff(const GSVector4i& v) { return xffffffff(v).srl32(18); } + __forceinline static GSVector4i x00007fff(const GSVector4i& v) { return xffffffff(v).srl32(17); } + __forceinline static GSVector4i x0000ffff(const GSVector4i& v) { return xffffffff(v).srl32(16); } + __forceinline static GSVector4i x0001ffff(const GSVector4i& v) { return xffffffff(v).srl32(15); } + __forceinline static GSVector4i x0003ffff(const GSVector4i& v) { return xffffffff(v).srl32(14); } + __forceinline static GSVector4i x0007ffff(const GSVector4i& v) { return xffffffff(v).srl32(13); } + __forceinline static GSVector4i x000fffff(const GSVector4i& v) { return xffffffff(v).srl32(12); } + __forceinline static GSVector4i x001fffff(const GSVector4i& v) { return xffffffff(v).srl32(11); } + __forceinline static GSVector4i x003fffff(const GSVector4i& v) { return xffffffff(v).srl32(10); } + __forceinline static GSVector4i x007fffff(const GSVector4i& v) { return xffffffff(v).srl32( 9); } + __forceinline static GSVector4i x00ffffff(const GSVector4i& v) { return xffffffff(v).srl32( 8); } + __forceinline static GSVector4i x01ffffff(const GSVector4i& v) { return xffffffff(v).srl32( 7); } + __forceinline static GSVector4i x03ffffff(const GSVector4i& v) { return xffffffff(v).srl32( 6); } + __forceinline static GSVector4i x07ffffff(const GSVector4i& v) { return xffffffff(v).srl32( 5); } + __forceinline static GSVector4i x0fffffff(const GSVector4i& v) { return xffffffff(v).srl32( 4); } + __forceinline static GSVector4i x1fffffff(const GSVector4i& v) { return xffffffff(v).srl32( 3); } + __forceinline static GSVector4i x3fffffff(const GSVector4i& v) { return xffffffff(v).srl32( 2); } + __forceinline static GSVector4i x7fffffff(const GSVector4i& v) { return xffffffff(v).srl32( 1); } - __forceinline static GSVector4i x80000000(const GSVector4i& v) {return xffffffff(v).sll32(31);} - __forceinline static GSVector4i xc0000000(const GSVector4i& v) {return xffffffff(v).sll32(30);} - __forceinline static GSVector4i xe0000000(const GSVector4i& v) {return xffffffff(v).sll32(29);} - __forceinline static GSVector4i xf0000000(const GSVector4i& v) {return xffffffff(v).sll32(28);} - __forceinline static GSVector4i xf8000000(const GSVector4i& v) {return xffffffff(v).sll32(27);} - __forceinline static GSVector4i xfc000000(const GSVector4i& v) {return xffffffff(v).sll32(26);} - __forceinline static GSVector4i xfe000000(const GSVector4i& v) {return xffffffff(v).sll32(25);} - __forceinline static GSVector4i xff000000(const GSVector4i& v) {return xffffffff(v).sll32(24);} - __forceinline static GSVector4i xff800000(const GSVector4i& v) {return xffffffff(v).sll32(23);} - __forceinline static GSVector4i xffc00000(const GSVector4i& v) {return xffffffff(v).sll32(22);} - __forceinline static GSVector4i xffe00000(const GSVector4i& v) {return xffffffff(v).sll32(21);} - __forceinline static GSVector4i xfff00000(const GSVector4i& v) {return xffffffff(v).sll32(20);} - __forceinline static GSVector4i xfff80000(const GSVector4i& v) {return xffffffff(v).sll32(19);} - __forceinline static GSVector4i xfffc0000(const GSVector4i& v) {return xffffffff(v).sll32(18);} - __forceinline static GSVector4i xfffe0000(const GSVector4i& v) {return xffffffff(v).sll32(17);} - __forceinline static GSVector4i xffff0000(const GSVector4i& v) {return xffffffff(v).sll32(16);} - __forceinline static GSVector4i xffff8000(const GSVector4i& v) {return xffffffff(v).sll32(15);} - __forceinline static GSVector4i xffffc000(const GSVector4i& v) {return xffffffff(v).sll32(14);} - __forceinline static GSVector4i xffffe000(const GSVector4i& v) {return xffffffff(v).sll32(13);} - __forceinline static GSVector4i xfffff000(const GSVector4i& v) {return xffffffff(v).sll32(12);} - __forceinline static GSVector4i xfffff800(const GSVector4i& v) {return xffffffff(v).sll32(11);} - __forceinline static GSVector4i xfffffc00(const GSVector4i& v) {return xffffffff(v).sll32(10);} - __forceinline static GSVector4i xfffffe00(const GSVector4i& v) {return xffffffff(v).sll32( 9);} - __forceinline static GSVector4i xffffff00(const GSVector4i& v) {return xffffffff(v).sll32( 8);} - __forceinline static GSVector4i xffffff80(const GSVector4i& v) {return xffffffff(v).sll32( 7);} - __forceinline static GSVector4i xffffffc0(const GSVector4i& v) {return xffffffff(v).sll32( 6);} - __forceinline static GSVector4i xffffffe0(const GSVector4i& v) {return xffffffff(v).sll32( 5);} - __forceinline static GSVector4i xfffffff0(const GSVector4i& v) {return xffffffff(v).sll32( 4);} - __forceinline static GSVector4i xfffffff8(const GSVector4i& v) {return xffffffff(v).sll32( 3);} - __forceinline static GSVector4i xfffffffc(const GSVector4i& v) {return xffffffff(v).sll32( 2);} - __forceinline static GSVector4i xfffffffe(const GSVector4i& v) {return xffffffff(v).sll32( 1);} + __forceinline static GSVector4i x80000000(const GSVector4i& v) { return xffffffff(v).sll32(31); } + __forceinline static GSVector4i xc0000000(const GSVector4i& v) { return xffffffff(v).sll32(30); } + __forceinline static GSVector4i xe0000000(const GSVector4i& v) { return xffffffff(v).sll32(29); } + __forceinline static GSVector4i xf0000000(const GSVector4i& v) { return xffffffff(v).sll32(28); } + __forceinline static GSVector4i xf8000000(const GSVector4i& v) { return xffffffff(v).sll32(27); } + __forceinline static GSVector4i xfc000000(const GSVector4i& v) { return xffffffff(v).sll32(26); } + __forceinline static GSVector4i xfe000000(const GSVector4i& v) { return xffffffff(v).sll32(25); } + __forceinline static GSVector4i xff000000(const GSVector4i& v) { return xffffffff(v).sll32(24); } + __forceinline static GSVector4i xff800000(const GSVector4i& v) { return xffffffff(v).sll32(23); } + __forceinline static GSVector4i xffc00000(const GSVector4i& v) { return xffffffff(v).sll32(22); } + __forceinline static GSVector4i xffe00000(const GSVector4i& v) { return xffffffff(v).sll32(21); } + __forceinline static GSVector4i xfff00000(const GSVector4i& v) { return xffffffff(v).sll32(20); } + __forceinline static GSVector4i xfff80000(const GSVector4i& v) { return xffffffff(v).sll32(19); } + __forceinline static GSVector4i xfffc0000(const GSVector4i& v) { return xffffffff(v).sll32(18); } + __forceinline static GSVector4i xfffe0000(const GSVector4i& v) { return xffffffff(v).sll32(17); } + __forceinline static GSVector4i xffff0000(const GSVector4i& v) { return xffffffff(v).sll32(16); } + __forceinline static GSVector4i xffff8000(const GSVector4i& v) { return xffffffff(v).sll32(15); } + __forceinline static GSVector4i xffffc000(const GSVector4i& v) { return xffffffff(v).sll32(14); } + __forceinline static GSVector4i xffffe000(const GSVector4i& v) { return xffffffff(v).sll32(13); } + __forceinline static GSVector4i xfffff000(const GSVector4i& v) { return xffffffff(v).sll32(12); } + __forceinline static GSVector4i xfffff800(const GSVector4i& v) { return xffffffff(v).sll32(11); } + __forceinline static GSVector4i xfffffc00(const GSVector4i& v) { return xffffffff(v).sll32(10); } + __forceinline static GSVector4i xfffffe00(const GSVector4i& v) { return xffffffff(v).sll32( 9); } + __forceinline static GSVector4i xffffff00(const GSVector4i& v) { return xffffffff(v).sll32( 8); } + __forceinline static GSVector4i xffffff80(const GSVector4i& v) { return xffffffff(v).sll32( 7); } + __forceinline static GSVector4i xffffffc0(const GSVector4i& v) { return xffffffff(v).sll32( 6); } + __forceinline static GSVector4i xffffffe0(const GSVector4i& v) { return xffffffff(v).sll32( 5); } + __forceinline static GSVector4i xfffffff0(const GSVector4i& v) { return xffffffff(v).sll32( 4); } + __forceinline static GSVector4i xfffffff8(const GSVector4i& v) { return xffffffff(v).sll32( 3); } + __forceinline static GSVector4i xfffffffc(const GSVector4i& v) { return xffffffff(v).sll32( 2); } + __forceinline static GSVector4i xfffffffe(const GSVector4i& v) { return xffffffff(v).sll32( 1); } - __forceinline static GSVector4i x0001(const GSVector4i& v) {return xffffffff(v).srl16(15);} - __forceinline static GSVector4i x0003(const GSVector4i& v) {return xffffffff(v).srl16(14);} - __forceinline static GSVector4i x0007(const GSVector4i& v) {return xffffffff(v).srl16(13);} - __forceinline static GSVector4i x000f(const GSVector4i& v) {return xffffffff(v).srl16(12);} - __forceinline static GSVector4i x001f(const GSVector4i& v) {return xffffffff(v).srl16(11);} - __forceinline static GSVector4i x003f(const GSVector4i& v) {return xffffffff(v).srl16(10);} - __forceinline static GSVector4i x007f(const GSVector4i& v) {return xffffffff(v).srl16( 9);} - __forceinline static GSVector4i x00ff(const GSVector4i& v) {return xffffffff(v).srl16( 8);} - __forceinline static GSVector4i x01ff(const GSVector4i& v) {return xffffffff(v).srl16( 7);} - __forceinline static GSVector4i x03ff(const GSVector4i& v) {return xffffffff(v).srl16( 6);} - __forceinline static GSVector4i x07ff(const GSVector4i& v) {return xffffffff(v).srl16( 5);} - __forceinline static GSVector4i x0fff(const GSVector4i& v) {return xffffffff(v).srl16( 4);} - __forceinline static GSVector4i x1fff(const GSVector4i& v) {return xffffffff(v).srl16( 3);} - __forceinline static GSVector4i x3fff(const GSVector4i& v) {return xffffffff(v).srl16( 2);} - __forceinline static GSVector4i x7fff(const GSVector4i& v) {return xffffffff(v).srl16( 1);} + __forceinline static GSVector4i x0001(const GSVector4i& v) { return xffffffff(v).srl16(15); } + __forceinline static GSVector4i x0003(const GSVector4i& v) { return xffffffff(v).srl16(14); } + __forceinline static GSVector4i x0007(const GSVector4i& v) { return xffffffff(v).srl16(13); } + __forceinline static GSVector4i x000f(const GSVector4i& v) { return xffffffff(v).srl16(12); } + __forceinline static GSVector4i x001f(const GSVector4i& v) { return xffffffff(v).srl16(11); } + __forceinline static GSVector4i x003f(const GSVector4i& v) { return xffffffff(v).srl16(10); } + __forceinline static GSVector4i x007f(const GSVector4i& v) { return xffffffff(v).srl16( 9); } + __forceinline static GSVector4i x00ff(const GSVector4i& v) { return xffffffff(v).srl16( 8); } + __forceinline static GSVector4i x01ff(const GSVector4i& v) { return xffffffff(v).srl16( 7); } + __forceinline static GSVector4i x03ff(const GSVector4i& v) { return xffffffff(v).srl16( 6); } + __forceinline static GSVector4i x07ff(const GSVector4i& v) { return xffffffff(v).srl16( 5); } + __forceinline static GSVector4i x0fff(const GSVector4i& v) { return xffffffff(v).srl16( 4); } + __forceinline static GSVector4i x1fff(const GSVector4i& v) { return xffffffff(v).srl16( 3); } + __forceinline static GSVector4i x3fff(const GSVector4i& v) { return xffffffff(v).srl16( 2); } + __forceinline static GSVector4i x7fff(const GSVector4i& v) { return xffffffff(v).srl16( 1); } - __forceinline static GSVector4i x8000(const GSVector4i& v) {return xffffffff(v).sll16(15);} - __forceinline static GSVector4i xc000(const GSVector4i& v) {return xffffffff(v).sll16(14);} - __forceinline static GSVector4i xe000(const GSVector4i& v) {return xffffffff(v).sll16(13);} - __forceinline static GSVector4i xf000(const GSVector4i& v) {return xffffffff(v).sll16(12);} - __forceinline static GSVector4i xf800(const GSVector4i& v) {return xffffffff(v).sll16(11);} - __forceinline static GSVector4i xfc00(const GSVector4i& v) {return xffffffff(v).sll16(10);} - __forceinline static GSVector4i xfe00(const GSVector4i& v) {return xffffffff(v).sll16( 9);} - __forceinline static GSVector4i xff00(const GSVector4i& v) {return xffffffff(v).sll16( 8);} - __forceinline static GSVector4i xff80(const GSVector4i& v) {return xffffffff(v).sll16( 7);} - __forceinline static GSVector4i xffc0(const GSVector4i& v) {return xffffffff(v).sll16( 6);} - __forceinline static GSVector4i xffe0(const GSVector4i& v) {return xffffffff(v).sll16( 5);} - __forceinline static GSVector4i xfff0(const GSVector4i& v) {return xffffffff(v).sll16( 4);} - __forceinline static GSVector4i xfff8(const GSVector4i& v) {return xffffffff(v).sll16( 3);} - __forceinline static GSVector4i xfffc(const GSVector4i& v) {return xffffffff(v).sll16( 2);} - __forceinline static GSVector4i xfffe(const GSVector4i& v) {return xffffffff(v).sll16( 1);} + __forceinline static GSVector4i x8000(const GSVector4i& v) { return xffffffff(v).sll16(15); } + __forceinline static GSVector4i xc000(const GSVector4i& v) { return xffffffff(v).sll16(14); } + __forceinline static GSVector4i xe000(const GSVector4i& v) { return xffffffff(v).sll16(13); } + __forceinline static GSVector4i xf000(const GSVector4i& v) { return xffffffff(v).sll16(12); } + __forceinline static GSVector4i xf800(const GSVector4i& v) { return xffffffff(v).sll16(11); } + __forceinline static GSVector4i xfc00(const GSVector4i& v) { return xffffffff(v).sll16(10); } + __forceinline static GSVector4i xfe00(const GSVector4i& v) { return xffffffff(v).sll16( 9); } + __forceinline static GSVector4i xff00(const GSVector4i& v) { return xffffffff(v).sll16( 8); } + __forceinline static GSVector4i xff80(const GSVector4i& v) { return xffffffff(v).sll16( 7); } + __forceinline static GSVector4i xffc0(const GSVector4i& v) { return xffffffff(v).sll16( 6); } + __forceinline static GSVector4i xffe0(const GSVector4i& v) { return xffffffff(v).sll16( 5); } + __forceinline static GSVector4i xfff0(const GSVector4i& v) { return xffffffff(v).sll16( 4); } + __forceinline static GSVector4i xfff8(const GSVector4i& v) { return xffffffff(v).sll16( 3); } + __forceinline static GSVector4i xfffc(const GSVector4i& v) { return xffffffff(v).sll16( 2); } + __forceinline static GSVector4i xfffe(const GSVector4i& v) { return xffffffff(v).sll16( 1); } - __forceinline static GSVector4i xff(int n) {return m_xff[n];} - __forceinline static GSVector4i x0f(int n) {return m_x0f[n];} + __forceinline static GSVector4i xff(int n) { return m_xff[n]; } + __forceinline static GSVector4i x0f(int n) { return m_x0f[n]; } }; diff --git a/plugins/GSdx/GSVector8.h b/plugins/GSdx/GSVector8.h index 99636adb71..262c21232f 100644 --- a/plugins/GSdx/GSVector8.h +++ b/plugins/GSdx/GSVector8.h @@ -62,8 +62,8 @@ class alignas(32) GSVector8 public: union { - struct {float x0, y0, z0, w0, x1, y1, z1, w1;}; - struct {float r0, g0, b0, a0, r1, g1, b1, a1;}; + struct { float x0, y0, z0, w0, x1, y1, z1, w1; }; + struct { float r0, g0, b0, a0, r1, g1, b1, a1; }; float v[8]; float f32[8]; int8 i8[32]; @@ -126,15 +126,15 @@ public: __forceinline GSVector8(__m128 m0, __m128 m1) { - #if 0 // _MSC_VER >= 1700 +#if 0 // _MSC_VER >= 1700 this->m = _mm256_permute2f128_ps(_mm256_castps128_ps256(m0), _mm256_castps128_ps256(m1), 0x20); - #else +#else this->m = zero().insert<0>(m0).insert<1>(m1); - #endif +#endif } constexpr GSVector8(const GSVector8& v) = default; @@ -146,17 +146,17 @@ public: __forceinline explicit GSVector8(int i) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 m = _mm256_cvtepi32_ps(_mm256_broadcastd_epi32(_mm_cvtsi32_si128(i))); - #else +#else GSVector4i v((int)i); *this = GSVector4(v); - #endif +#endif } __forceinline explicit GSVector8(__m128 m) @@ -169,41 +169,41 @@ public: { } - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 __forceinline explicit GSVector8(const GSVector8i& v); __forceinline static GSVector8 cast(const GSVector8i& v); - #endif +#endif __forceinline static GSVector8 cast(const GSVector4& v); __forceinline static GSVector8 cast(const GSVector4i& v); - __forceinline void operator = (const GSVector8& v) + __forceinline void operator=(const GSVector8& v) { m = v.m; } - __forceinline void operator = (float f) + __forceinline void operator=(float f) { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 - m = _mm256_broadcastss_ps(_mm_load_ss(&f)); + m = _mm256_broadcastss_ps(_mm_load_ss(&f)); - #else +#else m = _mm256_set1_ps(f); - #endif +#endif } - __forceinline void operator = (__m128 m) + __forceinline void operator=(__m128 m) { this->m = _mm256_insertf128_ps(_mm256_castps128_ps256(m), m, 1); } - __forceinline void operator = (__m256 m) + __forceinline void operator=(__m256 m) { this->m = m; } @@ -215,28 +215,28 @@ public: __forceinline GSVector8 abs() const { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 return *this & cast(GSVector8i::x7fffffff()); - #else - +#else + return *this & m_x7fffffff; - #endif +#endif } __forceinline GSVector8 neg() const { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 return *this ^ cast(GSVector8i::x80000000()); - #else - +#else + return *this ^ m_x80000000; - #endif +#endif } __forceinline GSVector8 rcp() const @@ -251,7 +251,8 @@ public: return (v + v) - (v * v) * *this; } - template __forceinline GSVector8 round() const + template + __forceinline GSVector8 round() const { return GSVector8(_mm256_round_ps(m, mode)); } @@ -266,14 +267,14 @@ public: return round(); } - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 - #define LOG8_POLY0(x, c0) GSVector8(c0) - #define LOG8_POLY1(x, c0, c1) (LOG8_POLY0(x, c1).madd(x, GSVector8(c0))) - #define LOG8_POLY2(x, c0, c1, c2) (LOG8_POLY1(x, c1, c2).madd(x, GSVector8(c0))) - #define LOG8_POLY3(x, c0, c1, c2, c3) (LOG8_POLY2(x, c1, c2, c3).madd(x, GSVector8(c0))) - #define LOG8_POLY4(x, c0, c1, c2, c3, c4) (LOG8_POLY3(x, c1, c2, c3, c4).madd(x, GSVector8(c0))) - #define LOG8_POLY5(x, c0, c1, c2, c3, c4, c5) (LOG8_POLY4(x, c1, c2, c3, c4, c5).madd(x, GSVector8(c0))) +#define LOG8_POLY0(x, c0) GSVector8(c0) +#define LOG8_POLY1(x, c0, c1) (LOG8_POLY0(x, c1).madd(x, GSVector8(c0))) +#define LOG8_POLY2(x, c0, c1, c2) (LOG8_POLY1(x, c1, c2).madd(x, GSVector8(c0))) +#define LOG8_POLY3(x, c0, c1, c2, c3) (LOG8_POLY2(x, c1, c2, c3).madd(x, GSVector8(c0))) +#define LOG8_POLY4(x, c0, c1, c2, c3, c4) (LOG8_POLY3(x, c1, c2, c3, c4).madd(x, GSVector8(c0))) +#define LOG8_POLY5(x, c0, c1, c2, c3, c4, c5) (LOG8_POLY4(x, c1, c2, c3, c4, c5).madd(x, GSVector8(c0))) __forceinline GSVector8 log2(int precision = 5) const { @@ -288,21 +289,21 @@ public: GSVector8 p; - switch(precision) + switch (precision) { - case 3: - p = LOG8_POLY2(m, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f); - break; - case 4: - p = LOG8_POLY3(m, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f); - break; - default: - case 5: - p = LOG8_POLY4(m, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f); - break; - case 6: - p = LOG8_POLY5(m, 3.1157899f, -3.3241990f, 2.5988452f, -1.2315303f, 3.1821337e-1f, -3.4436006e-2f); - break; + case 3: + p = LOG8_POLY2(m, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f); + break; + case 4: + p = LOG8_POLY3(m, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f); + break; + default: + case 5: + p = LOG8_POLY4(m, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f); + break; + case 6: + p = LOG8_POLY5(m, 3.1157899f, -3.3241990f, 2.5988452f, -1.2315303f, 3.1821337e-1f, -3.4436006e-2f); + break; } // This effectively increases the polynomial degree by one, but ensures that log2(1) == 0 @@ -312,58 +313,58 @@ public: return p + e; } - #endif +#endif __forceinline GSVector8 madd(const GSVector8& a, const GSVector8& b) const { - #if 0//_M_SSE >= 0x501 +#if 0 //_M_SSE >= 0x501 return GSVector8(_mm256_fmadd_ps(m, a, b)); - - #else - + +#else + return *this * a + b; - - #endif + +#endif } __forceinline GSVector8 msub(const GSVector8& a, const GSVector8& b) const { - #if 0//_M_SSE >= 0x501 +#if 0 //_M_SSE >= 0x501 return GSVector8(_mm256_fmsub_ps(m, a, b)); - - #else - + +#else + return *this * a - b; - - #endif + +#endif } __forceinline GSVector8 nmadd(const GSVector8& a, const GSVector8& b) const { - #if 0//_M_SSE >= 0x501 +#if 0 //_M_SSE >= 0x501 return GSVector8(_mm256_fnmadd_ps(m, a, b)); - - #else - + +#else + return b - *this * a; - - #endif + +#endif } __forceinline GSVector8 nmsub(const GSVector8& a, const GSVector8& b) const { - #if 0//_M_SSE >= 0x501 +#if 0 //_M_SSE >= 0x501 return GSVector8(_mm256_fnmsub_ps(m, a, b)); - - #else + +#else return -b - *this * a; - #endif +#endif } __forceinline GSVector8 addm(const GSVector8& a, const GSVector8& b) const @@ -396,7 +397,8 @@ public: return GSVector8(_mm256_hsub_ps(m, v.m)); } - template __forceinline GSVector8 dp(const GSVector8& v) const + template + __forceinline GSVector8 dp(const GSVector8& v) const { return GSVector8(_mm256_dp_ps(m, v.m, i)); } @@ -431,12 +433,13 @@ public: return GSVector8(_mm256_max_ps(m, a)); } - template __forceinline GSVector8 blend32(const GSVector8& a) const + template + __forceinline GSVector8 blend32(const GSVector8& a) const { return GSVector8(_mm256_blend_ps(m, a, mask)); } - __forceinline GSVector8 blend32(const GSVector8& a, const GSVector8& mask) const + __forceinline GSVector8 blend32(const GSVector8& a, const GSVector8& mask) const { return GSVector8(_mm256_blendv_ps(m, a, mask)); } @@ -490,86 +493,91 @@ public: { return _mm256_testz_ps(m, m) != 0; } - + __forceinline GSVector8 replace_nan(const GSVector8& v) const { return v.blend32(*this, *this == *this); } - template __forceinline GSVector8 insert32(const GSVector8& v) const + template + __forceinline GSVector8 insert32(const GSVector8& v) const { // TODO: use blendps when src == dst ASSERT(src < 4 && dst < 4); // not cross lane like extract32() - switch(dst) + switch (dst) { - case 0: - switch(src) - { - case 0: return yyxx(v).zxzw(*this); - case 1: return yyyy(v).zxzw(*this); - case 2: return yyzz(v).zxzw(*this); - case 3: return yyww(v).zxzw(*this); - default: __assume(0); - } - break; - case 1: - switch(src) - { - case 0: return xxxx(v).xzzw(*this); - case 1: return xxyy(v).xzzw(*this); - case 2: return xxzz(v).xzzw(*this); - case 3: return xxww(v).xzzw(*this); - default: __assume(0); - } - break; - case 2: - switch(src) - { - case 0: return xyzx(wwxx(v)); - case 1: return xyzx(wwyy(v)); - case 2: return xyzx(wwzz(v)); - case 3: return xyzx(wwww(v)); - default: __assume(0); - } - break; - case 3: - switch(src) - { - case 0: return xyxz(zzxx(v)); - case 1: return xyxz(zzyy(v)); - case 2: return xyxz(zzzz(v)); - case 3: return xyxz(zzww(v)); - default: __assume(0); - } - break; - default: - __assume(0); + case 0: + switch (src) + { + case 0: return yyxx(v).zxzw(*this); + case 1: return yyyy(v).zxzw(*this); + case 2: return yyzz(v).zxzw(*this); + case 3: return yyww(v).zxzw(*this); + default: __assume(0); + } + break; + case 1: + switch (src) + { + case 0: return xxxx(v).xzzw(*this); + case 1: return xxyy(v).xzzw(*this); + case 2: return xxzz(v).xzzw(*this); + case 3: return xxww(v).xzzw(*this); + default: __assume(0); + } + break; + case 2: + switch (src) + { + case 0: return xyzx(wwxx(v)); + case 1: return xyzx(wwyy(v)); + case 2: return xyzx(wwzz(v)); + case 3: return xyzx(wwww(v)); + default: __assume(0); + } + break; + case 3: + switch (src) + { + case 0: return xyxz(zzxx(v)); + case 1: return xyxz(zzyy(v)); + case 2: return xyxz(zzzz(v)); + case 3: return xyxz(zzww(v)); + default: __assume(0); + } + break; + default: + __assume(0); } return *this; } - template __forceinline int extract32() const + template + __forceinline int extract32() const { ASSERT(i < 8); return extract().template extract32(); } - template __forceinline GSVector8 insert(__m128 m) const + template + __forceinline GSVector8 insert(__m128 m) const { ASSERT(i < 2); return GSVector8(_mm256_insertf128_ps(this->m, m, i)); } - template __forceinline GSVector4 extract() const + template + __forceinline GSVector4 extract() const { ASSERT(i < 2); - if(i == 0) return GSVector4(_mm256_castps256_ps128(m)); + if (i == 0) + return GSVector4(_mm256_castps256_ps128(m)); return GSVector4(_mm256_extractf128_ps(m, i)); } @@ -606,7 +614,8 @@ public: return loadh(ph, loadl(pl)); } - template __forceinline static GSVector8 load(const void* p) + template + __forceinline static GSVector8 load(const void* p) { return GSVector8(aligned ? _mm256_load_ps((const float*)p) : _mm256_loadu_ps((const float*)p)); } @@ -623,10 +632,13 @@ public: _mm_store_ps((float*)p, _mm256_extractf128_ps(v.m, 1)); } - template __forceinline static void store(void* p, const GSVector8& v) + template + __forceinline static void store(void* p, const GSVector8& v) { - if(aligned) _mm256_store_ps((float*)p, v.m); - else _mm256_storeu_ps((float*)p, v.m); + if (aligned) + _mm256_store_ps((float*)p, v.m); + else + _mm256_storeu_ps((float*)p, v.m); } // @@ -643,147 +655,147 @@ public: // - __forceinline GSVector8 operator - () const + __forceinline GSVector8 operator-() const { return neg(); } - __forceinline void operator += (const GSVector8& v) + __forceinline void operator+=(const GSVector8& v) { m = _mm256_add_ps(m, v); } - __forceinline void operator -= (const GSVector8& v) + __forceinline void operator-=(const GSVector8& v) { m = _mm256_sub_ps(m, v); } - __forceinline void operator *= (const GSVector8& v) + __forceinline void operator*=(const GSVector8& v) { m = _mm256_mul_ps(m, v); } - __forceinline void operator /= (const GSVector8& v) + __forceinline void operator/=(const GSVector8& v) { m = _mm256_div_ps(m, v); } - __forceinline void operator += (float f) + __forceinline void operator+=(float f) { *this += GSVector8(f); } - __forceinline void operator -= (float f) + __forceinline void operator-=(float f) { *this -= GSVector8(f); } - __forceinline void operator *= (float f) + __forceinline void operator*=(float f) { *this *= GSVector8(f); } - __forceinline void operator /= (float f) + __forceinline void operator/=(float f) { *this /= GSVector8(f); } - __forceinline void operator &= (const GSVector8& v) + __forceinline void operator&=(const GSVector8& v) { m = _mm256_and_ps(m, v); } - __forceinline void operator |= (const GSVector8& v) + __forceinline void operator|=(const GSVector8& v) { m = _mm256_or_ps(m, v); } - __forceinline void operator ^= (const GSVector8& v) + __forceinline void operator^=(const GSVector8& v) { m = _mm256_xor_ps(m, v); } - __forceinline friend GSVector8 operator + (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator+(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_add_ps(v1, v2)); } - __forceinline friend GSVector8 operator - (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator-(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_sub_ps(v1, v2)); } - __forceinline friend GSVector8 operator * (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator*(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_mul_ps(v1, v2)); } - __forceinline friend GSVector8 operator / (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator/(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_div_ps(v1, v2)); } - __forceinline friend GSVector8 operator + (const GSVector8& v, float f) + __forceinline friend GSVector8 operator+(const GSVector8& v, float f) { return v + GSVector8(f); } - __forceinline friend GSVector8 operator - (const GSVector8& v, float f) + __forceinline friend GSVector8 operator-(const GSVector8& v, float f) { return v - GSVector8(f); } - __forceinline friend GSVector8 operator * (const GSVector8& v, float f) + __forceinline friend GSVector8 operator*(const GSVector8& v, float f) { return v * GSVector8(f); } - __forceinline friend GSVector8 operator / (const GSVector8& v, float f) + __forceinline friend GSVector8 operator/(const GSVector8& v, float f) { return v / GSVector8(f); } - __forceinline friend GSVector8 operator & (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator&(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_and_ps(v1, v2)); } - __forceinline friend GSVector8 operator | (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator|(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_or_ps(v1, v2)); } - __forceinline friend GSVector8 operator ^ (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator^(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_xor_ps(v1, v2)); } - __forceinline friend GSVector8 operator == (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator==(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_EQ_OQ)); } - __forceinline friend GSVector8 operator != (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator!=(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_NEQ_OQ)); } - __forceinline friend GSVector8 operator > (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator>(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_GT_OQ)); } - __forceinline friend GSVector8 operator < (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator<(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_LT_OQ)); } - __forceinline friend GSVector8 operator >= (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator>=(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_GE_OQ)); } - __forceinline friend GSVector8 operator <= (const GSVector8& v1, const GSVector8& v2) + __forceinline friend GSVector8 operator<=(const GSVector8& v1, const GSVector8& v2) { return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_LE_OQ)); } @@ -795,11 +807,11 @@ public: #define VECTOR8_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ - __forceinline GSVector8 xs##ys##zs##ws() const {return GSVector8(_mm256_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector8 xs##ys##zs##ws(const GSVector8& v) const {return GSVector8(_mm256_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn)));} + __forceinline GSVector8 xs##ys##zs##ws() const { return GSVector8(_mm256_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn))); } \ + __forceinline GSVector8 xs##ys##zs##ws(const GSVector8& v) const { return GSVector8(_mm256_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn))); } // vs2012u3 cannot reuse the result of equivalent shuffles when it is done with _mm256_permute_ps (write v.xxxx() twice, and it will do it twice), but with _mm256_shuffle_ps it can. - //__forceinline GSVector8 xs##ys##zs##ws() const {return GSVector8(_mm256_permute_ps(m, _MM_SHUFFLE(wn, zn, yn, xn)));} + //__forceinline GSVector8 xs##ys##zs##ws() const { return GSVector8(_mm256_permute_ps(m, _MM_SHUFFLE(wn, zn, yn, xn))); } #define VECTOR8_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \ VECTOR8_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \ @@ -831,8 +843,8 @@ public: // _ = 0 #define VECTOR8_PERMUTE128_2(as, an, bs, bn) \ - __forceinline GSVector8 as##bs() const {return GSVector8(_mm256_permute2f128_ps(m, m, an | (bn << 4)));} \ - __forceinline GSVector8 as##bs(const GSVector8& v) const {return GSVector8(_mm256_permute2f128_ps(m, v.m, an | (bn << 4)));} \ + __forceinline GSVector8 as##bs() const { return GSVector8(_mm256_permute2f128_ps(m, m, an | (bn << 4))); } \ + __forceinline GSVector8 as##bs(const GSVector8& v) const { return GSVector8(_mm256_permute2f128_ps(m, v.m, an | (bn << 4))); } \ #define VECTOR8_PERMUTE128_1(as, an) \ VECTOR8_PERMUTE128_2(as, an, a, 0) \ @@ -847,7 +859,7 @@ public: VECTOR8_PERMUTE128_1(d, 3) VECTOR8_PERMUTE128_1(_, 8) - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 // a = v[63:0] // b = v[127:64] @@ -855,7 +867,7 @@ public: // d = v[255:192] #define VECTOR8_PERMUTE64_4(as, an, bs, bn, cs, cn, ds, dn) \ - __forceinline GSVector8 as##bs##cs##ds() const {return GSVector8(_mm256_castpd_ps(_mm256_permute4x64_pd(_mm256_castps_pd(m), _MM_SHUFFLE(dn, cn, bn, an))));} \ + __forceinline GSVector8 as##bs##cs##ds() const { return GSVector8(_mm256_castpd_ps(_mm256_permute4x64_pd(_mm256_castps_pd(m), _MM_SHUFFLE(dn, cn, bn, an)))); } \ #define VECTOR8_PERMUTE64_3(as, an, bs, bn, cs, cn) \ VECTOR8_PERMUTE64_4(as, an, bs, bn, cs, cn, a, 0) \ @@ -902,7 +914,7 @@ public: // TODO: v.(x0|y0|z0|w0|x1|y1|z1|w1) // broadcast element - #endif +#endif }; #endif diff --git a/plugins/GSdx/GSVector8i.h b/plugins/GSdx/GSVector8i.h index 1daa1e8afd..42e963690c 100644 --- a/plugins/GSdx/GSVector8i.h +++ b/plugins/GSdx/GSVector8i.h @@ -72,8 +72,8 @@ class alignas(32) GSVector8i public: union { - struct {int x0, y0, z0, w0, x1, y1, z1, w1;}; - struct {int r0, g0, b0, a0, r1, g1, b1, a1;}; + struct { int x0, y0, z0, w0, x1, y1, z1, w1; }; + struct { int r0, g0, b0, a0, r1, g1, b1, a1; }; int v[8]; float f32[8]; int8 i8[32]; @@ -119,7 +119,7 @@ public: } constexpr GSVector8i( - char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, + char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15, char b16, char b17, char b18, char b19, char b20, char b21, char b22, char b23, char b24, char b25, char b26, char b27, char b28, char b29, char b30, char b31) @@ -131,15 +131,15 @@ public: __forceinline GSVector8i(__m128i m0, __m128i m1) { - #if 0 // _MSC_VER >= 1700 +#if 0 // _MSC_VER >= 1700 this->m = _mm256_permute2x128_si256(_mm256_castsi128_si256(m0), _mm256_castsi128_si256(m1), 0); - - #else - + +#else + *this = zero().insert<0>(m0).insert<1>(m1); - #endif +#endif } GSVector8i(const GSVector8i& v) = default; @@ -159,22 +159,22 @@ public: { } - __forceinline void operator = (const GSVector8i& v) + __forceinline void operator=(const GSVector8i& v) { m = v.m; } - __forceinline void operator = (int i) + __forceinline void operator=(int i) { m = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(i)); // m = _mm256_set1_epi32(i); } - __forceinline void operator = (__m128i m) + __forceinline void operator=(__m128i m) { this->m = _mm256_inserti128_si256(_mm256_castsi128_si256(m), m, 1); } - __forceinline void operator = (__m256i m) + __forceinline void operator=(__m256i m) { this->m = m; } @@ -215,7 +215,7 @@ public: { return max_i32(a.xyxy()).min_i32(a.zwzw()); } - + __forceinline GSVector8i sat_u8(const GSVector8i& a, const GSVector8i& b) const { return max_u8(a).min_u8(b); @@ -316,7 +316,8 @@ public: return GSVector8i(_mm256_blendv_epi8(m, a, mask)); } - template __forceinline GSVector8i blend16(const GSVector8i& a) const + template + __forceinline GSVector8i blend16(const GSVector8i& a) const { return GSVector8i(_mm256_blend_epi16(m, a, mask)); } @@ -520,79 +521,82 @@ public: // - static __forceinline GSVector8i i8to16c(const void* p) + static __forceinline GSVector8i i8to16c(const void* p) { - return GSVector8i(_mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)p))); + return GSVector8i(_mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)p))); } - static __forceinline GSVector8i u8to16c(const void* p) + static __forceinline GSVector8i u8to16c(const void* p) { - return GSVector8i(_mm256_cvtepu8_epi16(_mm_load_si128((__m128i*)p))); + return GSVector8i(_mm256_cvtepu8_epi16(_mm_load_si128((__m128i*)p))); } - static __forceinline GSVector8i i8to32c(const void* p) + static __forceinline GSVector8i i8to32c(const void* p) { - return GSVector8i(_mm256_cvtepi8_epi32(_mm_loadl_epi64((__m128i*)p))); + return GSVector8i(_mm256_cvtepi8_epi32(_mm_loadl_epi64((__m128i*)p))); } - static __forceinline GSVector8i u8to32c(const void* p) + static __forceinline GSVector8i u8to32c(const void* p) { - return GSVector8i(_mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)p))); + return GSVector8i(_mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)p))); } - static __forceinline GSVector8i i8to64c(int i) + static __forceinline GSVector8i i8to64c(int i) { - return GSVector8i(_mm256_cvtepi8_epi64(_mm_cvtsi32_si128(i))); + return GSVector8i(_mm256_cvtepi8_epi64(_mm_cvtsi32_si128(i))); } - static __forceinline GSVector8i u8to64c(int i) + static __forceinline GSVector8i u8to64c(int i) { - return GSVector8i(_mm256_cvtepu8_epi64(_mm_cvtsi32_si128(i))); + return GSVector8i(_mm256_cvtepu8_epi64(_mm_cvtsi32_si128(i))); } - static __forceinline GSVector8i i16to32c(const void* p) + static __forceinline GSVector8i i16to32c(const void* p) { - return GSVector8i(_mm256_cvtepi16_epi32(_mm_load_si128((__m128i*)p))); + return GSVector8i(_mm256_cvtepi16_epi32(_mm_load_si128((__m128i*)p))); } - static __forceinline GSVector8i u16to32c(const void* p) + static __forceinline GSVector8i u16to32c(const void* p) { - return GSVector8i(_mm256_cvtepu16_epi32(_mm_load_si128((__m128i*)p))); + return GSVector8i(_mm256_cvtepu16_epi32(_mm_load_si128((__m128i*)p))); } - static __forceinline GSVector8i i16to64c(const void* p) + static __forceinline GSVector8i i16to64c(const void* p) { - return GSVector8i(_mm256_cvtepi16_epi64(_mm_loadl_epi64((__m128i*)p))); + return GSVector8i(_mm256_cvtepi16_epi64(_mm_loadl_epi64((__m128i*)p))); } - static __forceinline GSVector8i u16to64c(const void* p) + static __forceinline GSVector8i u16to64c(const void* p) { - return GSVector8i(_mm256_cvtepu16_epi64(_mm_loadl_epi64((__m128i*)p))); + return GSVector8i(_mm256_cvtepu16_epi64(_mm_loadl_epi64((__m128i*)p))); } - static __forceinline GSVector8i i32to64c(const void* p) + static __forceinline GSVector8i i32to64c(const void* p) { - return GSVector8i(_mm256_cvtepi32_epi64(_mm_load_si128((__m128i*)p))); + return GSVector8i(_mm256_cvtepi32_epi64(_mm_load_si128((__m128i*)p))); } - static __forceinline GSVector8i u32to64c(const void* p) + static __forceinline GSVector8i u32to64c(const void* p) { - return GSVector8i(_mm256_cvtepu32_epi64(_mm_load_si128((__m128i*)p))); + return GSVector8i(_mm256_cvtepu32_epi64(_mm_load_si128((__m128i*)p))); } // - template __forceinline GSVector8i srl() const + template + __forceinline GSVector8i srl() const { return GSVector8i(_mm256_srli_si256(m, i)); } - template __forceinline GSVector8i srl(const GSVector8i& v) + template + __forceinline GSVector8i srl(const GSVector8i& v) { return GSVector8i(_mm256_alignr_epi8(v.m, m, i)); } - template __forceinline GSVector8i sll() const + template + __forceinline GSVector8i sll() const { return GSVector8i(_mm256_slli_si256(m, i)); //return GSVector8i(_mm256_slli_si128(m, i)); @@ -848,21 +852,24 @@ public: return GSVector8i(_mm256_madd_epi16(m, v.m)); } - template __forceinline GSVector8i lerp16(const GSVector8i& a, const GSVector8i& f) const + template + __forceinline GSVector8i lerp16(const GSVector8i& a, const GSVector8i& f) const { // (a - this) * f << shift + this return add16(a.sub16(*this).modulate16(f)); } - template __forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c) + template + __forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c) { // (a - b) * c << shift return a.sub16(b).modulate16(c); } - template __forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c, const GSVector8i& d) + template + __forceinline static GSVector8i lerp16(const GSVector8i& a, const GSVector8i& b, const GSVector8i& c, const GSVector8i& d) { // (a - b) * c << shift + d @@ -876,11 +883,12 @@ public: return add16(a.sub16(*this).mul16l(f).sra16(4)); } - template __forceinline GSVector8i modulate16(const GSVector8i& f) const + template + __forceinline GSVector8i modulate16(const GSVector8i& f) const { // a * f << shift - - if(shift == 0) + + if (shift == 0) { return mul16hrs(f); } @@ -891,7 +899,7 @@ public: __forceinline bool eq(const GSVector8i& v) const { GSVector8i t = *this ^ v; - + return _mm256_testz_si256(t, t) != 0; } @@ -977,7 +985,8 @@ public: // TODO: extract/insert - template __forceinline int extract8() const + template + __forceinline int extract8() const { ASSERT(i < 32); @@ -986,7 +995,8 @@ public: return v.extract8(); } - template __forceinline int extract16() const + template + __forceinline int extract16() const { ASSERT(i < 16); @@ -995,27 +1005,32 @@ public: return v.extract16(); } - template __forceinline int extract32() const + template + __forceinline int extract32() const { ASSERT(i < 8); GSVector4i v = extract(); - if((i & 3) == 0) return GSVector4i::store(v); + if ((i & 3) == 0) + return GSVector4i::store(v); return v.extract32(); } - template __forceinline GSVector4i extract() const + template + __forceinline GSVector4i extract() const { ASSERT(i < 2); - if(i == 0) return GSVector4i(_mm256_castsi256_si128(m)); + if (i == 0) + return GSVector4i(_mm256_castsi256_si128(m)); return GSVector4i(_mm256_extracti128_si256(m, i)); } - template __forceinline GSVector8i insert(__m128i m) const + template + __forceinline GSVector8i insert(__m128i m) const { ASSERT(i < 2); @@ -1024,7 +1039,8 @@ public: // TODO: gather - template __forceinline GSVector8i gather32_32(const T* ptr) const + template + __forceinline GSVector8i gather32_32(const T* ptr) const { GSVector4i v0; GSVector4i v1; @@ -1060,7 +1076,8 @@ public: return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4)); } - template __forceinline GSVector8i gather32_32(const T1* ptr1, const T2* ptr2) const + template + __forceinline GSVector8i gather32_32(const T1* ptr1, const T2* ptr2) const { GSVector4i v0; GSVector4i v1; @@ -1091,7 +1108,8 @@ public: return gather32_32(ptr1).gather32_32(ptr2); } - template __forceinline void gather32_32(const T* RESTRICT ptr, GSVector8i* RESTRICT dst) const + template + __forceinline void gather32_32(const T* RESTRICT ptr, GSVector8i* RESTRICT dst) const { dst[0] = gather32_32<>(ptr); } @@ -1144,7 +1162,8 @@ public: // return GSVector8i(l).insert<1>(h); } - template __forceinline static GSVector8i load(const void* p) + template + __forceinline static GSVector8i load(const void* p) { return GSVector8i(aligned ? _mm256_load_si256((__m256i*)p) : _mm256_loadu_si256((__m256i*)p)); } @@ -1154,14 +1173,14 @@ public: return cast(GSVector4i::load(i)); } - #ifdef _M_AMD64 +#ifdef _M_AMD64 __forceinline static GSVector8i loadq(int64 i) { return cast(GSVector4i::loadq(i)); } - #endif +#endif __forceinline static void storent(void* p, const GSVector8i& v) { @@ -1184,10 +1203,13 @@ public: GSVector8i::storeh(ph, v); } - template __forceinline static void store(void* p, const GSVector8i& v) + template + __forceinline static void store(void* p, const GSVector8i& v) { - if(aligned) _mm256_store_si256((__m256i*)p, v.m); - else _mm256_storeu_si256((__m256i*)p, v.m); + if (aligned) + _mm256_store_si256((__m256i*)p, v.m); + else + _mm256_storeu_si256((__m256i*)p, v.m); } __forceinline static int store(const GSVector8i& v) @@ -1195,26 +1217,27 @@ public: return GSVector4i::store(GSVector4i::cast(v)); } - #ifdef _M_AMD64 +#ifdef _M_AMD64 __forceinline static int64 storeq(const GSVector8i& v) { return GSVector4i::storeq(GSVector4i::cast(v)); } - #endif +#endif __forceinline static void storent(void* RESTRICT dst, const void* RESTRICT src, size_t size) { const GSVector8i* s = (const GSVector8i*)src; GSVector8i* d = (GSVector8i*)dst; - if(size == 0) return; + if (size == 0) + return; size_t i = 0; size_t j = size >> 7; - for(; i < j; i++, s += 4, d += 4) + for (; i < j; i++, s += 4, d += 4) { storent(&d[0], s[0]); storent(&d[1], s[1]); @@ -1224,7 +1247,8 @@ public: size &= 127; - if(size == 0) return; + if (size == 0) + return; memcpy(d, s, size); } @@ -1348,142 +1372,142 @@ public: d = f.bd(d); } - __forceinline void operator += (const GSVector8i& v) + __forceinline void operator+=(const GSVector8i& v) { m = _mm256_add_epi32(m, v); } - __forceinline void operator -= (const GSVector8i& v) + __forceinline void operator-=(const GSVector8i& v) { m = _mm256_sub_epi32(m, v); } - __forceinline void operator += (int i) + __forceinline void operator+=(int i) { *this += GSVector8i(i); } - __forceinline void operator -= (int i) + __forceinline void operator-=(int i) { *this -= GSVector8i(i); } - __forceinline void operator <<= (const int i) + __forceinline void operator<<=(const int i) { m = _mm256_slli_epi32(m, i); } - __forceinline void operator >>= (const int i) + __forceinline void operator>>=(const int i) { m = _mm256_srli_epi32(m, i); } - __forceinline void operator &= (const GSVector8i& v) + __forceinline void operator&=(const GSVector8i& v) { m = _mm256_and_si256(m, v); } - __forceinline void operator |= (const GSVector8i& v) + __forceinline void operator|=(const GSVector8i& v) { m = _mm256_or_si256(m, v); } - __forceinline void operator ^= (const GSVector8i& v) + __forceinline void operator^=(const GSVector8i& v) { m = _mm256_xor_si256(m, v); } - __forceinline friend GSVector8i operator + (const GSVector8i& v1, const GSVector8i& v2) + __forceinline friend GSVector8i operator+(const GSVector8i& v1, const GSVector8i& v2) { return GSVector8i(_mm256_add_epi32(v1, v2)); } - __forceinline friend GSVector8i operator - (const GSVector8i& v1, const GSVector8i& v2) + __forceinline friend GSVector8i operator-(const GSVector8i& v1, const GSVector8i& v2) { return GSVector8i(_mm256_sub_epi32(v1, v2)); } - __forceinline friend GSVector8i operator + (const GSVector8i& v, int i) + __forceinline friend GSVector8i operator+(const GSVector8i& v, int i) { return v + GSVector8i(i); } - __forceinline friend GSVector8i operator - (const GSVector8i& v, int i) + __forceinline friend GSVector8i operator-(const GSVector8i& v, int i) { return v - GSVector8i(i); } - __forceinline friend GSVector8i operator << (const GSVector8i& v, const int i) + __forceinline friend GSVector8i operator<<(const GSVector8i& v, const int i) { return GSVector8i(_mm256_slli_epi32(v, i)); } - __forceinline friend GSVector8i operator >> (const GSVector8i& v, const int i) + __forceinline friend GSVector8i operator>>(const GSVector8i& v, const int i) { return GSVector8i(_mm256_srli_epi32(v, i)); } - __forceinline friend GSVector8i operator & (const GSVector8i& v1, const GSVector8i& v2) + __forceinline friend GSVector8i operator&(const GSVector8i& v1, const GSVector8i& v2) { return GSVector8i(_mm256_and_si256(v1, v2)); } - __forceinline friend GSVector8i operator | (const GSVector8i& v1, const GSVector8i& v2) + __forceinline friend GSVector8i operator|(const GSVector8i& v1, const GSVector8i& v2) { return GSVector8i(_mm256_or_si256(v1, v2)); } - __forceinline friend GSVector8i operator ^ (const GSVector8i& v1, const GSVector8i& v2) + __forceinline friend GSVector8i operator^(const GSVector8i& v1, const GSVector8i& v2) { return GSVector8i(_mm256_xor_si256(v1, v2)); } - __forceinline friend GSVector8i operator & (const GSVector8i& v, int i) + __forceinline friend GSVector8i operator&(const GSVector8i& v, int i) { return v & GSVector8i(i); } - __forceinline friend GSVector8i operator | (const GSVector8i& v, int i) + __forceinline friend GSVector8i operator|(const GSVector8i& v, int i) { return v | GSVector8i(i); } - __forceinline friend GSVector8i operator ^ (const GSVector8i& v, int i) + __forceinline friend GSVector8i operator^(const GSVector8i& v, int i) { return v ^ GSVector8i(i); } - __forceinline friend GSVector8i operator ~ (const GSVector8i& v) + __forceinline friend GSVector8i operator~(const GSVector8i& v) { return v ^ (v == v); } - __forceinline friend GSVector8i operator == (const GSVector8i& v1, const GSVector8i& v2) + __forceinline friend GSVector8i operator==(const GSVector8i& v1, const GSVector8i& v2) { return GSVector8i(_mm256_cmpeq_epi32(v1, v2)); } - __forceinline friend GSVector8i operator != (const GSVector8i& v1, const GSVector8i& v2) + __forceinline friend GSVector8i operator!=(const GSVector8i& v1, const GSVector8i& v2) { return ~(v1 == v2); } - __forceinline friend GSVector8i operator > (const GSVector8i& v1, const GSVector8i& v2) + __forceinline friend GSVector8i operator>(const GSVector8i& v1, const GSVector8i& v2) { return GSVector8i(_mm256_cmpgt_epi32(v1, v2)); } - __forceinline friend GSVector8i operator < (const GSVector8i& v1, const GSVector8i& v2) + __forceinline friend GSVector8i operator<(const GSVector8i& v1, const GSVector8i& v2) { return GSVector8i(_mm256_cmpgt_epi32(v2, v1)); } - __forceinline friend GSVector8i operator >= (const GSVector8i& v1, const GSVector8i& v2) + __forceinline friend GSVector8i operator>=(const GSVector8i& v1, const GSVector8i& v2) { return (v1 > v2) | (v1 == v2); } - __forceinline friend GSVector8i operator <= (const GSVector8i& v1, const GSVector8i& v2) + __forceinline friend GSVector8i operator<=(const GSVector8i& v1, const GSVector8i& v2) { return (v1 < v2) | (v1 == v2); } @@ -1494,10 +1518,10 @@ public: // w = v[127:96] / v[255:224] #define VECTOR8i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ - __forceinline GSVector8i xs##ys##zs##ws() const {return GSVector8i(_mm256_shuffle_epi32(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector8i xs##ys##zs##ws##l() const {return GSVector8i(_mm256_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector8i xs##ys##zs##ws##h() const {return GSVector8i(_mm256_shufflehi_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \ - __forceinline GSVector8i xs##ys##zs##ws##lh() const {return GSVector8i(_mm256_shufflehi_epi16(_mm256_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)), _MM_SHUFFLE(wn, zn, yn, xn)));} \ + __forceinline GSVector8i xs##ys##zs##ws() const { return GSVector8i(_mm256_shuffle_epi32(m, _MM_SHUFFLE(wn, zn, yn, xn))); } \ + __forceinline GSVector8i xs##ys##zs##ws##l() const { return GSVector8i(_mm256_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn))); } \ + __forceinline GSVector8i xs##ys##zs##ws##h() const { return GSVector8i(_mm256_shufflehi_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn))); } \ + __forceinline GSVector8i xs##ys##zs##ws##lh() const { return GSVector8i(_mm256_shufflehi_epi16(_mm256_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)), _MM_SHUFFLE(wn, zn, yn, xn))); } \ #define VECTOR8i_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \ VECTOR8i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \ @@ -1529,8 +1553,8 @@ public: // _ = 0 #define VECTOR8i_PERMUTE128_2(as, an, bs, bn) \ - __forceinline GSVector8i as##bs() const {return GSVector8i(_mm256_permute2x128_si256(m, m, an | (bn << 4)));} \ - __forceinline GSVector8i as##bs(const GSVector8i& v) const {return GSVector8i(_mm256_permute2x128_si256(m, v.m, an | (bn << 4)));} \ + __forceinline GSVector8i as##bs() const { return GSVector8i(_mm256_permute2x128_si256(m, m, an | (bn << 4))); } \ + __forceinline GSVector8i as##bs(const GSVector8i& v) const { return GSVector8i(_mm256_permute2x128_si256(m, v.m, an | (bn << 4))); } \ #define VECTOR8i_PERMUTE128_1(as, an) \ VECTOR8i_PERMUTE128_2(as, an, a, 0) \ @@ -1551,7 +1575,7 @@ public: // d = v[255:192] #define VECTOR8i_PERMUTE64_4(as, an, bs, bn, cs, cn, ds, dn) \ - __forceinline GSVector8i as##bs##cs##ds() const {return GSVector8i(_mm256_permute4x64_epi64(m, _MM_SHUFFLE(dn, cn, bn, an)));} \ + __forceinline GSVector8i as##bs##cs##ds() const { return GSVector8i(_mm256_permute4x64_epi64(m, _MM_SHUFFLE(dn, cn, bn, an))); } \ #define VECTOR8i_PERMUTE64_3(as, an, bs, bn, cs, cn) \ VECTOR8i_PERMUTE64_4(as, an, bs, bn, cs, cn, a, 0) \ @@ -1626,9 +1650,9 @@ public: // this one only has m128 source op, it will be saved to a temp on stack if the compiler is not smart enough and use the address of v directly (<= vs2012u3rc2) return GSVector8i(_mm256_broadcastsi128_si256(v)); // fastest - //return GSVector8i(v); // almost as fast as broadcast - //return cast(v).insert<1>(v); // slow - //return cast(v).aa(); // slowest + // return GSVector8i(v); // almost as fast as broadcast + // return cast(v).insert<1>(v); // slow + // return cast(v).aa(); // slowest } __forceinline static GSVector8i broadcast8(const void* p) @@ -1656,206 +1680,206 @@ public: return GSVector8i(_mm256_broadcastsi128_si256(*(const __m128i*)p)); } - __forceinline static GSVector8i zero() {return GSVector8i(_mm256_setzero_si256());} + __forceinline static GSVector8i zero() { return GSVector8i(_mm256_setzero_si256()); } - __forceinline static GSVector8i xffffffff() {return zero() == zero();} + __forceinline static GSVector8i xffffffff() { return zero() == zero(); } - __forceinline static GSVector8i x00000001() {return xffffffff().srl32(31);} - __forceinline static GSVector8i x00000003() {return xffffffff().srl32(30);} - __forceinline static GSVector8i x00000007() {return xffffffff().srl32(29);} - __forceinline static GSVector8i x0000000f() {return xffffffff().srl32(28);} - __forceinline static GSVector8i x0000001f() {return xffffffff().srl32(27);} - __forceinline static GSVector8i x0000003f() {return xffffffff().srl32(26);} - __forceinline static GSVector8i x0000007f() {return xffffffff().srl32(25);} - __forceinline static GSVector8i x000000ff() {return xffffffff().srl32(24);} - __forceinline static GSVector8i x000001ff() {return xffffffff().srl32(23);} - __forceinline static GSVector8i x000003ff() {return xffffffff().srl32(22);} - __forceinline static GSVector8i x000007ff() {return xffffffff().srl32(21);} - __forceinline static GSVector8i x00000fff() {return xffffffff().srl32(20);} - __forceinline static GSVector8i x00001fff() {return xffffffff().srl32(19);} - __forceinline static GSVector8i x00003fff() {return xffffffff().srl32(18);} - __forceinline static GSVector8i x00007fff() {return xffffffff().srl32(17);} - __forceinline static GSVector8i x0000ffff() {return xffffffff().srl32(16);} - __forceinline static GSVector8i x0001ffff() {return xffffffff().srl32(15);} - __forceinline static GSVector8i x0003ffff() {return xffffffff().srl32(14);} - __forceinline static GSVector8i x0007ffff() {return xffffffff().srl32(13);} - __forceinline static GSVector8i x000fffff() {return xffffffff().srl32(12);} - __forceinline static GSVector8i x001fffff() {return xffffffff().srl32(11);} - __forceinline static GSVector8i x003fffff() {return xffffffff().srl32(10);} - __forceinline static GSVector8i x007fffff() {return xffffffff().srl32( 9);} - __forceinline static GSVector8i x00ffffff() {return xffffffff().srl32( 8);} - __forceinline static GSVector8i x01ffffff() {return xffffffff().srl32( 7);} - __forceinline static GSVector8i x03ffffff() {return xffffffff().srl32( 6);} - __forceinline static GSVector8i x07ffffff() {return xffffffff().srl32( 5);} - __forceinline static GSVector8i x0fffffff() {return xffffffff().srl32( 4);} - __forceinline static GSVector8i x1fffffff() {return xffffffff().srl32( 3);} - __forceinline static GSVector8i x3fffffff() {return xffffffff().srl32( 2);} - __forceinline static GSVector8i x7fffffff() {return xffffffff().srl32( 1);} + __forceinline static GSVector8i x00000001() { return xffffffff().srl32(31); } + __forceinline static GSVector8i x00000003() { return xffffffff().srl32(30); } + __forceinline static GSVector8i x00000007() { return xffffffff().srl32(29); } + __forceinline static GSVector8i x0000000f() { return xffffffff().srl32(28); } + __forceinline static GSVector8i x0000001f() { return xffffffff().srl32(27); } + __forceinline static GSVector8i x0000003f() { return xffffffff().srl32(26); } + __forceinline static GSVector8i x0000007f() { return xffffffff().srl32(25); } + __forceinline static GSVector8i x000000ff() { return xffffffff().srl32(24); } + __forceinline static GSVector8i x000001ff() { return xffffffff().srl32(23); } + __forceinline static GSVector8i x000003ff() { return xffffffff().srl32(22); } + __forceinline static GSVector8i x000007ff() { return xffffffff().srl32(21); } + __forceinline static GSVector8i x00000fff() { return xffffffff().srl32(20); } + __forceinline static GSVector8i x00001fff() { return xffffffff().srl32(19); } + __forceinline static GSVector8i x00003fff() { return xffffffff().srl32(18); } + __forceinline static GSVector8i x00007fff() { return xffffffff().srl32(17); } + __forceinline static GSVector8i x0000ffff() { return xffffffff().srl32(16); } + __forceinline static GSVector8i x0001ffff() { return xffffffff().srl32(15); } + __forceinline static GSVector8i x0003ffff() { return xffffffff().srl32(14); } + __forceinline static GSVector8i x0007ffff() { return xffffffff().srl32(13); } + __forceinline static GSVector8i x000fffff() { return xffffffff().srl32(12); } + __forceinline static GSVector8i x001fffff() { return xffffffff().srl32(11); } + __forceinline static GSVector8i x003fffff() { return xffffffff().srl32(10); } + __forceinline static GSVector8i x007fffff() { return xffffffff().srl32( 9); } + __forceinline static GSVector8i x00ffffff() { return xffffffff().srl32( 8); } + __forceinline static GSVector8i x01ffffff() { return xffffffff().srl32( 7); } + __forceinline static GSVector8i x03ffffff() { return xffffffff().srl32( 6); } + __forceinline static GSVector8i x07ffffff() { return xffffffff().srl32( 5); } + __forceinline static GSVector8i x0fffffff() { return xffffffff().srl32( 4); } + __forceinline static GSVector8i x1fffffff() { return xffffffff().srl32( 3); } + __forceinline static GSVector8i x3fffffff() { return xffffffff().srl32( 2); } + __forceinline static GSVector8i x7fffffff() { return xffffffff().srl32( 1); } - __forceinline static GSVector8i x80000000() {return xffffffff().sll32(31);} - __forceinline static GSVector8i xc0000000() {return xffffffff().sll32(30);} - __forceinline static GSVector8i xe0000000() {return xffffffff().sll32(29);} - __forceinline static GSVector8i xf0000000() {return xffffffff().sll32(28);} - __forceinline static GSVector8i xf8000000() {return xffffffff().sll32(27);} - __forceinline static GSVector8i xfc000000() {return xffffffff().sll32(26);} - __forceinline static GSVector8i xfe000000() {return xffffffff().sll32(25);} - __forceinline static GSVector8i xff000000() {return xffffffff().sll32(24);} - __forceinline static GSVector8i xff800000() {return xffffffff().sll32(23);} - __forceinline static GSVector8i xffc00000() {return xffffffff().sll32(22);} - __forceinline static GSVector8i xffe00000() {return xffffffff().sll32(21);} - __forceinline static GSVector8i xfff00000() {return xffffffff().sll32(20);} - __forceinline static GSVector8i xfff80000() {return xffffffff().sll32(19);} - __forceinline static GSVector8i xfffc0000() {return xffffffff().sll32(18);} - __forceinline static GSVector8i xfffe0000() {return xffffffff().sll32(17);} - __forceinline static GSVector8i xffff0000() {return xffffffff().sll32(16);} - __forceinline static GSVector8i xffff8000() {return xffffffff().sll32(15);} - __forceinline static GSVector8i xffffc000() {return xffffffff().sll32(14);} - __forceinline static GSVector8i xffffe000() {return xffffffff().sll32(13);} - __forceinline static GSVector8i xfffff000() {return xffffffff().sll32(12);} - __forceinline static GSVector8i xfffff800() {return xffffffff().sll32(11);} - __forceinline static GSVector8i xfffffc00() {return xffffffff().sll32(10);} - __forceinline static GSVector8i xfffffe00() {return xffffffff().sll32( 9);} - __forceinline static GSVector8i xffffff00() {return xffffffff().sll32( 8);} - __forceinline static GSVector8i xffffff80() {return xffffffff().sll32( 7);} - __forceinline static GSVector8i xffffffc0() {return xffffffff().sll32( 6);} - __forceinline static GSVector8i xffffffe0() {return xffffffff().sll32( 5);} - __forceinline static GSVector8i xfffffff0() {return xffffffff().sll32( 4);} - __forceinline static GSVector8i xfffffff8() {return xffffffff().sll32( 3);} - __forceinline static GSVector8i xfffffffc() {return xffffffff().sll32( 2);} - __forceinline static GSVector8i xfffffffe() {return xffffffff().sll32( 1);} + __forceinline static GSVector8i x80000000() { return xffffffff().sll32(31); } + __forceinline static GSVector8i xc0000000() { return xffffffff().sll32(30); } + __forceinline static GSVector8i xe0000000() { return xffffffff().sll32(29); } + __forceinline static GSVector8i xf0000000() { return xffffffff().sll32(28); } + __forceinline static GSVector8i xf8000000() { return xffffffff().sll32(27); } + __forceinline static GSVector8i xfc000000() { return xffffffff().sll32(26); } + __forceinline static GSVector8i xfe000000() { return xffffffff().sll32(25); } + __forceinline static GSVector8i xff000000() { return xffffffff().sll32(24); } + __forceinline static GSVector8i xff800000() { return xffffffff().sll32(23); } + __forceinline static GSVector8i xffc00000() { return xffffffff().sll32(22); } + __forceinline static GSVector8i xffe00000() { return xffffffff().sll32(21); } + __forceinline static GSVector8i xfff00000() { return xffffffff().sll32(20); } + __forceinline static GSVector8i xfff80000() { return xffffffff().sll32(19); } + __forceinline static GSVector8i xfffc0000() { return xffffffff().sll32(18); } + __forceinline static GSVector8i xfffe0000() { return xffffffff().sll32(17); } + __forceinline static GSVector8i xffff0000() { return xffffffff().sll32(16); } + __forceinline static GSVector8i xffff8000() { return xffffffff().sll32(15); } + __forceinline static GSVector8i xffffc000() { return xffffffff().sll32(14); } + __forceinline static GSVector8i xffffe000() { return xffffffff().sll32(13); } + __forceinline static GSVector8i xfffff000() { return xffffffff().sll32(12); } + __forceinline static GSVector8i xfffff800() { return xffffffff().sll32(11); } + __forceinline static GSVector8i xfffffc00() { return xffffffff().sll32(10); } + __forceinline static GSVector8i xfffffe00() { return xffffffff().sll32( 9); } + __forceinline static GSVector8i xffffff00() { return xffffffff().sll32( 8); } + __forceinline static GSVector8i xffffff80() { return xffffffff().sll32( 7); } + __forceinline static GSVector8i xffffffc0() { return xffffffff().sll32( 6); } + __forceinline static GSVector8i xffffffe0() { return xffffffff().sll32( 5); } + __forceinline static GSVector8i xfffffff0() { return xffffffff().sll32( 4); } + __forceinline static GSVector8i xfffffff8() { return xffffffff().sll32( 3); } + __forceinline static GSVector8i xfffffffc() { return xffffffff().sll32( 2); } + __forceinline static GSVector8i xfffffffe() { return xffffffff().sll32( 1); } - __forceinline static GSVector8i x0001() {return xffffffff().srl16(15);} - __forceinline static GSVector8i x0003() {return xffffffff().srl16(14);} - __forceinline static GSVector8i x0007() {return xffffffff().srl16(13);} - __forceinline static GSVector8i x000f() {return xffffffff().srl16(12);} - __forceinline static GSVector8i x001f() {return xffffffff().srl16(11);} - __forceinline static GSVector8i x003f() {return xffffffff().srl16(10);} - __forceinline static GSVector8i x007f() {return xffffffff().srl16( 9);} - __forceinline static GSVector8i x00ff() {return xffffffff().srl16( 8);} - __forceinline static GSVector8i x01ff() {return xffffffff().srl16( 7);} - __forceinline static GSVector8i x03ff() {return xffffffff().srl16( 6);} - __forceinline static GSVector8i x07ff() {return xffffffff().srl16( 5);} - __forceinline static GSVector8i x0fff() {return xffffffff().srl16( 4);} - __forceinline static GSVector8i x1fff() {return xffffffff().srl16( 3);} - __forceinline static GSVector8i x3fff() {return xffffffff().srl16( 2);} - __forceinline static GSVector8i x7fff() {return xffffffff().srl16( 1);} + __forceinline static GSVector8i x0001() { return xffffffff().srl16(15); } + __forceinline static GSVector8i x0003() { return xffffffff().srl16(14); } + __forceinline static GSVector8i x0007() { return xffffffff().srl16(13); } + __forceinline static GSVector8i x000f() { return xffffffff().srl16(12); } + __forceinline static GSVector8i x001f() { return xffffffff().srl16(11); } + __forceinline static GSVector8i x003f() { return xffffffff().srl16(10); } + __forceinline static GSVector8i x007f() { return xffffffff().srl16( 9); } + __forceinline static GSVector8i x00ff() { return xffffffff().srl16( 8); } + __forceinline static GSVector8i x01ff() { return xffffffff().srl16( 7); } + __forceinline static GSVector8i x03ff() { return xffffffff().srl16( 6); } + __forceinline static GSVector8i x07ff() { return xffffffff().srl16( 5); } + __forceinline static GSVector8i x0fff() { return xffffffff().srl16( 4); } + __forceinline static GSVector8i x1fff() { return xffffffff().srl16( 3); } + __forceinline static GSVector8i x3fff() { return xffffffff().srl16( 2); } + __forceinline static GSVector8i x7fff() { return xffffffff().srl16( 1); } - __forceinline static GSVector8i x8000() {return xffffffff().sll16(15);} - __forceinline static GSVector8i xc000() {return xffffffff().sll16(14);} - __forceinline static GSVector8i xe000() {return xffffffff().sll16(13);} - __forceinline static GSVector8i xf000() {return xffffffff().sll16(12);} - __forceinline static GSVector8i xf800() {return xffffffff().sll16(11);} - __forceinline static GSVector8i xfc00() {return xffffffff().sll16(10);} - __forceinline static GSVector8i xfe00() {return xffffffff().sll16( 9);} - __forceinline static GSVector8i xff00() {return xffffffff().sll16( 8);} - __forceinline static GSVector8i xff80() {return xffffffff().sll16( 7);} - __forceinline static GSVector8i xffc0() {return xffffffff().sll16( 6);} - __forceinline static GSVector8i xffe0() {return xffffffff().sll16( 5);} - __forceinline static GSVector8i xfff0() {return xffffffff().sll16( 4);} - __forceinline static GSVector8i xfff8() {return xffffffff().sll16( 3);} - __forceinline static GSVector8i xfffc() {return xffffffff().sll16( 2);} - __forceinline static GSVector8i xfffe() {return xffffffff().sll16( 1);} + __forceinline static GSVector8i x8000() { return xffffffff().sll16(15); } + __forceinline static GSVector8i xc000() { return xffffffff().sll16(14); } + __forceinline static GSVector8i xe000() { return xffffffff().sll16(13); } + __forceinline static GSVector8i xf000() { return xffffffff().sll16(12); } + __forceinline static GSVector8i xf800() { return xffffffff().sll16(11); } + __forceinline static GSVector8i xfc00() { return xffffffff().sll16(10); } + __forceinline static GSVector8i xfe00() { return xffffffff().sll16( 9); } + __forceinline static GSVector8i xff00() { return xffffffff().sll16( 8); } + __forceinline static GSVector8i xff80() { return xffffffff().sll16( 7); } + __forceinline static GSVector8i xffc0() { return xffffffff().sll16( 6); } + __forceinline static GSVector8i xffe0() { return xffffffff().sll16( 5); } + __forceinline static GSVector8i xfff0() { return xffffffff().sll16( 4); } + __forceinline static GSVector8i xfff8() { return xffffffff().sll16( 3); } + __forceinline static GSVector8i xfffc() { return xffffffff().sll16( 2); } + __forceinline static GSVector8i xfffe() { return xffffffff().sll16( 1); } - __forceinline static GSVector8i xffffffff(const GSVector8i& v) {return v == v;} + __forceinline static GSVector8i xffffffff(const GSVector8i& v) { return v == v; } - __forceinline static GSVector8i x00000001(const GSVector8i& v) {return xffffffff(v).srl32(31);} - __forceinline static GSVector8i x00000003(const GSVector8i& v) {return xffffffff(v).srl32(30);} - __forceinline static GSVector8i x00000007(const GSVector8i& v) {return xffffffff(v).srl32(29);} - __forceinline static GSVector8i x0000000f(const GSVector8i& v) {return xffffffff(v).srl32(28);} - __forceinline static GSVector8i x0000001f(const GSVector8i& v) {return xffffffff(v).srl32(27);} - __forceinline static GSVector8i x0000003f(const GSVector8i& v) {return xffffffff(v).srl32(26);} - __forceinline static GSVector8i x0000007f(const GSVector8i& v) {return xffffffff(v).srl32(25);} - __forceinline static GSVector8i x000000ff(const GSVector8i& v) {return xffffffff(v).srl32(24);} - __forceinline static GSVector8i x000001ff(const GSVector8i& v) {return xffffffff(v).srl32(23);} - __forceinline static GSVector8i x000003ff(const GSVector8i& v) {return xffffffff(v).srl32(22);} - __forceinline static GSVector8i x000007ff(const GSVector8i& v) {return xffffffff(v).srl32(21);} - __forceinline static GSVector8i x00000fff(const GSVector8i& v) {return xffffffff(v).srl32(20);} - __forceinline static GSVector8i x00001fff(const GSVector8i& v) {return xffffffff(v).srl32(19);} - __forceinline static GSVector8i x00003fff(const GSVector8i& v) {return xffffffff(v).srl32(18);} - __forceinline static GSVector8i x00007fff(const GSVector8i& v) {return xffffffff(v).srl32(17);} - __forceinline static GSVector8i x0000ffff(const GSVector8i& v) {return xffffffff(v).srl32(16);} - __forceinline static GSVector8i x0001ffff(const GSVector8i& v) {return xffffffff(v).srl32(15);} - __forceinline static GSVector8i x0003ffff(const GSVector8i& v) {return xffffffff(v).srl32(14);} - __forceinline static GSVector8i x0007ffff(const GSVector8i& v) {return xffffffff(v).srl32(13);} - __forceinline static GSVector8i x000fffff(const GSVector8i& v) {return xffffffff(v).srl32(12);} - __forceinline static GSVector8i x001fffff(const GSVector8i& v) {return xffffffff(v).srl32(11);} - __forceinline static GSVector8i x003fffff(const GSVector8i& v) {return xffffffff(v).srl32(10);} - __forceinline static GSVector8i x007fffff(const GSVector8i& v) {return xffffffff(v).srl32( 9);} - __forceinline static GSVector8i x00ffffff(const GSVector8i& v) {return xffffffff(v).srl32( 8);} - __forceinline static GSVector8i x01ffffff(const GSVector8i& v) {return xffffffff(v).srl32( 7);} - __forceinline static GSVector8i x03ffffff(const GSVector8i& v) {return xffffffff(v).srl32( 6);} - __forceinline static GSVector8i x07ffffff(const GSVector8i& v) {return xffffffff(v).srl32( 5);} - __forceinline static GSVector8i x0fffffff(const GSVector8i& v) {return xffffffff(v).srl32( 4);} - __forceinline static GSVector8i x1fffffff(const GSVector8i& v) {return xffffffff(v).srl32( 3);} - __forceinline static GSVector8i x3fffffff(const GSVector8i& v) {return xffffffff(v).srl32( 2);} - __forceinline static GSVector8i x7fffffff(const GSVector8i& v) {return xffffffff(v).srl32( 1);} + __forceinline static GSVector8i x00000001(const GSVector8i& v) { return xffffffff(v).srl32(31); } + __forceinline static GSVector8i x00000003(const GSVector8i& v) { return xffffffff(v).srl32(30); } + __forceinline static GSVector8i x00000007(const GSVector8i& v) { return xffffffff(v).srl32(29); } + __forceinline static GSVector8i x0000000f(const GSVector8i& v) { return xffffffff(v).srl32(28); } + __forceinline static GSVector8i x0000001f(const GSVector8i& v) { return xffffffff(v).srl32(27); } + __forceinline static GSVector8i x0000003f(const GSVector8i& v) { return xffffffff(v).srl32(26); } + __forceinline static GSVector8i x0000007f(const GSVector8i& v) { return xffffffff(v).srl32(25); } + __forceinline static GSVector8i x000000ff(const GSVector8i& v) { return xffffffff(v).srl32(24); } + __forceinline static GSVector8i x000001ff(const GSVector8i& v) { return xffffffff(v).srl32(23); } + __forceinline static GSVector8i x000003ff(const GSVector8i& v) { return xffffffff(v).srl32(22); } + __forceinline static GSVector8i x000007ff(const GSVector8i& v) { return xffffffff(v).srl32(21); } + __forceinline static GSVector8i x00000fff(const GSVector8i& v) { return xffffffff(v).srl32(20); } + __forceinline static GSVector8i x00001fff(const GSVector8i& v) { return xffffffff(v).srl32(19); } + __forceinline static GSVector8i x00003fff(const GSVector8i& v) { return xffffffff(v).srl32(18); } + __forceinline static GSVector8i x00007fff(const GSVector8i& v) { return xffffffff(v).srl32(17); } + __forceinline static GSVector8i x0000ffff(const GSVector8i& v) { return xffffffff(v).srl32(16); } + __forceinline static GSVector8i x0001ffff(const GSVector8i& v) { return xffffffff(v).srl32(15); } + __forceinline static GSVector8i x0003ffff(const GSVector8i& v) { return xffffffff(v).srl32(14); } + __forceinline static GSVector8i x0007ffff(const GSVector8i& v) { return xffffffff(v).srl32(13); } + __forceinline static GSVector8i x000fffff(const GSVector8i& v) { return xffffffff(v).srl32(12); } + __forceinline static GSVector8i x001fffff(const GSVector8i& v) { return xffffffff(v).srl32(11); } + __forceinline static GSVector8i x003fffff(const GSVector8i& v) { return xffffffff(v).srl32(10); } + __forceinline static GSVector8i x007fffff(const GSVector8i& v) { return xffffffff(v).srl32( 9); } + __forceinline static GSVector8i x00ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 8); } + __forceinline static GSVector8i x01ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 7); } + __forceinline static GSVector8i x03ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 6); } + __forceinline static GSVector8i x07ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 5); } + __forceinline static GSVector8i x0fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 4); } + __forceinline static GSVector8i x1fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 3); } + __forceinline static GSVector8i x3fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 2); } + __forceinline static GSVector8i x7fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 1); } - __forceinline static GSVector8i x80000000(const GSVector8i& v) {return xffffffff(v).sll32(31);} - __forceinline static GSVector8i xc0000000(const GSVector8i& v) {return xffffffff(v).sll32(30);} - __forceinline static GSVector8i xe0000000(const GSVector8i& v) {return xffffffff(v).sll32(29);} - __forceinline static GSVector8i xf0000000(const GSVector8i& v) {return xffffffff(v).sll32(28);} - __forceinline static GSVector8i xf8000000(const GSVector8i& v) {return xffffffff(v).sll32(27);} - __forceinline static GSVector8i xfc000000(const GSVector8i& v) {return xffffffff(v).sll32(26);} - __forceinline static GSVector8i xfe000000(const GSVector8i& v) {return xffffffff(v).sll32(25);} - __forceinline static GSVector8i xff000000(const GSVector8i& v) {return xffffffff(v).sll32(24);} - __forceinline static GSVector8i xff800000(const GSVector8i& v) {return xffffffff(v).sll32(23);} - __forceinline static GSVector8i xffc00000(const GSVector8i& v) {return xffffffff(v).sll32(22);} - __forceinline static GSVector8i xffe00000(const GSVector8i& v) {return xffffffff(v).sll32(21);} - __forceinline static GSVector8i xfff00000(const GSVector8i& v) {return xffffffff(v).sll32(20);} - __forceinline static GSVector8i xfff80000(const GSVector8i& v) {return xffffffff(v).sll32(19);} - __forceinline static GSVector8i xfffc0000(const GSVector8i& v) {return xffffffff(v).sll32(18);} - __forceinline static GSVector8i xfffe0000(const GSVector8i& v) {return xffffffff(v).sll32(17);} - __forceinline static GSVector8i xffff0000(const GSVector8i& v) {return xffffffff(v).sll32(16);} - __forceinline static GSVector8i xffff8000(const GSVector8i& v) {return xffffffff(v).sll32(15);} - __forceinline static GSVector8i xffffc000(const GSVector8i& v) {return xffffffff(v).sll32(14);} - __forceinline static GSVector8i xffffe000(const GSVector8i& v) {return xffffffff(v).sll32(13);} - __forceinline static GSVector8i xfffff000(const GSVector8i& v) {return xffffffff(v).sll32(12);} - __forceinline static GSVector8i xfffff800(const GSVector8i& v) {return xffffffff(v).sll32(11);} - __forceinline static GSVector8i xfffffc00(const GSVector8i& v) {return xffffffff(v).sll32(10);} - __forceinline static GSVector8i xfffffe00(const GSVector8i& v) {return xffffffff(v).sll32( 9);} - __forceinline static GSVector8i xffffff00(const GSVector8i& v) {return xffffffff(v).sll32( 8);} - __forceinline static GSVector8i xffffff80(const GSVector8i& v) {return xffffffff(v).sll32( 7);} - __forceinline static GSVector8i xffffffc0(const GSVector8i& v) {return xffffffff(v).sll32( 6);} - __forceinline static GSVector8i xffffffe0(const GSVector8i& v) {return xffffffff(v).sll32( 5);} - __forceinline static GSVector8i xfffffff0(const GSVector8i& v) {return xffffffff(v).sll32( 4);} - __forceinline static GSVector8i xfffffff8(const GSVector8i& v) {return xffffffff(v).sll32( 3);} - __forceinline static GSVector8i xfffffffc(const GSVector8i& v) {return xffffffff(v).sll32( 2);} - __forceinline static GSVector8i xfffffffe(const GSVector8i& v) {return xffffffff(v).sll32( 1);} + __forceinline static GSVector8i x80000000(const GSVector8i& v) { return xffffffff(v).sll32(31); } + __forceinline static GSVector8i xc0000000(const GSVector8i& v) { return xffffffff(v).sll32(30); } + __forceinline static GSVector8i xe0000000(const GSVector8i& v) { return xffffffff(v).sll32(29); } + __forceinline static GSVector8i xf0000000(const GSVector8i& v) { return xffffffff(v).sll32(28); } + __forceinline static GSVector8i xf8000000(const GSVector8i& v) { return xffffffff(v).sll32(27); } + __forceinline static GSVector8i xfc000000(const GSVector8i& v) { return xffffffff(v).sll32(26); } + __forceinline static GSVector8i xfe000000(const GSVector8i& v) { return xffffffff(v).sll32(25); } + __forceinline static GSVector8i xff000000(const GSVector8i& v) { return xffffffff(v).sll32(24); } + __forceinline static GSVector8i xff800000(const GSVector8i& v) { return xffffffff(v).sll32(23); } + __forceinline static GSVector8i xffc00000(const GSVector8i& v) { return xffffffff(v).sll32(22); } + __forceinline static GSVector8i xffe00000(const GSVector8i& v) { return xffffffff(v).sll32(21); } + __forceinline static GSVector8i xfff00000(const GSVector8i& v) { return xffffffff(v).sll32(20); } + __forceinline static GSVector8i xfff80000(const GSVector8i& v) { return xffffffff(v).sll32(19); } + __forceinline static GSVector8i xfffc0000(const GSVector8i& v) { return xffffffff(v).sll32(18); } + __forceinline static GSVector8i xfffe0000(const GSVector8i& v) { return xffffffff(v).sll32(17); } + __forceinline static GSVector8i xffff0000(const GSVector8i& v) { return xffffffff(v).sll32(16); } + __forceinline static GSVector8i xffff8000(const GSVector8i& v) { return xffffffff(v).sll32(15); } + __forceinline static GSVector8i xffffc000(const GSVector8i& v) { return xffffffff(v).sll32(14); } + __forceinline static GSVector8i xffffe000(const GSVector8i& v) { return xffffffff(v).sll32(13); } + __forceinline static GSVector8i xfffff000(const GSVector8i& v) { return xffffffff(v).sll32(12); } + __forceinline static GSVector8i xfffff800(const GSVector8i& v) { return xffffffff(v).sll32(11); } + __forceinline static GSVector8i xfffffc00(const GSVector8i& v) { return xffffffff(v).sll32(10); } + __forceinline static GSVector8i xfffffe00(const GSVector8i& v) { return xffffffff(v).sll32( 9); } + __forceinline static GSVector8i xffffff00(const GSVector8i& v) { return xffffffff(v).sll32( 8); } + __forceinline static GSVector8i xffffff80(const GSVector8i& v) { return xffffffff(v).sll32( 7); } + __forceinline static GSVector8i xffffffc0(const GSVector8i& v) { return xffffffff(v).sll32( 6); } + __forceinline static GSVector8i xffffffe0(const GSVector8i& v) { return xffffffff(v).sll32( 5); } + __forceinline static GSVector8i xfffffff0(const GSVector8i& v) { return xffffffff(v).sll32( 4); } + __forceinline static GSVector8i xfffffff8(const GSVector8i& v) { return xffffffff(v).sll32( 3); } + __forceinline static GSVector8i xfffffffc(const GSVector8i& v) { return xffffffff(v).sll32( 2); } + __forceinline static GSVector8i xfffffffe(const GSVector8i& v) { return xffffffff(v).sll32( 1); } - __forceinline static GSVector8i x0001(const GSVector8i& v) {return xffffffff(v).srl16(15);} - __forceinline static GSVector8i x0003(const GSVector8i& v) {return xffffffff(v).srl16(14);} - __forceinline static GSVector8i x0007(const GSVector8i& v) {return xffffffff(v).srl16(13);} - __forceinline static GSVector8i x000f(const GSVector8i& v) {return xffffffff(v).srl16(12);} - __forceinline static GSVector8i x001f(const GSVector8i& v) {return xffffffff(v).srl16(11);} - __forceinline static GSVector8i x003f(const GSVector8i& v) {return xffffffff(v).srl16(10);} - __forceinline static GSVector8i x007f(const GSVector8i& v) {return xffffffff(v).srl16( 9);} - __forceinline static GSVector8i x00ff(const GSVector8i& v) {return xffffffff(v).srl16( 8);} - __forceinline static GSVector8i x01ff(const GSVector8i& v) {return xffffffff(v).srl16( 7);} - __forceinline static GSVector8i x03ff(const GSVector8i& v) {return xffffffff(v).srl16( 6);} - __forceinline static GSVector8i x07ff(const GSVector8i& v) {return xffffffff(v).srl16( 5);} - __forceinline static GSVector8i x0fff(const GSVector8i& v) {return xffffffff(v).srl16( 4);} - __forceinline static GSVector8i x1fff(const GSVector8i& v) {return xffffffff(v).srl16( 3);} - __forceinline static GSVector8i x3fff(const GSVector8i& v) {return xffffffff(v).srl16( 2);} - __forceinline static GSVector8i x7fff(const GSVector8i& v) {return xffffffff(v).srl16( 1);} + __forceinline static GSVector8i x0001(const GSVector8i& v) { return xffffffff(v).srl16(15); } + __forceinline static GSVector8i x0003(const GSVector8i& v) { return xffffffff(v).srl16(14); } + __forceinline static GSVector8i x0007(const GSVector8i& v) { return xffffffff(v).srl16(13); } + __forceinline static GSVector8i x000f(const GSVector8i& v) { return xffffffff(v).srl16(12); } + __forceinline static GSVector8i x001f(const GSVector8i& v) { return xffffffff(v).srl16(11); } + __forceinline static GSVector8i x003f(const GSVector8i& v) { return xffffffff(v).srl16(10); } + __forceinline static GSVector8i x007f(const GSVector8i& v) { return xffffffff(v).srl16( 9); } + __forceinline static GSVector8i x00ff(const GSVector8i& v) { return xffffffff(v).srl16( 8); } + __forceinline static GSVector8i x01ff(const GSVector8i& v) { return xffffffff(v).srl16( 7); } + __forceinline static GSVector8i x03ff(const GSVector8i& v) { return xffffffff(v).srl16( 6); } + __forceinline static GSVector8i x07ff(const GSVector8i& v) { return xffffffff(v).srl16( 5); } + __forceinline static GSVector8i x0fff(const GSVector8i& v) { return xffffffff(v).srl16( 4); } + __forceinline static GSVector8i x1fff(const GSVector8i& v) { return xffffffff(v).srl16( 3); } + __forceinline static GSVector8i x3fff(const GSVector8i& v) { return xffffffff(v).srl16( 2); } + __forceinline static GSVector8i x7fff(const GSVector8i& v) { return xffffffff(v).srl16( 1); } - __forceinline static GSVector8i x8000(const GSVector8i& v) {return xffffffff(v).sll16(15);} - __forceinline static GSVector8i xc000(const GSVector8i& v) {return xffffffff(v).sll16(14);} - __forceinline static GSVector8i xe000(const GSVector8i& v) {return xffffffff(v).sll16(13);} - __forceinline static GSVector8i xf000(const GSVector8i& v) {return xffffffff(v).sll16(12);} - __forceinline static GSVector8i xf800(const GSVector8i& v) {return xffffffff(v).sll16(11);} - __forceinline static GSVector8i xfc00(const GSVector8i& v) {return xffffffff(v).sll16(10);} - __forceinline static GSVector8i xfe00(const GSVector8i& v) {return xffffffff(v).sll16( 9);} - __forceinline static GSVector8i xff00(const GSVector8i& v) {return xffffffff(v).sll16( 8);} - __forceinline static GSVector8i xff80(const GSVector8i& v) {return xffffffff(v).sll16( 7);} - __forceinline static GSVector8i xffc0(const GSVector8i& v) {return xffffffff(v).sll16( 6);} - __forceinline static GSVector8i xffe0(const GSVector8i& v) {return xffffffff(v).sll16( 5);} - __forceinline static GSVector8i xfff0(const GSVector8i& v) {return xffffffff(v).sll16( 4);} - __forceinline static GSVector8i xfff8(const GSVector8i& v) {return xffffffff(v).sll16( 3);} - __forceinline static GSVector8i xfffc(const GSVector8i& v) {return xffffffff(v).sll16( 2);} - __forceinline static GSVector8i xfffe(const GSVector8i& v) {return xffffffff(v).sll16( 1);} + __forceinline static GSVector8i x8000(const GSVector8i& v) { return xffffffff(v).sll16(15); } + __forceinline static GSVector8i xc000(const GSVector8i& v) { return xffffffff(v).sll16(14); } + __forceinline static GSVector8i xe000(const GSVector8i& v) { return xffffffff(v).sll16(13); } + __forceinline static GSVector8i xf000(const GSVector8i& v) { return xffffffff(v).sll16(12); } + __forceinline static GSVector8i xf800(const GSVector8i& v) { return xffffffff(v).sll16(11); } + __forceinline static GSVector8i xfc00(const GSVector8i& v) { return xffffffff(v).sll16(10); } + __forceinline static GSVector8i xfe00(const GSVector8i& v) { return xffffffff(v).sll16( 9); } + __forceinline static GSVector8i xff00(const GSVector8i& v) { return xffffffff(v).sll16( 8); } + __forceinline static GSVector8i xff80(const GSVector8i& v) { return xffffffff(v).sll16( 7); } + __forceinline static GSVector8i xffc0(const GSVector8i& v) { return xffffffff(v).sll16( 6); } + __forceinline static GSVector8i xffe0(const GSVector8i& v) { return xffffffff(v).sll16( 5); } + __forceinline static GSVector8i xfff0(const GSVector8i& v) { return xffffffff(v).sll16( 4); } + __forceinline static GSVector8i xfff8(const GSVector8i& v) { return xffffffff(v).sll16( 3); } + __forceinline static GSVector8i xfffc(const GSVector8i& v) { return xffffffff(v).sll16( 2); } + __forceinline static GSVector8i xfffe(const GSVector8i& v) { return xffffffff(v).sll16( 1); } - __forceinline static GSVector8i xff(int n) {return m_xff[n];} - __forceinline static GSVector8i x0f(int n) {return m_x0f[n];} + __forceinline static GSVector8i xff(int n) { return m_xff[n]; } + __forceinline static GSVector8i x0f(int n) { return m_x0f[n]; } }; #endif diff --git a/plugins/GSdx/GSdx.cpp b/plugins/GSdx/GSdx.cpp index 51ce320dd4..d8f65b4b6b 100644 --- a/plugins/GSdx/GSdx.cpp +++ b/plugins/GSdx/GSdx.cpp @@ -30,14 +30,14 @@ static void* s_hModule; BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) { - switch(ul_reason_for_call) + switch (ul_reason_for_call) { - case DLL_PROCESS_ATTACH: - s_hModule = hModule; - case DLL_THREAD_ATTACH: - case DLL_THREAD_DETACH: - case DLL_PROCESS_DETACH: - break; + case DLL_PROCESS_ATTACH: + s_hModule = hModule; + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + case DLL_PROCESS_DETACH: + break; } return TRUE; @@ -47,11 +47,14 @@ bool GSdxApp::LoadResource(int id, std::vector& buff, const wchar_t* type) { buff.clear(); HRSRC hRsrc = FindResource((HMODULE)s_hModule, MAKEINTRESOURCE(id), type != NULL ? type : (LPWSTR)RT_RCDATA); - if(!hRsrc) return false; + if (!hRsrc) + return false; HGLOBAL hGlobal = ::LoadResource((HMODULE)s_hModule, hRsrc); - if(!hGlobal) return false; + if (!hGlobal) + return false; DWORD size = SizeofResource((HMODULE)s_hModule, hRsrc); - if(!size) return false; + if (!size) + return false; // On Linux resources are always NULL terminated // Add + 1 on size to do the same for compatibility sake (required by GSDeviceOGL) buff.resize(size + 1); @@ -66,7 +69,8 @@ bool GSdxApp::LoadResource(int id, std::vector& buff, const wchar_t* type) bool GSdxApp::LoadResource(int id, std::vector& buff, const char* type) { std::string path; - switch (id) { + switch (id) + { case IDR_COMMON_GLSL: path = "/GSdx/res/glsl/common_header.glsl"; break; @@ -99,12 +103,13 @@ bool GSdxApp::LoadResource(int id, std::vector& buff, const char* type) return false; } - GBytes *bytes = g_resource_lookup_data(GSdx_res_get_resource(), path.c_str(), G_RESOURCE_LOOKUP_FLAGS_NONE, nullptr); + GBytes* bytes = g_resource_lookup_data(GSdx_res_get_resource(), path.c_str(), G_RESOURCE_LOOKUP_FLAGS_NONE, nullptr); size_t size = 0; const void* data = g_bytes_get_data(bytes, &size); - if (data == nullptr || size == 0) { + if (data == nullptr || size == 0) + { printf("Failed to get data for resource: %d\n", id); return false; } @@ -125,14 +130,16 @@ size_t GSdxApp::GetIniString(const char* lpAppName, const char* lpKeyName, const std::string key(lpKeyName); std::string value = m_configuration_map[key]; - if (value.empty()) { + if (value.empty()) + { // save the value for futur call m_configuration_map[key] = std::string(lpDefault); strcpy(lpReturnedString, lpDefault); - } else + } + else strcpy(lpReturnedString, value.c_str()); - return 0; + return 0; } bool GSdxApp::WriteIniString(const char* lpAppName, const char* lpKeyName, const char* pString, const char* lpFileName) @@ -146,16 +153,19 @@ bool GSdxApp::WriteIniString(const char* lpAppName, const char* lpKeyName, const // Save config to a file FILE* f = px_fopen(lpFileName, "w"); - if (f == NULL) return false; // FIXME print a nice message + if (f == NULL) + return false; // FIXME print a nice message // Maintain compatibility with GSDumpGUI/old Windows ini. #ifdef _WIN32 fprintf(f, "[Settings]\n"); #endif - for (const auto& entry : m_configuration_map) { + for (const auto& entry : m_configuration_map) + { // Do not save the inifile key which is not an option - if (entry.first.compare("inifile") == 0) continue; + if (entry.first.compare("inifile") == 0) + continue; // Only keep option that have a default value (allow to purge old option of the GSdx.ini) if (!entry.second.empty() && m_default_configuration.find(entry.first) != m_default_configuration.end()) @@ -171,11 +181,13 @@ int GSdxApp::GetIniInt(const char* lpAppName, const char* lpKeyName, int nDefaul BuildConfigurationMap(lpFileName); std::string value = m_configuration_map[std::string(lpKeyName)]; - if (value.empty()) { + if (value.empty()) + { // save the value for futur call SetConfig(lpKeyName, nDefault); return nDefault; - } else + } + else return atoi(value.c_str()); } @@ -341,7 +353,7 @@ void GSdxApp::Init() m_default_configuration["force_texture_clear"] = "0"; m_default_configuration["fxaa"] = "0"; m_default_configuration["interlace"] = "7"; - m_default_configuration["conservative_framebuffer"] = "1"; + m_default_configuration["conservative_framebuffer"] = "1"; m_default_configuration["linear_present"] = "1"; m_default_configuration["MaxAnisotropy"] = "0"; m_default_configuration["mipmap"] = "1"; @@ -425,10 +437,12 @@ void GSdxApp::Init() void GSdxApp::ReloadConfig() { - if (m_configuration_map.empty()) return; + if (m_configuration_map.empty()) + return; auto file = m_configuration_map.find("inifile"); - if (file == m_configuration_map.end()) return; + if (file == m_configuration_map.end()) + return; // A map was built so reload it std::string filename = file->second; @@ -440,7 +454,8 @@ void GSdxApp::BuildConfigurationMap(const char* lpFileName) { // Check if the map was already built std::string inifile_value(lpFileName); - if ( inifile_value.compare(m_configuration_map["inifile"]) == 0 ) return; + if (inifile_value.compare(m_configuration_map["inifile"]) == 0) + return; m_configuration_map["inifile"] = inifile_value; // Load config from file @@ -453,7 +468,8 @@ void GSdxApp::BuildConfigurationMap(const char* lpFileName) return; std::string line; - while (std::getline(file, line)) { + while (std::getline(file, line)) + { const auto separator = line.find('='); if (separator == std::string::npos) continue; @@ -485,7 +501,7 @@ void* GSdxApp::GetModuleHandlePtr() void GSdxApp::SetConfigDir(const char* dir) { - if( dir == NULL ) + if (dir == NULL) { m_ini = "inis/GSdx.ini"; } @@ -493,7 +509,7 @@ void GSdxApp::SetConfigDir(const char* dir) { m_ini = dir; - if(m_ini[m_ini.length() - 1] != DIRECTORY_SEPARATOR) + if (m_ini[m_ini.length() - 1] != DIRECTORY_SEPARATOR) { m_ini += DIRECTORY_SEPARATOR; } @@ -507,9 +523,12 @@ std::string GSdxApp::GetConfigS(const char* entry) char buff[4096] = {0}; auto def = m_default_configuration.find(entry); - if (def != m_default_configuration.end()) { + if (def != m_default_configuration.end()) + { GetIniString(m_section.c_str(), entry, def->second.c_str(), buff, countof(buff), m_ini.c_str()); - } else { + } + else + { fprintf(stderr, "Option %s doesn't have a default value\n", entry); GetIniString(m_section.c_str(), entry, "", buff, countof(buff), m_ini.c_str()); } @@ -526,9 +545,12 @@ int GSdxApp::GetConfigI(const char* entry) { auto def = m_default_configuration.find(entry); - if (def != m_default_configuration.end()) { + if (def != m_default_configuration.end()) + { return GetIniInt(m_section.c_str(), entry, std::stoi(def->second), m_ini.c_str()); - } else { + } + else + { fprintf(stderr, "Option %s doesn't have a default value\n", entry); return GetIniInt(m_section.c_str(), entry, 0, m_ini.c_str()); } diff --git a/plugins/GSdx/GSdx.h b/plugins/GSdx/GSdx.h index 880acc44e5..17833be0b9 100644 --- a/plugins/GSdx/GSdx.h +++ b/plugins/GSdx/GSdx.h @@ -28,8 +28,8 @@ class GSdxApp { std::string m_ini; std::string m_section; - std::map< std::string, std::string > m_default_configuration; - std::map< std::string, std::string > m_configuration_map; + std::map m_default_configuration; + std::map m_configuration_map; GSRendererType m_current_renderer_type; public: @@ -39,7 +39,10 @@ public: void* GetModuleHandlePtr(); #ifdef _WIN32 - HMODULE GetModuleHandle() {return (HMODULE)GetModuleHandlePtr();} + HMODULE GetModuleHandle() + { + return (HMODULE)GetModuleHandlePtr(); + } #endif void BuildConfigurationMap(const char* lpFileName); @@ -58,10 +61,13 @@ public: void SetConfig(const char* entry, const char* value); void SetConfig(const char* entry, int value); // Avoid issue with overloading - template - T GetConfigT(const char* entry) { return static_cast(GetConfigI(entry)); } - int GetConfigI(const char* entry); - bool GetConfigB(const char* entry); + template + T GetConfigT(const char* entry) + { + return static_cast(GetConfigI(entry)); + } + int GetConfigI(const char* entry); + bool GetConfigB(const char* entry); std::string GetConfigS(const char* entry); void SetCurrentRendererType(GSRendererType type); @@ -87,8 +93,14 @@ public: std::vector m_gs_tv_shaders; }; -struct GSDXError {}; -struct GSDXRecoverableError : GSDXError {}; -struct GSDXErrorGlVertexArrayTooSmall : GSDXError {}; +struct GSDXError +{ +}; +struct GSDXRecoverableError : GSDXError +{ +}; +struct GSDXErrorGlVertexArrayTooSmall : GSDXError +{ +}; extern GSdxApp theApp; diff --git a/plugins/GSdx/Renderers/Common/GSDevice.cpp b/plugins/GSdx/Renderers/Common/GSDevice.cpp index da19f98279..d300553c41 100644 --- a/plugins/GSdx/Renderers/Common/GSDevice.cpp +++ b/plugins/GSdx/Renderers/Common/GSDevice.cpp @@ -42,7 +42,8 @@ GSDevice::GSDevice() GSDevice::~GSDevice() { - for(auto t : m_pool) delete t; + for (auto t : m_pool) + delete t; delete m_backbuffer; delete m_merge; @@ -60,7 +61,8 @@ bool GSDevice::Create(const std::shared_ptr& wnd) bool GSDevice::Reset(int w, int h) { - for(auto t : m_pool) delete t; + for (auto t : m_pool) + delete t; m_pool.clear(); @@ -88,9 +90,9 @@ void GSDevice::Present(const GSVector4i& r, int shader) int w = std::max(cr.width(), 1); int h = std::max(cr.height(), 1); - if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) + if (!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { - if(!Reset(w, h)) + if (!Reset(w, h)) { return; } @@ -101,7 +103,7 @@ void GSDevice::Present(const GSVector4i& r, int shader) // FIXME is it mandatory, it could be slow ClearRenderTarget(m_backbuffer, 0); - if(m_current) + if (m_current) { static int s_shader[5] = {ShaderConvert_COPY, ShaderConvert_SCANLINE, ShaderConvert_DIAGONAL_FILTER, ShaderConvert_TRIANGULAR_FILTER, @@ -123,11 +125,11 @@ GSTexture* GSDevice::FetchSurface(int type, int w, int h, int format) { const GSVector2i size(w, h); - for(auto i = m_pool.begin(); i != m_pool.end(); ++i) + for (auto i = m_pool.begin(); i != m_pool.end(); ++i) { GSTexture* t = *i; - if(t->GetType() == type && t->GetFormat() == format && t->GetSize() == size) + if (t->GetType() == type && t->GetFormat() == format && t->GetSize() == size) { m_pool.erase(i); @@ -142,7 +144,7 @@ void GSDevice::PrintMemoryUsage() { #ifdef ENABLE_OGL_DEBUG uint32 pool = 0; - for(auto t : m_pool) + for (auto t : m_pool) { if (t) pool += t->GetMemUsage(); @@ -161,7 +163,7 @@ void GSDevice::EndScene() void GSDevice::Recycle(GSTexture* t) { - if(t) + if (t) { #ifdef _DEBUG // Uncommit saves memory but it means a futur allocation when we want to reuse the texture. @@ -175,7 +177,7 @@ void GSDevice::Recycle(GSTexture* t) //printf("%d\n",m_pool.size()); - while(m_pool.size() > 300) + while (m_pool.size() > 300) { delete m_pool.back(); @@ -188,7 +190,7 @@ void GSDevice::AgePool() { m_frame++; - while(m_pool.size() > 40 && m_frame - m_pool.back()->last_frame_used > 10) + while (m_pool.size() > 40 && m_frame - m_pool.back()->last_frame_used > 10) { delete m_pool.back(); @@ -199,7 +201,7 @@ void GSDevice::AgePool() void GSDevice::PurgePool() { // OOM emergency. Let's free this useless pool - while(!m_pool.empty()) + while (!m_pool.empty()) { delete m_pool.back(); @@ -253,13 +255,13 @@ void GSDevice::Merge(GSTexture* sTex[3], GSVector4* sRect, GSVector4* dRect, con // (texture appears to be non-null, and is being re-created at a size around like 1700x340, // dunno if that's relevant) -- air - if(ResizeTarget(&m_merge, fs.x, fs.y)) + if (ResizeTarget(&m_merge, fs.x, fs.y)) { GSTexture* tex[3] = {NULL, NULL, NULL}; - for(size_t i = 0; i < countof(tex); i++) + for (size_t i = 0; i < countof(tex); i++) { - if(sTex[i] != NULL) + if (sTex[i] != NULL) { tex[i] = sTex[i]; } @@ -267,9 +269,9 @@ void GSDevice::Merge(GSTexture* sTex[3], GSVector4* sRect, GSVector4* dRect, con DoMerge(tex, sRect, m_merge, dRect, PMODE, EXTBUF, c); - for(size_t i = 0; i < countof(tex); i++) + for (size_t i = 0; i < countof(tex); i++) { - if(tex[i] != sTex[i]) + if (tex[i] != sTex[i]) { Recycle(tex[i]); } @@ -287,13 +289,13 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse { ResizeTarget(&m_weavebob, ds.x, ds.y); - if(mode == 0 || mode == 2) // weave or blend + if (mode == 0 || mode == 2) // weave or blend { // weave first DoInterlace(m_merge, m_weavebob, field, false, 0); - if(mode == 2) + if (mode == 2) { // blend @@ -308,7 +310,7 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse m_current = m_weavebob; } } - else if(mode == 1) // bob + else if (mode == 1) // bob { DoInterlace(m_merge, m_weavebob, 3, true, yoffset * field); @@ -338,7 +340,7 @@ void GSDevice::FXAA() { GSVector2i s = m_current->GetSize(); - if(ResizeTarget(&m_target_tmp)) + if (ResizeTarget(&m_target_tmp)) { GSVector4 sRect(0, 0, 1, 1); GSVector4 dRect(0, 0, s.x, s.y); @@ -352,7 +354,7 @@ void GSDevice::ShadeBoost() { GSVector2i s = m_current->GetSize(); - if(ResizeTarget(&m_target_tmp)) + if (ResizeTarget(&m_target_tmp)) { GSVector4 sRect(0, 0, 1, 1); GSVector4 dRect(0, 0, s.x, s.y); @@ -364,11 +366,15 @@ void GSDevice::ShadeBoost() bool GSDevice::ResizeTexture(GSTexture** t, int type, int w, int h) { - if(t == NULL) {ASSERT(0); return false;} + if (t == NULL) + { + ASSERT(0); + return false; + } GSTexture* t2 = *t; - if(t2 == NULL || t2->GetWidth() != w || t2->GetHeight() != h) + if (t2 == NULL || t2->GetWidth() != w || t2->GetHeight() != h) { delete t2; @@ -403,7 +409,7 @@ GSAdapter::operator std::string() const return buf; } -bool GSAdapter::operator==(const GSAdapter &desc_dxgi) const +bool GSAdapter::operator==(const GSAdapter& desc_dxgi) const { return vendor == desc_dxgi.vendor && device == desc_dxgi.device @@ -412,7 +418,7 @@ bool GSAdapter::operator==(const GSAdapter &desc_dxgi) const } #ifdef _WIN32 -GSAdapter::GSAdapter(const DXGI_ADAPTER_DESC1 &desc_dxgi) +GSAdapter::GSAdapter(const DXGI_ADAPTER_DESC1& desc_dxgi) : vendor(desc_dxgi.VendorId) , device(desc_dxgi.DeviceId) , subsys(desc_dxgi.SubSysId) diff --git a/plugins/GSdx/Renderers/Common/GSDevice.h b/plugins/GSdx/Renderers/Common/GSDevice.h index 4c11911139..dc38a0cb08 100644 --- a/plugins/GSdx/Renderers/Common/GSDevice.h +++ b/plugins/GSdx/Renderers/Common/GSDevice.h @@ -71,7 +71,7 @@ class MergeConstantBuffer public: GSVector4 BGColor; - MergeConstantBuffer() {memset(this, 0, sizeof(*this));} + MergeConstantBuffer() { memset(this, 0, sizeof(*this)); } }; class InterlaceConstantBuffer @@ -81,7 +81,7 @@ public: float hH; float _pad[1]; - InterlaceConstantBuffer() {memset(this, 0, sizeof(*this));} + InterlaceConstantBuffer() { memset(this, 0, sizeof(*this)); } }; class ExternalFXConstantBuffer @@ -100,7 +100,7 @@ public: GSVector4 rcpFrame; GSVector4 rcpFrameOpt; - FXAAConstantBuffer() {memset(this, 0, sizeof(*this));} + FXAAConstantBuffer() { memset(this, 0, sizeof(*this)); } }; class ShadeBoostConstantBuffer @@ -109,7 +109,7 @@ public: GSVector4 rcpFrame; GSVector4 rcpFrameOpt; - ShadeBoostConstantBuffer() {memset(this, 0, sizeof(*this));} + ShadeBoostConstantBuffer() { memset(this, 0, sizeof(*this)); } }; #pragma pack(pop) @@ -124,7 +124,10 @@ enum HWBlendFlags }; // Determines the HW blend function for DX11/OGL -struct HWBlend { uint16 flags, op, src, dst; }; +struct HWBlend +{ + uint16 flags, op, src, dst; +}; class GSDevice : public GSAlignedClass<32> { @@ -145,8 +148,8 @@ protected: OP_ADD, OP_SUBTRACT, OP_REV_SUBTRACT }; - static const int m_NO_BLEND = 0; - static const int m_MERGE_BLEND = m_blendMap.size() - 1; + static const int m_NO_BLEND = 0; + static const int m_MERGE_BLEND = m_blendMap.size() - 1; std::shared_ptr m_wnd; int m_vsync; @@ -157,8 +160,14 @@ protected: GSTexture* m_blend; GSTexture* m_target_tmp; GSTexture* m_current; - struct {size_t stride, start, count, limit;} m_vertex; - struct {size_t start, count, limit;} m_index; + struct + { + size_t stride, start, count, limit; + } m_vertex; + struct + { + size_t start, count, limit; + } m_index; unsigned int m_frame; // for ageing the pool bool m_linear_present; @@ -180,16 +189,21 @@ public: void Recycle(GSTexture* t); - enum {Windowed, Fullscreen, DontCare}; + enum + { + Windowed, + Fullscreen, + DontCare + }; - virtual bool Create(const std::shared_ptr &wnd); + virtual bool Create(const std::shared_ptr& wnd); virtual bool Reset(int w, int h); - virtual bool IsLost(bool update = false) {return false;} + virtual bool IsLost(bool update = false) { return false; } virtual void Present(const GSVector4i& r, int shader); virtual void Present(GSTexture* sTex, GSTexture* dTex, const GSVector4& dRect, int shader = 0); virtual void Flip() {} - virtual void SetVSync(int vsync) {m_vsync = vsync;} + virtual void SetVSync(int vsync) { m_vsync = vsync; } virtual void BeginScene() {} virtual void DrawPrimitive() {}; @@ -212,7 +226,7 @@ public: GSTexture* CreateTexture(int w, int h, int format = 0); GSTexture* CreateOffscreen(int w, int h, int format = 0); - virtual GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0) {return NULL;} + virtual GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0) { return NULL; } virtual void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) {} virtual void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader = 0, bool linear = true) {} @@ -238,7 +252,7 @@ public: bool ResizeTarget(GSTexture** t, int w, int h); bool ResizeTarget(GSTexture** t); - bool IsRBSwapped() {return m_rbswapped;} + bool IsRBSwapped() { return m_rbswapped; } void AgePool(); void PurgePool(); @@ -260,17 +274,17 @@ struct GSAdapter operator std::string() const; bool operator==(const GSAdapter&) const; - bool operator==(const std::string &s) const + bool operator==(const std::string& s) const { return (std::string)*this == s; } - bool operator==(const char *s) const + bool operator==(const char* s) const { return (std::string)*this == s; } #ifdef _WIN32 - GSAdapter(const DXGI_ADAPTER_DESC1 &desc_dxgi); + GSAdapter(const DXGI_ADAPTER_DESC1& desc_dxgi); #endif #ifdef __linux__ // TODO diff --git a/plugins/GSdx/Renderers/Common/GSDirtyRect.cpp b/plugins/GSdx/Renderers/Common/GSDirtyRect.cpp index 003353538d..ae54f17aa0 100644 --- a/plugins/GSdx/Renderers/Common/GSDirtyRect.cpp +++ b/plugins/GSdx/Renderers/Common/GSDirtyRect.cpp @@ -43,7 +43,7 @@ const GSVector4i GSDirtyRect::GetDirtyRect(const GIFRegTEX0& TEX0) const const GSVector2i src = GSLocalMemory::m_psm[psm].bs; - if(psm != TEX0.PSM) + if (psm != TEX0.PSM) { const GSVector2i dst = GSLocalMemory::m_psm[TEX0.PSM].bs; @@ -64,11 +64,11 @@ const GSVector4i GSDirtyRect::GetDirtyRect(const GIFRegTEX0& TEX0) const const GSVector4i GSDirtyRectList::GetDirtyRectAndClear(const GIFRegTEX0& TEX0, const GSVector2i& size) { - if(!empty()) + if (!empty()) { GSVector4i r(INT_MAX, INT_MAX, 0, 0); - for(const auto& dirty_rect : *this) + for (const auto& dirty_rect : *this) { r = r.runion(dirty_rect.GetDirtyRect(TEX0)); } diff --git a/plugins/GSdx/Renderers/Common/GSFastList.h b/plugins/GSdx/Renderers/Common/GSFastList.h index fa2519d7f3..c8cc3edd70 100644 --- a/plugins/GSdx/Renderers/Common/GSFastList.h +++ b/plugins/GSdx/Renderers/Common/GSFastList.h @@ -23,8 +23,9 @@ #pragma once template -struct Element { - T data; +struct Element +{ + T data; uint16 next_index; uint16 prev_index; }; @@ -33,8 +34,10 @@ template class FastListIterator; template -class FastList { +class FastList +{ friend class FastListIterator; + private: // The index of the first element of the list is m_buffer[0].next_index // The first Element of the list has prev_index equal to 0 @@ -45,7 +48,7 @@ private: // Due to m_buffer reallocation, the pointers to Element stored into the array // are invalidated every time Grow() is executed. But FastListIterator is // index based, not pointer based, and the elements are copied in order on Grow(), - // so there is no iterator invalidation (which is an index invalidation) until + // so there is no iterator invalidation (which is an index invalidation) until // the relevant iterator (or the index alone) are erased from the list. // m_buffer[0] is always present as auxiliary Element of the list Element* m_buffer; @@ -56,16 +59,19 @@ private: uint16* m_free_indexes_stack; public: - __forceinline FastList() { + __forceinline FastList() + { m_buffer = nullptr; clear(); } - __forceinline ~FastList() { + __forceinline ~FastList() + { _aligned_free(m_buffer); } - void clear() { + void clear() + { // Initialize m_capacity to 4 so we avoid to Grow() on initial insertions // The code doesn't break if this value is changed with anything from 1 to USHRT_MAX m_capacity = 4; @@ -77,20 +83,23 @@ public: m_free_indexes_stack = (uint16*)&m_buffer[m_capacity]; // Initialize m_buffer[0], data field is unused but initialized using default T constructor - m_buffer[0] = { T(), 0, 0 }; + m_buffer[0] = {T(), 0, 0}; // m_free_indexes_stack top index is 0, bottom index is m_capacity - 2 m_free_indexes_stack_top = 0; // m_buffer index 0 is reserved for auxiliary element - for (uint16 i = 0; i < m_capacity - 1; i++) { + for (uint16 i = 0; i < m_capacity - 1; i++) + { m_free_indexes_stack[i] = i + 1; } } // Insert the element in front of the list and return its position in m_buffer - __forceinline uint16 InsertFront(const T& data) { - if (Full()) { + __forceinline uint16 InsertFront(const T& data) + { + if (Full()) + { Grow(); } @@ -101,81 +110,99 @@ public: return free_index; } - __forceinline void push_front(const T& data) { + __forceinline void push_front(const T& data) + { InsertFront(data); } - __forceinline const T& back() const { + __forceinline const T& back() const + { return m_buffer[LastIndex()].data; } - __forceinline void pop_back() { + __forceinline void pop_back() + { EraseIndex(LastIndex()); } - __forceinline uint16 size() const { + __forceinline uint16 size() const + { return m_free_indexes_stack_top; } - __forceinline bool empty() const { + __forceinline bool empty() const + { return size() == 0; - } + } - __forceinline void EraseIndex(const uint16 index) { + __forceinline void EraseIndex(const uint16 index) + { ListRemove(index); m_free_indexes_stack[--m_free_indexes_stack_top] = index; } - __forceinline void MoveFront(const uint16 index) { - if (FirstIndex() != index) { + __forceinline void MoveFront(const uint16 index) + { + if (FirstIndex() != index) + { ListRemove(index); ListInsertFront(index); } } - __forceinline const FastListIterator begin() const { + __forceinline const FastListIterator begin() const + { return FastListIterator(this, FirstIndex()); } - __forceinline const FastListIterator end() const { + __forceinline const FastListIterator end() const + { return FastListIterator(this, 0); } - __forceinline FastListIterator erase(FastListIterator i) { + __forceinline FastListIterator erase(FastListIterator i) + { EraseIndex(i.Index()); return ++i; } private: // Accessed by FastListIterator using class friendship - __forceinline const T& Data(const uint16 index) const { + __forceinline const T& Data(const uint16 index) const + { return m_buffer[index].data; } // Accessed by FastListIterator using class friendship - __forceinline uint16 NextIndex(const uint16 index) const { + __forceinline uint16 NextIndex(const uint16 index) const + { return m_buffer[index].next_index; } // Accessed by FastListIterator using class friendship - __forceinline uint16 PrevIndex(const uint16 index) const { + __forceinline uint16 PrevIndex(const uint16 index) const + { return m_buffer[index].prev_index; } - __forceinline uint16 FirstIndex() const { + __forceinline uint16 FirstIndex() const + { return m_buffer[0].next_index; } - __forceinline uint16 LastIndex() const { + __forceinline uint16 LastIndex() const + { return m_buffer[0].prev_index; } - __forceinline bool Full() const { + __forceinline bool Full() const + { // The minus one is due to the presence of the auxiliary element return size() == m_capacity - 1; } - __forceinline void ListInsertFront(const uint16 index) { + __forceinline void ListInsertFront(const uint16 index) + { // Update prev / next indexes to add m_buffer[index] to the chain Element& head = m_buffer[0]; m_buffer[index].prev_index = 0; @@ -184,15 +211,18 @@ private: head.next_index = index; } - __forceinline void ListRemove(const uint16 index) { + __forceinline void ListRemove(const uint16 index) + { // Update prev / next indexes to remove m_buffer[index] from the chain const Element& to_remove = m_buffer[index]; m_buffer[to_remove.prev_index].next_index = to_remove.next_index; m_buffer[to_remove.next_index].prev_index = to_remove.prev_index; } - void Grow() { - if (m_capacity == USHRT_MAX) { + void Grow() + { + if (m_capacity == USHRT_MAX) + { throw std::runtime_error("FastList size maxed out at USHRT_MAX (65535) elements, cannot grow futhermore."); } @@ -203,14 +233,15 @@ private: memcpy(new_buffer, m_buffer, m_capacity * sizeof(Element)); memcpy(new_free_indexes_stack, m_free_indexes_stack, (m_capacity - 1) * sizeof(uint16)); - + _aligned_free(m_buffer); - + m_buffer = new_buffer; m_free_indexes_stack = new_free_indexes_stack; // Initialize the additional space in the stack - for (uint16 i = m_capacity - 1; i < new_capacity - 1; i++) { + for (uint16 i = m_capacity - 1; i < new_capacity - 1; i++) + { m_free_indexes_stack[i] = i + 1; } @@ -228,50 +259,59 @@ private: uint16 m_index; public: - __forceinline FastListIterator(const FastList* fastlist, const uint16 index) { + __forceinline FastListIterator(const FastList* fastlist, const uint16 index) + { m_fastlist = fastlist; m_index = index; } - __forceinline bool operator!=(const FastListIterator& other) const { + __forceinline bool operator!=(const FastListIterator& other) const + { return (m_index != other.m_index); } - __forceinline bool operator==(const FastListIterator& other) const { + __forceinline bool operator==(const FastListIterator& other) const + { return (m_index == other.m_index); } // Prefix increment - __forceinline const FastListIterator& operator++() { + __forceinline const FastListIterator& operator++() + { m_index = m_fastlist->NextIndex(m_index); return *this; } // Postfix increment - __forceinline const FastListIterator operator++(int) { + __forceinline const FastListIterator operator++(int) + { FastListIterator copy(*this); ++(*this); return copy; } // Prefix decrement - __forceinline const FastListIterator& operator--() { + __forceinline const FastListIterator& operator--() + { m_index = m_fastlist->PrevIndex(m_index); return *this; } // Postfix decrement - __forceinline const FastListIterator operator--(int) { + __forceinline const FastListIterator operator--(int) + { FastListIterator copy(*this); --(*this); return copy; } - __forceinline const T& operator*() const { + __forceinline const T& operator*() const + { return m_fastlist->Data(m_index); } - __forceinline uint16 Index() const { + __forceinline uint16 Index() const + { return m_index; } }; diff --git a/plugins/GSdx/Renderers/Common/GSFunctionMap.h b/plugins/GSdx/Renderers/Common/GSFunctionMap.h index d944184df0..2411cdc763 100644 --- a/plugins/GSdx/Renderers/Common/GSFunctionMap.h +++ b/plugins/GSdx/Renderers/Common/GSFunctionMap.h @@ -28,7 +28,8 @@ #include "Renderers/SW/GSScanlineEnvironment.h" -template class GSFunctionMap +template +class GSFunctionMap { protected: struct ActivePtr @@ -53,16 +54,17 @@ public: virtual ~GSFunctionMap() { - for(auto &i : m_map_active) delete i.second; + for (auto& i : m_map_active) + delete i.second; } - VALUE operator [] (KEY key) + VALUE operator[](KEY key) { m_active = NULL; auto it = m_map_active.find(key); - if(it != m_map_active.end()) + if (it != m_map_active.end()) { m_active = it->second; } @@ -88,9 +90,9 @@ public: void UpdateStats(uint64 frame, uint64 ticks, int actual, int total) { - if(m_active) + if (m_active) { - if(m_active->frame != frame) + if (m_active->frame != frame) { m_active->frame = frame; m_active->frames++; @@ -108,11 +110,11 @@ public: { uint64 ttpf = 0; - for(const auto &i : m_map_active) + for (const auto& i : m_map_active) { ActivePtr* p = i.second; - if(p->frames) + if (p->frames) { ttpf += p->ticks / p->frames; } @@ -120,12 +122,12 @@ public: printf("GS stats\n"); - for (const auto &i : m_map_active) + for (const auto& i : m_map_active) { KEY key = i.first; ActivePtr* p = i.second; - if(p->frames && ttpf) + if (p->frames && ttpf) { uint64 tpp = p->actual > 0 ? p->ticks / p->actual : 0; uint64 tpf = p->frames > 0 ? p->ticks / p->frames : 0; @@ -154,7 +156,7 @@ public: } }; -template +template class GSCodeGeneratorFunctionMap : public GSFunctionMap { std::string m_name; @@ -163,7 +165,7 @@ class GSCodeGeneratorFunctionMap : public GSFunctionMap GSCodeBuffer m_cb; size_t m_total_code_size; - enum {MAX_SIZE = 8192}; + enum { MAX_SIZE = 8192 }; public: GSCodeGeneratorFunctionMap(const char* name, void* param) @@ -186,7 +188,7 @@ public: auto i = m_cgmap.find(key); - if(i != m_cgmap.end()) + if (i != m_cgmap.end()) { ret = i->second; } @@ -211,7 +213,7 @@ public: m_cgmap[key] = ret; - #ifdef ENABLE_VTUNE +#ifdef ENABLE_VTUNE // vtune method registration @@ -249,7 +251,7 @@ public: */ } - #endif +#endif delete cg; } diff --git a/plugins/GSdx/Renderers/Common/GSOsdManager.cpp b/plugins/GSdx/Renderers/Common/GSOsdManager.cpp index 73609ed0fb..a8f5b675cd 100644 --- a/plugins/GSdx/Renderers/Common/GSOsdManager.cpp +++ b/plugins/GSdx/Renderers/Common/GSOsdManager.cpp @@ -23,20 +23,23 @@ #include "GSdx.h" #include "GSOsdManager.h" #ifdef _WIN32 - #include "resource.h" +#include "resource.h" #endif -void GSOsdManager::LoadFont() { +void GSOsdManager::LoadFont() +{ FT_Error error = FT_New_Face(m_library, theApp.GetConfigS("osd_fontname").c_str(), 0, &m_face); - if (error) { + if (error) + { FT_Error error_load_res = 1; - if(theApp.LoadResource(IDR_FONT_ROBOTO, resource_data_buffer)) + if (theApp.LoadResource(IDR_FONT_ROBOTO, resource_data_buffer)) error_load_res = FT_New_Memory_Face(m_library, (const FT_Byte*)resource_data_buffer.data(), resource_data_buffer.size(), 0, &m_face); - - if (error_load_res) { + + if (error_load_res) + { m_face = NULL; fprintf(stderr, "Failed to init freetype face from external and internal resource\n"); - if(error == FT_Err_Unknown_File_Format) + if (error == FT_Err_Unknown_File_Format) fprintf(stderr, "\tFreetype unknown file format for external file\n"); return; } @@ -45,11 +48,14 @@ void GSOsdManager::LoadFont() { LoadSize(); } -void GSOsdManager::LoadSize() { - if (!m_face) return; +void GSOsdManager::LoadSize() +{ + if (!m_face) + return; - FT_Error error = FT_Set_Pixel_Sizes(m_face, 0, m_size);; - if (error) { + FT_Error error = FT_Set_Pixel_Sizes(m_face, 0, m_size); + if (error) + { fprintf(stderr, "Failed to init the face size\n"); return; } @@ -60,11 +66,12 @@ void GSOsdManager::LoadSize() { m_atlas_h = m_size + 10; // another random guess } -GSOsdManager::GSOsdManager() : m_atlas_h(0) - , m_atlas_w(0) - , m_max_width(0) - , m_onscreen_messages(0) - , m_texture_dirty(true) +GSOsdManager::GSOsdManager() + : m_atlas_h(0) + , m_atlas_w(0) + , m_max_width(0) + , m_onscreen_messages(0) + , m_texture_dirty(true) { m_monitor_enabled = theApp.GetConfigB("osd_monitor_enabled"); m_log_enabled = theApp.GetConfigB("osd_log_enabled"); @@ -79,7 +86,8 @@ GSOsdManager::GSOsdManager() : m_atlas_h(0) m_color = r | (g << 8) | (b << 16) | (255 << 24); - if (FT_Init_FreeType(&m_library)) { + if (FT_Init_FreeType(&m_library)) + { m_face = NULL; fprintf(stderr, "Failed to init the freetype library\n"); return; @@ -91,24 +99,30 @@ GSOsdManager::GSOsdManager() : m_atlas_h(0) AddGlyph(' '); } -GSOsdManager::~GSOsdManager() { +GSOsdManager::~GSOsdManager() +{ FT_Done_FreeType(m_library); } -GSVector2i GSOsdManager::get_texture_font_size() { +GSVector2i GSOsdManager::get_texture_font_size() +{ return GSVector2i(m_atlas_w, m_atlas_h); } -void GSOsdManager::upload_texture_atlas(GSTexture* t) { - if (!m_face) return; +void GSOsdManager::upload_texture_atlas(GSTexture* t) +{ + if (!m_face) + return; if (m_char_info.size() > 96) // we only reserved space for this many glyphs fprintf(stderr, "More than 96 glyphs needed for OSD"); // This can be sped up a bit by only uploading new glyphs int x = 0; - for(auto &pair : m_char_info) { - if(FT_Load_Char(m_face, pair.first, FT_LOAD_RENDER)) { + for (auto& pair : m_char_info) + { + if (FT_Load_Char(m_face, pair.first, FT_LOAD_RENDER)) + { fprintf(stderr, "failed to load char U%d\n", (int)pair.first); continue; } @@ -123,11 +137,12 @@ void GSOsdManager::upload_texture_atlas(GSTexture* t) { pair.second.bl = m_face->glyph->bitmap_left; pair.second.bt = m_face->glyph->bitmap_top; - GSVector4i r(x, 0, x+pair.second.bw, pair.second.bh); + GSVector4i r(x, 0, x + pair.second.bw, pair.second.bh); if (r.width()) t->Update(r, m_face->glyph->bitmap.buffer, m_face->glyph->bitmap.pitch); - if (r.width() > m_max_width) m_max_width = r.width(); + if (r.width() > m_max_width) + m_max_width = r.width(); pair.second.tx = (float)x / m_atlas_w; pair.second.ty = (float)pair.second.bh / m_atlas_h; @@ -139,38 +154,53 @@ void GSOsdManager::upload_texture_atlas(GSTexture* t) { m_texture_dirty = false; } -#if __GNUC__ < 5 || ( __GNUC__ == 5 && __GNUC_MINOR__ < 4 ) +#if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4) /* This is dumb in that it doesn't check for malformed UTF8. This function * is not expected to operate on user input, but only on compiled in strings */ -void dumb_utf8_to_utf32(const char *utf8, char32_t *utf32, unsigned size) { - while(*utf8 && --size) { - if((*utf8 & 0xF1) == 0xF0) { +void dumb_utf8_to_utf32(const char* utf8, char32_t* utf32, unsigned size) +{ + while (*utf8 && --size) + { + if ((*utf8 & 0xF1) == 0xF0) + { *utf32++ = (utf8[0] & 0x07) << 18 | (utf8[1] & 0x3F) << 12 | (utf8[2] & 0x3F) << 6 | utf8[3] & 0x3F; utf8 += 4; - } else if((*utf8 & 0xF0) == 0xE0) { + } + else if ((*utf8 & 0xF0) == 0xE0) + { *utf32++ = (utf8[0] & 0x0F) << 12 | (utf8[1] & 0x3F) << 6 | utf8[2] & 0x3F; utf8 += 3; - } else if((*utf8 & 0xE0) == 0xC0) { + } + else if ((*utf8 & 0xE0) == 0xC0) + { *utf32++ = (utf8[0] & 0x1F) << 6 | utf8[1] & 0x3F; utf8 += 2; - } else if((*utf8 & 0x80) == 0x00) { + } + else if ((*utf8 & 0x80) == 0x00) + { *utf32++ = utf8[0] & 0x7F; utf8 += 1; } } - if(size) *utf32 = *utf8; // Copy NUL char + if (size) + *utf32 = *utf8; // Copy NUL char } #endif -void GSOsdManager::AddGlyph(char32_t codepoint) { - if (!m_face) return; - if(m_char_info.count(codepoint) == 0) { +void GSOsdManager::AddGlyph(char32_t codepoint) +{ + if (!m_face) + return; + if (m_char_info.count(codepoint) == 0) + { m_texture_dirty = true; m_char_info[codepoint]; // add it - if(FT_HAS_KERNING(m_face)) { + if (FT_HAS_KERNING(m_face)) + { FT_UInt new_glyph = FT_Get_Char_Index(m_face, codepoint); - for(auto pair : m_char_info) { + for (auto pair : m_char_info) + { FT_Vector delta; FT_UInt glyph_index = FT_Get_Char_Index(m_face, pair.first); @@ -181,14 +211,16 @@ void GSOsdManager::AddGlyph(char32_t codepoint) { } } -void GSOsdManager::Log(const char *utf8) { - if(!m_log_enabled) +void GSOsdManager::Log(const char* utf8) +{ + if (!m_log_enabled) return; -#if __GNUC__ < 5 || ( __GNUC__ == 5 && __GNUC_MINOR__ < 4 ) +#if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4) char32_t buffer[256]; dumb_utf8_to_utf32(utf8, buffer, countof(buffer)); - for(char32_t* c = buffer; *c; ++c) AddGlyph(*c); + for (char32_t* c = buffer; *c; ++c) + AddGlyph(*c); #else #if _MSC_VER == 1900 std::wstring_convert, unsigned int> conv; @@ -196,24 +228,28 @@ void GSOsdManager::Log(const char *utf8) { std::wstring_convert, char32_t> conv; #endif std::u32string buffer = conv.from_bytes(utf8); - for(auto const &c : buffer) AddGlyph(c); + for (auto const& c : buffer) + AddGlyph(c); #endif m_onscreen_messages++; m_log.push_back(log_info{buffer, std::chrono::system_clock::time_point()}); - } -void GSOsdManager::Monitor(const char *key, const char *value) { - if(!m_monitor_enabled) +void GSOsdManager::Monitor(const char* key, const char* value) +{ + if (!m_monitor_enabled) return; - if(value && *value) { -#if __GNUC__ < 5 || ( __GNUC__ == 5 && __GNUC_MINOR__ < 4 ) + if (value && *value) + { +#if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4) char32_t buffer[256], vbuffer[256]; dumb_utf8_to_utf32(key, buffer, countof(buffer)); dumb_utf8_to_utf32(value, vbuffer, countof(vbuffer)); - for(char32_t* c = buffer; *c; ++c) AddGlyph(*c); - for(char32_t* c = vbuffer; *c; ++c) AddGlyph(*c); + for (char32_t* c = buffer; *c; ++c) + AddGlyph(*c); + for (char32_t* c = vbuffer; *c; ++c) + AddGlyph(*c); #else #if _MSC_VER == 1900 std::wstring_convert, unsigned int> conv; @@ -222,12 +258,16 @@ void GSOsdManager::Monitor(const char *key, const char *value) { #endif std::u32string buffer = conv.from_bytes(key); std::u32string vbuffer = conv.from_bytes(value); - for(auto const &c : buffer) AddGlyph(c); - for(auto const &c : vbuffer) AddGlyph(c); + for (auto const& c : buffer) + AddGlyph(c); + for (auto const& c : vbuffer) + AddGlyph(c); #endif m_monitor[buffer] = vbuffer; - } else { -#if __GNUC__ < 5 || ( __GNUC__ == 5 && __GNUC_MINOR__ < 4 ) + } + else + { +#if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 4) char32_t buffer[256]; dumb_utf8_to_utf32(key, buffer, countof(buffer)); #else @@ -242,11 +282,12 @@ void GSOsdManager::Monitor(const char *key, const char *value) { } } -void GSOsdManager::RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, float y, uint32 color) { - float x2 = x + g.bl * (2.0f/m_real_size.x); - float y2 = -y - g.bt * (2.0f/m_real_size.y); - float w = g.bw * (2.0f/m_real_size.x); - float h = g.bh * (2.0f/m_real_size.y); +void GSOsdManager::RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, float y, uint32 color) +{ + float x2 = x + g.bl * (2.0f / m_real_size.x); + float y2 = -y - g.bt * (2.0f / m_real_size.y); + float w = g.bw * (2.0f / m_real_size.x); + float h = g.bh * (2.0f / m_real_size.y); dst->p = GSVector4(x2 , -y2 , 0.0f, 1.0f); dst->t = GSVector2(g.tx , 0.0f); @@ -274,18 +315,21 @@ void GSOsdManager::RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, fl ++dst; } -void GSOsdManager::RenderString(GSVertexPT1* dst, const std::u32string msg, float x, float y, uint32 color) { +void GSOsdManager::RenderString(GSVertexPT1* dst, const std::u32string msg, float x, float y, uint32 color) +{ char32_t p = 0; - for(const auto & c : msg) { - if(p) { - x += m_kern_info[std::make_pair(p, c)] * (2.0f/m_real_size.x); + for (const auto& c : msg) + { + if (p) + { + x += m_kern_info[std::make_pair(p, c)] * (2.0f / m_real_size.x); } RenderGlyph(dst, m_char_info[c], x, y, color); /* Advance the cursor to the start of the next character */ - x += m_char_info[c].ax * (2.0f/m_real_size.x); - y += m_char_info[c].ay * (2.0f/m_real_size.y); + x += m_char_info[c].ax * (2.0f / m_real_size.x); + y += m_char_info[c].ay * (2.0f / m_real_size.y); dst += 6; @@ -293,36 +337,47 @@ void GSOsdManager::RenderString(GSVertexPT1* dst, const std::u32string msg, floa } } -size_t GSOsdManager::Size() { +size_t GSOsdManager::Size() +{ size_t sum = 0; - if(m_log_enabled) { + if (m_log_enabled) + { float offset = 0; - for(auto it = m_log.begin(); it != m_log.end(); ++it) { - float y = 1 - ((m_size+2)*(it-m_log.begin()+1)) * (2.0f/m_real_size.y); - if(y + offset < -1) break; + for (auto it = m_log.begin(); it != m_log.end(); ++it) + { + float y = 1 - ((m_size + 2) * (it - m_log.begin() + 1)) * (2.0f / m_real_size.y); + if (y + offset < -1) + break; std::chrono::duration elapsed; - if(it->OnScreen.time_since_epoch().count() == 0) { + if (it->OnScreen.time_since_epoch().count() == 0) + { elapsed = std::chrono::seconds(0); - } else { + } + else + { elapsed = std::chrono::system_clock::now() - it->OnScreen; - if(elapsed > std::chrono::seconds(m_log_timeout) || m_onscreen_messages > m_max_onscreen_messages) { + if (elapsed > std::chrono::seconds(m_log_timeout) || m_onscreen_messages > m_max_onscreen_messages) + { continue; } } - float ratio = (elapsed - std::chrono::seconds(m_log_timeout/2)).count() / std::chrono::seconds(m_log_timeout/2).count(); - ratio = ratio > 1.0f ? 1.0f : ratio < 0.0f ? 0.0f : ratio; + float ratio = (elapsed - std::chrono::seconds(m_log_timeout / 2)).count() / std::chrono::seconds(m_log_timeout / 2).count(); + ratio = ratio > 1.0f ? 1.0f : ratio < 0.0f ? 0.0f : + ratio; - y += offset += ((m_size+2) * (2.0f/m_real_size.y)) * ratio; + y += offset += ((m_size + 2) * (2.0f / m_real_size.y)) * ratio; sum += it->msg.size(); } } - if(m_monitor_enabled) { - for(const auto &pair : m_monitor) { + if (m_monitor_enabled) + { + for (const auto& pair : m_monitor) + { sum += pair.first.size(); sum += pair.second.size(); } @@ -331,17 +386,20 @@ size_t GSOsdManager::Size() { return sum * 6; } -float GSOsdManager::StringSize(const std::u32string msg) { +float GSOsdManager::StringSize(const std::u32string msg) +{ char32_t p = 0; float x = 0.0; - for(auto c : msg) { - if(p) { - x += m_kern_info[std::make_pair(p, c)] * (2.0f/m_real_size.x); + for (auto c : msg) + { + if (p) + { + x += m_kern_info[std::make_pair(p, c)] * (2.0f / m_real_size.x); } /* Advance the cursor to the start of the next character */ - x += m_char_info[c].ax * (2.0f/m_real_size.x); + x += m_char_info[c].ax * (2.0f / m_real_size.x); p = c; } @@ -349,37 +407,43 @@ float GSOsdManager::StringSize(const std::u32string msg) { return x; } -size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) { +size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) +{ size_t drawn = 0; float opacity = m_opacity * 0.01f; - if(m_log_enabled) { + if (m_log_enabled) + { float offset = 0; - for(auto it = m_log.begin(); it != m_log.end();) { - float x = -1 + 8 * (2.0f/m_real_size.x); - float y = 1 - ((m_size+2)*(it-m_log.begin()+1)) * (2.0f/m_real_size.y); + for (auto it = m_log.begin(); it != m_log.end();) + { + float x = -1 + 8 * (2.0f / m_real_size.x); + float y = 1 - ((m_size + 2) * (it - m_log.begin() + 1)) * (2.0f / m_real_size.y); - if(y + offset < -1) break; + if (y + offset < -1) + break; - if(it->OnScreen.time_since_epoch().count() == 0) + if (it->OnScreen.time_since_epoch().count() == 0) it->OnScreen = std::chrono::system_clock::now(); std::chrono::duration elapsed = std::chrono::system_clock::now() - it->OnScreen; - if(elapsed > std::chrono::seconds(m_log_timeout) || m_onscreen_messages > m_max_onscreen_messages) { + if (elapsed > std::chrono::seconds(m_log_timeout) || m_onscreen_messages > m_max_onscreen_messages) + { m_onscreen_messages--; it = m_log.erase(it); continue; } - if(it->msg.size() * 6 > count - drawn) break; + if (it->msg.size() * 6 > count - drawn) + break; - float ratio = (elapsed - std::chrono::seconds(m_log_timeout/2)).count() / std::chrono::seconds(m_log_timeout/2).count(); + float ratio = (elapsed - std::chrono::seconds(m_log_timeout / 2)).count() / std::chrono::seconds(m_log_timeout / 2).count(); ratio = ratio > 1.0f ? 1.0f : ratio < 0.0f ? 0.0f : ratio; - y += offset += ((m_size+2) * (2.0f/m_real_size.y)) * ratio; + y += offset += ((m_size + 2) * (2.0f / m_real_size.y)) * ratio; uint32 color = m_color; - ((uint8 *)&color)[3] = (uint8)(((uint8 *)&color)[3] * (1.0f - ratio) * opacity); + ((uint8*)&color)[3] = (uint8)(((uint8*)&color)[3] * (1.0f - ratio) * opacity); RenderString(dst, it->msg, x, y, color); dst += it->msg.size() * 6; drawn += it->msg.size() * 6; @@ -387,13 +451,15 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) { } } - if(m_monitor_enabled) { + if (m_monitor_enabled) + { // pair.first is the key and second is the value and color // Since the monitor is right justified, but we render from left to right // we need to find the longest string float first_max = 0.0, second_max = 0.0; - for(const auto &pair : m_monitor) { + for (const auto& pair : m_monitor) + { float first_len = StringSize(pair.first); float second_len = StringSize(pair.second); @@ -402,18 +468,20 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) { } size_t line = 1; - for(const auto &pair : m_monitor) { - if((pair.first.size() + pair.second.size()) * 6 > count - drawn) break; + for (const auto& pair : m_monitor) + { + if ((pair.first.size() + pair.second.size()) * 6 > count - drawn) + break; // Calculate where to start rendering from by taking the right most position 1.0 // and subtracting (going left) 8 scaled pixels for a margin, then subtracting // the size of the longest key and subtracting a scaled space and finally // subtracting the longest value - float x = 1.0f - 8 * (2.0f/m_real_size.x) - first_max - m_char_info[' '].ax * (2.0f/m_real_size.x) - second_max; - float y = -1.0f + ((m_size+2)*(2.0f/m_real_size.y)) * line++; + float x = 1.0f - 8 * (2.0f / m_real_size.x) - first_max - m_char_info[' '].ax * (2.0f / m_real_size.x) - second_max; + float y = -1.0f + ((m_size + 2) * (2.0f / m_real_size.y)) * line++; uint32 color = m_color; - ((uint8 *)&color)[3] = (uint8)(((uint8 *)&color)[3] * opacity); + ((uint8*)&color)[3] = (uint8)(((uint8*)&color)[3] * opacity); // Render the key RenderString(dst, pair.first, x, y, color); @@ -421,7 +489,7 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) { drawn += pair.first.size() * 6; // Calculate the position for the value - x = 1.0f - 8 * (2.0f/m_real_size.x) - second_max; + x = 1.0f - 8 * (2.0f / m_real_size.x) - second_max; // Render the value RenderString(dst, pair.second, x, y, color); @@ -432,4 +500,3 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count) { return drawn; } - diff --git a/plugins/GSdx/Renderers/Common/GSOsdManager.h b/plugins/GSdx/Renderers/Common/GSOsdManager.h index c505222c03..c2b4001837 100644 --- a/plugins/GSdx/Renderers/Common/GSOsdManager.h +++ b/plugins/GSdx/Renderers/Common/GSOsdManager.h @@ -28,8 +28,10 @@ #include #include FT_FREETYPE_H -class GSOsdManager { - struct glyph_info { +class GSOsdManager +{ + struct glyph_info + { int32 ax; // advance.x int32 ay; // advance.y @@ -48,15 +50,16 @@ class GSOsdManager { std::map, FT_Pos> m_kern_info; FT_Library m_library; - FT_Face m_face; - FT_UInt m_size; + FT_Face m_face; + FT_UInt m_size; uint32 m_atlas_h; uint32 m_atlas_w; int32 m_max_width; int32 m_onscreen_messages; - struct log_info { + struct log_info + { std::u32string msg; std::chrono::system_clock::time_point OnScreen; }; @@ -76,8 +79,7 @@ class GSOsdManager { uint32 m_color; int m_max_onscreen_messages; - public: - +public: GSOsdManager(); ~GSOsdManager(); @@ -89,14 +91,13 @@ class GSOsdManager { bool m_texture_dirty; void upload_texture_atlas(GSTexture* t); - void Log(const char *utf8); - void Monitor(const char *key, const char *value); + void Log(const char* utf8); + void Monitor(const char* key, const char* value); GSVector2i m_real_size; size_t Size(); size_t GeneratePrimitives(GSVertexPT1* dst, size_t count); - - private: - + +private: std::vector resource_data_buffer; }; diff --git a/plugins/GSdx/Renderers/Common/GSRenderer.cpp b/plugins/GSdx/Renderers/Common/GSRenderer.cpp index acb65d5f33..4226422792 100644 --- a/plugins/GSdx/Renderers/Common/GSRenderer.cpp +++ b/plugins/GSdx/Renderers/Common/GSRenderer.cpp @@ -35,7 +35,7 @@ GSRenderer::GSRenderer() , m_shift_key(false) , m_control_key(false) , m_texture_shuffle(false) - , m_real_size(0,0) + , m_real_size(0, 0) , m_wnd() , m_dev(NULL) { @@ -67,7 +67,7 @@ bool GSRenderer::CreateDevice(GSDevice* dev) ASSERT(dev); ASSERT(!m_dev); - if(!dev->Create(m_wnd)) + if (!dev->Create(m_wnd)) { return false; } @@ -80,7 +80,8 @@ bool GSRenderer::CreateDevice(GSDevice* dev) void GSRenderer::ResetDevice() { - if(m_dev) m_dev->Reset(1, 1); + if (m_dev) + m_dev->Reset(1, 1); } bool GSRenderer::Merge(int field) @@ -90,14 +91,14 @@ bool GSRenderer::Merge(int field) GSVector4i fr[2]; GSVector4i dr[2]; - GSVector2i display_baseline = { INT_MAX, INT_MAX }; - GSVector2i frame_baseline = { INT_MAX, INT_MAX }; + GSVector2i display_baseline = {INT_MAX, INT_MAX}; + GSVector2i frame_baseline = {INT_MAX, INT_MAX}; - for(int i = 0; i < 2; i++) + for (int i = 0; i < 2; i++) { en[i] = IsEnabled(i); - if(en[i]) + if (en[i]) { fr[i] = GetFrameRect(i); dr[i] = GetDisplayRect(i); @@ -111,7 +112,7 @@ bool GSRenderer::Merge(int field) } } - if(!en[0] && !en[1]) + if (!en[0] && !en[1]) { return false; } @@ -128,7 +129,7 @@ bool GSRenderer::Merge(int field) m_regs->DISP[0].DISPFB.FBW == m_regs->DISP[1].DISPFB.FBW && m_regs->DISP[0].DISPFB.PSM == m_regs->DISP[1].DISPFB.PSM; - if(samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/) + if (samesrc /*&& m_regs->PMODE.SLBG == 0 && m_regs->PMODE.MMOD == 1 && m_regs->PMODE.ALP == 0x80*/) { // persona 4: // @@ -173,38 +174,42 @@ bool GSRenderer::Merge(int field) GSVector2i ds(0, 0); GSTexture* tex[3] = {NULL, NULL, NULL}; - int y_offset[3] = {0, 0, 0}; + int y_offset[3] = {0, 0, 0}; s_n++; bool feedback_merge = m_regs->EXTWRITE.WRITE == 1; - if(samesrc && fr[0].bottom == fr[1].bottom && !feedback_merge) + if (samesrc && fr[0].bottom == fr[1].bottom && !feedback_merge) { - tex[0] = GetOutput(0, y_offset[0]); - tex[1] = tex[0]; // saves one texture fetch + tex[0] = GetOutput(0, y_offset[0]); + tex[1] = tex[0]; // saves one texture fetch y_offset[1] = y_offset[0]; } else { - if(en[0]) tex[0] = GetOutput(0, y_offset[0]); - if(en[1]) tex[1] = GetOutput(1, y_offset[1]); - if(feedback_merge) tex[2] = GetFeedbackOutput(); + if (en[0]) + tex[0] = GetOutput(0, y_offset[0]); + if (en[1]) + tex[1] = GetOutput(1, y_offset[1]); + if (feedback_merge) + tex[2] = GetFeedbackOutput(); } GSVector4 src[2]; GSVector4 src_hw[2]; GSVector4 dst[2]; - for(int i = 0; i < 2; i++) + for (int i = 0; i < 2; i++) { - if(!en[i] || !tex[i]) continue; + if (!en[i] || !tex[i]) + continue; GSVector4i r = fr[i]; GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy(); src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy(); - src_hw[i] = (GSVector4(r) + GSVector4 (0, y_offset[i], 0, y_offset[i])) * scale / GSVector4(tex[i]->GetSize()).xyxy(); + src_hw[i] = (GSVector4(r) + GSVector4(0, y_offset[i], 0, y_offset[i])) * scale / GSVector4(tex[i]->GetSize()).xyxy(); GSVector2 off(0); GSVector2i display_diff(dr[i].left - display_baseline.x, dr[i].top - display_baseline.y); @@ -212,26 +217,26 @@ bool GSRenderer::Merge(int field) // Time Crisis 2/3 uses two side by side images when in split screen mode. // Though ignore cases where baseline and display rectangle offsets only differ by 1 pixel, causes blurring and wrong resolution output on FFXII - if(display_diff.x > 2) + if (display_diff.x > 2) { off.x = tex[i]->GetScale().x * display_diff.x; } // If the DX offset is too small then consider the status of frame memory offsets, prevents blurring on Tenchu: Fatal Shadows, Worms 3D - else if(display_diff.x != frame_diff.x) + else if (display_diff.x != frame_diff.x) { off.x = tex[i]->GetScale().x * frame_diff.x; } - if(display_diff.y >= 4) // Shouldn't this be >= 2? + if (display_diff.y >= 4) // Shouldn't this be >= 2? { off.y = tex[i]->GetScale().y * display_diff.y; - if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) + if (m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { off.y /= 2; } } - else if(display_diff.y != frame_diff.y) + else if (display_diff.y != frame_diff.y) { off.y = tex[i]->GetScale().y * frame_diff.y; } @@ -244,7 +249,7 @@ bool GSRenderer::Merge(int field) ds = fs; - if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) + if (m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { ds.y *= 2; } @@ -252,9 +257,9 @@ bool GSRenderer::Merge(int field) bool slbg = m_regs->PMODE.SLBG; - if(tex[0] || tex[1]) + if (tex[0] || tex[1]) { - if(tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue()) + if (tex[0] == tex[1] && !slbg && (src[0] == src[1] & dst[0] == dst[1]).alltrue()) { // the two outputs are identical, skip drawing one of them (the one that is alpha blended) @@ -265,9 +270,9 @@ bool GSRenderer::Merge(int field) m_dev->Merge(tex, src_hw, dst, fs, m_regs->PMODE, m_regs->EXTBUF, c); - if(m_regs->SMODE2.INT && m_interlace > 0) + if (m_regs->SMODE2.INT && m_interlace > 0) { - if(m_interlace == 7 && m_regs->SMODE2.FFMD) // Auto interlace enabled / Odd frame interlace setting + if (m_interlace == 7 && m_regs->SMODE2.FFMD) // Auto interlace enabled / Odd frame interlace setting { int field2 = 0; int mode = 2; @@ -281,17 +286,17 @@ bool GSRenderer::Merge(int field) } } - if(m_shadeboost) + if (m_shadeboost) { m_dev->ShadeBoost(); } - if(m_shaderfx) + if (m_shaderfx) { m_dev->ExternalFX(); } - if(m_fxaa) + if (m_fxaa) { m_dev->FXAA(); } @@ -309,7 +314,8 @@ void GSRenderer::SetVSync(int vsync) { m_vsync = vsync; - if(m_dev) m_dev->SetVSync(m_vsync); + if (m_dev) + m_dev->SetVSync(m_vsync); } void GSRenderer::VSync(int field) @@ -320,14 +326,14 @@ void GSRenderer::VSync(int field) Flush(); - if(s_dump && s_n >= s_saven) + if (s_dump && s_n >= s_saven) { m_regs->Dump(root_sw + format("%05d_f%lld_gs_reg.txt", s_n, m_perfmon.GetFrame())); } - if(!m_dev->IsLost(true)) + if (!m_dev->IsLost(true)) { - if(!Merge(field ? 1 : 0)) + if (!Merge(field ? 1 : 0)) { return; } @@ -341,7 +347,7 @@ void GSRenderer::VSync(int field) // osd - if((m_perfmon.GetFrame() & 0x1f) == 0) + if ((m_perfmon.GetFrame() & 0x1f) == 0) { m_perfmon.Update(); @@ -350,9 +356,9 @@ void GSRenderer::VSync(int field) std::string s; #ifdef GSTITLEINFO_API_FORCE_VERBOSE - if(1)//force verbose reply + if (1) //force verbose reply #else - if(m_wnd->IsManaged()) + if (m_wnd->IsManaged()) #endif { //GSdx owns the window's title, be verbose. @@ -370,18 +376,17 @@ void GSRenderer::VSync(int field) (int)m_perfmon.Get(GSPerfMon::Draw), m_perfmon.CPU(), m_perfmon.Get(GSPerfMon::Swizzle) / 1024, - m_perfmon.Get(GSPerfMon::Unswizzle) / 1024 - ); + m_perfmon.Get(GSPerfMon::Unswizzle) / 1024); double fillrate = m_perfmon.Get(GSPerfMon::Fillrate); - if(fillrate > 0) + if (fillrate > 0) { s += format(" | %.2f mpps", fps * fillrate / (1024 * 1024)); int sum = 0; - for(int i = 0; i < 16; i++) + for (int i = 0; i < 16; i++) { sum += m_perfmon.CPU(GSPerfMon::WorkerDraw0 + i); } @@ -396,12 +401,12 @@ void GSRenderer::VSync(int field) s = format("%dx%d | %s", GetInternalResolution().x, GetInternalResolution().y, theApp.m_gs_interlace[m_interlace].name.c_str()); } - if(m_capture.IsCapturing()) + if (m_capture.IsCapturing()) { s += " | Recording..."; } - if(m_wnd->IsManaged()) + if (m_wnd->IsManaged()) { m_wnd->SetWindowText(s.c_str()); } @@ -426,7 +431,7 @@ void GSRenderer::VSync(int field) // so let's use actual OSD! } - if(m_frameskip) + if (m_frameskip) { return; } @@ -443,9 +448,9 @@ void GSRenderer::VSync(int field) // snapshot - if(!m_snapshot.empty()) + if (!m_snapshot.empty()) { - if(!m_dump && m_shift_key) + if (!m_dump && m_shift_key) { GSFreezeData fd = {0, nullptr}; Freeze(&fd, true); @@ -457,35 +462,35 @@ void GSRenderer::VSync(int field) else m_dump = std::unique_ptr(new GSDumpXz(m_snapshot, m_crc, fd, m_regs)); - delete [] fd.data; + delete[] fd.data; } - if(GSTexture* t = m_dev->GetCurrent()) + if (GSTexture* t = m_dev->GetCurrent()) { t->Save(m_snapshot + ".png"); } m_snapshot.clear(); } - else if(m_dump) + else if (m_dump) { - if(m_dump->VSync(field, !m_control_key, m_regs)) + if (m_dump->VSync(field, !m_control_key, m_regs)) m_dump.reset(); } // capture - if(m_capture.IsCapturing()) + if (m_capture.IsCapturing()) { - if(GSTexture* current = m_dev->GetCurrent()) + if (GSTexture* current = m_dev->GetCurrent()) { GSVector2i size = m_capture.GetSize(); - if(GSTexture* offscreen = m_dev->CopyOffscreen(current, GSVector4(0, 0, 1, 1), size.x, size.y)) + if (GSTexture* offscreen = m_dev->CopyOffscreen(current, GSVector4(0, 0, 1, 1), size.x, size.y)) { GSTexture::GSMap m; - if(offscreen->Map(m)) + if (offscreen->Map(m)) { m_capture.DeliverFrame(m.bits, m.pitch, !m_dev->IsRBSwapped()); @@ -552,7 +557,7 @@ void GSRenderer::KeyEvent(GSKeyEventData* e) m_shift_key = !!(::GetAsyncKeyState(VK_SHIFT) & 0x8000); m_control_key = !!(::GetAsyncKeyState(VK_CONTROL) & 0x8000); #else - switch(e->key) + switch (e->key) { case XK_Shift_L: case XK_Shift_R: @@ -565,7 +570,7 @@ void GSRenderer::KeyEvent(GSKeyEventData* e) } #endif - if(e->type == KEYPRESS) + if (e->type == KEYPRESS) { int step = m_shift_key ? -1 : 1; @@ -580,44 +585,43 @@ void GSRenderer::KeyEvent(GSKeyEventData* e) #define VK_HOME XK_Home #endif - switch(e->key) + switch (e->key) { - case VK_F5: - m_interlace = (m_interlace + s_interlace_nb + step) % s_interlace_nb; - theApp.SetConfig("interlace", m_interlace); - printf("GSdx: Set deinterlace mode to %d (%s).\n", m_interlace, theApp.m_gs_interlace.at(m_interlace).name.c_str()); - return; - case VK_F6: - if( m_wnd->IsManaged() ) - m_aspectratio = (m_aspectratio + s_aspect_ratio_nb + step) % s_aspect_ratio_nb; - return; - case VK_DELETE: - m_aa1 = !m_aa1; - theApp.SetConfig("aa1", m_aa1); - printf("GSdx: (Software) Edge anti-aliasing is now %s.\n", m_aa1 ? "enabled" : "disabled"); - return; - case VK_INSERT: - m_mipmap = (m_mipmap + s_mipmap_nb + step) % s_mipmap_nb; - theApp.SetConfig("mipmap_hw", m_mipmap); - printf("GSdx: Mipmapping is now %s.\n", theApp.m_gs_hack.at(m_mipmap).name.c_str()); - return; - case VK_PRIOR: - m_fxaa = !m_fxaa; - theApp.SetConfig("fxaa", m_fxaa); - printf("GSdx: FXAA anti-aliasing is now %s.\n", m_fxaa ? "enabled" : "disabled"); - return; - case VK_HOME: - m_shaderfx = !m_shaderfx; - theApp.SetConfig("shaderfx", m_shaderfx); - printf("GSdx: External post-processing is now %s.\n", m_shaderfx ? "enabled" : "disabled"); - return; - case VK_NEXT: // As requested by Prafull, to be removed later - char dither_msg[3][16] = {"disabled", "auto", "auto unscaled"}; - m_dithering = (m_dithering+1)%3; - printf("GSdx: Dithering is now %s.\n", dither_msg[m_dithering]); - return; + case VK_F5: + m_interlace = (m_interlace + s_interlace_nb + step) % s_interlace_nb; + theApp.SetConfig("interlace", m_interlace); + printf("GSdx: Set deinterlace mode to %d (%s).\n", m_interlace, theApp.m_gs_interlace.at(m_interlace).name.c_str()); + return; + case VK_F6: + if (m_wnd->IsManaged()) + m_aspectratio = (m_aspectratio + s_aspect_ratio_nb + step) % s_aspect_ratio_nb; + return; + case VK_DELETE: + m_aa1 = !m_aa1; + theApp.SetConfig("aa1", m_aa1); + printf("GSdx: (Software) Edge anti-aliasing is now %s.\n", m_aa1 ? "enabled" : "disabled"); + return; + case VK_INSERT: + m_mipmap = (m_mipmap + s_mipmap_nb + step) % s_mipmap_nb; + theApp.SetConfig("mipmap_hw", m_mipmap); + printf("GSdx: Mipmapping is now %s.\n", theApp.m_gs_hack.at(m_mipmap).name.c_str()); + return; + case VK_PRIOR: + m_fxaa = !m_fxaa; + theApp.SetConfig("fxaa", m_fxaa); + printf("GSdx: FXAA anti-aliasing is now %s.\n", m_fxaa ? "enabled" : "disabled"); + return; + case VK_HOME: + m_shaderfx = !m_shaderfx; + theApp.SetConfig("shaderfx", m_shaderfx); + printf("GSdx: External post-processing is now %s.\n", m_shaderfx ? "enabled" : "disabled"); + return; + case VK_NEXT: // As requested by Prafull, to be removed later + char dither_msg[3][16] = {"disabled", "auto", "auto unscaled"}; + m_dithering = (m_dithering + 1) % 3; + printf("GSdx: Dithering is now %s.\n", dither_msg[m_dithering]); + return; } - } } diff --git a/plugins/GSdx/Renderers/Common/GSRenderer.h b/plugins/GSdx/Renderers/Common/GSRenderer.h index 111ca65547..00a5e44136 100644 --- a/plugins/GSdx/Renderers/Common/GSRenderer.h +++ b/plugins/GSdx/Renderers/Common/GSRenderer.h @@ -65,11 +65,11 @@ public: virtual void VSync(int field); virtual bool MakeSnapshot(const std::string& path); virtual void KeyEvent(GSKeyEventData* e); - virtual bool CanUpscale() {return false;} - virtual int GetUpscaleMultiplier() {return 1;} - virtual GSVector2i GetCustomResolution() {return GSVector2i(0,0);} + virtual bool CanUpscale() { return false; } + virtual int GetUpscaleMultiplier() { return 1; } + virtual GSVector2i GetCustomResolution() { return GSVector2i(0, 0); } GSVector2i GetInternalResolution(); - void SetAspectRatio(int aspect) {m_aspectratio = aspect;} + void SetAspectRatio(int aspect) { m_aspectratio = aspect; } void SetVSync(int vsync); virtual bool BeginCapture(std::string& filename); diff --git a/plugins/GSdx/Renderers/Common/GSTexture.h b/plugins/GSdx/Renderers/Common/GSTexture.h index fe705a66a1..a7dfadcff7 100644 --- a/plugins/GSdx/Renderers/Common/GSTexture.h +++ b/plugins/GSdx/Renderers/Common/GSTexture.h @@ -35,15 +35,32 @@ protected: bool m_sparse; public: - struct GSMap {uint8* bits; int pitch;}; + struct GSMap + { + uint8* bits; + int pitch; + }; - enum {RenderTarget = 1, DepthStencil, Texture, Offscreen, Backbuffer, SparseRenderTarget, SparseDepthStencil}; + enum + { + RenderTarget = 1, + DepthStencil, + Texture, + Offscreen, + Backbuffer, + SparseRenderTarget, + SparseDepthStencil + }; public: GSTexture(); virtual ~GSTexture() {} - virtual operator bool() {ASSERT(0); return false;} + virtual operator bool() + { + ASSERT(0); + return false; + } virtual bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) = 0; virtual bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) = 0; @@ -52,17 +69,17 @@ public: virtual bool Save(const std::string& fn) = 0; virtual uint32 GetID() { return 0; } - GSVector2 GetScale() const {return m_scale;} - void SetScale(const GSVector2& scale) {m_scale = scale;} + GSVector2 GetScale() const { return m_scale; } + void SetScale(const GSVector2& scale) { m_scale = scale; } - int GetWidth() const {return m_size.x;} - int GetHeight() const {return m_size.y;} - GSVector2i GetSize() const {return m_size;} + int GetWidth() const { return m_size.x; } + int GetHeight() const { return m_size.y; } + GSVector2i GetSize() const { return m_size; } - int GetType() const {return m_type;} - int GetFormat() const {return m_format;} + int GetType() const { return m_type; } + int GetFormat() const { return m_format; } - virtual void CommitPages(const GSVector2i& region, bool commit) {}; + virtual void CommitPages(const GSVector2i& region, bool commit) {} void CommitRegion(const GSVector2i& region); void Commit(); void Uncommit(); diff --git a/plugins/GSdx/Renderers/Common/GSVertex.h b/plugins/GSdx/Renderers/Common/GSVertex.h index ee1d4a2a15..cea0126b49 100644 --- a/plugins/GSdx/Renderers/Common/GSVertex.h +++ b/plugins/GSdx/Renderers/Common/GSVertex.h @@ -34,11 +34,11 @@ struct alignas(32) GSVertex { struct { - GIFRegST ST; // S:0, T:4 + GIFRegST ST; // S:0, T:4 GIFRegRGBAQ RGBAQ; // RGBA:8, Q:12 - GIFRegXYZ XYZ; // XY:16, Z:20 - union {uint32 UV; struct {uint16 U, V;};}; // UV:24 - uint32 FOG; // FOG:28 + GIFRegXYZ XYZ; // XY:16, Z:20 + union { uint32 UV; struct { uint16 U, V; }; }; // UV:24 + uint32 FOG; // FOG:28 }; #if _M_SSE >= 0x500 @@ -50,11 +50,22 @@ struct alignas(32) GSVertex GSVertex() = default; // Warning object is potentially used in hot path #if _M_SSE >= 0x500 - GSVertex(const GSVertex& v) {mx = v.mx;} - void operator = (const GSVertex& v) {mx = v.mx;} + GSVertex(const GSVertex& v) + { + mx = v.mx; + } + void operator=(const GSVertex& v) { mx = v.mx; } #else - GSVertex(const GSVertex& v) {m[0] = v.m[0]; m[1] = v.m[1];} - void operator = (const GSVertex& v) {m[0] = v.m[0]; m[1] = v.m[1];} + GSVertex(const GSVertex& v) + { + m[0] = v.m[0]; + m[1] = v.m[1]; + } + void operator=(const GSVertex& v) + { + m[0] = v.m[0]; + m[1] = v.m[1]; + } #endif }; @@ -68,7 +79,7 @@ struct alignas(32) GSVertexPT1 GSVector4 p; GSVector2 t; char pad[4]; - union {uint32 c; struct {uint8 r, g, b, a;};}; + union { uint32 c; struct { uint8 r, g, b, a; }; }; }; struct GSVertexPT2 diff --git a/plugins/GSdx/Renderers/Common/GSVertexList.h b/plugins/GSdx/Renderers/Common/GSVertexList.h index bd0b016d4c..2f9c90e5e5 100644 --- a/plugins/GSdx/Renderers/Common/GSVertexList.h +++ b/plugins/GSdx/Renderers/Common/GSVertexList.h @@ -21,7 +21,8 @@ #pragma once -template class GSVertexList +template +class GSVertexList { void* m_base; Vertex* m_v[3]; @@ -33,7 +34,7 @@ public: { m_base = _aligned_malloc(sizeof(Vertex) * countof(m_v), 32); - for(size_t i = 0; i < countof(m_v); i++) + for (size_t i = 0; i < countof(m_v); i++) { m_v[i] = &((Vertex*)m_base)[i]; } @@ -58,13 +59,13 @@ public: __forceinline void RemoveAt(int pos, int keep) { - if(keep == 1) + if (keep == 1) { Vertex* tmp = m_v[pos + 0]; m_v[pos + 0] = m_v[pos + 1]; m_v[pos + 1] = tmp; } - else if(keep == 2) + else if (keep == 2) { Vertex* tmp = m_v[pos + 0]; m_v[pos + 0] = m_v[pos + 1]; diff --git a/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp b/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp index b093811c7e..d34f28cd9c 100644 --- a/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp +++ b/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp @@ -68,7 +68,8 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count, // Potential float overflow detected. Better uses the slower division instead // Note: If Q is too big, 1/Q will end up as 0. 1e30 is a random number // that feel big enough. - if (!fst && !m_accurate_stq && m_min.t.z > 1e30) { + if (!fst && !m_accurate_stq && m_min.t.z > 1e30) + { fprintf(stderr, "Vertex Trace: float overflow detected ! min %e max %e\n", m_min.t.z, m_max.t.z); m_accurate_stq = true; (this->*m_fmm[m_accurate_stq][color][fst][tme][iip][primclass])(vertex, index, i_count); @@ -79,18 +80,19 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count, m_alpha.valid = false; // I'm not sure of the cost. In doubt let's do it only when depth is enabled - if(m_state->m_context->TEST.ZTE == 1 && m_state->m_context->TEST.ZTST > ZTST_ALWAYS) { + if (m_state->m_context->TEST.ZTE == 1 && m_state->m_context->TEST.ZTST > ZTST_ALWAYS) + { CorrectDepthTrace(vertex, v_count); } - if(m_state->PRIM->TME) + if (m_state->PRIM->TME) { const GIFRegTEX1& TEX1 = m_state->m_context->TEX1; m_filter.mmag = TEX1.IsMagLinear(); m_filter.mmin = TEX1.IsMinLinear(); - if(TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2 + if (TEX1.MXL == 0) // MXL == 0 => MMIN ignored, tested it on ps2 { m_filter.linear = m_filter.mmag; } @@ -98,13 +100,18 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count, { float K = (float)TEX1.K / 16; - if(TEX1.LCM == 0 && m_state->PRIM->FST == 0) // FST == 1 => Q is not interpolated + if (TEX1.LCM == 0 && m_state->PRIM->FST == 0) // FST == 1 => Q is not interpolated { // LOD = log2(1/|Q|) * (1 << L) + K GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(3).neg() * (float)(1 << TEX1.L) + K); - if(m_lod.x > m_lod.y) {float tmp = m_lod.x; m_lod.x = m_lod.y; m_lod.y = tmp;} + if (m_lod.x > m_lod.y) + { + float tmp = m_lod.x; + m_lod.x = m_lod.y; + m_lod.y = tmp; + } } else { @@ -112,11 +119,11 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count, m_lod.y = K; } - if(m_lod.y <= 0) + if (m_lod.y <= 0) { m_filter.linear = m_filter.mmag; } - else if(m_lod.x > 0) + else if (m_lod.x > 0) { m_filter.linear = m_filter.mmin; } @@ -149,25 +156,25 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count, } } -template +template void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count) { const GSDrawingContext* context = m_state->m_context; int n = 1; - switch(primclass) + switch (primclass) { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; + case GS_POINT_CLASS: + n = 1; + break; + case GS_LINE_CLASS: + case GS_SPRITE_CLASS: + n = 2; + break; + case GS_TRIANGLE_CLASS: + n = 3; + break; } GSVector4 tmin = s_minmax.xxxx(); @@ -180,21 +187,21 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun const GSVertex* RESTRICT v = (GSVertex*)vertex; - for(int i = 0; i < count; i += n) + for (int i = 0; i < count; i += n) { - if(primclass == GS_POINT_CLASS) + if (primclass == GS_POINT_CLASS) { GSVector4i c(v[index[i]].m[0]); - if(color) + if (color) { cmin = cmin.min_u8(c); cmax = cmax.max_u8(c); } - if(tme) + if (tme) { - if(!fst) + if (!fst) { GSVector4 stq = GSVector4::cast(c); @@ -229,14 +236,14 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun pmin = pmin.min_u32(p); pmax = pmax.max_u32(p); } - else if(primclass == GS_LINE_CLASS) + else if (primclass == GS_LINE_CLASS) { GSVector4i c0(v[index[i + 0]].m[0]); GSVector4i c1(v[index[i + 1]].m[0]); - if(color) + if (color) { - if(iip) + if (iip) { cmin = cmin.min_u8(c0.min_u8(c1)); cmax = cmax.max_u8(c0.max_u8(c1)); @@ -248,14 +255,14 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun } } - if(tme) + if (tme) { - if(!fst) + if (!fst) { GSVector4 stq0 = GSVector4::cast(c0); GSVector4 stq1 = GSVector4::cast(c1); - if(accurate_stq) + if (accurate_stq) { GSVector4 q = stq0.wwww(stq1); @@ -300,15 +307,15 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun pmin = pmin.min_u32(p0.min_u32(p1)); pmax = pmax.max_u32(p0.max_u32(p1)); } - else if(primclass == GS_TRIANGLE_CLASS) + else if (primclass == GS_TRIANGLE_CLASS) { GSVector4i c0(v[index[i + 0]].m[0]); GSVector4i c1(v[index[i + 1]].m[0]); GSVector4i c2(v[index[i + 2]].m[0]); - if(color) + if (color) { - if(iip) + if (iip) { cmin = cmin.min_u8(c2).min_u8(c0.min_u8(c1)); cmax = cmax.max_u8(c2).max_u8(c0.max_u8(c1)); @@ -320,15 +327,15 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun } } - if(tme) + if (tme) { - if(!fst) + if (!fst) { GSVector4 stq0 = GSVector4::cast(c0); GSVector4 stq1 = GSVector4::cast(c1); GSVector4 stq2 = GSVector4::cast(c2); - if(accurate_stq) + if (accurate_stq) { GSVector4 q = stq0.wwww(stq1).xzww(stq2); @@ -381,14 +388,14 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1)); pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1)); } - else if(primclass == GS_SPRITE_CLASS) + else if (primclass == GS_SPRITE_CLASS) { GSVector4i c0(v[index[i + 0]].m[0]); GSVector4i c1(v[index[i + 1]].m[0]); - if(color) + if (color) { - if(iip) + if (iip) { cmin = cmin.min_u8(c0.min_u8(c1)); cmax = cmax.max_u8(c0.max_u8(c1)); @@ -400,14 +407,14 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun } } - if(tme) + if (tme) { - if(!fst) + if (!fst) { GSVector4 stq0 = GSVector4::cast(c0); GSVector4 stq1 = GSVector4::cast(c1); - if(accurate_stq) + if (accurate_stq) { GSVector4 q = stq1.wwww(); @@ -468,9 +475,9 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun m_min.p = (GSVector4(pmin) - o) * s; m_max.p = (GSVector4(pmax) - o) * s; - if(tme) + if (tme) { - if(fst) + if (fst) { s = GSVector4(1.0f / 16, 1.0f).xxyy(); } @@ -488,7 +495,7 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun m_max.t = GSVector4::zero(); } - if(color) + if (color) { m_min.c = cmin.zzzz().u8to32(); m_max.c = cmax.zzzz().u8to32(); @@ -518,21 +525,29 @@ void GSVertexTrace::CorrectDepthTrace(const void* vertex, int count) uint32 z = v[0].XYZ.Z; // ought to check only 1/2 for sprite - if (z & 1) { + if (z & 1) + { // Check that first bit is always 1 - for (int i = 0; i < count; i++) { + for (int i = 0; i < count; i++) + { z &= v[i].XYZ.Z; } - } else { + } + else + { // Check that first bit is always 0 - for (int i = 0; i < count; i++) { + for (int i = 0; i < count; i++) + { z |= v[i].XYZ.Z; } } - if (z == v[0].XYZ.Z) { + if (z == v[0].XYZ.Z) + { m_eq.z = 1; - } else { + } + else + { m_eq.z = 0; } } diff --git a/plugins/GSdx/Renderers/Common/GSVertexTrace.h b/plugins/GSdx/Renderers/Common/GSVertexTrace.h index dede475c38..83104d88ca 100644 --- a/plugins/GSdx/Renderers/Common/GSVertexTrace.h +++ b/plugins/GSdx/Renderers/Common/GSVertexTrace.h @@ -34,8 +34,16 @@ class alignas(32) GSVertexTrace : public GSAlignedClass<32> BiFiltering m_force_filter; public: - struct Vertex {GSVector4i c; GSVector4 p, t;}; - struct VertexAlpha {int min, max; bool valid;}; + struct Vertex + { + GSVector4i c; + GSVector4 p, t; + }; + struct VertexAlpha + { + int min, max; + bool valid; + }; bool m_accurate_stq; protected: @@ -47,7 +55,7 @@ protected: FindMinMaxPtr m_fmm[2][2][2][2][2][4]; - template + template void FindMinMax(const void* vertex, const uint32* index, int count); public: @@ -60,13 +68,13 @@ public: union { uint32 value; - struct {uint32 r:4, g:4, b:4, a:4, x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1;}; - struct {uint32 rgba:16, xyzf:4, stq:4;}; + struct { uint32 r:4, g:4, b:4, a:4, x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1; }; + struct { uint32 rgba:16, xyzf:4, stq:4; }; } m_eq; union { - struct {uint32 mmag:1, mmin:1, linear:1, opt_linear:1;}; + struct { uint32 mmag:1, mmin:1, linear:1, opt_linear:1; }; } m_filter; GSVector2 m_lod; // x = min, y = max @@ -77,8 +85,8 @@ public: void Update(const void* vertex, const uint32* index, int v_count, int i_count, GS_PRIM_CLASS primclass); - bool IsLinear() const {return m_filter.opt_linear;} - bool IsRealLinear() const {return m_filter.linear;} + bool IsLinear() const { return m_filter.opt_linear; } + bool IsRealLinear() const { return m_filter.linear; } void CorrectDepthTrace(const void* vertex, int count); }; diff --git a/plugins/GSdx/Renderers/DX11/GSDevice11.cpp b/plugins/GSdx/Renderers/DX11/GSDevice11.cpp index 73bb87189b..1d05689700 100644 --- a/plugins/GSdx/Renderers/DX11/GSDevice11.cpp +++ b/plugins/GSdx/Renderers/DX11/GSDevice11.cpp @@ -54,40 +54,40 @@ bool GSDevice11::SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode) switch (level) { - case D3D_FEATURE_LEVEL_10_0: - m_shader.model = "0x400"; - m_shader.vs = "vs_4_0"; - m_shader.gs = "gs_4_0"; - m_shader.ps = "ps_4_0"; - m_shader.cs = "cs_4_0"; - break; - case D3D_FEATURE_LEVEL_10_1: - m_shader.model = "0x401"; - m_shader.vs = "vs_4_1"; - m_shader.gs = "gs_4_1"; - m_shader.ps = "ps_4_1"; - m_shader.cs = "cs_4_1"; - break; - case D3D_FEATURE_LEVEL_11_0: - m_shader.model = "0x500"; - m_shader.vs = "vs_5_0"; - m_shader.gs = "gs_5_0"; - m_shader.ps = "ps_5_0"; - m_shader.cs = "cs_5_0"; - break; - default: - ASSERT(0); - return false; + case D3D_FEATURE_LEVEL_10_0: + m_shader.model = "0x400"; + m_shader.vs = "vs_4_0"; + m_shader.gs = "gs_4_0"; + m_shader.ps = "ps_4_0"; + m_shader.cs = "cs_4_0"; + break; + case D3D_FEATURE_LEVEL_10_1: + m_shader.model = "0x401"; + m_shader.vs = "vs_4_1"; + m_shader.gs = "gs_4_1"; + m_shader.ps = "ps_4_1"; + m_shader.cs = "cs_4_1"; + break; + case D3D_FEATURE_LEVEL_11_0: + m_shader.model = "0x500"; + m_shader.vs = "vs_5_0"; + m_shader.gs = "gs_5_0"; + m_shader.ps = "ps_5_0"; + m_shader.cs = "cs_5_0"; + break; + default: + ASSERT(0); + return false; } return true; } -bool GSDevice11::Create(const std::shared_ptr &wnd) +bool GSDevice11::Create(const std::shared_ptr& wnd) { bool nvidia_vendor = false; - if(!__super::Create(wnd)) + if (!__super::Create(wnd)) { return false; } @@ -160,8 +160,7 @@ bool GSDevice11::Create(const std::shared_ptr &wnd) const HRESULT result = D3D11CreateDevice( adapter, driver_type, nullptr, flags, supported_levels.data(), supported_levels.size(), - D3D11_SDK_VERSION, &m_dev, &level, &m_ctx - ); + D3D11_SDK_VERSION, &m_dev, &level, &m_ctx); if (FAILED(result)) { @@ -189,8 +188,7 @@ bool GSDevice11::Create(const std::shared_ptr &wnd) const HRESULT result = m_factory->CreateSwapChainForHwnd( m_dev, reinterpret_cast(m_wnd->GetHandle()), - &swapchain_description, nullptr, nullptr, &m_swapchain - ); + &swapchain_description, nullptr, nullptr, &m_swapchain); if (FAILED(result)) { @@ -199,7 +197,7 @@ bool GSDevice11::Create(const std::shared_ptr &wnd) } } - if(!SetFeatureLevel(level, true)) + if (!SetFeatureLevel(level, true)) return false; // Set maximum texture size limit based on supported feature level. @@ -208,7 +206,8 @@ bool GSDevice11::Create(const std::shared_ptr &wnd) else m_d3d_texsize = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; - { // HACK: check nVIDIA + { + // HACK: check nVIDIA // Note: It can cause issues on several games such as SOTC, Fatal Frame, plus it adds border offset. bool disable_safe_features = theApp.GetConfigB("UserHacks") && theApp.GetConfigB("UserHacks_Disable_Safe_Features"); m_hack_topleft_offset = (m_upscale_multiplier != 1 && nvidia_vendor && !disable_safe_features) ? -0.01f : 0.0f; @@ -256,9 +255,9 @@ bool GSDevice11::Create(const std::shared_ptr &wnd) D3D_SHADER_MACRO* sm_convert_ptr = sm_convert.GetPtr(); - for(size_t i = 0; i < countof(m_convert.ps); i++) + for (size_t i = 0; i < countof(m_convert.ps); i++) { - CreateShader(shader, "convert.fx", nullptr, format("ps_main%d", i).c_str(), sm_convert_ptr, & m_convert.ps[i]); + CreateShader(shader, "convert.fx", nullptr, format("ps_main%d", i).c_str(), sm_convert_ptr, &m_convert.ps[i]); } memset(&dsd, 0, sizeof(dsd)); @@ -288,7 +287,7 @@ bool GSDevice11::Create(const std::shared_ptr &wnd) hr = m_dev->CreateBuffer(&bd, NULL, &m_merge.cb); theApp.LoadResource(IDR_MERGE_FX, shader); - for(size_t i = 0; i < countof(m_merge.ps); i++) + for (size_t i = 0; i < countof(m_merge.ps); i++) { CreateShader(shader, "merge.fx", nullptr, format("ps_main%d", i).c_str(), sm_model.GetPtr(), &m_merge.ps[i]); } @@ -317,7 +316,7 @@ bool GSDevice11::Create(const std::shared_ptr &wnd) hr = m_dev->CreateBuffer(&bd, NULL, &m_interlace.cb); theApp.LoadResource(IDR_INTERLACE_FX, shader); - for(size_t i = 0; i < countof(m_interlace.ps); i++) + for (size_t i = 0; i < countof(m_interlace.ps); i++) { CreateShader(shader, "interlace.fx", nullptr, format("ps_main%d", i).c_str(), sm_model.GetPtr(), &m_interlace.ps[i]); } @@ -435,18 +434,17 @@ bool GSDevice11::Create(const std::shared_ptr &wnd) GSVector2i tex_font = m_osd.get_texture_font_size(); m_font = std::unique_ptr( - CreateSurface(GSTexture::Texture, tex_font.x, tex_font.y, DXGI_FORMAT_R8_UNORM) - ); + CreateSurface(GSTexture::Texture, tex_font.x, tex_font.y, DXGI_FORMAT_R8_UNORM)); return true; } bool GSDevice11::Reset(int w, int h) { - if(!__super::Reset(w, h)) + if (!__super::Reset(w, h)) return false; - if(m_swapchain) + if (m_swapchain) { DXGI_SWAP_CHAIN_DESC scd; @@ -457,7 +455,7 @@ bool GSDevice11::Reset(int w, int h) CComPtr backbuffer; - if(FAILED(m_swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backbuffer))) + if (FAILED(m_swapchain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**)&backbuffer))) { return false; } @@ -553,13 +551,15 @@ void GSDevice11::DrawIndexedPrimitive(int offset, int count) void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c) { - if (!t) return; + if (!t) + return; m_ctx->ClearRenderTargetView(*(GSTexture11*)t, c.v); } void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c) { - if (!t) return; + if (!t) + return; GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255); m_ctx->ClearRenderTargetView(*(GSTexture11*)t, color.v); @@ -567,13 +567,15 @@ void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c) void GSDevice11::ClearDepth(GSTexture* t) { - if (!t) return; + if (!t) + return; m_ctx->ClearDepthStencilView(*(GSTexture11*)t, D3D11_CLEAR_DEPTH, 0.0f, 0); } void GSDevice11::ClearStencil(GSTexture* t, uint8 c) { - if (!t) return; + if (!t) + return; m_ctx->ClearDepthStencilView(*(GSTexture11*)t, D3D11_CLEAR_STENCIL, 0, c); } @@ -597,24 +599,24 @@ GSTexture* GSDevice11::CreateSurface(int type, int w, int h, int format) // mipmap = m_mipmap > 1 || m_filter != TriFiltering::None; bool mipmap = m_mipmap > 1; - int layers = mipmap && format == DXGI_FORMAT_R8G8B8A8_UNORM ? (int)log2(std::max(w,h)) : 1; + int layers = mipmap && format == DXGI_FORMAT_R8G8B8A8_UNORM ? (int)log2(std::max(w, h)) : 1; - switch(type) + switch (type) { - case GSTexture::RenderTarget: - desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; - break; - case GSTexture::DepthStencil: - desc.BindFlags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE; - break; - case GSTexture::Texture: - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - desc.MipLevels = layers; - break; - case GSTexture::Offscreen: - desc.Usage = D3D11_USAGE_STAGING; - desc.CPUAccessFlags |= D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - break; + case GSTexture::RenderTarget: + desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; + break; + case GSTexture::DepthStencil: + desc.BindFlags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE; + break; + case GSTexture::Texture: + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.MipLevels = layers; + break; + case GSTexture::Offscreen: + desc.Usage = D3D11_USAGE_STAGING; + desc.CPUAccessFlags |= D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + break; } GSTexture11* t = NULL; @@ -623,18 +625,18 @@ GSTexture* GSDevice11::CreateSurface(int type, int w, int h, int format) hr = m_dev->CreateTexture2D(&desc, NULL, &texture); - if(SUCCEEDED(hr)) + if (SUCCEEDED(hr)) { t = new GSTexture11(texture); - switch(type) + switch (type) { - case GSTexture::RenderTarget: - ClearRenderTarget(t, 0); - break; - case GSTexture::DepthStencil: - ClearDepth(t); - break; + case GSTexture::RenderTarget: + ClearRenderTarget(t, 0); + break; + case GSTexture::DepthStencil: + ClearDepth(t); + break; } } else @@ -657,14 +659,14 @@ GSTexture* GSDevice11::CopyOffscreen(GSTexture* src, const GSVector4& sRect, int { GSTexture* dst = NULL; - if(format == 0) + if (format == 0) { format = DXGI_FORMAT_R8G8B8A8_UNORM; } ASSERT(format == DXGI_FORMAT_R8G8B8A8_UNORM || format == DXGI_FORMAT_R16_UINT || format == DXGI_FORMAT_R32_UINT); - if(GSTexture* rt = CreateRenderTarget(w, h, format)) + if (GSTexture* rt = CreateRenderTarget(w, h, format)) { GSVector4 dRect(0, 0, w, h); @@ -672,7 +674,7 @@ GSTexture* GSDevice11::CopyOffscreen(GSTexture* src, const GSVector4& sRect, int dst = CreateOffscreen(w, h, format); - if(dst) + if (dst) { m_ctx->CopyResource(*(GSTexture11*)dst, *(GSTexture11*)rt); } @@ -691,7 +693,7 @@ void GSDevice11::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) return; } - D3D11_BOX box = { (UINT)r.left, (UINT)r.top, 0U, (UINT)r.right, (UINT)r.bottom, 1U }; + D3D11_BOX box = {(UINT)r.left, (UINT)r.top, 0U, (UINT)r.right, (UINT)r.bottom, 1U}; // DX api isn't happy if we pass a box for depth copy // It complains that depth/multisample must be a full copy @@ -750,16 +752,16 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[ShaderConvert_COPY], nullptr, bs, false); } -void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs , bool linear) +void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear) { - if(!sTex || !dTex) + if (!sTex || !dTex) { ASSERT(0); return; } - bool draw_in_depth = (ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT32] || ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT24] || - ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT16] || ps == m_convert.ps[ShaderConvert_RGB5A1_TO_FLOAT16]); + bool draw_in_depth = (ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT32] || ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT24] + || ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT16] || ps == m_convert.ps[ShaderConvert_RGB5A1_TO_FLOAT16]); BeginScene(); @@ -767,7 +769,7 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* // om - + if (draw_in_depth) OMSetDepthStencilState(m_convert.dss_write, 0); else @@ -837,7 +839,8 @@ void GSDevice11::RenderOsd(GSTexture* dt) OMSetBlendState(m_merge.bs, 0); OMSetRenderTargets(dt, NULL); - if(m_osd.m_texture_dirty) { + if (m_osd.m_texture_dirty) + { m_osd.upload_texture_atlas(m_font.get()); } @@ -876,12 +879,12 @@ void GSDevice11::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, ClearRenderTarget(dTex, c); - if(sTex[1] && !slbg) + if (sTex[1] && !slbg) { StretchRect(sTex[1], sRect[1], dTex, dRect[1], m_merge.ps[0], NULL, true); } - if(sTex[0]) + if (sTex[0]) { m_ctx->UpdateSubresource(m_merge.cb, 0, NULL, &c, 0, 0); @@ -911,7 +914,8 @@ void GSDevice11::InitExternalFX() { if (!ExShader_Compiled) { - try { + try + { std::string config_name(theApp.GetConfigS("shaderfx_conf")); std::ifstream fconfig(config_name); std::stringstream shader; @@ -935,7 +939,8 @@ void GSDevice11::InitExternalFX() fprintf(stderr, "GSdx: External shader '%s' not loaded and will be disabled!\n", shader_name.c_str()); } } - catch (GSDXRecoverableError) { + catch (GSDXRecoverableError) + { printf("GSdx: failed to compile external post-processing shader. \n"); } ExShader_Compiled = true; @@ -968,13 +973,15 @@ void GSDevice11::InitFXAA() { if (!FXAA_Compiled) { - try { + try + { std::vector shader; theApp.LoadResource(IDR_FXAA_FX, shader); ShaderMacro sm(m_shader.model); CreateShader(shader, "fxaa.fx", nullptr, "ps_main", sm.GetPtr(), &m_fxaa.ps); } - catch (GSDXRecoverableError) { + catch (GSDXRecoverableError) + { printf("GSdx: failed to compile fxaa shader.\n"); } FXAA_Compiled = true; @@ -1066,7 +1073,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou { void* ptr = NULL; - if(IAMapVertexBuffer(&ptr, stride, count)) + if (IAMapVertexBuffer(&ptr, stride, count)) { GSVector4i::storent(ptr, vertex, count * stride); @@ -1078,7 +1085,7 @@ bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count) { ASSERT(m_vertex.count == 0); - if(count * stride > m_vertex.limit * m_vertex.stride) + if (count * stride > m_vertex.limit * m_vertex.stride) { m_vb_old = m_vb; m_vb = NULL; @@ -1087,7 +1094,7 @@ bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count) m_vertex.limit = std::max(count * 3 / 2, 11000); } - if(m_vb == NULL) + if (m_vb == NULL) { D3D11_BUFFER_DESC bd; @@ -1102,12 +1109,13 @@ bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count) hr = m_dev->CreateBuffer(&bd, NULL, &m_vb); - if(FAILED(hr)) return false; + if (FAILED(hr)) + return false; } D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE; - if(m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride) + if (m_vertex.start + count > m_vertex.limit || stride != m_vertex.stride) { m_vertex.start = 0; @@ -1116,7 +1124,7 @@ bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count) D3D11_MAPPED_SUBRESOURCE m; - if(FAILED(m_ctx->Map(m_vb, 0, type, 0, &m))) + if (FAILED(m_ctx->Map(m_vb, 0, type, 0, &m))) { return false; } @@ -1138,7 +1146,7 @@ void GSDevice11::IAUnmapVertexBuffer() void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride) { - if(m_state.vb != vb || m_state.vb_stride != stride) + if (m_state.vb != vb || m_state.vb_stride != stride) { m_state.vb = vb; m_state.vb_stride = stride; @@ -1154,7 +1162,7 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count) { ASSERT(m_index.count == 0); - if(count > m_index.limit) + if (count > m_index.limit) { m_ib_old = m_ib; m_ib = NULL; @@ -1163,7 +1171,7 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count) m_index.limit = std::max(count * 3 / 2, 11000); } - if(m_ib == NULL) + if (m_ib == NULL) { D3D11_BUFFER_DESC bd; @@ -1178,12 +1186,13 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count) hr = m_dev->CreateBuffer(&bd, NULL, &m_ib); - if(FAILED(hr)) return; + if (FAILED(hr)) + return; } D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE; - if(m_index.start + count > m_index.limit) + if (m_index.start + count > m_index.limit) { m_index.start = 0; @@ -1192,7 +1201,7 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count) D3D11_MAPPED_SUBRESOURCE m; - if(SUCCEEDED(m_ctx->Map(m_ib, 0, type, 0, &m))) + if (SUCCEEDED(m_ctx->Map(m_ib, 0, type, 0, &m))) { memcpy((uint8*)m.pData + m_index.start * sizeof(uint32), index, count * sizeof(uint32)); @@ -1206,7 +1215,7 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count) void GSDevice11::IASetIndexBuffer(ID3D11Buffer* ib) { - if(m_state.ib != ib) + if (m_state.ib != ib) { m_state.ib = ib; @@ -1216,7 +1225,7 @@ void GSDevice11::IASetIndexBuffer(ID3D11Buffer* ib) void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout) { - if(m_state.layout != layout) + if (m_state.layout != layout) { m_state.layout = layout; @@ -1226,7 +1235,7 @@ void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout) void GSDevice11::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology) { - if(m_state.topology != topology) + if (m_state.topology != topology) { m_state.topology = topology; @@ -1236,14 +1245,14 @@ void GSDevice11::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology) void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb) { - if(m_state.vs != vs) + if (m_state.vs != vs) { m_state.vs = vs; m_ctx->VSSetShader(vs, NULL, 0); } - if(m_state.vs_cb != vs_cb) + if (m_state.vs_cb != vs_cb) { m_state.vs_cb = vs_cb; @@ -1253,7 +1262,7 @@ void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb) void GSDevice11::GSSetShader(ID3D11GeometryShader* gs, ID3D11Buffer* gs_cb) { - if(m_state.gs != gs) + if (m_state.gs != gs) { m_state.gs = gs; @@ -1273,7 +1282,7 @@ void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) PSSetShaderResource(0, sr0); PSSetShaderResource(1, sr1); - for(size_t i = 2; i < m_state.ps_sr_views.size(); i++) + for (size_t i = 2; i < m_state.ps_sr_views.size(); i++) { PSSetShaderResource(i, NULL); } @@ -1283,7 +1292,8 @@ void GSDevice11::PSSetShaderResource(int i, GSTexture* sr) { ID3D11ShaderResourceView* srv = NULL; - if(sr) srv = *(GSTexture11*)sr; + if (sr) + srv = *(GSTexture11*)sr; PSSetShaderResourceView(i, srv, sr); } @@ -1292,7 +1302,7 @@ void GSDevice11::PSSetShaderResourceView(int i, ID3D11ShaderResourceView* srv, G { ASSERT(i < (int)m_state.ps_sr_views.size()); - if(m_state.ps_sr_views[i] != srv) + if (m_state.ps_sr_views[i] != srv) { m_state.ps_sr_views[i] = srv; m_state.ps_sr_texture[i] = (GSTexture11*)sr; @@ -1302,7 +1312,7 @@ void GSDevice11::PSSetShaderResourceView(int i, ID3D11ShaderResourceView* srv, G void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1) { - if(m_state.ps_ss[0] != ss0 || m_state.ps_ss[1] != ss1) + if (m_state.ps_ss[0] != ss0 || m_state.ps_ss[1] != ss1) { m_state.ps_ss[0] = ss0; m_state.ps_ss[1] = ss1; @@ -1311,14 +1321,14 @@ void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb) { - if(m_state.ps != ps) + if (m_state.ps != ps) { m_state.ps = ps; m_ctx->PSSetShader(ps, NULL, 0); } - if(m_state.ps_cb != ps_cb) + if (m_state.ps_cb != ps_cb) { m_state.ps_cb = ps_cb; @@ -1334,7 +1344,7 @@ void GSDevice11::PSUpdateShaderState() void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref) { - if(m_state.dss != dss || m_state.sref != sref) + if (m_state.dss != dss || m_state.sref != sref) { m_state.dss = dss; m_state.sref = sref; @@ -1345,7 +1355,7 @@ void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref void GSDevice11::OMSetBlendState(ID3D11BlendState* bs, float bf) { - if(m_state.bs != bs || m_state.bf != bf) + if (m_state.bs != bs || m_state.bf != bf) { m_state.bs = bs; m_state.bf = bf; @@ -1364,10 +1374,10 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector if (!rt && !ds) throw GSDXRecoverableError(); - if(rt) rtv = *(GSTexture11*)rt; - if(ds) dsv = *(GSTexture11*)ds; + if (rt) rtv = *(GSTexture11*)rt; + if (ds) dsv = *(GSTexture11*)ds; - if(m_state.rt_view != rtv || m_state.dsv != dsv) + if (m_state.rt_view != rtv || m_state.dsv != dsv) { m_state.rt_view = rtv; m_state.rt_texture = static_cast(rt); @@ -1378,7 +1388,7 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector } GSVector2i size = rt ? rt->GetSize() : ds->GetSize(); - if(m_state.viewport != size) + if (m_state.viewport != size) { m_state.viewport = size; @@ -1397,7 +1407,7 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector GSVector4i r = scissor ? *scissor : GSVector4i(size).zwxy(); - if(!m_state.scissor.eq(r)) + if (!m_state.scissor.eq(r)) { m_state.scissor = r; @@ -1436,14 +1446,14 @@ void GSDevice11::CreateShader(const std::vector& source, const char* fn, I hr = m_dev->CreateVertexShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, vs); - if(FAILED(hr)) + if (FAILED(hr)) { throw GSDXRecoverableError(); } hr = m_dev->CreateInputLayout(layout, count, shader->GetBufferPointer(), shader->GetBufferSize(), il); - if(FAILED(hr)) + if (FAILED(hr)) { throw GSDXRecoverableError(); } @@ -1459,7 +1469,7 @@ void GSDevice11::CreateShader(const std::vector& source, const char* fn, I hr = m_dev->CreateGeometryShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, gs); - if(FAILED(hr)) + if (FAILED(hr)) { throw GSDXRecoverableError(); } @@ -1475,7 +1485,7 @@ void GSDevice11::CreateShader(const std::vector& source, const char* fn, I hr = m_dev->CreatePixelShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, ps); - if(FAILED(hr)) + if (FAILED(hr)) { throw GSDXRecoverableError(); } @@ -1494,8 +1504,7 @@ void GSDevice11::CompileShader(const std::vector& source, const char* fn, const HRESULT hr = D3DCompile( source.data(), source.size(), fn, macro, include, entry, shader_model.c_str(), - flags, 0, shader, &error - ); + flags, 0, shader, &error); if (error) fprintf(stderr, "%s\n", (const char*)error->GetBufferPointer()); @@ -1508,25 +1517,25 @@ uint16 GSDevice11::ConvertBlendEnum(uint16 generic) { switch (generic) { - case SRC_COLOR : return D3D11_BLEND_SRC_COLOR; - case INV_SRC_COLOR : return D3D11_BLEND_INV_SRC_COLOR; - case DST_COLOR : return D3D11_BLEND_DEST_COLOR; - case INV_DST_COLOR : return D3D11_BLEND_INV_DEST_COLOR; - case SRC1_COLOR : return D3D11_BLEND_SRC1_COLOR; - case INV_SRC1_COLOR : return D3D11_BLEND_INV_SRC1_COLOR; - case SRC_ALPHA : return D3D11_BLEND_SRC_ALPHA; - case INV_SRC_ALPHA : return D3D11_BLEND_INV_SRC_ALPHA; - case DST_ALPHA : return D3D11_BLEND_DEST_ALPHA; - case INV_DST_ALPHA : return D3D11_BLEND_INV_DEST_ALPHA; - case SRC1_ALPHA : return D3D11_BLEND_SRC1_ALPHA; - case INV_SRC1_ALPHA : return D3D11_BLEND_INV_SRC1_ALPHA; - case CONST_COLOR : return D3D11_BLEND_BLEND_FACTOR; - case INV_CONST_COLOR : return D3D11_BLEND_INV_BLEND_FACTOR; - case CONST_ONE : return D3D11_BLEND_ONE; - case CONST_ZERO : return D3D11_BLEND_ZERO; - case OP_ADD : return D3D11_BLEND_OP_ADD; - case OP_SUBTRACT : return D3D11_BLEND_OP_SUBTRACT; - case OP_REV_SUBTRACT : return D3D11_BLEND_OP_REV_SUBTRACT; - default : ASSERT(0); return 0; + case SRC_COLOR: return D3D11_BLEND_SRC_COLOR; + case INV_SRC_COLOR: return D3D11_BLEND_INV_SRC_COLOR; + case DST_COLOR: return D3D11_BLEND_DEST_COLOR; + case INV_DST_COLOR: return D3D11_BLEND_INV_DEST_COLOR; + case SRC1_COLOR: return D3D11_BLEND_SRC1_COLOR; + case INV_SRC1_COLOR: return D3D11_BLEND_INV_SRC1_COLOR; + case SRC_ALPHA: return D3D11_BLEND_SRC_ALPHA; + case INV_SRC_ALPHA: return D3D11_BLEND_INV_SRC_ALPHA; + case DST_ALPHA: return D3D11_BLEND_DEST_ALPHA; + case INV_DST_ALPHA: return D3D11_BLEND_INV_DEST_ALPHA; + case SRC1_ALPHA: return D3D11_BLEND_SRC1_ALPHA; + case INV_SRC1_ALPHA: return D3D11_BLEND_INV_SRC1_ALPHA; + case CONST_COLOR: return D3D11_BLEND_BLEND_FACTOR; + case INV_CONST_COLOR: return D3D11_BLEND_INV_BLEND_FACTOR; + case CONST_ONE: return D3D11_BLEND_ONE; + case CONST_ZERO: return D3D11_BLEND_ZERO; + case OP_ADD: return D3D11_BLEND_OP_ADD; + case OP_SUBTRACT: return D3D11_BLEND_OP_SUBTRACT; + case OP_REV_SUBTRACT: return D3D11_BLEND_OP_REV_SUBTRACT; + default: ASSERT(0); return 0; } } \ No newline at end of file diff --git a/plugins/GSdx/Renderers/DX11/GSDevice11.h b/plugins/GSdx/Renderers/DX11/GSDevice11.h index a5099cf14a..fb897b36af 100644 --- a/plugins/GSdx/Renderers/DX11/GSDevice11.h +++ b/plugins/GSdx/Renderers/DX11/GSDevice11.h @@ -34,7 +34,7 @@ struct GSVertexShader11 class GSDevice11 final : public GSDevice { public: - #pragma pack(push, 1) +#pragma pack(push, 1) struct alignas(32) VSConstantBuffer { @@ -58,7 +58,7 @@ public: GSVector4i* a = (GSVector4i*)this; GSVector4i* b = (GSVector4i*)cb; - if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3])).alltrue()) + if (!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3])).alltrue()) { a[0] = b[0]; a[1] = b[1]; @@ -78,19 +78,25 @@ public: { struct { - uint32 tme:1; - uint32 fst:1; + uint32 tme : 1; + uint32 fst : 1; - uint32 _free:30; + uint32 _free : 30; }; uint32 key; }; - operator uint32() const {return key;} + operator uint32() const { return key; } - VSSelector() : key(0) {} - VSSelector(uint32 k) : key(k) {} + VSSelector() + : key(0) + { + } + VSSelector(uint32 k) + : key(k) + { + } }; struct alignas(32) PSConstantBuffer @@ -131,7 +137,7 @@ public: GSVector4i* a = (GSVector4i*)this; GSVector4i* b = (GSVector4i*)cb; - if(!((a[0] == b[0]) /*& (a[1] == b1)*/ & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) & (a[5] == b[5]) & + if (!((a[0] == b[0]) /*& (a[1] == b1)*/ & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) & (a[5] == b[5]) & (a[6] == b[6]) & (a[7] == b[7]) & (a[9] == b[9]) & // if WH matches HalfTexel does too (a[10] == b[10]) & (a[11] == b[11]) & (a[12] == b[12]) & (a[13] == b[13])).alltrue()) { @@ -178,22 +184,28 @@ public: { struct { - uint32 iip:1; - uint32 prim:2; - uint32 point:1; - uint32 line:1; - uint32 cpu_sprite:1; + uint32 iip : 1; + uint32 prim : 2; + uint32 point : 1; + uint32 line : 1; + uint32 cpu_sprite : 1; - uint32 _free:26; + uint32 _free : 26; }; uint32 key; }; - operator uint32() {return key;} + operator uint32() { return key; } - GSSelector() : key(0) {} - GSSelector(uint32 k) : key(k) {} + GSSelector() + : key(0) + { + } + GSSelector(uint32 k) + : key(k) + { + } }; struct PSSelector @@ -204,63 +216,66 @@ public: { // *** Word 1 // Format - uint32 fmt:4; - uint32 dfmt:2; - uint32 depth_fmt:2; + uint32 fmt : 4; + uint32 dfmt : 2; + uint32 depth_fmt : 2; // Alpha extension/Correction - uint32 aem:1; - uint32 fba:1; + uint32 aem : 1; + uint32 fba : 1; // Fog - uint32 fog:1; + uint32 fog : 1; // Pixel test - uint32 atst:3; + uint32 atst : 3; // Color sampling - uint32 fst:1; - uint32 tfx:3; - uint32 tcc:1; - uint32 wms:2; - uint32 wmt:2; - uint32 ltf:1; + uint32 fst : 1; + uint32 tfx : 3; + uint32 tcc : 1; + uint32 wms : 2; + uint32 wmt : 2; + uint32 ltf : 1; // Shuffle and fbmask effect - uint32 shuffle:1; - uint32 read_ba:1; - uint32 fbmask:1; + uint32 shuffle : 1; + uint32 read_ba : 1; + uint32 fbmask : 1; // Blend and Colclip - uint32 hdr:1; - uint32 blend_a:2; - uint32 blend_b:2; // bit30/31 - uint32 blend_c:2; // bit0 - uint32 blend_d:2; - uint32 clr1:1; - uint32 colclip:1; - uint32 pabe:1; + uint32 hdr : 1; + uint32 blend_a : 2; + uint32 blend_b : 2; // bit30/31 + uint32 blend_c : 2; // bit0 + uint32 blend_d : 2; + uint32 clr1 : 1; + uint32 colclip : 1; + uint32 pabe : 1; // Others ways to fetch the texture - uint32 channel:3; + uint32 channel : 3; // Dithering - uint32 dither:2; + uint32 dither : 2; // Depth clamp - uint32 zclamp:1; + uint32 zclamp : 1; // Hack - uint32 tcoffsethack:1; - uint32 urban_chaos_hle:1; - uint32 tales_of_abyss_hle:1; - uint32 point_sampler:1; - uint32 invalid_tex0:1; // Lupin the 3rd + uint32 tcoffsethack : 1; + uint32 urban_chaos_hle : 1; + uint32 tales_of_abyss_hle : 1; + uint32 point_sampler : 1; + uint32 invalid_tex0 : 1; // Lupin the 3rd - uint32 _free:14; + uint32 _free : 14; }; uint64 key; }; - operator uint64() {return key;} + operator uint64() { return key; } - PSSelector() : key(0) {} + PSSelector() + : key(0) + { + } }; struct PSSamplerSelector @@ -269,17 +284,20 @@ public: { struct { - uint32 tau:1; - uint32 tav:1; - uint32 ltf:1; + uint32 tau : 1; + uint32 tav : 1; + uint32 ltf : 1; }; uint32 key; }; - operator uint32() {return key & 0x7;} + operator uint32() { return key & 0x7; } - PSSamplerSelector() : key(0) {} + PSSamplerSelector() + : key(0) + { + } }; struct OMDepthStencilSelector @@ -288,19 +306,22 @@ public: { struct { - uint32 ztst:2; - uint32 zwe:1; - uint32 date:1; - uint32 fba:1; - uint32 date_one:1; + uint32 ztst : 2; + uint32 zwe : 1; + uint32 date : 1; + uint32 fba : 1; + uint32 date_one : 1; }; uint32 key; }; - operator uint32() {return key & 0x3f;} + operator uint32() { return key & 0x3f; } - OMDepthStencilSelector() : key(0) {} + OMDepthStencilSelector() + : key(0) + { + } }; struct OMBlendSelector @@ -310,44 +331,55 @@ public: struct { // Color mask - uint32 wr:1; - uint32 wg:1; - uint32 wb:1; - uint32 wa:1; + uint32 wr : 1; + uint32 wg : 1; + uint32 wb : 1; + uint32 wa : 1; // Alpha blending - uint32 blend_index:7; - uint32 abe:1; - uint32 accu_blend:1; + uint32 blend_index : 7; + uint32 abe : 1; + uint32 accu_blend : 1; }; struct { // Color mask - uint32 wrgba:4; + uint32 wrgba : 4; }; uint32 key; }; - operator uint32() {return key & 0x1fff;} + operator uint32() { return key & 0x1fff; } - OMBlendSelector() : key(0) {} + OMBlendSelector() + : key(0) + { + } }; - #pragma pack(pop) +#pragma pack(pop) class ShaderMacro { struct mcstr { - const char* name, * def; - mcstr(const char* n, const char* d) : name(n), def(d) {} + const char *name, *def; + mcstr(const char* n, const char* d) + : name(n) + , def(d) + { + } }; struct mstring { std::string name, def; - mstring(const char* n, std::string d) : name(n), def(d) {} + mstring(const char* n, std::string d) + : name(n) + , def(d) + { + } }; std::vector mlist; @@ -379,7 +411,7 @@ private: void RenderOsd(GSTexture* dt); void BeforeDraw(); void AfterDraw(); - + uint16 ConvertBlendEnum(uint16 generic) final; CComPtr m_factory; @@ -456,13 +488,13 @@ private: CComPtr cb; } m_shaderfx; - struct + struct { CComPtr ps; CComPtr cb; } m_fxaa; - struct + struct { CComPtr ps; CComPtr cb; @@ -494,7 +526,12 @@ private: std::unique_ptr m_font; protected: - struct {D3D_FEATURE_LEVEL level; std::string model, vs, gs, ps, cs;} m_shader; + struct + { + D3D_FEATURE_LEVEL level; + std::string model, vs, gs, ps, cs; + } m_shader; + public: GSDevice11(); virtual ~GSDevice11() {} @@ -502,7 +539,7 @@ public: bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode); void GetFeatureLevel(D3D_FEATURE_LEVEL& level) const { level = m_shader.level; } - bool Create(const std::shared_ptr &wnd); + bool Create(const std::shared_ptr& wnd); bool Reset(int w, int h); void Flip(); void SetVSync(int vsync) final; @@ -558,9 +595,9 @@ public: void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel); void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix); - ID3D11Device* operator->() {return m_dev;} - operator ID3D11Device*() {return m_dev;} - operator ID3D11DeviceContext*() {return m_ctx;} + ID3D11Device* operator->() { return m_dev; } + operator ID3D11Device*() { return m_dev; } + operator ID3D11DeviceContext*() { return m_ctx; } void CreateShader(const std::vector& source, const char* fn, ID3DInclude* include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il); void CreateShader(const std::vector& source, const char* fn, ID3DInclude* include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11GeometryShader** gs); @@ -568,4 +605,3 @@ public: void CompileShader(const std::vector& source, const char* fn, ID3DInclude* include, const char* entry, D3D_SHADER_MACRO* macro, ID3DBlob** shader, std::string shader_model); }; - diff --git a/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp b/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp index 842e4e329f..78d7c2ad55 100644 --- a/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp +++ b/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp @@ -45,49 +45,49 @@ void GSRendererDX11::SetupIA(const float& sx, const float& sy) switch (m_vt.m_primclass) { - case GS_POINT_CLASS: - if (unscale_pt_ln) - { - m_gs_sel.point = 1; - gs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy); - } + case GS_POINT_CLASS: + if (unscale_pt_ln) + { + m_gs_sel.point = 1; + gs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy); + } - t = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; - break; + t = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; + break; - case GS_LINE_CLASS: - if (unscale_pt_ln) - { - m_gs_sel.line = 1; - gs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy); - } + case GS_LINE_CLASS: + if (unscale_pt_ln) + { + m_gs_sel.line = 1; + gs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy); + } - t = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; - break; - - case GS_SPRITE_CLASS: - // Lines: GPU conversion. - // Triangles: CPU conversion. - if (!m_vt.m_accurate_stq && m_vertex.next > 32) // <=> 16 sprites (based on Shadow Hearts) - { t = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; - } - else - { - m_gs_sel.cpu_sprite = 1; - Lines2Sprites(); + break; + case GS_SPRITE_CLASS: + // Lines: GPU conversion. + // Triangles: CPU conversion. + if (!m_vt.m_accurate_stq && m_vertex.next > 32) // <=> 16 sprites (based on Shadow Hearts) + { + t = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; + } + else + { + m_gs_sel.cpu_sprite = 1; + Lines2Sprites(); + + t = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + } + + break; + + case GS_TRIANGLE_CLASS: t = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - } + break; - break; - - case GS_TRIANGLE_CLASS: - t = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - break; - - default: - __assume(0); + default: + __assume(0); } void* ptr = NULL; @@ -102,7 +102,8 @@ void GSRendererDX11::SetupIA(const float& sx, const float& sy) for (unsigned int i = 0; i < m_vertex.next; i++) { - if (PRIM->TME && PRIM->FST) d[i].UV &= 0x3FEF3FEF; + if (PRIM->TME && PRIM->FST) + d[i].UV &= 0x3FEF3FEF; } } @@ -188,7 +189,7 @@ void GSRendererDX11::EmulateTextureShuffleAndFbmask() default: break; } - + // Uncomment to disable texture shuffle emulation. // m_texture_shuffle = false; @@ -401,7 +402,6 @@ void GSRendererDX11::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache: // fprintf(stderr, "%d: Green channel (wrong mask) (fbmask %x)\n", s_n, m_context->FRAME.FBMSK >> 24); m_ps_sel.channel = ChannelFetch_GREEN; } - } else if (green) { @@ -452,7 +452,7 @@ void GSRendererDX11::EmulateBlending() { // Partial port of OGL SW blending. Currently only works for accumulation and non recursive blend. const GIFRegALPHA& ALPHA = m_context->ALPHA; - bool sw_blending = false; + bool sw_blending = false; // No blending so early exit if (!(PRIM->ABE || m_env.PABE.PABE || (PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS))) @@ -491,7 +491,8 @@ void GSRendererDX11::EmulateBlending() case ACC_BLEND_BASIC_D3D11: sw_blending |= accumulation_blend || blend_non_recursive; [[fallthrough]]; - default: break; + default: + break; } // Color clip @@ -532,7 +533,8 @@ void GSRendererDX11::EmulateBlending() { m_om_bsel.accu_blend = 1; - if (ALPHA.A == 2) { + if (ALPHA.A == 2) + { // The blend unit does a reverse subtraction so it means // the shader must output a positive value. // Replace 0 - Cs by Cs - 0 @@ -568,8 +570,8 @@ void GSRendererDX11::EmulateTextureSampler(const GSTextureCache::Source* tex) { // Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth. //const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; - const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM]; - const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm; + const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM]; + const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm; const uint8 wms = m_context->CLAMP.WMS; const uint8 wmt = m_context->CLAMP.WMT; @@ -619,7 +621,6 @@ void GSRendererDX11::EmulateTextureSampler(const GSTextureCache::Source* tex) GSVector4 half_offset = RealignTargetTextureCoordinate(tex); vs_cb.Texture_Scale_Offset.z = half_offset.x; vs_cb.Texture_Scale_Offset.w = half_offset.y; - } else if (tex->m_target) { @@ -684,7 +685,6 @@ void GSRendererDX11::EmulateTextureSampler(const GSTextureCache::Source* tex) // Note 4 bits indexes are converted to 8 bits m_ps_sel.fmt = 3 << 2; - } else { @@ -930,7 +930,7 @@ void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sou m_ps_sel.fba = m_context->FBA.FBA; m_ps_sel.dither = m_dithering > 0 && m_ps_sel.dfmt == 2 && m_env.DTHE.DTHE; - if(m_ps_sel.dither) + if (m_ps_sel.dither) { m_ps_sel.dither = m_dithering; ps_cb.DitherMatrix[0] = GSVector4(m_env.DIMX.DM00, m_env.DIMX.DM10, m_env.DIMX.DM20, m_env.DIMX.DM30); @@ -1091,7 +1091,7 @@ void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sou bool b = m_om_bsel.wb; bool a = m_om_bsel.wa; - switch(m_context->TEST.AFAIL) + switch (m_context->TEST.AFAIL) { case AFAIL_KEEP: z = r = g = b = a = false; break; // none case AFAIL_FB_ONLY: z = false; break; // rgba diff --git a/plugins/GSdx/Renderers/DX11/GSRendererDX11.h b/plugins/GSdx/Renderers/DX11/GSRendererDX11.h index c74fa700b0..00c62b53e1 100644 --- a/plugins/GSdx/Renderers/DX11/GSRendererDX11.h +++ b/plugins/GSdx/Renderers/DX11/GSRendererDX11.h @@ -27,7 +27,8 @@ class GSRendererDX11 final : public GSRendererHW { - enum ACC_BLEND_D3D11 { + enum ACC_BLEND_D3D11 + { ACC_BLEND_NONE_D3D11 = 0, ACC_BLEND_BASIC_D3D11 = 1, ACC_BLEND_MEDIUM_D3D11 = 2, diff --git a/plugins/GSdx/Renderers/DX11/GSTexture11.cpp b/plugins/GSdx/Renderers/DX11/GSTexture11.cpp index 7010caab51..c0e5e445c4 100644 --- a/plugins/GSdx/Renderers/DX11/GSTexture11.cpp +++ b/plugins/GSdx/Renderers/DX11/GSTexture11.cpp @@ -36,10 +36,14 @@ GSTexture11::GSTexture11(ID3D11Texture2D* texture) m_size.x = (int)m_desc.Width; m_size.y = (int)m_desc.Height; - if(m_desc.BindFlags & D3D11_BIND_RENDER_TARGET) m_type = RenderTarget; - else if(m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL) m_type = DepthStencil; - else if(m_desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) m_type = Texture; - else if(m_desc.Usage == D3D11_USAGE_STAGING) m_type = Offscreen; + if (m_desc.BindFlags & D3D11_BIND_RENDER_TARGET) + m_type = RenderTarget; + else if (m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL) + m_type = DepthStencil; + else if (m_desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) + m_type = Texture; + else if (m_desc.Usage == D3D11_USAGE_STAGING) + m_type = Offscreen; m_format = (int)m_desc.Format; @@ -48,12 +52,12 @@ GSTexture11::GSTexture11(ID3D11Texture2D* texture) bool GSTexture11::Update(const GSVector4i& r, const void* data, int pitch, int layer) { - if(layer >= m_max_layer) + if (layer >= m_max_layer) return true; - if(m_dev && m_texture) + if (m_dev && m_texture) { - D3D11_BOX box = { (UINT)r.left, (UINT)r.top, 0U, (UINT)r.right, (UINT)r.bottom, 1U }; + D3D11_BOX box = {(UINT)r.left, (UINT)r.top, 0U, (UINT)r.right, (UINT)r.bottom, 1U}; UINT subresource = layer; // MipSlice + (ArraySlice * MipLevels). m_ctx->UpdateSubresource(m_texture, subresource, &box, data, pitch, 0); @@ -66,21 +70,21 @@ bool GSTexture11::Update(const GSVector4i& r, const void* data, int pitch, int l bool GSTexture11::Map(GSMap& m, const GSVector4i* r, int layer) { - if(r != NULL) + if (r != NULL) { // ASSERT(0); // not implemented return false; } - if(layer >= m_max_layer) + if (layer >= m_max_layer) return false; - if(m_texture && m_desc.Usage == D3D11_USAGE_STAGING) + if (m_texture && m_desc.Usage == D3D11_USAGE_STAGING) { D3D11_MAPPED_SUBRESOURCE map; UINT subresource = layer; - if(SUCCEEDED(m_ctx->Map(m_texture, subresource, D3D11_MAP_READ_WRITE, 0, &map))) + if (SUCCEEDED(m_ctx->Map(m_texture, subresource, D3D11_MAP_READ_WRITE, 0, &map))) { m.bits = (uint8*)map.pData; m.pitch = (int)map.RowPitch; @@ -96,7 +100,7 @@ bool GSTexture11::Map(GSMap& m, const GSVector4i* r, int layer) void GSTexture11::Unmap() { - if(m_texture) + if (m_texture) { UINT subresource = m_layer; m_ctx->Unmap(m_texture, subresource); @@ -156,7 +160,7 @@ bool GSTexture11::Save(const std::string& fn) { for (uint32 x = 0; x < desc.Width; x++) { - reinterpret_cast(d)[x] = static_cast(ldexpf(reinterpret_cast(s)[x*2], 32)); + reinterpret_cast(d)[x] = static_cast(ldexpf(reinterpret_cast(s)[x * 2], 32)); } } @@ -175,14 +179,14 @@ bool GSTexture11::Save(const std::string& fn) #endif switch (desc.Format) { - case DXGI_FORMAT_A8_UNORM: - format = GSPng::R8I_PNG; - break; - case DXGI_FORMAT_R8G8B8A8_UNORM: - break; - default: - fprintf(stderr, "DXGI_FORMAT %d not saved to image\n", desc.Format); - return false; + case DXGI_FORMAT_A8_UNORM: + format = GSPng::R8I_PNG; + break; + case DXGI_FORMAT_R8G8B8A8_UNORM: + break; + default: + fprintf(stderr, "DXGI_FORMAT %d not saved to image\n", desc.Format); + return false; } D3D11_MAPPED_SUBRESOURCE sm; @@ -207,9 +211,9 @@ GSTexture11::operator ID3D11Texture2D*() GSTexture11::operator ID3D11ShaderResourceView*() { - if(!m_srv && m_dev && m_texture) + if (!m_srv && m_dev && m_texture) { - if(m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS) + if (m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS) { D3D11_SHADER_RESOURCE_VIEW_DESC srvd = {}; @@ -232,7 +236,7 @@ GSTexture11::operator ID3D11RenderTargetView*() { ASSERT(m_dev); - if(!m_rtv && m_dev && m_texture) + if (!m_rtv && m_dev && m_texture) { m_dev->CreateRenderTargetView(m_texture, NULL, &m_rtv); } @@ -242,9 +246,9 @@ GSTexture11::operator ID3D11RenderTargetView*() GSTexture11::operator ID3D11DepthStencilView*() { - if(!m_dsv && m_dev && m_texture) + if (!m_dsv && m_dev && m_texture) { - if(m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS) + if (m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS) { D3D11_DEPTH_STENCIL_VIEW_DESC dsvd = {}; diff --git a/plugins/GSdx/Renderers/DX11/GSTextureCache11.cpp b/plugins/GSdx/Renderers/DX11/GSTextureCache11.cpp index 6db2dd6d23..5d8117aef3 100644 --- a/plugins/GSdx/Renderers/DX11/GSTextureCache11.cpp +++ b/plugins/GSdx/Renderers/DX11/GSTextureCache11.cpp @@ -121,13 +121,15 @@ void GSTextureCache11::Read(Source* t, const GSVector4i& r) const GIFRegTEX0& TEX0 = t->m_TEX0; - if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height())) { + if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height())) + { m_renderer->m_dev->CopyRect(t->m_texture, offscreen, r); GSTexture::GSMap m; GSVector4i r_offscreen(0, 0, r.width(), r.height()); - if (offscreen->Map(m, &r_offscreen)) { + if (offscreen->Map(m, &r_offscreen)) + { GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); diff --git a/plugins/GSdx/Renderers/DX11/GSTextureCache11.h b/plugins/GSdx/Renderers/DX11/GSTextureCache11.h index 52dedba984..1ba3dce2ac 100644 --- a/plugins/GSdx/Renderers/DX11/GSTextureCache11.h +++ b/plugins/GSdx/Renderers/DX11/GSTextureCache11.h @@ -27,7 +27,7 @@ class GSTextureCache11 : public GSTextureCache { protected: - int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;} + int Get8bitFormat() { return DXGI_FORMAT_A8_UNORM; } void Read(Target* t, const GSVector4i& r); void Read(Source* t, const GSVector4i& r); diff --git a/plugins/GSdx/Renderers/DX11/GSTextureFX11.cpp b/plugins/GSdx/Renderers/DX11/GSTextureFX11.cpp index 9176081c5c..c7fe2f64e5 100644 --- a/plugins/GSdx/Renderers/DX11/GSTextureFX11.cpp +++ b/plugins/GSdx/Renderers/DX11/GSTextureFX11.cpp @@ -38,7 +38,8 @@ bool GSDevice11::CreateTextureFX() hr = m_dev->CreateBuffer(&bd, NULL, &m_vs_cb); - if(FAILED(hr)) return false; + if (FAILED(hr)) + return false; memset(&bd, 0, sizeof(bd)); @@ -48,7 +49,8 @@ bool GSDevice11::CreateTextureFX() hr = m_dev->CreateBuffer(&bd, NULL, &m_gs_cb); - if (FAILED(hr)) return false; + if (FAILED(hr)) + return false; memset(&bd, 0, sizeof(bd)); @@ -58,7 +60,8 @@ bool GSDevice11::CreateTextureFX() hr = m_dev->CreateBuffer(&bd, NULL, &m_ps_cb); - if(FAILED(hr)) return false; + if (FAILED(hr)) + return false; D3D11_SAMPLER_DESC sd; @@ -75,7 +78,8 @@ bool GSDevice11::CreateTextureFX() hr = m_dev->CreateSamplerState(&sd, &m_palette_ss); - if(FAILED(hr)) return false; + if (FAILED(hr)) + return false; // create layout @@ -97,7 +101,7 @@ void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb) { auto i = std::as_const(m_vs).find(sel); - if(i == m_vs.end()) + if (i == m_vs.end()) { ShaderMacro sm(m_shader.model); @@ -126,7 +130,7 @@ void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb) i = m_vs.find(sel); } - if(m_vs_cb_cache.Update(cb)) + if (m_vs_cb_cache.Update(cb)) { ID3D11DeviceContext* ctx = m_ctx; @@ -184,7 +188,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe { auto i = std::as_const(m_ps).find(sel); - if(i == m_ps.end()) + if (i == m_ps.end()) { ShaderMacro sm(m_shader.model); @@ -234,7 +238,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe i = m_ps.find(sel); } - if(m_ps_cb_cache.Update(cb)) + if (m_ps_cb_cache.Update(cb)) { ID3D11DeviceContext* ctx = m_ctx; @@ -243,16 +247,16 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe CComPtr ss0, ss1; - if(sel.tfx != 4) + if (sel.tfx != 4) { - if(!(sel.fmt < 3 && sel.wms < 3 && sel.wmt < 3)) + if (!(sel.fmt < 3 && sel.wms < 3 && sel.wmt < 3)) { ssel.ltf = 0; } auto i = std::as_const(m_ps_ss).find(ssel); - if(i != m_ps_ss.end()) + if (i != m_ps_ss.end()) { ss0 = i->second; } @@ -278,7 +282,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe m_ps_ss[ssel] = ss0; } - if(sel.fmt >= 3) + if (sel.fmt >= 3) { ss1 = m_palette_ss; } @@ -293,13 +297,13 @@ void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uin { auto i = std::as_const(m_om_dss).find(dssel); - if(i == m_om_dss.end()) + if (i == m_om_dss.end()) { D3D11_DEPTH_STENCIL_DESC dsd; memset(&dsd, 0, sizeof(dsd)); - if(dssel.date) + if (dssel.date) { dsd.StencilEnable = true; dsd.StencilReadMask = 1; @@ -314,7 +318,7 @@ void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uin dsd.BackFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; } - if(dssel.ztst != ZTST_ALWAYS || dssel.zwe) + if (dssel.ztst != ZTST_ALWAYS || dssel.zwe) { static const D3D11_COMPARISON_FUNC ztst[] = { @@ -342,7 +346,7 @@ void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uin auto j = std::as_const(m_om_bs).find(bsel); - if(j == m_om_bs.end()) + if (j == m_om_bs.end()) { D3D11_BLEND_DESC bd; @@ -350,7 +354,7 @@ void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uin bd.RenderTarget[0].BlendEnable = bsel.abe; - if(bsel.abe) + if (bsel.abe) { HWBlend blend = GetBlend(bsel.blend_index); bd.RenderTarget[0].BlendOp = (D3D11_BLEND_OP)blend.op; @@ -367,10 +371,10 @@ void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uin } } - if(bsel.wr) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_RED; - if(bsel.wg) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_GREEN; - if(bsel.wb) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_BLUE; - if(bsel.wa) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_ALPHA; + if (bsel.wr) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_RED; + if (bsel.wg) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_GREEN; + if (bsel.wb) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_BLUE; + if (bsel.wa) bd.RenderTarget[0].RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_ALPHA; CComPtr bs; diff --git a/plugins/GSdx/Renderers/HW/GSRendererHW.cpp b/plugins/GSdx/Renderers/HW/GSRendererHW.cpp index c7f4cf34b6..4b19c95d7c 100644 --- a/plugins/GSdx/Renderers/HW/GSRendererHW.cpp +++ b/plugins/GSdx/Renderers/HW/GSRendererHW.cpp @@ -37,14 +37,15 @@ GSRendererHW::GSRendererHW(GSTextureCache* tc) , m_userhacks_tcoffset_x(0) , m_userhacks_tcoffset_y(0) , m_channel_shuffle(false) - , m_lod(GSVector2i(0,0)) + , m_lod(GSVector2i(0, 0)) { m_mipmap = theApp.GetConfigI("mipmap_hw"); m_upscale_multiplier = theApp.GetConfigI("upscale_multiplier"); m_conservative_framebuffer = theApp.GetConfigB("conservative_framebuffer"); m_accurate_date = theApp.GetConfigB("accurate_date"); - if (theApp.GetConfigB("UserHacks")) { + if (theApp.GetConfigB("UserHacks")) + { m_userhacks_enabled_gs_mem_clear = !theApp.GetConfigB("UserHacks_Disable_Safe_Features"); m_userHacks_enabled_unscale_ptln = !theApp.GetConfigB("UserHacks_Disable_Safe_Features"); m_userhacks_align_sprite_X = theApp.GetConfigB("UserHacks_align_sprite_X"); @@ -55,7 +56,9 @@ GSRendererHW::GSRendererHW(GSTextureCache* tc) m_userhacks_tcoffset_x = theApp.GetConfigI("UserHacks_TCOffsetX") / -1000.0f; m_userhacks_tcoffset_y = theApp.GetConfigI("UserHacks_TCOffsetY") / -1000.0f; m_userhacks_tcoffset = m_userhacks_tcoffset_x < 0.0f || m_userhacks_tcoffset_y < 0.0f; - } else { + } + else + { m_userhacks_enabled_gs_mem_clear = true; m_userHacks_enabled_unscale_ptln = true; m_userhacks_align_sprite_X = false; @@ -65,15 +68,17 @@ GSRendererHW::GSRendererHW(GSTextureCache* tc) m_userHacks_HPO = 0; } - if (!m_upscale_multiplier) { //Custom Resolution + if (!m_upscale_multiplier) // Custom Resolution + { m_custom_width = m_width = theApp.GetConfigI("resx"); m_custom_height = m_height = theApp.GetConfigI("resy"); } - if (m_upscale_multiplier == 1) { // hacks are only needed for upscaling issues. - m_userhacks_round_sprite_offset = 0; - m_userhacks_align_sprite_X = false; - m_userHacks_merge_sprite = false; + if (m_upscale_multiplier == 1) // hacks are only needed for upscaling issues. + { + m_userhacks_round_sprite_offset = 0; + m_userhacks_align_sprite_X = false; + m_userHacks_merge_sprite = false; } m_dump_root = root_hw; @@ -108,7 +113,7 @@ void GSRendererHW::SetScaling() // Framebuffer width is always a multiple of 64 so at certain cases it can't cover some weird width values. // 480P , 576P use width as 720 which is not referencable by FBW * 64. so it produces 704 ( the closest value multiple by 64). // In such cases, let's just use the CRTC width. - int fb_width = std::max({ (int)m_context->FRAME.FBW * 64, crtc_size.x , 512 }); + int fb_width = std::max({(int)m_context->FRAME.FBW * 64, crtc_size.x, 512}); // GS doesn't have a specific register for the FrameBuffer height. so we get the height // from physical units of the display rectangle in case the game uses a heigher value of height. // @@ -139,13 +144,13 @@ void GSRendererHW::SetScaling() // No need to resize for native/custom resolutions as default size will be enough for native and we manually get RT Buffer size for custom. // don't resize until the display rectangle and register states are stabilized. - if ( m_upscale_multiplier <= 1 || good_rt_size) + if (m_upscale_multiplier <= 1 || good_rt_size) return; m_tc->RemovePartial(); m_width = upscaled_fb_w; m_height = upscaled_fb_h; - printf("Frame buffer size set to %dx%d (%dx%d)\n", fb_width, fb_height , m_width, m_height); + printf("Frame buffer size set to %dx%d (%dx%d)\n", fb_width, fb_height, m_width, m_height); } void GSRendererHW::CustomResolutionScaling() @@ -173,7 +178,7 @@ void GSRendererHW::CustomResolutionScaling() // scissoring values) Display rectangle has a height of 256 but scissor has a height of 512 which seems to // be the real buffer size. Not sure if the width one is needed, need to check it on some random data before enabling it. // int framebuffer_width = static_cast(std::round(scissored_buffer_size.x * scaling_ratio.x)); - int framebuffer_height = static_cast(std::round(scissored_buffer_size.y * scaling_ratio.y)); + int framebuffer_height = static_cast(std::round(scissored_buffer_size.y * scaling_ratio.y)); if (m_width >= m_custom_width && m_height >= framebuffer_height) return; @@ -249,12 +254,12 @@ void GSRendererHW::SetGameCRC(uint32 crc, int options) bool GSRendererHW::CanUpscale() { - if(m_hacks.m_cu && !(this->*m_hacks.m_cu)()) + if (m_hacks.m_cu && !(this->*m_hacks.m_cu)()) { return false; } - return m_upscale_multiplier!=1 && m_regs->PMODE.EN != 0; // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition) + return m_upscale_multiplier != 1 && m_regs->PMODE.EN != 0; // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition) } int GSRendererHW::GetUpscaleMultiplier() @@ -282,7 +287,7 @@ void GSRendererHW::VSync(int field) //Check if the frame buffer width or display width has changed SetScaling(); - if(m_reset) + if (m_reset) { m_tc->RemoveAll(); @@ -321,12 +326,13 @@ GSTexture* GSRendererHW::GetOutput(int i, int& y_offset) GSTexture* t = NULL; - if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GetFramebufferHeight())) + if (GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GetFramebufferHeight())) { t = rt->m_texture; int delta = TEX0.TBP0 - rt->m_TEX0.TBP0; - if (delta > 0 && DISPFB.FBW != 0) { + if (delta > 0 && DISPFB.FBW != 0) + { int pages = delta >> 5u; int y_pages = pages / DISPFB.FBW; y_offset = y_pages * GSLocalMemory::m_psm[DISPFB.PSM].pgs.y; @@ -334,9 +340,9 @@ GSTexture* GSRendererHW::GetOutput(int i, int& y_offset) } #ifdef ENABLE_OGL_DEBUG - if(s_dump) + if (s_dump) { - if(s_savef && s_n >= s_saven) + if (s_savef && s_n >= s_saven) { t->Save(m_dump_root + format("%05d_f%lld_fr%d_%05x_%s.bmp", s_n, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, psm_str(TEX0.PSM))); } @@ -355,12 +361,12 @@ GSTexture* GSRendererHW::GetFeedbackOutput() TEX0.TBW = m_regs->EXTBUF.EXBW; TEX0.PSM = m_regs->DISP[m_regs->EXTBUF.FBIN & 1].DISPFB.PSM; - GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, /*GetFrameRect(i).bottom*/0); + GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, /*GetFrameRect(i).bottom*/ 0); GSTexture* t = rt->m_texture; #ifdef ENABLE_OGL_DEBUG - if(s_dump && s_savef && s_n >= s_saven) + if (s_dump && s_savef && s_n >= s_saven) t->Save(m_dump_root + format("%05d_f%lld_fr%d_%05x_%s.bmp", s_n, m_perfmon.GetFrame(), 3, (int)TEX0.TBP0, psm_str(TEX0.PSM))); #endif @@ -398,7 +404,8 @@ void GSRendererHW::Lines2Sprites() v0.XYZ.Z = v1.XYZ.Z; v0.FOG = v1.FOG; - if (PRIM->TME && !PRIM->FST) { + if (PRIM->TME && !PRIM->FST) + { GSVector4 st0 = GSVector4::loadl(&v0.ST.u64); GSVector4 st1 = GSVector4::loadl(&v1.ST.u64); GSVector4 Q = GSVector4(v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q); @@ -498,7 +505,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) const GIFRegXYOFFSET& o = m_context->XYOFFSET; // vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors - int pos = (v[0].XYZ.X - o.OFX) & 0xFF; + int pos = (v[0].XYZ.X - o.OFX) & 0xFF; write_ba = (pos > 112 && pos < 136); // Read texture is 8 to 16 pixels (same as above) @@ -508,7 +515,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) read_ba = (tex_pos > 112 && tex_pos < 144); bool half_bottom = false; - switch (m_userhacks_ts_half_bottom) { + switch (m_userhacks_ts_half_bottom) + { case 0: // Force Disabled. // Force Disabled will help games such as Xenosaga. @@ -541,10 +549,12 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) break; } - if (PRIM->FST) { + if (PRIM->FST) + { GL_INS("First vertex is P: %d => %d T: %d => %d", v[0].XYZ.X, v[1].XYZ.X, v[0].U, v[1].U); - for(size_t i = 0; i < count; i += 2) { + for (size_t i = 0; i < count; i += 2) + { if (write_ba) v[i].XYZ.X -= 128u; else @@ -555,7 +565,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) else v[i+1].U += 128u; - if (!half_bottom){ + if (!half_bottom) + { // Height is too big (2x). int tex_offset = v[i].V & 0xF; GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset); @@ -569,11 +580,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) v[i + 1].V = (uint16)tmp.w; } } - } else { + } + else + { const float offset_8pix = 8.0f / tw; GL_INS("First vertex is P: %d => %d T: %f => %f (offset %f)", v[0].XYZ.X, v[1].XYZ.X, v[0].ST.S, v[1].ST.S, offset_8pix); - for(size_t i = 0; i < count; i += 2) { + for (size_t i = 0; i < count; i += 2) + { if (write_ba) v[i].XYZ.X -= 128u; else @@ -584,14 +598,15 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) else v[i+1].ST.S += offset_8pix; - if (!half_bottom) { + if (!half_bottom) + { // Height is too big (2x). GSVector4i offset(o.OFY, o.OFY); GSVector4i tmp(v[i].XYZ.Y, v[i + 1].XYZ.Y); tmp = GSVector4i(tmp - offset).srl32(1) + offset; - //fprintf(stderr, "Before %d, After %d\n", v[i+1].XYZ.Y, tmp.y); + //fprintf(stderr, "Before %d, After %d\n", v[i + 1].XYZ.Y, tmp.y); v[i].XYZ.Y = (uint16)tmp.x; v[i].ST.T /= 2.0f; v[i + 1].XYZ.Y = (uint16)tmp.y; @@ -606,7 +621,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) else m_vt.m_max.p.x += 8.0f; - if (!half_bottom) { + if (!half_bottom) + { float delta_Y = m_vt.m_max.p.y - m_vt.m_min.p.y; m_vt.m_max.p.y -= delta_Y / 2.0f; } @@ -616,7 +632,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) else m_vt.m_max.t.x += 8.0f; - if (!half_bottom) { + if (!half_bottom) + { float delta_T = m_vt.m_max.t.y - m_vt.m_min.t.y; m_vt.m_max.t.y -= delta_T / 2.0f; } @@ -624,58 +641,73 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Source* tex) { - if (m_userHacks_HPO <= 1 || GetUpscaleMultiplier() == 1) return GSVector4(0.0f); + if (m_userHacks_HPO <= 1 || GetUpscaleMultiplier() == 1) + return GSVector4(0.0f); - GSVertex* v = &m_vertex.buff[0]; - const GSVector2& scale = tex->m_texture->GetScale(); - bool linear = m_vt.IsRealLinear(); - int t_position = v[0].U; + GSVertex* v = &m_vertex.buff[0]; + const GSVector2& scale = tex->m_texture->GetScale(); + bool linear = m_vt.IsRealLinear(); + int t_position = v[0].U; GSVector4 half_offset(0.0f); // FIXME Let's start with something wrong same mess on X and Y // FIXME Maybe it will be enough to check linear - if (PRIM->FST) { + if (PRIM->FST) + { - if (m_userHacks_HPO == 3) { - if (!linear && t_position == 8) { - half_offset.x = 8; - half_offset.y = 8; - } else if (linear && t_position == 16) { - half_offset.x = 16; - half_offset.y = 16; - } else if (m_vt.m_min.p.x == -0.5f) { + if (m_userHacks_HPO == 3) + { + if (!linear && t_position == 8) + { half_offset.x = 8; half_offset.y = 8; } - } else { - if (!linear && t_position == 8) { + else if (linear && t_position == 16) + { + half_offset.x = 16; + half_offset.y = 16; + } + else if (m_vt.m_min.p.x == -0.5f) + { + half_offset.x = 8; + half_offset.y = 8; + } + } + else + { + if (!linear && t_position == 8) + { half_offset.x = 8 - 8 / scale.x; half_offset.y = 8 - 8 / scale.y; - } else if (linear && t_position == 16) { + } + else if (linear && t_position == 16) + { half_offset.x = 16 - 16 / scale.x; half_offset.y = 16 - 16 / scale.y; - } else if (m_vt.m_min.p.x == -0.5f) { + } + else if (m_vt.m_min.p.x == -0.5f) + { half_offset.x = 8; half_offset.y = 8; } } GL_INS("offset detected %f,%f t_pos %d (linear %d, scale %f)", - half_offset.x, half_offset.y, t_position, linear, scale.x); - - } else if (m_vt.m_eq.q) { + half_offset.x, half_offset.y, t_position, linear, scale.x); + } + else if (m_vt.m_eq.q) + { float tw = (float)(1 << m_context->TEX0.TW); float th = (float)(1 << m_context->TEX0.TH); - float q = v[0].RGBAQ.Q; + float q = v[0].RGBAQ.Q; // Tales of Abyss half_offset.x = 0.5f * q / tw; half_offset.y = 0.5f * q / th; GL_INS("ST offset detected %f,%f (linear %d, scale %f)", - half_offset.x, half_offset.y, linear, scale.x); - + half_offset.x, half_offset.y, linear, scale.x); } return half_offset; @@ -692,8 +724,10 @@ GSVector4i GSRendererHW::ComputeBoundingBox(const GSVector2& rtscale, const GSVe void GSRendererHW::MergeSprite(GSTextureCache::Source* tex) { // Upscaling hack to avoid various line/grid issues - if (m_userHacks_merge_sprite && tex && tex->m_target && (m_vt.m_primclass == GS_SPRITE_CLASS)) { - if (PRIM->FST && GSLocalMemory::m_psm[tex->m_TEX0.PSM].fmt < 2 && ((m_vt.m_eq.value & 0xCFFFF) == 0xCFFFF)) { + if (m_userHacks_merge_sprite && tex && tex->m_target && (m_vt.m_primclass == GS_SPRITE_CLASS)) + { + if (PRIM->FST && GSLocalMemory::m_psm[tex->m_TEX0.PSM].fmt < 2 && ((m_vt.m_eq.value & 0xCFFFF) == 0xCFFFF)) + { // Ideally the hack ought to be enabled in a true paving mode only. I don't know how to do it accurately // neither in a fast way. So instead let's just take the hypothesis that all sprites must have the same @@ -704,10 +738,12 @@ void GSRendererHW::MergeSprite(GSTextureCache::Source* tex) // SSE optimization: shuffle m[1] to have (4*32 bits) X, Y, U, V int first_dpX = v[1].XYZ.X - v[0].XYZ.X; int first_dpU = v[1].U - v[0].U; - for (size_t i = 0; i < m_vertex.next; i += 2) { + for (size_t i = 0; i < m_vertex.next; i += 2) + { int dpX = v[i + 1].XYZ.X - v[i].XYZ.X; int dpU = v[i + 1].U - v[i].U; - if (dpX != first_dpX || dpU != first_dpU) { + if (dpX != first_dpX || dpU != first_dpU) + { is_paving = false; break; } @@ -720,7 +756,8 @@ void GSRendererHW::MergeSprite(GSTextureCache::Source* tex) GL_INS("PP SAMPLER: Dp %f %f Dt %f %f. Is blit %d, is paving %d, count %d", delta_p.x, delta_p.y, delta_t.x, delta_t.y, is_blit, is_paving, m_vertex.tail); #endif - if (is_paving) { + if (is_paving) + { // Replace all sprite with a single fullscreen sprite. GSVertex* s = &m_vertex.buff[0]; @@ -752,7 +789,8 @@ void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS { // printf("[%d] InvalidateLocalMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP, (int)BITBLTBUF.SPSM); - if(clut) return; // FIXME + if (clut) + return; // FIXME m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r); } @@ -779,19 +817,19 @@ void GSRendererHW::SwSpriteRender() { // Supported drawing attributes ASSERT(PRIM->PRIM == GS_TRIANGLESTRIP || PRIM->PRIM == GS_SPRITE); - ASSERT(!PRIM->FGE); // No FOG - ASSERT(!PRIM->AA1); // No antialiasing - ASSERT(!PRIM->FIX); // Normal fragment value control + ASSERT(!PRIM->FGE); // No FOG + ASSERT(!PRIM->AA1); // No antialiasing + ASSERT(!PRIM->FIX); // Normal fragment value control ASSERT(!m_env.DTHE.DTHE); // No dithering - ASSERT(!m_context->TEST.ATE); // No alpha test - ASSERT(!m_context->TEST.DATE); // No destination alpha test - ASSERT(!m_context->DepthRead() && !m_context->DepthWrite()); // No depth handling + ASSERT(!m_context->TEST.ATE); // No alpha test + ASSERT(!m_context->TEST.DATE); // No destination alpha test + ASSERT(!m_context->DepthRead() && !m_context->DepthWrite()); // No depth handling - ASSERT(!m_context->TEX0.CSM); // No CLUT usage + ASSERT(!m_context->TEX0.CSM); // No CLUT usage - ASSERT(!m_env.PABE.PABE); // No PABE + ASSERT(!m_env.PABE.PABE); // No PABE // PSMCT32 pixel format ASSERT(!PRIM->TME || (PRIM->TME && m_context->TEX0.PSM == PSM_PSMCT32)); @@ -819,10 +857,10 @@ void GSRendererHW::SwSpriteRender() bitbltbuf.DBW = m_context->FRAME.FBW; bitbltbuf.DPSM = m_context->FRAME.PSM; - ASSERT(m_r.x == 0 && m_r.y == 0); // No rendering region offset - ASSERT(!PRIM->TME || (abs(m_vt.m_min.t.x) <= SSR_UV_TOLERANCE && abs(m_vt.m_min.t.y) <= SSR_UV_TOLERANCE)); // No input texture offset, if any - ASSERT(!PRIM->TME || (abs(m_vt.m_max.t.x - m_r.z) <= SSR_UV_TOLERANCE && abs(m_vt.m_max.t.y - m_r.w) <= SSR_UV_TOLERANCE)); // No input texture min/mag, if any - ASSERT(!PRIM->TME || (m_vt.m_max.t.x <= (1 << m_context->TEX0.TW) && m_vt.m_max.t.y <= (1 << m_context->TEX0.TH))); // No texture UV wrap, if any + ASSERT(m_r.x == 0 && m_r.y == 0); // No rendering region offset + ASSERT(!PRIM->TME || (abs(m_vt.m_min.t.x) <= SSR_UV_TOLERANCE && abs(m_vt.m_min.t.y) <= SSR_UV_TOLERANCE)); // No input texture offset, if any + ASSERT(!PRIM->TME || (abs(m_vt.m_max.t.x - m_r.z) <= SSR_UV_TOLERANCE && abs(m_vt.m_max.t.y - m_r.w) <= SSR_UV_TOLERANCE)); // No input texture min/mag, if any + ASSERT(!PRIM->TME || (m_vt.m_max.t.x <= (1 << m_context->TEX0.TW) && m_vt.m_max.t.y <= (1 << m_context->TEX0.TH))); // No texture UV wrap, if any GIFRegTRXPOS trxpos = {}; @@ -859,11 +897,11 @@ void GSRendererHW::SwSpriteRender() const bool alpha_blending_enabled = PRIM->ABE; - const GSVertex& v = m_vertex.buff[m_index.buff[m_index.tail - 1]]; // Last vertex. - const GSVector4i vc = GSVector4i(v.RGBAQ.R, v.RGBAQ.G, v.RGBAQ.B, v.RGBAQ.A) // 0x000000AA000000BB000000GG000000RR - .ps32(); // 0x00AA00BB00GG00RR00AA00BB00GG00RR + const GSVertex& v = m_vertex.buff[m_index.buff[m_index.tail - 1]]; // Last vertex. + const GSVector4i vc = GSVector4i(v.RGBAQ.R, v.RGBAQ.G, v.RGBAQ.B, v.RGBAQ.A) // 0x000000AA000000BB000000GG000000RR + .ps32(); // 0x00AA00BB00GG00RR00AA00BB00GG00RR - const GSVector4i a_mask = GSVector4i::xff000000().u8to16(); // 0x00FF00000000000000FF000000000000 + const GSVector4i a_mask = GSVector4i::xff000000().u8to16(); // 0x00FF00000000000000FF000000000000 const bool fb_mask_enabled = m_context->FRAME.FBMSK != 0x0; const GSVector4i fb_mask = GSVector4i(m_context->FRAME.FBMSK).u8to16(); // 0x00AA00BB00GG00RR00AA00BB00GG00RR @@ -887,13 +925,13 @@ void GSRendererHW::SwSpriteRender() if (texture_mapping_enabled) { // Read 2 source pixel colors - ASSERT((scol[x] + 1) == scol[x + 1]); // Source pixel pair is adjacent in memory - sc = GSVector4i::loadl(&s[scol[x]]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr + ASSERT((scol[x] + 1) == scol[x + 1]); // Source pixel pair is adjacent in memory + sc = GSVector4i::loadl(&s[scol[x]]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr // Apply TFX ASSERT(tex0_tfx == 0 || tex0_tfx == 1); if (tex0_tfx == 0) - sc = sc.mul16l(vc).srl16(7).clamp8(); // clamp((sc * vc) >> 7, 0, 255), srl16 is ok because 16 bit values are unsigned + sc = sc.mul16l(vc).srl16(7).clamp8(); // clamp((sc * vc) >> 7, 0, 255), srl16 is ok because 16 bit values are unsigned if (tex0_tcc == 0) sc = sc.blend(vc, a_mask); @@ -909,8 +947,8 @@ void GSRendererHW::SwSpriteRender() if (alpha_blending_enabled || fb_mask_enabled) { // Read 2 destination pixel colors - ASSERT((dcol[x] + 1) == dcol[x + 1]); // Destination pixel pair is adjacent in memory - dc0 = GSVector4i::loadl(&d[dcol[x]]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr + ASSERT((dcol[x] + 1) == dcol[x + 1]); // Destination pixel pair is adjacent in memory + dc0 = GSVector4i::loadl(&d[dcol[x]]).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr } if (alpha_blending_enabled) @@ -927,19 +965,19 @@ void GSRendererHW::SwSpriteRender() sc_alpha_vec = GSVector4i(alpha_fix).xxxx().ps32(); else sc_alpha_vec = (alpha_c == 0 ? sc : dc0) - .yyww() // 0x00AA00BB00AA00BB00aa00bb00aa00bb - .srl32(16) // 0x000000AA000000AA000000aa000000aa - .ps32() // 0x00AA00AA00aa00aa00AA00AA00aa00aa - .xxyy(); // 0x00AA00AA00AA00AA00aa00aa00aa00aa + .yyww() // 0x00AA00BB00AA00BB00aa00bb00aa00bb + .srl32(16) // 0x000000AA000000AA000000aa000000aa + .ps32() // 0x00AA00AA00aa00aa00AA00AA00aa00aa + .xxyy(); // 0x00AA00AA00AA00AA00aa00aa00aa00aa switch (alpha_b) { - case 1: - dc = sc.sub16(dc0).mul16l(sc_alpha_vec).sra16(7).add16(dc0); // (((Cs - Cd) * C) >> 7) + Cd, must use sra16 due to signed 16 bit values - break; - default: - dc = sc.mul16l(sc_alpha_vec).sra16(7).add16(dc0); // (((Cs - 0) * C) >> 7) + Cd, must use sra16 due to signed 16 bit values - break; + case 1: + dc = sc.sub16(dc0).mul16l(sc_alpha_vec).sra16(7).add16(dc0); // (((Cs - Cd) * C) >> 7) + Cd, must use sra16 due to signed 16 bit values + break; + default: + dc = sc.mul16l(sc_alpha_vec).sra16(7).add16(dc0); // (((Cs - 0) * C) >> 7) + Cd, must use sra16 due to signed 16 bit values + break; } // dc alpha channels (dc.u16[3], dc.u16[7]) dirty } @@ -950,9 +988,9 @@ void GSRendererHW::SwSpriteRender() // Clamping if (m_env.COLCLAMP.CLAMP) - dc = dc.clamp8(); // clamp(dc, 0, 255) + dc = dc.clamp8(); // clamp(dc, 0, 255) else - dc = dc.sll16(8).srl16(8); // Mask, lower 8 bits enabled per channel + dc = dc.sll16(8).srl16(8); // Mask, lower 8 bits enabled per channel // No Alpha Correction ASSERT(m_context->FBA.FBA == 0); @@ -964,8 +1002,8 @@ void GSRendererHW::SwSpriteRender() dc = dc.blend(dc0, fb_mask); // Store 2 pixel colors - dc = dc.pu16(GSVector4i::zero()); // 0x0000000000000000AABBGGRRaabbggrr - ASSERT((dcol[x] + 1) == dcol[x + 1]); // Destination pixel pair is adjacent in memory + dc = dc.pu16(GSVector4i::zero()); // 0x0000000000000000AABBGGRRaabbggrr + ASSERT((dcol[x] + 1) == dcol[x + 1]); // Destination pixel pair is adjacent in memory GSVector4i::storel(&d[dcol[x]], dc); } } @@ -974,7 +1012,7 @@ void GSRendererHW::SwSpriteRender() bool GSRendererHW::CanUseSwSpriteRender(bool allow_64x64_sprite) { const bool r_0_0_64_64 = allow_64x64_sprite ? (m_r == GSVector4i(0, 0, 64, 64)).alltrue() : false; - if (r_0_0_64_64 && !allow_64x64_sprite) // Rendering region 64x64 support is enabled via parameter + if (r_0_0_64_64 && !allow_64x64_sprite) // Rendering region 64x64 support is enabled via parameter return false; const bool r_0_0_16_16 = (m_r == GSVector4i(0, 0, 16, 16)).alltrue(); if (!r_0_0_16_16 && !r_0_0_64_64) // Rendering region is 16x16 or 64x64, without offset @@ -982,39 +1020,39 @@ bool GSRendererHW::CanUseSwSpriteRender(bool allow_64x64_sprite) if (PRIM->PRIM != GS_SPRITE && ((PRIM->IIP && m_vt.m_eq.rgba != 0xffff) || (PRIM->TME && !PRIM->FST && m_vt.m_eq.q != 0x1) - || m_vt.m_eq.z != 0x1)) // No rasterization + || m_vt.m_eq.z != 0x1)) // No rasterization return false; - if (m_vt.m_primclass != GS_TRIANGLE_CLASS && m_vt.m_primclass != GS_SPRITE_CLASS) // Triangle or sprite class prims + if (m_vt.m_primclass != GS_TRIANGLE_CLASS && m_vt.m_primclass != GS_SPRITE_CLASS) // Triangle or sprite class prims return false; if (PRIM->PRIM != GS_TRIANGLESTRIP && PRIM->PRIM != GS_SPRITE) // Triangle strip or sprite draw return false; - if (m_vt.m_primclass == GS_TRIANGLE_CLASS && (PRIM->PRIM != GS_TRIANGLESTRIP || m_vertex.tail != 4)) // If triangle class, strip draw with 4 vertices (two prims, emulating single sprite prim) + if (m_vt.m_primclass == GS_TRIANGLE_CLASS && (PRIM->PRIM != GS_TRIANGLESTRIP || m_vertex.tail != 4)) // If triangle class, strip draw with 4 vertices (two prims, emulating single sprite prim) return false; // TODO If GS_TRIANGLESTRIP draw, check that the draw is axis aligned - if (m_vt.m_primclass == GS_SPRITE_CLASS && (PRIM->PRIM != GS_SPRITE || m_vertex.tail != 2)) // If sprite class, sprite draw with 2 vertices (one prim) + if (m_vt.m_primclass == GS_SPRITE_CLASS && (PRIM->PRIM != GS_SPRITE || m_vertex.tail != 2)) // If sprite class, sprite draw with 2 vertices (one prim) return false; - if (m_context->DepthRead() || m_context->DepthWrite()) // No depth handling + if (m_context->DepthRead() || m_context->DepthWrite()) // No depth handling return false; - if (m_context->FRAME.PSM != PSM_PSMCT32) // Frame buffer format is 32 bit color + if (m_context->FRAME.PSM != PSM_PSMCT32) // Frame buffer format is 32 bit color return false; if (PRIM->TME) - { + { // Texture mapping enabled - if (m_context->TEX0.PSM != PSM_PSMCT32) // Input texture format is 32 bit color + if (m_context->TEX0.PSM != PSM_PSMCT32) // Input texture format is 32 bit color return false; - if (IsMipMapDraw()) // No mipmapping + if (IsMipMapDraw()) // No mipmapping return false; - if (abs(m_vt.m_min.t.x) > SSR_UV_TOLERANCE || abs(m_vt.m_min.t.y) > SSR_UV_TOLERANCE) // No horizontal nor vertical offset + if (abs(m_vt.m_min.t.x) > SSR_UV_TOLERANCE || abs(m_vt.m_min.t.y) > SSR_UV_TOLERANCE) // No horizontal nor vertical offset return false; - if (abs(m_vt.m_max.t.x - m_r.z) > SSR_UV_TOLERANCE || abs(m_vt.m_max.t.y - m_r.w) > SSR_UV_TOLERANCE) // No texture width or height mag/min + if (abs(m_vt.m_max.t.x - m_r.z) > SSR_UV_TOLERANCE || abs(m_vt.m_max.t.y - m_r.w) > SSR_UV_TOLERANCE) // No texture width or height mag/min return false; const int tw = 1 << m_context->TEX0.TW; const int th = 1 << m_context->TEX0.TH; - if (m_vt.m_max.t.x > tw || m_vt.m_max.t.y > th) // No UV wrapping + if (m_vt.m_max.t.x > tw || m_vt.m_max.t.y > th) // No UV wrapping return false; } - + // The draw call is a good candidate for using the SwSpriteRender to replace the GPU draw // However, some draw attributes might not be supported yet by the SwSpriteRender, // so if any bug occurs in using it, enabling debug build would probably @@ -1035,21 +1073,23 @@ void GSRendererHW::RoundSpriteOffset() size_t count = m_vertex.next; GSVertex* v = &m_vertex.buff[0]; - for(size_t i = 0; i < count; i += 2) { + for (size_t i = 0; i < count; i += 2) + { // Performance note: if it had any impact on perf, someone would port it to SSE (AKA GSVector) // Compute the coordinate of first and last texels (in native with a linear filtering) int ox = m_context->XYOFFSET.OFX; int X0 = v[i].XYZ.X - ox; - int X1 = v[i+1].XYZ.X - ox; - int Lx = (v[i+1].XYZ.X - v[i].XYZ.X); + int X1 = v[i + 1].XYZ.X - ox; + int Lx = (v[i + 1].XYZ.X - v[i].XYZ.X); float ax0 = alpha0(Lx, X0, X1); float ax1 = alpha1(Lx, X0, X1); - uint16 tx0 = Interpolate_UV(ax0, v[i].U, v[i+1].U); - uint16 tx1 = Interpolate_UV(ax1, v[i].U, v[i+1].U); + uint16 tx0 = Interpolate_UV(ax0, v[i].U, v[i + 1].U); + uint16 tx1 = Interpolate_UV(ax1, v[i].U, v[i + 1].U); #ifdef DEBUG_U - if (debug) { - fprintf(stderr, "u0:%d and u1:%d\n", v[i].U, v[i+1].U); + if (debug) + { + fprintf(stderr, "u0:%d and u1:%d\n", v[i].U, v[i + 1].U); fprintf(stderr, "a0:%f and a1:%f\n", ax0, ax1); fprintf(stderr, "t0:%d and t1:%d\n", tx0, tx1); } @@ -1057,15 +1097,16 @@ void GSRendererHW::RoundSpriteOffset() int oy = m_context->XYOFFSET.OFY; int Y0 = v[i].XYZ.Y - oy; - int Y1 = v[i+1].XYZ.Y - oy; - int Ly = (v[i+1].XYZ.Y - v[i].XYZ.Y); + int Y1 = v[i + 1].XYZ.Y - oy; + int Ly = (v[i + 1].XYZ.Y - v[i].XYZ.Y); float ay0 = alpha0(Ly, Y0, Y1); float ay1 = alpha1(Ly, Y0, Y1); - uint16 ty0 = Interpolate_UV(ay0, v[i].V, v[i+1].V); - uint16 ty1 = Interpolate_UV(ay1, v[i].V, v[i+1].V); + uint16 ty0 = Interpolate_UV(ay0, v[i].V, v[i + 1].V); + uint16 ty1 = Interpolate_UV(ay1, v[i].V, v[i + 1].V); #ifdef DEBUG_V - if (debug) { - fprintf(stderr, "v0:%d and v1:%d\n", v[i].V, v[i+1].V); + if (debug) + { + fprintf(stderr, "v0:%d and v1:%d\n", v[i].V, v[i + 1].V); fprintf(stderr, "a0:%f and a1:%f\n", ay0, ay1); fprintf(stderr, "t0:%d and t1:%d\n", ty0, ty1); } @@ -1073,11 +1114,11 @@ void GSRendererHW::RoundSpriteOffset() #ifdef DEBUG_U if (debug) - fprintf(stderr, "GREP_BEFORE %d => %d\n", v[i].U, v[i+1].U); + fprintf(stderr, "GREP_BEFORE %d => %d\n", v[i].U, v[i + 1].U); #endif #ifdef DEBUG_V if (debug) - fprintf(stderr, "GREP_BEFORE %d => %d\n", v[i].V, v[i+1].V); + fprintf(stderr, "GREP_BEFORE %d => %d\n", v[i].V, v[i + 1].V); #endif #if 1 @@ -1088,54 +1129,68 @@ void GSRendererHW::RoundSpriteOffset() // of interpolation migth trigger a discard (with alpha testing) // Let's use something simple that correct really bad case (for a couple of 2D games). // I hope it won't create too much glitches. - if (linear) { - int Lu = v[i+1].U - v[i].U; + if (linear) + { + int Lu = v[i + 1].U - v[i].U; // Note 32 is based on taisho-mononoke - if ((Lu > 0) && (Lu <= (Lx+32))) { - v[i+1].U -= 8; + if ((Lu > 0) && (Lu <= (Lx + 32))) + { + v[i + 1].U -= 8; } - } else { - if (tx0 <= tx1) { - v[i].U = tx0; - v[i+1].U = tx1 + 16; - } else { - v[i].U = tx0 + 15; - v[i+1].U = tx1; + } + else + { + if (tx0 <= tx1) + { + v[i].U = tx0; + v[i + 1].U = tx1 + 16; + } + else + { + v[i].U = tx0 + 15; + v[i + 1].U = tx1; } } #endif #if 1 - if (linear) { - int Lv = v[i+1].V - v[i].V; - if ((Lv > 0) && (Lv <= (Ly+32))) { - v[i+1].V -= 8; + if (linear) + { + int Lv = v[i + 1].V - v[i].V; + if ((Lv > 0) && (Lv <= (Ly + 32))) + { + v[i + 1].V -= 8; } - } else { - if (ty0 <= ty1) { - v[i].V = ty0; - v[i+1].V = ty1 + 16; - } else { - v[i].V = ty0 + 15; - v[i+1].V = ty1; + } + else + { + if (ty0 <= ty1) + { + v[i].V = ty0; + v[i + 1].V = ty1 + 16; + } + else + { + v[i].V = ty0 + 15; + v[i + 1].V = ty1; } } #endif #ifdef DEBUG_U if (debug) - fprintf(stderr, "GREP_AFTER %d => %d\n\n", v[i].U, v[i+1].U); + fprintf(stderr, "GREP_AFTER %d => %d\n\n", v[i].U, v[i + 1].U); #endif #ifdef DEBUG_V if (debug) - fprintf(stderr, "GREP_AFTER %d => %d\n\n", v[i].V, v[i+1].V); + fprintf(stderr, "GREP_AFTER %d => %d\n\n", v[i].V, v[i + 1].V); #endif - } } void GSRendererHW::Draw() { - if(m_dev->IsLost() || IsBadFrame()) { + if (m_dev->IsLost() || IsBadFrame()) + { GL_INS("Warning skipping a draw call (%d)", s_n); return; } @@ -1146,7 +1201,7 @@ void GSRendererHW::Draw() const GSLocalMemory::psm_t& tex_psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; // Fix TEX0 size - if(PRIM->TME && !IsMipMapActive()) + if (PRIM->TME && !IsMipMapActive()) m_context->ComputeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t)); // skip alpha test if possible @@ -1187,22 +1242,31 @@ void GSRendererHW::Draw() const GSVector4 delta_p = m_vt.m_max.p - m_vt.m_min.p; bool single_page = (delta_p.x <= 64.0f) && (delta_p.y <= 64.0f); - if (m_channel_shuffle) { + if (m_channel_shuffle) + { m_channel_shuffle = draw_sprite_tex && (m_context->TEX0.PSM == PSM_PSMT8) && single_page; - if (m_channel_shuffle) { + if (m_channel_shuffle) + { GL_CACHE("Channel shuffle effect detected SKIP"); return; } - } else if (draw_sprite_tex && m_context->FRAME.Block() == m_context->TEX0.TBP0) { + } + else if (draw_sprite_tex && m_context->FRAME.Block() == m_context->TEX0.TBP0) + { // Special post-processing effect - if ((m_context->TEX0.PSM == PSM_PSMT8) && single_page) { + if ((m_context->TEX0.PSM == PSM_PSMT8) && single_page) + { GL_INS("Channel shuffle effect detected"); m_channel_shuffle = true; - } else { + } + else + { GL_DBG("Special post-processing effect not supported"); m_channel_shuffle = false; } - } else { + } + else + { m_channel_shuffle = false; } @@ -1214,7 +1278,8 @@ void GSRendererHW::Draw() GSTextureCache::Target* rt = NULL; GSTexture* rt_tex = NULL; - if (!no_rt) { + if (!no_rt) + { rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true, fm); rt_tex = rt->m_texture; } @@ -1225,7 +1290,8 @@ void GSRendererHW::Draw() GSTextureCache::Target* ds = NULL; GSTexture* ds_tex = NULL; - if (!no_ds) { + if (!no_ds) + { ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite()); ds_tex = ds->m_texture; } @@ -1233,40 +1299,49 @@ void GSRendererHW::Draw() m_src = nullptr; m_texture_shuffle = false; - if(PRIM->TME) + if (PRIM->TME) { GIFRegCLAMP MIP_CLAMP = context->CLAMP; int mxl = std::min((int)m_context->TEX1.MXL, 6); m_lod = GSVector2i(0, 0); // Code from the SW renderer - if (IsMipMapActive()) { + if (IsMipMapActive()) + { int interpolation = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri int k = (m_context->TEX1.K + 8) >> 4; int lcm = m_context->TEX1.LCM; - if ((int)m_vt.m_lod.x >= mxl) { + if ((int)m_vt.m_lod.x >= mxl) + { k = mxl; // set lod to max level lcm = 1; // constant lod } - if (PRIM->FST) { + if (PRIM->FST) + { ASSERT(lcm == 1); ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu) lcm = 1; } - if (lcm == 1) { + if (lcm == 1) + { m_lod.x = std::max(k, 0); m_lod.y = m_lod.x; - } else { + } + else + { // Not constant but who care ! - if (interpolation == 2) { + if (interpolation == 2) + { // Mipmap Linear. Both layers are sampled, only take the big one m_lod.x = std::max((int)floor(m_vt.m_lod.x), 0); - } else { + } + else + { // On GS lod is a fixed float number 7:4 (4 bit for the frac part) #if 0 m_lod.x = std::max((int)round(m_vt.m_lod.x + 0.0625), 0); @@ -1294,13 +1369,16 @@ void GSRendererHW::Draw() MIP_CLAMP.MAXU >>= m_lod.x; MIP_CLAMP.MAXV >>= m_lod.x; - for (int i = 0; i < m_lod.x; i++) { + for (int i = 0; i < m_lod.x; i++) + { m_vt.m_min.t *= 0.5f; m_vt.m_max.t *= 0.5f; } GL_CACHE("Mipmap LOD %d %d (%f %f) new size %dx%d (K %d L %u)", m_lod.x, m_lod.y, m_vt.m_lod.x, m_vt.m_lod.y, 1 << TEX0.TW, 1 << TEX0.TH, m_context->TEX1.K, m_context->TEX1.L); - } else { + } + else + { TEX0 = GetTex0Layer(0); } @@ -1313,12 +1391,14 @@ void GSRendererHW::Draw() m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, r) : m_tc->LookupSource(TEX0, env.TEXA, r); // Round 2 - if (IsMipMapActive() && m_mipmap == 2 && !tex_psm.depth) { + if (IsMipMapActive() && m_mipmap == 2 && !tex_psm.depth) + { // Upload remaining texture layers GSVector4 tmin = m_vt.m_min.t; GSVector4 tmax = m_vt.m_max.t; - for (int layer = m_lod.x + 1; layer <= m_lod.y; layer++) { + for (int layer = m_lod.x + 1; layer <= m_lod.y; layer++) + { const GIFRegTEX0& MIP_TEX0 = GetTex0Layer(layer); m_context->offset.tex = m_mem.GetOffset(MIP_TEX0.TBP0, MIP_TEX0.TBW, MIP_TEX0.PSM); @@ -1348,7 +1428,8 @@ void GSRendererHW::Draw() && draw_sprite_tex && m_src->m_32_bits_fmt; // Okami mustn't call this code - if (m_texture_shuffle && m_vertex.next < 3 && PRIM->FST && (m_context->FRAME.FBMSK == 0)) { + if (m_texture_shuffle && m_vertex.next < 3 && PRIM->FST && (m_context->FRAME.FBMSK == 0)) + { // Avious dubious call to m_texture_shuffle on 16 bits games // The pattern is severals column of 8 pixels. A single sprite // smell fishy but a big sprite is wrong. @@ -1358,8 +1439,8 @@ void GSRendererHW::Draw() GSVertex* v = &m_vertex.buff[0]; m_texture_shuffle = ((v[1].U - v[0].U) < 256) || // Tomb Raider Angel of Darkness relies on this behavior to produce a fog effect. - // In this case, the address of the framebuffer and texture are the same. - // The game will take RG => BA and then the BA => RG of next pixels. + // In this case, the address of the framebuffer and texture are the same. + // The game will take RG => BA and then the BA => RG of next pixels. // However, only RG => BA needs to be emulated because RG isn't used. m_context->FRAME.Block() == m_context->TEX0.TBP0 || // DMC3, Onimusha 3 rely on this behavior. @@ -1373,35 +1454,40 @@ void GSRendererHW::Draw() // Texture shuffle is not yet supported with strange clamp mode ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3)); - if (m_src->m_target && m_context->TEX0.PSM == PSM_PSMT8 && single_page && draw_sprite_tex) { + if (m_src->m_target && m_context->TEX0.PSM == PSM_PSMT8 && single_page && draw_sprite_tex) + { GL_INS("Channel shuffle effect detected (2nd shot)"); m_channel_shuffle = true; - } else { + } + else + { m_channel_shuffle = false; } } - if (rt) { + if (rt) + { // Be sure texture shuffle detection is properly propagated // Otherwise set or clear the flag (Code in texture cache only set the flag) // Note: it is important to clear the flag when RT is used as a real 16 bits target. rt->m_32_bits_fmt = m_texture_shuffle || (GSLocalMemory::m_psm[context->FRAME.PSM].bpp != 16); } - if(s_dump) + if (s_dump) { uint64 frame = m_perfmon.GetFrame(); std::string s; - if (s_n >= s_saven) { + if (s_n >= s_saven) + { // Dump Register state s = format("%05d_context.txt", s_n); - m_env.Dump(m_dump_root+s); - m_context->Dump(m_dump_root+s); + m_env.Dump(m_dump_root + s); + m_context->Dump(m_dump_root + s); } - if(s_savet && s_n >= s_saven && m_src) + if (s_savet && s_n >= s_saven && m_src) { s = format("%05d_f%lld_itex_%05x_%s_%d%d_%02x_%02x_%02x_%02x.dds", s_n, frame, (int)context->TEX0.TBP0, psm_str(context->TEX0.PSM), @@ -1409,49 +1495,50 @@ void GSRendererHW::Draw() (int)context->CLAMP.MINU, (int)context->CLAMP.MAXU, (int)context->CLAMP.MINV, (int)context->CLAMP.MAXV); - m_src->m_texture->Save(m_dump_root+s); + m_src->m_texture->Save(m_dump_root + s); - if(m_src->m_palette) + if (m_src->m_palette) { s = format("%05d_f%lld_itpx_%05x_%s.dds", s_n, frame, context->TEX0.CBP, psm_str(context->TEX0.CPSM)); - m_src->m_palette->Save(m_dump_root+s); + m_src->m_palette->Save(m_dump_root + s); } } - if(s_save && s_n >= s_saven) + if (s_save && s_n >= s_saven) { s = format("%05d_f%lld_rt0_%05x_%s.bmp", s_n, frame, context->FRAME.Block(), psm_str(context->FRAME.PSM)); if (rt) - rt->m_texture->Save(m_dump_root+s); + rt->m_texture->Save(m_dump_root + s); } - if(s_savez && s_n >= s_saven) + if (s_savez && s_n >= s_saven) { s = format("%05d_f%lld_rz0_%05x_%s.bmp", s_n, frame, context->ZBUF.Block(), psm_str(context->ZBUF.PSM)); if (ds_tex) - ds_tex->Save(m_dump_root+s); + ds_tex->Save(m_dump_root + s); } - } // The rectangle of the draw m_r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in)); - if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt_tex, ds_tex, m_src)) + if (m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt_tex, ds_tex, m_src)) { GL_INS("Warning skipping a draw call (%d)", s_n); return; } - if (!OI_BlitFMV(rt, m_src, m_r)) { + if (!OI_BlitFMV(rt, m_src, m_r)) + { GL_INS("Warning skipping a draw call (%d)", s_n); return; } - if (m_userhacks_enabled_gs_mem_clear) { + if (m_userhacks_enabled_gs_mem_clear) + { // Constant Direct Write without texture/test/blending (aka a GS mem clear) if ((m_vt.m_primclass == GS_SPRITE_CLASS) && !PRIM->TME // Direct write && (!PRIM->ABE || m_context->ALPHA.IsOpaque()) // No transparency @@ -1470,40 +1557,48 @@ void GSRendererHW::Draw() // A couple of hack to avoid upscaling issue. So far it seems to impacts mostly sprite // Note: first hack corrects both position and texture coordinate // Note: second hack corrects only the texture coordinate - if ((m_upscale_multiplier > 1) && (m_vt.m_primclass == GS_SPRITE_CLASS)) { + if ((m_upscale_multiplier > 1) && (m_vt.m_primclass == GS_SPRITE_CLASS)) + { size_t count = m_vertex.next; GSVertex* v = &m_vertex.buff[0]; // Hack to avoid vertical black line in various games (ace combat/tekken) - if (m_userhacks_align_sprite_X) { + if (m_userhacks_align_sprite_X) + { // Note for performance reason I do the check only once on the first // primitive int win_position = v[1].XYZ.X - context->XYOFFSET.OFX; const bool unaligned_position = ((win_position & 0xF) == 8); - const bool unaligned_texture = ((v[1].U & 0xF) == 0) && PRIM->FST; // I'm not sure this check is useful + const bool unaligned_texture = ((v[1].U & 0xF) == 0) && PRIM->FST; // I'm not sure this check is useful const bool hole_in_vertex = (count < 4) || (v[1].XYZ.X != v[2].XYZ.X); - if (hole_in_vertex && unaligned_position && (unaligned_texture || !PRIM->FST)) { + if (hole_in_vertex && unaligned_position && (unaligned_texture || !PRIM->FST)) + { // Normaly vertex are aligned on full pixels and texture in half // pixels. Let's extend the coverage of an half-pixel to avoid // hole after upscaling - for(size_t i = 0; i < count; i += 2) { - v[i+1].XYZ.X += 8; + for (size_t i = 0; i < count; i += 2) + { + v[i + 1].XYZ.X += 8; // I really don't know if it is a good idea. Neither what to do for !PRIM->FST if (unaligned_texture) - v[i+1].U += 8; + v[i + 1].U += 8; } } } // Noting to do if no texture is sampled - if (PRIM->FST && draw_sprite_tex) { - if ((m_userhacks_round_sprite_offset > 1) || (m_userhacks_round_sprite_offset == 1 && !m_vt.IsLinear())) { + if (PRIM->FST && draw_sprite_tex) + { + if ((m_userhacks_round_sprite_offset > 1) || (m_userhacks_round_sprite_offset == 1 && !m_vt.IsLinear())) + { if (m_vt.IsLinear()) RoundSpriteOffset(); else RoundSpriteOffset(); } - } else { + } + else + { ; // vertical line in Yakuza (note check m_userhacks_align_sprite_X behavior) } } @@ -1522,15 +1617,17 @@ void GSRendererHW::Draw() // Help to detect rendering outside of the framebuffer #if _DEBUG - if (m_upscale_multiplier * m_r.z > m_width) { + if (m_upscale_multiplier * m_r.z > m_width) + { GL_INS("ERROR: RT width is too small only %d but require %d", m_width, m_upscale_multiplier * m_r.z); } - if (m_upscale_multiplier * m_r.w > m_height) { + if (m_upscale_multiplier * m_r.w > m_height) + { GL_INS("ERROR: RT height is too small only %d but require %d", m_height, m_upscale_multiplier * m_r.w); } #endif - if(fm != 0xffffffff && rt) + if (fm != 0xffffffff && rt) { //rt->m_valid = rt->m_valid.runion(r); rt->UpdateValidity(m_r); @@ -1540,7 +1637,7 @@ void GSRendererHW::Draw() m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->FRAME.Block()); } - if(zm != 0xffffffff && ds) + if (zm != 0xffffffff && ds) { //ds->m_valid = ds->m_valid.runion(r); ds->UpdateValidity(m_r); @@ -1552,43 +1649,43 @@ void GSRendererHW::Draw() // - if(m_hacks.m_oo) + if (m_hacks.m_oo) { (this->*m_hacks.m_oo)(); } - if(s_dump) + if (s_dump) { uint64 frame = m_perfmon.GetFrame(); std::string s; - if(s_save && s_n >= s_saven) + if (s_save && s_n >= s_saven) { s = format("%05d_f%lld_rt1_%05x_%s.bmp", s_n, frame, context->FRAME.Block(), psm_str(context->FRAME.PSM)); if (rt) - rt->m_texture->Save(m_dump_root+s); + rt->m_texture->Save(m_dump_root + s); } - if(s_savez && s_n >= s_saven) + if (s_savez && s_n >= s_saven) { s = format("%05d_f%lld_rz1_%05x_%s.bmp", s_n, frame, context->ZBUF.Block(), psm_str(context->ZBUF.PSM)); if (ds_tex) - ds_tex->Save(m_dump_root+s); + ds_tex->Save(m_dump_root + s); } - if(s_savel > 0 && (s_n - s_saven) > s_savel) + if (s_savel > 0 && (s_n - s_saven) > s_savel) { s_dump = 0; } } - #ifdef DISABLE_HW_TEXTURE_CACHE +#ifdef DISABLE_HW_TEXTURE_CACHE if (rt) m_tc->Read(rt, m_r); - #endif +#endif } // hacks @@ -1629,7 +1726,8 @@ void GSRendererHW::Hacks::SetGameCRC(const CRC::Game& game) m_oo = m_oo_map[hash]; m_cu = m_cu_map[hash]; - if (game.flags & CRC::PointListPalette) { + if (game.flags & CRC::PointListPalette) + { ASSERT(m_oi == NULL); m_oi = &GSRendererHW::OI_PointListPalette; @@ -1644,7 +1742,8 @@ void GSRendererHW::OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds) // Note gs mem clear must be tested before calling this function // Limit further to unmask Z write - if (!m_context->ZBUF.ZMSK && rt && ds) { + if (!m_context->ZBUF.ZMSK && rt && ds) + { const GSVertex* v = &m_vertex.buff[0]; const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; //const GSLocalMemory::psm_t& depth_psm = GSLocalMemory::m_psm[m_context->ZBUF.PSM]; @@ -1665,32 +1764,39 @@ void GSRendererHW::OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds) // Frame and depth pointer can be inverted uint32 base; uint32 half; - if (m_context->FRAME.FBP > m_context->ZBUF.ZBP) { + if (m_context->FRAME.FBP > m_context->ZBUF.ZBP) + { base = m_context->ZBUF.ZBP; half = m_context->FRAME.FBP; - } else { + } + else + { base = m_context->FRAME.FBP; half = m_context->ZBUF.ZBP; } // If both buffers are side by side we can expect a fast clear in on-going - if (half <= (base + written_pages)) { + if (half <= (base + written_pages)) + { uint32 color = v[1].RGBAQ.u32[0]; bool clear_depth = (m_context->FRAME.FBP > m_context->ZBUF.ZBP); GL_INS("OI_DoubleHalfClear:%s: base %x half %x. w_pages %d h_pages %d fbw %d. Color %x", - clear_depth ? "depth" : "target", base << 5, half << 5, w_pages, h_pages, m_context->FRAME.FBW, color); + clear_depth ? "depth" : "target", base << 5, half << 5, w_pages, h_pages, m_context->FRAME.FBW, color); // Commit texture with a factor 2 on the height GSTexture* t = clear_depth ? ds : rt; GSVector4i commitRect = ComputeBoundingBox(t->GetScale(), t->GetSize()); t->CommitRegion(GSVector2i(commitRect.z, 2 * commitRect.w)); - if (clear_depth) { + if (clear_depth) + { // Only pure clear are supported for depth ASSERT(color == 0); m_dev->ClearDepth(t); - } else { + } + else + { m_dev->ClearRenderTarget(t, color); } } @@ -1703,7 +1809,8 @@ void GSRendererHW::OI_GsMemClear() // Note gs mem clear must be tested before calling this function // Limit it further to a full screen 0 write - if ((m_vertex.next == 2) && m_vt.m_min.c.eq(GSVector4i(0))) { + if ((m_vertex.next == 2) && m_vt.m_min.c.eq(GSVector4i(0))) + { GSOffset* off = m_context->offset.fb; GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in)); // Limit the hack to a single fullscreen clear. Some games might use severals column to clear a screen @@ -1717,31 +1824,36 @@ void GSRendererHW::OI_GsMemClear() // FIXME: loop can likely be optimized with AVX/SSE. Pixels aren't // linear but the value will be done for all pixels of a block. // FIXME: maybe we could limit the write to the top and bottom row page. - if (format == 0) { + if (format == 0) + { // Based on WritePixel32 - for(int y = r.top; y < r.bottom; y++) + for (int y = r.top; y < r.bottom; y++) { uint32* RESTRICT d = &m_mem.m_vm32[off->pixel.row[y]]; int* RESTRICT col = off->pixel.col[0]; - for(int x = r.left; x < r.right; x++) + for (int x = r.left; x < r.right; x++) { d[col[x]] = 0; // Here the constant color } } - } else if (format == 1) { + } + else if (format == 1) + { // Based on WritePixel24 - for(int y = r.top; y < r.bottom; y++) + for (int y = r.top; y < r.bottom; y++) { uint32* RESTRICT d = &m_mem.m_vm32[off->pixel.row[y]]; int* RESTRICT col = off->pixel.col[0]; - for(int x = r.left; x < r.right; x++) + for (int x = r.left; x < r.right; x++) { d[col[x]] &= 0xff000000; // Clear the color } } - } else if (format == 2) { + } + else if (format == 2) + { ; // Hack is used for FMV which are likely 24/32 bits. Let's keep the for reference #if 0 // Based on WritePixel16 @@ -1762,7 +1874,8 @@ void GSRendererHW::OI_GsMemClear() bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw) { - if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && PRIM->TME && !PRIM->ABE && tex && !tex->m_target && m_context->TEX0.TBW > 0) { + if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && PRIM->TME && !PRIM->ABE && tex && !tex->m_target && m_context->TEX0.TBW > 0) + { GL_PUSH("OI_BlitFMV"); GL_INS("OI_BlitFMV"); @@ -1800,7 +1913,8 @@ bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc // Do the blit. With a Copy mess to avoid issue with limited API (dx) // m_dev->StretchRect(tex->m_texture, sRect, tex->m_texture, dRect); GSVector4i r_full(0, 0, tw, th); - if (GSTexture* rt = m_dev->CreateRenderTarget(tw, th)) { + if (GSTexture* rt = m_dev->CreateRenderTarget(tw, th)) + { m_dev->CopyRect(tex->m_texture, rt, r_full); m_dev->StretchRect(tex->m_texture, sRect, rt, dRect); @@ -1859,13 +1973,13 @@ bool GSRendererHW::OI_BigMuthaTruckers(GSTexture* rt, GSTexture* ds, GSTextureCa bool GSRendererHW::OI_DBZBTGames(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) { - if (t && t->m_from_target) // Avoid slow framebuffer readback + if (t && t->m_from_target) // Avoid slow framebuffer readback return true; // Sprite rendering if (!CanUseSwSpriteRender(true)) return true; - + SwSpriteRender(); return false; // Skip current draw @@ -1876,34 +1990,36 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source static uint32* video = NULL; static size_t lines = 0; - if(lines == 0) + if (lines == 0) { - if(m_vt.m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2)) + if (m_vt.m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2)) { lines = m_vertex.next / 2; } } else { - if(m_vt.m_primclass == GS_POINT_CLASS) + if (m_vt.m_primclass == GS_POINT_CLASS) { - if(m_vertex.next >= 16 * 512) + if (m_vertex.next >= 16 * 512) { // incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454 - if(!video) video = new uint32[512 * 512]; + if (!video) + video = new uint32[512 * 512]; int ox = m_context->XYOFFSET.OFX - 8; int oy = m_context->XYOFFSET.OFY - 8; const GSVertex* RESTRICT v = m_vertex.buff; - for(int i = (int)m_vertex.next; i > 0; i--, v++) + for (int i = (int)m_vertex.next; i > 0; i--, v++) { int x = (v->XYZ.X - ox) >> 4; int y = (v->XYZ.Y - oy) >> 4; - if (x < 0 || x >= 448 || y < 0 || y >= (int)lines) return false; // le sigh + if (x < 0 || x >= 448 || y < 0 || y >= (int)lines) + return false; // le sigh video[(y << 8) + (y << 7) + (y << 6) + x] = v->RGBAQ.u32[0]; } @@ -1915,9 +2031,9 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source lines = 0; } } - else if(m_vt.m_primclass == GS_LINE_CLASS) + else if (m_vt.m_primclass == GS_LINE_CLASS) { - if(m_vertex.next == lines * 2) + if (m_vertex.next == lines * 2) { // normally, this step would copy the video onto screen with 512 texture mapped horizontal lines, // but we use the stored video data to create a new texture, and replace the lines with two triangles @@ -1959,11 +2075,11 @@ bool GSRendererHW::OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* uint32 ZBP = m_context->ZBUF.Block(); uint32 TBP = m_context->TEX0.TBP0; - if((FBP == 0x00d00 || FBP == 0x00000) && ZBP == 0x02100 && PRIM->TME && TBP == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S) + if ((FBP == 0x00d00 || FBP == 0x00000) && ZBP == 0x02100 && PRIM->TME && TBP == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S) { // random battle transition (z buffer written directly, clear it now) GL_INS("OI_FFX ZB clear"); - if(ds) + if (ds) ds->Commit(); // Don't bother to save few MB for a single game m_dev->ClearDepth(ds); } @@ -1977,7 +2093,7 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S GSVertex* RESTRICT v = m_vertex.buff; - for(int i = (int)m_vertex.next; i > 0; i--, v++) + for (int i = (int)m_vertex.next; i > 0; i--, v++) { uint32 c = v->RGBAQ.u32[0]; @@ -1985,7 +2101,7 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S uint32 g = (c >> 8) & 0xff; uint32 b = (c >> 16) & 0xff; - if(r == 0 && g != 0 && b != 0) + if (r == 0 && g != 0 && b != 0) { v->RGBAQ.u32[0] = (c & 0xffffff00) | ((g + b + 1) >> 1); } @@ -1998,12 +2114,12 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S bool GSRendererHW::OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) { - if(!PRIM->TME) + if (!PRIM->TME) { uint32 FBP = m_context->FRAME.Block(); uint32 ZBP = m_context->ZBUF.Block(); - if(FBP == 0x008c0 && ZBP == 0x01a40) + if (FBP == 0x008c0 && ZBP == 0x01a40) { // frame buffer clear, atst = fail, afail = write z only, z buffer points to frame buffer @@ -2013,7 +2129,7 @@ bool GSRendererHW::OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTex TEX0.TBW = m_context->FRAME.FBW; TEX0.PSM = m_context->FRAME.PSM; - if(GSTextureCache::Target* tmp_rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true)) + if (GSTextureCache::Target* tmp_rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true)) { GL_INS("OI_RozenMaidenGebetGarden FB clear"); tmp_rt->m_texture->Commit(); // Don't bother to save few MB for a single game @@ -2022,7 +2138,7 @@ bool GSRendererHW::OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTex return false; } - else if(FBP == 0x00000 && m_context->ZBUF.Block() == 0x01180) + else if (FBP == 0x00000 && m_context->ZBUF.Block() == 0x01180) { // z buffer clear, frame buffer now points to the z buffer (how can they be so clever?) @@ -2032,7 +2148,7 @@ bool GSRendererHW::OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTex TEX0.TBW = m_context->FRAME.FBW; TEX0.PSM = m_context->ZBUF.PSM; - if(GSTextureCache::Target* tmp_ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true)) + if (GSTextureCache::Target* tmp_ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, true)) { GL_INS("OI_RozenMaidenGebetGarden ZB clear"); tmp_ds->m_texture->Commit(); // Don't bother to save few MB for a single game @@ -2084,18 +2200,18 @@ bool GSRendererHW::OI_SonicUnleashed(GSTexture* rt, GSTexture* ds, GSTextureCach bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) { - if(m_vt.m_primclass == GS_POINT_CLASS && !PRIM->TME) + if (m_vt.m_primclass == GS_POINT_CLASS && !PRIM->TME) { uint32 FBP = m_context->FRAME.Block(); uint32 FBW = m_context->FRAME.FBW; - if(FBP >= 0x03f40 && (FBP & 0x1f) == 0) + if (FBP >= 0x03f40 && (FBP & 0x1f) == 0) { - if(m_vertex.next == 16) + if (m_vertex.next == 16) { GSVertex* RESTRICT v = m_vertex.buff; - for(int i = 0; i < 16; i++, v++) + for (int i = 0; i < 16; i++, v++) { uint32 c = v->RGBAQ.u32[0]; uint32 a = c >> 24; @@ -2111,11 +2227,11 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa return false; } - else if(m_vertex.next == 256) + else if (m_vertex.next == 256) { GSVertex* RESTRICT v = m_vertex.buff; - for(int i = 0; i < 256; i++, v++) + for (int i = 0; i < 256; i++, v++) { uint32 c = v->RGBAQ.u32[0]; uint32 a = c >> 24; @@ -2159,7 +2275,7 @@ bool GSRendererHW::OI_SuperManReturns(GSTexture* rt, GSTexture* ds, GSTextureCac ASSERT((v->RGBAQ.A << 24 | v->RGBAQ.B << 16 | v->RGBAQ.G << 8 | v->RGBAQ.R) == (int)v->XYZ.Z); // Do a direct write - if(rt) + if (rt) rt->Commit(); // Don't bother to save few MB for a single game m_dev->ClearRenderTarget(rt, GSVector4(m_vt.m_min.c)); @@ -2194,9 +2310,10 @@ bool GSRendererHW::OI_ArTonelico2(GSTexture* rt, GSTexture* ds, GSTextureCache:: GSVertex* v = &m_vertex.buff[0]; - if (m_vertex.next == 2 && !PRIM->TME && m_context->FRAME.FBW == 10 && v->XYZ.Z == 0 && m_context->TEST.ZTST == ZTST_ALWAYS) { + if (m_vertex.next == 2 && !PRIM->TME && m_context->FRAME.FBW == 10 && v->XYZ.Z == 0 && m_context->TEST.ZTST == ZTST_ALWAYS) + { GL_INS("OI_ArTonelico2"); - if(ds) + if (ds) ds->Commit(); // Don't bother to save few MB for a single game m_dev->ClearDepth(ds); } @@ -2212,7 +2329,7 @@ bool GSRendererHW::OI_JakGames(GSTexture* rt, GSTexture* ds, GSTextureCache::Sou // Render 16x16 palette via CPU. SwSpriteRender(); - return false; // Skip current draw. + return false; // Skip current draw. } // OO (others output?) hacks: invalidate extra local memory after the draw call @@ -2223,7 +2340,7 @@ void GSRendererHW::OO_MajokkoALaMode2() uint32 FBP = m_context->FRAME.Block(); - if(!PRIM->TME && FBP == 0x03f40) + if (!PRIM->TME && FBP == 0x03f40) { GIFRegBITBLTBUF BITBLTBUF; diff --git a/plugins/GSdx/Renderers/HW/GSRendererHW.h b/plugins/GSdx/Renderers/HW/GSRendererHW.h index eb967cc58a..2e85e04039 100644 --- a/plugins/GSdx/Renderers/HW/GSRendererHW.h +++ b/plugins/GSdx/Renderers/HW/GSRendererHW.h @@ -43,7 +43,7 @@ private: static const float SSR_UV_TOLERANCE; - #pragma region hacks +#pragma region hacks typedef bool (GSRendererHW::*OI_Ptr)(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); typedef void (GSRendererHW::*OO_Ptr)(); @@ -73,7 +73,8 @@ private: class Hacks { - template class HackEntry + template + class HackEntry { public: CRC::Title title; @@ -88,18 +89,19 @@ private: } }; - template class FunctionMap : public GSFunctionMap + template + class FunctionMap : public GSFunctionMap { - std::list >& m_tbl; + std::list>& m_tbl; T GetDefaultFunction(uint32 key) { CRC::Title title = (CRC::Title)(key & 0xffffff); CRC::Region region = (CRC::Region)(key >> 24); - for(const auto &entry : m_tbl) + for (const auto& entry : m_tbl) { - if(entry.title == title && (entry.region == CRC::RegionCount || entry.region == region)) + if (entry.title == title && (entry.region == CRC::RegionCount || entry.region == region)) { return entry.func; } @@ -109,12 +111,15 @@ private: } public: - FunctionMap(std::list >& tbl) : m_tbl(tbl) {} + FunctionMap(std::list>& tbl) + : m_tbl(tbl) + { + } }; - std::list > m_oi_list; - std::list > m_oo_list; - std::list > m_cu_list; + std::list> m_oi_list; + std::list> m_oo_list; + std::list> m_cu_list; FunctionMap m_oi_map; FunctionMap m_oo_map; @@ -131,7 +136,7 @@ private: } m_hacks; - #pragma endregion +#pragma endregion uint16 Interpolate_UV(float alpha, int t0, int t1); float alpha0(int L, int X0, int X1); @@ -139,7 +144,8 @@ private: void SwSpriteRender(); bool CanUseSwSpriteRender(bool allow_64x64_sprite); - template void RoundSpriteOffset(); + template + void RoundSpriteOffset(); protected: GSTextureCache* m_tc; @@ -190,5 +196,5 @@ public: void Draw(); // Called by the texture cache to know if current texture is useful - virtual bool IsDummyTexture() const { return false;} + virtual bool IsDummyTexture() const { return false; } }; diff --git a/plugins/GSdx/Renderers/HW/GSTextureCache.cpp b/plugins/GSdx/Renderers/HW/GSTextureCache.cpp index 632aea6b89..c1f92b9265 100644 --- a/plugins/GSdx/Renderers/HW/GSTextureCache.cpp +++ b/plugins/GSdx/Renderers/HW/GSTextureCache.cpp @@ -31,7 +31,8 @@ GSTextureCache::GSTextureCache(GSRenderer* r) : m_renderer(r) , m_palette_map(r) { - if (theApp.GetConfigB("UserHacks")) { + if (theApp.GetConfigB("UserHacks")) + { UserHacks_HalfPixelOffset = theApp.GetConfigI("UserHacks_HalfPixelOffset") == 1; m_preload_frame = theApp.GetConfigB("preload_frame_with_gs_data"); m_disable_partial_invalidation = theApp.GetConfigB("UserHacks_DisablePartialInvalidation"); @@ -39,7 +40,9 @@ GSTextureCache::GSTextureCache(GSRenderer* r) m_cpu_fb_conversion = theApp.GetConfigB("UserHacks_CPU_FB_Conversion"); m_texture_inside_rt = theApp.GetConfigB("UserHacks_TextureInsideRt"); m_wrap_gs_mem = theApp.GetConfigB("wrap_gs_mem"); - } else { + } + else + { UserHacks_HalfPixelOffset = false; m_preload_frame = false; m_disable_partial_invalidation = false; @@ -77,7 +80,8 @@ void GSTextureCache::RemovePartial() for (int type = 0; type < 2; type++) { - for (auto t : m_dst[type]) delete t; + for (auto t : m_dst[type]) + delete t; m_dst[type].clear(); } @@ -87,9 +91,10 @@ void GSTextureCache::RemoveAll() { m_src.RemoveAll(); - for(int type = 0; type < 2; type++) + for (int type = 0; type < 2; type++) { - for (auto t : m_dst[type]) delete t; + for (auto t : m_dst[type]) + delete t; m_dst[type].clear(); } @@ -99,13 +104,17 @@ void GSTextureCache::RemoveAll() GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette) { - if (!m_can_convert_depth) { + if (!m_can_convert_depth) + { GL_CACHE("LookupDepthSource not supported (0x%x, F:0x%x)", TEX0.TBP0, TEX0.PSM); - if (m_renderer->m_game.title == CRC::JackieChanAdv || m_renderer->m_game.title == CRC::SVCChaos) { + if (m_renderer->m_game.title == CRC::JackieChanAdv || m_renderer->m_game.title == CRC::SVCChaos) + { // JackieChan and SVCChaos cause regressions when skipping the draw calls when depth is disabled/not supported. // This way we make sure there are no regressions on D3D as well. return LookupSource(TEX0, TEXA, r); - } else { + } + else + { throw GSDXRecoverableError(); } } @@ -119,26 +128,32 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0 uint32 bp = TEX0.TBP0; uint32 psm = TEX0.PSM; - for(auto t : m_dst[DepthStencil]) { - if(t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + for (auto t : m_dst[DepthStencil]) + { + if (t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth); - if (t->m_age == 0) { + if (t->m_age == 0) + { // Perfect Match dst = t; break; - } else if (t->m_age == 1) { + } + else if (t->m_age == 1) + { // Better than nothing (Full Spectrum Warrior) dst = t; } } } - if (!dst) { + if (!dst) + { // Retry on the render target (Silent Hill 4) - for(auto t : m_dst[RenderTarget]) { + for (auto t : m_dst[RenderTarget]) + { // FIXME: do I need to allow m_age == 1 as a potential match (as DepthStencil) ??? - if(!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + if (!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth); dst = t; @@ -147,10 +162,11 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0 } } - if (dst) { + if (dst) + { GL_CACHE("TC depth: dst %s hit: %d (0x%x, %s)", to_string(dst->m_type), - dst->m_texture ? dst->m_texture->GetID() : 0, - TEX0.TBP0, psm_str(psm)); + dst->m_texture ? dst->m_texture->GetID() : 0, + TEX0.TBP0, psm_str(psm)); // Create a shared texture source src = new Source(m_renderer, TEX0, TEXA, m_temp, true); @@ -167,12 +183,15 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0 // texture cache list. It means that a new Source is created everytime we need it. // If it is too expensive, one could cut memory allocation in Source constructor for this // use case. - if (palette) { + if (palette) + { AttachPaletteToSource(src, psm_s.pal, true); } m_src.m_surfaces.insert(src); - } else { + } + else + { GL_CACHE("TC depth: ERROR miss (0x%x, %s)", TEX0.TBP0, psm_str(psm)); // Possible ? In this case we could call LookupSource // Or just put a basic texture @@ -182,10 +201,13 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0 // Note: might worth to check previous frame // Note: otherwise return NULL and skip the draw - if (m_renderer->m_game.title == CRC::JackieChanAdv || m_renderer->m_game.title == CRC::SVCChaos) { + if (m_renderer->m_game.title == CRC::JackieChanAdv || m_renderer->m_game.title == CRC::SVCChaos) + { // JackieChan and SVCChaos cause regressions when skipping the draw calls so we reuse the old code for these two. return LookupSource(TEX0, TEXA, r); - } else { + } + else + { // Full Spectrum Warrior: first draw call of cut-scene rendering // The game tries to emulate a texture shuffle with an old depth buffer // (don't exists yet for us due to the cache) @@ -205,7 +227,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con //const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm; // Until DX is fixed - if(psm_s.pal > 0) + if (psm_s.pal > 0) m_renderer->m_mem.m_clut.Read32(TEX0, TEXA); const uint32* clut = m_renderer->m_mem.m_clut; @@ -214,7 +236,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con auto& m = m_src.m_map[TEX0.TBP0 >> 5]; - for(auto i = m.begin(); i != m.end(); ++i) + for (auto i = m.begin(); i != m.end(); ++i) { Source* s = *i; @@ -222,11 +244,12 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con continue; // Target are converted (AEM & palette) on the fly by the GPU. They don't need extra check - if (!s->m_target) { + if (!s->m_target) + { // We request a palette texture (psm_s.pal). If the texture was // converted by the CPU (!s->m_palette), we need to ensure // palette content is the same. - if (psm_s.pal > 0 && !s->m_palette && !s->ClutMatch({ clut, psm_s.pal })) + if (psm_s.pal > 0 && !s->m_palette && !s->ClutMatch({clut, psm_s.pal})) continue; // We request a 24/16 bit RGBA texture. Alpha expansion was done by @@ -248,9 +271,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con int y_offset = 0; #ifdef DISABLE_HW_TEXTURE_CACHE - if( 0 ) + if (0) #else - if(src == NULL) + if (src == NULL) #endif { uint32 bp = TEX0.TBP0; @@ -259,7 +282,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con uint32 bw = TEX0.TBW; int tw = 1 << TEX0.TW; int th = 1 << TEX0.TH; - uint32 bp_end = psm_s.bn(tw - 1, th - 1, bp, bw); // Valid only for color formats + uint32 bp_end = psm_s.bn(tw - 1, th - 1, bp, bw); // Valid only for color formats // Arc the Lad finds the wrong surface here when looking for a depth stencil. // Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here. @@ -269,8 +292,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con bool texture_inside_rt = ShallSearchTextureInsideRt(); - for(auto t : m_dst[RenderTarget]) { - if(t->m_used && t->m_dirty.empty()) { + for (auto t : m_dst[RenderTarget]) + { + if (t->m_used && t->m_dirty.empty()) + { // Typical bug (MGS3 blue cloud): // 1/ RT used as 32 bits => alpha channel written // 2/ RT used as 24 bits => no update of alpha channel @@ -280,7 +305,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con // Solution: consider the RT as 32 bits if the alpha was used in the past uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM; - if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) { + if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) + { // It is a complex to convert the code in shader. As a reference, let's do it on the CPU, it will be slow but // 1/ it just works :) // 2/ even with upscaling @@ -295,8 +321,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con dst = t; break; - - } else if ((t->m_TEX0.TBW >= 16) && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0 + t->m_TEX0.TBW * 0x10, t->m_TEX0.PSM)) { + } + else if ((t->m_TEX0.TBW >= 16) && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0 + t->m_TEX0.TBW * 0x10, t->m_TEX0.PSM)) + { // Detect half of the render target (fix snow engine game) // Target Page (8KB) have always a width of 64 pixels // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 @@ -304,8 +331,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con dst = t; break; - - } else if (texture_inside_rt && psm == PSM_PSMCT32 && t->m_TEX0.PSM == psm && t->m_TEX0.TBP0 < bp && t->m_end_block >= bp) { + } + else if (texture_inside_rt && psm == PSM_PSMCT32 && t->m_TEX0.PSM == psm && t->m_TEX0.TBP0 < bp && t->m_end_block >= bp) + { // Only PSMCT32 to limit false hits // Check if it is possible to hit with valid offset on the given Target @@ -346,7 +374,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con // SWEEP SEARCH: offset - TexInsideRtCacheEntry entry = { psm, bp, bp_end, bw, t->m_TEX0.TBP0, t->m_end_block, false, 0, 0 }; + TexInsideRtCacheEntry entry = {psm, bp, bp_end, bw, t->m_TEX0.TBP0, t->m_end_block, false, 0, 0}; for (int candidate_x_offset = 0; candidate_x_offset < t->m_valid.z; ++candidate_x_offset) { @@ -404,23 +432,28 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con // // Sigh... They don't help us. - if (dst == NULL && m_can_convert_depth) { + if (dst == NULL && m_can_convert_depth) + { // Let's try a trick to avoid to use wrongly a depth buffer // Unfortunately, I don't have any Arc the Lad testcase // // 1/ Check only current frame, I guess it is only used as a postprocessing effect - for(auto t : m_dst[DepthStencil]) { - if(!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + for (auto t : m_dst[DepthStencil]) + { + if (!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled"); // Let's fetch a depth format texture. Rational, it will avoid the texture allocation and the // rescaling of the current function. - if (psm_s.bpp > 8) { + if (psm_s.bpp > 8) + { GIFRegTEX0 depth_TEX0; depth_TEX0.u32[0] = TEX0.u32[0] | (0x30u << 20u); depth_TEX0.u32[1] = TEX0.u32[1]; return LookupDepthSource(depth_TEX0, TEXA, r); - } else { + } + else + { return LookupDepthSource(TEX0, TEXA, r, true); } } @@ -430,28 +463,33 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con bool new_source = false; - if(src == NULL) + if (src == NULL) { #ifdef ENABLE_OGL_DEBUG - if (dst) { + if (dst) + { GL_CACHE("TC: dst %s hit (%s): %d (0x%x, %s)", to_string(dst->m_type), half_right ? "half" : "full", - dst->m_texture ? dst->m_texture->GetID() : 0, - TEX0.TBP0, psm_str(TEX0.PSM)); - } else { + dst->m_texture ? dst->m_texture->GetID() : 0, + TEX0.TBP0, psm_str(TEX0.PSM)); + } + else + { GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM)); } #endif src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset); new_source = true; - - } else { + } + else + { GL_CACHE("TC: src hit: %d (0x%x, 0x%x, %s)", - src->m_texture ? src->m_texture->GetID() : 0, - TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, - psm_str(TEX0.PSM)); + src->m_texture ? src->m_texture->GetID() : 0, + TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, + psm_str(TEX0.PSM)); } - if (src->m_palette && !new_source && !src->ClutMatch({ clut, psm_s.pal })) { + if (src->m_palette && !new_source && !src->ClutMatch({clut, psm_s.pal})) + { AttachPaletteToSource(src, psm_s.pal, true); } @@ -497,10 +535,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int Target* dst = NULL; auto& list = m_dst[type]; - for(auto i = list.begin(); i != list.end(); ++i) { + for (auto i = list.begin(); i != list.end(); ++i) + { Target* t = *i; - if(bp == t->m_TEX0.TBP0) + if (bp == t->m_TEX0.TBP0) { list.MoveFront(i.Index()); @@ -513,32 +552,40 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int } } - if (dst) { + if (dst) + { GL_CACHE("TC: Lookup Target(%s) %dx%d, hit: %d (0x%x, %s)", to_string(type), w, h, dst->m_texture->GetID(), bp, psm_str(TEX0.PSM)); dst->Update(); dst->m_dirty_alpha |= (psm_s.trbpp == 32 && (fbmask & 0xFF000000) != 0xFF000000) || (psm_s.trbpp == 16); - - } else if (m_can_convert_depth) { + } + else if (m_can_convert_depth) + { int rev_type = (type == DepthStencil) ? RenderTarget : DepthStencil; // Depth stencil/RT can be an older RT/DS but only check recent RT/DS to avoid to pick // some bad data. Target* dst_match = nullptr; - for(auto t : m_dst[rev_type]) { - if (bp == t->m_TEX0.TBP0) { - if (t->m_age == 0) { + for (auto t : m_dst[rev_type]) + { + if (bp == t->m_TEX0.TBP0) + { + if (t->m_age == 0) + { dst_match = t; break; - } else if (t->m_age == 1) { + } + else if (t->m_age == 1) + { dst_match = t; } } } - if (dst_match) { + if (dst_match) + { GSVector4 sRect(0, 0, 1, 1); GSVector4 dRect(0, 0, w, h); @@ -547,10 +594,13 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int int shader; bool fmt_16_bits = (psm_s.bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp == 16); - if (type == DepthStencil) { + if (type == DepthStencil) + { GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, %s was %s)", w, h, bp, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM)); shader = (fmt_16_bits) ? ShaderConvert_RGB5A1_TO_FLOAT16 : ShaderConvert_RGBA8_TO_FLOAT32 + psm_s.fmt; - } else { + } + else + { GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, %s was %s)", w, h, bp, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM)); shader = (fmt_16_bits) ? ShaderConvert_FLOAT16_TO_RGB5A1 : ShaderConvert_FLOAT32_TO_RGBA8; } @@ -558,7 +608,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int } } - if(dst == NULL) + if (dst == NULL) { GL_CACHE("TC: Lookup Target(%s) %dx%d, miss (0x%x, %s)", to_string(type), w, h, bp, psm_str(TEX0.PSM)); @@ -576,7 +626,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int // From a performance point of view, it might cost a little on big upscaling // but normally few RT are miss so it must remain reasonable. bool supported_fmt = m_can_convert_depth || psm_s.depth == 0; - if (m_preload_frame && TEX0.TBW > 0 && supported_fmt) { + if (m_preload_frame && TEX0.TBW > 0 && supported_fmt) + { GL_INS("Preloading the RT DATA"); // RT doesn't have height but if we use a too big value, we will read outside of the GS memory. int page0 = TEX0.TBP0 >> 5; @@ -587,18 +638,20 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int dst->m_dirty.push_back(GSDirtyRect(GSVector4i(0, 0, TEX0.TBW * 64, max_h), TEX0.PSM)); dst->Update(); - } else { + } + else + { #ifdef ENABLE_OGL_DEBUG switch (type) { - case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break; - case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture); break; - default: break; + case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break; + case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture); break; + default: break; } #endif } } ScaleTexture(dst->m_texture); - if(used) + if (used) { dst->m_used = true; } @@ -620,8 +673,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int #endif // Let's try to find a perfect frame that contains valid data - for(auto t : m_dst[RenderTarget]) { - if(bp == t->m_TEX0.TBP0 && t->m_end_block >= bp) { + for (auto t : m_dst[RenderTarget]) + { + if (bp == t->m_TEX0.TBP0 && t->m_end_block >= bp) + { dst = t; GL_CACHE("TC: Lookup Frame %dx%d, perfect hit: %d (0x%x -> 0x%x %s)", w, h, dst->m_texture->GetID(), bp, t->m_end_block, psm_str(TEX0.PSM)); @@ -631,9 +686,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int } // 2nd try ! Try to find a frame that include the bp - if (dst == NULL) { - for(auto t : m_dst[RenderTarget]) { - if (t->m_TEX0.TBP0 < bp && bp <= t->m_end_block) { + if (dst == NULL) + { + for (auto t : m_dst[RenderTarget]) + { + if (t->m_TEX0.TBP0 < bp && bp <= t->m_end_block) + { dst = t; GL_CACHE("TC: Lookup Frame %dx%d, inclusive hit: %d (0x%x, took 0x%x -> 0x%x %s)", w, h, t->m_texture->GetID(), bp, t->m_TEX0.TBP0, t->m_end_block, psm_str(TEX0.PSM)); @@ -644,9 +702,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int } // 3rd try ! Try to find a frame that doesn't contain valid data (honestly I'm not sure we need to do it) - if (dst == NULL) { - for(auto t : m_dst[RenderTarget]) { - if(bp == t->m_TEX0.TBP0) { + if (dst == NULL) + { + for (auto t : m_dst[RenderTarget]) + { + if (bp == t->m_TEX0.TBP0) + { dst = t; GL_CACHE("TC: Lookup Frame %dx%d, empty hit: %d (0x%x -> 0x%x %s)", w, h, dst->m_texture->GetID(), bp, t->m_end_block, psm_str(TEX0.PSM)); @@ -681,7 +742,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int } #endif - if(dst == NULL) + if (dst == NULL) { GL_CACHE("TC: Lookup Frame %dx%d, miss (0x%x %s)", w, h, bp, psm_str(TEX0.PSM)); @@ -690,7 +751,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); // new frame buffers after reset should be cleared, don't display memory garbage - if (m_preload_frame) { + if (m_preload_frame) + { // Load GS data into frame. Game can directly uploads a background or the full image in // "CTRC" buffer. It will also avoid various black screen issue in gs dump. // @@ -721,15 +783,15 @@ void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp) return; auto& list = m_dst[type]; - for(auto i = list.begin(); i != list.end(); ++i) + for (auto i = list.begin(); i != list.end(); ++i) { Target* t = *i; - if(bp == t->m_TEX0.TBP0) + if (bp == t->m_TEX0.TBP0) { GL_CACHE("TC: InvalidateVideoMemType: Remove Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0); + t->m_texture ? t->m_texture->GetID() : 0, + t->m_TEX0.TBP0); list.erase(i); delete t; @@ -737,25 +799,25 @@ void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp) break; } } - } // Goal: invalidate data sent to the GPU when the source (GS memory) is modified // Called each time you want to write to the GS memory void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, bool target) { - if(!off) return; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549. + if (!off) + return; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549. uint32 bp = off->bp; uint32 bw = off->bw; uint32 psm = off->psm; - if(!target) + if (!target) { // Remove Source that have same BP as the render target (color&dss) // rendering will dirty the copy auto& list = m_src.m_map[bp >> 5]; - for(auto i = list.begin(); i != list.end(); ) + for (auto i = list.begin(); i != list.end();) { Source* s = *i; ++i; @@ -768,17 +830,18 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b } uint32 bbp = bp + bw * 0x10; - if (bw >= 16 && bbp < 16384) { + if (bw >= 16 && bbp < 16384) + { // Detect half of the render target (fix snow engine game) // Target Page (8KB) have always a width of 64 pixels // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 auto& list = m_src.m_map[bbp >> 5]; - for(auto i = list.begin(); i != list.end(); ) + for (auto i = list.begin(); i != list.end();) { Source* s = *i; ++i; - if(GSUtil::HasSharedBits(bbp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM)) + if (GSUtil::HasSharedBits(bbp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM)) { m_src.RemoveAt(s); } @@ -788,13 +851,15 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b // Haunting ground write frame buffer 0x3000 and expect to write data to 0x3380 // Note: the game only does a 0 direct write. If some games expect some real data // we are screwed. - if (m_renderer->m_game.title == CRC::HauntingGround) { - uint32 end_block = GSLocalMemory::m_psm[psm].bn(rect.z - 1, rect.w - 1, bp, bw); // Valid only for color formats + if (m_renderer->m_game.title == CRC::HauntingGround) + { + uint32 end_block = GSLocalMemory::m_psm[psm].bn(rect.z - 1, rect.w - 1, bp, bw); // Valid only for color formats auto type = RenderTarget; - for(auto t : m_dst[type]) + for (auto t : m_dst[type]) { - if (t->m_TEX0.TBP0 > bp && t->m_end_block <= end_block) { + if (t->m_TEX0.TBP0 > bp && t->m_end_block <= end_block) + { // Haunting ground expect to clean buffer B with a rendering into buffer A. // Situation is quite messy as it would require to extract the data from the buffer A // and to move in buffer B. @@ -804,8 +869,8 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b // // So just clear the damn buffer and forget about it. GL_CACHE("TC: Clear Sub Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0); + t->m_texture ? t->m_texture->GetID() : 0, + t->m_TEX0.TBP0); m_renderer->m_dev->ClearRenderTarget(t->m_texture, 0); } } @@ -820,23 +885,23 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b bool found = false; - for(const uint32* p = pages; *p != GSOffset::EOP; p++) + for (const uint32* p = pages; *p != GSOffset::EOP; p++) { uint32 page = *p; auto& list = m_src.m_map[page]; - for(auto i = list.begin(); i != list.end(); ) + for (auto i = list.begin(); i != list.end();) { Source* s = *i; ++i; - if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM)) + if (GSUtil::HasSharedBits(psm, s->m_TEX0.PSM)) { bool b = bp == s->m_TEX0.TBP0; - if(!s->m_target) + if (!s->m_target) { - if(m_disable_partial_invalidation && s->m_repeating) + if (m_disable_partial_invalidation && s->m_repeating) { m_src.RemoveAt(s); } @@ -845,10 +910,10 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b uint32* RESTRICT valid = s->m_valid; // Invalidate data of input texture - if(s->m_repeating) + if (s->m_repeating) { // Note: very hot path on snowbling engine game - for(const GSVector2i& k : s->m_p2t[page]) + for (const GSVector2i& k : s->m_p2t[page]) { valid[k.x] &= k.y; } @@ -871,7 +936,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b if (!b) b = s->Overlaps(bp, bw, psm, rect); - if(b) + if (b) { m_src.RemoveAt(s); } @@ -880,12 +945,13 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b } } - if(!target) return; + if (!target) + return; - for(int type = 0; type < 2; type++) + for (int type = 0; type < 2; type++) { auto& list = m_dst[type]; - for(auto i = list.begin(); i != list.end(); ) + for (auto i = list.begin(); i != list.end();) { auto j = i++; Target* t = *j; @@ -897,13 +963,13 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b // Major issues are expected if the game try to reuse the target // If we dirty the RT, it will likely upload partially invalid data. // (The color on the previous example) - if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { - if(!found && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) + if (!found && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) { GL_CACHE("TC: Dirty Target(%s) %d (0x%x) r(%d,%d,%d,%d)", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0, r.x, r.y, r.z, r.w); + t->m_texture ? t->m_texture->GetID() : 0, + t->m_TEX0.TBP0, r.x, r.y, r.z, r.w); t->m_dirty.push_back(GSDirtyRect(r, psm)); t->m_TEX0.TBW = bw; } @@ -911,12 +977,14 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b { list.erase(j); GL_CACHE("TC: Remove Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0); + t->m_texture ? t->m_texture->GetID() : 0, + t->m_TEX0.TBP0); delete t; continue; } - } else if (bp == t->m_TEX0.TBP0) { + } + else if (bp == t->m_TEX0.TBP0) + { // EE writes the ALPHA channel. Mark it as invalid for // the texture cache. Otherwise it will generate a wrong // hit on the texture cache. @@ -925,21 +993,22 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b } // GH: Try to detect texture write that will overlap with a target buffer - if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM)) { + if (GSUtil::HasSharedBits(psm, t->m_TEX0.PSM)) + { if (bp < t->m_TEX0.TBP0) { uint32 rowsize = bw * 8192; uint32 offset = (uint32)((t->m_TEX0.TBP0 - bp) * 256); - if(rowsize > 0 && offset % rowsize == 0) + if (rowsize > 0 && offset % rowsize == 0) { int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize; - if(r.bottom > y) + if (r.bottom > y) { GL_CACHE("TC: Dirty After Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0); + t->m_texture ? t->m_texture->GetID() : 0, + t->m_TEX0.TBP0); // TODO: do not add this rect above too t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top - y, r.right, r.bottom - y), psm)); t->m_TEX0.TBW = bw; @@ -955,17 +1024,19 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b // Ben 10 Alien Force : Vilgax Attacks uses a small temporary target for multiple textures (different bw) // It is too complex to handle, and purpose of the code was to handle FMV (large bw). So let's skip small // (128 pixels) target - if (bw > 2 && t->m_TEX0.TBW == bw && t->Inside(bp, bw, psm, rect) && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) { + if (bw > 2 && t->m_TEX0.TBW == bw && t->Inside(bp, bw, psm, rect) && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) + { uint32 rowsize = bw * 8192u; uint32 offset = (uint32)((bp - t->m_TEX0.TBP0) * 256); - if(rowsize > 0 && offset % rowsize == 0) { + if (rowsize > 0 && offset % rowsize == 0) + { int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize; GL_CACHE("TC: Dirty in the middle of Target(%s) %d (0x%x->0x%x) pos(%d,%d => %d,%d) bw:%u", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0, t->m_end_block, - r.left, r.top + y, r.right, r.bottom + y, bw); + t->m_texture ? t->m_texture->GetID() : 0, + t->m_TEX0.TBP0, t->m_end_block, + r.left, r.top + y, r.right, r.bottom + y, bw); t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top + y, r.right, r.bottom + y), psm)); t->m_TEX0.TBW = bw; @@ -987,11 +1058,15 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r) //uint32 bw = off->bw; // No depth handling please. - if (psm == PSM_PSMZ32 || psm == PSM_PSMZ24 || psm == PSM_PSMZ16 || psm == PSM_PSMZ16S) { + if (psm == PSM_PSMZ32 || psm == PSM_PSMZ24 || psm == PSM_PSMZ16 || psm == PSM_PSMZ16S) + { GL_INS("ERROR: InvalidateLocalMem depth format isn't supported (%d,%d to %d,%d)", r.x, r.y, r.z, r.w); - if (m_can_convert_depth) { - for(auto t : m_dst[DepthStencil]) { - if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { + if (m_can_convert_depth) + { + for (auto t : m_dst[DepthStencil]) + { + if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + { if (GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) Read(t, r.rintersect(t->m_valid)); } @@ -1004,11 +1079,11 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r) // It works for all the games mentioned below and fixes a couple of other ones as well // (Busen0: Wizardry and Chaos Legion). // Also in a few games the below code ran the Grandia3 case when it shouldn't :p - for(auto t : m_dst[RenderTarget]) + for (auto t : m_dst[RenderTarget]) { if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S) { - if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { // GH Note: Read will do a StretchRect and then will sizzle data to the GS memory // t->m_valid will do the full target texture whereas r.intersect(t->m_valid) will be limited @@ -1020,16 +1095,21 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r) // note: r.rintersect breaks Wizardry and Chaos Legion // Read(t, t->m_valid) works in all tested games but is very slow in GUST titles >< - if (GSTextureCache::m_disable_partial_invalidation) { + if (GSTextureCache::m_disable_partial_invalidation) + { Read(t, r.rintersect(t->m_valid)); - } else { + } + else + { if (r.x == 0 && r.y == 0) // Full screen read? Read(t, t->m_valid); else // Block level read? Read(t, r.rintersect(t->m_valid)); } } - } else { + } + else + { GL_INS("ERROR: InvalidateLocalMem target is a depth format"); } } @@ -1044,14 +1124,14 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r) // if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S) // { - // if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + // if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) // { - // if(GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) + // if (GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) // { // Read(t, r.rintersect(t->m_valid)); // return; // } - // else if(psm == PSM_PSMCT32 && (t->m_TEX0.PSM == PSM_PSMCT16 || t->m_TEX0.PSM == PSM_PSMCT16S)) + // else if (psm == PSM_PSMCT32 && (t->m_TEX0.PSM == PSM_PSMCT16 || t->m_TEX0.PSM == PSM_PSMCT16S)) // { // // ffx-2 riku changing to her default (shoots some reflecting glass at the end), 16-bit rt read as 32-bit // Read(t, GSVector4i(r.left, r.top, r.right, r.top + (r.bottom - r.top) * 2).rintersect(t->m_valid)); @@ -1075,18 +1155,18 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r) // } // // Grandia3, FFX, FFX-2 pause menus. t->m_TEX0.TBP0 magic number checks because otherwise kills xs2 videos - // if( (GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && (bp > t->m_TEX0.TBP0) ) - // && ((t->m_TEX0.TBP0 == 0) || (t->m_TEX0.TBP0==3328) || (t->m_TEX0.TBP0==3584) )) + // if ((GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && (bp > t->m_TEX0.TBP0)) + // && ((t->m_TEX0.TBP0 == 0) || (t->m_TEX0.TBP0==3328) || (t->m_TEX0.TBP0==3584))) // { // //printf("first : %d-%d child : %d-%d\n", psm, bp, t->m_TEX0.PSM, t->m_TEX0.TBP0); // uint32 rowsize = bw * 8192; // uint32 offset = (uint32)((bp - t->m_TEX0.TBP0) * 256); - // if(rowsize > 0 && offset % rowsize == 0) + // if (rowsize > 0 && offset % rowsize == 0) // { // int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize; - // if(y < ymin && y < 512) + // if (y < ymin && y < 512) // { // rt2 = t; // ymin = y; @@ -1095,7 +1175,7 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r) // } // } //} - //if(rt2) + //if (rt2) //{ // Read(rt2, GSVector4i(r.left, r.top + ymin, r.right, r.bottom + ymin)); //} @@ -1115,17 +1195,20 @@ void GSTextureCache::InvalidateVideoMemSubTarget(GSTextureCache::Target* rt) auto& list = m_dst[RenderTarget]; - for(auto i = list.begin(); i != list.end(); ) { + for (auto i = list.begin(); i != list.end();) + { Target* t = *i; - if((t->m_TEX0.TBP0 > rt->m_TEX0.TBP0) && (t->m_end_block < rt->m_end_block) && (t->m_TEX0.TBW == rt->m_TEX0.TBW) - && (t->m_TEX0.TBP0 < t->m_end_block)) { + if ((t->m_TEX0.TBP0 > rt->m_TEX0.TBP0) && (t->m_end_block < rt->m_end_block) && (t->m_TEX0.TBW == rt->m_TEX0.TBW) && (t->m_TEX0.TBP0 < t->m_end_block)) + { GL_INS("InvalidateVideoMemSubTarget: rt 0x%x -> 0x%x, sub rt 0x%x -> 0x%x", - rt->m_TEX0.TBP0, rt->m_end_block, t->m_TEX0.TBP0, t->m_end_block); + rt->m_TEX0.TBP0, rt->m_end_block, t->m_TEX0.TBP0, t->m_end_block); i = list.erase(i); delete t; - } else { + } + else + { ++i; } } @@ -1136,18 +1219,22 @@ void GSTextureCache::IncAge() int maxage = m_src.m_used ? 3 : 30; // You can't use m_map[page] because Source* are duplicated on several pages. - for(auto i = m_src.m_surfaces.begin(); i != m_src.m_surfaces.end(); ) + for (auto i = m_src.m_surfaces.begin(); i != m_src.m_surfaces.end();) { Source* s = *i; - if(s->m_shared_texture) { + if (s->m_shared_texture) + { // Shared textures are temporary only added in the hash set but not in the texture // cache list therefore you can't use RemoveAt i = m_src.m_surfaces.erase(i); delete s; - } else { + } + else + { ++i; - if (++s->m_age > maxage) { + if (++s->m_age > maxage) + { m_src.RemoveAt(s); } } @@ -1161,10 +1248,10 @@ void GSTextureCache::IncAge() // Original maxage was 4 here, Xenosaga 2 needs at least 240, else it flickers on scene transitions. maxage = 400; // ffx intro scene changes leave the old image untouched for a couple of frames and only then start using it - for(int type = 0; type < 2; type++) + for (int type = 0; type < 2; type++) { auto& list = m_dst[type]; - for(auto i = list.begin(); i != list.end(); ) + for (auto i = list.begin(); i != list.end();) { Target* t = *i; @@ -1172,20 +1259,23 @@ void GSTextureCache::IncAge() // probability that game will do it on the current RT. // Variable is cleared here to avoid issue with game that uses a 16 bits // render target - if (t->m_age > 0) { + if (t->m_age > 0) + { // GoW2 uses the effect at the start of the frame t->m_32_bits_fmt = false; } - if(++t->m_age > maxage) + if (++t->m_age > maxage) { i = list.erase(i); GL_CACHE("TC: Remove Target(%s): %d (0x%x) due to age", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0); + t->m_texture ? t->m_texture->GetID() : 0, + t->m_TEX0.TBP0); delete t; - } else { + } + else + { ++i; } } @@ -1226,7 +1316,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_texture->SetScale(scale); src->m_end_block = dst->m_end_block; - if (psm.pal > 0) { + if (psm.pal > 0) + { // Attach palette for GPU texture conversion AttachPaletteToSource(src, psm.pal, true); } @@ -1265,18 +1356,21 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con int shader = dst->m_type != RenderTarget ? ShaderConvert_FLOAT32_TO_RGBA8 : ShaderConvert_COPY; bool is_8bits = TEX0.PSM == PSM_PSMT8; - if (is_8bits) { + if (is_8bits) + { GL_INS("Reading RT as a packed-indexed 8 bits format"); shader = ShaderConvert_RGBA_TO_8I; } #ifdef ENABLE_OGL_DEBUG - if (TEX0.PSM == PSM_PSMT4) { + if (TEX0.PSM == PSM_PSMT4) + { GL_INS("ERROR: Reading RT as a packed-indexed 4 bits format is not supported"); } #endif - if (GSLocalMemory::m_psm[TEX0.PSM].bpp > 8) { + if (GSLocalMemory::m_psm[TEX0.PSM].bpp > 8) + { src->m_32_bits_fmt = dst->m_32_bits_fmt; } @@ -1293,7 +1387,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con int w = (int)(dst->m_texture->GetScale().x * tw); int h = (int)(dst->m_texture->GetScale().y * th); - if (is_8bits) { + if (is_8bits) + { // Unscale 8 bits textures, quality won't be nice but format is really awful w = tw; h = th; @@ -1303,7 +1398,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // pitch conversion - if(dst->m_TEX0.TBW != TEX0.TBW) // && dst->m_TEX0.PSM == TEX0.PSM + if (dst->m_TEX0.TBW != TEX0.TBW) // && dst->m_TEX0.PSM == TEX0.PSM { // This is so broken :p ////Better not do the code below, "fixes" like every game that ever gets here.. @@ -1330,10 +1425,10 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con //int dw = (int)TEX0.TBW << 6; //int dh = 1 << TEX0.TH; - //if(sw != 0) - //for(int dy = 0; dy < dh; dy += blockHeight) + //if (sw != 0) + //for (int dy = 0; dy < dh; dy += blockHeight) //{ - // for(int dx = 0; dx < dw; dx += blockWidth) + // for (int dx = 0; dx < dw; dx += blockWidth) // { // int off = dy * dw / blockHeight + dx; @@ -1349,11 +1444,11 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // } //} } - else if(tw < 1024) + else if (tw < 1024) { // FIXME: timesplitters blurs the render target by blending itself over a couple of times hack = true; - //if(tw == 256 && th == 128 && (TEX0.TBP0 == 0 || TEX0.TBP0 == 0x00e00)) + //if (tw == 256 && th == 128 && (TEX0.TBP0 == 0 || TEX0.TBP0 == 0x00e00)) //{ // delete src; // return NULL; @@ -1388,7 +1483,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // 2/ It doesn't support rescaling of the RT (tw = 1024) // Maybe it will be more easy to just round the UV value in the Vertex Shader - if (!is_8bits) { + if (!is_8bits) + { // 8 bits handling is special due to unscaling. It is better to not execute this code if (w > dstsize.x) { @@ -1416,7 +1512,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // GH: by default (m_paltex == 0) GSdx converts texture to the 32 bit format // However it is different here. We want to reuse a Render Target as a texture. // Because the texture is already on the GPU, CPU can't convert it. - if (psm.pal > 0) { + if (psm.pal > 0) + { AttachPaletteToSource(src, psm.pal, true); } // Disable linear filtering for various GS post-processing effect @@ -1447,13 +1544,16 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con if ((sRect == dRect).alltrue() && !shader) { - if (half_right) { + if (half_right) + { // You typically hit this code in snow engine game. Dstsize is the size of of Dx/GL RT // which is arbitrary set to 1280 (biggest RT used by GS). h/w are based on the input texture // so the only reliable way to find the real size of the target is to use the TBW value. float real_width = dst->m_TEX0.TBW * 64u * dst->m_texture->GetScale().x; - m_renderer->m_dev->CopyRect(sTex, dTex, GSVector4i((int)(real_width/2.0f), 0, (int)real_width, h)); - } else { + m_renderer->m_dev->CopyRect(sTex, dTex, GSVector4i((int)(real_width / 2.0f), 0, (int)real_width, h)); + } + else + { m_renderer->m_dev->CopyRect(sTex, dTex, GSVector4i(0, 0, w, h)); // <= likely wrong dstsize.x could be bigger than w } } @@ -1463,14 +1563,15 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con sRect.z /= sTex->GetWidth(); sRect.w /= sTex->GetHeight(); - if (half_right) { - sRect.x = sRect.z/2.0f; + if (half_right) + { + sRect.x = sRect.z / 2.0f; } m_renderer->m_dev->StretchRect(sTex, sRect, dTex, dRect, shader, linear); } - if( src->m_texture ) + if (src->m_texture) src->m_texture->SetScale(scale); else ASSERT(0); @@ -1481,25 +1582,25 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con float modx = 0.0f; float mody = 0.0f; - if(UserHacks_HalfPixelOffset && hack) + if (UserHacks_HalfPixelOffset && hack) { switch(m_renderer->GetUpscaleMultiplier()) { - case 0: //Custom Resolution - { - const float offset = 0.2f; - modx = dst->m_texture->GetScale().x + offset; - mody = dst->m_texture->GetScale().y + offset; - dst->m_texture->LikelyOffset = true; - break; - } - case 2: modx = 2.2f; mody = 2.2f; dst->m_texture->LikelyOffset = true; break; - case 3: modx = 3.1f; mody = 3.1f; dst->m_texture->LikelyOffset = true; break; - case 4: modx = 4.2f; mody = 4.2f; dst->m_texture->LikelyOffset = true; break; - case 5: modx = 5.3f; mody = 5.3f; dst->m_texture->LikelyOffset = true; break; - case 6: modx = 6.2f; mody = 6.2f; dst->m_texture->LikelyOffset = true; break; - case 8: modx = 8.2f; mody = 8.2f; dst->m_texture->LikelyOffset = true; break; - default: modx = 0.0f; mody = 0.0f; dst->m_texture->LikelyOffset = false; break; + case 0: //Custom Resolution + { + const float offset = 0.2f; + modx = dst->m_texture->GetScale().x + offset; + mody = dst->m_texture->GetScale().y + offset; + dst->m_texture->LikelyOffset = true; + break; + } + case 2: modx = 2.2f; mody = 2.2f; dst->m_texture->LikelyOffset = true; break; + case 3: modx = 3.1f; mody = 3.1f; dst->m_texture->LikelyOffset = true; break; + case 4: modx = 4.2f; mody = 4.2f; dst->m_texture->LikelyOffset = true; break; + case 5: modx = 5.3f; mody = 5.3f; dst->m_texture->LikelyOffset = true; break; + case 6: modx = 6.2f; mody = 6.2f; dst->m_texture->LikelyOffset = true; break; + case 8: modx = 8.2f; mody = 8.2f; dst->m_texture->LikelyOffset = true; break; + default: modx = 0.0f; mody = 0.0f; dst->m_texture->LikelyOffset = false; break; } } @@ -1513,9 +1614,11 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_texture = m_renderer->m_dev->CreateTexture(tw, th, Get8bitFormat()); AttachPaletteToSource(src, psm.pal, true); } - else { + else + { src->m_texture = m_renderer->m_dev->CreateTexture(tw, th); - if (psm.pal > 0) { + if (psm.pal > 0) + { AttachPaletteToSource(src, psm.pal, false); } } @@ -1538,13 +1641,13 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int t->m_type = type; - if(type == RenderTarget) + if (type == RenderTarget) { t->m_texture = m_renderer->m_dev->CreateSparseRenderTarget(w, h); t->m_used = true; // FIXME } - else if(type == DepthStencil) + else if (type == DepthStencil) { t->m_texture = m_renderer->m_dev->CreateSparseDepthStencil(w, h); } @@ -1557,24 +1660,28 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int void GSTextureCache::PrintMemoryUsage() { #ifdef ENABLE_OGL_DEBUG - uint32 tex = 0; + uint32 tex = 0; uint32 tex_rt = 0; - uint32 rt = 0; - uint32 dss = 0; - for(auto s : m_src.m_surfaces) { - if(s && !s->m_shared_texture) { - if(s->m_target) + uint32 rt = 0; + uint32 dss = 0; + for (auto s : m_src.m_surfaces) + { + if (s && !s->m_shared_texture) + { + if (s->m_target) tex_rt += s->m_texture->GetMemUsage(); else - tex += s->m_texture->GetMemUsage(); + tex += s->m_texture->GetMemUsage(); } } - for(auto t : m_dst[RenderTarget]) { - if(t) + for (auto t : m_dst[RenderTarget]) + { + if (t) rt += t->m_texture->GetMemUsage(); } - for(auto t : m_dst[DepthStencil]) { - if(t) + for (auto t : m_dst[DepthStencil]) + { + if (t) dss += t->m_texture->GetMemUsage(); } @@ -1621,7 +1728,7 @@ bool GSTextureCache::Surface::Overlaps(uint32 bp, uint32 bw, uint32 psm, const G // Valid only for color formats. uint32 const end_block = GSLocalMemory::m_psm[psm].bn(rect.z - 1, rect.w - 1, bp, bw); return (m_TEX0.TBP0 <= bp && bp <= m_end_block) - || (m_TEX0.TBP0 <= end_block && end_block <= m_end_block); + || (m_TEX0.TBP0 <= end_block && end_block <= m_end_block); } // GSTextureCache::Source @@ -1640,15 +1747,17 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR m_TEX0 = TEX0; m_TEXA = TEXA; - if (dummy_container) { + if (dummy_container) + { // Dummy container only contain a m_texture that is a pointer to another source. m_write.rect = NULL; m_write.count = 0; m_repeating = false; - - } else { + } + else + { memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0)); memset(m_valid, 0, sizeof(m_valid)); @@ -1658,7 +1767,7 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR m_repeating = m_TEX0.IsRepeating(); - if(m_repeating) + if (m_repeating) { m_p2t = r->m_mem.GetPage2TileMap(m_TEX0); } @@ -1677,7 +1786,7 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer) { Surface::UpdateAge(); - if(layer == 0 && (m_complete || m_target)) + if (layer == 0 && (m_complete || m_target)) { return; } @@ -1689,7 +1798,7 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer) GSVector4i r = rect.ralign(bs); - if(layer == 0 && r.eq(GSVector4i(0, 0, tw, th))) + if (layer == 0 && r.eq(GSVector4i(0, 0, tw, th))) { m_complete = true; // lame, but better than nothing } @@ -1698,24 +1807,24 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer) uint32 blocks = 0; - if(m_repeating) + if (m_repeating) { - for(int y = r.top; y < r.bottom; y += bs.y) + for (int y = r.top; y < r.bottom; y += bs.y) { uint32 base = off->block.row[y >> 3u]; - for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x) + for (int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x) { uint32 block = base + off->block.col[x >> 3u]; - if(block < MAX_BLOCKS || m_wrap_gs_mem) + if (block < MAX_BLOCKS || m_wrap_gs_mem) { uint32 addr = (i >> 3u) % MAX_BLOCKS; uint32 row = addr >> 5u; uint32 col = 1 << (addr & 31u); - if((m_valid[row] & col) == 0) + if ((m_valid[row] & col) == 0) { m_valid[row] |= col; @@ -1729,22 +1838,22 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer) } else { - for(int y = r.top; y < r.bottom; y += bs.y) + for (int y = r.top; y < r.bottom; y += bs.y) { uint32 base = off->block.row[y >> 3u]; - for(int x = r.left; x < r.right; x += bs.x) + for (int x = r.left; x < r.right; x += bs.x) { uint32 block = base + off->block.col[x >> 3u]; - if(block < MAX_BLOCKS || m_wrap_gs_mem) + if (block < MAX_BLOCKS || m_wrap_gs_mem) { block %= MAX_BLOCKS; uint32 row = block >> 5u; uint32 col = 1 << (block & 31u); - if((m_valid[row] & col) == 0) + if ((m_valid[row] & col) == 0) { m_valid[row] |= col; @@ -1757,7 +1866,7 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer) } } - if(blocks > 0) + if (blocks > 0) { m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << (m_palette ? 2 : 0)); @@ -1790,18 +1899,18 @@ void GSTextureCache::Source::Write(const GSVector4i& r, int layer) { m_write.rect[m_write.count++] = r; - while(m_write.count >= 2) + while (m_write.count >= 2) { GSVector4i& a = m_write.rect[m_write.count - 2]; GSVector4i& b = m_write.rect[m_write.count - 1]; - if((a == b.zyxw()).mask() == 0xfff0) + if ((a == b.zyxw()).mask() == 0xfff0) { a.right = b.right; // extend right m_write.count--; } - else if((a == b.xwzy()).mask() == 0xff0f) + else if ((a == b.xwzy()).mask() == 0xff0f) { a.bottom = b.bottom; // extend down @@ -1813,7 +1922,7 @@ void GSTextureCache::Source::Write(const GSVector4i& r, int layer) } } - if(m_write.count > 2) + if (m_write.count > 2) { Flush(1, layer); } @@ -1841,7 +1950,7 @@ void GSTextureCache::Source::Flush(uint32 count, int layer) GSLocalMemory::readTexture rtx = psm.rtx; - if(m_palette) + if (m_palette) { pitch >>= 2; rtx = psm.rtxP; @@ -1849,11 +1958,11 @@ void GSTextureCache::Source::Flush(uint32 count, int layer) uint8* buff = m_temp; - for(uint32 i = 0; i < count; i++) + for (uint32 i = 0; i < count; i++) { GSVector4i r = m_write.rect[i]; - if((r > tr).mask() & 0xff00) + if ((r > tr).mask() & 0xff00) { (mem.*rtx)(off, r, buff, pitch, m_TEXA); @@ -1863,7 +1972,7 @@ void GSTextureCache::Source::Flush(uint32 count, int layer) { GSTexture::GSMap m; - if(m_texture->Map(m, &r, layer)) + if (m_texture->Map(m, &r, layer)) { (mem.*rtx)(off, r, m.bits, m.pitch, m_TEXA); @@ -1878,7 +1987,7 @@ void GSTextureCache::Source::Flush(uint32 count, int layer) } } - if(count < m_write.count) + if (count < m_write.count) { // Warning src and destination overlap. Memmove must be used instead of memcpy memmove(&m_write.rect[0], &m_write.rect[count], (m_write.count - count) * sizeof(m_write.rect[0])); @@ -1887,7 +1996,8 @@ void GSTextureCache::Source::Flush(uint32 count, int layer) m_write.count -= count; } -bool GSTextureCache::Source::ClutMatch(PaletteKey palette_key) { +bool GSTextureCache::Source::ClutMatch(PaletteKey palette_key) +{ return PaletteKeyEqual()(palette_key, m_palette_obj->GetPaletteKey()); } @@ -1927,15 +2037,19 @@ void GSTextureCache::Target::Update() GSVector4i r = m_dirty.GetDirtyRectAndClear(m_TEX0, t_size); - if (r.rempty()) return; + if (r.rempty()) + return; // No handling please - if ((m_type == DepthStencil) && !m_depth_supported) { + if ((m_type == DepthStencil) && !m_depth_supported) + { // do the most likely thing a direct write would do, clear it GL_INS("ERROR: Update DepthStencil dummy"); return; - } else if (m_type == DepthStencil && m_renderer->m_game.title == CRC::FFX2) { + } + else if (m_type == DepthStencil && m_renderer->m_game.title == CRC::FFX2) + { GL_INS("ERROR: bad invalidation detected, depth buffer will be cleared"); // FFX2 menu. Invalidation of the depth is wrongly done and only the first // page is invalidated. Technically a CRC hack will be better but I don't expect @@ -1964,9 +2078,9 @@ void GSTextureCache::Target::Update() GSTexture::GSMap m; - if(t->Map(m)) + if (t->Map(m)) { - m_renderer->m_mem.ReadTexture(off, r, m.bits, m.pitch, TEXA); + m_renderer->m_mem.ReadTexture(off, r, m.bits, m.pitch, TEXA); t->Unmap(); } @@ -1982,13 +2096,13 @@ void GSTextureCache::Target::Update() // m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, w * h * 4); // Copy the new GS memory content into the destination texture. - if(m_type == RenderTarget) + if (m_type == RenderTarget) { GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW, r.x, r.y, r.z, r.w); m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy()); } - else if(m_type == DepthStencil) + else if (m_type == DepthStencil) { GL_INS("ERROR: Update DepthStencil 0x%x", m_TEX0.TBP0); @@ -2004,7 +2118,7 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect) m_valid = m_valid.runion(rect); // Block of the bottom right texel of the validity rectangle, last valid block of the texture - m_end_block = GSLocalMemory::m_psm[m_TEX0.PSM].bn(m_valid.z - 1, m_valid.w - 1, m_TEX0.TBP0, m_TEX0.TBW); // Valid only for color formats + m_end_block = GSLocalMemory::m_psm[m_TEX0.PSM].bn(m_valid.z - 1, m_valid.w - 1, m_TEX0.TBP0, m_TEX0.TBW); // Valid only for color formats // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); } @@ -2015,7 +2129,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* { m_surfaces.insert(s); - if(s->m_target) + if (s->m_target) { // TODO @@ -2028,16 +2142,16 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* } // The source pointer will be stored/duplicated in all m_map[array of pages] - for(size_t i = 0; i < countof(m_pages); i++) + for (size_t i = 0; i < countof(m_pages); i++) { - if(uint32 p = s->m_pages_as_bit[i]) + if (uint32 p = s->m_pages_as_bit[i]) { auto* m = &m_map[i << 5]; auto* e = &s->m_erase_it[i << 5]; unsigned long j; - while(_BitScanForward(&j, p)) + while (_BitScanForward(&j, p)) { // FIXME: this statement could be optimized to a single ASM instruction (instead of 4) // Either BTR (AKA bit test and reset). Depends on the previous instruction. @@ -2052,11 +2166,12 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* void GSTextureCache::SourceMap::RemoveAll() { - for (auto s : m_surfaces) delete s; + for (auto s : m_surfaces) + delete s; m_surfaces.clear(); - for(size_t i = 0; i < countof(m_map); i++) + for (size_t i = 0; i < countof(m_map); i++) { m_map[i].clear(); } @@ -2067,8 +2182,8 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s) m_surfaces.erase(s); GL_CACHE("TC: Remove Src Texture: %d (0x%x)", - s->m_texture ? s->m_texture->GetID() : 0, - s->m_TEX0.TBP0); + s->m_texture ? s->m_texture->GetID() : 0, + s->m_TEX0.TBP0); if (s->m_target) { @@ -2077,16 +2192,16 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s) } else { - for(size_t i = 0; i < countof(m_pages); i++) + for (size_t i = 0; i < countof(m_pages); i++) { - if(uint32 p = s->m_pages_as_bit[i]) + if (uint32 p = s->m_pages_as_bit[i]) { auto* m = &m_map[i << 5]; const auto* e = &s->m_erase_it[i << 5]; unsigned long j; - while(_BitScanForward(&j, p)) + while (_BitScanForward(&j, p)) { // FIXME: this statement could be optimized to a single ASM instruction (instead of 4) // Either BTR (AKA bit test and reset). Depends on the previous instruction. @@ -2118,26 +2233,32 @@ GSTextureCache::Palette::Palette(const GSRenderer* renderer, uint16 pal, bool ne uint16 palette_size = pal * sizeof(uint32); m_clut = (uint32*)_aligned_malloc(palette_size, 64); memcpy(m_clut, (const uint32*)m_renderer->m_mem.m_clut, palette_size); - if (need_gs_texture) { + if (need_gs_texture) + { InitializeTexture(); } } -GSTextureCache::Palette::~Palette() { +GSTextureCache::Palette::~Palette() +{ m_renderer->m_dev->Recycle(m_tex_palette); _aligned_free(m_clut); } -GSTexture* GSTextureCache::Palette::GetPaletteGSTexture() { +GSTexture* GSTextureCache::Palette::GetPaletteGSTexture() +{ return m_tex_palette; } -GSTextureCache::PaletteKey GSTextureCache::Palette::GetPaletteKey() { - return { m_clut, m_pal }; +GSTextureCache::PaletteKey GSTextureCache::Palette::GetPaletteKey() +{ + return {m_clut, m_pal}; } -void GSTextureCache::Palette::InitializeTexture() { - if (!m_tex_palette) { +void GSTextureCache::Palette::InitializeTexture() +{ + if (!m_tex_palette) + { // A palette texture is always created with dimensions 256x1 (also in the case that m_pal is 16, thus a 16x1 texture // would be enough to store the CLUT data) because the coordinates that the shader uses for // sampling such texture are always normalized by 255. @@ -2157,14 +2278,16 @@ void GSTextureCache::Palette::InitializeTexture() { // it is computed in 16 passes, // 2) The clut can contain many 0s, so as a way to increase the spread of hashing values for small changes in the input clut the hashing function // is using addition in combination with logical XOR operator; The addition constants are large prime numbers, which may help in achieving what intended. -std::size_t GSTextureCache::PaletteKeyHash::operator()(const PaletteKey &key) const { +std::size_t GSTextureCache::PaletteKeyHash::operator()(const PaletteKey& key) const +{ uint16 pal = key.pal; const uint32* clut = key.clut; ASSERT((pal & 15) == 0); size_t clut_hash = 3831179159; - for (uint16 i = 0; i < pal; i += 16) { + for (uint16 i = 0; i < pal; i += 16) + { clut_hash = (clut_hash + 1488000301) ^ (clut[i ] + 33644011); clut_hash = (clut_hash + 3831179159) ^ (clut[i + 1] + 47627467); clut_hash = (clut_hash + 3659574209) ^ (clut[i + 2] + 577038523); @@ -2190,8 +2313,10 @@ std::size_t GSTextureCache::PaletteKeyHash::operator()(const PaletteKey &key) co // GSTextureCache::PaletteKeyEqual -bool GSTextureCache::PaletteKeyEqual::operator()(const PaletteKey &lhs, const PaletteKey &rhs) const { - if (lhs.pal != rhs.pal) { +bool GSTextureCache::PaletteKeyEqual::operator()(const PaletteKey& lhs, const PaletteKey& rhs) const +{ + if (lhs.pal != rhs.pal) + { return false; } @@ -2203,12 +2328,14 @@ bool GSTextureCache::PaletteKeyEqual::operator()(const PaletteKey &lhs, const Pa GSTextureCache::PaletteMap::PaletteMap(const GSRenderer* renderer) : m_renderer(renderer) { - for (auto& map : m_maps) { + for (auto& map : m_maps) + { map.reserve(MAX_SIZE); } } -std::shared_ptr GSTextureCache::PaletteMap::LookupPalette(uint16 pal, bool need_gs_texture) { +std::shared_ptr GSTextureCache::PaletteMap::LookupPalette(uint16 pal, bool need_gs_texture) +{ ASSERT(pal == 16 || pal == 256); // Choose which hash map search into: @@ -2219,13 +2346,15 @@ std::shared_ptr GSTextureCache::PaletteMap::LookupPalet const uint32* clut = (const uint32*)m_renderer->m_mem.m_clut; // Create PaletteKey for searching into map (clut is actually not copied, so do not store this key into the map) - PaletteKey palette_key = { clut, pal }; + PaletteKey palette_key = {clut, pal}; auto it1 = map.find(palette_key); - if (it1 != map.end()) { + if (it1 != map.end()) + { // Clut content match, HIT - if (need_gs_texture && !it1->second->GetPaletteGSTexture()) { + if (need_gs_texture && !it1->second->GetPaletteGSTexture()) + { // Generate GSTexture and upload clut content if needed and not done yet it1->second->InitializeTexture(); } @@ -2234,49 +2363,56 @@ std::shared_ptr GSTextureCache::PaletteMap::LookupPalet // No palette with matching clut content, MISS - if (map.size() > MAX_SIZE) { + if (map.size() > MAX_SIZE) + { // If the map is too big, try to clean it by disposing and removing unused palettes, before adding the new one GL_INS("WARNING, %u-bit PaletteMap (Size %u): Max size %u exceeded, clearing unused palettes.", pal * sizeof(uint32), map.size(), MAX_SIZE); uint32 current_size = map.size(); - for (auto it = map.begin(); it != map.end(); ) { + for (auto it = map.begin(); it != map.end();) + { // If the palette is unused, there is only one shared pointers holding a reference to the unused Palette object, // and this shared pointer is the one stored in the map itself - if (it->second.use_count() <= 1) { + if (it->second.use_count() <= 1) + { // Palette is unused it = map.erase(it); // Erase element from map - // The palette object should now be gone as the shared pointer to the object in the map is deleted + // The palette object should now be gone as the shared pointer to the object in the map is deleted } - else { + else + { ++it; } } uint32 cleared_palette_count = current_size - (uint32)map.size(); - if (cleared_palette_count == 0) { + if (cleared_palette_count == 0) + { GL_INS("ERROR, %u-bit PaletteMap (Size %u): Max size %u exceeded, could not clear any palette, negative performance impact.", pal * sizeof(uint32), map.size(), MAX_SIZE); } - else { + else + { map.reserve(MAX_SIZE); // Ensure map capacity is not modified by the clearing GL_INS("INFO, %u-bit PaletteMap (Size %u): Cleared %u palettes.", pal * sizeof(uint32), map.size(), cleared_palette_count); } } std::shared_ptr palette = std::make_shared(m_renderer, pal, need_gs_texture); - + map.emplace(palette->GetPaletteKey(), palette); GL_CACHE("TC, %u-bit PaletteMap (Size %u): Added new palette.", pal * sizeof(uint32), map.size()); - + return palette; } -void GSTextureCache::PaletteMap::Clear() { - for (auto& map : m_maps) { +void GSTextureCache::PaletteMap::Clear() +{ + for (auto& map : m_maps) + { map.clear(); // Clear all the nodes of the map, deleting Palette objects managed by shared pointers as they should be unused elsewhere map.reserve(MAX_SIZE); // Ensure map capacity is not modified by the clearing } } - diff --git a/plugins/GSdx/Renderers/HW/GSTextureCache.h b/plugins/GSdx/Renderers/HW/GSTextureCache.h index cffaa1c488..dba99b27fd 100644 --- a/plugins/GSdx/Renderers/HW/GSTextureCache.h +++ b/plugins/GSdx/Renderers/HW/GSTextureCache.h @@ -28,7 +28,11 @@ class GSTextureCache { public: - enum {RenderTarget, DepthStencil}; + enum + { + RenderTarget, + DepthStencil + }; class Surface : public GSAlignedClass<32> { @@ -43,7 +47,7 @@ public: uint8* m_temp; bool m_32_bits_fmt; // Allow to detect the casting of 32 bits as 16 bits texture bool m_shared_texture; - uint32 m_end_block; // Hint of the surface area. + uint32 m_end_block; // Hint of the surface area. public: Surface(GSRenderer* r, uint8* temp); @@ -54,7 +58,8 @@ public: bool Overlaps(uint32 bp, uint32 bw, uint32 psm, const GSVector4i& rect); }; - struct PaletteKey { + struct PaletteKey + { const uint32* clut; uint16 pal; }; @@ -86,19 +91,25 @@ public: void InitializeTexture(); }; - struct PaletteKeyHash { + struct PaletteKeyHash + { // Calculate hash - std::size_t operator()(const PaletteKey &key) const; + std::size_t operator()(const PaletteKey& key) const; }; - struct PaletteKeyEqual { + struct PaletteKeyEqual + { // Compare pal value and clut contents - bool operator()(const PaletteKey &lhs, const PaletteKey &rhs) const; + bool operator()(const PaletteKey& lhs, const PaletteKey& rhs) const; }; class Source : public Surface { - struct {GSVector4i* rect; uint32 count;} m_write; + struct + { + GSVector4i* rect; + uint32 count; + } m_write; void Write(const GSVector4i& r, int layer); void Flush(uint32 count, int layer); @@ -116,7 +127,7 @@ public: // still be valid on future. However it ought to be good when the source is created // so it can be used to access un-converted data for the current draw call. GSTexture* m_from_target; - GIFRegTEX0 m_from_target_TEX0; // TEX0 of the target texture, if any, else equal to texture TEX0 + GIFRegTEX0 m_from_target_TEX0; // TEX0 of the target texture, if any, else equal to texture TEX0 GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value // Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase std::array m_erase_it; @@ -155,7 +166,7 @@ public: private: static const uint16 MAX_SIZE = 65535; // Max size of each map. const GSRenderer* m_renderer; - + // Array of 2 maps, the first for 64B palettes and the second for 1024B palettes. // Each map stores the key PaletteKey (clut copy, pal value) pointing to the relevant shared pointer to Palette object. // There is one PaletteKey per Palette, and the hashing and comparison of PaletteKey is done with custom operators PaletteKeyHash and PaletteKeyEqual. @@ -178,7 +189,11 @@ public: uint32 m_pages[16]; // bitmap of all pages bool m_used; - SourceMap() : m_used(false) {memset(m_pages, 0, sizeof(m_pages));} + SourceMap() + : m_used(false) + { + memset(m_pages, 0, sizeof(m_pages)); + } void Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* off); void RemoveAll(); @@ -249,7 +264,8 @@ public: bool ShallSearchTextureInsideRt(); - const char* to_string(int type) { + const char* to_string(int type) + { return (type == DepthStencil) ? "Depth" : "Color"; } diff --git a/plugins/GSdx/Renderers/HW/GSVertexHW.h b/plugins/GSdx/Renderers/HW/GSVertexHW.h index f249f5731a..262e23a0ad 100644 --- a/plugins/GSdx/Renderers/HW/GSVertexHW.h +++ b/plugins/GSdx/Renderers/HW/GSVertexHW.h @@ -28,13 +28,18 @@ struct alignas(32) GSVertexHW9 { - GSVector4 t; + GSVector4 t; GSVector4 p; // t.z = union {struct {uint8 r, g, b, a;}; uint32 c0;}; // t.w = union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;} - GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;} + GSVertexHW9& operator=(GSVertexHW9& v) + { + t = v.t; + p = v.p; + return *this; + } }; #pragma pack(pop) diff --git a/plugins/GSdx/Renderers/Null/GSDeviceNull.cpp b/plugins/GSdx/Renderers/Null/GSDeviceNull.cpp index 5c754922b7..6bac6a3080 100644 --- a/plugins/GSdx/Renderers/Null/GSDeviceNull.cpp +++ b/plugins/GSdx/Renderers/Null/GSDeviceNull.cpp @@ -22,9 +22,9 @@ #include "stdafx.h" #include "GSDeviceNull.h" -bool GSDeviceNull::Create(const std::shared_ptr &wnd) +bool GSDeviceNull::Create(const std::shared_ptr& wnd) { - if(!GSDevice::Create(wnd)) + if (!GSDevice::Create(wnd)) return false; Reset(1, 1); @@ -41,4 +41,3 @@ GSTexture* GSDeviceNull::CreateSurface(int type, int w, int h, int format) { return new GSTextureNull(type, w, h, format); } - diff --git a/plugins/GSdx/Renderers/Null/GSDeviceNull.h b/plugins/GSdx/Renderers/Null/GSDeviceNull.h index 2595ed1906..6d2584b849 100644 --- a/plugins/GSdx/Renderers/Null/GSDeviceNull.h +++ b/plugins/GSdx/Renderers/Null/GSDeviceNull.h @@ -36,7 +36,6 @@ private: public: GSDeviceNull() {} - bool Create(const std::shared_ptr &wnd); + bool Create(const std::shared_ptr& wnd); bool Reset(int w, int h); }; - diff --git a/plugins/GSdx/Renderers/Null/GSRendererNull.h b/plugins/GSdx/Renderers/Null/GSRendererNull.h index a9c6e7fc14..78fe0fc265 100644 --- a/plugins/GSdx/Renderers/Null/GSRendererNull.h +++ b/plugins/GSdx/Renderers/Null/GSRendererNull.h @@ -28,7 +28,10 @@ class GSRendererNull : public GSRenderer class GSVertexTraceNull : public GSVertexTrace { public: - GSVertexTraceNull(const GSState* state) : GSVertexTrace(state) {} + GSVertexTraceNull(const GSState* state) + : GSVertexTrace(state) + { + } }; protected: @@ -42,8 +45,8 @@ protected: } public: - GSRendererNull() - : GSRenderer() + GSRendererNull() + : GSRenderer() { } }; diff --git a/plugins/GSdx/Renderers/Null/GSTextureNull.h b/plugins/GSdx/Renderers/Null/GSTextureNull.h index 6d6cdc6522..82aa2de8d4 100644 --- a/plugins/GSdx/Renderers/Null/GSTextureNull.h +++ b/plugins/GSdx/Renderers/Null/GSTextureNull.h @@ -25,17 +25,20 @@ class GSTextureNull : public GSTexture { - struct {int type, w, h, format;} m_desc; + struct + { + int type, w, h, format; + } m_desc; public: GSTextureNull(); GSTextureNull(int type, int w, int h, int format); - int GetType() const {return m_desc.type;} - int GetFormat() const {return m_desc.format;} + int GetType() const { return m_desc.type; } + int GetFormat() const { return m_desc.format; } - bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) {return true;} - bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) {return false;} + bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) { return true; } + bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) { return false; } void Unmap() {} - bool Save(const std::string& fn) {return false;} + bool Save(const std::string& fn) { return false; } }; diff --git a/plugins/GSdx/Renderers/OpenGL/GLLoader.h b/plugins/GSdx/Renderers/OpenGL/GLLoader.h index b0b864a742..5bbc6159af 100644 --- a/plugins/GSdx/Renderers/OpenGL/GLLoader.h +++ b/plugins/GSdx/Renderers/OpenGL/GLLoader.h @@ -27,7 +27,7 @@ #define GL_BUFFER_0 (0) #ifndef GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR -#define GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR 0x00000008 +#define GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR 0x00000008 #endif // FIX compilation issue with Mesa 10 @@ -54,12 +54,12 @@ // Added in GL4.6. Code should be updated but driver support... #ifndef GL_TEXTURE_MAX_ANISOTROPY_EXT -#define GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE +#define GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE #endif // Believe me or not, they forgot to add the interaction with DSA... #ifndef GL_EXT_direct_state_access -typedef void (APIENTRYP PFNGLTEXTUREPAGECOMMITMENTEXTPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean commit); +typedef void(APIENTRYP PFNGLTEXTUREPAGECOMMITMENTEXTPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean commit); #endif // ********************** End of the extra header ******************* // @@ -115,18 +115,20 @@ typedef void (APIENTRYP PFNGLTEXTUREPAGECOMMITMENTEXTPROC) (GLuint texture, GLin // It should be done by ENABLE_GL_VERSION_1_4 but it conflicts with the old gl.h #if defined(__unix__) || defined(__APPLE__) -extern PFNGLBLENDFUNCSEPARATEPROC glBlendFuncSeparate; +extern PFNGLBLENDFUNCSEPARATEPROC glBlendFuncSeparate; #endif -extern PFNGLTEXTUREPAGECOMMITMENTEXTPROC glTexturePageCommitmentEXT; +extern PFNGLTEXTUREPAGECOMMITMENTEXTPROC glTexturePageCommitmentEXT; #include "PFN_GLLOADER_HPP.h" -namespace GLExtension { +namespace GLExtension +{ extern bool Has(const std::string& ext); extern void Set(const std::string& ext, bool v = true); -} +} // namespace GLExtension -namespace GLLoader { +namespace GLLoader +{ void check_gl_requirements(); extern bool vendor_id_amd; @@ -145,4 +147,4 @@ namespace GLLoader { extern bool found_compatible_GL_ARB_sparse_texture2; extern bool found_compatible_sparse_depth; -} +} // namespace GLLoader diff --git a/plugins/GSdx/Renderers/OpenGL/GLState.cpp b/plugins/GSdx/Renderers/OpenGL/GLState.cpp index fa08f89a50..d3c1f14fe2 100644 --- a/plugins/GSdx/Renderers/OpenGL/GLState.cpp +++ b/plugins/GSdx/Renderers/OpenGL/GLState.cpp @@ -22,7 +22,8 @@ #include "stdafx.h" #include "GLState.h" -namespace GLState { +namespace GLState +{ GLuint fbo; GSVector2i viewport; GSVector4i scissor; @@ -59,7 +60,8 @@ namespace GLState { int64 available_vram; - void Clear() { + void Clear() + { fbo = 0; viewport = GSVector2i(0, 0); scissor = GSVector4i(0, 0, 0, 0); @@ -100,4 +102,4 @@ namespace GLState { // (256MB are reserved for PBO/IBO/VBO/UBO buffers) available_vram = (4096u - 256u) * 1024u * 1024u; } -} +} // namespace GLState diff --git a/plugins/GSdx/Renderers/OpenGL/GLState.h b/plugins/GSdx/Renderers/OpenGL/GLState.h index 2f55635ae3..3befbf75ad 100644 --- a/plugins/GSdx/Renderers/OpenGL/GLState.h +++ b/plugins/GSdx/Renderers/OpenGL/GLState.h @@ -24,7 +24,8 @@ #include "GSdx.h" #include "GSVector.h" -namespace GLState { +namespace GLState +{ extern GLuint fbo; // frame buffer object extern GSVector2i viewport; extern GSVector4i scissor; @@ -62,4 +63,4 @@ namespace GLState { extern int64 available_vram; extern void Clear(); -} +} // namespace GLState diff --git a/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.cpp b/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.cpp index a805dd9edb..d96409b321 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.cpp @@ -41,12 +41,12 @@ uint64 g_vertex_upload_byte = 0; uint64 g_uniform_upload_byte = 0; #endif -static const uint32 g_merge_cb_index = 10; -static const uint32 g_interlace_cb_index = 11; -static const uint32 g_fx_cb_index = 14; -static const uint32 g_convert_index = 15; -static const uint32 g_vs_cb_index = 20; -static const uint32 g_ps_cb_index = 21; +static const uint32 g_merge_cb_index = 10; +static const uint32 g_interlace_cb_index = 11; +static const uint32 g_fx_cb_index = 14; +static const uint32 g_convert_index = 15; +static const uint32 g_vs_cb_index = 20; +static const uint32 g_ps_cb_index = 21; bool GSDeviceOGL::m_debug_gl_call = false; int GSDeviceOGL::m_shader_inst = 0; @@ -72,7 +72,7 @@ GSDeviceOGL::GSDeviceOGL() memset(&m_date, 0, sizeof(m_date)); memset(&m_shadeboost, 0, sizeof(m_shadeboost)); memset(&m_om_dss, 0, sizeof(m_om_dss)); - memset(&m_profiler, 0 , sizeof(m_profiler)); + memset(&m_profiler, 0, sizeof(m_profiler)); GLState::Clear(); m_mipmap = theApp.GetConfigI("mipmap"); @@ -82,21 +82,22 @@ GSDeviceOGL::GSDeviceOGL() m_filter = TriFiltering::None; // Reset the debug file - #ifdef ENABLE_OGL_DEBUG +#ifdef ENABLE_OGL_DEBUG if (theApp.GetCurrentRendererType() == GSRendererType::OGL_SW) - m_debug_gl_file = fopen("GSdx_opengl_debug_sw.txt","w"); + m_debug_gl_file = fopen("GSdx_opengl_debug_sw.txt", "w"); else - m_debug_gl_file = fopen("GSdx_opengl_debug_hw.txt","w"); - #endif + m_debug_gl_file = fopen("GSdx_opengl_debug_hw.txt", "w"); +#endif - m_debug_gl_call = theApp.GetConfigB("debug_opengl"); + m_debug_gl_call = theApp.GetConfigB("debug_opengl"); m_disable_hw_gl_draw = theApp.GetConfigB("disable_hw_gl_draw"); } GSDeviceOGL::~GSDeviceOGL() { - if (m_debug_gl_file) { + if (m_debug_gl_file) + { fclose(m_debug_gl_file); m_debug_gl_file = NULL; } @@ -143,7 +144,8 @@ GSDeviceOGL::~GSDeviceOGL() glDeleteSamplers(countof(m_ps_ss), m_ps_ss); - for (uint32 key = 0; key < countof(m_om_dss); key++) delete m_om_dss[key]; + for (uint32 key = 0; key < countof(m_om_dss); key++) + delete m_om_dss[key]; PboPool::Destroy(); @@ -154,27 +156,30 @@ GSDeviceOGL::~GSDeviceOGL() void GSDeviceOGL::GenerateProfilerData() { - if (m_profiler.last_query < 3) { + if (m_profiler.last_query < 3) + { glDeleteQueries(1 << 16, m_profiler.timer_query); return; } // Wait latest quey to get valid result GLuint available = 0; - while (!available) { + while (!available) + { glGetQueryObjectuiv(m_profiler.timer(), GL_QUERY_RESULT_AVAILABLE, &available); } GLuint64 time_start; GLuint64 time_end; std::vector times; - double ms = 0.000001; + double ms = 0.000001; - int replay = theApp.GetConfigI("linux_replay"); + int replay = theApp.GetConfigI("linux_replay"); int first_query = replay > 1 ? m_profiler.last_query / replay : 0; glGetQueryObjectui64v(m_profiler.timer_query[first_query], GL_QUERY_RESULT, &time_start); - for (uint32 q = first_query + 1; q < m_profiler.last_query; q++) { + for (uint32 q = first_query + 1; q < m_profiler.last_query; q++) + { glGetQueryObjectui64v(m_profiler.timer_query[q], GL_QUERY_RESULT, &time_end); uint64 t = time_end - time_start; times.push_back((double)t * ms); @@ -187,22 +192,26 @@ void GSDeviceOGL::GenerateProfilerData() glDeleteQueries(1 << 16, m_profiler.timer_query); - double frames = times.size(); - double mean = 0.0; - double sd = 0.0; + double frames = times.size(); + double mean = 0.0; + double sd = 0.0; auto minmax_time = std::minmax_element(times.begin(), times.end()); - for (auto t : times) mean += t; + for (auto t : times) + mean += t; mean = mean / frames; - for (auto t : times) sd += pow(t-mean, 2); + for (auto t : times) + sd += pow(t - mean, 2); sd = sqrt(sd / frames); uint32 time_repartition[16] = {0}; - for (auto t : times) { - uint32 slot = (uint32)(t/2.0); - if (slot >= countof(time_repartition)) { + for (auto t : times) + { + uint32 slot = (uint32)(t / 2.0); + if (slot >= countof(time_repartition)) + { slot = countof(time_repartition) - 1; } time_repartition[slot]++; @@ -215,13 +224,16 @@ void GSDeviceOGL::GenerateProfilerData() fprintf(stderr, "SD %4.2f ms\n", sd); fprintf(stderr, "\n"); fprintf(stderr, "Frame Repartition\n"); - for (uint32 i = 0; i < countof(time_repartition); i ++) { - fprintf(stderr, "%3u ms => %3u ms\t%4u\n", 2 * i, 2 * (i+1), time_repartition[i]); + for (uint32 i = 0; i < countof(time_repartition); i++) + { + fprintf(stderr, "%3u ms => %3u ms\t%4u\n", 2 * i, 2 * (i + 1), time_repartition[i]); } FILE* csv = fopen("GSdx_profile.csv", "w"); - if (csv) { - for (size_t i = 0; i < times.size(); i++) { + if (csv) + { + for (size_t i = 0; i < times.size(); i++) + { fprintf(csv, "%zu,%lf\n", i, times[i]); } @@ -239,12 +251,13 @@ GSTexture* GSDeviceOGL::CreateSurface(int type, int w, int h, int fmt) // NOTE: I'm not sure RenderTarget always need to be cleared. It could be costly for big upscale. // FIXME: it will be more logical to do it in FetchSurface. This code is only called at first creation // of the texture. However we could reuse a deleted texture. - if (m_force_texture_clear == 0) { + if (m_force_texture_clear == 0) + { // Clear won't be done if the texture isn't committed. Commit the full texture to ensure // correct behavior of force clear option (debug option) t->Commit(); - switch(type) + switch (type) { case GSTexture::RenderTarget: ClearRenderTarget(t, 0); @@ -267,13 +280,14 @@ GSTexture* GSDeviceOGL::FetchSurface(int type, int w, int h, int format) GSTexture* t = GSDevice::FetchSurface(type, w, h, format); - if (m_force_texture_clear) { + if (m_force_texture_clear) + { // Clear won't be done if the texture isn't committed. Commit the full texture to ensure // correct behavior of force clear option (debug option) t->Commit(); GSVector4 red(1.0f, 0.0f, 0.0f, 1.0f); - switch(type) + switch (type) { case GSTexture::RenderTarget: ClearRenderTarget(t, 0); @@ -295,14 +309,15 @@ GSTexture* GSDeviceOGL::FetchSurface(int type, int w, int h, int format) return t; } -bool GSDeviceOGL::Create(const std::shared_ptr &wnd) +bool GSDeviceOGL::Create(const std::shared_ptr& wnd) { std::vector shader; // **************************************************************** // Debug helper // **************************************************************** #ifdef ENABLE_OGL_DEBUG - if (theApp.GetConfigB("debug_opengl")) { + if (theApp.GetConfigB("debug_opengl")) + { glDebugMessageCallback((GLDEBUGPROC)DebugOutputToFile, NULL); glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB); @@ -340,7 +355,8 @@ bool GSDeviceOGL::Create(const std::shared_ptr &wnd) glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); // Some timers to help profiling - if (GLLoader::in_replayer) { + if (GLLoader::in_replayer) + { glCreateQueries(GL_TIMESTAMP, 1 << 16, m_profiler.timer_query); } } @@ -353,9 +369,9 @@ bool GSDeviceOGL::Create(const std::shared_ptr &wnd) static_assert(sizeof(GSVertexPT1) == sizeof(GSVertex), "wrong GSVertex size"); std::vector il_convert = { - {0, 2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(0) } , + {0, 2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)( 0) } , {1, 2 , GL_FLOAT , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(16) } , - {2, 4 , GL_UNSIGNED_BYTE , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(8) } , + {2, 4 , GL_UNSIGNED_BYTE , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)( 8) } , {3, 1 , GL_FLOAT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(12) } , {4, 2 , GL_UNSIGNED_SHORT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(16) } , {5, 1 , GL_UNSIGNED_INT , GL_FALSE , sizeof(GSVertex) , (const GLvoid*)(20) } , @@ -371,7 +387,8 @@ bool GSDeviceOGL::Create(const std::shared_ptr &wnd) { GL_PUSH("GSDeviceOGL::Sampler"); - for (uint32 key = 0; key < countof(m_ps_ss); key++) { + for (uint32 key = 0; key < countof(m_ps_ss); key++) + { m_ps_ss[key] = CreateSampler(PSSamplerSelector(key)); } } @@ -396,7 +413,8 @@ bool GSDeviceOGL::Create(const std::shared_ptr &wnd) vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, shader.data()); m_convert.vs = vs; - for(size_t i = 0; i < countof(m_convert.ps); i++) { + for (size_t i = 0; i < countof(m_convert.ps); i++) + { ps = m_shader->Compile("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, shader.data()); std::string pretty_name = "Convert pipe " + std::to_string(i); m_convert.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps); @@ -425,7 +443,8 @@ bool GSDeviceOGL::Create(const std::shared_ptr &wnd) theApp.LoadResource(IDR_MERGE_GLSL, shader); - for(size_t i = 0; i < countof(m_merge_obj.ps); i++) { + for (size_t i = 0; i < countof(m_merge_obj.ps); i++) + { ps = m_shader->Compile("merge.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, shader.data()); std::string pretty_name = "Merge pipe " + std::to_string(i); m_merge_obj.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps); @@ -442,7 +461,8 @@ bool GSDeviceOGL::Create(const std::shared_ptr &wnd) theApp.LoadResource(IDR_INTERLACE_GLSL, shader); - for(size_t i = 0; i < countof(m_interlace.ps); i++) { + for (size_t i = 0; i < countof(m_interlace.ps); i++) + { ps = m_shader->Compile("interlace.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, shader.data()); std::string pretty_name = "Interlace pipe " + std::to_string(i); m_interlace.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps); @@ -533,14 +553,19 @@ bool GSDeviceOGL::Create(const std::shared_ptr &wnd) // Get Available Memory // **************************************************************** GLint vram[4] = {0}; - if (GLLoader::vendor_id_amd) { + if (GLLoader::vendor_id_amd) + { // Full vram, remove a small margin for others buffer glGetIntegerv(GL_TEXTURE_FREE_MEMORY_ATI, vram); - } else if (GLExtension::Has("GL_NVX_gpu_memory_info")) { + } + else if (GLExtension::Has("GL_NVX_gpu_memory_info")) + { // GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX <= give full memory // Available vram glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, vram); - } else { + } + else + { fprintf(stdout, "No extenstion supported to get available memory. Use default value !\n"); } @@ -558,13 +583,12 @@ bool GSDeviceOGL::Create(const std::shared_ptr &wnd) GSVector2i tex_font = m_osd.get_texture_font_size(); m_font = std::unique_ptr( - new GSTextureOGL(GSTextureOGL::Texture, tex_font.x, tex_font.y, GL_R8, m_fbo_read, false) - ); + new GSTextureOGL(GSTextureOGL::Texture, tex_font.x, tex_font.y, GL_R8, m_fbo_read, false)); // **************************************************************** // Finish window setup and backbuffer // **************************************************************** - if(!GSDevice::Create(wnd)) + if (!GSDevice::Create(wnd)) return false; GSVector4i rect = wnd->GetClientRect(); @@ -608,7 +632,8 @@ void GSDeviceOGL::CreateTextureFX() // enough but buffer is polluted with noise. Clear will be limited // to the mask. glStencilMask(0xFF); - for (uint32 key = 0; key < countof(m_om_dss); key++) { + for (uint32 key = 0; key < countof(m_om_dss); key++) + { m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key)); } @@ -618,7 +643,7 @@ void GSDeviceOGL::CreateTextureFX() bool GSDeviceOGL::Reset(int w, int h) { - if(!GSDevice::Reset(w, h)) + if (!GSDevice::Reset(w, h)) return false; // Opengl allocate the backbuffer with the window. The render is done in the backbuffer when @@ -638,7 +663,8 @@ void GSDeviceOGL::Flip() { m_wnd->Flip(); - if (GLLoader::in_replayer) { + if (GLLoader::in_replayer) + { glQueryCounter(m_profiler.timer(), GL_TIMESTAMP); m_profiler.last_query++; } @@ -686,7 +712,8 @@ void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count) void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) { - if (!t) return; + if (!t) + return; GSTextureOGL* T = static_cast(t); if (T->HasBeenCleaned() && !T->IsBackbuffer()) @@ -707,18 +734,20 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) uint32 old_color_mask = GLState::wrgba; OMSetColorMaskState(); - if (T->IsBackbuffer()) { + if (T->IsBackbuffer()) + { OMSetFBO(0); // glDrawBuffer(GL_BACK); // this is the default when there is no FB // 0 will select the first drawbuffer ie GL_BACK glClearBufferfv(GL_COLOR, 0, c.v); - } else { + } + else + { OMSetFBO(m_fbo); OMAttachRt(T); glClearBufferfv(GL_COLOR, 0, c.v); - } OMSetColorMaskState(OMColorMaskSelector(old_color_mask)); @@ -730,7 +759,8 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c) { - if (!t) return; + if (!t) + return; GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255); ClearRenderTarget(t, color); @@ -738,13 +768,15 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c) void GSDeviceOGL::ClearDepth(GSTexture* t) { - if (!t) return; + if (!t) + return; GSTextureOGL* T = static_cast(t); GL_PUSH("Clear Depth %d", T->GetID()); - if (0 && GLLoader::found_GL_ARB_clear_texture) { + if (0 && GLLoader::found_GL_ARB_clear_texture) + { // I don't know what the driver does but it creates // some slowdowns on Harry Potter PS // Maybe it triggers some texture relocations, or maybe @@ -754,7 +786,9 @@ void GSDeviceOGL::ClearDepth(GSTexture* t) // Don't bother with Depth_Stencil insanity T->Clear(NULL); - } else { + } + else + { OMSetFBO(m_fbo); // RT must be detached, if RT is too small, depth won't be fully cleared // AT tolenico 2 map clip bug @@ -764,9 +798,12 @@ void GSDeviceOGL::ClearDepth(GSTexture* t) // TODO: check size of scissor before toggling it glDisable(GL_SCISSOR_TEST); float c = 0.0f; - if (GLState::depth_mask) { + if (GLState::depth_mask) + { glClearBufferfv(GL_DEPTH, 0, &c); - } else { + } + else + { glDepthMask(true); glClearBufferfv(GL_DEPTH, 0, &c); glDepthMask(false); @@ -777,7 +814,8 @@ void GSDeviceOGL::ClearDepth(GSTexture* t) void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c) { - if (!t) return; + if (!t) + return; GSTextureOGL* T = static_cast(t); @@ -800,15 +838,19 @@ GLuint GSDeviceOGL::CreateSampler(PSSamplerSelector sel) glCreateSamplers(1, &sampler); // Bilinear filtering - if (sel.biln) { + if (sel.biln) + { glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - } else { + } + else + { glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST); } - switch (static_cast(sel.triln)) { + switch (static_cast(sel.triln)) + { case GS_MIN_FILTER::Nearest: // Nop based on biln break; @@ -846,7 +888,8 @@ GLuint GSDeviceOGL::CreateSampler(PSSamplerSelector sel) glSamplerParameteri(sampler, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); int anisotropy = theApp.GetConfigI("MaxAnisotropy"); - if (anisotropy && sel.aniso) { + if (anisotropy && sel.aniso) + { if (GLExtension::Has("GL_ARB_texture_filter_anisotropic")) glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY, (float)anisotropy); else if (GLExtension::Has("GL_EXT_texture_filter_anisotropic")) @@ -874,7 +917,7 @@ GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel) dss->SetStencil(GL_EQUAL, GL_KEEP); } - if(dssel.ztst != ZTST_ALWAYS || dssel.zwe) + if (dssel.ztst != ZTST_ALWAYS || dssel.zwe) { static const GLenum ztst[] = { @@ -911,7 +954,8 @@ void GSDeviceOGL::InitPrimDateTexture(GSTexture* rt, const GSVector4i& area) void GSDeviceOGL::RecycleDateTexture() { - if (m_date.t) { + if (m_date.t) + { //static_cast(m_date.t)->Save(format("/tmp/date_adv_%04ld.csv", GSState::s_n)); Recycle(m_date.t); @@ -1009,7 +1053,8 @@ void GSDeviceOGL::SelfShaderTestRun(const std::string& dir, const std::string& f #ifdef __linux__ // Nouveau actually - if (GLLoader::mesa_driver) { + if (GLLoader::mesa_driver) + { if (freopen(out.c_str(), "w", stderr) == NULL) fprintf(stderr, "Failed to redirect stderr\n"); } @@ -1021,7 +1066,8 @@ void GSDeviceOGL::SelfShaderTestRun(const std::string& dir, const std::string& f #ifdef __linux__ // Nouveau actually - if (GLLoader::mesa_driver) { + if (GLLoader::mesa_driver) + { if (freopen("/dev/tty", "w", stderr) == NULL) fprintf(stderr, "Failed to restore stderr\n"); } @@ -1031,12 +1077,12 @@ void GSDeviceOGL::SelfShaderTestRun(const std::string& dir, const std::string& f void GSDeviceOGL::SelfShaderTestPrint(const std::string& test, int& nb_shader) { fprintf(stderr, "%-25s\t\t%d shaders:\t%d instructions (M %4.2f)\t%d registers (M %4.2f)\n", - test.c_str(), nb_shader, - m_shader_inst, (float)m_shader_inst/(float)nb_shader, - m_shader_reg, (float)m_shader_reg/(float)nb_shader); + test.c_str(), nb_shader, + m_shader_inst, (float)m_shader_inst / (float)nb_shader, + m_shader_reg, (float)m_shader_reg / (float)nb_shader); m_shader_inst = 0; - m_shader_reg = 0; + m_shader_reg = 0; nb_shader = 0; } @@ -1050,26 +1096,29 @@ void GSDeviceOGL::SelfShaderTest() std::string test; m_shader_inst = 0; - m_shader_reg = 0; + m_shader_reg = 0; int nb_shader = 0; test = "SW_Blending"; - for (int colclip = 0; colclip < 2; colclip++) { - for (int fmt = 0; fmt < 3; fmt++) { - for (int i = 0; i < 3; i++) { + for (int colclip = 0; colclip < 2; colclip++) + { + for (int fmt = 0; fmt < 3; fmt++) + { + for (int i = 0; i < 3; i++) + { PSSelector sel; sel.tfx = 4; int ib = (i + 1) % 3; sel.blend_a = i; - sel.blend_b = ib;; + sel.blend_b = ib; sel.blend_c = i; sel.blend_d = i; sel.colclip = colclip; - sel.dfmt = fmt; + sel.dfmt = fmt; std::string file = format("Shader_Blend_%d_%d_%d_%d__Cclip_%d__Dfmt_%d.glsl.asm", - i, ib, i, i, colclip, fmt); + i, ib, i, i, colclip, fmt); SelfShaderTestRun(test, file, sel, nb_shader); } } @@ -1077,7 +1126,8 @@ void GSDeviceOGL::SelfShaderTest() SelfShaderTestPrint(test, nb_shader); test = "Alpha_Test"; - for (int atst = 0; atst < 5; atst++) { + for (int atst = 0; atst < 5; atst++) + { PSSelector sel; sel.tfx = 4; @@ -1088,7 +1138,8 @@ void GSDeviceOGL::SelfShaderTest() SelfShaderTestPrint(test, nb_shader); test = "Fbmask__Fog__Shuffle__Read_ba"; - for (int read_ba = 0; read_ba < 2; read_ba++) { + for (int read_ba = 0; read_ba < 2; read_ba++) + { PSSelector sel; sel.tfx = 4; @@ -1103,7 +1154,8 @@ void GSDeviceOGL::SelfShaderTest() SelfShaderTestPrint(test, nb_shader); test = "Date"; - for (int date = 1; date < 7; date++) { + for (int date = 1; date < 7; date++) + { PSSelector sel; sel.tfx = 4; @@ -1114,7 +1166,8 @@ void GSDeviceOGL::SelfShaderTest() SelfShaderTestPrint(test, nb_shader); test = "FBA"; - for (int fmt = 0; fmt < 3; fmt++) { + for (int fmt = 0; fmt < 3; fmt++) + { PSSelector sel; sel.tfx = 4; @@ -1141,15 +1194,18 @@ void GSDeviceOGL::SelfShaderTest() SelfShaderTestPrint(test, nb_shader); test = "Tfx__Tcc"; - for (int channel = 0; channel < 5; channel++) { - for (int tfx = 0; tfx < 5; tfx++) { - for (int tcc = 0; tcc < 2; tcc++) { + for (int channel = 0; channel < 5; channel++) + { + for (int tfx = 0; tfx < 5; tfx++) + { + for (int tcc = 0; tcc < 2; tcc++) + { PSSelector sel; sel.fst = 1; sel.channel = channel; - sel.tfx = tfx; - sel.tcc = tcc; + sel.tfx = tfx; + sel.tcc = tcc; std::string file = format("Shader_Tfx_%d__Tcc_%d__Channel_%d.glsl.asm", tfx, tcc, channel); SelfShaderTestRun(test, file, sel, nb_shader); } @@ -1158,17 +1214,24 @@ void GSDeviceOGL::SelfShaderTest() SelfShaderTestPrint(test, nb_shader); test = "Texture_Sampling"; - for (int depth = 0; depth < 4; depth++) { - for (int fmt = 0; fmt < 16; fmt++) { - if ((fmt & 3) == 3) continue; + for (int depth = 0; depth < 4; depth++) + { + for (int fmt = 0; fmt < 16; fmt++) + { + if ((fmt & 3) == 3) + continue; - for (int ltf = 0; ltf < 2; ltf++) { - for (int aem = 0; aem < 2; aem++) { - for (int wms = 1; wms < 4; wms++) { - for (int wmt = 1; wmt < 4; wmt++) { + for (int ltf = 0; ltf < 2; ltf++) + { + for (int aem = 0; aem < 2; aem++) + { + for (int wms = 1; wms < 4; wms++) + { + for (int wmt = 1; wmt < 4; wmt++) + { PSSelector sel; - sel.tfx = 1; - sel.tcc = 1; + sel.tfx = 1; + sel.tcc = 1; sel.fst = 1; sel.depth_fmt = depth; @@ -1178,7 +1241,7 @@ void GSDeviceOGL::SelfShaderTest() sel.wms = wms; sel.wmt = wmt; std::string file = format("Shader_Ltf_%d__Aem_%d__TFmt_%d__Wms_%d__Wmt_%d__DepthFmt_%d.glsl.asm", - ltf, aem, fmt, wms, wmt, depth); + ltf, aem, fmt, wms, wmt, depth); SelfShaderTestRun(test, file, sel, nb_shader); } } @@ -1255,11 +1318,11 @@ void GSDeviceOGL::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r dTex->CommitRegion(GSVector2i(r.z, r.w)); ASSERT(GLExtension::Has("GL_ARB_copy_image") && glCopyImageSubData); - glCopyImageSubData( sid, GL_TEXTURE_2D, - 0, r.x, r.y, 0, - did, GL_TEXTURE_2D, - 0, 0, 0, 0, - r.width(), r.height(), 1); + glCopyImageSubData(sid, GL_TEXTURE_2D, + 0, r.x, r.y, 0, + did, GL_TEXTURE_2D, + 0, 0, 0, 0, + r.width(), r.height(), 1); } void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader, bool linear) @@ -1286,14 +1349,14 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, int bs, OMColorMaskSelector cms, bool linear) { - if(!sTex || !dTex) + if (!sTex || !dTex) { ASSERT(0); return; } bool draw_in_depth = (ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT32] || ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT24] || - ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT16] || ps == m_convert.ps[ShaderConvert_RGB5A1_TO_FLOAT16]); + ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT16] || ps == m_convert.ps[ShaderConvert_RGB5A1_TO_FLOAT16]); // Performance optimization. It might be faster to use a framebuffer blit for standard case // instead to emulate it with shader @@ -1352,7 +1415,8 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture // 2/ in case some GSdx code expect thing in dx order. // Only flipping the backbuffer is transparent (I hope)... GSVector4 flip_sr = sRect; - if (static_cast(dTex)->IsBackbuffer()) { + if (static_cast(dTex)->IsBackbuffer()) + { flip_sr.y = sRect.w; flip_sr.w = sRect.y; } @@ -1398,7 +1462,8 @@ void GSDeviceOGL::RenderOsd(GSTexture* dt) OMSetBlendState((uint8)GSDeviceOGL::m_MERGE_BLEND); OMSetRenderTargets(dt, NULL); - if(m_osd.m_texture_dirty) { + if (m_osd.m_texture_dirty) + { m_osd.upload_texture_atlas(m_font.get()); } @@ -1434,14 +1499,16 @@ void GSDeviceOGL::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, ClearRenderTarget(dTex, c); // Upload constant to select YUV algo - if (feedback_write_2 || feedback_write_1) { + if (feedback_write_2 || feedback_write_1) + { // Write result to feedback loop m_misc_cb_cache.EMOD_AC.x = EXTBUF.EMODA; m_misc_cb_cache.EMOD_AC.y = EXTBUF.EMODC; m_convert.cb->cache_upload(&m_misc_cb_cache); } - if (sTex[1] && (PMODE.SLBG == 0 || feedback_write_2_but_blend_bg)) { + if (sTex[1] && (PMODE.SLBG == 0 || feedback_write_2_but_blend_bg)) + { // 2nd output is enabled and selected. Copy it to destination so we can blend it with 1st output // Note: value outside of dRect must contains the background color (c) StretchRect(sTex[1], sRect[1], dTex, dRect[1], ShaderConvert_COPY); @@ -1455,16 +1522,20 @@ void GSDeviceOGL::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, if (feedback_write_2_but_blend_bg) ClearRenderTarget(dTex, c); - if (sTex[0]) { + if (sTex[0]) + { if (PMODE.AMOD == 1) // Keep the alpha from the 2nd output OMSetColorMaskState(OMColorMaskSelector(0x7)); // 1st output is enabled. It must be blended - if (PMODE.MMOD == 1) { + if (PMODE.MMOD == 1) + { // Blend with a constant alpha m_merge_obj.cb->cache_upload(&c.v); StretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge_obj.ps[1], m_MERGE_BLEND, OMColorMaskSelector()); - } else { + } + else + { // Blend with 2 * input alpha StretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge_obj.ps[0], m_MERGE_BLEND, OMColorMaskSelector()); } @@ -1498,8 +1569,10 @@ void GSDeviceOGL::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex) { // Lazy compile - if (!m_fxaa.ps) { - if (!GLLoader::found_GL_ARB_gpu_shader5) { // GL4.0 extension + if (!m_fxaa.ps) + { + if (!GLLoader::found_GL_ARB_gpu_shader5) // GL4.0 extension + { return; } @@ -1528,12 +1601,14 @@ void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex) void GSDeviceOGL::DoExternalFX(GSTexture* sTex, GSTexture* dTex) { // Lazy compile - if (!m_shaderfx.ps) { - if (!GLLoader::found_GL_ARB_gpu_shader5) { // GL4.0 extension + if (!m_shaderfx.ps) + { + if (!GLLoader::found_GL_ARB_gpu_shader5) // GL4.0 extension + { return; } - std::string config_name(theApp.GetConfigS("shaderfx_conf")); + std::string config_name(theApp.GetConfigS("shaderfx_conf")); std::ifstream fconfig(config_name); std::stringstream config; config << "#extension GL_ARB_gpu_shader5 : require\n"; @@ -1542,10 +1617,11 @@ void GSDeviceOGL::DoExternalFX(GSTexture* sTex, GSTexture* dTex) else fprintf(stderr, "Warning failed to load '%s'. External Shader might be wrongly configured\n", config_name.c_str()); - std::string shader_name(theApp.GetConfigS("shaderfx_glsl")); + std::string shader_name(theApp.GetConfigS("shaderfx_glsl")); std::ifstream fshader(shader_name); std::stringstream shader; - if (!fshader.good()) { + if (!fshader.good()) + { fprintf(stderr, "Error failed to load '%s'. External Shader will be disabled !\n", shader_name.c_str()); return; } @@ -1606,7 +1682,8 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver // om OMSetDepthStencilState(m_date.dss); - if (GLState::blend) { + if (GLState::blend) + { glDisable(GL_BLEND); } OMSetRenderTargets(NULL, ds, &GLState::scissor); @@ -1624,7 +1701,8 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver DrawPrimitive(); - if (GLState::blend) { + if (GLState::blend) + { glEnable(GL_BLEND); } @@ -1655,9 +1733,11 @@ void GSDeviceOGL::PSSetShaderResource(int i, GSTexture* sr) { ASSERT(i < (int)countof(GLState::tex_unit)); // Note: Nvidia debgger doesn't support the id 0 (ie the NULL texture) - if (sr) { + if (sr) + { GLuint id = static_cast(sr)->GetID(); - if (GLState::tex_unit[i] != id) { + if (GLState::tex_unit[i] != id) + { GLState::tex_unit[i] = id; glBindTextureUnit(i, id); } @@ -1672,7 +1752,8 @@ void GSDeviceOGL::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) void GSDeviceOGL::PSSetSamplerState(GLuint ss) { - if (GLState::ps_ss != ss) { + if (GLState::ps_ss != ss) + { GLState::ps_ss = ss; glBindSampler(0, ss); } @@ -1681,14 +1762,18 @@ void GSDeviceOGL::PSSetSamplerState(GLuint ss) void GSDeviceOGL::OMAttachRt(GSTextureOGL* rt) { GLuint id; - if (rt) { + if (rt) + { rt->WasAttached(); id = rt->GetID(); - } else { + } + else + { id = 0; } - if (GLState::rt != id) { + if (GLState::rt != id) + { GLState::rt = id; glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, id, 0); } @@ -1697,14 +1782,18 @@ void GSDeviceOGL::OMAttachRt(GSTextureOGL* rt) void GSDeviceOGL::OMAttachDs(GSTextureOGL* ds) { GLuint id; - if (ds) { + if (ds) + { ds->WasAttached(); id = ds->GetID(); - } else { + } + else + { id = 0; } - if (GLState::ds != id) { + if (GLState::ds != id) + { GLState::ds = id; glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, id, 0); } @@ -1712,7 +1801,8 @@ void GSDeviceOGL::OMAttachDs(GSTextureOGL* ds) void GSDeviceOGL::OMSetFBO(GLuint fbo) { - if (GLState::fbo != fbo) { + if (GLState::fbo != fbo) + { GLState::fbo = fbo; glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); } @@ -1726,7 +1816,8 @@ void GSDeviceOGL::OMSetDepthStencilState(GSDepthStencilOGL* dss) void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel) { - if (sel.wrgba != GLState::wrgba) { + if (sel.wrgba != GLState::wrgba) + { GLState::wrgba = sel.wrgba; glColorMaski(0, sel.wr, sel.wg, sel.wb, sel.wa); @@ -1735,37 +1826,45 @@ void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel) void GSDeviceOGL::OMSetBlendState(uint8 blend_index, uint8 blend_factor, bool is_blend_constant, bool accumulation_blend) { - if (blend_index) { - if (!GLState::blend) { + if (blend_index) + { + if (!GLState::blend) + { GLState::blend = true; glEnable(GL_BLEND); } - if (is_blend_constant && GLState::bf != blend_factor) { + if (is_blend_constant && GLState::bf != blend_factor) + { GLState::bf = blend_factor; float bf = (float)blend_factor / 128.0f; glBlendColor(bf, bf, bf, bf); } HWBlend b = GetBlend(blend_index); - if (accumulation_blend) { + if (accumulation_blend) + { b.src = GL_ONE; b.dst = GL_ONE; } - if (GLState::eq_RGB != b.op) { + if (GLState::eq_RGB != b.op) + { GLState::eq_RGB = b.op; glBlendEquationSeparate(b.op, GL_FUNC_ADD); } - if (GLState::f_sRGB != b.src || GLState::f_dRGB != b.dst) { + if (GLState::f_sRGB != b.src || GLState::f_dRGB != b.dst) + { GLState::f_sRGB = b.src; GLState::f_dRGB = b.dst; glBlendFuncSeparate(b.src, b.dst, GL_ONE, GL_ZERO); } - - } else { - if (GLState::blend) { + } + else + { + if (GLState::blend) + { GLState::blend = false; glDisable(GL_BLEND); } @@ -1777,11 +1876,15 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto GSTextureOGL* RT = static_cast(rt); GSTextureOGL* DS = static_cast(ds); - if (rt == NULL || !RT->IsBackbuffer()) { + if (rt == NULL || !RT->IsBackbuffer()) + { OMSetFBO(m_fbo); - if (rt) { + if (rt) + { OMAttachRt(RT); - } else { + } + else + { OMAttachRt(); } @@ -1790,15 +1893,16 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto OMAttachDs(DS); else OMAttachDs(); - - } else { + } + else + { // Render in the backbuffer OMSetFBO(0); } GSVector2i size = rt ? rt->GetSize() : ds ? ds->GetSize() : GLState::viewport; - if(GLState::viewport != size) + if (GLState::viewport != size) { GLState::viewport = size; // FIXME ViewportIndexedf or ViewportIndexedfv (GL4.1) @@ -1807,7 +1911,7 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto GSVector4i r = scissor ? *scissor : GSVector4i(size).zwxy(); - if(!GLState::scissor.eq(r)) + if (!GLState::scissor.eq(r)) { GLState::scissor = r; // FIXME ScissorIndexedv (GL4.1) @@ -1818,11 +1922,13 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb) { GL_PUSH("UBO"); - if(m_vs_cb_cache.Update(vs_cb)) { + if (m_vs_cb_cache.Update(vs_cb)) + { m_vs_cb->upload(vs_cb); } - if(m_ps_cb_cache.Update(ps_cb)) { + if (m_ps_cb_cache.Update(ps_cb)) + { m_ps_cb->upload(ps_cb); } } @@ -1838,10 +1944,13 @@ void GSDeviceOGL::SetupPipeline(const VSSelector& vsel, const GSSelector& gsel, GLuint ps; auto i = m_ps.find(psel); - if (i == m_ps.end()) { + if (i == m_ps.end()) + { ps = CompilePS(psel); m_ps[psel] = ps; - } else { + } + else + { ps = i->second; } @@ -1852,9 +1961,12 @@ void GSDeviceOGL::SetupPipeline(const VSSelector& vsel, const GSSelector& gsel, static PSSelector old_psel; static GLuint old_ps = 0; std::string msg(""); -#define CHECK_STATE(p) if (psel.p != old_psel.p) msg.append(" ").append(#p); +#define CHECK_STATE(p) \ + if (psel.p != old_psel.p) \ + msg.append(" ").append(#p); - if (old_ps != ps) { + if (old_ps != ps) + { CHECK_STATE(tex_fmt); CHECK_STATE(dfmt); @@ -1917,12 +2029,13 @@ void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel) } // Note: used as a callback of DebugMessageCallback. Don't change the signature -void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, GLenum gl_severity, GLsizei gl_length, const GLchar *gl_message, const void* userParam) +void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, GLenum gl_severity, GLsizei gl_length, const GLchar* gl_message, const void* userParam) { std::string message(gl_message, gl_length >= 0 ? gl_length : strlen(gl_message)); std::string type, severity, source; static int sev_counter = 0; - switch(gl_type) { + switch (gl_type) + { case GL_DEBUG_TYPE_ERROR_ARB : type = "Error"; break; case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB : type = "Deprecated bhv"; break; case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB : type = "Undefined bhv"; break; @@ -1933,19 +2046,22 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, case GL_DEBUG_TYPE_POP_GROUP : return; // Don't print message injected by myself default : type = "TTT"; break; } - switch(gl_severity) { + switch (gl_severity) + { case GL_DEBUG_SEVERITY_HIGH_ARB : severity = "High"; sev_counter++; break; case GL_DEBUG_SEVERITY_MEDIUM_ARB : severity = "Mid"; break; case GL_DEBUG_SEVERITY_LOW_ARB : severity = "Low"; break; - default : if (id == 0xFEAD) - severity = "Cache"; - else if (id == 0xB0B0) - severity = "REG"; - else if (id == 0xD0D0) - severity = "EXTRA"; - break; + default: + if (id == 0xFEAD) + severity = "Cache"; + else if (id == 0xB0B0) + severity = "REG"; + else if (id == 0xD0D0) + severity = "EXTRA"; + break; } - switch(gl_source) { + switch (gl_source) + { case GL_DEBUG_SOURCE_API_ARB : source = "API"; break; case GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB : source = "WINDOW"; break; case GL_DEBUG_SOURCE_SHADER_COMPILER_ARB : source = "COMPILER"; break; @@ -1957,30 +2073,35 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, #ifdef _DEBUG // Don't spam noisy information on the terminal - if (gl_severity != GL_DEBUG_SEVERITY_NOTIFICATION) { - fprintf(stderr,"T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str()); + if (gl_severity != GL_DEBUG_SEVERITY_NOTIFICATION) + { + fprintf(stderr, "T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str()); } #else // Print nouveau shader compiler info - if (GSState::s_n == 0) { + if (GSState::s_n == 0) + { int t, local, gpr, inst, byte; int status = sscanf(message.c_str(), "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d", - &t, &local, &gpr, &inst, &byte); - if (status == 5) { + &t, &local, &gpr, &inst, &byte); + if (status == 5) + { m_shader_inst += inst; - m_shader_reg += gpr; - fprintf(stderr,"T:%s\t\tS:%s\t=> %s\n", type.c_str(), severity.c_str(), message.c_str()); + m_shader_reg += gpr; + fprintf(stderr, "T:%s\t\tS:%s\t=> %s\n", type.c_str(), severity.c_str(), message.c_str()); } } #endif if (m_debug_gl_file) - fprintf(m_debug_gl_file,"T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str()); + fprintf(m_debug_gl_file, "T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str()); #ifdef _DEBUG - if (sev_counter >= 5) { + if (sev_counter >= 5) + { // Close the file to flush the content on disk before exiting. - if (m_debug_gl_file) { + if (m_debug_gl_file) + { fclose(m_debug_gl_file); m_debug_gl_file = NULL; } @@ -1993,25 +2114,25 @@ uint16 GSDeviceOGL::ConvertBlendEnum(uint16 generic) { switch (generic) { - case SRC_COLOR : return GL_SRC_COLOR; - case INV_SRC_COLOR : return GL_ONE_MINUS_SRC_COLOR; - case DST_COLOR : return GL_DST_COLOR; - case INV_DST_COLOR : return GL_ONE_MINUS_DST_COLOR; - case SRC1_COLOR : return GL_SRC1_COLOR; - case INV_SRC1_COLOR : return GL_ONE_MINUS_SRC1_COLOR; - case SRC_ALPHA : return GL_SRC_ALPHA; - case INV_SRC_ALPHA : return GL_ONE_MINUS_SRC_ALPHA; - case DST_ALPHA : return GL_DST_ALPHA; - case INV_DST_ALPHA : return GL_ONE_MINUS_DST_ALPHA; - case SRC1_ALPHA : return GL_SRC1_ALPHA; - case INV_SRC1_ALPHA : return GL_ONE_MINUS_SRC1_ALPHA; - case CONST_COLOR : return GL_CONSTANT_COLOR; - case INV_CONST_COLOR : return GL_ONE_MINUS_CONSTANT_COLOR; - case CONST_ONE : return GL_ONE; - case CONST_ZERO : return GL_ZERO; - case OP_ADD : return GL_FUNC_ADD; - case OP_SUBTRACT : return GL_FUNC_SUBTRACT; - case OP_REV_SUBTRACT : return GL_FUNC_REVERSE_SUBTRACT; - default : ASSERT(0); return 0; + case SRC_COLOR : return GL_SRC_COLOR; + case INV_SRC_COLOR : return GL_ONE_MINUS_SRC_COLOR; + case DST_COLOR : return GL_DST_COLOR; + case INV_DST_COLOR : return GL_ONE_MINUS_DST_COLOR; + case SRC1_COLOR : return GL_SRC1_COLOR; + case INV_SRC1_COLOR : return GL_ONE_MINUS_SRC1_COLOR; + case SRC_ALPHA : return GL_SRC_ALPHA; + case INV_SRC_ALPHA : return GL_ONE_MINUS_SRC_ALPHA; + case DST_ALPHA : return GL_DST_ALPHA; + case INV_DST_ALPHA : return GL_ONE_MINUS_DST_ALPHA; + case SRC1_ALPHA : return GL_SRC1_ALPHA; + case INV_SRC1_ALPHA : return GL_ONE_MINUS_SRC1_ALPHA; + case CONST_COLOR : return GL_CONSTANT_COLOR; + case INV_CONST_COLOR : return GL_ONE_MINUS_CONSTANT_COLOR; + case CONST_ONE : return GL_ONE; + case CONST_ZERO : return GL_ZERO; + case OP_ADD : return GL_FUNC_ADD; + case OP_SUBTRACT : return GL_FUNC_SUBTRACT; + case OP_REV_SUBTRACT : return GL_FUNC_REVERSE_SUBTRACT; + default : ASSERT(0); return 0; } -} \ No newline at end of file +} diff --git a/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.h b/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.h index 028dbc9f33..06319480b2 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.h +++ b/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.h @@ -34,7 +34,8 @@ extern uint64 g_real_texture_upload_byte; extern uint64 g_vertex_upload_byte; #endif -class GSDepthStencilOGL { +class GSDepthStencilOGL +{ bool m_depth_enable; GLenum m_depth_func; bool m_depth_mask; @@ -44,8 +45,8 @@ class GSDepthStencilOGL { GLenum m_stencil_spass_dpass_op; public: - - GSDepthStencilOGL() : m_depth_enable(false) + GSDepthStencilOGL() + : m_depth_enable(false) , m_depth_func(GL_ALWAYS) , m_depth_mask(0) , m_stencil_enable(false) @@ -57,12 +58,21 @@ public: void EnableDepth() { m_depth_enable = true; } void EnableStencil() { m_stencil_enable = true; } - void SetDepth(GLenum func, bool mask) { m_depth_func = func; m_depth_mask = mask; } - void SetStencil(GLenum func, GLenum pass) { m_stencil_func = func; m_stencil_spass_dpass_op = pass; } + void SetDepth(GLenum func, bool mask) + { + m_depth_func = func; + m_depth_mask = mask; + } + void SetStencil(GLenum func, GLenum pass) + { + m_stencil_func = func; + m_stencil_spass_dpass_op = pass; + } void SetupDepth() { - if (GLState::depth != m_depth_enable) { + if (GLState::depth != m_depth_enable) + { GLState::depth = m_depth_enable; if (m_depth_enable) glEnable(GL_DEPTH_TEST); @@ -70,12 +80,15 @@ public: glDisable(GL_DEPTH_TEST); } - if (m_depth_enable) { - if (GLState::depth_func != m_depth_func) { + if (m_depth_enable) + { + if (GLState::depth_func != m_depth_func) + { GLState::depth_func = m_depth_func; glDepthFunc(m_depth_func); } - if (GLState::depth_mask != m_depth_mask) { + if (GLState::depth_mask != m_depth_mask) + { GLState::depth_mask = m_depth_mask; glDepthMask((GLboolean)m_depth_mask); } @@ -84,7 +97,8 @@ public: void SetupStencil() { - if (GLState::stencil != m_stencil_enable) { + if (GLState::stencil != m_stencil_enable) + { GLState::stencil = m_stencil_enable; if (m_stencil_enable) glEnable(GL_STENCIL_TEST); @@ -92,13 +106,16 @@ public: glDisable(GL_STENCIL_TEST); } - if (m_stencil_enable) { + if (m_stencil_enable) + { // Note: here the mask control which bitplane is considered by the operation - if (GLState::stencil_func != m_stencil_func) { + if (GLState::stencil_func != m_stencil_func) + { GLState::stencil_func = m_stencil_func; glStencilFunc(m_stencil_func, 1, 1); } - if (GLState::stencil_pass != m_stencil_spass_dpass_op) { + if (GLState::stencil_pass != m_stencil_spass_dpass_op) + { GLState::stencil_pass = m_stencil_spass_dpass_op; glStencilOp(GL_KEEP, GL_KEEP, m_stencil_spass_dpass_op); } @@ -133,7 +150,7 @@ public: GSVector4i* a = (GSVector4i*)this; GSVector4i* b = (GSVector4i*)cb; - if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2])).alltrue()) + if (!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2])).alltrue()) { a[0] = b[0]; a[1] = b[1]; @@ -152,17 +169,23 @@ public: { struct { - uint32 int_fst:1; - uint32 _free:31; + uint32 int_fst : 1; + uint32 _free : 31; }; uint32 key; }; - operator uint32() const {return key;} + operator uint32() const { return key; } - VSSelector() : key(0) {} - VSSelector(uint32 k) : key(k) {} + VSSelector() + : key(0) + { + } + VSSelector(uint32 k) + : key(k) + { + } }; struct GSSelector @@ -171,20 +194,26 @@ public: { struct { - uint32 sprite:1; - uint32 point:1; - uint32 line:1; + uint32 sprite : 1; + uint32 point : 1; + uint32 line : 1; - uint32 _free:29; + uint32 _free : 29; }; uint32 key; }; - operator uint32() const {return key;} + operator uint32() const { return key; } - GSSelector() : key(0) {} - GSSelector(uint32 k) : key(k) {} + GSSelector() + : key(0) + { + } + GSSelector(uint32 k) + : key(k) + { + } }; struct alignas(32) PSConstantBuffer @@ -263,74 +292,77 @@ public: { // *** Word 1 // Format - uint32 tex_fmt:4; - uint32 dfmt:2; - uint32 depth_fmt:2; + uint32 tex_fmt : 4; + uint32 dfmt : 2; + uint32 depth_fmt : 2; // Alpha extension/Correction - uint32 aem:1; - uint32 fba:1; + uint32 aem : 1; + uint32 fba : 1; // Fog - uint32 fog:1; + uint32 fog : 1; // Flat/goround shading - uint32 iip:1; + uint32 iip : 1; // Pixel test - uint32 date:3; - uint32 atst:3; + uint32 date : 3; + uint32 atst : 3; // Color sampling - uint32 fst:1; // Investigate to do it on the VS - uint32 tfx:3; - uint32 tcc:1; - uint32 wms:2; - uint32 wmt:2; - uint32 ltf:1; + uint32 fst : 1; // Investigate to do it on the VS + uint32 tfx : 3; + uint32 tcc : 1; + uint32 wms : 2; + uint32 wmt : 2; + uint32 ltf : 1; // Shuffle and fbmask effect - uint32 shuffle:1; - uint32 read_ba:1; - uint32 write_rg:1; - uint32 fbmask:1; + uint32 shuffle : 1; + uint32 read_ba : 1; + uint32 write_rg : 1; + uint32 fbmask : 1; //uint32 _free1:0; // *** Word 2 // Blend and Colclip - uint32 blend_a:2; - uint32 blend_b:2; - uint32 blend_c:2; - uint32 blend_d:2; - uint32 clr1:1; // useful? - uint32 hdr:1; - uint32 colclip:1; - uint32 pabe:1; + uint32 blend_a : 2; + uint32 blend_b : 2; + uint32 blend_c : 2; + uint32 blend_d : 2; + uint32 clr1 : 1; // useful? + uint32 hdr : 1; + uint32 colclip : 1; + uint32 pabe : 1; // Others ways to fetch the texture - uint32 channel:3; + uint32 channel : 3; // Dithering - uint32 dither:2; + uint32 dither : 2; // Depth clamp - uint32 zclamp:1; + uint32 zclamp : 1; // Hack - uint32 tcoffsethack:1; - uint32 urban_chaos_hle:1; - uint32 tales_of_abyss_hle:1; - uint32 tex_is_fb:1; // Jak Shadows - uint32 automatic_lod:1; - uint32 manual_lod:1; - uint32 point_sampler:1; - uint32 invalid_tex0:1; // Lupin the 3rd + uint32 tcoffsethack : 1; + uint32 urban_chaos_hle : 1; + uint32 tales_of_abyss_hle : 1; + uint32 tex_is_fb : 1; // Jak Shadows + uint32 automatic_lod : 1; + uint32 manual_lod : 1; + uint32 point_sampler : 1; + uint32 invalid_tex0 : 1; // Lupin the 3rd - uint32 _free2:6; + uint32 _free2 : 6; }; uint64 key; }; // FIXME is the & useful ? - operator uint64() const {return key;} + operator uint64() const { return key; } - PSSelector() : key(0) {} + PSSelector() + : key(0) + { + } }; struct PSSamplerSelector @@ -339,22 +371,28 @@ public: { struct { - uint32 tau:1; - uint32 tav:1; - uint32 biln:1; - uint32 triln:3; - uint32 aniso:1; + uint32 tau : 1; + uint32 tav : 1; + uint32 biln : 1; + uint32 triln : 3; + uint32 aniso : 1; - uint32 _free:25; + uint32 _free : 25; }; uint32 key; }; - operator uint32() {return key;} + operator uint32() { return key; } - PSSamplerSelector() : key(0) {} - PSSamplerSelector(uint32 k) : key(k) {} + PSSamplerSelector() + : key(0) + { + } + PSSamplerSelector(uint32 k) + : key(k) + { + } }; struct OMDepthStencilSelector @@ -363,22 +401,28 @@ public: { struct { - uint32 ztst:2; - uint32 zwe:1; - uint32 date:1; - uint32 date_one:1; + uint32 ztst : 2; + uint32 zwe : 1; + uint32 date : 1; + uint32 date_one : 1; - uint32 _free:27; + uint32 _free : 27; }; uint32 key; }; // FIXME is the & useful ? - operator uint32() {return key;} + operator uint32() { return key; } - OMDepthStencilSelector() : key(0) {} - OMDepthStencilSelector(uint32 k) : key(k) {} + OMDepthStencilSelector() + : key(0) + { + } + OMDepthStencilSelector(uint32 k) + : key(k) + { + } }; struct OMColorMaskSelector @@ -387,26 +431,29 @@ public: { struct { - uint32 wr:1; - uint32 wg:1; - uint32 wb:1; - uint32 wa:1; + uint32 wr : 1; + uint32 wg : 1; + uint32 wb : 1; + uint32 wa : 1; - uint32 _free:28; + uint32 _free : 28; }; struct { - uint32 wrgba:4; + uint32 wrgba : 4; }; uint32 key; }; // FIXME is the & useful ? - operator uint32() {return key & 0xf;} + operator uint32() { return key & 0xf; } - OMColorMaskSelector() : key(0xF) {} + OMColorMaskSelector() + : key(0xF) + { + } OMColorMaskSelector(uint32 c) { wrgba = c; } }; @@ -416,7 +463,7 @@ public: GSVector4i ChannelShuffle; GSVector4i EMOD_AC; - MiscConstantBuffer() {memset(this, 0, sizeof(*this));} + MiscConstantBuffer() { memset(this, 0, sizeof(*this)); } }; static int m_shader_inst; @@ -436,61 +483,69 @@ private: std::vector m_shader_tfx_vgs; std::vector m_shader_tfx_fs; - GLuint m_fbo; // frame buffer container - GLuint m_fbo_read; // frame buffer container only for reading + GLuint m_fbo; // frame buffer container + GLuint m_fbo_read; // frame buffer container only for reading - GSVertexBufferStateOGL* m_va;// state of the vertex buffer/array + GSVertexBufferStateOGL* m_va; // state of the vertex buffer/array - struct { - GLuint ps[2]; // program object - GSUniformBufferOGL* cb; // uniform buffer object + struct + { + GLuint ps[2]; // program object + GSUniformBufferOGL* cb; // uniform buffer object } m_merge_obj; - struct { - GLuint ps[4]; // program object - GSUniformBufferOGL* cb; // uniform buffer object + struct + { + GLuint ps[4]; // program object + GSUniformBufferOGL* cb; // uniform buffer object } m_interlace; - struct { - GLuint vs; // program object - GLuint ps[ShaderConvert_Count]; // program object - GLuint ln; // sampler object - GLuint pt; // sampler object + struct + { + GLuint vs; // program object + GLuint ps[ShaderConvert_Count]; // program object + GLuint ln; // sampler object + GLuint pt; // sampler object GSDepthStencilOGL* dss; GSDepthStencilOGL* dss_write; GSUniformBufferOGL* cb; } m_convert; - struct { + struct + { GLuint ps; - GSUniformBufferOGL *cb; + GSUniformBufferOGL* cb; } m_fxaa; - struct { + struct + { GLuint ps; GSUniformBufferOGL* cb; } m_shaderfx; - struct { + struct + { GSDepthStencilOGL* dss; GSTexture* t; } m_date; - struct { + struct + { GLuint ps; } m_shadeboost; - struct { + struct + { uint16 last_query; - GLuint timer_query[1<<16]; + GLuint timer_query[1 << 16]; GLuint timer() { return timer_query[last_query]; } } m_profiler; - GLuint m_vs[1<<1]; - GLuint m_gs[1<<3]; - GLuint m_ps_ss[1<<7]; - GSDepthStencilOGL* m_om_dss[1<<5]; + GLuint m_vs[1 << 1]; + GLuint m_gs[1 << 3]; + GLuint m_ps_ss[1 << 7]; + GSDepthStencilOGL* m_om_dss[1 << 5]; std::unordered_map m_ps; GLuint m_apitrace; @@ -530,9 +585,9 @@ public: void GenerateProfilerData(); // Used by OpenGL, so the same calling convention is required. - static void APIENTRY DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, GLenum gl_severity, GLsizei gl_length, const GLchar *gl_message, const void* userParam); + static void APIENTRY DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, GLenum gl_severity, GLsizei gl_length, const GLchar* gl_message, const void* userParam); - bool Create(const std::shared_ptr &wnd); + bool Create(const std::shared_ptr& wnd); bool Reset(int w, int h); void Flip(); void SetVSync(int vsync); diff --git a/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp b/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp index 57657f6b1a..a6f2d5c234 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp +++ b/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp @@ -51,18 +51,20 @@ void GSRendererOGL::SetupIA(const float& sx, const float& sy) GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; - if (m_userhacks_wildhack && !m_isPackedUV_HackFlag && PRIM->TME && PRIM->FST) { - for(unsigned int i = 0; i < m_vertex.next; i++) + if (m_userhacks_wildhack && !m_isPackedUV_HackFlag && PRIM->TME && PRIM->FST) + { + for (unsigned int i = 0; i < m_vertex.next; i++) m_vertex.buff[i].UV &= 0x3FEF3FEF; } GLenum t = 0; const bool unscale_pt_ln = m_userHacks_enabled_unscale_ptln && (GetUpscaleMultiplier() != 1) && GLLoader::found_geometry_shader; - switch(m_vt.m_primclass) + switch (m_vt.m_primclass) { case GS_POINT_CLASS: - if (unscale_pt_ln) { + if (unscale_pt_ln) + { m_gs_sel.point = 1; vs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy); } @@ -71,7 +73,8 @@ void GSRendererOGL::SetupIA(const float& sx, const float& sy) break; case GS_LINE_CLASS: - if (unscale_pt_ln) { + if (unscale_pt_ln) + { m_gs_sel.line = 1; vs_cb.PointSize = GSVector2(16.0f * sx, 16.0f * sy); } @@ -95,11 +98,14 @@ void GSRendererOGL::SetupIA(const float& sx, const float& sy) // the extra validation cost of the extra stage. // // Note: keep Geometry Shader in the replayer to ease debug. - if (GLLoader::found_geometry_shader && !m_vt.m_accurate_stq && (m_vertex.next > 32 || GLLoader::in_replayer)) { // <=> 16 sprites (based on Shadow Hearts) + if (GLLoader::found_geometry_shader && !m_vt.m_accurate_stq && (m_vertex.next > 32 || GLLoader::in_replayer)) // <=> 16 sprites (based on Shadow Hearts) + { m_gs_sel.sprite = 1; t = GL_LINES; - } else { + } + else + { Lines2Sprites(); t = GL_TRIANGLES; @@ -121,10 +127,13 @@ void GSRendererOGL::SetupIA(const float& sx, const float& sy) void GSRendererOGL::EmulateZbuffer() { - if (m_context->TEST.ZTE) { + if (m_context->TEST.ZTE) + { m_om_dssel.ztst = m_context->TEST.ZTST; m_om_dssel.zwe = !m_context->ZBUF.ZMSK; - } else { + } + else + { m_om_dssel.ztst = ZTST_ALWAYS; } @@ -137,10 +146,14 @@ void GSRendererOGL::EmulateZbuffer() //ps_cb.MaxDepth = GSVector4(0.0f, 0.0f, 0.0f, 1.0f); m_ps_sel.zclamp = 0; - if (clamp_z) { - if (m_vt.m_primclass == GS_SPRITE_CLASS || m_vt.m_primclass == GS_POINT_CLASS) { + if (clamp_z) + { + if (m_vt.m_primclass == GS_SPRITE_CLASS || m_vt.m_primclass == GS_POINT_CLASS) + { vs_cb.MaxDepth = GSVector2i(max_z); - } else { + } + else + { ps_cb.MaxDepth = GSVector4(0.0f, 0.0f, 0.0f, max_z * ldexpf(1, -32)); m_ps_sel.zclamp = 1; } @@ -148,7 +161,8 @@ void GSRendererOGL::EmulateZbuffer() GSVertex* v = &m_vertex.buff[0]; // Minor optimization of a corner case (it allow to better emulate some alpha test effects) - if (m_om_dssel.ztst == ZTST_GEQUAL && m_vt.m_eq.z && v[0].XYZ.Z == max_z) { + if (m_om_dssel.ztst == ZTST_GEQUAL && m_vt.m_eq.z && v[0].XYZ.Z == max_z) + { GL_DBG("Optimize Z test GEQUAL to ALWAYS (%s)", psm_str(m_context->ZBUF.PSM)); m_om_dssel.ztst = ZTST_ALWAYS; } @@ -159,7 +173,8 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask() // Uncomment to disable texture shuffle emulation. // m_texture_shuffle = false; - if (m_texture_shuffle) { + if (m_texture_shuffle) + { m_ps_sel.shuffle = 1; m_ps_sel.dfmt = 0; @@ -185,11 +200,15 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask() m_om_csel.wrgba = 0; // 2 Select the new mask (Please someone put SSE here) - if (rg_mask != 0xFF) { - if (write_ba) { + if (rg_mask != 0xFF) + { + if (write_ba) + { GL_INS("Color shuffle %s => B", read_ba ? "B" : "R"); m_om_csel.wb = 1; - } else { + } + else + { GL_INS("Color shuffle %s => R", read_ba ? "B" : "R"); m_om_csel.wr = 1; } @@ -197,11 +216,15 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask() m_ps_sel.fbmask = 1; } - if (ba_mask != 0xFF) { - if (write_ba) { + if (ba_mask != 0xFF) + { + if (write_ba) + { GL_INS("Color shuffle %s => A", read_ba ? "A" : "G"); m_om_csel.wa = 1; - } else { + } + else + { GL_INS("Color shuffle %s => G", read_ba ? "A" : "G"); m_om_csel.wg = 1; } @@ -209,25 +232,32 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask() m_ps_sel.fbmask = 1; } - if (m_ps_sel.fbmask && m_sw_blending) { + if (m_ps_sel.fbmask && m_sw_blending) + { ps_cb.FbMask.r = rg_mask; ps_cb.FbMask.g = rg_mask; ps_cb.FbMask.b = ba_mask; ps_cb.FbMask.a = ba_mask; // No blending so hit unsafe path. - if (!PRIM->ABE) { + if (!PRIM->ABE) + { GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on tex shuffle", fbmask); m_require_one_barrier = true; - } else { + } + else + { GL_INS("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask); m_require_full_barrier = true; } - } else { + } + else + { m_ps_sel.fbmask = 0; } - - } else { + } + else + { m_ps_sel.dfmt = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt; GSVector4i fbmask_v = GSVector4i::load((int)m_context->FRAME.FBMSK); @@ -238,7 +268,8 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask() m_ps_sel.fbmask = m_sw_blending && (~ff_fbmask & ~zero_fbmask & 0xF); - if (m_ps_sel.fbmask) { + if (m_ps_sel.fbmask) + { ps_cb.FbMask = fbmask_v.u8to32(); // Only alpha is special here, I think we can take a very unsafe shortcut // Alpha isn't blended on the GS but directly copyied into the RT. @@ -260,14 +291,17 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask() have been invalidated before subsequent Draws are executed. */ // No blending so hit unsafe path. - if (!PRIM->ABE || !(~ff_fbmask & ~zero_fbmask & 0x7)) { + if (!PRIM->ABE || !(~ff_fbmask & ~zero_fbmask & 0x7)) + { GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, - (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32); + (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32); m_require_one_barrier = true; - } else { + } + else + { // The safe and accurate path (but slow) GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, - (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32); + (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32); m_require_full_barrier = true; } } @@ -276,20 +310,25 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask() void GSRendererOGL::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::Source* tex) { - GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; + GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; // Uncomment to disable HLE emulation (allow to trace the draw call) // m_channel_shuffle = false; // First let's check we really have a channel shuffle effect - if (m_channel_shuffle) { - if (m_game.title == CRC::GT4 || m_game.title == CRC::GT3 || m_game.title == CRC::GTConcept || m_game.title == CRC::TouristTrophy) { + if (m_channel_shuffle) + { + if (m_game.title == CRC::GT4 || m_game.title == CRC::GT3 || m_game.title == CRC::GTConcept || m_game.title == CRC::TouristTrophy) + { GL_INS("Gran Turismo RGB Channel"); m_ps_sel.channel = ChannelFetch_RGB; m_context->TEX0.TFX = TFX_DECAL; *rt = tex->m_from_target; - } else if (m_game.title == CRC::Tekken5) { - if (m_context->FRAME.FBW == 1) { + } + else if (m_game.title == CRC::Tekken5) + { + if (m_context->FRAME.FBW == 1) + { // Used in stages: Secret Garden, Acid Rain, Moonlit Wilderness GL_INS("Tekken5 RGB Channel"); m_ps_sel.channel = ChannelFetch_RGB; @@ -298,23 +337,32 @@ void GSRendererOGL::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache:: // Minus current draw call m_skip = 12 * (3 + 3 + 1) - 1; *rt = tex->m_from_target; - } else { + } + else + { // Could skip model drawing if wrongly detected m_channel_shuffle = false; } - } else if ((tex->m_texture->GetType() == GSTexture::DepthStencil) && !(tex->m_32_bits_fmt)) { + } + else if ((tex->m_texture->GetType() == GSTexture::DepthStencil) && !(tex->m_32_bits_fmt)) + { // So far 2 games hit this code path. Urban Chaos and Tales of Abyss // UC: will copy depth to green channel // ToA: will copy depth to alpha channel - if ((m_context->FRAME.FBMSK & 0xFF0000) == 0xFF0000) { + if ((m_context->FRAME.FBMSK & 0xFF0000) == 0xFF0000) + { // Green channel is masked GL_INS("Tales Of Abyss Crazyness (MSB 16b depth to Alpha)"); m_ps_sel.tales_of_abyss_hle = 1; - } else { + } + else + { GL_INS("Urban Chaos Crazyness (Green extraction)"); m_ps_sel.urban_chaos_hle = 1; } - } else if (m_index.tail <= 64 && m_context->CLAMP.WMT == 3) { + } + else if (m_index.tail <= 64 && m_context->CLAMP.WMT == 3) + { // Blood will tell. I think it is channel effect too but again // implemented in a different way. I don't want to add more CRC stuff. So // let's disable channel when the signature is different @@ -323,23 +371,29 @@ void GSRendererOGL::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache:: // handled above. GL_INS("Maybe not a channel!"); m_channel_shuffle = false; - } else if (m_context->CLAMP.WMS == 3 && ((m_context->CLAMP.MAXU & 0x8) == 8)) { + } + else if (m_context->CLAMP.WMS == 3 && ((m_context->CLAMP.MAXU & 0x8) == 8)) + { // Read either blue or Alpha. Let's go for Blue ;) // MGS3/Kill Zone GL_INS("Blue channel"); m_ps_sel.channel = ChannelFetch_BLUE; - } else if (m_context->CLAMP.WMS == 3 && ((m_context->CLAMP.MINU & 0x8) == 0)) { + } + else if (m_context->CLAMP.WMS == 3 && ((m_context->CLAMP.MINU & 0x8) == 0)) + { // Read either Red or Green. Let's check the V coordinate. 0-1 is likely top so // red. 2-3 is likely bottom so green (actually depends on texture base pointer offset) bool green = PRIM->FST && (m_vertex.buff[0].V & 32); - if (green && (m_context->FRAME.FBMSK & 0x00FFFFFF) == 0x00FFFFFF) { + if (green && (m_context->FRAME.FBMSK & 0x00FFFFFF) == 0x00FFFFFF) + { // Typically used in Terminator 3 - int blue_mask = m_context->FRAME.FBMSK >> 24; + int blue_mask = m_context->FRAME.FBMSK >> 24; int green_mask = ~blue_mask & 0xFF; int blue_shift = -1; // Note: potentially we could also check the value of the clut - switch (m_context->FRAME.FBMSK >> 24) { + switch (m_context->FRAME.FBMSK >> 24) + { case 0xFF: ASSERT(0); break; case 0xFE: blue_shift = 1; break; case 0xFC: blue_shift = 2; break; @@ -354,31 +408,40 @@ void GSRendererOGL::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache:: int green_shift = 8 - blue_shift; dev->SetupCBMisc(GSVector4i(blue_mask, blue_shift, green_mask, green_shift)); - if (blue_shift >= 0) { + if (blue_shift >= 0) + { GL_INS("Green/Blue channel (%d, %d)", blue_shift, green_shift); m_ps_sel.channel = ChannelFetch_GXBY; m_context->FRAME.FBMSK = 0x00FFFFFF; - } else { + } + else + { GL_INS("Green channel (wrong mask) (fbmask %x)", m_context->FRAME.FBMSK >> 24); m_ps_sel.channel = ChannelFetch_GREEN; } - - } else if (green) { + } + else if (green) + { GL_INS("Green channel"); m_ps_sel.channel = ChannelFetch_GREEN; - } else { + } + else + { // Pop GL_INS("Red channel"); m_ps_sel.channel = ChannelFetch_RED; } - } else { + } + else + { GL_INS("Channel not supported"); m_channel_shuffle = false; } } // Effect is really a channel shuffle effect so let's cheat a little - if (m_channel_shuffle) { + if (m_channel_shuffle) + { dev->PSSetShaderResource(4, tex->m_from_target); m_require_one_barrier = true; @@ -395,8 +458,9 @@ void GSRendererOGL::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache:: m_vertex.head = m_vertex.tail = m_vertex.next = 2; m_index.tail = 2; - - } else { + } + else + { #ifdef ENABLE_OGL_DEBUG dev->PSSetShaderResource(4, NULL); #endif @@ -405,24 +469,26 @@ void GSRendererOGL::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache:: void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) { - GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; + GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; const GIFRegALPHA& ALPHA = m_context->ALPHA; - bool sw_blending = false; + bool sw_blending = false; // No blending so early exit - if (!(PRIM->ABE || m_env.PABE.PABE || (PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS))) { + if (!(PRIM->ABE || m_env.PABE.PABE || (PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS))) + { dev->OMSetBlendState(); return; } - if (m_env.PABE.PABE) { + if (m_env.PABE.PABE) + { // Breath of Fire Dragon Quarter, Strawberry Shortcake, Super Robot Wars, Cartoon Network Racing. GL_INS("PABE mode ENABLED"); m_ps_sel.pabe = 1; } // Compute the blending equation to detect special case - const uint8 blend_index = uint8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D); + const uint8 blend_index = uint8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D); const int blend_flag = m_dev->GetBlendFlags(blend_index); // SW Blend is (nearly) free. Let's use it. @@ -438,12 +504,14 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) // Warning no break on purpose // Note: the [[fallthrough]] attribute tell compilers not to complain about not having breaks. - switch (m_sw_blending) { + switch (m_sw_blending) + { case ACC_BLEND_ULTRA: sw_blending |= true; [[fallthrough]]; case ACC_BLEND_FULL: - if (!m_vt.m_alpha.valid && (ALPHA.C == 0)) GetAlphaMinMax(); + if (!m_vt.m_alpha.valid && (ALPHA.C == 0)) + GetAlphaMinMax(); sw_blending |= (ALPHA.A != ALPHA.B) && ((ALPHA.C == 0 && m_vt.m_alpha.max > 128) || (ALPHA.C == 2 && ALPHA.FIX > 128u)); [[fallthrough]]; case ACC_BLEND_HIGH: @@ -463,29 +531,37 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) } // Color clip - if (m_env.COLCLAMP.CLAMP == 0) { + if (m_env.COLCLAMP.CLAMP == 0) + { // Safe FBMASK, avoid hitting accumulation mode on 16bit, // fixes shadows in Superman shadows of Apokolips. const bool sw_fbmask_colclip = !m_require_one_barrier && m_ps_sel.fbmask; const bool free_colclip = m_prim_overlap == PRIM_OVERLAP_NO || blend_non_recursive || sw_fbmask_colclip; GL_DBG("COLCLIP Info (Blending: %d/%d/%d/%d, SW FBMASK: %d, OVERLAP: %d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, sw_fbmask_colclip, m_prim_overlap); - if (free_colclip) { + if (free_colclip) + { // The fastest algo that requires a single pass GL_INS("COLCLIP Free mode ENABLED"); m_ps_sel.colclip = 1; sw_blending = true; accumulation_blend = false; // disable the HDR algo - } else if (accumulation_blend) { + } + else if (accumulation_blend) + { // A fast algo that requires 2 passes GL_INS("COLCLIP Fast HDR mode ENABLED"); m_ps_sel.hdr = 1; - sw_blending = true; // Enable sw blending for the HDR algo - } else if (sw_blending) { + sw_blending = true; // Enable sw blending for the HDR algo + } + else if (sw_blending) + { // A slow algo that could requires several passes (barely used) GL_INS("COLCLIP SW mode ENABLED"); m_ps_sel.colclip = 1; - } else { + } + else + { GL_INS("COLCLIP HDR mode ENABLED"); m_ps_sel.hdr = 1; } @@ -497,7 +573,8 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) // Switch DATE_GL42 with DATE_GL45 in such cases to ensure accuracy. // No mix of COLCLIP + sw blend + DATE_GL42, neither sw fbmask + DATE_GL42. // Note: Do the swap after colclip to avoid adding extra conditions. - if (sw_blending && DATE_GL42) { + if (sw_blending && DATE_GL42) + { GL_PERF("DATE: Swap DATE_GL42 with DATE_GL45"); m_require_full_barrier = true; DATE_GL42 = false; @@ -507,18 +584,21 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) // For stat to optimize accurate option #if 0 GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. Prim:%d number %d (drawlist %d) (sw %d)", - ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, m_drawlist.size(), sw_blending); + ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, m_drawlist.size(), sw_blending); #endif - if (sw_blending) { + if (sw_blending) + { m_ps_sel.blend_a = ALPHA.A; m_ps_sel.blend_b = ALPHA.B; m_ps_sel.blend_c = ALPHA.C; m_ps_sel.blend_d = ALPHA.D; - if (accumulation_blend) { + if (accumulation_blend) + { // Keep HW blending to do the addition/subtraction dev->OMSetBlendState(blend_index, 0, false, true); - if (ALPHA.A == 2) { + if (ALPHA.A == 2) + { // The blend unit does a reverse subtraction so it means // the shader must output a positive value. // Replace 0 - Cs by Cs - 0 @@ -529,8 +609,9 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) m_ps_sel.blend_d = 2; // Note accumulation_blend doesn't require a barrier - - } else { + } + else + { // Disable HW blending dev->OMSetBlendState(); @@ -538,16 +619,22 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) } // Require the fix alpha vlaue - if (ALPHA.C == 2) { + if (ALPHA.C == 2) + { ps_cb.TA_Af.a = (float)ALPHA.FIX / 128.0f; } - } else { + } + else + { m_ps_sel.clr1 = !!(blend_flag & BLEND_C_CLR); - if (m_ps_sel.dfmt == 1 && ALPHA.C == 1) { + if (m_ps_sel.dfmt == 1 && ALPHA.C == 1) + { // 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent - const uint8 hacked_blend_index = blend_index + 3; // +3 <=> +1 on C + const uint8 hacked_blend_index = blend_index + 3; // +3 <=> +1 on C dev->OMSetBlendState(hacked_blend_index, 128, true); - } else { + } + else + { dev->OMSetBlendState(blend_index, ALPHA.FIX, (ALPHA.C == 2)); } } @@ -555,12 +642,12 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45) void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex) { - GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; + GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; // Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth. //const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; - const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM]; - const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm; + const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM]; + const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm; const uint8 wms = m_context->CLAMP.WMS; const uint8 wmt = m_context->CLAMP.WMT; @@ -581,7 +668,8 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex) break; case TriFiltering::PS2: - if (need_mipmap && m_mipmap != 2) { + if (need_mipmap && m_mipmap != 2) + { trilinear = m_context->TEX1.MMIN; trilinear_auto = true; } @@ -603,14 +691,16 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex) // Performance note: // 1/ Don't set 0 as it is the default value // 2/ Only keep aem when it is useful (avoid useless shader permutation) - if (m_ps_sel.shuffle) { + if (m_ps_sel.shuffle) + { // Force a 32 bits access (normally shuffle is done on 16 bits) // m_ps_sel.tex_fmt = 0; // removed as an optimization - m_ps_sel.aem = m_env.TEXA.AEM; + m_ps_sel.aem = m_env.TEXA.AEM; ASSERT(tex->m_target); // Require a float conversion if the texure is a depth otherwise uses Integral scaling - if (psm.depth) { + if (psm.depth) + { m_ps_sel.depth_fmt = (tex->m_texture->GetType() != GSTexture::DepthStencil) ? 3 : 1; m_vs_sel.int_fst = !PRIM->FST; // select float/int coordinate } @@ -626,17 +716,19 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex) bilinear &= m_vt.IsLinear(); vs_cb.TextureOffset = RealignTargetTextureCoordinate(tex); - - } else if (tex->m_target) { + } + else if (tex->m_target) + { // Use an old target. AEM and index aren't resolved it must be done // on the GPU // Select the 32/24/16 bits color (AEM) m_ps_sel.tex_fmt = cpsm.fmt; - m_ps_sel.aem = m_env.TEXA.AEM; + m_ps_sel.aem = m_env.TEXA.AEM; // Don't upload AEM if format is 32 bits - if (cpsm.fmt) { + if (cpsm.fmt) + { GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); ta /= 255.0f; // FIXME rely on compiler for the optimization @@ -645,7 +737,8 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex) } // Select the index format - if (tex->m_palette) { + if (tex->m_palette) + { // FIXME Potentially improve fmt field in GSLocalMemory if (m_context->TEX0.PSM == PSM_PSMT4HL) m_ps_sel.tex_fmt |= 1 << 2; @@ -661,14 +754,17 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex) } // Depth format - if (tex->m_texture->GetType() == GSTexture::DepthStencil) { + if (tex->m_texture->GetType() == GSTexture::DepthStencil) + { // Require a float conversion if the texure is a depth format m_ps_sel.depth_fmt = (psm.bpp == 16) ? 2 : 1; m_vs_sel.int_fst = !PRIM->FST; // select float/int coordinate // Don't force interpolation on depth format bilinear &= m_vt.IsLinear(); - } else if (psm.depth) { + } + else if (psm.depth) + { // Use Integral scaling m_ps_sel.depth_fmt = 3; m_vs_sel.int_fst = !PRIM->FST; // select float/int coordinate @@ -678,25 +774,30 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex) } vs_cb.TextureOffset = RealignTargetTextureCoordinate(tex); - - } else if (tex->m_palette) { + } + else if (tex->m_palette) + { // Use a standard 8 bits texture. AEM is already done on the CLUT // Therefore you only need to set the index // m_ps_sel.aem = 0; // removed as an optimization // Note 4 bits indexes are converted to 8 bits m_ps_sel.tex_fmt = 3 << 2; - - } else { + } + else + { // Standard texture. Both index and AEM expansion were already done by the CPU. // m_ps_sel.tex_fmt = 0; // removed as an optimization // m_ps_sel.aem = 0; // removed as an optimization } - if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) { + if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) + { // Micro optimization that reduces GPU load (removes 5 instructions on the FS program) m_ps_sel.tfx = TFX_DECAL; - } else { + } + else + { m_ps_sel.tfx = m_context->TEX0.TFX; } @@ -717,25 +818,31 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex) ps_cb.WH = WH; ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); - if (complex_wms_wmt) { + if (complex_wms_wmt) + { ps_cb.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV); ps_cb.MinMax = GSVector4(ps_cb.MskFix) / WH.xyxy(); - } else if (trilinear_manual) { + } + else if (trilinear_manual) + { // Reuse MinMax for mipmap parameter to avoid an extension of the UBO ps_cb.MinMax.x = (float)m_context->TEX1.K / 16.0f; ps_cb.MinMax.y = float(1 << m_context->TEX1.L); ps_cb.MinMax.z = float(m_lod.x); // Offset because first layer is m_lod, dunno if we can do better ps_cb.MinMax.w = float(m_lod.y); - } else if (trilinear_auto) { + } + else if (trilinear_auto) + { tex->m_texture->GenerateMipmap(); } // TC Offset Hack m_ps_sel.tcoffsethack = m_userhacks_tcoffset; - ps_cb.TC_OH_TS = GSVector4(1/16.0f, 1/16.0f, m_userhacks_tcoffset_x, m_userhacks_tcoffset_y) / WH.xyxy(); + ps_cb.TC_OH_TS = GSVector4(1 / 16.0f, 1 / 16.0f, m_userhacks_tcoffset_x, m_userhacks_tcoffset_y) / WH.xyxy(); // Must be done after all coordinates math - if (m_context->HasFixedTEX0() && !PRIM->FST) { + if (m_context->HasFixedTEX0() && !PRIM->FST) + { m_ps_sel.invalid_tex0 = 1; // Use invalid size to denormalize ST coordinate ps_cb.WH.x = (float)(1 << m_context->stack.TEX0.TW); @@ -746,22 +853,28 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex) } // Only enable clamping in CLAMP mode. REGION_CLAMP will be done manually in the shader - m_ps_ssel.tau = (wms != CLAMP_CLAMP); - m_ps_ssel.tav = (wmt != CLAMP_CLAMP); - if (shader_emulated_sampler) { - m_ps_ssel.biln = 0; + m_ps_ssel.tau = (wms != CLAMP_CLAMP); + m_ps_ssel.tav = (wmt != CLAMP_CLAMP); + if (shader_emulated_sampler) + { + m_ps_ssel.biln = 0; m_ps_ssel.aniso = 0; m_ps_ssel.triln = 0; - } else { - m_ps_ssel.biln = bilinear; + } + else + { + m_ps_ssel.biln = bilinear; // Aniso filtering doesn't work with textureLod so use texture (automatic_lod) instead. // Enable aniso only for triangles. Sprites are flat so aniso is likely useless (it would save perf for others primitives). const bool anisotropic = m_vt.m_primclass == GS_TRIANGLE_CLASS && !trilinear_manual; m_ps_ssel.aniso = anisotropic; m_ps_ssel.triln = trilinear; - if (trilinear_manual) { + if (trilinear_manual) + { m_ps_sel.manual_lod = 1; - } else if (trilinear_auto || anisotropic) { + } + else if (trilinear_auto || anisotropic) + { m_ps_sel.automatic_lod = 1; } } @@ -788,7 +901,8 @@ GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap() m_drawlist.clear(); size_t i = 0; - while (i < count) { + while (i < count) + { // In order to speed up comparison a bounding-box is accumulated. It removes a // loop so code is much faster (check game virtua fighter). Besides it allow to check // properly the Y order. @@ -797,12 +911,13 @@ GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap() // .y = min(v[i].XYZ.Y, v[i+1].XYZ.Y) // .z = max(v[i].XYZ.X, v[i+1].XYZ.X) // .w = max(v[i].XYZ.Y, v[i+1].XYZ.Y) - GSVector4i all = GSVector4i(v[i].m[1]).upl16(GSVector4i(v[i+1].m[1])).upl16().xzyw(); + GSVector4i all = GSVector4i(v[i].m[1]).upl16(GSVector4i(v[i + 1].m[1])).upl16().xzyw(); all = all.xyxy().blend(all.zwzw(), all > all.zwxy()); size_t j = i + 2; - while (j < count) { - GSVector4i sprite = GSVector4i(v[j].m[1]).upl16(GSVector4i(v[j+1].m[1])).upl16().xzyw(); + while (j < count) + { + GSVector4i sprite = GSVector4i(v[j].m[1]).upl16(GSVector4i(v[j + 1].m[1])).upl16().xzyw(); sprite = sprite.xyxy().blend(sprite.zwzw(), sprite > sprite.zwxy()); // Be sure to get vertex in good order, otherwise .r* function doesn't @@ -812,9 +927,12 @@ GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap() ASSERT(all.x <= all.z); ASSERT(all.y <= all.w); - if (all.rintersect(sprite).rempty()) { + if (all.rintersect(sprite).rempty()) + { all = all.runion_ordered(sprite); - } else { + } + else + { overlap = PRIM_OVERLAP_YES; break; } @@ -873,18 +991,25 @@ void GSRendererOGL::SendDraw() { GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; - if (!m_require_full_barrier && m_require_one_barrier) { + if (!m_require_full_barrier && m_require_one_barrier) + { // Need only a single barrier glTextureBarrier(); dev->DrawIndexedPrimitive(); - } else if (!m_require_full_barrier) { + } + else if (!m_require_full_barrier) + { // Don't need any barrier dev->DrawIndexedPrimitive(); - } else if (m_prim_overlap == PRIM_OVERLAP_NO) { + } + else if (m_prim_overlap == PRIM_OVERLAP_NO) + { // Need full barrier but a single barrier will be enough glTextureBarrier(); dev->DrawIndexedPrimitive(); - } else if (m_vt.m_primclass == GS_SPRITE_CLASS) { + } + else if (m_vt.m_primclass == GS_SPRITE_CLASS) + { size_t nb_vertex = (m_gs_sel.sprite == 1) ? 2 : 6; GL_PUSH("Split the draw (SPRITE)"); @@ -892,32 +1017,36 @@ void GSRendererOGL::SendDraw() #if defined(_DEBUG) // Check how draw call is split. std::map frequency; - for (const auto& it: m_drawlist) + for (const auto& it : m_drawlist) ++frequency[it]; std::string message; - for (const auto& it: frequency) + for (const auto& it : frequency) message += " " + std::to_string(it.first) + "(" + std::to_string(it.second) + ")"; GL_PERF("Split single draw (%d sprites) into %zu draws: consecutive draws(frequency):%s", - m_index.tail / nb_vertex, m_drawlist.size(), message.c_str()); + m_index.tail / nb_vertex, m_drawlist.size(), message.c_str()); #endif - for (size_t count, p = 0, n = 0; n < m_drawlist.size(); p += count, ++n) { + for (size_t count, p = 0, n = 0; n < m_drawlist.size(); p += count, ++n) + { count = m_drawlist[n] * nb_vertex; glTextureBarrier(); dev->DrawIndexedPrimitive(p, count); } - } else { + } + else + { // FIXME: Investigate: a dynamic check to pack as many primitives as possibles // I'm nearly sure GSdx already have this kind of code (maybe we can adapt GSDirtyRect) size_t nb_vertex = GSUtil::GetClassVertexCount(m_vt.m_primclass); GL_PUSH("Split the draw"); - GL_PERF("Split single draw in %d draw", m_index.tail/nb_vertex); + GL_PERF("Split single draw in %d draw", m_index.tail / nb_vertex); - for (size_t p = 0; p < m_index.tail; p += nb_vertex) { + for (size_t p = 0; p < m_index.tail; p += nb_vertex) + { glTextureBarrier(); dev->DrawIndexedPrimitive(p, nb_vertex); } @@ -945,11 +1074,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GSVector4i area_in = GSVector4i(m_vt.m_min.t.xyxy(m_vt.m_max.t)); GL_PUSH("GL Draw from %d (area %d,%d => %d,%d) in %d (Depth %d) (area %d,%d => %d,%d)", - tex && tex->m_texture ? tex->m_texture->GetID() : -1, - area_in.x, area_in.y, area_in.z, area_in.w, - rt ? rt->GetID() : -1, ds ? ds->GetID() : -1, - area_out.x, area_out.y, area_out.z, area_out.w - ); + tex && tex->m_texture ? tex->m_texture->GetID() : -1, + area_in.x, area_in.y, area_in.z, area_in.w, + rt ? rt->GetID() : -1, ds ? ds->GetID() : -1, + area_out.x, area_out.y, area_out.z, area_out.w); #endif GSTexture* hdr_rt = NULL; @@ -985,15 +1113,19 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour m_prim_overlap = PrimitiveOverlap(); // Detect framebuffer read that will need special handling - if ((m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && m_sw_blending) { - if ((m_context->FRAME.FBMSK == 0x00FFFFFF) && (m_vt.m_primclass == GS_TRIANGLE_CLASS)) { + if ((m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && m_sw_blending) + { + if ((m_context->FRAME.FBMSK == 0x00FFFFFF) && (m_vt.m_primclass == GS_TRIANGLE_CLASS)) + { // This pattern is used by several games to emulate a stencil (shadow) // Ratchet & Clank, Jak do alpha integer multiplication (tfx) which is mostly equivalent to +1/-1 // Tri-Ace (Star Ocean 3/RadiataStories/VP2) uses a palette to handle the +1/-1 GL_DBG("Source and Target are the same! Let's sample the framebuffer"); m_ps_sel.tex_is_fb = 1; m_require_full_barrier = true; - } else if (m_prim_overlap != PRIM_OVERLAP_NO) { + } + else if (m_prim_overlap != PRIM_OVERLAP_NO) + { // Note: It is fine if the texture fits in a single GS page. First access will cache // the page in the GS texture buffer. GL_INS("ERROR: Source and Target are the same!"); @@ -1069,33 +1201,43 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Blend - if (!IsOpaque() && rt) { + if (!IsOpaque() && rt) + { EmulateBlending(DATE_GL42, DATE_GL45); - } else { + } + else + { dev->OMSetBlendState(); // No blending please } - if (m_ps_sel.dfmt == 1) { + if (m_ps_sel.dfmt == 1) + { // Disable writing of the alpha channel m_om_csel.wa = 0; } // DATE setup, no DATE_GL45 please - if (DATE && !DATE_GL45) { + if (DATE && !DATE_GL45) + { GSVector4i dRect = ComputeBoundingBox(rtscale, rtsize); // Reduce the quantity of clean function - glScissor( dRect.x, dRect.y, dRect.width(), dRect.height() ); + glScissor(dRect.x, dRect.y, dRect.width(), dRect.height()); GLState::scissor = dRect; // Must be done here to avoid any GL state pertubation (clear function...) // Create an r32ui image that will containt primitive ID - if (DATE_GL42) { + if (DATE_GL42) + { dev->InitPrimDateTexture(rt, dRect); - } else if (DATE_one) { + } + else if (DATE_one) + { dev->ClearStencil(ds, 1); - } else { + } + else + { GSVector4 src = GSVector4(dRect) / GSVector4(rtsize.x, rtsize.y).xyxy(); GSVector4 dst = src * 2.0f - 1.0f; @@ -1147,14 +1289,19 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // GS_SPRITE_CLASS are already flat (either by CPU or the GS) m_ps_sel.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 1 : PRIM->IIP; - if (DATE_GL45) { + if (DATE_GL45) + { m_ps_sel.date = 5 + m_context->TEST.DATM; - } else if (DATE_one) { + } + else if (DATE_one) + { m_require_one_barrier = true; - m_ps_sel.date = 5 + m_context->TEST.DATM; - m_om_dssel.date = 1; + m_ps_sel.date = 5 + m_context->TEST.DATM; + m_om_dssel.date = 1; m_om_dssel.date_one = 1; - } else if (DATE) { + } + else if (DATE) + { if (DATE_GL42) m_ps_sel.date = 1 + m_context->TEST.DATM; else @@ -1191,7 +1338,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour bool ate_RGBA_then_Z = false; bool ate_RGB_then_ZA = false; uint8 ps_atst = 0; - if (ate_first_pass & ate_second_pass) { + if (ate_first_pass & ate_second_pass) + { GL_DBG("Complex Alpha Test"); const bool commutative_depth = (m_om_dssel.ztst == ZTST_GEQUAL && m_vt.m_eq.z) || (m_om_dssel.ztst == ZTST_ALWAYS); const bool commutative_alpha = (m_context->ALPHA.C != 1); // when either Alpha Src or a constant @@ -1200,38 +1348,48 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ate_RGB_then_ZA = (m_context->TEST.AFAIL == AFAIL_RGB_ONLY) & commutative_depth & commutative_alpha; } - if (ate_RGBA_then_Z) { + if (ate_RGBA_then_Z) + { GL_DBG("Alternate ATE handling: ate_RGBA_then_Z"); // Render all color but don't update depth // ATE is disabled here m_om_dssel.zwe = false; - } else if (ate_RGB_then_ZA) { + } + else if (ate_RGB_then_ZA) + { GL_DBG("Alternate ATE handling: ate_RGB_then_ZA"); // Render RGB color but don't update depth/alpha // ATE is disabled here m_om_dssel.zwe = false; m_om_csel.wa = false; - } else { + } + else + { EmulateAtst(ps_cb.FogColor_AREF, ps_atst, false); m_ps_sel.atst = ps_atst; } - if (tex) { + if (tex) + { EmulateTextureSampler(tex); - } else { + } + else + { m_ps_sel.tfx = 4; } // Always bind the RT. This way special effect can use it. dev->PSSetShaderResource(3, rt); - if (m_game.title == CRC::ICO) { + if (m_game.title == CRC::ICO) + { GSVertex* v = &m_vertex.buff[0]; const GSVideoMode mode = GetVideoMode(); if (tex && m_vt.m_primclass == GS_SPRITE_CLASS && m_vertex.next == 2 && PRIM->ABE && // Blend texture - ((v[1].U == 8200 && v[1].V == 7176 && mode == GSVideoMode::NTSC) || // at display resolution 512x448 - (v[1].U == 8200 && v[1].V == 8200 && mode == GSVideoMode::PAL)) && // at display resolution 512x512 - tex->m_TEX0.PSM == PSM_PSMT8H) { // i.e. read the alpha channel of a 32 bits texture + ((v[1].U == 8200 && v[1].V == 7176 && mode == GSVideoMode::NTSC) || // at display resolution 512x448 + (v[1].U == 8200 && v[1].V == 8200 && mode == GSVideoMode::PAL)) && // at display resolution 512x512 + tex->m_TEX0.PSM == PSM_PSMT8H) // i.e. read the alpha channel of a 32 bits texture + { // Note potentially we can limit to TBP0:0x2800 // Depth buffer was moved so GSdx will invalide it which means a @@ -1248,7 +1406,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->PSSetShaderResource(4, ds); // We need the palette to convert the depth to the correct alpha value. - if (!tex->m_palette) { + if (!tex->m_palette) + { uint16 pal = GSLocalMemory::m_psm[tex->m_TEX0.PSM].pal; m_tc->AttachPaletteToSource(tex, pal, true); dev->PSSetShaderResource(1, tex->m_palette); @@ -1277,7 +1436,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour if (ds) ds->CommitRegion(GSVector2i(commitRect.z, commitRect.w)); - if (DATE_GL42) { + if (DATE_GL42) + { GL_PUSH("Date GL42"); // It could be good idea to use stencil in the same time. // Early stencil test will reduce the number of atomic-load operation @@ -1306,13 +1466,16 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->Barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); } - if (m_ps_sel.hdr) { + if (m_ps_sel.hdr) + { hdr_rt = dev->CreateTexture(rtsize.x, rtsize.y, GL_RGBA32F); dev->CopyRectConv(rt, hdr_rt, ComputeBoundingBox(rtscale, rtsize), false); dev->OMSetRenderTargets(hdr_rt, ds, &scissor); - } else { + } + else + { dev->OMSetRenderTargets(rt, ds, &scissor); } @@ -1351,7 +1514,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour bool b = m_om_csel.wb; bool a = m_om_csel.wa; - switch(m_context->TEST.AFAIL) + switch (m_context->TEST.AFAIL) { case AFAIL_KEEP: z = r = g = b = a = false; break; // none case AFAIL_FB_ONLY: z = false; break; // rgba @@ -1362,10 +1525,13 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Depth test should be disabled when depth writes are masked and similarly, Alpha test must be disabled // when writes to all of the alpha bits in the Framebuffer are masked. - if (ate_RGBA_then_Z) { + if (ate_RGBA_then_Z) + { z = !m_context->ZBUF.ZMSK; r = g = b = a = false; - } else if (ate_RGB_then_ZA) { + } + else if (ate_RGB_then_ZA) + { z = !m_context->ZBUF.ZMSK; a = (m_context->FRAME.FBMSK & 0xFF000000) != 0xFF000000; r = g = b = false; @@ -1386,7 +1552,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } } - if (DATE_GL42) { + if (DATE_GL42) + { dev->RecycleDateTexture(); } @@ -1394,7 +1561,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Warning: EndScene must be called before StretchRect otherwise // vertices will be overwritten. Trust me you don't want to do that. - if (hdr_rt) { + if (hdr_rt) + { GSVector4 dRect(ComputeBoundingBox(rtscale, rtsize)); GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy(); dev->StretchRect(hdr_rt, sRect, rt, dRect, ShaderConvert_MOD_256, false); diff --git a/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.h b/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.h index b7c77203f2..7010f74dc6 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.h +++ b/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.h @@ -27,13 +27,15 @@ class GSRendererOGL final : public GSRendererHW { - enum PRIM_OVERLAP { + enum PRIM_OVERLAP + { PRIM_OVERLAP_UNKNOW, PRIM_OVERLAP_YES, PRIM_OVERLAP_NO }; - enum ACC_BLEND { + enum ACC_BLEND + { ACC_BLEND_NONE = 0, ACC_BLEND_BASIC = 1, ACC_BLEND_MEDIUM = 2, @@ -42,46 +44,46 @@ class GSRendererOGL final : public GSRendererHW ACC_BLEND_ULTRA = 5 }; - private: - PRIM_OVERLAP m_prim_overlap; - std::vector m_drawlist; +private: + PRIM_OVERLAP m_prim_overlap; + std::vector m_drawlist; - TriFiltering UserHacks_tri_filter; + TriFiltering UserHacks_tri_filter; - GSDeviceOGL::VSConstantBuffer vs_cb; - GSDeviceOGL::PSConstantBuffer ps_cb; + GSDeviceOGL::VSConstantBuffer vs_cb; + GSDeviceOGL::PSConstantBuffer ps_cb; - bool m_require_one_barrier; - bool m_require_full_barrier; + bool m_require_one_barrier; + bool m_require_full_barrier; - GSDeviceOGL::VSSelector m_vs_sel; - GSDeviceOGL::GSSelector m_gs_sel; - GSDeviceOGL::PSSelector m_ps_sel; + GSDeviceOGL::VSSelector m_vs_sel; + GSDeviceOGL::GSSelector m_gs_sel; + GSDeviceOGL::PSSelector m_ps_sel; - GSDeviceOGL::PSSamplerSelector m_ps_ssel; - GSDeviceOGL::OMColorMaskSelector m_om_csel; - GSDeviceOGL::OMDepthStencilSelector m_om_dssel; + GSDeviceOGL::PSSamplerSelector m_ps_ssel; + GSDeviceOGL::OMColorMaskSelector m_om_csel; + GSDeviceOGL::OMDepthStencilSelector m_om_dssel; - private: - inline void ResetStates(); - inline void SetupIA(const float& sx, const float& sy); - inline void EmulateTextureShuffleAndFbmask(); - inline void EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::Source* tex); - inline void EmulateBlending(bool& DATE_GL42, bool& DATE_GL45); - inline void EmulateTextureSampler(const GSTextureCache::Source* tex); - inline void EmulateZbuffer(); +private: + inline void ResetStates(); + inline void SetupIA(const float& sx, const float& sy); + inline void EmulateTextureShuffleAndFbmask(); + inline void EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::Source* tex); + inline void EmulateBlending(bool& DATE_GL42, bool& DATE_GL45); + inline void EmulateTextureSampler(const GSTextureCache::Source* tex); + inline void EmulateZbuffer(); - public: - GSRendererOGL(); - virtual ~GSRendererOGL() {}; +public: + GSRendererOGL(); + virtual ~GSRendererOGL() {} - bool CreateDevice(GSDevice* dev); + bool CreateDevice(GSDevice* dev); - void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) final; + void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) final; - PRIM_OVERLAP PrimitiveOverlap(); + PRIM_OVERLAP PrimitiveOverlap(); - void SendDraw(); + void SendDraw(); - bool IsDummyTexture() const final; + bool IsDummyTexture() const final; }; diff --git a/plugins/GSdx/Renderers/OpenGL/GSShaderOGL.cpp b/plugins/GSdx/Renderers/OpenGL/GSShaderOGL.cpp index 5d8b53a1c1..80eebc7674 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSShaderOGL.cpp +++ b/plugins/GSdx/Renderers/OpenGL/GSShaderOGL.cpp @@ -29,9 +29,9 @@ #include "GSdxResources.h" #endif -GSShaderOGL::GSShaderOGL(bool debug) : - m_pipeline(0), - m_debug_shader(debug) +GSShaderOGL::GSShaderOGL(bool debug) + : m_pipeline(0) + , m_debug_shader(debug) { theApp.LoadResource(IDR_COMMON_GLSL, m_common_header); @@ -43,10 +43,12 @@ GSShaderOGL::GSShaderOGL(bool debug) : GSShaderOGL::~GSShaderOGL() { printf("Delete %zu Shaders, %zu Programs, %zu Pipelines\n", - m_shad_to_delete.size(), m_prog_to_delete.size(), m_pipe_to_delete.size()); + m_shad_to_delete.size(), m_prog_to_delete.size(), m_pipe_to_delete.size()); - for (auto s : m_shad_to_delete) glDeleteShader(s); - for (auto p : m_prog_to_delete) glDeleteProgram(p); + for (auto s : m_shad_to_delete) + glDeleteShader(s); + for (auto p : m_prog_to_delete) + glDeleteProgram(p); glDeleteProgramPipelines(m_pipe_to_delete.size(), &m_pipe_to_delete[0]); } @@ -91,7 +93,8 @@ void GSShaderOGL::BindProgram(GLuint vs, GLuint gs, GLuint ps) { GLuint p = LinkProgram(vs, gs, ps); - if (GLState::program != p) { + if (GLState::program != p) + { GLState::program = p; glUseProgram(p); } @@ -99,7 +102,8 @@ void GSShaderOGL::BindProgram(GLuint vs, GLuint gs, GLuint ps) void GSShaderOGL::BindProgram(GLuint p) { - if (GLState::program != p) { + if (GLState::program != p) + { GLState::program = p; glUseProgram(p); } @@ -109,12 +113,14 @@ void GSShaderOGL::BindPipeline(GLuint vs, GLuint gs, GLuint ps) { BindPipeline(m_pipeline); - if (GLState::vs != vs) { + if (GLState::vs != vs) + { GLState::vs = vs; glUseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, vs); } - if (GLState::gs != gs) { + if (GLState::gs != gs) + { GLState::gs = gs; glUseProgramStages(m_pipeline, GL_GEOMETRY_SHADER_BIT, gs); } @@ -133,12 +139,14 @@ void GSShaderOGL::BindPipeline(GLuint vs, GLuint gs, GLuint ps) void GSShaderOGL::BindPipeline(GLuint pipe) { - if (GLState::pipeline != pipe) { + if (GLState::pipeline != pipe) + { GLState::pipeline = pipe; glBindProgramPipeline(pipe); } - if (GLState::program) { + if (GLState::program) + { GLState::program = 0; glUseProgram(0); } @@ -146,15 +154,18 @@ void GSShaderOGL::BindPipeline(GLuint pipe) bool GSShaderOGL::ValidateShader(GLuint s) { - if (!m_debug_shader) return true; + if (!m_debug_shader) + return true; GLint status = 0; glGetShaderiv(s, GL_COMPILE_STATUS, &status); - if (status) return true; + if (status) + return true; GLint log_length = 0; glGetShaderiv(s, GL_INFO_LOG_LENGTH, &log_length); - if (log_length > 0) { + if (log_length > 0) + { char* log = new char[log_length]; glGetShaderInfoLog(s, log_length, NULL, log); fprintf(stderr, "%s", log); @@ -167,15 +178,18 @@ bool GSShaderOGL::ValidateShader(GLuint s) bool GSShaderOGL::ValidateProgram(GLuint p) { - if (!m_debug_shader) return true; + if (!m_debug_shader) + return true; GLint status = 0; glGetProgramiv(p, GL_LINK_STATUS, &status); - if (status) return true; + if (status) + return true; GLint log_length = 0; glGetProgramiv(p, GL_INFO_LOG_LENGTH, &log_length); - if (log_length > 0) { + if (log_length > 0) + { char* log = new char[log_length]; glGetProgramInfoLog(p, log_length, NULL, log); fprintf(stderr, "%s", log); @@ -188,18 +202,21 @@ bool GSShaderOGL::ValidateProgram(GLuint p) bool GSShaderOGL::ValidatePipeline(GLuint p) { - if (!m_debug_shader) return true; + if (!m_debug_shader) + return true; // FIXME: might be mandatory to validate the pipeline glValidateProgramPipeline(p); GLint status = 0; glGetProgramPipelineiv(p, GL_VALIDATE_STATUS, &status); - if (status) return true; + if (status) + return true; GLint log_length = 0; glGetProgramPipelineiv(p, GL_INFO_LOG_LENGTH, &log_length); - if (log_length > 0) { + if (log_length > 0) + { char* log = new char[log_length]; glGetProgramPipelineInfoLog(p, log_length, NULL, log); fprintf(stderr, "%s", log); @@ -218,10 +235,13 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co header += "#extension GL_ARB_shading_language_420pack: require\n"; // Need GL version 410 header += "#extension GL_ARB_separate_shader_objects: require\n"; - if (GLLoader::found_GL_ARB_shader_image_load_store) { + if (GLLoader::found_GL_ARB_shader_image_load_store) + { // Need GL version 420 header += "#extension GL_ARB_shader_image_load_store: require\n"; - } else { + } + else + { header += "#define DISABLE_GL42_image\n"; } if (GLLoader::vendor_id_amd || GLLoader::vendor_id_intel) @@ -234,7 +254,8 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co header += "#define pGL_ES 0\n"; // Allow to puts several shader in 1 files - switch (type) { + switch (type) + { case GL_VERTEX_SHADER: header += "#define VERTEX_SHADER 1\n"; break; @@ -244,7 +265,8 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co case GL_FRAGMENT_SHADER: header += "#define FRAGMENT_SHADER 1\n"; break; - default: ASSERT(0); + default: + ASSERT(0); } // Select the entry point ie the main function @@ -276,7 +298,8 @@ GLuint GSShaderOGL::Compile(const std::string& glsl_file, const std::string& ent bool status = ValidateProgram(program); - if (!status) { + if (!status) + { // print extra info fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), program); fprintf(stderr, "\n%s", macro_sel.c_str()); @@ -306,13 +329,14 @@ GLuint GSShaderOGL::CompileShader(const std::string& glsl_file, const std::strin sources[1] = m_common_header.data(); sources[2] = glsl_h_code; - shader = glCreateShader(type); + shader = glCreateShader(type); glShaderSource(shader, shader_nb, sources, NULL); glCompileShader(shader); bool status = ValidateShader(shader); - if (!status) { + if (!status) + { // print extra info fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file.c_str(), entry.c_str(), shader); fprintf(stderr, "\n%s", macro_sel.c_str()); @@ -331,12 +355,13 @@ GLuint GSShaderOGL::CompileShader(const std::string& glsl_file, const std::strin // GLSL improvement (unfortunately). int GSShaderOGL::DumpAsm(const std::string& file, GLuint p) { - if (!GLLoader::vendor_id_nvidia) return 0; + if (!GLLoader::vendor_id_nvidia) + return 0; - GLint binaryLength; + GLint binaryLength; glGetProgramiv(p, GL_PROGRAM_BINARY_LENGTH, &binaryLength); - char* binary = new char[binaryLength+4]; + char* binary = new char[binaryLength + 4]; GLenum binaryFormat; glGetProgramBinary(p, binaryLength, NULL, &binaryFormat, binary); @@ -345,18 +370,24 @@ int GSShaderOGL::DumpAsm(const std::string& file, GLuint p) // Search the magic number "!!" int asm_ = 0; - while (asm_ < binaryLength && (binary[asm_] != '!' || binary[asm_+1] != '!')) { + while (asm_ < binaryLength && (binary[asm_] != '!' || binary[asm_ + 1] != '!')) + { asm_ += 1; } int instructions = -1; - if (asm_ < binaryLength) { + if (asm_ < binaryLength) + { // Now print asm as text char* asm_txt = strtok(&binary[asm_], "\n"); - while (asm_txt != NULL && (strncmp(asm_txt, "END", 3) || !strncmp(asm_txt, "ENDIF", 5))) { - if (!strncmp(asm_txt, "OUT", 3) || !strncmp(asm_txt, "TEMP", 4) || !strncmp(asm_txt, "LONG", 4)) { + while (asm_txt != NULL && (strncmp(asm_txt, "END", 3) || !strncmp(asm_txt, "ENDIF", 5))) + { + if (!strncmp(asm_txt, "OUT", 3) || !strncmp(asm_txt, "TEMP", 4) || !strncmp(asm_txt, "LONG", 4)) + { instructions = 0; - } else if (instructions >= 0) { + } + else if (instructions >= 0) + { if (instructions == 0) fprintf(outfile, "\n"); instructions++; @@ -369,7 +400,8 @@ int GSShaderOGL::DumpAsm(const std::string& file, GLuint p) } fclose(outfile); - if (instructions < 0) { + if (instructions < 0) + { // RAW dump in case of error fprintf(stderr, "Error: failed to find the number of instructions!\n"); outfile = fopen(file.c_str(), "wb"); diff --git a/plugins/GSdx/Renderers/OpenGL/GSShaderOGL.h b/plugins/GSdx/Renderers/OpenGL/GSShaderOGL.h index 8051f6503c..4e9a46deaf 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSShaderOGL.h +++ b/plugins/GSdx/Renderers/OpenGL/GSShaderOGL.h @@ -21,7 +21,8 @@ #pragma once -class GSShaderOGL { +class GSShaderOGL +{ GLuint m_pipeline; std::unordered_map m_program; const bool m_debug_shader; @@ -37,7 +38,7 @@ class GSShaderOGL { std::string GenGlslHeader(const std::string& entry, GLenum type, const std::string& macro); std::vector m_common_header; - public: +public: GSShaderOGL(bool debug); ~GSShaderOGL(); diff --git a/plugins/GSdx/Renderers/OpenGL/GSTextureCacheOGL.cpp b/plugins/GSdx/Renderers/OpenGL/GSTextureCacheOGL.cpp index effd979c94..dcf8a3e5cb 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSTextureCacheOGL.cpp +++ b/plugins/GSdx/Renderers/OpenGL/GSTextureCacheOGL.cpp @@ -76,18 +76,18 @@ void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r) GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); - if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, r.width(), r.height(), fmt, ps_shader)) + if (GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, r.width(), r.height(), fmt, ps_shader)) { GSTexture::GSMap m; GSVector4i r_offscreen(0, 0, r.width(), r.height()); - if(offscreen->Map(m, &r_offscreen)) + if (offscreen->Map(m, &r_offscreen)) { // TODO: block level write GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - switch(TEX0.PSM) + switch (TEX0.PSM) { case PSM_PSMCT32: case PSM_PSMZ32: @@ -123,13 +123,15 @@ void GSTextureCacheOGL::Read(Source* t, const GSVector4i& r) // FIXME Create a get function to avoid the useless copy // Note: With openGL 4.5 you can use glGetTextureSubImage - if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height())) { + if (GSTexture* offscreen = m_renderer->m_dev->CreateOffscreen(r.width(), r.height())) + { m_renderer->m_dev->CopyRect(t->m_texture, offscreen, r); GSTexture::GSMap m; GSVector4i r_offscreen(0, 0, r.width(), r.height()); - if (offscreen->Map(m, &r_offscreen)) { + if (offscreen->Map(m, &r_offscreen)) + { GSOffset* off = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); m_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r); diff --git a/plugins/GSdx/Renderers/OpenGL/GSTextureCacheOGL.h b/plugins/GSdx/Renderers/OpenGL/GSTextureCacheOGL.h index 490dab2abb..19085d9fb5 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSTextureCacheOGL.h +++ b/plugins/GSdx/Renderers/OpenGL/GSTextureCacheOGL.h @@ -28,7 +28,7 @@ class GSTextureCacheOGL final : public GSTextureCache { protected: - int Get8bitFormat() { return GL_R8;} + int Get8bitFormat() { return GL_R8; } void Read(Target* t, const GSVector4i& r); void Read(Source* t, const GSVector4i& r); diff --git a/plugins/GSdx/Renderers/OpenGL/GSTextureOGL.cpp b/plugins/GSdx/Renderers/OpenGL/GSTextureOGL.cpp index 3acc52ee77..55a4a69b4f 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSTextureOGL.cpp +++ b/plugins/GSdx/Renderers/OpenGL/GSTextureOGL.cpp @@ -30,16 +30,17 @@ extern uint64 g_real_texture_upload_byte; #endif // FIXME OGL4: investigate, only 1 unpack buffer always bound -namespace PboPool { +namespace PboPool +{ - const uint32 m_pbo_size = 64*1024*1024; - const uint32 m_seg_size = 16*1024*1024; + const uint32 m_pbo_size = 64 * 1024 * 1024; + const uint32 m_seg_size = 16 * 1024 * 1024; GLuint m_buffer; - uptr m_offset; - char* m_map; + uptr m_offset; + char* m_map; uint32 m_size; - GLsync m_fence[m_pbo_size/m_seg_size]; + GLsync m_fence[m_pbo_size / m_seg_size]; // Option for buffer storage // XXX: actually does I really need coherent and barrier??? @@ -49,7 +50,8 @@ namespace PboPool { const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT; const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT; - void Init() { + void Init() + { glGenBuffers(1, &m_buffer); BindPbo(); @@ -57,22 +59,25 @@ namespace PboPool { glObjectLabel(GL_BUFFER, m_buffer, -1, "PBO"); glBufferStorage(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, create_flags); - m_map = (char*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, map_flags); + m_map = (char*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, map_flags); m_offset = 0; - for (size_t i = 0; i < countof(m_fence); i++) { + for (size_t i = 0; i < countof(m_fence); i++) + { m_fence[i] = 0; } UnbindPbo(); } - char* Map(uint32 size) { + char* Map(uint32 size) + { char* map; // Note: keep offset aligned for SSE/AVX m_size = (size + 63) & ~0x3F; - if (m_size > m_pbo_size) { + if (m_size > m_pbo_size) + { fprintf(stderr, "BUG: PBO too small %u but need %u\n", m_pbo_size, m_size); } @@ -87,41 +92,50 @@ namespace PboPool { return map; } - void Unmap() { + void Unmap() + { glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset, m_size); } - uptr Offset() { + uptr Offset() + { return m_offset; } - void Destroy() { - m_map = NULL; + void Destroy() + { + m_map = NULL; m_offset = 0; - for (size_t i = 0; i < countof(m_fence); i++) { + for (size_t i = 0; i < countof(m_fence); i++) + { glDeleteSync(m_fence[i]); } glDeleteBuffers(1, &m_buffer); } - void BindPbo() { + void BindPbo() + { glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_buffer); } - void Sync() { + void Sync() + { uint32 segment_current = m_offset / m_seg_size; - uint32 segment_next = (m_offset + m_size) / m_seg_size; + uint32 segment_next = (m_offset + m_size) / m_seg_size; - if (segment_current != segment_next) { - if (segment_next >= countof(m_fence)) { + if (segment_current != segment_next) + { + if (segment_next >= countof(m_fence)) + { segment_next = 0; } // Align current transfer on the start of the segment m_offset = m_seg_size * segment_next; - if (m_size > m_seg_size) { + if (m_size > m_seg_size) + { fprintf(stderr, "BUG: PBO Map size %u is bigger than a single segment %u. Crossing more than one fence is not supported yet, texture data may be corrupted.\n", m_size, m_seg_size); // TODO Synchronize all crossed fences } @@ -130,10 +144,12 @@ namespace PboPool { m_fence[segment_current] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); // Check next segment is free - if (m_fence[segment_next]) { + if (m_fence[segment_next]) + { GLenum status = glClientWaitSync(m_fence[segment_next], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); // Potentially it doesn't work on AMD driver which might always return GL_CONDITION_SATISFIED - if (status != GL_ALREADY_SIGNALED) { + if (status != GL_ALREADY_SIGNALED) + { GL_PERF("GL_PIXEL_UNPACK_BUFFER: Sync Sync (%x)! Buffer too small ?", status); } @@ -143,21 +159,23 @@ namespace PboPool { } } - void UnbindPbo() { + void UnbindPbo() + { glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); } - void EndTransfer() { + void EndTransfer() + { m_offset += m_size; } -} +} // namespace PboPool GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, bool mipmap) : m_clean(false), m_generate_mipmap(true), m_local_buffer(nullptr), m_r_x(0), m_r_y(0), m_r_w(0), m_r_h(0), m_layer(0) { // OpenGL didn't like dimensions of size 0 - m_size.x = std::max(1,w); - m_size.y = std::max(1,h); + m_size.x = std::max(1, w); + m_size.y = std::max(1, h); m_format = format; m_type = type; m_fbo_read = fbo_read; @@ -166,7 +184,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, m_max_layer = 1; // Bunch of constant parameter - switch (m_format) { + switch (m_format) + { // 1 Channel integer case GL_R32UI: case GL_R32I: @@ -240,7 +259,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, ASSERT(0); } - switch (m_type) { + switch (m_type) + { case GSTexture::Backbuffer: return; // backbuffer isn't a real texture case GSTexture::Offscreen: @@ -249,7 +269,7 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, break; case GSTexture::Texture: // Only 32 bits input texture will be supported for mipmap - m_max_layer = mipmap && m_format == GL_RGBA8 ? (int)log2(std::max(w,h)) : 1; + m_max_layer = mipmap && m_format == GL_RGBA8 ? (int)log2(std::max(w, h)) : 1; break; case SparseRenderTarget: case SparseDepthStencil: @@ -259,7 +279,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, break; } - switch (m_format) { + switch (m_format) + { case GL_R16UI: case GL_R8: m_sparse &= GLLoader::found_compatible_GL_ARB_sparse_texture2; @@ -294,21 +315,26 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, // Create a gl object (texture isn't allocated here) glCreateTextures(GL_TEXTURE_2D, 1, &m_texture_id); - if (m_format == GL_R8) { + if (m_format == GL_R8) + { // Emulate DX behavior, beside it avoid special code in shader to differentiate // palette texture from a GL_RGBA target or a GL_R texture. glTextureParameteri(m_texture_id, GL_TEXTURE_SWIZZLE_A, GL_RED); } - if (m_sparse) { + if (m_sparse) + { GSVector2i old_size = m_size; m_size = RoundUpPage(m_size); - if (m_size != old_size) { + if (m_size != old_size) + { fprintf(stderr, "Sparse texture size (%dx%d) isn't a multiple of gpu page size (%dx%d)\n", old_size.x, old_size.y, m_gpu_page_size.x, m_gpu_page_size.y); } glTextureParameteri(m_texture_id, GL_TEXTURE_SPARSE_ARB, true); - } else { + } + else + { m_committed_size = m_size; } @@ -316,7 +342,8 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, static int every_512 = 0; GLState::available_vram -= m_mem_usage; - if ((GLState::available_vram < 0) && (every_512 % 512 == 0)) { + if ((GLState::available_vram < 0) && (every_512 % 512 == 0)) + { fprintf(stderr, "Available VRAM is very low (%lld), a crash is expected! Enable conservative buffer allocation or reduce upscaling!\n", GLState::available_vram); every_512++; // Pull emergency break @@ -334,7 +361,8 @@ GSTextureOGL::~GSTextureOGL() GLState::rt = 0; if (m_texture_id == GLState::ds) GLState::ds = 0; - for (size_t i = 0; i < countof(GLState::tex_unit); i++) { + for (size_t i = 0; i < countof(GLState::tex_unit); i++) + { if (m_texture_id == GLState::tex_unit[i]) GLState::tex_unit[i] = 0; } @@ -406,7 +434,8 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int // PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch // Note: row_byte != pitch - for (int h = 0; h < r.height(); h++) { + for (int h = 0; h < r.height(); h++) + { memcpy(map, src, row_byte); map += row_byte; src += pitch; @@ -434,13 +463,14 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer) GSVector4i r = _r ? *_r : GSVector4i(0, 0, m_size.x, m_size.y); // Will need some investigation - ASSERT(r.width() != 0); + ASSERT(r.width() != 0); ASSERT(r.height() != 0); uint32 row_byte = r.width() << m_int_shift; m.pitch = row_byte; - if (m_type == GSTexture::Offscreen) { + if (m_type == GSTexture::Offscreen) + { // The fastest way will be to use a PBO to read the data asynchronously. Unfortunately GSdx // architecture is waiting the data right now. @@ -467,7 +497,9 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer) m.bits = m_local_buffer; return true; - } else if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) { + } + else if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) + { GL_PUSH_("Upload Texture %d", m_texture_id); // POP is in Unmap m_clean = false; @@ -477,7 +509,7 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer) m.bits = (uint8*)PboPool::Map(map_size); #ifdef ENABLE_OGL_DEBUG_MEM_BW - g_real_texture_upload_byte += map_size; + g_real_texture_upload_byte += map_size; #endif // Save the area for the unmap @@ -495,7 +527,8 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer) void GSTextureOGL::Unmap() { - if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) { + if (m_type == GSTexture::Texture || m_type == GSTexture::RenderTarget) + { PboPool::Unmap(); @@ -514,7 +547,8 @@ void GSTextureOGL::Unmap() void GSTextureOGL::GenerateMipmap() { - if (m_generate_mipmap && m_max_layer > 1) { + if (m_generate_mipmap && m_max_layer > 1) + { glGenerateTextureMipmap(m_texture_id); m_generate_mipmap = false; } @@ -524,12 +558,16 @@ void GSTextureOGL::CommitPages(const GSVector2i& region, bool commit) { GLState::available_vram += m_mem_usage; - if (commit) { - if (m_committed_size.x == 0) { + if (commit) + { + if (m_committed_size.x == 0) + { // Nothing allocated so far GL_INS("CommitPages initial %dx%d of %u", region.x, region.y, m_texture_id); glTexturePageCommitmentEXT(m_texture_id, GL_TEX_LEVEL_0, 0, 0, 0, region.x, region.y, 1, commit); - } else { + } + else + { GL_INS("CommitPages extend %dx%d to %dx%d of %u", m_committed_size.x, m_committed_size.y, region.x, region.y, m_texture_id); int w = region.x - m_committed_size.x; int h = region.y - m_committed_size.y; @@ -539,8 +577,9 @@ void GSTextureOGL::CommitPages(const GSVector2i& region, bool commit) glTexturePageCommitmentEXT(m_texture_id, GL_TEX_LEVEL_0, 0, m_committed_size.y, 0, region.x, h, 1, commit); } m_committed_size = region; - - } else { + } + else + { // Release everything GL_INS("CommitPages release of %u", m_texture_id); @@ -557,7 +596,7 @@ bool GSTextureOGL::Save(const std::string& fn) { // Collect the texture data uint32 pitch = 4 * m_committed_size.x; - uint32 buf_size = pitch * m_committed_size.y * 2;// Note *2 for security (depth/stencil) + uint32 buf_size = pitch * m_committed_size.y * 2; // Note *2 for security (depth/stencil) std::unique_ptr image(new uint8[buf_size]); #ifdef ENABLE_OGL_DEBUG GSPng::Format fmt = GSPng::RGB_A_PNG; @@ -565,9 +604,12 @@ bool GSTextureOGL::Save(const std::string& fn) GSPng::Format fmt = GSPng::RGB_PNG; #endif - if (IsBackbuffer()) { + if (IsBackbuffer()) + { glReadPixels(0, 0, m_committed_size.x, m_committed_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image.get()); - } else if(IsDss()) { + } + else if (IsDss()) + { glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_texture_id, 0); @@ -576,18 +618,23 @@ bool GSTextureOGL::Save(const std::string& fn) glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); fmt = GSPng::RGB_A_PNG; - } else if(m_format == GL_R32I) { + } + else if (m_format == GL_R32I) + { // Note: 4.5 function used for accurate DATE // barely used outside of dev and not sparse anyway glGetTextureImage(m_texture_id, 0, GL_RED_INTEGER, GL_INT, buf_size, image.get()); fmt = GSPng::R32I_PNG; - } else { + } + else + { glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0); - if (m_format == GL_RGBA8) { + if (m_format == GL_RGBA8) + { glReadPixels(0, 0, m_committed_size.x, m_committed_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image.get()); } else if (m_format == GL_R16UI) diff --git a/plugins/GSdx/Renderers/OpenGL/GSTextureOGL.h b/plugins/GSdx/Renderers/OpenGL/GSTextureOGL.h index 136f620c60..466b194dcb 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSTextureOGL.h +++ b/plugins/GSdx/Renderers/OpenGL/GSTextureOGL.h @@ -23,68 +23,69 @@ #include "Renderers/Common/GSTexture.h" -namespace PboPool { +namespace PboPool +{ inline void BindPbo(); inline void UnbindPbo(); inline void Sync(); inline char* Map(uint32 size); - inline void Unmap(); - inline uptr Offset(); - inline void EndTransfer(); + inline void Unmap(); + inline uptr Offset(); + inline void EndTransfer(); void Init(); void Destroy(); -} +} // namespace PboPool class GSTextureOGL final : public GSTexture { - private: - GLuint m_texture_id; // the texture id - GLuint m_fbo_read; - bool m_clean; - bool m_generate_mipmap; +private: + GLuint m_texture_id; // the texture id + GLuint m_fbo_read; + bool m_clean; + bool m_generate_mipmap; - uint8* m_local_buffer; - // Avoid alignment constrain - //GSVector4i m_r; - int m_r_x; - int m_r_y; - int m_r_w; - int m_r_h; - int m_layer; - int m_max_layer; + uint8* m_local_buffer; + // Avoid alignment constrain + //GSVector4i m_r; + int m_r_x; + int m_r_y; + int m_r_w; + int m_r_h; + int m_layer; + int m_max_layer; - // internal opengl format/type/alignment - GLenum m_int_format; - GLenum m_int_type; - uint32 m_int_shift; + // internal opengl format/type/alignment + GLenum m_int_format; + GLenum m_int_type; + uint32 m_int_shift; - // Allow to track size of allocated memory - uint32 m_mem_usage; + // Allow to track size of allocated memory + uint32 m_mem_usage; - public: - explicit GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, bool mipmap); - virtual ~GSTextureOGL(); +public: + explicit GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, bool mipmap); + virtual ~GSTextureOGL(); - bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) final; - bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) final; - void Unmap() final; - void GenerateMipmap() final; - bool Save(const std::string& fn) final; + bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) final; + bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) final; + void Unmap() final; + void GenerateMipmap() final; + bool Save(const std::string& fn) final; - bool IsBackbuffer() { return (m_type == GSTexture::Backbuffer); } - bool IsDss() { return (m_type == GSTexture::DepthStencil || m_type == GSTexture::SparseDepthStencil); } + bool IsBackbuffer() { return (m_type == GSTexture::Backbuffer); } + bool IsDss() { return (m_type == GSTexture::DepthStencil || m_type == GSTexture::SparseDepthStencil); } - uint32 GetID() final { return m_texture_id; } - bool HasBeenCleaned() { return m_clean; } - void WasAttached() { m_clean = false; } - void WasCleaned() { m_clean = true; } + uint32 GetID() final { return m_texture_id; } + bool HasBeenCleaned() { return m_clean; } + void WasAttached() { m_clean = false; } + void WasCleaned() { m_clean = true; } - void Clear(const void* data); - void Clear(const void* data, const GSVector4i& area); + void Clear(const void* data); + void Clear(const void* data, const GSVector4i& area); - void CommitPages(const GSVector2i& region, bool commit) final; + void CommitPages(const GSVector2i& region, bool commit) final; - uint32 GetMemUsage(); + uint32 GetMemUsage(); }; diff --git a/plugins/GSdx/Renderers/OpenGL/GSUniformBufferOGL.h b/plugins/GSdx/Renderers/OpenGL/GSUniformBufferOGL.h index 8c8b51773a..3f911e284d 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSUniformBufferOGL.h +++ b/plugins/GSdx/Renderers/OpenGL/GSUniformBufferOGL.h @@ -28,11 +28,12 @@ extern uint64 g_uniform_upload_byte; #endif -class GSUniformBufferOGL { - GLuint m_buffer; // data object - GLuint m_index; // GLSL slot - uint32 m_size; // size of the data - uint8* m_cache; // content of the previous upload +class GSUniformBufferOGL +{ + GLuint m_buffer; // data object + GLuint m_index; // GLSL slot + uint32 m_size; // size of the data + uint8* m_cache; // content of the previous upload public: GSUniformBufferOGL(const std::string& pretty_name, GLuint index, uint32 size) @@ -49,7 +50,8 @@ public: void bind() { - if (GLState::ubo != m_buffer) { + if (GLState::ubo != m_buffer) + { GLState::ubo = m_buffer; glBindBuffer(GL_UNIFORM_BUFFER, m_buffer); } @@ -82,7 +84,8 @@ public: void cache_upload(const void* src) { - if (memcmp(m_cache, src, m_size) != 0) { + if (memcmp(m_cache, src, m_size) != 0) + { memcpy(m_cache, src, m_size); upload(src); } @@ -95,18 +98,19 @@ public: } }; -#define UBO_BUFFER_SIZE (4*1024*1024) +#define UBO_BUFFER_SIZE (4 * 1024 * 1024) -class GSUniformBufferStorageOGL { - GLuint m_buffer; // data object - GLuint m_index; // GLSL slot - uint32 m_size; // size of the data +class GSUniformBufferStorageOGL +{ + GLuint m_buffer; // data object + GLuint m_index; // GLSL slot + uint32 m_size; // size of the data uint8* m_buffer_ptr; uint32 m_offset; public: - GSUniformBufferStorageOGL(GLuint index, uint32 size) : m_index(index) - , m_size(size), m_offset(0) + GSUniformBufferStorageOGL(GLuint index, uint32 size) + : m_index(index) , m_size(size) , m_offset(0) { glGenBuffers(1, &m_buffer); bind(); @@ -116,7 +120,8 @@ public: void bind() { - if (GLState::ubo != m_buffer) { + if (GLState::ubo != m_buffer) + { GLState::ubo = m_buffer; glBindBuffer(GL_UNIFORM_BUFFER, m_buffer); } @@ -130,7 +135,7 @@ public: GLsizei buffer_size = UBO_BUFFER_SIZE; glBufferStorage(GL_UNIFORM_BUFFER, buffer_size, NULL, create_flags); - m_buffer_ptr = (uint8*) glMapBufferRange(GL_UNIFORM_BUFFER, 0, buffer_size, map_flags); + m_buffer_ptr = (uint8*)glMapBufferRange(GL_UNIFORM_BUFFER, 0, buffer_size, map_flags); ASSERT(m_buffer_ptr); } @@ -159,7 +164,8 @@ public: m_offset = 0; } - ~GSUniformBufferStorageOGL() { + ~GSUniformBufferStorageOGL() + { glDeleteBuffers(1, &m_buffer); } }; diff --git a/plugins/GSdx/Renderers/OpenGL/GSVertexArrayOGL.h b/plugins/GSdx/Renderers/OpenGL/GSVertexArrayOGL.h index ee41cbfd0f..7d86d8cdfa 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSVertexArrayOGL.h +++ b/plugins/GSdx/Renderers/OpenGL/GSVertexArrayOGL.h @@ -27,27 +27,29 @@ extern uint64 g_vertex_upload_byte; #endif -struct GSInputLayoutOGL { - GLint location; - GLint size; - GLenum type; +struct GSInputLayoutOGL +{ + GLint location; + GLint size; + GLenum type; GLboolean normalize; GLsizei stride; const GLvoid* offset; }; -template -class GSBufferOGL { +template +class GSBufferOGL +{ size_t m_start; size_t m_count; size_t m_limit; size_t m_quarter_shift; - const GLenum m_target; + const GLenum m_target; GLuint m_buffer_name; - uint8* m_buffer_ptr; + uint8* m_buffer_ptr; GLsync m_fence[5]; - public: +public: GSBufferOGL(GLenum target, size_t count) : m_start(0) , m_count(0) @@ -60,7 +62,8 @@ class GSBufferOGL { m_limit = 1u << (1u + (size_t)std::log2(count - 1u)); m_quarter_shift = (size_t)std::log2(m_limit * STRIDE) - 2; - for (size_t i = 0; i < 5; i++) { + for (size_t i = 0; i < 5; i++) + { m_fence[i] = 0; } @@ -78,16 +81,19 @@ class GSBufferOGL { const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT; const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT; - glBufferStorage(m_target, STRIDE * m_limit, NULL, create_flags ); - m_buffer_ptr = (uint8*) glMapBufferRange(m_target, 0, STRIDE * m_limit, map_flags); - if (!m_buffer_ptr) { + glBufferStorage(m_target, STRIDE * m_limit, NULL, create_flags); + m_buffer_ptr = (uint8*)glMapBufferRange(m_target, 0, STRIDE * m_limit, map_flags); + if (!m_buffer_ptr) + { fprintf(stderr, "Failed to map buffer\n"); throw GSDXError(); } } - ~GSBufferOGL() { - for (size_t i = 0; i < 5; i++) { + ~GSBufferOGL() + { + for (size_t i = 0; i < 5; i++) + { glDeleteSync(m_fence[i]); } glDeleteBuffers(1, &m_buffer_name); @@ -108,14 +114,16 @@ class GSBufferOGL { size_t offset = m_start * STRIDE; size_t length = m_count * STRIDE; - if (m_count > (m_limit - m_start) ) { + if (m_count > (m_limit - m_start)) + { size_t current_chunk = offset >> m_quarter_shift; #ifdef ENABLE_OGL_DEBUG_FENCE fprintf(stderr, "%x: Wrap buffer\n", m_target); fprintf(stderr, "%x: Insert a fence in chunk %zu\n", m_target, current_chunk); #endif ASSERT(current_chunk > 0 && current_chunk < 5); - if (m_fence[current_chunk] == 0) { + if (m_fence[current_chunk] == 0) + { m_fence[current_chunk] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); } @@ -124,10 +132,12 @@ class GSBufferOGL { offset = 0; // Only check first chunk - if (m_fence[0]) { + if (m_fence[0]) + { #ifdef ENABLE_OGL_DEBUG_FENCE GLenum status = glClientWaitSync(m_fence[0], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); - if (status != GL_ALREADY_SIGNALED) { + if (status != GL_ALREADY_SIGNALED) + { fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target); } #else @@ -141,13 +151,15 @@ class GSBufferOGL { // Protect buffer with fences size_t current_chunk = offset >> m_quarter_shift; size_t next_chunk = (offset + length) >> m_quarter_shift; - for (size_t c = current_chunk + 1; c <= next_chunk; c++) { + for (size_t c = current_chunk + 1; c <= next_chunk; c++) + { #ifdef ENABLE_OGL_DEBUG_FENCE - fprintf(stderr, "%x: Insert a fence in chunk %d\n", m_target, c-1); + fprintf(stderr, "%x: Insert a fence in chunk %d\n", m_target, c - 1); #endif ASSERT(c > 0 && c < 5); - m_fence[c-1] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - if (m_fence[c]) { + m_fence[c - 1] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + if (m_fence[c]) + { #ifdef ENABLE_OGL_DEBUG_FENCE GLenum status = glClientWaitSync(m_fence[c], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); #else @@ -157,7 +169,8 @@ class GSBufferOGL { m_fence[c] = 0; #ifdef ENABLE_OGL_DEBUG_FENCE - if (status != GL_ALREADY_SIGNALED) { + if (status != GL_ALREADY_SIGNALED) + { fprintf(stderr, "%x: Sync Sync! Buffer too small\n", m_target); } #endif @@ -211,10 +224,10 @@ class GSBufferOGL { } size_t GetStart() { return m_start; } - }; -class GSVertexBufferStateOGL { +class GSVertexBufferStateOGL +{ std::unique_ptr> m_vb; std::unique_ptr> m_ib; @@ -223,10 +236,11 @@ class GSVertexBufferStateOGL { std::vector m_layout; // No copy constructor please - GSVertexBufferStateOGL(const GSVertexBufferStateOGL& ) = delete; + GSVertexBufferStateOGL(const GSVertexBufferStateOGL&) = delete; public: - GSVertexBufferStateOGL(const std::vector& layout) : m_topology(0), m_layout(layout) + GSVertexBufferStateOGL(const std::vector& layout) + : m_topology(0), m_layout(layout) { glGenVertexArrays(1, &m_va); glBindVertexArray(m_va); @@ -250,15 +264,20 @@ public: void set_internal_format() { - for (const auto &l : m_layout) { + for (const auto& l : m_layout) + { // Note this function need both a vertex array object and a GL_ARRAY_BUFFER buffer glEnableVertexAttribArray(l.location); - switch (l.type) { + switch (l.type) + { case GL_UNSIGNED_SHORT: case GL_UNSIGNED_INT: - if (l.normalize) { + if (l.normalize) + { glVertexAttribPointer(l.location, l.size, l.type, l.normalize, l.stride, l.offset); - } else { + } + else + { // Rule: when shader use integral (not normalized) you must use glVertexAttribIPointer (note the extra I) glVertexAttribIPointer(l.location, l.size, l.type, l.stride, l.offset); } @@ -280,19 +299,24 @@ public: void DrawPrimitive(int offset, int count) { m_vb->Draw(m_topology, offset, count); } - void DrawIndexedPrimitive() { m_ib->Draw(m_topology, m_vb->GetStart() ); } + void DrawIndexedPrimitive() { m_ib->Draw(m_topology, m_vb->GetStart()); } - void DrawIndexedPrimitive(int offset, int count) { m_ib->Draw(m_topology, m_vb->GetStart(), offset, count ); } + void DrawIndexedPrimitive(int offset, int count) { m_ib->Draw(m_topology, m_vb->GetStart(), offset, count); } void SetTopology(GLenum topology) { m_topology = topology; } - void* MapVB(size_t count) { - void *ptr; - while (true) { - try { + void* MapVB(size_t count) + { + void* ptr; + while (true) + { + try + { ptr = m_vb->map(count); break; - } catch (GSDXErrorGlVertexArrayTooSmall) { + } + catch (GSDXErrorGlVertexArrayTooSmall) + { GL_INS("GL vertex buffer is too small"); m_vb.reset(new GSBufferOGL(GL_ARRAY_BUFFER, count)); @@ -304,12 +328,17 @@ public: return ptr; } void UnmapVB() { m_vb->unmap(); } - void UploadVB(const void* vertices, size_t count) { - while (true) { - try { + void UploadVB(const void* vertices, size_t count) + { + while (true) + { + try + { m_vb->upload(vertices, count); break; - } catch (GSDXErrorGlVertexArrayTooSmall) { + } + catch (GSDXErrorGlVertexArrayTooSmall) + { GL_INS("GL vertex buffer is too small"); m_vb.reset(new GSBufferOGL(GL_ARRAY_BUFFER, count)); @@ -319,12 +348,17 @@ public: } } - void UploadIB(const void* index, size_t count) { - while (true) { - try { + void UploadIB(const void* index, size_t count) + { + while (true) + { + try + { m_ib->upload(index, count); break; - } catch (GSDXErrorGlVertexArrayTooSmall) { + } + catch (GSDXErrorGlVertexArrayTooSmall) + { GL_INS("GL index buffer is too small"); m_ib.reset(new GSBufferOGL(GL_ELEMENT_ARRAY_BUFFER, count)); @@ -336,5 +370,4 @@ public: { glDeleteVertexArrays(1, &m_va); } - }; diff --git a/plugins/GSdx/Renderers/OpenGL/glext_extra.h b/plugins/GSdx/Renderers/OpenGL/glext_extra.h index 9cf94f7a7b..ac6b917d79 100644 --- a/plugins/GSdx/Renderers/OpenGL/glext_extra.h +++ b/plugins/GSdx/Renderers/OpenGL/glext_extra.h @@ -39,7 +39,7 @@ extern "C" { #define APIENTRY #endif #ifndef APIENTRYP -#define APIENTRYP APIENTRY * +#define APIENTRYP APIENTRY* #endif #ifndef GLAPI #define GLAPI extern @@ -47,7 +47,7 @@ extern "C" { #ifndef GL_VERSION_4_3 #define GL_VERSION_4_3 1 -typedef void (APIENTRY *GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void(APIENTRY* GLDEBUGPROC)(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* message, const void* userParam); #define GL_NUM_SHADING_LANGUAGE_VERSIONS 0x82E9 #define GL_VERTEX_ATTRIB_ARRAY_LONG 0x874E #define GL_COMPRESSED_RGB8_ETC2 0x9274 @@ -306,49 +306,49 @@ typedef void (APIENTRY *GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum #define GL_MAX_VERTEX_ATTRIB_RELATIVE_OFFSET 0x82D9 #define GL_MAX_VERTEX_ATTRIB_BINDINGS 0x82DA #define GL_VERTEX_BINDING_BUFFER 0x8F4F -typedef void (APIENTRYP PFNGLCLEARBUFFERDATAPROC) (GLenum target, GLenum internalformat, GLenum format, GLenum type, const void *data); -typedef void (APIENTRYP PFNGLCLEARBUFFERSUBDATAPROC) (GLenum target, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format, GLenum type, const void *data); -typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEPROC) (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z); -typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEINDIRECTPROC) (GLintptr indirect); -typedef void (APIENTRYP PFNGLCOPYIMAGESUBDATAPROC) (GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth); -typedef void (APIENTRYP PFNGLFRAMEBUFFERPARAMETERIPROC) (GLenum target, GLenum pname, GLint param); -typedef void (APIENTRYP PFNGLGETFRAMEBUFFERPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); -typedef void (APIENTRYP PFNGLGETINTERNALFORMATI64VPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint64 *params); -typedef void (APIENTRYP PFNGLINVALIDATETEXSUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth); -typedef void (APIENTRYP PFNGLINVALIDATETEXIMAGEPROC) (GLuint texture, GLint level); -typedef void (APIENTRYP PFNGLINVALIDATEBUFFERSUBDATAPROC) (GLuint buffer, GLintptr offset, GLsizeiptr length); -typedef void (APIENTRYP PFNGLINVALIDATEBUFFERDATAPROC) (GLuint buffer); -typedef void (APIENTRYP PFNGLINVALIDATEFRAMEBUFFERPROC) (GLenum target, GLsizei numAttachments, const GLenum *attachments); -typedef void (APIENTRYP PFNGLINVALIDATESUBFRAMEBUFFERPROC) (GLenum target, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height); -typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTPROC) (GLenum mode, const void *indirect, GLsizei drawcount, GLsizei stride); -typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTPROC) (GLenum mode, GLenum type, const void *indirect, GLsizei drawcount, GLsizei stride); -typedef void (APIENTRYP PFNGLGETPROGRAMINTERFACEIVPROC) (GLuint program, GLenum programInterface, GLenum pname, GLint *params); -typedef GLuint (APIENTRYP PFNGLGETPROGRAMRESOURCEINDEXPROC) (GLuint program, GLenum programInterface, const GLchar *name); -typedef void (APIENTRYP PFNGLGETPROGRAMRESOURCENAMEPROC) (GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei *length, GLchar *name); -typedef void (APIENTRYP PFNGLGETPROGRAMRESOURCEIVPROC) (GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLint *params); -typedef GLint (APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONPROC) (GLuint program, GLenum programInterface, const GLchar *name); -typedef GLint (APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC) (GLuint program, GLenum programInterface, const GLchar *name); -typedef void (APIENTRYP PFNGLSHADERSTORAGEBLOCKBINDINGPROC) (GLuint program, GLuint storageBlockIndex, GLuint storageBlockBinding); -typedef void (APIENTRYP PFNGLTEXBUFFERRANGEPROC) (GLenum target, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size); -typedef void (APIENTRYP PFNGLTEXSTORAGE2DMULTISAMPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); -typedef void (APIENTRYP PFNGLTEXSTORAGE3DMULTISAMPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations); -typedef void (APIENTRYP PFNGLTEXTUREVIEWPROC) (GLuint texture, GLenum target, GLuint origtexture, GLenum internalformat, GLuint minlevel, GLuint numlevels, GLuint minlayer, GLuint numlayers); -typedef void (APIENTRYP PFNGLBINDVERTEXBUFFERPROC) (GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride); -typedef void (APIENTRYP PFNGLVERTEXATTRIBFORMATPROC) (GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset); -typedef void (APIENTRYP PFNGLVERTEXATTRIBIFORMATPROC) (GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); -typedef void (APIENTRYP PFNGLVERTEXATTRIBLFORMATPROC) (GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); -typedef void (APIENTRYP PFNGLVERTEXATTRIBBINDINGPROC) (GLuint attribindex, GLuint bindingindex); -typedef void (APIENTRYP PFNGLVERTEXBINDINGDIVISORPROC) (GLuint bindingindex, GLuint divisor); -typedef void (APIENTRYP PFNGLDEBUGMESSAGECONTROLPROC) (GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint *ids, GLboolean enabled); -typedef void (APIENTRYP PFNGLDEBUGMESSAGEINSERTPROC) (GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *buf); -typedef void (APIENTRYP PFNGLDEBUGMESSAGECALLBACKPROC) (GLDEBUGPROC callback, const void *userParam); -typedef GLuint (APIENTRYP PFNGLGETDEBUGMESSAGELOGPROC) (GLuint count, GLsizei bufSize, GLenum *sources, GLenum *types, GLuint *ids, GLenum *severities, GLsizei *lengths, GLchar *messageLog); -typedef void (APIENTRYP PFNGLPUSHDEBUGGROUPPROC) (GLenum source, GLuint id, GLsizei length, const GLchar *message); -typedef void (APIENTRYP PFNGLPOPDEBUGGROUPPROC) (void); -typedef void (APIENTRYP PFNGLOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei length, const GLchar *label); -typedef void (APIENTRYP PFNGLGETOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei bufSize, GLsizei *length, GLchar *label); -typedef void (APIENTRYP PFNGLOBJECTPTRLABELPROC) (const void *ptr, GLsizei length, const GLchar *label); -typedef void (APIENTRYP PFNGLGETOBJECTPTRLABELPROC) (const void *ptr, GLsizei bufSize, GLsizei *length, GLchar *label); +typedef void(APIENTRYP PFNGLCLEARBUFFERDATAPROC)(GLenum target, GLenum internalformat, GLenum format, GLenum type, const void* data); +typedef void(APIENTRYP PFNGLCLEARBUFFERSUBDATAPROC)(GLenum target, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format, GLenum type, const void* data); +typedef void(APIENTRYP PFNGLDISPATCHCOMPUTEPROC)(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z); +typedef void(APIENTRYP PFNGLDISPATCHCOMPUTEINDIRECTPROC)(GLintptr indirect); +typedef void(APIENTRYP PFNGLCOPYIMAGESUBDATAPROC)(GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth); +typedef void(APIENTRYP PFNGLFRAMEBUFFERPARAMETERIPROC)(GLenum target, GLenum pname, GLint param); +typedef void(APIENTRYP PFNGLGETFRAMEBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint* params); +typedef void(APIENTRYP PFNGLGETINTERNALFORMATI64VPROC)(GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint64* params); +typedef void(APIENTRYP PFNGLINVALIDATETEXSUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth); +typedef void(APIENTRYP PFNGLINVALIDATETEXIMAGEPROC)(GLuint texture, GLint level); +typedef void(APIENTRYP PFNGLINVALIDATEBUFFERSUBDATAPROC)(GLuint buffer, GLintptr offset, GLsizeiptr length); +typedef void(APIENTRYP PFNGLINVALIDATEBUFFERDATAPROC)(GLuint buffer); +typedef void(APIENTRYP PFNGLINVALIDATEFRAMEBUFFERPROC)(GLenum target, GLsizei numAttachments, const GLenum* attachments); +typedef void(APIENTRYP PFNGLINVALIDATESUBFRAMEBUFFERPROC)(GLenum target, GLsizei numAttachments, const GLenum* attachments, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void(APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTPROC)(GLenum mode, const void* indirect, GLsizei drawcount, GLsizei stride); +typedef void(APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTPROC)(GLenum mode, GLenum type, const void* indirect, GLsizei drawcount, GLsizei stride); +typedef void(APIENTRYP PFNGLGETPROGRAMINTERFACEIVPROC)(GLuint program, GLenum programInterface, GLenum pname, GLint* params); +typedef GLuint(APIENTRYP PFNGLGETPROGRAMRESOURCEINDEXPROC)(GLuint program, GLenum programInterface, const GLchar* name); +typedef void(APIENTRYP PFNGLGETPROGRAMRESOURCENAMEPROC)(GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei* length, GLchar* name); +typedef void(APIENTRYP PFNGLGETPROGRAMRESOURCEIVPROC)(GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum* props, GLsizei bufSize, GLsizei* length, GLint* params); +typedef GLint(APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONPROC)(GLuint program, GLenum programInterface, const GLchar* name); +typedef GLint(APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC)(GLuint program, GLenum programInterface, const GLchar* name); +typedef void(APIENTRYP PFNGLSHADERSTORAGEBLOCKBINDINGPROC)(GLuint program, GLuint storageBlockIndex, GLuint storageBlockBinding); +typedef void(APIENTRYP PFNGLTEXBUFFERRANGEPROC)(GLenum target, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size); +typedef void(APIENTRYP PFNGLTEXSTORAGE2DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +typedef void(APIENTRYP PFNGLTEXSTORAGE3DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations); +typedef void(APIENTRYP PFNGLTEXTUREVIEWPROC)(GLuint texture, GLenum target, GLuint origtexture, GLenum internalformat, GLuint minlevel, GLuint numlevels, GLuint minlayer, GLuint numlayers); +typedef void(APIENTRYP PFNGLBINDVERTEXBUFFERPROC)(GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride); +typedef void(APIENTRYP PFNGLVERTEXATTRIBFORMATPROC)(GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset); +typedef void(APIENTRYP PFNGLVERTEXATTRIBIFORMATPROC)(GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +typedef void(APIENTRYP PFNGLVERTEXATTRIBLFORMATPROC)(GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +typedef void(APIENTRYP PFNGLVERTEXATTRIBBINDINGPROC)(GLuint attribindex, GLuint bindingindex); +typedef void(APIENTRYP PFNGLVERTEXBINDINGDIVISORPROC)(GLuint bindingindex, GLuint divisor); +typedef void(APIENTRYP PFNGLDEBUGMESSAGECONTROLPROC)(GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint* ids, GLboolean enabled); +typedef void(APIENTRYP PFNGLDEBUGMESSAGEINSERTPROC)(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* buf); +typedef void(APIENTRYP PFNGLDEBUGMESSAGECALLBACKPROC)(GLDEBUGPROC callback, const void* userParam); +typedef GLuint(APIENTRYP PFNGLGETDEBUGMESSAGELOGPROC)(GLuint count, GLsizei bufSize, GLenum* sources, GLenum* types, GLuint* ids, GLenum* severities, GLsizei* lengths, GLchar* messageLog); +typedef void(APIENTRYP PFNGLPUSHDEBUGGROUPPROC)(GLenum source, GLuint id, GLsizei length, const GLchar* message); +typedef void(APIENTRYP PFNGLPOPDEBUGGROUPPROC)(void); +typedef void(APIENTRYP PFNGLOBJECTLABELPROC)(GLenum identifier, GLuint name, GLsizei length, const GLchar* label); +typedef void(APIENTRYP PFNGLGETOBJECTLABELPROC)(GLenum identifier, GLuint name, GLsizei bufSize, GLsizei* length, GLchar* label); +typedef void(APIENTRYP PFNGLOBJECTPTRLABELPROC)(const void* ptr, GLsizei length, const GLchar* label); +typedef void(APIENTRYP PFNGLGETOBJECTPTRLABELPROC)(const void* ptr, GLsizei bufSize, GLsizei* length, GLchar* label); #endif /* GL_VERSION_4_3 */ #ifndef GL_VERSION_4_4 @@ -372,15 +372,15 @@ typedef void (APIENTRYP PFNGLGETOBJECTPTRLABELPROC) (const void *ptr, GLsizei bu #define GL_QUERY_BUFFER_BINDING 0x9193 #define GL_QUERY_RESULT_NO_WAIT 0x9194 #define GL_MIRROR_CLAMP_TO_EDGE 0x8743 -typedef void (APIENTRYP PFNGLBUFFERSTORAGEPROC) (GLenum target, GLsizeiptr size, const void *data, GLbitfield flags); -typedef void (APIENTRYP PFNGLCLEARTEXIMAGEPROC) (GLuint texture, GLint level, GLenum format, GLenum type, const void *data); -typedef void (APIENTRYP PFNGLCLEARTEXSUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *data); -typedef void (APIENTRYP PFNGLBINDBUFFERSBASEPROC) (GLenum target, GLuint first, GLsizei count, const GLuint *buffers); -typedef void (APIENTRYP PFNGLBINDBUFFERSRANGEPROC) (GLenum target, GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizeiptr *sizes); -typedef void (APIENTRYP PFNGLBINDTEXTURESPROC) (GLuint first, GLsizei count, const GLuint *textures); -typedef void (APIENTRYP PFNGLBINDSAMPLERSPROC) (GLuint first, GLsizei count, const GLuint *samplers); -typedef void (APIENTRYP PFNGLBINDIMAGETEXTURESPROC) (GLuint first, GLsizei count, const GLuint *textures); -typedef void (APIENTRYP PFNGLBINDVERTEXBUFFERSPROC) (GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizei *strides); +typedef void(APIENTRYP PFNGLBUFFERSTORAGEPROC)(GLenum target, GLsizeiptr size, const void* data, GLbitfield flags); +typedef void(APIENTRYP PFNGLCLEARTEXIMAGEPROC)(GLuint texture, GLint level, GLenum format, GLenum type, const void* data); +typedef void(APIENTRYP PFNGLCLEARTEXSUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* data); +typedef void(APIENTRYP PFNGLBINDBUFFERSBASEPROC)(GLenum target, GLuint first, GLsizei count, const GLuint* buffers); +typedef void(APIENTRYP PFNGLBINDBUFFERSRANGEPROC)(GLenum target, GLuint first, GLsizei count, const GLuint* buffers, const GLintptr* offsets, const GLsizeiptr* sizes); +typedef void(APIENTRYP PFNGLBINDTEXTURESPROC)(GLuint first, GLsizei count, const GLuint* textures); +typedef void(APIENTRYP PFNGLBINDSAMPLERSPROC)(GLuint first, GLsizei count, const GLuint* samplers); +typedef void(APIENTRYP PFNGLBINDIMAGETEXTURESPROC)(GLuint first, GLsizei count, const GLuint* textures); +typedef void(APIENTRYP PFNGLBINDVERTEXBUFFERSPROC)(GLuint first, GLsizei count, const GLuint* buffers, const GLintptr* offsets, const GLsizei* strides); #endif /* GL_VERSION_4_4 */ #ifndef GL_VERSION_4_5 @@ -407,116 +407,116 @@ typedef void (APIENTRYP PFNGLBINDVERTEXBUFFERSPROC) (GLuint first, GLsizei count #define GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT 0x00000004 #define GL_CONTEXT_RELEASE_BEHAVIOR 0x82FB #define GL_CONTEXT_RELEASE_BEHAVIOR_FLUSH 0x82FC -typedef void (APIENTRYP PFNGLCLIPCONTROLPROC) (GLenum origin, GLenum depth); -typedef void (APIENTRYP PFNGLCREATETRANSFORMFEEDBACKSPROC) (GLsizei n, GLuint *ids); -typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKBUFFERBASEPROC) (GLuint xfb, GLuint index, GLuint buffer); -typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKBUFFERRANGEPROC) (GLuint xfb, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); -typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKIVPROC) (GLuint xfb, GLenum pname, GLint *param); -typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKI_VPROC) (GLuint xfb, GLenum pname, GLuint index, GLint *param); -typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKI64_VPROC) (GLuint xfb, GLenum pname, GLuint index, GLint64 *param); -typedef void (APIENTRYP PFNGLCREATEBUFFERSPROC) (GLsizei n, GLuint *buffers); -typedef void (APIENTRYP PFNGLNAMEDBUFFERSTORAGEPROC) (GLuint buffer, GLsizeiptr size, const void *data, GLbitfield flags); -typedef void (APIENTRYP PFNGLNAMEDBUFFERDATAPROC) (GLuint buffer, GLsizeiptr size, const void *data, GLenum usage); -typedef void (APIENTRYP PFNGLNAMEDBUFFERSUBDATAPROC) (GLuint buffer, GLintptr offset, GLsizeiptr size, const void *data); -typedef void (APIENTRYP PFNGLCOPYNAMEDBUFFERSUBDATAPROC) (GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); -typedef void (APIENTRYP PFNGLCLEARNAMEDBUFFERDATAPROC) (GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void *data); -typedef void (APIENTRYP PFNGLCLEARNAMEDBUFFERSUBDATAPROC) (GLuint buffer, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format, GLenum type, const void *data); -typedef void *(APIENTRYP PFNGLMAPNAMEDBUFFERPROC) (GLuint buffer, GLenum access); -typedef void *(APIENTRYP PFNGLMAPNAMEDBUFFERRANGEPROC) (GLuint buffer, GLintptr offset, GLsizeiptr length, GLbitfield access); -typedef GLboolean (APIENTRYP PFNGLUNMAPNAMEDBUFFERPROC) (GLuint buffer); -typedef void (APIENTRYP PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC) (GLuint buffer, GLintptr offset, GLsizeiptr length); -typedef void (APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERIVPROC) (GLuint buffer, GLenum pname, GLint *params); -typedef void (APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERI64VPROC) (GLuint buffer, GLenum pname, GLint64 *params); -typedef void (APIENTRYP PFNGLGETNAMEDBUFFERPOINTERVPROC) (GLuint buffer, GLenum pname, void **params); -typedef void (APIENTRYP PFNGLGETNAMEDBUFFERSUBDATAPROC) (GLuint buffer, GLintptr offset, GLsizeiptr size, void *data); -typedef void (APIENTRYP PFNGLCREATEFRAMEBUFFERSPROC) (GLsizei n, GLuint *framebuffers); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC) (GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC) (GLuint framebuffer, GLenum pname, GLint param); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTUREPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC) (GLuint framebuffer, GLenum buf); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC) (GLuint framebuffer, GLsizei n, const GLenum *bufs); -typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC) (GLuint framebuffer, GLenum src); -typedef void (APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC) (GLuint framebuffer, GLsizei numAttachments, const GLenum *attachments); -typedef void (APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC) (GLuint framebuffer, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height); -typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERIVPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLint *value); -typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLuint *value); -typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFVPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLfloat *value); -typedef void (APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFIPROC) (GLuint framebuffer, GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); -typedef void (APIENTRYP PFNGLBLITNAMEDFRAMEBUFFERPROC) (GLuint readFramebuffer, GLuint drawFramebuffer, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); -typedef GLenum (APIENTRYP PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC) (GLuint framebuffer, GLenum target); -typedef void (APIENTRYP PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVPROC) (GLuint framebuffer, GLenum pname, GLint *param); -typedef void (APIENTRYP PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVPROC) (GLuint framebuffer, GLenum attachment, GLenum pname, GLint *params); -typedef void (APIENTRYP PFNGLCREATERENDERBUFFERSPROC) (GLsizei n, GLuint *renderbuffers); -typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEPROC) (GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height); -typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEPROC) (GLuint renderbuffer, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); -typedef void (APIENTRYP PFNGLGETNAMEDRENDERBUFFERPARAMETERIVPROC) (GLuint renderbuffer, GLenum pname, GLint *params); -typedef void (APIENTRYP PFNGLCREATETEXTURESPROC) (GLenum target, GLsizei n, GLuint *textures); -typedef void (APIENTRYP PFNGLTEXTUREBUFFERPROC) (GLuint texture, GLenum internalformat, GLuint buffer); -typedef void (APIENTRYP PFNGLTEXTUREBUFFERRANGEPROC) (GLuint texture, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size); -typedef void (APIENTRYP PFNGLTEXTURESTORAGE1DPROC) (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width); -typedef void (APIENTRYP PFNGLTEXTURESTORAGE2DPROC) (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); -typedef void (APIENTRYP PFNGLTEXTURESTORAGE3DPROC) (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); -typedef void (APIENTRYP PFNGLTEXTURESTORAGE2DMULTISAMPLEPROC) (GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); -typedef void (APIENTRYP PFNGLTEXTURESTORAGE3DMULTISAMPLEPROC) (GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations); -typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE1DPROC) (GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void *pixels); -typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE2DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels); -typedef void (APIENTRYP PFNGLTEXTURESUBIMAGE3DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *pixels); -typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE1DPROC) (GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void *data); -typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data); -typedef void (APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void *data); -typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE1DPROC) (GLuint texture, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); -typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE2DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); -typedef void (APIENTRYP PFNGLCOPYTEXTURESUBIMAGE3DPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); -typedef void (APIENTRYP PFNGLTEXTUREPARAMETERFPROC) (GLuint texture, GLenum pname, GLfloat param); -typedef void (APIENTRYP PFNGLTEXTUREPARAMETERFVPROC) (GLuint texture, GLenum pname, const GLfloat *param); -typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIPROC) (GLuint texture, GLenum pname, GLint param); -typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIIVPROC) (GLuint texture, GLenum pname, const GLint *params); -typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIUIVPROC) (GLuint texture, GLenum pname, const GLuint *params); -typedef void (APIENTRYP PFNGLTEXTUREPARAMETERIVPROC) (GLuint texture, GLenum pname, const GLint *param); -typedef void (APIENTRYP PFNGLGENERATETEXTUREMIPMAPPROC) (GLuint texture); -typedef void (APIENTRYP PFNGLBINDTEXTUREUNITPROC) (GLuint unit, GLuint texture); -typedef void (APIENTRYP PFNGLGETTEXTUREIMAGEPROC) (GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels); -typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXTUREIMAGEPROC) (GLuint texture, GLint level, GLsizei bufSize, void *pixels); -typedef void (APIENTRYP PFNGLGETTEXTURELEVELPARAMETERFVPROC) (GLuint texture, GLint level, GLenum pname, GLfloat *params); -typedef void (APIENTRYP PFNGLGETTEXTURELEVELPARAMETERIVPROC) (GLuint texture, GLint level, GLenum pname, GLint *params); -typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERFVPROC) (GLuint texture, GLenum pname, GLfloat *params); -typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERIIVPROC) (GLuint texture, GLenum pname, GLint *params); -typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERIUIVPROC) (GLuint texture, GLenum pname, GLuint *params); -typedef void (APIENTRYP PFNGLGETTEXTUREPARAMETERIVPROC) (GLuint texture, GLenum pname, GLint *params); -typedef void (APIENTRYP PFNGLCREATEVERTEXARRAYSPROC) (GLsizei n, GLuint *arrays); -typedef void (APIENTRYP PFNGLDISABLEVERTEXARRAYATTRIBPROC) (GLuint vaobj, GLuint index); -typedef void (APIENTRYP PFNGLENABLEVERTEXARRAYATTRIBPROC) (GLuint vaobj, GLuint index); -typedef void (APIENTRYP PFNGLVERTEXARRAYELEMENTBUFFERPROC) (GLuint vaobj, GLuint buffer); -typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERPROC) (GLuint vaobj, GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride); -typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERSPROC) (GLuint vaobj, GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizei *strides); -typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBBINDINGPROC) (GLuint vaobj, GLuint attribindex, GLuint bindingindex); -typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBFORMATPROC) (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset); -typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBIFORMATPROC) (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); -typedef void (APIENTRYP PFNGLVERTEXARRAYATTRIBLFORMATPROC) (GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); -typedef void (APIENTRYP PFNGLVERTEXARRAYBINDINGDIVISORPROC) (GLuint vaobj, GLuint bindingindex, GLuint divisor); -typedef void (APIENTRYP PFNGLGETVERTEXARRAYIVPROC) (GLuint vaobj, GLenum pname, GLint *param); -typedef void (APIENTRYP PFNGLGETVERTEXARRAYINDEXEDIVPROC) (GLuint vaobj, GLuint index, GLenum pname, GLint *param); -typedef void (APIENTRYP PFNGLGETVERTEXARRAYINDEXED64IVPROC) (GLuint vaobj, GLuint index, GLenum pname, GLint64 *param); -typedef void (APIENTRYP PFNGLCREATESAMPLERSPROC) (GLsizei n, GLuint *samplers); -typedef void (APIENTRYP PFNGLCREATEPROGRAMPIPELINESPROC) (GLsizei n, GLuint *pipelines); -typedef void (APIENTRYP PFNGLCREATEQUERIESPROC) (GLenum target, GLsizei n, GLuint *ids); -typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTI64VPROC) (GLuint id, GLuint buffer, GLenum pname, GLintptr offset); -typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTIVPROC) (GLuint id, GLuint buffer, GLenum pname, GLintptr offset); -typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTUI64VPROC) (GLuint id, GLuint buffer, GLenum pname, GLintptr offset); -typedef void (APIENTRYP PFNGLGETQUERYBUFFEROBJECTUIVPROC) (GLuint id, GLuint buffer, GLenum pname, GLintptr offset); -typedef void (APIENTRYP PFNGLMEMORYBARRIERBYREGIONPROC) (GLbitfield barriers); -typedef void (APIENTRYP PFNGLGETTEXTURESUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLsizei bufSize, void *pixels); -typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLsizei bufSize, void *pixels); -typedef GLenum (APIENTRYP PFNGLGETGRAPHICSRESETSTATUSPROC) (void); -typedef void (APIENTRYP PFNGLGETNCOMPRESSEDTEXIMAGEPROC) (GLenum target, GLint lod, GLsizei bufSize, void *pixels); -typedef void (APIENTRYP PFNGLGETNTEXIMAGEPROC) (GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels); -typedef void (APIENTRYP PFNGLGETNUNIFORMDVPROC) (GLuint program, GLint location, GLsizei bufSize, GLdouble *params); -typedef void (APIENTRYP PFNGLGETNUNIFORMFVPROC) (GLuint program, GLint location, GLsizei bufSize, GLfloat *params); -typedef void (APIENTRYP PFNGLGETNUNIFORMIVPROC) (GLuint program, GLint location, GLsizei bufSize, GLint *params); -typedef void (APIENTRYP PFNGLGETNUNIFORMUIVPROC) (GLuint program, GLint location, GLsizei bufSize, GLuint *params); -typedef void (APIENTRYP PFNGLREADNPIXELSPROC) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); -typedef void (APIENTRYP PFNGLTEXTUREBARRIERPROC) (void); +typedef void(APIENTRYP PFNGLCLIPCONTROLPROC)(GLenum origin, GLenum depth); +typedef void(APIENTRYP PFNGLCREATETRANSFORMFEEDBACKSPROC)(GLsizei n, GLuint* ids); +typedef void(APIENTRYP PFNGLTRANSFORMFEEDBACKBUFFERBASEPROC)(GLuint xfb, GLuint index, GLuint buffer); +typedef void(APIENTRYP PFNGLTRANSFORMFEEDBACKBUFFERRANGEPROC)(GLuint xfb, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +typedef void(APIENTRYP PFNGLGETTRANSFORMFEEDBACKIVPROC)(GLuint xfb, GLenum pname, GLint* param); +typedef void(APIENTRYP PFNGLGETTRANSFORMFEEDBACKI_VPROC)(GLuint xfb, GLenum pname, GLuint index, GLint* param); +typedef void(APIENTRYP PFNGLGETTRANSFORMFEEDBACKI64_VPROC)(GLuint xfb, GLenum pname, GLuint index, GLint64* param); +typedef void(APIENTRYP PFNGLCREATEBUFFERSPROC)(GLsizei n, GLuint* buffers); +typedef void(APIENTRYP PFNGLNAMEDBUFFERSTORAGEPROC)(GLuint buffer, GLsizeiptr size, const void* data, GLbitfield flags); +typedef void(APIENTRYP PFNGLNAMEDBUFFERDATAPROC)(GLuint buffer, GLsizeiptr size, const void* data, GLenum usage); +typedef void(APIENTRYP PFNGLNAMEDBUFFERSUBDATAPROC)(GLuint buffer, GLintptr offset, GLsizeiptr size, const void* data); +typedef void(APIENTRYP PFNGLCOPYNAMEDBUFFERSUBDATAPROC)(GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); +typedef void(APIENTRYP PFNGLCLEARNAMEDBUFFERDATAPROC)(GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void* data); +typedef void(APIENTRYP PFNGLCLEARNAMEDBUFFERSUBDATAPROC)(GLuint buffer, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format, GLenum type, const void* data); +typedef void*(APIENTRYP PFNGLMAPNAMEDBUFFERPROC)(GLuint buffer, GLenum access); +typedef void*(APIENTRYP PFNGLMAPNAMEDBUFFERRANGEPROC)(GLuint buffer, GLintptr offset, GLsizeiptr length, GLbitfield access); +typedef GLboolean(APIENTRYP PFNGLUNMAPNAMEDBUFFERPROC)(GLuint buffer); +typedef void(APIENTRYP PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC)(GLuint buffer, GLintptr offset, GLsizeiptr length); +typedef void(APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERIVPROC)(GLuint buffer, GLenum pname, GLint* params); +typedef void(APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERI64VPROC)(GLuint buffer, GLenum pname, GLint64* params); +typedef void(APIENTRYP PFNGLGETNAMEDBUFFERPOINTERVPROC)(GLuint buffer, GLenum pname, void** params); +typedef void(APIENTRYP PFNGLGETNAMEDBUFFERSUBDATAPROC)(GLuint buffer, GLintptr offset, GLsizeiptr size, void* data); +typedef void(APIENTRYP PFNGLCREATEFRAMEBUFFERSPROC)(GLsizei n, GLuint* framebuffers); +typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC)(GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC)(GLuint framebuffer, GLenum pname, GLint param); +typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTUREPROC)(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level); +typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC)(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer); +typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC)(GLuint framebuffer, GLenum buf); +typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC)(GLuint framebuffer, GLsizei n, const GLenum* bufs); +typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC)(GLuint framebuffer, GLenum src); +typedef void(APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC)(GLuint framebuffer, GLsizei numAttachments, const GLenum* attachments); +typedef void(APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC)(GLuint framebuffer, GLsizei numAttachments, const GLenum* attachments, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void(APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERIVPROC)(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLint* value); +typedef void(APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC)(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLuint* value); +typedef void(APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFVPROC)(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLfloat* value); +typedef void(APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFIPROC)(GLuint framebuffer, GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); +typedef void(APIENTRYP PFNGLBLITNAMEDFRAMEBUFFERPROC)(GLuint readFramebuffer, GLuint drawFramebuffer, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +typedef GLenum(APIENTRYP PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC)(GLuint framebuffer, GLenum target); +typedef void(APIENTRYP PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVPROC)(GLuint framebuffer, GLenum pname, GLint* param); +typedef void(APIENTRYP PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVPROC)(GLuint framebuffer, GLenum attachment, GLenum pname, GLint* params); +typedef void(APIENTRYP PFNGLCREATERENDERBUFFERSPROC)(GLsizei n, GLuint* renderbuffers); +typedef void(APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEPROC)(GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height); +typedef void(APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEPROC)(GLuint renderbuffer, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void(APIENTRYP PFNGLGETNAMEDRENDERBUFFERPARAMETERIVPROC)(GLuint renderbuffer, GLenum pname, GLint* params); +typedef void(APIENTRYP PFNGLCREATETEXTURESPROC)(GLenum target, GLsizei n, GLuint* textures); +typedef void(APIENTRYP PFNGLTEXTUREBUFFERPROC)(GLuint texture, GLenum internalformat, GLuint buffer); +typedef void(APIENTRYP PFNGLTEXTUREBUFFERRANGEPROC)(GLuint texture, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size); +typedef void(APIENTRYP PFNGLTEXTURESTORAGE1DPROC)(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void(APIENTRYP PFNGLTEXTURESTORAGE2DPROC)(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void(APIENTRYP PFNGLTEXTURESTORAGE3DPROC)(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +typedef void(APIENTRYP PFNGLTEXTURESTORAGE2DMULTISAMPLEPROC)(GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +typedef void(APIENTRYP PFNGLTEXTURESTORAGE3DMULTISAMPLEPROC)(GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations); +typedef void(APIENTRYP PFNGLTEXTURESUBIMAGE1DPROC)(GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void* pixels); +typedef void(APIENTRYP PFNGLTEXTURESUBIMAGE2DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void* pixels); +typedef void(APIENTRYP PFNGLTEXTURESUBIMAGE3DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* pixels); +typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE1DPROC)(GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void* data); +typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void* data); +typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void* data); +typedef void(APIENTRYP PFNGLCOPYTEXTURESUBIMAGE1DPROC)(GLuint texture, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); +typedef void(APIENTRYP PFNGLCOPYTEXTURESUBIMAGE2DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void(APIENTRYP PFNGLCOPYTEXTURESUBIMAGE3DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void(APIENTRYP PFNGLTEXTUREPARAMETERFPROC)(GLuint texture, GLenum pname, GLfloat param); +typedef void(APIENTRYP PFNGLTEXTUREPARAMETERFVPROC)(GLuint texture, GLenum pname, const GLfloat* param); +typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIPROC)(GLuint texture, GLenum pname, GLint param); +typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIIVPROC)(GLuint texture, GLenum pname, const GLint* params); +typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIUIVPROC)(GLuint texture, GLenum pname, const GLuint* params); +typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIVPROC)(GLuint texture, GLenum pname, const GLint* param); +typedef void(APIENTRYP PFNGLGENERATETEXTUREMIPMAPPROC)(GLuint texture); +typedef void(APIENTRYP PFNGLBINDTEXTUREUNITPROC)(GLuint unit, GLuint texture); +typedef void(APIENTRYP PFNGLGETTEXTUREIMAGEPROC)(GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, void* pixels); +typedef void(APIENTRYP PFNGLGETCOMPRESSEDTEXTUREIMAGEPROC)(GLuint texture, GLint level, GLsizei bufSize, void* pixels); +typedef void(APIENTRYP PFNGLGETTEXTURELEVELPARAMETERFVPROC)(GLuint texture, GLint level, GLenum pname, GLfloat* params); +typedef void(APIENTRYP PFNGLGETTEXTURELEVELPARAMETERIVPROC)(GLuint texture, GLint level, GLenum pname, GLint* params); +typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERFVPROC)(GLuint texture, GLenum pname, GLfloat* params); +typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERIIVPROC)(GLuint texture, GLenum pname, GLint* params); +typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERIUIVPROC)(GLuint texture, GLenum pname, GLuint* params); +typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERIVPROC)(GLuint texture, GLenum pname, GLint* params); +typedef void(APIENTRYP PFNGLCREATEVERTEXARRAYSPROC)(GLsizei n, GLuint* arrays); +typedef void(APIENTRYP PFNGLDISABLEVERTEXARRAYATTRIBPROC)(GLuint vaobj, GLuint index); +typedef void(APIENTRYP PFNGLENABLEVERTEXARRAYATTRIBPROC)(GLuint vaobj, GLuint index); +typedef void(APIENTRYP PFNGLVERTEXARRAYELEMENTBUFFERPROC)(GLuint vaobj, GLuint buffer); +typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERPROC)(GLuint vaobj, GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride); +typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERSPROC)(GLuint vaobj, GLuint first, GLsizei count, const GLuint* buffers, const GLintptr* offsets, const GLsizei* strides); +typedef void(APIENTRYP PFNGLVERTEXARRAYATTRIBBINDINGPROC)(GLuint vaobj, GLuint attribindex, GLuint bindingindex); +typedef void(APIENTRYP PFNGLVERTEXARRAYATTRIBFORMATPROC)(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset); +typedef void(APIENTRYP PFNGLVERTEXARRAYATTRIBIFORMATPROC)(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +typedef void(APIENTRYP PFNGLVERTEXARRAYATTRIBLFORMATPROC)(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +typedef void(APIENTRYP PFNGLVERTEXARRAYBINDINGDIVISORPROC)(GLuint vaobj, GLuint bindingindex, GLuint divisor); +typedef void(APIENTRYP PFNGLGETVERTEXARRAYIVPROC)(GLuint vaobj, GLenum pname, GLint* param); +typedef void(APIENTRYP PFNGLGETVERTEXARRAYINDEXEDIVPROC)(GLuint vaobj, GLuint index, GLenum pname, GLint* param); +typedef void(APIENTRYP PFNGLGETVERTEXARRAYINDEXED64IVPROC)(GLuint vaobj, GLuint index, GLenum pname, GLint64* param); +typedef void(APIENTRYP PFNGLCREATESAMPLERSPROC)(GLsizei n, GLuint* samplers); +typedef void(APIENTRYP PFNGLCREATEPROGRAMPIPELINESPROC)(GLsizei n, GLuint* pipelines); +typedef void(APIENTRYP PFNGLCREATEQUERIESPROC)(GLenum target, GLsizei n, GLuint* ids); +typedef void(APIENTRYP PFNGLGETQUERYBUFFEROBJECTI64VPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset); +typedef void(APIENTRYP PFNGLGETQUERYBUFFEROBJECTIVPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset); +typedef void(APIENTRYP PFNGLGETQUERYBUFFEROBJECTUI64VPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset); +typedef void(APIENTRYP PFNGLGETQUERYBUFFEROBJECTUIVPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset); +typedef void(APIENTRYP PFNGLMEMORYBARRIERBYREGIONPROC)(GLbitfield barriers); +typedef void(APIENTRYP PFNGLGETTEXTURESUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLsizei bufSize, void* pixels); +typedef void(APIENTRYP PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLsizei bufSize, void* pixels); +typedef GLenum(APIENTRYP PFNGLGETGRAPHICSRESETSTATUSPROC)(void); +typedef void(APIENTRYP PFNGLGETNCOMPRESSEDTEXIMAGEPROC)(GLenum target, GLint lod, GLsizei bufSize, void* pixels); +typedef void(APIENTRYP PFNGLGETNTEXIMAGEPROC)(GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void* pixels); +typedef void(APIENTRYP PFNGLGETNUNIFORMDVPROC)(GLuint program, GLint location, GLsizei bufSize, GLdouble* params); +typedef void(APIENTRYP PFNGLGETNUNIFORMFVPROC)(GLuint program, GLint location, GLsizei bufSize, GLfloat* params); +typedef void(APIENTRYP PFNGLGETNUNIFORMIVPROC)(GLuint program, GLint location, GLsizei bufSize, GLint* params); +typedef void(APIENTRYP PFNGLGETNUNIFORMUIVPROC)(GLuint program, GLint location, GLsizei bufSize, GLuint* params); +typedef void(APIENTRYP PFNGLREADNPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void* data); +typedef void(APIENTRYP PFNGLTEXTUREBARRIERPROC)(void); #endif /* GL_VERSION_4_5 */ #ifndef GL_VERSION_4_6 @@ -543,10 +543,10 @@ typedef void (APIENTRYP PFNGLTEXTUREBARRIERPROC) (void); #define GL_MAX_TEXTURE_MAX_ANISOTROPY 0x84FF #define GL_TRANSFORM_FEEDBACK_OVERFLOW 0x82EC #define GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW 0x82ED -typedef void (APIENTRYP PFNGLSPECIALIZESHADERPROC) (GLuint shader, const GLchar *pEntryPoint, GLuint numSpecializationConstants, const GLuint *pConstantIndex, const GLuint *pConstantValue); -typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTPROC) (GLenum mode, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride); -typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTPROC) (GLenum mode, GLenum type, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride); -typedef void (APIENTRYP PFNGLPOLYGONOFFSETCLAMPPROC) (GLfloat factor, GLfloat units, GLfloat clamp); +typedef void(APIENTRYP PFNGLSPECIALIZESHADERPROC)(GLuint shader, const GLchar* pEntryPoint, GLuint numSpecializationConstants, const GLuint* pConstantIndex, const GLuint* pConstantValue); +typedef void(APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTPROC)(GLenum mode, const void* indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride); +typedef void(APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTPROC)(GLenum mode, GLenum type, const void* indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride); +typedef void(APIENTRYP PFNGLPOLYGONOFFSETCLAMPPROC)(GLfloat factor, GLfloat units, GLfloat clamp); #endif /* GL_VERSION_4_6 */ #ifdef __cplusplus diff --git a/plugins/GSdx/Renderers/SW/GSDrawScanline.cpp b/plugins/GSdx/Renderers/SW/GSDrawScanline.cpp index ec0f578075..7abe96e578 100644 --- a/plugins/GSdx/Renderers/SW/GSDrawScanline.cpp +++ b/plugins/GSdx/Renderers/SW/GSDrawScanline.cpp @@ -39,7 +39,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data) { memcpy(&m_global, &((const SharedData*)data)->global, sizeof(m_global)); - if(m_global.sel.mmin && m_global.sel.lcm) + if (m_global.sel.mmin && m_global.sel.lcm) { #if defined(__GNUC__) && _M_SSE >= 0x501 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80286 @@ -54,7 +54,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data) int lod_x = m_global.lod.i.x0; GSVector4i v = m_global.t.minmax.srl16(lod_x); #else - GSVector4i v = m_global.t.minmax.srl16(m_global.lod.i.extract32<0>());//.x); + GSVector4i v = m_global.t.minmax.srl16(m_global.lod.i.extract32<0>()); //.x); #endif v = v.upl16(v); @@ -65,7 +65,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data) m_ds = m_ds_map[m_global.sel]; - if(m_global.sel.aa1) + if (m_global.sel.aa1) { GSScanlineSelector sel; @@ -80,7 +80,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data) m_de = NULL; } - if(m_global.sel.IsSolidRect()) + if (m_global.sel.IsSolidRect()) { m_dr = (DrawRectPtr)&GSDrawScanline::DrawRect; } @@ -125,35 +125,35 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co bool has_t = sel.fb && sel.tfx != TFX_NONE; bool has_c = sel.fb && !(sel.tfx == TFX_DECAL && sel.tcc); - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 const GSVector8* shift = (GSVector8*)g_const->m_shift_256b; - if(has_z || has_f) + if (has_z || has_f) { - if(sel.prim != GS_SPRITE_CLASS) + if (sel.prim != GS_SPRITE_CLASS) { GSVector4 dp8 = dscan.p * GSVector4::broadcast32(&shift[0]); - if(has_f) + if (has_f) { m_local.d8.p.f = GSVector4i(dp8).extract32<3>(); GSVector8 df = GSVector8::broadcast32(&dscan.p.w); - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { m_local.d[i].f = GSVector8i(df * shift[1 + i]).xxzzlh(); } } - if(has_z) + if (has_z) { m_local.d8.p.z = dp8.extract32<2>(); GSVector8 dz = GSVector8::broadcast32(&dscan.p.z); - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { m_local.d[i].z = dz * shift[1 + i]; } @@ -161,23 +161,23 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co } else { - if(has_f) + if (has_f) { m_local.p.f = GSVector4i(vertex[index[1]].p).extract32<3>(); } - if(has_z) + if (has_z) { m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w } } } - if(has_t) + if (has_t) { GSVector4 dt8 = dscan.t * GSVector4::broadcast32(&shift[0]); - if(sel.fst) + if (sel.fst) { m_local.d8.stq = GSVector4::cast(GSVector4i(dt8)); } @@ -188,45 +188,45 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co GSVector8 dt(dscan.t); - for(int j = 0, k = sel.fst ? 2 : 3; j < k; j++) + for (int j = 0, k = sel.fst ? 2 : 3; j < k; j++) { GSVector8 dstq; - switch(j) + switch (j) { - case 0: dstq = dt.xxxx(); break; - case 1: dstq = dt.yyyy(); break; - case 2: dstq = dt.zzzz(); break; + case 0: dstq = dt.xxxx(); break; + case 1: dstq = dt.yyyy(); break; + case 2: dstq = dt.zzzz(); break; } - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { GSVector8 v = dstq * shift[1 + i]; - if(sel.fst) + if (sel.fst) { - switch(j) + switch (j) { - case 0: m_local.d[i].s = GSVector8::cast(GSVector8i(v)); break; - case 1: m_local.d[i].t = GSVector8::cast(GSVector8i(v)); break; + case 0: m_local.d[i].s = GSVector8::cast(GSVector8i(v)); break; + case 1: m_local.d[i].t = GSVector8::cast(GSVector8i(v)); break; } } else { - switch(j) + switch (j) { - case 0: m_local.d[i].s = v; break; - case 1: m_local.d[i].t = v; break; - case 2: m_local.d[i].q = v; break; + case 0: m_local.d[i].s = v; break; + case 1: m_local.d[i].t = v; break; + case 2: m_local.d[i].q = v; break; } } } } } - if(has_c) + if (has_c) { - if(sel.iip) + if (sel.iip) { GSVector4 dc8 = dscan.c * GSVector4::broadcast32(&shift[0]); @@ -237,7 +237,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co GSVector8 dr = dc.xxxx(); GSVector8 db = dc.zzzz(); - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32(); GSVector8i b = GSVector8i(db * shift[1 + i]).ps32(); @@ -248,7 +248,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co GSVector8 dg = dc.yyyy(); GSVector8 da = dc.wwww(); - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32(); GSVector8i a = GSVector8i(da * shift[1 + i]).ps32(); @@ -259,53 +259,54 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co else { int last = 0; - - switch(sel.prim) + + switch (sel.prim) { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; + case GS_POINT_CLASS: last = 0; break; + case GS_LINE_CLASS: last = 1; break; + case GS_TRIANGLE_CLASS: last = 2; break; + case GS_SPRITE_CLASS: last = 1; break; } GSVector8i c = GSVector8i(GSVector8(vertex[index[last]].c)); c = c.upl16(c.zwxy()); - if(sel.tfx == TFX_NONE) c = c.srl16(7); + if (sel.tfx == TFX_NONE) + c = c.srl16(7); m_local.c.rb = c.xxxx(); m_local.c.ga = c.zzzz(); } } - #else +#else const GSVector4* shift = (GSVector4*)g_const->m_shift_128b; - if(has_z || has_f) + if (has_z || has_f) { - if(sel.prim != GS_SPRITE_CLASS) + if (sel.prim != GS_SPRITE_CLASS) { - if(has_f) + if (has_f) { GSVector4 df = dscan.p.wwww(); m_local.d4.f = GSVector4i(df * shift[0]).xxzzlh(); - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { m_local.d[i].f = GSVector4i(df * shift[1 + i]).xxzzlh(); } } - if(has_z) + if (has_z) { GSVector4 dz = dscan.p.zzzz(); m_local.d4.z = dz * shift[0]; - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { m_local.d[i].z = dz * shift[1 + i]; } @@ -313,23 +314,23 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co } else { - if(has_f) + if (has_f) { m_local.p.f = GSVector4i(vertex[index[1]].p).zzzzh().zzzz(); } - if(has_z) + if (has_z) { m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w } } } - if(has_t) + if (has_t) { GSVector4 t = dscan.t; - if(sel.fst) + if (sel.fst) { m_local.d4.stq = GSVector4::cast(GSVector4i(t * shift[0])); } @@ -338,52 +339,52 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co m_local.d4.stq = t * shift[0]; } - for(int j = 0, k = sel.fst ? 2 : 3; j < k; j++) + for (int j = 0, k = sel.fst ? 2 : 3; j < k; j++) { GSVector4 dstq; - switch(j) + switch (j) { - case 0: dstq = t.xxxx(); break; - case 1: dstq = t.yyyy(); break; - case 2: dstq = t.zzzz(); break; + case 0: dstq = t.xxxx(); break; + case 1: dstq = t.yyyy(); break; + case 2: dstq = t.zzzz(); break; } - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { GSVector4 v = dstq * shift[1 + i]; - if(sel.fst) + if (sel.fst) { - switch(j) + switch (j) { - case 0: m_local.d[i].s = GSVector4::cast(GSVector4i(v)); break; - case 1: m_local.d[i].t = GSVector4::cast(GSVector4i(v)); break; + case 0: m_local.d[i].s = GSVector4::cast(GSVector4i(v)); break; + case 1: m_local.d[i].t = GSVector4::cast(GSVector4i(v)); break; } } else { - switch(j) + switch (j) { - case 0: m_local.d[i].s = v; break; - case 1: m_local.d[i].t = v; break; - case 2: m_local.d[i].q = v; break; + case 0: m_local.d[i].s = v; break; + case 1: m_local.d[i].t = v; break; + case 2: m_local.d[i].q = v; break; } } } } } - if(has_c) + if (has_c) { - if(sel.iip) + if (sel.iip) { m_local.d4.c = GSVector4i(dscan.c * shift[0]).xzyw().ps32(); GSVector4 dr = dscan.c.xxxx(); GSVector4 db = dscan.c.zzzz(); - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { GSVector4i r = GSVector4i(dr * shift[1 + i]).ps32(); GSVector4i b = GSVector4i(db * shift[1 + i]).ps32(); @@ -394,7 +395,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co GSVector4 dg = dscan.c.yyyy(); GSVector4 da = dscan.c.wwww(); - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { GSVector4i g = GSVector4i(dg * shift[1 + i]).ps32(); GSVector4i a = GSVector4i(da * shift[1 + i]).ps32(); @@ -405,34 +406,35 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co else { int last = 0; - - switch(sel.prim) + + switch (sel.prim) { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; + case GS_POINT_CLASS: last = 0; break; + case GS_LINE_CLASS: last = 1; break; + case GS_TRIANGLE_CLASS: last = 2; break; + case GS_SPRITE_CLASS: last = 1; break; } GSVector4i c = GSVector4i(vertex[index[last]].c); c = c.upl16(c.zwxy()); - if(sel.tfx == TFX_NONE) c = c.srl16(7); + if (sel.tfx == TFX_NONE) + c = c.srl16(7); m_local.c.rb = c.xxxx(); m_local.c.ga = c.zzzz(); } } - #endif +#endif } void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) { GSScanlineSelector sel = m_global.sel; - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 GSVector8i test; GSVector8 zo; @@ -446,7 +448,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS int skip, steps; - if(!sel.notest) + if (!sel.notest) { skip = left & 7; steps = pixels + skip - 8; @@ -458,46 +460,46 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS skip = 0; steps = pixels - 8; } - + ASSERT((left & 7) == 0); const GSVector2i* fza_base = &m_global.fzbr[top]; const GSVector2i* fza_offset = &m_global.fzbc[left >> 2]; - if(sel.prim != GS_SPRITE_CLASS) + if (sel.prim != GS_SPRITE_CLASS) { - if(sel.fwrite && sel.fge) + if (sel.fwrite && sel.fge) { f = GSVector8i::broadcast16(GSVector4i(scan.p).srl<12>()).add16(m_local.d[skip].f); } - if(sel.zb) + if (sel.zb) { zo = m_local.d[skip].z; } } - if(sel.fb) + if (sel.fb) { - if(sel.edge) + if (sel.edge) { cov = GSVector8i::broadcast16(GSVector4i::cast(scan.t).srl<12>()).srl16(9); } - if(sel.tfx != TFX_NONE) + if (sel.tfx != TFX_NONE) { - if(sel.fst) + if (sel.fst) { GSVector4i vt(scan.t); GSVector8i u = GSVector8i::broadcast32(vt.xxxx()) + GSVector8i::cast(m_local.d[skip].s); GSVector8i v = GSVector8i::broadcast32(vt.yyyy()); - if(sel.prim != GS_SPRITE_CLASS || sel.mmin) + if (sel.prim != GS_SPRITE_CLASS || sel.mmin) { v += GSVector8i::cast(m_local.d[skip].t); } - else if(sel.ltf) + else if (sel.ltf) { vf = v.xxzzlh().srl16(12); } @@ -513,9 +515,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } } - if(!(sel.tfx == TFX_DECAL && sel.tcc)) + if (!(sel.tfx == TFX_DECAL && sel.tcc)) { - if(sel.iip) + if (sel.iip) { GSVector4i c(scan.c); @@ -532,7 +534,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } } - while(1) + while (1) { do { @@ -543,15 +545,15 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // TestZ - if(sel.zb) + if (sel.zb) { za = (fza_base->y + fza_offset->y) % HALF_VM_SIZE; - if(sel.prim != GS_SPRITE_CLASS) + if (sel.prim != GS_SPRITE_CLASS) { GSVector8 z = GSVector8::broadcast32(&scan.p.z) + zo; - if(sel.zoverflow) + if (sel.zoverflow) { zs = (GSVector8i(z * 0.5f) << 1) | (GSVector8i(z) & GSVector8i::x00000001()); } @@ -565,23 +567,23 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS zs = GSVector8i::broadcast32(&m_local.p.z); } - if(sel.ztest) + if (sel.ztest) { zd = GSVector8i::load( (uint8*)m_global.vm + za * 2, (uint8*)m_global.vm + za * 2 + 16, (uint8*)m_global.vm + za * 2 + 32, (uint8*)m_global.vm + za * 2 + 48); - switch(sel.zpsm) + switch (sel.zpsm) { - case 1: zd = zd.sll32(8).srl32(8); break; - case 2: zd = zd.sll32(16).srl32(16); break; - default: break; + case 1: zd = zd.sll32( 8).srl32( 8); break; + case 2: zd = zd.sll32(16).srl32(16); break; + default: break; } GSVector8i zso = zs; GSVector8i zdo = zd; - if(sel.zoverflow || sel.zpsm == 0) + if (sel.zoverflow || sel.zpsm == 0) { zso -= GSVector8i::x80000000(); zdo -= GSVector8i::x80000000(); @@ -590,19 +592,20 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS if (sel.zclamp) zso = zso.min_u32(GSVector8i::xffffffff().srl32(sel.zpsm * 8)); - switch(sel.ztst) + switch (sel.ztst) { - case ZTST_GEQUAL: test |= zso < zdo; break; + case ZTST_GEQUAL: test |= zso < zdo; break; case ZTST_GREATER: test |= zso <= zdo; break; } - if(test.alltrue()) continue; + if (test.alltrue()) + continue; } } // SampleTexture - if(sel.fb && sel.tfx != TFX_NONE) + if (sel.fb && sel.tfx != TFX_NONE) { GSVector8i u, v, uv[2]; GSVector8i lodi, lodf; @@ -610,9 +613,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS GSVector8i addr00, addr01, addr10, addr11; GSVector8i c00, c01, c10, c11; - if(sel.mmin) + if (sel.mmin) { - if(!sel.fst) + if (!sel.fst) { u = GSVector8i(s / q); v = GSVector8i(t / q); @@ -623,20 +626,20 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS v = GSVector8i::cast(t); } - if(!sel.lcm) + if (!sel.lcm) { GSVector8 tmp = q.log2(3) * m_global.l + m_global.k; // (-log2(Q) * (1 << L) + K) * 0x10000 - + GSVector8i lod = GSVector8i(tmp.sat(GSVector8::zero(), m_global.mxl), false); - if(sel.mmin == 1) // round-off mode + if (sel.mmin == 1) // round-off mode { lod += 0x8000; } lodi = lod.srl32(16); - if(sel.mmin == 2) // trilinear mode + if (sel.mmin == 2) // trilinear mode { lodf = lod.xxzzlh(); } @@ -674,7 +677,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS maxuv = m_local.temp.uv_minmax[1]; } - if(sel.ltf) + if (sel.ltf) { u -= 0x8000; v -= 0x8000; @@ -689,24 +692,24 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { GSVector8i repeat = (uv0 & minuv) | maxuv; GSVector8i clamp = uv0.sat_i16(minuv, maxuv); - + uv0 = clamp.blend8(repeat, GSVector8i::broadcast128(m_global.t.mask)); } - if(sel.ltf) + if (sel.ltf) { uv1 = uv1.add16(GSVector8i::x0001()); GSVector8i repeat = (uv1 & minuv) | maxuv; GSVector8i clamp = uv1.sat_i16(minuv, maxuv); - + uv1 = clamp.blend8(repeat, GSVector8i::broadcast128(m_global.t.mask)); } GSVector8i y0 = uv0.uph16() << (sel.tw + 3); GSVector8i x0 = uv0.upl16(); - if(sel.ltf) + if (sel.ltf) { GSVector8i y1 = uv1.uph16() << (sel.tw + 3); GSVector8i x1 = uv1.upl16(); @@ -716,9 +719,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS addr10 = y1 + x0; addr11 = y1 + x1; - if(sel.tlu) + if (sel.tlu) { - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { const uint8* tex = (const uint8*)m_global.tex[lodi.u32[i]]; @@ -730,7 +733,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else { - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { const uint32* tex = (const uint32*)m_global.tex[lodi.u32[i]]; @@ -740,7 +743,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS c11.u32[i] = tex[addr11.u32[i]]; } } - + GSVector8i rb00 = c00.sll16(8).srl16(8); GSVector8i ga00 = c00.srl16(8); GSVector8i rb01 = c01.sll16(8).srl16(8); @@ -764,16 +767,16 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { addr00 = y0 + x0; - if(sel.tlu) + if (sel.tlu) { - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { c00.u32[i] = m_global.clut[((const uint8*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]]; } } else { - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { c00.u32[i] = ((const uint32*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]; } @@ -783,7 +786,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS ga = c00.srl16(8); } - if(sel.mmin != 1) // !round-off mode + if (sel.mmin != 1) // !round-off mode { GSVector8i rb2, ga2; @@ -795,7 +798,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS minuv = minuv.srl16(1); maxuv = maxuv.srl16(1); - if(sel.ltf) + if (sel.ltf) { u -= 0x8000; v -= 0x8000; @@ -810,24 +813,24 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { GSVector8i repeat = (uv0 & minuv) | maxuv; GSVector8i clamp = uv0.sat_i16(minuv, maxuv); - + uv0 = clamp.blend8(repeat, GSVector8i::broadcast128(m_global.t.mask)); } - if(sel.ltf) + if (sel.ltf) { uv1 = uv1.add16(GSVector8i::x0001()); GSVector8i repeat = (uv1 & minuv) | maxuv; GSVector8i clamp = uv1.sat_i16(minuv, maxuv); - + uv1 = clamp.blend8(repeat, GSVector8i::broadcast128(m_global.t.mask)); } GSVector8i y0 = uv0.uph16() << (sel.tw + 3); GSVector8i x0 = uv0.upl16(); - if(sel.ltf) + if (sel.ltf) { GSVector8i y1 = uv1.uph16() << (sel.tw + 3); GSVector8i x1 = uv1.upl16(); @@ -837,9 +840,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS addr10 = y1 + x0; addr11 = y1 + x1; - if(sel.tlu) + if (sel.tlu) { - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { const uint8* tex = (const uint8*)m_global.tex[lodi.u32[i]]; @@ -851,7 +854,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else { - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { const uint32* tex = (const uint32*)m_global.tex[lodi.u32[i]]; @@ -861,7 +864,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS c11.u32[i] = tex[addr11.u32[i]]; } } - + GSVector8i rb00 = c00.sll16(8).srl16(8); GSVector8i ga00 = c00.srl16(8); GSVector8i rb01 = c01.sll16(8).srl16(8); @@ -885,16 +888,16 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { addr00 = y0 + x0; - if(sel.tlu) + if (sel.tlu) { - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { c00.u32[i] = m_global.clut[((const uint8*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]]; } } else { - for(int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) { c00.u32[i] = ((const uint32*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]; } @@ -904,7 +907,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS ga2 = c00.srl16(8); } - if(sel.lcm) lodf = m_global.lod.f; + if (sel.lcm) + lodf = m_global.lod.f; lodf = lodf.srl16(1); @@ -914,12 +918,12 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else { - if(!sel.fst) + if (!sel.fst) { u = GSVector8i(s / q); v = GSVector8i(t / q); - if(sel.ltf) + if (sel.ltf) { u -= 0x8000; v -= 0x8000; @@ -931,11 +935,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS v = GSVector8i::cast(t); } - if(sel.ltf) + if (sel.ltf) { uf = u.xxzzlh().srl16(12); - - if(sel.prim != GS_SPRITE_CLASS) + + if (sel.prim != GS_SPRITE_CLASS) { vf = v.xxzzlh().srl16(12); } @@ -950,24 +954,24 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { GSVector8i repeat = (uv0 & tmin) | tmax; GSVector8i clamp = uv0.sat_i16(tmin, tmax); - + uv0 = clamp.blend8(repeat, GSVector8i::broadcast128(m_global.t.mask)); } - if(sel.ltf) + if (sel.ltf) { uv1 = uv1.add16(GSVector8i::x0001()); GSVector8i repeat = (uv1 & tmin) | tmax; GSVector8i clamp = uv1.sat_i16(tmin, tmax); - + uv1 = clamp.blend8(repeat, GSVector8i::broadcast128(m_global.t.mask)); } GSVector8i y0 = uv0.uph16() << (sel.tw + 3); GSVector8i x0 = uv0.upl16(); - if(sel.ltf) + if (sel.ltf) { GSVector8i y1 = uv1.uph16() << (sel.tw + 3); GSVector8i x1 = uv1.upl16(); @@ -977,7 +981,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS addr10 = y1 + x0; addr11 = y1 + x1; - if(sel.tlu) + if (sel.tlu) { const uint8* tex = (const uint8*)m_global.tex[0]; @@ -995,7 +999,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS c10 = addr10.gather32_32(tex); c11 = addr11.gather32_32(tex); } - + GSVector8i rb00 = c00.sll16(8).srl16(8); GSVector8i ga00 = c00.srl16(8); GSVector8i rb01 = c01.sll16(8).srl16(8); @@ -1019,7 +1023,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { addr00 = y0 + x0; - if(sel.tlu) + if (sel.tlu) { c00 = addr00.gather32_32((const uint8*)m_global.tex[0], m_global.clut); } @@ -1036,35 +1040,38 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // AlphaTFX - if(sel.fb) + if (sel.fb) { - switch(sel.tfx) + switch (sel.tfx) { - case TFX_MODULATE: - ga = ga.modulate16<1>(gaf).clamp8(); - if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); - break; - case TFX_DECAL: - if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); - break; - case TFX_HIGHLIGHT: - ga = ga.mix16(!sel.tcc ? gaf.srl16(7) : ga.addus8(gaf.srl16(7))); - break; - case TFX_HIGHLIGHT2: - if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); - break; - case TFX_NONE: - ga = sel.iip ? gaf.srl16(7) : gaf; - break; + case TFX_MODULATE: + ga = ga.modulate16<1>(gaf).clamp8(); + if (!sel.tcc) + ga = ga.mix16(gaf.srl16(7)); + break; + case TFX_DECAL: + if (!sel.tcc) + ga = ga.mix16(gaf.srl16(7)); + break; + case TFX_HIGHLIGHT: + ga = ga.mix16(!sel.tcc ? gaf.srl16(7) : ga.addus8(gaf.srl16(7))); + break; + case TFX_HIGHLIGHT2: + if (!sel.tcc) + ga = ga.mix16(gaf.srl16(7)); + break; + case TFX_NONE: + ga = sel.iip ? gaf.srl16(7) : gaf; + break; } - if(sel.aa1) + if (sel.aa1) { GSVector8i x00800080(0x00800080); GSVector8i a = sel.edge ? cov : x00800080; - if(!sel.abe) + if (!sel.abe) { ga = ga.mix16(a); } @@ -1077,48 +1084,49 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // ReadMask - if(sel.fwrite) + if (sel.fwrite) { fm = m_global.fm; } - if(sel.zwrite) + if (sel.zwrite) { zm = m_global.zm; } // TestAlpha - if(!TestAlpha(test, fm, zm, ga)) continue; + if (!TestAlpha(test, fm, zm, ga)) + continue; // ColorTFX - if(sel.fwrite) + if (sel.fwrite) { GSVector8i af; - switch(sel.tfx) + switch (sel.tfx) { - case TFX_MODULATE: - rb = rb.modulate16<1>(rbf).clamp8(); - break; - case TFX_DECAL: - break; - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: - af = gaf.yywwlh().srl16(7); - rb = rb.modulate16<1>(rbf).add16(af).clamp8(); - ga = ga.modulate16<1>(gaf).add16(af).clamp8().mix16(ga); - break; - case TFX_NONE: - rb = sel.iip ? rbf.srl16(7) : rbf; - break; + case TFX_MODULATE: + rb = rb.modulate16<1>(rbf).clamp8(); + break; + case TFX_DECAL: + break; + case TFX_HIGHLIGHT: + case TFX_HIGHLIGHT2: + af = gaf.yywwlh().srl16(7); + rb = rb.modulate16<1>(rbf).add16(af).clamp8(); + ga = ga.modulate16<1>(gaf).add16(af).clamp8().mix16(ga); + break; + case TFX_NONE: + rb = sel.iip ? rbf.srl16(7) : rbf; + break; } } // Fog - if(sel.fwrite && sel.fge) + if (sel.fwrite && sel.fge) { GSVector8i fog = sel.prim != GS_SPRITE_CLASS ? f : GSVector8i::broadcast16(&m_local.p.f); @@ -1140,11 +1148,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // ReadFrame - if(sel.fb) + if (sel.fb) { fa = (fza_base->x + fza_offset->x) % HALF_VM_SIZE; - if(sel.rfb) + if (sel.rfb) { fd = GSVector8i::load( (uint8*)m_global.vm + fa * 2, (uint8*)m_global.vm + fa * 2 + 16, @@ -1154,11 +1162,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // TestDestAlpha - if(sel.date && (sel.fpsm == 0 || sel.fpsm == 2)) + if (sel.date && (sel.fpsm == 0 || sel.fpsm == 2)) { - if(sel.datm) + if (sel.datm) { - if(sel.fpsm == 2) + if (sel.fpsm == 2) { // test |= fd.srl32(15) == GSVector8i::zero(); test |= fd.sll32(16).sra32(31) == GSVector8i::zero(); @@ -1170,7 +1178,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else { - if(sel.fpsm == 2) + if (sel.fpsm == 2) { test |= fd.sll32(16).sra32(31); // == GSVector8i::xffffffff(); } @@ -1180,34 +1188,35 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } } - if(test.alltrue()) continue; + if (test.alltrue()) + continue; } // WriteMask int fzm = 0; - if(!sel.notest) + if (!sel.notest) { - if(sel.fwrite) + if (sel.fwrite) { fm |= test; } - if(sel.zwrite) + if (sel.zwrite) { zm |= test; } - if(sel.fwrite && sel.zwrite) + if (sel.fwrite && sel.zwrite) { fzm = ~(fm == GSVector8i::xffffffff()).ps32(zm == GSVector8i::xffffffff()).mask(); } - else if(sel.fwrite) + else if (sel.fwrite) { fzm = ~(fm == GSVector8i::xffffffff()).ps32().mask(); } - else if(sel.zwrite) + else if (sel.zwrite) { fzm = ~(zm == GSVector8i::xffffffff()).ps32().mask(); } @@ -1215,9 +1224,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // WriteZBuf - if(sel.zwrite) + if (sel.zwrite) { - if(sel.ztest && sel.zpsm < 2) + if (sel.ztest && sel.zpsm < 2) { zs = zs.blend8(zd, zm); } @@ -1227,9 +1236,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS bool fast = sel.ztest ? sel.zpsm < 2 : sel.zpsm == 0 && sel.notest; - if(sel.notest) + if (sel.notest) { - if(fast) + if (fast) { GSVector4i::storel((uint8*)m_global.vm + za * 2, zs.extract<0>()); GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs.extract<0>()); @@ -1250,146 +1259,146 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else { - if(fast) + if (fast) { - if(fzm & 0x00000f00) GSVector4i::storel((uint8*)m_global.vm + za * 2, zs.extract<0>()); - if(fzm & 0x0000f000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs.extract<0>()); - if(fzm & 0x0f000000) GSVector4i::storel((uint8*)m_global.vm + za * 2 + 32, zs.extract<1>()); - if(fzm & 0xf0000000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 48, zs.extract<1>()); + if (fzm & 0x00000f00) GSVector4i::storel((uint8*)m_global.vm + za * 2, zs.extract<0>()); + if (fzm & 0x0000f000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs.extract<0>()); + if (fzm & 0x0f000000) GSVector4i::storel((uint8*)m_global.vm + za * 2 + 32, zs.extract<1>()); + if (fzm & 0xf0000000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 48, zs.extract<1>()); } else { - if(fzm & 0x00000300) WritePixel(zs, za, 0, sel.zpsm); - if(fzm & 0x00000c00) WritePixel(zs, za, 1, sel.zpsm); - if(fzm & 0x00003000) WritePixel(zs, za, 2, sel.zpsm); - if(fzm & 0x0000c000) WritePixel(zs, za, 3, sel.zpsm); - if(fzm & 0x03000000) WritePixel(zs, za, 4, sel.zpsm); - if(fzm & 0x0c000000) WritePixel(zs, za, 5, sel.zpsm); - if(fzm & 0x30000000) WritePixel(zs, za, 6, sel.zpsm); - if(fzm & 0xc0000000) WritePixel(zs, za, 7, sel.zpsm); + if (fzm & 0x00000300) WritePixel(zs, za, 0, sel.zpsm); + if (fzm & 0x00000c00) WritePixel(zs, za, 1, sel.zpsm); + if (fzm & 0x00003000) WritePixel(zs, za, 2, sel.zpsm); + if (fzm & 0x0000c000) WritePixel(zs, za, 3, sel.zpsm); + if (fzm & 0x03000000) WritePixel(zs, za, 4, sel.zpsm); + if (fzm & 0x0c000000) WritePixel(zs, za, 5, sel.zpsm); + if (fzm & 0x30000000) WritePixel(zs, za, 6, sel.zpsm); + if (fzm & 0xc0000000) WritePixel(zs, za, 7, sel.zpsm); } } } // AlphaBlend - if(sel.fwrite && (sel.abe || sel.aa1)) + if (sel.fwrite && (sel.abe || sel.aa1)) { GSVector8i rbs = rb, gas = ga, rbd, gad, a, mask; - if(sel.aba != sel.abb && (sel.aba == 1 || sel.abb == 1 || sel.abc == 1) || sel.abd == 1) + if (sel.aba != sel.abb && (sel.aba == 1 || sel.abb == 1 || sel.abc == 1) || sel.abd == 1) { - switch(sel.fpsm) + switch (sel.fpsm) { - case 0: - case 1: - rbd = fd.sll16(8).srl16(8); - gad = fd.srl16(8); - break; - case 2: - rbd = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - gad = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - break; + case 0: + case 1: + rbd = fd.sll16(8).srl16(8); + gad = fd.srl16(8); + break; + case 2: + rbd = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); + gad = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); + break; } } - if(sel.aba != sel.abb) + if (sel.aba != sel.abb) { switch(sel.aba) { - case 0: break; - case 1: rb = rbd; break; - case 2: rb = GSVector8i::zero(); break; + case 0: break; + case 1: rb = rbd; break; + case 2: rb = GSVector8i::zero(); break; } switch(sel.abb) { - case 0: rb = rb.sub16(rbs); break; - case 1: rb = rb.sub16(rbd); break; - case 2: break; + case 0: rb = rb.sub16(rbs); break; + case 1: rb = rb.sub16(rbd); break; + case 2: break; } if(!(sel.fpsm == 1 && sel.abc == 1)) { switch(sel.abc) { - case 0: a = gas.yywwlh().sll16(7); break; - case 1: a = gad.yywwlh().sll16(7); break; - case 2: a = m_global.afix; break; + case 0: a = gas.yywwlh().sll16(7); break; + case 1: a = gad.yywwlh().sll16(7); break; + case 2: a = m_global.afix; break; } rb = rb.modulate16<1>(a); } - switch(sel.abd) + switch (sel.abd) { - case 0: rb = rb.add16(rbs); break; - case 1: rb = rb.add16(rbd); break; - case 2: break; + case 0: rb = rb.add16(rbs); break; + case 1: rb = rb.add16(rbd); break; + case 2: break; } } else { - switch(sel.abd) + switch (sel.abd) { - case 0: break; - case 1: rb = rbd; break; - case 2: rb = GSVector8i::zero(); break; + case 0: break; + case 1: rb = rbd; break; + case 2: rb = GSVector8i::zero(); break; } } - if(sel.pabe) + if (sel.pabe) { mask = (gas << 8).sra32(31); - + rb = rbs.blend8(rb, mask); } - - if(sel.aba != sel.abb) + + if (sel.aba != sel.abb) { switch(sel.aba) { - case 0: break; - case 1: ga = gad; break; - case 2: ga = GSVector8i::zero(); break; + case 0: break; + case 1: ga = gad; break; + case 2: ga = GSVector8i::zero(); break; } switch(sel.abb) { - case 0: ga = ga.sub16(gas); break; - case 1: ga = ga.sub16(gad); break; - case 2: break; + case 0: ga = ga.sub16(gas); break; + case 1: ga = ga.sub16(gad); break; + case 2: break; } - if(!(sel.fpsm == 1 && sel.abc == 1)) + if (!(sel.fpsm == 1 && sel.abc == 1)) { ga = ga.modulate16<1>(a); } - switch(sel.abd) + switch (sel.abd) { - case 0: ga = ga.add16(gas); break; - case 1: ga = ga.add16(gad); break; - case 2: break; + case 0: ga = ga.add16(gas); break; + case 1: ga = ga.add16(gad); break; + case 2: break; } } else { - switch(sel.abd) + switch (sel.abd) { - case 0: break; - case 1: ga = gad; break; - case 2: ga = GSVector8i::zero(); break; + case 0: break; + case 1: ga = gad; break; + case 2: ga = GSVector8i::zero(); break; } } - if(sel.pabe) + if (sel.pabe) { ga = gas.blend8(ga, mask >> 16); } else { - if(sel.fpsm != 1) + if (sel.fpsm != 1) { ga = ga.mix16(gas); } @@ -1398,9 +1407,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // WriteFrame - if(sel.fwrite) + if (sel.fwrite) { - if(sel.fpsm == 2 && sel.dthe) + if (sel.fpsm == 2 && sel.dthe) { int y = (top & 3) << 1; @@ -1408,7 +1417,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS ga = ga.add16(GSVector8i::broadcast128(m_global.dimx[1 + y])); } - if(sel.colclamp == 0) + if (sel.colclamp == 0) { rb &= GSVector8i::x00ff(); ga &= GSVector8i::x00ff(); @@ -1416,12 +1425,12 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS GSVector8i fs = rb.upl16(ga).pu16(rb.uph16(ga)); - if(sel.fba && sel.fpsm != 1) + if (sel.fba && sel.fpsm != 1) { fs |= GSVector8i::x80000000(); } - if(sel.fpsm == 2) + if (sel.fpsm == 2) { GSVector8i rb = fs & 0x00f800f8; GSVector8i ga = fs & 0x8000f800; @@ -1429,16 +1438,16 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS fs = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3); } - if(sel.rfb) + if (sel.rfb) { fs = fs.blend(fd, fm); } bool fast = sel.rfb ? sel.fpsm < 2 : sel.fpsm == 0 && sel.notest; - if(sel.notest) + if (sel.notest) { - if(fast) + if (fast) { GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs.extract<0>()); GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs.extract<0>()); @@ -1459,63 +1468,64 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else { - if(fast) + if (fast) { - if(fzm & 0x0000000f) GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs.extract<0>()); - if(fzm & 0x000000f0) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs.extract<0>()); - if(fzm & 0x000f0000) GSVector4i::storel((uint8*)m_global.vm + fa * 2 + 32, fs.extract<1>()); - if(fzm & 0x00f00000) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 48, fs.extract<1>()); + if (fzm & 0x0000000f) GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs.extract<0>()); + if (fzm & 0x000000f0) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs.extract<0>()); + if (fzm & 0x000f0000) GSVector4i::storel((uint8*)m_global.vm + fa * 2 + 32, fs.extract<1>()); + if (fzm & 0x00f00000) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 48, fs.extract<1>()); } else { - if(fzm & 0x00000003) WritePixel(fs, fa, 0, sel.fpsm); - if(fzm & 0x0000000c) WritePixel(fs, fa, 1, sel.fpsm); - if(fzm & 0x00000030) WritePixel(fs, fa, 2, sel.fpsm); - if(fzm & 0x000000c0) WritePixel(fs, fa, 3, sel.fpsm); - if(fzm & 0x00030000) WritePixel(fs, fa, 4, sel.fpsm); - if(fzm & 0x000c0000) WritePixel(fs, fa, 5, sel.fpsm); - if(fzm & 0x00300000) WritePixel(fs, fa, 6, sel.fpsm); - if(fzm & 0x00c00000) WritePixel(fs, fa, 7, sel.fpsm); + if (fzm & 0x00000003) WritePixel(fs, fa, 0, sel.fpsm); + if (fzm & 0x0000000c) WritePixel(fs, fa, 1, sel.fpsm); + if (fzm & 0x00000030) WritePixel(fs, fa, 2, sel.fpsm); + if (fzm & 0x000000c0) WritePixel(fs, fa, 3, sel.fpsm); + if (fzm & 0x00030000) WritePixel(fs, fa, 4, sel.fpsm); + if (fzm & 0x000c0000) WritePixel(fs, fa, 5, sel.fpsm); + if (fzm & 0x00300000) WritePixel(fs, fa, 6, sel.fpsm); + if (fzm & 0x00c00000) WritePixel(fs, fa, 7, sel.fpsm); } } } - } - while(0); + } while (0); - if(sel.edge) break; + if (sel.edge) + break; - if(steps <= 0) break; + if (steps <= 0) + break; // Step - + steps -= 8; fza_offset += 2; - if(sel.prim != GS_SPRITE_CLASS) + if (sel.prim != GS_SPRITE_CLASS) { - if(sel.zb) + if (sel.zb) { zo += GSVector8::broadcast32(&m_local.d8.p.z); } - if(sel.fwrite && sel.fge) + if (sel.fwrite && sel.fge) { f = f.add16(GSVector8i::broadcast16(&m_local.d8.p.f)); } } - if(sel.fb) + if (sel.fb) { - if(sel.tfx != TFX_NONE) + if (sel.tfx != TFX_NONE) { - if(sel.fst) + if (sel.fst) { GSVector8i stq = GSVector8i::cast(GSVector8(m_local.d8.stq)); s = GSVector8::cast(GSVector8i::cast(s) + stq.xxxx()); - - if(sel.prim != GS_SPRITE_CLASS || sel.mmin) + + if (sel.prim != GS_SPRITE_CLASS || sel.mmin) { t = GSVector8::cast(GSVector8i::cast(t) + stq.yyyy()); } @@ -1531,9 +1541,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } } - if(!(sel.tfx == TFX_DECAL && sel.tcc)) + if (!(sel.tfx == TFX_DECAL && sel.tcc)) { - if(sel.iip) + if (sel.iip) { GSVector8i c = GSVector8i::broadcast64(&m_local.d8.c); @@ -1542,13 +1552,13 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } } - if(!sel.notest) + if (!sel.notest) { test = GSVector8i::i8to32c(g_const->m_test_256b[15 + (steps & (steps >> 31))]); } } - #else +#else const GSVector4i* const_test = (GSVector4i*)g_const->m_test_128b; GSVector4i test; @@ -1563,7 +1573,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS int skip, steps; - if(!sel.notest) + if (!sel.notest) { skip = left & 3; steps = pixels + skip - 4; @@ -1581,40 +1591,40 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS const GSVector2i* fza_base = &m_global.fzbr[top]; const GSVector2i* fza_offset = &m_global.fzbc[left >> 2]; - if(sel.prim != GS_SPRITE_CLASS) + if (sel.prim != GS_SPRITE_CLASS) { - if(sel.fwrite && sel.fge) + if (sel.fwrite && sel.fge) { f = GSVector4i(scan.p).zzzzh().zzzz().add16(m_local.d[skip].f); } - if(sel.zb) + if (sel.zb) { zo = m_local.d[skip].z; } } - if(sel.fb) + if (sel.fb) { - if(sel.edge) + if (sel.edge) { cov = GSVector4i::cast(scan.t).zzzzh().wwww().srl16(9); } - if(sel.tfx != TFX_NONE) + if (sel.tfx != TFX_NONE) { - if(sel.fst) + if (sel.fst) { GSVector4i vt(scan.t); GSVector4i u = vt.xxxx() + GSVector4i::cast(m_local.d[skip].s); - GSVector4i v = vt.yyyy(); - - if(sel.prim != GS_SPRITE_CLASS || sel.mmin) + GSVector4i v = vt.yyyy(); + + if (sel.prim != GS_SPRITE_CLASS || sel.mmin) { v += GSVector4i::cast(m_local.d[skip].t); } - else if(sel.ltf) + else if (sel.ltf) { vf = v.xxzzlh().srl16(12); } @@ -1630,9 +1640,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } } - if(!(sel.tfx == TFX_DECAL && sel.tcc)) + if (!(sel.tfx == TFX_DECAL && sel.tcc)) { - if(sel.iip) + if (sel.iip) { GSVector4i c(scan.c); @@ -1649,7 +1659,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } } - while(1) + while (1) { do { @@ -1660,15 +1670,15 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // TestZ - if(sel.zb) + if (sel.zb) { za = (fza_base->y + fza_offset->y) % HALF_VM_SIZE; - if(sel.prim != GS_SPRITE_CLASS) + if (sel.prim != GS_SPRITE_CLASS) { GSVector4 z = scan.p.zzzz() + zo; - if(sel.zoverflow) + if (sel.zoverflow) { zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); } @@ -1682,21 +1692,21 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS zs = m_local.p.z; } - if(sel.ztest) + if (sel.ztest) { zd = GSVector4i::load((uint8*)m_global.vm + za * 2, (uint8*)m_global.vm + za * 2 + 16); - switch(sel.zpsm) + switch (sel.zpsm) { - case 1: zd = zd.sll32(8).srl32(8); break; - case 2: zd = zd.sll32(16).srl32(16); break; - default: break; + case 1: zd = zd.sll32( 8).srl32( 8); break; + case 2: zd = zd.sll32(16).srl32(16); break; + default: break; } GSVector4i zso = zs; GSVector4i zdo = zd; - if(sel.zoverflow || sel.zpsm == 0) + if (sel.zoverflow || sel.zpsm == 0) { zso -= GSVector4i::x80000000(); zdo -= GSVector4i::x80000000(); @@ -1712,19 +1722,20 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS zso.u32[3] = std::min(z_max, zso.u32[3]); } - switch(sel.ztst) + switch (sel.ztst) { - case ZTST_GEQUAL: test |= zso < zdo; break; - case ZTST_GREATER: test |= zso <= zdo; break; + case ZTST_GEQUAL: test |= zso < zdo; break; + case ZTST_GREATER: test |= zso <= zdo; break; } - if(test.alltrue()) continue; + if (test.alltrue()) + continue; } } // SampleTexture - if(sel.fb && sel.tfx != TFX_NONE) + if (sel.fb && sel.tfx != TFX_NONE) { GSVector4i u, v, uv[2]; GSVector4i lodi, lodf; @@ -1732,9 +1743,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS GSVector4i addr00, addr01, addr10, addr11; GSVector4i c00, c01, c10, c11; - if(sel.mmin) + if (sel.mmin) { - if(!sel.fst) + if (!sel.fst) { u = GSVector4i(s / q); v = GSVector4i(t / q); @@ -1745,20 +1756,20 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS v = GSVector4i::cast(t); } - if(!sel.lcm) + if (!sel.lcm) { GSVector4 tmp = q.log2(3) * m_global.l + m_global.k; // (-log2(Q) * (1 << L) + K) * 0x10000 - + GSVector4i lod = GSVector4i(tmp.sat(GSVector4::zero(), m_global.mxl), false); - if(sel.mmin == 1) // round-off mode + if (sel.mmin == 1) // round-off mode { lod += 0x8000; } lodi = lod.srl32(16); - if(sel.mmin == 2) // trilinear mode + if (sel.mmin == 2) // trilinear mode { lodf = lod.xxzzlh(); } @@ -1767,7 +1778,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS GSVector4i aabb = u.upl32(v); GSVector4i ccdd = u.uph32(v); - + GSVector4i aaxx = aabb.sra32(lodi.x); GSVector4i xxbb = aabb.sra32(lodi.y); GSVector4i ccxx = ccdd.sra32(lodi.z); @@ -1778,7 +1789,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS u = acac.upl32(bdbd); v = acac.uph32(bdbd); - + uv[0] = u; uv[1] = v; @@ -1809,7 +1820,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS maxuv = m_local.temp.uv_minmax[1]; } - if(sel.ltf) + if (sel.ltf) { u -= 0x8000; v -= 0x8000; @@ -1824,24 +1835,24 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { GSVector4i repeat = (uv0 & minuv) | maxuv; GSVector4i clamp = uv0.sat_i16(minuv, maxuv); - + uv0 = clamp.blend8(repeat, m_global.t.mask); } - if(sel.ltf) + if (sel.ltf) { uv1 = uv1.add16(GSVector4i::x0001()); GSVector4i repeat = (uv1 & minuv) | maxuv; GSVector4i clamp = uv1.sat_i16(minuv, maxuv); - + uv1 = clamp.blend8(repeat, m_global.t.mask); } GSVector4i y0 = uv0.uph16() << (sel.tw + 3); GSVector4i x0 = uv0.upl16(); - if(sel.ltf) + if (sel.ltf) { GSVector4i y1 = uv1.uph16() << (sel.tw + 3); GSVector4i x1 = uv1.upl16(); @@ -1851,9 +1862,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS addr10 = y1 + x0; addr11 = y1 + x1; - if(sel.tlu) + if (sel.tlu) { - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { const uint8* tex = (const uint8*)m_global.tex[lodi.u32[i]]; @@ -1865,7 +1876,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else { - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { const uint32* tex = (const uint32*)m_global.tex[lodi.u32[i]]; @@ -1875,7 +1886,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS c11.u32[i] = tex[addr11.u32[i]]; } } - + GSVector4i rb00 = c00.sll16(8).srl16(8); GSVector4i ga00 = c00.srl16(8); GSVector4i rb01 = c01.sll16(8).srl16(8); @@ -1899,16 +1910,16 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { addr00 = y0 + x0; - if(sel.tlu) + if (sel.tlu) { - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { c00.u32[i] = m_global.clut[((const uint8*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]]; } } else { - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { c00.u32[i] = ((const uint32*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]; } @@ -1918,7 +1929,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS ga = c00.srl16(8); } - if(sel.mmin != 1) // !round-off mode + if (sel.mmin != 1) // !round-off mode { GSVector4i rb2, ga2; @@ -1930,7 +1941,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS minuv = minuv.srl16(1); maxuv = maxuv.srl16(1); - if(sel.ltf) + if (sel.ltf) { u -= 0x8000; v -= 0x8000; @@ -1945,24 +1956,24 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { GSVector4i repeat = (uv0 & minuv) | maxuv; GSVector4i clamp = uv0.sat_i16(minuv, maxuv); - + uv0 = clamp.blend8(repeat, m_global.t.mask); } - if(sel.ltf) + if (sel.ltf) { uv1 = uv1.add16(GSVector4i::x0001()); GSVector4i repeat = (uv1 & minuv) | maxuv; GSVector4i clamp = uv1.sat_i16(minuv, maxuv); - + uv1 = clamp.blend8(repeat, m_global.t.mask); } GSVector4i y0 = uv0.uph16() << (sel.tw + 3); GSVector4i x0 = uv0.upl16(); - if(sel.ltf) + if (sel.ltf) { GSVector4i y1 = uv1.uph16() << (sel.tw + 3); GSVector4i x1 = uv1.upl16(); @@ -1972,9 +1983,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS addr10 = y1 + x0; addr11 = y1 + x1; - if(sel.tlu) + if (sel.tlu) { - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { const uint8* tex = (const uint8*)m_global.tex[lodi.u32[i]]; @@ -1986,7 +1997,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else { - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { const uint32* tex = (const uint32*)m_global.tex[lodi.u32[i]]; @@ -1996,7 +2007,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS c11.u32[i] = tex[addr11.u32[i]]; } } - + GSVector4i rb00 = c00.sll16(8).srl16(8); GSVector4i ga00 = c00.srl16(8); GSVector4i rb01 = c01.sll16(8).srl16(8); @@ -2020,16 +2031,16 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { addr00 = y0 + x0; - if(sel.tlu) + if (sel.tlu) { - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { c00.u32[i] = m_global.clut[((const uint8*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]]; } } else { - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { c00.u32[i] = ((const uint32*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]; } @@ -2039,7 +2050,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS ga2 = c00.srl16(8); } - if(sel.lcm) lodf = m_global.lod.f; + if (sel.lcm) + lodf = m_global.lod.f; lodf = lodf.srl16(1); @@ -2049,12 +2061,12 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else { - if(!sel.fst) + if (!sel.fst) { u = GSVector4i(s / q); v = GSVector4i(t / q); - if(sel.ltf) + if (sel.ltf) { u -= 0x8000; v -= 0x8000; @@ -2066,11 +2078,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS v = GSVector4i::cast(t); } - if(sel.ltf) + if (sel.ltf) { uf = u.xxzzlh().srl16(12); - - if(sel.prim != GS_SPRITE_CLASS) + + if (sel.prim != GS_SPRITE_CLASS) { vf = v.xxzzlh().srl16(12); } @@ -2082,24 +2094,24 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { GSVector4i repeat = (uv0 & m_global.t.min) | m_global.t.max; GSVector4i clamp = uv0.sat_i16(m_global.t.min, m_global.t.max); - + uv0 = clamp.blend8(repeat, m_global.t.mask); } - if(sel.ltf) + if (sel.ltf) { uv1 = uv1.add16(GSVector4i::x0001()); GSVector4i repeat = (uv1 & m_global.t.min) | m_global.t.max; GSVector4i clamp = uv1.sat_i16(m_global.t.min, m_global.t.max); - + uv1 = clamp.blend8(repeat, m_global.t.mask); } GSVector4i y0 = uv0.uph16() << (sel.tw + 3); GSVector4i x0 = uv0.upl16(); - if(sel.ltf) + if (sel.ltf) { GSVector4i y1 = uv1.uph16() << (sel.tw + 3); GSVector4i x1 = uv1.upl16(); @@ -2109,7 +2121,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS addr10 = y1 + x0; addr11 = y1 + x1; - if(sel.tlu) + if (sel.tlu) { const uint8* tex = (const uint8*)m_global.tex[0]; @@ -2127,7 +2139,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS c10 = addr10.gather32_32(tex); c11 = addr11.gather32_32(tex); } - + GSVector4i rb00 = c00.sll16(8).srl16(8); GSVector4i ga00 = c00.srl16(8); GSVector4i rb01 = c01.sll16(8).srl16(8); @@ -2151,7 +2163,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS { addr00 = y0 + x0; - if(sel.tlu) + if (sel.tlu) { c00 = addr00.gather32_32((const uint8*)m_global.tex[0], m_global.clut); } @@ -2168,35 +2180,38 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // AlphaTFX - if(sel.fb) + if (sel.fb) { - switch(sel.tfx) + switch (sel.tfx) { - case TFX_MODULATE: - ga = ga.modulate16<1>(gaf).clamp8(); - if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); - break; - case TFX_DECAL: - if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); - break; - case TFX_HIGHLIGHT: - ga = ga.mix16(!sel.tcc ? gaf.srl16(7) : ga.addus8(gaf.srl16(7))); - break; - case TFX_HIGHLIGHT2: - if(!sel.tcc) ga = ga.mix16(gaf.srl16(7)); - break; - case TFX_NONE: - ga = sel.iip ? gaf.srl16(7) : gaf; - break; + case TFX_MODULATE: + ga = ga.modulate16<1>(gaf).clamp8(); + if (!sel.tcc) + ga = ga.mix16(gaf.srl16(7)); + break; + case TFX_DECAL: + if (!sel.tcc) + ga = ga.mix16(gaf.srl16(7)); + break; + case TFX_HIGHLIGHT: + ga = ga.mix16(!sel.tcc ? gaf.srl16(7) : ga.addus8(gaf.srl16(7))); + break; + case TFX_HIGHLIGHT2: + if (!sel.tcc) + ga = ga.mix16(gaf.srl16(7)); + break; + case TFX_NONE: + ga = sel.iip ? gaf.srl16(7) : gaf; + break; } - if(sel.aa1) + if (sel.aa1) { GSVector4i x00800080(0x00800080); GSVector4i a = sel.edge ? cov : x00800080; - if(!sel.abe) + if (!sel.abe) { ga = ga.mix16(a); } @@ -2209,48 +2224,49 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // ReadMask - if(sel.fwrite) + if (sel.fwrite) { fm = m_global.fm; } - if(sel.zwrite) + if (sel.zwrite) { zm = m_global.zm; } // TestAlpha - if(!TestAlpha(test, fm, zm, ga)) continue; + if (!TestAlpha(test, fm, zm, ga)) + continue; // ColorTFX - if(sel.fwrite) + if (sel.fwrite) { GSVector4i af; - switch(sel.tfx) + switch (sel.tfx) { - case TFX_MODULATE: - rb = rb.modulate16<1>(rbf).clamp8(); - break; - case TFX_DECAL: - break; - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: - af = gaf.yywwlh().srl16(7); - rb = rb.modulate16<1>(rbf).add16(af).clamp8(); - ga = ga.modulate16<1>(gaf).add16(af).clamp8().mix16(ga); - break; - case TFX_NONE: - rb = sel.iip ? rbf.srl16(7) : rbf; - break; + case TFX_MODULATE: + rb = rb.modulate16<1>(rbf).clamp8(); + break; + case TFX_DECAL: + break; + case TFX_HIGHLIGHT: + case TFX_HIGHLIGHT2: + af = gaf.yywwlh().srl16(7); + rb = rb.modulate16<1>(rbf).add16(af).clamp8(); + ga = ga.modulate16<1>(gaf).add16(af).clamp8().mix16(ga); + break; + case TFX_NONE: + rb = sel.iip ? rbf.srl16(7) : rbf; + break; } } // Fog - if(sel.fwrite && sel.fge) + if (sel.fwrite && sel.fge) { GSVector4i fog = sel.prim != GS_SPRITE_CLASS ? f : m_local.p.f; @@ -2269,11 +2285,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // ReadFrame - if(sel.fb) + if (sel.fb) { fa = (fza_base->x + fza_offset->x) % HALF_VM_SIZE; - if(sel.rfb) + if (sel.rfb) { fd = GSVector4i::load((uint8*)m_global.vm + fa * 2, (uint8*)m_global.vm + fa * 2 + 16); } @@ -2281,11 +2297,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // TestDestAlpha - if(sel.date && (sel.fpsm == 0 || sel.fpsm == 2)) + if (sel.date && (sel.fpsm == 0 || sel.fpsm == 2)) { - if(sel.datm) + if (sel.datm) { - if(sel.fpsm == 2) + if (sel.fpsm == 2) { // test |= fd.srl32(15) == GSVector4i::zero(); test |= fd.sll32(16).sra32(31) == GSVector4i::zero(); @@ -2297,7 +2313,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else { - if(sel.fpsm == 2) + if (sel.fpsm == 2) { test |= fd.sll32(16).sra32(31); // == GSVector4i::xffffffff(); } @@ -2307,34 +2323,35 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } } - if(test.alltrue()) continue; + if (test.alltrue()) + continue; } // WriteMask int fzm = 0; - if(!sel.notest) + if (!sel.notest) { - if(sel.fwrite) + if (sel.fwrite) { fm |= test; } - if(sel.zwrite) + if (sel.zwrite) { zm |= test; } - if(sel.fwrite && sel.zwrite) + if (sel.fwrite && sel.zwrite) { fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask(); } - else if(sel.fwrite) + else if (sel.fwrite) { fzm = ~(fm == GSVector4i::xffffffff()).ps32().mask(); } - else if(sel.zwrite) + else if (sel.zwrite) { fzm = ~(zm == GSVector4i::xffffffff()).ps32().mask(); } @@ -2342,9 +2359,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // WriteZBuf - if(sel.zwrite) + if (sel.zwrite) { - if(sel.ztest && sel.zpsm < 2) + if (sel.ztest && sel.zpsm < 2) { zs = zs.blend8(zd, zm); } @@ -2361,9 +2378,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS bool fast = sel.ztest ? sel.zpsm < 2 : sel.zpsm == 0 && sel.notest; - if(sel.notest) + if (sel.notest) { - if(fast) + if (fast) { GSVector4i::storel((uint8*)m_global.vm + za * 2, zs); GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs); @@ -2378,140 +2395,140 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else { - if(fast) + if (fast) { - if(fzm & 0x0f00) GSVector4i::storel((uint8*)m_global.vm + za * 2, zs); - if(fzm & 0xf000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs); + if (fzm & 0x0f00) GSVector4i::storel((uint8*)m_global.vm + za * 2, zs); + if (fzm & 0xf000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs); } else { - if(fzm & 0x0300) WritePixel(zs, za, 0, sel.zpsm); - if(fzm & 0x0c00) WritePixel(zs, za, 1, sel.zpsm); - if(fzm & 0x3000) WritePixel(zs, za, 2, sel.zpsm); - if(fzm & 0xc000) WritePixel(zs, za, 3, sel.zpsm); + if (fzm & 0x0300) WritePixel(zs, za, 0, sel.zpsm); + if (fzm & 0x0c00) WritePixel(zs, za, 1, sel.zpsm); + if (fzm & 0x3000) WritePixel(zs, za, 2, sel.zpsm); + if (fzm & 0xc000) WritePixel(zs, za, 3, sel.zpsm); } } } // AlphaBlend - if(sel.fwrite && (sel.abe || sel.aa1)) + if (sel.fwrite && (sel.abe || sel.aa1)) { GSVector4i rbs = rb, gas = ga, rbd, gad, a, mask; - if(sel.aba != sel.abb && (sel.aba == 1 || sel.abb == 1 || sel.abc == 1) || sel.abd == 1) + if (sel.aba != sel.abb && (sel.aba == 1 || sel.abb == 1 || sel.abc == 1) || sel.abd == 1) { - switch(sel.fpsm) + switch (sel.fpsm) { - case 0: - case 1: - rbd = fd.sll16(8).srl16(8); - gad = fd.srl16(8); - break; - case 2: - rbd = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - gad = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - break; + case 0: + case 1: + rbd = fd.sll16(8).srl16(8); + gad = fd.srl16(8); + break; + case 2: + rbd = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); + gad = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); + break; } } - if(sel.aba != sel.abb) + if (sel.aba != sel.abb) { - switch(sel.aba) + switch (sel.aba) { - case 0: break; - case 1: rb = rbd; break; - case 2: rb = GSVector4i::zero(); break; + case 0: break; + case 1: rb = rbd; break; + case 2: rb = GSVector4i::zero(); break; } - switch(sel.abb) + switch (sel.abb) { - case 0: rb = rb.sub16(rbs); break; - case 1: rb = rb.sub16(rbd); break; - case 2: break; + case 0: rb = rb.sub16(rbs); break; + case 1: rb = rb.sub16(rbd); break; + case 2: break; } - if(!(sel.fpsm == 1 && sel.abc == 1)) + if (!(sel.fpsm == 1 && sel.abc == 1)) { switch(sel.abc) { - case 0: a = gas.yywwlh().sll16(7); break; - case 1: a = gad.yywwlh().sll16(7); break; - case 2: a = m_global.afix; break; + case 0: a = gas.yywwlh().sll16(7); break; + case 1: a = gad.yywwlh().sll16(7); break; + case 2: a = m_global.afix; break; } rb = rb.modulate16<1>(a); } - switch(sel.abd) + switch (sel.abd) { - case 0: rb = rb.add16(rbs); break; - case 1: rb = rb.add16(rbd); break; - case 2: break; + case 0: rb = rb.add16(rbs); break; + case 1: rb = rb.add16(rbd); break; + case 2: break; } } else { - switch(sel.abd) + switch (sel.abd) { - case 0: break; - case 1: rb = rbd; break; - case 2: rb = GSVector4i::zero(); break; + case 0: break; + case 1: rb = rbd; break; + case 2: rb = GSVector4i::zero(); break; } } - if(sel.pabe) + if (sel.pabe) { mask = (gas << 8).sra32(31); - + rb = rbs.blend8(rb, mask); } - - if(sel.aba != sel.abb) + + if (sel.aba != sel.abb) { - switch(sel.aba) + switch (sel.aba) { - case 0: break; - case 1: ga = gad; break; - case 2: ga = GSVector4i::zero(); break; + case 0: break; + case 1: ga = gad; break; + case 2: ga = GSVector4i::zero(); break; } - switch(sel.abb) + switch (sel.abb) { - case 0: ga = ga.sub16(gas); break; - case 1: ga = ga.sub16(gad); break; - case 2: break; + case 0: ga = ga.sub16(gas); break; + case 1: ga = ga.sub16(gad); break; + case 2: break; } - if(!(sel.fpsm == 1 && sel.abc == 1)) + if (!(sel.fpsm == 1 && sel.abc == 1)) { ga = ga.modulate16<1>(a); } - switch(sel.abd) + switch (sel.abd) { - case 0: ga = ga.add16(gas); break; - case 1: ga = ga.add16(gad); break; - case 2: break; + case 0: ga = ga.add16(gas); break; + case 1: ga = ga.add16(gad); break; + case 2: break; } } else { - switch(sel.abd) + switch (sel.abd) { - case 0: break; - case 1: ga = gad; break; - case 2: ga = GSVector4i::zero(); break; + case 0: break; + case 1: ga = gad; break; + case 2: ga = GSVector4i::zero(); break; } } - if(sel.pabe) + if (sel.pabe) { ga = gas.blend8(ga, mask >> 16); } else { - if(sel.fpsm != 1) + if (sel.fpsm != 1) { ga = ga.mix16(gas); } @@ -2520,9 +2537,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS // WriteFrame - if(sel.fwrite) + if (sel.fwrite) { - if(sel.fpsm == 2 && sel.dthe) + if (sel.fpsm == 2 && sel.dthe) { int y = (top & 3) << 1; @@ -2530,7 +2547,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS ga = ga.add16(m_global.dimx[1 + y]); } - if(sel.colclamp == 0) + if (sel.colclamp == 0) { rb &= GSVector4i::x00ff(); ga &= GSVector4i::x00ff(); @@ -2538,12 +2555,12 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS GSVector4i fs = rb.upl16(ga).pu16(rb.uph16(ga)); - if(sel.fba && sel.fpsm != 1) + if (sel.fba && sel.fpsm != 1) { fs |= GSVector4i::x80000000(); } - if(sel.fpsm == 2) + if (sel.fpsm == 2) { GSVector4i rb = fs & 0x00f800f8; GSVector4i ga = fs & 0x8000f800; @@ -2551,16 +2568,16 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS fs = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3); } - if(sel.rfb) + if (sel.rfb) { fs = fs.blend(fd, fm); } bool fast = sel.rfb ? sel.fpsm < 2 : sel.fpsm == 0 && sel.notest; - if(sel.notest) + if (sel.notest) { - if(fast) + if (fast) { GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs); GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs); @@ -2575,57 +2592,58 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } else { - if(fast) + if (fast) { - if(fzm & 0x000f) GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs); - if(fzm & 0x00f0) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs); + if (fzm & 0x000f) GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs); + if (fzm & 0x00f0) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs); } else { - if(fzm & 0x0003) WritePixel(fs, fa, 0, sel.fpsm); - if(fzm & 0x000c) WritePixel(fs, fa, 1, sel.fpsm); - if(fzm & 0x0030) WritePixel(fs, fa, 2, sel.fpsm); - if(fzm & 0x00c0) WritePixel(fs, fa, 3, sel.fpsm); + if (fzm & 0x0003) WritePixel(fs, fa, 0, sel.fpsm); + if (fzm & 0x000c) WritePixel(fs, fa, 1, sel.fpsm); + if (fzm & 0x0030) WritePixel(fs, fa, 2, sel.fpsm); + if (fzm & 0x00c0) WritePixel(fs, fa, 3, sel.fpsm); } } } - } - while(0); + } while (0); - if(sel.edge) break; + if (sel.edge) + break; - if(steps <= 0) break; + if (steps <= 0) + break; // Step - + steps -= 4; fza_offset++; - if(sel.prim != GS_SPRITE_CLASS) + if (sel.prim != GS_SPRITE_CLASS) { - if(sel.zb) + if (sel.zb) { zo += m_local.d4.z; } - if(sel.fwrite && sel.fge) + if (sel.fwrite && sel.fge) { f = f.add16(m_local.d4.f); } } - if(sel.fb) + if (sel.fb) { - if(sel.tfx != TFX_NONE) + if (sel.tfx != TFX_NONE) { - if(sel.fst) + if (sel.fst) { GSVector4i stq = GSVector4i::cast(m_local.d4.stq); s = GSVector4::cast(GSVector4i::cast(s) + stq.xxxx()); - - if(sel.prim != GS_SPRITE_CLASS || sel.mmin) + + if (sel.prim != GS_SPRITE_CLASS || sel.mmin) { t = GSVector4::cast(GSVector4i::cast(t) + stq.yyyy()); } @@ -2641,9 +2659,9 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } } - if(!(sel.tfx == TFX_DECAL && sel.tcc)) + if (!(sel.tfx == TFX_DECAL && sel.tcc)) { - if(sel.iip) + if (sel.iip) { GSVector4i c = m_local.d4.c; @@ -2652,13 +2670,13 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS } } - if(!sel.notest) + if (!sel.notest) { test = const_test[7 + (steps & (steps >> 31))]; } } - #endif +#endif } void GSDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) @@ -2675,81 +2693,85 @@ void GSDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& s m_global.sel.edge = edge; } -template +template bool GSDrawScanline::TestAlpha(T& test, T& fm, T& zm, const T& ga) { GSScanlineSelector sel = m_global.sel; - switch(sel.afail) + switch (sel.afail) { - case AFAIL_FB_ONLY: - if(!sel.zwrite) return true; - break; + case AFAIL_FB_ONLY: + if (!sel.zwrite) + return true; + break; - case AFAIL_ZB_ONLY: - if(!sel.fwrite) return true; - break; + case AFAIL_ZB_ONLY: + if (!sel.fwrite) + return true; + break; - case AFAIL_RGB_ONLY: - if(!sel.zwrite && sel.fpsm == 1) return true; - break; + case AFAIL_RGB_ONLY: + if (!sel.zwrite && sel.fpsm == 1) + return true; + break; } T t; - switch(sel.atst) + switch (sel.atst) { - case ATST_NEVER: - t = GSVector4i::xffffffff(); - break; + case ATST_NEVER: + t = GSVector4i::xffffffff(); + break; - case ATST_ALWAYS: - return true; + case ATST_ALWAYS: + return true; - case ATST_LESS: - case ATST_LEQUAL: - t = (ga >> 16) > T(m_global.aref); - break; + case ATST_LESS: + case ATST_LEQUAL: + t = (ga >> 16) > T(m_global.aref); + break; - case ATST_EQUAL: - t = (ga >> 16) != T(m_global.aref); - break; + case ATST_EQUAL: + t = (ga >> 16) != T(m_global.aref); + break; - case ATST_GEQUAL: - case ATST_GREATER: - t = (ga >> 16) < T(m_global.aref); - break; + case ATST_GEQUAL: + case ATST_GREATER: + t = (ga >> 16) < T(m_global.aref); + break; - case ATST_NOTEQUAL: - t = (ga >> 16) == T(m_global.aref); - break; + case ATST_NOTEQUAL: + t = (ga >> 16) == T(m_global.aref); + break; - default: - __assume(0); + default: + __assume(0); } - switch(sel.afail) + switch (sel.afail) { - case AFAIL_KEEP: - test |= t; - if(test.alltrue()) return false; - break; + case AFAIL_KEEP: + test |= t; + if (test.alltrue()) + return false; + break; - case AFAIL_FB_ONLY: - zm |= t; - break; + case AFAIL_FB_ONLY: + zm |= t; + break; - case AFAIL_ZB_ONLY: - fm |= t; - break; + case AFAIL_ZB_ONLY: + fm |= t; + break; - case AFAIL_RGB_ONLY: - zm |= t; - fm |= t & T::xff000000(); // fpsm 16 bit => & 0xffff8000? - break; + case AFAIL_RGB_ONLY: + zm |= t; + fm |= t & T::xff000000(); // fpsm 16 bit => & 0xffff8000? + break; - default: - __assume(0); + default: + __assume(0); } return true; @@ -2757,21 +2779,22 @@ bool GSDrawScanline::TestAlpha(T& test, T& fm, T& zm, const T& ga) static const int s_offsets[] = {0, 2, 8, 10, 16, 18, 24, 26}; // columnTable16[0] -template void GSDrawScanline::WritePixel(const T& src, int addr, int i, uint32 psm) +template +void GSDrawScanline::WritePixel(const T& src, int addr, int i, uint32 psm) { uint8* dst = (uint8*)m_global.vm + addr * 2 + s_offsets[i] * 2; - switch(psm) + switch (psm) { - case 0: - *(uint32*)dst = src.u32[i]; - break; - case 1: - *(uint32*)dst = (src.u32[i] & 0xffffff) | (*(uint32*)dst & 0xff000000); - break; - case 2: - *(uint16*)dst = src.u16[i * 2]; - break; + case 0: + *(uint32*)dst = src.u32[i]; + break; + case 1: + *(uint32*)dst = (src.u32[i] & 0xffffff) | (*(uint32*)dst & 0xff000000); + break; + case 2: + *(uint16*)dst = src.u16[i * 2]; + break; } } @@ -2786,22 +2809,22 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v) uint32 m; - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 m = m_global.zm; - #else +#else m = m_global.zm.u32[0]; - #endif +#endif - if(m != 0xffffffff) + if (m != 0xffffffff) { const int* zbr = m_global.zbr; const int* zbc = m_global.zbc; uint32 z = v.t.u32[3]; // (uint32)v.p.z; - if(m_global.sel.zpsm != 2) + if (m_global.sel.zpsm != 2) { - if(m == 0) + if (m == 0) { DrawRectT(zbr, zbc, r, z, m); } @@ -2812,7 +2835,7 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v) } else { - if((m & 0xffff) == 0) + if ((m & 0xffff) == 0) { DrawRectT(zbr, zbc, r, z, m); } @@ -2823,27 +2846,27 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v) } } - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 m = m_global.fm; - #else +#else m = m_global.fm.u32[0]; - #endif +#endif - if(m != 0xffffffff) + if (m != 0xffffffff) { const int* fbr = m_global.fbr; const int* fbc = m_global.fbc; uint32 c = (GSVector4i(v.c) >> 7).rgba32(); - if(m_global.sel.fba) + if (m_global.sel.fba) { c |= 0x80000000; } - if(m_global.sel.fpsm != 2) + if (m_global.sel.fpsm != 2) { - if(m == 0) + if (m == 0) { DrawRectT(fbr, fbc, r, c, m); } @@ -2856,7 +2879,7 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v) { c = ((c & 0xf8) >> 3) | ((c & 0xf800) >> 6) | ((c & 0xf80000) >> 9) | ((c & 0x80000000) >> 16); - if((m & 0xffff) == 0) + if ((m & 0xffff) == 0) { DrawRectT(fbr, fbc, r, c, m); } @@ -2868,24 +2891,25 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v) } } -template +template void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m) { - if(m == 0xffffffff) return; + if (m == 0xffffffff) + return; - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 GSVector8i color((int)c); GSVector8i mask((int)m); - #else +#else GSVector4i color((int)c); GSVector4i mask((int)m); - #endif +#endif - if(sizeof(T) == sizeof(uint16)) + if (sizeof(T) == sizeof(uint16)) { color = color.xxzzlh(); mask = mask.xxzzlh(); @@ -2896,16 +2920,17 @@ void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col, color = color.andnot(mask); c = c & (~m); - if(masked) ASSERT(mask.u32[0] != 0); + if (masked) + ASSERT(mask.u32[0] != 0); GSVector4i br = r.ralign(GSVector2i(8 * 4 / sizeof(T), 8)); - if(!br.rempty()) + if (!br.rempty()) { FillRect(row, col, GSVector4i(r.x, r.y, r.z, br.y), c, m); FillRect(row, col, GSVector4i(r.x, br.w, r.z, r.w), c, m); - if(r.x < br.x || br.z < r.z) + if (r.x < br.x || br.z < r.z) { FillRect(row, col, GSVector4i(r.x, br.y, br.x, br.w), c, m); FillRect(row, col, GSVector4i(br.z, br.y, r.z, br.w), c, m); @@ -2919,18 +2944,19 @@ void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col, } } -template +template void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m) { - if(r.x >= r.z) return; + if (r.x >= r.z) + return; T* vm = (T*)m_global.vm; - for(int y = r.y; y < r.w; y++) + for (int y = r.y; y < r.w; y++) { T* RESTRICT d = &vm[row[y]]; - for(int x = r.x; x < r.z; x++) + for (int x = r.x; x < r.z; x++) { d[col[x]] = (T)(!masked ? c : (c | (d[col[x]] & m))); } @@ -2939,18 +2965,19 @@ void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col, #if _M_SSE >= 0x501 -template +template void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m) { - if(r.x >= r.z) return; + if (r.x >= r.z) + return; T* vm = (T*)m_global.vm; - for(int y = r.y; y < r.w; y += 8) + for (int y = r.y; y < r.w; y += 8) { T* RESTRICT d = &vm[row[y]]; - for(int x = r.x; x < r.z; x += 8 * 4 / sizeof(T)) + for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T)) { GSVector8i* RESTRICT p = (GSVector8i*)&d[col[x]]; @@ -2968,22 +2995,23 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, #else -template +template void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m) { - if(r.x >= r.z) return; + if (r.x >= r.z) + return; T* vm = (T*)m_global.vm; - for(int y = r.y; y < r.w; y += 8) + for (int y = r.y; y < r.w; y += 8) { T* RESTRICT d = &vm[row[y]]; - for(int x = r.x; x < r.z; x += 8 * 4 / sizeof(T)) + for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T)) { GSVector4i* RESTRICT p = (GSVector4i*)&d[col[x]]; - for(int i = 0; i < 16; i += 4) + for (int i = 0; i < 16; i += 4) { p[i + 0] = !masked ? c : (c | (p[i + 0] & m)); p[i + 1] = !masked ? c : (c | (p[i + 1] & m)); diff --git a/plugins/GSdx/Renderers/SW/GSDrawScanline.h b/plugins/GSdx/Renderers/SW/GSDrawScanline.h index f2fb122267..bf02c34048 100644 --- a/plugins/GSdx/Renderers/SW/GSDrawScanline.h +++ b/plugins/GSdx/Renderers/SW/GSDrawScanline.h @@ -43,23 +43,23 @@ protected: GSCodeGeneratorFunctionMap m_sp_map; GSCodeGeneratorFunctionMap m_ds_map; - template + template void DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m); - template + template __forceinline void FillRect(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m); - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 - template + template __forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m); - #else +#else - template + template __forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m); - #endif +#endif public: GSDrawScanline(); @@ -73,18 +73,21 @@ public: void DrawRect(const GSVector4i& r, const GSVertexSW& v); #ifndef ENABLE_JIT_RASTERIZER - + void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan); void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan); void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan); - bool IsEdge() const {return m_global.sel.aa1;} - bool IsRect() const {return m_global.sel.IsSolidRect();} + bool IsEdge() const { return m_global.sel.aa1; } + bool IsRect() const { return m_global.sel.IsSolidRect(); } template bool TestAlpha(T& test, T& fm, T& zm, const T& ga); template void WritePixel(const T& src, int addr, int i, uint32 psm); #endif - void PrintStats() {m_ds_map.PrintStats();} + void PrintStats() + { + m_ds_map.PrintStats(); + } }; diff --git a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.cpp b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.cpp index 1508978081..c71774d24e 100644 --- a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.cpp +++ b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.cpp @@ -26,7 +26,7 @@ #else void GSDrawScanlineCodeGenerator::Generate() { - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) Generate_AVX(); else Generate_SSE(); @@ -40,21 +40,24 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key { m_sel.key = key; - if(m_sel.breakpoint) + if (m_sel.breakpoint) db(0xCC); - try { + try + { Generate(); - } catch (std::exception& e) { + } + catch (std::exception& e) + { fprintf(stderr, "ERR:GSDrawScanlineCodeGenerator %s\n", e.what()); } } void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uint8 shift) { - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) { - if(shift == 0) + if (shift == 0) { vpmulhrsw(a, f); } @@ -63,11 +66,10 @@ void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uin vpsllw(a, shift + 1); vpmulhw(a, f); } - } else { - if(shift == 0 && m_cpu.has(util::Cpu::tSSSE3)) + if (shift == 0 && m_cpu.has(util::Cpu::tSSSE3)) { pmulhrsw(a, f); } @@ -81,7 +83,7 @@ void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uin void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift) { - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) { vpsubw(a, b); modulate16(a, f, shift); @@ -97,7 +99,7 @@ void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& void GSDrawScanlineCodeGenerator::lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f) { - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) { vpsubw(a, b); vpmullw(a, f); @@ -115,7 +117,7 @@ void GSDrawScanlineCodeGenerator::lerp16_4(const Xmm& a, const Xmm& b, const Xmm void GSDrawScanlineCodeGenerator::mix16(const Xmm& a, const Xmm& b, const Xmm& temp) { - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) { vpblendw(a, b, 0xaa); } @@ -127,13 +129,14 @@ void GSDrawScanlineCodeGenerator::mix16(const Xmm& a, const Xmm& b, const Xmm& t void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp) { - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) { vpackuswb(a, a); #if _M_SSE >= 0x501 // Greg: why ? - if(m_cpu.has(util::Cpu::tAVX2)) { + if (m_cpu.has(util::Cpu::tAVX2)) + { ASSERT(a.isYMM()); vpermq(Ymm(a.getIdx()), Ymm(a.getIdx()), _MM_SHUFFLE(3, 1, 2, 0)); // this sucks } @@ -152,7 +155,7 @@ void GSDrawScanlineCodeGenerator::alltrue(const Xmm& test) { uint32 mask = test.isYMM() ? 0xffffffff : 0xffff; - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) { vpmovmskb(eax, test); cmp(eax, mask); @@ -168,7 +171,7 @@ void GSDrawScanlineCodeGenerator::alltrue(const Xmm& test) void GSDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm& mask) { - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) { vpand(b, mask); vpandn(mask, a); @@ -185,7 +188,7 @@ void GSDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm& m void GSDrawScanlineCodeGenerator::blendr(const Xmm& b, const Xmm& a, const Xmm& mask) { - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) { vpand(b, mask); vpandn(mask, a); @@ -201,15 +204,15 @@ void GSDrawScanlineCodeGenerator::blendr(const Xmm& b, const Xmm& a, const Xmm& void GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b) { - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) vpblendvb(a, a, b, xmm0); - else + else pblendvb(a, b); } void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a) { - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) { vpblendvb(b, a, b, xmm0); } @@ -225,15 +228,20 @@ void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const // l = src & 0xFF; (1 left shift + 1 right shift) // h = (src >> 8) & 0xFF; (1 right shift) - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) { - if (src == h) { + if (src == h) + { vpsllw(l, src, 8); vpsrlw(h, 8); - } else if (src == l) { + } + else if (src == l) + { vpsrlw(h, src, 8); vpsllw(l, 8); - } else { + } + else + { vpsllw(l, src, 8); vpsrlw(h, src, 8); } @@ -241,11 +249,16 @@ void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const } else { - if (src == h) { + if (src == h) + { movdqa(l, src); - } else if (src == l) { + } + else if (src == l) + { movdqa(h, src); - } else { + } + else + { movdqa(l, src); movdqa(h, src); } diff --git a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.h b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.h index bbe04c5483..4fc037a433 100644 --- a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.h +++ b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.h @@ -35,7 +35,7 @@ using namespace Xbyak; class GSDrawScanlineCodeGenerator : public GSCodeGenerator { - void operator = (const GSDrawScanlineCodeGenerator&); + void operator=(const GSDrawScanlineCodeGenerator&); GSScanlineSelector m_sel; GSScanlineLocalData& m_local; @@ -43,7 +43,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator void Generate(); - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 void Init(); void Step(); @@ -71,7 +71,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator void ReadTexel(int pixels, int mip_offset = 0); void ReadTexel(const Ymm& dst, const Ymm& addr, uint8 i); - #else +#else void Generate_SSE(); void Init_SSE(); @@ -94,7 +94,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator void WriteZBuf_SSE(); void AlphaBlend_SSE(); void WriteFrame_SSE(); - void ReadPixel_SSE(const Xmm& dst, const RegLong& addr); + void ReadPixel_SSE(const Xmm& dst, const RegLong& addr); void WritePixel_SSE(const Xmm& src, const RegLong& addr, const Reg8& mask, bool fast, int psm, int fz); void WritePixel_SSE(const Xmm& src, const RegLong& addr, uint8 i, int psm); void ReadTexel_SSE(int pixels, int mip_offset = 0); @@ -121,13 +121,13 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator void WriteZBuf_AVX(); void AlphaBlend_AVX(); void WriteFrame_AVX(); - void ReadPixel_AVX(const Xmm& dst, const RegLong& addr); + void ReadPixel_AVX(const Xmm& dst, const RegLong& addr); void WritePixel_AVX(const Xmm& src, const RegLong& addr, const Reg8& mask, bool fast, int psm, int fz); void WritePixel_AVX(const Xmm& src, const RegLong& addr, uint8 i, int psm); void ReadTexel_AVX(int pixels, int mip_offset = 0); void ReadTexel_AVX(const Xmm& dst, const Xmm& addr, uint8 i); - #endif +#endif void modulate16(const Xmm& a, const Operand& f, uint8 shift); void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift); diff --git a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x64.avx.cpp b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x64.avx.cpp index b17615c0e5..7669580ca4 100644 --- a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x64.avx.cpp +++ b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x64.avx.cpp @@ -32,20 +32,20 @@ #define _m_local__gd__clut r11 #define _m_local__gd__tex a3 // More pretty name -#define _z xmm8 -#define _f xmm9 -#define _s xmm10 -#define _t xmm11 -#define _q xmm12 -#define _f_rb xmm13 -#define _f_ga xmm14 -#define _test xmm15 +#define _z xmm8 +#define _f xmm9 +#define _s xmm10 +#define _t xmm11 +#define _q xmm12 +#define _f_rb xmm13 +#define _f_ga xmm14 +#define _test xmm15 // Extra bonus -#define _rb xmm2 -#define _ga xmm3 -#define _fm xmm4 -#define _zm xmm5 -#define _fd xmm6 +#define _rb xmm2 +#define _ga xmm3 +#define _fm xmm4 +#define _zm xmm5 +#define _fd xmm6 #define _rip_local(field) (m_rip ? ptr[rip + &m_local.field] : ptr[_m_local + offsetof(GSScanlineLocalData, field)]) #define _rip_global(field) (m_rip ? ptr[rip + &m_local.gd->field] : ptr[_m_local__gd + offsetof(GSScanlineGlobalData, field)]) @@ -81,7 +81,7 @@ void GSDrawScanlineCodeGenerator::Generate_AVX() sub(rsp, 8 + 10 * 16); - for(int i = 6; i < 16; i++) + for (int i = 6; i < 16; i++) { vmovdqa(ptr[rsp + (i - 6) * 16], Xmm(i)); } @@ -103,7 +103,7 @@ void GSDrawScanlineCodeGenerator::Generate_AVX() mov(_m_local__gd, _rip_local(gd)); } - if(need_clut) + if (need_clut) mov(_m_local__gd__clut, _rip_global(clut)); Init_AVX(); @@ -125,7 +125,7 @@ void GSDrawScanlineCodeGenerator::Generate_AVX() // xmm14 = ga // xmm15 = test - if(!m_sel.edge) + if (!m_sel.edge) { align(16); } @@ -137,7 +137,7 @@ L("loop"); // ebp = za // FIXME not yet done - if(m_sel.mmin && 0) + if (m_sel.mmin && 0) { SampleTextureLOD_AVX(); } @@ -243,7 +243,7 @@ L("step"); // if(steps <= 0) break; - if(!m_sel.edge) + if (!m_sel.edge) { test(a0.cvt32(), a0.cvt32()); @@ -257,7 +257,7 @@ L("step"); L("exit"); #ifdef _WIN64 - for(int i = 6; i < 16; i++) + for (int i = 6; i < 16; i++) { vmovdqa(Xmm(i), ptr[rsp + (i - 6) * 16]); } @@ -285,7 +285,7 @@ L("exit"); void GSDrawScanlineCodeGenerator::Init_AVX() { - if(!m_sel.notest) + if (!m_sel.notest) { // int skip = left & 3; @@ -316,8 +316,8 @@ void GSDrawScanlineCodeGenerator::Init_AVX() } else { - mov(ebx, a1.cvt32()); // left - xor(a1.cvt32(), a1.cvt32()); // skip + mov(ebx, a1.cvt32()); // left + xor(a1.cvt32(), a1.cvt32()); // skip lea(a0.cvt32(), ptr[a0 - 4]); // steps } @@ -336,7 +336,7 @@ void GSDrawScanlineCodeGenerator::Init_AVX() mov(rax, _rip_global(fzbc)); lea(t0, ptr[rax + rbx * 2]); - if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) + if (m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) { // a1 = &m_local.d[skip] // note a1 was (skip << 4) @@ -346,13 +346,13 @@ void GSDrawScanlineCodeGenerator::Init_AVX() lea(a1, ptr[rax + a1 * 8]); } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { - if(m_sel.fwrite && m_sel.fge || m_sel.zb) + if (m_sel.fwrite && m_sel.fge || m_sel.zb) { vmovaps(xmm0, ptr[a3 + offsetof(GSVertexSW, p)]); // v.p - if(m_sel.fwrite && m_sel.fge) + if (m_sel.fwrite && m_sel.fge) { // f = GSVector4i(vp).zzzzh().zzzz().add16(m_local.d[skip].f); @@ -362,7 +362,7 @@ void GSDrawScanlineCodeGenerator::Init_AVX() vpaddw(_f, ptr[a1 + 16 * 6]); } - if(m_sel.zb) + if (m_sel.zb) { // z = vp.zzzz() + m_local.d[skip].z; @@ -373,23 +373,23 @@ void GSDrawScanlineCodeGenerator::Init_AVX() } else { - if(m_sel.ztest) + if (m_sel.ztest) { vmovdqa(_z, _rip_local(p.z)); } - if(m_sel.fwrite && m_sel.fge) + if (m_sel.fwrite && m_sel.fge) vmovdqa(_f, _rip_local(p.f)); } - if(m_sel.fb) + if (m_sel.fb) { - if(m_sel.edge || m_sel.tfx != TFX_NONE) + if (m_sel.edge || m_sel.tfx != TFX_NONE) { vmovaps(xmm0, ptr[a3 + offsetof(GSVertexSW, t)]); // v.t } - if(m_sel.edge) + if (m_sel.edge) { // m_local.temp.cov = GSVector4i::cast(v.t).zzzzh().wwww().srl16(9); @@ -404,11 +404,11 @@ void GSDrawScanlineCodeGenerator::Init_AVX() #endif } - if(m_sel.tfx != TFX_NONE) + if (m_sel.tfx != TFX_NONE) { // a1 = &m_local.d[skip] - if(m_sel.fst) + if (m_sel.fst) { // GSVector4i vti(vt); @@ -422,11 +422,11 @@ void GSDrawScanlineCodeGenerator::Init_AVX() vpaddd(_s, ptr[a1 + offsetof(GSScanlineLocalData::skip, s)]); - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) + if (m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { vpaddd(_t, ptr[a1 + offsetof(GSScanlineLocalData::skip, t)]); } - else if(m_sel.ltf) + else if (m_sel.ltf) { vpshuflw(xmm7, _t, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm7, xmm7, _MM_SHUFFLE(2, 2, 0, 0)); @@ -449,9 +449,9 @@ void GSDrawScanlineCodeGenerator::Init_AVX() } } - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) + if (!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) { - if(m_sel.iip) + if (m_sel.iip) { // GSVector4i vc = GSVector4i(v.c); @@ -482,7 +482,7 @@ void GSDrawScanlineCodeGenerator::Init_AVX() } } - if(m_sel.fwrite && m_sel.fpsm == 2 && m_sel.dthe) + if (m_sel.fwrite && m_sel.fpsm == 2 && m_sel.dthe) { // On linux, a2 is edx which will be used for fzm // In all case, it will require a mov in dthe code, so let's keep the value on the stack @@ -494,7 +494,7 @@ void GSDrawScanlineCodeGenerator::Init_AVX() } mov(_m_local__gd__vm, _rip_global(vm)); - if(m_sel.fb && m_sel.tfx != TFX_NONE) + if (m_sel.fb && m_sel.tfx != TFX_NONE) mov(_m_local__gd__tex, _rip_global(tex)); } @@ -508,34 +508,34 @@ void GSDrawScanlineCodeGenerator::Step_AVX() add(t0, 8); - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // z += m_local.d4.z; - if(m_sel.zb) + if (m_sel.zb) { vaddps(_z, _rip_local(d4.z)); } // f = f.add16(m_local.d4.f); - if(m_sel.fwrite && m_sel.fge) + if (m_sel.fwrite && m_sel.fge) { vpaddw(_f, _rip_local(d4.f)); } } else { - if(m_sel.ztest) + if (m_sel.ztest) { } } - if(m_sel.fb) + if (m_sel.fb) { - if(m_sel.tfx != TFX_NONE) + if (m_sel.tfx != TFX_NONE) { - if(m_sel.fst) + if (m_sel.fst) { // GSVector4i st = m_local.d4.st; @@ -547,7 +547,7 @@ void GSDrawScanlineCodeGenerator::Step_AVX() vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); vpaddd(_s, xmm1); - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) + if (m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); vpaddd(_t, xmm1); @@ -573,9 +573,9 @@ void GSDrawScanlineCodeGenerator::Step_AVX() } } - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) + if (!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) { - if(m_sel.iip) + if (m_sel.iip) { // GSVector4i c = m_local.d4.c; @@ -598,7 +598,7 @@ void GSDrawScanlineCodeGenerator::Step_AVX() } else { - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { } } @@ -608,7 +608,7 @@ void GSDrawScanlineCodeGenerator::Step_AVX() } } - if(!m_sel.notest) + if (!m_sel.notest) { // test = m_test[7 + (steps & (steps >> 31))]; @@ -624,7 +624,7 @@ void GSDrawScanlineCodeGenerator::Step_AVX() void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) { - if(!m_sel.zb) + if (!m_sel.zb) { return; } @@ -637,9 +637,9 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) // GSVector4i zs = zi; - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { - if(m_sel.zoverflow) + if (m_sel.zoverflow) { // zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); @@ -664,7 +664,7 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) vcvttps2dq(xmm0, _z); } - if(m_sel.zwrite) + if (m_sel.zwrite) { #ifdef _WIN64 vmovdqa(_rip_local(temp.zs), xmm0); @@ -678,11 +678,11 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) movdqa(xmm0, _z); } - if(m_sel.ztest) + if (m_sel.ztest) { ReadPixel_AVX(xmm1, rbp); - if(m_sel.zwrite && m_sel.zpsm < 2) + if (m_sel.zwrite && m_sel.zpsm < 2) { #ifdef _WIN64 vmovdqa(_rip_local(temp.zd), xmm1); @@ -693,13 +693,13 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) // zd &= 0xffffffff >> m_sel.zpsm * 8; - if(m_sel.zpsm) + if (m_sel.zpsm) { vpslld(xmm1, static_cast(m_sel.zpsm * 8)); vpsrld(xmm1, static_cast(m_sel.zpsm * 8)); } - if(m_sel.zoverflow || m_sel.zpsm == 0) + if (m_sel.zoverflow || m_sel.zpsm == 0) { // GSVector4i o = GSVector4i::x80000000(); @@ -713,21 +713,21 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) vpsubd(xmm1, xmm2); } - switch(m_sel.ztst) + switch (m_sel.ztst) { - case ZTST_GEQUAL: - // test |= zso < zdo; // ~(zso >= zdo) - vpcmpgtd(xmm1, xmm0); - vpor(_test, xmm1); - break; + case ZTST_GEQUAL: + // test |= zso < zdo; // ~(zso >= zdo) + vpcmpgtd(xmm1, xmm0); + vpor(_test, xmm1); + break; - case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL - // test |= zso <= zdo; // ~(zso > zdo) - vpcmpgtd(xmm0, xmm1); - vpcmpeqd(xmm2, xmm2); - vpxor(xmm0, xmm2); - vpor(_test, xmm0); - break; + case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL + // test |= zso <= zdo; // ~(zso > zdo) + vpcmpgtd(xmm0, xmm1); + vpcmpeqd(xmm2, xmm2); + vpxor(xmm0, xmm2); + vpor(_test, xmm0); + break; } alltrue(_test); @@ -736,12 +736,12 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) void GSDrawScanlineCodeGenerator::SampleTexture_AVX() { - if(!m_sel.fb || m_sel.tfx == TFX_NONE) + if (!m_sel.fb || m_sel.tfx == TFX_NONE) { return; } - if(!m_sel.fst) + if (!m_sel.fst) { vrcpps(xmm0, _q); @@ -751,7 +751,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_AVX() vcvttps2dq(xmm4, xmm4); vcvttps2dq(xmm5, xmm5); - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -770,7 +770,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_AVX() vmovdqa(xmm5, _t); } - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i uf = u.xxzzlh().srl16(12); @@ -778,7 +778,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_AVX() vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); vpsrlw(xmm6, 12); - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // GSVector4i vf = v.xxzzlh().srl16(12); @@ -794,7 +794,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_AVX() vpsrad(xmm5, 16); vpackssdw(xmm4, xmm5); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); @@ -835,7 +835,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_AVX() // xmm6 = uf // xmm7 = vf - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i x1 = uv1.upl16(); // GSVector4i y1 = uv1.uph16() << tw; @@ -986,11 +986,11 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vpmaxsw(uv, _rip_global(t.min)); } @@ -1006,7 +1006,7 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv) { vpand(uv, _rip_global(t.min)); - if(region) + if (region) { vpor(uv, _rip_global(t.max)); } @@ -1022,7 +1022,7 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv) vpand(xmm1, uv, xmm2); - if(region) + if (region) { vpor(xmm1, xmm3); } @@ -1047,11 +1047,11 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv0, const Xmm& uv1) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vmovdqa(xmm0, _rip_global(t.min)); vpmaxsw(uv0, xmm0); @@ -1074,7 +1074,7 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv0, const Xmm& uv1) vpand(uv0, xmm0); vpand(uv1, xmm0); - if(region) + if (region) { vmovdqa(xmm0, _rip_global(t.max)); vpor(uv0, xmm0); @@ -1094,7 +1094,7 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv0, const Xmm& uv1) vpand(xmm1, uv0, xmm2); - if(region) + if (region) { vpor(xmm1, xmm3); } @@ -1114,7 +1114,7 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv0, const Xmm& uv1) vpand(xmm1, uv1, xmm2); - if(region) + if (region) { vpor(xmm1, xmm3); } @@ -1144,96 +1144,96 @@ void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv0, const Xmm& uv1) void GSDrawScanlineCodeGenerator::AlphaTFX_AVX() { - if(!m_sel.fb) + if (!m_sel.fb) { return; } - switch(m_sel.tfx) + switch (m_sel.tfx) { - case TFX_MODULATE: + case TFX_MODULATE: - // gat = gat.modulate16<1>(ga).clamp8(); + // gat = gat.modulate16<1>(ga).clamp8(); - modulate16(_ga, _f_ga, 1); + modulate16(_ga, _f_ga, 1); - clamp16(_ga, xmm0); + clamp16(_ga, xmm0); - // if(!tcc) gat = gat.mix16(ga.srl16(7)); + // if(!tcc) gat = gat.mix16(ga.srl16(7)); + + if (!m_sel.tcc) + { + vpsrlw(xmm1, _f_ga, 7); + + mix16(_ga, xmm1, xmm0); + } + + break; + + case TFX_DECAL: + + // if(!tcc) gat = gat.mix16(ga.srl16(7)); + + if (!m_sel.tcc) + { + vpsrlw(xmm1, _f_ga, 7); + + mix16(_ga, xmm1, xmm0); + } + + break; + + case TFX_HIGHLIGHT: + + // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); - if(!m_sel.tcc) - { vpsrlw(xmm1, _f_ga, 7); - mix16(_ga, xmm1, xmm0); - } - - break; - - case TFX_DECAL: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - vpsrlw(xmm1, _f_ga, 7); + if (m_sel.tcc) + { + vpaddusb(xmm1, _ga); + } mix16(_ga, xmm1, xmm0); - } - break; + break; - case TFX_HIGHLIGHT: + case TFX_HIGHLIGHT2: - // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - vpsrlw(xmm1, _f_ga, 7); + if (!m_sel.tcc) + { + vpsrlw(xmm1, _f_ga, 7); - if(m_sel.tcc) - { - vpaddusb(xmm1, _ga); - } + mix16(_ga, xmm1, xmm0); + } - mix16(_ga, xmm1, xmm0); + break; - break; + case TFX_NONE: - case TFX_HIGHLIGHT2: + // gat = iip ? ga.srl16(7) : ga; - // if(!tcc) gat = gat.mix16(ga.srl16(7)); + if (m_sel.iip) + { + vpsrlw(_ga, _f_ga, 7); + } - if(!m_sel.tcc) - { - vpsrlw(xmm1, _f_ga, 7); - - mix16(_ga, xmm1, xmm0); - } - - break; - - case TFX_NONE: - - // gat = iip ? ga.srl16(7) : ga; - - if(m_sel.iip) - { - vpsrlw(_ga, _f_ga, 7); - } - - break; + break; } - if(m_sel.aa1) + if (m_sel.aa1) { // gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha // FIXME: bios config screen cubes - if(!m_sel.abe) + if (!m_sel.abe) { // a = cov - if(m_sel.edge) + if (m_sel.edge) { #ifdef _WIN64 vmovdqa(xmm0, _rip_local(temp.cov)); @@ -1258,7 +1258,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX_AVX() vpsllw(xmm0, 15); vpsrlw(xmm0, 8); - if(m_sel.edge) + if (m_sel.edge) { #ifdef _WIN64 vmovdqa(xmm1, _rip_local(temp.cov)); @@ -1282,12 +1282,12 @@ void GSDrawScanlineCodeGenerator::AlphaTFX_AVX() void GSDrawScanlineCodeGenerator::ReadMask_AVX() { - if(m_sel.fwrite) + if (m_sel.fwrite) { vmovdqa(_fm, _rip_global(fm)); } - if(m_sel.zwrite) + if (m_sel.zwrite) { vmovdqa(_zm, _rip_global(zm)); } @@ -1295,143 +1295,143 @@ void GSDrawScanlineCodeGenerator::ReadMask_AVX() void GSDrawScanlineCodeGenerator::TestAlpha_AVX() { - switch(m_sel.atst) + switch (m_sel.atst) { - case ATST_NEVER: - // t = GSVector4i::xffffffff(); - vpcmpeqd(xmm1, xmm1); - break; + case ATST_NEVER: + // t = GSVector4i::xffffffff(); + vpcmpeqd(xmm1, xmm1); + break; - case ATST_ALWAYS: - return; + case ATST_ALWAYS: + return; - case ATST_LESS: - case ATST_LEQUAL: - // t = (ga >> 16) > m_local.gd->aref; - vpsrld(xmm1, _ga, 16); - vpcmpgtd(xmm1, _rip_global(aref)); - break; + case ATST_LESS: + case ATST_LEQUAL: + // t = (ga >> 16) > m_local.gd->aref; + vpsrld(xmm1, _ga, 16); + vpcmpgtd(xmm1, _rip_global(aref)); + break; - case ATST_EQUAL: - // t = (ga >> 16) != m_local.gd->aref; - vpsrld(xmm1, _ga, 16); - vpcmpeqd(xmm1, _rip_global(aref)); - vpcmpeqd(xmm0, xmm0); - vpxor(xmm1, xmm0); - break; + case ATST_EQUAL: + // t = (ga >> 16) != m_local.gd->aref; + vpsrld(xmm1, _ga, 16); + vpcmpeqd(xmm1, _rip_global(aref)); + vpcmpeqd(xmm0, xmm0); + vpxor(xmm1, xmm0); + break; - case ATST_GEQUAL: - case ATST_GREATER: - // t = (ga >> 16) < m_local.gd->aref; - vpsrld(xmm0, _ga, 16); - vmovdqa(xmm1, _rip_global(aref)); - vpcmpgtd(xmm1, xmm0); - break; + case ATST_GEQUAL: + case ATST_GREATER: + // t = (ga >> 16) < m_local.gd->aref; + vpsrld(xmm0, _ga, 16); + vmovdqa(xmm1, _rip_global(aref)); + vpcmpgtd(xmm1, xmm0); + break; - case ATST_NOTEQUAL: - // t = (ga >> 16) == m_local.gd->aref; - vpsrld(xmm1, _ga, 16); - vpcmpeqd(xmm1, _rip_global(aref)); - break; + case ATST_NOTEQUAL: + // t = (ga >> 16) == m_local.gd->aref; + vpsrld(xmm1, _ga, 16); + vpcmpeqd(xmm1, _rip_global(aref)); + break; } - switch(m_sel.afail) + switch (m_sel.afail) { - case AFAIL_KEEP: - // test |= t; - vpor(_test, xmm1); - alltrue(_test); - break; + case AFAIL_KEEP: + // test |= t; + vpor(_test, xmm1); + alltrue(_test); + break; - case AFAIL_FB_ONLY: - // zm |= t; - vpor(_zm, xmm1); - break; + case AFAIL_FB_ONLY: + // zm |= t; + vpor(_zm, xmm1); + break; - case AFAIL_ZB_ONLY: - // fm |= t; - vpor(_fm, xmm1); - break; + case AFAIL_ZB_ONLY: + // fm |= t; + vpor(_fm, xmm1); + break; - case AFAIL_RGB_ONLY: - // zm |= t; - vpor(_zm, xmm1); - // fm |= t & GSVector4i::xff000000(); - vpsrld(xmm1, 24); - vpslld(xmm1, 24); - vpor(_fm, xmm1); - break; + case AFAIL_RGB_ONLY: + // zm |= t; + vpor(_zm, xmm1); + // fm |= t & GSVector4i::xff000000(); + vpsrld(xmm1, 24); + vpslld(xmm1, 24); + vpor(_fm, xmm1); + break; } } void GSDrawScanlineCodeGenerator::ColorTFX_AVX() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - switch(m_sel.tfx) + switch (m_sel.tfx) { - case TFX_MODULATE: + case TFX_MODULATE: - // rbt = rbt.modulate16<1>(rb).clamp8(); + // rbt = rbt.modulate16<1>(rb).clamp8(); - modulate16(_rb, _f_rb, 1); + modulate16(_rb, _f_rb, 1); - clamp16(_rb, xmm0); + clamp16(_rb, xmm0); - break; + break; - case TFX_DECAL: + case TFX_DECAL: - break; + break; - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: + case TFX_HIGHLIGHT: + case TFX_HIGHLIGHT2: - // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); + // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); - vmovdqa(xmm1, _ga); + vmovdqa(xmm1, _ga); - modulate16(_ga, _f_ga, 1); + modulate16(_ga, _f_ga, 1); - vpshuflw(xmm6, _f_ga, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(xmm6, xmm6, _MM_SHUFFLE(3, 3, 1, 1)); - vpsrlw(xmm6, 7); + vpshuflw(xmm6, _f_ga, _MM_SHUFFLE(3, 3, 1, 1)); + vpshufhw(xmm6, xmm6, _MM_SHUFFLE(3, 3, 1, 1)); + vpsrlw(xmm6, 7); - vpaddw(_ga, xmm6); + vpaddw(_ga, xmm6); - clamp16(_ga, xmm0); + clamp16(_ga, xmm0); - mix16(_ga, xmm1, xmm0); + mix16(_ga, xmm1, xmm0); - // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); + // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); - modulate16(_rb, _f_rb, 1); + modulate16(_rb, _f_rb, 1); - vpaddw(_rb, xmm6); + vpaddw(_rb, xmm6); - clamp16(_rb, xmm0); + clamp16(_rb, xmm0); - break; + break; - case TFX_NONE: + case TFX_NONE: - // rbt = iip ? rb.srl16(7) : rb; + // rbt = iip ? rb.srl16(7) : rb; - if(m_sel.iip) - { - vpsrlw(_rb, _f_rb, 7); - } + if (m_sel.iip) + { + vpsrlw(_rb, _f_rb, 7); + } - break; + break; } } void GSDrawScanlineCodeGenerator::Fog_AVX() { - if(!m_sel.fwrite || !m_sel.fge) + if (!m_sel.fwrite || !m_sel.fge) { return; } @@ -1452,7 +1452,7 @@ void GSDrawScanlineCodeGenerator::Fog_AVX() void GSDrawScanlineCodeGenerator::ReadFrame_AVX() { - if(!m_sel.fb) + if (!m_sel.fb) { return; } @@ -1463,7 +1463,7 @@ void GSDrawScanlineCodeGenerator::ReadFrame_AVX() add(ebx, dword[t0]); and(ebx, HALF_VM_SIZE - 1); - if(!m_sel.rfb) + if (!m_sel.rfb) { return; } @@ -1473,16 +1473,16 @@ void GSDrawScanlineCodeGenerator::ReadFrame_AVX() void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX() { - if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) + if (!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) { return; } // test |= ((fd [<< 16]) ^ m_local.gd->datm).sra32(31); - if(m_sel.datm) + if (m_sel.datm) { - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { vpxor(xmm0, xmm0); //vpsrld(xmm1, _fd, 15); @@ -1499,7 +1499,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX() } else { - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { vpslld(xmm1, _fd, 16); vpsrad(xmm1, 31); @@ -1517,7 +1517,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX() void GSDrawScanlineCodeGenerator::WriteMask_AVX() { - if(m_sel.notest) + if (m_sel.notest) { return; } @@ -1525,12 +1525,12 @@ void GSDrawScanlineCodeGenerator::WriteMask_AVX() // fm |= test; // zm |= test; - if(m_sel.fwrite) + if (m_sel.fwrite) { vpor(_fm, _test); } - if(m_sel.zwrite) + if (m_sel.zwrite) { vpor(_zm, _test); } @@ -1539,18 +1539,18 @@ void GSDrawScanlineCodeGenerator::WriteMask_AVX() vpcmpeqd(xmm1, xmm1); - if(m_sel.fwrite && m_sel.zwrite) + if (m_sel.fwrite && m_sel.zwrite) { vpcmpeqd(xmm0, xmm1, _zm); vpcmpeqd(xmm1, _fm); vpackssdw(xmm1, xmm0); } - else if(m_sel.fwrite) + else if (m_sel.fwrite) { vpcmpeqd(xmm1, _fm); vpackssdw(xmm1, xmm1); } - else if(m_sel.zwrite) + else if (m_sel.zwrite) { vpcmpeqd(xmm1, _zm); vpackssdw(xmm1, xmm1); @@ -1563,7 +1563,7 @@ void GSDrawScanlineCodeGenerator::WriteMask_AVX() void GSDrawScanlineCodeGenerator::WriteZBuf_AVX() { - if(!m_sel.zwrite) + if (!m_sel.zwrite) { return; } @@ -1577,7 +1577,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf_AVX() else vmovdqa(xmm1, _rip_local(p.z)); - if(m_sel.ztest && m_sel.zpsm < 2) + if (m_sel.ztest && m_sel.zpsm < 2) { // zs = zs.blend8(zd, zm); @@ -1595,12 +1595,12 @@ void GSDrawScanlineCodeGenerator::WriteZBuf_AVX() void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - if(m_sel.abe == 0 && m_sel.aa1 == 0) + if (m_sel.abe == 0 && m_sel.aa1 == 0) { return; } @@ -1608,48 +1608,48 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() const Xmm& _dst_rb = xmm0; const Xmm& _dst_ga = xmm1; - if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) + if ((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) { - switch(m_sel.fpsm) + switch (m_sel.fpsm) { - case 0: - case 1: + case 0: + case 1: - // c[2] = fd & mask; - // c[3] = (fd >> 8) & mask; + // c[2] = fd & mask; + // c[3] = (fd >> 8) & mask; - split16_2x8(_dst_rb, _dst_ga, _fd); + split16_2x8(_dst_rb, _dst_ga, _fd); - break; + break; - case 2: + case 2: - // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); + // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); + // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - vpcmpeqd(xmm15, xmm15); + vpcmpeqd(xmm15, xmm15); - vpsrld(xmm15, 27); // 0x0000001f - vpand(_dst_rb, _fd, xmm15); - vpslld(_dst_rb, 3); + vpsrld(xmm15, 27); // 0x0000001f + vpand(_dst_rb, _fd, xmm15); + vpslld(_dst_rb, 3); - vpslld(xmm15, 10); // 0x00007c00 - vpand(xmm5, _fd, xmm15); - vpslld(xmm5, 9); + vpslld(xmm15, 10); // 0x00007c00 + vpand(xmm5, _fd, xmm15); + vpslld(xmm5, 9); - vpor(_dst_rb, xmm5); + vpor(_dst_rb, xmm5); - vpsrld(xmm15, 5); // 0x000003e0 - vpand(_dst_ga, _fd, xmm15); - vpsrld(_dst_ga, 2); + vpsrld(xmm15, 5); // 0x000003e0 + vpand(_dst_ga, _fd, xmm15); + vpsrld(_dst_ga, 2); - vpsllw(xmm15, 10); // 0x00008000 - vpand(xmm5, _fd, xmm15); - vpslld(xmm5, 8); + vpsllw(xmm15, 10); // 0x00008000 + vpand(xmm5, _fd, xmm15); + vpslld(xmm5, 8); - vpor(_dst_ga, xmm5); + vpor(_dst_ga, xmm5); - break; + break; } } @@ -1657,46 +1657,56 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() // xmm0, xmm1 = dst rb, ga // xmm5, xmm15 = free - if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) + if (m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) { vmovdqa(xmm5, _rb); } - if(m_sel.aba != m_sel.abb) + if (m_sel.aba != m_sel.abb) { // rb = c[aba * 2 + 0]; - switch(m_sel.aba) + switch (m_sel.aba) { - case 0: break; - case 1: vmovdqa(_rb, _dst_rb); break; - case 2: vpxor(_rb, _rb); break; + case 0: + break; + case 1: + vmovdqa(_rb, _dst_rb); + break; + case 2: + vpxor(_rb, _rb); + break; } // rb = rb.sub16(c[abb * 2 + 0]); - switch(m_sel.abb) + switch (m_sel.abb) { - case 0: vpsubw(_rb, xmm5); break; - case 1: vpsubw(_rb, _dst_rb); break; - case 2: break; + case 0: + vpsubw(_rb, xmm5); + break; + case 1: + vpsubw(_rb, _dst_rb); + break; + case 2: + break; } - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) + if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_local.gd->afix; - switch(m_sel.abc) + switch (m_sel.abc) { - case 0: - case 1: - vpshuflw(xmm15, m_sel.abc ? _dst_ga : _ga, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(xmm15, xmm15, _MM_SHUFFLE(3, 3, 1, 1)); - vpsllw(xmm15, 7); - break; - case 2: - vmovdqa(xmm15, _rip_global(afix)); - break; + case 0: + case 1: + vpshuflw(xmm15, m_sel.abc ? _dst_ga : _ga, _MM_SHUFFLE(3, 3, 1, 1)); + vpshufhw(xmm15, xmm15, _MM_SHUFFLE(3, 3, 1, 1)); + vpsllw(xmm15, 7); + break; + case 2: + vmovdqa(xmm15, _rip_global(afix)); + break; } // rb = rb.modulate16<1>(a); @@ -1706,26 +1716,36 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() // rb = rb.add16(c[abd * 2 + 0]); - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: vpaddw(_rb, xmm5); break; - case 1: vpaddw(_rb, _dst_rb); break; - case 2: break; + case 0: + vpaddw(_rb, xmm5); + break; + case 1: + vpaddw(_rb, _dst_rb); + break; + case 2: + break; } } else { // rb = c[abd * 2 + 0]; - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: break; - case 1: vmovdqa(_rb, _dst_rb); break; - case 2: vpxor(_rb, _rb); break; + case 0: + break; + case 1: + vmovdqa(_rb, _dst_rb); + break; + case 2: + vpxor(_rb, _rb); + break; } } - if(m_sel.pabe) + if (m_sel.pabe) { // mask = (c[1] << 8).sra32(31); @@ -1746,27 +1766,37 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() vmovdqa(xmm5, _ga); - if(m_sel.aba != m_sel.abb) + if (m_sel.aba != m_sel.abb) { // ga = c[aba * 2 + 1]; - switch(m_sel.aba) + switch (m_sel.aba) { - case 0: break; - case 1: vmovdqa(_ga, _dst_ga); break; - case 2: vpxor(_ga, _ga); break; + case 0: + break; + case 1: + vmovdqa(_ga, _dst_ga); + break; + case 2: + vpxor(_ga, _ga); + break; } // ga = ga.sub16(c[abeb * 2 + 1]); - switch(m_sel.abb) + switch (m_sel.abb) { - case 0: vpsubw(_ga, xmm5); break; - case 1: vpsubw(_ga, _dst_ga); break; - case 2: break; + case 0: + vpsubw(_ga, xmm5); + break; + case 1: + vpsubw(_ga, _dst_ga); + break; + case 2: + break; } - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) + if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // ga = ga.modulate16<1>(a); @@ -1775,22 +1805,32 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() // ga = ga.add16(c[abd * 2 + 1]); - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: vpaddw(_ga, xmm5); break; - case 1: vpaddw(_ga, _dst_ga); break; - case 2: break; + case 0: + vpaddw(_ga, xmm5); + break; + case 1: + vpaddw(_ga, _dst_ga); + break; + case 2: + break; } } else { // ga = c[abd * 2 + 1]; - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: break; - case 1: vmovdqa(_ga, _dst_ga); break; - case 2: vpxor(_ga, _ga); break; + case 0: + break; + case 1: + vmovdqa(_ga, _dst_ga); + break; + case 2: + vpxor(_ga, _ga); + break; } } @@ -1800,7 +1840,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() // xmm3 = ga // xmm1, xmm15 = free - if(m_sel.pabe) + if (m_sel.pabe) { vpsrld(xmm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16) @@ -1810,7 +1850,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() } else { - if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx + if (m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx { mix16(_ga, xmm5, xmm15); } @@ -1819,12 +1859,12 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() void GSDrawScanlineCodeGenerator::WriteFrame_AVX() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - if(m_sel.fpsm == 2 && m_sel.dthe) + if (m_sel.fpsm == 2 && m_sel.dthe) { // y = (top & 3) << 5 @@ -1843,10 +1883,9 @@ void GSDrawScanlineCodeGenerator::WriteFrame_AVX() vpaddw(xmm2, ptr[rax + sizeof(GSVector4i) * 0]); vpaddw(xmm3, ptr[rax + sizeof(GSVector4i) * 1]); - } - if(m_sel.colclamp == 0) + if (m_sel.colclamp == 0) { // c[0] &= 0x00ff00ff; // c[1] &= 0x00ff00ff; @@ -1863,7 +1902,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame_AVX() vpunpcklwd(xmm2, xmm3); vpackuswb(xmm2, xmm15); - if(m_sel.fba && m_sel.fpsm != 1) + if (m_sel.fba && m_sel.fpsm != 1) { // fs |= 0x80000000; @@ -1876,7 +1915,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame_AVX() // xmm4 = fm // xmm6 = fd - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { // GSVector4i rb = fs & 0x00f800f8; // GSVector4i ga = fs & 0x8000f800; @@ -1904,7 +1943,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame_AVX() vpor(xmm2, xmm0); } - if(m_sel.rfb) + if (m_sel.rfb) { // fs = fs.blend(fd, fm); @@ -1924,9 +1963,9 @@ void GSDrawScanlineCodeGenerator::ReadPixel_AVX(const Xmm& dst, const Reg64& add void GSDrawScanlineCodeGenerator::WritePixel_AVX(const Xmm& src, const Reg64& addr, const Reg8& mask, bool fast, int psm, int fz) { - if(m_sel.notest) + if (m_sel.notest) { - if(fast) + if (fast) { vmovq(qword[_m_local__gd__vm + addr * 2], src); vmovhps(qword[_m_local__gd__vm + addr * 2 + 8 * 2], src); @@ -1941,7 +1980,7 @@ void GSDrawScanlineCodeGenerator::WritePixel_AVX(const Xmm& src, const Reg64& ad } else { - if(fast) + if (fast) { // if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs); // if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs); @@ -1994,23 +2033,27 @@ void GSDrawScanlineCodeGenerator::WritePixel_AVX(const Xmm& src, const Reg64& ad { Address dst = ptr[_m_local__gd__vm + addr * 2 + s_offsets[i] * 2]; - switch(psm) + switch (psm) { - case 0: - if(i == 0) vmovd(dst, src); - else vpextrd(dst, src, i); - break; - case 1: - if(i == 0) vmovd(eax, src); - else vpextrd(eax, src, i); - xor(eax, dst); - and(eax, 0xffffff); - xor(dst, eax); - break; - case 2: - vpextrw(eax, src, i * 2); - mov(dst, ax); - break; + case 0: + if (i == 0) + vmovd(dst, src); + else + vpextrd(dst, src, i); + break; + case 1: + if (i == 0) + vmovd(eax, src); + else + vpextrd(eax, src, i); + xor(eax, dst); + and(eax, 0xffffff); + xor(dst, eax); + break; + case 2: + vpextrw(eax, src, i * 2); + mov(dst, ax); + break; } } @@ -2019,9 +2062,9 @@ void GSDrawScanlineCodeGenerator::ReadTexel_AVX(int pixels, int mip_offset) const int in[] = {0, 1, 2, 3}; const int out[] = {4, 5, 0, 1}; - for(int i = 0; i < pixels; i++) + for (int i = 0; i < pixels; i++) { - for(uint8 j = 0; j < 4; j++) + for (uint8 j = 0; j < 4; j++) { ReadTexel_AVX(Xmm(out[i]), Xmm(in[i]), j); } @@ -2033,14 +2076,19 @@ void GSDrawScanlineCodeGenerator::ReadTexel_AVX(const Xmm& dst, const Xmm& addr, const Address& src = m_sel.tlu ? ptr[_m_local__gd__clut + rax * 4] : ptr[_m_local__gd__tex + rax * 4]; // Extract address offset - if(i == 0) vmovd(eax, addr); - else vpextrd(eax, addr, i); + if (i == 0) + vmovd(eax, addr); + else + vpextrd(eax, addr, i); // If clut, load the value as a byte index - if(m_sel.tlu) movzx(eax, byte[_m_local__gd__tex + rax]); + if (m_sel.tlu) + movzx(eax, byte[_m_local__gd__tex + rax]); - if(i == 0) vmovd(dst, src); - else vpinsrd(dst, src, i); + if (i == 0) + vmovd(dst, src); + else + vpinsrd(dst, src, i); } // Gather example (AVX2). Not faster on Haswell but potentially better on recent CPU diff --git a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x64.avx2.cpp b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x64.avx2.cpp index 804f861d37..1389d802da 100644 --- a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x64.avx2.cpp +++ b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x64.avx2.cpp @@ -36,20 +36,20 @@ static const int _v = _args + 8; #define _m_local__gd__clut r11 #define _m_local__gd__tex a3 // More pretty name -#define _z ymm8 -#define _f ymm9 -#define _s ymm10 -#define _t ymm11 -#define _q ymm12 -#define _f_rb ymm13 -#define _f_ga ymm14 -#define _test ymm15 +#define _z ymm8 +#define _f ymm9 +#define _s ymm10 +#define _t ymm11 +#define _q ymm12 +#define _f_rb ymm13 +#define _f_ga ymm14 +#define _test ymm15 // Extra bonus -#define _rb ymm2 -#define _ga ymm3 -#define _fm ymm4 -#define _zm ymm5 -#define _fd ymm6 +#define _rb ymm2 +#define _ga ymm3 +#define _fm ymm4 +#define _zm ymm5 +#define _fd ymm6 #define _rip_local(field) (m_rip ? ptr[rip + &m_local.field] : ptr[_m_local + offsetof(GSScanlineLocalData, field)]) #define _rip_global(field) (m_rip ? ptr[rip + &m_local.gd->field] : ptr[_m_local__gd + offsetof(GSScanlineGlobalData, field)]) @@ -88,7 +88,7 @@ void GSDrawScanlineCodeGenerator::Generate() sub(rsp, 8 + 10 * 16); - for(int i = 6; i < 16; i++) + for (int i = 6; i < 16; i++) { vmovdqa(ptr[rsp + (i - 6) * 16], Xmm(i)); } @@ -110,14 +110,14 @@ void GSDrawScanlineCodeGenerator::Generate() mov(_m_local__gd, _rip_local(gd)); } - if(need_clut) + if (need_clut) mov(_m_local__gd__clut, _rip_global(clut)); //db(0xcc); Init(); - if(!m_sel.edge) + if (!m_sel.edge) { align(16); } @@ -149,7 +149,7 @@ L("loop"); // ymm6 = ga (!tme) // ymm7 = test - if(m_sel.mmin) + if (m_sel.mmin) { SampleTextureLOD(); } @@ -302,7 +302,7 @@ L("step"); // if(steps <= 0) break; - if(!m_sel.edge) + if (!m_sel.edge) { test(ecx, ecx); @@ -316,7 +316,7 @@ L("step"); L("exit"); #ifdef _WIN64 - for(int i = 6; i < 16; i++) + for (int i = 6; i < 16; i++) { vmovdqa(Xmm(i), ptr[rsp + (i - 6) * 16]); } @@ -342,7 +342,7 @@ L("exit"); void GSDrawScanlineCodeGenerator::Init() { - if(!m_sel.notest) + if (!m_sel.notest) { // int skip = left & 7; @@ -371,8 +371,8 @@ void GSDrawScanlineCodeGenerator::Init() } else { - mov(ebx, edx); // left - xor(edx, edx); // skip + mov(ebx, edx); // left + xor(edx, edx); // skip lea(ecx, ptr[ecx - 8]); // steps } @@ -387,7 +387,7 @@ void GSDrawScanlineCodeGenerator::Init() lea(edi, ptr[ebx * 2]); add(edi, ptr[&m_local.gd->fzbc]); - if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) + if (m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) { // edx = &m_local.d[skip] @@ -398,13 +398,13 @@ void GSDrawScanlineCodeGenerator::Init() mov(ebx, ptr[esp + _v]); } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { - if(m_sel.fwrite && m_sel.fge || m_sel.zb) + if (m_sel.fwrite && m_sel.fge || m_sel.zb) { vbroadcastf128(ymm0, ptr[ebx + offsetof(GSVertexSW, p)]); // v.p - if(m_sel.fwrite && m_sel.fge) + if (m_sel.fwrite && m_sel.fge) { // f = GSVector8i(vp).zzzzh().zzzz().add16(m_local.d[skip].f); @@ -416,7 +416,7 @@ void GSDrawScanlineCodeGenerator::Init() vmovdqa(ptr[&m_local.temp.f], ymm1); } - if(m_sel.zb) + if (m_sel.zb) { // z = vp.zzzz() + m_local.d[skip].z; @@ -430,20 +430,20 @@ void GSDrawScanlineCodeGenerator::Init() } else { - if(m_sel.ztest) + if (m_sel.ztest) { vpbroadcastd(ymm0, ptr[&m_local.p.z]); } } - if(m_sel.fb) + if (m_sel.fb) { - if(m_sel.edge || m_sel.tfx != TFX_NONE) + if (m_sel.edge || m_sel.tfx != TFX_NONE) { vbroadcastf128(ymm4, ptr[ebx + offsetof(GSVertexSW, t)]); // v.t } - if(m_sel.edge) + if (m_sel.edge) { // m_local.temp.cov = GSVector4i::cast(v.t).zzzzh().wwww().srl16(9); @@ -454,9 +454,9 @@ void GSDrawScanlineCodeGenerator::Init() vmovdqa(ptr[&m_local.temp.cov], ymm3); } - if(m_sel.tfx != TFX_NONE) + if (m_sel.tfx != TFX_NONE) { - if(m_sel.fst) + if (m_sel.fst) { // GSVector4i vti(vt); @@ -470,13 +470,13 @@ void GSDrawScanlineCodeGenerator::Init() vpaddd(ymm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) + if (m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { vpaddd(ymm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); } else { - if(m_sel.ltf) + if (m_sel.ltf) { vpshuflw(ymm6, ymm3, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(ymm6, ymm6, _MM_SHUFFLE(2, 2, 0, 0)); @@ -508,9 +508,9 @@ void GSDrawScanlineCodeGenerator::Init() } } - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) + if (!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) { - if(m_sel.iip) + if (m_sel.iip) { // GSVector4i vc = GSVector4i(v.c); @@ -536,7 +536,7 @@ void GSDrawScanlineCodeGenerator::Init() } else { - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { vmovdqa(ymm5, ptr[&m_local.c.rb]); vmovdqa(ymm6, ptr[&m_local.c.ga]); @@ -556,11 +556,11 @@ void GSDrawScanlineCodeGenerator::Step() add(t0, 16); - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // zo += GSVector8::broadcast32(&m_local.d8.p.z); - if(m_sel.zb) + if (m_sel.zb) { vbroadcastss(ymm0, ptr[&m_local.d8.p.z]); vaddps(ymm0, ptr[&m_local.temp.zo]); @@ -570,7 +570,7 @@ void GSDrawScanlineCodeGenerator::Step() // f = f.add16(GSVector8i::broadcast16(&m_local.d8.p.f)); - if(m_sel.fwrite && m_sel.fge) + if (m_sel.fwrite && m_sel.fge) { vpbroadcastw(ymm1, ptr[&m_local.d8.p.f]); vpaddw(ymm1, ptr[&m_local.temp.f]); @@ -579,17 +579,17 @@ void GSDrawScanlineCodeGenerator::Step() } else { - if(m_sel.ztest) + if (m_sel.ztest) { vpbroadcastd(ymm0, ptr[&m_local.p.z]); } } - if(m_sel.fb) + if (m_sel.fb) { - if(m_sel.tfx != TFX_NONE) + if (m_sel.tfx != TFX_NONE) { - if(m_sel.fst) + if (m_sel.fst) { // GSVector8i stq = GSVector8i::cast(GSVector8(m_local.d8.stq)); @@ -601,7 +601,7 @@ void GSDrawScanlineCodeGenerator::Step() vpaddd(ymm2, ptr[&m_local.temp.s]); vmovdqa(ptr[&m_local.temp.s], ymm2); - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) + if (m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { // t = GSVector8::cast(GSVector8i::cast(t) + stq.yyyy()); @@ -638,9 +638,9 @@ void GSDrawScanlineCodeGenerator::Step() } } - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) + if (!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) { - if(m_sel.iip) + if (m_sel.iip) { // GSVector8i c = GSVector8i::broadcast64(&m_local.d8.c); @@ -666,7 +666,7 @@ void GSDrawScanlineCodeGenerator::Step() } else { - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { vmovdqa(ymm5, ptr[&m_local.c.rb]); vmovdqa(ymm6, ptr[&m_local.c.ga]); @@ -675,7 +675,7 @@ void GSDrawScanlineCodeGenerator::Step() } } - if(!m_sel.notest) + if (!m_sel.notest) { // test = m_test[15 + (steps & (steps >> 31))]; @@ -689,7 +689,7 @@ void GSDrawScanlineCodeGenerator::Step() void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) { - if(!m_sel.zb) + if (!m_sel.zb) { return; } @@ -702,9 +702,9 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) // GSVector8i zs = zi; - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { - if(m_sel.zoverflow) + if (m_sel.zoverflow) { // zs = (GSVector8i(z * 0.5f) << 1) | (GSVector8i(z) & GSVector8i::x00000001()); @@ -727,7 +727,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) vcvttps2dq(ymm0, ymm0); } - if(m_sel.zwrite) + if (m_sel.zwrite) { #ifdef _WIN64 vmovdqa(ptr[&m_local.temp.zs], ymm0); @@ -737,11 +737,11 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) } } - if(m_sel.ztest) + if (m_sel.ztest) { ReadPixel(ymm1, temp1, rbp); - if(m_sel.zwrite && m_sel.zpsm < 2) + if (m_sel.zwrite && m_sel.zpsm < 2) { #ifdef _WIN64 vmovdqa(_rip_local(temp.zd), ymm1); @@ -752,13 +752,13 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) // zd &= 0xffffffff >> m_sel.zpsm * 8; - if(m_sel.zpsm) + if (m_sel.zpsm) { vpslld(ymm1, (uint8)(m_sel.zpsm * 8)); vpsrld(ymm1, (uint8)(m_sel.zpsm * 8)); } - if(m_sel.zoverflow || m_sel.zpsm == 0) + if (m_sel.zoverflow || m_sel.zpsm == 0) { // GSVector8i o = GSVector8i::x80000000(); @@ -772,21 +772,21 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) vpsubd(ymm1, temp1); } - switch(m_sel.ztst) + switch (m_sel.ztst) { - case ZTST_GEQUAL: - // test |= zso < zdo; // ~(zso >= zdo) - vpcmpgtd(ymm1, ymm0); - vpor(ymm7, ymm1); - break; + case ZTST_GEQUAL: + // test |= zso < zdo; // ~(zso >= zdo) + vpcmpgtd(ymm1, ymm0); + vpor(ymm7, ymm1); + break; - case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL - // test |= zso <= zdo; // ~(zso > zdo) - vpcmpgtd(ymm0, ymm1); - vpcmpeqd(temp1, temp1); - vpxor(ymm0, temp1); - vpor(ymm7, ymm0); - break; + case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL + // test |= zso <= zdo; // ~(zso > zdo) + vpcmpgtd(ymm0, ymm1); + vpcmpeqd(temp1, temp1); + vpxor(ymm0, temp1); + vpor(ymm7, ymm0); + break; } alltrue(ymm7); @@ -795,14 +795,14 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) void GSDrawScanlineCodeGenerator::SampleTexture() { - if(!m_sel.fb || m_sel.tfx == TFX_NONE) + if (!m_sel.fb || m_sel.tfx == TFX_NONE) { return; } mov(ebx, ptr[&m_local.gd->tex[0]]); - if(m_sel.tlu) + if (m_sel.tlu) { mov(edx, ptr[&m_local.gd->clut]); } @@ -810,7 +810,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() // ebx = tex // edx = clut - if(!m_sel.fst) + if (!m_sel.fst) { vrcpps(ymm0, ymm4); @@ -820,7 +820,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() vcvttps2dq(ymm2, ymm2); vcvttps2dq(ymm3, ymm3); - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -837,7 +837,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() // ymm2 = u // ymm3 = v - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i uf = u.xxzzlh().srl16(1); @@ -846,7 +846,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() vpsrlw(ymm0, 12); vmovdqa(ptr[&m_local.temp.uf], ymm0); - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // GSVector8i vf = v.xxzzlh().srl16(1); @@ -863,7 +863,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() vpsrad(ymm3, 16); vpackssdw(ymm2, ymm3); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i uv1 = uv0.add16(GSVector8i::x0001()); @@ -904,7 +904,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() // ymm1, ymm5, ymm6 = free // ymm7 = used - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i y1 = uv1.uph16() << tw; // GSVector8i x1 = uv1.upl16(); @@ -1064,11 +1064,11 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vbroadcasti128(ymm0, ptr[&m_local.gd->t.min]); vpmaxsw(uv, ymm0); @@ -1087,7 +1087,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv) vbroadcasti128(ymm0, ptr[&m_local.gd->t.min]); vpand(uv, ymm0); - if(region) + if (region) { vbroadcasti128(ymm0, ptr[&m_local.gd->t.max]); vpor(uv, ymm0); @@ -1104,7 +1104,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv) vpand(ymm1, uv, ymm4); - if(region) + if (region) { vpor(ymm1, ymm5); } @@ -1129,11 +1129,11 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv0, const Ymm& uv1) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vbroadcasti128(ymm4, ptr[&m_local.gd->t.min]); vpmaxsw(uv0, ymm4); @@ -1156,7 +1156,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv0, const Ymm& uv1) vpand(uv0, ymm4); vpand(uv1, ymm4); - if(region) + if (region) { vbroadcasti128(ymm5, ptr[&m_local.gd->t.max]); vpor(uv0, ymm5); @@ -1176,7 +1176,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv0, const Ymm& uv1) vpand(ymm1, uv0, ymm4); - if(region) + if (region) { vpor(ymm1, ymm5); } @@ -1196,7 +1196,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv0, const Ymm& uv1) vpand(ymm1, uv1, ymm4); - if(region) + if (region) { vpor(ymm1, ymm5); } @@ -1214,7 +1214,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv0, const Ymm& uv1) void GSDrawScanlineCodeGenerator::SampleTextureLOD() { - if(!m_sel.fb || m_sel.tfx == TFX_NONE) + if (!m_sel.fb || m_sel.tfx == TFX_NONE) { return; } @@ -1223,12 +1223,12 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() mov(ebp, (size_t)m_local.gd->tex); - if(m_sel.tlu) + if (m_sel.tlu) { mov(edx, ptr[&m_local.gd->clut]); } - if(!m_sel.fst) + if (!m_sel.fst) { vrcpps(ymm0, ymm4); @@ -1246,7 +1246,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() // TODO: if the fractional part is not needed in round-off mode then there is a faster integer log2 (just take the exp) (but can we round it?) - if(!m_sel.lcm) + if (!m_sel.lcm) { // lod = -log2(Q) * (1 << L) + K @@ -1265,7 +1265,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() // ymm4 = mant(q) | 1.0f - if(m_cpu.has(util::Cpu::tFMA)) + if (m_cpu.has(util::Cpu::tFMA)) { vmovaps(ymm5, ptr[g_const->m_log2_coef_256b[0]]); // c0 vfmadd213ps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[1]]); // c0 * ymm4 + c1 @@ -1286,7 +1286,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() // ymm4 = log2(Q) = ((((c0 * ymm4) + c1) * ymm4) + c2) * (ymm4 - 1.0f) + ymm0 - if(m_cpu.has(util::Cpu::tFMA)) + if (m_cpu.has(util::Cpu::tFMA)) { vmovaps(ymm5, ptr[&m_local.gd->l]); vfmadd213ps(ymm4, ymm5, ptr[&m_local.gd->k]); @@ -1304,7 +1304,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() vmaxps(ymm4, ymm0); vcvtps2dq(ymm4, ymm4); - if(m_sel.mmin == 1) // round-off mode + if (m_sel.mmin == 1) // round-off mode { mov(eax, 0x8000); vmovd(xmm0, eax); @@ -1321,7 +1321,7 @@ vpslld(ymm6, ymm4, 16); vpsrld(ymm6, ymm6, 24); return; */ - if(m_sel.mmin == 2) // trilinear mode + if (m_sel.mmin == 2) // trilinear mode { vpshuflw(ymm1, ymm4, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(ymm1, ymm1, _MM_SHUFFLE(2, 2, 0, 0)); @@ -1378,7 +1378,7 @@ return; // ymm5 = minuv // ymm6 = maxuv - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -1391,7 +1391,7 @@ return; vpsubd(ymm3, ymm4); // GSVector8i uf = u.xxzzlh().srl16(1); - + vpshuflw(ymm0, ymm2, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); vpsrlw(ymm0, 12); @@ -1411,7 +1411,7 @@ return; vpsrad(ymm3, 16); vpackssdw(ymm2, ymm3); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i uv1 = uv0.add16(GSVector8i::x0001()); @@ -1452,7 +1452,7 @@ return; // ymm1, ymm5, ymm6 = free // ymm7 = used - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i x1 = uv1.upl16(); // GSVector8i y1 = uv1.uph16() << tw; @@ -1602,7 +1602,7 @@ return; vpsrlw(ymm6, 8); } - if(m_sel.mmin != 1) // !round-off mode + if (m_sel.mmin != 1) // !round-off mode { vmovdqa(ptr[&m_local.temp.trb], ymm5); vmovdqa(ptr[&m_local.temp.tga], ymm6); @@ -1619,7 +1619,7 @@ return; vpsrlw(ymm5, 1); vpsrlw(ymm6, 1); - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -1632,7 +1632,7 @@ return; vpsubd(ymm3, ymm4); // GSVector8i uf = u.xxzzlh().srl16(1); - + vpshuflw(ymm0, ymm2, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); vpsrlw(ymm0, 12); @@ -1652,7 +1652,7 @@ return; vpsrad(ymm3, 16); vpackssdw(ymm2, ymm3); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i uv1 = uv0.add16(GSVector4i::x0001()); @@ -1693,7 +1693,7 @@ return; // ymm1, ymm5, ymm6 = free // ymm7 = used - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i x1 = uv1.upl16(); // GSVector8i y1 = uv1.uph16() << tw; @@ -1867,11 +1867,11 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vpmaxsw(uv, ymm5); } @@ -1887,7 +1887,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv) { vpand(uv, ymm5); - if(region) + if (region) { vpor(uv, ymm6); } @@ -1901,7 +1901,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv) vpand(ymm1, uv, ymm5); - if(region) + if (region) { vpor(ymm1, ymm6); } @@ -1928,11 +1928,11 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv0, const Ymm& uv1) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vpmaxsw(uv0, ymm5); vpmaxsw(uv1, ymm5); @@ -1952,7 +1952,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv0, const Ymm& uv1) vpand(uv0, ymm5); vpand(uv1, ymm5); - if(region) + if (region) { vpor(uv0, ymm6); vpor(uv1, ymm6); @@ -1969,7 +1969,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv0, const Ymm& uv1) vpand(ymm1, uv0, ymm5); - if(region) + if (region) { vpor(ymm1, ymm6); } @@ -1989,7 +1989,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv0, const Ymm& uv1) vpand(ymm1, uv1, ymm5); - if(region) + if (region) { vpor(ymm1, ymm6); } @@ -2007,114 +2007,114 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv0, const Ymm& uv1) void GSDrawScanlineCodeGenerator::AlphaTFX() { - if(!m_sel.fb) + if (!m_sel.fb) { return; } - switch(m_sel.tfx) + switch (m_sel.tfx) { - case TFX_MODULATE: + case TFX_MODULATE: - // GSVector8i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - - // gat = gat.modulate16<1>(ga).clamp8(); - - modulate16(ymm6, ymm4, 1); - - clamp16(ymm6, ymm3); - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - vpsrlw(ymm4, 7); - - mix16(ymm6, ymm4, ymm3); - } - - break; - - case TFX_DECAL: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - // GSVector4i ga = iip ? gaf : m_local.c.ga; + // GSVector8i ga = iip ? gaf : m_local.c.ga; vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - vpsrlw(ymm4, 7); + // gat = gat.modulate16<1>(ga).clamp8(); - mix16(ymm6, ymm4, ymm3); - } + modulate16(ymm6, ymm4, 1); - break; + clamp16(ymm6, ymm3); - case TFX_HIGHLIGHT: + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - // GSVector4i ga = iip ? gaf : m_local.c.ga; + if (!m_sel.tcc) + { + vpsrlw(ymm4, 7); - vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - vmovdqa(ymm2, ymm4); + mix16(ymm6, ymm4, ymm3); + } - // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); + break; - vpsrlw(ymm4, 7); + case TFX_DECAL: - if(m_sel.tcc) - { - vpaddusb(ymm4, ymm6); - } + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - mix16(ymm6, ymm4, ymm3); + if (!m_sel.tcc) + { + // GSVector4i ga = iip ? gaf : m_local.c.ga; - break; + vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - case TFX_HIGHLIGHT2: + vpsrlw(ymm4, 7); - // if(!tcc) gat = gat.mix16(ga.srl16(7)); + mix16(ymm6, ymm4, ymm3); + } + + break; + + case TFX_HIGHLIGHT: - if(!m_sel.tcc) - { // GSVector4i ga = iip ? gaf : m_local.c.ga; vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); vmovdqa(ymm2, ymm4); + // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); + vpsrlw(ymm4, 7); + if (m_sel.tcc) + { + vpaddusb(ymm4, ymm6); + } + mix16(ymm6, ymm4, ymm3); - } - break; + break; - case TFX_NONE: + case TFX_HIGHLIGHT2: - // gat = iip ? ga.srl16(7) : ga; + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - if(m_sel.iip) - { - vpsrlw(ymm6, 7); - } + if (!m_sel.tcc) + { + // GSVector4i ga = iip ? gaf : m_local.c.ga; - break; + vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); + vmovdqa(ymm2, ymm4); + + vpsrlw(ymm4, 7); + + mix16(ymm6, ymm4, ymm3); + } + + break; + + case TFX_NONE: + + // gat = iip ? ga.srl16(7) : ga; + + if (m_sel.iip) + { + vpsrlw(ymm6, 7); + } + + break; } - if(m_sel.aa1) + if (m_sel.aa1) { // gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha // FIXME: bios config screen cubes - if(!m_sel.abe) + if (!m_sel.abe) { // a = cov - if(m_sel.edge) + if (m_sel.edge) { vmovdqa(ymm0, ptr[&m_local.temp.cov]); } @@ -2135,7 +2135,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX() vpsllw(ymm0, 15); vpsrlw(ymm0, 8); - if(m_sel.edge) + if (m_sel.edge) { vmovdqa(ymm1, ptr[&m_local.temp.cov]); } @@ -2155,12 +2155,12 @@ void GSDrawScanlineCodeGenerator::AlphaTFX() void GSDrawScanlineCodeGenerator::ReadMask() { - if(m_sel.fwrite) + if (m_sel.fwrite) { vpbroadcastd(ymm3, ptr[&m_local.gd->fm]); } - if(m_sel.zwrite) + if (m_sel.zwrite) { vpbroadcastd(ymm4, ptr[&m_local.gd->zm]); } @@ -2168,157 +2168,157 @@ void GSDrawScanlineCodeGenerator::ReadMask() void GSDrawScanlineCodeGenerator::TestAlpha() { - switch(m_sel.atst) + switch (m_sel.atst) { - case ATST_NEVER: - // t = GSVector8i::xffffffff(); - vpcmpeqd(ymm1, ymm1); - break; + case ATST_NEVER: + // t = GSVector8i::xffffffff(); + vpcmpeqd(ymm1, ymm1); + break; - case ATST_ALWAYS: - return; + case ATST_ALWAYS: + return; - case ATST_LESS: - case ATST_LEQUAL: - // t = (ga >> 16) > m_local.gd->aref; - vpsrld(ymm1, ymm6, 16); - vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); - vpcmpgtd(ymm1, ymm0); - break; + case ATST_LESS: + case ATST_LEQUAL: + // t = (ga >> 16) > m_local.gd->aref; + vpsrld(ymm1, ymm6, 16); + vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); + vpcmpgtd(ymm1, ymm0); + break; - case ATST_EQUAL: - // t = (ga >> 16) != m_local.gd->aref; - vpsrld(ymm1, ymm6, 16); - vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); - vpcmpeqd(ymm1, ymm0); - vpcmpeqd(ymm0, ymm0); - vpxor(ymm1, ymm0); - break; + case ATST_EQUAL: + // t = (ga >> 16) != m_local.gd->aref; + vpsrld(ymm1, ymm6, 16); + vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); + vpcmpeqd(ymm1, ymm0); + vpcmpeqd(ymm0, ymm0); + vpxor(ymm1, ymm0); + break; - case ATST_GEQUAL: - case ATST_GREATER: - // t = (ga >> 16) < m_local.gd->aref; - vpsrld(ymm0, ymm6, 16); - vbroadcasti128(ymm1, ptr[&m_local.gd->aref]); - vpcmpgtd(ymm1, ymm0); - break; + case ATST_GEQUAL: + case ATST_GREATER: + // t = (ga >> 16) < m_local.gd->aref; + vpsrld(ymm0, ymm6, 16); + vbroadcasti128(ymm1, ptr[&m_local.gd->aref]); + vpcmpgtd(ymm1, ymm0); + break; - case ATST_NOTEQUAL: - // t = (ga >> 16) == m_local.gd->aref; - vpsrld(ymm1, ymm6, 16); - vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); - vpcmpeqd(ymm1, ymm0); - break; + case ATST_NOTEQUAL: + // t = (ga >> 16) == m_local.gd->aref; + vpsrld(ymm1, ymm6, 16); + vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); + vpcmpeqd(ymm1, ymm0); + break; } - switch(m_sel.afail) + switch (m_sel.afail) { - case AFAIL_KEEP: - // test |= t; - vpor(ymm7, ymm1); - alltrue(ymm7); - break; + case AFAIL_KEEP: + // test |= t; + vpor(ymm7, ymm1); + alltrue(ymm7); + break; - case AFAIL_FB_ONLY: - // zm |= t; - vpor(ymm4, ymm1); - break; + case AFAIL_FB_ONLY: + // zm |= t; + vpor(ymm4, ymm1); + break; - case AFAIL_ZB_ONLY: - // fm |= t; - vpor(ymm3, ymm1); - break; + case AFAIL_ZB_ONLY: + // fm |= t; + vpor(ymm3, ymm1); + break; - case AFAIL_RGB_ONLY: - // zm |= t; - vpor(ymm4, ymm1); - // fm |= t & GSVector8i::xff000000(); - vpsrld(ymm1, 24); - vpslld(ymm1, 24); - vpor(ymm3, ymm1); - break; + case AFAIL_RGB_ONLY: + // zm |= t; + vpor(ymm4, ymm1); + // fm |= t & GSVector8i::xff000000(); + vpsrld(ymm1, 24); + vpslld(ymm1, 24); + vpor(ymm3, ymm1); + break; } } void GSDrawScanlineCodeGenerator::ColorTFX() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - switch(m_sel.tfx) + switch (m_sel.tfx) { - case TFX_MODULATE: + case TFX_MODULATE: - // GSVector8i rb = iip ? rbf : m_local.c.rb; + // GSVector8i rb = iip ? rbf : m_local.c.rb; - // rbt = rbt.modulate16<1>(rb).clamp8(); + // rbt = rbt.modulate16<1>(rb).clamp8(); - modulate16(ymm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); + modulate16(ymm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - clamp16(ymm5, ymm1); + clamp16(ymm5, ymm1); - break; + break; - case TFX_DECAL: + case TFX_DECAL: - break; + break; - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: + case TFX_HIGHLIGHT: + case TFX_HIGHLIGHT2: - if(m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) - { - // GSVector8i ga = iip ? gaf : m_local.c.ga; + if (m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) + { + // GSVector8i ga = iip ? gaf : m_local.c.ga; - vmovdqa(ymm2, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - } + vmovdqa(ymm2, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); + } - // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); + // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); - vmovdqa(ymm1, ymm6); + vmovdqa(ymm1, ymm6); - modulate16(ymm6, ymm2, 1); + modulate16(ymm6, ymm2, 1); - vpshuflw(ymm2, ymm2, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(ymm2, ymm2, _MM_SHUFFLE(3, 3, 1, 1)); - vpsrlw(ymm2, 7); + vpshuflw(ymm2, ymm2, _MM_SHUFFLE(3, 3, 1, 1)); + vpshufhw(ymm2, ymm2, _MM_SHUFFLE(3, 3, 1, 1)); + vpsrlw(ymm2, 7); - vpaddw(ymm6, ymm2); + vpaddw(ymm6, ymm2); - clamp16(ymm6, ymm0); + clamp16(ymm6, ymm0); - mix16(ymm6, ymm1, ymm0); + mix16(ymm6, ymm1, ymm0); - // GSVector8i rb = iip ? rbf : m_local.c.rb; + // GSVector8i rb = iip ? rbf : m_local.c.rb; - // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); + // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); - modulate16(ymm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); + modulate16(ymm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - vpaddw(ymm5, ymm2); + vpaddw(ymm5, ymm2); - clamp16(ymm5, ymm0); + clamp16(ymm5, ymm0); - break; + break; - case TFX_NONE: + case TFX_NONE: - // rbt = iip ? rb.srl16(7) : rb; + // rbt = iip ? rb.srl16(7) : rb; - if(m_sel.iip) - { - vpsrlw(ymm5, 7); - } + if (m_sel.iip) + { + vpsrlw(ymm5, 7); + } - break; + break; } } void GSDrawScanlineCodeGenerator::Fog() { - if(!m_sel.fwrite || !m_sel.fge) + if (!m_sel.fwrite || !m_sel.fge) { return; } @@ -2326,7 +2326,7 @@ void GSDrawScanlineCodeGenerator::Fog() // rb = m_local.gd->frb.lerp16<0>(rb, f); // ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga); - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { vmovdqa(ymm0, ptr[&m_local.temp.f]); } @@ -2347,7 +2347,7 @@ void GSDrawScanlineCodeGenerator::Fog() void GSDrawScanlineCodeGenerator::ReadFrame() { - if(!m_sel.fb) + if (!m_sel.fb) { return; } @@ -2358,7 +2358,7 @@ void GSDrawScanlineCodeGenerator::ReadFrame() add(ebx, ptr[edi]); and(ebx, HALF_VM_SIZE - 1); - if(!m_sel.rfb) + if (!m_sel.rfb) { return; } @@ -2368,16 +2368,16 @@ void GSDrawScanlineCodeGenerator::ReadFrame() void GSDrawScanlineCodeGenerator::TestDestAlpha() { - if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) + if (!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) { return; } // test |= ((fd [<< 16]) ^ m_local.gd->datm).sra32(31); - if(m_sel.datm) + if (m_sel.datm) { - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { vpxor(ymm0, ymm0); //vpsrld(ymm1, ymm2, 15); @@ -2394,7 +2394,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha() } else { - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { vpslld(ymm1, ymm2, 16); vpsrad(ymm1, 31); @@ -2412,7 +2412,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha() void GSDrawScanlineCodeGenerator::WriteMask() { - if(m_sel.notest) + if (m_sel.notest) { return; } @@ -2420,12 +2420,12 @@ void GSDrawScanlineCodeGenerator::WriteMask() // fm |= test; // zm |= test; - if(m_sel.fwrite) + if (m_sel.fwrite) { vpor(ymm3, ymm7); } - if(m_sel.zwrite) + if (m_sel.zwrite) { vpor(ymm4, ymm7); } @@ -2434,18 +2434,18 @@ void GSDrawScanlineCodeGenerator::WriteMask() vpcmpeqd(ymm1, ymm1); - if(m_sel.fwrite && m_sel.zwrite) + if (m_sel.fwrite && m_sel.zwrite) { vpcmpeqd(ymm0, ymm1, ymm4); vpcmpeqd(ymm1, ymm3); vpackssdw(ymm1, ymm0); } - else if(m_sel.fwrite) + else if (m_sel.fwrite) { vpcmpeqd(ymm1, ymm3); vpackssdw(ymm1, ymm1); } - else if(m_sel.zwrite) + else if (m_sel.zwrite) { vpcmpeqd(ymm1, ymm4); vpackssdw(ymm1, ymm1); @@ -2458,12 +2458,12 @@ void GSDrawScanlineCodeGenerator::WriteMask() void GSDrawScanlineCodeGenerator::WriteZBuf() { - if(!m_sel.zwrite) + if (!m_sel.zwrite) { return; } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { vmovdqa(ymm1, ptr[&m_local.temp.zs]); } @@ -2472,7 +2472,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf() vpbroadcastd(ymm1, ptr[&m_local.p.z]); } - if(m_sel.ztest && m_sel.zpsm < 2) + if (m_sel.ztest && m_sel.zpsm < 2) { // zs = zs.blend8(zd, zm); @@ -2486,60 +2486,60 @@ void GSDrawScanlineCodeGenerator::WriteZBuf() void GSDrawScanlineCodeGenerator::AlphaBlend() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - if(m_sel.abe == 0 && m_sel.aa1 == 0) + if (m_sel.abe == 0 && m_sel.aa1 == 0) { return; } - if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) + if ((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) { - switch(m_sel.fpsm) + switch (m_sel.fpsm) { - case 0: - case 1: + case 0: + case 1: - // c[2] = fd & mask; - // c[3] = (fd >> 8) & mask; + // c[2] = fd & mask; + // c[3] = (fd >> 8) & mask; - vpsllw(ymm0, ymm2, 8); - vpsrlw(ymm0, 8); - vpsrlw(ymm1, ymm2, 8); + vpsllw(ymm0, ymm2, 8); + vpsrlw(ymm0, 8); + vpsrlw(ymm1, ymm2, 8); - break; + break; - case 2: + case 2: - // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); + // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); + // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - vpcmpeqd(ymm7, ymm7); + vpcmpeqd(ymm7, ymm7); - vpsrld(ymm7, 27); // 0x0000001f - vpand(ymm0, ymm2, ymm7); - vpslld(ymm0, 3); + vpsrld(ymm7, 27); // 0x0000001f + vpand(ymm0, ymm2, ymm7); + vpslld(ymm0, 3); - vpslld(ymm7, 10); // 0x00007c00 - vpand(ymm4, ymm2, ymm7); - vpslld(ymm4, 9); + vpslld(ymm7, 10); // 0x00007c00 + vpand(ymm4, ymm2, ymm7); + vpslld(ymm4, 9); - vpor(ymm0, ymm4); + vpor(ymm0, ymm4); - vpsrld(ymm7, 5); // 0x000003e0 - vpand(ymm1, ymm2, ymm7); - vpsrld(ymm1, 2); + vpsrld(ymm7, 5); // 0x000003e0 + vpand(ymm1, ymm2, ymm7); + vpsrld(ymm1, 2); - vpsllw(ymm7, 10); // 0x00008000 - vpand(ymm4, ymm2, ymm7); - vpslld(ymm4, 8); + vpsllw(ymm7, 10); // 0x00008000 + vpand(ymm4, ymm2, ymm7); + vpslld(ymm4, 8); - vpor(ymm1, ymm4); + vpor(ymm1, ymm4); - break; + break; } } @@ -2548,46 +2548,56 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() // ymm2, ymm3 = used // ymm4, ymm7 = free - if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) + if (m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) { vmovdqa(ymm4, ymm5); } - if(m_sel.aba != m_sel.abb) + if (m_sel.aba != m_sel.abb) { // rb = c[aba * 2 + 0]; - switch(m_sel.aba) + switch (m_sel.aba) { - case 0: break; - case 1: vmovdqa(ymm5, ymm0); break; - case 2: vpxor(ymm5, ymm5); break; + case 0: + break; + case 1: + vmovdqa(ymm5, ymm0); + break; + case 2: + vpxor(ymm5, ymm5); + break; } // rb = rb.sub16(c[abb * 2 + 0]); - switch(m_sel.abb) + switch (m_sel.abb) { - case 0: vpsubw(ymm5, ymm4); break; - case 1: vpsubw(ymm5, ymm0); break; - case 2: break; + case 0: + vpsubw(ymm5, ymm4); + break; + case 1: + vpsubw(ymm5, ymm0); + break; + case 2: + break; } - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) + if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_local.gd->afix; - switch(m_sel.abc) + switch (m_sel.abc) { - case 0: - case 1: - vpshuflw(ymm7, m_sel.abc ? ymm1 : ymm6, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(ymm7, ymm7, _MM_SHUFFLE(3, 3, 1, 1)); - vpsllw(ymm7, 7); - break; - case 2: - vpbroadcastw(ymm7, ptr[&m_local.gd->afix]); - break; + case 0: + case 1: + vpshuflw(ymm7, m_sel.abc ? ymm1 : ymm6, _MM_SHUFFLE(3, 3, 1, 1)); + vpshufhw(ymm7, ymm7, _MM_SHUFFLE(3, 3, 1, 1)); + vpsllw(ymm7, 7); + break; + case 2: + vpbroadcastw(ymm7, ptr[&m_local.gd->afix]); + break; } // rb = rb.modulate16<1>(a); @@ -2597,26 +2607,36 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() // rb = rb.add16(c[abd * 2 + 0]); - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: vpaddw(ymm5, ymm4); break; - case 1: vpaddw(ymm5, ymm0); break; - case 2: break; + case 0: + vpaddw(ymm5, ymm4); + break; + case 1: + vpaddw(ymm5, ymm0); + break; + case 2: + break; } } else { // rb = c[abd * 2 + 0]; - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: break; - case 1: vmovdqa(ymm5, ymm0); break; - case 2: vpxor(ymm5, ymm5); break; + case 0: + break; + case 1: + vmovdqa(ymm5, ymm0); + break; + case 2: + vpxor(ymm5, ymm5); + break; } } - if(m_sel.pabe) + if (m_sel.pabe) { // mask = (c[1] << 8).sra32(31); @@ -2637,27 +2657,37 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() vmovdqa(ymm4, ymm6); - if(m_sel.aba != m_sel.abb) + if (m_sel.aba != m_sel.abb) { // ga = c[aba * 2 + 1]; - switch(m_sel.aba) + switch (m_sel.aba) { - case 0: break; - case 1: vmovdqa(ymm6, ymm1); break; - case 2: vpxor(ymm6, ymm6); break; + case 0: + break; + case 1: + vmovdqa(ymm6, ymm1); + break; + case 2: + vpxor(ymm6, ymm6); + break; } // ga = ga.sub16(c[abeb * 2 + 1]); - switch(m_sel.abb) + switch (m_sel.abb) { - case 0: vpsubw(ymm6, ymm4); break; - case 1: vpsubw(ymm6, ymm1); break; - case 2: break; + case 0: + vpsubw(ymm6, ymm4); + break; + case 1: + vpsubw(ymm6, ymm1); + break; + case 2: + break; } - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) + if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // ga = ga.modulate16<1>(a); @@ -2666,22 +2696,32 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() // ga = ga.add16(c[abd * 2 + 1]); - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: vpaddw(ymm6, ymm4); break; - case 1: vpaddw(ymm6, ymm1); break; - case 2: break; + case 0: + vpaddw(ymm6, ymm4); + break; + case 1: + vpaddw(ymm6, ymm1); + break; + case 2: + break; } } else { // ga = c[abd * 2 + 1]; - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: break; - case 1: vmovdqa(ymm6, ymm1); break; - case 2: vpxor(ymm6, ymm6); break; + case 0: + break; + case 1: + vmovdqa(ymm6, ymm1); + break; + case 2: + vpxor(ymm6, ymm6); + break; } } @@ -2691,7 +2731,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() // ymm2, ymm3 = used // ymm0, ymm1, ymm7 = free - if(m_sel.pabe) + if (m_sel.pabe) { vpsrld(ymm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16) @@ -2701,7 +2741,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() } else { - if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx + if (m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx { mix16(ymm6, ymm4, ymm7); } @@ -2710,12 +2750,12 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() void GSDrawScanlineCodeGenerator::WriteFrame() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - if(m_sel.fpsm == 2 && m_sel.dthe) + if (m_sel.fpsm == 2 && m_sel.dthe) { mov(eax, ptr[esp + _top]); and(eax, 3); @@ -2727,7 +2767,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame() vpaddw(ymm6, ymm7); } - if(m_sel.colclamp == 0) + if (m_sel.colclamp == 0) { // c[0] &= 0x00ff00ff; // c[1] &= 0x00ff00ff; @@ -2744,7 +2784,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame() vpunpcklwd(ymm5, ymm6); vpackuswb(ymm5, ymm7); - if(m_sel.fba && m_sel.fpsm != 1) + if (m_sel.fba && m_sel.fpsm != 1) { // fs |= 0x80000000; @@ -2753,7 +2793,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame() vpor(ymm5, ymm7); } - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { // GSVector8i rb = fs & 0x00f800f8; // GSVector8i ga = fs & 0x8000f800; @@ -2781,7 +2821,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame() vpor(ymm5, ymm7); } - if(m_sel.rfb) + if (m_sel.rfb) { // fs = fs.blend(fd, fm); @@ -2800,7 +2840,7 @@ void GSDrawScanlineCodeGenerator::ReadPixel(const Ymm& dst, const Ymm& temp, con vmovq(Xmm(temp.getIdx()), qword[addr * 2 + (size_t)m_local.gd->vm + 16 * 2]); vmovhps(Xmm(temp.getIdx()), qword[addr * 2 + (size_t)m_local.gd->vm + 24 * 2]); vinserti128(dst, dst, Xmm(temp.getIdx()), 1); -/* + /* vmovdqu(dst, ptr[addr * 2 + (size_t)m_local.gd->vm]); vmovdqu(temp, ptr[addr * 2 + (size_t)m_local.gd->vm + 16 * 2]); vpunpcklqdq(dst, dst, temp); @@ -2813,11 +2853,11 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Ymm& src, const Ymm& temp, co Xmm src1 = Xmm(src.getIdx()); Xmm src2 = Xmm(temp.getIdx()); - vextracti128(src2, src, 1); + vextracti128(src2, src, 1); - if(m_sel.notest) + if (m_sel.notest) { - if(fast) + if (fast) { vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src1); vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src1); @@ -2840,7 +2880,7 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Ymm& src, const Ymm& temp, co { // cascade tests? - if(fast) + if (fast) { test(mask, 0x0000000f << (fz * 8)); je("@f"); @@ -2915,24 +2955,30 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const RegLong& addr { Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2]; - switch(psm) + switch (psm) { - case 0: - if(j == 0) vmovd(dst, src); - else vpextrd(dst, src, j); - break; - case 1: - if(j == 0) vmovd(eax, src); - else vpextrd(eax, src, j); - xor(eax, dst); - and(eax, 0xffffff); - xor(dst, eax); - break; - case 2: - if(j == 0) vmovd(eax, src); - else vpextrw(eax, src, j * 2); - mov(dst, ax); - break; + case 0: + if (j == 0) + vmovd(dst, src); + else + vpextrd(dst, src, j); + break; + case 1: + if (j == 0) + vmovd(eax, src); + else + vpextrd(eax, src, j); + xor(eax, dst); + and(eax, 0xffffff); + xor(dst, eax); + break; + case 2: + if (j == 0) + vmovd(eax, src); + else + vpextrw(eax, src, j * 2); + mov(dst, ax); + break; } } @@ -2959,12 +3005,12 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) const GSVector8i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i; - if(m_sel.mmin && !m_sel.lcm) + if (m_sel.mmin && !m_sel.lcm) { const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; const int t[] = {1, 4, 5, 1, 2, 5, 0, 2}; - for(int i = 0; i < pixels; i++) + for (int i = 0; i < pixels; i++) { Ymm src = Ymm(r[i * 2 + 0]); Ymm dst = Ymm(r[i * 2 + 1]); @@ -2973,7 +3019,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) vextracti128(Xmm(t1.getIdx()), src, 1); - for(uint8 j = 0; j < 4; j++) + for (uint8 j = 0; j < 4; j++) { mov(ebx, ptr[&lod_i->u32[j + 0]]); mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); @@ -2994,20 +3040,20 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; const int t[] = {1, 4, 5, 1, 2, 5, 0, 2}; - if(m_sel.mmin && m_sel.lcm) + if (m_sel.mmin && m_sel.lcm) { mov(ebx, ptr[&lod_i->u32[0]]); mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); } - for(int i = 0; i < pixels; i++) + for (int i = 0; i < pixels; i++) { Ymm src = Ymm(r[i * 2 + 0]); Ymm dst = Ymm(r[i * 2 + 1]); Ymm t1 = Ymm(t[i * 2 + 0]); Ymm t2 = Ymm(t[i * 2 + 1]); - if(!m_sel.tlu) + if (!m_sel.tlu) { vpcmpeqd(t1, t1); vpgatherdd(dst, ptr[ebx + src * 4], t1); @@ -3016,7 +3062,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) { vextracti128(Xmm(t1.getIdx()), src, 1); - for(uint8 j = 0; j < 4; j++) + for (uint8 j = 0; j < 4; j++) { ReadTexel(dst, src, j); ReadTexel(t2, t1, j); @@ -3042,13 +3088,18 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Ymm& dst, const Ymm& addr, uin const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4]; - if(i == 0) vmovd(eax, Xmm(addr.getIdx())); - else vpextrd(eax, Xmm(addr.getIdx()), i); - - if(m_sel.tlu) movzx(eax, byte[ebx + eax]); + if (i == 0) + vmovd(eax, Xmm(addr.getIdx())); + else + vpextrd(eax, Xmm(addr.getIdx()), i); - if(i == 0) vmovd(Xmm(dst.getIdx()), src); - else vpinsrd(Xmm(dst.getIdx()), src, i); + if (m_sel.tlu) + movzx(eax, byte[ebx + eax]); + + if (i == 0) + vmovd(Xmm(dst.getIdx()), src); + else + vpinsrd(Xmm(dst.getIdx()), src, i); } diff --git a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.avx.cpp b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.avx.cpp index 1814776868..b81cf13f52 100644 --- a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.avx.cpp @@ -38,7 +38,7 @@ void GSDrawScanlineCodeGenerator::Generate_AVX() Init_AVX(); - if(!m_sel.edge) + if (!m_sel.edge) { align(16); } @@ -72,7 +72,7 @@ L("loop"); // xmm6 = ga (!tme) // xmm7 = test - if(m_sel.mmin) + if (m_sel.mmin) { SampleTextureLOD_AVX(); } @@ -225,7 +225,7 @@ L("step"); // if(steps <= 0) break; - if(!m_sel.edge) + if (!m_sel.edge) { test(ecx, ecx); @@ -250,7 +250,7 @@ L("exit"); void GSDrawScanlineCodeGenerator::Init_AVX() { - if(!m_sel.notest) + if (!m_sel.notest) { // int skip = left & 3; @@ -280,8 +280,8 @@ void GSDrawScanlineCodeGenerator::Init_AVX() } else { - mov(ebx, edx); // left - xor(edx, edx); // skip + mov(ebx, edx); // left + xor(edx, edx); // skip lea(ecx, ptr[ecx - 4]); // steps } @@ -296,7 +296,7 @@ void GSDrawScanlineCodeGenerator::Init_AVX() lea(edi, ptr[ebx * 2]); add(edi, ptr[&m_local.gd->fzbc]); - if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) + if (m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) { // edx = &m_local.d[skip] @@ -307,13 +307,13 @@ void GSDrawScanlineCodeGenerator::Init_AVX() mov(ebx, ptr[esp + _v]); } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { - if(m_sel.fwrite && m_sel.fge || m_sel.zb) + if (m_sel.fwrite && m_sel.fge || m_sel.zb) { vmovaps(xmm0, ptr[ebx + offsetof(GSVertexSW, p)]); // v.p - if(m_sel.fwrite && m_sel.fge) + if (m_sel.fwrite && m_sel.fge) { // f = GSVector4i(vp).zzzzh().zzzz().add16(m_local.d[skip].f); @@ -325,7 +325,7 @@ void GSDrawScanlineCodeGenerator::Init_AVX() vmovdqa(ptr[&m_local.temp.f], xmm1); } - if(m_sel.zb) + if (m_sel.zb) { // z = vp.zzzz() + m_local.d[skip].z; @@ -339,20 +339,20 @@ void GSDrawScanlineCodeGenerator::Init_AVX() } else { - if(m_sel.ztest) + if (m_sel.ztest) { vmovdqa(xmm0, ptr[&m_local.p.z]); } } - if(m_sel.fb) + if (m_sel.fb) { - if(m_sel.edge || m_sel.tfx != TFX_NONE) + if (m_sel.edge || m_sel.tfx != TFX_NONE) { vmovaps(xmm4, ptr[ebx + offsetof(GSVertexSW, t)]); // v.t } - if(m_sel.edge) + if (m_sel.edge) { // m_local.temp.cov = GSVector4i::cast(v.t).zzzzh().wwww().srl16(9); @@ -363,9 +363,9 @@ void GSDrawScanlineCodeGenerator::Init_AVX() vmovdqa(ptr[&m_local.temp.cov], xmm3); } - if(m_sel.tfx != TFX_NONE) + if (m_sel.tfx != TFX_NONE) { - if(m_sel.fst) + if (m_sel.fst) { // GSVector4i vti(vt); @@ -379,13 +379,13 @@ void GSDrawScanlineCodeGenerator::Init_AVX() vpaddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) + if (m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { vpaddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); } else { - if(m_sel.ltf) + if (m_sel.ltf) { vpshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); @@ -417,9 +417,9 @@ void GSDrawScanlineCodeGenerator::Init_AVX() } } - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) + if (!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) { - if(m_sel.iip) + if (m_sel.iip) { // GSVector4i vc = GSVector4i(v.c); @@ -444,7 +444,7 @@ void GSDrawScanlineCodeGenerator::Init_AVX() } else { - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { vmovdqa(xmm5, ptr[&m_local.c.rb]); vmovdqa(xmm6, ptr[&m_local.c.ga]); @@ -464,11 +464,11 @@ void GSDrawScanlineCodeGenerator::Step_AVX() add(edi, 8); - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // z += m_local.d4.z; - if(m_sel.zb) + if (m_sel.zb) { vmovaps(xmm0, ptr[&m_local.temp.zo]); vaddps(xmm0, ptr[&m_local.d4.z]); @@ -478,7 +478,7 @@ void GSDrawScanlineCodeGenerator::Step_AVX() // f = f.add16(m_local.d4.f); - if(m_sel.fwrite && m_sel.fge) + if (m_sel.fwrite && m_sel.fge) { vmovdqa(xmm1, ptr[&m_local.temp.f]); vpaddw(xmm1, ptr[&m_local.d4.f]); @@ -487,17 +487,17 @@ void GSDrawScanlineCodeGenerator::Step_AVX() } else { - if(m_sel.ztest) + if (m_sel.ztest) { vmovdqa(xmm0, ptr[&m_local.p.z]); } } - if(m_sel.fb) + if (m_sel.fb) { - if(m_sel.tfx != TFX_NONE) + if (m_sel.tfx != TFX_NONE) { - if(m_sel.fst) + if (m_sel.fst) { // GSVector4i stq = m_local.d4.stq; @@ -510,7 +510,7 @@ void GSDrawScanlineCodeGenerator::Step_AVX() vpaddd(xmm2, ptr[&m_local.temp.s]); vmovdqa(ptr[&m_local.temp.s], xmm2); - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) + if (m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { vpshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); vpaddd(xmm3, ptr[&m_local.temp.t]); @@ -545,9 +545,9 @@ void GSDrawScanlineCodeGenerator::Step_AVX() } } - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) + if (!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) { - if(m_sel.iip) + if (m_sel.iip) { // GSVector4i c = m_local.d4.c; @@ -573,7 +573,7 @@ void GSDrawScanlineCodeGenerator::Step_AVX() } else { - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { vmovdqa(xmm5, ptr[&m_local.c.rb]); vmovdqa(xmm6, ptr[&m_local.c.ga]); @@ -582,7 +582,7 @@ void GSDrawScanlineCodeGenerator::Step_AVX() } } - if(!m_sel.notest) + if (!m_sel.notest) { // test = m_test[7 + (steps & (steps >> 31))]; @@ -597,7 +597,7 @@ void GSDrawScanlineCodeGenerator::Step_AVX() void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) { - if(!m_sel.zb) + if (!m_sel.zb) { return; } @@ -610,9 +610,9 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) // GSVector4i zs = zi; - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { - if(m_sel.zoverflow) + if (m_sel.zoverflow) { // zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); @@ -643,30 +643,30 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) vpminsd(xmm0, temp1); } - if(m_sel.zwrite) + if (m_sel.zwrite) { vmovdqa(ptr[&m_local.temp.zs], xmm0); } } - if(m_sel.ztest) + if (m_sel.ztest) { ReadPixel_AVX(xmm1, ebp); - if(m_sel.zwrite && m_sel.zpsm < 2) + if (m_sel.zwrite && m_sel.zpsm < 2) { vmovdqa(ptr[&m_local.temp.zd], xmm1); } // zd &= 0xffffffff >> m_sel.zpsm * 8; - if(m_sel.zpsm) + if (m_sel.zpsm) { vpslld(xmm1, static_cast(m_sel.zpsm * 8)); vpsrld(xmm1, static_cast(m_sel.zpsm * 8)); } - if(m_sel.zoverflow || m_sel.zpsm == 0) + if (m_sel.zoverflow || m_sel.zpsm == 0) { // GSVector4i o = GSVector4i::x80000000(); @@ -680,21 +680,21 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) vpsubd(xmm1, temp1); } - switch(m_sel.ztst) + switch (m_sel.ztst) { - case ZTST_GEQUAL: - // test |= zso < zdo; // ~(zso >= zdo) - vpcmpgtd(xmm1, xmm0); - vpor(xmm7, xmm1); - break; + case ZTST_GEQUAL: + // test |= zso < zdo; // ~(zso >= zdo) + vpcmpgtd(xmm1, xmm0); + vpor(xmm7, xmm1); + break; - case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL - // test |= zso <= zdo; // ~(zso > zdo) - vpcmpgtd(xmm0, xmm1); - vpcmpeqd(temp1, temp1); - vpxor(xmm0, temp1); - vpor(xmm7, xmm0); - break; + case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL + // test |= zso <= zdo; // ~(zso > zdo) + vpcmpgtd(xmm0, xmm1); + vpcmpeqd(temp1, temp1); + vpxor(xmm0, temp1); + vpor(xmm7, xmm0); + break; } alltrue(xmm7); @@ -703,14 +703,14 @@ void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2) void GSDrawScanlineCodeGenerator::SampleTexture_AVX() { - if(!m_sel.fb || m_sel.tfx == TFX_NONE) + if (!m_sel.fb || m_sel.tfx == TFX_NONE) { return; } mov(ebx, ptr[&m_local.gd->tex[0]]); - if(m_sel.tlu) + if (m_sel.tlu) { mov(edx, ptr[&m_local.gd->clut]); } @@ -718,7 +718,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_AVX() // ebx = tex // edx = clut - if(!m_sel.fst) + if (!m_sel.fst) { vrcpps(xmm0, xmm4); @@ -728,7 +728,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_AVX() vcvttps2dq(xmm2, xmm2); vcvttps2dq(xmm3, xmm3); - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -745,7 +745,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_AVX() // xmm2 = u // xmm3 = v - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i uf = u.xxzzlh().srl16(12); @@ -754,7 +754,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_AVX() vpsrlw(xmm0, 12); vmovdqa(ptr[&m_local.temp.uf], xmm0); - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // GSVector4i vf = v.xxzzlh().srl16(12); @@ -771,7 +771,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_AVX() vpsrad(xmm3, 16); vpackssdw(xmm2, xmm3); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); @@ -812,7 +812,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_AVX() // xmm1, xmm5, xmm6 = free // xmm7 = used - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i y1 = uv1.uph16() << tw; // GSVector4i x1 = uv1.upl16(); @@ -962,11 +962,11 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vpmaxsw(uv, ptr[&m_local.gd->t.min]); } @@ -982,7 +982,7 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv) { vpand(uv, ptr[&m_local.gd->t.min]); - if(region) + if (region) { vpor(uv, ptr[&m_local.gd->t.max]); } @@ -998,7 +998,7 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv) vpand(xmm1, uv, xmm4); - if(region) + if (region) { vpor(xmm1, xmm5); } @@ -1023,11 +1023,11 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv0, const Xmm& uv1) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vmovdqa(xmm4, ptr[&m_local.gd->t.min]); vpmaxsw(uv0, xmm4); @@ -1050,7 +1050,7 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv0, const Xmm& uv1) vpand(uv0, xmm4); vpand(uv1, xmm4); - if(region) + if (region) { vmovdqa(xmm5, ptr[&m_local.gd->t.max]); vpor(uv0, xmm5); @@ -1070,7 +1070,7 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv0, const Xmm& uv1) vpand(xmm1, uv0, xmm4); - if(region) + if (region) { vpor(xmm1, xmm5); } @@ -1090,7 +1090,7 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv0, const Xmm& uv1) vpand(xmm1, uv1, xmm4); - if(region) + if (region) { vpor(xmm1, xmm5); } @@ -1108,7 +1108,7 @@ void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv0, const Xmm& uv1) void GSDrawScanlineCodeGenerator::SampleTextureLOD_AVX() { - if(!m_sel.fb || m_sel.tfx == TFX_NONE) + if (!m_sel.fb || m_sel.tfx == TFX_NONE) { return; } @@ -1117,12 +1117,12 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_AVX() mov(ebp, (size_t)m_local.gd->tex); - if(m_sel.tlu) + if (m_sel.tlu) { mov(edx, ptr[&m_local.gd->clut]); } - if(!m_sel.fst) + if (!m_sel.fst) { vrcpps(xmm0, xmm4); @@ -1140,7 +1140,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_AVX() // TODO: if the fractional part is not needed in round-off mode then there is a faster integer log2 (just take the exp) (but can we round it?) - if(!m_sel.lcm) + if (!m_sel.lcm) { // lod = -log2(Q) * (1 << L) + K @@ -1159,7 +1159,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_AVX() // xmm4 = mant(q) | 1.0f - if(m_cpu.has(util::Cpu::tFMA)) + if (m_cpu.has(util::Cpu::tFMA)) { vmovaps(xmm5, ptr[g_const->m_log2_coef_128b[0]]); // c0 vfmadd213ps(xmm5, xmm4, ptr[g_const->m_log2_coef_128b[1]]); // c0 * xmm4 + c1 @@ -1180,7 +1180,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_AVX() // xmm4 = log2(Q) = ((((c0 * xmm4) + c1) * xmm4) + c2) * (xmm4 - 1.0f) + xmm0 - if(m_cpu.has(util::Cpu::tFMA)) + if (m_cpu.has(util::Cpu::tFMA)) { vmovaps(xmm5, ptr[&m_local.gd->l]); vfmadd213ps(xmm4, xmm5, ptr[&m_local.gd->k]); @@ -1198,7 +1198,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_AVX() vmaxps(xmm4, xmm0); vcvtps2dq(xmm4, xmm4); - if(m_sel.mmin == 1) // round-off mode + if (m_sel.mmin == 1) // round-off mode { mov(eax, 0x8000); vmovd(xmm0, eax); @@ -1215,7 +1215,7 @@ vpslld(xmm6, xmm4, 16); vpsrld(xmm6, xmm6, 24); return; */ - if(m_sel.mmin == 2) // trilinear mode + if (m_sel.mmin == 2) // trilinear mode { vpshuflw(xmm1, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 0, 0)); @@ -1224,7 +1224,7 @@ return; // shift u/v/minmax by (int)lod - if(m_cpu.has(util::Cpu::tAVX2)) + if (m_cpu.has(util::Cpu::tAVX2)) { vpsravd(xmm2, xmm2, xmm0); vpsravd(xmm3, xmm3, xmm0); @@ -1262,7 +1262,7 @@ return; vmovdqa(xmm2, xmm5); vmovdqa(xmm3, xmm6); - vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]); + vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]); vpsrad(xmm2, xmm0); vpsrlw(xmm1, xmm4, xmm0); vmovq(ptr[&m_local.temp.uv_minmax[0].u32[0]], xmm1); @@ -1323,7 +1323,7 @@ return; // xmm5 = minuv // xmm6 = maxuv - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -1336,7 +1336,7 @@ return; vpsubd(xmm3, xmm4); // GSVector4i uf = u.xxzzlh().srl16(1); - + vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); vpsrlw(xmm0, 12); @@ -1356,7 +1356,7 @@ return; vpsrad(xmm3, 16); vpackssdw(xmm2, xmm3); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); @@ -1397,7 +1397,7 @@ return; // xmm1, xmm5, xmm6 = free // xmm7 = used - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i x1 = uv1.upl16(); // GSVector4i y1 = uv1.uph16() << tw; @@ -1537,7 +1537,7 @@ return; split16_2x8(xmm5, xmm6, xmm6); } - if(m_sel.mmin != 1) // !round-off mode + if (m_sel.mmin != 1) // !round-off mode { vmovdqa(ptr[&m_local.temp.trb], xmm5); vmovdqa(ptr[&m_local.temp.tga], xmm6); @@ -1554,7 +1554,7 @@ return; vpsrlw(xmm5, 1); vpsrlw(xmm6, 1); - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -1567,7 +1567,7 @@ return; vpsubd(xmm3, xmm4); // GSVector4i uf = u.xxzzlh().srl16(1); - + vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); vpsrlw(xmm0, 12); @@ -1587,7 +1587,7 @@ return; vpsrad(xmm3, 16); vpackssdw(xmm2, xmm3); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); @@ -1628,7 +1628,7 @@ return; // xmm1, xmm5, xmm6 = free // xmm7 = used - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i x1 = uv1.upl16(); // GSVector4i y1 = uv1.uph16() << tw; @@ -1792,11 +1792,11 @@ void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vpmaxsw(uv, xmm5); } @@ -1812,7 +1812,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv) { vpand(uv, xmm5); - if(region) + if (region) { vpor(uv, xmm6); } @@ -1826,7 +1826,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv) vpand(xmm1, uv, xmm5); - if(region) + if (region) { vpor(xmm1, xmm6); } @@ -1853,11 +1853,11 @@ void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv0, const Xmm& uv1) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vpmaxsw(uv0, xmm5); vpmaxsw(uv1, xmm5); @@ -1877,7 +1877,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv0, const Xmm& uv1) vpand(uv0, xmm5); vpand(uv1, xmm5); - if(region) + if (region) { vpor(uv0, xmm6); vpor(uv1, xmm6); @@ -1894,7 +1894,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv0, const Xmm& uv1) vpand(xmm1, uv0, xmm5); - if(region) + if (region) { vpor(xmm1, xmm6); } @@ -1914,7 +1914,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv0, const Xmm& uv1) vpand(xmm1, uv1, xmm5); - if(region) + if (region) { vpor(xmm1, xmm6); } @@ -1932,114 +1932,114 @@ void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv0, const Xmm& uv1) void GSDrawScanlineCodeGenerator::AlphaTFX_AVX() { - if(!m_sel.fb) + if (!m_sel.fb) { return; } - switch(m_sel.tfx) + switch (m_sel.tfx) { - case TFX_MODULATE: + case TFX_MODULATE: - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - - // gat = gat.modulate16<1>(ga).clamp8(); - - modulate16(xmm6, xmm4, 1); - - clamp16(xmm6, xmm3); - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - vpsrlw(xmm4, 7); - - mix16(xmm6, xmm4, xmm3); - } - - break; - - case TFX_DECAL: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { // GSVector4i ga = iip ? gaf : m_local.c.ga; vmovdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - vpsrlw(xmm4, 7); + // gat = gat.modulate16<1>(ga).clamp8(); - mix16(xmm6, xmm4, xmm3); - } + modulate16(xmm6, xmm4, 1); - break; + clamp16(xmm6, xmm3); - case TFX_HIGHLIGHT: + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - // GSVector4i ga = iip ? gaf : m_local.c.ga; + if (!m_sel.tcc) + { + vpsrlw(xmm4, 7); - vmovdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - vmovdqa(xmm2, xmm4); + mix16(xmm6, xmm4, xmm3); + } - // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); + break; - vpsrlw(xmm4, 7); + case TFX_DECAL: - if(m_sel.tcc) - { - vpaddusb(xmm4, xmm6); - } + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - mix16(xmm6, xmm4, xmm3); + if (!m_sel.tcc) + { + // GSVector4i ga = iip ? gaf : m_local.c.ga; - break; + vmovdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - case TFX_HIGHLIGHT2: + vpsrlw(xmm4, 7); - // if(!tcc) gat = gat.mix16(ga.srl16(7)); + mix16(xmm6, xmm4, xmm3); + } + + break; + + case TFX_HIGHLIGHT: - if(!m_sel.tcc) - { // GSVector4i ga = iip ? gaf : m_local.c.ga; vmovdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); vmovdqa(xmm2, xmm4); + // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); + vpsrlw(xmm4, 7); + if (m_sel.tcc) + { + vpaddusb(xmm4, xmm6); + } + mix16(xmm6, xmm4, xmm3); - } - break; + break; - case TFX_NONE: + case TFX_HIGHLIGHT2: - // gat = iip ? ga.srl16(7) : ga; + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - if(m_sel.iip) - { - vpsrlw(xmm6, 7); - } + if (!m_sel.tcc) + { + // GSVector4i ga = iip ? gaf : m_local.c.ga; - break; + vmovdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); + vmovdqa(xmm2, xmm4); + + vpsrlw(xmm4, 7); + + mix16(xmm6, xmm4, xmm3); + } + + break; + + case TFX_NONE: + + // gat = iip ? ga.srl16(7) : ga; + + if (m_sel.iip) + { + vpsrlw(xmm6, 7); + } + + break; } - if(m_sel.aa1) + if (m_sel.aa1) { // gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha // FIXME: bios config screen cubes - if(!m_sel.abe) + if (!m_sel.abe) { // a = cov - if(m_sel.edge) + if (m_sel.edge) { vmovdqa(xmm0, ptr[&m_local.temp.cov]); } @@ -2060,7 +2060,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX_AVX() vpsllw(xmm0, 15); vpsrlw(xmm0, 8); - if(m_sel.edge) + if (m_sel.edge) { vmovdqa(xmm1, ptr[&m_local.temp.cov]); } @@ -2080,12 +2080,12 @@ void GSDrawScanlineCodeGenerator::AlphaTFX_AVX() void GSDrawScanlineCodeGenerator::ReadMask_AVX() { - if(m_sel.fwrite) + if (m_sel.fwrite) { vmovdqa(xmm3, ptr[&m_local.gd->fm]); } - if(m_sel.zwrite) + if (m_sel.zwrite) { vmovdqa(xmm4, ptr[&m_local.gd->zm]); } @@ -2093,154 +2093,154 @@ void GSDrawScanlineCodeGenerator::ReadMask_AVX() void GSDrawScanlineCodeGenerator::TestAlpha_AVX() { - switch(m_sel.atst) + switch (m_sel.atst) { - case ATST_NEVER: - // t = GSVector4i::xffffffff(); - vpcmpeqd(xmm1, xmm1); - break; + case ATST_NEVER: + // t = GSVector4i::xffffffff(); + vpcmpeqd(xmm1, xmm1); + break; - case ATST_ALWAYS: - return; + case ATST_ALWAYS: + return; - case ATST_LESS: - case ATST_LEQUAL: - // t = (ga >> 16) > m_local.gd->aref; - vpsrld(xmm1, xmm6, 16); - vpcmpgtd(xmm1, ptr[&m_local.gd->aref]); - break; + case ATST_LESS: + case ATST_LEQUAL: + // t = (ga >> 16) > m_local.gd->aref; + vpsrld(xmm1, xmm6, 16); + vpcmpgtd(xmm1, ptr[&m_local.gd->aref]); + break; - case ATST_EQUAL: - // t = (ga >> 16) != m_local.gd->aref; - vpsrld(xmm1, xmm6, 16); - vpcmpeqd(xmm1, ptr[&m_local.gd->aref]); - vpcmpeqd(xmm0, xmm0); - vpxor(xmm1, xmm0); - break; + case ATST_EQUAL: + // t = (ga >> 16) != m_local.gd->aref; + vpsrld(xmm1, xmm6, 16); + vpcmpeqd(xmm1, ptr[&m_local.gd->aref]); + vpcmpeqd(xmm0, xmm0); + vpxor(xmm1, xmm0); + break; - case ATST_GEQUAL: - case ATST_GREATER: - // t = (ga >> 16) < m_local.gd->aref; - vpsrld(xmm0, xmm6, 16); - vmovdqa(xmm1, ptr[&m_local.gd->aref]); - vpcmpgtd(xmm1, xmm0); - break; + case ATST_GEQUAL: + case ATST_GREATER: + // t = (ga >> 16) < m_local.gd->aref; + vpsrld(xmm0, xmm6, 16); + vmovdqa(xmm1, ptr[&m_local.gd->aref]); + vpcmpgtd(xmm1, xmm0); + break; - case ATST_NOTEQUAL: - // t = (ga >> 16) == m_local.gd->aref; - vpsrld(xmm1, xmm6, 16); - vpcmpeqd(xmm1, ptr[&m_local.gd->aref]); - break; + case ATST_NOTEQUAL: + // t = (ga >> 16) == m_local.gd->aref; + vpsrld(xmm1, xmm6, 16); + vpcmpeqd(xmm1, ptr[&m_local.gd->aref]); + break; } - switch(m_sel.afail) + switch (m_sel.afail) { - case AFAIL_KEEP: - // test |= t; - vpor(xmm7, xmm1); - alltrue(xmm7); - break; + case AFAIL_KEEP: + // test |= t; + vpor(xmm7, xmm1); + alltrue(xmm7); + break; - case AFAIL_FB_ONLY: - // zm |= t; - vpor(xmm4, xmm1); - break; + case AFAIL_FB_ONLY: + // zm |= t; + vpor(xmm4, xmm1); + break; - case AFAIL_ZB_ONLY: - // fm |= t; - vpor(xmm3, xmm1); - break; + case AFAIL_ZB_ONLY: + // fm |= t; + vpor(xmm3, xmm1); + break; - case AFAIL_RGB_ONLY: - // zm |= t; - vpor(xmm4, xmm1); - // fm |= t & GSVector4i::xff000000(); - vpsrld(xmm1, 24); - vpslld(xmm1, 24); - vpor(xmm3, xmm1); - break; + case AFAIL_RGB_ONLY: + // zm |= t; + vpor(xmm4, xmm1); + // fm |= t & GSVector4i::xff000000(); + vpsrld(xmm1, 24); + vpslld(xmm1, 24); + vpor(xmm3, xmm1); + break; } } void GSDrawScanlineCodeGenerator::ColorTFX_AVX() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - switch(m_sel.tfx) + switch (m_sel.tfx) { - case TFX_MODULATE: + case TFX_MODULATE: - // GSVector4i rb = iip ? rbf : m_local.c.rb; + // GSVector4i rb = iip ? rbf : m_local.c.rb; - // rbt = rbt.modulate16<1>(rb).clamp8(); + // rbt = rbt.modulate16<1>(rb).clamp8(); - modulate16(xmm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); + modulate16(xmm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - clamp16(xmm5, xmm1); + clamp16(xmm5, xmm1); - break; + break; - case TFX_DECAL: + case TFX_DECAL: - break; + break; - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: + case TFX_HIGHLIGHT: + case TFX_HIGHLIGHT2: - if(m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) - { - // GSVector4i ga = iip ? gaf : m_local.c.ga; + if (m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) + { + // GSVector4i ga = iip ? gaf : m_local.c.ga; - vmovdqa(xmm2, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - } + vmovdqa(xmm2, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); + } - // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); + // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); - vmovdqa(xmm1, xmm6); + vmovdqa(xmm1, xmm6); - modulate16(xmm6, xmm2, 1); + modulate16(xmm6, xmm2, 1); - vpshuflw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); - vpsrlw(xmm2, 7); + vpshuflw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); + vpshufhw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); + vpsrlw(xmm2, 7); - vpaddw(xmm6, xmm2); + vpaddw(xmm6, xmm2); - clamp16(xmm6, xmm0); + clamp16(xmm6, xmm0); - mix16(xmm6, xmm1, xmm0); + mix16(xmm6, xmm1, xmm0); - // GSVector4i rb = iip ? rbf : m_local.c.rb; + // GSVector4i rb = iip ? rbf : m_local.c.rb; - // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); + // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); - modulate16(xmm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); + modulate16(xmm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - vpaddw(xmm5, xmm2); + vpaddw(xmm5, xmm2); - clamp16(xmm5, xmm0); + clamp16(xmm5, xmm0); - break; + break; - case TFX_NONE: + case TFX_NONE: - // rbt = iip ? rb.srl16(7) : rb; + // rbt = iip ? rb.srl16(7) : rb; - if(m_sel.iip) - { - vpsrlw(xmm5, 7); - } + if (m_sel.iip) + { + vpsrlw(xmm5, 7); + } - break; + break; } } void GSDrawScanlineCodeGenerator::Fog_AVX() { - if(!m_sel.fwrite || !m_sel.fge) + if (!m_sel.fwrite || !m_sel.fge) { return; } @@ -2261,7 +2261,7 @@ void GSDrawScanlineCodeGenerator::Fog_AVX() void GSDrawScanlineCodeGenerator::ReadFrame_AVX() { - if(!m_sel.fb) + if (!m_sel.fb) { return; } @@ -2272,7 +2272,7 @@ void GSDrawScanlineCodeGenerator::ReadFrame_AVX() add(ebx, ptr[edi]); and(ebx, HALF_VM_SIZE - 1); - if(!m_sel.rfb) + if (!m_sel.rfb) { return; } @@ -2282,16 +2282,16 @@ void GSDrawScanlineCodeGenerator::ReadFrame_AVX() void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX() { - if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) + if (!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) { return; } // test |= ((fd [<< 16]) ^ m_local.gd->datm).sra32(31); - if(m_sel.datm) + if (m_sel.datm) { - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { vpxor(xmm0, xmm0); //vpsrld(xmm1, xmm2, 15); @@ -2308,7 +2308,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX() } else { - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { vpslld(xmm1, xmm2, 16); vpsrad(xmm1, 31); @@ -2326,7 +2326,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX() void GSDrawScanlineCodeGenerator::WriteMask_AVX() { - if(m_sel.notest) + if (m_sel.notest) { return; } @@ -2334,12 +2334,12 @@ void GSDrawScanlineCodeGenerator::WriteMask_AVX() // fm |= test; // zm |= test; - if(m_sel.fwrite) + if (m_sel.fwrite) { vpor(xmm3, xmm7); } - if(m_sel.zwrite) + if (m_sel.zwrite) { vpor(xmm4, xmm7); } @@ -2348,18 +2348,18 @@ void GSDrawScanlineCodeGenerator::WriteMask_AVX() vpcmpeqd(xmm1, xmm1); - if(m_sel.fwrite && m_sel.zwrite) + if (m_sel.fwrite && m_sel.zwrite) { vpcmpeqd(xmm0, xmm1, xmm4); vpcmpeqd(xmm1, xmm3); vpackssdw(xmm1, xmm0); } - else if(m_sel.fwrite) + else if (m_sel.fwrite) { vpcmpeqd(xmm1, xmm3); vpackssdw(xmm1, xmm1); } - else if(m_sel.zwrite) + else if (m_sel.zwrite) { vpcmpeqd(xmm1, xmm4); vpackssdw(xmm1, xmm1); @@ -2372,14 +2372,14 @@ void GSDrawScanlineCodeGenerator::WriteMask_AVX() void GSDrawScanlineCodeGenerator::WriteZBuf_AVX() { - if(!m_sel.zwrite) + if (!m_sel.zwrite) { return; } vmovdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]); - if(m_sel.ztest && m_sel.zpsm < 2) + if (m_sel.ztest && m_sel.zpsm < 2) { // zs = zs.blend8(zd, zm); @@ -2401,58 +2401,58 @@ void GSDrawScanlineCodeGenerator::WriteZBuf_AVX() void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - if(m_sel.abe == 0 && m_sel.aa1 == 0) + if (m_sel.abe == 0 && m_sel.aa1 == 0) { return; } - if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) + if ((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) { - switch(m_sel.fpsm) + switch (m_sel.fpsm) { - case 0: - case 1: + case 0: + case 1: - // c[2] = fd & mask; - // c[3] = (fd >> 8) & mask; + // c[2] = fd & mask; + // c[3] = (fd >> 8) & mask; - split16_2x8(xmm0, xmm1, xmm2); + split16_2x8(xmm0, xmm1, xmm2); - break; + break; - case 2: + case 2: - // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); + // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); + // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - vpcmpeqd(xmm7, xmm7); + vpcmpeqd(xmm7, xmm7); - vpsrld(xmm7, 27); // 0x0000001f - vpand(xmm0, xmm2, xmm7); - vpslld(xmm0, 3); + vpsrld(xmm7, 27); // 0x0000001f + vpand(xmm0, xmm2, xmm7); + vpslld(xmm0, 3); - vpslld(xmm7, 10); // 0x00007c00 - vpand(xmm4, xmm2, xmm7); - vpslld(xmm4, 9); + vpslld(xmm7, 10); // 0x00007c00 + vpand(xmm4, xmm2, xmm7); + vpslld(xmm4, 9); - vpor(xmm0, xmm4); + vpor(xmm0, xmm4); - vpsrld(xmm7, 5); // 0x000003e0 - vpand(xmm1, xmm2, xmm7); - vpsrld(xmm1, 2); + vpsrld(xmm7, 5); // 0x000003e0 + vpand(xmm1, xmm2, xmm7); + vpsrld(xmm1, 2); - vpsllw(xmm7, 10); // 0x00008000 - vpand(xmm4, xmm2, xmm7); - vpslld(xmm4, 8); + vpsllw(xmm7, 10); // 0x00008000 + vpand(xmm4, xmm2, xmm7); + vpslld(xmm4, 8); - vpor(xmm1, xmm4); + vpor(xmm1, xmm4); - break; + break; } } @@ -2461,46 +2461,56 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() // xmm2, xmm3 = used // xmm4, xmm7 = free - if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) + if (m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) { vmovdqa(xmm4, xmm5); } - if(m_sel.aba != m_sel.abb) + if (m_sel.aba != m_sel.abb) { // rb = c[aba * 2 + 0]; - switch(m_sel.aba) + switch (m_sel.aba) { - case 0: break; - case 1: vmovdqa(xmm5, xmm0); break; - case 2: vpxor(xmm5, xmm5); break; + case 0: + break; + case 1: + vmovdqa(xmm5, xmm0); + break; + case 2: + vpxor(xmm5, xmm5); + break; } // rb = rb.sub16(c[abb * 2 + 0]); - switch(m_sel.abb) + switch (m_sel.abb) { - case 0: vpsubw(xmm5, xmm4); break; - case 1: vpsubw(xmm5, xmm0); break; - case 2: break; + case 0: + vpsubw(xmm5, xmm4); + break; + case 1: + vpsubw(xmm5, xmm0); + break; + case 2: + break; } - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) + if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_local.gd->afix; - switch(m_sel.abc) + switch (m_sel.abc) { - case 0: - case 1: - vpshuflw(xmm7, m_sel.abc ? xmm1 : xmm6, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1)); - vpsllw(xmm7, 7); - break; - case 2: - vmovdqa(xmm7, ptr[&m_local.gd->afix]); - break; + case 0: + case 1: + vpshuflw(xmm7, m_sel.abc ? xmm1 : xmm6, _MM_SHUFFLE(3, 3, 1, 1)); + vpshufhw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1)); + vpsllw(xmm7, 7); + break; + case 2: + vmovdqa(xmm7, ptr[&m_local.gd->afix]); + break; } // rb = rb.modulate16<1>(a); @@ -2510,26 +2520,36 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() // rb = rb.add16(c[abd * 2 + 0]); - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: vpaddw(xmm5, xmm4); break; - case 1: vpaddw(xmm5, xmm0); break; - case 2: break; + case 0: + vpaddw(xmm5, xmm4); + break; + case 1: + vpaddw(xmm5, xmm0); + break; + case 2: + break; } } else { // rb = c[abd * 2 + 0]; - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: break; - case 1: vmovdqa(xmm5, xmm0); break; - case 2: vpxor(xmm5, xmm5); break; + case 0: + break; + case 1: + vmovdqa(xmm5, xmm0); + break; + case 2: + vpxor(xmm5, xmm5); + break; } } - if(m_sel.pabe) + if (m_sel.pabe) { // mask = (c[1] << 8).sra32(31); @@ -2550,27 +2570,37 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() vmovdqa(xmm4, xmm6); - if(m_sel.aba != m_sel.abb) + if (m_sel.aba != m_sel.abb) { // ga = c[aba * 2 + 1]; - switch(m_sel.aba) + switch (m_sel.aba) { - case 0: break; - case 1: vmovdqa(xmm6, xmm1); break; - case 2: vpxor(xmm6, xmm6); break; + case 0: + break; + case 1: + vmovdqa(xmm6, xmm1); + break; + case 2: + vpxor(xmm6, xmm6); + break; } // ga = ga.sub16(c[abeb * 2 + 1]); - switch(m_sel.abb) + switch (m_sel.abb) { - case 0: vpsubw(xmm6, xmm4); break; - case 1: vpsubw(xmm6, xmm1); break; - case 2: break; + case 0: + vpsubw(xmm6, xmm4); + break; + case 1: + vpsubw(xmm6, xmm1); + break; + case 2: + break; } - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) + if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // ga = ga.modulate16<1>(a); @@ -2579,22 +2609,32 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() // ga = ga.add16(c[abd * 2 + 1]); - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: vpaddw(xmm6, xmm4); break; - case 1: vpaddw(xmm6, xmm1); break; - case 2: break; + case 0: + vpaddw(xmm6, xmm4); + break; + case 1: + vpaddw(xmm6, xmm1); + break; + case 2: + break; } } else { // ga = c[abd * 2 + 1]; - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: break; - case 1: vmovdqa(xmm6, xmm1); break; - case 2: vpxor(xmm6, xmm6); break; + case 0: + break; + case 1: + vmovdqa(xmm6, xmm1); + break; + case 2: + vpxor(xmm6, xmm6); + break; } } @@ -2604,7 +2644,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() // xmm2, xmm3 = used // xmm0, xmm1, xmm7 = free - if(m_sel.pabe) + if (m_sel.pabe) { vpsrld(xmm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16) @@ -2614,7 +2654,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() } else { - if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx + if (m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx { mix16(xmm6, xmm4, xmm7); } @@ -2623,12 +2663,12 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_AVX() void GSDrawScanlineCodeGenerator::WriteFrame_AVX() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - if(m_sel.fpsm == 2 && m_sel.dthe) + if (m_sel.fpsm == 2 && m_sel.dthe) { mov(eax, ptr[esp + _top]); and(eax, 3); @@ -2638,7 +2678,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame_AVX() vpaddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]); } - if(m_sel.colclamp == 0) + if (m_sel.colclamp == 0) { // c[0] &= 0x00ff00ff; // c[1] &= 0x00ff00ff; @@ -2655,7 +2695,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame_AVX() vpunpcklwd(xmm5, xmm6); vpackuswb(xmm5, xmm7); - if(m_sel.fba && m_sel.fpsm != 1) + if (m_sel.fba && m_sel.fpsm != 1) { // fs |= 0x80000000; @@ -2664,7 +2704,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame_AVX() vpor(xmm5, xmm7); } - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { // GSVector4i rb = fs & 0x00f800f8; // GSVector4i ga = fs & 0x8000f800; @@ -2692,7 +2732,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame_AVX() vpor(xmm5, xmm7); } - if(m_sel.rfb) + if (m_sel.rfb) { // fs = fs.blend(fd, fm); @@ -2712,9 +2752,9 @@ void GSDrawScanlineCodeGenerator::ReadPixel_AVX(const Xmm& dst, const Reg32& add void GSDrawScanlineCodeGenerator::WritePixel_AVX(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz) { - if(m_sel.notest) + if (m_sel.notest) { - if(fast) + if (fast) { vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src); vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src); @@ -2729,7 +2769,7 @@ void GSDrawScanlineCodeGenerator::WritePixel_AVX(const Xmm& src, const Reg32& ad } else { - if(fast) + if (fast) { // if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs); // if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs); @@ -2782,24 +2822,30 @@ void GSDrawScanlineCodeGenerator::WritePixel_AVX(const Xmm& src, const Reg32& ad { Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2]; - switch(psm) + switch (psm) { - case 0: - if(i == 0) vmovd(dst, src); - else vpextrd(dst, src, i); - break; - case 1: - if(i == 0) vmovd(eax, src); - else vpextrd(eax, src, i); - xor(eax, dst); - and(eax, 0xffffff); - xor(dst, eax); - break; - case 2: - if(i == 0) vmovd(eax, src); - else vpextrw(eax, src, i * 2); - mov(dst, ax); - break; + case 0: + if (i == 0) + vmovd(dst, src); + else + vpextrd(dst, src, i); + break; + case 1: + if (i == 0) + vmovd(eax, src); + else + vpextrd(eax, src, i); + xor(eax, dst); + and(eax, 0xffffff); + xor(dst, eax); + break; + case 2: + if (i == 0) + vmovd(eax, src); + else + vpextrw(eax, src, i * 2); + mov(dst, ax); + break; } } @@ -2826,27 +2872,27 @@ void GSDrawScanlineCodeGenerator::ReadTexel_AVX(int pixels, int mip_offset) const GSVector4i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i; - if(m_sel.mmin && !m_sel.lcm) + if (m_sel.mmin && !m_sel.lcm) { const int r[] = {5, 6, 2, 4, 0, 1, 3, 7}; - if(pixels == 4) + if (pixels == 4) { vmovdqa(ptr[&m_local.temp.test], xmm7); } - for(uint8 j = 0; j < 4; j++) + for (uint8 j = 0; j < 4; j++) { mov(ebx, ptr[&lod_i->u32[j]]); mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - for(int i = 0; i < pixels; i++) + for (int i = 0; i < pixels; i++) { ReadTexel_AVX(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); } } - if(pixels == 4) + if (pixels == 4) { vmovdqa(xmm5, xmm7); vmovdqa(xmm7, ptr[&m_local.temp.test]); @@ -2854,7 +2900,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel_AVX(int pixels, int mip_offset) } else { - if(m_sel.mmin && m_sel.lcm) + if (m_sel.mmin && m_sel.lcm) { mov(ebx, ptr[&lod_i->u32[0]]); mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); @@ -2862,9 +2908,9 @@ void GSDrawScanlineCodeGenerator::ReadTexel_AVX(int pixels, int mip_offset) const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; - for(int i = 0; i < pixels; i++) + for (int i = 0; i < pixels; i++) { - for(uint8 j = 0; j < 4; j++) + for (uint8 j = 0; j < 4; j++) { ReadTexel_AVX(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); } @@ -2878,13 +2924,18 @@ void GSDrawScanlineCodeGenerator::ReadTexel_AVX(const Xmm& dst, const Xmm& addr, const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4]; - if(i == 0) vmovd(eax, addr); - else vpextrd(eax, addr, i); + if (i == 0) + vmovd(eax, addr); + else + vpextrd(eax, addr, i); - if(m_sel.tlu) movzx(eax, byte[ebx + eax]); + if (m_sel.tlu) + movzx(eax, byte[ebx + eax]); - if(i == 0) vmovd(dst, src); - else vpinsrd(dst, src, i); + if (i == 0) + vmovd(dst, src); + else + vpinsrd(dst, src, i); } #endif diff --git a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.avx2.cpp b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.avx2.cpp index 1797036c09..cd1dd29d67 100644 --- a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.avx2.cpp +++ b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.avx2.cpp @@ -42,7 +42,7 @@ void GSDrawScanlineCodeGenerator::Generate() Init(); - if(!m_sel.edge) + if (!m_sel.edge) { align(16); } @@ -76,7 +76,7 @@ L("loop"); // ymm6 = ga (!tme) // ymm7 = test - if(m_sel.mmin) + if (m_sel.mmin) { SampleTextureLOD(); } @@ -229,7 +229,7 @@ L("step"); // if(steps <= 0) break; - if(!m_sel.edge) + if (!m_sel.edge) { test(ecx, ecx); @@ -252,7 +252,7 @@ L("exit"); void GSDrawScanlineCodeGenerator::Init() { - if(!m_sel.notest) + if (!m_sel.notest) { // int skip = left & 7; @@ -281,8 +281,8 @@ void GSDrawScanlineCodeGenerator::Init() } else { - mov(ebx, edx); // left - xor(edx, edx); // skip + mov(ebx, edx); // left + xor(edx, edx); // skip lea(ecx, ptr[ecx - 8]); // steps } @@ -297,7 +297,7 @@ void GSDrawScanlineCodeGenerator::Init() lea(edi, ptr[ebx * 2]); add(edi, ptr[&m_local.gd->fzbc]); - if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) + if (m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) { // edx = &m_local.d[skip] @@ -308,13 +308,13 @@ void GSDrawScanlineCodeGenerator::Init() mov(ebx, ptr[esp + _v]); } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { - if(m_sel.fwrite && m_sel.fge || m_sel.zb) + if (m_sel.fwrite && m_sel.fge || m_sel.zb) { vbroadcastf128(ymm0, ptr[ebx + offsetof(GSVertexSW, p)]); // v.p - if(m_sel.fwrite && m_sel.fge) + if (m_sel.fwrite && m_sel.fge) { // f = GSVector8i(vp).zzzzh().zzzz().add16(m_local.d[skip].f); @@ -326,7 +326,7 @@ void GSDrawScanlineCodeGenerator::Init() vmovdqa(ptr[&m_local.temp.f], ymm1); } - if(m_sel.zb) + if (m_sel.zb) { // z = vp.zzzz() + m_local.d[skip].z; @@ -340,20 +340,20 @@ void GSDrawScanlineCodeGenerator::Init() } else { - if(m_sel.ztest) + if (m_sel.ztest) { vpbroadcastd(ymm0, ptr[&m_local.p.z]); } } - if(m_sel.fb) + if (m_sel.fb) { - if(m_sel.edge || m_sel.tfx != TFX_NONE) + if (m_sel.edge || m_sel.tfx != TFX_NONE) { vbroadcastf128(ymm4, ptr[ebx + offsetof(GSVertexSW, t)]); // v.t } - if(m_sel.edge) + if (m_sel.edge) { // m_local.temp.cov = GSVector4i::cast(v.t).zzzzh().wwww().srl16(9); @@ -364,9 +364,9 @@ void GSDrawScanlineCodeGenerator::Init() vmovdqa(ptr[&m_local.temp.cov], ymm3); } - if(m_sel.tfx != TFX_NONE) + if (m_sel.tfx != TFX_NONE) { - if(m_sel.fst) + if (m_sel.fst) { // GSVector4i vti(vt); @@ -380,13 +380,13 @@ void GSDrawScanlineCodeGenerator::Init() vpaddd(ymm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) + if (m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { vpaddd(ymm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); } else { - if(m_sel.ltf) + if (m_sel.ltf) { vpshuflw(ymm6, ymm3, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(ymm6, ymm6, _MM_SHUFFLE(2, 2, 0, 0)); @@ -418,9 +418,9 @@ void GSDrawScanlineCodeGenerator::Init() } } - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) + if (!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) { - if(m_sel.iip) + if (m_sel.iip) { // GSVector4i vc = GSVector4i(v.c); @@ -446,7 +446,7 @@ void GSDrawScanlineCodeGenerator::Init() } else { - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { vmovdqa(ymm5, ptr[&m_local.c.rb]); vmovdqa(ymm6, ptr[&m_local.c.ga]); @@ -466,11 +466,11 @@ void GSDrawScanlineCodeGenerator::Step() add(edi, 16); - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // zo += GSVector8::broadcast32(&m_local.d8.p.z); - if(m_sel.zb) + if (m_sel.zb) { vbroadcastss(ymm0, ptr[&m_local.d8.p.z]); vaddps(ymm0, ptr[&m_local.temp.zo]); @@ -480,7 +480,7 @@ void GSDrawScanlineCodeGenerator::Step() // f = f.add16(GSVector8i::broadcast16(&m_local.d8.p.f)); - if(m_sel.fwrite && m_sel.fge) + if (m_sel.fwrite && m_sel.fge) { vpbroadcastw(ymm1, ptr[&m_local.d8.p.f]); vpaddw(ymm1, ptr[&m_local.temp.f]); @@ -489,17 +489,17 @@ void GSDrawScanlineCodeGenerator::Step() } else { - if(m_sel.ztest) + if (m_sel.ztest) { vpbroadcastd(ymm0, ptr[&m_local.p.z]); } } - if(m_sel.fb) + if (m_sel.fb) { - if(m_sel.tfx != TFX_NONE) + if (m_sel.tfx != TFX_NONE) { - if(m_sel.fst) + if (m_sel.fst) { // GSVector8i stq = GSVector8i::cast(GSVector8(m_local.d8.stq)); @@ -511,7 +511,7 @@ void GSDrawScanlineCodeGenerator::Step() vpaddd(ymm2, ptr[&m_local.temp.s]); vmovdqa(ptr[&m_local.temp.s], ymm2); - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) + if (m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { // t = GSVector8::cast(GSVector8i::cast(t) + stq.yyyy()); @@ -548,9 +548,9 @@ void GSDrawScanlineCodeGenerator::Step() } } - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) + if (!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) { - if(m_sel.iip) + if (m_sel.iip) { // GSVector8i c = GSVector8i::broadcast64(&m_local.d8.c); @@ -576,7 +576,7 @@ void GSDrawScanlineCodeGenerator::Step() } else { - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { vmovdqa(ymm5, ptr[&m_local.c.rb]); vmovdqa(ymm6, ptr[&m_local.c.ga]); @@ -585,7 +585,7 @@ void GSDrawScanlineCodeGenerator::Step() } } - if(!m_sel.notest) + if (!m_sel.notest) { // test = m_test[15 + (steps & (steps >> 31))]; @@ -599,7 +599,7 @@ void GSDrawScanlineCodeGenerator::Step() void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) { - if(!m_sel.zb) + if (!m_sel.zb) { return; } @@ -612,9 +612,9 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) // GSVector8i zs = zi; - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { - if(m_sel.zoverflow) + if (m_sel.zoverflow) { // zs = (GSVector8i(z * 0.5f) << 1) | (GSVector8i(z) & GSVector8i::x00000001()); @@ -645,30 +645,30 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) vpminsd(ymm0, temp1); } - if(m_sel.zwrite) + if (m_sel.zwrite) { vmovdqa(ptr[&m_local.temp.zs], ymm0); } } - if(m_sel.ztest) + if (m_sel.ztest) { ReadPixel(ymm1, temp1, ebp); - if(m_sel.zwrite && m_sel.zpsm < 2) + if (m_sel.zwrite && m_sel.zpsm < 2) { vmovdqa(ptr[&m_local.temp.zd], ymm1); } // zd &= 0xffffffff >> m_sel.zpsm * 8; - if(m_sel.zpsm) + if (m_sel.zpsm) { vpslld(ymm1, (uint8)(m_sel.zpsm * 8)); vpsrld(ymm1, (uint8)(m_sel.zpsm * 8)); } - if(m_sel.zoverflow || m_sel.zpsm == 0) + if (m_sel.zoverflow || m_sel.zpsm == 0) { // GSVector8i o = GSVector8i::x80000000(); @@ -682,21 +682,21 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) vpsubd(ymm1, temp1); } - switch(m_sel.ztst) + switch (m_sel.ztst) { - case ZTST_GEQUAL: - // test |= zso < zdo; // ~(zso >= zdo) - vpcmpgtd(ymm1, ymm0); - vpor(ymm7, ymm1); - break; + case ZTST_GEQUAL: + // test |= zso < zdo; // ~(zso >= zdo) + vpcmpgtd(ymm1, ymm0); + vpor(ymm7, ymm1); + break; - case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL - // test |= zso <= zdo; // ~(zso > zdo) - vpcmpgtd(ymm0, ymm1); - vpcmpeqd(temp1, temp1); - vpxor(ymm0, temp1); - vpor(ymm7, ymm0); - break; + case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL + // test |= zso <= zdo; // ~(zso > zdo) + vpcmpgtd(ymm0, ymm1); + vpcmpeqd(temp1, temp1); + vpxor(ymm0, temp1); + vpor(ymm7, ymm0); + break; } alltrue(ymm7); @@ -705,14 +705,14 @@ void GSDrawScanlineCodeGenerator::TestZ(const Ymm& temp1, const Ymm& temp2) void GSDrawScanlineCodeGenerator::SampleTexture() { - if(!m_sel.fb || m_sel.tfx == TFX_NONE) + if (!m_sel.fb || m_sel.tfx == TFX_NONE) { return; } mov(ebx, ptr[&m_local.gd->tex[0]]); - if(m_sel.tlu) + if (m_sel.tlu) { mov(edx, ptr[&m_local.gd->clut]); } @@ -720,7 +720,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() // ebx = tex // edx = clut - if(!m_sel.fst) + if (!m_sel.fst) { vrcpps(ymm0, ymm4); @@ -730,7 +730,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() vcvttps2dq(ymm2, ymm2); vcvttps2dq(ymm3, ymm3); - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -747,7 +747,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() // ymm2 = u // ymm3 = v - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i uf = u.xxzzlh().srl16(1); @@ -756,7 +756,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() vpsrlw(ymm0, 12); vmovdqa(ptr[&m_local.temp.uf], ymm0); - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // GSVector8i vf = v.xxzzlh().srl16(1); @@ -773,7 +773,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() vpsrad(ymm3, 16); vpackssdw(ymm2, ymm3); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i uv1 = uv0.add16(GSVector8i::x0001()); @@ -814,7 +814,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture() // ymm1, ymm5, ymm6 = free // ymm7 = used - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i y1 = uv1.uph16() << tw; // GSVector8i x1 = uv1.upl16(); @@ -974,11 +974,11 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vbroadcasti128(ymm0, ptr[&m_local.gd->t.min]); vpmaxsw(uv, ymm0); @@ -997,7 +997,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv) vbroadcasti128(ymm0, ptr[&m_local.gd->t.min]); vpand(uv, ymm0); - if(region) + if (region) { vbroadcasti128(ymm0, ptr[&m_local.gd->t.max]); vpor(uv, ymm0); @@ -1014,7 +1014,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv) vpand(ymm1, uv, ymm4); - if(region) + if (region) { vpor(ymm1, ymm5); } @@ -1039,11 +1039,11 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv0, const Ymm& uv1) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vbroadcasti128(ymm4, ptr[&m_local.gd->t.min]); vpmaxsw(uv0, ymm4); @@ -1066,7 +1066,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv0, const Ymm& uv1) vpand(uv0, ymm4); vpand(uv1, ymm4); - if(region) + if (region) { vbroadcasti128(ymm5, ptr[&m_local.gd->t.max]); vpor(uv0, ymm5); @@ -1086,7 +1086,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv0, const Ymm& uv1) vpand(ymm1, uv0, ymm4); - if(region) + if (region) { vpor(ymm1, ymm5); } @@ -1106,7 +1106,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv0, const Ymm& uv1) vpand(ymm1, uv1, ymm4); - if(region) + if (region) { vpor(ymm1, ymm5); } @@ -1124,7 +1124,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Ymm& uv0, const Ymm& uv1) void GSDrawScanlineCodeGenerator::SampleTextureLOD() { - if(!m_sel.fb || m_sel.tfx == TFX_NONE) + if (!m_sel.fb || m_sel.tfx == TFX_NONE) { return; } @@ -1133,12 +1133,12 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() mov(ebp, (size_t)m_local.gd->tex); - if(m_sel.tlu) + if (m_sel.tlu) { mov(edx, ptr[&m_local.gd->clut]); } - if(!m_sel.fst) + if (!m_sel.fst) { vrcpps(ymm0, ymm4); @@ -1156,7 +1156,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() // TODO: if the fractional part is not needed in round-off mode then there is a faster integer log2 (just take the exp) (but can we round it?) - if(!m_sel.lcm) + if (!m_sel.lcm) { // lod = -log2(Q) * (1 << L) + K @@ -1175,7 +1175,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() // ymm4 = mant(q) | 1.0f - if(m_cpu.has(util::Cpu::tFMA)) + if (m_cpu.has(util::Cpu::tFMA)) { vmovaps(ymm5, ptr[g_const->m_log2_coef_256b[0]]); // c0 vfmadd213ps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[1]]); // c0 * ymm4 + c1 @@ -1196,7 +1196,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() // ymm4 = log2(Q) = ((((c0 * ymm4) + c1) * ymm4) + c2) * (ymm4 - 1.0f) + ymm0 - if(m_cpu.has(util::Cpu::tFMA)) + if (m_cpu.has(util::Cpu::tFMA)) { vmovaps(ymm5, ptr[&m_local.gd->l]); vfmadd213ps(ymm4, ymm5, ptr[&m_local.gd->k]); @@ -1214,7 +1214,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() vmaxps(ymm4, ymm0); vcvtps2dq(ymm4, ymm4); - if(m_sel.mmin == 1) // round-off mode + if (m_sel.mmin == 1) // round-off mode { mov(eax, 0x8000); vmovd(xmm0, eax); @@ -1225,13 +1225,13 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() vpsrld(ymm0, ymm4, 16); vmovdqa(ptr[&m_local.temp.lod.i], ymm0); -/* + /* vpslld(ymm5, ymm0, 6); vpslld(ymm6, ymm4, 16); vpsrld(ymm6, ymm6, 24); return; */ - if(m_sel.mmin == 2) // trilinear mode + if (m_sel.mmin == 2) // trilinear mode { vpshuflw(ymm1, ymm4, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(ymm1, ymm1, _MM_SHUFFLE(2, 2, 0, 0)); @@ -1288,7 +1288,7 @@ return; // ymm5 = minuv // ymm6 = maxuv - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -1301,7 +1301,7 @@ return; vpsubd(ymm3, ymm4); // GSVector8i uf = u.xxzzlh().srl16(1); - + vpshuflw(ymm0, ymm2, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); vpsrlw(ymm0, 12); @@ -1321,7 +1321,7 @@ return; vpsrad(ymm3, 16); vpackssdw(ymm2, ymm3); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i uv1 = uv0.add16(GSVector8i::x0001()); @@ -1362,7 +1362,7 @@ return; // ymm1, ymm5, ymm6 = free // ymm7 = used - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i x1 = uv1.upl16(); // GSVector8i y1 = uv1.uph16() << tw; @@ -1512,7 +1512,7 @@ return; vpsrlw(ymm6, 8); } - if(m_sel.mmin != 1) // !round-off mode + if (m_sel.mmin != 1) // !round-off mode { vmovdqa(ptr[&m_local.temp.trb], ymm5); vmovdqa(ptr[&m_local.temp.tga], ymm6); @@ -1529,7 +1529,7 @@ return; vpsrlw(ymm5, 1); vpsrlw(ymm6, 1); - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -1542,7 +1542,7 @@ return; vpsubd(ymm3, ymm4); // GSVector8i uf = u.xxzzlh().srl16(1); - + vpshuflw(ymm0, ymm2, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); vpsrlw(ymm0, 12); @@ -1562,7 +1562,7 @@ return; vpsrad(ymm3, 16); vpackssdw(ymm2, ymm3); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i uv1 = uv0.add16(GSVector4i::x0001()); @@ -1603,7 +1603,7 @@ return; // ymm1, ymm5, ymm6 = free // ymm7 = used - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector8i x1 = uv1.upl16(); // GSVector8i y1 = uv1.uph16() << tw; @@ -1777,11 +1777,11 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vpmaxsw(uv, ymm5); } @@ -1797,7 +1797,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv) { vpand(uv, ymm5); - if(region) + if (region) { vpor(uv, ymm6); } @@ -1811,7 +1811,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv) vpand(ymm1, uv, ymm5); - if(region) + if (region) { vpor(ymm1, ymm6); } @@ -1838,11 +1838,11 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv0, const Ymm& uv1) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { vpmaxsw(uv0, ymm5); vpmaxsw(uv1, ymm5); @@ -1862,7 +1862,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv0, const Ymm& uv1) vpand(uv0, ymm5); vpand(uv1, ymm5); - if(region) + if (region) { vpor(uv0, ymm6); vpor(uv1, ymm6); @@ -1879,7 +1879,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv0, const Ymm& uv1) vpand(ymm1, uv0, ymm5); - if(region) + if (region) { vpor(ymm1, ymm6); } @@ -1899,7 +1899,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv0, const Ymm& uv1) vpand(ymm1, uv1, ymm5); - if(region) + if (region) { vpor(ymm1, ymm6); } @@ -1917,114 +1917,114 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Ymm& uv0, const Ymm& uv1) void GSDrawScanlineCodeGenerator::AlphaTFX() { - if(!m_sel.fb) + if (!m_sel.fb) { return; } - switch(m_sel.tfx) + switch (m_sel.tfx) { - case TFX_MODULATE: + case TFX_MODULATE: - // GSVector8i ga = iip ? gaf : m_local.c.ga; - - vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - - // gat = gat.modulate16<1>(ga).clamp8(); - - modulate16(ymm6, ymm4, 1); - - clamp16(ymm6, ymm3); - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - vpsrlw(ymm4, 7); - - mix16(ymm6, ymm4, ymm3); - } - - break; - - case TFX_DECAL: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - // GSVector4i ga = iip ? gaf : m_local.c.ga; + // GSVector8i ga = iip ? gaf : m_local.c.ga; vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - vpsrlw(ymm4, 7); + // gat = gat.modulate16<1>(ga).clamp8(); - mix16(ymm6, ymm4, ymm3); - } + modulate16(ymm6, ymm4, 1); - break; + clamp16(ymm6, ymm3); - case TFX_HIGHLIGHT: + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - // GSVector4i ga = iip ? gaf : m_local.c.ga; + if (!m_sel.tcc) + { + vpsrlw(ymm4, 7); - vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - vmovdqa(ymm2, ymm4); + mix16(ymm6, ymm4, ymm3); + } - // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); + break; - vpsrlw(ymm4, 7); + case TFX_DECAL: - if(m_sel.tcc) - { - vpaddusb(ymm4, ymm6); - } + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - mix16(ymm6, ymm4, ymm3); + if (!m_sel.tcc) + { + // GSVector4i ga = iip ? gaf : m_local.c.ga; - break; + vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - case TFX_HIGHLIGHT2: + vpsrlw(ymm4, 7); - // if(!tcc) gat = gat.mix16(ga.srl16(7)); + mix16(ymm6, ymm4, ymm3); + } + + break; + + case TFX_HIGHLIGHT: - if(!m_sel.tcc) - { // GSVector4i ga = iip ? gaf : m_local.c.ga; vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); vmovdqa(ymm2, ymm4); + // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); + vpsrlw(ymm4, 7); + if (m_sel.tcc) + { + vpaddusb(ymm4, ymm6); + } + mix16(ymm6, ymm4, ymm3); - } - break; + break; - case TFX_NONE: + case TFX_HIGHLIGHT2: - // gat = iip ? ga.srl16(7) : ga; + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - if(m_sel.iip) - { - vpsrlw(ymm6, 7); - } + if (!m_sel.tcc) + { + // GSVector4i ga = iip ? gaf : m_local.c.ga; - break; + vmovdqa(ymm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); + vmovdqa(ymm2, ymm4); + + vpsrlw(ymm4, 7); + + mix16(ymm6, ymm4, ymm3); + } + + break; + + case TFX_NONE: + + // gat = iip ? ga.srl16(7) : ga; + + if (m_sel.iip) + { + vpsrlw(ymm6, 7); + } + + break; } - if(m_sel.aa1) + if (m_sel.aa1) { // gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha // FIXME: bios config screen cubes - if(!m_sel.abe) + if (!m_sel.abe) { // a = cov - if(m_sel.edge) + if (m_sel.edge) { vmovdqa(ymm0, ptr[&m_local.temp.cov]); } @@ -2045,7 +2045,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX() vpsllw(ymm0, 15); vpsrlw(ymm0, 8); - if(m_sel.edge) + if (m_sel.edge) { vmovdqa(ymm1, ptr[&m_local.temp.cov]); } @@ -2065,12 +2065,12 @@ void GSDrawScanlineCodeGenerator::AlphaTFX() void GSDrawScanlineCodeGenerator::ReadMask() { - if(m_sel.fwrite) + if (m_sel.fwrite) { vpbroadcastd(ymm3, ptr[&m_local.gd->fm]); } - if(m_sel.zwrite) + if (m_sel.zwrite) { vpbroadcastd(ymm4, ptr[&m_local.gd->zm]); } @@ -2078,157 +2078,157 @@ void GSDrawScanlineCodeGenerator::ReadMask() void GSDrawScanlineCodeGenerator::TestAlpha() { - switch(m_sel.atst) + switch (m_sel.atst) { - case ATST_NEVER: - // t = GSVector8i::xffffffff(); - vpcmpeqd(ymm1, ymm1); - break; + case ATST_NEVER: + // t = GSVector8i::xffffffff(); + vpcmpeqd(ymm1, ymm1); + break; - case ATST_ALWAYS: - return; + case ATST_ALWAYS: + return; - case ATST_LESS: - case ATST_LEQUAL: - // t = (ga >> 16) > m_local.gd->aref; - vpsrld(ymm1, ymm6, 16); - vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); - vpcmpgtd(ymm1, ymm0); - break; + case ATST_LESS: + case ATST_LEQUAL: + // t = (ga >> 16) > m_local.gd->aref; + vpsrld(ymm1, ymm6, 16); + vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); + vpcmpgtd(ymm1, ymm0); + break; - case ATST_EQUAL: - // t = (ga >> 16) != m_local.gd->aref; - vpsrld(ymm1, ymm6, 16); - vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); - vpcmpeqd(ymm1, ymm0); - vpcmpeqd(ymm0, ymm0); - vpxor(ymm1, ymm0); - break; + case ATST_EQUAL: + // t = (ga >> 16) != m_local.gd->aref; + vpsrld(ymm1, ymm6, 16); + vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); + vpcmpeqd(ymm1, ymm0); + vpcmpeqd(ymm0, ymm0); + vpxor(ymm1, ymm0); + break; - case ATST_GEQUAL: - case ATST_GREATER: - // t = (ga >> 16) < m_local.gd->aref; - vpsrld(ymm0, ymm6, 16); - vbroadcasti128(ymm1, ptr[&m_local.gd->aref]); - vpcmpgtd(ymm1, ymm0); - break; + case ATST_GEQUAL: + case ATST_GREATER: + // t = (ga >> 16) < m_local.gd->aref; + vpsrld(ymm0, ymm6, 16); + vbroadcasti128(ymm1, ptr[&m_local.gd->aref]); + vpcmpgtd(ymm1, ymm0); + break; - case ATST_NOTEQUAL: - // t = (ga >> 16) == m_local.gd->aref; - vpsrld(ymm1, ymm6, 16); - vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); - vpcmpeqd(ymm1, ymm0); - break; + case ATST_NOTEQUAL: + // t = (ga >> 16) == m_local.gd->aref; + vpsrld(ymm1, ymm6, 16); + vbroadcasti128(ymm0, ptr[&m_local.gd->aref]); + vpcmpeqd(ymm1, ymm0); + break; } - switch(m_sel.afail) + switch (m_sel.afail) { - case AFAIL_KEEP: - // test |= t; - vpor(ymm7, ymm1); - alltrue(ymm7); - break; + case AFAIL_KEEP: + // test |= t; + vpor(ymm7, ymm1); + alltrue(ymm7); + break; - case AFAIL_FB_ONLY: - // zm |= t; - vpor(ymm4, ymm1); - break; + case AFAIL_FB_ONLY: + // zm |= t; + vpor(ymm4, ymm1); + break; - case AFAIL_ZB_ONLY: - // fm |= t; - vpor(ymm3, ymm1); - break; + case AFAIL_ZB_ONLY: + // fm |= t; + vpor(ymm3, ymm1); + break; - case AFAIL_RGB_ONLY: - // zm |= t; - vpor(ymm4, ymm1); - // fm |= t & GSVector8i::xff000000(); - vpsrld(ymm1, 24); - vpslld(ymm1, 24); - vpor(ymm3, ymm1); - break; + case AFAIL_RGB_ONLY: + // zm |= t; + vpor(ymm4, ymm1); + // fm |= t & GSVector8i::xff000000(); + vpsrld(ymm1, 24); + vpslld(ymm1, 24); + vpor(ymm3, ymm1); + break; } } void GSDrawScanlineCodeGenerator::ColorTFX() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - switch(m_sel.tfx) + switch (m_sel.tfx) { - case TFX_MODULATE: + case TFX_MODULATE: - // GSVector8i rb = iip ? rbf : m_local.c.rb; + // GSVector8i rb = iip ? rbf : m_local.c.rb; - // rbt = rbt.modulate16<1>(rb).clamp8(); + // rbt = rbt.modulate16<1>(rb).clamp8(); - modulate16(ymm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); + modulate16(ymm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - clamp16(ymm5, ymm1); + clamp16(ymm5, ymm1); - break; + break; - case TFX_DECAL: + case TFX_DECAL: - break; + break; - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: + case TFX_HIGHLIGHT: + case TFX_HIGHLIGHT2: - if(m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) - { - // GSVector8i ga = iip ? gaf : m_local.c.ga; + if (m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) + { + // GSVector8i ga = iip ? gaf : m_local.c.ga; - vmovdqa(ymm2, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - } + vmovdqa(ymm2, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); + } - // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); + // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); - vmovdqa(ymm1, ymm6); + vmovdqa(ymm1, ymm6); - modulate16(ymm6, ymm2, 1); + modulate16(ymm6, ymm2, 1); - vpshuflw(ymm2, ymm2, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(ymm2, ymm2, _MM_SHUFFLE(3, 3, 1, 1)); - vpsrlw(ymm2, 7); + vpshuflw(ymm2, ymm2, _MM_SHUFFLE(3, 3, 1, 1)); + vpshufhw(ymm2, ymm2, _MM_SHUFFLE(3, 3, 1, 1)); + vpsrlw(ymm2, 7); - vpaddw(ymm6, ymm2); + vpaddw(ymm6, ymm2); - clamp16(ymm6, ymm0); + clamp16(ymm6, ymm0); - mix16(ymm6, ymm1, ymm0); + mix16(ymm6, ymm1, ymm0); - // GSVector8i rb = iip ? rbf : m_local.c.rb; + // GSVector8i rb = iip ? rbf : m_local.c.rb; - // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); + // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); - modulate16(ymm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); + modulate16(ymm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - vpaddw(ymm5, ymm2); + vpaddw(ymm5, ymm2); - clamp16(ymm5, ymm0); + clamp16(ymm5, ymm0); - break; + break; - case TFX_NONE: + case TFX_NONE: - // rbt = iip ? rb.srl16(7) : rb; + // rbt = iip ? rb.srl16(7) : rb; - if(m_sel.iip) - { - vpsrlw(ymm5, 7); - } + if (m_sel.iip) + { + vpsrlw(ymm5, 7); + } - break; + break; } } void GSDrawScanlineCodeGenerator::Fog() { - if(!m_sel.fwrite || !m_sel.fge) + if (!m_sel.fwrite || !m_sel.fge) { return; } @@ -2236,7 +2236,7 @@ void GSDrawScanlineCodeGenerator::Fog() // rb = m_local.gd->frb.lerp16<0>(rb, f); // ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga); - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { vmovdqa(ymm0, ptr[&m_local.temp.f]); } @@ -2257,7 +2257,7 @@ void GSDrawScanlineCodeGenerator::Fog() void GSDrawScanlineCodeGenerator::ReadFrame() { - if(!m_sel.fb) + if (!m_sel.fb) { return; } @@ -2268,7 +2268,7 @@ void GSDrawScanlineCodeGenerator::ReadFrame() add(ebx, ptr[edi]); and(ebx, HALF_VM_SIZE - 1); - if(!m_sel.rfb) + if (!m_sel.rfb) { return; } @@ -2278,16 +2278,16 @@ void GSDrawScanlineCodeGenerator::ReadFrame() void GSDrawScanlineCodeGenerator::TestDestAlpha() { - if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) + if (!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) { return; } // test |= ((fd [<< 16]) ^ m_local.gd->datm).sra32(31); - if(m_sel.datm) + if (m_sel.datm) { - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { vpxor(ymm0, ymm0); //vpsrld(ymm1, ymm2, 15); @@ -2304,7 +2304,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha() } else { - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { vpslld(ymm1, ymm2, 16); vpsrad(ymm1, 31); @@ -2322,7 +2322,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha() void GSDrawScanlineCodeGenerator::WriteMask() { - if(m_sel.notest) + if (m_sel.notest) { return; } @@ -2330,12 +2330,12 @@ void GSDrawScanlineCodeGenerator::WriteMask() // fm |= test; // zm |= test; - if(m_sel.fwrite) + if (m_sel.fwrite) { vpor(ymm3, ymm7); } - if(m_sel.zwrite) + if (m_sel.zwrite) { vpor(ymm4, ymm7); } @@ -2344,18 +2344,18 @@ void GSDrawScanlineCodeGenerator::WriteMask() vpcmpeqd(ymm1, ymm1); - if(m_sel.fwrite && m_sel.zwrite) + if (m_sel.fwrite && m_sel.zwrite) { vpcmpeqd(ymm0, ymm1, ymm4); vpcmpeqd(ymm1, ymm3); vpackssdw(ymm1, ymm0); } - else if(m_sel.fwrite) + else if (m_sel.fwrite) { vpcmpeqd(ymm1, ymm3); vpackssdw(ymm1, ymm1); } - else if(m_sel.zwrite) + else if (m_sel.zwrite) { vpcmpeqd(ymm1, ymm4); vpackssdw(ymm1, ymm1); @@ -2368,12 +2368,12 @@ void GSDrawScanlineCodeGenerator::WriteMask() void GSDrawScanlineCodeGenerator::WriteZBuf() { - if(!m_sel.zwrite) + if (!m_sel.zwrite) { return; } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { vmovdqa(ymm1, ptr[&m_local.temp.zs]); } @@ -2390,7 +2390,7 @@ void GSDrawScanlineCodeGenerator::WriteZBuf() vpminsd(ymm1, ymm7); } - if(m_sel.ztest && m_sel.zpsm < 2) + if (m_sel.ztest && m_sel.zpsm < 2) { // zs = zs.blend8(zd, zm); @@ -2404,60 +2404,60 @@ void GSDrawScanlineCodeGenerator::WriteZBuf() void GSDrawScanlineCodeGenerator::AlphaBlend() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - if(m_sel.abe == 0 && m_sel.aa1 == 0) + if (m_sel.abe == 0 && m_sel.aa1 == 0) { return; } - if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) + if ((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) { - switch(m_sel.fpsm) + switch (m_sel.fpsm) { - case 0: - case 1: + case 0: + case 1: - // c[2] = fd & mask; - // c[3] = (fd >> 8) & mask; + // c[2] = fd & mask; + // c[3] = (fd >> 8) & mask; - vpsllw(ymm0, ymm2, 8); - vpsrlw(ymm0, 8); - vpsrlw(ymm1, ymm2, 8); + vpsllw(ymm0, ymm2, 8); + vpsrlw(ymm0, 8); + vpsrlw(ymm1, ymm2, 8); - break; + break; - case 2: + case 2: - // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); + // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); + // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - vpcmpeqd(ymm7, ymm7); + vpcmpeqd(ymm7, ymm7); - vpsrld(ymm7, 27); // 0x0000001f - vpand(ymm0, ymm2, ymm7); - vpslld(ymm0, 3); + vpsrld(ymm7, 27); // 0x0000001f + vpand(ymm0, ymm2, ymm7); + vpslld(ymm0, 3); - vpslld(ymm7, 10); // 0x00007c00 - vpand(ymm4, ymm2, ymm7); - vpslld(ymm4, 9); + vpslld(ymm7, 10); // 0x00007c00 + vpand(ymm4, ymm2, ymm7); + vpslld(ymm4, 9); - vpor(ymm0, ymm4); + vpor(ymm0, ymm4); - vpsrld(ymm7, 5); // 0x000003e0 - vpand(ymm1, ymm2, ymm7); - vpsrld(ymm1, 2); + vpsrld(ymm7, 5); // 0x000003e0 + vpand(ymm1, ymm2, ymm7); + vpsrld(ymm1, 2); - vpsllw(ymm7, 10); // 0x00008000 - vpand(ymm4, ymm2, ymm7); - vpslld(ymm4, 8); + vpsllw(ymm7, 10); // 0x00008000 + vpand(ymm4, ymm2, ymm7); + vpslld(ymm4, 8); - vpor(ymm1, ymm4); + vpor(ymm1, ymm4); - break; + break; } } @@ -2466,46 +2466,56 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() // ymm2, ymm3 = used // ymm4, ymm7 = free - if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) + if (m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) { vmovdqa(ymm4, ymm5); } - if(m_sel.aba != m_sel.abb) + if (m_sel.aba != m_sel.abb) { // rb = c[aba * 2 + 0]; - switch(m_sel.aba) + switch (m_sel.aba) { - case 0: break; - case 1: vmovdqa(ymm5, ymm0); break; - case 2: vpxor(ymm5, ymm5); break; + case 0: + break; + case 1: + vmovdqa(ymm5, ymm0); + break; + case 2: + vpxor(ymm5, ymm5); + break; } // rb = rb.sub16(c[abb * 2 + 0]); - switch(m_sel.abb) + switch (m_sel.abb) { - case 0: vpsubw(ymm5, ymm4); break; - case 1: vpsubw(ymm5, ymm0); break; - case 2: break; + case 0: + vpsubw(ymm5, ymm4); + break; + case 1: + vpsubw(ymm5, ymm0); + break; + case 2: + break; } - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) + if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_local.gd->afix; - switch(m_sel.abc) + switch (m_sel.abc) { - case 0: - case 1: - vpshuflw(ymm7, m_sel.abc ? ymm1 : ymm6, _MM_SHUFFLE(3, 3, 1, 1)); - vpshufhw(ymm7, ymm7, _MM_SHUFFLE(3, 3, 1, 1)); - vpsllw(ymm7, 7); - break; - case 2: - vpbroadcastw(ymm7, ptr[&m_local.gd->afix]); - break; + case 0: + case 1: + vpshuflw(ymm7, m_sel.abc ? ymm1 : ymm6, _MM_SHUFFLE(3, 3, 1, 1)); + vpshufhw(ymm7, ymm7, _MM_SHUFFLE(3, 3, 1, 1)); + vpsllw(ymm7, 7); + break; + case 2: + vpbroadcastw(ymm7, ptr[&m_local.gd->afix]); + break; } // rb = rb.modulate16<1>(a); @@ -2515,26 +2525,36 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() // rb = rb.add16(c[abd * 2 + 0]); - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: vpaddw(ymm5, ymm4); break; - case 1: vpaddw(ymm5, ymm0); break; - case 2: break; + case 0: + vpaddw(ymm5, ymm4); + break; + case 1: + vpaddw(ymm5, ymm0); + break; + case 2: + break; } } else { // rb = c[abd * 2 + 0]; - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: break; - case 1: vmovdqa(ymm5, ymm0); break; - case 2: vpxor(ymm5, ymm5); break; + case 0: + break; + case 1: + vmovdqa(ymm5, ymm0); + break; + case 2: + vpxor(ymm5, ymm5); + break; } } - if(m_sel.pabe) + if (m_sel.pabe) { // mask = (c[1] << 8).sra32(31); @@ -2555,27 +2575,37 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() vmovdqa(ymm4, ymm6); - if(m_sel.aba != m_sel.abb) + if (m_sel.aba != m_sel.abb) { // ga = c[aba * 2 + 1]; - switch(m_sel.aba) + switch (m_sel.aba) { - case 0: break; - case 1: vmovdqa(ymm6, ymm1); break; - case 2: vpxor(ymm6, ymm6); break; + case 0: + break; + case 1: + vmovdqa(ymm6, ymm1); + break; + case 2: + vpxor(ymm6, ymm6); + break; } // ga = ga.sub16(c[abeb * 2 + 1]); - switch(m_sel.abb) + switch (m_sel.abb) { - case 0: vpsubw(ymm6, ymm4); break; - case 1: vpsubw(ymm6, ymm1); break; - case 2: break; + case 0: + vpsubw(ymm6, ymm4); + break; + case 1: + vpsubw(ymm6, ymm1); + break; + case 2: + break; } - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) + if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // ga = ga.modulate16<1>(a); @@ -2584,22 +2614,32 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() // ga = ga.add16(c[abd * 2 + 1]); - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: vpaddw(ymm6, ymm4); break; - case 1: vpaddw(ymm6, ymm1); break; - case 2: break; + case 0: + vpaddw(ymm6, ymm4); + break; + case 1: + vpaddw(ymm6, ymm1); + break; + case 2: + break; } } else { // ga = c[abd * 2 + 1]; - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: break; - case 1: vmovdqa(ymm6, ymm1); break; - case 2: vpxor(ymm6, ymm6); break; + case 0: + break; + case 1: + vmovdqa(ymm6, ymm1); + break; + case 2: + vpxor(ymm6, ymm6); + break; } } @@ -2609,7 +2649,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() // ymm2, ymm3 = used // ymm0, ymm1, ymm7 = free - if(m_sel.pabe) + if (m_sel.pabe) { vpsrld(ymm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16) @@ -2619,7 +2659,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() } else { - if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx + if (m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx { mix16(ymm6, ymm4, ymm7); } @@ -2628,12 +2668,12 @@ void GSDrawScanlineCodeGenerator::AlphaBlend() void GSDrawScanlineCodeGenerator::WriteFrame() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - if(m_sel.fpsm == 2 && m_sel.dthe) + if (m_sel.fpsm == 2 && m_sel.dthe) { mov(eax, ptr[esp + _top]); and(eax, 3); @@ -2645,7 +2685,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame() vpaddw(ymm6, ymm7); } - if(m_sel.colclamp == 0) + if (m_sel.colclamp == 0) { // c[0] &= 0x00ff00ff; // c[1] &= 0x00ff00ff; @@ -2662,7 +2702,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame() vpunpcklwd(ymm5, ymm6); vpackuswb(ymm5, ymm7); - if(m_sel.fba && m_sel.fpsm != 1) + if (m_sel.fba && m_sel.fpsm != 1) { // fs |= 0x80000000; @@ -2671,7 +2711,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame() vpor(ymm5, ymm7); } - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { // GSVector8i rb = fs & 0x00f800f8; // GSVector8i ga = fs & 0x8000f800; @@ -2699,7 +2739,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame() vpor(ymm5, ymm7); } - if(m_sel.rfb) + if (m_sel.rfb) { // fs = fs.blend(fd, fm); @@ -2718,7 +2758,7 @@ void GSDrawScanlineCodeGenerator::ReadPixel(const Ymm& dst, const Ymm& temp, con vmovq(Xmm(temp.getIdx()), qword[addr * 2 + (size_t)m_local.gd->vm + 16 * 2]); vmovhps(Xmm(temp.getIdx()), qword[addr * 2 + (size_t)m_local.gd->vm + 24 * 2]); vinserti128(dst, dst, Xmm(temp.getIdx()), 1); -/* + /* vmovdqu(dst, ptr[addr * 2 + (size_t)m_local.gd->vm]); vmovdqu(temp, ptr[addr * 2 + (size_t)m_local.gd->vm + 16 * 2]); vpunpcklqdq(dst, dst, temp); @@ -2731,11 +2771,11 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Ymm& src, const Ymm& temp, co Xmm src1 = Xmm(src.getIdx()); Xmm src2 = Xmm(temp.getIdx()); - vextracti128(src2, src, 1); + vextracti128(src2, src, 1); - if(m_sel.notest) + if (m_sel.notest) { - if(fast) + if (fast) { vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src1); vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src1); @@ -2758,7 +2798,7 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Ymm& src, const Ymm& temp, co { // cascade tests? - if(fast) + if (fast) { test(mask, 0x0000000f << (fz * 8)); je("@f"); @@ -2833,24 +2873,30 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, { Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2]; - switch(psm) + switch (psm) { - case 0: - if(j == 0) vmovd(dst, src); - else vpextrd(dst, src, j); - break; - case 1: - if(j == 0) vmovd(eax, src); - else vpextrd(eax, src, j); - xor(eax, dst); - and(eax, 0xffffff); - xor(dst, eax); - break; - case 2: - if(j == 0) vmovd(eax, src); - else vpextrw(eax, src, j * 2); - mov(dst, ax); - break; + case 0: + if (j == 0) + vmovd(dst, src); + else + vpextrd(dst, src, j); + break; + case 1: + if (j == 0) + vmovd(eax, src); + else + vpextrd(eax, src, j); + xor(eax, dst); + and(eax, 0xffffff); + xor(dst, eax); + break; + case 2: + if (j == 0) + vmovd(eax, src); + else + vpextrw(eax, src, j * 2); + mov(dst, ax); + break; } } @@ -2877,12 +2923,12 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) const GSVector8i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i; - if(m_sel.mmin && !m_sel.lcm) + if (m_sel.mmin && !m_sel.lcm) { const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; const int t[] = {1, 4, 5, 1, 2, 5, 0, 2}; - for(int i = 0; i < pixels; i++) + for (int i = 0; i < pixels; i++) { Ymm src = Ymm(r[i * 2 + 0]); Ymm dst = Ymm(r[i * 2 + 1]); @@ -2891,7 +2937,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) vextracti128(Xmm(t1.getIdx()), src, 1); - for(uint8 j = 0; j < 4; j++) + for (uint8 j = 0; j < 4; j++) { mov(ebx, ptr[&lod_i->u32[j + 0]]); mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); @@ -2912,20 +2958,20 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; const int t[] = {1, 4, 5, 1, 2, 5, 0, 2}; - if(m_sel.mmin && m_sel.lcm) + if (m_sel.mmin && m_sel.lcm) { mov(ebx, ptr[&lod_i->u32[0]]); mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); } - for(int i = 0; i < pixels; i++) + for (int i = 0; i < pixels; i++) { Ymm src = Ymm(r[i * 2 + 0]); Ymm dst = Ymm(r[i * 2 + 1]); Ymm t1 = Ymm(t[i * 2 + 0]); Ymm t2 = Ymm(t[i * 2 + 1]); - if(!m_sel.tlu) + if (!m_sel.tlu) { vpcmpeqd(t1, t1); vpgatherdd(dst, ptr[ebx + src * 4], t1); @@ -2934,7 +2980,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) { vextracti128(Xmm(t1.getIdx()), src, 1); - for(uint8 j = 0; j < 4; j++) + for (uint8 j = 0; j < 4; j++) { ReadTexel(dst, src, j); ReadTexel(t2, t1, j); @@ -2960,13 +3006,18 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Ymm& dst, const Ymm& addr, uin const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4]; - if(i == 0) vmovd(eax, Xmm(addr.getIdx())); - else vpextrd(eax, Xmm(addr.getIdx()), i); - - if(m_sel.tlu) movzx(eax, byte[ebx + eax]); + if (i == 0) + vmovd(eax, Xmm(addr.getIdx())); + else + vpextrd(eax, Xmm(addr.getIdx()), i); - if(i == 0) vmovd(Xmm(dst.getIdx()), src); - else vpinsrd(Xmm(dst.getIdx()), src, i); + if (m_sel.tlu) + movzx(eax, byte[ebx + eax]); + + if (i == 0) + vmovd(Xmm(dst.getIdx()), src); + else + vpinsrd(Xmm(dst.getIdx()), src, i); } diff --git a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.cpp b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.cpp index ea59e11cf8..fbaab96677 100644 --- a/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.cpp +++ b/plugins/GSdx/Renderers/SW/GSDrawScanlineCodeGenerator.x86.cpp @@ -38,7 +38,7 @@ void GSDrawScanlineCodeGenerator::Generate_SSE() Init_SSE(); - if(!m_sel.edge) + if (!m_sel.edge) { align(16); } @@ -72,7 +72,7 @@ L("loop"); // xmm6 = ga (!tme) // xmm7 = test - if(m_sel.mmin) + if (m_sel.mmin) { SampleTextureLOD_SSE(); } @@ -225,7 +225,7 @@ L("step"); // if(steps <= 0) break; - if(!m_sel.edge) + if (!m_sel.edge) { test(ecx, ecx); @@ -250,7 +250,7 @@ L("exit"); void GSDrawScanlineCodeGenerator::Init_SSE() { - if(!m_sel.notest) + if (!m_sel.notest) { // int skip = left & 3; @@ -280,8 +280,8 @@ void GSDrawScanlineCodeGenerator::Init_SSE() } else { - mov(ebx, edx); // left - xor(edx, edx); // skip + mov(ebx, edx); // left + xor(edx, edx); // skip lea(ecx, ptr[ecx - 4]); // steps } @@ -296,7 +296,7 @@ void GSDrawScanlineCodeGenerator::Init_SSE() lea(edi, ptr[ebx * 2]); add(edi, ptr[&m_local.gd->fzbc]); - if(m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) + if (m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip)) { // edx = &m_local.d[skip] @@ -307,13 +307,13 @@ void GSDrawScanlineCodeGenerator::Init_SSE() mov(ebx, ptr[esp + _v]); } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { - if(m_sel.fwrite && m_sel.fge || m_sel.zb) + if (m_sel.fwrite && m_sel.fge || m_sel.zb) { movaps(xmm0, ptr[ebx + offsetof(GSVertexSW, p)]); // v.p - if(m_sel.fwrite && m_sel.fge) + if (m_sel.fwrite && m_sel.fge) { // f = GSVector4i(vp).zzzzh().zzzz().add16(m_local.d[skip].f); @@ -325,7 +325,7 @@ void GSDrawScanlineCodeGenerator::Init_SSE() movdqa(ptr[&m_local.temp.f], xmm1); } - if(m_sel.zb) + if (m_sel.zb) { // z = vp.zzzz() + m_local.d[skip].z; @@ -339,20 +339,20 @@ void GSDrawScanlineCodeGenerator::Init_SSE() } else { - if(m_sel.ztest) + if (m_sel.ztest) { movdqa(xmm0, ptr[&m_local.p.z]); } } - if(m_sel.fb) + if (m_sel.fb) { - if(m_sel.edge || m_sel.tfx != TFX_NONE) + if (m_sel.edge || m_sel.tfx != TFX_NONE) { movaps(xmm4, ptr[ebx + offsetof(GSVertexSW, t)]); // v.t } - if(m_sel.edge) + if (m_sel.edge) { // m_local.temp.cov = GSVector4i::cast(v.t).zzzzh().wwww().srl16(9); @@ -363,9 +363,9 @@ void GSDrawScanlineCodeGenerator::Init_SSE() movdqa(ptr[&m_local.temp.cov], xmm3); } - if(m_sel.tfx != TFX_NONE) + if (m_sel.tfx != TFX_NONE) { - if(m_sel.fst) + if (m_sel.fst) { // GSVector4i vti(vt); @@ -379,13 +379,13 @@ void GSDrawScanlineCodeGenerator::Init_SSE() paddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]); - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) + if (m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { paddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]); } else { - if(m_sel.ltf) + if (m_sel.ltf) { pshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0)); @@ -420,9 +420,9 @@ void GSDrawScanlineCodeGenerator::Init_SSE() } } - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) + if (!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) { - if(m_sel.iip) + if (m_sel.iip) { // GSVector4i vc = GSVector4i(v.c); @@ -447,7 +447,7 @@ void GSDrawScanlineCodeGenerator::Init_SSE() } else { - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { movdqa(xmm5, ptr[&m_local.c.rb]); movdqa(xmm6, ptr[&m_local.c.ga]); @@ -467,11 +467,11 @@ void GSDrawScanlineCodeGenerator::Step_SSE() add(edi, 8); - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // z += m_local.d4.z; - if(m_sel.zb) + if (m_sel.zb) { movaps(xmm0, ptr[&m_local.temp.zo]); addps(xmm0, ptr[&m_local.d4.z]); @@ -481,7 +481,7 @@ void GSDrawScanlineCodeGenerator::Step_SSE() // f = f.add16(m_local.d4.f); - if(m_sel.fwrite && m_sel.fge) + if (m_sel.fwrite && m_sel.fge) { movdqa(xmm1, ptr[&m_local.temp.f]); paddw(xmm1, ptr[&m_local.d4.f]); @@ -490,17 +490,17 @@ void GSDrawScanlineCodeGenerator::Step_SSE() } else { - if(m_sel.ztest) + if (m_sel.ztest) { movdqa(xmm0, ptr[&m_local.p.z]); } } - if(m_sel.fb) + if (m_sel.fb) { - if(m_sel.tfx != TFX_NONE) + if (m_sel.tfx != TFX_NONE) { - if(m_sel.fst) + if (m_sel.fst) { // GSVector4i stq = m_local.d4.stq; @@ -513,7 +513,7 @@ void GSDrawScanlineCodeGenerator::Step_SSE() paddd(xmm2, ptr[&m_local.temp.s]); movdqa(ptr[&m_local.temp.s], xmm2); - if(m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) + if (m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin) { pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1)); paddd(xmm3, ptr[&m_local.temp.t]); @@ -550,9 +550,9 @@ void GSDrawScanlineCodeGenerator::Step_SSE() } } - if(!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) + if (!(m_sel.tfx == TFX_DECAL && m_sel.tcc)) { - if(m_sel.iip) + if (m_sel.iip) { // GSVector4i c = m_local.d4.c; @@ -578,7 +578,7 @@ void GSDrawScanlineCodeGenerator::Step_SSE() } else { - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { movdqa(xmm5, ptr[&m_local.c.rb]); movdqa(xmm6, ptr[&m_local.c.ga]); @@ -587,7 +587,7 @@ void GSDrawScanlineCodeGenerator::Step_SSE() } } - if(!m_sel.notest) + if (!m_sel.notest) { // test = m_test[7 + (steps & (steps >> 31))]; @@ -602,7 +602,7 @@ void GSDrawScanlineCodeGenerator::Step_SSE() void GSDrawScanlineCodeGenerator::TestZ_SSE(const Xmm& temp1, const Xmm& temp2) { - if(!m_sel.zb) + if (!m_sel.zb) { return; } @@ -615,9 +615,9 @@ void GSDrawScanlineCodeGenerator::TestZ_SSE(const Xmm& temp1, const Xmm& temp2) // GSVector4i zs = zi; - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { - if(m_sel.zoverflow) + if (m_sel.zoverflow) { // zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001()); @@ -649,30 +649,30 @@ void GSDrawScanlineCodeGenerator::TestZ_SSE(const Xmm& temp1, const Xmm& temp2) pminsd(xmm0, temp1); } - if(m_sel.zwrite) + if (m_sel.zwrite) { movdqa(ptr[&m_local.temp.zs], xmm0); } } - if(m_sel.ztest) + if (m_sel.ztest) { ReadPixel_SSE(xmm1, ebp); - if(m_sel.zwrite && m_sel.zpsm < 2) + if (m_sel.zwrite && m_sel.zpsm < 2) { movdqa(ptr[&m_local.temp.zd], xmm1); } // zd &= 0xffffffff >> m_sel.zpsm * 8; - if(m_sel.zpsm) + if (m_sel.zpsm) { pslld(xmm1, m_sel.zpsm * 8); psrld(xmm1, m_sel.zpsm * 8); } - if(m_sel.zoverflow || m_sel.zpsm == 0) + if (m_sel.zoverflow || m_sel.zpsm == 0) { // GSVector4i o = GSVector4i::x80000000(); @@ -686,21 +686,21 @@ void GSDrawScanlineCodeGenerator::TestZ_SSE(const Xmm& temp1, const Xmm& temp2) psubd(xmm1, temp1); } - switch(m_sel.ztst) + switch (m_sel.ztst) { - case ZTST_GEQUAL: - // test |= zso < zdo; // ~(zso >= zdo) - pcmpgtd(xmm1, xmm0); - por(xmm7, xmm1); - break; + case ZTST_GEQUAL: + // test |= zso < zdo; // ~(zso >= zdo) + pcmpgtd(xmm1, xmm0); + por(xmm7, xmm1); + break; - case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL - // test |= zso <= zdo; // ~(zso > zdo) - pcmpgtd(xmm0, xmm1); - pcmpeqd(temp1, temp1); - pxor(xmm0, temp1); - por(xmm7, xmm0); - break; + case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL + // test |= zso <= zdo; // ~(zso > zdo) + pcmpgtd(xmm0, xmm1); + pcmpeqd(temp1, temp1); + pxor(xmm0, temp1); + por(xmm7, xmm0); + break; } alltrue(xmm7); @@ -709,14 +709,14 @@ void GSDrawScanlineCodeGenerator::TestZ_SSE(const Xmm& temp1, const Xmm& temp2) void GSDrawScanlineCodeGenerator::SampleTexture_SSE() { - if(!m_sel.fb || m_sel.tfx == TFX_NONE) + if (!m_sel.fb || m_sel.tfx == TFX_NONE) { return; } mov(ebx, ptr[&m_local.gd->tex[0]]); - if(m_sel.tlu) + if (m_sel.tlu) { mov(edx, ptr[&m_local.gd->clut]); } @@ -724,7 +724,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_SSE() // ebx = tex // edx = clut - if(!m_sel.fst) + if (!m_sel.fst) { rcpps(xmm4, xmm4); @@ -734,7 +734,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_SSE() cvttps2dq(xmm2, xmm2); cvttps2dq(xmm3, xmm3); - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -751,7 +751,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_SSE() // xmm2 = u // xmm3 = v - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i uf = u.xxzzlh().srl16(1); @@ -760,7 +760,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_SSE() psrlw(xmm0, 12); movdqa(ptr[&m_local.temp.uf], xmm0); - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // GSVector4i vf = v.xxzzlh().srl16(1); @@ -777,7 +777,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_SSE() psrad(xmm3, 16); packssdw(xmm2, xmm3); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); @@ -820,7 +820,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture_SSE() // xmm1, xmm5, xmm6 = free // xmm7 = used - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i y1 = uv1.uph16() << tw; // GSVector4i x1 = uv1.upl16(); @@ -975,11 +975,11 @@ void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { pmaxsw(uv, ptr[&m_local.gd->t.min]); } @@ -995,7 +995,7 @@ void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv) { pand(uv, ptr[&m_local.gd->t.min]); - if(region) + if (region) { por(uv, ptr[&m_local.gd->t.max]); } @@ -1013,7 +1013,7 @@ void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv) pand(xmm1, xmm4); - if(region) + if (region) { por(xmm1, xmm5); } @@ -1038,11 +1038,11 @@ void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv0, const Xmm& uv1) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { movdqa(xmm4, ptr[&m_local.gd->t.min]); pmaxsw(uv0, xmm4); @@ -1065,7 +1065,7 @@ void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv0, const Xmm& uv1) pand(uv0, xmm4); pand(uv1, xmm4); - if(region) + if (region) { movdqa(xmm5, ptr[&m_local.gd->t.max]); por(uv0, xmm5); @@ -1088,7 +1088,7 @@ void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv0, const Xmm& uv1) pand(xmm1, xmm4); - if(region) + if (region) { por(xmm1, xmm5); } @@ -1109,7 +1109,7 @@ void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv0, const Xmm& uv1) pand(xmm1, xmm4); - if(region) + if (region) { por(xmm1, xmm5); } @@ -1126,7 +1126,7 @@ void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv0, const Xmm& uv1) void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() { - if(!m_sel.fb || m_sel.tfx == TFX_NONE) + if (!m_sel.fb || m_sel.tfx == TFX_NONE) { return; } @@ -1135,12 +1135,12 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() mov(ebp, (size_t)m_local.gd->tex); - if(m_sel.tlu) + if (m_sel.tlu) { mov(edx, ptr[&m_local.gd->clut]); } - if(!m_sel.fst) + if (!m_sel.fst) { rcpps(xmm0, xmm4); @@ -1158,7 +1158,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() // TODO: if the fractional part is not needed in round-off mode then there is a faster integer log2 (just take the exp) (but can we round it?) - if(!m_sel.lcm) + if (!m_sel.lcm) { // store u/v @@ -1207,7 +1207,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() maxps(xmm4, xmm0); cvtps2dq(xmm4, xmm4); - if(m_sel.mmin == 1) // round-off mode + if (m_sel.mmin == 1) // round-off mode { mov(eax, 0x8000); movd(xmm0, eax); @@ -1219,7 +1219,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() psrld(xmm4, 16); movdqa(ptr[&m_local.temp.lod.i], xmm4); - if(m_sel.mmin == 2) // trilinear mode + if (m_sel.mmin == 2) // trilinear mode { pshuflw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); @@ -1235,7 +1235,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() movdqa(xmm3, ptr[&m_local.temp.uv[1]]); movdqa(xmm6, xmm3); - movd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]); + movd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]); psrad(xmm2, xmm0); movdqa(xmm1, xmm4); psrlw(xmm1, xmm0); @@ -1302,7 +1302,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() // xmm5 = minuv // xmm6 = maxuv - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -1315,7 +1315,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() psubd(xmm3, xmm4); // GSVector4i uf = u.xxzzlh().srl16(1); - + pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); psrlw(xmm0, 12); @@ -1335,7 +1335,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() psrad(xmm3, 16); packssdw(xmm2, xmm3); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); @@ -1378,7 +1378,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() // xmm1, xmm5, xmm6 = free // xmm7 = used - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i x1 = uv1.upl16(); // GSVector4i y1 = uv1.uph16() << tw; @@ -1523,7 +1523,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() split16_2x8(xmm5, xmm6, xmm6); } - if(m_sel.mmin != 1) // !round-off mode + if (m_sel.mmin != 1) // !round-off mode { movdqa(ptr[&m_local.temp.trb], xmm5); movdqa(ptr[&m_local.temp.tga], xmm6); @@ -1540,7 +1540,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() psrlw(xmm5, 1); psrlw(xmm6, 1); - if(m_sel.ltf) + if (m_sel.ltf) { // u -= 0x8000; // v -= 0x8000; @@ -1553,7 +1553,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() psubd(xmm3, xmm4); // GSVector4i uf = u.xxzzlh().srl16(1); - + pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); psrlw(xmm0, 12); @@ -1573,7 +1573,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() psrad(xmm3, 16); packssdw(xmm2, xmm3); - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i uv1 = uv0.add16(GSVector4i::x0001()); @@ -1616,7 +1616,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() // xmm1, xmm5, xmm6 = free // xmm7 = used - if(m_sel.ltf) + if (m_sel.ltf) { // GSVector4i x1 = uv1.upl16(); // GSVector4i y1 = uv1.uph16() << tw; @@ -1785,11 +1785,11 @@ void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { pmaxsw(uv, xmm5); } @@ -1805,7 +1805,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv) { pand(uv, xmm5); - if(region) + if (region) { por(uv, xmm6); } @@ -1821,7 +1821,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv) pand(xmm1, xmm5); - if(region) + if (region) { por(xmm1, xmm6); } @@ -1848,11 +1848,11 @@ void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv0, const Xmm& uv1) int region = ((m_sel.wms | m_sel.wmt) >> 1) & 1; - if(wms_clamp == wmt_clamp) + if (wms_clamp == wmt_clamp) { - if(wms_clamp) + if (wms_clamp) { - if(region) + if (region) { pmaxsw(uv0, xmm5); pmaxsw(uv1, xmm5); @@ -1872,7 +1872,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv0, const Xmm& uv1) pand(uv0, xmm5); pand(uv1, xmm5); - if(region) + if (region) { por(uv0, xmm6); por(uv1, xmm6); @@ -1891,7 +1891,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv0, const Xmm& uv1) pand(xmm1, xmm5); - if(region) + if (region) { por(xmm1, xmm6); } @@ -1912,7 +1912,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv0, const Xmm& uv1) pand(xmm1, xmm5); - if(region) + if (region) { por(xmm1, xmm6); } @@ -1924,120 +1924,120 @@ void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv0, const Xmm& uv1) // clamp.blend8(repeat, m_local.gd->t.mask); - pblendvb(uv1, xmm1); + pblendvb(uv1, xmm1); } } void GSDrawScanlineCodeGenerator::AlphaTFX_SSE() { - if(!m_sel.fb) + if (!m_sel.fb) { return; } - switch(m_sel.tfx) + switch (m_sel.tfx) { - case TFX_MODULATE: + case TFX_MODULATE: - // GSVector4i ga = iip ? gaf : m_local.c.ga; - - movdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - - // gat = gat.modulate16<1>(ga).clamp8(); - - modulate16(xmm6, xmm4, 1); - - clamp16(xmm6, xmm3); - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { - psrlw(xmm4, 7); - - mix16(xmm6, xmm4, xmm3); - } - - break; - - case TFX_DECAL: - - // if(!tcc) gat = gat.mix16(ga.srl16(7)); - - if(!m_sel.tcc) - { // GSVector4i ga = iip ? gaf : m_local.c.ga; movdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - psrlw(xmm4, 7); + // gat = gat.modulate16<1>(ga).clamp8(); - mix16(xmm6, xmm4, xmm3); - } + modulate16(xmm6, xmm4, 1); - break; + clamp16(xmm6, xmm3); - case TFX_HIGHLIGHT: + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - // GSVector4i ga = iip ? gaf : m_local.c.ga; + if (!m_sel.tcc) + { + psrlw(xmm4, 7); - movdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - movdqa(xmm2, xmm4); + mix16(xmm6, xmm4, xmm3); + } - // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); + break; - psrlw(xmm4, 7); + case TFX_DECAL: - if(m_sel.tcc) - { - paddusb(xmm4, xmm6); - } + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - mix16(xmm6, xmm4, xmm3); + if (!m_sel.tcc) + { + // GSVector4i ga = iip ? gaf : m_local.c.ga; - break; + movdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - case TFX_HIGHLIGHT2: + psrlw(xmm4, 7); - // if(!tcc) gat = gat.mix16(ga.srl16(7)); + mix16(xmm6, xmm4, xmm3); + } + + break; + + case TFX_HIGHLIGHT: - if(!m_sel.tcc) - { // GSVector4i ga = iip ? gaf : m_local.c.ga; movdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); movdqa(xmm2, xmm4); + // gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); + psrlw(xmm4, 7); + if (m_sel.tcc) + { + paddusb(xmm4, xmm6); + } + mix16(xmm6, xmm4, xmm3); - } - break; + break; - case TFX_NONE: + case TFX_HIGHLIGHT2: - // gat = iip ? ga.srl16(7) : ga; + // if(!tcc) gat = gat.mix16(ga.srl16(7)); - if(m_sel.iip) - { - psrlw(xmm6, 7); - } + if (!m_sel.tcc) + { + // GSVector4i ga = iip ? gaf : m_local.c.ga; - break; + movdqa(xmm4, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); + movdqa(xmm2, xmm4); + + psrlw(xmm4, 7); + + mix16(xmm6, xmm4, xmm3); + } + + break; + + case TFX_NONE: + + // gat = iip ? ga.srl16(7) : ga; + + if (m_sel.iip) + { + psrlw(xmm6, 7); + } + + break; } - if(m_sel.aa1) + if (m_sel.aa1) { // gs_user figure 3-2: anti-aliasing after tfx, before tests, modifies alpha // FIXME: bios config screen cubes - if(!m_sel.abe) + if (!m_sel.abe) { // a = cov - if(m_sel.edge) + if (m_sel.edge) { movdqa(xmm0, ptr[&m_local.temp.cov]); } @@ -2058,7 +2058,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX_SSE() psllw(xmm0, 15); psrlw(xmm0, 8); - if(m_sel.edge) + if (m_sel.edge) { movdqa(xmm1, ptr[&m_local.temp.cov]); } @@ -2078,12 +2078,12 @@ void GSDrawScanlineCodeGenerator::AlphaTFX_SSE() void GSDrawScanlineCodeGenerator::ReadMask_SSE() { - if(m_sel.fwrite) + if (m_sel.fwrite) { movdqa(xmm3, ptr[&m_local.gd->fm]); } - if(m_sel.zwrite) + if (m_sel.zwrite) { movdqa(xmm4, ptr[&m_local.gd->zm]); } @@ -2091,158 +2091,158 @@ void GSDrawScanlineCodeGenerator::ReadMask_SSE() void GSDrawScanlineCodeGenerator::TestAlpha_SSE() { - switch(m_sel.atst) + switch (m_sel.atst) { - case ATST_NEVER: - // t = GSVector4i::xffffffff(); - pcmpeqd(xmm1, xmm1); - break; + case ATST_NEVER: + // t = GSVector4i::xffffffff(); + pcmpeqd(xmm1, xmm1); + break; - case ATST_ALWAYS: - return; + case ATST_ALWAYS: + return; - case ATST_LESS: - case ATST_LEQUAL: - // t = (ga >> 16) > m_local.gd->aref; - movdqa(xmm1, xmm6); - psrld(xmm1, 16); - pcmpgtd(xmm1, ptr[&m_local.gd->aref]); - break; + case ATST_LESS: + case ATST_LEQUAL: + // t = (ga >> 16) > m_local.gd->aref; + movdqa(xmm1, xmm6); + psrld(xmm1, 16); + pcmpgtd(xmm1, ptr[&m_local.gd->aref]); + break; - case ATST_EQUAL: - // t = (ga >> 16) != m_local.gd->aref; - movdqa(xmm1, xmm6); - psrld(xmm1, 16); - pcmpeqd(xmm1, ptr[&m_local.gd->aref]); - pcmpeqd(xmm0, xmm0); - pxor(xmm1, xmm0); - break; + case ATST_EQUAL: + // t = (ga >> 16) != m_local.gd->aref; + movdqa(xmm1, xmm6); + psrld(xmm1, 16); + pcmpeqd(xmm1, ptr[&m_local.gd->aref]); + pcmpeqd(xmm0, xmm0); + pxor(xmm1, xmm0); + break; - case ATST_GEQUAL: - case ATST_GREATER: - // t = (ga >> 16) < m_local.gd->aref; - movdqa(xmm0, xmm6); - psrld(xmm0, 16); - movdqa(xmm1, ptr[&m_local.gd->aref]); - pcmpgtd(xmm1, xmm0); - break; + case ATST_GEQUAL: + case ATST_GREATER: + // t = (ga >> 16) < m_local.gd->aref; + movdqa(xmm0, xmm6); + psrld(xmm0, 16); + movdqa(xmm1, ptr[&m_local.gd->aref]); + pcmpgtd(xmm1, xmm0); + break; - case ATST_NOTEQUAL: - // t = (ga >> 16) == m_local.gd->aref; - movdqa(xmm1, xmm6); - psrld(xmm1, 16); - pcmpeqd(xmm1, ptr[&m_local.gd->aref]); - break; + case ATST_NOTEQUAL: + // t = (ga >> 16) == m_local.gd->aref; + movdqa(xmm1, xmm6); + psrld(xmm1, 16); + pcmpeqd(xmm1, ptr[&m_local.gd->aref]); + break; } - switch(m_sel.afail) + switch (m_sel.afail) { - case AFAIL_KEEP: - // test |= t; - por(xmm7, xmm1); - alltrue(xmm7); - break; + case AFAIL_KEEP: + // test |= t; + por(xmm7, xmm1); + alltrue(xmm7); + break; - case AFAIL_FB_ONLY: - // zm |= t; - por(xmm4, xmm1); - break; + case AFAIL_FB_ONLY: + // zm |= t; + por(xmm4, xmm1); + break; - case AFAIL_ZB_ONLY: - // fm |= t; - por(xmm3, xmm1); - break; + case AFAIL_ZB_ONLY: + // fm |= t; + por(xmm3, xmm1); + break; - case AFAIL_RGB_ONLY: - // zm |= t; - por(xmm4, xmm1); - // fm |= t & GSVector4i::xff000000(); - psrld(xmm1, 24); - pslld(xmm1, 24); - por(xmm3, xmm1); - break; + case AFAIL_RGB_ONLY: + // zm |= t; + por(xmm4, xmm1); + // fm |= t & GSVector4i::xff000000(); + psrld(xmm1, 24); + pslld(xmm1, 24); + por(xmm3, xmm1); + break; } } void GSDrawScanlineCodeGenerator::ColorTFX_SSE() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - switch(m_sel.tfx) + switch (m_sel.tfx) { - case TFX_MODULATE: + case TFX_MODULATE: - // GSVector4i rb = iip ? rbf : m_local.c.rb; + // GSVector4i rb = iip ? rbf : m_local.c.rb; - // rbt = rbt.modulate16<1>(rb).clamp8(); + // rbt = rbt.modulate16<1>(rb).clamp8(); - modulate16(xmm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); + modulate16(xmm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - clamp16(xmm5, xmm1); + clamp16(xmm5, xmm1); - break; + break; - case TFX_DECAL: + case TFX_DECAL: - break; + break; - case TFX_HIGHLIGHT: - case TFX_HIGHLIGHT2: + case TFX_HIGHLIGHT: + case TFX_HIGHLIGHT2: - if(m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) - { - // GSVector4i ga = iip ? gaf : m_local.c.ga; + if (m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc) + { + // GSVector4i ga = iip ? gaf : m_local.c.ga; - movdqa(xmm2, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); - } + movdqa(xmm2, ptr[m_sel.iip ? &m_local.temp.ga : &m_local.c.ga]); + } - // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); + // gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); - movdqa(xmm1, xmm6); + movdqa(xmm1, xmm6); - modulate16(xmm6, xmm2, 1); + modulate16(xmm6, xmm2, 1); - pshuflw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); - pshufhw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); - psrlw(xmm2, 7); + pshuflw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); + pshufhw(xmm2, xmm2, _MM_SHUFFLE(3, 3, 1, 1)); + psrlw(xmm2, 7); - paddw(xmm6, xmm2); + paddw(xmm6, xmm2); - clamp16(xmm6, xmm0); + clamp16(xmm6, xmm0); - mix16(xmm6, xmm1, xmm0); + mix16(xmm6, xmm1, xmm0); - // GSVector4i rb = iip ? rbf : m_local.c.rb; + // GSVector4i rb = iip ? rbf : m_local.c.rb; - // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); + // rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); - modulate16(xmm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); + modulate16(xmm5, ptr[m_sel.iip ? &m_local.temp.rb : &m_local.c.rb], 1); - paddw(xmm5, xmm2); + paddw(xmm5, xmm2); - clamp16(xmm5, xmm0); + clamp16(xmm5, xmm0); - break; + break; - case TFX_NONE: + case TFX_NONE: - // rbt = iip ? rb.srl16(7) : rb; + // rbt = iip ? rb.srl16(7) : rb; - if(m_sel.iip) - { - psrlw(xmm5, 7); - } + if (m_sel.iip) + { + psrlw(xmm5, 7); + } - break; + break; } } void GSDrawScanlineCodeGenerator::Fog_SSE() { - if(!m_sel.fwrite || !m_sel.fge) + if (!m_sel.fwrite || !m_sel.fge) { return; } @@ -2263,7 +2263,7 @@ void GSDrawScanlineCodeGenerator::Fog_SSE() void GSDrawScanlineCodeGenerator::ReadFrame_SSE() { - if(!m_sel.fb) + if (!m_sel.fb) { return; } @@ -2274,7 +2274,7 @@ void GSDrawScanlineCodeGenerator::ReadFrame_SSE() add(ebx, ptr[edi]); and(ebx, HALF_VM_SIZE - 1); - if(!m_sel.rfb) + if (!m_sel.rfb) { return; } @@ -2284,7 +2284,7 @@ void GSDrawScanlineCodeGenerator::ReadFrame_SSE() void GSDrawScanlineCodeGenerator::TestDestAlpha_SSE() { - if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) + if (!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2) { return; } @@ -2293,9 +2293,9 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_SSE() movdqa(xmm1, xmm2); - if(m_sel.datm) + if (m_sel.datm) { - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { pxor(xmm0, xmm0); // psrld(xmm1, 15); @@ -2312,7 +2312,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_SSE() } else { - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { pslld(xmm1, 16); } @@ -2327,7 +2327,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha_SSE() void GSDrawScanlineCodeGenerator::WriteMask_SSE() { - if(m_sel.notest) + if (m_sel.notest) { return; } @@ -2335,12 +2335,12 @@ void GSDrawScanlineCodeGenerator::WriteMask_SSE() // fm |= test; // zm |= test; - if(m_sel.fwrite) + if (m_sel.fwrite) { por(xmm3, xmm7); } - if(m_sel.zwrite) + if (m_sel.zwrite) { por(xmm4, xmm7); } @@ -2349,19 +2349,19 @@ void GSDrawScanlineCodeGenerator::WriteMask_SSE() pcmpeqd(xmm1, xmm1); - if(m_sel.fwrite && m_sel.zwrite) + if (m_sel.fwrite && m_sel.zwrite) { movdqa(xmm0, xmm1); pcmpeqd(xmm1, xmm3); pcmpeqd(xmm0, xmm4); packssdw(xmm1, xmm0); } - else if(m_sel.fwrite) + else if (m_sel.fwrite) { pcmpeqd(xmm1, xmm3); packssdw(xmm1, xmm1); } - else if(m_sel.zwrite) + else if (m_sel.zwrite) { pcmpeqd(xmm1, xmm4); packssdw(xmm1, xmm1); @@ -2374,14 +2374,14 @@ void GSDrawScanlineCodeGenerator::WriteMask_SSE() void GSDrawScanlineCodeGenerator::WriteZBuf_SSE() { - if(!m_sel.zwrite) + if (!m_sel.zwrite) { return; } movdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]); - if(m_sel.ztest && m_sel.zpsm < 2) + if (m_sel.ztest && m_sel.zpsm < 2) { // zs = zs.blend8(zd, zm); @@ -2405,63 +2405,63 @@ void GSDrawScanlineCodeGenerator::WriteZBuf_SSE() void GSDrawScanlineCodeGenerator::AlphaBlend_SSE() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - if(m_sel.abe == 0 && m_sel.aa1 == 0) + if (m_sel.abe == 0 && m_sel.aa1 == 0) { return; } - if((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) + if ((m_sel.aba != m_sel.abb) && (m_sel.aba == 1 || m_sel.abb == 1 || m_sel.abc == 1) || m_sel.abd == 1) { - switch(m_sel.fpsm) + switch (m_sel.fpsm) { - case 0: - case 1: + case 0: + case 1: - // c[2] = fd & mask; - // c[3] = (fd >> 8) & mask; + // c[2] = fd & mask; + // c[3] = (fd >> 8) & mask; - split16_2x8(xmm0, xmm1, xmm2); + split16_2x8(xmm0, xmm1, xmm2); - break; + break; - case 2: + case 2: - // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); - // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); + // c[2] = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3); + // c[3] = ((fd & 0x8000) << 8) | ((fd & 0x03e0) >> 2); - movdqa(xmm0, xmm2); - movdqa(xmm1, xmm2); - movdqa(xmm4, xmm2); + movdqa(xmm0, xmm2); + movdqa(xmm1, xmm2); + movdqa(xmm4, xmm2); - pcmpeqd(xmm7, xmm7); - psrld(xmm7, 27); // 0x0000001f - pand(xmm0, xmm7); - pslld(xmm0, 3); + pcmpeqd(xmm7, xmm7); + psrld(xmm7, 27); // 0x0000001f + pand(xmm0, xmm7); + pslld(xmm0, 3); - pslld(xmm7, 10); // 0x00007c00 - pand(xmm4, xmm7); - pslld(xmm4, 9); + pslld(xmm7, 10); // 0x00007c00 + pand(xmm4, xmm7); + pslld(xmm4, 9); - por(xmm0, xmm4); + por(xmm0, xmm4); - movdqa(xmm4, xmm1); + movdqa(xmm4, xmm1); - psrld(xmm7, 5); // 0x000003e0 - pand(xmm1, xmm7); - psrld(xmm1, 2); + psrld(xmm7, 5); // 0x000003e0 + pand(xmm1, xmm7); + psrld(xmm1, 2); - psllw(xmm7, 10); // 0x00008000 - pand(xmm4, xmm7); - pslld(xmm4, 8); + psllw(xmm7, 10); // 0x00008000 + pand(xmm4, xmm7); + pslld(xmm4, 8); - por(xmm1, xmm4); + por(xmm1, xmm4); - break; + break; } } @@ -2470,46 +2470,56 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE() // xmm2, xmm3 = used // xmm4, xmm7 = free - if(m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) + if (m_sel.pabe || (m_sel.aba != m_sel.abb) && (m_sel.abb == 0 || m_sel.abd == 0)) { movdqa(xmm4, xmm5); } - if(m_sel.aba != m_sel.abb) + if (m_sel.aba != m_sel.abb) { // rb = c[aba * 2 + 0]; - switch(m_sel.aba) + switch (m_sel.aba) { - case 0: break; - case 1: movdqa(xmm5, xmm0); break; - case 2: pxor(xmm5, xmm5); break; + case 0: + break; + case 1: + movdqa(xmm5, xmm0); + break; + case 2: + pxor(xmm5, xmm5); + break; } // rb = rb.sub16(c[abb * 2 + 0]); - switch(m_sel.abb) + switch (m_sel.abb) { - case 0: psubw(xmm5, xmm4); break; - case 1: psubw(xmm5, xmm0); break; - case 2: break; + case 0: + psubw(xmm5, xmm4); + break; + case 1: + psubw(xmm5, xmm0); + break; + case 2: + break; } - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) + if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // GSVector4i a = abc < 2 ? c[abc * 2 + 1].yywwlh().sll16(7) : m_local.gd->afix; - switch(m_sel.abc) + switch (m_sel.abc) { - case 0: - case 1: - pshuflw(xmm7, m_sel.abc ? xmm1 : xmm6, _MM_SHUFFLE(3, 3, 1, 1)); - pshufhw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1)); - psllw(xmm7, 7); - break; - case 2: - movdqa(xmm7, ptr[&m_local.gd->afix]); - break; + case 0: + case 1: + pshuflw(xmm7, m_sel.abc ? xmm1 : xmm6, _MM_SHUFFLE(3, 3, 1, 1)); + pshufhw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1)); + psllw(xmm7, 7); + break; + case 2: + movdqa(xmm7, ptr[&m_local.gd->afix]); + break; } // rb = rb.modulate16<1>(a); @@ -2519,26 +2529,36 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE() // rb = rb.add16(c[abd * 2 + 0]); - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: paddw(xmm5, xmm4); break; - case 1: paddw(xmm5, xmm0); break; - case 2: break; + case 0: + paddw(xmm5, xmm4); + break; + case 1: + paddw(xmm5, xmm0); + break; + case 2: + break; } } else { // rb = c[abd * 2 + 0]; - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: break; - case 1: movdqa(xmm5, xmm0); break; - case 2: pxor(xmm5, xmm5); break; + case 0: + break; + case 1: + movdqa(xmm5, xmm0); + break; + case 2: + pxor(xmm5, xmm5); + break; } } - if(m_sel.pabe) + if (m_sel.pabe) { // mask = (c[1] << 8).sra32(31); @@ -2560,27 +2580,37 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE() movdqa(xmm4, xmm6); - if(m_sel.aba != m_sel.abb) + if (m_sel.aba != m_sel.abb) { // ga = c[aba * 2 + 1]; - switch(m_sel.aba) + switch (m_sel.aba) { - case 0: break; - case 1: movdqa(xmm6, xmm1); break; - case 2: pxor(xmm6, xmm6); break; + case 0: + break; + case 1: + movdqa(xmm6, xmm1); + break; + case 2: + pxor(xmm6, xmm6); + break; } // ga = ga.sub16(c[abeb * 2 + 1]); - switch(m_sel.abb) + switch (m_sel.abb) { - case 0: psubw(xmm6, xmm4); break; - case 1: psubw(xmm6, xmm1); break; - case 2: break; + case 0: + psubw(xmm6, xmm4); + break; + case 1: + psubw(xmm6, xmm1); + break; + case 2: + break; } - if(!(m_sel.fpsm == 1 && m_sel.abc == 1)) + if (!(m_sel.fpsm == 1 && m_sel.abc == 1)) { // ga = ga.modulate16<1>(a); @@ -2589,22 +2619,32 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE() // ga = ga.add16(c[abd * 2 + 1]); - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: paddw(xmm6, xmm4); break; - case 1: paddw(xmm6, xmm1); break; - case 2: break; + case 0: + paddw(xmm6, xmm4); + break; + case 1: + paddw(xmm6, xmm1); + break; + case 2: + break; } } else { // ga = c[abd * 2 + 1]; - switch(m_sel.abd) + switch (m_sel.abd) { - case 0: break; - case 1: movdqa(xmm6, xmm1); break; - case 2: pxor(xmm6, xmm6); break; + case 0: + break; + case 1: + movdqa(xmm6, xmm1); + break; + case 2: + pxor(xmm6, xmm6); + break; } } @@ -2614,7 +2654,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE() // xmm2, xmm3 = used // xmm0, xmm1, xmm7 = free - if(m_sel.pabe) + if (m_sel.pabe) { psrld(xmm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16) @@ -2624,7 +2664,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE() } else { - if(m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx + if (m_sel.fpsm != 1) // TODO: fm == 0xffxxxxxx { mix16(xmm6, xmm4, xmm7); } @@ -2633,12 +2673,12 @@ void GSDrawScanlineCodeGenerator::AlphaBlend_SSE() void GSDrawScanlineCodeGenerator::WriteFrame_SSE() { - if(!m_sel.fwrite) + if (!m_sel.fwrite) { return; } - if(m_sel.fpsm == 2 && m_sel.dthe) + if (m_sel.fpsm == 2 && m_sel.dthe) { mov(eax, ptr[esp + _top]); and(eax, 3); @@ -2648,7 +2688,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame_SSE() paddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]); } - if(m_sel.colclamp == 0) + if (m_sel.colclamp == 0) { // c[0] &= 0x000000ff; // c[1] &= 0x000000ff; @@ -2666,7 +2706,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame_SSE() punpckhwd(xmm7, xmm6); packuswb(xmm5, xmm7); - if(m_sel.fba && m_sel.fpsm != 1) + if (m_sel.fba && m_sel.fpsm != 1) { // fs |= 0x80000000; @@ -2675,7 +2715,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame_SSE() por(xmm5, xmm7); } - if(m_sel.fpsm == 2) + if (m_sel.fpsm == 2) { // GSVector4i rb = fs & 0x00f800f8; // GSVector4i ga = fs & 0x8000f800; @@ -2707,7 +2747,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame_SSE() por(xmm5, xmm7); } - if(m_sel.rfb) + if (m_sel.rfb) { // fs = fs.blend(fd, fm); @@ -2727,9 +2767,9 @@ void GSDrawScanlineCodeGenerator::ReadPixel_SSE(const Xmm& dst, const Reg32& add void GSDrawScanlineCodeGenerator::WritePixel_SSE(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz) { - if(m_sel.notest) + if (m_sel.notest) { - if(fast) + if (fast) { movq(qword[addr * 2 + (size_t)m_local.gd->vm], src); movhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src); @@ -2744,7 +2784,7 @@ void GSDrawScanlineCodeGenerator::WritePixel_SSE(const Xmm& src, const Reg32& ad } else { - if(fast) + if (fast) { // if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs); // if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs); @@ -2795,28 +2835,34 @@ void GSDrawScanlineCodeGenerator::WritePixel_SSE(const Xmm& src, const Reg32& ad { Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2]; - switch(psm) + switch (psm) { - case 0: - if(i == 0) movd(dst, src); - else { - pextrd(dst, src, i); - } - break; - case 1: - if(i == 0) movd(eax, src); - else { - pextrd(eax, src, i); - } - xor(eax, dst); - and(eax, 0xffffff); - xor(dst, eax); - break; - case 2: - if(i == 0) movd(eax, src); - else pextrw(eax, src, i * 2); - mov(dst, ax); - break; + case 0: + if (i == 0) + movd(dst, src); + else + { + pextrd(dst, src, i); + } + break; + case 1: + if (i == 0) + movd(eax, src); + else + { + pextrd(eax, src, i); + } + xor(eax, dst); + and(eax, 0xffffff); + xor(dst, eax); + break; + case 2: + if (i == 0) + movd(eax, src); + else + pextrw(eax, src, i * 2); + mov(dst, ax); + break; } } @@ -2843,27 +2889,27 @@ void GSDrawScanlineCodeGenerator::ReadTexel_SSE(int pixels, int mip_offset) const GSVector4i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i; - if(m_sel.mmin && !m_sel.lcm) + if (m_sel.mmin && !m_sel.lcm) { const int r[] = {5, 6, 2, 4, 0, 1, 3, 7}; - if(pixels == 4) + if (pixels == 4) { movdqa(ptr[&m_local.temp.test], xmm7); } - for(uint8 j = 0; j < 4; j++) + for (uint8 j = 0; j < 4; j++) { mov(ebx, ptr[&lod_i->u32[j]]); mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); - for(int i = 0; i < pixels; i++) + for (int i = 0; i < pixels; i++) { ReadTexel_SSE(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); } } - if(pixels == 4) + if (pixels == 4) { movdqa(xmm5, xmm7); movdqa(xmm7, ptr[&m_local.temp.test]); @@ -2871,7 +2917,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel_SSE(int pixels, int mip_offset) } else { - if(m_sel.mmin && m_sel.lcm) + if (m_sel.mmin && m_sel.lcm) { mov(ebx, ptr[&lod_i->u32[0]]); mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]); @@ -2879,9 +2925,9 @@ void GSDrawScanlineCodeGenerator::ReadTexel_SSE(int pixels, int mip_offset) const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; - for(int i = 0; i < pixels; i++) + for (int i = 0; i < pixels; i++) { - for(uint8 j = 0; j < 4; j++) + for (uint8 j = 0; j < 4; j++) { ReadTexel_SSE(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j); } @@ -2895,13 +2941,18 @@ void GSDrawScanlineCodeGenerator::ReadTexel_SSE(const Xmm& dst, const Xmm& addr, ASSERT(i == 0 || m_cpu.has(util::Cpu::tSSE41)); - if(i == 0) movd(eax, addr); - else pextrd(eax, addr, i); + if (i == 0) + movd(eax, addr); + else + pextrd(eax, addr, i); - if(m_sel.tlu) movzx(eax, byte[ebx + eax]); + if (m_sel.tlu) + movzx(eax, byte[ebx + eax]); - if(i == 0) movd(dst, src); - else pinsrd(dst, src, i); + if (i == 0) + movd(dst, src); + else + pinsrd(dst, src, i); } #endif diff --git a/plugins/GSdx/Renderers/SW/GSRasterizer.cpp b/plugins/GSdx/Renderers/SW/GSRasterizer.cpp index c547c61488..ab545fc5ec 100644 --- a/plugins/GSdx/Renderers/SW/GSRasterizer.cpp +++ b/plugins/GSdx/Renderers/SW/GSRasterizer.cpp @@ -26,7 +26,8 @@ int GSRasterizerData::s_counter = 0; -static int compute_best_thread_height(int threads) { +static int compute_best_thread_height(int threads) +{ // - for more threads screen segments should be smaller to better distribute the pixels // - but not too small to keep the threading overhead low // - ideal value between 3 and 5, or log2(64 / number of threads) @@ -57,9 +58,9 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe int row = 0; - while(row < rows) + while (row < rows) { - for(int i = 0; i < threads; i++, row++) + for (int i = 0; i < threads; i++, row++) { m_scanline[row] = i == id ? 1 : 0; } @@ -70,7 +71,8 @@ GSRasterizer::~GSRasterizer() { _aligned_free(m_scanline); - if(m_edge.buff != NULL) vmfree(m_edge.buff, sizeof(GSVertexSW) * 2048); + if (m_edge.buff != NULL) + vmfree(m_edge.buff, sizeof(GSVertexSW) * 2048); delete m_ds; } @@ -89,9 +91,9 @@ bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const top = top >> m_thread_height; bottom = (bottom + (1 << m_thread_height) - 1) >> m_thread_height; - while(top < bottom) + while (top < bottom) { - if(m_scanline[top++]) + if (m_scanline[top++]) { return true; } @@ -104,9 +106,10 @@ int GSRasterizer::FindMyNextScanline(int top) const { int i = top >> m_thread_height; - if(m_scanline[i] == 0) + if (m_scanline[i] == 0) { - while(m_scanline[++i] == 0); + while (m_scanline[++i] == 0) + ; top = i << m_thread_height; } @@ -123,7 +126,7 @@ int GSRasterizer::GetPixels(bool reset) { int pixels = m_pixels.sum; - if(reset) + if (reset) { m_pixels.sum = 0; } @@ -135,7 +138,8 @@ void GSRasterizer::Draw(GSRasterizerData* data) { GSPerfMonAutoTimer pmat(m_perfmon, GSPerfMon::WorkerDraw0 + m_id); - if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return; + if (data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) + return; m_pixels.actual = 0; m_pixels.total = 0; @@ -158,73 +162,91 @@ void GSRasterizer::Draw(GSRasterizerData* data) m_fscissor_x = GSVector4(data->scissor).xzxz(); m_fscissor_y = GSVector4(data->scissor).ywyw(); - switch(data->primclass) + switch (data->primclass) { - case GS_POINT_CLASS: + case GS_POINT_CLASS: - if(scissor_test) - { - DrawPoint(vertex, data->vertex_count, index, data->index_count); - } - else - { - DrawPoint(vertex, data->vertex_count, index, data->index_count); - } + if (scissor_test) + { + DrawPoint(vertex, data->vertex_count, index, data->index_count); + } + else + { + DrawPoint(vertex, data->vertex_count, index, data->index_count); + } - break; + break; - case GS_LINE_CLASS: + case GS_LINE_CLASS: - if(index != NULL) - { - do {DrawLine(vertex, index); index += 2;} - while(index < index_end); - } - else - { - do {DrawLine(vertex, tmp_index); vertex += 2;} - while(vertex < vertex_end); - } + if (index != NULL) + { + do + { + DrawLine(vertex, index); + index += 2; + } while (index < index_end); + } + else + { + do + { + DrawLine(vertex, tmp_index); + vertex += 2; + } while (vertex < vertex_end); + } - break; + break; - case GS_TRIANGLE_CLASS: + case GS_TRIANGLE_CLASS: - if(index != NULL) - { - do {DrawTriangle(vertex, index); index += 3;} - while(index < index_end); - } - else - { - do {DrawTriangle(vertex, tmp_index); vertex += 3;} - while(vertex < vertex_end); - } + if (index != NULL) + { + do + { + DrawTriangle(vertex, index); + index += 3; + } while (index < index_end); + } + else + { + do + { + DrawTriangle(vertex, tmp_index); + vertex += 3; + } while (vertex < vertex_end); + } - break; + break; - case GS_SPRITE_CLASS: + case GS_SPRITE_CLASS: - if(index != NULL) - { - do {DrawSprite(vertex, index); index += 2;} - while(index < index_end); - } - else - { - do {DrawSprite(vertex, tmp_index); vertex += 2;} - while(vertex < vertex_end); - } + if (index != NULL) + { + do + { + DrawSprite(vertex, index); + index += 2; + } while (index < index_end); + } + else + { + do + { + DrawSprite(vertex, tmp_index); + vertex += 2; + } while (vertex < vertex_end); + } - break; + break; - default: - __assume(0); + default: + __assume(0); } - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 _mm256_zeroupper(); - #endif +#endif data->pixels = m_pixels.actual; @@ -235,20 +257,20 @@ void GSRasterizer::Draw(GSRasterizerData* data) m_ds->EndDraw(data->frame, ticks, m_pixels.actual, m_pixels.total); } -template +template void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count) { - if(index != NULL) + if (index != NULL) { - for(int i = 0; i < index_count; i++, index++) + for (int i = 0; i < index_count; i++, index++) { const GSVertexSW& v = vertex[*index]; GSVector4i p(v.p); - if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom) + if (!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom) { - if(IsOneOfMyScanlines(p.y)) + if (IsOneOfMyScanlines(p.y)) { m_ds->SetupPrim(vertex, index, GSVertexSW::zero()); @@ -261,15 +283,15 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u { uint32 tmp_index[1] = {0}; - for(int i = 0; i < vertex_count; i++, vertex++) + for (int i = 0; i < vertex_count; i++, vertex++) { const GSVertexSW& v = vertex[0]; GSVector4i p(v.p); - if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom) + if (!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom) { - if(IsOneOfMyScanlines(p.y)) + if (IsOneOfMyScanlines(p.y)) { m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero()); @@ -291,7 +313,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index) int i = (dp < dp.yxwz()).mask() & 1; // |dx| <= |dy| - if(m_ds->HasEdge()) + if (m_ds->HasEdge()) { DrawEdge(v0, v1, dv, i, 0); DrawEdge(v0, v1, dv, i, 1); @@ -303,9 +325,9 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index) GSVector4i dpi(dp); - if(dpi.y == 0) + if (dpi.y == 0) { - if(dpi.x > 0) + if (dpi.x > 0) { // shortcut for horizontal lines @@ -319,7 +341,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index) GSVector4i p(scan.p); - if(m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y)) + if (m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y)) { GSVector4 lrf = scan.p.upl(v1.p.blend32(v0.p, mask)).ceil(); GSVector4 l = lrf.max(m_fscissor_x); @@ -331,7 +353,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index) int pixels = right - left; - if(pixels > 0) + if (pixels > 0) { GSVertexSW dscan = dv / dv.p.xxxx(); @@ -349,20 +371,20 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index) int steps = dpi.v[i]; - if(steps > 0) + if (steps > 0) { GSVertexSW edge = v0; GSVertexSW dedge = dv / GSVector4(dp.v[i]); GSVertexSW* RESTRICT e = m_edge.buff; - while(1) + while (1) { GSVector4i p(edge.p); - if(m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom) + if (m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom) { - if(IsOneOfMyScanlines(p.y)) + if (IsOneOfMyScanlines(p.y)) { AddScanline(e, 1, p.x, p.y, edge); @@ -370,7 +392,8 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index) } } - if(--steps == 0) break; + if (--steps == 0) + break; edge += dedge; } @@ -428,7 +451,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) // if(i == 1) => y0 == y1 < y2 // if(i == 4) => y0 < y1 == y2 - if(m1 == 7) return; // y0 == y1 == y2 + if (m1 == 7) // y0 == y1 == y2 + return; GSVector4 tbf = y0011.xzxz(y1221).ceil(); GSVector4 tbmax = tbf.max(m_fscissor_y); @@ -447,7 +471,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) int m2 = cross.upl(cross == GSVector4::zero()).mask(); - if(m2 & 2) return; + if (m2 & 2) + return; m2 &= 1; @@ -476,9 +501,9 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) dedge.p = dv[0].p * _dxy01c.zzzz().extract<0>() - dv[1].p * _dxy01c.xxxx().extract<0>(); dedge.tc = dv[0].tc * _dxy01c.zzzz() - dv[1].tc * _dxy01c.xxxx(); - if(m1 & 1) + if (m1 & 1) { - if(tb.y < tb.w) + if (tb.y < tb.w) { edge = _v[i[1 - m2]]; @@ -490,7 +515,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) } else { - if(tb.x < tb.z) + if (tb.x < tb.z) { edge = v0; @@ -500,7 +525,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p); } - if(tb.y < tb.w) + if (tb.y < tb.w) { edge = v1; @@ -513,10 +538,10 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) Flush(vertex, index, (GSVertexSW&)dscan); - if(m_ds->HasEdge()) + if (m_ds->HasEdge()) { - GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy| - GSVector4 b = dx < GSVector4::zero(); // dx < 0 + GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy| + GSVector4 b = dx < GSVector4::zero(); // dx < 0 GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0 int orientation = a.mask(); @@ -541,7 +566,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, c top = FindMyNextScanline(top); - while(top < bottom) + while (top < bottom) { GSVector8 dy(GSVector4(top) - p0.yyyy()); @@ -559,7 +584,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, c int pixels = right - left; - if(pixels > 0) + if (pixels > 0) { scan.tc = edge.tc + dedge.tc * dy; @@ -573,7 +598,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, c top++; - if(!IsOneOfMyScanlines(top)) + if (!IsOneOfMyScanlines(top)) { top += (m_threads - 1) << m_thread_height; } @@ -615,7 +640,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) // if(i == 1) => y0 == y1 < y2 // if(i == 4) => y0 < y1 == y2 - if(m1 == 7) return; // y0 == y1 == y2 + if (m1 == 7) + return; // y0 == y1 == y2 GSVector4 tbf = y0011.xzxz(y1221).ceil(); GSVector4 tbmax = tbf.max(m_fscissor_y); @@ -634,7 +660,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) int m2 = cross.upl(cross == GSVector4::zero()).mask(); - if(m2 & 2) return; + if (m2 & 2) + return; m2 &= 1; @@ -665,9 +692,9 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) dedge.t = dv[0].t * dxy01c.zzzz() - dv[1].t * dxy01c.xxxx(); dedge.c = dv[0].c * dxy01c.zzzz() - dv[1].c * dxy01c.xxxx(); - if(m1 & 1) + if (m1 & 1) { - if(tb.y < tb.w) + if (tb.y < tb.w) { edge = vertex[i[1 - m2]]; @@ -679,7 +706,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) } else { - if(tb.x < tb.z) + if (tb.x < tb.z) { edge = v0; @@ -689,7 +716,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p); } - if(tb.y < tb.w) + if (tb.y < tb.w) { edge = v1; @@ -702,10 +729,10 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index) Flush(vertex, index, dscan); - if(m_ds->HasEdge()) + if (m_ds->HasEdge()) { - GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy| - GSVector4 b = dx < GSVector4::zero(); // dx < 0 + GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy| + GSVector4 b = dx < GSVector4::zero(); // dx < 0 GSVector4 c = cross < GSVector4::zero(); // longest.p.x < 0 int orientation = a.mask(); @@ -730,7 +757,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co top = FindMyNextScanline(top); - while(top < bottom) + while (top < bottom) { GSVector4 dy = GSVector4(top) - p0.yyyy(); @@ -748,7 +775,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co int pixels = right - left; - if(pixels > 0) + if (pixels > 0) { scan.t = edge.t + dedge.t * dy; scan.c = edge.c + dedge.c * dy; @@ -764,7 +791,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co top++; - if(!IsOneOfMyScanlines(top)) + if (!IsOneOfMyScanlines(top)) { top += (m_threads - 1) << m_thread_height; } @@ -795,13 +822,14 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index) r = r.rintersect(m_scissor); - if(r.rempty()) return; + if (r.rempty()) + return; GSVertexSW scan = v[0]; - if(m_ds->IsSolidRect()) + if (m_ds->IsSolidRect()) { - if(m_threads == 1) + if (m_threads == 1) { m_ds->DrawRect(r, scan); @@ -815,7 +843,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index) int top = FindMyNextScanline(r.top); int bottom = r.bottom; - while(top < bottom) + while (top < bottom) { r.top = top; r.bottom = std::min((top + (1 << m_thread_height)) & ~((1 << m_thread_height) - 1), bottom); @@ -848,19 +876,20 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index) int m = (prestep == GSVector4::zero()).mask(); - if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy(); - if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx(); + if ((m & 2) == 0) scan.t += dedge.t * prestep.yyyy(); + if ((m & 1) == 0) scan.t += dscan.t * prestep.xxxx(); m_ds->SetupPrim(vertex, index, dscan); - while(1) + while (1) { - if(IsOneOfMyScanlines(r.top)) + if (IsOneOfMyScanlines(r.top)) { DrawScanline(r.width(), r.left, r.top, scan); } - if(++r.top >= r.bottom) break; + if (++r.top >= r.bottom) + break; scan.t += dedge.t; } @@ -881,7 +910,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count]; - if(orientation) + if (orientation) { GSVector4 tbf = v0.p.yyyy(v1.p).ceil(); // t t b b GSVector4 tbmax = tbf.max(m_fscissor_y); // max(t, st) max(t, sb) max(b, st) max(b, sb) @@ -892,12 +921,13 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS GSVertexSW edge, dedge; - if((dv.p >= GSVector4::zero()).mask() & 2) + if ((dv.p >= GSVector4::zero()).mask() & 2) { - top = tb.extract32<0>(); // max(t, st) + top = tb.extract32<0>(); // max(t, st) bottom = tb.extract32<3>(); // min(b, sb) - if(top >= bottom) return; + if (top >= bottom) + return; edge = v0; dedge = dv / dv.p.yyyy(); @@ -906,10 +936,11 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS } else { - top = tb.extract32<1>(); // max(b, st) + top = tb.extract32<1>(); // max(b, st) bottom = tb.extract32<2>(); // min(t, sb) - if(top >= bottom) return; + if (top >= bottom) + return; edge = v1; dedge = dv / dv.p.yyyy(); @@ -922,14 +953,14 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS int x = p.extract32<0>(); int dx = p.extract32<1>(); - if(side) + if (side) { - while(1) + while (1) { int xi = x >> 16; int xf = x & 0xffff; - if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(top)) + if (m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(top)) { AddScanline(e, 1, xi, top, edge); @@ -938,7 +969,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS e++; } - if(++top >= bottom) break; + if (++top >= bottom) + break; edge += dedge; x += dx; @@ -946,12 +978,12 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS } else { - while(1) + while (1) { int xi = (x >> 16) + 1; int xf = x & 0xffff; - if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(top)) + if (m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(top)) { AddScanline(e, 1, xi, top, edge); @@ -960,7 +992,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS e++; } - if(++top >= bottom) break; + if (++top >= bottom) + break; edge += dedge; x += dx; @@ -978,12 +1011,13 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS GSVertexSW edge, dedge; - if((dv.p >= GSVector4::zero()).mask() & 1) + if ((dv.p >= GSVector4::zero()).mask() & 1) { - left = lr.extract32<0>(); // max(l, sl) + left = lr.extract32<0>(); // max(l, sl) right = lr.extract32<3>(); // min(r, sr) - if(left >= right) return; + if (left >= right) + return; edge = v0; dedge = dv / dv.p.xxxx(); @@ -992,10 +1026,11 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS } else { - left = lr.extract32<1>(); // max(r, sl) + left = lr.extract32<1>(); // max(r, sl) right = lr.extract32<2>(); // min(l, sr) - if(left >= right) return; + if (left >= right) + return; edge = v1; dedge = dv / dv.p.xxxx(); @@ -1008,14 +1043,14 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS int y = p.extract32<2>(); int dy = p.extract32<3>(); - if(side) + if (side) { - while(1) + while (1) { int yi = y >> 16; int yf = y & 0xffff; - if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi)) + if (m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi)) { AddScanline(e, 1, left, yi, edge); @@ -1024,7 +1059,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS e++; } - if(++left >= right) break; + if (++left >= right) + break; edge += dedge; y += dy; @@ -1032,12 +1068,12 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS } else { - while(1) + while (1) { int yi = (y >> 16) + 1; int yf = y & 0xffff; - if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi)) + if (m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi)) { AddScanline(e, 1, left, yi, edge); @@ -1046,7 +1082,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS e++; } - if(++left >= right) break; + if (++left >= right) + break; edge += dedge; y += dy; @@ -1072,14 +1109,14 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS int count = m_edge.count; - if(count > 0) + if (count > 0) { m_ds->SetupPrim(vertex, index, dscan); const GSVertexSW* RESTRICT e = m_edge.buff; const GSVertexSW* RESTRICT ee = e + count; - if(!edge) + if (!edge) { do { @@ -1088,8 +1125,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS int top = e->_pad.i32[2]; DrawScanline(pixels, left, top, *e++); - } - while(e < ee); + } while (e < ee); } else { @@ -1100,8 +1136,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS int top = e->_pad.i32[2]; DrawEdge(pixels, left, top, *e++); - } - while(e < ee); + } while (e < ee); } m_edge.count = 0; @@ -1147,9 +1182,9 @@ GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon) int row = 0; - while(row < rows) + while (row < rows) { - for(int i = 0; i < threads; i++, row++) + for (int i = 0; i < threads; i++, row++) { m_scanline[row] = (uint8)i; } @@ -1170,7 +1205,7 @@ void GSRasterizerList::Queue(const std::shared_ptr& data) int top = r.top >> m_thread_height; int bottom = std::min((r.bottom + (1 << m_thread_height) - 1) >> m_thread_height, top + m_workers.size()); - while(top < bottom) + while (top < bottom) { m_workers[m_scanline[top++]]->Push(data); } @@ -1178,9 +1213,9 @@ void GSRasterizerList::Queue(const std::shared_ptr& data) void GSRasterizerList::Sync() { - if(!IsSynced()) + if (!IsSynced()) { - for(size_t i = 0; i < m_workers.size(); i++) + for (size_t i = 0; i < m_workers.size(); i++) { m_workers[i]->Wait(); } @@ -1191,9 +1226,9 @@ void GSRasterizerList::Sync() bool GSRasterizerList::IsSynced() const { - for(size_t i = 0; i < m_workers.size(); i++) + for (size_t i = 0; i < m_workers.size(); i++) { - if(!m_workers[i]->IsEmpty()) + if (!m_workers[i]->IsEmpty()) { return false; } @@ -1206,7 +1241,7 @@ int GSRasterizerList::GetPixels(bool reset) { int pixels = 0; - for(size_t i = 0; i < m_workers.size(); i++) + for (size_t i = 0; i < m_workers.size(); i++) { pixels += m_r[i]->GetPixels(reset); } diff --git a/plugins/GSdx/Renderers/SW/GSRasterizer.h b/plugins/GSdx/Renderers/SW/GSRasterizer.h index 15857ec958..c7822726ee 100644 --- a/plugins/GSdx/Renderers/SW/GSRasterizer.h +++ b/plugins/GSdx/Renderers/SW/GSRasterizer.h @@ -46,7 +46,7 @@ public: int pixels; int counter; - GSRasterizerData() + GSRasterizerData() : scissor(GSVector4i::zero()) , bbox(GSVector4i::zero()) , primclass(GS_INVALID_CLASS) @@ -62,9 +62,10 @@ public: counter = s_counter++; } - virtual ~GSRasterizerData() + virtual ~GSRasterizerData() { - if(buff != NULL) _aligned_free(buff); + if (buff != NULL) + _aligned_free(buff); } }; @@ -72,7 +73,7 @@ class IDrawScanline : public GSAlignedClass<32> { public: typedef void (*SetupPrimPtr)(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan); - typedef void (__fastcall *DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan); + typedef void(__fastcall* DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan); typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit protected: @@ -82,7 +83,13 @@ protected: DrawRectPtr m_dr; public: - IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {} + IDrawScanline() + : m_sp(NULL) + , m_ds(NULL) + , m_de(NULL) + , m_dr(NULL) + { + } virtual ~IDrawScanline() {} virtual void BeginDraw(const GSRasterizerData* data) = 0; @@ -90,10 +97,10 @@ public: #ifdef ENABLE_JIT_RASTERIZER - __forceinline void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) {m_sp(vertex, index, dscan);} - __forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);} - __forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);} - __forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);} + __forceinline void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) { m_sp(vertex, index, dscan); } + __forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) { m_ds(pixels, left, top, scan); } + __forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) { m_de(pixels, left, top, scan); } + __forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) { (this->*m_dr)(r, v); } #else @@ -101,13 +108,13 @@ public: virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0; virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0; virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0; - + #endif virtual void PrintStats() = 0; - __forceinline bool HasEdge() const {return m_de != NULL;} - __forceinline bool IsSolidRect() const {return m_dr != NULL;} + __forceinline bool HasEdge() const { return m_de != NULL; } + __forceinline bool IsSolidRect() const { return m_dr != NULL; } }; class IRasterizer : public GSAlignedClass<32> @@ -134,22 +141,22 @@ protected: GSVector4i m_scissor; GSVector4 m_fscissor_x; GSVector4 m_fscissor_y; - struct {GSVertexSW* buff; int count;} m_edge; - struct {int sum, actual, total;} m_pixels; + struct { GSVertexSW* buff; int count; } m_edge; + struct { int sum, actual, total; } m_pixels; typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count); - template + template void DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count); void DrawLine(const GSVertexSW* vertex, const uint32* index); void DrawTriangle(const GSVertexSW* vertex, const uint32* index); void DrawSprite(const GSVertexSW* vertex, const uint32* index); - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 __forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, const GSVertexSW2& dedge, const GSVertexSW2& dscan, const GSVector4& p0); - #else +#else __forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0); - #endif +#endif void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side); @@ -173,9 +180,9 @@ public: void Queue(const std::shared_ptr& data); void Sync() {} - bool IsSynced() const {return true;} + bool IsSynced() const { return true; } int GetPixels(bool reset); - void PrintStats() {m_ds->PrintStats();} + void PrintStats() { m_ds->PrintStats(); } }; class GSRasterizerList : public IRasterizer @@ -195,23 +202,24 @@ protected: public: virtual ~GSRasterizerList(); - template static IRasterizer* Create(int threads, GSPerfMon* perfmon) + template + static IRasterizer* Create(int threads, GSPerfMon* perfmon) { threads = std::max(threads, 0); - if(threads == 0) + if (threads == 0) { return new GSRasterizer(new DS(), 0, 1, perfmon); } GSRasterizerList* rl = new GSRasterizerList(threads, perfmon); - for(int i = 0; i < threads; i++) + for (int i = 0; i < threads; i++) { rl->m_r.push_back(std::unique_ptr(new GSRasterizer(new DS(), i, threads, perfmon))); - auto &r = *rl->m_r[i]; + auto& r = *rl->m_r[i]; rl->m_workers.push_back(std::unique_ptr(new GSWorker( - [&r](std::shared_ptr &item) { r.Draw(item.get()); }))); + [&r](std::shared_ptr& item) { r.Draw(item.get()); }))); } return rl; diff --git a/plugins/GSdx/Renderers/SW/GSRendererSW.cpp b/plugins/GSdx/Renderers/SW/GSRendererSW.cpp index cb98275428..bbe1e161d4 100644 --- a/plugins/GSdx/Renderers/SW/GSRendererSW.cpp +++ b/plugins/GSdx/Renderers/SW/GSRendererSW.cpp @@ -44,10 +44,12 @@ GSRendererSW::GSRendererSW(int threads) m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32); - for (uint32 i = 0; i < countof(m_fzb_pages); i++) { + for (uint32 i = 0; i < countof(m_fzb_pages); i++) + { m_fzb_pages[i] = 0; } - for (uint32 i = 0; i < countof(m_tex_pages); i++) { + for (uint32 i = 0; i < countof(m_tex_pages); i++) + { m_tex_pages[i] = 0; } @@ -70,7 +72,8 @@ GSRendererSW::GSRendererSW(int threads) // Reset handler with the auto flush hack enabled on the SW renderer. // Some games run better without the hack so rely on ini/gui option. - if (!GLLoader::in_replayer && theApp.GetConfigB("autoflush_sw")) { + if (!GLLoader::in_replayer && theApp.GetConfigB("autoflush_sw")) + { m_userhacks_auto_flush = true; ResetHandlers(); } @@ -80,7 +83,7 @@ GSRendererSW::~GSRendererSW() { delete m_tc; - for(size_t i = 0; i < countof(m_texture); i++) + for (size_t i = 0; i < countof(m_texture); i++) { delete m_texture[i]; } @@ -103,7 +106,7 @@ void GSRendererSW::VSync(int field) { Sync(0); // IncAge might delete a cached texture in use - if(0) if(LOG) + if (0) if (LOG) { fprintf(s_fp, "%llu\n", m_perfmon.GetFrame()); @@ -145,7 +148,7 @@ void GSRendererSW::VSync(int field) void GSRendererSW::ResetDevice() { - for(size_t i = 0; i < countof(m_texture); i++) + for (size_t i = 0; i < countof(m_texture); i++) { delete m_texture[i]; @@ -164,7 +167,7 @@ GSTexture* GSRendererSW::GetOutput(int i, int& y_offset) // TODO: round up bottom - if(m_dev->ResizeTexture(&m_texture[i], w, h)) + if (m_dev->ResizeTexture(&m_texture[i], w, h)) { static int pitch = 1024 * 4; @@ -176,9 +179,9 @@ GSTexture* GSRendererSW::GetOutput(int i, int& y_offset) m_texture[i]->Update(r, m_output, pitch); - if(s_dump) + if (s_dump) { - if(s_savef && s_n >= s_saven) + if (s_savef && s_n >= s_saven) { m_texture[i]->Save(m_dump_root + format("%05d_f%lld_fr%d_%05x_%s.bmp", s_n, m_perfmon.GetFrame(), i, (int)DISPFB.Block(), psm_str(DISPFB.PSM))); } @@ -193,7 +196,8 @@ GSTexture* GSRendererSW::GetFeedbackOutput() int dummy; // It is enough to emulate Xenosaga cutscene. (or any game that will do a basic loopback) - for (int i = 0; i < 2; i++) { + for (int i = 0; i < 2; i++) + { if (m_regs->EXTBUF.EXBP == m_regs->DISP[i].DISPFB.Block()) return GetOutput(i, dummy); } @@ -202,12 +206,12 @@ GSTexture* GSRendererSW::GetFeedbackOutput() } -template +template void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count) { // FIXME q_div wasn't added to AVX2 code path. - #if 0//_M_SSE >= 0x501 +#if 0 //_M_SSE >= 0x501 // TODO: something isn't right here, this makes other functions slower (split load/store? old sse code in 3rd party lib?) @@ -273,13 +277,13 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* } } - #else - +#else + GSVector4i off = (GSVector4i)m_context->XYOFFSET; GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0); GSVector4i z_max = GSVector4i::xffffffff().srl32(GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8); - for(int i = (int)m_vertex.next; i > 0; i--, src++, dst++) + for (int i = (int)m_vertex.next; i > 0; i--, src++, dst++) { GSVector4 stcq = GSVector4::load(&src->m[0]); // s t rgba q @@ -293,16 +297,16 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* GSVector4 t = GSVector4::zero(); - if(tme) + if (tme) { - if(fst) + if (fst) { t = GSVector4(xyzuvf.uph16() << (16 - 4)); } - else if(q_div) + else if (q_div) { // Division is required if number are huge (Pro Soccer Club) - if(primclass == GS_SPRITE_CLASS && (i & 1) == 0) + if (primclass == GS_SPRITE_CLASS && (i & 1) == 0) { // q(n) isn't valid, you need to take q(n+1) const GSVertex* next = src + 1; @@ -320,7 +324,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* } } - if(primclass == GS_SPRITE_CLASS) + if (primclass == GS_SPRITE_CLASS) { xyzuvf = xyzuvf.min_u32(z_max); t = t.insert32<1, 3>(GSVector4::cast(xyzuvf)); @@ -328,14 +332,14 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* dst->t = t; - #if 0 //_M_SSE >= 0x501 +#if 0 //_M_SSE >= 0x501 dst->_pad = GSVector4::zero(); - #endif +#endif } - #endif +#endif } void GSRendererSW::Draw() @@ -367,43 +371,42 @@ void GSRendererSW::Draw() // points and lines may have zero area bbox (single line: 0, 0 - 256, 0) - if(m_vt.m_primclass == GS_POINT_CLASS || m_vt.m_primclass == GS_LINE_CLASS) + if (m_vt.m_primclass == GS_POINT_CLASS || m_vt.m_primclass == GS_LINE_CLASS) { - if(bbox.x == bbox.z) bbox.z++; - if(bbox.y == bbox.w) bbox.w++; + if (bbox.x == bbox.z) bbox.z++; + if (bbox.y == bbox.w) bbox.w++; } GSVector4i r = bbox.rintersect(scissor); scissor.z = std::min(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour - + sd->scissor = scissor; sd->bbox = bbox; sd->frame = m_perfmon.GetFrame(); - if(!GetScanlineGlobalData(sd)) + if (!GetScanlineGlobalData(sd)) { return; } - if(0) if(LOG) + if (0) if (LOG) { int n = GSUtil::GetVertexCount(PRIM->PRIM); - - for(uint32 i = 0, j = 0; i < m_index.tail; i += n, j++) + + for (uint32 i = 0, j = 0; i < m_index.tail; i += n, j++) { - for(int k = 0; k < n; k++) + for (int k = 0; k < n; k++) { GSVertex* v = &m_vertex.buff[m_index.buff[i + k]]; GSVertex* vn = &m_vertex.buff[m_index.buff[i + n - 1]]; - - fprintf(s_fp, "%d:%d %f %f %f %f\n", + + fprintf(s_fp, "%d:%d %f %f %f %f\n", j, k, (float)(v->XYZ.X - context->XYOFFSET.OFX) / 16, (float)(v->XYZ.Y - context->XYOFFSET.OFY) / 16, PRIM->FST ? (float)(v->U) / 16 : v->ST.S / (PRIM->PRIM == GS_SPRITE ? vn->RGBAQ.Q : v->RGBAQ.Q), - PRIM->FST ? (float)(v->V) / 16 : v->ST.T / (PRIM->PRIM == GS_SPRITE ? vn->RGBAQ.Q : v->RGBAQ.Q) - ); + PRIM->FST ? (float)(v->V) / 16 : v->ST.T / (PRIM->PRIM == GS_SPRITE ? vn->RGBAQ.Q : v->RGBAQ.Q)); } } } @@ -415,26 +418,26 @@ void GSRendererSW::Draw() uint32* fb_pages = NULL; uint32* zb_pages = NULL; - if(sd->global.sel.fb) + if (sd->global.sel.fb) { fb_pages = m_context->offset.fb->GetPages(r); } - if(sd->global.sel.zb) + if (sd->global.sel.zb) { zb_pages = m_context->offset.zb->GetPages(r); } // check if there is an overlap between this and previous targets - if(CheckTargetPages(fb_pages, zb_pages, r)) + if (CheckTargetPages(fb_pages, zb_pages, r)) { sd->m_syncpoint = SharedData::SyncTarget; } // check if the texture is not part of a target currently in use - if(CheckSourcePages(sd)) + if (CheckSourcePages(sd)) { sd->m_syncpoint = SharedData::SyncSource; } @@ -445,7 +448,7 @@ void GSRendererSW::Draw() // - if(s_dump) + if (s_dump) { Sync(2); @@ -456,71 +459,74 @@ void GSRendererSW::Draw() std::string s; - if(s_n >= s_saven) + if (s_n >= s_saven) { // Dump Register state s = format("%05d_context.txt", s_n); - m_env.Dump(m_dump_root+s); - m_context->Dump(m_dump_root+s); + m_env.Dump(m_dump_root + s); + m_context->Dump(m_dump_root + s); } - if(s_savet && s_n >= s_saven && PRIM->TME) + if (s_savet && s_n >= s_saven && PRIM->TME) { - if (texture_shuffle) { + if (texture_shuffle) + { // Dump the RT in 32 bits format. It helps to debug texture shuffle effect s = format("%05d_f%lld_itexraw_%05x_32bits.bmp", s_n, frame, (int)m_context->TEX0.TBP0); - m_mem.SaveBMP(m_dump_root+s, m_context->TEX0.TBP0, m_context->TEX0.TBW, 0, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); + m_mem.SaveBMP(m_dump_root + s, m_context->TEX0.TBP0, m_context->TEX0.TBW, 0, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); } s = format("%05d_f%lld_itexraw_%05x_%s.bmp", s_n, frame, (int)m_context->TEX0.TBP0, psm_str(m_context->TEX0.PSM)); - m_mem.SaveBMP(m_dump_root+s, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); + m_mem.SaveBMP(m_dump_root + s, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); } - if(s_save && s_n >= s_saven) + if (s_save && s_n >= s_saven) { - if (texture_shuffle) { + if (texture_shuffle) + { // Dump the RT in 32 bits format. It helps to debug texture shuffle effect s = format("%05d_f%lld_rt0_%05x_32bits.bmp", s_n, frame, m_context->FRAME.Block()); - m_mem.SaveBMP(m_dump_root+s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512); + m_mem.SaveBMP(m_dump_root + s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512); } s = format("%05d_f%lld_rt0_%05x_%s.bmp", s_n, frame, m_context->FRAME.Block(), psm_str(m_context->FRAME.PSM)); - m_mem.SaveBMP(m_dump_root+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); + m_mem.SaveBMP(m_dump_root + s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); } - if(s_savez && s_n >= s_saven) + if (s_savez && s_n >= s_saven) { s = format("%05d_f%lld_rz0_%05x_%s.bmp", s_n, frame, m_context->ZBUF.Block(), psm_str(m_context->ZBUF.PSM)); - m_mem.SaveBMP(m_dump_root+s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); + m_mem.SaveBMP(m_dump_root + s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); } Queue(data); Sync(3); - if(s_save && s_n >= s_saven) + if (s_save && s_n >= s_saven) { - if (texture_shuffle) { + if (texture_shuffle) + { // Dump the RT in 32 bits format. It helps to debug texture shuffle effect s = format("%05d_f%lld_rt1_%05x_32bits.bmp", s_n, frame, m_context->FRAME.Block()); - m_mem.SaveBMP(m_dump_root+s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512); + m_mem.SaveBMP(m_dump_root + s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512); } s = format("%05d_f%lld_rt1_%05x_%s.bmp", s_n, frame, m_context->FRAME.Block(), psm_str(m_context->FRAME.PSM)); - m_mem.SaveBMP(m_dump_root+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); + m_mem.SaveBMP(m_dump_root + s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); } - if(s_savez && s_n >= s_saven) + if (s_savez && s_n >= s_saven) { s = format("%05d_f%lld_rz1_%05x_%s.bmp", s_n, frame, m_context->ZBUF.Block(), psm_str(m_context->ZBUF.PSM)); - m_mem.SaveBMP(m_dump_root+s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); + m_mem.SaveBMP(m_dump_root + s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); } - if(s_savel > 0 && (s_n - s_saven) > s_savel) + if (s_savel > 0 && (s_n - s_saven) > s_savel) { s_dump = 0; } @@ -546,7 +552,7 @@ void GSRendererSW::Queue(std::shared_ptr& item) { SharedData* sd = (SharedData*)item.get(); - if(sd->m_syncpoint == SharedData::SyncSource) + if (sd->m_syncpoint == SharedData::SyncSource) { Sync(4); } @@ -555,21 +561,21 @@ void GSRendererSW::Queue(std::shared_ptr& item) sd->UpdateSource(); - if(sd->m_syncpoint == SharedData::SyncTarget) + if (sd->m_syncpoint == SharedData::SyncTarget) { Sync(5); } - if(LOG) + if (LOG) { GSScanlineGlobalData& gd = ((SharedData*)item.get())->global; fprintf(s_fp, "[%d] queue %05x %d (%d) %05x %d (%d) %05x %d %dx%d (%d %d %d) | %u %d %d\n", sd->counter, - m_context->FRAME.Block(), m_context->FRAME.PSM, gd.sel.fwrite, + m_context->FRAME.Block(), m_context->FRAME.PSM, gd.sel.fwrite, m_context->ZBUF.Block(), m_context->ZBUF.PSM, gd.sel.zwrite, PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH, m_context->TEX0.CSM, m_context->TEX0.CPSM, m_context->TEX0.CSA, - PRIM->PRIM, sd->vertex_count, sd->index_count); + PRIM->PRIM, sd->vertex_count, sd->index_count); fflush(s_fp); } @@ -578,14 +584,14 @@ void GSRendererSW::Queue(std::shared_ptr& item) // invalidate new parts rendered onto - if(sd->global.sel.fwrite) + if (sd->global.sel.fwrite) { m_tc->InvalidatePages(sd->m_fb_pages, sd->m_fpsm); m_mem.m_clut.Invalidate(m_context->FRAME.Block()); } - if(sd->global.sel.zwrite) + if (sd->global.sel.zwrite) { m_tc->InvalidatePages(sd->m_zb_pages, sd->m_zpsm); } @@ -601,22 +607,22 @@ void GSRendererSW::Sync(int reason) m_rl->Sync(); - if(0) if(LOG) + if (0) if (LOG) { std::string s; - - if(s_save) + + if (s_save) { s = format("%05d_f%lld_rt1_%05x_%s.bmp", s_n, m_perfmon.GetFrame(), m_context->FRAME.Block(), psm_str(m_context->FRAME.PSM)); - m_mem.SaveBMP(m_dump_root+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); + m_mem.SaveBMP(m_dump_root + s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); } - if(s_savez) + if (s_savez) { s = format("%05d_f%lld_zb1_%05x_%s.bmp", s_n, m_perfmon.GetFrame(), m_context->ZBUF.Block(), psm_str(m_context->ZBUF.PSM)); - m_mem.SaveBMP(m_dump_root+s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); + m_mem.SaveBMP(m_dump_root + s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); } } @@ -624,26 +630,34 @@ void GSRendererSW::Sync(int reason) int pixels = m_rl->GetPixels(); - if(LOG) {fprintf(s_fp, "sync n=%d r=%d t=%llu p=%d %c\n", s_n, reason, t, pixels, t > 10000000 ? '*' : ' '); fflush(s_fp);} + if (LOG) + { + fprintf(s_fp, "sync n=%d r=%d t=%llu p=%d %c\n", s_n, reason, t, pixels, t > 10000000 ? '*' : ' '); + fflush(s_fp); + } m_perfmon.Put(GSPerfMon::Fillrate, pixels); } void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) { - if(LOG) {fprintf(s_fp, "w %05x %u %u, %d %d %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM, r.x, r.y, r.z, r.w); fflush(s_fp);} - + if (LOG) + { + fprintf(s_fp, "w %05x %u %u, %d %d %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM, r.x, r.y, r.z, r.w); + fflush(s_fp); + } + GSOffset* off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM); off->GetPages(r, m_tmp_pages); // check if the changing pages either used as a texture or a target - if(!m_rl->IsSynced()) + if (!m_rl->IsSynced()) { - for(uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++) + for (uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++) { - if(m_fzb_pages[*p] | m_tex_pages[*p]) + if (m_fzb_pages[*p] | m_tex_pages[*p]) { Sync(6); @@ -657,17 +671,21 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) { - if(LOG) {fprintf(s_fp, "%s %05x %u %u, %d %d %d %d\n", clut ? "rp" : "r", BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM, r.x, r.y, r.z, r.w); fflush(s_fp);} + if (LOG) + { + fprintf(s_fp, "%s %05x %u %u, %d %d %d %d\n", clut ? "rp" : "r", BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM, r.x, r.y, r.z, r.w); + fflush(s_fp); + } - if(!m_rl->IsSynced()) + if (!m_rl->IsSynced()) { GSOffset* off = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM); off->GetPages(r, m_tmp_pages); - for(uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++) + for (uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++) { - if(m_fzb_pages[*p]) + if (m_fzb_pages[*p]) { Sync(7); @@ -679,8 +697,10 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS void GSRendererSW::UsePages(const uint32* pages, const int type) { - for(const uint32* p = pages; *p != GSOffset::EOP; p++) { - switch (type) { + for (const uint32* p = pages; *p != GSOffset::EOP; p++) + { + switch (type) + { case 0: ASSERT((m_fzb_pages[*p] & 0xFFFF) < USHRT_MAX); m_fzb_pages[*p] += 1; @@ -693,15 +713,18 @@ void GSRendererSW::UsePages(const uint32* pages, const int type) ASSERT(m_tex_pages[*p] < USHRT_MAX); m_tex_pages[*p] += 1; break; - default:break; + default: + break; } } } void GSRendererSW::ReleasePages(const uint32* pages, const int type) { - for(const uint32* p = pages; *p != GSOffset::EOP; p++) { - switch (type) { + for (const uint32* p = pages; *p != GSOffset::EOP; p++) + { + switch (type) + { case 0: ASSERT((m_fzb_pages[*p] & 0xFFFF) > 0); m_fzb_pages[*p] -= 1; @@ -714,7 +737,8 @@ void GSRendererSW::ReleasePages(const uint32* pages, const int type) ASSERT(m_tex_pages[*p] > 0); m_tex_pages[*p] -= 1; break; - default:break; + default: + break; } } } @@ -728,21 +752,21 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag bool res = false; - if(m_fzb != m_context->offset.fzb4) + if (m_fzb != m_context->offset.fzb4) { // targets changed, check everything m_fzb = m_context->offset.fzb4; m_fzb_bbox = r; - if(fb_pages == NULL) fb_pages = m_context->offset.fb->GetPages(r); - if(zb_pages == NULL) zb_pages = m_context->offset.zb->GetPages(r); + if (fb_pages == NULL) fb_pages = m_context->offset.fb->GetPages(r); + if (zb_pages == NULL) zb_pages = m_context->offset.zb->GetPages(r); memset(m_fzb_cur_pages, 0, sizeof(m_fzb_cur_pages)); uint32 used = 0; - for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++) + for (const uint32* p = fb_pages; *p != GSOffset::EOP; p++) { uint32 i = *p; @@ -755,7 +779,7 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag used |= m_tex_pages[i]; } - for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++) + for (const uint32* p = zb_pages; *p != GSOffset::EOP; p++) { uint32 i = *p; @@ -768,11 +792,15 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag used |= m_tex_pages[i]; } - if(!synced) + if (!synced) { - if(used) + if (used) { - if(LOG) {fprintf(s_fp, "syncpoint 0\n"); fflush(s_fp);} + if (LOG) + { + fprintf(s_fp, "syncpoint 0\n"); + fflush(s_fp); + } res = true; } @@ -790,23 +818,23 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag m_fzb_bbox = bbox; - if(check) + if (check) { // drawing area is larger than previous time, check new parts only to avoid false positives (m_fzb_cur_pages guards) - if(fb_pages == NULL) fb_pages = m_context->offset.fb->GetPages(r); - if(zb_pages == NULL) zb_pages = m_context->offset.zb->GetPages(r); + if (fb_pages == NULL) fb_pages = m_context->offset.fb->GetPages(r); + if (zb_pages == NULL) zb_pages = m_context->offset.zb->GetPages(r); uint32 used = 0; - for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++) + for (const uint32* p = fb_pages; *p != GSOffset::EOP; p++) { uint32 i = *p; uint32 row = i >> 5; uint32 col = 1 << (i & 31); - - if((m_fzb_cur_pages[row] & col) == 0) + + if ((m_fzb_cur_pages[row] & col) == 0) { m_fzb_cur_pages[row] |= col; @@ -814,14 +842,14 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag } } - for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++) + for (const uint32* p = zb_pages; *p != GSOffset::EOP; p++) { uint32 i = *p; uint32 row = i >> 5; uint32 col = 1 << (i & 31); - - if((m_fzb_cur_pages[row] & col) == 0) + + if ((m_fzb_cur_pages[row] & col) == 0) { m_fzb_cur_pages[row] |= col; @@ -829,29 +857,37 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag } } - if(!synced) + if (!synced) { - if(used) + if (used) { - if(LOG) {fprintf(s_fp, "syncpoint 1\n"); fflush(s_fp);} + if (LOG) + { + fprintf(s_fp, "syncpoint 1\n"); + fflush(s_fp); + } res = true; } } } - if(!synced) + if (!synced) { // chross-check frame and z-buffer pages, they cannot overlap with eachother and with previous batches in queue, // have to be careful when the two buffers are mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300) - if(fb && !res) + if (fb && !res) { - for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++) + for (const uint32* p = fb_pages; *p != GSOffset::EOP; p++) { - if(m_fzb_pages[*p] & 0xffff0000) + if (m_fzb_pages[*p] & 0xffff0000) { - if(LOG) {fprintf(s_fp, "syncpoint 2\n"); fflush(s_fp);} + if (LOG) + { + fprintf(s_fp, "syncpoint 2\n"); + fflush(s_fp); + } res = true; @@ -860,13 +896,17 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag } } - if(zb && !res) + if (zb && !res) { - for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++) + for (const uint32* p = zb_pages; *p != GSOffset::EOP; p++) { - if(m_fzb_pages[*p] & 0x0000ffff) + if (m_fzb_pages[*p] & 0x0000ffff) { - if(LOG) {fprintf(s_fp, "syncpoint 3\n"); fflush(s_fp);} + if (LOG) + { + fprintf(s_fp, "syncpoint 3\n"); + fflush(s_fp); + } res = true; @@ -877,27 +917,27 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag } } - if(!fb && fb_pages != NULL) delete [] fb_pages; - if(!zb && zb_pages != NULL) delete [] zb_pages; + if (!fb && fb_pages != NULL) delete[] fb_pages; + if (!zb && zb_pages != NULL) delete[] zb_pages; return res; } bool GSRendererSW::CheckSourcePages(SharedData* sd) { - if(!m_rl->IsSynced()) + if (!m_rl->IsSynced()) { - for(size_t i = 0; sd->m_tex[i].t != NULL; i++) + for (size_t i = 0; sd->m_tex[i].t != NULL; i++) { sd->m_tex[i].t->m_offset->GetPages(sd->m_tex[i].r, m_tmp_pages); uint32* pages = m_tmp_pages; // sd->m_tex[i].t->m_pages.n; - for(const uint32* p = pages; *p != GSOffset::EOP; p++) + for (const uint32* p = pages; *p != GSOffset::EOP; p++) { // TODO: 8H 4HL 4HH texture at the same place as the render target (24 bit, or 32-bit where the alpha channel is masked, Valkyrie Profile 2) - if(m_fzb_pages[*p]) // currently being drawn to? => sync + if (m_fzb_pages[*p]) // currently being drawn to? => sync { return true; } @@ -939,39 +979,39 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) uint32 fm = context->FRAME.FBMSK; uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; - if(context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER) + if (context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER) { fm = 0xffffffff; zm = 0xffffffff; } - if(PRIM->TME) + if (PRIM->TME) { - if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) + if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) { m_mem.m_clut.Read32(context->TEX0, env.TEXA); } } - if(context->TEST.ATE) + if (context->TEST.ATE) { - if(!TryAlphaTest(fm, zm)) + if (!TryAlphaTest(fm, zm)) { gd.sel.atst = context->TEST.ATST; gd.sel.afail = context->TEST.AFAIL; gd.aref = GSVector4i((int)context->TEST.AREF); - switch(gd.sel.atst) + switch (gd.sel.atst) { - case ATST_LESS: - gd.sel.atst = ATST_LEQUAL; - gd.aref -= GSVector4i::x00000001(); - break; - case ATST_GREATER: - gd.sel.atst = ATST_GEQUAL; - gd.aref += GSVector4i::x00000001(); - break; + case ATST_LESS: + gd.sel.atst = ATST_LEQUAL; + gd.aref -= GSVector4i::x00000001(); + break; + case ATST_GREATER: + gd.sel.atst = ATST_GEQUAL; + gd.aref += GSVector4i::x00000001(); + break; } } } @@ -987,28 +1027,29 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) zwrite || ztest ? m_context->ZBUF.Block() : 0xfffff, m_context->ZBUF.PSM, PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH); */ - if(!fwrite && !zwrite) return false; + if (!fwrite && !zwrite) + return false; gd.sel.fwrite = fwrite; gd.sel.ftest = ftest; - if(fwrite || ftest) + if (fwrite || ftest) { gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt; - if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff) + if ((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff) { gd.sel.iip = PRIM->IIP; } - if(PRIM->TME) + if (PRIM->TME) { gd.sel.tfx = context->TEX0.TFX; gd.sel.tcc = context->TEX0.TCC; gd.sel.fst = PRIM->FST; gd.sel.ltf = m_vt.IsLinear(); - if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) + if (GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) { gd.sel.tlu = 1; @@ -1020,7 +1061,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.sel.wms = context->CLAMP.WMS; gd.sel.wmt = context->CLAMP.WMT; - if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128))) + if (gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128))) { // modulate does not do anything when vertex color is 0x80 @@ -1037,13 +1078,17 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA); - if(t == NULL) {ASSERT(0); return false;} + if (t == NULL) + { + ASSERT(0); + return false; + } data->SetSource(t, r, 0); gd.sel.tw = t->m_tw - 3; - if(mipmap) + if (mipmap) { // TEX1.MMIN // 000 p @@ -1053,7 +1098,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) // 100 l round // 101 l tri - if(m_vt.m_lod.x > 0) + if (m_vt.m_lod.x > 0) { gd.sel.ltf = context->TEX1.MMIN >> 2; } @@ -1068,20 +1113,20 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) int mxl = std::min((int)context->TEX1.MXL, 6) << 16; int k = context->TEX1.K << 12; - if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL) + if ((int)m_vt.m_lod.x >= (int)context->TEX1.MXL) { k = (int)m_vt.m_lod.x << 16; // set lod to max level - gd.sel.lcm = 1; // lod is constant + gd.sel.lcm = 1; // lod is constant gd.sel.mmin = 1; // tri-linear is meaningless } - if(gd.sel.mmin == 2) + if (gd.sel.mmin == 2) { mxl--; // don't sample beyond the last level (TODO: add a dummy level instead?) } - if(gd.sel.fst) + if (gd.sel.fst) { ASSERT(gd.sel.lcm == 1); ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu) @@ -1089,11 +1134,11 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.sel.lcm = 1; } - if(gd.sel.lcm) + if (gd.sel.lcm) { int lod = std::max(std::min(k, mxl), 0); - if(gd.sel.mmin == 1) + if (gd.sel.mmin == 1) { lod = (lod + 0x8000) & 0xffff0000; // rounding } @@ -1117,7 +1162,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) static int s_counter = 0; - for(int i = 1, j = std::min((int)context->TEX1.MXL, 6); i <= j; i++) + for (int i = 1, j = std::min((int)context->TEX1.MXL, 6); i <= j; i++) { const GIFRegTEX0& MIP_TEX0 = GetTex0Layer(i); @@ -1131,7 +1176,11 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3); - if(t == NULL) {ASSERT(0); return false;} + if (t == NULL) + { + ASSERT(0); + return false; + } GSVector4i r; @@ -1152,7 +1201,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) // Note: the 'q' division was done in GSRendererSW::ConvertVertexBuffer gd.sel.fst |= (m_vt.m_eq.q || primclass == GS_SPRITE_CLASS); - if(gd.sel.ltf && gd.sel.fst) + if (gd.sel.ltf && gd.sel.fst) { // if q is constant we can do the half pel shift for bilinear sampling on the vertices @@ -1162,7 +1211,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) GSVertexSW* RESTRICT v = data->vertex; - for(int i = 0, j = data->vertex_count; i < j; i++) + for (int i = 0, j = data->vertex_count; i < j; i++) { GSVector4 t = v[i].t; @@ -1174,56 +1223,56 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) uint16 tw = 1u << TEX0.TW; uint16 th = 1u << TEX0.TH; - switch(context->CLAMP.WMS) + switch (context->CLAMP.WMS) { - case CLAMP_REPEAT: - gd.t.min.u16[0] = gd.t.minmax.u16[0] = tw - 1; - gd.t.max.u16[0] = gd.t.minmax.u16[2] = 0; - gd.t.mask.u32[0] = 0xffffffff; - break; - case CLAMP_CLAMP: - gd.t.min.u16[0] = gd.t.minmax.u16[0] = 0; - gd.t.max.u16[0] = gd.t.minmax.u16[2] = tw - 1; - gd.t.mask.u32[0] = 0; - break; - case CLAMP_REGION_CLAMP: - gd.t.min.u16[0] = gd.t.minmax.u16[0] = std::min(context->CLAMP.MINU, tw - 1); - gd.t.max.u16[0] = gd.t.minmax.u16[2] = std::min(context->CLAMP.MAXU, tw - 1); - gd.t.mask.u32[0] = 0; - break; - case CLAMP_REGION_REPEAT: - gd.t.min.u16[0] = gd.t.minmax.u16[0] = context->CLAMP.MINU & (tw - 1); - gd.t.max.u16[0] = gd.t.minmax.u16[2] = context->CLAMP.MAXU & (tw - 1); - gd.t.mask.u32[0] = 0xffffffff; - break; - default: - __assume(0); + case CLAMP_REPEAT: + gd.t.min.u16[0] = gd.t.minmax.u16[0] = tw - 1; + gd.t.max.u16[0] = gd.t.minmax.u16[2] = 0; + gd.t.mask.u32[0] = 0xffffffff; + break; + case CLAMP_CLAMP: + gd.t.min.u16[0] = gd.t.minmax.u16[0] = 0; + gd.t.max.u16[0] = gd.t.minmax.u16[2] = tw - 1; + gd.t.mask.u32[0] = 0; + break; + case CLAMP_REGION_CLAMP: + gd.t.min.u16[0] = gd.t.minmax.u16[0] = std::min(context->CLAMP.MINU, tw - 1); + gd.t.max.u16[0] = gd.t.minmax.u16[2] = std::min(context->CLAMP.MAXU, tw - 1); + gd.t.mask.u32[0] = 0; + break; + case CLAMP_REGION_REPEAT: + gd.t.min.u16[0] = gd.t.minmax.u16[0] = context->CLAMP.MINU & (tw - 1); + gd.t.max.u16[0] = gd.t.minmax.u16[2] = context->CLAMP.MAXU & (tw - 1); + gd.t.mask.u32[0] = 0xffffffff; + break; + default: + __assume(0); } - switch(context->CLAMP.WMT) + switch (context->CLAMP.WMT) { - case CLAMP_REPEAT: - gd.t.min.u16[4] = gd.t.minmax.u16[1] = th - 1; - gd.t.max.u16[4] = gd.t.minmax.u16[3] = 0; - gd.t.mask.u32[2] = 0xffffffff; - break; - case CLAMP_CLAMP: - gd.t.min.u16[4] = gd.t.minmax.u16[1] = 0; - gd.t.max.u16[4] = gd.t.minmax.u16[3] = th - 1; - gd.t.mask.u32[2] = 0; - break; - case CLAMP_REGION_CLAMP: - gd.t.min.u16[4] = gd.t.minmax.u16[1] = std::min(context->CLAMP.MINV, th - 1); - gd.t.max.u16[4] = gd.t.minmax.u16[3] = std::min(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256) - gd.t.mask.u32[2] = 0; - break; - case CLAMP_REGION_REPEAT: - gd.t.min.u16[4] = gd.t.minmax.u16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127 - gd.t.max.u16[4] = gd.t.minmax.u16[3] = context->CLAMP.MAXV & (th - 1); - gd.t.mask.u32[2] = 0xffffffff; - break; - default: - __assume(0); + case CLAMP_REPEAT: + gd.t.min.u16[4] = gd.t.minmax.u16[1] = th - 1; + gd.t.max.u16[4] = gd.t.minmax.u16[3] = 0; + gd.t.mask.u32[2] = 0xffffffff; + break; + case CLAMP_CLAMP: + gd.t.min.u16[4] = gd.t.minmax.u16[1] = 0; + gd.t.max.u16[4] = gd.t.minmax.u16[3] = th - 1; + gd.t.mask.u32[2] = 0; + break; + case CLAMP_REGION_CLAMP: + gd.t.min.u16[4] = gd.t.minmax.u16[1] = std::min(context->CLAMP.MINV, th - 1); + gd.t.max.u16[4] = gd.t.minmax.u16[3] = std::min(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256) + gd.t.mask.u32[2] = 0; + break; + case CLAMP_REGION_REPEAT: + gd.t.min.u16[4] = gd.t.minmax.u16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127 + gd.t.max.u16[4] = gd.t.minmax.u16[3] = context->CLAMP.MAXV & (th - 1); + gd.t.mask.u32[2] = 0xffffffff; + break; + default: + __assume(0); } gd.t.min = gd.t.min.xxxxlh(); @@ -1232,7 +1281,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.t.invmask = ~gd.t.mask; } - if(PRIM->FGE) + if (PRIM->FGE) { gd.sel.fge = 1; @@ -1240,23 +1289,23 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.fga = (env.FOGCOL.u32[0] >> 8) & 0x00ff00ff; } - if(context->FRAME.PSM != PSM_PSMCT24) + if (context->FRAME.PSM != PSM_PSMCT24) { gd.sel.date = context->TEST.DATE; gd.sel.datm = context->TEST.DATM; } - if(!IsOpaque()) + if (!IsOpaque()) { gd.sel.abe = PRIM->ABE; gd.sel.ababcd = context->ALPHA.u32[0]; - if(env.PABE.PABE) + if (env.PABE.PABE) { gd.sel.pabe = 1; } - if(m_aa1 && PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS)) + if (m_aa1 && PRIM->AA1 && (primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS)) { gd.sel.aa1 = 1; } @@ -1264,12 +1313,12 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.afix = GSVector4i((int)context->ALPHA.FIX << 7).xxzzlh(); } - if(gd.sel.date - || gd.sel.aba == 1 || gd.sel.abb == 1 || gd.sel.abc == 1 || gd.sel.abd == 1 - || gd.sel.atst != ATST_ALWAYS && gd.sel.afail == AFAIL_RGB_ONLY - || gd.sel.fpsm == 0 && fm != 0 && fm != 0xffffffff - || gd.sel.fpsm == 1 && (fm & 0x00ffffff) != 0 && (fm & 0x00ffffff) != 0x00ffffff - || gd.sel.fpsm == 2 && (fm & 0x80f8f8f8) != 0 && (fm & 0x80f8f8f8) != 0x80f8f8f8) + if (gd.sel.date + || gd.sel.aba == 1 || gd.sel.abb == 1 || gd.sel.abc == 1 || gd.sel.abd == 1 + || gd.sel.atst != ATST_ALWAYS && gd.sel.afail == AFAIL_RGB_ONLY + || gd.sel.fpsm == 0 && fm != 0 && fm != 0xffffffff + || gd.sel.fpsm == 1 && (fm & 0x00ffffff) != 0 && (fm & 0x00ffffff) != 0x00ffffff + || gd.sel.fpsm == 2 && (fm & 0x80f8f8f8) != 0 && (fm & 0x80f8f8f8) != 0x80f8f8f8) { gd.sel.rfb = 1; } @@ -1277,7 +1326,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.sel.colclamp = env.COLCLAMP.CLAMP; gd.sel.fba = context->FBA.FBA; - if(env.DTHE.DTHE) + if (env.DTHE.DTHE) { gd.sel.dthe = 1; @@ -1290,7 +1339,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.sel.zwrite = zwrite; gd.sel.ztest = ztest; - if(zwrite || ztest) + if (zwrite || ztest) { uint32_t z_max = 0xffffffff >> (GSLocalMemory::m_psm[context->ZBUF.PSM].fmt * 8); @@ -1300,16 +1349,16 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.sel.zclamp = (uint32)GSVector4i(m_vt.m_max.p).z > z_max; } - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 gd.fm = fm; gd.zm = zm; - if(gd.sel.fpsm == 1) + if (gd.sel.fpsm == 1) { gd.fm |= 0xff000000; } - else if(gd.sel.fpsm == 2) + else if (gd.sel.fpsm == 2) { uint32 rb = gd.fm & 0x00f800f8; uint32 ga = gd.fm & 0x8000f800; @@ -1317,25 +1366,25 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | 0xffff0000; } - if(gd.sel.zpsm == 1) + if (gd.sel.zpsm == 1) { gd.zm |= 0xff000000; } - else if(gd.sel.zpsm == 2) + else if (gd.sel.zpsm == 2) { gd.zm |= 0xffff0000; } - #else +#else gd.fm = GSVector4i(fm); gd.zm = GSVector4i(zm); - if(gd.sel.fpsm == 1) + if (gd.sel.fpsm == 1) { gd.fm |= GSVector4i::xff000000(); } - else if(gd.sel.fpsm == 2) + else if (gd.sel.fpsm == 2) { GSVector4i rb = gd.fm & 0x00f800f8; GSVector4i ga = gd.fm & 0x8000f800; @@ -1343,33 +1392,33 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | GSVector4i::xffff0000(); } - if(gd.sel.zpsm == 1) + if (gd.sel.zpsm == 1) { gd.zm |= GSVector4i::xff000000(); } - else if(gd.sel.zpsm == 2) + else if (gd.sel.zpsm == 2) { gd.zm |= GSVector4i::xffff0000(); } - #endif +#endif - if(gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data->bbox.eq(data->bbox.rintersect(data->scissor))) // TODO: check scissor horizontally only + if (gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data->bbox.eq(data->bbox.rintersect(data->scissor))) // TODO: check scissor horizontally only { gd.sel.notest = 1; uint32 ofx = context->XYOFFSET.OFX; - for(int i = 0, j = m_vertex.tail; i < j; i++) + for (int i = 0, j = m_vertex.tail; i < j; i++) { - #if _M_SSE >= 0x501 - if((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8 - #else - if((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4 - #endif +#if _M_SSE >= 0x501 + if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 7) // aligned to 8 +#else + if ((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4 +#endif { gd.sel.notest = 0; - + break; } } @@ -1399,38 +1448,43 @@ GSRendererSW::SharedData::~SharedData() { ReleasePages(); - if(global.clut) _aligned_free(global.clut); - if(global.dimx) _aligned_free(global.dimx); + if (global.clut) + _aligned_free(global.clut); + if (global.dimx) + _aligned_free(global.dimx); - if(LOG) {fprintf(s_fp, "[%d] done t=%lld p=%d | %d %d %d | %08x_%08x\n", - counter, - __rdtsc() - start, pixels, - primclass, vertex_count, index_count, - global.sel.hi, global.sel.lo - ); - fflush(s_fp);} + if (LOG) + { + fprintf(s_fp, "[%d] done t=%lld p=%d | %d %d %d | %08x_%08x\n", + counter, + __rdtsc() - start, pixels, + primclass, vertex_count, index_count, + global.sel.hi, global.sel.lo); + fflush(s_fp); + } } //static TransactionScope::Lock s_lock; void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm) { - if(m_using_pages) return; + if (m_using_pages) + return; { //TransactionScope scope(s_lock); - if(global.sel.fb && fb_pages != NULL) + if (global.sel.fb && fb_pages != NULL) { m_parent->UsePages(fb_pages, 0); } - if(global.sel.zb && zb_pages != NULL) + if (global.sel.zb && zb_pages != NULL) { m_parent->UsePages(zb_pages, 1); } - for(size_t i = 0; m_tex[i].t != NULL; i++) + for (size_t i = 0; m_tex[i].t != NULL; i++) { m_parent->UsePages(m_tex[i].t->m_pages.n, 2); } @@ -1446,29 +1500,30 @@ void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const void GSRendererSW::SharedData::ReleasePages() { - if(!m_using_pages) return; + if (!m_using_pages) + return; { //TransactionScope scope(s_lock); - if(global.sel.fb) + if (global.sel.fb) { m_parent->ReleasePages(m_fb_pages, 0); } - if(global.sel.zb) + if (global.sel.zb) { m_parent->ReleasePages(m_zb_pages, 1); } - for(size_t i = 0; m_tex[i].t != NULL; i++) + for (size_t i = 0; m_tex[i].t != NULL; i++) { m_parent->ReleasePages(m_tex[i].t->m_pages.n, 2); } } - delete [] m_fb_pages; - delete [] m_zb_pages; + delete[] m_fb_pages; + delete[] m_zb_pages; m_fb_pages = NULL; m_zb_pages = NULL; @@ -1488,9 +1543,9 @@ void GSRendererSW::SharedData::SetSource(GSTextureCacheSW::Texture* t, const GSV void GSRendererSW::SharedData::UpdateSource() { - for(size_t i = 0; m_tex[i].t != NULL; i++) + for (size_t i = 0; m_tex[i].t != NULL; i++) { - if(m_tex[i].t->Update(m_tex[i].r)) + if (m_tex[i].t->Update(m_tex[i].r)) { global.tex[i] = m_tex[i].t->m_buff; } @@ -1503,25 +1558,25 @@ void GSRendererSW::SharedData::UpdateSource() } // TODO - - if(m_parent->s_dump) + + if (m_parent->s_dump) { uint64 frame = m_parent->m_perfmon.GetFrame(); std::string s; - if(m_parent->s_savet && m_parent->s_n >= m_parent->s_saven) + if (m_parent->s_savet && m_parent->s_n >= m_parent->s_saven) { - for(size_t i = 0; m_tex[i].t != NULL; i++) + for (size_t i = 0; m_tex[i].t != NULL; i++) { const GIFRegTEX0& TEX0 = m_parent->GetTex0Layer(i); s = format("%05d_f%lld_itex%d_%05x_%s.bmp", m_parent->s_n, frame, i, TEX0.TBP0, psm_str(TEX0.PSM)); - m_tex[i].t->Save(root_sw+s); + m_tex[i].t->Save(root_sw + s); } - if(global.clut != NULL) + if (global.clut != NULL) { GSTextureSW* t = new GSTextureSW(0, 256, 1); @@ -1529,7 +1584,7 @@ void GSRendererSW::SharedData::UpdateSource() s = format("%05d_f%lld_itexp_%05x_%s.bmp", m_parent->s_n, frame, (int)m_parent->m_context->TEX0.CBP, psm_str(m_parent->m_context->TEX0.CPSM)); - t->Save(root_sw+s); + t->Save(root_sw + s); delete t; } diff --git a/plugins/GSdx/Renderers/SW/GSRendererSW.h b/plugins/GSdx/Renderers/SW/GSRendererSW.h index 2cd61076d0..edfe2532c0 100644 --- a/plugins/GSdx/Renderers/SW/GSRendererSW.h +++ b/plugins/GSdx/Renderers/SW/GSRendererSW.h @@ -35,7 +35,7 @@ class GSRendererSW : public GSRenderer { struct alignas(16) TextureLevel { - GSVector4i r; + GSVector4i r; GSTextureCacheSW::Texture* t; }; @@ -47,7 +47,12 @@ class GSRendererSW : public GSRenderer int m_zpsm; bool m_using_pages; TextureLevel m_tex[7 + 1]; // NULL terminated - enum {SyncNone, SyncSource, SyncTarget} m_syncpoint; + enum + { + SyncNone, + SyncSource, + SyncTarget + } m_syncpoint; public: SharedData(GSRendererSW* parent); @@ -64,7 +69,7 @@ class GSRendererSW : public GSRenderer ConvertVertexBufferPtr m_cvb[4][2][2][2]; - template + template void ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); protected: diff --git a/plugins/GSdx/Renderers/SW/GSScanlineEnvironment.h b/plugins/GSdx/Renderers/SW/GSScanlineEnvironment.h index 5c68bc7365..1d10eba496 100644 --- a/plugins/GSdx/Renderers/SW/GSScanlineEnvironment.h +++ b/plugins/GSdx/Renderers/SW/GSScanlineEnvironment.h @@ -28,61 +28,61 @@ union GSScanlineSelector { struct { - uint32 fpsm:2; // 0 - uint32 zpsm:2; // 2 - uint32 ztst:2; // 4 (0: off, 1: write, 2: test (ge), 3: test (g)) - uint32 atst:3; // 6 - uint32 afail:2; // 9 - uint32 iip:1; // 11 - uint32 tfx:3; // 12 - uint32 tcc:1; // 15 - uint32 fst:1; // 16 - uint32 ltf:1; // 17 - uint32 tlu:1; // 18 - uint32 fge:1; // 19 - uint32 date:1; // 20 - uint32 abe:1; // 21 - uint32 aba:2; // 22 - uint32 abb:2; // 24 - uint32 abc:2; // 26 - uint32 abd:2; // 28 - uint32 pabe:1; // 30 - uint32 aa1:1; // 31 + uint32 fpsm : 2; // 0 + uint32 zpsm : 2; // 2 + uint32 ztst : 2; // 4 (0: off, 1: write, 2: test (ge), 3: test (g)) + uint32 atst : 3; // 6 + uint32 afail : 2; // 9 + uint32 iip : 1; // 11 + uint32 tfx : 3; // 12 + uint32 tcc : 1; // 15 + uint32 fst : 1; // 16 + uint32 ltf : 1; // 17 + uint32 tlu : 1; // 18 + uint32 fge : 1; // 19 + uint32 date : 1; // 20 + uint32 abe : 1; // 21 + uint32 aba : 2; // 22 + uint32 abb : 2; // 24 + uint32 abc : 2; // 26 + uint32 abd : 2; // 28 + uint32 pabe : 1; // 30 + uint32 aa1 : 1; // 31 - uint32 fwrite:1; // 32 - uint32 ftest:1; // 33 - uint32 rfb:1; // 34 - uint32 zwrite:1; // 35 - uint32 ztest:1; // 36 - uint32 zoverflow:1; // 37 (z max >= 0x80000000) - uint32 zclamp:1; // 38 - uint32 wms:2; // 39 - uint32 wmt:2; // 41 - uint32 datm:1; // 43 - uint32 colclamp:1; // 44 - uint32 fba:1; // 45 - uint32 dthe:1; // 46 - uint32 prim:2; // 47 + uint32 fwrite : 1; // 32 + uint32 ftest : 1; // 33 + uint32 rfb : 1; // 34 + uint32 zwrite : 1; // 35 + uint32 ztest : 1; // 36 + uint32 zoverflow : 1; // 37 (z max >= 0x80000000) + uint32 zclamp : 1; // 38 + uint32 wms : 2; // 39 + uint32 wmt : 2; // 41 + uint32 datm : 1; // 43 + uint32 colclamp : 1; // 44 + uint32 fba : 1; // 45 + uint32 dthe : 1; // 46 + uint32 prim : 2; // 47 - uint32 edge:1; // 49 - uint32 tw:3; // 50 (encodes values between 3 -> 10, texture cache makes sure it is at least 3) - uint32 lcm:1; // 53 - uint32 mmin:2; // 54 - uint32 notest:1; // 55 (no ztest, no atest, no date, no scissor test, and horizontally aligned to 4 pixels) + uint32 edge : 1; // 49 + uint32 tw : 3; // 50 (encodes values between 3 -> 10, texture cache makes sure it is at least 3) + uint32 lcm : 1; // 53 + uint32 mmin : 2; // 54 + uint32 notest : 1; // 55 (no ztest, no atest, no date, no scissor test, and horizontally aligned to 4 pixels) // TODO: 1D texture flag? could save 2 texture reads and 4 lerps with bilinear, and also the texture coordinate clamp/wrap code in one direction - uint32 breakpoint:1; // Insert a trap to stop the program, helpful to stop debugger on a program + uint32 breakpoint : 1; // Insert a trap to stop the program, helpful to stop debugger on a program }; struct { - uint32 _pad1:22; - uint32 ababcd:8; - uint32 _pad2:2; + uint32 _pad1 : 22; + uint32 ababcd : 8; + uint32 _pad2 : 2; - uint32 fb:2; - uint32 _pad3:1; - uint32 zb:2; + uint32 fb : 2; + uint32 _pad3 : 1; + uint32 zb : 2; }; struct @@ -94,33 +94,29 @@ union GSScanlineSelector uint64 key; GSScanlineSelector() = default; - GSScanlineSelector(uint64 k) : key(k) {} + GSScanlineSelector(uint64 k) + : key(k) + { + } - operator uint32() const {return lo;} - operator uint64() const {return key;} + operator uint32() const { return lo; } + operator uint64() const { return key; } bool IsSolidRect() const { - return prim == GS_SPRITE_CLASS - && iip == 0 - && tfx == TFX_NONE - && abe == 0 - && ztst <= 1 - && atst <= 1 - && date == 0 - && fge == 0; + return prim == GS_SPRITE_CLASS && iip == 0 && tfx == TFX_NONE && abe == 0 && ztst <= 1 && atst <= 1 && date == 0 && fge == 0; } void Print() const { fprintf(stderr, "fpsm:%d zpsm:%d ztst:%d ztest:%d atst:%d afail:%d iip:%d rfb:%d fb:%d zb:%d zw:%d " - "tfx:%d tcc:%d fst:%d ltf:%d tlu:%d wms:%d wmt:%d mmin:%d lcm:%d tw:%d " - "fba:%d cclamp:%d date:%d datm:%d " - "prim:%d abe:%d %d%d%d%d fge:%d dthe:%d notest:%d\n", - fpsm, zpsm, ztst, ztest, atst, afail, iip, rfb, fb, zb, zwrite, - tfx, tcc, fst, ltf, tlu, wms, wmt, mmin, lcm, tw, - fba, colclamp, date, datm, - prim, abe, aba, abb, abc, abd , fge, dthe, notest); + "tfx:%d tcc:%d fst:%d ltf:%d tlu:%d wms:%d wmt:%d mmin:%d lcm:%d tw:%d " + "fba:%d cclamp:%d date:%d datm:%d " + "prim:%d abe:%d %d%d%d%d fge:%d dthe:%d notest:%d\n", + fpsm, zpsm, ztst, ztest, atst, afail, iip, rfb, fb, zb, zwrite, + tfx, tcc, fst, ltf, tlu, wms, wmt, mmin, lcm, tw, + fba, colclamp, date, datm, + prim, abe, aba, abb, abc, abd, fge, dthe, notest); } }; @@ -146,37 +142,37 @@ struct alignas(32) GSScanlineGlobalData // per batch variables, this is like a p GSVector4i aref; GSVector4i afix; - struct {GSVector4i min, max, minmax, mask, invmask;} t; // [u] x 4 [v] x 4 + struct { GSVector4i min, max, minmax, mask, invmask; } t; // [u] x 4 [v] x 4 - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 uint32 fm, zm; uint32 frb, fga; GSVector8 mxl; GSVector8 k; // TEX1.K * 0x10000 GSVector8 l; // TEX1.L * -0x10000 - struct {GSVector8i i, f;} lod; // lcm == 1 + struct { GSVector8i i, f; } lod; // lcm == 1 - #else +#else GSVector4i fm, zm; GSVector4i frb, fga; GSVector4 mxl; GSVector4 k; // TEX1.K * 0x10000 GSVector4 l; // TEX1.L * -0x10000 - struct {GSVector4i i, f;} lod; // lcm == 1 + struct { GSVector4i i, f; } lod; // lcm == 1 - #endif +#endif }; struct alignas(32) GSScanlineLocalData // per prim variables, each thread has its own { - #if _M_SSE >= 0x501 +#if _M_SSE >= 0x501 - struct skip {GSVector8 z, s, t, q; GSVector8i rb, ga, f, _pad;} d[8]; - struct step {GSVector4 stq; struct {uint32 rb, ga;} c; struct {uint32 z, f;} p;} d8; - struct {GSVector8i rb, ga;} c; - struct {uint32 z, f;} p; + struct skip { GSVector8 z, s, t, q; GSVector8i rb, ga, f, _pad; } d[8]; + struct step { GSVector4 stq; struct { uint32 rb, ga; } c; struct { uint32 z, f; } p; } d8; + struct { GSVector8i rb, ga; } c; + struct { uint32 z, f; } p; // these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack) @@ -192,19 +188,19 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it // mipmapping - struct {GSVector8i i, f;} lod; + struct { GSVector8i i, f; } lod; GSVector8i uv[2]; GSVector8i uv_minmax[2]; GSVector8i trb, tga; GSVector8i test; } temp; - #else +#else - struct skip {GSVector4 z, s, t, q; GSVector4i rb, ga, f, _pad;} d[4]; - struct step {GSVector4 z, stq; GSVector4i c, f;} d4; - struct {GSVector4i rb, ga;} c; - struct {GSVector4i z, f;} p; + struct skip { GSVector4 z, s, t, q; GSVector4i rb, ga, f, _pad; } d[4]; + struct step { GSVector4 z, stq; GSVector4i c, f; } d4; + struct { GSVector4i rb, ga; } c; + struct { GSVector4i z, f; } p; // these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack) @@ -220,14 +216,14 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it // mipmapping - struct {GSVector4i i, f;} lod; + struct { GSVector4i i, f; } lod; GSVector4i uv[2]; GSVector4i uv_minmax[2]; GSVector4i trb, tga; GSVector4i test; } temp; - #endif +#endif // @@ -277,14 +273,14 @@ struct GSScanlineConstantData : public GSAlignedClass<32> }; uint32 I_hate_vs2013_m_test_128b[8][4] = { - { 0x00000000, 0x00000000, 0x00000000, 0x00000000}, - { 0xffffffff, 0x00000000, 0x00000000, 0x00000000}, - { 0xffffffff, 0xffffffff, 0x00000000, 0x00000000}, - { 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000}, - { 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff}, - { 0x00000000, 0x00000000, 0xffffffff, 0xffffffff}, - { 0x00000000, 0x00000000, 0x00000000, 0xffffffff}, - { 0x00000000, 0x00000000, 0x00000000, 0x00000000} + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0xffffffff, 0x00000000, 0x00000000, 0x00000000}, + {0xffffffff, 0xffffffff, 0x00000000, 0x00000000}, + {0xffffffff, 0xffffffff, 0xffffffff, 0x00000000}, + {0x00000000, 0xffffffff, 0xffffffff, 0xffffffff}, + {0x00000000, 0x00000000, 0xffffffff, 0xffffffff}, + {0x00000000, 0x00000000, 0x00000000, 0xffffffff}, + {0x00000000, 0x00000000, 0x00000000, 0x00000000} }; float I_hate_vs2013_m_shift_256b[9][8] = { @@ -319,14 +315,15 @@ struct GSScanlineConstantData : public GSAlignedClass<32> 1.0f }; - for (size_t n = 0; n < countof(log2_coef); ++n) { - for (size_t i = 0; i < 4; ++i) { + for (size_t n = 0; n < countof(log2_coef); ++n) + { + for (size_t i = 0; i < 4; ++i) + { m_log2_coef_128b[n][i] = log2_coef[n]; m_log2_coef_256b[n][i] = log2_coef[n]; - m_log2_coef_256b[n][i+4] = log2_coef[n]; + m_log2_coef_256b[n][i + 4] = log2_coef[n]; } } - } }; diff --git a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.cpp b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.cpp index 7ce66f2599..56544f80a9 100644 --- a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.cpp +++ b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.cpp @@ -36,16 +36,19 @@ GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0; m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 1 : 0; - try { + try + { #if _M_SSE >= 0x501 Generate_AVX2(); #else - if(m_cpu.has(util::Cpu::tAVX)) + if (m_cpu.has(util::Cpu::tAVX)) Generate_AVX(); else Generate_SSE(); #endif - } catch (std::exception& e) { + } + catch (std::exception& e) + { fprintf(stderr, "ERR:GSSetupPrimCodeGenerator %s\n", e.what()); } } diff --git a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.h b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.h index 0f288eed05..59c03d934b 100644 --- a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.h +++ b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.h @@ -27,13 +27,16 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator { - void operator = (const GSSetupPrimCodeGenerator&); + void operator=(const GSSetupPrimCodeGenerator&); GSScanlineSelector m_sel; GSScanlineLocalData& m_local; bool m_rip; - struct {uint32 z:1, f:1, t:1, c:1;} m_en; + struct + { + uint32 z : 1, f : 1, t : 1, c : 1; + } m_en; #if _M_SSE < 0x501 void Generate_SSE(); diff --git a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp index f41568b3b9..ca418e9436 100644 --- a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp +++ b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp @@ -46,11 +46,11 @@ void GSSetupPrimCodeGenerator::Generate_AVX() if (!m_rip) mov(t0, (size_t)&m_local); - if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) + if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) { mov(rax, (size_t)g_const->m_shift_128b); - for(int i = 0; i < (m_sel.notest ? 2 : 5); i++) + for (int i = 0; i < (m_sel.notest ? 2 : 5); i++) { vmovaps(Xmm(3 + i), ptr[rax + i * 16]); } @@ -74,18 +74,18 @@ void GSSetupPrimCodeGenerator::Generate_AVX() void GSSetupPrimCodeGenerator::Depth_AVX() { - if(!m_en.z && !m_en.f) + if (!m_en.z && !m_en.f) { return; } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // GSVector4 p = dscan.p; vmovaps(xmm0, ptr[a2 + offsetof(GSVertexSW, p)]); - if(m_en.f) + if (m_en.f) { // GSVector4 df = p.wwww(); @@ -99,7 +99,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX() vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); vmovdqa(_rip_local(d4.f), xmm2); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); @@ -113,7 +113,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX() } } - if(m_en.z) + if (m_en.z) { // GSVector4 dz = p.zzzz(); @@ -124,7 +124,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX() vmulps(xmm1, xmm0, xmm3); vmovdqa(_rip_local(d4.z), xmm1); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // m_local.d[i].z = dz * m_shift[i]; @@ -143,7 +143,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX() shl(eax, 6); // * sizeof(GSVertexSW) add(rax, a0); - if(m_en.f) + if (m_en.f) { // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); vmovaps(xmm0, ptr[rax + offsetof(GSVertexSW, p)]); @@ -154,7 +154,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX() vmovdqa(_rip_local(p.f), xmm1); } - if(m_en.z) + if (m_en.z) { // uint32 z is bypassed in t.w @@ -167,7 +167,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX() void GSSetupPrimCodeGenerator::Texture_AVX() { - if(!m_en.t) + if (!m_en.t) { return; } @@ -178,7 +178,7 @@ void GSSetupPrimCodeGenerator::Texture_AVX() vmulps(xmm1, xmm0, xmm3); - if(m_sel.fst) + if (m_sel.fst) { // m_local.d4.stq = GSVector4i(t * 4.0f); @@ -193,7 +193,7 @@ void GSSetupPrimCodeGenerator::Texture_AVX() vmovaps(_rip_local(d4.stq), xmm1); } - for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) + for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) { // GSVector4 ds = t.xxxx(); // GSVector4 dt = t.yyyy(); @@ -201,13 +201,13 @@ void GSSetupPrimCodeGenerator::Texture_AVX() vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j)); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // GSVector4 v = ds/dt * m_shift[i]; vmulps(xmm2, xmm1, Xmm(4 + i)); - if(m_sel.fst) + if (m_sel.fst) { // m_local.d[i].s/t = GSVector4i(v); @@ -216,10 +216,10 @@ void GSSetupPrimCodeGenerator::Texture_AVX() const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - switch(j) + switch (j) { - case 0: vmovdqa(_rip_local_v(d[i].s, variableOffsetS), xmm2); break; - case 1: vmovdqa(_rip_local_v(d[i].t, variableOffsetT), xmm2); break; + case 0: vmovdqa(_rip_local_v(d[i].s, variableOffsetS), xmm2); break; + case 1: vmovdqa(_rip_local_v(d[i].t, variableOffsetT), xmm2); break; } } else @@ -230,11 +230,11 @@ void GSSetupPrimCodeGenerator::Texture_AVX() const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); const size_t variableOffsetQ = offsetof(GSScanlineLocalData, d[0].q) + (i * sizeof(GSScanlineLocalData::d[0])); - switch(j) + switch (j) { - case 0: vmovaps(_rip_local_v(d[i].s, variableOffsetS), xmm2); break; - case 1: vmovaps(_rip_local_v(d[i].t, variableOffsetT), xmm2); break; - case 2: vmovaps(_rip_local_v(d[i].q, variableOffsetQ), xmm2); break; + case 0: vmovaps(_rip_local_v(d[i].s, variableOffsetS), xmm2); break; + case 1: vmovaps(_rip_local_v(d[i].t, variableOffsetT), xmm2); break; + case 2: vmovaps(_rip_local_v(d[i].q, variableOffsetQ), xmm2); break; } } } @@ -243,12 +243,12 @@ void GSSetupPrimCodeGenerator::Texture_AVX() void GSSetupPrimCodeGenerator::Color_AVX() { - if(!m_en.c) + if (!m_en.c) { return; } - if(m_sel.iip) + if (m_sel.iip) { // GSVector4 c = dscan.c; @@ -270,7 +270,7 @@ void GSSetupPrimCodeGenerator::Color_AVX() vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); @@ -302,7 +302,7 @@ void GSSetupPrimCodeGenerator::Color_AVX() vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); @@ -330,15 +330,15 @@ void GSSetupPrimCodeGenerator::Color_AVX() int last = 0; - switch(m_sel.prim) + switch (m_sel.prim) { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; + case GS_POINT_CLASS: last = 0; break; + case GS_LINE_CLASS: last = 1; break; case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; + case GS_SPRITE_CLASS: last = 1; break; } - if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() + if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() { mov(eax, ptr[a1 + sizeof(uint32) * last]); shl(eax, 6); // * sizeof(GSVertexSW) @@ -354,7 +354,7 @@ void GSSetupPrimCodeGenerator::Color_AVX() // if(!tme) c = c.srl16(7); - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { vpsrlw(xmm0, 7); } diff --git a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp index da3efa89b2..5823a7304e 100644 --- a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp +++ b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp @@ -31,7 +31,7 @@ using namespace Xbyak; #define _rip_local(field) (m_rip ? ptr[rip + &m_local.field] : ptr[t0 + offsetof(GSScanlineLocalData, field)]) #define _rip_local_v(field, offset) (m_rip ? ptr[rip + &m_local.field] : ptr[t0 + offset]) -#define _m_shift(i) (Ymm(7+i)) +#define _m_shift(i) (Ymm(7 + i)) // FIXME windows ? #define _vertex rcx @@ -51,11 +51,11 @@ void GSSetupPrimCodeGenerator::Generate_AVX2() if (!m_rip) mov(t0, (size_t)&m_local); - if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) + if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) { mov(rax, (size_t)g_const->m_shift_256b); - for(int i = 0; i < (m_sel.notest ? 2 : 9); i++) + for (int i = 0; i < (m_sel.notest ? 2 : 9); i++) { vmovaps(_m_shift(i), ptr[rax + i * 32]); } @@ -80,12 +80,12 @@ void GSSetupPrimCodeGenerator::Generate_AVX2() void GSSetupPrimCodeGenerator::Depth_AVX2() { - if(!m_en.z && !m_en.f) + if (!m_en.z && !m_en.f) { return; } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { const Ymm& dscan_p = ymm6; @@ -95,7 +95,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() vmulps(ymm1, dscan_p, _m_shift(0)); - if(m_en.z) + if (m_en.z) { // m_local.d8.p.z = dp8.extract32<2>(); @@ -105,7 +105,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() vshufps(ymm2, dscan_p, dscan_p, _MM_SHUFFLE(2, 2, 2, 2)); - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) { // m_local.d[i].z = dz * shift[1 + i]; @@ -116,7 +116,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() } } - if(m_en.f) + if (m_en.f) { // m_local.d8.p.f = GSVector4i(dp8).extract32<3>(); @@ -129,7 +129,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() vshufps(ymm3, dscan_p, dscan_p, _MM_SHUFFLE(3, 3, 3, 3)); - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) { // m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh(); @@ -152,7 +152,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() shl(_vertex.cvt32(), 6); // * sizeof(GSVertexSW) add(_vertex, a0); - if(m_en.f) + if (m_en.f) { // m_local.p.f = GSVector4i(vertex[index[1]].p).extract32<3>(); @@ -161,7 +161,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() vpextrd(_rip_local(p.f), xmm0, 3); } - if(m_en.z) + if (m_en.z) { // m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w @@ -173,7 +173,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() void GSSetupPrimCodeGenerator::Texture_AVX2() { - if(!m_en.t) + if (!m_en.t) { return; } @@ -186,7 +186,7 @@ void GSSetupPrimCodeGenerator::Texture_AVX2() vmulps(ymm1, ymm0, _m_shift(0)); - if(m_sel.fst) + if (m_sel.fst) { // m_local.84.stq = GSVector4i(t * 4.0f); @@ -201,19 +201,19 @@ void GSSetupPrimCodeGenerator::Texture_AVX2() vmovaps(_rip_local(d8.stq), xmm1); } - for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) + for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) { // GSVector8 dstq = dt.xxxx/yyyy/zzzz(); vshufps(ymm1, ymm0, ymm0, (uint8)_MM_SHUFFLE(j, j, j, j)); - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) { // GSVector8 v = dstq * shift[1 + i]; - vmulps(ymm2, ymm1, _m_shift(1+i)); + vmulps(ymm2, ymm1, _m_shift(1 + i)); - if(m_sel.fst) + if (m_sel.fst) { // m_local.d[i].s/t = GSVector8::cast(GSVector8i(v)); @@ -222,10 +222,10 @@ void GSSetupPrimCodeGenerator::Texture_AVX2() const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - switch(j) + switch (j) { - case 0: vmovdqa(_rip_local_v(d[i].s, variableOffsetS), ymm2); break; - case 1: vmovdqa(_rip_local_v(d[i].t, variableOffsetT), ymm2); break; + case 0: vmovdqa(_rip_local_v(d[i].s, variableOffsetS), ymm2); break; + case 1: vmovdqa(_rip_local_v(d[i].t, variableOffsetT), ymm2); break; } } else @@ -236,11 +236,11 @@ void GSSetupPrimCodeGenerator::Texture_AVX2() const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); const size_t variableOffsetQ = offsetof(GSScanlineLocalData, d[0].q) + (i * sizeof(GSScanlineLocalData::d[0])); - switch(j) + switch (j) { - case 0: vmovaps(_rip_local_v(d[i].s, variableOffsetS), ymm2); break; - case 1: vmovaps(_rip_local_v(d[i].t, variableOffsetT), ymm2); break; - case 2: vmovaps(_rip_local_v(d[i].q, variableOffsetQ), ymm2); break; + case 0: vmovaps(_rip_local_v(d[i].s, variableOffsetS), ymm2); break; + case 1: vmovaps(_rip_local_v(d[i].t, variableOffsetT), ymm2); break; + case 2: vmovaps(_rip_local_v(d[i].q, variableOffsetQ), ymm2); break; } } } @@ -249,12 +249,12 @@ void GSSetupPrimCodeGenerator::Texture_AVX2() void GSSetupPrimCodeGenerator::Color_AVX2() { - if(!m_en.c) + if (!m_en.c) { return; } - if(m_sel.iip) + if (m_sel.iip) { const Ymm& dscan_c = ymm6; @@ -276,7 +276,7 @@ void GSSetupPrimCodeGenerator::Color_AVX2() vshufps(ymm2, dscan_c, dscan_c, _MM_SHUFFLE(0, 0, 0, 0)); vshufps(ymm3, dscan_c, dscan_c, _MM_SHUFFLE(2, 2, 2, 2)); - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) { // GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32(); @@ -304,7 +304,7 @@ void GSSetupPrimCodeGenerator::Color_AVX2() vshufps(ymm2, dscan_c, dscan_c, _MM_SHUFFLE(1, 1, 1, 1)); vshufps(ymm3, dscan_c, dscan_c, _MM_SHUFFLE(3, 3, 3, 3)); - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) { // GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32(); @@ -332,15 +332,15 @@ void GSSetupPrimCodeGenerator::Color_AVX2() int last = 0; - switch(m_sel.prim) + switch (m_sel.prim) { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; + case GS_POINT_CLASS: last = 0; break; + case GS_LINE_CLASS: last = 1; break; + case GS_TRIANGLE_CLASS: last = 2; break; + case GS_SPRITE_CLASS: last = 1; break; } - if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() + if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() { mov(_vertex.cvt32(), ptr[a1 + sizeof(uint32) * last]); shl(_vertex.cvt32(), 6); // * sizeof(GSVertexSW) @@ -357,7 +357,7 @@ void GSSetupPrimCodeGenerator::Color_AVX2() // if(!tme) c = c.srl16(7); - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { vpsrlw(ymm0, 7); } diff --git a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp index 76b5fbbf35..bb6e2f1224 100644 --- a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp +++ b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp @@ -38,11 +38,11 @@ void GSSetupPrimCodeGenerator::Generate_SSE() mov(t0, (size_t)&m_local); - if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) + if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) { mov(rax, (size_t)g_const->m_shift_128b[0]); - for(int i = 0; i < (m_sel.notest ? 2 : 5); i++) + for (int i = 0; i < (m_sel.notest ? 2 : 5); i++) { movaps(Xmm(3 + i), ptr[rax + i * 16]); } @@ -66,18 +66,18 @@ void GSSetupPrimCodeGenerator::Generate_SSE() void GSSetupPrimCodeGenerator::Depth_SSE() { - if(!m_en.z && !m_en.f) + if (!m_en.z && !m_en.f) { return; } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // GSVector4 p = dscan.p; movaps(xmm0, ptr[a2 + offsetof(GSVertexSW, p)]); - if(m_en.f) + if (m_en.f) { // GSVector4 df = p.wwww(); @@ -93,7 +93,7 @@ void GSSetupPrimCodeGenerator::Depth_SSE() pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); movdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.f)], xmm2); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); @@ -108,7 +108,7 @@ void GSSetupPrimCodeGenerator::Depth_SSE() } } - if(m_en.z) + if (m_en.z) { // GSVector4 dz = p.zzzz(); @@ -120,7 +120,7 @@ void GSSetupPrimCodeGenerator::Depth_SSE() mulps(xmm1, xmm3); movdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.z)], xmm1); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // m_local.d[i].z = dz * m_shift[i]; @@ -142,7 +142,7 @@ void GSSetupPrimCodeGenerator::Depth_SSE() movaps(xmm0, ptr[rax + offsetof(GSVertexSW, p)]); - if(m_en.f) + if (m_en.f) { // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); @@ -152,7 +152,7 @@ void GSSetupPrimCodeGenerator::Depth_SSE() movdqa(ptr[t0 + offsetof(GSScanlineLocalData, p.f)], xmm1); } - if(m_en.z) + if (m_en.z) { // uint32 z is bypassed in t.w @@ -165,7 +165,7 @@ void GSSetupPrimCodeGenerator::Depth_SSE() void GSSetupPrimCodeGenerator::Texture_SSE() { - if(!m_en.t) + if (!m_en.t) { return; } @@ -177,7 +177,7 @@ void GSSetupPrimCodeGenerator::Texture_SSE() movaps(xmm1, xmm0); mulps(xmm1, xmm3); - if(m_sel.fst) + if (m_sel.fst) { // m_local.d4.stq = GSVector4i(t * 4.0f); @@ -192,7 +192,7 @@ void GSSetupPrimCodeGenerator::Texture_SSE() movaps(ptr[t0 + offsetof(GSScanlineLocalData, d4.stq)], xmm1); } - for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) + for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) { // GSVector4 ds = t.xxxx(); // GSVector4 dt = t.yyyy(); @@ -201,14 +201,14 @@ void GSSetupPrimCodeGenerator::Texture_SSE() movaps(xmm1, xmm0); shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j)); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // GSVector4 v = ds/dt * m_shift[i]; movaps(xmm2, xmm1); mulps(xmm2, Xmm(4 + i)); - if(m_sel.fst) + if (m_sel.fst) { // m_local.d[i].s/t = GSVector4i(v); @@ -217,10 +217,10 @@ void GSSetupPrimCodeGenerator::Texture_SSE() const size_t variableOffsetS = offsetof(GSScanlineLocalData, d[0].s) + (i * sizeof(GSScanlineLocalData::d[0])); const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); - switch(j) + switch (j) { - case 0: movdqa(ptr[t0 + variableOffsetS], xmm2); break; - case 1: movdqa(ptr[t0 + variableOffsetT], xmm2); break; + case 0: movdqa(ptr[t0 + variableOffsetS], xmm2); break; + case 1: movdqa(ptr[t0 + variableOffsetT], xmm2); break; } } else @@ -231,11 +231,11 @@ void GSSetupPrimCodeGenerator::Texture_SSE() const size_t variableOffsetT = offsetof(GSScanlineLocalData, d[0].t) + (i * sizeof(GSScanlineLocalData::d[0])); const size_t variableOffsetQ = offsetof(GSScanlineLocalData, d[0].q) + (i * sizeof(GSScanlineLocalData::d[0])); - switch(j) + switch (j) { - case 0: movaps(ptr[t0 + variableOffsetS], xmm2); break; - case 1: movaps(ptr[t0 + variableOffsetT], xmm2); break; - case 2: movaps(ptr[t0 + variableOffsetQ], xmm2); break; + case 0: movaps(ptr[t0 + variableOffsetS], xmm2); break; + case 1: movaps(ptr[t0 + variableOffsetT], xmm2); break; + case 2: movaps(ptr[t0 + variableOffsetQ], xmm2); break; } } } @@ -244,12 +244,12 @@ void GSSetupPrimCodeGenerator::Texture_SSE() void GSSetupPrimCodeGenerator::Color_SSE() { - if(!m_en.c) + if (!m_en.c) { return; } - if(m_sel.iip) + if (m_sel.iip) { // GSVector4 c = dscan.c; @@ -273,7 +273,7 @@ void GSSetupPrimCodeGenerator::Color_SSE() shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); @@ -308,7 +308,7 @@ void GSSetupPrimCodeGenerator::Color_SSE() shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); @@ -338,15 +338,15 @@ void GSSetupPrimCodeGenerator::Color_SSE() int last = 0; - switch(m_sel.prim) + switch (m_sel.prim) { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; + case GS_POINT_CLASS: last = 0; break; + case GS_LINE_CLASS: last = 1; break; + case GS_TRIANGLE_CLASS: last = 2; break; + case GS_SPRITE_CLASS: last = 1; break; } - if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() + if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() { mov(eax, ptr[a1 + sizeof(uint32) * last]); shl(eax, 6); // * sizeof(GSVertexSW) @@ -362,7 +362,7 @@ void GSSetupPrimCodeGenerator::Color_SSE() // if(!tme) c = c.srl16(7); - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { psrlw(xmm0, 7); } diff --git a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp index fbaf27775d..669e7ee924 100644 --- a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx.cpp @@ -34,11 +34,11 @@ static const int _dscan = _args + 12; void GSSetupPrimCodeGenerator::Generate_AVX() { - if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) + if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) { mov(edx, dword[esp + _dscan]); - for(int i = 0; i < (m_sel.notest ? 2 : 5); i++) + for (int i = 0; i < (m_sel.notest ? 2 : 5); i++) { vmovaps(Xmm(3 + i), ptr[g_const->m_shift_128b[i]]); } @@ -55,18 +55,18 @@ void GSSetupPrimCodeGenerator::Generate_AVX() void GSSetupPrimCodeGenerator::Depth_AVX() { - if(!m_en.z && !m_en.f) + if (!m_en.z && !m_en.f) { return; } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // GSVector4 p = dscan.p; vmovaps(xmm0, ptr[edx + offsetof(GSVertexSW, p)]); - if(m_en.f) + if (m_en.f) { // GSVector4 df = p.wwww(); @@ -80,7 +80,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX() vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); vmovdqa(ptr[&m_local.d4.f], xmm2); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); @@ -92,7 +92,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX() } } - if(m_en.z) + if (m_en.z) { // GSVector4 dz = p.zzzz(); @@ -103,7 +103,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX() vmulps(xmm1, xmm0, xmm3); vmovdqa(ptr[&m_local.d4.z], xmm1); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // m_local.d[i].z = dz * m_shift[i]; @@ -123,7 +123,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX() vmovaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); - if(m_en.f) + if (m_en.f) { // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); @@ -133,7 +133,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX() vmovdqa(ptr[&m_local.p.f], xmm1); } - if(m_en.z) + if (m_en.z) { // uint32 z is bypassed in t.w @@ -146,7 +146,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX() void GSSetupPrimCodeGenerator::Texture_AVX() { - if(!m_en.t) + if (!m_en.t) { return; } @@ -157,7 +157,7 @@ void GSSetupPrimCodeGenerator::Texture_AVX() vmulps(xmm1, xmm0, xmm3); - if(m_sel.fst) + if (m_sel.fst) { // m_local.d4.stq = GSVector4i(t * 4.0f); @@ -172,7 +172,7 @@ void GSSetupPrimCodeGenerator::Texture_AVX() vmovaps(ptr[&m_local.d4.stq], xmm1); } - for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) + for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) { // GSVector4 ds = t.xxxx(); // GSVector4 dt = t.yyyy(); @@ -180,33 +180,33 @@ void GSSetupPrimCodeGenerator::Texture_AVX() vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j)); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // GSVector4 v = ds/dt * m_shift[i]; vmulps(xmm2, xmm1, Xmm(4 + i)); - if(m_sel.fst) + if (m_sel.fst) { // m_local.d[i].s/t = GSVector4i(v); vcvttps2dq(xmm2, xmm2); - switch(j) + switch (j) { - case 0: vmovdqa(ptr[&m_local.d[i].s], xmm2); break; - case 1: vmovdqa(ptr[&m_local.d[i].t], xmm2); break; + case 0: vmovdqa(ptr[&m_local.d[i].s], xmm2); break; + case 1: vmovdqa(ptr[&m_local.d[i].t], xmm2); break; } } else { // m_local.d[i].s/t/q = v; - switch(j) + switch (j) { - case 0: vmovaps(ptr[&m_local.d[i].s], xmm2); break; - case 1: vmovaps(ptr[&m_local.d[i].t], xmm2); break; - case 2: vmovaps(ptr[&m_local.d[i].q], xmm2); break; + case 0: vmovaps(ptr[&m_local.d[i].s], xmm2); break; + case 1: vmovaps(ptr[&m_local.d[i].t], xmm2); break; + case 2: vmovaps(ptr[&m_local.d[i].q], xmm2); break; } } } @@ -215,12 +215,12 @@ void GSSetupPrimCodeGenerator::Texture_AVX() void GSSetupPrimCodeGenerator::Color_AVX() { - if(!m_en.c) + if (!m_en.c) { return; } - if(m_sel.iip) + if (m_sel.iip) { // GSVector4 c = dscan.c; @@ -242,7 +242,7 @@ void GSSetupPrimCodeGenerator::Color_AVX() vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); @@ -272,7 +272,7 @@ void GSSetupPrimCodeGenerator::Color_AVX() vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3)); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); @@ -298,15 +298,15 @@ void GSSetupPrimCodeGenerator::Color_AVX() int last = 0; - switch(m_sel.prim) + switch (m_sel.prim) { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; + case GS_POINT_CLASS: last = 0; break; + case GS_LINE_CLASS: last = 1; break; + case GS_TRIANGLE_CLASS: last = 2; break; + case GS_SPRITE_CLASS: last = 1; break; } - if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() + if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() { mov(ecx, ptr[esp + _index]); mov(ecx, ptr[ecx + sizeof(uint32) * last]); @@ -323,7 +323,7 @@ void GSSetupPrimCodeGenerator::Color_AVX() // if(!tme) c = c.srl16(7); - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { vpsrlw(xmm0, 7); } diff --git a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp index a42600cb82..e34d8bfb97 100644 --- a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp +++ b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x86.avx2.cpp @@ -34,11 +34,11 @@ static const int _dscan = _args + 12; void GSSetupPrimCodeGenerator::Generate_AVX2() { - if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) + if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) { mov(edx, dword[esp + _dscan]); - for(int i = 0; i < (m_sel.notest ? 2 : 5); i++) + for (int i = 0; i < (m_sel.notest ? 2 : 5); i++) { vmovaps(Ymm(3 + i), ptr[g_const->m_shift_256b[i]]); } @@ -55,12 +55,12 @@ void GSSetupPrimCodeGenerator::Generate_AVX2() void GSSetupPrimCodeGenerator::Depth_AVX2() { - if(!m_en.z && !m_en.f) + if (!m_en.z && !m_en.f) { return; } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // GSVector4 dp8 = dscan.p * GSVector4::broadcast32(&shift[0]); @@ -68,14 +68,14 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() vmulps(ymm1, ymm0, ymm3); - if(m_en.z) + if (m_en.z) { // m_local.d8.p.z = dp8.extract32<2>(); vextractps(ptr[&m_local.d8.p.z], xmm1, 2); } - if(m_en.f) + if (m_en.f) { // m_local.d8.p.f = GSVector4i(dp8).extract32<3>(); @@ -83,37 +83,41 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() vpextrd(ptr[&m_local.d8.p.f], xmm2, 3); } - if(m_en.z) + if (m_en.z) { // GSVector8 dz = GSVector8(dscan.p).zzzz(); vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2)); } - if(m_en.f) + if (m_en.f) { // GSVector8 df = GSVector8(dscan.p).wwww(); vshufps(ymm1, ymm0, ymm0, _MM_SHUFFLE(3, 3, 3, 3)); } - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) { - if(m_en.z) + if (m_en.z) { // m_local.d[i].z = dz * shift[1 + i]; - if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i)); - else vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); + if (i < 4) + vmulps(ymm0, ymm2, Ymm(4 + i)); + else + vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); vmovaps(ptr[&m_local.d[i].z], ymm0); } - if(m_en.f) + if (m_en.f) { // m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh(); - if(i < 4) vmulps(ymm0, ymm1, Ymm(4 + i)); - else vmulps(ymm0, ymm1, ptr[g_const->m_shift_256b[i + 1]]); + if (i < 4) + vmulps(ymm0, ymm1, Ymm(4 + i)); + else + vmulps(ymm0, ymm1, ptr[g_const->m_shift_256b[i + 1]]); vcvttps2dq(ymm0, ymm0); vpshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); @@ -130,7 +134,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() shl(ecx, 6); // * sizeof(GSVertexSW) add(ecx, ptr[esp + _vertex]); - if(m_en.f) + if (m_en.f) { // m_local.p.f = GSVector4i(vertex[index[1]].p).extract32<3>(); @@ -139,7 +143,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() vpextrd(ptr[&m_local.p.f], xmm0, 3); } - if(m_en.z) + if (m_en.z) { // m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w @@ -151,7 +155,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() void GSSetupPrimCodeGenerator::Texture_AVX2() { - if(!m_en.t) + if (!m_en.t) { return; } @@ -164,7 +168,7 @@ void GSSetupPrimCodeGenerator::Texture_AVX2() vmulps(ymm1, ymm0, ymm3); - if(m_sel.fst) + if (m_sel.fst) { // m_local.d8.stq = GSVector8::cast(GSVector8i(dt8)); @@ -179,40 +183,42 @@ void GSSetupPrimCodeGenerator::Texture_AVX2() vmovaps(ptr[&m_local.d8.stq], xmm1); } - for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) + for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) { // GSVector8 dstq = dt.xxxx/yyyy/zzzz(); vshufps(ymm1, ymm0, ymm0, (uint8)_MM_SHUFFLE(j, j, j, j)); - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) { // GSVector8 v = dstq * shift[1 + i]; - if(i < 4) vmulps(ymm2, ymm1, Ymm(4 + i)); - else vmulps(ymm2, ymm1, ptr[g_const->m_shift_256b[i + 1]]); + if (i < 4) + vmulps(ymm2, ymm1, Ymm(4 + i)); + else + vmulps(ymm2, ymm1, ptr[g_const->m_shift_256b[i + 1]]); - if(m_sel.fst) + if (m_sel.fst) { // m_local.d[i].s/t = GSVector8::cast(GSVector8i(v)); vcvttps2dq(ymm2, ymm2); - switch(j) + switch (j) { - case 0: vmovdqa(ptr[&m_local.d[i].s], ymm2); break; - case 1: vmovdqa(ptr[&m_local.d[i].t], ymm2); break; + case 0: vmovdqa(ptr[&m_local.d[i].s], ymm2); break; + case 1: vmovdqa(ptr[&m_local.d[i].t], ymm2); break; } } else { // m_local.d[i].s/t/q = v; - switch(j) + switch (j) { - case 0: vmovaps(ptr[&m_local.d[i].s], ymm2); break; - case 1: vmovaps(ptr[&m_local.d[i].t], ymm2); break; - case 2: vmovaps(ptr[&m_local.d[i].q], ymm2); break; + case 0: vmovaps(ptr[&m_local.d[i].s], ymm2); break; + case 1: vmovaps(ptr[&m_local.d[i].t], ymm2); break; + case 2: vmovaps(ptr[&m_local.d[i].q], ymm2); break; } } } @@ -221,12 +227,12 @@ void GSSetupPrimCodeGenerator::Texture_AVX2() void GSSetupPrimCodeGenerator::Color_AVX2() { - if(!m_en.c) + if (!m_en.c) { return; } - if(m_sel.iip) + if (m_sel.iip) { // GSVector8 dc(dscan.c); @@ -248,19 +254,23 @@ void GSSetupPrimCodeGenerator::Color_AVX2() vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(0, 0, 0, 0)); vshufps(ymm3, ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2)); - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) { // GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32(); - if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i)); - else vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); + if (i < 4) + vmulps(ymm0, ymm2, Ymm(4 + i)); + else + vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); vcvttps2dq(ymm0, ymm0); vpackssdw(ymm0, ymm0); // GSVector4i b = GSVector8i(db * shift[1 + i]).ps32(); - if(i < 4) vmulps(ymm1, ymm3, Ymm(4 + i)); - else vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]); + if (i < 4) + vmulps(ymm1, ymm3, Ymm(4 + i)); + else + vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]); vcvttps2dq(ymm1, ymm1); vpackssdw(ymm1, ymm1); @@ -280,19 +290,23 @@ void GSSetupPrimCodeGenerator::Color_AVX2() vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(1, 1, 1, 1)); vshufps(ymm3, ymm0, ymm0, _MM_SHUFFLE(3, 3, 3, 3)); - for(int i = 0; i < (m_sel.notest ? 1 : 8); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 8); i++) { // GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32(); - if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i)); - else vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); + if (i < 4) + vmulps(ymm0, ymm2, Ymm(4 + i)); + else + vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); vcvttps2dq(ymm0, ymm0); vpackssdw(ymm0, ymm0); // GSVector8i a = GSVector8i(da * shift[1 + i]).ps32(); - if(i < 4) vmulps(ymm1, ymm3, Ymm(4 + i)); - else vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]); + if (i < 4) + vmulps(ymm1, ymm3, Ymm(4 + i)); + else + vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]); vcvttps2dq(ymm1, ymm1); vpackssdw(ymm1, ymm1); @@ -308,15 +322,15 @@ void GSSetupPrimCodeGenerator::Color_AVX2() int last = 0; - switch(m_sel.prim) + switch (m_sel.prim) { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; + case GS_POINT_CLASS: last = 0; break; + case GS_LINE_CLASS: last = 1; break; + case GS_TRIANGLE_CLASS: last = 2; break; + case GS_SPRITE_CLASS: last = 1; break; } - if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() + if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() { mov(ecx, ptr[esp + _index]); mov(ecx, ptr[ecx + sizeof(uint32) * last]); @@ -334,7 +348,7 @@ void GSSetupPrimCodeGenerator::Color_AVX2() // if(!tme) c = c.srl16(7); - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { vpsrlw(ymm0, 7); } diff --git a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp index 6416ee1051..eaafd3a836 100644 --- a/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp +++ b/plugins/GSdx/Renderers/SW/GSSetupPrimCodeGenerator.x86.cpp @@ -34,11 +34,11 @@ static const int _dscan = _args + 12; void GSSetupPrimCodeGenerator::Generate_SSE() { - if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) + if ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) { mov(edx, dword[esp + _dscan]); - for(int i = 0; i < (m_sel.notest ? 2 : 5); i++) + for (int i = 0; i < (m_sel.notest ? 2 : 5); i++) { movaps(Xmm(3 + i), ptr[g_const->m_shift_128b[i]]); } @@ -55,18 +55,18 @@ void GSSetupPrimCodeGenerator::Generate_SSE() void GSSetupPrimCodeGenerator::Depth_SSE() { - if(!m_en.z && !m_en.f) + if (!m_en.z && !m_en.f) { return; } - if(m_sel.prim != GS_SPRITE_CLASS) + if (m_sel.prim != GS_SPRITE_CLASS) { // GSVector4 p = dscan.p; movaps(xmm0, ptr[edx + offsetof(GSVertexSW, p)]); - if(m_en.f) + if (m_en.f) { // GSVector4 df = p.wwww(); @@ -82,7 +82,7 @@ void GSSetupPrimCodeGenerator::Depth_SSE() pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); movdqa(ptr[&m_local.d4.f], xmm2); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh(); @@ -95,7 +95,7 @@ void GSSetupPrimCodeGenerator::Depth_SSE() } } - if(m_en.z) + if (m_en.z) { // GSVector4 dz = p.zzzz(); @@ -107,7 +107,7 @@ void GSSetupPrimCodeGenerator::Depth_SSE() mulps(xmm1, xmm3); movdqa(ptr[&m_local.d4.z], xmm1); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // m_local.d[i].z = dz * m_shift[i]; @@ -128,7 +128,7 @@ void GSSetupPrimCodeGenerator::Depth_SSE() movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, p)]); - if(m_en.f) + if (m_en.f) { // m_local.p.f = GSVector4i(p).zzzzh().zzzz(); @@ -138,7 +138,7 @@ void GSSetupPrimCodeGenerator::Depth_SSE() movdqa(ptr[&m_local.p.f], xmm1); } - if(m_en.z) + if (m_en.z) { // uint32 z is bypassed in t.w @@ -151,7 +151,7 @@ void GSSetupPrimCodeGenerator::Depth_SSE() void GSSetupPrimCodeGenerator::Texture_SSE() { - if(!m_en.t) + if (!m_en.t) { return; } @@ -163,7 +163,7 @@ void GSSetupPrimCodeGenerator::Texture_SSE() movaps(xmm1, xmm0); mulps(xmm1, xmm3); - if(m_sel.fst) + if (m_sel.fst) { // m_local.d4.stq = GSVector4i(t * 4.0f); @@ -178,7 +178,7 @@ void GSSetupPrimCodeGenerator::Texture_SSE() movaps(ptr[&m_local.d4.stq], xmm1); } - for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) + for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) { // GSVector4 ds = t.xxxx(); // GSVector4 dt = t.yyyy(); @@ -187,34 +187,34 @@ void GSSetupPrimCodeGenerator::Texture_SSE() movaps(xmm1, xmm0); shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j)); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // GSVector4 v = ds/dt * m_shift[i]; movaps(xmm2, xmm1); mulps(xmm2, Xmm(4 + i)); - if(m_sel.fst) + if (m_sel.fst) { // m_local.d[i].s/t = GSVector4i(v); cvttps2dq(xmm2, xmm2); - switch(j) + switch (j) { - case 0: movdqa(ptr[&m_local.d[i].s], xmm2); break; - case 1: movdqa(ptr[&m_local.d[i].t], xmm2); break; + case 0: movdqa(ptr[&m_local.d[i].s], xmm2); break; + case 1: movdqa(ptr[&m_local.d[i].t], xmm2); break; } } else { // m_local.d[i].s/t/q = v; - switch(j) + switch (j) { - case 0: movaps(ptr[&m_local.d[i].s], xmm2); break; - case 1: movaps(ptr[&m_local.d[i].t], xmm2); break; - case 2: movaps(ptr[&m_local.d[i].q], xmm2); break; + case 0: movaps(ptr[&m_local.d[i].s], xmm2); break; + case 1: movaps(ptr[&m_local.d[i].t], xmm2); break; + case 2: movaps(ptr[&m_local.d[i].q], xmm2); break; } } } @@ -223,12 +223,12 @@ void GSSetupPrimCodeGenerator::Texture_SSE() void GSSetupPrimCodeGenerator::Color_SSE() { - if(!m_en.c) + if (!m_en.c) { return; } - if(m_sel.iip) + if (m_sel.iip) { // GSVector4 c = dscan.c; @@ -252,7 +252,7 @@ void GSSetupPrimCodeGenerator::Color_SSE() shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // GSVector4i r = GSVector4i(dr * m_shift[i]).ps32(); @@ -285,7 +285,7 @@ void GSSetupPrimCodeGenerator::Color_SSE() shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); - for(int i = 0; i < (m_sel.notest ? 1 : 4); i++) + for (int i = 0; i < (m_sel.notest ? 1 : 4); i++) { // GSVector4i g = GSVector4i(dg * m_shift[i]).ps32(); @@ -313,15 +313,15 @@ void GSSetupPrimCodeGenerator::Color_SSE() int last = 0; - switch(m_sel.prim) + switch (m_sel.prim) { - case GS_POINT_CLASS: last = 0; break; - case GS_LINE_CLASS: last = 1; break; - case GS_TRIANGLE_CLASS: last = 2; break; - case GS_SPRITE_CLASS: last = 1; break; + case GS_POINT_CLASS: last = 0; break; + case GS_LINE_CLASS: last = 1; break; + case GS_TRIANGLE_CLASS: last = 2; break; + case GS_SPRITE_CLASS: last = 1; break; } - if(!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() + if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth() { mov(ecx, ptr[esp + _index]); mov(ecx, ptr[ecx + sizeof(uint32) * last]); @@ -338,7 +338,7 @@ void GSSetupPrimCodeGenerator::Color_SSE() // if(!tme) c = c.srl16(7); - if(m_sel.tfx == TFX_NONE) + if (m_sel.tfx == TFX_NONE) { psrlw(xmm0, 7); } diff --git a/plugins/GSdx/Renderers/SW/GSTextureCacheSW.cpp b/plugins/GSdx/Renderers/SW/GSTextureCacheSW.cpp index 1a839c71fe..da1cca63a3 100644 --- a/plugins/GSdx/Renderers/SW/GSTextureCacheSW.cpp +++ b/plugins/GSdx/Renderers/SW/GSTextureCacheSW.cpp @@ -38,21 +38,21 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons auto& m = m_map[TEX0.TBP0 >> 5]; - for(auto i = m.begin(); i != m.end(); ++i) + for (auto i = m.begin(); i != m.end(); ++i) { Texture* t = *i; - if(((TEX0.u32[0] ^ t->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ t->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH + if (((TEX0.u32[0] ^ t->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ t->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH { continue; } - if((psm.trbpp == 16 || psm.trbpp == 24) && TEX0.TCC && TEXA != t->m_TEXA) + if ((psm.trbpp == 16 || psm.trbpp == 24) && TEX0.TCC && TEXA != t->m_TEXA) { continue; } - if(tw0 != 0 && t->m_tw != tw0) + if (tw0 != 0 && t->m_tw != tw0) { continue; } @@ -68,7 +68,7 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons m_textures.insert(t); - for(const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++) + for (const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++) { const uint32 page = *p; t->m_erase_it[page] = m_map[page].InsertFront(t); @@ -79,19 +79,19 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm) { - for(const uint32* p = pages; *p != GSOffset::EOP; p++) + for (const uint32* p = pages; *p != GSOffset::EOP; p++) { const uint32 page = *p; - - for(Texture* t : m_map[page]) + + for (Texture* t : m_map[page]) { - if(GSUtil::HasSharedBits(psm, t->m_sharedbits)) + if (GSUtil::HasSharedBits(psm, t->m_sharedbits)) { uint32* RESTRICT valid = t->m_valid; - if(t->m_repeating) + if (t->m_repeating) { - for(const GSVector2i& j : t->m_p2t[page]) + for (const GSVector2i& j : t->m_p2t[page]) { valid[j.x] &= j.y; } @@ -109,11 +109,12 @@ void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm) void GSTextureCacheSW::RemoveAll() { - for(auto i : m_textures) delete i; + for (auto i : m_textures) + delete i; m_textures.clear(); - for(auto& l : m_map) + for (auto& l : m_map) { l.clear(); } @@ -121,15 +122,15 @@ void GSTextureCacheSW::RemoveAll() void GSTextureCacheSW::IncAge() { - for(auto i = m_textures.begin(); i != m_textures.end(); ) + for (auto i = m_textures.begin(); i != m_textures.end();) { Texture* t = *i; - if(++t->m_age > 10) + if (++t->m_age > 10) { i = m_textures.erase(i); - for(const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++) + for (const uint32* p = t->m_pages.n; *p != GSOffset::EOP; p++) { const uint32 page = *p; m_map[page].EraseIndex(t->m_erase_it[page]); @@ -157,7 +158,7 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& m_TEX0 = TEX0; m_TEXA = TEXA; - if(m_tw == 0) + if (m_tw == 0) { m_tw = std::max(m_TEX0.TW, GSLocalMemory::m_psm[m_TEX0.PSM].pal == 0 ? 3 : 5); // makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff } @@ -173,7 +174,7 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& m_repeating = m_TEX0.IsRepeating(); // repeating mode always works, it is just slightly slower - if(m_repeating) + if (m_repeating) { m_p2t = m_state->m_mem.GetPage2TileMap(m_TEX0); } @@ -181,9 +182,9 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& GSTextureCacheSW::Texture::~Texture() { - delete [] m_pages.n; + delete[] m_pages.n; - if(m_buff) + if (m_buff) { _aligned_free(m_buff); } @@ -191,7 +192,7 @@ GSTextureCacheSW::Texture::~Texture() bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect) { - if(m_complete) + if (m_complete) { return true; } @@ -209,18 +210,18 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect) r = r.ralign(bs); - if(r.eq(GSVector4i(0, 0, tw, th))) + if (r.eq(GSVector4i(0, 0, tw, th))) { m_complete = true; // lame, but better than nothing } - if(m_buff == NULL) + if (m_buff == NULL) { uint32 pitch = (1 << m_tw) << shift; - + m_buff = _aligned_malloc(pitch * th * 4, 32); - if(m_buff == NULL) + if (m_buff == NULL) { return false; } @@ -247,20 +248,20 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect) shift += 3; - if(m_repeating) + if (m_repeating) { - for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch) + for (int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch) { uint32 base = off->block.row[y]; - for(int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x) + for (int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x) { uint32 block = (base + off->block.col[x]) % MAX_BLOCKS; uint32 row = i >> 5; uint32 col = 1 << (i & 31); - if((m_valid[row] & col) == 0) + if ((m_valid[row] & col) == 0) { m_valid[row] |= col; @@ -273,18 +274,18 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect) } else { - for(int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch) + for (int y = r.top; y < r.bottom; y += bs.y, dst += block_pitch) { uint32 base = off->block.row[y]; - for(int x = r.left; x < r.right; x += bs.x) + for (int x = r.left; x < r.right; x += bs.x) { uint32 block = (base + off->block.col[x]) % MAX_BLOCKS; uint32 row = block >> 5; uint32 col = 1 << (block & 31); - if((m_valid[row] & col) == 0) + if ((m_valid[row] & col) == 0) { m_valid[row] |= col; @@ -296,7 +297,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect) } } - if(blocks > 0) + if (blocks > 0) { m_state->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << shift); } @@ -317,22 +318,22 @@ bool GSTextureCacheSW::Texture::Save(const std::string& fn, bool dds) const GSTexture::GSMap m; - if(t.Map(m, NULL)) + if (t.Map(m, NULL)) { const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM]; const uint8* RESTRICT src = (uint8*)m_buff; int pitch = 1 << (m_tw + (psm.pal == 0 ? 2 : 0)); - for(int j = 0; j < h; j++, src += pitch, m.bits += m.pitch) + for (int j = 0; j < h; j++, src += pitch, m.bits += m.pitch) { - if(psm.pal == 0) + if (psm.pal == 0) { memcpy(m.bits, src, sizeof(uint32) * w); } else { - for(int i = 0; i < w; i++) + for (int i = 0; i < w; i++) { ((uint32*)m.bits)[i] = clut[src[i]]; } diff --git a/plugins/GSdx/Renderers/SW/GSTextureCacheSW.h b/plugins/GSdx/Renderers/SW/GSTextureCacheSW.h index 008c131d6c..7d7df71cc6 100644 --- a/plugins/GSdx/Renderers/SW/GSTextureCacheSW.h +++ b/plugins/GSdx/Renderers/SW/GSTextureCacheSW.h @@ -42,7 +42,7 @@ public: std::vector* m_p2t; uint32 m_valid[MAX_PAGES]; std::array m_erase_it; - struct {uint32 bm[16]; const uint32* n;} m_pages; + struct { uint32 bm[16]; const uint32* n; } m_pages; const uint32* RESTRICT m_sharedbits; // m_valid diff --git a/plugins/GSdx/Renderers/SW/GSTextureSW.cpp b/plugins/GSdx/Renderers/SW/GSTextureSW.cpp index b8c69f09cd..ea003be42a 100644 --- a/plugins/GSdx/Renderers/SW/GSTextureSW.cpp +++ b/plugins/GSdx/Renderers/SW/GSTextureSW.cpp @@ -42,14 +42,14 @@ bool GSTextureSW::Update(const GSVector4i& r, const void* data, int pitch, int l { GSMap m; - if(m_data != NULL && Map(m, &r)) + if (m_data != NULL && Map(m, &r)) { uint8* RESTRICT src = (uint8*)data; uint8* RESTRICT dst = m.bits; int rowbytes = r.width() << 2; - for(int h = r.height(); h > 0; h--, src += pitch, dst += m.pitch) + for (int h = r.height(); h > 0; h--, src += pitch, dst += m.pitch) { memcpy(dst, src, rowbytes); } @@ -66,7 +66,7 @@ bool GSTextureSW::Map(GSMap& m, const GSVector4i* r, int layer) { GSVector4i r2 = r != NULL ? *r : GSVector4i(0, 0, m_size.x, m_size.y); - if(m_data != NULL && r2.left >= 0 && r2.right <= m_size.x && r2.top >= 0 && r2.bottom <= m_size.y) + if (m_data != NULL && r2.left >= 0 && r2.right <= m_size.x && r2.top >= 0 && r2.bottom <= m_size.y) { if (!m_mapped.test_and_set(std::memory_order_acquire)) { diff --git a/plugins/GSdx/Renderers/SW/GSVertexSW.h b/plugins/GSdx/Renderers/SW/GSVertexSW.h index b902fd4f41..9b7dba7aeb 100644 --- a/plugins/GSdx/Renderers/SW/GSVertexSW.h +++ b/plugins/GSdx/Renderers/SW/GSVertexSW.h @@ -28,7 +28,7 @@ struct alignas(32) GSVertexSW GSVector4 p, _pad, t, c; __forceinline GSVertexSW() {} - __forceinline GSVertexSW(const GSVertexSW& v) {*this = v;} + __forceinline GSVertexSW(const GSVertexSW& v) { *this = v; } __forceinline static GSVertexSW zero() { @@ -40,21 +40,21 @@ struct alignas(32) GSVertexSW return v; } - __forceinline void operator = (const GSVertexSW& v) + __forceinline void operator=(const GSVertexSW& v) { - p = v.p; + p = v.p; t = v.t; - c = v.c; - } - - __forceinline void operator += (const GSVertexSW& v) - { - p += v.p; - t += v.t; - c += v.c; + c = v.c; } - __forceinline friend GSVertexSW operator + (const GSVertexSW& a, const GSVertexSW& b) + __forceinline void operator+=(const GSVertexSW& v) + { + p += v.p; + t += v.t; + c += v.c; + } + + __forceinline friend GSVertexSW operator+(const GSVertexSW& a, const GSVertexSW& b) { GSVertexSW v; @@ -65,7 +65,7 @@ struct alignas(32) GSVertexSW return v; } - __forceinline friend GSVertexSW operator - (const GSVertexSW& a, const GSVertexSW& b) + __forceinline friend GSVertexSW operator-(const GSVertexSW& a, const GSVertexSW& b) { GSVertexSW v; @@ -76,7 +76,7 @@ struct alignas(32) GSVertexSW return v; } - __forceinline friend GSVertexSW operator * (const GSVertexSW& a, const GSVector4& b) + __forceinline friend GSVertexSW operator*(const GSVertexSW& a, const GSVector4& b) { GSVertexSW v; @@ -87,7 +87,7 @@ struct alignas(32) GSVertexSW return v; } - __forceinline friend GSVertexSW operator / (const GSVertexSW& a, const GSVector4& b) + __forceinline friend GSVertexSW operator/(const GSVertexSW& a, const GSVector4& b) { GSVertexSW v; @@ -114,21 +114,21 @@ struct alignas(32) GSVertexSW int i; - if(v12.allfalse()) + if (v12.allfalse()) { test = (v01 ^ v02) & (v01 ^ v02.zwxy()); vtl = v0; vbr = v1 + (v2 - v0); i = 0; } - else if(v02.allfalse()) + else if (v02.allfalse()) { test = (v01 ^ v12) & (v01 ^ v12.zwxy()); vtl = v1; vbr = v0 + (v2 - v1); i = 1; } - else if(v01.allfalse()) + else if (v01.allfalse()) { test = (v02 ^ v12) & (v02 ^ v12.zwxy()); vtl = v2; @@ -140,7 +140,7 @@ struct alignas(32) GSVertexSW return false; } - if(!test.alltrue()) + if (!test.alltrue()) { return false; } @@ -155,17 +155,17 @@ struct alignas(32) GSVertexSW GSVector4 v45 = v4 == v5; GSVector4 v35 = v3 == v5; - if(v34.allfalse()) + if (v34.allfalse()) { test = (v35 ^ v45) & (v35 ^ v45.zwxy()) & (vtl + v5 == v3 + v4) & (vbr == v5); i = 5; } - else if(v35.allfalse()) + else if (v35.allfalse()) { test = (v34 ^ v45) & (v34 ^ v45.zwxy()) & (vtl + v4 == v3 + v5) & (vbr == v4); i = 4; } - else if(v45.allfalse()) + else if (v45.allfalse()) { test = (v34 ^ v35) & (v34 ^ v35.zwxy()) & (vtl + v3 == v5 + v4) & (vbr == v3); i = 3; @@ -175,14 +175,14 @@ struct alignas(32) GSVertexSW return false; } - if(!test.alltrue()) + if (!test.alltrue()) { return false; } br = i; - #if _M_SSE >= 0x500 +#if _M_SSE >= 0x500 { // p.z, p.w, t.z, t.w, c.x, c.y, c.z, c.w @@ -198,8 +198,8 @@ struct alignas(32) GSVertexSW return test.alltrue(); } - - #else + +#else v0 = v[0].p.zwzw(v[0].t); v1 = v[1].p.zwzw(v[1].t); @@ -210,7 +210,7 @@ struct alignas(32) GSVertexSW test = ((v0 == v1) & (v0 == v2)) & ((v0 == v3) & (v0 == v4)) & (v0 == v5); - if(!test.alltrue()) + if (!test.alltrue()) { return false; } @@ -224,14 +224,14 @@ struct alignas(32) GSVertexSW test = ((v0 == v1) & (v0 == v2)) & ((v0 == v3) & (v0 == v4)) & (v0 == v5); - if(!test.alltrue()) + if (!test.alltrue()) { return false; } return true; - #endif +#endif } }; @@ -243,15 +243,15 @@ struct alignas(32) GSVertexSW2 GSVector8 tc; __forceinline GSVertexSW2() {} - __forceinline GSVertexSW2(const GSVertexSW2& v) {*this = v;} + __forceinline GSVertexSW2(const GSVertexSW2& v) { *this = v; } - __forceinline void operator = (const GSVertexSW2& v) + __forceinline void operator=(const GSVertexSW2& v) { - p = v.p; + p = v.p; tc = v.tc; } - __forceinline friend GSVertexSW2 operator - (const GSVertexSW2& a, const GSVertexSW2& b) + __forceinline friend GSVertexSW2 operator-(const GSVertexSW2& a, const GSVertexSW2& b) { GSVertexSW2 v; @@ -263,4 +263,3 @@ struct alignas(32) GSVertexSW2 }; #endif - diff --git a/plugins/GSdx/Window/GSCaptureDlg.cpp b/plugins/GSdx/Window/GSCaptureDlg.cpp index cbbc29782b..e59b3815ae 100644 --- a/plugins/GSdx/Window/GSCaptureDlg.cpp +++ b/plugins/GSdx/Window/GSCaptureDlg.cpp @@ -24,14 +24,14 @@ #include "GSCaptureDlg.h" #define BeginEnumSysDev(clsid, pMoniker) \ - {CComPtr pDevEnum4$##clsid; \ - pDevEnum4$##clsid.CoCreateInstance(CLSID_SystemDeviceEnum); \ - CComPtr pClassEnum4$##clsid; \ - if(SUCCEEDED(pDevEnum4$##clsid->CreateClassEnumerator(clsid, &pClassEnum4$##clsid, 0)) \ - && pClassEnum4$##clsid) \ { \ - for(CComPtr pMoniker; pClassEnum4$##clsid->Next(1, &pMoniker, 0) == S_OK; pMoniker = NULL) \ + CComPtr pDevEnum4$##clsid; \ + pDevEnum4$##clsid.CoCreateInstance(CLSID_SystemDeviceEnum); \ + CComPtr pClassEnum4$##clsid; \ + if (SUCCEEDED(pDevEnum4$##clsid->CreateClassEnumerator(clsid, &pClassEnum4$##clsid, 0)) && pClassEnum4$##clsid) \ { \ + for (CComPtr pMoniker; pClassEnum4$##clsid->Next(1, &pMoniker, 0) == S_OK; pMoniker = NULL) \ + { #define EndEnumSysDev }}} @@ -53,17 +53,19 @@ int GSCaptureDlg::GetSelCodec(Codec& c) { INT_PTR data = 0; - if(ComboBoxGetSelData(IDC_CODECS, data)) + if (ComboBoxGetSelData(IDC_CODECS, data)) { - if(data == 0) return 2; + if (data == 0) + return 2; c = *(Codec*)data; - if(!c.filter) + if (!c.filter) { c.moniker->BindToObject(NULL, NULL, __uuidof(IBaseFilter), (void**)&c.filter); - if(!c.filter) return 0; + if (!c.filter) + return 0; } return 1; @@ -124,12 +126,12 @@ void GSCaptureDlg::OnInit() LPOLESTR str = NULL; - if(FAILED(moniker->GetDisplayName(NULL, NULL, &str))) + if (FAILED(moniker->GetDisplayName(NULL, NULL, &str))) continue; - if(wcsstr(str, L"@device:dmo:")) prefix = L"(DMO) "; - else if(wcsstr(str, L"@device:sw:")) prefix = L"(DS) "; - else if(wcsstr(str, L"@device:cm:")) prefix = L"(VfW) "; + if (wcsstr(str, L"@device:dmo:")) prefix = L"(DMO) "; + else if (wcsstr(str, L"@device:sw:")) prefix = L"(DS) "; + else if (wcsstr(str, L"@device:cm:")) prefix = L"(VfW) "; c.DisplayName = str; @@ -137,12 +139,12 @@ void GSCaptureDlg::OnInit() CComPtr pPB; - if(FAILED(moniker->BindToStorage(0, 0, IID_IPropertyBag, (void**)&pPB))) + if (FAILED(moniker->BindToStorage(0, 0, IID_IPropertyBag, (void**)&pPB))) continue; _variant_t var; - if(FAILED(pPB->Read(_bstr_t(_T("FriendlyName")), &var, NULL))) + if (FAILED(pPB->Read(_bstr_t(_T("FriendlyName")), &var, NULL))) continue; c.FriendlyName = prefix + var.bstrVal; @@ -159,102 +161,102 @@ bool GSCaptureDlg::OnCommand(HWND hWnd, UINT id, UINT code) { switch (id) { - case IDC_FILENAME: - { - EnableWindow(GetDlgItem(m_hWnd, IDOK), GetText(IDC_FILENAME).length() != 0); - return false; - } - case IDC_BROWSE: - { - if (code == BN_CLICKED) + case IDC_FILENAME: { - wchar_t buff[MAX_PATH] = { 0 }; - - OPENFILENAME ofn; - memset(&ofn, 0, sizeof(ofn)); - - ofn.lStructSize = sizeof(ofn); - ofn.hwndOwner = m_hWnd; - ofn.lpstrFile = buff; - ofn.nMaxFile = countof(buff); - ofn.lpstrFilter = L"Avi files (*.avi)\0*.avi\0"; - ofn.Flags = OFN_EXPLORER | OFN_ENABLESIZING | OFN_HIDEREADONLY | OFN_OVERWRITEPROMPT | OFN_PATHMUSTEXIST; - - wcscpy(ofn.lpstrFile, m_filename.c_str()); - if (GetSaveFileName(&ofn)) - { - m_filename = ofn.lpstrFile; - SetText(IDC_FILENAME, m_filename.c_str()); - } - - return true; + EnableWindow(GetDlgItem(m_hWnd, IDOK), GetText(IDC_FILENAME).length() != 0); + return false; } - break; - } - case IDC_CONFIGURE: - { - if (code == BN_CLICKED) + case IDC_BROWSE: { - Codec c; - if (GetSelCodec(c) == 1) + if (code == BN_CLICKED) { - if (CComQIPtr pSPP = c.filter) + wchar_t buff[MAX_PATH] = {0}; + + OPENFILENAME ofn; + memset(&ofn, 0, sizeof(ofn)); + + ofn.lStructSize = sizeof(ofn); + ofn.hwndOwner = m_hWnd; + ofn.lpstrFile = buff; + ofn.nMaxFile = countof(buff); + ofn.lpstrFilter = L"Avi files (*.avi)\0*.avi\0"; + ofn.Flags = OFN_EXPLORER | OFN_ENABLESIZING | OFN_HIDEREADONLY | OFN_OVERWRITEPROMPT | OFN_PATHMUSTEXIST; + + wcscpy(ofn.lpstrFile, m_filename.c_str()); + if (GetSaveFileName(&ofn)) { - CAUUID caGUID; - memset(&caGUID, 0, sizeof(caGUID)); + m_filename = ofn.lpstrFile; + SetText(IDC_FILENAME, m_filename.c_str()); + } - if (SUCCEEDED(pSPP->GetPages(&caGUID))) + return true; + } + break; + } + case IDC_CONFIGURE: + { + if (code == BN_CLICKED) + { + Codec c; + if (GetSelCodec(c) == 1) + { + if (CComQIPtr pSPP = c.filter) { - IUnknown* lpUnk = NULL; - pSPP.QueryInterface(&lpUnk); - OleCreatePropertyFrame(m_hWnd, 0, 0, c.FriendlyName.c_str(), 1, (IUnknown**)&lpUnk, caGUID.cElems, caGUID.pElems, 0, 0, NULL); - lpUnk->Release(); + CAUUID caGUID; + memset(&caGUID, 0, sizeof(caGUID)); - if (caGUID.pElems) - CoTaskMemFree(caGUID.pElems); + if (SUCCEEDED(pSPP->GetPages(&caGUID))) + { + IUnknown* lpUnk = NULL; + pSPP.QueryInterface(&lpUnk); + OleCreatePropertyFrame(m_hWnd, 0, 0, c.FriendlyName.c_str(), 1, (IUnknown**)&lpUnk, caGUID.cElems, caGUID.pElems, 0, 0, NULL); + lpUnk->Release(); + + if (caGUID.pElems) + CoTaskMemFree(caGUID.pElems); + } + } + else if (CComQIPtr pAMVfWCD = c.filter) + { + if (pAMVfWCD->ShowDialog(VfwCompressDialog_QueryConfig, NULL) == S_OK) + pAMVfWCD->ShowDialog(VfwCompressDialog_Config, m_hWnd); } } - else if (CComQIPtr pAMVfWCD = c.filter) - { - if (pAMVfWCD->ShowDialog(VfwCompressDialog_QueryConfig, NULL) == S_OK) - pAMVfWCD->ShowDialog(VfwCompressDialog_Config, m_hWnd); - } + return true; } - return true; + break; } - break; - } - case IDC_CODECS: - { - UpdateConfigureButton(); - break; - } - case IDOK: - { - m_width = GetTextAsInt(IDC_WIDTH); - m_height = GetTextAsInt(IDC_HEIGHT); - m_filename = GetText(IDC_FILENAME); - ComboBoxGetSelData(IDC_COLORSPACE, m_colorspace); + case IDC_CODECS: + { + UpdateConfigureButton(); + break; + } + case IDOK: + { + m_width = GetTextAsInt(IDC_WIDTH); + m_height = GetTextAsInt(IDC_HEIGHT); + m_filename = GetText(IDC_FILENAME); + ComboBoxGetSelData(IDC_COLORSPACE, m_colorspace); - Codec c; - int ris = GetSelCodec(c); - if (ris == 0) - return false; + Codec c; + int ris = GetSelCodec(c); + if (ris == 0) + return false; - m_enc = c.filter; + m_enc = c.filter; - theApp.SetConfig("CaptureWidth", m_width); - theApp.SetConfig("CaptureHeight", m_height); - theApp.SetConfig("CaptureFileName", convert_utf16_to_utf8(m_filename).c_str()); + theApp.SetConfig("CaptureWidth", m_width); + theApp.SetConfig("CaptureHeight", m_height); + theApp.SetConfig("CaptureFileName", convert_utf16_to_utf8(m_filename).c_str()); - if (ris != 2) - theApp.SetConfig("CaptureVideoCodecDisplayName", c.DisplayName); - else - theApp.SetConfig("CaptureVideoCodecDisplayName", ""); - break; - } - default: - break; + if (ris != 2) + theApp.SetConfig("CaptureVideoCodecDisplayName", c.DisplayName); + else + theApp.SetConfig("CaptureVideoCodecDisplayName", ""); + break; + } + default: + break; } return __super::OnCommand(hWnd, id, code); } diff --git a/plugins/GSdx/Window/GSDialog.cpp b/plugins/GSdx/Window/GSDialog.cpp index 55f39d97c9..17a7f92559 100644 --- a/plugins/GSdx/Window/GSDialog.cpp +++ b/plugins/GSdx/Window/GSDialog.cpp @@ -41,7 +41,7 @@ INT_PTR CALLBACK GSDialog::DialogProc(HWND hWnd, UINT message, WPARAM wParam, LP { GSDialog* dlg = NULL; - if(message == WM_INITDIALOG) + if (message == WM_INITDIALOG) { dlg = (GSDialog*)lParam; SetWindowLongPtr(hWnd, GWLP_USERDATA, (LONG_PTR)dlg); @@ -91,11 +91,13 @@ INT_PTR CALLBACK GSDialog::DialogProc(HWND hWnd, UINT message, WPARAM wParam, LP UINT GSDialog::GetTooltipStructSize() { DLLGETVERSIONPROC dllGetVersion = (DLLGETVERSIONPROC)GetProcAddress(GetModuleHandle(L"ComCtl32.dll"), "DllGetVersion"); - if (dllGetVersion) { - DLLVERSIONINFO2 dllversion = { 0 }; + if (dllGetVersion) + { + DLLVERSIONINFO2 dllversion = {0}; dllversion.info1.cbSize = sizeof(DLLVERSIONINFO2); - if (dllGetVersion((DLLVERSIONINFO*)&dllversion) == S_OK) { + if (dllGetVersion((DLLVERSIONINFO*)&dllversion) == S_OK) + { // Minor, then major version. DWORD version = MAKELONG(dllversion.info1.dwMinorVersion, dllversion.info1.dwMajorVersion); DWORD tooltip_v3 = MAKELONG(0, 6); @@ -115,7 +117,7 @@ bool GSDialog::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) bool GSDialog::OnCommand(HWND hWnd, UINT id, UINT code) { - if(id == IDOK || id == IDCANCEL) + if (id == IDOK || id == IDCANCEL) { EndDialog(m_hWnd, id); @@ -131,17 +133,17 @@ std::wstring GSDialog::GetText(UINT id) wchar_t* buff = NULL; - for(int size = 256, limit = 65536; size < limit; size <<= 1) + for (int size = 256, limit = 65536; size < limit; size <<= 1) { buff = new wchar_t[size]; - if(GetDlgItemText(m_hWnd, id, buff, size)) + if (GetDlgItemText(m_hWnd, id, buff, size)) { s = buff; size = limit; } - delete [] buff; + delete[] buff; } return s; @@ -177,15 +179,15 @@ void GSDialog::ComboBoxInit(UINT id, const std::vector& settings, int if (std::none_of(settings.begin(), settings.end(), is_present)) selectionValue = settings.front().value; - for(size_t i = 0; i < settings.size(); i++) + for (size_t i = 0; i < settings.size(); i++) { const GSSetting& s = settings[i]; - if(s.value <= maxValue) + if (s.value <= maxValue) { std::string str(s.name); - if(!s.note.empty()) + if (!s.note.empty()) { str = str + " (" + s.note + ")"; } @@ -229,7 +231,7 @@ bool GSDialog::ComboBoxGetSelData(UINT id, INT_PTR& data) const int item = (int)SendMessage(hWnd, CB_GETCURSEL, 0, 0); - if(item >= 0) + if (item >= 0) { data = SendMessage(hWnd, CB_GETITEMDATA, item, 0); @@ -245,7 +247,7 @@ void GSDialog::ComboBoxFixDroppedWidth(UINT id) int count = (int)SendMessage(hWnd, CB_GETCOUNT, 0, 0); - if(count > 0) + if (count > 0) { HDC hDC = GetDC(hWnd); @@ -253,42 +255,43 @@ void GSDialog::ComboBoxFixDroppedWidth(UINT id) int width = (int)SendMessage(hWnd, CB_GETDROPPEDWIDTH, 0, 0); - for(int i = 0; i < count; i++) + for (int i = 0; i < count; i++) { int len = (int)SendMessage(hWnd, CB_GETLBTEXTLEN, i, 0); - if(len > 0) + if (len > 0) { wchar_t* buff = new wchar_t[len + 1]; SendMessage(hWnd, CB_GETLBTEXT, i, (LPARAM)buff); SIZE size; - - if(GetTextExtentPoint32(hDC, buff, wcslen(buff), &size)) + + if (GetTextExtentPoint32(hDC, buff, wcslen(buff), &size)) { size.cx += 10; - if(size.cx > width) width = size.cx; + if (size.cx > width) + width = size.cx; } - delete [] buff; + delete[] buff; } } ReleaseDC(hWnd, hDC); - if(width > 0) + if (width > 0) { SendMessage(hWnd, CB_SETDROPPEDWIDTH, width, 0); } } } -void GSDialog::OpenFileDialog(UINT id, const wchar_t *title) +void GSDialog::OpenFileDialog(UINT id, const wchar_t* title) { wchar_t filename[512]; - OPENFILENAME ofn = { 0 }; + OPENFILENAME ofn = {0}; ofn.lStructSize = sizeof(OPENFILENAME); ofn.hwndOwner = m_hWnd; ofn.Flags = OFN_EXPLORER | OFN_FILEMUSTEXIST; @@ -307,7 +310,6 @@ void GSDialog::OpenFileDialog(UINT id, const wchar_t *title) SendMessage(GetDlgItem(m_hWnd, id), WM_SETTEXT, 0, (LPARAM)filename); SetCurrentDirectory(current_directory); - } void GSDialog::AddTooltip(UINT id) @@ -331,7 +333,7 @@ void GSDialog::AddTooltip(UINT id) if (hwndTip == NULL) return; - TOOLINFO toolInfo = { 0 }; + TOOLINFO toolInfo = {0}; toolInfo.cbSize = tooltipStructSize; toolInfo.hwnd = m_hWnd; toolInfo.uFlags = TTF_IDISHWND | TTF_SUBCLASS; diff --git a/plugins/GSdx/Window/GSDialog.h b/plugins/GSdx/Window/GSDialog.h index ffa6297ad4..3cecbbd1df 100644 --- a/plugins/GSdx/Window/GSDialog.h +++ b/plugins/GSdx/Window/GSDialog.h @@ -41,7 +41,7 @@ public: GSDialog(UINT id); virtual ~GSDialog() {} - int GetId() const {return m_id;} + int GetId() const { return m_id; } INT_PTR DoModal(); @@ -57,7 +57,7 @@ public: bool ComboBoxGetSelData(UINT id, INT_PTR& data); void ComboBoxFixDroppedWidth(UINT id); - void OpenFileDialog(UINT id, const wchar_t *title); + void OpenFileDialog(UINT id, const wchar_t* title); void AddTooltip(UINT id); diff --git a/plugins/GSdx/Window/GSSetting.cpp b/plugins/GSdx/Window/GSSetting.cpp index 0628ddb0b7..63377bcd6a 100644 --- a/plugins/GSdx/Window/GSSetting.cpp +++ b/plugins/GSdx/Window/GSSetting.cpp @@ -223,4 +223,4 @@ const char* dialog_message(int ID, bool* updateText) return cvtString(""); } } -#undef cvtString \ No newline at end of file +#undef cvtString diff --git a/plugins/GSdx/Window/GSSetting.h b/plugins/GSdx/Window/GSSetting.h index e2d0248242..76120feda6 100644 --- a/plugins/GSdx/Window/GSSetting.h +++ b/plugins/GSdx/Window/GSSetting.h @@ -29,11 +29,11 @@ struct GSSetting std::string name; std::string note; - template< typename T> - explicit GSSetting(T value, const char* name, const char* note) : - value(static_cast(value)), - name(name), - note(note) + template + explicit GSSetting(T value, const char* name, const char* note) + : value(static_cast(value)) + , name(name) + , note(note) { } }; @@ -45,7 +45,8 @@ const char* dialog_message(int ID, bool* updateText = NULL); #endif #ifndef _WIN32 -enum { +enum +{ IDC_FILTER, IDC_HALF_SCREEN_TS, IDC_TRI_FILTER, diff --git a/plugins/GSdx/Window/GSSettingsDlg.cpp b/plugins/GSdx/Window/GSSettingsDlg.cpp index e80cae8b7f..c1e7009ef7 100644 --- a/plugins/GSdx/Window/GSSettingsDlg.cpp +++ b/plugins/GSdx/Window/GSSettingsDlg.cpp @@ -37,7 +37,7 @@ GSSettingsDlg::GSSettingsDlg() { if (m_d3d11_adapters.empty()) { - auto is_d3d11_renderer = [](const auto &renderer) { + auto is_d3d11_renderer = [](const auto& renderer) { const GSRendererType type = static_cast(renderer.value); return type == GSRendererType::DX1011_HW; }; @@ -52,7 +52,7 @@ std::vector GSSettingsDlg::EnumerateD3D11Adapters() if (dxgi_factory == nullptr) return {}; - std::vector adapters { + std::vector adapters{ {"Default Hardware Device", "default", GSUtil::CheckDirect3D11Level(nullptr, D3D_DRIVER_TYPE_HARDWARE)}, #ifdef _DEBUG {"Reference Device", "ref", GSUtil::CheckDirect3D11Level(nullptr, D3D_DRIVER_TYPE_REFERENCE)}, @@ -74,7 +74,7 @@ std::vector GSSettingsDlg::EnumerateD3D11Adapters() adapters.push_back({buf.data(), GSAdapter(desc), level}); } - auto unsupported_adapter = [](const auto &adapter) { return adapter.level < D3D_FEATURE_LEVEL_10_0; }; + auto unsupported_adapter = [](const auto& adapter) { return adapter.level < D3D_FEATURE_LEVEL_10_0; }; adapters.erase(std::remove_if(adapters.begin(), adapters.end(), unsupported_adapter), adapters.end()); return adapters; @@ -188,17 +188,17 @@ bool GSSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code) { INT_PTR data; - if(ComboBoxGetSelData(IDC_ADAPTER, data)) + if (ComboBoxGetSelData(IDC_ADAPTER, data)) { theApp.SetConfig("Adapter", (*m_current_adapters)[data].id.c_str()); } - if(ComboBoxGetSelData(IDC_RENDERER, data)) + if (ComboBoxGetSelData(IDC_RENDERER, data)) { theApp.SetConfig("Renderer", (int)data); } - if(ComboBoxGetSelData(IDC_INTERLACE, data)) + if (ComboBoxGetSelData(IDC_INTERLACE, data)) { theApp.SetConfig("interlace", (int)data); } @@ -208,7 +208,7 @@ bool GSSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code) theApp.SetConfig("mipmap_hw", (int)data); } - if(ComboBoxGetSelData(IDC_UPSCALE_MULTIPLIER, data)) + if (ComboBoxGetSelData(IDC_UPSCALE_MULTIPLIER, data)) { theApp.SetConfig("upscale_multiplier", (int)data); } @@ -222,12 +222,12 @@ bool GSSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code) theApp.SetConfig("filter", (int)data); } - if(ComboBoxGetSelData(IDC_ACCURATE_BLEND_UNIT, data)) + if (ComboBoxGetSelData(IDC_ACCURATE_BLEND_UNIT, data)) { theApp.SetConfig("accurate_blending_unit", (int)data); } - if(ComboBoxGetSelData(IDC_ACCURATE_BLEND_UNIT_D3D11, data)) + if (ComboBoxGetSelData(IDC_ACCURATE_BLEND_UNIT_D3D11, data)) { theApp.SetConfig("accurate_blending_unit_d3d11", (int)data); } @@ -237,7 +237,7 @@ bool GSSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code) theApp.SetConfig("crc_hack_level", (int)data); } - if(ComboBoxGetSelData(IDC_AFCOMBO, data)) + if (ComboBoxGetSelData(IDC_AFCOMBO, data)) { theApp.SetConfig("MaxAnisotropy", (int)data); } @@ -303,20 +303,20 @@ void GSSettingsDlg::UpdateControls() int integer_scaling = 0; // in case reading the combo doesn't work, enable the custom res control anyway - if(ComboBoxGetSelData(IDC_UPSCALE_MULTIPLIER, i)) + if (ComboBoxGetSelData(IDC_UPSCALE_MULTIPLIER, i)) { integer_scaling = (int)i; } - if(ComboBoxGetSelData(IDC_RENDERER, i)) + if (ComboBoxGetSelData(IDC_RENDERER, i)) { const GSRendererType renderer = static_cast(i); const bool dx11 = renderer == GSRendererType::DX1011_HW; const bool ogl = renderer == GSRendererType::OGL_HW || renderer == GSRendererType::OGL_SW; - const bool hw = renderer == GSRendererType::DX1011_HW || renderer == GSRendererType::OGL_HW; - const bool sw = renderer == GSRendererType::OGL_SW; + const bool hw = renderer == GSRendererType::DX1011_HW || renderer == GSRendererType::OGL_HW; + const bool sw = renderer == GSRendererType::OGL_SW; const bool null = renderer == GSRendererType::Null; const int sw_threads = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SWTHREADS), UDM_GETPOS, 0, 0); @@ -377,9 +377,10 @@ void GSSettingsDlg::UpdateControls() // Shader Configuration Dialog -GSShaderDlg::GSShaderDlg() : - GSDialog(IDD_SHADER) -{} +GSShaderDlg::GSShaderDlg() + : GSDialog(IDD_SHADER) +{ +} void GSShaderDlg::OnInit() { @@ -455,112 +456,118 @@ void GSShaderDlg::UpdateControls() bool GSShaderDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) { - switch(message) + switch (message) { - case WM_HSCROLL: - { - if((HWND)lParam == GetDlgItem(m_hWnd, IDC_SATURATION_SLIDER)) + case WM_HSCROLL: { - wchar_t text[8] = {0}; - - m_saturation = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SATURATION_SLIDER),TBM_GETPOS,0,0); - - wprintf(text, "%d", m_saturation); - SetDlgItemText(m_hWnd, IDC_SATURATION_VALUE, text); - } - else if((HWND)lParam == GetDlgItem(m_hWnd, IDC_BRIGHTNESS_SLIDER)) - { - wchar_t text[8] = {0}; - - m_brightness = (int)SendMessage(GetDlgItem(m_hWnd, IDC_BRIGHTNESS_SLIDER),TBM_GETPOS,0,0); - - wprintf(text, "%d", m_brightness); - SetDlgItemText(m_hWnd, IDC_BRIGHTNESS_VALUE, text); - } - else if((HWND)lParam == GetDlgItem(m_hWnd, IDC_CONTRAST_SLIDER)) - { - wchar_t text[8] = {0}; - - m_contrast = (int)SendMessage(GetDlgItem(m_hWnd, IDC_CONTRAST_SLIDER),TBM_GETPOS,0,0); - - wprintf(text, "%d", m_contrast); - SetDlgItemText(m_hWnd, IDC_CONTRAST_VALUE, text); - } - } break; - - case WM_COMMAND: - { - const int id = LOWORD(wParam); - - switch(id) - { - case IDOK: - { - INT_PTR data; - //TV Shader - if (ComboBoxGetSelData(IDC_TVSHADER, data)) + if ((HWND)lParam == GetDlgItem(m_hWnd, IDC_SATURATION_SLIDER)) { - theApp.SetConfig("TVShader", (int)data); + wchar_t text[8] = {0}; + + m_saturation = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SATURATION_SLIDER), TBM_GETPOS, 0, 0); + + wprintf(text, "%d", m_saturation); + SetDlgItemText(m_hWnd, IDC_SATURATION_VALUE, text); } - // Shade Boost - theApp.SetConfig("ShadeBoost", (int)IsDlgButtonChecked(m_hWnd, IDC_SHADEBOOST)); - theApp.SetConfig("ShadeBoost_Contrast", m_contrast); - theApp.SetConfig("ShadeBoost_Brightness", m_brightness); - theApp.SetConfig("ShadeBoost_Saturation", m_saturation); + else if ((HWND)lParam == GetDlgItem(m_hWnd, IDC_BRIGHTNESS_SLIDER)) + { + wchar_t text[8] = {0}; - // FXAA shader - theApp.SetConfig("fxaa", (int)IsDlgButtonChecked(m_hWnd, IDC_FXAA)); + m_brightness = (int)SendMessage(GetDlgItem(m_hWnd, IDC_BRIGHTNESS_SLIDER), TBM_GETPOS, 0, 0); - // Texture Filtering Of Display - theApp.SetConfig("linear_present", (int)IsDlgButtonChecked(m_hWnd, IDC_LINEAR_PRESENT)); + wprintf(text, "%d", m_brightness); + SetDlgItemText(m_hWnd, IDC_BRIGHTNESS_VALUE, text); + } + else if ((HWND)lParam == GetDlgItem(m_hWnd, IDC_CONTRAST_SLIDER)) + { + wchar_t text[8] = {0}; - // External FX Shader - theApp.SetConfig("shaderfx", (int)IsDlgButtonChecked(m_hWnd, IDC_SHADER_FX)); + m_contrast = (int)SendMessage(GetDlgItem(m_hWnd, IDC_CONTRAST_SLIDER), TBM_GETPOS, 0, 0); - // External FX Shader(OpenGL) - const int shader_fx_length = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_EDIT), WM_GETTEXTLENGTH, 0, 0); - const int shader_fx_conf_length = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_CONF_EDIT), WM_GETTEXTLENGTH, 0, 0); - const int length = std::max(shader_fx_length, shader_fx_conf_length) + 1; - std::unique_ptr buffer = std::make_unique(length); - - SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_EDIT), WM_GETTEXT, (WPARAM)length, (LPARAM)buffer.get()); - std::string output = convert_utf16_to_utf8(buffer.get()); - theApp.SetConfig("shaderfx_glsl", output.c_str()); // Not really glsl only ;) - SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_CONF_EDIT), WM_GETTEXT, (WPARAM)length, (LPARAM)buffer.get()); - output = convert_utf16_to_utf8(buffer.get()); - theApp.SetConfig("shaderfx_conf", output.c_str()); - - EndDialog(m_hWnd, id); - } break; - case IDC_SHADEBOOST: - UpdateControls(); - case IDC_SHADER_FX: - if (HIWORD(wParam) == BN_CLICKED) - UpdateControls(); - break; - case IDC_SHADER_FX_BUTTON: - if (HIWORD(wParam) == BN_CLICKED) - OpenFileDialog(IDC_SHADER_FX_EDIT, L"Select External Shader"); - break; - - case IDC_SHADER_FX_CONF_BUTTON: - if (HIWORD(wParam) == BN_CLICKED) - OpenFileDialog(IDC_SHADER_FX_CONF_EDIT, L"Select External Shader Config"); - break; - - case IDCANCEL: - { - EndDialog(m_hWnd, IDCANCEL); - } break; + wprintf(text, "%d", m_contrast); + SetDlgItemText(m_hWnd, IDC_CONTRAST_VALUE, text); + } } + break; - } break; + case WM_COMMAND: + { + const int id = LOWORD(wParam); - case WM_CLOSE:EndDialog(m_hWnd, IDCANCEL); break; + switch (id) + { + case IDOK: + { + INT_PTR data; + //TV Shader + if (ComboBoxGetSelData(IDC_TVSHADER, data)) + { + theApp.SetConfig("TVShader", (int)data); + } + // Shade Boost + theApp.SetConfig("ShadeBoost", (int)IsDlgButtonChecked(m_hWnd, IDC_SHADEBOOST)); + theApp.SetConfig("ShadeBoost_Contrast", m_contrast); + theApp.SetConfig("ShadeBoost_Brightness", m_brightness); + theApp.SetConfig("ShadeBoost_Saturation", m_saturation); - default: return false; + // FXAA shader + theApp.SetConfig("fxaa", (int)IsDlgButtonChecked(m_hWnd, IDC_FXAA)); + + // Texture Filtering Of Display + theApp.SetConfig("linear_present", (int)IsDlgButtonChecked(m_hWnd, IDC_LINEAR_PRESENT)); + + // External FX Shader + theApp.SetConfig("shaderfx", (int)IsDlgButtonChecked(m_hWnd, IDC_SHADER_FX)); + + // External FX Shader(OpenGL) + const int shader_fx_length = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_EDIT), WM_GETTEXTLENGTH, 0, 0); + const int shader_fx_conf_length = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_CONF_EDIT), WM_GETTEXTLENGTH, 0, 0); + const int length = std::max(shader_fx_length, shader_fx_conf_length) + 1; + std::unique_ptr buffer = std::make_unique(length); + + SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_EDIT), WM_GETTEXT, (WPARAM)length, (LPARAM)buffer.get()); + std::string output = convert_utf16_to_utf8(buffer.get()); + theApp.SetConfig("shaderfx_glsl", output.c_str()); // Not really glsl only ;) + SendMessage(GetDlgItem(m_hWnd, IDC_SHADER_FX_CONF_EDIT), WM_GETTEXT, (WPARAM)length, (LPARAM)buffer.get()); + output = convert_utf16_to_utf8(buffer.get()); + theApp.SetConfig("shaderfx_conf", output.c_str()); + + EndDialog(m_hWnd, id); + } + break; + case IDC_SHADEBOOST: + UpdateControls(); + case IDC_SHADER_FX: + if (HIWORD(wParam) == BN_CLICKED) + UpdateControls(); + break; + case IDC_SHADER_FX_BUTTON: + if (HIWORD(wParam) == BN_CLICKED) + OpenFileDialog(IDC_SHADER_FX_EDIT, L"Select External Shader"); + break; + + case IDC_SHADER_FX_CONF_BUTTON: + if (HIWORD(wParam) == BN_CLICKED) + OpenFileDialog(IDC_SHADER_FX_CONF_EDIT, L"Select External Shader Config"); + break; + + case IDCANCEL: + { + EndDialog(m_hWnd, IDCANCEL); + } + break; + } + } + break; + + case WM_CLOSE: + EndDialog(m_hWnd, IDCANCEL); + break; + + default: + return false; } - + return true; } @@ -729,88 +736,93 @@ void GSHacksDlg::UpdateControls() bool GSHacksDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) { - switch(message) + switch (message) { - case WM_COMMAND: - { - const int id = LOWORD(wParam); - - switch(id) + case WM_COMMAND: { - case IDC_SKIPDRAWHACKEDIT: - case IDC_SKIPDRAWOFFSETEDIT: - case IDC_TCOFFSETX2: - case IDC_TCOFFSETY2: - if (HIWORD(wParam) == EN_CHANGE) - UpdateControls(); - break; - case IDOK: - { - INT_PTR data; - if (ComboBoxGetSelData(IDC_HALF_SCREEN_TS, data)) - { - theApp.SetConfig("UserHacks_Half_Bottom_Override", (int)data); - } - if (ComboBoxGetSelData(IDC_TRI_FILTER, data)) - { - theApp.SetConfig("UserHacks_TriFilter", (int)data); - } - if (ComboBoxGetSelData(IDC_ROUND_SPRITE, data)) - { - theApp.SetConfig("UserHacks_round_sprite_offset", (int)data); - } - if (ComboBoxGetSelData(IDC_OFFSETHACK, data)) - { - theApp.SetConfig("UserHacks_HalfPixelOffset", (int)data); - } - if (ComboBoxGetSelData(IDC_GEOMETRY_SHADER_OVERRIDE, data)) - { - theApp.SetConfig("override_geometry_shader", (int)data); - } - if (ComboBoxGetSelData(IDC_IMAGE_LOAD_STORE, data)) - { - theApp.SetConfig("override_GL_ARB_shader_image_load_store", (int)data); - } - if (ComboBoxGetSelData(IDC_SPARSE_TEXTURE, data)) - { - theApp.SetConfig("override_GL_ARB_sparse_texture", (int)data); - } + const int id = LOWORD(wParam); - // It's more user friendly to lower the skipdraw offset value here - it prevents the skipdraw offset - // value from decreasing unnecessarily if the user types a skipdraw value that is temporarily lower - // than the skipdraw offset value. - const int skipdraw_offset = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SKIPDRAWOFFSET), UDM_GETPOS, 0, 0); - const int skipdraw = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SKIPDRAWHACK), UDM_GETPOS, 0, 0); - theApp.SetConfig("UserHacks_SkipDraw_Offset", std::min(skipdraw_offset, skipdraw)); - theApp.SetConfig("UserHacks_SkipDraw", skipdraw); + switch (id) + { + case IDC_SKIPDRAWHACKEDIT: + case IDC_SKIPDRAWOFFSETEDIT: + case IDC_TCOFFSETX2: + case IDC_TCOFFSETY2: + if (HIWORD(wParam) == EN_CHANGE) + UpdateControls(); + break; + case IDOK: + { + INT_PTR data; + if (ComboBoxGetSelData(IDC_HALF_SCREEN_TS, data)) + { + theApp.SetConfig("UserHacks_Half_Bottom_Override", (int)data); + } + if (ComboBoxGetSelData(IDC_TRI_FILTER, data)) + { + theApp.SetConfig("UserHacks_TriFilter", (int)data); + } + if (ComboBoxGetSelData(IDC_ROUND_SPRITE, data)) + { + theApp.SetConfig("UserHacks_round_sprite_offset", (int)data); + } + if (ComboBoxGetSelData(IDC_OFFSETHACK, data)) + { + theApp.SetConfig("UserHacks_HalfPixelOffset", (int)data); + } + if (ComboBoxGetSelData(IDC_GEOMETRY_SHADER_OVERRIDE, data)) + { + theApp.SetConfig("override_geometry_shader", (int)data); + } + if (ComboBoxGetSelData(IDC_IMAGE_LOAD_STORE, data)) + { + theApp.SetConfig("override_GL_ARB_shader_image_load_store", (int)data); + } + if (ComboBoxGetSelData(IDC_SPARSE_TEXTURE, data)) + { + theApp.SetConfig("override_GL_ARB_sparse_texture", (int)data); + } - theApp.SetConfig("UserHacks_WildHack", (int)IsDlgButtonChecked(m_hWnd, IDC_WILDHACK)); - theApp.SetConfig("preload_frame_with_gs_data", (int)IsDlgButtonChecked(m_hWnd, IDC_PRELOAD_GS)); - theApp.SetConfig("UserHacks_align_sprite_X", (int)IsDlgButtonChecked(m_hWnd, IDC_ALIGN_SPRITE)); - theApp.SetConfig("UserHacks_DisableDepthSupport", (int)IsDlgButtonChecked(m_hWnd, IDC_TC_DEPTH)); - theApp.SetConfig("UserHacks_CPU_FB_Conversion", (int)IsDlgButtonChecked(m_hWnd, IDC_CPU_FB_CONVERSION)); - theApp.SetConfig("UserHacks_DisablePartialInvalidation", (int)IsDlgButtonChecked(m_hWnd, IDC_FAST_TC_INV)); - theApp.SetConfig("UserHacks_AutoFlush", (int)IsDlgButtonChecked(m_hWnd, IDC_AUTO_FLUSH_HW)); - theApp.SetConfig("UserHacks_Disable_Safe_Features", (int)IsDlgButtonChecked(m_hWnd, IDC_SAFE_FEATURES)); - theApp.SetConfig("wrap_gs_mem", (int)IsDlgButtonChecked(m_hWnd, IDC_MEMORY_WRAPPING)); - theApp.SetConfig("UserHacks_merge_pp_sprite", (int)IsDlgButtonChecked(m_hWnd, IDC_MERGE_PP_SPRITE)); - theApp.SetConfig("UserHacks_TCOffsetX", (int)SendMessage(GetDlgItem(m_hWnd, IDC_TCOFFSETX), UDM_GETPOS, 0, 0)); - theApp.SetConfig("UserHacks_TCOffsetY", (int)SendMessage(GetDlgItem(m_hWnd, IDC_TCOFFSETY), UDM_GETPOS, 0, 0)); + // It's more user friendly to lower the skipdraw offset value here - it prevents the skipdraw offset + // value from decreasing unnecessarily if the user types a skipdraw value that is temporarily lower + // than the skipdraw offset value. + const int skipdraw_offset = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SKIPDRAWOFFSET), UDM_GETPOS, 0, 0); + const int skipdraw = (int)SendMessage(GetDlgItem(m_hWnd, IDC_SKIPDRAWHACK), UDM_GETPOS, 0, 0); + theApp.SetConfig("UserHacks_SkipDraw_Offset", std::min(skipdraw_offset, skipdraw)); + theApp.SetConfig("UserHacks_SkipDraw", skipdraw); - EndDialog(m_hWnd, id); - } break; + theApp.SetConfig("UserHacks_WildHack", (int)IsDlgButtonChecked(m_hWnd, IDC_WILDHACK)); + theApp.SetConfig("preload_frame_with_gs_data", (int)IsDlgButtonChecked(m_hWnd, IDC_PRELOAD_GS)); + theApp.SetConfig("UserHacks_align_sprite_X", (int)IsDlgButtonChecked(m_hWnd, IDC_ALIGN_SPRITE)); + theApp.SetConfig("UserHacks_DisableDepthSupport", (int)IsDlgButtonChecked(m_hWnd, IDC_TC_DEPTH)); + theApp.SetConfig("UserHacks_CPU_FB_Conversion", (int)IsDlgButtonChecked(m_hWnd, IDC_CPU_FB_CONVERSION)); + theApp.SetConfig("UserHacks_DisablePartialInvalidation", (int)IsDlgButtonChecked(m_hWnd, IDC_FAST_TC_INV)); + theApp.SetConfig("UserHacks_AutoFlush", (int)IsDlgButtonChecked(m_hWnd, IDC_AUTO_FLUSH_HW)); + theApp.SetConfig("UserHacks_Disable_Safe_Features", (int)IsDlgButtonChecked(m_hWnd, IDC_SAFE_FEATURES)); + theApp.SetConfig("wrap_gs_mem", (int)IsDlgButtonChecked(m_hWnd, IDC_MEMORY_WRAPPING)); + theApp.SetConfig("UserHacks_merge_pp_sprite", (int)IsDlgButtonChecked(m_hWnd, IDC_MERGE_PP_SPRITE)); + theApp.SetConfig("UserHacks_TCOffsetX", (int)SendMessage(GetDlgItem(m_hWnd, IDC_TCOFFSETX), UDM_GETPOS, 0, 0)); + theApp.SetConfig("UserHacks_TCOffsetY", (int)SendMessage(GetDlgItem(m_hWnd, IDC_TCOFFSETY), UDM_GETPOS, 0, 0)); - case IDCANCEL: - { - EndDialog(m_hWnd, IDCANCEL); - } break; + EndDialog(m_hWnd, id); + } + break; + + case IDCANCEL: + { + EndDialog(m_hWnd, IDCANCEL); + } + break; + } } + break; - } break; + case WM_CLOSE: + EndDialog(m_hWnd, IDCANCEL); + break; - case WM_CLOSE:EndDialog(m_hWnd, IDCANCEL); break; - - default: return false; + default: + return false; } return true; @@ -818,9 +830,10 @@ bool GSHacksDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) // OSD Configuration Dialog -GSOSDDlg::GSOSDDlg() : - GSDialog(IDD_OSD) -{} +GSOSDDlg::GSOSDDlg() + : GSDialog(IDD_OSD) +{ +} void GSOSDDlg::OnInit() { @@ -860,7 +873,7 @@ void GSOSDDlg::UpdateControls() SendMessage(GetDlgItem(m_hWnd, IDC_OSD_COLOR_GREEN_SLIDER), TBM_SETPOS, TRUE, m_color.g); SendMessage(GetDlgItem(m_hWnd, IDC_OSD_COLOR_BLUE_SLIDER), TBM_SETPOS, TRUE, m_color.b); - wchar_t text[8] = { 0 }; + wchar_t text[8] = {0}; wprintf(text, "%d", m_color.a); SetDlgItemText(m_hWnd, IDC_OSD_OPACITY_AMOUNT, text); @@ -913,93 +926,99 @@ bool GSOSDDlg::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) { switch (message) { - case WM_HSCROLL: - { - if ((HWND)lParam == GetDlgItem(m_hWnd, IDC_OSD_OPACITY_SLIDER)) + case WM_HSCROLL: { - wchar_t text[8] = { 0 }; + if ((HWND)lParam == GetDlgItem(m_hWnd, IDC_OSD_OPACITY_SLIDER)) + { + wchar_t text[8] = {0}; - m_color.a = (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_OPACITY_SLIDER), TBM_GETPOS, 0, 0); + m_color.a = (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_OPACITY_SLIDER), TBM_GETPOS, 0, 0); - wprintf(text, "%d", m_color.a); - SetDlgItemText(m_hWnd, IDC_OSD_OPACITY_AMOUNT, text); + wprintf(text, "%d", m_color.a); + SetDlgItemText(m_hWnd, IDC_OSD_OPACITY_AMOUNT, text); + } + else if ((HWND)lParam == GetDlgItem(m_hWnd, IDC_OSD_COLOR_RED_SLIDER)) + { + wchar_t text[8] = {0}; + + m_color.r = (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_COLOR_RED_SLIDER), TBM_GETPOS, 0, 0); + + wprintf(text, "%d", m_color.r); + SetDlgItemText(m_hWnd, IDC_OSD_COLOR_RED_AMOUNT, text); + } + else if ((HWND)lParam == GetDlgItem(m_hWnd, IDC_OSD_COLOR_GREEN_SLIDER)) + { + wchar_t text[8] = {0}; + + m_color.g = (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_COLOR_GREEN_SLIDER), TBM_GETPOS, 0, 0); + + wprintf(text, "%d", m_color.g); + SetDlgItemText(m_hWnd, IDC_OSD_COLOR_GREEN_AMOUNT, text); + } + else if ((HWND)lParam == GetDlgItem(m_hWnd, IDC_OSD_COLOR_BLUE_SLIDER)) + { + wchar_t text[8] = {0}; + + m_color.b = (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_COLOR_BLUE_SLIDER), TBM_GETPOS, 0, 0); + + wprintf(text, "%d", m_color.b); + SetDlgItemText(m_hWnd, IDC_OSD_COLOR_BLUE_AMOUNT, text); + } } - else if ((HWND)lParam == GetDlgItem(m_hWnd, IDC_OSD_COLOR_RED_SLIDER)) + break; + + case WM_COMMAND: { - wchar_t text[8] = { 0 }; + const int id = LOWORD(wParam); - m_color.r = (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_COLOR_RED_SLIDER), TBM_GETPOS, 0, 0); + switch (id) + { + case IDOK: + { + theApp.SetConfig("osd_color_opacity", m_color.a); + theApp.SetConfig("osd_color_r", m_color.r); + theApp.SetConfig("osd_color_g", m_color.g); + theApp.SetConfig("osd_color_b", m_color.b); - wprintf(text, "%d", m_color.r); - SetDlgItemText(m_hWnd, IDC_OSD_COLOR_RED_AMOUNT, text); + theApp.SetConfig("osd_fontsize", (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_SIZE), UDM_GETPOS, 0, 0)); + theApp.SetConfig("osd_log_timeout", (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_TIMEOUT), UDM_GETPOS, 0, 0)); + theApp.SetConfig("osd_max_log_messages", (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_MAX_LOG), UDM_GETPOS, 0, 0)); + + theApp.SetConfig("osd_log_enabled", (int)IsDlgButtonChecked(m_hWnd, IDC_OSD_LOG)); + theApp.SetConfig("osd_monitor_enabled", (int)IsDlgButtonChecked(m_hWnd, IDC_OSD_MONITOR)); + + EndDialog(m_hWnd, id); + } + break; + case IDC_OSD_LOG: + if (HIWORD(wParam) == BN_CLICKED) + UpdateControls(); + break; + case IDC_OSD_MONITOR: + if (HIWORD(wParam) == BN_CLICKED) + UpdateControls(); + break; + case IDC_OSD_SIZE_EDIT: + case IDC_OSD_TIMEOUT_EDIT: + case IDC_OSD_MAX_LOG_EDIT: + if (HIWORD(wParam) == EN_CHANGE) + UpdateControls(); + break; + case IDCANCEL: + { + EndDialog(m_hWnd, IDCANCEL); + } + break; + } } - else if ((HWND)lParam == GetDlgItem(m_hWnd, IDC_OSD_COLOR_GREEN_SLIDER)) - { - wchar_t text[8] = { 0 }; + break; - m_color.g = (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_COLOR_GREEN_SLIDER), TBM_GETPOS, 0, 0); - - wprintf(text, "%d", m_color.g); - SetDlgItemText(m_hWnd, IDC_OSD_COLOR_GREEN_AMOUNT, text); - } - else if ((HWND)lParam == GetDlgItem(m_hWnd, IDC_OSD_COLOR_BLUE_SLIDER)) - { - wchar_t text[8] = { 0 }; - - m_color.b = (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_COLOR_BLUE_SLIDER), TBM_GETPOS, 0, 0); - - wprintf(text, "%d", m_color.b); - SetDlgItemText(m_hWnd, IDC_OSD_COLOR_BLUE_AMOUNT, text); - } - } break; - - case WM_COMMAND: - { - const int id = LOWORD(wParam); - - switch (id) - { - case IDOK: - { - theApp.SetConfig("osd_color_opacity", m_color.a); - theApp.SetConfig("osd_color_r", m_color.r); - theApp.SetConfig("osd_color_g", m_color.g); - theApp.SetConfig("osd_color_b", m_color.b); - - theApp.SetConfig("osd_fontsize", (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_SIZE), UDM_GETPOS, 0, 0)); - theApp.SetConfig("osd_log_timeout", (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_TIMEOUT), UDM_GETPOS, 0, 0)); - theApp.SetConfig("osd_max_log_messages", (int)SendMessage(GetDlgItem(m_hWnd, IDC_OSD_MAX_LOG), UDM_GETPOS, 0, 0)); - - theApp.SetConfig("osd_log_enabled", (int)IsDlgButtonChecked(m_hWnd, IDC_OSD_LOG)); - theApp.SetConfig("osd_monitor_enabled", (int)IsDlgButtonChecked(m_hWnd, IDC_OSD_MONITOR)); - - EndDialog(m_hWnd, id); - } break; - case IDC_OSD_LOG: - if (HIWORD(wParam) == BN_CLICKED) - UpdateControls(); - break; - case IDC_OSD_MONITOR: - if (HIWORD(wParam) == BN_CLICKED) - UpdateControls(); - break; - case IDC_OSD_SIZE_EDIT: - case IDC_OSD_TIMEOUT_EDIT: - case IDC_OSD_MAX_LOG_EDIT: - if (HIWORD(wParam) == EN_CHANGE) - UpdateControls(); - break; - case IDCANCEL: - { + case WM_CLOSE: EndDialog(m_hWnd, IDCANCEL); - } break; - } + break; - } break; - - case WM_CLOSE:EndDialog(m_hWnd, IDCANCEL); break; - - default: return false; + default: + return false; } diff --git a/plugins/GSdx/Window/GSSettingsDlg.h b/plugins/GSdx/Window/GSSettingsDlg.h index 2adc296d2d..a3eaa47e28 100644 --- a/plugins/GSdx/Window/GSSettingsDlg.h +++ b/plugins/GSdx/Window/GSSettingsDlg.h @@ -34,7 +34,7 @@ class GSShaderDlg : public GSDialog protected: void OnInit(); - bool OnMessage(UINT message, WPARAM wParam, LPARAM lParam); + bool OnMessage(UINT message, WPARAM wParam, LPARAM lParam); public: GSShaderDlg(); @@ -83,12 +83,17 @@ class GSSettingsDlg : public GSDialog std::string name; std::string id; D3D_FEATURE_LEVEL level; - Adapter(const std::string &n, const std::string &i, const D3D_FEATURE_LEVEL &l) : name(n), id(i), level(l) {} + Adapter(const std::string& n, const std::string& i, const D3D_FEATURE_LEVEL& l) + : name(n) + , id(i) + , level(l) + { + } }; std::vector m_renderers; std::vector m_d3d11_adapters; - std::vector *m_current_adapters; + std::vector* m_current_adapters; std::string m_last_selected_adapter_id; std::vector EnumerateD3D11Adapters(); diff --git a/plugins/GSdx/Window/GSWnd.cpp b/plugins/GSdx/Window/GSWnd.cpp index 20c5859020..310aa213fb 100644 --- a/plugins/GSdx/Window/GSWnd.cpp +++ b/plugins/GSdx/Window/GSWnd.cpp @@ -26,7 +26,7 @@ void GSWndGL::PopulateGlFunction() { // Load mandatory function pointer -#define GL_EXT_LOAD(ext) *(void**)&(ext) = GetProcAddress(#ext, false) +#define GL_EXT_LOAD(ext) *(void**)&(ext) = GetProcAddress(#ext, false) // Load extra function pointer #define GL_EXT_LOAD_OPT(ext) *(void**)&(ext) = GetProcAddress(#ext, true) diff --git a/plugins/GSdx/Window/GSWnd.h b/plugins/GSdx/Window/GSWnd.h index 08fa3f93d2..2cac87016f 100644 --- a/plugins/GSdx/Window/GSWnd.h +++ b/plugins/GSdx/Window/GSWnd.h @@ -31,29 +31,31 @@ protected: bool m_managed; // set true when we're attached to a 3rdparty window that's amanged by the emulator public: - GSWnd() : m_managed(false) {}; - virtual ~GSWnd() {}; + GSWnd() + : m_managed(false) + { + } + virtual ~GSWnd() {} virtual bool Create(const std::string& title, int w, int h) = 0; virtual bool Attach(void* handle, bool managed = true) = 0; virtual void Detach() = 0; - bool IsManaged() const {return m_managed;} + bool IsManaged() const { return m_managed; } virtual void* GetDisplay() = 0; virtual void* GetHandle() = 0; virtual GSVector4i GetClientRect() = 0; virtual bool SetWindowText(const char* title) = 0; - virtual void AttachContext() {}; - virtual void DetachContext() {}; + virtual void AttachContext() {} + virtual void DetachContext() {} virtual void Show() = 0; virtual void Hide() = 0; virtual void HideFrame() = 0; - virtual void Flip() {}; - virtual void SetVSync(int vsync) {}; - + virtual void Flip() {} + virtual void SetVSync(int vsync) {} }; class GSWndGL : public GSWnd @@ -73,8 +75,13 @@ protected: virtual bool HasLateVsyncSupport() = 0; public: - GSWndGL() : m_ctx_attached(false), m_vsync_change_requested(false), m_vsync(0) {}; - virtual ~GSWndGL() {}; + GSWndGL() + : m_ctx_attached(false) + , m_vsync_change_requested(false) + , m_vsync(0) + { + } + virtual ~GSWndGL() {} virtual bool Create(const std::string& title, int w, int h) = 0; virtual bool Attach(void* handle, bool managed = true) = 0; diff --git a/plugins/GSdx/Window/GSWndDX.cpp b/plugins/GSdx/Window/GSWndDX.cpp index 88da4b77bc..6f877e6384 100644 --- a/plugins/GSdx/Window/GSWndDX.cpp +++ b/plugins/GSdx/Window/GSWndDX.cpp @@ -37,7 +37,7 @@ LRESULT CALLBACK GSWndDX::WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM { GSWndDX* wnd = NULL; - if(message == WM_NCCREATE) + if (message == WM_NCCREATE) { wnd = (GSWndDX*)((LPCREATESTRUCT)lParam)->lpCreateParams; @@ -50,7 +50,7 @@ LRESULT CALLBACK GSWndDX::WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM wnd = (GSWndDX*)GetWindowLongPtr(hWnd, GWLP_USERDATA); } - if(wnd == NULL) + if (wnd == NULL) { return DefWindowProc(hWnd, message, wParam, lParam); } @@ -60,20 +60,20 @@ LRESULT CALLBACK GSWndDX::WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM LRESULT GSWndDX::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) { - switch(message) + switch (message) { - case WM_CLOSE: - Hide(); - // DestroyWindow(m_hWnd); - return 0; - case WM_DESTROY: - // This kills the emulator when GS is closed, which *really* isn't desired behavior, - // especially in STGS mode (worked in MTGS mode since it only quit the thread, but even - // that wasn't needed). - //PostQuitMessage(0); - return 0; - default: - break; + case WM_CLOSE: + Hide(); + // DestroyWindow(m_hWnd); + return 0; + case WM_DESTROY: + // This kills the emulator when GS is closed, which *really* isn't desired behavior, + // especially in STGS mode (worked in MTGS mode since it only quit the thread, but even + // that wasn't needed). + //PostQuitMessage(0); + return 0; + default: + break; } return DefWindowProc((HWND)m_hWnd, message, wParam, lParam); @@ -81,7 +81,7 @@ LRESULT GSWndDX::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) bool GSWndDX::Create(const std::string& title, int w, int h) { - if(m_hWnd) + if (m_hWnd) throw GSDXRecoverableError(); m_managed = true; @@ -98,9 +98,9 @@ bool GSWndDX::Create(const std::string& title, int w, int h) wc.hbrBackground = (HBRUSH)GetStockObject(BLACK_BRUSH); wc.lpszClassName = L"GSWndDX"; - if(!GetClassInfo(wc.hInstance, wc.lpszClassName, &wc)) + if (!GetClassInfo(wc.hInstance, wc.lpszClassName, &wc)) { - if(!RegisterClass(&wc)) + if (!RegisterClass(&wc)) { throw GSDXRecoverableError(); } @@ -114,12 +114,12 @@ bool GSWndDX::Create(const std::string& title, int w, int h) bool remote = !!GetSystemMetrics(SM_REMOTESESSION); - if(w <= 0 || h <= 0 || remote) + if (w <= 0 || h <= 0 || remote) { w = r.width() / 3; h = r.width() / 4; - if(!remote) + if (!remote) { w *= 2; h *= 2; @@ -153,7 +153,7 @@ bool GSWndDX::Attach(void* handle, bool managed) void GSWndDX::Detach() { - if(m_hWnd && m_managed) + if (m_hWnd && m_managed) { // close the window, since it's under GSdx care. It's not taking messages anyway, and // that means its big, ugly, and in the way. @@ -179,7 +179,8 @@ GSVector4i GSWndDX::GetClientRect() bool GSWndDX::SetWindowText(const char* title) { - if(!m_managed) return false; + if (!m_managed) + return false; const size_t tmp_size = strlen(title) + 1; std::wstring tmp(tmp_size, L'#'); @@ -191,7 +192,8 @@ bool GSWndDX::SetWindowText(const char* title) void GSWndDX::Show() { - if(!m_managed) return; + if (!m_managed) + return; SetForegroundWindow(m_hWnd); ShowWindow(m_hWnd, SW_SHOWNORMAL); @@ -200,16 +202,18 @@ void GSWndDX::Show() void GSWndDX::Hide() { - if(!m_managed) return; + if (!m_managed) + return; ShowWindow(m_hWnd, SW_HIDE); } void GSWndDX::HideFrame() { - if(!m_managed) return; + if (!m_managed) + return; - SetWindowLong(m_hWnd, GWL_STYLE, GetWindowLong(m_hWnd, GWL_STYLE) & ~(WS_CAPTION|WS_THICKFRAME)); + SetWindowLong(m_hWnd, GWL_STYLE, GetWindowLong(m_hWnd, GWL_STYLE) & ~(WS_CAPTION | WS_THICKFRAME)); SetWindowPos(m_hWnd, NULL, 0, 0, 0, 0, SWP_NOSIZE | SWP_NOMOVE | SWP_NOZORDER | SWP_NOACTIVATE); SetMenu(m_hWnd, NULL); diff --git a/plugins/GSdx/Window/GSWndDX.h b/plugins/GSdx/Window/GSWndDX.h index c2e0119549..674fae4e0e 100644 --- a/plugins/GSdx/Window/GSWndDX.h +++ b/plugins/GSdx/Window/GSWndDX.h @@ -40,8 +40,8 @@ public: bool Attach(void* handle, bool managed = true); void Detach(); - void* GetDisplay() {return m_hWnd;} - void* GetHandle() {return m_hWnd;} + void* GetDisplay() { return m_hWnd; } + void* GetHandle() { return m_hWnd; } GSVector4i GetClientRect(); bool SetWindowText(const char* title); diff --git a/plugins/GSdx/Window/GSWndEGL.cpp b/plugins/GSdx/Window/GSWndEGL.cpp index 6754d23186..94bc89e108 100644 --- a/plugins/GSdx/Window/GSWndEGL.cpp +++ b/plugins/GSdx/Window/GSWndEGL.cpp @@ -28,28 +28,32 @@ int GSWndEGL::SelectPlatform() { // Check the supported extension - const char *client_extensions = eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS); - if (!client_extensions) { + const char* client_extensions = eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS); + if (!client_extensions) + { fprintf(stderr, "EGL: Client extension not supported\n"); return 0; } fprintf(stdout, "EGL: Supported extensions: %s\n", client_extensions); // Check platform extensions are supported (Note: there are core in 1.5) - if (!strstr(client_extensions, "EGL_EXT_platform_base")) { + if (!strstr(client_extensions, "EGL_EXT_platform_base")) + { fprintf(stderr, "EGL: Dynamic platform selection isn't supported\n"); return 0; } // Finally we can select the platform #if GS_EGL_X11 - if (strstr(client_extensions, "EGL_EXT_platform_x11")) { + if (strstr(client_extensions, "EGL_EXT_platform_x11")) + { fprintf(stdout, "EGL: select X11 platform\n"); return EGL_PLATFORM_X11_KHR; } #endif #if GS_EGL_WL - if (strstr(client_extensions, "EGL_EXT_platform_wayland")) { + if (strstr(client_extensions, "EGL_EXT_platform_wayland")) + { fprintf(stdout, "EGL: select Wayland platform\n"); return EGL_PLATFORM_WAYLAND_KHR; } @@ -83,7 +87,7 @@ void GSWndEGL::CreateContext(int major, int minor) EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR, EGL_NONE }; - EGLint NullContextAttribs[] = { EGL_NONE }; + EGLint NullContextAttribs[] = {EGL_NONE}; EGLint attrList[] = { EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8, @@ -96,36 +100,37 @@ void GSWndEGL::CreateContext(int major, int minor) BindAPI(); eglChooseConfig(m_eglDisplay, attrList, &eglConfig, 1, &numConfigs); - if ( numConfigs == 0 ) + if (numConfigs == 0) { - fprintf(stderr,"EGL: Failed to get a frame buffer config! (0x%x)\n", eglGetError() ); + fprintf(stderr, "EGL: Failed to get a frame buffer config! (0x%x)\n", eglGetError()); throw GSDXRecoverableError(); } m_eglSurface = eglCreatePlatformWindowSurface(m_eglDisplay, eglConfig, m_native_window, nullptr); - if ( m_eglSurface == EGL_NO_SURFACE ) + if (m_eglSurface == EGL_NO_SURFACE) { - fprintf(stderr,"EGL: Failed to get a window surface\n"); + fprintf(stderr, "EGL: Failed to get a window surface\n"); throw GSDXRecoverableError(); } m_eglContext = eglCreateContext(m_eglDisplay, eglConfig, EGL_NO_CONTEXT, contextAttribs); EGLint status = eglGetError(); - if (status == EGL_BAD_ATTRIBUTE || status == EGL_BAD_MATCH) { + if (status == EGL_BAD_ATTRIBUTE || status == EGL_BAD_MATCH) + { // Radeon/Gallium don't support advance attribute. Fallback to random value // Note: Intel gives an EGL_BAD_MATCH. I don't know why but let's by stubborn and retry. fprintf(stderr, "EGL: warning your driver doesn't support advance openGL context attributes\n"); m_eglContext = eglCreateContext(m_eglDisplay, eglConfig, EGL_NO_CONTEXT, NullContextAttribs); status = eglGetError(); } - if ( m_eglContext == EGL_NO_CONTEXT ) + if (m_eglContext == EGL_NO_CONTEXT) { - fprintf(stderr,"EGL: Failed to create the context\n"); - fprintf(stderr,"EGL STATUS: %x\n", status); + fprintf(stderr, "EGL: Failed to create the context\n"); + fprintf(stderr, "EGL STATUS: %x\n", status); throw GSDXRecoverableError(); } - if ( !eglMakeCurrent(m_eglDisplay, m_eglSurface, m_eglSurface, m_eglContext) ) + if (!eglMakeCurrent(m_eglDisplay, m_eglSurface, m_eglSurface, m_eglContext)) { throw GSDXRecoverableError(); } @@ -133,7 +138,8 @@ void GSWndEGL::CreateContext(int major, int minor) void GSWndEGL::AttachContext() { - if (!IsContextAttached()) { + if (!IsContextAttached()) + { // The setting of the API is local to a thread. This function // can be called from 2 threads. BindAPI(); @@ -146,7 +152,8 @@ void GSWndEGL::AttachContext() void GSWndEGL::DetachContext() { - if (IsContextAttached()) { + if (IsContextAttached()) + { //fprintf(stderr, "Detach the context\n"); eglMakeCurrent(m_eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); m_ctx_attached = false; @@ -161,8 +168,9 @@ void GSWndEGL::BindAPI() { eglBindAPI(EGL_OPENGL_API); EGLenum api = eglQueryAPI(); - if (api != EGL_OPENGL_API) { - fprintf(stderr,"EGL: Failed to bind the OpenGL API got 0x%x instead\n", api); + if (api != EGL_OPENGL_API) + { + fprintf(stderr, "EGL: Failed to bind the OpenGL API got 0x%x instead\n", api); throw GSDXRecoverableError(); } } @@ -198,7 +206,8 @@ void GSWndEGL::Detach() bool GSWndEGL::Create(const std::string& title, int w, int h) { - if(w <= 0 || h <= 0) { + if (w <= 0 || h <= 0) + { w = theApp.GetConfigI("ModeWidth"); h = theApp.GetConfigI("ModeHeight"); } @@ -217,7 +226,8 @@ bool GSWndEGL::Create(const std::string& title, int w, int h) void* GSWndEGL::GetProcAddress(const char* name, bool opt) { void* ptr = (void*)eglGetProcAddress(name); - if (ptr == nullptr) { + if (ptr == nullptr) + { if (theApp.GetConfigB("debug_opengl")) fprintf(stderr, "Failed to find %s\n", name); @@ -263,17 +273,19 @@ void GSWndEGL::OpenEGLDisplay() // We only need a native display when we manage the window ourself. // By default, EGL will create its own native display. This way the driver knows // that display will be thread safe and so it can enable multithread optimization. - void *native_display = (m_managed) ? CreateNativeDisplay() : nullptr; + void* native_display = (m_managed) ? CreateNativeDisplay() : nullptr; // Create an EGL display from the native display m_eglDisplay = eglGetPlatformDisplay(m_platform, native_display, nullptr); - if (m_eglDisplay == EGL_NO_DISPLAY) { - fprintf(stderr,"EGL: Failed to open a display! (0x%x)\n", eglGetError() ); + if (m_eglDisplay == EGL_NO_DISPLAY) + { + fprintf(stderr, "EGL: Failed to open a display! (0x%x)\n", eglGetError()); throw GSDXRecoverableError(); } - if (!eglInitialize(m_eglDisplay, nullptr, nullptr)) { - fprintf(stderr,"EGL: Failed to initialize the display! (0x%x)\n", eglGetError() ); + if (!eglInitialize(m_eglDisplay, nullptr, nullptr)) + { + fprintf(stderr, "EGL: Failed to initialize the display! (0x%x)\n", eglGetError()); throw GSDXRecoverableError(); } } @@ -288,7 +300,7 @@ GSWndEGL_X11::GSWndEGL_X11() { } -void *GSWndEGL_X11::CreateNativeDisplay() +void* GSWndEGL_X11::CreateNativeDisplay() { if (m_NativeDisplay == nullptr) m_NativeDisplay = XOpenDisplay(nullptr); @@ -296,7 +308,7 @@ void *GSWndEGL_X11::CreateNativeDisplay() return (void*)m_NativeDisplay; } -void *GSWndEGL_X11::CreateNativeWindow(int w, int h) +void* GSWndEGL_X11::CreateNativeWindow(int w, int h) { const int depth = 0, x = 0, y = 0, border_width = 1; #if 0 @@ -305,35 +317,37 @@ void *GSWndEGL_X11::CreateNativeWindow(int w, int h) XMapWindow (m_NativeDisplay, m_NativeWindow); #endif - if (m_NativeDisplay == nullptr) { + if (m_NativeDisplay == nullptr) + { fprintf(stderr, "EGL X11: display wasn't created before the window\n"); throw GSDXRecoverableError(); } - xcb_connection_t *c = XGetXCBConnection(m_NativeDisplay); + xcb_connection_t* c = XGetXCBConnection(m_NativeDisplay); - const xcb_setup_t *setup = xcb_get_setup(c); + const xcb_setup_t* setup = xcb_get_setup(c); - xcb_screen_t *screen = (xcb_setup_roots_iterator (setup)).data; + xcb_screen_t* screen = (xcb_setup_roots_iterator(setup)).data; m_NativeWindow = xcb_generate_id(c); - if (m_NativeWindow == 0) { + if (m_NativeWindow == 0) + { fprintf(stderr, "EGL X11: failed to create the native window\n"); throw GSDXRecoverableError(); } - xcb_create_window (c, depth, m_NativeWindow, screen->root, x, y, w, h, - border_width, InputOutput, screen->root_visual, 0, nullptr); + xcb_create_window(c, depth, m_NativeWindow, screen->root, x, y, w, h, + border_width, InputOutput, screen->root_visual, 0, nullptr); - xcb_map_window (c, m_NativeWindow); + xcb_map_window(c, m_NativeWindow); xcb_flush(c); return (void*)&m_NativeWindow; } -void *GSWndEGL_X11::AttachNativeWindow(void *handle) +void* GSWndEGL_X11::AttachNativeWindow(void* handle) { m_NativeWindow = *(Window*)handle; return handle; @@ -341,7 +355,8 @@ void *GSWndEGL_X11::AttachNativeWindow(void *handle) void GSWndEGL_X11::DestroyNativeResources() { - if (m_NativeDisplay) { + if (m_NativeDisplay) + { XCloseDisplay(m_NativeDisplay); m_NativeDisplay = nullptr; } @@ -349,13 +364,14 @@ void GSWndEGL_X11::DestroyNativeResources() bool GSWndEGL_X11::SetWindowText(const char* title) { - if (!m_managed) return true; + if (!m_managed) + return true; - xcb_connection_t *c = XGetXCBConnection(m_NativeDisplay); + xcb_connection_t* c = XGetXCBConnection(m_NativeDisplay); xcb_change_property(c, XCB_PROP_MODE_REPLACE, m_NativeWindow, - XCB_ATOM_WM_NAME, XCB_ATOM_STRING, 8, - strlen (title), title); + XCB_ATOM_WM_NAME, XCB_ATOM_STRING, 8, + strlen(title), title); return true; } @@ -372,7 +388,7 @@ GSWndEGL_WL::GSWndEGL_WL() { } -void *GSWndEGL_WL::CreateNativeDisplay() +void* GSWndEGL_WL::CreateNativeDisplay() { if (m_NativeDisplay == nullptr) m_NativeDisplay = wl_display_connect(NULL); @@ -380,12 +396,12 @@ void *GSWndEGL_WL::CreateNativeDisplay() return (void*)m_NativeDisplay; } -void *GSWndEGL_WL::CreateNativeWindow(int w, int h) +void* GSWndEGL_WL::CreateNativeWindow(int w, int h) { return nullptr; } -void *GSWndEGL_WL::AttachNativeWindow(void *handle) +void* GSWndEGL_WL::AttachNativeWindow(void* handle) { m_NativeWindow = (wl_egl_window*)handle; return handle; @@ -393,7 +409,8 @@ void *GSWndEGL_WL::AttachNativeWindow(void *handle) void GSWndEGL_WL::DestroyNativeResources() { - if (m_NativeDisplay) { + if (m_NativeDisplay) + { wl_display_disconnect(m_NativeDisplay); m_NativeDisplay = nullptr; } diff --git a/plugins/GSdx/Window/GSWndEGL.h b/plugins/GSdx/Window/GSWndEGL.h index efc5ba549f..a786a16ec1 100644 --- a/plugins/GSdx/Window/GSWndEGL.h +++ b/plugins/GSdx/Window/GSWndEGL.h @@ -30,7 +30,7 @@ class GSWndEGL : public GSWndGL { - void *m_native_window; + void* m_native_window; EGLDisplay m_eglDisplay; EGLSurface m_eglSurface; @@ -50,15 +50,15 @@ class GSWndEGL : public GSWndGL public: GSWndEGL(int platform); - virtual ~GSWndEGL() {}; + virtual ~GSWndEGL(){}; bool Create(const std::string& title, int w, int h) final; bool Attach(void* handle, bool managed = true) final; void Detach() final; - virtual void *CreateNativeDisplay() = 0; - virtual void *CreateNativeWindow(int w, int h) = 0; // GSopen1/PSX API - virtual void *AttachNativeWindow(void *handle) = 0; + virtual void* CreateNativeDisplay() = 0; + virtual void* CreateNativeWindow(int w, int h) = 0; // GSopen1/PSX API + virtual void* AttachNativeWindow(void* handle) = 0; virtual void DestroyNativeResources() = 0; GSVector4i GetClientRect(); @@ -71,9 +71,9 @@ public: void Flip() final; // Deprecated API - void Show() final {}; - void Hide() final {}; - void HideFrame() final {}; // DX9 API + void Show() final {} + void Hide() final {} + void HideFrame() final {} // DX9 API virtual void* GetDisplay() = 0; // GSopen1 API virtual void* GetHandle() = 0; // DX API @@ -91,19 +91,19 @@ public: class GSWndEGL_X11 : public GSWndEGL { - Display *m_NativeDisplay; - Window m_NativeWindow; + Display* m_NativeDisplay; + Window m_NativeWindow; - public: +public: GSWndEGL_X11(); - virtual ~GSWndEGL_X11() {}; + virtual ~GSWndEGL_X11(){}; - void* GetDisplay() final { return (void*)m_NativeDisplay;} - void* GetHandle() final {return (void*)&m_NativeWindow;} + void* GetDisplay() final { return (void*)m_NativeDisplay; } + void* GetHandle() final { return (void*)&m_NativeWindow; } - void *CreateNativeDisplay() final; - void *CreateNativeWindow(int w, int h) final; - void *AttachNativeWindow(void *handle) final; + void* CreateNativeDisplay() final; + void* CreateNativeWindow(int w, int h) final; + void* AttachNativeWindow(void* handle) final; void DestroyNativeResources() final; bool SetWindowText(const char* title) final; @@ -121,19 +121,19 @@ class GSWndEGL_X11 : public GSWndEGL class GSWndEGL_WL : public GSWndEGL { - wl_display *m_NativeDisplay; - wl_egl_window *m_NativeWindow; + wl_display* m_NativeDisplay; + wl_egl_window* m_NativeWindow; - public: +public: GSWndEGL_WL(); - virtual ~GSWndEGL_WL() {}; + virtual ~GSWndEGL_WL(){}; - void* GetDisplay() final { return (void*)m_NativeDisplay;} - void* GetHandle() final {return (void*)m_NativeWindow;} + void* GetDisplay() final { return (void*)m_NativeDisplay; } + void* GetHandle() final { return (void*)m_NativeWindow; } - void *CreateNativeDisplay() final; - void *CreateNativeWindow(int w, int h) final; - void *AttachNativeWindow(void *handle) final; + void* CreateNativeDisplay() final; + void* CreateNativeWindow(int w, int h) final; + void* AttachNativeWindow(void* handle) final; void DestroyNativeResources() final; bool SetWindowText(const char* title) final; diff --git a/plugins/GSdx/Window/GSWndWGL.cpp b/plugins/GSdx/Window/GSWndWGL.cpp index 41c925b0b8..745073f6a4 100644 --- a/plugins/GSdx/Window/GSWndWGL.cpp +++ b/plugins/GSdx/Window/GSWndWGL.cpp @@ -26,14 +26,17 @@ static void win_error(const wchar_t* msg, bool fatal = true) { - DWORD errorID = ::GetLastError(); + DWORD errorID = ::GetLastError(); if (errorID) fprintf(stderr, "WIN API ERROR:%ld\t", errorID); - if (fatal) { + if (fatal) + { MessageBox(NULL, msg, L"ERROR", MB_OK | MB_ICONEXCLAMATION); throw GSDXRecoverableError(); - } else { + } + else + { fprintf(stderr, "ERROR:%ls\n", msg); } } @@ -50,13 +53,13 @@ LRESULT CALLBACK GSWndWGL::WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARA { switch (message) { - case WM_CLOSE: - // This takes place before GSClose, so don't destroy the Window so we can clean up. - ShowWindow(hWnd, SW_HIDE); - // DestroyWindow(hWnd); - return 0; - default: - return DefWindowProc(hWnd, message, wParam, lParam); + case WM_CLOSE: + // This takes place before GSClose, so don't destroy the Window so we can clean up. + ShowWindow(hWnd, SW_HIDE); + // DestroyWindow(hWnd); + return 0; + default: + return DefWindowProc(hWnd, message, wParam, lParam); } } @@ -105,10 +108,11 @@ void GSWndWGL::CreateContext(int major, int minor) win_error(L"Failed to init wglCreateContextAttribsARB function pointer"); HGLRC context30 = wglCreateContextAttribsARB(m_NativeDisplay, NULL, context_attribs); - if (!context30) { + if (!context30) + { win_error(L"Failed to create a 3.x context with standard flags", false); // retry with more compatible option for (Mesa on Windows, OpenGL on WINE) - context_attribs[2*2+1] = 0; + context_attribs[2 * 2 + 1] = 0; context30 = wglCreateContextAttribsARB(m_NativeDisplay, NULL, context_attribs); } @@ -125,7 +129,8 @@ void GSWndWGL::CreateContext(int major, int minor) void GSWndWGL::AttachContext() { - if (!IsContextAttached()) { + if (!IsContextAttached()) + { wglMakeCurrent(m_NativeDisplay, m_context); m_ctx_attached = true; } @@ -133,7 +138,8 @@ void GSWndWGL::AttachContext() void GSWndWGL::DetachContext() { - if (IsContextAttached()) { + if (IsContextAttached()) + { wglMakeCurrent(NULL, NULL); m_ctx_attached = false; } @@ -145,10 +151,13 @@ void GSWndWGL::PopulateWndGlFunction() // To ease the process, extension management is itself an extension. Clever isn't it! PFNWGLGETEXTENSIONSSTRINGARBPROC wglGetExtensionsStringARB = (PFNWGLGETEXTENSIONSSTRINGARBPROC)wglGetProcAddress("wglGetExtensionsStringARB"); - if (wglGetExtensionsStringARB) { + if (wglGetExtensionsStringARB) + { const char* ext = wglGetExtensionsStringARB(m_NativeDisplay); m_has_late_vsync = m_swapinterval && ext && strstr(ext, "WGL_EXT_swap_control_tear"); - } else { + } + else + { m_has_late_vsync = false; } } @@ -173,7 +182,8 @@ void GSWndWGL::Detach() // The window still need to be closed DetachContext(); - if (m_context) wglDeleteContext(m_context); + if (m_context) + wglDeleteContext(m_context); m_context = NULL; CloseWGLDisplay(); @@ -184,33 +194,31 @@ void GSWndWGL::Detach() DestroyWindow(m_NativeWindow); m_NativeWindow = NULL; } - } void GSWndWGL::OpenWGLDisplay() { - GLuint PixelFormat; // Holds The Results After Searching For A Match - PIXELFORMATDESCRIPTOR pfd = // pfd Tells Windows How We Want Things To Be - + GLuint PixelFormat; // Holds The Results After Searching For A Match + PIXELFORMATDESCRIPTOR pfd = // pfd Tells Windows How We Want Things To Be { - sizeof(PIXELFORMATDESCRIPTOR), // Size Of This Pixel Format Descriptor - 1, // Version Number - PFD_DRAW_TO_WINDOW | // Format Must Support Window - PFD_SUPPORT_OPENGL | // Format Must Support OpenGL - PFD_DOUBLEBUFFER, // Must Support Double Buffering - PFD_TYPE_RGBA, // Request An RGBA Format - 32, // Select Our Color Depth - 0, 0, 0, 0, 0, 0, // Color Bits Ignored - 0, // 8bit Alpha Buffer - 0, // Shift Bit Ignored - 0, // No Accumulation Buffer - 0, 0, 0, 0, // Accumulation Bits Ignored - 0, // 24Bit Z-Buffer (Depth Buffer) - 8, // 8bit Stencil Buffer - 0, // No Auxiliary Buffer - PFD_MAIN_PLANE, // Main Drawing Layer - 0, // Reserved - 0, 0, 0 // Layer Masks Ignored + sizeof(PIXELFORMATDESCRIPTOR), // Size Of This Pixel Format Descriptor + 1, // Version Number + PFD_DRAW_TO_WINDOW | // Format Must Support Window + PFD_SUPPORT_OPENGL | // Format Must Support OpenGL + PFD_DOUBLEBUFFER, // Must Support Double Buffering + PFD_TYPE_RGBA, // Request An RGBA Format + 32, // Select Our Color Depth + 0, 0, 0, 0, 0, 0, // Color Bits Ignored + 0, // 8bit Alpha Buffer + 0, // Shift Bit Ignored + 0, // No Accumulation Buffer + 0, 0, 0, 0, // Accumulation Bits Ignored + 0, // 24Bit Z-Buffer (Depth Buffer) + 8, // 8bit Stencil Buffer + 0, // No Auxiliary Buffer + PFD_MAIN_PLANE, // Main Drawing Layer + 0, // Reserved + 0, 0, 0 // Layer Masks Ignored }; m_NativeDisplay = GetDC(m_NativeWindow); @@ -239,7 +247,8 @@ void GSWndWGL::CloseWGLDisplay() // modifications bool GSWndWGL::Create(const std::string& title, int w, int h) { - if(m_NativeWindow) return false; + if (m_NativeWindow) + return false; m_managed = true; @@ -293,7 +302,8 @@ bool GSWndWGL::Create(const std::string& title, int w, int h) std::wstring tmp = std::wstring(title.begin(), title.end()); m_NativeWindow = CreateWindow(wc.lpszClassName, tmp.c_str(), style, r.left, r.top, r.width(), r.height(), NULL, NULL, wc.hInstance, (LPVOID)this); - if (m_NativeWindow == NULL) return false; + if (m_NativeWindow == NULL) + return false; OpenWGLDisplay(); @@ -327,7 +337,8 @@ void* GSWndWGL::GetProcAddress(const char* name, bool opt) ptr = (void *)GetProcAddress(module, name); } #endif - if (ptr == NULL) { + if (ptr == NULL) + { if (theApp.GetConfigB("debug_opengl")) fprintf(stderr, "Failed to find %s\n", name); @@ -344,7 +355,8 @@ void GSWndWGL::SetSwapInterval() // m_swapinterval uses an integer as parameter // 0 -> disable vsync // n -> wait n frame - if (m_swapinterval) m_swapinterval(m_vsync); + if (m_swapinterval) + m_swapinterval(m_vsync); } void GSWndWGL::Flip() @@ -357,7 +369,8 @@ void GSWndWGL::Flip() void GSWndWGL::Show() { - if (!m_managed) return; + if (!m_managed) + return; // Used by GSReplay SetForegroundWindow(m_NativeWindow); @@ -378,7 +391,8 @@ void GSWndWGL::HideFrame() bool GSWndWGL::SetWindowText(const char* title) { - if (!m_managed) return false; + if (!m_managed) + return false; const size_t tmp_size = strlen(title) + 1; std::wstring tmp(tmp_size, L'#'); diff --git a/plugins/GSdx/Window/GSWndWGL.h b/plugins/GSdx/Window/GSWndWGL.h index feb1ba7b2a..ab8725af8a 100644 --- a/plugins/GSdx/Window/GSWndWGL.h +++ b/plugins/GSdx/Window/GSWndWGL.h @@ -25,10 +25,10 @@ class GSWndWGL : public GSWndGL { - HWND m_NativeWindow; - HDC m_NativeDisplay; - HGLRC m_context; - bool m_has_late_vsync; + HWND m_NativeWindow; + HDC m_NativeDisplay; + HGLRC m_context; + bool m_has_late_vsync; PFNWGLSWAPINTERVALEXTPROC m_swapinterval; @@ -45,14 +45,14 @@ class GSWndWGL : public GSWndGL public: GSWndWGL(); - virtual ~GSWndWGL() {}; + virtual ~GSWndWGL() {} bool Create(const std::string& title, int w, int h); bool Attach(void* handle, bool managed = true); void Detach(); - void* GetDisplay() {return m_NativeWindow;} - void* GetHandle() {return m_NativeWindow;} + void* GetDisplay() { return m_NativeWindow; } + void* GetHandle() { return m_NativeWindow; } GSVector4i GetClientRect(); bool SetWindowText(const char* title); diff --git a/plugins/GSdx/config.h b/plugins/GSdx/config.h index 8c7aa957a9..05aeab5a74 100644 --- a/plugins/GSdx/config.h +++ b/plugins/GSdx/config.h @@ -36,7 +36,7 @@ #if !defined(NDEBUG) || defined(_DEBUG) || defined(_DEVEL) -#define ENABLE_OGL_DEBUG // Create a debug context and check opengl command status. Allow also to dump various textures/states. +#define ENABLE_OGL_DEBUG // Create a debug context and check opengl command status. Allow also to dump various textures/states. //#define ENABLE_OGL_DEBUG_FENCE //#define ENABLE_OGL_DEBUG_MEM_BW // compute the quantity of data transfered (debug purpose) //#define ENABLE_TRACE_REG // print GS reg write diff --git a/plugins/GSdx/linux_replay.cpp b/plugins/GSdx/linux_replay.cpp index 2eeb866e74..c87753e7f0 100644 --- a/plugins/GSdx/linux_replay.cpp +++ b/plugins/GSdx/linux_replay.cpp @@ -31,37 +31,45 @@ void help() fprintf(stderr, "ARG1 GSdx plugin\n"); fprintf(stderr, "ARG2 .gs file\n"); fprintf(stderr, "ARG3 Ini directory\n"); - if (handle) { + if (handle) + { dlclose(handle); } exit(1); } -char* read_env(const char* var) { +char* read_env(const char* var) +{ char* v = getenv(var); - if (!v) { + if (!v) + { fprintf(stderr, "Failed to get %s\n", var); help(); } return v; } -int main ( int argc, char *argv[] ) +int main(int argc, char* argv[]) { - if (argc < 1) help(); + if (argc < 1) + help(); char* plugin; char* gs; - if (argc > 2) { + if (argc > 2) + { plugin = argv[1]; gs = argv[2]; - } else { + } + else + { plugin = read_env("GSDUMP_SO"); gs = argv[1]; } - handle = dlopen(plugin, RTLD_LAZY|RTLD_GLOBAL); - if (handle == NULL) { + handle = dlopen(plugin, RTLD_LAZY | RTLD_GLOBAL); + if (handle == NULL) + { fprintf(stderr, "Failed to dlopen plugin %s\n", plugin); help(); } @@ -72,17 +80,20 @@ int main ( int argc, char *argv[] ) GSsetSettingsDir_ptr = reinterpret_cast(dlsym(handle, "GSsetSettingsDir")); GSReplay_ptr = reinterpret_cast(dlsym(handle, "GSReplay")); - if (argc == 2) { - char *ini = read_env("GSDUMP_CONF"); + if (argc == 2) + { + char* ini = read_env("GSDUMP_CONF"); GSsetSettingsDir_ptr(ini); - - } else if (argc == 4) { + } + else if (argc == 4) + { GSsetSettingsDir_ptr(argv[3]); - - } else if ( argc == 3) { + } + else if (argc == 3) + { #ifdef XDG_STD - char *val = read_env("HOME"); + char* val = read_env("HOME"); std::string ini_dir(val); ini_dir += "/.config/pcsx2/inis"; @@ -96,7 +107,8 @@ int main ( int argc, char *argv[] ) GSReplay_ptr(gs, 12); - if (handle) { + if (handle) + { dlclose(handle); } } diff --git a/plugins/GSdx/stdafx.cpp b/plugins/GSdx/stdafx.cpp index fd5f23f46b..f20988039b 100644 --- a/plugins/GSdx/stdafx.cpp +++ b/plugins/GSdx/stdafx.cpp @@ -79,32 +79,38 @@ void* fifo_alloc(size_t size, size_t repeat) { ASSERT(s_fh == NULL); - if (repeat >= countof(s_Next)) { + if (repeat >= countof(s_Next)) + { fprintf(stderr, "Memory mapping overflow (%zu >= %u)\n", repeat, countof(s_Next)); return vmalloc(size * repeat, false); // Fallback to default vmalloc } s_fh = CreateFileMapping(INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE, 0, size, nullptr); DWORD errorID = ::GetLastError(); - if (s_fh == NULL) { + if (s_fh == NULL) + { fprintf(stderr, "Failed to reserve memory. WIN API ERROR:%u\n", errorID); return vmalloc(size * repeat, false); // Fallback to default vmalloc } int mmap_segment_failed = 0; void* fifo = MapViewOfFile(s_fh, FILE_MAP_ALL_ACCESS, 0, 0, size); - for (size_t i = 1; i < repeat; i++) { + for (size_t i = 1; i < repeat; i++) + { void* base = (uint8*)fifo + size * i; s_Next[i] = (uint8*)MapViewOfFileEx(s_fh, FILE_MAP_ALL_ACCESS, 0, 0, size, base); errorID = ::GetLastError(); - if (s_Next[i] != base) { + if (s_Next[i] != base) + { mmap_segment_failed++; - if (mmap_segment_failed > 4) { + if (mmap_segment_failed > 4) + { fprintf(stderr, "Memory mapping failed after %d attempts, aborting. WIN API ERROR:%u\n", mmap_segment_failed, errorID); fifo_free(fifo, size, repeat); return vmalloc(size * repeat, false); // Fallback to default vmalloc } - do { + do + { UnmapViewOfFile(s_Next[i]); s_Next[i] = 0; } while (--i > 0); @@ -120,7 +126,8 @@ void fifo_free(void* ptr, size_t size, size_t repeat) { ASSERT(s_fh != NULL); - if (s_fh == NULL) { + if (s_fh == NULL) + { if (ptr != NULL) vmfree(ptr, size); return; @@ -128,8 +135,10 @@ void fifo_free(void* ptr, size_t size, size_t repeat) UnmapViewOfFile(ptr); - for (size_t i = 1; i < countof(s_Next); i++) { - if (s_Next[i] != 0) { + for (size_t i = 1; i < countof(s_Next); i++) + { + if (s_Next[i] != 0) + { UnmapViewOfFile(s_Next[i]); s_Next[i] = 0; } @@ -153,7 +162,8 @@ void* vmalloc(size_t size, bool code) int prot = PROT_READ | PROT_WRITE; int flags = MAP_PRIVATE | MAP_ANONYMOUS; - if(code) { + if (code) + { prot |= PROT_EXEC; #if defined(_M_AMD64) && !defined(__APPLE__) // macOS doesn't allow any mappings in the first 4GB of address space @@ -181,9 +191,12 @@ void* fifo_alloc(size_t size, size_t repeat) const char* file_name = "/GSDX.mem"; s_shm_fd = shm_open(file_name, O_RDWR | O_CREAT | O_EXCL, 0600); - if (s_shm_fd != -1) { + if (s_shm_fd != -1) + { shm_unlink(file_name); // file is deleted but descriptor is still open - } else { + } + else + { fprintf(stderr, "Failed to open %s due to %s\n", file_name, strerror(errno)); return nullptr; } @@ -193,7 +206,8 @@ void* fifo_alloc(size_t size, size_t repeat) void* fifo = mmap(nullptr, size * repeat, PROT_READ | PROT_WRITE, MAP_SHARED, s_shm_fd, 0); - for (size_t i = 1; i < repeat; i++) { + for (size_t i = 1; i < repeat; i++) + { void* base = (uint8*)fifo + size * i; uint8* next = (uint8*)mmap(base, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, s_shm_fd, 0); if (next != base) @@ -226,7 +240,7 @@ void fifo_free(void* ptr, size_t size, size_t repeat) void* _aligned_malloc(size_t size, size_t alignment) { - void *ret = 0; + void* ret = 0; posix_memalign(&ret, alignment, size); return ret; } diff --git a/plugins/GSdx/stdafx.h b/plugins/GSdx/stdafx.h index e2e9fff37f..5ad31ca7dc 100644 --- a/plugins/GSdx/stdafx.h +++ b/plugins/GSdx/stdafx.h @@ -31,7 +31,7 @@ #include "targetver.h" -#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers +#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers #include #include @@ -90,7 +90,7 @@ typedef int64 sint64; #include #include -#if __GNUC__ > 5 || ( __GNUC__ == 5 && __GNUC_MINOR__ >= 4 ) +#if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 4) #include #include #endif @@ -149,35 +149,35 @@ typedef int64 sint64; #ifdef _MSC_VER - #define EXPORT_C_(type) extern "C" type __stdcall - #define EXPORT_C EXPORT_C_(void) + #define EXPORT_C_(type) extern "C" type __stdcall + #define EXPORT_C EXPORT_C_(void) - #define ALIGN_STACK(n) alignas(n) int dummy__; + #define ALIGN_STACK(n) alignas(n) int dummy__; #else - #ifndef __fastcall - #define __fastcall __attribute__((fastcall)) - #endif + #ifndef __fastcall + #define __fastcall __attribute__((fastcall)) + #endif - #define EXPORT_C_(type) extern "C" __attribute__((stdcall,externally_visible,visibility("default"))) type - #define EXPORT_C EXPORT_C_(void) + #define EXPORT_C_(type) extern "C" __attribute__((stdcall, externally_visible, visibility("default"))) type + #define EXPORT_C EXPORT_C_(void) - #ifdef __GNUC__ - #define __forceinline __inline__ __attribute__((always_inline,unused)) - // #define __forceinline __inline__ __attribute__((__always_inline__,__gnu_inline__)) - #define __assume(c) do { if (!(c)) __builtin_unreachable(); } while(0) + #ifdef __GNUC__ + #define __forceinline __inline__ __attribute__((always_inline,unused)) + // #define __forceinline __inline__ __attribute__((__always_inline__, __gnu_inline__)) + #define __assume(c) do { if (!(c)) __builtin_unreachable(); } while(0) - // GCC removes the variable as dead code and generates some warnings. - // Stack is automatically realigned due to SSE/AVX operations - #define ALIGN_STACK(n) (void)0; + // GCC removes the variable as dead code and generates some warnings. + // Stack is automatically realigned due to SSE/AVX operations + #define ALIGN_STACK(n) (void)0; - #else + #else - // TODO Check clang behavior - #define ALIGN_STACK(n) alignas(n) int dummy__; + // TODO Check clang behavior + #define ALIGN_STACK(n) alignas(n) int dummy__; - #endif + #endif #endif @@ -186,23 +186,23 @@ typedef int64 sint64; #ifndef RESTRICT - #ifdef __INTEL_COMPILER + #ifdef __INTEL_COMPILER - #define RESTRICT restrict + #define RESTRICT restrict - #elif defined(_MSC_VER) + #elif defined(_MSC_VER) - #define RESTRICT __restrict + #define RESTRICT __restrict - #elif defined(__GNUC__) + #elif defined(__GNUC__) - #define RESTRICT __restrict__ + #define RESTRICT __restrict__ - #else + #else - #define RESTRICT + #define RESTRICT - #endif + #endif #endif @@ -296,7 +296,8 @@ typedef int64 sint64; #endif - static inline void _aligned_free(void* p) { + static inline void _aligned_free(void* p) + { free(p); } @@ -361,45 +362,49 @@ extern void fifo_free(void* ptr, size_t size, size_t repeat); // Note: GL messages are present in common code, so in all renderers. #define GL_INSERT(type, code, sev, ...) \ - do if (glDebugMessageInsert) glDebugMessageInsert(GL_DEBUG_SOURCE_APPLICATION, type, code, sev, -1, format(__VA_ARGS__).c_str()); while(0); + do \ + if (glDebugMessageInsert) glDebugMessageInsert(GL_DEBUG_SOURCE_APPLICATION, type, code, sev, -1, format(__VA_ARGS__).c_str()); \ + while(0); #if defined(_DEBUG) -#define GL_CACHE(...) GL_INSERT(GL_DEBUG_TYPE_OTHER, 0xFEAD, GL_DEBUG_SEVERITY_NOTIFICATION, __VA_ARGS__) +# define GL_CACHE(...) GL_INSERT(GL_DEBUG_TYPE_OTHER, 0xFEAD, GL_DEBUG_SEVERITY_NOTIFICATION, __VA_ARGS__) #else -#define GL_CACHE(...) (void)(0); +# define GL_CACHE(...) (void)(0); #endif #if defined(ENABLE_TRACE_REG) && defined(_DEBUG) -#define GL_REG(...) GL_INSERT(GL_DEBUG_TYPE_OTHER, 0xB0B0, GL_DEBUG_SEVERITY_NOTIFICATION, __VA_ARGS__) +# define GL_REG(...) GL_INSERT(GL_DEBUG_TYPE_OTHER, 0xB0B0, GL_DEBUG_SEVERITY_NOTIFICATION, __VA_ARGS__) #else -#define GL_REG(...) (void)(0); +# define GL_REG(...) (void)(0); #endif #if defined(ENABLE_EXTRA_LOG) && defined(_DEBUG) -#define GL_DBG(...) GL_INSERT(GL_DEBUG_TYPE_OTHER, 0xD0D0, GL_DEBUG_SEVERITY_NOTIFICATION, __VA_ARGS__) +# define GL_DBG(...) GL_INSERT(GL_DEBUG_TYPE_OTHER, 0xD0D0, GL_DEBUG_SEVERITY_NOTIFICATION, __VA_ARGS__) #else -#define GL_DBG(...) (void)(0); +# define GL_DBG(...) (void)(0); #endif #if defined(ENABLE_OGL_DEBUG) -struct GLAutoPop { - ~GLAutoPop() { - if (glPopDebugGroup) - glPopDebugGroup(); - } -}; + struct GLAutoPop + { + ~GLAutoPop() + { + if (glPopDebugGroup) + glPopDebugGroup(); + } + }; -#define GL_PUSH_(...) do if (glPushDebugGroup) glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0xBAD, -1, format(__VA_ARGS__).c_str()); while(0); -#define GL_PUSH(...) do if (glPushDebugGroup) glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0xBAD, -1, format(__VA_ARGS__).c_str()); while(0); GLAutoPop gl_auto_pop; -#define GL_POP() do if (glPopDebugGroup) glPopDebugGroup(); while(0); -#define GL_INS(...) GL_INSERT(GL_DEBUG_TYPE_ERROR, 0xDEAD, GL_DEBUG_SEVERITY_MEDIUM, __VA_ARGS__) -#define GL_PERF(...) GL_INSERT(GL_DEBUG_TYPE_PERFORMANCE, 0xFEE1, GL_DEBUG_SEVERITY_NOTIFICATION, __VA_ARGS__) + #define GL_PUSH_(...) do if (glPushDebugGroup) glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0xBAD, -1, format(__VA_ARGS__).c_str()); while(0); + #define GL_PUSH(...) do if (glPushDebugGroup) glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0xBAD, -1, format(__VA_ARGS__).c_str()); while(0); GLAutoPop gl_auto_pop; + #define GL_POP() do if (glPopDebugGroup) glPopDebugGroup(); while(0); + #define GL_INS(...) GL_INSERT(GL_DEBUG_TYPE_ERROR, 0xDEAD, GL_DEBUG_SEVERITY_MEDIUM, __VA_ARGS__) + #define GL_PERF(...) GL_INSERT(GL_DEBUG_TYPE_PERFORMANCE, 0xFEE1, GL_DEBUG_SEVERITY_NOTIFICATION, __VA_ARGS__) #else -#define GL_PUSH_(...) (void)(0); -#define GL_PUSH(...) (void)(0); -#define GL_POP() (void)(0); -#define GL_INS(...) (void)(0); -#define GL_PERF(...) (void)(0); + #define GL_PUSH_(...) (void)(0); + #define GL_PUSH(...) (void)(0); + #define GL_POP() (void)(0); + #define GL_INS(...) (void)(0); + #define GL_PERF(...) (void)(0); #endif // Helper path to dump texture